1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
31 #include "stringpool.h"
38 #include "diagnostic.h"
41 #include "fold-const.h"
44 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
58 #include "tm-constrs.h"
61 #include "sched-int.h"
63 #include "tree-pass.h"
65 #include "pass_manager.h"
66 #include "target-globals.h"
67 #include "gimple-iterator.h"
68 #include "tree-vectorizer.h"
69 #include "shrink-wrap.h"
72 #include "tree-iterator.h"
73 #include "tree-chkp.h"
76 #include "case-cfn-macros.h"
77 #include "regrename.h"
80 /* This file should be included last. */
81 #include "target-def.h"
/* Forward declarations of static helpers defined later in this file
   (PE/COFF symbol legitimization and operand-address printing).  */
83 static rtx legitimize_dllimport_symbol (rtx, bool);
84 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
85 static rtx legitimize_pe_coff_symbol (rtx, bool);
86 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
/* Fallback definition when the target headers do not provide one.
   NOTE(review): -1 presumably means "no stack-limit checking" -- confirm
   against the target macro documentation.  The matching #endif is not
   visible in this excerpt.  */
88 #ifndef CHECK_STACK_LIMIT
89 #define CHECK_STACK_LIMIT (-1)
/* NOTE(review): the final arm of this conditional chain (the index used
   for modes other than QI/HI/SI/DImode) is not visible in this excerpt;
   kept verbatim.  */
92 /* Return index of given mode in mult and division cost tables. */
93 #define MODE_INDEX(mode) \
94 ((mode) == QImode ? 0 \
95 : (mode) == HImode ? 1 \
96 : (mode) == SImode ? 2 \
97 : (mode) == DImode ? 3 \
100 /* Processor costs (relative to an add) */
101 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Size cost of N "average" instructions, expressed in bytes so that it is
   directly comparable with COSTS_N_INSNS values.  */
102 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop table entry for the unused 64-bit slot of the
   32-bit-only cost tables below.  */
104 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* Cost tables used when tuning for size: costs are measured in encoded
   instruction bytes (COSTS_N_BYTES), not cycles.  Element [0] of each
   stringop table is for 32-bit code, [1] for 64-bit code.
   NOTE(review): some interior lines of these initializers are not visible
   in this excerpt; kept verbatim.  */
106 static stringop_algs ix86_size_memcpy[2] = {
107 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
109 static stringop_algs ix86_size_memset[2] = {
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
114 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
115 COSTS_N_BYTES (2), /* cost of an add instruction */
116 COSTS_N_BYTES (3), /* cost of a lea instruction */
117 COSTS_N_BYTES (2), /* variable shift costs */
118 COSTS_N_BYTES (3), /* constant shift costs */
119 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
120 COSTS_N_BYTES (3), /* HI */
121 COSTS_N_BYTES (3), /* SI */
122 COSTS_N_BYTES (3), /* DI */
123 COSTS_N_BYTES (5)}, /* other */
124 0, /* cost of multiply per each bit set */
125 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
126 COSTS_N_BYTES (3), /* HI */
127 COSTS_N_BYTES (3), /* SI */
128 COSTS_N_BYTES (3), /* DI */
129 COSTS_N_BYTES (5)}, /* other */
130 COSTS_N_BYTES (3), /* cost of movsx */
131 COSTS_N_BYTES (3), /* cost of movzx */
132 0, /* "large" insn */
134 2, /* cost for loading QImode using movzbl */
135 {2, 2, 2}, /* cost of loading integer registers
136 in QImode, HImode and SImode.
137 Relative to reg-reg move (2). */
138 {2, 2, 2}, /* cost of storing integer registers */
139 2, /* cost of reg,reg fld/fst */
140 {2, 2, 2}, /* cost of loading fp registers
141 in SFmode, DFmode and XFmode */
142 {2, 2, 2}, /* cost of storing fp registers
143 in SFmode, DFmode and XFmode */
144 3, /* cost of moving MMX register */
145 {3, 3}, /* cost of loading MMX registers
146 in SImode and DImode */
147 {3, 3}, /* cost of storing MMX registers
148 in SImode and DImode */
149 3, /* cost of moving SSE register */
150 {3, 3, 3}, /* cost of loading SSE registers
151 in SImode, DImode and TImode */
152 {3, 3, 3}, /* cost of storing SSE registers
153 in SImode, DImode and TImode */
154 3, /* MMX or SSE register to integer */
155 0, /* size of l1 cache */
156 0, /* size of l2 cache */
157 0, /* size of prefetch block */
158 0, /* number of parallel prefetches */
160 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
161 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
162 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
163 COSTS_N_BYTES (2), /* cost of FABS instruction. */
164 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
165 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
168 1, /* scalar_stmt_cost. */
169 1, /* scalar load_cost. */
170 1, /* scalar_store_cost. */
171 1, /* vec_stmt_cost. */
172 1, /* vec_to_scalar_cost. */
173 1, /* scalar_to_vec_cost. */
174 1, /* vec_align_load_cost. */
175 1, /* vec_unalign_load_cost. */
176 1, /* vec_store_cost. */
177 1, /* cond_taken_branch_cost. */
178 1, /* cond_not_taken_branch_cost. */
/* Cost model for the original Intel 80386.
   NOTE(review): some interior lines of these initializers are not visible
   in this excerpt; kept verbatim.  */
181 /* Processor costs (relative to an add) */
182 static stringop_algs i386_memcpy[2] = {
183 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
184 DUMMY_STRINGOP_ALGS};
185 static stringop_algs i386_memset[2] = {
186 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
187 DUMMY_STRINGOP_ALGS};
190 struct processor_costs i386_cost = { /* 386 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (6), /* HI */
197 COSTS_N_INSNS (6), /* SI */
198 COSTS_N_INSNS (6), /* DI */
199 COSTS_N_INSNS (6)}, /* other */
200 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (23), /* HI */
203 COSTS_N_INSNS (23), /* SI */
204 COSTS_N_INSNS (23), /* DI */
205 COSTS_N_INSNS (23)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of l1 cache */
232 0, /* size of l2 cache */
233 0, /* size of prefetch block */
234 0, /* number of parallel prefetches */
236 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
237 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
238 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
239 COSTS_N_INSNS (22), /* cost of FABS instruction. */
240 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
241 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
244 1, /* scalar_stmt_cost. */
245 1, /* scalar load_cost. */
246 1, /* scalar_store_cost. */
247 1, /* vec_stmt_cost. */
248 1, /* vec_to_scalar_cost. */
249 1, /* scalar_to_vec_cost. */
250 1, /* vec_align_load_cost. */
251 2, /* vec_unalign_load_cost. */
252 1, /* vec_store_cost. */
253 3, /* cond_taken_branch_cost. */
254 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Intel 80486.
   NOTE(review): some interior lines of these initializers are not visible
   in this excerpt; kept verbatim.  */
257 static stringop_algs i486_memcpy[2] = {
258 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
259 DUMMY_STRINGOP_ALGS};
260 static stringop_algs i486_memset[2] = {
261 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
262 DUMMY_STRINGOP_ALGS};
265 struct processor_costs i486_cost = { /* 486 specific costs */
266 COSTS_N_INSNS (1), /* cost of an add instruction */
267 COSTS_N_INSNS (1), /* cost of a lea instruction */
268 COSTS_N_INSNS (3), /* variable shift costs */
269 COSTS_N_INSNS (2), /* constant shift costs */
270 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
271 COSTS_N_INSNS (12), /* HI */
272 COSTS_N_INSNS (12), /* SI */
273 COSTS_N_INSNS (12), /* DI */
274 COSTS_N_INSNS (12)}, /* other */
275 1, /* cost of multiply per each bit set */
276 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
277 COSTS_N_INSNS (40), /* HI */
278 COSTS_N_INSNS (40), /* SI */
279 COSTS_N_INSNS (40), /* DI */
280 COSTS_N_INSNS (40)}, /* other */
281 COSTS_N_INSNS (3), /* cost of movsx */
282 COSTS_N_INSNS (2), /* cost of movzx */
283 15, /* "large" insn */
285 4, /* cost for loading QImode using movzbl */
286 {2, 4, 2}, /* cost of loading integer registers
287 in QImode, HImode and SImode.
288 Relative to reg-reg move (2). */
289 {2, 4, 2}, /* cost of storing integer registers */
290 2, /* cost of reg,reg fld/fst */
291 {8, 8, 8}, /* cost of loading fp registers
292 in SFmode, DFmode and XFmode */
293 {8, 8, 8}, /* cost of storing fp registers
294 in SFmode, DFmode and XFmode */
295 2, /* cost of moving MMX register */
296 {4, 8}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {4, 8}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {4, 8, 16}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {4, 8, 16}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 3, /* MMX or SSE register to integer */
306 4, /* size of l1 cache. 486 has 8kB cache
307 shared for code and data, so 4kB is
308 not really precise. */
309 4, /* size of l2 cache */
310 0, /* size of prefetch block */
311 0, /* number of parallel prefetches */
313 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
314 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
315 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
316 COSTS_N_INSNS (3), /* cost of FABS instruction. */
317 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
318 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
321 1, /* scalar_stmt_cost. */
322 1, /* scalar load_cost. */
323 1, /* scalar_store_cost. */
324 1, /* vec_stmt_cost. */
325 1, /* vec_to_scalar_cost. */
326 1, /* scalar_to_vec_cost. */
327 1, /* vec_align_load_cost. */
328 2, /* vec_unalign_load_cost. */
329 1, /* vec_store_cost. */
330 3, /* cond_taken_branch_cost. */
331 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Intel Pentium.
   NOTE(review): some interior lines of these initializers are not visible
   in this excerpt; kept verbatim.  */
334 static stringop_algs pentium_memcpy[2] = {
335 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
336 DUMMY_STRINGOP_ALGS};
337 static stringop_algs pentium_memset[2] = {
338 {libcall, {{-1, rep_prefix_4_byte, false}}},
339 DUMMY_STRINGOP_ALGS};
342 struct processor_costs pentium_cost = {
343 COSTS_N_INSNS (1), /* cost of an add instruction */
344 COSTS_N_INSNS (1), /* cost of a lea instruction */
345 COSTS_N_INSNS (4), /* variable shift costs */
346 COSTS_N_INSNS (1), /* constant shift costs */
347 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
348 COSTS_N_INSNS (11), /* HI */
349 COSTS_N_INSNS (11), /* SI */
350 COSTS_N_INSNS (11), /* DI */
351 COSTS_N_INSNS (11)}, /* other */
352 0, /* cost of multiply per each bit set */
353 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
354 COSTS_N_INSNS (25), /* HI */
355 COSTS_N_INSNS (25), /* SI */
356 COSTS_N_INSNS (25), /* DI */
357 COSTS_N_INSNS (25)}, /* other */
358 COSTS_N_INSNS (3), /* cost of movsx */
359 COSTS_N_INSNS (2), /* cost of movzx */
360 8, /* "large" insn */
362 6, /* cost for loading QImode using movzbl */
363 {2, 4, 2}, /* cost of loading integer registers
364 in QImode, HImode and SImode.
365 Relative to reg-reg move (2). */
366 {2, 4, 2}, /* cost of storing integer registers */
367 2, /* cost of reg,reg fld/fst */
368 {2, 2, 6}, /* cost of loading fp registers
369 in SFmode, DFmode and XFmode */
370 {4, 4, 6}, /* cost of storing fp registers
371 in SFmode, DFmode and XFmode */
372 8, /* cost of moving MMX register */
373 {8, 8}, /* cost of loading MMX registers
374 in SImode and DImode */
375 {8, 8}, /* cost of storing MMX registers
376 in SImode and DImode */
377 2, /* cost of moving SSE register */
378 {4, 8, 16}, /* cost of loading SSE registers
379 in SImode, DImode and TImode */
380 {4, 8, 16}, /* cost of storing SSE registers
381 in SImode, DImode and TImode */
382 3, /* MMX or SSE register to integer */
383 8, /* size of l1 cache. */
384 8, /* size of l2 cache */
385 0, /* size of prefetch block */
386 0, /* number of parallel prefetches */
388 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
389 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
390 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
391 COSTS_N_INSNS (1), /* cost of FABS instruction. */
392 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
393 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
396 1, /* scalar_stmt_cost. */
397 1, /* scalar load_cost. */
398 1, /* scalar_store_cost. */
399 1, /* vec_stmt_cost. */
400 1, /* vec_to_scalar_cost. */
401 1, /* scalar_to_vec_cost. */
402 1, /* vec_align_load_cost. */
403 2, /* vec_unalign_load_cost. */
404 1, /* vec_store_cost. */
405 3, /* cond_taken_branch_cost. */
406 1, /* cond_not_taken_branch_cost. */
/* Cost model for Intel Lakemont.  The table values mirror pentium_cost
   except for the shift and lea entries.
   NOTE(review): some interior lines of this initializer are not visible
   in this excerpt; kept verbatim.  */
410 struct processor_costs lakemont_cost = {
411 COSTS_N_INSNS (1), /* cost of an add instruction */
412 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
413 COSTS_N_INSNS (1), /* variable shift costs */
414 COSTS_N_INSNS (1), /* constant shift costs */
415 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
416 COSTS_N_INSNS (11), /* HI */
417 COSTS_N_INSNS (11), /* SI */
418 COSTS_N_INSNS (11), /* DI */
419 COSTS_N_INSNS (11)}, /* other */
420 0, /* cost of multiply per each bit set */
421 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
422 COSTS_N_INSNS (25), /* HI */
423 COSTS_N_INSNS (25), /* SI */
424 COSTS_N_INSNS (25), /* DI */
425 COSTS_N_INSNS (25)}, /* other */
426 COSTS_N_INSNS (3), /* cost of movsx */
427 COSTS_N_INSNS (2), /* cost of movzx */
428 8, /* "large" insn */
430 6, /* cost for loading QImode using movzbl */
431 {2, 4, 2}, /* cost of loading integer registers
432 in QImode, HImode and SImode.
433 Relative to reg-reg move (2). */
434 {2, 4, 2}, /* cost of storing integer registers */
435 2, /* cost of reg,reg fld/fst */
436 {2, 2, 6}, /* cost of loading fp registers
437 in SFmode, DFmode and XFmode */
438 {4, 4, 6}, /* cost of storing fp registers
439 in SFmode, DFmode and XFmode */
440 8, /* cost of moving MMX register */
441 {8, 8}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {8, 8}, /* cost of storing MMX registers
444 in SImode and DImode */
445 2, /* cost of moving SSE register */
446 {4, 8, 16}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {4, 8, 16}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 3, /* MMX or SSE register to integer */
451 8, /* size of l1 cache. */
452 8, /* size of l2 cache */
453 0, /* size of prefetch block */
454 0, /* number of parallel prefetches */
456 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
457 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
458 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
459 COSTS_N_INSNS (1), /* cost of FABS instruction. */
460 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
461 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
464 1, /* scalar_stmt_cost. */
465 1, /* scalar load_cost. */
466 1, /* scalar_store_cost. */
467 1, /* vec_stmt_cost. */
468 1, /* vec_to_scalar_cost. */
469 1, /* scalar_to_vec_cost. */
470 1, /* vec_align_load_cost. */
471 2, /* vec_unalign_load_cost. */
472 1, /* vec_store_cost. */
473 3, /* cond_taken_branch_cost. */
474 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): some interior lines of the initializers below are not
   visible in this excerpt; kept verbatim.  */
477 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
478 (we ensure the alignment). For small blocks inline loop is still a
479 noticeable win, for bigger blocks either rep movsl or rep movsb is
480 way to go. Rep movsb has apparently more expensive startup time in CPU,
481 but after 4K the difference is down in the noise. */
482 static stringop_algs pentiumpro_memcpy[2] = {
483 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
484 {8192, rep_prefix_4_byte, false},
485 {-1, rep_prefix_1_byte, false}}},
486 DUMMY_STRINGOP_ALGS};
487 static stringop_algs pentiumpro_memset[2] = {
488 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
489 {8192, rep_prefix_4_byte, false},
490 {-1, libcall, false}}},
491 DUMMY_STRINGOP_ALGS};
493 struct processor_costs pentiumpro_cost = {
494 COSTS_N_INSNS (1), /* cost of an add instruction */
495 COSTS_N_INSNS (1), /* cost of a lea instruction */
496 COSTS_N_INSNS (1), /* variable shift costs */
497 COSTS_N_INSNS (1), /* constant shift costs */
498 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
499 COSTS_N_INSNS (4), /* HI */
500 COSTS_N_INSNS (4), /* SI */
501 COSTS_N_INSNS (4), /* DI */
502 COSTS_N_INSNS (4)}, /* other */
503 0, /* cost of multiply per each bit set */
504 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
505 COSTS_N_INSNS (17), /* HI */
506 COSTS_N_INSNS (17), /* SI */
507 COSTS_N_INSNS (17), /* DI */
508 COSTS_N_INSNS (17)}, /* other */
509 COSTS_N_INSNS (1), /* cost of movsx */
510 COSTS_N_INSNS (1), /* cost of movzx */
511 8, /* "large" insn */
513 2, /* cost for loading QImode using movzbl */
514 {4, 4, 4}, /* cost of loading integer registers
515 in QImode, HImode and SImode.
516 Relative to reg-reg move (2). */
517 {2, 2, 2}, /* cost of storing integer registers */
518 2, /* cost of reg,reg fld/fst */
519 {2, 2, 6}, /* cost of loading fp registers
520 in SFmode, DFmode and XFmode */
521 {4, 4, 6}, /* cost of storing fp registers
522 in SFmode, DFmode and XFmode */
523 2, /* cost of moving MMX register */
524 {2, 2}, /* cost of loading MMX registers
525 in SImode and DImode */
526 {2, 2}, /* cost of storing MMX registers
527 in SImode and DImode */
528 2, /* cost of moving SSE register */
529 {2, 2, 8}, /* cost of loading SSE registers
530 in SImode, DImode and TImode */
531 {2, 2, 8}, /* cost of storing SSE registers
532 in SImode, DImode and TImode */
533 3, /* MMX or SSE register to integer */
534 8, /* size of l1 cache. */
535 256, /* size of l2 cache */
536 32, /* size of prefetch block */
537 6, /* number of parallel prefetches */
539 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
540 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
541 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
542 COSTS_N_INSNS (2), /* cost of FABS instruction. */
543 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
544 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
547 1, /* scalar_stmt_cost. */
548 1, /* scalar load_cost. */
549 1, /* scalar_store_cost. */
550 1, /* vec_stmt_cost. */
551 1, /* vec_to_scalar_cost. */
552 1, /* scalar_to_vec_cost. */
553 1, /* vec_align_load_cost. */
554 2, /* vec_unalign_load_cost. */
555 1, /* vec_store_cost. */
556 3, /* cond_taken_branch_cost. */
557 1, /* cond_not_taken_branch_cost. */
/* Cost model for AMD Geode.
   NOTE(review): some interior lines of these initializers are not visible
   in this excerpt; kept verbatim.  */
560 static stringop_algs geode_memcpy[2] = {
561 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
562 DUMMY_STRINGOP_ALGS};
563 static stringop_algs geode_memset[2] = {
564 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
565 DUMMY_STRINGOP_ALGS};
567 struct processor_costs geode_cost = {
568 COSTS_N_INSNS (1), /* cost of an add instruction */
569 COSTS_N_INSNS (1), /* cost of a lea instruction */
570 COSTS_N_INSNS (2), /* variable shift costs */
571 COSTS_N_INSNS (1), /* constant shift costs */
572 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
573 COSTS_N_INSNS (4), /* HI */
574 COSTS_N_INSNS (7), /* SI */
575 COSTS_N_INSNS (7), /* DI */
576 COSTS_N_INSNS (7)}, /* other */
577 0, /* cost of multiply per each bit set */
578 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
579 COSTS_N_INSNS (23), /* HI */
580 COSTS_N_INSNS (39), /* SI */
581 COSTS_N_INSNS (39), /* DI */
582 COSTS_N_INSNS (39)}, /* other */
583 COSTS_N_INSNS (1), /* cost of movsx */
584 COSTS_N_INSNS (1), /* cost of movzx */
585 8, /* "large" insn */
587 1, /* cost for loading QImode using movzbl */
588 {1, 1, 1}, /* cost of loading integer registers
589 in QImode, HImode and SImode.
590 Relative to reg-reg move (2). */
591 {1, 1, 1}, /* cost of storing integer registers */
592 1, /* cost of reg,reg fld/fst */
593 {1, 1, 1}, /* cost of loading fp registers
594 in SFmode, DFmode and XFmode */
595 {4, 6, 6}, /* cost of storing fp registers
596 in SFmode, DFmode and XFmode */
598 2, /* cost of moving MMX register */
599 {2, 2}, /* cost of loading MMX registers
600 in SImode and DImode */
601 {2, 2}, /* cost of storing MMX registers
602 in SImode and DImode */
603 2, /* cost of moving SSE register */
604 {2, 2, 8}, /* cost of loading SSE registers
605 in SImode, DImode and TImode */
606 {2, 2, 8}, /* cost of storing SSE registers
607 in SImode, DImode and TImode */
608 3, /* MMX or SSE register to integer */
609 64, /* size of l1 cache. */
610 128, /* size of l2 cache. */
611 32, /* size of prefetch block */
612 1, /* number of parallel prefetches */
614 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (1), /* cost of FABS instruction. */
618 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
622 1, /* scalar_stmt_cost. */
623 1, /* scalar load_cost. */
624 1, /* scalar_store_cost. */
625 1, /* vec_stmt_cost. */
626 1, /* vec_to_scalar_cost. */
627 1, /* scalar_to_vec_cost. */
628 1, /* vec_align_load_cost. */
629 2, /* vec_unalign_load_cost. */
630 1, /* vec_store_cost. */
631 3, /* cond_taken_branch_cost. */
632 1, /* cond_not_taken_branch_cost. */
/* Cost model for AMD K6.
   NOTE(review): some interior lines of these initializers are not visible
   in this excerpt; kept verbatim.  */
635 static stringop_algs k6_memcpy[2] = {
636 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
637 DUMMY_STRINGOP_ALGS};
638 static stringop_algs k6_memset[2] = {
639 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
640 DUMMY_STRINGOP_ALGS};
642 struct processor_costs k6_cost = {
643 COSTS_N_INSNS (1), /* cost of an add instruction */
644 COSTS_N_INSNS (2), /* cost of a lea instruction */
645 COSTS_N_INSNS (1), /* variable shift costs */
646 COSTS_N_INSNS (1), /* constant shift costs */
647 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
648 COSTS_N_INSNS (3), /* HI */
649 COSTS_N_INSNS (3), /* SI */
650 COSTS_N_INSNS (3), /* DI */
651 COSTS_N_INSNS (3)}, /* other */
652 0, /* cost of multiply per each bit set */
653 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
654 COSTS_N_INSNS (18), /* HI */
655 COSTS_N_INSNS (18), /* SI */
656 COSTS_N_INSNS (18), /* DI */
657 COSTS_N_INSNS (18)}, /* other */
658 COSTS_N_INSNS (2), /* cost of movsx */
659 COSTS_N_INSNS (2), /* cost of movzx */
660 8, /* "large" insn */
662 3, /* cost for loading QImode using movzbl */
663 {4, 5, 4}, /* cost of loading integer registers
664 in QImode, HImode and SImode.
665 Relative to reg-reg move (2). */
666 {2, 3, 2}, /* cost of storing integer registers */
667 4, /* cost of reg,reg fld/fst */
668 {6, 6, 6}, /* cost of loading fp registers
669 in SFmode, DFmode and XFmode */
670 {4, 4, 4}, /* cost of storing fp registers
671 in SFmode, DFmode and XFmode */
672 2, /* cost of moving MMX register */
673 {2, 2}, /* cost of loading MMX registers
674 in SImode and DImode */
675 {2, 2}, /* cost of storing MMX registers
676 in SImode and DImode */
677 2, /* cost of moving SSE register */
678 {2, 2, 8}, /* cost of loading SSE registers
679 in SImode, DImode and TImode */
680 {2, 2, 8}, /* cost of storing SSE registers
681 in SImode, DImode and TImode */
682 6, /* MMX or SSE register to integer */
683 32, /* size of l1 cache. */
684 32, /* size of l2 cache. Some models
685 have integrated l2 cache, but
686 optimizing for k6 is not important
687 enough to worry about that. */
688 32, /* size of prefetch block */
689 1, /* number of parallel prefetches */
691 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
692 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
693 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
694 COSTS_N_INSNS (2), /* cost of FABS instruction. */
695 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
696 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
699 1, /* scalar_stmt_cost. */
700 1, /* scalar load_cost. */
701 1, /* scalar_store_cost. */
702 1, /* vec_stmt_cost. */
703 1, /* vec_to_scalar_cost. */
704 1, /* scalar_to_vec_cost. */
705 1, /* vec_align_load_cost. */
706 2, /* vec_unalign_load_cost. */
707 1, /* vec_store_cost. */
708 3, /* cond_taken_branch_cost. */
709 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): some interior lines of the initializers below are not
   visible in this excerpt; kept verbatim.  */
712 /* For some reason, Athlon deals better with REP prefix (relative to loops)
713 compared to K8. Alignment becomes important after 8 bytes for memcpy and
714 128 bytes for memset. */
715 static stringop_algs athlon_memcpy[2] = {
716 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
717 DUMMY_STRINGOP_ALGS};
718 static stringop_algs athlon_memset[2] = {
719 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
720 DUMMY_STRINGOP_ALGS};
722 struct processor_costs athlon_cost = {
723 COSTS_N_INSNS (1), /* cost of an add instruction */
724 COSTS_N_INSNS (2), /* cost of a lea instruction */
725 COSTS_N_INSNS (1), /* variable shift costs */
726 COSTS_N_INSNS (1), /* constant shift costs */
727 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
728 COSTS_N_INSNS (5), /* HI */
729 COSTS_N_INSNS (5), /* SI */
730 COSTS_N_INSNS (5), /* DI */
731 COSTS_N_INSNS (5)}, /* other */
732 0, /* cost of multiply per each bit set */
733 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
734 COSTS_N_INSNS (26), /* HI */
735 COSTS_N_INSNS (42), /* SI */
736 COSTS_N_INSNS (74), /* DI */
737 COSTS_N_INSNS (74)}, /* other */
738 COSTS_N_INSNS (1), /* cost of movsx */
739 COSTS_N_INSNS (1), /* cost of movzx */
740 8, /* "large" insn */
742 4, /* cost for loading QImode using movzbl */
743 {3, 4, 3}, /* cost of loading integer registers
744 in QImode, HImode and SImode.
745 Relative to reg-reg move (2). */
746 {3, 4, 3}, /* cost of storing integer registers */
747 4, /* cost of reg,reg fld/fst */
748 {4, 4, 12}, /* cost of loading fp registers
749 in SFmode, DFmode and XFmode */
750 {6, 6, 8}, /* cost of storing fp registers
751 in SFmode, DFmode and XFmode */
752 2, /* cost of moving MMX register */
753 {4, 4}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {4, 4, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 5}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 5, /* MMX or SSE register to integer */
763 64, /* size of l1 cache. */
764 256, /* size of l2 cache. */
765 64, /* size of prefetch block */
766 6, /* number of parallel prefetches */
768 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
769 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
770 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
771 COSTS_N_INSNS (2), /* cost of FABS instruction. */
772 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
773 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
776 1, /* scalar_stmt_cost. */
777 1, /* scalar load_cost. */
778 1, /* scalar_store_cost. */
779 1, /* vec_stmt_cost. */
780 1, /* vec_to_scalar_cost. */
781 1, /* scalar_to_vec_cost. */
782 1, /* vec_align_load_cost. */
783 2, /* vec_unalign_load_cost. */
784 1, /* vec_store_cost. */
785 3, /* cond_taken_branch_cost. */
786 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): some interior lines of the initializers below are not
   visible in this excerpt; kept verbatim.  */
789 /* K8 has optimized REP instruction for medium sized blocks, but for very
790 small blocks it is better to use loop. For large blocks, libcall can
791 do nontemporary accesses and beat inline considerably. */
792 static stringop_algs k8_memcpy[2] = {
793 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
794 {-1, rep_prefix_4_byte, false}}},
795 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
796 {-1, libcall, false}}}};
797 static stringop_algs k8_memset[2] = {
798 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
799 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
800 {libcall, {{48, unrolled_loop, false},
801 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
803 struct processor_costs k8_cost = {
804 COSTS_N_INSNS (1), /* cost of an add instruction */
805 COSTS_N_INSNS (2), /* cost of a lea instruction */
806 COSTS_N_INSNS (1), /* variable shift costs */
807 COSTS_N_INSNS (1), /* constant shift costs */
808 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
809 COSTS_N_INSNS (4), /* HI */
810 COSTS_N_INSNS (3), /* SI */
811 COSTS_N_INSNS (4), /* DI */
812 COSTS_N_INSNS (5)}, /* other */
813 0, /* cost of multiply per each bit set */
814 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
815 COSTS_N_INSNS (26), /* HI */
816 COSTS_N_INSNS (42), /* SI */
817 COSTS_N_INSNS (74), /* DI */
818 COSTS_N_INSNS (74)}, /* other */
819 COSTS_N_INSNS (1), /* cost of movsx */
820 COSTS_N_INSNS (1), /* cost of movzx */
821 8, /* "large" insn */
823 4, /* cost for loading QImode using movzbl */
824 {3, 4, 3}, /* cost of loading integer registers
825 in QImode, HImode and SImode.
826 Relative to reg-reg move (2). */
827 {3, 4, 3}, /* cost of storing integer registers */
828 4, /* cost of reg,reg fld/fst */
829 {4, 4, 12}, /* cost of loading fp registers
830 in SFmode, DFmode and XFmode */
831 {6, 6, 8}, /* cost of storing fp registers
832 in SFmode, DFmode and XFmode */
833 2, /* cost of moving MMX register */
834 {3, 3}, /* cost of loading MMX registers
835 in SImode and DImode */
836 {4, 4}, /* cost of storing MMX registers
837 in SImode and DImode */
838 2, /* cost of moving SSE register */
839 {4, 3, 6}, /* cost of loading SSE registers
840 in SImode, DImode and TImode */
841 {4, 4, 5}, /* cost of storing SSE registers
842 in SImode, DImode and TImode */
843 5, /* MMX or SSE register to integer */
844 64, /* size of l1 cache. */
845 512, /* size of l2 cache. */
846 64, /* size of prefetch block */
847 /* New AMD processors never drop prefetches; if they cannot be performed
848 immediately, they are queued. We set number of simultaneous prefetches
849 to a large constant to reflect this (it probably is not a good idea not
850 to limit number of prefetches at all, as their execution also takes some
852 100, /* number of parallel prefetches */
854 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
855 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
856 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
857 COSTS_N_INSNS (2), /* cost of FABS instruction. */
858 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
859 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
863 4, /* scalar_stmt_cost. */
864 2, /* scalar load_cost. */
865 2, /* scalar_store_cost. */
866 5, /* vec_stmt_cost. */
867 0, /* vec_to_scalar_cost. */
868 2, /* scalar_to_vec_cost. */
869 2, /* vec_align_load_cost. */
870 3, /* vec_unalign_load_cost. */
871 3, /* vec_store_cost. */
872 3, /* cond_taken_branch_cost. */
873 2, /* cond_not_taken_branch_cost. */
/* Stringop strategy tables for AMD Family 10h; element [0] is for 32-bit
   code, [1] for 64-bit code.  */
876 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
877 very small blocks it is better to use loop. For large blocks, libcall can
878 do nontemporary accesses and beat inline considerably. */
879 static stringop_algs amdfam10_memcpy[2] = {
880 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
881 {-1, rep_prefix_4_byte, false}}},
882 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
883 {-1, libcall, false}}}};
884 static stringop_algs amdfam10_memset[2] = {
885 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
886 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
887 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
888 {-1, libcall, false}}}};
889 struct processor_costs amdfam10_cost = {
890 COSTS_N_INSNS (1), /* cost of an add instruction */
891 COSTS_N_INSNS (2), /* cost of a lea instruction */
892 COSTS_N_INSNS (1), /* variable shift costs */
893 COSTS_N_INSNS (1), /* constant shift costs */
894 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
895 COSTS_N_INSNS (4), /* HI */
896 COSTS_N_INSNS (3), /* SI */
897 COSTS_N_INSNS (4), /* DI */
898 COSTS_N_INSNS (5)}, /* other */
899 0, /* cost of multiply per each bit set */
900 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
901 COSTS_N_INSNS (35), /* HI */
902 COSTS_N_INSNS (51), /* SI */
903 COSTS_N_INSNS (83), /* DI */
904 COSTS_N_INSNS (83)}, /* other */
905 COSTS_N_INSNS (1), /* cost of movsx */
906 COSTS_N_INSNS (1), /* cost of movzx */
907 8, /* "large" insn */
909 4, /* cost for loading QImode using movzbl */
910 {3, 4, 3}, /* cost of loading integer registers
911 in QImode, HImode and SImode.
912 Relative to reg-reg move (2). */
913 {3, 4, 3}, /* cost of storing integer registers */
914 4, /* cost of reg,reg fld/fst */
915 {4, 4, 12}, /* cost of loading fp registers
916 in SFmode, DFmode and XFmode */
917 {6, 6, 8}, /* cost of storing fp registers
918 in SFmode, DFmode and XFmode */
919 2, /* cost of moving MMX register */
920 {3, 3}, /* cost of loading MMX registers
921 in SImode and DImode */
922 {4, 4}, /* cost of storing MMX registers
923 in SImode and DImode */
924 2, /* cost of moving SSE register */
925 {4, 4, 3}, /* cost of loading SSE registers
926 in SImode, DImode and TImode */
927 {4, 4, 5}, /* cost of storing SSE registers
928 in SImode, DImode and TImode */
929 3, /* MMX or SSE register to integer */
931 MOVD reg64, xmmreg Double FSTORE 4
932 MOVD reg32, xmmreg Double FSTORE 4
934 MOVD reg64, xmmreg Double FADD 3
936 MOVD reg32, xmmreg Double FADD 3
938 64, /* size of l1 cache. */
939 512, /* size of l2 cache. */
940 64, /* size of prefetch block */
941 /* New AMD processors never drop prefetches; if they cannot be performed
942 immediately, they are queued. We set number of simultaneous prefetches
943 to a large constant to reflect this (it probably is not a good idea not
944 to limit number of prefetches at all, as their execution also takes some
946 100, /* number of parallel prefetches */
948 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
949 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
950 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
951 COSTS_N_INSNS (2), /* cost of FABS instruction. */
952 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
953 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
957 4, /* scalar_stmt_cost. */
958 2, /* scalar load_cost. */
959 2, /* scalar_store_cost. */
960 6, /* vec_stmt_cost. */
961 0, /* vec_to_scalar_cost. */
962 2, /* scalar_to_vec_cost. */
963 2, /* vec_align_load_cost. */
964 2, /* vec_unalign_load_cost. */
965 2, /* vec_store_cost. */
966 2, /* cond_taken_branch_cost. */
967 1, /* cond_not_taken_branch_cost. */
/* BDVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop. For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
/* memcpy/memset expansion tables; entry [0]/[1] select between two
   configurations (NOTE(review): presumably 32-bit vs 64-bit code; each
   triple appears to be {byte limit, algorithm, noalign}, -1 = remaining
   sizes -- confirm against stringop_algs in i386.h).  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
984 const struct processor_costs bdver1_cost = {
985 COSTS_N_INSNS (1), /* cost of an add instruction */
986 COSTS_N_INSNS (1), /* cost of a lea instruction */
987 COSTS_N_INSNS (1), /* variable shift costs */
988 COSTS_N_INSNS (1), /* constant shift costs */
989 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
990 COSTS_N_INSNS (4), /* HI */
991 COSTS_N_INSNS (4), /* SI */
992 COSTS_N_INSNS (6), /* DI */
993 COSTS_N_INSNS (6)}, /* other */
994 0, /* cost of multiply per each bit set */
995 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
996 COSTS_N_INSNS (35), /* HI */
997 COSTS_N_INSNS (51), /* SI */
998 COSTS_N_INSNS (83), /* DI */
999 COSTS_N_INSNS (83)}, /* other */
1000 COSTS_N_INSNS (1), /* cost of movsx */
1001 COSTS_N_INSNS (1), /* cost of movzx */
1002 8, /* "large" insn */
1004 4, /* cost for loading QImode using movzbl */
1005 {5, 5, 4}, /* cost of loading integer registers
1006 in QImode, HImode and SImode.
1007 Relative to reg-reg move (2). */
1008 {4, 4, 4}, /* cost of storing integer registers */
1009 2, /* cost of reg,reg fld/fst */
1010 {5, 5, 12}, /* cost of loading fp registers
1011 in SFmode, DFmode and XFmode */
1012 {4, 4, 8}, /* cost of storing fp registers
1013 in SFmode, DFmode and XFmode */
1014 2, /* cost of moving MMX register */
1015 {4, 4}, /* cost of loading MMX registers
1016 in SImode and DImode */
1017 {4, 4}, /* cost of storing MMX registers
1018 in SImode and DImode */
1019 2, /* cost of moving SSE register */
1020 {4, 4, 4}, /* cost of loading SSE registers
1021 in SImode, DImode and TImode */
1022 {4, 4, 4}, /* cost of storing SSE registers
1023 in SImode, DImode and TImode */
1024 2, /* MMX or SSE register to integer */
1026 MOVD reg64, xmmreg Double FSTORE 4
1027 MOVD reg32, xmmreg Double FSTORE 4
1029 MOVD reg64, xmmreg Double FADD 3
1031 MOVD reg32, xmmreg Double FADD 3
1033 16, /* size of l1 cache. */
1034 2048, /* size of l2 cache. */
1035 64, /* size of prefetch block */
1036 /* New AMD processors never drop prefetches; if they cannot be performed
1037 immediately, they are queued. We set number of simultaneous prefetches
1038 to a large constant to reflect this (it probably is not a good idea not
1039 to limit number of prefetches at all, as their execution also takes some
1041 100, /* number of parallel prefetches */
1042 2, /* Branch cost */
1043 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1044 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1045 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1046 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1047 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1048 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1052 6, /* scalar_stmt_cost. */
1053 4, /* scalar load_cost. */
1054 4, /* scalar_store_cost. */
1055 6, /* vec_stmt_cost. */
1056 0, /* vec_to_scalar_cost. */
1057 2, /* scalar_to_vec_cost. */
1058 4, /* vec_align_load_cost. */
1059 4, /* vec_unalign_load_cost. */
1060 4, /* vec_store_cost. */
1061 4, /* cond_taken_branch_cost. */
1062 2, /* cond_not_taken_branch_cost. */
/* BDVER2 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop. For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
/* memcpy/memset expansion tables; entry [0]/[1] select between two
   configurations (NOTE(review): presumably 32-bit vs 64-bit code; each
   triple appears to be {byte limit, algorithm, noalign}, -1 = remaining
   sizes -- confirm against stringop_algs in i386.h).  */
static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1080 const struct processor_costs bdver2_cost = {
1081 COSTS_N_INSNS (1), /* cost of an add instruction */
1082 COSTS_N_INSNS (1), /* cost of a lea instruction */
1083 COSTS_N_INSNS (1), /* variable shift costs */
1084 COSTS_N_INSNS (1), /* constant shift costs */
1085 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1086 COSTS_N_INSNS (4), /* HI */
1087 COSTS_N_INSNS (4), /* SI */
1088 COSTS_N_INSNS (6), /* DI */
1089 COSTS_N_INSNS (6)}, /* other */
1090 0, /* cost of multiply per each bit set */
1091 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1092 COSTS_N_INSNS (35), /* HI */
1093 COSTS_N_INSNS (51), /* SI */
1094 COSTS_N_INSNS (83), /* DI */
1095 COSTS_N_INSNS (83)}, /* other */
1096 COSTS_N_INSNS (1), /* cost of movsx */
1097 COSTS_N_INSNS (1), /* cost of movzx */
1098 8, /* "large" insn */
1100 4, /* cost for loading QImode using movzbl */
1101 {5, 5, 4}, /* cost of loading integer registers
1102 in QImode, HImode and SImode.
1103 Relative to reg-reg move (2). */
1104 {4, 4, 4}, /* cost of storing integer registers */
1105 2, /* cost of reg,reg fld/fst */
1106 {5, 5, 12}, /* cost of loading fp registers
1107 in SFmode, DFmode and XFmode */
1108 {4, 4, 8}, /* cost of storing fp registers
1109 in SFmode, DFmode and XFmode */
1110 2, /* cost of moving MMX register */
1111 {4, 4}, /* cost of loading MMX registers
1112 in SImode and DImode */
1113 {4, 4}, /* cost of storing MMX registers
1114 in SImode and DImode */
1115 2, /* cost of moving SSE register */
1116 {4, 4, 4}, /* cost of loading SSE registers
1117 in SImode, DImode and TImode */
1118 {4, 4, 4}, /* cost of storing SSE registers
1119 in SImode, DImode and TImode */
1120 2, /* MMX or SSE register to integer */
1122 MOVD reg64, xmmreg Double FSTORE 4
1123 MOVD reg32, xmmreg Double FSTORE 4
1125 MOVD reg64, xmmreg Double FADD 3
1127 MOVD reg32, xmmreg Double FADD 3
1129 16, /* size of l1 cache. */
1130 2048, /* size of l2 cache. */
1131 64, /* size of prefetch block */
1132 /* New AMD processors never drop prefetches; if they cannot be performed
1133 immediately, they are queued. We set number of simultaneous prefetches
1134 to a large constant to reflect this (it probably is not a good idea not
1135 to limit number of prefetches at all, as their execution also takes some
1137 100, /* number of parallel prefetches */
1138 2, /* Branch cost */
1139 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1140 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1141 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1142 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1143 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1144 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1148 6, /* scalar_stmt_cost. */
1149 4, /* scalar load_cost. */
1150 4, /* scalar_store_cost. */
1151 6, /* vec_stmt_cost. */
1152 0, /* vec_to_scalar_cost. */
1153 2, /* scalar_to_vec_cost. */
1154 4, /* vec_align_load_cost. */
1155 4, /* vec_unalign_load_cost. */
1156 4, /* vec_store_cost. */
1157 4, /* cond_taken_branch_cost. */
1158 2, /* cond_not_taken_branch_cost. */
/* BDVER3 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop. For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
/* memcpy/memset expansion tables; entry [0]/[1] select between two
   configurations (NOTE(review): presumably 32-bit vs 64-bit code; each
   triple appears to be {byte limit, algorithm, noalign}, -1 = remaining
   sizes -- confirm against stringop_algs in i386.h).  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1175 struct processor_costs bdver3_cost = {
1176 COSTS_N_INSNS (1), /* cost of an add instruction */
1177 COSTS_N_INSNS (1), /* cost of a lea instruction */
1178 COSTS_N_INSNS (1), /* variable shift costs */
1179 COSTS_N_INSNS (1), /* constant shift costs */
1180 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1181 COSTS_N_INSNS (4), /* HI */
1182 COSTS_N_INSNS (4), /* SI */
1183 COSTS_N_INSNS (6), /* DI */
1184 COSTS_N_INSNS (6)}, /* other */
1185 0, /* cost of multiply per each bit set */
1186 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1187 COSTS_N_INSNS (35), /* HI */
1188 COSTS_N_INSNS (51), /* SI */
1189 COSTS_N_INSNS (83), /* DI */
1190 COSTS_N_INSNS (83)}, /* other */
1191 COSTS_N_INSNS (1), /* cost of movsx */
1192 COSTS_N_INSNS (1), /* cost of movzx */
1193 8, /* "large" insn */
1195 4, /* cost for loading QImode using movzbl */
1196 {5, 5, 4}, /* cost of loading integer registers
1197 in QImode, HImode and SImode.
1198 Relative to reg-reg move (2). */
1199 {4, 4, 4}, /* cost of storing integer registers */
1200 2, /* cost of reg,reg fld/fst */
1201 {5, 5, 12}, /* cost of loading fp registers
1202 in SFmode, DFmode and XFmode */
1203 {4, 4, 8}, /* cost of storing fp registers
1204 in SFmode, DFmode and XFmode */
1205 2, /* cost of moving MMX register */
1206 {4, 4}, /* cost of loading MMX registers
1207 in SImode and DImode */
1208 {4, 4}, /* cost of storing MMX registers
1209 in SImode and DImode */
1210 2, /* cost of moving SSE register */
1211 {4, 4, 4}, /* cost of loading SSE registers
1212 in SImode, DImode and TImode */
1213 {4, 4, 4}, /* cost of storing SSE registers
1214 in SImode, DImode and TImode */
1215 2, /* MMX or SSE register to integer */
1216 16, /* size of l1 cache. */
1217 2048, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1235 6, /* scalar_stmt_cost. */
1236 4, /* scalar load_cost. */
1237 4, /* scalar_store_cost. */
1238 6, /* vec_stmt_cost. */
1239 0, /* vec_to_scalar_cost. */
1240 2, /* scalar_to_vec_cost. */
1241 4, /* vec_align_load_cost. */
1242 4, /* vec_unalign_load_cost. */
1243 4, /* vec_store_cost. */
1244 4, /* cond_taken_branch_cost. */
1245 2, /* cond_not_taken_branch_cost. */
/* BDVER4 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop. For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
/* memcpy/memset expansion tables; entry [0]/[1] select between two
   configurations (NOTE(review): presumably 32-bit vs 64-bit code; each
   triple appears to be {byte limit, algorithm, noalign}, -1 = remaining
   sizes -- confirm against stringop_algs in i386.h).  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1261 struct processor_costs bdver4_cost = {
1262 COSTS_N_INSNS (1), /* cost of an add instruction */
1263 COSTS_N_INSNS (1), /* cost of a lea instruction */
1264 COSTS_N_INSNS (1), /* variable shift costs */
1265 COSTS_N_INSNS (1), /* constant shift costs */
1266 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1267 COSTS_N_INSNS (4), /* HI */
1268 COSTS_N_INSNS (4), /* SI */
1269 COSTS_N_INSNS (6), /* DI */
1270 COSTS_N_INSNS (6)}, /* other */
1271 0, /* cost of multiply per each bit set */
1272 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1273 COSTS_N_INSNS (35), /* HI */
1274 COSTS_N_INSNS (51), /* SI */
1275 COSTS_N_INSNS (83), /* DI */
1276 COSTS_N_INSNS (83)}, /* other */
1277 COSTS_N_INSNS (1), /* cost of movsx */
1278 COSTS_N_INSNS (1), /* cost of movzx */
1279 8, /* "large" insn */
1281 4, /* cost for loading QImode using movzbl */
1282 {5, 5, 4}, /* cost of loading integer registers
1283 in QImode, HImode and SImode.
1284 Relative to reg-reg move (2). */
1285 {4, 4, 4}, /* cost of storing integer registers */
1286 2, /* cost of reg,reg fld/fst */
1287 {5, 5, 12}, /* cost of loading fp registers
1288 in SFmode, DFmode and XFmode */
1289 {4, 4, 8}, /* cost of storing fp registers
1290 in SFmode, DFmode and XFmode */
1291 2, /* cost of moving MMX register */
1292 {4, 4}, /* cost of loading MMX registers
1293 in SImode and DImode */
1294 {4, 4}, /* cost of storing MMX registers
1295 in SImode and DImode */
1296 2, /* cost of moving SSE register */
1297 {4, 4, 4}, /* cost of loading SSE registers
1298 in SImode, DImode and TImode */
1299 {4, 4, 4}, /* cost of storing SSE registers
1300 in SImode, DImode and TImode */
1301 2, /* MMX or SSE register to integer */
1302 16, /* size of l1 cache. */
1303 2048, /* size of l2 cache. */
1304 64, /* size of prefetch block */
1305 /* New AMD processors never drop prefetches; if they cannot be performed
1306 immediately, they are queued. We set number of simultaneous prefetches
1307 to a large constant to reflect this (it probably is not a good idea not
1308 to limit number of prefetches at all, as their execution also takes some
1310 100, /* number of parallel prefetches */
1311 2, /* Branch cost */
1312 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1313 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1314 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1315 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1316 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1317 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1321 6, /* scalar_stmt_cost. */
1322 4, /* scalar load_cost. */
1323 4, /* scalar_store_cost. */
1324 6, /* vec_stmt_cost. */
1325 0, /* vec_to_scalar_cost. */
1326 2, /* scalar_to_vec_cost. */
1327 4, /* vec_align_load_cost. */
1328 4, /* vec_unalign_load_cost. */
1329 4, /* vec_store_cost. */
1330 4, /* cond_taken_branch_cost. */
1331 2, /* cond_not_taken_branch_cost. */
/* ZNVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop. For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
/* memcpy/memset expansion tables; entry [0]/[1] select between two
   configurations (NOTE(review): presumably 32-bit vs 64-bit code; each
   triple appears to be {byte limit, algorithm, noalign}, -1 = remaining
   sizes -- confirm against stringop_algs in i386.h).  */
static stringop_algs znver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs znver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1348 struct processor_costs znver1_cost = {
1349 COSTS_N_INSNS (1), /* cost of an add instruction. */
1350 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1351 COSTS_N_INSNS (1), /* variable shift costs. */
1352 COSTS_N_INSNS (1), /* constant shift costs. */
1353 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1354 COSTS_N_INSNS (3), /* HI. */
1355 COSTS_N_INSNS (3), /* SI. */
1356 COSTS_N_INSNS (4), /* DI. */
1357 COSTS_N_INSNS (4)}, /* other. */
1358 0, /* cost of multiply per each bit
1360 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1361 COSTS_N_INSNS (35), /* HI. */
1362 COSTS_N_INSNS (51), /* SI. */
1363 COSTS_N_INSNS (83), /* DI. */
1364 COSTS_N_INSNS (83)}, /* other. */
1365 COSTS_N_INSNS (1), /* cost of movsx. */
1366 COSTS_N_INSNS (1), /* cost of movzx. */
1367 8, /* "large" insn. */
1368 9, /* MOVE_RATIO. */
1369 4, /* cost for loading QImode using
1371 {5, 5, 4}, /* cost of loading integer registers
1372 in QImode, HImode and SImode.
1373 Relative to reg-reg move (2). */
1374 {4, 4, 4}, /* cost of storing integer
1376 2, /* cost of reg,reg fld/fst. */
1377 {5, 5, 12}, /* cost of loading fp registers
1378 in SFmode, DFmode and XFmode. */
1379 {4, 4, 8}, /* cost of storing fp registers
1380 in SFmode, DFmode and XFmode. */
1381 2, /* cost of moving MMX register. */
1382 {4, 4}, /* cost of loading MMX registers
1383 in SImode and DImode. */
1384 {4, 4}, /* cost of storing MMX registers
1385 in SImode and DImode. */
1386 2, /* cost of moving SSE register. */
1387 {4, 4, 4}, /* cost of loading SSE registers
1388 in SImode, DImode and TImode. */
1389 {4, 4, 4}, /* cost of storing SSE registers
1390 in SImode, DImode and TImode. */
1391 2, /* MMX or SSE register to integer. */
1392 32, /* size of l1 cache. */
1393 512, /* size of l2 cache. */
1394 64, /* size of prefetch block. */
1395 /* New AMD processors never drop prefetches; if they cannot be performed
1396 immediately, they are queued. We set number of simultaneous prefetches
1397 to a large constant to reflect this (it probably is not a good idea not
1398 to limit number of prefetches at all, as their execution also takes some
1400 100, /* number of parallel prefetches. */
1401 2, /* Branch cost. */
1402 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1403 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1404 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1405 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1406 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1407 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1411 6, /* scalar_stmt_cost. */
1412 4, /* scalar load_cost. */
1413 4, /* scalar_store_cost. */
1414 6, /* vec_stmt_cost. */
1415 0, /* vec_to_scalar_cost. */
1416 2, /* scalar_to_vec_cost. */
1417 4, /* vec_align_load_cost. */
1418 4, /* vec_unalign_load_cost. */
1419 4, /* vec_store_cost. */
1420 4, /* cond_taken_branch_cost. */
1421 2, /* cond_not_taken_branch_cost. */
/* BTVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop. For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
/* memcpy/memset expansion tables; entry [0]/[1] select between two
   configurations (NOTE(review): presumably 32-bit vs 64-bit code; each
   triple appears to be {byte limit, algorithm, noalign}, -1 = remaining
   sizes -- confirm against stringop_algs in i386.h).  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1437 const struct processor_costs btver1_cost = {
1438 COSTS_N_INSNS (1), /* cost of an add instruction */
1439 COSTS_N_INSNS (2), /* cost of a lea instruction */
1440 COSTS_N_INSNS (1), /* variable shift costs */
1441 COSTS_N_INSNS (1), /* constant shift costs */
1442 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1443 COSTS_N_INSNS (4), /* HI */
1444 COSTS_N_INSNS (3), /* SI */
1445 COSTS_N_INSNS (4), /* DI */
1446 COSTS_N_INSNS (5)}, /* other */
1447 0, /* cost of multiply per each bit set */
1448 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1449 COSTS_N_INSNS (35), /* HI */
1450 COSTS_N_INSNS (51), /* SI */
1451 COSTS_N_INSNS (83), /* DI */
1452 COSTS_N_INSNS (83)}, /* other */
1453 COSTS_N_INSNS (1), /* cost of movsx */
1454 COSTS_N_INSNS (1), /* cost of movzx */
1455 8, /* "large" insn */
1457 4, /* cost for loading QImode using movzbl */
1458 {3, 4, 3}, /* cost of loading integer registers
1459 in QImode, HImode and SImode.
1460 Relative to reg-reg move (2). */
1461 {3, 4, 3}, /* cost of storing integer registers */
1462 4, /* cost of reg,reg fld/fst */
1463 {4, 4, 12}, /* cost of loading fp registers
1464 in SFmode, DFmode and XFmode */
1465 {6, 6, 8}, /* cost of storing fp registers
1466 in SFmode, DFmode and XFmode */
1467 2, /* cost of moving MMX register */
1468 {3, 3}, /* cost of loading MMX registers
1469 in SImode and DImode */
1470 {4, 4}, /* cost of storing MMX registers
1471 in SImode and DImode */
1472 2, /* cost of moving SSE register */
1473 {4, 4, 3}, /* cost of loading SSE registers
1474 in SImode, DImode and TImode */
1475 {4, 4, 5}, /* cost of storing SSE registers
1476 in SImode, DImode and TImode */
1477 3, /* MMX or SSE register to integer */
1479 MOVD reg64, xmmreg Double FSTORE 4
1480 MOVD reg32, xmmreg Double FSTORE 4
1482 MOVD reg64, xmmreg Double FADD 3
1484 MOVD reg32, xmmreg Double FADD 3
1486 32, /* size of l1 cache. */
1487 512, /* size of l2 cache. */
1488 64, /* size of prefetch block */
1489 100, /* number of parallel prefetches */
1490 2, /* Branch cost */
1491 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1492 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1493 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1494 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1495 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1496 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1500 4, /* scalar_stmt_cost. */
1501 2, /* scalar load_cost. */
1502 2, /* scalar_store_cost. */
1503 6, /* vec_stmt_cost. */
1504 0, /* vec_to_scalar_cost. */
1505 2, /* scalar_to_vec_cost. */
1506 2, /* vec_align_load_cost. */
1507 2, /* vec_unalign_load_cost. */
1508 2, /* vec_store_cost. */
1509 2, /* cond_taken_branch_cost. */
1510 1, /* cond_not_taken_branch_cost. */
/* memcpy/memset expansion tables for BTVER2; entry [0]/[1] select between
   two configurations (NOTE(review): presumably 32-bit vs 64-bit code; each
   triple appears to be {byte limit, algorithm, noalign}, -1 = remaining
   sizes -- confirm against stringop_algs in i386.h).  */
static stringop_algs btver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1523 const struct processor_costs btver2_cost = {
1524 COSTS_N_INSNS (1), /* cost of an add instruction */
1525 COSTS_N_INSNS (2), /* cost of a lea instruction */
1526 COSTS_N_INSNS (1), /* variable shift costs */
1527 COSTS_N_INSNS (1), /* constant shift costs */
1528 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1529 COSTS_N_INSNS (4), /* HI */
1530 COSTS_N_INSNS (3), /* SI */
1531 COSTS_N_INSNS (4), /* DI */
1532 COSTS_N_INSNS (5)}, /* other */
1533 0, /* cost of multiply per each bit set */
1534 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1535 COSTS_N_INSNS (35), /* HI */
1536 COSTS_N_INSNS (51), /* SI */
1537 COSTS_N_INSNS (83), /* DI */
1538 COSTS_N_INSNS (83)}, /* other */
1539 COSTS_N_INSNS (1), /* cost of movsx */
1540 COSTS_N_INSNS (1), /* cost of movzx */
1541 8, /* "large" insn */
1543 4, /* cost for loading QImode using movzbl */
1544 {3, 4, 3}, /* cost of loading integer registers
1545 in QImode, HImode and SImode.
1546 Relative to reg-reg move (2). */
1547 {3, 4, 3}, /* cost of storing integer registers */
1548 4, /* cost of reg,reg fld/fst */
1549 {4, 4, 12}, /* cost of loading fp registers
1550 in SFmode, DFmode and XFmode */
1551 {6, 6, 8}, /* cost of storing fp registers
1552 in SFmode, DFmode and XFmode */
1553 2, /* cost of moving MMX register */
1554 {3, 3}, /* cost of loading MMX registers
1555 in SImode and DImode */
1556 {4, 4}, /* cost of storing MMX registers
1557 in SImode and DImode */
1558 2, /* cost of moving SSE register */
1559 {4, 4, 3}, /* cost of loading SSE registers
1560 in SImode, DImode and TImode */
1561 {4, 4, 5}, /* cost of storing SSE registers
1562 in SImode, DImode and TImode */
1563 3, /* MMX or SSE register to integer */
1565 MOVD reg64, xmmreg Double FSTORE 4
1566 MOVD reg32, xmmreg Double FSTORE 4
1568 MOVD reg64, xmmreg Double FADD 3
1570 MOVD reg32, xmmreg Double FADD 3
1572 32, /* size of l1 cache. */
1573 2048, /* size of l2 cache. */
1574 64, /* size of prefetch block */
1575 100, /* number of parallel prefetches */
1576 2, /* Branch cost */
1577 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1578 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1579 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1580 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1581 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1582 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1585 4, /* scalar_stmt_cost. */
1586 2, /* scalar load_cost. */
1587 2, /* scalar_store_cost. */
1588 6, /* vec_stmt_cost. */
1589 0, /* vec_to_scalar_cost. */
1590 2, /* scalar_to_vec_cost. */
1591 2, /* vec_align_load_cost. */
1592 2, /* vec_unalign_load_cost. */
1593 2, /* vec_store_cost. */
1594 2, /* cond_taken_branch_cost. */
1595 1, /* cond_not_taken_branch_cost. */
/* memcpy/memset expansion tables for Pentium 4.  Only entry [0] is
   populated; the second entry is DUMMY_STRINGOP_ALGS (NOTE(review):
   presumably because the other word size falls back to the first entry --
   confirm against decide_alg in this file).  */
static stringop_algs pentium4_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium4_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
1607 struct processor_costs pentium4_cost = {
1608 COSTS_N_INSNS (1), /* cost of an add instruction */
1609 COSTS_N_INSNS (3), /* cost of a lea instruction */
1610 COSTS_N_INSNS (4), /* variable shift costs */
1611 COSTS_N_INSNS (4), /* constant shift costs */
1612 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1613 COSTS_N_INSNS (15), /* HI */
1614 COSTS_N_INSNS (15), /* SI */
1615 COSTS_N_INSNS (15), /* DI */
1616 COSTS_N_INSNS (15)}, /* other */
1617 0, /* cost of multiply per each bit set */
1618 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1619 COSTS_N_INSNS (56), /* HI */
1620 COSTS_N_INSNS (56), /* SI */
1621 COSTS_N_INSNS (56), /* DI */
1622 COSTS_N_INSNS (56)}, /* other */
1623 COSTS_N_INSNS (1), /* cost of movsx */
1624 COSTS_N_INSNS (1), /* cost of movzx */
1625 16, /* "large" insn */
1627 2, /* cost for loading QImode using movzbl */
1628 {4, 5, 4}, /* cost of loading integer registers
1629 in QImode, HImode and SImode.
1630 Relative to reg-reg move (2). */
1631 {2, 3, 2}, /* cost of storing integer registers */
1632 2, /* cost of reg,reg fld/fst */
1633 {2, 2, 6}, /* cost of loading fp registers
1634 in SFmode, DFmode and XFmode */
1635 {4, 4, 6}, /* cost of storing fp registers
1636 in SFmode, DFmode and XFmode */
1637 2, /* cost of moving MMX register */
1638 {2, 2}, /* cost of loading MMX registers
1639 in SImode and DImode */
1640 {2, 2}, /* cost of storing MMX registers
1641 in SImode and DImode */
1642 12, /* cost of moving SSE register */
1643 {12, 12, 12}, /* cost of loading SSE registers
1644 in SImode, DImode and TImode */
1645 {2, 2, 8}, /* cost of storing SSE registers
1646 in SImode, DImode and TImode */
1647 10, /* MMX or SSE register to integer */
1648 8, /* size of l1 cache. */
1649 256, /* size of l2 cache. */
1650 64, /* size of prefetch block */
1651 6, /* number of parallel prefetches */
1652 2, /* Branch cost */
1653 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1654 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1655 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1656 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1657 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1658 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1661 1, /* scalar_stmt_cost. */
1662 1, /* scalar load_cost. */
1663 1, /* scalar_store_cost. */
1664 1, /* vec_stmt_cost. */
1665 1, /* vec_to_scalar_cost. */
1666 1, /* scalar_to_vec_cost. */
1667 1, /* vec_align_load_cost. */
1668 2, /* vec_unalign_load_cost. */
1669 1, /* vec_store_cost. */
1670 3, /* cond_taken_branch_cost. */
1671 1, /* cond_not_taken_branch_cost. */
1674 static stringop_algs nocona_memcpy[2] = {
1675 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1676 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1677 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
/* Inline-expansion strategy table for memset when tuning for Nocona;
   same {max, alg, noalign} format as the memcpy tables above.  */
static stringop_algs nocona_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {64, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1686 struct processor_costs nocona_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1), /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (10), /* HI */
1693 COSTS_N_INSNS (10), /* SI */
1694 COSTS_N_INSNS (10), /* DI */
1695 COSTS_N_INSNS (10)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (66), /* HI */
1699 COSTS_N_INSNS (66), /* SI */
1700 COSTS_N_INSNS (66), /* DI */
1701 COSTS_N_INSNS (66)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 16, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 3, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {4, 4, 4}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 6, /* cost of moving MMX register */
1717 {12, 12}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {12, 12}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 6, /* cost of moving SSE register */
1722 {12, 12, 12}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {12, 12, 12}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 8, /* MMX or SSE register to integer */
1727 8, /* size of l1 cache. */
1728 1024, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 8, /* number of parallel prefetches */
1731 1, /* Branch cost */
1732 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 1, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
/* memcpy inline-expansion strategy table used when tuning for Bonnell/Atom;
   {max, alg, noalign} triples, -1 max = unbounded.  */
static stringop_algs atom_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset inline-expansion strategy table used when tuning for Bonnell/Atom.  */
static stringop_algs atom_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1763 struct processor_costs atom_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (4), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 1, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
/* memcpy inline-expansion strategy table used when tuning for Silvermont.  */
static stringop_algs slm_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset inline-expansion strategy table used when tuning for Silvermont.  */
static stringop_algs slm_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1840 struct processor_costs slm_cost = {
1841 COSTS_N_INSNS (1), /* cost of an add instruction */
1842 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1843 COSTS_N_INSNS (1), /* variable shift costs */
1844 COSTS_N_INSNS (1), /* constant shift costs */
1845 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1846 COSTS_N_INSNS (3), /* HI */
1847 COSTS_N_INSNS (3), /* SI */
1848 COSTS_N_INSNS (4), /* DI */
1849 COSTS_N_INSNS (2)}, /* other */
1850 0, /* cost of multiply per each bit set */
1851 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1852 COSTS_N_INSNS (26), /* HI */
1853 COSTS_N_INSNS (42), /* SI */
1854 COSTS_N_INSNS (74), /* DI */
1855 COSTS_N_INSNS (74)}, /* other */
1856 COSTS_N_INSNS (1), /* cost of movsx */
1857 COSTS_N_INSNS (1), /* cost of movzx */
1858 8, /* "large" insn */
1859 17, /* MOVE_RATIO */
1860 4, /* cost for loading QImode using movzbl */
1861 {4, 4, 4}, /* cost of loading integer registers
1862 in QImode, HImode and SImode.
1863 Relative to reg-reg move (2). */
1864 {4, 4, 4}, /* cost of storing integer registers */
1865 4, /* cost of reg,reg fld/fst */
1866 {12, 12, 12}, /* cost of loading fp registers
1867 in SFmode, DFmode and XFmode */
1868 {6, 6, 8}, /* cost of storing fp registers
1869 in SFmode, DFmode and XFmode */
1870 2, /* cost of moving MMX register */
1871 {8, 8}, /* cost of loading MMX registers
1872 in SImode and DImode */
1873 {8, 8}, /* cost of storing MMX registers
1874 in SImode and DImode */
1875 2, /* cost of moving SSE register */
1876 {8, 8, 8}, /* cost of loading SSE registers
1877 in SImode, DImode and TImode */
1878 {8, 8, 8}, /* cost of storing SSE registers
1879 in SImode, DImode and TImode */
1880 5, /* MMX or SSE register to integer */
1881 32, /* size of l1 cache. */
1882 256, /* size of l2 cache. */
1883 64, /* size of prefetch block */
1884 6, /* number of parallel prefetches */
1885 3, /* Branch cost */
1886 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1887 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1888 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1889 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1890 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1891 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1894 1, /* scalar_stmt_cost. */
1895 1, /* scalar load_cost. */
1896 1, /* scalar_store_cost. */
1897 1, /* vec_stmt_cost. */
1898 4, /* vec_to_scalar_cost. */
1899 1, /* scalar_to_vec_cost. */
1900 1, /* vec_align_load_cost. */
1901 2, /* vec_unalign_load_cost. */
1902 1, /* vec_store_cost. */
1903 3, /* cond_taken_branch_cost. */
1904 1, /* cond_not_taken_branch_cost. */
/* memcpy inline-expansion strategy table for the generic "intel" tuning.  */
static stringop_algs intel_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset inline-expansion strategy table for the generic "intel" tuning.  */
static stringop_algs intel_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1917 struct processor_costs intel_cost = {
1918 COSTS_N_INSNS (1), /* cost of an add instruction */
1919 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1920 COSTS_N_INSNS (1), /* variable shift costs */
1921 COSTS_N_INSNS (1), /* constant shift costs */
1922 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1923 COSTS_N_INSNS (3), /* HI */
1924 COSTS_N_INSNS (3), /* SI */
1925 COSTS_N_INSNS (4), /* DI */
1926 COSTS_N_INSNS (2)}, /* other */
1927 0, /* cost of multiply per each bit set */
1928 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1929 COSTS_N_INSNS (26), /* HI */
1930 COSTS_N_INSNS (42), /* SI */
1931 COSTS_N_INSNS (74), /* DI */
1932 COSTS_N_INSNS (74)}, /* other */
1933 COSTS_N_INSNS (1), /* cost of movsx */
1934 COSTS_N_INSNS (1), /* cost of movzx */
1935 8, /* "large" insn */
1936 17, /* MOVE_RATIO */
1937 4, /* cost for loading QImode using movzbl */
1938 {4, 4, 4}, /* cost of loading integer registers
1939 in QImode, HImode and SImode.
1940 Relative to reg-reg move (2). */
1941 {4, 4, 4}, /* cost of storing integer registers */
1942 4, /* cost of reg,reg fld/fst */
1943 {12, 12, 12}, /* cost of loading fp registers
1944 in SFmode, DFmode and XFmode */
1945 {6, 6, 8}, /* cost of storing fp registers
1946 in SFmode, DFmode and XFmode */
1947 2, /* cost of moving MMX register */
1948 {8, 8}, /* cost of loading MMX registers
1949 in SImode and DImode */
1950 {8, 8}, /* cost of storing MMX registers
1951 in SImode and DImode */
1952 2, /* cost of moving SSE register */
1953 {8, 8, 8}, /* cost of loading SSE registers
1954 in SImode, DImode and TImode */
1955 {8, 8, 8}, /* cost of storing SSE registers
1956 in SImode, DImode and TImode */
1957 5, /* MMX or SSE register to integer */
1958 32, /* size of l1 cache. */
1959 256, /* size of l2 cache. */
1960 64, /* size of prefetch block */
1961 6, /* number of parallel prefetches */
1962 3, /* Branch cost */
1963 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1964 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1965 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1966 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1967 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1968 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1971 1, /* scalar_stmt_cost. */
1972 1, /* scalar load_cost. */
1973 1, /* scalar_store_cost. */
1974 1, /* vec_stmt_cost. */
1975 4, /* vec_to_scalar_cost. */
1976 1, /* scalar_to_vec_cost. */
1977 1, /* vec_align_load_cost. */
1978 2, /* vec_unalign_load_cost. */
1979 1, /* vec_store_cost. */
1980 3, /* cond_taken_branch_cost. */
1981 1, /* cond_not_taken_branch_cost. */
/* Generic should produce code tuned for Core-i7 (and newer chips)
   and btver1 (and newer chips).  */

/* memcpy inline-expansion strategy table for the generic tuning.  */
static stringop_algs generic_memcpy[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* memset inline-expansion strategy table for the generic tuning.  */
static stringop_algs generic_memset[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1998 struct processor_costs generic_cost = {
1999 COSTS_N_INSNS (1), /* cost of an add instruction */
2000 /* On all chips taken into consideration lea is 2 cycles and more. With
2001 this cost however our current implementation of synth_mult results in
2002 use of unnecessary temporary registers causing regression on several
2003 SPECfp benchmarks. */
2004 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2005 COSTS_N_INSNS (1), /* variable shift costs */
2006 COSTS_N_INSNS (1), /* constant shift costs */
2007 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2008 COSTS_N_INSNS (4), /* HI */
2009 COSTS_N_INSNS (3), /* SI */
2010 COSTS_N_INSNS (4), /* DI */
2011 COSTS_N_INSNS (2)}, /* other */
2012 0, /* cost of multiply per each bit set */
2013 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2014 COSTS_N_INSNS (26), /* HI */
2015 COSTS_N_INSNS (42), /* SI */
2016 COSTS_N_INSNS (74), /* DI */
2017 COSTS_N_INSNS (74)}, /* other */
2018 COSTS_N_INSNS (1), /* cost of movsx */
2019 COSTS_N_INSNS (1), /* cost of movzx */
2020 8, /* "large" insn */
2021 17, /* MOVE_RATIO */
2022 4, /* cost for loading QImode using movzbl */
2023 {4, 4, 4}, /* cost of loading integer registers
2024 in QImode, HImode and SImode.
2025 Relative to reg-reg move (2). */
2026 {4, 4, 4}, /* cost of storing integer registers */
2027 4, /* cost of reg,reg fld/fst */
2028 {12, 12, 12}, /* cost of loading fp registers
2029 in SFmode, DFmode and XFmode */
2030 {6, 6, 8}, /* cost of storing fp registers
2031 in SFmode, DFmode and XFmode */
2032 2, /* cost of moving MMX register */
2033 {8, 8}, /* cost of loading MMX registers
2034 in SImode and DImode */
2035 {8, 8}, /* cost of storing MMX registers
2036 in SImode and DImode */
2037 2, /* cost of moving SSE register */
2038 {8, 8, 8}, /* cost of loading SSE registers
2039 in SImode, DImode and TImode */
2040 {8, 8, 8}, /* cost of storing SSE registers
2041 in SImode, DImode and TImode */
2042 5, /* MMX or SSE register to integer */
2043 32, /* size of l1 cache. */
2044 512, /* size of l2 cache. */
2045 64, /* size of prefetch block */
2046 6, /* number of parallel prefetches */
2047 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
2048 value is increased to perhaps more appropriate value of 5. */
2049 3, /* Branch cost */
2050 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2051 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2052 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2053 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2054 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2055 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2058 1, /* scalar_stmt_cost. */
2059 1, /* scalar load_cost. */
2060 1, /* scalar_store_cost. */
2061 1, /* vec_stmt_cost. */
2062 1, /* vec_to_scalar_cost. */
2063 1, /* scalar_to_vec_cost. */
2064 1, /* vec_align_load_cost. */
2065 2, /* vec_unalign_load_cost. */
2066 1, /* vec_store_cost. */
2067 3, /* cond_taken_branch_cost. */
2068 1, /* cond_not_taken_branch_cost. */
/* core_cost should produce code tuned for Core familly of CPUs.  */

/* memcpy inline-expansion strategy table for the Core tuning.  */
static stringop_algs core_memcpy[2] = {
  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
2076 static stringop_algs core_memset[2] = {
2077 {libcall, {{6, loop_1_byte, true},
2079 {8192, rep_prefix_4_byte, true},
2080 {-1, libcall, false}}},
2081 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2082 {-1, libcall, false}}}};
2085 struct processor_costs core_cost = {
2086 COSTS_N_INSNS (1), /* cost of an add instruction */
2087 /* On all chips taken into consideration lea is 2 cycles and more. With
2088 this cost however our current implementation of synth_mult results in
2089 use of unnecessary temporary registers causing regression on several
2090 SPECfp benchmarks. */
2091 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2092 COSTS_N_INSNS (1), /* variable shift costs */
2093 COSTS_N_INSNS (1), /* constant shift costs */
2094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2095 COSTS_N_INSNS (4), /* HI */
2096 COSTS_N_INSNS (3), /* SI */
2097 COSTS_N_INSNS (4), /* DI */
2098 COSTS_N_INSNS (2)}, /* other */
2099 0, /* cost of multiply per each bit set */
2100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2101 COSTS_N_INSNS (26), /* HI */
2102 COSTS_N_INSNS (42), /* SI */
2103 COSTS_N_INSNS (74), /* DI */
2104 COSTS_N_INSNS (74)}, /* other */
2105 COSTS_N_INSNS (1), /* cost of movsx */
2106 COSTS_N_INSNS (1), /* cost of movzx */
2107 8, /* "large" insn */
2108 17, /* MOVE_RATIO */
2109 4, /* cost for loading QImode using movzbl */
2110 {4, 4, 4}, /* cost of loading integer registers
2111 in QImode, HImode and SImode.
2112 Relative to reg-reg move (2). */
2113 {4, 4, 4}, /* cost of storing integer registers */
2114 4, /* cost of reg,reg fld/fst */
2115 {12, 12, 12}, /* cost of loading fp registers
2116 in SFmode, DFmode and XFmode */
2117 {6, 6, 8}, /* cost of storing fp registers
2118 in SFmode, DFmode and XFmode */
2119 2, /* cost of moving MMX register */
2120 {8, 8}, /* cost of loading MMX registers
2121 in SImode and DImode */
2122 {8, 8}, /* cost of storing MMX registers
2123 in SImode and DImode */
2124 2, /* cost of moving SSE register */
2125 {8, 8, 8}, /* cost of loading SSE registers
2126 in SImode, DImode and TImode */
2127 {8, 8, 8}, /* cost of storing SSE registers
2128 in SImode, DImode and TImode */
2129 5, /* MMX or SSE register to integer */
2130 64, /* size of l1 cache. */
2131 512, /* size of l2 cache. */
2132 64, /* size of prefetch block */
2133 6, /* number of parallel prefetches */
2134 /* FIXME perhaps more appropriate value is 5. */
2135 3, /* Branch cost */
2136 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2137 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2138 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2139 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2140 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2141 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2144 1, /* scalar_stmt_cost. */
2145 1, /* scalar load_cost. */
2146 1, /* scalar_store_cost. */
2147 1, /* vec_stmt_cost. */
2148 1, /* vec_to_scalar_cost. */
2149 1, /* scalar_to_vec_cost. */
2150 1, /* vec_align_load_cost. */
2151 2, /* vec_unalign_load_cost. */
2152 1, /* vec_store_cost. */
2153 3, /* cond_taken_branch_cost. */
2154 1, /* cond_not_taken_branch_cost. */
/* Cost table selected by -mtune; defaults to Pentium until option
   processing runs.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Cost table actually consulted by rtx cost hooks; set by -mtune, or
   forced to the size tables by -Os.  Defaults to Pentium.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_*
   enumerator; the m_* unions below group related CPUs for use in
   x86-tune.def selectors.
   NOTE(review): the continuation line of m_AMD_MULTIPLE was missing
   (its definition ended in a bare backslash); restored with m_ZNVER1,
   matching the set of AMD masks defined above it.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_LAKEMONT (1<<PROCESSOR_LAKEMONT)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_NEHALEM (1<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_HASWELL)
#define m_BONNELL (1<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
#define m_KNL (1<<PROCESSOR_KNL)
#define m_SKYLAKE_AVX512 (1<<PROCESSOR_SKYLAKE_AVX512)
#define m_INTEL (1<<PROCESSOR_INTEL)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER4 (1<<PROCESSOR_BDVER4)
#define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER	(m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
			| m_ZNVER1)

#define m_GENERIC (1<<PROCESSOR_GENERIC)
2205 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2207 #define DEF_TUNE(tune, name, selector) name,
2208 #include "x86-tune.def"
2212 /* Feature tests against the various tunings. */
2213 unsigned char ix86_tune_features[X86_TUNE_LAST];
2215 /* Feature tests against the various tunings used to create ix86_tune_features
2216 based on the processor mask. */
2217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2219 #define DEF_TUNE(tune, name, selector) selector,
2220 #include "x86-tune.def"
2224 /* Feature tests against the various architecture variations. */
2225 unsigned char ix86_arch_features[X86_ARCH_LAST];
2227 /* Feature tests against the various architecture variations, used to create
2228 ix86_arch_features based on the processor mask. */
2229 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2230 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2231 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2256 /* Array of the smallest class containing reg number REGNO, indexed by
2257 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2259 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2261 /* ax, dx, cx, bx */
2262 AREG, DREG, CREG, BREG,
2263 /* si, di, bp, sp */
2264 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2266 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2267 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2270 /* flags, fpsr, fpcr, frame */
2271 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2273 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2276 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2279 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2280 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2281 /* SSE REX registers */
2282 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2284 /* AVX-512 SSE registers */
2285 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2286 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2287 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2288 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2289 /* Mask registers. */
2290 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2291 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2292 /* MPX bound registers */
2293 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2296 /* The "default" register map used in 32bit mode. */
2298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2307 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2308 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2309 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2310 101, 102, 103, 104, /* bound registers */
2313 /* The "default" register map used in 64bit mode. */
2315 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2317 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2318 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2319 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2320 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2321 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2322 8,9,10,11,12,13,14,15, /* extended integer registers */
2323 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2324 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2325 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2326 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2327 126, 127, 128, 129, /* bound registers */
2330 /* Define the register numbers to be used in Dwarf debugging information.
2331 The SVR4 reference port C compiler uses the following register numbers
2332 in its Dwarf output code:
2333 0 for %eax (gcc regno = 0)
2334 1 for %ecx (gcc regno = 2)
2335 2 for %edx (gcc regno = 1)
2336 3 for %ebx (gcc regno = 3)
2337 4 for %esp (gcc regno = 7)
2338 5 for %ebp (gcc regno = 6)
2339 6 for %esi (gcc regno = 4)
2340 7 for %edi (gcc regno = 5)
2341 The following three DWARF register numbers are never generated by
2342 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2343 believes these numbers have these meanings.
2344 8 for %eip (no gcc equivalent)
2345 9 for %eflags (gcc regno = 17)
2346 10 for %trapno (no gcc equivalent)
2347 It is not at all clear how we should number the FP stack registers
2348 for the x86 architecture. If the version of SDB on x86/svr4 were
2349 a bit less brain dead with respect to floating-point then we would
2350 have a precedent to follow with respect to DWARF register numbers
2351 for x86 FP registers, but the SDB on x86/svr4 is so completely
2352 broken with respect to FP registers that it is hardly worth thinking
2353 of it as something to strive for compatibility with.
2354 The version of x86/svr4 SDB I have at the moment does (partially)
2355 seem to believe that DWARF register number 11 is associated with
2356 the x86 register %st(0), but that's about all. Higher DWARF
2357 register numbers don't seem to be associated with anything in
2358 particular, and even for DWARF regno 11, SDB only seems to under-
2359 stand that it should say that a variable lives in %st(0) (when
2360 asked via an `=' command) if we said it was in DWARF regno 11,
2361 but SDB still prints garbage when asked for the value of the
2362 variable in question (via a `/' command).
2363 (Also note that the labels SDB prints for various FP stack regs
2364 when doing an `x' command are all wrong.)
2365 Note that these problems generally don't affect the native SVR4
2366 C compiler because it doesn't allow the use of -O with -g and
2367 because when it is *not* optimizing, it allocates a memory
2368 location for each floating-point variable, and the memory
2369 location is what gets described in the DWARF AT_location
2370 attribute for the variable in question.
2371 Regardless of the severe mental illness of the x86/svr4 SDB, we
2372 do something sensible here and we use the following DWARF
2373 register numbers. Note that these are all stack-top-relative
2375 11 for %st(0) (gcc regno = 8)
2376 12 for %st(1) (gcc regno = 9)
2377 13 for %st(2) (gcc regno = 10)
2378 14 for %st(3) (gcc regno = 11)
2379 15 for %st(4) (gcc regno = 12)
2380 16 for %st(5) (gcc regno = 13)
2381 17 for %st(6) (gcc regno = 14)
2382 18 for %st(7) (gcc regno = 15)
2384 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2386 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2387 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2388 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2389 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2390 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2391 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2392 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2393 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2394 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2395 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2396 101, 102, 103, 104, /* bound registers */
2399 /* Define parameter passing and return registers. */
2401 static int const x86_64_int_parameter_registers[6] =
2403 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2406 static int const x86_64_ms_abi_int_parameter_registers[4] =
2408 CX_REG, DX_REG, R8_REG, R9_REG
2411 static int const x86_64_int_return_registers[4] =
2413 AX_REG, DX_REG, DI_REG, SI_REG
2416 /* Additional registers that are clobbered by SYSV calls. */
2418 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2422 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2423 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2426 /* Define the structure for the machine field in struct function. */
2428 struct GTY(()) stack_local_entry {
2429 unsigned short mode;
2432 struct stack_local_entry *next;
2435 /* Structure describing stack frame layout.
2436 Stack grows downward:
2442 saved static chain if ix86_static_chain_on_stack
2444 saved frame pointer if frame_pointer_needed
2445 <- HARD_FRAME_POINTER
2451 <- sse_regs_save_offset
2454 [va_arg registers] |
2458 [padding2] | = to_allocate
2467 int outgoing_arguments_size;
2469 /* The offsets relative to ARG_POINTER. */
2470 HOST_WIDE_INT frame_pointer_offset;
2471 HOST_WIDE_INT hard_frame_pointer_offset;
2472 HOST_WIDE_INT stack_pointer_offset;
2473 HOST_WIDE_INT hfp_save_offset;
2474 HOST_WIDE_INT reg_save_offset;
2475 HOST_WIDE_INT sse_reg_save_offset;
2477 /* When save_regs_using_mov is set, emit prologue using
2478 move instead of push instructions. */
2479 bool save_regs_using_mov;
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
/* Attribute name recognized as an alias for -mstackrealign.  */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
/* Insn-generator function pointers; presumably installed once per
   compilation to pick the 32-bit or 64-bit pattern variant --
   NOTE(review): confirm where these are assigned.  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
static rtx (*ix86_gen_clzero) (rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2513 /* Preferred alignment for stack boundary in bits. */
2514 unsigned int ix86_preferred_stack_boundary;
2516 /* Alignment for incoming stack boundary in bits specified at
2518 static unsigned int ix86_user_incoming_stack_boundary;
2520 /* Default alignment for incoming stack boundary in bits. */
2521 static unsigned int ix86_default_incoming_stack_boundary;
2523 /* Alignment for incoming stack boundary in bits. */
2524 unsigned int ix86_incoming_stack_boundary;
2526 /* Calling abi specific va_list type nodes. */
2527 static GTY(()) tree sysv_va_list_type_node;
2528 static GTY(()) tree ms_va_list_type_node;
2530 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2531 char internal_label_prefix[16];
2532 int internal_label_prefix_len;
2534 /* Fence to use after loop using movnt. */
2537 /* Register class used for passing given 64bit part of the argument.
2538 These represent classes as documented by the PS ABI, with the exception
2539 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2540 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2542 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2543 whenever possible (upper half does contain padding). */
2544 enum x86_64_reg_class
2547 X86_64_INTEGER_CLASS,
2548 X86_64_INTEGERSI_CLASS,
2555 X86_64_COMPLEX_X87_CLASS,
2559 #define MAX_CLASSES 8
2561 /* Table of constants used by fldpi, fldln2, etc.... */
2562 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2563 static bool ext_80387_constants_init = 0;
2566 static struct machine_function * ix86_init_machine_status (void);
2567 static rtx ix86_function_value (const_tree, const_tree, bool);
2568 static bool ix86_function_value_regno_p (const unsigned int);
2569 static unsigned int ix86_function_arg_boundary (machine_mode,
2571 static rtx ix86_static_chain (const_tree, bool);
2572 static int ix86_function_regparm (const_tree, const_tree);
2573 static void ix86_compute_frame_layout (struct ix86_frame *);
2574 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2576 static void ix86_add_new_builtins (HOST_WIDE_INT);
2577 static tree ix86_canonical_va_list_type (tree);
2578 static void predict_jump (int);
2579 static unsigned int split_stack_prologue_scratch_regno (void);
2580 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2582 enum ix86_function_specific_strings
2584 IX86_FUNCTION_SPECIFIC_ARCH,
2585 IX86_FUNCTION_SPECIFIC_TUNE,
2586 IX86_FUNCTION_SPECIFIC_MAX
2589 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2590 const char *, enum fpmath_unit, bool);
2591 static void ix86_function_specific_save (struct cl_target_option *,
2592 struct gcc_options *opts);
2593 static void ix86_function_specific_restore (struct gcc_options *opts,
2594 struct cl_target_option *);
2595 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2596 static void ix86_function_specific_print (FILE *, int,
2597 struct cl_target_option *);
2598 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2599 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2600 struct gcc_options *,
2601 struct gcc_options *,
2602 struct gcc_options *);
2603 static bool ix86_can_inline_p (tree, tree);
2604 static void ix86_set_current_function (tree);
2605 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2607 static enum calling_abi ix86_function_abi (const_tree);
2610 #ifndef SUBTARGET32_DEFAULT_CPU
2611 #define SUBTARGET32_DEFAULT_CPU "i386"
2614 /* Whether -mtune= or -march= were specified */
2615 static int ix86_tune_defaulted;
2616 static int ix86_arch_specified;
2618 /* Vectorization library interface and handlers. */
2619 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
2621 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
2622 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
2624 /* Processor target table, indexed by processor number */
2627 const char *const name; /* processor name */
2628 const struct processor_costs *cost; /* Processor costs */
2629 const int align_loop; /* Default alignments. */
2630 const int align_loop_max_skip;
2631 const int align_jump;
2632 const int align_jump_max_skip;
2633 const int align_func;
2636 /* This table must be in sync with enum processor_type in i386.h. */
2637 static const struct ptt processor_target_table[PROCESSOR_max] =
2639 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2640 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2641 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2642 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2643 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
2644 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2645 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2646 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2647 {"core2", &core_cost, 16, 10, 16, 10, 16},
2648 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2649 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2650 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2651 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2652 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2653 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2654 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
2655 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2656 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2657 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2658 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2659 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2660 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2661 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2662 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2663 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2664 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2665 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2666 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
2667 {"znver1", &znver1_cost, 16, 10, 16, 7, 11}
2671 rest_of_handle_insert_vzeroupper (void)
2675   /* vzeroupper instructions are inserted immediately after reload to
2676      account for possible spills from 256bit registers.  The pass
2677      reuses mode switching infrastructure by re-running mode insertion
2678      pass, so disable entities that have already been processed.  */
2679   for (i = 0; i < MAX_386_ENTITIES; i++)
2680     ix86_optimize_mode_switching[i] = 0;
/* Re-enable only the AVX upper-128 entity so mode switching works on it
   alone.  NOTE(review): the declaration of I and the return statement are
   not visible in this sampled listing.  */
2682   ix86_optimize_mode_switching[AVX_U128] = 1;
2684   /* Call optimize_mode_switching.  */
2685   g->get_passes ()->execute_pass_mode_switching ();
2689 /* Return 1 if INSN uses or defines a hard register.
2690 Hard register uses in a memory address are ignored.
2691 Clobbers and flags definitions are ignored. */
2694 has_non_address_hard_reg (rtx_insn *insn)
/* First scan explicit definitions: a hard-register def matches unless it
   is a must-clobber or the flags register, both ignored per the comment
   above this function.  NOTE(review): the match/no-match return
   statements are not visible in this sampled listing.  */
2697   FOR_EACH_INSN_DEF (ref, insn)
2698     if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
2699 	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
2700 	&& DF_REF_REGNO (ref) != FLAGS_REG)
/* Then scan uses; hard-register uses that occur inside a memory address
   are skipped via the DF_REF_REG_MEM_P test.  */
2703   FOR_EACH_INSN_USE (ref, insn)
2704     if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
2710 /* Check if comparison INSN may be transformed
2711 into vector comparison. Currently we transform
2712 zero checks only which look like:
2714 (set (reg:CCZ 17 flags)
2715 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2716 (subreg:SI (reg:DI x) 0))
2717 (const_int 0 [0]))) */
2720 convertible_comparison_p (rtx_insn *insn)
2725   rtx def_set = single_set (insn);
2727   gcc_assert (def_set);
2729   rtx src = SET_SRC (def_set);
2730   rtx dst = SET_DEST (def_set);
2732   gcc_assert (GET_CODE (src) == COMPARE);
/* Destination must be the flags register in CCZmode, i.e. a pure
   zero-flag comparison as shown in the pattern above this function.  */
2734   if (GET_CODE (dst) != REG
2735       || REGNO (dst) != FLAGS_REG
2736       || GET_MODE (dst) != CCZmode)
2739   rtx op1 = XEXP (src, 0);
2740   rtx op2 = XEXP (src, 1);
/* Only comparisons against zero are handled.  */
2742   if (op2 != CONST0_RTX (GET_MODE (op2)))
2745   if (GET_CODE (op1) != IOR)
/* Decompose the IOR into its two operands, expected to be the low and
   high SImode subregs of one DImode register (in either order).  */
2748   op2 = XEXP (op1, 1);
2749   op1 = XEXP (op1, 0);
2753       || GET_MODE (op1) != SImode
2754       || GET_MODE (op2) != SImode
2755       || ((SUBREG_BYTE (op1) != 0
2756 	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2757 	  && (SUBREG_BYTE (op2) != 0
2758 	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
/* Strip the subregs and require both halves to come from the same
   DImode pseudo.  NOTE(review): the equality check between op1 and op2
   and the return statements are not visible in this sampled listing.  */
2761   op1 = SUBREG_REG (op1);
2762   op2 = SUBREG_REG (op2);
2766       || GET_MODE (op1) != DImode)
2772 /* Return 1 if INSN may be converted into vector
2776 scalar_to_vector_candidate_p (rtx_insn *insn)
2778   rtx def_set = single_set (insn);
/* Insns touching hard registers outside addresses cannot be converted.  */
2783   if (has_non_address_hard_reg (insn))
2786   rtx src = SET_SRC (def_set);
2787   rtx dst = SET_DEST (def_set);
/* Zero-check comparisons get their own, stricter test.  */
2789   if (GET_CODE (src) == COMPARE)
2790     return convertible_comparison_p (insn);
2792   /* We are interested in DImode promotion only.  */
2793   if (GET_MODE (src) != DImode
2794       || GET_MODE (dst) != DImode)
2797   if (!REG_P (dst) && !MEM_P (dst))
/* NOTE(review): the case labels of this switch (PLUS/MINUS/IOR/XOR/AND
   etc.) are not visible in this sampled listing; only the shared operand
   checks below are shown.  */
2800   switch (GET_CODE (src))
/* Both operands must be registers or memory, with a special allowance
   for the (and (not reg) ...) "andnot" shape.  */
2819       if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))
2820 	  /* Check for andnot case.  */
2821 	  && (GET_CODE (src) != AND
2822 	      || GET_CODE (XEXP (src, 0)) != NOT
2823 	      || !REG_P (XEXP (XEXP (src, 0), 0))))
2826       if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
2829       if (GET_MODE (XEXP (src, 0)) != DImode
2830 	  || GET_MODE (XEXP (src, 1)) != DImode)
2836 /* For a given bitmap of insn UIDs scans all instruction and
2837 remove insn from CANDIDATES in case it has both convertible
2838 and not convertible definitions.
2840 All insns in a bitmap are conversion candidates according to
2841 scalar_to_vector_candidate_p. Currently it implies all insns
2845 remove_non_convertible_regs (bitmap candidates)
/* REGS collects pseudos that have at least one definition outside the
   candidate set; such registers "poison" every candidate defining them.  */
2849   bitmap regs = BITMAP_ALLOC (NULL);
2851   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
2853       rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
2854       rtx reg = SET_DEST (def_set);
/* Skip destinations already flagged, and hard registers.  */
2857 	  || bitmap_bit_p (regs, REGNO (reg))
2858 	  || HARD_REGISTER_P (reg))
/* Walk all defs of this pseudo; one non-candidate def is enough.  */
2861       for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
2863 	   def = DF_REF_NEXT_REG (def))
2865 	  if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2869 			 "r%d has non convertible definition in insn %d\n",
2870 			 REGNO (reg), DF_REF_INSN_UID (def));
2872 	      bitmap_set_bit (regs, REGNO (reg));
/* Second pass: drop every candidate insn that defines a poisoned reg.  */
2878   EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
2880       for (df_ref def = DF_REG_DEF_CHAIN (id);
2882 	   def = DF_REF_NEXT_REG (def))
2883 	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2886 	      fprintf (dump_file, "Removing insn %d from candidates list\n",
2887 		       DF_REF_INSN_UID (def));
2889 	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
2902 static unsigned max_id;
2904 /* ID of a chain. */
2905 unsigned int chain_id;
2906 /* A queue of instructions to be included into a chain. */
2908 /* Instructions included into a chain. */
2910 /* All registers defined by a chain. */
2912 /* Registers used in both vector and sclar modes. */
2915 void build (bitmap candidates, unsigned insn_uid);
2916 int compute_convert_gain ();
2920 void add_insn (bitmap candidates, unsigned insn_uid);
2921 void add_to_queue (unsigned insn_uid);
2922 void mark_dual_mode_def (df_ref def);
2923 void analyze_register_chain (bitmap candidates, df_ref ref);
2924 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
2925 void emit_conversion_insns (rtx insns, rtx_insn *pos);
2926 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
2927 void convert_insn (rtx_insn *insn);
2928 void convert_op (rtx *op, rtx_insn *insn);
2929 void convert_reg (unsigned regno);
2930 void make_vector_copies (unsigned regno);
2933 unsigned scalar_chain::max_id = 0;
2935 /* Initialize new chain. */
2937 scalar_chain::scalar_chain ()
/* Each chain gets a fresh id from the static counter MAX_ID (defined
   just above); bitmaps for member insns, defined regs, and dual-mode
   regs are allocated here and freed in the destructor.  */
2939   chain_id = ++max_id;
2942     fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
2944   bitmap_obstack_initialize (NULL);
2945   insns = BITMAP_ALLOC (NULL);
2946   defs = BITMAP_ALLOC (NULL);
2947   defs_conv = BITMAP_ALLOC (NULL);
2951 /* Free chain's data. */
2953 scalar_chain::~scalar_chain ()
/* Release the bitmaps allocated by the constructor.  NOTE(review): the
   BITMAP_FREE of DEFS is not visible in this sampled listing but is
   presumably between these lines.  */
2955   BITMAP_FREE (insns);
2957   BITMAP_FREE (defs_conv);
2958   bitmap_obstack_release (NULL);
2961 /* Add instruction into chains' queue. */
2964 scalar_chain::add_to_queue (unsigned insn_uid)
/* No-op if the insn is already a chain member or already queued.  */
2966   if (bitmap_bit_p (insns, insn_uid)
2967       || bitmap_bit_p (queue, insn_uid))
2971     fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
2972 	     insn_uid, chain_id);
2973   bitmap_set_bit (queue, insn_uid);
2976 /* Mark register defined by DEF as requiring conversion. */
2979 scalar_chain::mark_dual_mode_def (df_ref def)
2981   gcc_assert (DF_REF_REG_DEF_P (def));
/* Already recorded as needing both scalar and vector modes — done.  */
2983   if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
2988 	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
2989 	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
2991   bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
2994 /* Check REF's chain to add new insns into a queue
2995 and find registers requiring conversion. */
2998 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
3002   gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
3003 	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
3004   add_to_queue (DF_REF_INSN_UID (ref));
/* Walk the DU/UD chain of REF; each linked ref either joins the chain
   (member or candidate) or forces a dual-mode register.  */
3006   for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
3008       unsigned uid = DF_REF_INSN_UID (chain->ref);
3010       if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
/* Memory references are left alone; register refs in member or
   candidate insns are queued for inclusion.  */
3013       if (!DF_REF_REG_MEM_P (chain->ref))
3015 	  if (bitmap_bit_p (insns, uid))
3018 	  if (bitmap_bit_p (candidates, uid))
/* Non-convertible link: mark whichever side is the definition as
   needing both modes.  */
3025       if (DF_REF_REG_DEF_P (chain->ref))
3028 	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
3029 		     DF_REF_REGNO (chain->ref), uid);
3030 	  mark_dual_mode_def (chain->ref);
3035 	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
3036 		     DF_REF_REGNO (chain->ref), uid);
3037 	  mark_dual_mode_def (ref);
3042 /* Add instruction into a chain. */
3045 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
/* Idempotent: an insn already in the chain is not re-processed.  */
3047   if (bitmap_bit_p (insns, insn_uid))
3051     fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
3053   bitmap_set_bit (insns, insn_uid);
/* Record the defined pseudo (hard regs excluded) in DEFS.  */
3055   rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3056   rtx def_set = single_set (insn);
3057   if (def_set && REG_P (SET_DEST (def_set))
3058       && !HARD_REGISTER_P (SET_DEST (def_set)))
3059     bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
/* Pull in everything reachable through this insn's defs and uses.
   For defs, all definitions of the same register are analyzed so the
   whole web is considered; memory uses are skipped.  */
3063   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3064     if (!HARD_REGISTER_P (DF_REF_REG (ref)))
3065       for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
3067 	   def = DF_REF_NEXT_REG (def))
3068 	analyze_register_chain (candidates, def);
3069   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3070     if (!DF_REF_REG_MEM_P (ref))
3071       analyze_register_chain (candidates, ref);
3074 /* Build new chain starting from insn INSN_UID recursively
3075 adding all dependent uses and definitions. */
3078 scalar_chain::build (bitmap candidates, unsigned insn_uid)
/* Worklist algorithm: seed QUEUE with INSN_UID, then repeatedly pop an
   insn, remove it from the global candidate set, and add it to the
   chain (which may enqueue more insns via analyze_register_chain).  */
3080   queue = BITMAP_ALLOC (NULL);
3081   bitmap_set_bit (queue, insn_uid);
3084     fprintf (dump_file, "Building chain #%d...\n", chain_id);
3086   while (!bitmap_empty_p (queue))
3088       insn_uid = bitmap_first_set_bit (queue);
3089       bitmap_clear_bit (queue, insn_uid);
3090       bitmap_clear_bit (candidates, insn_uid);
3091       add_insn (candidates, insn_uid);
/* Dump the collected chain and the set of dual-mode registers.  */
3096       fprintf (dump_file, "Collected chain #%d...\n", chain_id);
3097       fprintf (dump_file, "  insns: ");
3098       dump_bitmap (dump_file, insns);
3099       if (!bitmap_empty_p (defs_conv))
3103 	  const char *comma = "";
3104 	  fprintf (dump_file, "  defs to convert: ");
3105 	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
3107 	      fprintf (dump_file, "%sr%d", comma, id);
3110 	  fprintf (dump_file, "\n");
3114   BITMAP_FREE (queue);
3117 /* Compute a gain for chain conversion. */
3120 scalar_chain::compute_convert_gain ()
3128     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
/* Per-insn gain: cost of the scalar DImode form (pairs of 32-bit ops)
   minus the cost of the single SSE form, using the ix86_cost tables.  */
3130   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
3132       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3133       rtx def_set = single_set (insn);
3134       rtx src = SET_SRC (def_set);
3135       rtx dst = SET_DEST (def_set);
3137       if (REG_P (src) && REG_P (dst))
3138 	gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
3139       else if (REG_P (src) && MEM_P (dst))
3140 	gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3141       else if (MEM_P (src) && REG_P (dst))
3142 	gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
3143       else if (GET_CODE (src) == PLUS
3144 	       || GET_CODE (src) == MINUS
3145 	       || GET_CODE (src) == IOR
3146 	       || GET_CODE (src) == XOR
3147 	       || GET_CODE (src) == AND)
3148 	gain += ix86_cost->add;
3149       else if (GET_CODE (src) == COMPARE)
3151 	  /* Assume comparison cost is the same.  */
3158     fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);
/* Subtract the cost of moving each dual-mode register between the SSE
   and integer files, once per definition.  NOTE(review): the final
   "return gain - cost" style computation is not visible in this
   sampled listing.  */
3160   EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
3161     cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
3164     fprintf (dump_file, "  Registers conversion cost: %d\n", cost);
3169     fprintf (dump_file, "  Total gain: %d\n", gain);
3174 /* Replace REG in X with a V2DI subreg of NEW_REG. */
3177 scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
/* Base case: X is REG itself — substitute (subreg:V2DI new_reg 0).  */
3180     return gen_rtx_SUBREG (V2DImode, new_reg, 0);
/* Otherwise recurse over X's operands using its RTX format string:
   'e' operands directly, 'E' vectors element-wise.  */
3182   const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
3184   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3187 	XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
3188       else if (fmt[i] == 'E')
3189 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3190 	  XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
3197 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
3200 scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
/* Convenience wrapper: rewrite REG inside INSN's single SET only.  */
3202   replace_with_subreg (single_set (insn), reg, new_reg);
3205 /* Insert generated conversion instruction sequence INSNS
3206 after instruction AFTER. New BB may be required in case
3207 instruction has EH region attached. */
3210 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
/* Simple case: AFTER cannot throw/jump, so emit directly after it.  */
3212   if (!control_flow_insn_p (after))
3214       emit_insn_after (insns, after);
/* AFTER ends its block (e.g. has an EH edge): split the fallthru edge
   and emit at the head of the new block instead.  */
3218   basic_block bb = BLOCK_FOR_INSN (after);
3219   edge e = find_fallthru_edge (bb->succs);
3222   basic_block new_bb = split_edge (e);
3223   emit_insn_after (insns, BB_HEAD (new_bb));
3226 /* Make vector copies for all register REGNO definitions
3227 and replace its uses in a chain. */
3230 scalar_chain::make_vector_copies (unsigned regno)
3232   rtx reg = regno_reg_rtx[regno];
3233   rtx vreg = gen_reg_rtx (DImode);
/* For every definition of REGNO outside this chain, emit code that
   copies the scalar value into VREG so chain members can use the
   vector form.  Three strategies, best available first.  */
3236   for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3237     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3239 	rtx_insn *insn = DF_REF_INSN (ref);
/* Strategy 1 (condition line not visible here; presumably SSE4.1):
   load the low 32 bits with movd, insert the high 32 with pinsrd.  */
3244 	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3245 					CONST0_RTX (V4SImode),
3246 					gen_rtx_SUBREG (SImode, reg, 0)));
3247 	    emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
3248 					  gen_rtx_SUBREG (V4SImode, vreg, 0),
3249 					  gen_rtx_SUBREG (SImode, reg, 4),
/* Strategy 2: two movd loads into separate regs, then interleave the
   low elements (punpckldq) to build the 64-bit value.  */
3252 	else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
3254 	    rtx tmp = gen_reg_rtx (DImode);
3255 	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3256 					CONST0_RTX (V4SImode),
3257 					gen_rtx_SUBREG (SImode, reg, 0)));
3258 	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
3259 					CONST0_RTX (V4SImode),
3260 					gen_rtx_SUBREG (SImode, reg, 4)));
3261 	    emit_insn (gen_vec_interleave_lowv4si
3262 		       (gen_rtx_SUBREG (V4SImode, vreg, 0),
3263 			gen_rtx_SUBREG (V4SImode, vreg, 0),
3264 			gen_rtx_SUBREG (V4SImode, tmp, 0)));
/* Fallback: bounce both halves through a stack slot.  */
3268 	    rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3269 	    emit_move_insn (adjust_address (tmp, SImode, 0),
3270 			    gen_rtx_SUBREG (SImode, reg, 0));
3271 	    emit_move_insn (adjust_address (tmp, SImode, 4),
3272 			    gen_rtx_SUBREG (SImode, reg, 4));
3273 	    emit_move_insn (vreg, tmp);
3275 	rtx_insn *seq = get_insns ();
3277 	emit_conversion_insns (seq, insn);
3281 		   "  Copied r%d to a vector register r%d for insn %d\n",
3282 		   regno, REGNO (vreg), DF_REF_INSN_UID (ref));
/* Rewrite in-chain uses of REG to refer to the vector copy VREG.  */
3285   for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3286     if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3288 	replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
3291 	  fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
3292 		   regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3296 /* Convert all definitions of register REGNO
3297 and fix its uses. Scalar copies may be created
3298 in case register is used in not convertible insn. */
3301 scalar_chain::convert_reg (unsigned regno)
/* SCALAR_COPY: REGNO also has non-convertible uses, so a scalar copy
   (SCOPY) must be materialized alongside the vector form.  */
3303   bool scalar_copy = bitmap_bit_p (defs_conv, regno);
3304   rtx reg = regno_reg_rtx[regno];
3305   rtx scopy = NULL_RTX;
/* CONV starts as a copy of the chain's insns and tracks which member
   insns still need their use of REG rewritten.  */
3309   conv = BITMAP_ALLOC (NULL);
3310   bitmap_copy (conv, insns);
3313     scopy = gen_reg_rtx (DImode);
3315   for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3317       rtx_insn *insn = DF_REF_INSN (ref);
3318       rtx def_set = single_set (insn);
3319       rtx src = SET_SRC (def_set);
3320       rtx reg = DF_REF_REG (ref);
/* Rewrite the defining insn to use the V2DI subreg of REG itself.  */
3324 	  replace_with_subreg_in_insn (insn, reg, reg);
3325 	  bitmap_clear_bit (conv, INSN_UID (insn));
/* Materialize the scalar copy after the def.  Preferred path extracts
   both 32-bit halves via vector moves and a 32-bit lane shift...  */
3330 	  rtx vcopy = gen_reg_rtx (V2DImode);
3333 	  if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
3335 	      emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
3336 	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3337 			      gen_rtx_SUBREG (SImode, vcopy, 0));
3338 	      emit_move_insn (vcopy,
3339 			      gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
3340 	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3341 			      gen_rtx_SUBREG (SImode, vcopy, 0));
/* ...fallback bounces the value through a stack slot.  */
3345 	      rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3346 	      emit_move_insn (tmp, reg);
3347 	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3348 			      adjust_address (tmp, SImode, 0));
3349 	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3350 			      adjust_address (tmp, SImode, 4));
3352 	  rtx_insn *seq = get_insns ();
3354 	  emit_conversion_insns (seq, insn);
3358 		     "  Copied r%d to a scalar register r%d for insn %d\n",
3359 		     regno, REGNO (scopy), INSN_UID (insn));
/* Fix up the uses: in-chain uses become V2DI subregs of REG; remaining
   (non-convertible, non-debug, initialized) uses get SCOPY.  */
3363   for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3364     if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3366 	if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
3368 	    rtx def_set = single_set (DF_REF_INSN (ref));
3369 	    if (!MEM_P (SET_DEST (def_set))
3370 		|| !REG_P (SET_SRC (def_set)))
3371 	      replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
3372 	    bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
3375     /* Skip debug insns and uninitialized uses.  */
3376     else if (DF_REF_CHAIN (ref)
3377 	     && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
3380 	replace_rtx (DF_REF_INSN (ref), reg, scopy);
3381 	df_insn_rescan (DF_REF_INSN (ref));
3387 /* Convert operand OP in INSN. All register uses
3388 are converted during registers conversion.
3389 Therefore we should just handle memory operands. */
3392 scalar_chain::convert_op (rtx *op, rtx_insn *insn)
3394   *op = copy_rtx_if_shared (*op);
/* NOT wraps another operand (the andnot shape): convert the inner
   operand, then retag the NOT itself as V2DI.  */
3396   if (GET_CODE (*op) == NOT)
3398       convert_op (&XEXP (*op, 0), insn);
3399       PUT_MODE (*op, V2DImode);
/* Memory operand: preload it into a fresh DImode pseudo before INSN and
   use the V2DI subreg of that pseudo instead.  */
3401   else if (MEM_P (*op))
3403       rtx tmp = gen_reg_rtx (DImode);
3405       emit_insn_before (gen_move_insn (tmp, *op), insn);
3406       *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
3409 	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
3410 		 INSN_UID (insn), REGNO (tmp));
/* Anything else must already have been converted to a V2DI subreg by
   the register-conversion phase.  */
3414       gcc_assert (SUBREG_P (*op));
3415       gcc_assert (GET_MODE (*op) == V2DImode);
3419 /* Convert INSN to vector mode. */
3422 scalar_chain::convert_insn (rtx_insn *insn)
3424   rtx def_set = single_set (insn);
3425   rtx src = SET_SRC (def_set);
3426   rtx dst = SET_DEST (def_set);
3429   if (MEM_P (dst) && !REG_P (src))
3431       /* There are no scalar integer instructions and therefore
3432 	 temporary register usage is required.  */
3433       rtx tmp = gen_reg_rtx (DImode);
3434       emit_conversion_insns (gen_move_insn (dst, tmp), insn);
3435       dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
/* NOTE(review): the case labels of this switch (binary ops, MEM/REG
   moves, COMPARE) are not visible in this sampled listing — only the
   handler bodies below are.  */
3438   switch (GET_CODE (src))
/* Binary-op case: convert both operands, retag the op as V2DI.  */
3445       convert_op (&XEXP (src, 0), insn);
3446       convert_op (&XEXP (src, 1), insn);
3447       PUT_MODE (src, V2DImode);
/* Move case: convert the single source operand.  */
3452       convert_op (&src, insn);
3459       gcc_assert (GET_MODE (src) == V2DImode);
/* COMPARE case: recover the underlying DImode reg/subreg, duplicate the
   low quadword (punpcklqdq) so ptest-style checks see both halves, and
   rebuild the SET as an UNSPEC producing CCmode flags.  */
3463 	src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
3465       gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
3466 		  || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
3469 	subreg = gen_rtx_SUBREG (V2DImode, src, 0);
3471 	subreg = copy_rtx_if_shared (src);
3472       emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
3473 						    copy_rtx_if_shared (subreg),
3474 						    copy_rtx_if_shared (subreg)),
3476       dst = gen_rtx_REG (CCmode, FLAGS_REG);
3477       src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
3478 					       copy_rtx_if_shared (src)),
/* Install the converted SET and force re-recognition.  */
3486   SET_SRC (def_set) = src;
3487   SET_DEST (def_set) = dst;
3489   /* Drop possible dead definitions.  */
3490   PATTERN (insn) = def_set;
3492   INSN_CODE (insn) = -1;
3493   recog_memoized (insn);
3494   df_insn_rescan (insn);
3497 /* Convert whole chain creating required register
3498 conversions and copies. */
3501 scalar_chain::convert ()
3505   int converted_insns = 0;
/* Debug counter gate — lets bisection disable individual conversions.
   NOTE(review): the early-return value on the disabled path is not
   visible in this sampled listing.  */
3507   if (!dbg_cnt (stv_conversion))
3511     fprintf (dump_file, "Converting chain #%d...\n", chain_id);
/* Phase 1: convert every register defined inside the chain; make
   vector copies for dual-mode regs defined outside it.  */
3513   EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
3516   EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
3517     make_vector_copies (id);
/* Phase 2: rewrite each member insn to its vector form.  */
3519   EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
3521       convert_insn (DF_INSN_UID_GET (id)->insn);
3525   return converted_insns;
3528 /* Main STV pass function. Find and convert scalar
3529 instructions into vector mode when profitable. */
3532 convert_scalars_to_vector ()
3536   int converted_insns = 0;
3538   bitmap_obstack_initialize (NULL);
3539   candidates = BITMAP_ALLOC (NULL);
/* Set up dominance info and DF def-use/use-def chains; rescans are
   deferred and flushed at the end of the pass.  */
3541   calculate_dominance_info (CDI_DOMINATORS);
3542   df_set_flags (DF_DEFER_INSN_RESCAN);
3543   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
3544   df_md_add_problem ();
3547   /* Find all instructions we want to convert into vector mode.  */
3549     fprintf (dump_file, "Searching for mode conversion candidates...\n");
3551   FOR_EACH_BB_FN (bb, cfun)
3554       FOR_BB_INSNS (bb, insn)
3555 	if (scalar_to_vector_candidate_p (insn))
3558 	      fprintf (dump_file, "  insn %d is marked as a candidate\n",
3561 	    bitmap_set_bit (candidates, INSN_UID (insn));
/* Prune candidates whose destination also has non-convertible defs.  */
3565   remove_non_convertible_regs (candidates);
3567   if (bitmap_empty_p (candidates))
3569       fprintf (dump_file, "There are no candidates for optimization.\n");
/* Greedily peel off chains seeded by the first remaining candidate;
   convert a chain only when its estimated gain is positive.  */
3571   while (!bitmap_empty_p (candidates))
3573       unsigned uid = bitmap_first_set_bit (candidates);
3576       /* Find instructions chain we want to convert to vector mode.
3577 	 Check all uses and definitions to estimate all required
3579       chain.build (candidates, uid);
3581       if (chain.compute_convert_gain () > 0)
3582 	converted_insns += chain.convert ();
3585 	  fprintf (dump_file, "Chain #%d conversion is not profitable\n",
3590     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
3592   BITMAP_FREE (candidates);
3593   bitmap_obstack_release (NULL);
3594   df_process_deferred_rescans ();
3596   /* Conversion means we may have 128bit register spills/fills
3597      which require aligned stack.  */
3598   if (converted_insns)
3600       if (crtl->stack_alignment_needed < 128)
3601 	crtl->stack_alignment_needed = 128;
3602       if (crtl->stack_alignment_estimated < 128)
3603 	crtl->stack_alignment_estimated = 128;
3611 const pass_data pass_data_insert_vzeroupper =
3613 RTL_PASS, /* type */
3614 "vzeroupper", /* name */
3615 OPTGROUP_NONE, /* optinfo_flags */
3616 TV_NONE, /* tv_id */
3617 0, /* properties_required */
3618 0, /* properties_provided */
3619 0, /* properties_destroyed */
3620 0, /* todo_flags_start */
3621 TODO_df_finish, /* todo_flags_finish */
3624 class pass_insert_vzeroupper : public rtl_opt_pass
/* RTL pass wrapper around rest_of_handle_insert_vzeroupper; gated on
   AVX without AVX-512F, -mvzeroupper, and expensive optimizations
   (the gate's trailing condition is not visible in this listing).  */
3627   pass_insert_vzeroupper(gcc::context *ctxt)
3628     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
3631   /* opt_pass methods: */
3632   virtual bool gate (function *)
3634       return TARGET_AVX && !TARGET_AVX512F
3635 	     && TARGET_VZEROUPPER && flag_expensive_optimizations
3639   virtual unsigned int execute (function *)
3641       return rest_of_handle_insert_vzeroupper ();
3644 }; // class pass_insert_vzeroupper
3646 const pass_data pass_data_stv =
3648 RTL_PASS, /* type */
3650 OPTGROUP_NONE, /* optinfo_flags */
3651 TV_NONE, /* tv_id */
3652 0, /* properties_required */
3653 0, /* properties_provided */
3654 0, /* properties_destroyed */
3655 0, /* todo_flags_start */
3656 TODO_df_finish, /* todo_flags_finish */
3659 class pass_stv : public rtl_opt_pass
/* RTL pass wrapper for the scalar-to-vector (STV) conversion above;
   runs only for 32-bit targets with -mstv, SSE2 and -O2+.  */
3662   pass_stv (gcc::context *ctxt)
3663     : rtl_opt_pass (pass_data_stv, ctxt)
3666   /* opt_pass methods: */
3667   virtual bool gate (function *)
3669       return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
3672   virtual unsigned int execute (function *)
3674       return convert_scalars_to_vector ();
3677 }; // class pass_stv
3682 make_pass_insert_vzeroupper (gcc::context *ctxt)
/* Factory used by the pass manager to instantiate the pass.  */
3684   return new pass_insert_vzeroupper (ctxt);
/* Likewise for the STV pass.  */
3688 make_pass_stv (gcc::context *ctxt)
3690   return new pass_stv (ctxt);
3693 /* Return true if a red-zone is in use. */
3696 ix86_using_red_zone (void)
/* Red zone applies when enabled and not using the 64-bit MS ABI.  */
3698   return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
3701 /* Return a string that documents the current -m options. The caller is
3702 responsible for freeing the string. */
3705 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
3706 const char *tune, enum fpmath_unit fpmath,
3709 struct ix86_target_opts
3711 const char *option; /* option string */
3712 HOST_WIDE_INT mask; /* isa mask options */
3715 /* This table is ordered so that options like -msse4.2 that imply
3716 preceding options while match those first. */
3717 static struct ix86_target_opts isa_opts[] =
3719 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3720 { "-mfma", OPTION_MASK_ISA_FMA },
3721 { "-mxop", OPTION_MASK_ISA_XOP },
3722 { "-mlwp", OPTION_MASK_ISA_LWP },
3723 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
3724 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
3725 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
3726 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
3727 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
3728 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
3729 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
3730 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
3731 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
3732 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3733 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3734 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3735 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3736 { "-msse3", OPTION_MASK_ISA_SSE3 },
3737 { "-msse2", OPTION_MASK_ISA_SSE2 },
3738 { "-msse", OPTION_MASK_ISA_SSE },
3739 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3740 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3741 { "-mmmx", OPTION_MASK_ISA_MMX },
3742 { "-mabm", OPTION_MASK_ISA_ABM },
3743 { "-mbmi", OPTION_MASK_ISA_BMI },
3744 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
3745 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
3746 { "-mhle", OPTION_MASK_ISA_HLE },
3747 { "-mfxsr", OPTION_MASK_ISA_FXSR },
3748 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
3749 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
3750 { "-madx", OPTION_MASK_ISA_ADX },
3751 { "-mtbm", OPTION_MASK_ISA_TBM },
3752 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3753 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3754 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3755 { "-maes", OPTION_MASK_ISA_AES },
3756 { "-msha", OPTION_MASK_ISA_SHA },
3757 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3758 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3759 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3760 { "-mf16c", OPTION_MASK_ISA_F16C },
3761 { "-mrtm", OPTION_MASK_ISA_RTM },
3762 { "-mxsave", OPTION_MASK_ISA_XSAVE },
3763 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
3764 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
3765 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
3766 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
3767 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
3768 { "-mmpx", OPTION_MASK_ISA_MPX },
3769 { "-mclwb", OPTION_MASK_ISA_CLWB },
3770 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
3771 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
3772 { "-mclzero", OPTION_MASK_ISA_CLZERO },
3773 { "-mpku", OPTION_MASK_ISA_PKU },
/* Table mapping each non-ISA target flag to the command-line option that
   sets it.  Used (together with the isa_opts table above) to reconstruct a
   printable option string from a target_flags bitmask.
   NOTE(review): this SOURCE view is a sampled excerpt -- some table lines
   and the closing brace are not visible here.  */
3777 static struct ix86_target_opts flag_opts[] =
3779 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3780 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
3781 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
3782 { "-m80387", MASK_80387 },
3783 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3784 { "-malign-double", MASK_ALIGN_DOUBLE },
3785 { "-mcld", MASK_CLD },
3786 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3787 { "-mieee-fp", MASK_IEEE_FP },
3788 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3789 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3790 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3791 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3792 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3793 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3794 { "-mno-red-zone", MASK_NO_RED_ZONE },
3795 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3796 { "-mrecip", MASK_RECIP },
3797 { "-mrtd", MASK_RTD },
3798 { "-msseregparm", MASK_SSEREGPARM },
3799 { "-mstack-arg-probe", MASK_STACK_PROBE },
3800 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3801 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3802 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3803 { "-mvzeroupper", MASK_VZEROUPPER },
3804 { "-mstv", MASK_STV},
3805 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3806 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3807 { "-mprefer-avx128", MASK_PREFER_AVX128},
3810 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3813 char target_other[40];
3823 memset (opts, '\0', sizeof (opts));
3825 /* Add -march= option. */
3828 opts[num][0] = "-march=";
3829 opts[num++][1] = arch;
3832 /* Add -mtune= option. */
3835 opts[num][0] = "-mtune=";
3836 opts[num++][1] = tune;
3839 /* Add -m32/-m64/-mx32. */
3840 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
3842 if ((isa & OPTION_MASK_ABI_64) != 0)
3846 isa &= ~ (OPTION_MASK_ISA_64BIT
3847 | OPTION_MASK_ABI_64
3848 | OPTION_MASK_ABI_X32);
3852 opts[num++][0] = abi;
3854 /* Pick out the options in isa options. */
3855 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3857 if ((isa & isa_opts[i].mask) != 0)
3859 opts[num++][0] = isa_opts[i].option;
3860 isa &= ~ isa_opts[i].mask;
3864 if (isa && add_nl_p)
3866 opts[num++][0] = isa_other;
3867 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
3871 /* Add flag options. */
3872 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3874 if ((flags & flag_opts[i].mask) != 0)
3876 opts[num++][0] = flag_opts[i].option;
3877 flags &= ~ flag_opts[i].mask;
3881 if (flags && add_nl_p)
3883 opts[num++][0] = target_other;
3884 sprintf (target_other, "(other flags: %#x)", flags);
3887 /* Add -fpmath= option. */
3890 opts[num][0] = "-mfpmath=";
3891 switch ((int) fpmath)
3894 opts[num++][1] = "387";
3898 opts[num++][1] = "sse";
3901 case FPMATH_387 | FPMATH_SSE:
3902 opts[num++][1] = "sse+387";
3914 gcc_assert (num < ARRAY_SIZE (opts));
3916 /* Size the string. */
3918 sep_len = (add_nl_p) ? 3 : 1;
3919 for (i = 0; i < num; i++)
3922 for (j = 0; j < 2; j++)
3924 len += strlen (opts[i][j]);
3927 /* Build the string. */
3928 ret = ptr = (char *) xmalloc (len);
3931 for (i = 0; i < num; i++)
3935 for (j = 0; j < 2; j++)
3936 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3943 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3951 for (j = 0; j < 2; j++)
3954 memcpy (ptr, opts[i][j], len2[j]);
3956 line_len += len2[j];
3961 gcc_assert (ret + len >= ptr);
3966 /* Return true, if profiling code should be emitted before
3967 prologue. Otherwise it returns false.
3968 Note: For x86 with "hotfix" it is sorried. */
/* Returns nonzero exactly when -mfentry (flag_fentry) is in effect, i.e.
   the __fentry__ profiling call goes before the prologue.
   NOTE(review): "it is sorried" above presumably means a sorry()
   diagnostic is issued for the hotfix/fentry combination elsewhere --
   confirm against the option-checking code; the wording is garbled.  */
3970 ix86_profile_before_prologue (void)
3972 return flag_fentry != 0;
3975 /* Function that is callable from the debugger to print the current
/* Debugger convenience routine: rebuilds the effective -march/-mtune/ISA
   option string via ix86_target_string and prints it to stderr.
   ATTRIBUTE_UNUSED because nothing in the compiler proper calls it.  */
3977 void ATTRIBUTE_UNUSED
3978 ix86_debug_options (void)
3980 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3981 ix86_arch_string, ix86_tune_string,
/* Non-NULL result: print it.  NOTE(review): the sampled lines here do not
   show the free() of OPTS -- confirm the xmalloc'd string is released.  */
3986 fprintf (stderr, "%s\n\n", opts);
/* NULL result means no options were set.  */
3990 fputs ("<no options>\n\n", stderr);
3995 /* Return true if T is one of the bytes we should avoid with
/* The avoided byte values are the x86 return opcodes: 0xc2/0xc3 are the
   near RET (with/without imm16 pop) and 0xca/0xcb the far RET variants.
   Avoiding them in emitted code reduces usable ROP gadget endings.  */
3999 ix86_rop_should_change_byte_p (int t)
4001 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
/* Printable names of the string-operation strategies, generated with the
   X-macro pattern from stringop.def so the table stays in sync with the
   stringop_alg enum; indexed by stringop_alg value.  */
4004 static const char *stringop_alg_names[] = {
4006 #define DEF_ALG(alg, name) #name,
4007 #include "stringop.def"
4012 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
4013 The string is of the following form (or comma separated list of it):
4015 strategy_alg:max_size:[align|noalign]
4017 where the full size range for the strategy is either [0, max_size] or
4018 [min_size, max_size], in which min_size is the max_size + 1 of the
4019 preceding range. The last size range must have max_size == -1.
4024 -mmemcpy-strategy=libcall:-1:noalign
4026 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
4030 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
4032 This is to tell the compiler to use the following strategy for memset
4033 1) when the expected size is between [1, 16], use rep_8byte strategy;
4034 2) when the size is between [17, 2048], use vector_loop;
4035 3) when the size is > 2048, use libcall. */
/* One parsed size range: upper bound, chosen algorithm, and alignment
   preference.  (Other members, if any, are outside this sampled view.)  */
4037 struct stringop_size_range
/* Parses STRATEGY_STR (destructively -- commas are overwritten with NULs)
   and overwrites the per-CPU default memset/memcpy strategy table.
   IS_MEMSET selects which table is overridden.  */
4045 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
4047 const struct stringop_algs *default_algs;
4048 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
4049 char *curr_range_str, *next_range_str;
/* Pick the 32-bit or 64-bit cost-table entry for this option.  */
4053 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
4055 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
4057 curr_range_str = strategy_str;
/* Split off the next comma-separated range, terminating the current one.  */
4064 next_range_str = strchr (curr_range_str, ',');
4066 *next_range_str++ = '\0';
/* Field widths 20 and 10 bound the writes into the name/align buffers
   (declared on lines outside this sampled view -- confirm sizes match).  */
4068 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
4069 alg_name, &maxs, align))
4071 error ("wrong arg %s to option %s", curr_range_str,
4072 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Ranges must be strictly increasing, except -1 which means "unbounded"
   and is only legal as the final range.  */
4076 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
4078 error ("size ranges of option %s should be increasing",
4079 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Linear search of the generated name table for the algorithm.  */
4083 for (i = 0; i < last_alg; i++)
4084 if (!strcmp (alg_name, stringop_alg_names[i]))
4089 error ("wrong stringop strategy name %s specified for option %s",
4091 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4095 if ((stringop_alg) i == rep_prefix_8_byte
4098 /* rep; movq isn't available in 32-bit code. */
4099 error ("stringop strategy name %s specified for option %s "
4100 "not supported for 32-bit code",
4102 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* NOTE(review): input_ranges[n] is written here, but the bound check
   against MAX_STRINGOP_ALGS only happens after the parse loop (below) --
   the check appears to come too late to stop the writes.  Confirm an
   earlier guard exists on the lines missing from this sampled view.  */
4106 input_ranges[n].max = maxs;
4107 input_ranges[n].alg = (stringop_alg) i;
4108 if (!strcmp (align, "align"))
4109 input_ranges[n].noalign = false;
4110 else if (!strcmp (align, "noalign"))
4111 input_ranges[n].noalign = true;
4114 error ("unknown alignment %s specified for option %s",
4115 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4119 curr_range_str = next_range_str;
4121 while (curr_range_str);
/* The final range must be open-ended (-1) so every size is covered.  */
4123 if (input_ranges[n - 1].max != -1)
4125 error ("the max value for the last size range should be -1"
4127 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4131 if (n > MAX_STRINGOP_ALGS)
4133 error ("too many size ranges specified in option %s",
4134 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4138 /* Now override the default algs array. */
/* The cost tables are declared const; const_cast is used deliberately to
   patch the chosen table in place with the user-specified strategy.  */
4139 for (i = 0; i < n; i++)
4141 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
4142 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
4143 = input_ranges[i].alg;
4144 *const_cast<int *>(&default_algs->size[i].noalign)
4145 = input_ranges[i].noalign;
4150 /* parse -mtune-ctrl= option. When DUMP is true,
4151 print the features that are explicitly set. */
/* The option value is a comma-separated list of tuning-feature names;
   a leading '^' on a name clears the feature instead of setting it.  */
4154 parse_mtune_ctrl_str (bool dump)
/* Nothing to do if -mtune-ctrl= was not given.  */
4156 if (!ix86_tune_ctrl_string)
4159 char *next_feature_string = NULL;
/* Work on a writable copy; ORIG keeps the base pointer of the copy.
   NOTE(review): the free (orig) is not visible in this sampled view --
   confirm the xstrdup'd buffer is released at function end.  */
4160 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
4161 char *orig = curr_feature_string;
4167 next_feature_string = strchr (curr_feature_string, ',');
4168 if (next_feature_string)
4169 *next_feature_string++ = '\0';
/* '^name' requests clearing the feature; skip past the marker.  */
4170 if (*curr_feature_string == '^')
4172 curr_feature_string++;
4175 for (i = 0; i < X86_TUNE_LAST; i++)
4177 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
4179 ix86_tune_features[i] = !clear;
4181 fprintf (stderr, "Explicitly %s feature %s\n",
4182 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* Loop ran off the end: the name matched nothing.  Back up one char when
   CLEAR so the diagnostic shows the original '^name' spelling.  */
4186 if (i == X86_TUNE_LAST)
4187 error ("Unknown parameter to option -mtune-ctrl: %s",
4188 clear ? curr_feature_string - 1 : curr_feature_string)
4189 curr_feature_string = next_feature_string;
4191 while (curr_feature_string);
4195 /* Helper function to set ix86_tune_features. IX86_TUNE is the
/* Initializes every entry of ix86_tune_features[] from the per-processor
   bitmasks in initial_ix86_tune_features[], then applies any explicit
   -mtune-ctrl= overrides.  When DUMP, print the resulting settings.  */
4199 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
/* NOTE(review): the shift assumes processor_type values stay below 32
   (mask is an unsigned int of per-processor bits) -- confirm.  */
4201 unsigned int ix86_tune_mask = 1u << ix86_tune;
4204 for (i = 0; i < X86_TUNE_LAST; ++i)
/* -mtune-ctrl with no defaults: start from all-off.  */
4206 if (ix86_tune_no_default)
4207 ix86_tune_features[i] = 0;
4209 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4214 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
4215 for (i = 0; i < X86_TUNE_LAST; i++)
4216 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
4217 ix86_tune_features[i] ? "on" : "off");
/* Explicit -mtune-ctrl= settings win over the table defaults.  */
4220 parse_mtune_ctrl_str (dump);
4224 /* Default align_* from the processor table. */
/* Fills in loop/jump/function alignment only where the user left the
   value at 0 (i.e. did not pass -falign-*), taking the defaults from the
   processor_target_table entry for the active -mtune processor.  */
4227 ix86_default_align (struct gcc_options *opts)
4229 if (opts->x_align_loops == 0)
4231 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
4232 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
4234 if (opts->x_align_jumps == 0)
4236 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
4237 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
4239 if (opts->x_align_functions == 0)
4241 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
4245 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
/* Re-derive alignment defaults against the global option state whenever
   options change (e.g. per-function target attributes / optimize pragmas).  */
4248 ix86_override_options_after_change (void)
4250 ix86_default_align (&global_options);
4253 /* Override various settings based on options. If MAIN_ARGS_P, the
4254 options are from the command line, otherwise they are from
4258 ix86_option_override_internal (bool main_args_p,
4259 struct gcc_options *opts,
4260 struct gcc_options *opts_set)
4263 unsigned int ix86_arch_mask;
4264 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
4269 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
4270 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
4271 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
4272 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
4273 #define PTA_AES (HOST_WIDE_INT_1 << 4)
4274 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
4275 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
4276 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
4277 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
4278 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
4279 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
4280 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
4281 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
4282 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
4283 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
4284 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
4285 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
4286 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
4287 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
4288 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
4289 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
4290 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
4291 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
4292 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
4293 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
4294 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
4295 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
4296 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
4297 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
4298 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
4299 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
4300 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
4301 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
4302 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
4303 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
4304 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
4305 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
4306 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
4307 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
4308 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
4309 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
4310 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
4311 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
4312 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
4313 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
4314 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
4315 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
4316 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
4317 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
4318 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
4319 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
4320 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
4321 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
4322 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
4323 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
4324 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
4325 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
4326 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
4327 #define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
4328 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 59)
4329 #define PTA_PKU (HOST_WIDE_INT_1 << 60)
4332 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
4333 | PTA_CX16 | PTA_FXSR)
4334 #define PTA_NEHALEM \
4335 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
4336 #define PTA_WESTMERE \
4337 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
4338 #define PTA_SANDYBRIDGE \
4339 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
4340 #define PTA_IVYBRIDGE \
4341 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
4342 #define PTA_HASWELL \
4343 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
4344 | PTA_FMA | PTA_MOVBE | PTA_HLE)
4345 #define PTA_BROADWELL \
4346 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
4347 #define PTA_SKYLAKE \
4348 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
4349 #define PTA_SKYLAKE_AVX512 \
4350 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
4351 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
4353 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
4354 #define PTA_BONNELL \
4355 (PTA_CORE2 | PTA_MOVBE)
4356 #define PTA_SILVERMONT \
4357 (PTA_WESTMERE | PTA_MOVBE)
4359 /* if this reaches 64, need to widen struct pta flags below */
4363 const char *const name; /* processor name or nickname. */
4364 const enum processor_type processor;
4365 const enum attr_cpu schedule;
4366 const unsigned HOST_WIDE_INT flags;
4368 const processor_alias_table[] =
4370 {"i386", PROCESSOR_I386, CPU_NONE, 0},
4371 {"i486", PROCESSOR_I486, CPU_NONE, 0},
4372 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4373 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4374 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
4375 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
4376 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
4377 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4378 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4379 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4380 PTA_MMX | PTA_SSE | PTA_FXSR},
4381 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4382 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4383 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
4384 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4385 PTA_MMX | PTA_SSE | PTA_FXSR},
4386 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4387 PTA_MMX | PTA_SSE | PTA_FXSR},
4388 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4389 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4390 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
4391 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
4392 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
4393 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4394 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
4395 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4396 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
4397 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4398 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
4399 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
4400 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4401 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4402 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
4403 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4405 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4407 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4409 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4411 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4412 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4413 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
4414 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
4415 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
4416 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4417 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4418 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4419 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4420 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
4421 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
4422 {"geode", PROCESSOR_GEODE, CPU_GEODE,
4423 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4424 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
4425 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4426 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4427 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
4428 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4429 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
4430 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4431 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
4432 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4433 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
4434 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4435 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
4436 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4437 {"x86-64", PROCESSOR_K8, CPU_K8,
4438 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
4439 {"k8", PROCESSOR_K8, CPU_K8,
4440 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4441 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4442 {"k8-sse3", PROCESSOR_K8, CPU_K8,
4443 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4444 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4445 {"opteron", PROCESSOR_K8, CPU_K8,
4446 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4447 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4448 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
4449 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4450 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4451 {"athlon64", PROCESSOR_K8, CPU_K8,
4452 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4453 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4454 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
4455 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4456 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4457 {"athlon-fx", PROCESSOR_K8, CPU_K8,
4458 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4459 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4460 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4461 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4462 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4463 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4464 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4465 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4466 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
4467 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4468 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4469 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4470 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4471 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
4472 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4473 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4474 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4475 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4476 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4477 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
4478 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4479 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4480 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4481 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4482 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
4483 | PTA_XSAVEOPT | PTA_FSGSBASE},
4484 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
4485 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4486 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4487 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4488 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
4489 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
4490 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
4491 | PTA_MOVBE | PTA_MWAITX},
4492 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
4493 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4494 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4495 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4496 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
4497 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
4498 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
4499 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
4500 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
4501 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
4502 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4503 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
4504 | PTA_FXSR | PTA_XSAVE},
4505 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
4506 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4507 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
4508 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
4509 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
4510 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
4512 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
4514 | PTA_HLE /* flags are only used for -march switch. */ },
4517 /* -mrecip options. */
4520 const char *string; /* option name */
4521 unsigned int mask; /* mask bits to set */
4523 const recip_options[] =
4525 { "all", RECIP_MASK_ALL },
4526 { "none", RECIP_MASK_NONE },
4527 { "div", RECIP_MASK_DIV },
4528 { "sqrt", RECIP_MASK_SQRT },
4529 { "vec-div", RECIP_MASK_VEC_DIV },
4530 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
4533 int const pta_size = ARRAY_SIZE (processor_alias_table);
4535 /* Set up prefix/suffix so the error messages refer to either the command
4536 line argument, or the attribute(target). */
4545 prefix = "option(\"";
4550 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
4551 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
4552 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4553 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
4554 #ifdef TARGET_BI_ARCH
4557 #if TARGET_BI_ARCH == 1
4558 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
4559 is on and OPTION_MASK_ABI_X32 is off. We turn off
4560 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
4562 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4563 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4565 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
4566 on and OPTION_MASK_ABI_64 is off. We turn off
4567 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
4568 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
4569 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
4570 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
4571 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4573 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4574 && TARGET_IAMCU_P (opts->x_target_flags))
4575 sorry ("Intel MCU psABI isn%'t supported in %s mode",
4576 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
4580 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4582 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4583 OPTION_MASK_ABI_64 for TARGET_X32. */
4584 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4585 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4587 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4588 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
4589 | OPTION_MASK_ABI_X32
4590 | OPTION_MASK_ABI_64);
4591 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
4593 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4594 OPTION_MASK_ABI_X32 for TARGET_LP64. */
4595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4596 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4599 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4600 SUBTARGET_OVERRIDE_OPTIONS;
4603 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4604 SUBSUBTARGET_OVERRIDE_OPTIONS;
4607 /* -fPIC is the default for x86_64. */
4608 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
4609 opts->x_flag_pic = 2;
4611 /* Need to check -mtune=generic first. */
4612 if (opts->x_ix86_tune_string)
4614 /* As special support for cross compilers we read -mtune=native
4615 as -mtune=generic. With native compilers we won't see the
4616 -mtune=native, as it was changed by the driver. */
4617 if (!strcmp (opts->x_ix86_tune_string, "native"))
4619 opts->x_ix86_tune_string = "generic";
4621 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4622 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
4623 "%stune=k8%s or %stune=generic%s instead as appropriate",
4624 prefix, suffix, prefix, suffix, prefix, suffix);
4628 if (opts->x_ix86_arch_string)
4629 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
4630 if (!opts->x_ix86_tune_string)
4632 opts->x_ix86_tune_string
4633 = processor_target_table[TARGET_CPU_DEFAULT].name;
4634 ix86_tune_defaulted = 1;
4637 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
4638 or defaulted. We need to use a sensible tune option. */
4639 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4641 opts->x_ix86_tune_string = "generic";
4645 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
4646 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4648 /* rep; movq isn't available in 32-bit code. */
4649 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
4650 opts->x_ix86_stringop_alg = no_stringop;
4653 if (!opts->x_ix86_arch_string)
4654 opts->x_ix86_arch_string
4655 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
4656 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
4658 ix86_arch_specified = 1;
4660 if (opts_set->x_ix86_pmode)
4662 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
4663 && opts->x_ix86_pmode == PMODE_SI)
4664 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4665 && opts->x_ix86_pmode == PMODE_DI))
4666 error ("address mode %qs not supported in the %s bit mode",
4667 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
4668 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
4671 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
4672 ? PMODE_DI : PMODE_SI;
4674 if (!opts_set->x_ix86_abi)
4675 opts->x_ix86_abi = DEFAULT_ABI;
4677 /* For targets using ms ABI enable ms-extensions, if not
4678 explicit turned off. For non-ms ABI we turn off this
4680 if (!opts_set->x_flag_ms_extensions)
4681 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
4683 if (opts_set->x_ix86_cmodel)
4685 switch (opts->x_ix86_cmodel)
4689 if (opts->x_flag_pic)
4690 opts->x_ix86_cmodel = CM_SMALL_PIC;
4691 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4692 error ("code model %qs not supported in the %s bit mode",
4698 if (opts->x_flag_pic)
4699 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
4700 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4701 error ("code model %qs not supported in the %s bit mode",
4703 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4704 error ("code model %qs not supported in x32 mode",
4710 if (opts->x_flag_pic)
4711 opts->x_ix86_cmodel = CM_LARGE_PIC;
4712 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4713 error ("code model %qs not supported in the %s bit mode",
4715 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4716 error ("code model %qs not supported in x32 mode",
4721 if (opts->x_flag_pic)
4722 error ("code model %s does not support PIC mode", "32");
4723 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4724 error ("code model %qs not supported in the %s bit mode",
4729 if (opts->x_flag_pic)
4731 error ("code model %s does not support PIC mode", "kernel");
4732 opts->x_ix86_cmodel = CM_32;
4734 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4735 error ("code model %qs not supported in the %s bit mode",
4745 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
4746 use of rip-relative addressing. This eliminates fixups that
4747 would otherwise be needed if this object is to be placed in a
4748 DLL, and is essentially just as efficient as direct addressing. */
4749 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4750 && (TARGET_RDOS || TARGET_PECOFF))
4751 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
4752 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4753 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
4755 opts->x_ix86_cmodel = CM_32;
4757 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
4759 error ("-masm=intel not supported in this configuration");
4760 opts->x_ix86_asm_dialect = ASM_ATT;
4762 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
4763 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
4764 sorry ("%i-bit mode not compiled in",
4765 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
4767 for (i = 0; i < pta_size; i++)
4768 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
4770 ix86_schedule = processor_alias_table[i].schedule;
4771 ix86_arch = processor_alias_table[i].processor;
4772 /* Default cpu tuning to the architecture. */
4773 ix86_tune = ix86_arch;
4775 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4776 && !(processor_alias_table[i].flags & PTA_64BIT))
4777 error ("CPU you selected does not support x86-64 "
4780 if (processor_alias_table[i].flags & PTA_MMX
4781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
4782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
4783 if (processor_alias_table[i].flags & PTA_3DNOW
4784 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
4785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
4786 if (processor_alias_table[i].flags & PTA_3DNOW_A
4787 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
4788 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
4789 if (processor_alias_table[i].flags & PTA_SSE
4790 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
4791 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
4792 if (processor_alias_table[i].flags & PTA_SSE2
4793 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
4794 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
4795 if (processor_alias_table[i].flags & PTA_SSE3
4796 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
4797 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
4798 if (processor_alias_table[i].flags & PTA_SSSE3
4799 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
4800 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
4801 if (processor_alias_table[i].flags & PTA_SSE4_1
4802 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
4803 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
4804 if (processor_alias_table[i].flags & PTA_SSE4_2
4805 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
4806 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
4807 if (processor_alias_table[i].flags & PTA_AVX
4808 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
4809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
4810 if (processor_alias_table[i].flags & PTA_AVX2
4811 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
4812 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
4813 if (processor_alias_table[i].flags & PTA_FMA
4814 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
4815 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
4816 if (processor_alias_table[i].flags & PTA_SSE4A
4817 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
4818 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
4819 if (processor_alias_table[i].flags & PTA_FMA4
4820 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
4821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4822 if (processor_alias_table[i].flags & PTA_XOP
4823 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4824 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4825 if (processor_alias_table[i].flags & PTA_LWP
4826 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4828 if (processor_alias_table[i].flags & PTA_ABM
4829 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4830 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4831 if (processor_alias_table[i].flags & PTA_BMI
4832 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4833 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4834 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
4835 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4836 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4837 if (processor_alias_table[i].flags & PTA_TBM
4838 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4839 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4840 if (processor_alias_table[i].flags & PTA_BMI2
4841 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4842 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4843 if (processor_alias_table[i].flags & PTA_CX16
4844 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
4845 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
4846 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
4847 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4848 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4849 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4850 && (processor_alias_table[i].flags & PTA_NO_SAHF))
4851 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4852 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4853 if (processor_alias_table[i].flags & PTA_MOVBE
4854 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
4855 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4856 if (processor_alias_table[i].flags & PTA_AES
4857 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4858 ix86_isa_flags |= OPTION_MASK_ISA_AES;
4859 if (processor_alias_table[i].flags & PTA_SHA
4860 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4861 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4862 if (processor_alias_table[i].flags & PTA_PCLMUL
4863 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4864 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4865 if (processor_alias_table[i].flags & PTA_FSGSBASE
4866 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4867 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4868 if (processor_alias_table[i].flags & PTA_RDRND
4869 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4870 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4871 if (processor_alias_table[i].flags & PTA_F16C
4872 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4873 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4874 if (processor_alias_table[i].flags & PTA_RTM
4875 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4876 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4877 if (processor_alias_table[i].flags & PTA_HLE
4878 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4879 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4880 if (processor_alias_table[i].flags & PTA_PRFCHW
4881 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4882 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4883 if (processor_alias_table[i].flags & PTA_RDSEED
4884 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4885 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4886 if (processor_alias_table[i].flags & PTA_ADX
4887 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4888 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4889 if (processor_alias_table[i].flags & PTA_FXSR
4890 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4891 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4892 if (processor_alias_table[i].flags & PTA_XSAVE
4893 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4894 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4895 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4896 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4897 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4898 if (processor_alias_table[i].flags & PTA_AVX512F
4899 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4900 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4901 if (processor_alias_table[i].flags & PTA_AVX512ER
4902 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4903 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4904 if (processor_alias_table[i].flags & PTA_AVX512PF
4905 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4906 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4907 if (processor_alias_table[i].flags & PTA_AVX512CD
4908 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4909 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4910 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4911 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4912 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4913 if (processor_alias_table[i].flags & PTA_PCOMMIT
4914 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
4915 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
4916 if (processor_alias_table[i].flags & PTA_CLWB
4917 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4918 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4919 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4920 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4921 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4922 if (processor_alias_table[i].flags & PTA_CLZERO
4923 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4924 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4925 if (processor_alias_table[i].flags & PTA_XSAVEC
4926 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4927 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4928 if (processor_alias_table[i].flags & PTA_XSAVES
4929 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4930 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4931 if (processor_alias_table[i].flags & PTA_AVX512DQ
4932 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4933 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4934 if (processor_alias_table[i].flags & PTA_AVX512BW
4935 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4936 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4937 if (processor_alias_table[i].flags & PTA_AVX512VL
4938 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4939 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4940 if (processor_alias_table[i].flags & PTA_MPX
4941 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
4942 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
4943 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4944 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4945 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4946 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4947 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4948 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4949 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4950 x86_prefetch_sse = true;
4951 if (processor_alias_table[i].flags & PTA_MWAITX
4952 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4953 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4954 if (processor_alias_table[i].flags & PTA_PKU
4955 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4956 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4958 if (!(opts_set->x_target_flags & MASK_80387))
4960 if (processor_alias_table[i].flags & PTA_NO_80387)
4961 opts->x_target_flags &= ~MASK_80387;
4963 opts->x_target_flags |= MASK_80387;
4968 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
4969 error ("Intel MPX does not support x32");
4971 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
4972 error ("Intel MPX does not support x32");
4974 if (!strcmp (opts->x_ix86_arch_string, "generic"))
4975 error ("generic CPU can be used only for %stune=%s %s",
4976 prefix, suffix, sw);
4977 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
4978 error ("intel CPU can be used only for %stune=%s %s",
4979 prefix, suffix, sw);
4980 else if (i == pta_size)
4981 error ("bad value (%s) for %sarch=%s %s",
4982 opts->x_ix86_arch_string, prefix, suffix, sw);
4984 ix86_arch_mask = 1u << ix86_arch;
4985 for (i = 0; i < X86_ARCH_LAST; ++i)
4986 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4988 for (i = 0; i < pta_size; i++)
4989 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4991 ix86_schedule = processor_alias_table[i].schedule;
4992 ix86_tune = processor_alias_table[i].processor;
4993 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4995 if (!(processor_alias_table[i].flags & PTA_64BIT))
4997 if (ix86_tune_defaulted)
4999 opts->x_ix86_tune_string = "x86-64";
5000 for (i = 0; i < pta_size; i++)
5001 if (! strcmp (opts->x_ix86_tune_string,
5002 processor_alias_table[i].name))
5004 ix86_schedule = processor_alias_table[i].schedule;
5005 ix86_tune = processor_alias_table[i].processor;
5008 error ("CPU you selected does not support x86-64 "
5012 /* Intel CPUs have always interpreted SSE prefetch instructions as
5013 NOPs; so, we can enable SSE prefetch instructions even when
5014 -mtune (rather than -march) points us to a processor that has them.
5015 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
5016 higher processors. */
5018 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
5019 x86_prefetch_sse = true;
5023 if (ix86_tune_specified && i == pta_size)
5024 error ("bad value (%s) for %stune=%s %s",
5025 opts->x_ix86_tune_string, prefix, suffix, sw);
5027 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
5029 #ifndef USE_IX86_FRAME_POINTER
5030 #define USE_IX86_FRAME_POINTER 0
5033 #ifndef USE_X86_64_FRAME_POINTER
5034 #define USE_X86_64_FRAME_POINTER 0
5037 /* Set the default values for switches whose default depends on TARGET_64BIT
5038 in case they weren't overwritten by command line options. */
5039 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5041 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5042 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
5043 if (opts->x_flag_asynchronous_unwind_tables
5044 && !opts_set->x_flag_unwind_tables
5045 && TARGET_64BIT_MS_ABI)
5046 opts->x_flag_unwind_tables = 1;
5047 if (opts->x_flag_asynchronous_unwind_tables == 2)
5048 opts->x_flag_unwind_tables
5049 = opts->x_flag_asynchronous_unwind_tables = 1;
5050 if (opts->x_flag_pcc_struct_return == 2)
5051 opts->x_flag_pcc_struct_return = 0;
5055 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5056 opts->x_flag_omit_frame_pointer
5057 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
5058 if (opts->x_flag_asynchronous_unwind_tables == 2)
5059 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
5060 if (opts->x_flag_pcc_struct_return == 2)
5062 /* Intel MCU psABI specifies that -freg-struct-return should
5063 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
5064 we check -miamcu so that -freg-struct-return is always
5065 turned on if -miamcu is used. */
5066 if (TARGET_IAMCU_P (opts->x_target_flags))
5067 opts->x_flag_pcc_struct_return = 0;
5069 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
5073 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5074 /* TODO: ix86_cost should be chosen at instruction or function granuality
5075 so for cold code we use size_cost even in !optimize_size compilation. */
5076 if (opts->x_optimize_size)
5077 ix86_cost = &ix86_size_cost;
5079 ix86_cost = ix86_tune_cost;
5081 /* Arrange to set up i386_stack_locals for all functions. */
5082 init_machine_status = ix86_init_machine_status;
5084 /* Validate -mregparm= value. */
5085 if (opts_set->x_ix86_regparm)
5087 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5088 warning (0, "-mregparm is ignored in 64-bit mode");
5089 else if (TARGET_IAMCU_P (opts->x_target_flags))
5090 warning (0, "-mregparm is ignored for Intel MCU psABI");
5091 if (opts->x_ix86_regparm > REGPARM_MAX)
5093 error ("-mregparm=%d is not between 0 and %d",
5094 opts->x_ix86_regparm, REGPARM_MAX);
5095 opts->x_ix86_regparm = 0;
5098 if (TARGET_IAMCU_P (opts->x_target_flags)
5099 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
5100 opts->x_ix86_regparm = REGPARM_MAX;
5102 /* Default align_* from the processor table. */
5103 ix86_default_align (opts);
5105 /* Provide default for -mbranch-cost= value. */
5106 if (!opts_set->x_ix86_branch_cost)
5107 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
5109 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5111 opts->x_target_flags
5112 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
5114 /* Enable by default the SSE and MMX builtins. Do allow the user to
5115 explicitly disable any of these. In particular, disabling SSE and
5116 MMX for kernel code is extremely useful. */
5117 if (!ix86_arch_specified)
5118 opts->x_ix86_isa_flags
5119 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
5120 | TARGET_SUBTARGET64_ISA_DEFAULT)
5121 & ~opts->x_ix86_isa_flags_explicit);
5123 if (TARGET_RTD_P (opts->x_target_flags))
5124 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
5128 opts->x_target_flags
5129 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
5131 if (!ix86_arch_specified)
5132 opts->x_ix86_isa_flags
5133 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
5135 /* i386 ABI does not specify red zone. It still makes sense to use it
5136 when programmer takes care to stack from being destroyed. */
5137 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
5138 opts->x_target_flags |= MASK_NO_RED_ZONE;
5141 /* Keep nonleaf frame pointers. */
5142 if (opts->x_flag_omit_frame_pointer)
5143 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
5144 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
5145 opts->x_flag_omit_frame_pointer = 1;
5147 /* If we're doing fast math, we don't care about comparison order
5148 wrt NaNs. This lets us use a shorter comparison sequence. */
5149 if (opts->x_flag_finite_math_only)
5150 opts->x_target_flags &= ~MASK_IEEE_FP;
5152 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
5153 since the insns won't need emulation. */
5154 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
5155 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
5157 /* Likewise, if the target doesn't have a 387, or we've specified
5158 software floating point, don't use 387 inline intrinsics. */
5159 if (!TARGET_80387_P (opts->x_target_flags))
5160 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
5162 /* Turn on MMX builtins for -msse. */
5163 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
5164 opts->x_ix86_isa_flags
5165 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
5167 /* Enable SSE prefetch. */
5168 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
5169 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
5170 x86_prefetch_sse = true;
5172 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
5173 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
5174 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
5175 opts->x_ix86_isa_flags
5176 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
5178 /* Enable popcnt instruction for -msse4.2 or -mabm. */
5179 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
5180 || TARGET_ABM_P (opts->x_ix86_isa_flags))
5181 opts->x_ix86_isa_flags
5182 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
5184 /* Enable lzcnt instruction for -mabm. */
5185 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
5186 opts->x_ix86_isa_flags
5187 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
5189 /* Validate -mpreferred-stack-boundary= value or default it to
5190 PREFERRED_STACK_BOUNDARY_DEFAULT. */
5191 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
5192 if (opts_set->x_ix86_preferred_stack_boundary_arg)
5194 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5195 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
5196 int max = (TARGET_SEH ? 4 : 12);
5198 if (opts->x_ix86_preferred_stack_boundary_arg < min
5199 || opts->x_ix86_preferred_stack_boundary_arg > max)
5202 error ("-mpreferred-stack-boundary is not supported "
5205 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
5206 opts->x_ix86_preferred_stack_boundary_arg, min, max);
5209 ix86_preferred_stack_boundary
5210 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
5213 /* Set the default value for -mstackrealign. */
5214 if (opts->x_ix86_force_align_arg_pointer == -1)
5215 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
5217 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
5219 /* Validate -mincoming-stack-boundary= value or default it to
5220 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
5221 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
5222 if (opts_set->x_ix86_incoming_stack_boundary_arg)
5224 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
5226 if (opts->x_ix86_incoming_stack_boundary_arg < min
5227 || opts->x_ix86_incoming_stack_boundary_arg > 12)
5228 error ("-mincoming-stack-boundary=%d is not between %d and 12",
5229 opts->x_ix86_incoming_stack_boundary_arg, min);
5232 ix86_user_incoming_stack_boundary
5233 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
5234 ix86_incoming_stack_boundary
5235 = ix86_user_incoming_stack_boundary;
5239 #ifndef NO_PROFILE_COUNTERS
5240 if (flag_nop_mcount)
5241 error ("-mnop-mcount is not compatible with this target");
5243 if (flag_nop_mcount && flag_pic)
5244 error ("-mnop-mcount is not implemented for -fPIC");
5246 /* Accept -msseregparm only if at least SSE support is enabled. */
5247 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
5248 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
5249 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
5251 if (opts_set->x_ix86_fpmath)
5253 if (opts->x_ix86_fpmath & FPMATH_SSE)
5255 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
5257 if (TARGET_80387_P (opts->x_target_flags))
5259 warning (0, "SSE instruction set disabled, using 387 arithmetics");
5260 opts->x_ix86_fpmath = FPMATH_387;
5263 else if ((opts->x_ix86_fpmath & FPMATH_387)
5264 && !TARGET_80387_P (opts->x_target_flags))
5266 warning (0, "387 instruction set disabled, using SSE arithmetics");
5267 opts->x_ix86_fpmath = FPMATH_SSE;
5271 /* For all chips supporting SSE2, -mfpmath=sse performs better than
5272 fpmath=387. The second is however default at many targets since the
5273 extra 80bit precision of temporaries is considered to be part of ABI.
5274 Overwrite the default at least for -ffast-math.
5275 TODO: -mfpmath=both seems to produce same performing code with bit
5276 smaller binaries. It is however not clear if register allocation is
5277 ready for this setting.
5278 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
5279 codegen. We may switch to 387 with -ffast-math for size optimized
5281 else if (fast_math_flags_set_p (&global_options)
5282 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
5283 opts->x_ix86_fpmath = FPMATH_SSE;
5285 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
5287 /* Use external vectorized library in vectorizing intrinsics. */
5288 if (opts_set->x_ix86_veclibabi_type)
5289 switch (opts->x_ix86_veclibabi_type)
5291 case ix86_veclibabi_type_svml:
5292 ix86_veclib_handler = ix86_veclibabi_svml;
5295 case ix86_veclibabi_type_acml:
5296 ix86_veclib_handler = ix86_veclibabi_acml;
5303 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
5304 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5305 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5307 /* If stack probes are required, the space used for large function
5308 arguments on the stack must also be probed, so enable
5309 -maccumulate-outgoing-args so this happens in the prologue. */
5310 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
5311 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5313 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5314 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
5315 "for correctness", prefix, suffix);
5316 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5319 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
5320 so enable -maccumulate-outgoing-args when %ebp is fixed. */
5321 if (fixed_regs[BP_REG]
5322 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5324 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5325 warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
5327 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5330 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
5333 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
5334 p = strchr (internal_label_prefix, 'X');
5335 internal_label_prefix_len = p - internal_label_prefix;
5339 /* When scheduling description is not available, disable scheduler pass
5340 so it won't slow down the compilation and make x87 code slower. */
5341 if (!TARGET_SCHEDULE)
5342 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
5344 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5345 ix86_tune_cost->simultaneous_prefetches,
5346 opts->x_param_values,
5347 opts_set->x_param_values);
5348 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5349 ix86_tune_cost->prefetch_block,
5350 opts->x_param_values,
5351 opts_set->x_param_values);
5352 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
5353 ix86_tune_cost->l1_cache_size,
5354 opts->x_param_values,
5355 opts_set->x_param_values);
5356 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
5357 ix86_tune_cost->l2_cache_size,
5358 opts->x_param_values,
5359 opts_set->x_param_values);
5361 /* Restrict number of if-converted SET insns to 1. */
5362 if (TARGET_ONE_IF_CONV_INSN)
5363 maybe_set_param_value (PARAM_MAX_RTL_IF_CONVERSION_INSNS,
5365 opts->x_param_values,
5366 opts_set->x_param_values);
5368 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
5369 if (opts->x_flag_prefetch_loop_arrays < 0
5371 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
5372 && !opts->x_optimize_size
5373 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
5374 opts->x_flag_prefetch_loop_arrays = 1;
5376 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
5377 can be opts->x_optimized to ap = __builtin_next_arg (0). */
5378 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
5379 targetm.expand_builtin_va_start = NULL;
5381 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5383 ix86_gen_leave = gen_leave_rex64;
5384 if (Pmode == DImode)
5386 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
5387 ix86_gen_tls_local_dynamic_base_64
5388 = gen_tls_local_dynamic_base_64_di;
5392 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
5393 ix86_gen_tls_local_dynamic_base_64
5394 = gen_tls_local_dynamic_base_64_si;
5398 ix86_gen_leave = gen_leave;
5400 if (Pmode == DImode)
5402 ix86_gen_add3 = gen_adddi3;
5403 ix86_gen_sub3 = gen_subdi3;
5404 ix86_gen_sub3_carry = gen_subdi3_carry;
5405 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
5406 ix86_gen_andsp = gen_anddi3;
5407 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
5408 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
5409 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
5410 ix86_gen_monitor = gen_sse3_monitor_di;
5411 ix86_gen_monitorx = gen_monitorx_di;
5412 ix86_gen_clzero = gen_clzero_di;
5416 ix86_gen_add3 = gen_addsi3;
5417 ix86_gen_sub3 = gen_subsi3;
5418 ix86_gen_sub3_carry = gen_subsi3_carry;
5419 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
5420 ix86_gen_andsp = gen_andsi3;
5421 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
5422 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
5423 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
5424 ix86_gen_monitor = gen_sse3_monitor_si;
5425 ix86_gen_monitorx = gen_monitorx_si;
5426 ix86_gen_clzero = gen_clzero_si;
5430 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
5431 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5432 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
5435 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
5437 if (opts->x_flag_fentry > 0)
5438 sorry ("-mfentry isn%'t supported for 32-bit in combination "
5440 opts->x_flag_fentry = 0;
5442 else if (TARGET_SEH)
5444 if (opts->x_flag_fentry == 0)
5445 sorry ("-mno-fentry isn%'t compatible with SEH");
5446 opts->x_flag_fentry = 1;
5448 else if (opts->x_flag_fentry < 0)
5450 #if defined(PROFILE_BEFORE_PROLOGUE)
5451 opts->x_flag_fentry = 1;
5453 opts->x_flag_fentry = 0;
5457 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
5458 opts->x_target_flags |= MASK_VZEROUPPER;
5459 if (!(opts_set->x_target_flags & MASK_STV))
5460 opts->x_target_flags |= MASK_STV;
5461 /* Disable STV if -mpreferred-stack-boundary={2,3} or
5462 -mincoming-stack-boundary={2,3} - the needed
5463 stack realignment will be extra cost the pass doesn't take into
5464 account and the pass can't realign the stack. */
5465 if (ix86_preferred_stack_boundary < 128
5466 || ix86_incoming_stack_boundary < 128)
5467 opts->x_target_flags &= ~MASK_STV;
5468 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
5469 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
5470 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
5471 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
5472 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
5473 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
5474 /* Enable 128-bit AVX instruction generation
5475 for the auto-vectorizer. */
5476 if (TARGET_AVX128_OPTIMAL
5477 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
5478 opts->x_target_flags |= MASK_PREFER_AVX128;
5480 if (opts->x_ix86_recip_name)
5482 char *p = ASTRDUP (opts->x_ix86_recip_name);
5484 unsigned int mask, i;
5487 while ((q = strtok (p, ",")) != NULL)
5498 if (!strcmp (q, "default"))
5499 mask = RECIP_MASK_ALL;
5502 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5503 if (!strcmp (q, recip_options[i].string))
5505 mask = recip_options[i].mask;
5509 if (i == ARRAY_SIZE (recip_options))
5511 error ("unknown option for -mrecip=%s", q);
5513 mask = RECIP_MASK_NONE;
5517 opts->x_recip_mask_explicit |= mask;
5519 opts->x_recip_mask &= ~mask;
5521 opts->x_recip_mask |= mask;
5525 if (TARGET_RECIP_P (opts->x_target_flags))
5526 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
5527 else if (opts_set->x_target_flags & MASK_RECIP)
5528 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
5530 /* Default long double to 64-bit for 32-bit Bionic and to __float128
5531 for 64-bit Bionic. Also default long double to 64-bit for Intel
5533 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
5534 && !(opts_set->x_target_flags
5535 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
5536 opts->x_target_flags |= (TARGET_64BIT
5537 ? MASK_LONG_DOUBLE_128
5538 : MASK_LONG_DOUBLE_64);
5540 /* Only one of them can be active. */
5541 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
5542 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
5544 /* Save the initial options in case the user does function specific
5547 target_option_default_node = target_option_current_node
5548 = build_target_option_node (opts);
5550 /* Handle stack protector */
5551 if (!opts_set->x_ix86_stack_protector_guard)
5552 opts->x_ix86_stack_protector_guard
5553 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
5555 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
5556 if (opts->x_ix86_tune_memcpy_strategy)
5558 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
5559 ix86_parse_stringop_strategy_string (str, false);
5563 if (opts->x_ix86_tune_memset_strategy)
5565 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
5566 ix86_parse_stringop_strategy_string (str, true);
5571 /* Implement the TARGET_OPTION_OVERRIDE hook. */
5574 ix86_option_override (void)
5576 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
5577 struct register_pass_info insert_vzeroupper_info
5578 = { pass_insert_vzeroupper, "reload",
5579 1, PASS_POS_INSERT_AFTER
5581 opt_pass *pass_stv = make_pass_stv (g);
5582 struct register_pass_info stv_info
5583 = { pass_stv, "combine",
5584 1, PASS_POS_INSERT_AFTER
5587 ix86_option_override_internal (true, &global_options, &global_options_set);
5590 /* This needs to be done at start up. It's convenient to do it here. */
5591 register_pass (&insert_vzeroupper_info);
5592 register_pass (&stv_info);
5595 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
5597 ix86_offload_options (void)
5600 return xstrdup ("-foffload-abi=lp64");
5601 return xstrdup ("-foffload-abi=ilp32");
5604 /* Update register usage after having seen the compiler flags. */
5607 ix86_conditional_register_usage (void)
5611 /* For 32-bit targets, squash the REX registers. */
5614 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
5615 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5616 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
5617 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5618 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5619 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5622 /* See the definition of CALL_USED_REGISTERS in i386.h. */
5623 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
5625 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
5627 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5629 /* Set/reset conditionally defined registers from
5630 CALL_USED_REGISTERS initializer. */
5631 if (call_used_regs[i] > 1)
5632 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5634 /* Calculate registers of CLOBBERED_REGS register set
5635 as call used registers from GENERAL_REGS register set. */
5636 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5637 && call_used_regs[i])
5638 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5641 /* If MMX is disabled, squash the registers. */
5643 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5644 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5645 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5647 /* If SSE is disabled, squash the registers. */
5649 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5650 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5651 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5653 /* If the FPU is disabled, squash the registers. */
5654 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5655 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5656 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5657 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5659 /* If AVX512F is disabled, squash the registers. */
5660 if (! TARGET_AVX512F)
5662 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5663 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5665 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5666 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5669 /* If MPX is disabled, squash the registers. */
5671 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5672 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5676 /* Save the current options */
/* Snapshot the target-specific option state into PTR (a cl_target_option
   record) so it can later be restored by ix86_function_specific_restore.
   Sources are a mix of OPTS fields (the x_ix86_* members) and backend
   globals (ix86_arch, ix86_tune, etc.).
   NOTE(review): this excerpt is a lossy sampling of the file — the return
   type line, braces and some blank lines are not visible here.  */
5679 ix86_function_specific_save (struct cl_target_option *ptr,
5680 struct gcc_options *opts)
/* Backend globals copied into the narrow (char-sized) fields of PTR;
   width is sanity-checked by the gcc_asserts at the end.  */
5682 ptr->arch = ix86_arch;
5683 ptr->schedule = ix86_schedule;
5684 ptr->prefetch_sse = x86_prefetch_sse;
5685 ptr->tune = ix86_tune;
5686 ptr->branch_cost = ix86_branch_cost;
5687 ptr->tune_defaulted = ix86_tune_defaulted;
5688 ptr->arch_specified = ix86_arch_specified;
/* Straight field-for-field copy of the saved gcc_options members; this
   list mirrors ix86_function_specific_restore and must stay in sync.  */
5689 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5690 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
5691 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5692 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5693 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5694 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5695 ptr->x_ix86_abi = opts->x_ix86_abi;
5696 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5697 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5698 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5699 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5700 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5701 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5702 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5703 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5704 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5705 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5706 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5707 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5708 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5709 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5710 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5711 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5712 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5713 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5714 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5715 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5717 /* The fields are char but the variables are not; make sure the
5718 values fit in the fields. */
5719 gcc_assert (ptr->arch == ix86_arch);
5720 gcc_assert (ptr->schedule == ix86_schedule);
5721 gcc_assert (ptr->tune == ix86_tune);
5722 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5725 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copy the state held in PTR
   back into OPTS and the backend globals, then recompute anything
   derived from arch/tune (cost tables, feature bitmaps).
   NOTE(review): return type, braces and some lines are dropped by this
   excerpt's sampling.  */
5728 ix86_function_specific_restore (struct gcc_options *opts,
5729 struct cl_target_option *ptr)
5731 enum processor_type old_tune = ix86_tune;
5732 enum processor_type old_arch = ix86_arch;
5733 unsigned int ix86_arch_mask;
5736 /* We don't change -fPIC. */
5737 opts->x_flag_pic = flag_pic;
/* Narrow char fields widen back to their enum/int globals.  */
5739 ix86_arch = (enum processor_type) ptr->arch;
5740 ix86_schedule = (enum attr_cpu) ptr->schedule;
5741 ix86_tune = (enum processor_type) ptr->tune;
5742 x86_prefetch_sse = ptr->prefetch_sse;
5743 opts->x_ix86_branch_cost = ptr->branch_cost;
5744 ix86_tune_defaulted = ptr->tune_defaulted;
5745 ix86_arch_specified = ptr->arch_specified;
/* Field list mirrors ix86_function_specific_save; keep in sync.  */
5746 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5747 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
5748 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5749 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5750 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5751 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5752 opts->x_ix86_abi = ptr->x_ix86_abi;
5753 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5754 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5755 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5756 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5757 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5758 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5759 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5760 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5761 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5762 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5763 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5764 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5765 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5766 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5767 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5768 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5769 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5770 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5771 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5772 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
/* Re-derive the active cost table from the (possibly changed) tune.  */
5773 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5774 /* TODO: ix86_cost should be chosen at instruction or function granuality
5775 so for cold code we use size_cost even in !optimize_size compilation. */
5776 if (opts->x_optimize_size)
5777 ix86_cost = &ix86_size_cost;
/* NOTE(review): the `else` keyword for this branch sits on a line
   dropped by the excerpt (original line 5778).  */
5779 ix86_cost = ix86_tune_cost;
5781 /* Recreate the arch feature tests if the arch changed */
5782 if (old_arch != ix86_arch)
5784 ix86_arch_mask = 1u << ix86_arch;
5785 for (i = 0; i < X86_ARCH_LAST; ++i)
5786 ix86_arch_features[i]
5787 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5790 /* Recreate the tune optimization tests */
5791 if (old_tune != ix86_tune)
5792 set_ix86_tune_features (ix86_tune, false);
5795 /* Adjust target options after streaming them in. This is mainly about
5796 reconciling them with global options. */
/* LTO hook: after a cl_target_option record is read from a stream, make
   its code model agree with the current global flag_pic setting.
   NOTE(review): the surrounding if (flag_pic) / else structure, the
   `case` labels and `break` statements of both switches sit on lines
   dropped by this excerpt — only the assignments are visible.  */
5799 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5801 /* flag_pic is a global option, but ix86_cmodel is target saved option
5802 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5803 for PIC, or error out. */
/* PIC direction: promote each non-PIC model to its _PIC variant.  */
5805 switch (ptr->x_ix86_cmodel)
5808 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5812 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5816 ptr->x_ix86_cmodel = CM_LARGE_PIC;
/* The kernel code model has no PIC variant, so this is a hard error.  */
5820 error ("code model %s does not support PIC mode", "kernel");
/* Non-PIC direction: demote each _PIC model back to its plain form.  */
5827 switch (ptr->x_ix86_cmodel)
5830 ptr->x_ix86_cmodel = CM_SMALL;
5834 ptr->x_ix86_cmodel = CM_MEDIUM;
5838 ptr->x_ix86_cmodel = CM_LARGE;
5846 /* Print the current options */
/* Debug dump of a saved cl_target_option record PTR to FILE, each line
   indented by INDENT columns.  Prints arch, tune, branch_cost, and a
   textual rendering of the ISA/target flags built by ix86_target_string
   (whose result is heap-allocated and freed at the end).
   NOTE(review): return type, the declaration of target_string, and
   braces are on lines dropped by this excerpt.  */
5849 ix86_function_specific_print (FILE *file, int indent,
5850 struct cl_target_option *ptr)
5853 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
5854 NULL, NULL, ptr->x_ix86_fpmath, false);
5856 gcc_assert (ptr->arch < PROCESSOR_max);
5857 fprintf (file, "%*sarch = %d (%s)\n",
5859 ptr->arch, processor_target_table[ptr->arch].name);
5861 gcc_assert (ptr->tune < PROCESSOR_max);
5862 fprintf (file, "%*stune = %d (%s)\n",
5864 ptr->tune, processor_target_table[ptr->tune].name);
5866 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
/* target_string may be NULL when no flags were set — guarded by a
   dropped `if` line (original 5868/5869), presumably; verify upstream.  */
5870 fprintf (file, "%*s%s\n", indent, "", target_string);
5871 free (target_string);
5876 /* Inner function to process the attribute((target(...))), take an argument and
5877 set the current options from the argument. If we have a list, recursively go
/* Worker for attribute((target("..."))): parse ARGS (a STRING_CST or a
   TREE_LIST of them), and apply each comma-separated option to OPTS /
   OPTS_SET.  String-valued options (arch=, tune=) are stashed in
   P_STRINGS; enum options record what they set in ENUM_OPTS_SET.
   Returns true on success (return statements are on dropped lines).
   NOTE(review): this excerpt omits many structural lines (braces, the
   attrs[] table declaration header, some locals, `break`s).  */
5881 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5882 struct gcc_options *opts,
5883 struct gcc_options *opts_set,
5884 struct gcc_options *enum_opts_set)
/* Table-entry constructors: string, its length, handler kind, option
   enum, and (for yes/no kinds) the target_flags mask to set/clear.  */
5889 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5890 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5891 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5892 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5893 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5909 enum ix86_opt_type type;
/* ISA options: each maps an attribute string to its -m option.  */
5914 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5915 IX86_ATTR_ISA ("abm", OPT_mabm),
5916 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5917 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5918 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5919 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5920 IX86_ATTR_ISA ("aes", OPT_maes),
5921 IX86_ATTR_ISA ("sha", OPT_msha),
5922 IX86_ATTR_ISA ("avx", OPT_mavx),
5923 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5924 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5925 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5926 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5927 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5928 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5929 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5930 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5931 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5932 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5933 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5934 IX86_ATTR_ISA ("sse", OPT_msse),
5935 IX86_ATTR_ISA ("sse2", OPT_msse2),
5936 IX86_ATTR_ISA ("sse3", OPT_msse3),
5937 IX86_ATTR_ISA ("sse4", OPT_msse4),
5938 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5939 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5940 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5941 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5942 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5943 IX86_ATTR_ISA ("fma", OPT_mfma),
5944 IX86_ATTR_ISA ("xop", OPT_mxop),
5945 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5946 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5947 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5948 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5949 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5950 IX86_ATTR_ISA ("hle", OPT_mhle),
5951 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5952 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5953 IX86_ATTR_ISA ("adx", OPT_madx),
5954 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5955 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5956 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5957 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5958 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5959 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5960 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5961 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5962 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5963 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5964 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
5965 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5966 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5967 IX86_ATTR_ISA ("pku", OPT_mpku),
/* Enum-valued option.  */
5970 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5972 /* string options */
5973 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5974 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Boolean flag options: YES sets the mask, NO clears it (the "no-"
   spelling flips the sense either way).  */
5977 IX86_ATTR_YES ("cld",
5981 IX86_ATTR_NO ("fancy-math-387",
5982 OPT_mfancy_math_387,
5983 MASK_NO_FANCY_MATH_387),
5985 IX86_ATTR_YES ("ieee-fp",
5989 IX86_ATTR_YES ("inline-all-stringops",
5990 OPT_minline_all_stringops,
5991 MASK_INLINE_ALL_STRINGOPS),
5993 IX86_ATTR_YES ("inline-stringops-dynamically",
5994 OPT_minline_stringops_dynamically,
5995 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5997 IX86_ATTR_NO ("align-stringops",
5998 OPT_mno_align_stringops,
5999 MASK_NO_ALIGN_STRINGOPS),
6001 IX86_ATTR_YES ("recip",
6007 /* If this is a list, recurse to get the options. */
6008 if (TREE_CODE (args) == TREE_LIST)
6012 for (; args; args = TREE_CHAIN (args))
6013 if (TREE_VALUE (args)
6014 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
6015 p_strings, opts, opts_set,
6022 else if (TREE_CODE (args) != STRING_CST)
6024 error ("attribute %<target%> argument not a string");
6028 /* Handle multiple arguments separated by commas. */
6029 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
6031 while (next_optstr && *next_optstr != '\0')
6033 char *p = next_optstr;
6035 char *comma = strchr (next_optstr, ',');
6036 const char *opt_string;
6037 size_t len, opt_len;
6042 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the token before the comma; the no-comma path sits on
   dropped lines.  */
6048 len = comma - next_optstr;
6049 next_optstr = comma + 1;
6057 /* Recognize no-xxx. */
6058 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
6067 /* Find the option. */
/* Linear scan of attrs[]: first-character check, then a length-aware
   compare (string/enum options match as a prefix because of their
   trailing '=').  */
6070 for (i = 0; i < ARRAY_SIZE (attrs); i++)
6072 type = attrs[i].type;
6073 opt_len = attrs[i].len;
6074 if (ch == attrs[i].string[0]
6075 && ((type != ix86_opt_str && type != ix86_opt_enum)
6078 && memcmp (p, attrs[i].string, opt_len) == 0)
6081 mask = attrs[i].mask;
6082 opt_string = attrs[i].string;
6087 /* Process the option. */
6090 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options route through the normal option-decoding machinery so
   implied ISAs are handled consistently with the command line.  */
6094 else if (type == ix86_opt_isa)
6096 struct cl_decoded_option decoded;
6098 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
6099 ix86_handle_option (opts, opts_set,
6100 &decoded, input_location);
6103 else if (type == ix86_opt_yes || type == ix86_opt_no)
6105 if (type == ix86_opt_no)
6106 opt_set_p = !opt_set_p;
6109 opts->x_target_flags |= mask;
6111 opts->x_target_flags &= ~mask;
/* String options (arch=/tune=) may only be given once each.  */
6114 else if (type == ix86_opt_str)
6118 error ("option(\"%s\") was already specified", opt_string);
6122 p_strings[opt] = xstrdup (p + opt_len);
6125 else if (type == ix86_opt_enum)
6130 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
6132 set_option (opts, enum_opts_set, opt, value,
6133 p + opt_len, DK_UNSPECIFIED, input_location,
6137 error ("attribute(target(\"%s\")) is unknown", orig_p);
6149 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parse the target-attribute ARGS into OPTS via
   ix86_valid_target_attribute_inner_p, re-run the full option override
   when anything differs from the compile-time defaults, and build a
   TARGET_OPTION_NODE capturing the result.  Returns error_mark_node on
   parse failure.  Saved arch/tune/fpmath state is restored before
   returning so the attribute has no lasting effect on OPTS strings.
   NOTE(review): return type, braces, and the declarations of `t` and
   `i` are on lines dropped by this excerpt.  */
6152 ix86_valid_target_attribute_tree (tree args,
6153 struct gcc_options *opts,
6154 struct gcc_options *opts_set)
/* Remember what we must restore at the end.  */
6156 const char *orig_arch_string = opts->x_ix86_arch_string;
6157 const char *orig_tune_string = opts->x_ix86_tune_string;
6158 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
6159 int orig_tune_defaulted = ix86_tune_defaulted;
6160 int orig_arch_specified = ix86_arch_specified;
6161 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
6164 struct cl_target_option *def
6165 = TREE_TARGET_OPTION (target_option_default_node);
6166 struct gcc_options enum_opts_set;
6168 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
6170 /* Process each of the options on the chain. */
6171 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
6172 opts_set, &enum_opts_set))
6173 return error_mark_node;
6175 /* If the changed options are different from the default, rerun
6176 ix86_option_override_internal, and then save the options away.
6177 The string options are attribute options, and will be undone
6178 when we copy the save structure. */
6179 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
6180 || opts->x_target_flags != def->x_target_flags
6181 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
6182 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
6183 || enum_opts_set.x_ix86_fpmath)
6185 /* If we are using the default tune= or arch=, undo the string assigned,
6186 and use the default. */
6187 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6189 opts->x_ix86_arch_string
6190 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
6192 /* If arch= is set, clear all bits in x_ix86_isa_flags,
6193 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
6194 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
6195 | OPTION_MASK_ABI_64
6196 | OPTION_MASK_ABI_X32
6197 | OPTION_MASK_CODE16);
6200 else if (!orig_arch_specified)
6201 opts->x_ix86_arch_string = NULL;
6203 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
6204 opts->x_ix86_tune_string
6205 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
6206 else if (orig_tune_defaulted)
6207 opts->x_ix86_tune_string = NULL;
6209 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
6210 if (enum_opts_set.x_ix86_fpmath)
6211 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6212 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
6213 && TARGET_SSE_P (opts->x_ix86_isa_flags))
/* With SSE on 32-bit, enable SSE math (combined with 387 math when
   the x87 is also available).  */
6215 if (TARGET_80387_P (opts->x_target_flags))
6216 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE
6219 opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE;
6220 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6223 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
6224 ix86_option_override_internal (false, opts, opts_set);
6226 /* Add any builtin functions with the new isa if any. */
6227 ix86_add_new_builtins (opts->x_ix86_isa_flags);
6229 /* Save the current options unless we are validating options for
6231 t = build_target_option_node (opts);
/* Undo the temporary arch/tune/fpmath changes made above.  */
6233 opts->x_ix86_arch_string = orig_arch_string;
6234 opts->x_ix86_tune_string = orig_tune_string;
6235 opts_set->x_ix86_fpmath = orig_fpmath_set;
6237 /* Free up memory allocated to hold the strings */
6238 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
6239 free (option_strings[i]);
6245 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P implementation: validate the target
   attribute ARGS on FNDECL by building a scratch gcc_options, parsing
   the attribute into it, and attaching the resulting target/optimize
   nodes to FNDECL.  Return value lines are dropped by this excerpt;
   presumably true on success, false for error_mark_node — verify
   upstream.  */
6248 ix86_valid_target_attribute_p (tree fndecl,
6249 tree ARG_UNUSED (name),
6251 int ARG_UNUSED (flags))
6253 struct gcc_options func_options;
6254 tree new_target, new_optimize;
6257 /* attribute((target("default"))) does nothing, beyond
6258 affecting multi-versioning. */
6259 if (TREE_VALUE (args)
6260 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
6261 && TREE_CHAIN (args) == NULL_TREE
6262 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
6265 tree old_optimize = build_optimization_node (&global_options);
6267 /* Get the optimization options of the current function. */
6268 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
/* Fall back to the global optimization node if the function has none
   (guarding `if` is on a dropped line).  */
6271 func_optimize = old_optimize;
6273 /* Init func_options. */
6274 memset (&func_options, 0, sizeof (func_options));
6275 init_options_struct (&func_options, NULL);
6276 lang_hooks.init_options_struct (&func_options);
6278 cl_optimization_restore (&func_options,
6279 TREE_OPTIMIZATION (func_optimize));
6281 /* Initialize func_options to the default before its target options can
6283 cl_target_option_restore (&func_options,
6284 TREE_TARGET_OPTION (target_option_default_node));
6286 new_target = ix86_valid_target_attribute_tree (args, &func_options,
6287 &global_options_set);
6289 new_optimize = build_optimization_node (&func_options);
6291 if (new_target == error_mark_node)
/* Attach the parsed nodes to the function declaration.  */
6294 else if (fndecl && new_target)
6296 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
6298 if (old_optimize != new_optimize)
6299 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
6302 finalize_options_struct (&func_options);
6308 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P: CALLEE may be inlined into CALLER only when the
   callee's ISA flags are a subset of the caller's and all other
   target-specific options (target_flags, arch, tune, fpmath,
   branch_cost) match.  The actual `return` statements are on lines
   dropped by this excerpt.  */
6311 ix86_can_inline_p (tree caller, tree callee)
6314 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
6315 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
6317 /* If callee has no option attributes, then it is ok to inline. */
6321 /* If caller has no option attributes, but callee does then it is not ok to
6323 else if (!caller_tree)
6328 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
6329 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
6331 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
6332 can inline a SSE2 function but a SSE2 function can't inline a SSE4
6334 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
6335 != callee_opts->x_ix86_isa_flags)
6338 /* See if we have the same non-isa options. */
6339 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
6342 /* See if arch, tune, etc. are the same. */
6343 else if (caller_opts->arch != callee_opts->arch)
6346 else if (caller_opts->tune != callee_opts->tune)
6349 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
6352 else if (caller_opts->branch_cost != callee_opts->branch_cost)
6363 /* Remember the last target of ix86_set_current_function. */
/* GC-rooted cache used by ix86_set_current_function to skip redundant
   target-state switches.  */
6364 static GTY(()) tree ix86_previous_fndecl;
6366 /* Set targets globals to the default (or current #pragma GCC target
6367 if active). Invalidate ix86_previous_fndecl cache. */
/* Restores global_options from target_option_current_node, reuses or
   (re)creates the associated target_globals, and clears the fndecl
   cache so the next ix86_set_current_function call does a full switch.
   NOTE(review): the `else` before line 6379 is on a dropped line.  */
6370 ix86_reset_previous_fndecl (void)
6372 tree new_tree = target_option_current_node;
6373 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6374 if (TREE_TARGET_GLOBALS (new_tree))
6375 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6376 else if (new_tree == target_option_default_node)
6377 restore_target_globals (&default_target_globals);
6379 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6380 ix86_previous_fndecl = NULL_TREE;
6383 /* Establish appropriate back-end context for processing the function
6384 FNDECL. The argument might be NULL to indicate processing at top
6385 level, outside of any function scope. */
/* TARGET_SET_CURRENT_FUNCTION: switch global target state to FNDECL's
   function-specific target options when they differ from the state
   last installed.  Caches FNDECL in ix86_previous_fndecl so repeated
   calls for the same function are near-free.  */
6387 ix86_set_current_function (tree fndecl)
6389 /* Only change the context if the function changes. This hook is called
6390 several times in the course of compiling a function, and we don't want to
6391 slow things down too much or call target_reinit when it isn't safe. */
6392 if (fndecl == ix86_previous_fndecl)
/* Determine which target node is currently in effect.  */
6396 if (ix86_previous_fndecl == NULL_TREE)
6397 old_tree = target_option_current_node;
6398 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
6399 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
6401 old_tree = target_option_default_node;
6403 if (fndecl == NULL_TREE)
6405 if (old_tree != target_option_current_node)
6406 ix86_reset_previous_fndecl ();
6410 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6411 if (new_tree == NULL_TREE)
6412 new_tree = target_option_default_node;
/* Same install-or-create logic as ix86_reset_previous_fndecl, but
   keyed on the function's own target node.  */
6414 if (old_tree != new_tree)
6416 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6417 if (TREE_TARGET_GLOBALS (new_tree))
6418 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6419 else if (new_tree == target_option_default_node)
6420 restore_target_globals (&default_target_globals);
6422 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6424 ix86_previous_fndecl = fndecl;
6426 /* 64-bit MS and SYSV ABI have different set of call used registers.
6427 Avoid expensive re-initialization of init_regs each time we switch
6428 function context. */
/* NOTE(review): the enclosing `if (TARGET_64BIT ...)` condition and the
   reinit_regs call are on lines dropped by this excerpt.  */
6430 && (call_used_regs[SI_REG]
6431 == (cfun->machine->call_abi == MS_ABI)))
6436 /* Return true if this goes in large data/bss. */
/* Decide whether EXP belongs in the x86-64 medium-model large
   data/bss sections (.ldata/.lbss): only for medium code models, never
   for functions or automatics, honoring explicit .ldata/.lbss section
   attributes, and otherwise by comparing the object's size against
   -mlarge-data-threshold (ix86_section_threshold).  `return` keywords
   sit on dropped lines in this excerpt.  */
6439 ix86_in_large_data_p (tree exp)
/* Large data only exists under the medium code models.  */
6441 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6444 /* Functions are never large data. */
6445 if (TREE_CODE (exp) == FUNCTION_DECL)
6448 /* Automatic variables are never large data. */
6449 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
/* Respect an explicit section attribute naming a large section.  */
6452 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
6454 const char *section = DECL_SECTION_NAME (exp);
6455 if (strcmp (section, ".ldata") == 0
6456 || strcmp (section, ".lbss") == 0)
6462 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6464 /* If this is an incomplete type with size 0, then we can't put it
6465 in data because it might be too big when completed. Also,
6466 int_size_in_bytes returns -1 if size can vary or is larger than
6467 an integer in which case also it is safer to assume that it goes in
6469 if (size <= 0 || size > ix86_section_threshold)
6476 /* Switch to the appropriate section for output of DECL.
6477 DECL is either a `VAR_DECL' node or a constant of some sort.
6478 RELOC indicates whether forming the initial value of DECL requires
6479 link-time relocations. */
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF: route medium-model large
   objects into the .ldata family of sections based on their
   categorization; everything else falls through to the generic ELF
   selector.  NOTE(review): several case labels/breaks and the sname
   assignments for the plain data/bss/rodata cases are on lines dropped
   by this excerpt.  */
6481 ATTRIBUTE_UNUSED static section *
6482 x86_64_elf_select_section (tree decl, int reloc,
6483 unsigned HOST_WIDE_INT align)
6485 if (ix86_in_large_data_p (decl))
6487 const char *sname = NULL;
6488 unsigned int flags = SECTION_WRITE;
6489 switch (categorize_decl_for_section (decl, reloc))
6494 case SECCAT_DATA_REL:
6495 sname = ".ldata.rel";
6497 case SECCAT_DATA_REL_LOCAL:
6498 sname = ".ldata.rel.local";
6500 case SECCAT_DATA_REL_RO:
6501 sname = ".ldata.rel.ro";
6503 case SECCAT_DATA_REL_RO_LOCAL:
6504 sname = ".ldata.rel.ro.local";
6508 flags |= SECTION_BSS;
6511 case SECCAT_RODATA_MERGE_STR:
6512 case SECCAT_RODATA_MERGE_STR_INIT:
6513 case SECCAT_RODATA_MERGE_CONST:
6517 case SECCAT_SRODATA:
6524 /* We don't split these for medium model. Place them into
6525 default sections and hope for best. */
6530 /* We might get called with string constants, but get_named_section
6531 doesn't like them as they are not DECLs. Also, we need to set
6532 flags in that case. */
/* The DECL_P guard selecting between these two calls is on a dropped
   line.  */
6534 return get_section (sname, flags, NULL);
6535 return get_named_section (decl, sname, reloc);
6538 return default_elf_select_section (decl, reloc, align);
6541 /* Select a set of attributes for section NAME based on the properties
6542 of DECL and whether or not RELOC indicates that DECL's initializer
6543 might contain runtime relocations. */
/* TARGET_SECTION_TYPE_FLAGS for x86-64 ELF: start from the generic
   flags, then mark large read-only-after-reloc sections RELRO and the
   .lbss family BSS.  */
6545 static unsigned int ATTRIBUTE_UNUSED
6546 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6548 unsigned int flags = default_section_type_flags (decl, name, reloc);
/* Only force RELRO when there is no DECL to categorize from.  */
6550 if (decl == NULL_TREE
6551 && (strcmp (name, ".ldata.rel.ro") == 0
6552 || strcmp (name, ".ldata.rel.ro.local") == 0))
6553 flags |= SECTION_RELRO;
/* NOTE(review): the strncmp lengths (5 and 16) are one short of the
   ".lbss." (6) and ".gnu.linkonce.lb." (17) prefix lengths, so e.g.
   ".lbssX" also matches — looks like an off-by-one; confirm against
   upstream before changing, since the behavior is long-standing.  */
6555 if (strcmp (name, ".lbss") == 0
6556 || strncmp (name, ".lbss.", 5) == 0
6557 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
6558 flags |= SECTION_BSS;
6563 /* Build up a unique section name, expressed as a
6564 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6565 RELOC indicates whether the initial value of EXP requires
6566 link-time relocations. */
/* TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: for medium-model large
   objects, build a per-decl section name with an .l-prefixed family
   (".ldata.<name>", ".lbss.<name>", ...), using short prefixes plus a
   .gnu.linkonce wrapper when COMDAT groups are unavailable.  All other
   decls use the generic default_unique_section.  */
6568 static void ATTRIBUTE_UNUSED
6569 x86_64_elf_unique_section (tree decl, int reloc)
6571 if (ix86_in_large_data_p (decl))
6573 const char *prefix = NULL;
6574 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6575 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6577 switch (categorize_decl_for_section (decl, reloc))
6580 case SECCAT_DATA_REL:
6581 case SECCAT_DATA_REL_LOCAL:
6582 case SECCAT_DATA_REL_RO:
6583 case SECCAT_DATA_REL_RO_LOCAL:
6584 prefix = one_only ? ".ld" : ".ldata";
6587 prefix = one_only ? ".lb" : ".lbss";
6590 case SECCAT_RODATA_MERGE_STR:
6591 case SECCAT_RODATA_MERGE_STR_INIT:
6592 case SECCAT_RODATA_MERGE_CONST:
6593 prefix = one_only ? ".lr" : ".lrodata";
6595 case SECCAT_SRODATA:
6602 /* We don't split these for medium model. Place them into
6603 default sections and hope for best. */
/* A prefix was chosen above (the guarding `if (prefix)` is on a
   dropped line): compose "<linkonce><prefix>.<mangled-name>".  */
6608 const char *name, *linkonce;
6611 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6612 name = targetm.strip_name_encoding (name);
6614 /* If we're using one_only, then there needs to be a .gnu.linkonce
6615 prefix to the section name. */
6616 linkonce = one_only ? ".gnu.linkonce" : "";
6618 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6620 set_decl_section_name (decl, string);
6624 default_unique_section (decl, reloc);
6627 #ifdef COMMON_ASM_OP
6628 /* This says how to output assembler code to declare an
6629 uninitialized external linkage data object.
6631 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a .comm (or, for medium-model objects above the section
   threshold, .largecomm) directive for NAME with the given SIZE and
   bit alignment ALIGN.  Used by ASM_OUTPUT_ALIGNED_COMMON.  */
6634 x86_elf_aligned_common (FILE *file,
6635 const char *name, unsigned HOST_WIDE_INT size,
/* Large objects under the medium code models go to .largecomm so the
   linker can place them beyond 2 GiB.  */
6638 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6639 && size > (unsigned int)ix86_section_threshold)
6640 fputs ("\t.largecomm\t", file);
6642 fputs (COMMON_ASM_OP, file);
6643 assemble_name (file, name);
/* ALIGN is in bits; the directive wants bytes.  */
6644 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6645 size, align / BITS_PER_UNIT);
6649 /* Utility function for targets to use in implementing
6650 ASM_OUTPUT_ALIGNED_BSS. */
/* Place DECL/NAME into .lbss (medium model, large object) or the
   normal bss section, align, emit its label, and reserve SIZE bytes
   (at least 1, so the label refers to real storage).  */
6653 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6654 unsigned HOST_WIDE_INT size, int align)
6656 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6657 && size > (unsigned int)ix86_section_threshold)
6658 switch_to_section (get_named_section (decl, ".lbss", 0));
6660 switch_to_section (bss_section);
6661 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6662 #ifdef ASM_DECLARE_OBJECT_NAME
6663 last_assemble_variable_decl = decl;
6664 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6666 /* Standard thing is just output label for the object. */
6667 ASM_OUTPUT_LABEL (file, name);
6668 #endif /* ASM_DECLARE_OBJECT_NAME */
6669 ASM_OUTPUT_SKIP (file, size ? size : 1);
6672 /* Decide whether we must probe the stack before any space allocation
6673 on this target. It's essentially TARGET_STACK_PROBE except when
6674 -fstack-check causes the stack to be already probed differently. */
/* Predicate: report TARGET_STACK_PROBE unless static builtin stack
   checking already probes the stack (then answer on the dropped line
   after the `if` — presumably `return false;`).  */
6677 ix86_target_stack_probe (void)
6679 /* Do not probe the stack twice if static stack checking is enabled. */
6680 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6683 return TARGET_STACK_PROBE;
6686 /* Decide whether we can make a sibling call to a function. DECL is the
6687 declaration of the function being targeted by the call and EXP is the
6688 CALL_EXPR representing the call. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL: rejects sibcalls when the stack
   would be misaligned, when return-value locations differ (x87 stack
   returns in particular), across MS-to-SYSV ABI boundaries, and for
   indirect calls that cannot get a call-clobbered address register.
   `return false;`/`return true;` statements are on dropped lines.  */
6691 ix86_function_ok_for_sibcall (tree decl, tree exp)
6693 tree type, decl_or_type;
6695 bool bind_global = decl && !targetm.binds_local_p (decl);
6697 /* If we are generating position-independent code, we cannot sibcall
6698 optimize direct calls to global functions, as the PLT requires
6699 %ebx be live. (Darwin does not have a PLT.) */
6707 /* If we need to align the outgoing stack, then sibcalling would
6708 unalign the stack, which may break the called function. */
6709 if (ix86_minimum_incoming_stack_boundary (true)
6710 < PREFERRED_STACK_BOUNDARY)
/* With a DECL available, use its type directly (guard on dropped
   line); otherwise derive the function type from the CALL_EXPR.  */
6715 decl_or_type = decl;
6716 type = TREE_TYPE (decl);
6720 /* We're looking at the CALL_EXPR, we need the type of the function. */
6721 type = CALL_EXPR_FN (exp); /* pointer expression */
6722 type = TREE_TYPE (type); /* pointer type */
6723 type = TREE_TYPE (type); /* function type */
6724 decl_or_type = type;
6727 /* Check that the return value locations are the same. Like
6728 if we are returning floats on the 80387 register stack, we cannot
6729 make a sibcall from a function that doesn't return a float to a
6730 function that does or, conversely, from a function that does return
6731 a float to a function that doesn't; the necessary stack adjustment
6732 would not be executed. This is also the place we notice
6733 differences in the return value ABI. Note that it is ok for one
6734 of the functions to have void return type as long as the return
6735 value of the other is passed in a register. */
6736 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6737 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6739 if (STACK_REG_P (a) || STACK_REG_P (b))
6741 if (!rtx_equal_p (a, b))
6744 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6746 else if (!rtx_equal_p (a, b))
6751 /* The SYSV ABI has more call-clobbered registers;
6752 disallow sibcalls from MS to SYSV. */
6753 if (cfun->machine->call_abi == MS_ABI
6754 && ix86_function_type_abi (type) == SYSV_ABI)
6759 /* If this call is indirect, we'll need to be able to use a
6760 call-clobbered register for the address of the target function.
6761 Make sure that all such registers are not used for passing
6762 parameters. Note that DLLIMPORT functions and call to global
6763 function via GOT slot are indirect. */
6765 || (bind_global && flag_pic && !flag_plt)
6766 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6768 /* Check if regparm >= 3 since arg_reg_available is set to
6769 false if regparm == 0. If regparm is 1 or 2, there is
6770 always a call-clobbered register available.
6772 ??? The symbol indirect call doesn't need a call-clobbered
6773 register. But we don't know if this is a symbol indirect
6774 call or not here. */
6775 if (ix86_function_regparm (type, NULL) >= 3
6776 && !cfun->machine->arg_reg_available)
6781 /* Otherwise okay. That also includes certain types of indirect calls. */
6785 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6786 and "sseregparm" calling convention attributes;
6787 arguments as in struct attribute_spec.handler. */
/* Validates mutual-exclusion rules between the x86 calling-convention
   attributes.  On any conflict an error is emitted; when the attribute is
   simply not applicable, a warning is emitted and *no_add_attrs is set so
   the attribute is dropped.
   NOTE(review): this extract elides some original lines (the inline line
   numbers jump), so control-flow braces/returns are not all visible.  */
6790 ix86_handle_cconv_attribute (tree *node, tree name,
/* Reject attribute placement on anything that is not a function/method
   type or a FIELD_DECL/TYPE_DECL carrier.  */
6795 if (TREE_CODE (*node) != FUNCTION_TYPE
6796 && TREE_CODE (*node) != METHOD_TYPE
6797 && TREE_CODE (*node) != FIELD_DECL
6798 && TREE_CODE (*node) != TYPE_DECL)
6800 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6802 *no_add_attrs = true;
6806 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6807 if (is_attribute_p ("regparm", name))
6811 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6813 error ("fastcall and regparm attributes are not compatible");
6816 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* NOTE(review): "regparam" in the diagnostic below looks like a typo for
   "regparm" (diagnostic text only; behavior otherwise unaffected).  */
6818 error ("regparam and thiscall attributes are not compatible");
/* regparm takes one argument: an integer constant bounded by REGPARM_MAX. */
6821 cst = TREE_VALUE (args);
6822 if (TREE_CODE (cst) != INTEGER_CST)
6824 warning (OPT_Wattributes,
6825 "%qE attribute requires an integer constant argument",
6827 *no_add_attrs = true;
6829 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6831 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6833 *no_add_attrs = true;
6841 /* Do not warn when emulating the MS ABI. */
6842 if ((TREE_CODE (*node) != FUNCTION_TYPE
6843 && TREE_CODE (*node) != METHOD_TYPE)
6844 || ix86_function_type_abi (*node) != MS_ABI)
6845 warning (OPT_Wattributes, "%qE attribute ignored",
6847 *no_add_attrs = true;
6851 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6852 if (is_attribute_p ("fastcall", name))
6854 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6856 error ("fastcall and cdecl attributes are not compatible");
6858 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6860 error ("fastcall and stdcall attributes are not compatible");
6862 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6864 error ("fastcall and regparm attributes are not compatible");
6866 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6868 error ("fastcall and thiscall attributes are not compatible");
6872 /* Can combine stdcall with fastcall (redundant), regparm and
6874 else if (is_attribute_p ("stdcall", name))
6876 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6878 error ("stdcall and cdecl attributes are not compatible");
6880 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6882 error ("stdcall and fastcall attributes are not compatible");
6884 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6886 error ("stdcall and thiscall attributes are not compatible");
6890 /* Can combine cdecl with regparm and sseregparm. */
6891 else if (is_attribute_p ("cdecl", name))
6893 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6895 error ("stdcall and cdecl attributes are not compatible");
6897 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6899 error ("fastcall and cdecl attributes are not compatible");
6901 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6903 error ("cdecl and thiscall attributes are not compatible");
/* thiscall is only meaningful for C++ non-static member functions; warn
   (under -pedantic) when applied to a plain function type.  */
6906 else if (is_attribute_p ("thiscall", name))
6908 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6909 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6911 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6913 error ("stdcall and thiscall attributes are not compatible");
6915 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6917 error ("fastcall and thiscall attributes are not compatible");
6919 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6921 error ("cdecl and thiscall attributes are not compatible");
6925 /* Can combine sseregparm with all attributes. */
6930 /* The transactional memory builtins are implicitly regparm or fastcall
6931 depending on the ABI. Override the generic do-nothing attribute that
6932 these builtins were declared with, and replace it with one of the two
6933 attributes that we expect elsewhere. */
6936 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6937 int flags, bool *no_add_attrs)
6941 /* In no case do we want to add the placeholder attribute. */
6942 *no_add_attrs = true;
6944 /* The 64-bit ABI is unchanged for transactional memory. */
6948 /* ??? Is there a better way to validate 32-bit windows? We have
6949 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
/* On (presumed) 32-bit Windows use fastcall; otherwise use regparm(2).
   NOTE(review): CHECK_STACK_LIMIT > 0 is used here as a proxy for the
   Windows target — confirm against the full source.  */
6950 if (CHECK_STACK_LIMIT > 0)
6951 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6954 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6955 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
/* Install the substituted attribute list on the declaration.  */
6957 decl_attributes (node, alt, flags);
6962 /* This function determines from TYPE the calling-convention. */
/* Returns a bitmask of IX86_CALLCVT_* flags for function type TYPE.
   Explicit attributes win; otherwise the default convention is derived
   from -mrtd, the target ABI, and whether TYPE is a method type.
   NOTE(review): some original lines are elided in this extract.  */
6965 ix86_get_callcvt (const_tree type)
6967 unsigned int ret = 0;
6972 return IX86_CALLCVT_CDECL;
6974 attrs = TYPE_ATTRIBUTES (type);
6975 if (attrs != NULL_TREE)
/* The four base conventions are mutually exclusive (enforced by
   ix86_handle_cconv_attribute), hence the else-if chain.  */
6977 if (lookup_attribute ("cdecl", attrs))
6978 ret |= IX86_CALLCVT_CDECL;
6979 else if (lookup_attribute ("stdcall", attrs))
6980 ret |= IX86_CALLCVT_STDCALL;
6981 else if (lookup_attribute ("fastcall", attrs))
6982 ret |= IX86_CALLCVT_FASTCALL;
6983 else if (lookup_attribute ("thiscall", attrs))
6984 ret |= IX86_CALLCVT_THISCALL;
6986 /* Regparam isn't allowed for thiscall and fastcall. */
6987 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6989 if (lookup_attribute ("regparm", attrs))
6990 ret |= IX86_CALLCVT_REGPARM;
6991 if (lookup_attribute ("sseregparm", attrs))
6992 ret |= IX86_CALLCVT_SSEREGPARM;
/* If a base convention was chosen by attribute, we are done.  */
6995 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit base convention: -mrtd makes stdcall the default for
   non-stdarg functions.  */
6999 is_stdarg = stdarg_p (type);
7000 if (TARGET_RTD && !is_stdarg)
7001 return IX86_CALLCVT_STDCALL | ret;
/* Otherwise cdecl, except that MS-ABI methods default to thiscall.  */
7005 || TREE_CODE (type) != METHOD_TYPE
7006 || ix86_function_type_abi (type) != MS_ABI)
7007 return IX86_CALLCVT_CDECL | ret;
7009 return IX86_CALLCVT_THISCALL;
7012 /* Return 0 if the attributes for two types are incompatible, 1 if they
7013 are compatible, and 2 if they are nearly compatible (which causes a
7014 warning to be generated). */
7017 ix86_comp_type_attributes (const_tree type1, const_tree type2)
7019 unsigned int ccvt1, ccvt2;
/* Only function/method types carry calling-convention attributes.  */
7021 if (TREE_CODE (type1) != FUNCTION_TYPE
7022 && TREE_CODE (type1) != METHOD_TYPE)
/* Compare derived calling conventions and regparm counts; the return
   statements for the compare results are elided in this extract.  */
7025 ccvt1 = ix86_get_callcvt (type1);
7026 ccvt2 = ix86_get_callcvt (type2);
7029 if (ix86_function_regparm (type1, NULL)
7030 != ix86_function_regparm (type2, NULL))
7036 /* Return the regparm value for a function with the indicated TYPE and DECL.
7037 DECL may be NULL when calling function indirectly
7038 or considering a libcall. */
7041 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: the regparm count is fixed by the ABI in effect.  */
7048 return (ix86_function_type_abi (type) == SYSV_ABI
7049 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
7050 ccvt = ix86_get_callcvt (type);
7051 regparm = ix86_regparm;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
7053 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
7055 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
7058 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
/* fastcall/thiscall imply fixed register counts (values elided here).  */
7062 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7064 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7067 /* Use register calling convention for local functions when possible. */
7069 && TREE_CODE (decl) == FUNCTION_DECL)
7071 cgraph_node *target = cgraph_node::get (decl);
7073 target = target->function_symbol ();
7075 /* Caller and callee must agree on the calling convention, so
7076 checking here just optimize means that with
7077 __attribute__((optimize (...))) caller could use regparm convention
7078 and callee not, or vice versa. Instead look at whether the callee
7079 is optimized or not. */
7080 if (target && opt_for_fn (target->decl, optimize)
7081 && !(profile_flag && !flag_fentry))
7083 cgraph_local_info *i = &target->local;
7084 if (i && i->local && i->can_change_signature)
7086 int local_regparm, globals = 0, regno;
7088 /* Make sure no regparm register is taken by a
7089 fixed register variable. */
7090 for (local_regparm = 0; local_regparm < REGPARM_MAX;
7092 if (fixed_regs[local_regparm])
7095 /* We don't want to use regparm(3) for nested functions as
7096 these use a static chain pointer in the third argument. */
7097 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
7100 /* Save a register for the split stack. */
7101 if (local_regparm == 3 && flag_split_stack)
7104 /* Each fixed register usage increases register pressure,
7105 so less registers should be used for argument passing.
7106 This functionality can be overriden by an explicit
7108 for (regno = AX_REG; regno <= DI_REG; regno++)
7109 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of globally fixed
   general registers counted above.  */
7113 = globals < local_regparm ? local_regparm - globals : 0;
7115 if (local_regparm > regparm)
7116 regparm = local_regparm;
7124 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
7125 DFmode (2) arguments in SSE registers for a function with the
7126 indicated TYPE and DECL. DECL may be NULL when calling function
7127 indirectly or considering a libcall. Return -1 if any FP parameter
7128 should be rejected by error. This is used in siutation we imply SSE
7129 calling convetion but the function is called from another function with
7130 SSE disabled. Otherwise return 0. */
7133 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* Only meaningful for 32-bit; the 64-bit ABI passes FP in SSE anyway.  */
7135 gcc_assert (!TARGET_64BIT);
7137 /* Use SSE registers to pass SFmode and DFmode arguments if requested
7138 by the sseregparm attribute. */
7139 if (TARGET_SSEREGPARM
7140 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE enabled is a hard error (when WARN is set).  */
7147 error ("calling %qD with attribute sseregparm without "
7148 "SSE/SSE2 enabled", decl);
7150 error ("calling %qT with attribute sseregparm without "
7151 "SSE/SSE2 enabled", type);
/* Local-function optimization: resolve DECL to its ultimate alias
   target before inspecting its options.  */
7162 cgraph_node *target = cgraph_node::get (decl);
7164 target = target->function_symbol ();
7166 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
7167 (and DFmode for SSE2) arguments in SSE registers. */
7169 /* TARGET_SSE_MATH */
7170 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
7171 && opt_for_fn (target->decl, optimize)
7172 && !(profile_flag && !flag_fentry))
7174 cgraph_local_info *i = &target->local;
7175 if (i && i->local && i->can_change_signature)
7177 /* Refuse to produce wrong code when local function with SSE enabled
7178 is called from SSE disabled function.
7179 FIXME: We need a way to detect these cases cross-ltrans partition
7180 and avoid using SSE calling conventions on local functions called
7181 from function with SSE disabled. For now at least delay the
7182 warning until we know we are going to produce wrong code.
7184 if (!TARGET_SSE && warn)
/* Return 2 when the callee has SSE2 (SF+DF in SSE regs), else 1.  */
7186 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
7187 ->x_ix86_isa_flags) ? 2 : 1;
7194 /* Return true if EAX is live at the start of the function. Used by
7195 ix86_expand_prologue to determine if we need special help before
7196 calling allocate_stack_worker. */
7199 ix86_eax_live_at_start_p (void)
7201 /* Cheat. Don't bother working forward from ix86_function_regparm
7202 to the function type to whether an actual argument is located in
7203 eax. Instead just look at cfg info, which is still close enough
7204 to correct at this point. This gives false positives for broken
7205 functions that might use uninitialized data that happens to be
7206 allocated in eax, but who cares? */
/* Register 0 is AX on x86; query DF liveness at the entry block exit.  */
7207 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Return whether the callee should leave the hidden aggregate-return
   pointer on the stack for the caller to pop (rather than popping it
   itself).  Controlled by the "callee_pop_aggregate_return" attribute,
   the 32-bit MS ABI default, and KEEP_AGGREGATE_RETURN_POINTER.  */
7211 ix86_keep_aggregate_return_pointer (tree fntype)
/* An explicit attribute argument of 0 means "caller pops" (keep).  */
7217 attr = lookup_attribute ("callee_pop_aggregate_return",
7218 TYPE_ATTRIBUTES (fntype));
7220 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
7222 /* For 32-bit MS-ABI the default is to keep aggregate
7224 if (ix86_function_type_abi (fntype) == MS_ABI)
7227 return KEEP_AGGREGATE_RETURN_POINTER != 0;
7230 /* Value is the number of bytes of arguments automatically
7231 popped when returning from a subroutine call.
7232 FUNDECL is the declaration node of the function (as a tree),
7233 FUNTYPE is the data type of the function (as a tree),
7234 or for a library call it is an identifier node for the subroutine name.
7235 SIZE is the number of bytes of arguments passed on the stack.
7237 On the 80386, the RTD insn may be used to pop them if the number
7238 of args is fixed, but if the number is variable then the caller
7239 must pop them all. RTD can't be used for library calls now
7240 because the library is compiled with the Unix compiler.
7241 Use of RTD is a selectable option, since it is incompatible with
7242 standard Unix calling sequences. If the option is not selected,
7243 the caller must always pop the args.
7245 The attribute stdcall is equivalent to RTD on a per module basis. */
7248 ix86_return_pops_args (tree fundecl, tree funtype, int size)
7252 /* None of the 64-bit ABIs pop arguments. */
7256 ccvt = ix86_get_callcvt (funtype);
/* Callee-pops conventions (stdcall/fastcall/thiscall) pop SIZE bytes,
   but only for non-variadic functions.  */
7258 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
7259 | IX86_CALLCVT_THISCALL)) != 0
7260 && ! stdarg_p (funtype))
7263 /* Lose any fake structure return argument if it is passed on the stack. */
7264 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
7265 && !ix86_keep_aggregate_return_pointer (funtype))
7267 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden return pointer is one word; pop it on return.  */
7269 return GET_MODE_SIZE (Pmode);
7275 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* Reject combined insns whose hard-register operands no longer satisfy
   any preferred alternative's constraints, which would otherwise lead
   reload into a spill failure.  */
7278 ix86_legitimate_combined_insn (rtx_insn *insn)
7280 /* Check operand constraints in case hard registers were propagated
7281 into insn pattern. This check prevents combine pass from
7282 generating insn patterns with invalid hard register operands.
7283 These invalid insns can eventually confuse reload to error out
7284 with a spill failure. See also PRs 46829 and 46843. */
/* Deliberate assignment: cache the recog result in INSN_CODE.  */
7285 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
7289 extract_insn (insn);
7290 preprocess_constraints (insn);
7292 int n_operands = recog_data.n_operands;
7293 int n_alternatives = recog_data.n_alternatives;
7294 for (i = 0; i < n_operands; i++)
7296 rtx op = recog_data.operand[i];
7297 machine_mode mode = GET_MODE (op);
7298 const operand_alternative *op_alt;
7303 /* For pre-AVX disallow unaligned loads/stores where the
7304 instructions don't support it. */
7306 && VECTOR_MODE_P (mode)
7307 && misaligned_operand (op, mode))
7309 unsigned int min_align = get_attr_ssememalign (insn);
7311 || MEM_ALIGN (op) < min_align)
7315 /* A unary operator may be accepted by the predicate, but it
7316 is irrelevant for matching constraints. */
/* Strip subregs of hard registers, tracking the byte offset so the
   class check below can account for it.  */
7322 if (REG_P (SUBREG_REG (op))
7323 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
7324 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
7325 GET_MODE (SUBREG_REG (op)),
7328 op = SUBREG_REG (op);
/* Only hard-register operands can invalidate an alternative here.  */
7331 if (!(REG_P (op) && HARD_REGISTER_P (op)))
7334 op_alt = recog_op_alt;
7336 /* Operand has no constraints, anything is OK. */
7337 win = !n_alternatives;
/* Search the preferred alternatives for one this operand satisfies.  */
7339 alternative_mask preferred = get_preferred_alternatives (insn);
7340 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7342 if (!TEST_BIT (preferred, j))
7344 if (op_alt[i].anything_ok
7345 || (op_alt[i].matches != -1
7347 (recog_data.operand[i],
7348 recog_data.operand[op_alt[i].matches]))
7349 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7364 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* Returns the constant offset added to (addr >> 3) to locate the
   AddressSanitizer shadow byte: 1<<44 for LP64 Mach-O, 0x7fff8000 for
   other LP64 targets, 1<<29 for 32-bit.  */
7366 static unsigned HOST_WIDE_INT
7367 ix86_asan_shadow_offset (void)
7369 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7370 : HOST_WIDE_INT_C (0x7fff8000))
7371 : (HOST_WIDE_INT_1 << 29);
7374 /* Argument support functions. */
7376 /* Return true when register may be used to pass function parameters. */
7378 ix86_function_arg_regno_p (int regno)
7381 enum calling_abi call_abi;
7382 const int *parm_regs;
/* MPX bound registers pass pointer bounds when enabled.  */
7384 if (TARGET_MPX && BND_REGNO_P (regno))
/* 32-bit cases: integer regparm registers plus (conditionally) MMX and
   SSE argument registers.  Some branch lines are elided here.  */
7390 return (regno < REGPARM_MAX
7391 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7393 return (regno < REGPARM_MAX
7394 || (TARGET_MMX && MMX_REGNO_P (regno)
7395 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7396 || (TARGET_SSE && SSE_REGNO_P (regno)
7397 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7400 if (TARGET_SSE && SSE_REGNO_P (regno)
7401 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7404 /* TODO: The function should depend on current function ABI but
7405 builtins.c would need updating then. Therefore we use the
7407 call_abi = ix86_cfun_abi ();
7409 /* RAX is used as hidden argument to va_arg functions. */
7410 if (call_abi == SYSV_ABI && regno == AX_REG)
/* 64-bit: check REGNO against the ABI's integer parameter registers.  */
7413 if (call_abi == MS_ABI)
7414 parm_regs = x86_64_ms_abi_int_parameter_registers;
7416 parm_regs = x86_64_int_parameter_registers;
7418 for (i = 0; i < (call_abi == MS_ABI
7419 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7420 if (regno == parm_regs[i])
7425 /* Return if we do not know how to pass TYPE solely in registers. */
7428 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
/* Defer to the generic size/padding test first.  */
7430 if (must_pass_in_stack_var_size_or_pad (mode, type))
7433 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7434 The layout_type routine is crafty and tries to trick us into passing
7435 currently unsupported vector types on the stack by using TImode. */
7436 return (!TARGET_64BIT && mode == TImode
7437 && type && TREE_CODE (type) != VECTOR_TYPE);
7440 /* It returns the size, in bytes, of the area reserved for arguments passed
7441 in registers for the function represented by fndecl dependent to the used
7444 ix86_reg_parm_stack_space (const_tree fndecl)
7446 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be a declaration or a function type; dispatch accordingly.  */
7447 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7448 call_abi = ix86_function_abi (fndecl);
7450 call_abi = ix86_function_type_abi (fndecl);
/* Only 64-bit MS ABI reserves a home area (size line elided here).  */
7451 if (TARGET_64BIT && call_abi == MS_ABI)
7456 /* We add this as a workaround in order to use libc_has_function
/* Thin forwarder to the target hook so i386 code can query libc
   function-class availability uniformly.  */
7459 ix86_libc_has_function (enum function_class fn_class)
7461 return targetm.libc_has_function (fn_class);
7464 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
7465 specifying the call abi used. */
7467 ix86_function_type_abi (const_tree fntype)
7469 enum calling_abi abi = ix86_abi;
/* Without attributes the compile-time default ABI applies.  */
7471 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
/* "ms_abi"/"sysv_abi" attributes flip the ABI; ms_abi is rejected
   for x32.  (Some branch lines are elided in this extract.)  */
7475 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7478 error ("X32 does not support ms_abi attribute");
7482 else if (abi == MS_ABI
7483 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Returns the calling ABI (SYSV_ABI or MS_ABI) of declaration FNDECL,
   falling back to the default ix86_abi when FNDECL is null.  */
7489 static enum calling_abi
7490 ix86_function_abi (const_tree fndecl)
7492 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7495 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
7496 specifying the call abi used. */
7498 ix86_cfun_abi (void)
/* Outside a function context, fall back to the default ABI.  */
7500 return cfun ? cfun->machine->call_abi : ix86_abi;
/* Returns whether FN carries the "ms_hook_prologue" attribute; using it
   on a nested function is diagnosed as an error.  */
7504 ix86_function_ms_hook_prologue (const_tree fn)
7506 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7508 if (decl_function_context (fn) != NULL_TREE)
7509 error_at (DECL_SOURCE_LOCATION (fn),
7510 "ms_hook_prologue is not compatible with nested function");
7517 /* Write the extra assembler code needed to declare a function properly. */
/* Emits hot-patch padding (0xCC filler before the label, then a
   patchable no-op sequence after it) when the function uses the
   ms_hook_prologue attribute.  */
7520 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7523 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Reserve 32 (64-bit) or 16 (32-bit) bytes of 0xCC filler ahead of the
   label for the hot-patch jump area.  */
7527 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7528 unsigned int filler_cc = 0xcccccccc;
7530 for (i = 0; i < filler_count; i += 4)
7531 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7534 #ifdef SUBTARGET_ASM_UNWIND_INIT
7535 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7538 ASM_OUTPUT_LABEL (asm_out_file, fname);
7540 /* Output magic byte marker, if hot-patch attribute is set. */
7545 /* leaq [%rsp + 0], %rsp */
7546 asm_fprintf (asm_out_file, ASM_BYTE
7547 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
7551 /* movl.s %edi, %edi
7553 movl.s %esp, %ebp */
7554 asm_fprintf (asm_out_file, ASM_BYTE
7555 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
7561 extern void init_regs (void);
7563 /* Implementation of call abi switching target hook. Specific to FNDECL
7564 the specific call register sets are set. See also
7565 ix86_conditional_register_usage for more details. */
7567 ix86_call_abi_override (const_tree fndecl)
/* Record FNDECL's ABI on the current function's machine-specific data.  */
7569 cfun->machine->call_abi = ix86_function_abi (fndecl);
7572 /* Return 1 if pseudo register should be created and used to hold
7573 GOT address for PIC code. */
7575 ix86_use_pseudo_pic_reg ()
/* Condition continues past this extract; only the CM_SMALL_PIC test of
   the code-model check is visible here.  */
7578 && (ix86_cmodel == CM_SMALL_PIC
7585 /* Initialize large model PIC register. */
/* Emits the set_rip/set_got_offset/add sequence that materializes the
   GOT address into pic_offset_table_rtx for -mcmodel=large PIC, using
   hard register TMP_REGNO as scratch.  */
7588 ix86_init_large_pic_reg (unsigned int tmp_regno)
7590 rtx_code_label *label;
/* Large-model PIC is 64-bit only.  */
7593 gcc_assert (Pmode == DImode);
7594 label = gen_label_rtx ();
/* Keep the label: the set_rip pattern references it.  */
7596 LABEL_PRESERVE_P (label) = 1;
7597 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7598 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7599 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7601 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7602 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7603 pic_offset_table_rtx, tmp_reg));
7606 /* Create and initialize PIC register if required. */
/* Emits the PIC-register setup sequence on the entry edge of the
   current function; a no-op unless a pseudo PIC register is in use.  */
7608 ix86_init_pic_reg (void)
7613 if (!ix86_use_pseudo_pic_reg ())
/* 64-bit paths: large model uses the multi-insn helper with R11 as
   scratch; otherwise a single set_got_rex64.  */
7620 if (ix86_cmodel == CM_LARGE_PIC)
7621 ix86_init_large_pic_reg (R11_REG);
7623 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7627 /* If there is future mcount call in the function it is more profitable
7628 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7629 rtx reg = crtl->profile
7630 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7631 : pic_offset_table_rtx;
7632 rtx_insn *insn = emit_insn (gen_set_got (reg));
7633 RTX_FRAME_RELATED_P (insn) = 1;
7635 emit_move_insn (pic_offset_table_rtx, reg);
7636 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Splice the generated sequence onto the function's entry edge.  */
7642 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7643 insert_insn_on_edge (seq, entry_edge);
7644 commit_one_edge_insertion (entry_edge);
7647 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7648 for a call to a function whose data type is FNTYPE.
7649 For a library call, FNTYPE is 0. */
7652 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7653 tree fntype, /* tree ptr for function decl */
7654 rtx libname, /* SYMBOL_REF of library name or 0 */
7658 struct cgraph_local_info *i = NULL;
7659 struct cgraph_node *target = NULL;
/* Start from a zeroed state; fields below are filled selectively.  */
7661 memset (cum, 0, sizeof (*cum));
/* Resolve the callee's ABI: prefer the cgraph alias target when a
   declaration is available, else the type, else the default.  */
7665 target = cgraph_node::get (fndecl);
7668 target = target->function_symbol ();
7669 i = cgraph_node::local_info (target->decl);
7670 cum->call_abi = ix86_function_abi (target->decl);
7673 cum->call_abi = ix86_function_abi (fndecl);
7676 cum->call_abi = ix86_function_type_abi (fntype);
7678 cum->caller = caller;
7680 /* Set up the number of registers to use for passing arguments. */
7681 cum->nregs = ix86_regparm;
7684 cum->nregs = (cum->call_abi == SYSV_ABI
7685 ? X86_64_REGPARM_MAX
7686 : X86_64_MS_REGPARM_MAX);
7690 cum->sse_nregs = SSE_REGPARM_MAX;
7693 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7694 ? X86_64_SSE_REGPARM_MAX
7695 : X86_64_MS_SSE_REGPARM_MAX);
7699 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Enable the one-shot psABI-change warnings by default.  */
7700 cum->warn_avx512f = true;
7701 cum->warn_avx = true;
7702 cum->warn_sse = true;
7703 cum->warn_mmx = true;
7705 /* Because type might mismatch in between caller and callee, we need to
7706 use actual type of function for local calls.
7707 FIXME: cgraph_analyze can be told to actually record if function uses
7708 va_start so for local functions maybe_vaarg can be made aggressive
7710 FIXME: once typesytem is fixed, we won't need this code anymore. */
7711 if (i && i->local && i->can_change_signature)
7712 fntype = TREE_TYPE (target->decl);
7713 cum->stdarg = stdarg_p (fntype);
/* Unprototyped or stdarg callees may take variable arguments.  */
7714 cum->maybe_vaarg = (fntype
7715 ? (!prototype_p (fntype) || stdarg_p (fntype))
/* MPX pointer-bounds passing state.  */
7718 cum->bnd_regno = FIRST_BND_REG;
7719 cum->bnds_in_bt = 0;
7720 cum->force_bnd_pass = 0;
7725 /* If there are variable arguments, then we won't pass anything
7726 in registers in 32-bit mode. */
7727 if (stdarg_p (fntype))
7730 /* Since in 32-bit, variable arguments are always passed on
7731 stack, there is scratch register available for indirect
7733 cfun->machine->arg_reg_available = true;
/* No register args for 32-bit stdarg: suppress the ABI warnings too.  */
7736 cum->warn_avx512f = false;
7737 cum->warn_avx = false;
7738 cum->warn_sse = false;
7739 cum->warn_mmx = false;
7743 /* Use ecx and edx registers if function has fastcall attribute,
7744 else look for regparm information. */
7747 unsigned int ccvt = ix86_get_callcvt (fntype);
7748 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7751 cum->fastcall = 1; /* Same first register as in fastcall. */
7753 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7759 cum->nregs = ix86_function_regparm (fntype, fndecl);
7762 /* Set up the number of SSE registers used for passing SFmode
7763 and DFmode arguments. Warn for mismatching ABI. */
7764 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7767 cfun->machine->arg_reg_available = (cum->nregs > 0);
7770 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7771 But in the case of vector types, it is some vector mode.
7773 When we have only some of our vector isa extensions enabled, then there
7774 are some modes for which vector_mode_supported_p is false. For these
7775 modes, the generic vector support in gcc will choose some non-vector mode
7776 in order to implement the type. By computing the natural mode, we'll
7777 select the proper ABI location for the operand and not depend on whatever
7778 the middle-end decides to do with these vector types.
7780 The midde-end can't deal with the vector types > 16 bytes. In this
7781 case, we return the original mode and warn ABI change if CUM isn't
7784 If INT_RETURN is true, warn ABI change if the vector mode isn't
7785 available for function return value. */
7788 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7791 machine_mode mode = TYPE_MODE (type);
/* Only remap vector types whose TYPE_MODE fell back to a scalar mode.  */
7793 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7795 HOST_WIDE_INT size = int_size_in_bytes (type);
7796 if ((size == 8 || size == 16 || size == 32 || size == 64)
7797 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7798 && TYPE_VECTOR_SUBPARTS (type) > 1)
7800 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7802 /* There are no XFmode vector modes. */
7803 if (innermode == XFmode)
7806 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7807 mode = MIN_MODE_VECTOR_FLOAT;
7809 mode = MIN_MODE_VECTOR_INT;
7811 /* Get the mode which has this inner mode and number of units. */
7812 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
7813 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7814 && GET_MODE_INNER (mode) == innermode)
/* Each ISA tier below warns (once per direction: argument vs return)
   that using the vector mode without the ISA changes the ABI, then
   falls back to TYPE_MODE.  NOTE(review): some of the "warnedX = true"
   lines appear elided in this extract.  */
7816 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7818 static bool warnedavx512f;
7819 static bool warnedavx512f_ret;
7821 if (cum && cum->warn_avx512f && !warnedavx512f)
7823 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7824 "without AVX512F enabled changes the ABI"))
7825 warnedavx512f = true;
7827 else if (in_return && !warnedavx512f_ret)
7829 if (warning (OPT_Wpsabi, "AVX512F vector return "
7830 "without AVX512F enabled changes the ABI"))
7831 warnedavx512f_ret = true;
7834 return TYPE_MODE (type);
7836 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7838 static bool warnedavx;
7839 static bool warnedavx_ret;
7841 if (cum && cum->warn_avx && !warnedavx)
7843 if (warning (OPT_Wpsabi, "AVX vector argument "
7844 "without AVX enabled changes the ABI"))
7847 else if (in_return && !warnedavx_ret)
7849 if (warning (OPT_Wpsabi, "AVX vector return "
7850 "without AVX enabled changes the ABI"))
7851 warnedavx_ret = true;
7854 return TYPE_MODE (type);
7856 else if (((size == 8 && TARGET_64BIT) || size == 16)
7860 static bool warnedsse;
7861 static bool warnedsse_ret;
7863 if (cum && cum->warn_sse && !warnedsse)
7865 if (warning (OPT_Wpsabi, "SSE vector argument "
7866 "without SSE enabled changes the ABI"))
7869 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7871 if (warning (OPT_Wpsabi, "SSE vector return "
7872 "without SSE enabled changes the ABI"))
7873 warnedsse_ret = true;
7876 else if ((size == 8 && !TARGET_64BIT)
7880 static bool warnedmmx;
7881 static bool warnedmmx_ret;
7883 if (cum && cum->warn_mmx && !warnedmmx)
7885 if (warning (OPT_Wpsabi, "MMX vector argument "
7886 "without MMX enabled changes the ABI"))
7889 else if (in_return && !warnedmmx_ret)
7891 if (warning (OPT_Wpsabi, "MMX vector return "
7892 "without MMX enabled changes the ABI"))
7893 warnedmmx_ret = true;
7906 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7907 this may not agree with the mode that the type system has chosen for the
7908 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7909 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7912 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
/* Simple case: the type's mode is usable directly as a hard register.  */
7917 if (orig_mode != BLKmode)
7918 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL at
   offset zero so the middle-end knows where the value lives.  */
7921 tmp = gen_rtx_REG (mode, regno);
7922 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7923 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7929 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
7930 of this code is to classify each 8bytes of incoming argument by the register
7931 class and assign registers accordingly. */
7933 /* Return the union class of CLASS1 and CLASS2.
7934 See the x86-64 PS ABI for details. */
7936 static enum x86_64_reg_class
7937 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7939 /* Rule #1: If both classes are equal, this is the resulting class. */
7940 if (class1 == class2)
7943 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7945 if (class1 == X86_64_NO_CLASS)
7947 if (class2 == X86_64_NO_CLASS)
7950 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7951 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7952 return X86_64_MEMORY_CLASS;
7954 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both halves fit in 32 bits).  */
7955 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7956 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7957 return X86_64_INTEGERSI_CLASS;
7958 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7959 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7960 return X86_64_INTEGER_CLASS;
7962 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7964 if (class1 == X86_64_X87_CLASS
7965 || class1 == X86_64_X87UP_CLASS
7966 || class1 == X86_64_COMPLEX_X87_CLASS
7967 || class2 == X86_64_X87_CLASS
7968 || class2 == X86_64_X87UP_CLASS
7969 || class2 == X86_64_COMPLEX_X87_CLASS)
7970 return X86_64_MEMORY_CLASS;
7972 /* Rule #6: Otherwise class SSE is used. */
7973 return X86_64_SSE_CLASS;
7976 /* Classify the argument of type TYPE and mode MODE.
/* classify_argument: implements the System V x86-64 psABI argument
   classification algorithm -- each eightbyte (64-bit word) of the
   argument is assigned a register class (INTEGER, SSE, X87, MEMORY...).
   NOTE(review): this chunk is an elided dump; many intermediate lines
   (braces, case labels, returns) are missing and each surviving line
   carries a stray original line number.  Comments below only annotate;
   no code text was altered.  */
7977 CLASSES will be filled by the register class used to pass each word
7978 of the operand. The number of words is returned. In case the parameter
7979 should be passed in memory, 0 is returned. As a special case for zero
7980 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7982 BIT_OFFSET is used internally for handling records and specifies offset
7983 of the offset in bits modulo 512 to avoid overflow cases.
7985 See the x86-64 PS ABI for details.
7989 classify_argument (machine_mode mode, const_tree type,
7990 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
/* BLKmode aggregates have no machine-mode size; use the tree size.  */
7992 HOST_WIDE_INT bytes =
7993 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7994 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7996 /* Variable sized entities are always passed/returned in memory. */
8000 if (mode != VOIDmode
8001 && targetm.calls.must_pass_in_stack (mode, type))
8004 if (type && AGGREGATE_TYPE_P (type))
8008 enum x86_64_reg_class subclasses[MAX_CLASSES];
8010 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
/* Start with every word unclassified; field merging refines this.  */
8014 for (i = 0; i < words; i++)
8015 classes[i] = X86_64_NO_CLASS;
8017 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
8018 signalize memory class, so handle it as special case. */
8021 classes[0] = X86_64_NO_CLASS;
8025 /* Classify each field of record and merge classes. */
8026 switch (TREE_CODE (type))
8029 /* And now merge the fields of structure. */
8030 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8032 if (TREE_CODE (field) == FIELD_DECL)
8036 if (TREE_TYPE (field) == error_mark_node)
8039 /* Bitfields are always classified as integer. Handle them
8040 early, since later code would consider them to be
8041 misaligned integers. */
8042 if (DECL_BIT_FIELD (field))
/* /8/8 converts a bit position to an eightbyte index.  */
8044 for (i = (int_bit_position (field)
8045 + (bit_offset % 64)) / 8 / 8;
8046 i < ((int_bit_position (field) + (bit_offset % 64))
8047 + tree_to_shwi (DECL_SIZE (field))
8050 merge_classes (X86_64_INTEGER_CLASS,
8057 type = TREE_TYPE (field);
8059 /* Flexible array member is ignored. */
8060 if (TYPE_MODE (type) == BLKmode
8061 && TREE_CODE (type) == ARRAY_TYPE
8062 && TYPE_SIZE (type) == NULL_TREE
8063 && TYPE_DOMAIN (type) != NULL_TREE
8064 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
8069 if (!warned && warn_psabi)
8072 inform (input_location,
8073 "the ABI of passing struct with"
8074 " a flexible array member has"
8075 " changed in GCC 4.4");
/* Recurse into the field; bit offset is kept modulo 512 (the comment
   at the top explains the overflow-avoidance rationale).  */
8079 num = classify_argument (TYPE_MODE (type), type,
8081 (int_bit_position (field)
8082 + bit_offset) % 512);
8085 pos = (int_bit_position (field)
8086 + (bit_offset % 64)) / 8 / 8;
8087 for (i = 0; i < num && (i + pos) < words; i++)
8089 merge_classes (subclasses[i], classes[i + pos]);
8096 /* Arrays are handled as small records. */
8099 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
8100 TREE_TYPE (type), subclasses, bit_offset);
8104 /* The partial classes are now full classes. */
8105 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
8106 subclasses[0] = X86_64_SSE_CLASS;
8107 if (subclasses[0] == X86_64_INTEGERSI_CLASS
8108 && !((bit_offset % 64) == 0 && bytes == 4))
8109 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all array words.  */
8111 for (i = 0; i < words; i++)
8112 classes[i] = subclasses[i % num];
8117 case QUAL_UNION_TYPE:
8118 /* Unions are similar to RECORD_TYPE but offset is always 0.
8120 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8122 if (TREE_CODE (field) == FIELD_DECL)
8126 if (TREE_TYPE (field) == error_mark_node)
8129 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
8130 TREE_TYPE (field), subclasses,
8134 for (i = 0; i < num && i < words; i++)
8135 classes[i] = merge_classes (subclasses[i], classes[i]);
8146 /* When size > 16 bytes, if the first one isn't
8147 X86_64_SSE_CLASS or any other ones aren't
8148 X86_64_SSEUP_CLASS, everything should be passed in
8150 if (classes[0] != X86_64_SSE_CLASS)
8153 for (i = 1; i < words; i++)
8154 if (classes[i] != X86_64_SSEUP_CLASS)
8158 /* Final merger cleanup. */
8159 for (i = 0; i < words; i++)
8161 /* If one class is MEMORY, everything should be passed in
8163 if (classes[i] == X86_64_MEMORY_CLASS)
8166 /* The X86_64_SSEUP_CLASS should be always preceded by
8167 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
8168 if (classes[i] == X86_64_SSEUP_CLASS
8169 && classes[i - 1] != X86_64_SSE_CLASS
8170 && classes[i - 1] != X86_64_SSEUP_CLASS)
8172 /* The first one should never be X86_64_SSEUP_CLASS. */
8173 gcc_assert (i != 0);
8174 classes[i] = X86_64_SSE_CLASS;
8177 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
8178 everything should be passed in memory. */
8179 if (classes[i] == X86_64_X87UP_CLASS
8180 && (classes[i - 1] != X86_64_X87_CLASS))
8184 /* The first one should never be X86_64_X87UP_CLASS. */
8185 gcc_assert (i != 0);
8186 if (!warned && warn_psabi)
8189 inform (input_location,
8190 "the ABI of passing union with long double"
8191 " has changed in GCC 4.4");
8199 /* Compute alignment needed. We align all types to natural boundaries with
8200 exception of XFmode that is aligned to 64bits. */
8201 if (mode != VOIDmode && mode != BLKmode)
8203 int mode_alignment = GET_MODE_BITSIZE (mode);
8206 mode_alignment = 128;
8207 else if (mode == XCmode)
8208 mode_alignment = 256;
8209 if (COMPLEX_MODE_P (mode))
8210 mode_alignment /= 2;
8211 /* Misaligned fields are always returned in memory. */
8212 if (bit_offset % mode_alignment)
8216 /* for V1xx modes, just use the base mode */
8217 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
8218 && GET_MODE_UNIT_SIZE (mode) == bytes)
8219 mode = GET_MODE_INNER (mode);
8221 /* Classification of atomic types. */
8226 classes[0] = X86_64_SSE_CLASS;
8229 classes[0] = X86_64_SSE_CLASS;
8230 classes[1] = X86_64_SSEUP_CLASS;
8240 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
8242 /* Analyze last 128 bits only. */
8243 size = (size - 1) & 0x7f;
8247 classes[0] = X86_64_INTEGERSI_CLASS;
8252 classes[0] = X86_64_INTEGER_CLASS;
8255 else if (size < 64+32)
8257 classes[0] = X86_64_INTEGER_CLASS;
8258 classes[1] = X86_64_INTEGERSI_CLASS;
8261 else if (size < 64+64)
8263 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8271 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8275 /* OImode shouldn't be used directly. */
/* SFmode: a single float fits the low half of an eightbyte only when
   64-bit aligned; otherwise it occupies a full SSE word.  */
8280 if (!(bit_offset % 64))
8281 classes[0] = X86_64_SSESF_CLASS;
8283 classes[0] = X86_64_SSE_CLASS;
8286 classes[0] = X86_64_SSEDF_CLASS;
8289 classes[0] = X86_64_X87_CLASS;
8290 classes[1] = X86_64_X87UP_CLASS;
8293 classes[0] = X86_64_SSE_CLASS;
8294 classes[1] = X86_64_SSEUP_CLASS;
8297 classes[0] = X86_64_SSE_CLASS;
8298 if (!(bit_offset % 64))
8304 if (!warned && warn_psabi)
8307 inform (input_location,
8308 "the ABI of passing structure with complex float"
8309 " member has changed in GCC 4.4");
8311 classes[1] = X86_64_SSESF_CLASS;
8315 classes[0] = X86_64_SSEDF_CLASS;
8316 classes[1] = X86_64_SSEDF_CLASS;
8319 classes[0] = X86_64_COMPLEX_X87_CLASS;
8322 /* This modes is larger than 16 bytes. */
/* 32-byte (AVX) vectors: one SSE word followed by three SSEUP words.  */
8330 classes[0] = X86_64_SSE_CLASS;
8331 classes[1] = X86_64_SSEUP_CLASS;
8332 classes[2] = X86_64_SSEUP_CLASS;
8333 classes[3] = X86_64_SSEUP_CLASS;
/* 64-byte (AVX-512) vectors: one SSE word followed by seven SSEUPs.  */
8341 classes[0] = X86_64_SSE_CLASS;
8342 classes[1] = X86_64_SSEUP_CLASS;
8343 classes[2] = X86_64_SSEUP_CLASS;
8344 classes[3] = X86_64_SSEUP_CLASS;
8345 classes[4] = X86_64_SSEUP_CLASS;
8346 classes[5] = X86_64_SSEUP_CLASS;
8347 classes[6] = X86_64_SSEUP_CLASS;
8348 classes[7] = X86_64_SSEUP_CLASS;
8356 classes[0] = X86_64_SSE_CLASS;
8357 classes[1] = X86_64_SSEUP_CLASS;
8365 classes[0] = X86_64_SSE_CLASS;
8371 gcc_assert (VECTOR_MODE_P (mode));
8376 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8378 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8379 classes[0] = X86_64_INTEGERSI_CLASS;
8381 classes[0] = X86_64_INTEGER_CLASS;
8382 classes[1] = X86_64_INTEGER_CLASS;
8383 return 1 + (bytes > 8);
8387 /* Examine the argument and return set number of register required in each
8388 class. Return true iff parameter should be passed in memory. */
/* Tallies how many integer and SSE registers the classified argument
   consumes.  NOTE(review): the per-case bodies (increments/returns) are
   elided in this dump; counts below are inferred only from case labels.  */
8391 examine_argument (machine_mode mode, const_tree type, int in_return,
8392 int *int_nregs, int *sse_nregs)
8394 enum x86_64_reg_class regclass[MAX_CLASSES];
8395 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classification backwards, bumping the matching counter.  */
8402 for (n--; n >= 0; n--)
8403 switch (regclass[n])
8405 case X86_64_INTEGER_CLASS:
8406 case X86_64_INTEGERSI_CLASS:
8409 case X86_64_SSE_CLASS:
8410 case X86_64_SSESF_CLASS:
8411 case X86_64_SSEDF_CLASS:
8414 case X86_64_NO_CLASS:
8415 case X86_64_SSEUP_CLASS:
8417 case X86_64_X87_CLASS:
8418 case X86_64_X87UP_CLASS:
8419 case X86_64_COMPLEX_X87_CLASS:
8423 case X86_64_MEMORY_CLASS:
8430 /* Construct container for the argument used by GCC interface. See
8431 FUNCTION_ARG for the detailed description. */
/* Builds the RTL (single REG or PARALLEL of EXPR_LISTs) describing where
   a classified argument or return value lives.  Returns NULL to mean
   "pass in memory".  NOTE(review): elided dump -- several case labels
   and returns are missing; code text left untouched.  */
8434 construct_container (machine_mode mode, machine_mode orig_mode,
8435 const_tree type, int in_return, int nintregs, int nsseregs,
8436 const int *intreg, int sse_regno)
8438 /* The following variables hold the static issued_error state. */
/* static: each diagnostic below is emitted at most once per run.  */
8439 static bool issued_sse_arg_error;
8440 static bool issued_sse_ret_error;
8441 static bool issued_x87_ret_error;
8443 machine_mode tmpmode;
8445 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8446 enum x86_64_reg_class regclass[MAX_CLASSES];
8450 int needed_sseregs, needed_intregs;
8451 rtx exp[MAX_CLASSES];
8454 n = classify_argument (mode, type, regclass, 0);
8457 if (examine_argument (mode, type, in_return, &needed_intregs,
8460 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8463 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8464 some less clueful developer tries to use floating-point anyway. */
8465 if (needed_sseregs && !TARGET_SSE)
8469 if (!issued_sse_ret_error)
8471 error ("SSE register return with SSE disabled");
8472 issued_sse_ret_error = true;
8475 else if (!issued_sse_arg_error)
8477 error ("SSE register argument with SSE disabled");
8478 issued_sse_arg_error = true;
8483 /* Likewise, error if the ABI requires us to return values in the
8484 x87 registers and the user specified -mno-80387. */
8485 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8486 for (i = 0; i < n; i++)
8487 if (regclass[i] == X86_64_X87_CLASS
8488 || regclass[i] == X86_64_X87UP_CLASS
8489 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8491 if (!issued_x87_ret_error)
8493 error ("x87 register return with x87 disabled");
8494 issued_x87_ret_error = true;
8499 /* First construct simple cases. Avoid SCmode, since we want to use
8500 single register to pass this type. */
8501 if (n == 1 && mode != SCmode)
8502 switch (regclass[0])
8504 case X86_64_INTEGER_CLASS:
8505 case X86_64_INTEGERSI_CLASS:
8506 return gen_rtx_REG (mode, intreg[0]);
8507 case X86_64_SSE_CLASS:
8508 case X86_64_SSESF_CLASS:
8509 case X86_64_SSEDF_CLASS:
8510 if (mode != BLKmode)
8511 return gen_reg_or_parallel (mode, orig_mode,
8512 SSE_REGNO (sse_regno))
8514 case X86_64_X87_CLASS:
8515 case X86_64_COMPLEX_X87_CLASS:
8516 return gen_rtx_REG (mode, FIRST_STACK_REG);
8517 case X86_64_NO_CLASS:
8518 /* Zero sized array, struct or class. */
/* Multi-word fast paths: whole value in one SSE register when the
   classification is SSE followed only by SSEUP words.  */
8524 && regclass[0] == X86_64_SSE_CLASS
8525 && regclass[1] == X86_64_SSEUP_CLASS
8527 return gen_reg_or_parallel (mode, orig_mode,
8528 SSE_REGNO (sse_regno));
8530 && regclass[0] == X86_64_SSE_CLASS
8531 && regclass[1] == X86_64_SSEUP_CLASS
8532 && regclass[2] == X86_64_SSEUP_CLASS
8533 && regclass[3] == X86_64_SSEUP_CLASS
8535 return gen_reg_or_parallel (mode, orig_mode,
8536 SSE_REGNO (sse_regno));
8538 && regclass[0] == X86_64_SSE_CLASS
8539 && regclass[1] == X86_64_SSEUP_CLASS
8540 && regclass[2] == X86_64_SSEUP_CLASS
8541 && regclass[3] == X86_64_SSEUP_CLASS
8542 && regclass[4] == X86_64_SSEUP_CLASS
8543 && regclass[5] == X86_64_SSEUP_CLASS
8544 && regclass[6] == X86_64_SSEUP_CLASS
8545 && regclass[7] == X86_64_SSEUP_CLASS
8547 return gen_reg_or_parallel (mode, orig_mode,
8548 SSE_REGNO (sse_regno));
8550 && regclass[0] == X86_64_X87_CLASS
8551 && regclass[1] == X86_64_X87UP_CLASS)
8552 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8555 && regclass[0] == X86_64_INTEGER_CLASS
8556 && regclass[1] == X86_64_INTEGER_CLASS
8557 && (mode == CDImode || mode == TImode)
8558 && intreg[0] + 1 == intreg[1])
8559 return gen_rtx_REG (mode, intreg[0]);
8561 /* Otherwise figure out the entries of the PARALLEL. */
8562 for (i = 0; i < n; i++)
8566 switch (regclass[i])
8568 case X86_64_NO_CLASS:
8570 case X86_64_INTEGER_CLASS:
8571 case X86_64_INTEGERSI_CLASS:
8572 /* Merge TImodes on aligned occasions here too. */
8573 if (i * 8 + 8 > bytes)
8575 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
8576 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8580 /* We've requested 24 bytes we
8581 don't have mode for. Use DImode. */
8582 if (tmpmode == BLKmode)
8585 = gen_rtx_EXPR_LIST (VOIDmode,
8586 gen_rtx_REG (tmpmode, *intreg),
8590 case X86_64_SSESF_CLASS:
8592 = gen_rtx_EXPR_LIST (VOIDmode,
8593 gen_rtx_REG (SFmode,
8594 SSE_REGNO (sse_regno)),
8598 case X86_64_SSEDF_CLASS:
8600 = gen_rtx_EXPR_LIST (VOIDmode,
8601 gen_rtx_REG (DFmode,
8602 SSE_REGNO (sse_regno)),
8606 case X86_64_SSE_CLASS:
8614 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8624 && regclass[1] == X86_64_SSEUP_CLASS
8625 && regclass[2] == X86_64_SSEUP_CLASS
8626 && regclass[3] == X86_64_SSEUP_CLASS);
8632 && regclass[1] == X86_64_SSEUP_CLASS
8633 && regclass[2] == X86_64_SSEUP_CLASS
8634 && regclass[3] == X86_64_SSEUP_CLASS
8635 && regclass[4] == X86_64_SSEUP_CLASS
8636 && regclass[5] == X86_64_SSEUP_CLASS
8637 && regclass[6] == X86_64_SSEUP_CLASS
8638 && regclass[7] == X86_64_SSEUP_CLASS);
8646 = gen_rtx_EXPR_LIST (VOIDmode,
8647 gen_rtx_REG (tmpmode,
8648 SSE_REGNO (sse_regno)),
8657 /* Empty aligned struct, union or class. */
/* Wrap the collected EXPR_LIST entries into one PARALLEL.  */
8661 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8662 for (i = 0; i < nexps; i++)
8663 XVECEXP (ret, 0, i) = exp [i];
8667 /* Update the data in CUM to advance over an argument of mode MODE
8668 and data type TYPE. (TYPE is null for libcalls where that information
8669 may not be available.)
8671 Return a number of integer registers advanced over. */
/* 32-bit ABI advance: consumes integer, SSE or MMX register slots in CUM
   depending on the argument's mode.  NOTE(review): mode case labels are
   elided in this dump.  */
8674 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8675 const_tree type, HOST_WIDE_INT bytes,
8676 HOST_WIDE_INT words)
/* NOTE(review): bool initialized with NULL -- works (NULL == 0) but
   `false' would be the idiomatic spelling.  */
8679 bool error_p = NULL;
8683 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8684 bytes in registers. */
8685 if (!VECTOR_MODE_P (mode) && bytes <= 8)
/* Integer case: consume WORDS register slots; nregs may go negative.  */
8705 cum->words += words;
8706 cum->nregs -= words;
8707 cum->regno += words;
8708 if (cum->nregs >= 0)
8710 if (cum->nregs <= 0)
8713 cfun->machine->arg_reg_available = false;
8719 /* OImode shouldn't be used directly. */
8723 if (cum->float_in_sse == -1)
8725 if (cum->float_in_sse < 2)
8728 if (cum->float_in_sse == -1)
8730 if (cum->float_in_sse < 1)
/* SSE vector case: one XMM register per argument.  */
8753 if (!type || !AGGREGATE_TYPE_P (type))
8755 cum->sse_words += words;
8756 cum->sse_nregs -= 1;
8757 cum->sse_regno += 1;
8758 if (cum->sse_nregs <= 0)
/* MMX vector case: one MMX register per argument.  */
8772 if (!type || !AGGREGATE_TYPE_P (type))
8774 cum->mmx_words += words;
8775 cum->mmx_nregs -= 1;
8776 cum->mmx_regno += 1;
8777 if (cum->mmx_nregs <= 0)
8787 cum->float_in_sse = 0;
8788 error ("calling %qD with SSE calling convention without "
8789 "SSE/SSE2 enabled", cum->decl);
8790 sorry ("this is a GCC bug that can be worked around by adding "
8791 "attribute used to function called");
/* SysV x86-64 advance: if the argument fits in the remaining integer and
   SSE registers, consume them; otherwise align and grow the stack word
   count.  NOTE(review): elided dump -- return statements are missing.  */
8798 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8799 const_tree type, HOST_WIDE_INT words, bool named)
8801 int int_nregs, sse_nregs;
8803 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8804 if (!named && (VALID_AVX512F_REG_MODE (mode)
8805 || VALID_AVX256_REG_MODE (mode)))
/* examine_argument returning false means "pass in registers".  */
8808 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8809 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8811 cum->nregs -= int_nregs;
8812 cum->sse_nregs -= sse_nregs;
8813 cum->regno += int_nregs;
8814 cum->sse_regno += sse_nregs;
/* Memory case: round the stack offset up to the argument's boundary.  */
8819 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8820 cum->words = ROUND_UP (cum->words, align);
8821 cum->words += words;
/* MS x64 advance: every slot is one 8-byte word; anything not exactly
   1/2/4/8 bytes must have been passed by reference already.  */
8827 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8828 HOST_WIDE_INT words)
8830 /* Otherwise, this should be passed indirect. */
8831 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8833 cum->words += words;
8843 /* Update the data in CUM to advance over an argument of mode MODE and
8844 data type TYPE. (TYPE is null for libcalls where that information
8845 may not be available.) */
/* TARGET_FUNCTION_ARG_ADVANCE hook: dispatches to the 32-bit, SysV-64 or
   MS-64 helper, and maintains MPX pointer-bounds bookkeeping.
   NOTE(review): elided dump; several branch bodies are missing.  */
8848 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8849 const_tree type, bool named)
8851 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8852 HOST_WIDE_INT bytes, words;
8855 if (mode == BLKmode)
8856 bytes = int_size_in_bytes (type);
8858 bytes = GET_MODE_SIZE (mode);
8859 words = CEIL (bytes, UNITS_PER_WORD);
8862 mode = type_natural_mode (type, NULL, false);
/* MPX: bound arguments are tracked separately from ordinary ones.  */
8864 if ((type && POINTER_BOUNDS_TYPE_P (type))
8865 || POINTER_BOUNDS_MODE_P (mode))
8867 /* If we pass bounds in BT then just update remained bounds count. */
8868 if (cum->bnds_in_bt)
8874 /* Update remained number of bounds to force. */
8875 if (cum->force_bnd_pass)
8876 cum->force_bnd_pass--;
8883 /* The first arg not going to Bounds Tables resets this counter. */
8884 cum->bnds_in_bt = 0;
8885 /* For unnamed args we always pass bounds to avoid bounds mess when
8886 passed and received types do not match. If bounds do not follow
8887 unnamed arg, still pretend required number of bounds were passed. */
8888 if (cum->force_bnd_pass)
8890 cum->bnd_regno += cum->force_bnd_pass;
8891 cum->force_bnd_pass = 0;
8896 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8898 if (call_abi == MS_ABI)
8899 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8901 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8904 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8906 /* For stdarg we expect bounds to be passed for each value passed
8909 cum->force_bnd_pass = nregs;
8910 /* For pointers passed in memory we expect bounds passed in Bounds
8913 cum->bnds_in_bt = chkp_type_bounds_count (type);
8916 /* Define where to put the arguments to a function.
8917 Value is zero to push the argument on the stack,
8918 or a hard register in which to store the argument.
8920 MODE is the argument's machine mode.
8921 TYPE is the data type of the argument (as a tree).
8922 This is null for libcalls where that information may
8924 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8925 the preceding args and about the function being called.
8926 NAMED is nonzero if this argument is a named parameter
8927 (otherwise it is an extra parameter matching an ellipsis). */
/* 32-bit ABI argument placement.  NOTE(review): elided dump -- mode case
   labels and several returns are missing.  */
8930 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8931 machine_mode orig_mode, const_tree type,
8932 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8934 bool error_p = false;
8935 /* Avoid the AL settings for the Unix64 ABI. */
8936 if (mode == VOIDmode)
8941 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8942 bytes in registers. */
8943 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8962 if (words <= cum->nregs)
8964 int regno = cum->regno;
8966 /* Fastcall allocates the first two DWORD (SImode) or
8967 smaller arguments to ECX and EDX if it isn't an
8973 || (type && AGGREGATE_TYPE_P (type)))
8976 /* ECX not EAX is the first allocated register. */
8977 if (regno == AX_REG)
8980 return gen_rtx_REG (mode, regno);
8985 if (cum->float_in_sse == -1)
8987 if (cum->float_in_sse < 2)
8990 if (cum->float_in_sse == -1)
8992 if (cum->float_in_sse < 1)
8996 /* In 32bit, we pass TImode in xmm registers. */
9003 if (!type || !AGGREGATE_TYPE_P (type))
9006 return gen_reg_or_parallel (mode, orig_mode,
9007 cum->sse_regno + FIRST_SSE_REG);
9013 /* OImode and XImode shouldn't be used directly. */
9028 if (!type || !AGGREGATE_TYPE_P (type))
9031 return gen_reg_or_parallel (mode, orig_mode,
9032 cum->sse_regno + FIRST_SSE_REG);
9042 if (!type || !AGGREGATE_TYPE_P (type))
9045 return gen_reg_or_parallel (mode, orig_mode,
9046 cum->mmx_regno + FIRST_MMX_REG);
9052 cum->float_in_sse = 0;
9053 error ("calling %qD with SSE calling convention without "
9054 "SSE/SSE2 enabled", cum->decl);
9055 sorry ("this is a GCC bug that can be worked around by adding "
9056 "attribute used to function called");
/* SysV x86-64 argument placement: delegates the real work to
   construct_container.  NOTE(review): elided dump.  */
9063 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9064 machine_mode orig_mode, const_tree type, bool named)
9066 /* Handle a hidden AL argument containing number of registers
9067 for varargs x86-64 functions. */
9068 if (mode == VOIDmode)
9069 return GEN_INT (cum->maybe_vaarg
9070 ? (cum->sse_nregs < 0
9071 ? X86_64_SSE_REGPARM_MAX
9092 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9098 return construct_container (mode, orig_mode, type, 0, cum->nregs,
9100 &x86_64_int_parameter_registers [cum->regno],
/* MS x64 argument placement: each argument uses one register slot shared
   between the integer and XMM register files.  */
9105 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9106 machine_mode orig_mode, bool named,
9107 HOST_WIDE_INT bytes)
9111 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
9112 We use value of -2 to specify that current function call is MSABI. */
9113 if (mode == VOIDmode)
9114 return GEN_INT (-2);
9116 /* If we've run out of registers, it goes on the stack. */
9117 if (cum->nregs == 0)
9120 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
9122 /* Only floating point modes are passed in anything but integer regs. */
9123 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
9126 regno = cum->regno + FIRST_SSE_REG;
9131 /* Unnamed floating parameters are passed in both the
9132 SSE and integer registers. */
/* The PARALLEL makes varargs callees find the value in either file.  */
9133 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
9134 t2 = gen_rtx_REG (mode, regno);
9135 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
9136 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
9137 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
9140 /* Handle aggregated types passed in register. */
9141 if (orig_mode == BLKmode)
9143 if (bytes > 0 && bytes <= 8)
9144 mode = (bytes > 4 ? DImode : SImode);
9145 if (mode == BLKmode)
9149 return gen_reg_or_parallel (mode, orig_mode, regno);
9152 /* Return where to put the arguments to a function.
9153 Return zero to push the argument on the stack, or a hard register in which to store the argument.
9155 MODE is the argument's machine mode. TYPE is the data type of the
9156 argument. It is null for libcalls where that information may not be
9157 available. CUM gives information about the preceding args and about
9158 the function being called. NAMED is nonzero if this argument is a
9159 named parameter (otherwise it is an extra parameter matching an
/* TARGET_FUNCTION_ARG hook: handles MPX bound arguments, then dispatches
   to the 32-bit, SysV-64 or MS-64 helper.  NOTE(review): elided dump.  */
9163 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
9164 const_tree type, bool named)
9166 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9167 machine_mode mode = omode;
9168 HOST_WIDE_INT bytes, words;
9171 /* All pointer bounds arguments are handled separately here. */
9172 if ((type && POINTER_BOUNDS_TYPE_P (type))
9173 || POINTER_BOUNDS_MODE_P (mode))
9175 /* Return NULL if bounds are forced to go in Bounds Table. */
9176 if (cum->bnds_in_bt)
9178 /* Return the next available bound reg if any. */
9179 else if (cum->bnd_regno <= LAST_BND_REG)
9180 arg = gen_rtx_REG (BNDmode, cum->bnd_regno)
9181 /* Return the next special slot number otherwise. */
9183 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
9188 if (mode == BLKmode)
9189 bytes = int_size_in_bytes (type);
9191 bytes = GET_MODE_SIZE (mode);
9192 words = CEIL (bytes, UNITS_PER_WORD);
9194 /* To simplify the code below, represent vector types with a vector mode
9195 even if MMX/SSE are not active. */
9196 if (type && TREE_CODE (type) == VECTOR_TYPE)
9197 mode = type_natural_mode (type, cum, false);
9201 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9203 if (call_abi == MS_ABI)
9204 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
9206 arg = function_arg_64 (cum, mode, omode, type, named);
9209 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
9214 /* A C expression that indicates when an argument must be passed by
9215 reference. If nonzero for an argument, a copy of that argument is
9216 made in memory and a pointer to the argument is passed instead of
9217 the argument itself. The pointer is passed in whatever way is
9218 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook.  NOTE(review): elided dump -- the final
   32-bit-path return is missing from view.  */
9221 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
9222 const_tree type, bool)
9224 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9226 /* Bounds are never passed by reference. */
9227 if ((type && POINTER_BOUNDS_TYPE_P (type))
9228 || POINTER_BOUNDS_MODE_P (mode))
9233 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9235 /* See Windows x64 Software Convention. */
9236 if (call_abi == MS_ABI)
9238 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
9242 /* Arrays are passed by reference. */
9243 if (TREE_CODE (type) == ARRAY_TYPE)
9246 if (RECORD_OR_UNION_TYPE_P (type))
9248 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
9249 are passed by reference. */
9250 msize = int_size_in_bytes (type);
9254 /* __m128 is passed by reference. */
9255 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
/* int_size_in_bytes == -1 means variable-sized: pass by reference.  */
9257 else if (type && int_size_in_bytes (type) == -1)
9264 /* Return true when TYPE should be 128bit aligned for 32bit argument
9265 passing ABI. XXX: This function is obsolete and is only used for
9266 checking psABI compatibility with previous versions of GCC. */
9269 ix86_compat_aligned_value_p (const_tree type)
9271 machine_mode mode = TYPE_MODE (type);
9272 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9276 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Types aligned below 128 bits can never require 128-bit alignment.  */
9278 if (TYPE_ALIGN (type) < 128)
9281 if (AGGREGATE_TYPE_P (type))
9283 /* Walk the aggregates recursively. */
9284 switch (TREE_CODE (type))
9288 case QUAL_UNION_TYPE:
9292 /* Walk all the structure fields. */
9293 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9295 if (TREE_CODE (field) == FIELD_DECL
9296 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9303 /* Just for use if some languages passes arrays by value. */
9304 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9315 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9316 XXX: This function is obsolete and is only used for checking psABI
9317 compatibility with previous versions of GCC. */
9320 ix86_compat_function_arg_boundary (machine_mode mode,
9321 const_tree type, unsigned int align)
9323 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9324 natural boundaries. */
9325 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9327 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9328 make an exception for SSE modes since these require 128bit
9331 The handling here differs from field_alignment. ICC aligns MMX
9332 arguments to 4 byte boundaries, while structure fields are aligned
9333 to 8 byte boundaries. */
9336 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9337 align = PARM_BOUNDARY;
9341 if (!ix86_compat_aligned_value_p (type))
9342 align = PARM_BOUNDARY;
/* Never report more alignment than the target can guarantee.  */
9345 if (align > BIGGEST_ALIGNMENT)
9346 align = BIGGEST_ALIGNMENT;
9350 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* Recursive walk mirroring ix86_compat_aligned_value_p, but using the
   type's own alignment rather than mode heuristics.  */
9354 ix86_contains_aligned_value_p (const_tree type)
9356 machine_mode mode = TYPE_MODE (type);
/* x87 extended modes are explicitly excluded.  */
9358 if (mode == XFmode || mode == XCmode)
9361 if (TYPE_ALIGN (type) < 128)
9364 if (AGGREGATE_TYPE_P (type))
9366 /* Walk the aggregates recursively. */
9367 switch (TREE_CODE (type))
9371 case QUAL_UNION_TYPE:
9375 /* Walk all the structure fields. */
9376 for (field = TYPE_FIELDS (type);
9378 field = DECL_CHAIN (field))
9380 if (TREE_CODE (field) == FIELD_DECL
9381 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9388 /* Just for use if some languages passes arrays by value. */
9389 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
9398 return TYPE_ALIGN (type) >= 128;
9403 /* Gives the alignment boundary, in bits, of an argument with the
9404 specified mode and type. */
/* TARGET_FUNCTION_ARG_BOUNDARY hook; also emits the GCC 4.6 psABI
   alignment-change note when the obsolete computation disagrees.  */
9407 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9412 /* Since the main variant type is used for call, we convert it to
9413 the main variant type. */
9414 type = TYPE_MAIN_VARIANT (type);
9415 align = TYPE_ALIGN (type);
9418 align = GET_MODE_ALIGNMENT (mode);
9419 if (align < PARM_BOUNDARY)
9420 align = PARM_BOUNDARY;
/* Saved so the psABI-compat warning below can compare old vs. new.  */
9424 unsigned int saved_align = align;
9428 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9431 if (mode == XFmode || mode == XCmode)
9432 align = PARM_BOUNDARY;
9434 else if (!ix86_contains_aligned_value_p (type))
9435 align = PARM_BOUNDARY;
9438 align = PARM_BOUNDARY;
9443 && align != ix86_compat_function_arg_boundary (mode, type,
9447 inform (input_location,
9448 "The ABI for passing parameters with %d-byte"
9449 " alignment has changed in GCC 4.6",
9450 align / BITS_PER_UNIT);
9457 /* Return true if N is a possible register number of function value. */
/* NOTE(review): elided dump -- the switch/case labels selecting between
   AX/FP/SSE/MMX/bound registers are missing from view.  */
9460 ix86_function_value_regno_p (const unsigned int regno)
9467 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9470 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
/* Bound registers are return registers only for instrumented code.  */
9474 return chkp_function_instrumented_p (current_function_decl);
9476 /* Complex values are returned in %st(0)/%st(1) pair. */
9479 /* TODO: The function should depend on current function ABI but
9480 builtins.c would need updating then. Therefore we use the
9482 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9484 return TARGET_FLOAT_RETURNS_IN_80387;
9486 /* Complex values are returned in %xmm0/%xmm1 pair. */
9492 if (TARGET_MACHO || TARGET_64BIT)
9500 /* Define how to find the value returned by a function.
9501 VALTYPE is the data type of the value (as a tree).
9502 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9503 otherwise, FUNC is 0. */
/* 32-bit ABI return-value placement: picks MMX/SSE/x87/EAX by mode.  */
9506 function_value_32 (machine_mode orig_mode, machine_mode mode,
9507 const_tree fntype, const_tree fn)
9511 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9512 we normally prevent this case when mmx is not available. However
9513 some ABIs may require the result to be returned like DImode. */
9514 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9515 regno = FIRST_MMX_REG;
9517 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9518 we prevent this case when sse is not available. However some ABIs
9519 may require the result to be returned like integer TImode. */
9520 else if (mode == TImode
9521 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9522 regno = FIRST_SSE_REG;
9524 /* 32-byte vector modes in %ymm0. */
9525 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9526 regno = FIRST_SSE_REG;
9528 /* 64-byte vector modes in %zmm0. */
9529 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9530 regno = FIRST_SSE_REG;
9532 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9533 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9534 regno = FIRST_FLOAT_REG;
9536 /* Most things go in %eax. */
9539 /* Override FP return register with %xmm0 for local functions when
9540 SSE math is enabled or for functions with sseregparm attribute. */
9541 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9543 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9544 if (sse_level == -1)
/* NOTE(review): "caling" in the message below looks like a typo for
   "calling" (cf. the identical diagnostic in function_arg_32) --
   runtime string, so left untouched here.  */
9546 error ("calling %qD with SSE caling convention without "
9547 "SSE/SSE2 enabled", fn);
9548 sorry ("this is a GCC bug that can be worked around by adding "
9549 "attribute used to function called");
9551 else if ((sse_level >= 1 && mode == SFmode)
9552 || (sse_level == 2 && mode == DFmode))
9553 regno = FIRST_SSE_REG;
9556 /* OImode shouldn't be used directly. */
9557 gcc_assert (mode != OImode);
9559 return gen_rtx_REG (orig_mode, regno);
/* SysV x86-64 return-value placement via construct_container.
   NOTE(review): elided dump -- libcall mode dispatch is missing.  */
9563 function_value_64 (machine_mode orig_mode, machine_mode mode,
9568 /* Handle libcalls, which don't provide a type node. */
9569 if (valtype == NULL)
9583 regno = FIRST_SSE_REG;
9587 regno = FIRST_FLOAT_REG;
9595 return gen_rtx_REG (mode, regno);
9597 else if (POINTER_TYPE_P (valtype))
9599 /* Pointers are always returned in word_mode. */
9603 ret = construct_container (mode, orig_mode, valtype, 1,
9604 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9605 x86_64_int_return_registers, 0);
9607 /* For zero sized structures, construct_container returns NULL, but we
9608 need to keep rest of compiler happy by returning meaningful value. */
9610 ret = gen_rtx_REG (orig_mode, AX_REG);
/* MS x64 return-value placement: RAX by default, XMM0 for scalar/vector
   FP-sized values.  NOTE(review): elided dump -- case labels missing.  */
9616 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9619 unsigned int regno = AX_REG;
9623 switch (GET_MODE_SIZE (mode))
9626 if (valtype != NULL_TREE
/* NOTE(review): the next two conditions are identical --
   !VECTOR_INTEGER_TYPE_P (valtype) is tested twice.  One of them is
   redundant (or was meant to be a different predicate); confirm against
   upstream GCC before changing.  */
9627 && !VECTOR_INTEGER_TYPE_P (valtype)
9628 && !VECTOR_INTEGER_TYPE_P (valtype)
9629 && !INTEGRAL_TYPE_P (valtype)
9630 && !VECTOR_FLOAT_TYPE_P (valtype))
9632 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9633 && !COMPLEX_MODE_P (mode))
9634 regno = FIRST_SSE_REG;
9638 if (mode == SFmode || mode == DFmode)
9639 regno = FIRST_SSE_REG;
9645 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value/libcall value hooks: resolves the
   FUNCTION_DECL vs. FUNCTION_TYPE argument and dispatches on ABI.  */
9649 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9650 machine_mode orig_mode, machine_mode mode)
9652 const_tree fn, fntype;
9655 if (fntype_or_decl && DECL_P (fntype_or_decl))
9656 fn = fntype_or_decl;
9657 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
/* MPX pointer bounds are always returned in the first bound register.  */
9659 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9660 || POINTER_BOUNDS_MODE_P (mode))
9661 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9662 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9663 return function_value_ms_64 (orig_mode, mode, valtype);
9664 else if (TARGET_64BIT)
9665 return function_value_64 (orig_mode, mode, valtype);
9667 return function_value_32 (orig_mode, mode, fntype, fn);
9671 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9673 machine_mode mode, orig_mode;
9675 orig_mode = TYPE_MODE (valtype);
9676 mode = type_natural_mode (valtype, NULL, true);
9677 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9680 /* Return an RTX representing a place where a function returns
9681 or receives pointer bounds or NULL if no bounds are returned.
9683 VALTYPE is a data type of a value returned by the function.
9685 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9686 or FUNCTION_TYPE of the function.
9688 If OUTGOING is false, return a place in which the caller will
9689 see the return value. Otherwise, return a place where a
9690 function returns a value. */
/* Implement TARGET_FUNCTION_VALUE_BOUNDS (MPX): return the rtx holding
   the bounds associated with a returned value of type VALTYPE — a single
   bound register for a bounded type, or a PARALLEL of bound registers for
   aggregates containing pointers.  NOTE(review): the declarations of
   `res`, `slots`, `bi`, `bounds[]` and the final return are not visible
   in this extract.  */
9693 ix86_function_value_bounds (const_tree valtype,
9694 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9695 bool outgoing ATTRIBUTE_UNUSED)
9699 if (BOUNDED_TYPE_P (valtype))
9700 res = gen_rtx_REG (BNDmode, FIRST_BND_REG)9;
9701 else if (chkp_type_has_pointer (valtype))
9706 unsigned i, bnd_no = 0;
/* Collect the bit positions of pointer slots in VALTYPE into a bitmap.  */
9708 bitmap_obstack_initialize (NULL);
9709 slots = BITMAP_ALLOC (NULL);
9710 chkp_find_bound_slots (valtype, slots);
/* One bound register per pointer slot; at most two fit (asserted).  */
9712 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9714 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9715 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9716 gcc_assert (bnd_no < 2);
9717 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9720 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9722 BITMAP_FREE (slots);
9723 bitmap_obstack_release (NULL);
9731 /* Pointer function arguments and return values are promoted to
/* Implement TARGET_PROMOTE_FUNCTION_MODE: pointers are promoted to
   word_mode with POINTERS_EXTEND_UNSIGNED signedness; everything else
   uses the default promotion.  NOTE(review): the `return word_mode;`
   for the pointer path is not visible in this extract.  */
9735 ix86_promote_function_mode (const_tree type, machine_mode mode,
9736 int *punsignedp, const_tree fntype,
9739 if (type != NULL_TREE && POINTER_TYPE_P (type))
9741 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9744 return default_promote_function_mode (type, mode, punsignedp, fntype,
9748 /* Return true if a structure, union or array with MODE containing FIELD
9749 should be accessed using BLKmode. */
9752 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9754 /* Union with XFmode must be in BLKmode. */
/* True only when MODE is XFmode and FIELD's containing aggregate is a
   (qualified) union.  */
9755 return (mode == XFmode
9756 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9757 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* Implement TARGET_LIBCALL_VALUE: library calls have no tree type, so
   reuse the common worker with a NULL type and MODE for both modes.  */
9761 ix86_libcall_value (machine_mode mode)
9763 return ix86_function_value_1 (NULL, NULL, mode, mode);
9766 /* Return true iff type is returned in memory. */
/* NOTE(review): several branch headers (the TARGET_64BIT / MS-ABI /
   IAMCU / 32-bit dispatch, sub-returns, and case structure) are not
   visible in this extract; comments describe only visible code.  */
9769 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9771 #ifdef SUBTARGET_RETURN_IN_MEMORY
9772 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9774 const machine_mode mode = type_natural_mode (type, NULL, true);
/* MPX bounds are never returned in memory.  */
9777 if (POINTER_BOUNDS_TYPE_P (type))
9782 if (ix86_function_type_abi (fntype) == MS_ABI)
9784 size = int_size_in_bytes (type);
9786 /* __m128 is returned in xmm0. */
9787 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9788 || INTEGRAL_TYPE_P (type)
9789 || VECTOR_FLOAT_TYPE_P (type))
9790 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9791 && !COMPLEX_MODE_P (mode)
9792 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9795 /* Otherwise, the size must be exactly in [1248]. */
9796 return size != 1 && size != 2 && size != 4 && size != 8;
9800 int needed_intregs, needed_sseregs;
/* SysV 64-bit: in memory iff the value does not fit in registers.  */
9802 return examine_argument (mode, type, 1,
9803 &needed_intregs, &needed_sseregs);
9808 size = int_size_in_bytes (type);
9810 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9811 bytes in registers. */
9813 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9815 if (mode == BLKmode)
9818 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9821 if (VECTOR_MODE_P (mode) || mode == TImode)
9823 /* User-created vectors small enough to fit in EAX. */
9827 /* Unless ABI prescribes otherwise,
9828 MMX/3dNow values are returned in MM0 if available. */
9831 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9833 /* SSE values are returned in XMM0 if available. */
9837 /* AVX values are returned in YMM0 if available. */
9841 /* AVX512F values are returned in ZMM0 if available. */
9843 return !TARGET_AVX512F;
9852 /* OImode shouldn't be used directly. */
9853 gcc_assert (mode != OImode);
9861 /* Create the va_list data type. */
/* Build the SysV x86-64 __va_list_tag record:
   { unsigned gp_offset; unsigned fp_offset;
     void *overflow_arg_area; void *reg_save_area; }
   and return it wrapped in a one-element array type, as the psABI
   specifies.  NOTE(review): the pointer-typed field types for f_ovf and
   f_sav are on lines not visible in this extract.  */
9864 ix86_build_builtin_va_list_64 (void)
9866 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9868 record = lang_hooks.types.make_type (RECORD_TYPE);
9869 type_decl = build_decl (BUILTINS_LOCATION,
9870 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9872 f_gpr = build_decl (BUILTINS_LOCATION,
9873 FIELD_DECL, get_identifier ("gp_offset"),
9874 unsigned_type_node);
9875 f_fpr = build_decl (BUILTINS_LOCATION,
9876 FIELD_DECL, get_identifier ("fp_offset"),
9877 unsigned_type_node);
9878 f_ovf = build_decl (BUILTINS_LOCATION,
9879 FIELD_DECL, get_identifier ("overflow_arg_area"),
9881 f_sav = build_decl (BUILTINS_LOCATION,
9882 FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so stdarg optimizations can find them.  */
9885 va_list_gpr_counter_field = f_gpr;
9886 va_list_fpr_counter_field = f_fpr;
9888 DECL_FIELD_CONTEXT (f_gpr) = record;
9889 DECL_FIELD_CONTEXT (f_fpr) = record;
9890 DECL_FIELD_CONTEXT (f_ovf) = record;
9891 DECL_FIELD_CONTEXT (f_sav) = record;
9893 TYPE_STUB_DECL (record) = type_decl;
9894 TYPE_NAME (record) = type_decl;
9895 TYPE_FIELDS (record) = f_gpr;
9896 DECL_CHAIN (f_gpr) = f_fpr;
9897 DECL_CHAIN (f_fpr) = f_ovf;
9898 DECL_CHAIN (f_ovf) = f_sav;
9900 layout_type (record);
9902 /* The correct type is an array type of one element. */
9903 return build_array_type (record, build_index_type (size_zero_node));
9906 /* Setup the builtin va_list data type and for 64-bit the additional
9907 calling convention specific va_list data types. */
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.  On 64-bit targets build both
   ABI variants (SysV record array, MS plain char*) and return the one
   matching ix86_abi; 32-bit uses a plain char*.  The TARGET_64BIT guard
   line is not visible in this extract.  */
9910 ix86_build_builtin_va_list (void)
9914 /* Initialize ABI specific va_list builtin types. */
9915 tree sysv_va_list, ms_va_list;
9917 sysv_va_list = ix86_build_builtin_va_list_64 ();
9918 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
9920 /* For MS_ABI we use plain pointer to argument area. */
9921 ms_va_list = build_pointer_type (char_type_node);
9922 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
9924 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
9928 /* For i386 we use plain pointer to argument area. */
9929 return build_pointer_type (char_type_node);
9933 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV x86-64: spill the unused named-argument GPRs and (conditionally)
   SSE registers into the register save area above the frame pointer, so
   va_arg can fetch them.  NOTE(review): declarations of `i`, `mem`,
   `save_area`, `set`, `max`, `smode`, `test` are on lines not visible
   in this extract.  */
9936 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9942 /* GPR size of varargs save area. */
9943 if (cfun->va_list_gpr_size)
9944 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9946 ix86_varargs_gpr_size = 0;
9948 /* FPR size of varargs save area. We don't need it if we don't pass
9949 anything in SSE registers. */
9950 if (TARGET_SSE && cfun->va_list_fpr_size)
9951 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9953 ix86_varargs_fpr_size = 0;
/* Nothing to save at all: done.  */
9955 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9958 save_area = frame_pointer_rtx;
9959 set = get_varargs_alias_set ();
/* Save only the GPRs beyond the last named argument, clamped to the
   ABI maximum.  */
9961 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9962 if (max > X86_64_REGPARM_MAX)
9963 max = X86_64_REGPARM_MAX;
9965 for (i = cum->regno; i < max; i++)
9967 mem = gen_rtx_MEM (word_mode,
9968 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9969 MEM_NOTRAP_P (mem) = 1;
9970 set_mem_alias_set (mem, set);
9971 emit_move_insn (mem,
9972 gen_rtx_REG (word_mode,
9973 x86_64_int_parameter_registers[i]));
9976 if (ix86_varargs_fpr_size)
9979 rtx_code_label *label;
9982 /* Now emit code to save SSE registers. The AX parameter contains number
9983 of SSE parameter registers used to call this function, though all we
9984 actually check here is the zero/non-zero status. */
9986 label = gen_label_rtx ();
9987 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9988 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9991 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9992 we used movdqa (i.e. TImode) instead? Perhaps even better would
9993 be if we could determine the real mode of the data, via a hook
9994 into pass_stdarg. Ignore all that for now. */
9996 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9997 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9999 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
10000 if (max > X86_64_SSE_REGPARM_MAX)
10001 max = X86_64_SSE_REGPARM_MAX;
/* SSE slots start right after the GPR area; each is 16 bytes.  */
10003 for (i = cum->sse_regno; i < max; ++i)
10005 mem = plus_constant (Pmode, save_area,
10006 i * 16 + ix86_varargs_gpr_size);
10007 mem = gen_rtx_MEM (smode, mem);
10008 MEM_NOTRAP_P (mem) = 1;
10009 set_mem_alias_set (mem, set);
10010 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
10012 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
10015 emit_label (label);
/* MS x64 variant: spill the remaining named-parameter GPRs into their
   caller-allocated home slots in the incoming argument area.  No SSE
   save area exists in this ABI.  */
10020 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
10022 alias_set_type set = get_varargs_alias_set ();
10025 /* Reset to zero, as there might be a sysv vaarg used
10027 ix86_varargs_gpr_size = 0;
10028 ix86_varargs_fpr_size = 0;
/* Each of the (up to X86_64_MS_REGPARM_MAX) register parameters has a
   reserved word-sized home slot at i * UNITS_PER_WORD.  */
10030 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
10034 mem = gen_rtx_MEM (Pmode,
10035 plus_constant (Pmode, virtual_incoming_args_rtx,
10036 i * UNITS_PER_WORD));
10037 MEM_NOTRAP_P (mem) = 1;
10038 set_mem_alias_set (mem, set);
10040 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
10041 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: skip the last named argument
   (for stdarg functions) and dispatch to the ABI-specific worker.
   NOTE(review): the 64-bit early-exit guard and the `next_cum = *cum;`
   copy are on lines not visible in this extract.  */
10046 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
10047 tree type, int *, int no_rtl)
10049 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10050 CUMULATIVE_ARGS next_cum;
10053 /* This argument doesn't appear to be used anymore. Which is good,
10054 because the old code here didn't suppress rtl generation. */
10055 gcc_assert (!no_rtl);
10060 fntype = TREE_TYPE (current_function_decl);
10062 /* For varargs, we do not want to skip the dummy va_dcl argument.
10063 For stdargs, we do want to skip the last named argument. */
10065 if (stdarg_p (fntype))
10066 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10069 if (cum->call_abi == MS_ABI)
10070 setup_incoming_varargs_ms_64 (&next_cum);
10072 setup_incoming_varargs_64 (&next_cum);
/* MPX counterpart of ix86_setup_incoming_varargs: store bounds for the
   incoming GPR varargs into the bound table, loading them from bound
   registers while they last and from the parent's bound table (bndldx)
   afterwards.  NOTE(review): declarations of `save_area`, `fntype` and
   some statements are on lines not visible in this extract.  */
10076 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
10077 enum machine_mode mode,
10079 int *pretend_size ATTRIBUTE_UNUSED,
10082 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10083 CUMULATIVE_ARGS next_cum;
10086 int bnd_reg, i, max;
10088 gcc_assert (!no_rtl);
10090 /* Do nothing if we use plain pointer to argument area. */
10091 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
10094 fntype = TREE_TYPE (current_function_decl);
10096 /* For varargs, we do not want to skip the dummy va_dcl argument.
10097 For stdargs, we do want to skip the last named argument. */
10099 if (stdarg_p (fntype))
10100 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10102 save_area = frame_pointer_rtx;
10104 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10105 if (max > X86_64_REGPARM_MAX)
10106 max = X86_64_REGPARM_MAX;
10108 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
10109 if (chkp_function_instrumented_p (current_function_decl))
10110 for (i = cum->regno; i < max; i++)
10112 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
10113 rtx ptr = gen_rtx_REG (Pmode,
10114 x86_64_int_parameter_registers[i]);
/* While bound registers remain, take bounds directly from them;
   otherwise load them from the caller's bound table entry.  */
10117 if (bnd_reg <= LAST_BND_REG)
10118 bounds = gen_rtx_REG (BNDmode, bnd_reg);
10122 plus_constant (Pmode, arg_pointer_rtx,
10123 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
10124 bounds = gen_reg_rtx (BNDmode);
10125 emit_insn (BNDmode == BND64mode
10126 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
10127 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
/* Store the bounds for the spilled pointer at ADDR.  */
10130 emit_insn (BNDmode == BND64mode
10131 ? gen_bnd64_stx (addr, ptr, bounds)
10132 : gen_bnd32_stx (addr, ptr, bounds));
10139 /* Checks if TYPE is of kind va_list char *. */
/* True when TYPE canonicalizes to the plain-pointer (MS-style) va_list
   rather than the SysV record form.  The 32-bit early `return true`
   under the comment below is not visible in this extract.  */
10142 is_va_list_char_pointer (tree type)
10146 /* For 32-bit it is always true. */
10149 canonic = ix86_canonical_va_list_type (type);
10150 return (canonic == ms_va_list_type_node
10151 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
10154 /* Implement va_start. */
/* Fill in the va_list object VALIST.  Plain-pointer va_lists (32-bit /
   MS ABI) get the standard expansion (or a split-stack variant); the
   SysV 64-bit record gets its four fields initialized from the counts
   of named registers already consumed.  NOTE(review): declarations of
   `type`, `ovf_rtx`, `va_r`, `next`, `reg`, `seq` and several closing
   braces/field arguments are on lines not visible in this extract.  */
10157 ix86_va_start (tree valist, rtx nextarg)
10159 HOST_WIDE_INT words, n_gpr, n_fpr;
10160 tree f_gpr, f_fpr, f_ovf, f_sav;
10161 tree gpr, fpr, ovf, sav, t;
10165 if (flag_split_stack
10166 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10168 unsigned int scratch_regno;
10170 /* When we are splitting the stack, we can't refer to the stack
10171 arguments using internal_arg_pointer, because they may be on
10172 the old stack. The split stack prologue will arrange to
10173 leave a pointer to the old stack arguments in a scratch
10174 register, which we here copy to a pseudo-register. The split
10175 stack prologue can't set the pseudo-register directly because
10176 it (the prologue) runs before any registers have been saved. */
10178 scratch_regno = split_stack_prologue_scratch_regno ();
10179 if (scratch_regno != INVALID_REGNUM)
10184 reg = gen_reg_rtx (Pmode);
10185 cfun->machine->split_stack_varargs_pointer = reg;
10188 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno))
10189 seq = get_insns ();
/* Insert the copy at function entry, before any user code.  */
10192 push_topmost_sequence ();
10193 emit_insn_after (seq, entry_of_function ());
10194 pop_topmost_sequence ();
10198 /* Only 64bit target needs something special. */
10199 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10201 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10202 std_expand_builtin_va_start (valist, nextarg);
10207 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
10208 next = expand_binop (ptr_mode, add_optab,
10209 cfun->machine->split_stack_varargs_pointer,
10210 crtl->args.arg_offset_rtx,
10211 NULL_RTX, 0, OPTAB_LIB_WIDEN);
10212 convert_move (va_r, next, 0);
10214 /* Store zero bounds for va_list. */
10215 if (chkp_function_instrumented_p (current_function_decl))
10216 chkp_expand_bounds_reset_for_mem (valist,
10217 make_tree (TREE_TYPE (valist),
/* SysV 64-bit path: walk the four fields of __va_list_tag.  */
10224 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10225 f_fpr = DECL_CHAIN (f_gpr);
10226 f_ovf = DECL_CHAIN (f_fpr);
10227 f_sav = DECL_CHAIN (f_ovf);
10229 valist = build_simple_mem_ref (valist);
10230 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
10231 /* The following should be folded into the MEM_REF offset. */
10232 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10234 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10236 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10238 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10241 /* Count number of gp and fp argument registers used. */
10242 words = crtl->args.info.words;
10243 n_gpr = crtl->args.info.regno;
10244 n_fpr = crtl->args.info.sse_regno;
/* gp_offset = bytes of GPR save area already consumed (8 per reg).  */
10246 if (cfun->va_list_gpr_size)
10248 type = TREE_TYPE (gpr);
10249 t = build2 (MODIFY_EXPR, type,
10250 gpr, build_int_cst (type, n_gpr * 8));
10251 TREE_SIDE_EFFECTS (t) = 1;
10252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = end of GPR area plus 16 bytes per consumed SSE reg.  */
10255 if (TARGET_SSE && cfun->va_list_fpr_size)
10257 type = TREE_TYPE (fpr);
10258 t = build2 (MODIFY_EXPR, type, fpr,
10259 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10260 TREE_SIDE_EFFECTS (t) = 1;
10261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10264 /* Find the overflow area. */
10265 type = TREE_TYPE (ovf);
10266 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10267 ovf_rtx = crtl->args.internal_arg_pointer;
10269 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10270 t = make_tree (type, ovf_rtx);
10272 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10274 /* Store zero bounds for overflow area pointer. */
10275 if (chkp_function_instrumented_p (current_function_decl))
10276 chkp_expand_bounds_reset_for_mem (ovf, t);
10278 t = build2 (MODIFY_EXPR, type, ovf, t);
10279 TREE_SIDE_EFFECTS (t) = 1;
10280 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10282 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10284 /* Find the register save area.
10285 Prologue of the function save it right above stack frame. */
10286 type = TREE_TYPE (sav);
10287 t = make_tree (type, frame_pointer_rtx);
/* No GPR area saved: bias back so fp_offset still indexes correctly.  */
10288 if (!ix86_varargs_gpr_size)
10289 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10291 /* Store zero bounds for save area pointer. */
10292 if (chkp_function_instrumented_p (current_function_decl))
10293 chkp_expand_bounds_reset_for_mem (sav, t);
10295 t = build2 (MODIFY_EXPR, type, sav, t);
10296 TREE_SIDE_EFFECTS (t) = 1;
10297 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10301 /* Implement va_arg. */
/* Gimplify a VA_ARG_EXPR for TYPE from VALIST.  For the SysV 64-bit
   record va_list this emits the classic two-path sequence: try the
   register save area (gp/fp offsets within limits), else fall through
   to the overflow (stack) area.  NOTE(review): numerous lines (brace
   lines, some declarations such as `size`, `rsize`, `container`,
   `need_temp`, `t2`, `ptrtype`, and several branch headers) are not
   visible in this extract; comments cover only visible code.  */
10304 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10305 gimple_seq *post_p)
10307 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10308 tree f_gpr, f_fpr, f_ovf, f_sav;
10309 tree gpr, fpr, ovf, sav, t;
10311 tree lab_false, lab_over = NULL_TREE;
10314 int indirect_p = 0;
10316 machine_mode nat_mode;
10317 unsigned int arg_boundary;
10319 /* Only 64bit target needs something special. */
10320 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10321 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10323 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10324 f_fpr = DECL_CHAIN (f_gpr);
10325 f_ovf = DECL_CHAIN (f_fpr);
10326 f_sav = DECL_CHAIN (f_ovf);
10328 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10329 valist, f_gpr, NULL_TREE);
10331 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10332 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10333 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference fetch a pointer instead of the value.  */
10335 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10337 type = build_pointer_type (type);
10338 size = int_size_in_bytes (type);
10339 rsize = CEIL (size, UNITS_PER_WORD);
10341 nat_mode = type_natural_mode (type, NULL, false);
10356 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
10357 if (!TARGET_64BIT_MS_ABI)
10364 container = construct_container (nat_mode, TYPE_MODE (type),
10365 type, 0, X86_64_REGPARM_MAX,
10366 X86_64_SSE_REGPARM_MAX, intreg,
10371 /* Pull the value out of the saved registers. */
10373 addr = create_tmp_var (ptr_type_node, "addr");
10377 int needed_intregs, needed_sseregs;
10379 tree int_addr, sse_addr;
10381 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10382 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10384 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temp copy is needed when alignment exceeds what the register save
   area guarantees.  */
10386 need_temp = (!REG_P (container)
10387 && ((needed_intregs && TYPE_ALIGN (type) > 64)
10388 || TYPE_ALIGN (type) > 128));
10390 /* In case we are passing structure, verify that it is consecutive block
10391 on the register save area. If not we need to do moves. */
10392 if (!need_temp && !REG_P (container))
10394 /* Verify that all registers are strictly consecutive */
10395 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10399 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10401 rtx slot = XVECEXP (container, 0, i);
10402 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10403 || INTVAL (XEXP (slot, 1)) != i * 16)
10411 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10413 rtx slot = XVECEXP (container, 0, i);
10414 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10415 || INTVAL (XEXP (slot, 1)) != i * 8)
10427 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10428 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10431 /* First ensure that we fit completely in registers. */
10432 if (needed_intregs)
10434 t = build_int_cst (TREE_TYPE (gpr),
10435 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10436 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10437 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10438 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10439 gimplify_and_add (t, pre_p);
10441 if (needed_sseregs)
10443 t = build_int_cst (TREE_TYPE (fpr),
10444 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10445 + X86_64_REGPARM_MAX * 8);
10446 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10447 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10448 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10449 gimplify_and_add (t, pre_p);
10452 /* Compute index to start of area used for integer regs. */
10453 if (needed_intregs)
10455 /* int_addr = gpr + sav; */
10456 t = fold_build_pointer_plus (sav, gpr);
10457 gimplify_assign (int_addr, t, pre_p);
10459 if (needed_sseregs)
10461 /* sse_addr = fpr + sav; */
10462 t = fold_build_pointer_plus (sav, fpr);
10463 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy the pieces out of the save area into a stack
   temporary and yield its address.  */
10467 int i, prev_size = 0;
10468 tree temp = create_tmp_var (type, "va_arg_tmp");
10470 /* addr = &temp; */
10471 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10472 gimplify_assign (addr, t, pre_p);
10474 for (i = 0; i < XVECLEN (container, 0); i++)
10476 rtx slot = XVECEXP (container, 0, i);
10477 rtx reg = XEXP (slot, 0);
10478 machine_mode mode = GET_MODE (reg);
10482 tree src_addr, src;
10484 tree dest_addr, dest;
10485 int cur_size = GET_MODE_SIZE (mode);
10487 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10488 prev_size = INTVAL (XEXP (slot, 1));
/* Trailing piece smaller than the register: narrow the copy mode.  */
10489 if (prev_size + cur_size > size)
10491 cur_size = size - prev_size;
10492 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
10493 if (mode == BLKmode)
10496 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10497 if (mode == GET_MODE (reg))
10498 addr_type = build_pointer_type (piece_type);
10500 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10502 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
/* SSE pieces come from the fp area (16-byte slots), integer pieces
   from the gp area (8-byte slots).  */
10505 if (SSE_REGNO_P (REGNO (reg)))
10507 src_addr = sse_addr;
10508 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10512 src_addr = int_addr;
10513 src_offset = REGNO (reg) * 8;
10515 src_addr = fold_convert (addr_type, src_addr);
10516 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10518 dest_addr = fold_convert (daddr_type, addr);
10519 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10520 if (cur_size == GET_MODE_SIZE (mode))
10522 src = build_va_arg_indirect_ref (src_addr);
10523 dest = build_va_arg_indirect_ref (dest_addr);
10525 gimplify_assign (dest, src, pre_p);
/* Otherwise fall back to a memcpy of the partial piece.  */
10530 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10531 3, dest_addr, src_addr,
10532 size_int (cur_size));
10533 gimplify_and_add (copy, pre_p);
10535 prev_size += cur_size;
/* Bump the offsets past the registers just consumed.  */
10539 if (needed_intregs)
10541 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10542 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10543 gimplify_assign (gpr, t, pre_p);
10546 if (needed_sseregs)
10548 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10549 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10550 gimplify_assign (unshare_expr (fpr), t, pre_p);
10553 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10555 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10558 /* ... otherwise out of the overflow area. */
10560 /* When we align parameter on stack for caller, if the parameter
10561 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
10562 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
10563 here with caller. */
10564 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10565 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10566 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10568 /* Care for on-stack alignment if needed. */
10569 if (arg_boundary <= 64 || size == 0)
/* Round the overflow pointer up to the required alignment.  */
10573 HOST_WIDE_INT align = arg_boundary / 8;
10574 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10575 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10576 build_int_cst (TREE_TYPE (t), -align));
10579 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10580 gimplify_assign (addr, t, pre_p);
10582 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10583 gimplify_assign (unshare_expr (ovf), t, pre_p);
10586 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10588 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10589 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments, dereference once more.  */
10592 addr = build_va_arg_indirect_ref (addr);
10593 return build_va_arg_indirect_ref (addr);
10596 /* Return true if OPNUM's MEM should be matched
10597 in movabs* patterns. */
/* Dig the MEM out of INSN's SET operand OPNUM (looking through SUBREGs
   and a possible PARALLEL wrapper) and accept it unless it is volatile
   while volatile_ok is clear.  */
10600 ix86_check_movabs (rtx insn, int opnum)
10604 set = PATTERN (insn);
10605 if (GET_CODE (set) == PARALLEL)
10606 set = XVECEXP (set, 0, 0);
10607 gcc_assert (GET_CODE (set) == SET);
10608 mem = XEXP (set, opnum);
10609 while (SUBREG_P (mem))
10610 mem = SUBREG_REG (mem);
10611 gcc_assert (MEM_P (mem));
10612 return volatile_ok || !MEM_VOLATILE_P (mem);
10615 /* Return false if INSN contains a MEM with a non-default address space. */
/* Walk every sub-rtx of INSN's pattern; reject on the first MEM whose
   address space is not the generic one.  The `return true` after the
   loop is not visible in this extract.  */
10617 ix86_check_no_addr_space (rtx insn)
10619 subrtx_var_iterator::array_type array;
10620 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10623 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10629 /* Initialize the table of extra 80387 mathematical constants. */
/* Parse the five x87-loadable constants (log10(2), ln(2), log2(e),
   log2(10), pi) into ext_80387_constants_table, rounded to XFmode, and
   mark the table initialized.  */
10632 init_ext_80387_constants (void)
10634 static const char * cst[5] =
10636 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10637 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10638 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10639 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10640 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10644 for (i = 0; i < 5; i++)
10646 real_from_string (&ext_80387_constants_table[i], cst[i]);
10647 /* Ensure each constant is rounded to XFmode precision. */
10648 real_convert (&ext_80387_constants_table[i],
10649 XFmode, &ext_80387_constants_table[i]);
10652 ext_80387_constants_init = 1;
10655 /* Return non-zero if the constant is something that
10656 can be loaded with a special instruction. */
/* Classify CONST_DOUBLE X for the x87 load instructions.  From the
   visible code: 0.0 and 1.0 are matched first, then the five extended
   constants, then -0.0 / -1.0 (split as fldz/fld1 + fchs).  The exact
   return codes and the final failure return are on lines not visible in
   this extract.  */
10659 standard_80387_constant_p (rtx x)
10661 machine_mode mode = GET_MODE (x);
10663 const REAL_VALUE_TYPE *r;
10665 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10668 if (x == CONST0_RTX (mode))
10670 if (x == CONST1_RTX (mode))
10673 r = CONST_DOUBLE_REAL_VALUE (x);
10675 /* For XFmode constants, try to find a special 80387 instruction when
10676 optimizing for size or on those CPUs that benefit from them. */
10678 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10682 if (! ext_80387_constants_init)
10683 init_ext_80387_constants ();
10685 for (i = 0; i < 5; i++)
10686 if (real_identical (r, &ext_80387_constants_table[i]))
10690 /* Load of the constant -0.0 or -1.0 will be split as
10691 fldz;fchs or fld1;fchs sequence. */
10692 if (real_isnegzero (r))
10694 if (real_identical (r, &dconstm1))
10700 /* Return the opcode of the special instruction to be used to load
/* Map the classification from standard_80387_constant_p back to the
   mnemonic string; the case arms are on lines not visible in this
   extract, and an unrecognized code aborts.  */
10704 standard_80387_constant_opcode (rtx x)
10706 switch (standard_80387_constant_p (x))
10726 gcc_unreachable ();
10730 /* Return the CONST_DOUBLE representing the 80387 constant that is
10731 loaded by the specified special instruction. The argument IDX
10732 matches the return value from standard_80387_constant_p. */
/* Lazily initialize the constant table, translate IDX to a table index
   (the switch arms are not visible in this extract), and wrap the entry
   in a CONST_DOUBLE.  */
10735 standard_80387_constant_rtx (int idx)
10739 if (! ext_80387_constants_init)
10740 init_ext_80387_constants ();
10753 gcc_unreachable ();
10756 return const_double_from_real_value (ext_80387_constants_table[i],
10760 /* Return 1 if X is all 0s and 2 if x is all 1s
10761 in supported SSE/AVX vector mode. */
/* Classification of SSE/AVX constants; the mode-size conditions guarding
   the all-ones case and the final return are on lines not visible in
   this extract.  */
10764 standard_sse_constant_p (rtx x)
10771 mode = GET_MODE (x);
10773 if (x == const0_rtx || x == CONST0_RTX (mode))
10775 if (vector_all_ones_operand (x, mode))
10794 if (TARGET_AVX512F)
10803 /* Return the opcode of the special instruction to be used to load
/* Emit the textual asm template that materializes an all-zeros
   (xor-style) or all-ones (pcmpeq/vpternlog) constant, chosen by the
   insn's mode attribute and available ISA extensions.  Case labels for
   both switches are on lines not visible in this extract.  */
10807 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10809 switch (standard_sse_constant_p (x))
10812 switch (get_attr_mode (insn))
10815 return "vpxord\t%g0, %g0, %g0";
10817 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
10818 : "vpxord\t%g0, %g0, %g0";
10820 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
10821 : "vpxorq\t%g0, %g0, %g0";
10823 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
10824 : "%vpxor\t%0, %d0";
10826 return "%vxorpd\t%0, %d0";
10828 return "%vxorps\t%0, %d0";
10831 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
10832 : "vpxor\t%x0, %x0, %x0";
10834 return "vxorpd\t%x0, %x0, %x0";
10836 return "vxorps\t%x0, %x0, %x0";
/* All-ones constants: vpternlogd where 512-bit/VL forms apply,
   otherwise vpcmpeqd/pcmpeqd.  */
10843 if (TARGET_AVX512VL
10844 || get_attr_mode (insn) == MODE_XI
10845 || get_attr_mode (insn) == MODE_V8DF
10846 || get_attr_mode (insn) == MODE_V16SF
10847 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10849 return "vpcmpeqd\t%0, %0, %0";
10851 return "pcmpeqd\t%0, %0";
10856 gcc_unreachable ();
10859 /* Returns true if OP contains a symbol reference */
/* Recursive walk over OP's rtx format string: SYMBOL_REF/LABEL_REF hit
   directly, 'E' vectors and 'e' sub-expressions recurse.  Declarations
   of `fmt`, `i`, `j`, the `return true` bodies, and the final
   `return false` are on lines not visible in this extract.  */
10862 symbolic_reference_mentioned_p (rtx op)
10867 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10870 fmt = GET_RTX_FORMAT (GET_CODE (op));
10871 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10877 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10878 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10882 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10889 /* Return true if it is appropriate to emit `ret' instructions in the
10890 body of a function. Do this only if the epilogue is simple, needing a
10891 couple of insns. Prior to reloading, we can't tell how many registers
10892 must be saved, so return false then. Return false if there is no frame
10893 marker to de-allocate. */
10896 ix86_can_use_return_insn_p (void)
10898 struct ix86_frame frame;
/* Before reload, or with a frame pointer, a bare `ret` is not safe.  */
10900 if (! reload_completed || frame_pointer_needed)
10903 /* Don't allow more than 32k pop, since that's all we can do
10904 with one instruction. */
10905 if (crtl->args.pops_args && crtl->args.size >= 32768)
/* OK only when the frame is just the return address and no registers
   need restoring.  */
10908 ix86_compute_frame_layout (&frame);
10909 return (frame.stack_pointer_offset == UNITS_PER_WORD
10910 && (frame.nregs + frame.nsseregs) == 0);
10913 /* Value should be nonzero if functions must have frame pointers.
10914 Zero means the frame pointer need not be set up (and parms may
10915 be accessed via the stack pointer) in functions that seem suitable. */
/* Implement TARGET_FRAME_POINTER_REQUIRED; each `return true` body and
   the final `return false` are on lines not visible in this extract.  */
10918 ix86_frame_pointer_required (void)
10920 /* If we accessed previous frames, then the generated code expects
10921 to be able to access the saved ebp value in our frame. */
10922 if (cfun->machine->accesses_prev_frame)
10925 /* Several x86 os'es need a frame pointer for other reasons,
10926 usually pertaining to setjmp. */
10927 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10930 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
10931 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10934 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
10935 allocation is 4GB. */
10936 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10939 /* SSE saves require frame-pointer when stack is misaligned. */
10940 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10943 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10944 turns off the frame pointer by default. Turn it back on now if
10945 we've not got a leaf function. */
10946 if (TARGET_OMIT_LEAF_FRAME_POINTER
10948 || ix86_current_function_calls_tls_descriptor))
/* Profiling without -mfentry also needs the frame pointer.  */
10951 if (crtl->profile && !flag_fentry)
10957 /* Record that the current function accesses previous call frames. */
/* Sets the per-function flag consulted by ix86_frame_pointer_required.  */
10960 ix86_setup_frame_addresses (void)
10962 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: 1 when the assembler supports .hidden and the
   target supports one-only (COMDAT) sections, so PC thunks can be
   emitted as shared hidden linkonce symbols; 0 forces local labels.  */
10965 #ifndef USE_HIDDEN_LINKONCE
10966 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10967 # define USE_HIDDEN_LINKONCE 1
10969 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a __x86.get_pc_thunk.* has been
   requested; consumed by ix86_code_end.  */
10973 static int pic_labels_used;
10975 /* Fills in the label name that should be used for a pc thunk for
10976 the given register. */
/* NAME must have room for 32 bytes.  32-bit only (asserted).  */
10979 get_pc_thunk_name (char name[32], unsigned int regno)
10981 gcc_assert (!TARGET_64BIT);
10983 if (USE_HIDDEN_LINKONCE)
10984 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10986 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10990 /* This function generates code for -fpic that loads %ebx with
10991 the return address of the caller and then returns.  Called at the
   end of assembly output; emits one get-pc thunk for every register
   recorded in the pic_labels_used bitmask by output_set_got. */
10994 ix86_code_end (void)
10999 for (regno = AX_REG; regno <= SP_REG; regno++)
/* Skip registers for which no thunk was ever requested.  */
11004 if (!(pic_labels_used & (1 << regno)))
11007 get_pc_thunk_name (name, regno);
/* Build a bare FUNCTION_DECL for the thunk so it can be emitted with
   the normal function-output machinery (and get unwind info).  */
11009 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11010 get_identifier (name),
11011 build_function_type_list (void_type_node, NULL_TREE));
11012 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11013 NULL_TREE, void_type_node);
11014 TREE_PUBLIC (decl) = 1;
11015 TREE_STATIC (decl) = 1;
11016 DECL_IGNORED_P (decl) = 1;
/* Darwin: emit the thunk as a coalesced weak definition.  */
11021 switch_to_section (darwin_sections[text_coal_section]);
11022 fputs ("\t.weak_definition\t", asm_out_file);
11023 assemble_name (asm_out_file, name);
11024 fputs ("\n\t.private_extern\t", asm_out_file);
11025 assemble_name (asm_out_file, name);
11026 putc ('\n', asm_out_file);
11027 ASM_OUTPUT_LABEL (asm_out_file, name);
11028 DECL_WEAK (decl) = 1;
/* ELF-style targets: put the thunk in its own comdat section and mark
   it hidden so duplicates merge at link time.  */
11032 if (USE_HIDDEN_LINKONCE)
11034 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11036 targetm.asm_out.unique_section (decl, 0);
11037 switch_to_section (get_named_section (decl, NULL, 0));
11039 targetm.asm_out.globalize_label (asm_out_file, name);
11040 fputs ("\t.hidden\t", asm_out_file);
11041 assemble_name (asm_out_file, name);
11042 putc ('\n', asm_out_file);
11043 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11047 switch_to_section (text_section);
11048 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Set up a minimal cfun context so final_start_function works.  */
11051 DECL_INITIAL (decl) = make_node (BLOCK);
11052 current_function_decl = decl;
11053 allocate_struct_function (decl, false);
11054 init_function_start (decl);
11055 first_function_block_is_cold = false;
11056 /* Make sure unwind info is emitted for the thunk if needed. */
11057 final_start_function (emit_barrier (), asm_out_file, 1);
11059 /* Pad stack IP move with 4 instructions (two NOPs count
11060 as one instruction). */
11061 if (TARGET_PAD_SHORT_FUNCTION)
11066 fputs ("\tnop\n", asm_out_file);
/* Thunk body: load the return address (at the top of the stack)
   into the target register, then return.  */
11069 xops[0] = gen_rtx_REG (Pmode, regno);
11070 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11071 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11072 output_asm_insn ("%!ret", NULL);
11073 final_end_function ();
11074 init_insn_lengths ();
11075 free_after_compilation (cfun);
11077 current_function_decl = NULL;
11080 if (flag_split_stack)
11081 file_end_indicate_split_stack ();
11084 /* Emit code for the SET_GOT patterns: load the GOT base address into
   DEST.  LABEL, if non-null, is the label at the point of use.  Returns
   the assembler template string (empty - output is done directly). */
11087 output_set_got (rtx dest, rtx label)
/* VxWorks RTP: the GOT base is found via the GOTT_BASE/GOTT_INDEX
   indirection instead of a pc thunk.  */
11093 if (TARGET_VXWORKS_RTP && flag_pic)
11095 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
11096 xops[2] = gen_rtx_MEM (Pmode,
11097 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11098 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11100 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11101 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11102 an unadorned address. */
11103 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11104 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11105 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11109 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Call the per-register get-pc thunk; record its use so ix86_code_end
   knows to emit the thunk body at end of assembly.  */
11114 get_pc_thunk_name (name, REGNO (dest));
11115 pic_labels_used |= 1 << REGNO (dest);
11117 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11118 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11119 output_asm_insn ("%!call\t%X2", xops);
11122 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11123 This is what will be referenced by the Mach-O PIC subsystem. */
11124 if (machopic_should_output_picbase_label () || !label)
11125 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11127 /* When we are restoring the pic base at the site of a nonlocal label,
11128 and we decided to emit the pic base above, we will still output a
11129 local label used for calculating the correction offset (even though
11130 the offset will be 0 in that case). */
11132 targetm.asm_out.internal_label (asm_out_file, "L",
11133 CODE_LABEL_NUMBER (label));
11139 /* We don't need a pic base, we're not producing pic. */
11140 gcc_unreachable ();
/* Non-thunk path: mov the pc label into DEST, then add the GOT
   displacement below.  */
11142 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11143 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11144 targetm.asm_out.internal_label (asm_out_file, "L",
11145 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
11149 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11154 /* Generate an "push" pattern for input ARG.  Returns the SET rtx;
   also updates the tracked frame-state offsets as a side effect. */
11159 struct machine_function *m = cfun->machine;
/* A push grows the frame by one word; if the CFA is still tracked
   relative to the stack pointer, it moves by the same amount.  */
11161 if (m->fs.cfa_reg == stack_pointer_rtx)
11162 m->fs.cfa_offset += UNITS_PER_WORD;
11163 m->fs.sp_offset += UNITS_PER_WORD;
/* Pushes always operate on full words; widen narrower hard regs.  */
11165 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11166 arg = gen_rtx_REG (word_mode, REGNO (arg));
/* (set (mem:W (pre_dec sp)) arg) -- i.e. a plain push.  */
11168 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11169 gen_rtx_PRE_DEC (Pmode,
11170 stack_pointer_rtx)),
11174 /* Generate an "pop" pattern for input ARG.  Unlike gen_push, no frame
   state is updated here; returns the SET rtx only. */
/* Pops also operate on full words; widen narrower hard regs.  */
11179 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11180 arg = gen_rtx_REG (word_mode, REGNO (arg));
/* (set arg (mem:W (post_inc sp))) -- i.e. a plain pop.  */
11182 return gen_rtx_SET (arg,
11183 gen_rtx_MEM (word_mode,
11184 gen_rtx_POST_INC (Pmode,
11185 stack_pointer_rtx)));
11188 /* Return >= 0 if there is an unused call-clobbered register available
11189 for the entire function, INVALID_REGNUM otherwise. */
11191 static unsigned int
11192 ix86_select_alt_pic_regnum (void)
/* When the PIC register is a pseudo, the register allocator picks a
   hard reg itself; nothing for us to choose.  */
11194 if (ix86_use_pseudo_pic_reg ())
11195 return INVALID_REGNUM;
11199 && !ix86_current_function_calls_tls_descriptor)
11202 /* Can't use the same register for both PIC and DRAP. */
11203 if (crtl->drap_reg)
11204 drap = REGNO (crtl->drap_reg)
/* Scan hard registers 2..0 for one that is never live in this
   function and is not claimed as the DRAP register.  */;
11207 for (i = 2; i >= 0; --i)
11208 if (i != drap && !df_regs_ever_live_p (i))
11212 return INVALID_REGNUM;
11215 /* Return TRUE if we need to save REGNO.  MAYBE_EH_RETURN additionally
   counts the EH return data registers as needing a save. */
11218 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
/* Special-case the hard PIC register while a PIC pointer exists.  */
11220 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11221 && pic_offset_table_rtx)
11223 if (ix86_use_pseudo_pic_reg ())
11225 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11226 _mcount in prologue. */
11227 if (!TARGET_64BIT && flag_pic && crtl->profile)
11230 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11232 || crtl->calls_eh_return
11233 || crtl->uses_const_pool
11234 || cfun->has_nonlocal_label)
/* Only save the PIC reg if no spare call-clobbered alternative
   register is available for the whole function.  */
11235 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
/* EH return data registers must survive into the landing pad.  */
11238 if (crtl->calls_eh_return && maybe_eh_return)
11243 unsigned test = EH_RETURN_DATA_REGNO (i);
11244 if (test == INVALID_REGNUM)
11252 && regno == REGNO (crtl->drap_reg)
11253 && !cfun->machine->no_drap_save_restore)
/* Generic rule: a live, call-saved, non-fixed register needs saving;
   the hard frame pointer is excluded when it is the frame pointer.  */
11256 return (df_regs_ever_live_p (regno)
11257 && !call_used_regs[regno]
11258 && !fixed_regs[regno]
11259 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11262 /* Return number of saved general purpose registers. */
11265 ix86_nsaved_regs (void)
/* Count every general register the prologue must save.  */
11270 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11271 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11276 /* Return number of saved SSE registers. */
11279 ix86_nsaved_sseregs (void)
/* Only the 64-bit MS ABI has call-saved SSE registers.  */
11284 if (!TARGET_64BIT_MS_ABI)
11286 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11287 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11292 /* Given FROM and TO register numbers, say whether this elimination is
11293 allowed. If stack alignment is needed, we can only replace argument
11294 pointer with hard frame pointer, or replace frame pointer with stack
11295 pointer. Otherwise, frame pointer elimination is automatically
11296 handled and all other eliminations are valid. */
11299 ix86_can_eliminate (const int from, const int to)
/* Under fp-based realignment only the two eliminations named above
   are meaningful; sp-relative offsets are unknown.  */
11301 if (stack_realign_fp)
11302 return ((from == ARG_POINTER_REGNUM
11303 && to == HARD_FRAME_POINTER_REGNUM)
11304 || (from == FRAME_POINTER_REGNUM
11305 && to == STACK_POINTER_REGNUM));
/* Eliminating to SP is only valid when no frame pointer is needed.  */
11307 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11310 /* Return the offset between two registers, one to be eliminated, and the other
11311 its replacement, at the start of a routine.  Offsets come straight
   from the computed frame layout. */
11314 ix86_initial_elimination_offset (int from, int to)
11316 struct ix86_frame frame;
11317 ix86_compute_frame_layout (&frame);
11319 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11320 return frame.hard_frame_pointer_offset;
11321 else if (from == FRAME_POINTER_REGNUM
11322 && to == HARD_FRAME_POINTER_REGNUM)
11323 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases all eliminate to the stack pointer.  */
11326 gcc_assert (to == STACK_POINTER_REGNUM);
11328 if (from == ARG_POINTER_REGNUM)
11329 return frame.stack_pointer_offset;
11331 gcc_assert (from == FRAME_POINTER_REGNUM);
11332 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11336 /* In a dynamically-aligned function, we can't know the offset from
11337 stack pointer to frame pointer, so we must ensure that setjmp
11338 eliminates fp against the hard fp (%ebp) rather than trying to
11339 index from %esp up to the top of the frame across a gap that is
11340 of unknown (at compile-time) size. */
11342 ix86_builtin_setjmp_frame_value (void)
/* Under fp-based realignment, hand setjmp the hard frame pointer;
   otherwise the usual virtual frame variable base suffices.  */
11344 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11347 /* When using -fsplit-stack, the allocation routines set a field in
11348 the TCB to the bottom of the stack plus this much space, measured
11351 #define SPLIT_STACK_AVAILABLE 256
11353 /* Fill structure ix86_frame about frame of currently computed function.
   Computes, top of frame downwards: return address, optional static
   chain and saved frame pointer, GP register save area, SSE register
   save area, local variables, and the outgoing argument area. */
11356 ix86_compute_frame_layout (struct ix86_frame *frame)
11358 unsigned HOST_WIDE_INT stack_alignment_needed;
11359 HOST_WIDE_INT offset;
11360 unsigned HOST_WIDE_INT preferred_alignment;
11361 HOST_WIDE_INT size = get_frame_size ();
11362 HOST_WIDE_INT to_allocate;
11364 frame->nregs = ix86_nsaved_regs ();
11365 frame->nsseregs = ix86_nsaved_sseregs ();
11367 /* 64-bit MS ABI seems to require stack alignment to be always 16,
11368 except for function prologues, leaf functions and when the default
11369 incoming stack boundary is overridden at command line or via
11370 force_align_arg_pointer attribute. */
11371 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11372 && (!crtl->is_leaf || cfun->calls_alloca != 0
11373 || ix86_current_function_calls_tls_descriptor
11374 || ix86_incoming_stack_boundary < 128))
11376 crtl->preferred_stack_boundary = 128;
11377 crtl->stack_alignment_needed = 128;
/* Convert the bit alignments into bytes for offset arithmetic.  */
11380 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11381 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11383 gcc_assert (!size || stack_alignment_needed);
11384 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11385 gcc_assert (preferred_alignment <= stack_alignment_needed);
11387 /* For SEH we have to limit the amount of code movement into the prologue.
11388 At present we do this via a BLOCKAGE, at which point there's very little
11389 scheduling that can be done, which means that there's very little point
11390 in doing anything except PUSHs. */
11392 cfun->machine->use_fast_prologue_epilogue = false;
11394 /* During reload iteration the amount of registers saved can change.
11395 Recompute the value as needed. Do not recompute when amount of registers
11396 didn't change as reload does multiple calls to the function and does not
11397 expect the decision to change within single iteration. */
11398 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
11399 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
11401 int count = frame->nregs;
11402 struct cgraph_node *node = cgraph_node::get (current_function_decl);
/* Cache the register count this decision was made for.  */
11404 cfun->machine->use_fast_prologue_epilogue_nregs = count;
11406 /* The fast prologue uses move instead of push to save registers. This
11407 is significantly longer, but also executes faster as modern hardware
11408 can execute the moves in parallel, but can't do that for push/pop.
11410 Be careful about choosing what prologue to emit: When function takes
11411 many instructions to execute we may use slow version as well as in
11412 case function is known to be outside hot spot (this is known with
11413 feedback only). Weight the size of function by number of registers
11414 to save as it is cheap to use one or two push instructions but very
11415 slow to use many of them. */
11417 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11418 if (node->frequency < NODE_FREQUENCY_NORMAL
11419 || (flag_branch_probabilities
11420 && node->frequency < NODE_FREQUENCY_HOT))
11421 cfun->machine->use_fast_prologue_epilogue = false;
11423 cfun->machine->use_fast_prologue_epilogue
11424 = !expensive_function_p (count);
11427 frame->save_regs_using_mov
11428 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
11429 /* If static stack checking is enabled and done with probes,
11430 the registers need to be saved before allocating the frame. */
11431 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11433 /* Skip return address. */
11434 offset = UNITS_PER_WORD;
11436 /* Skip pushed static chain. */
11437 if (ix86_static_chain_on_stack)
11438 offset += UNITS_PER_WORD;
11440 /* Skip saved base pointer. */
11441 if (frame_pointer_needed)
11442 offset += UNITS_PER_WORD;
11443 frame->hfp_save_offset = offset;
11445 /* The traditional frame pointer location is at the top of the frame. */
11446 frame->hard_frame_pointer_offset = offset;
11448 /* Register save area */
11449 offset += frame->nregs * UNITS_PER_WORD;
11450 frame->reg_save_offset = offset;
11452 /* On SEH target, registers are pushed just before the frame pointer
   location. */
11455 frame->hard_frame_pointer_offset = offset;
11457 /* Align and set SSE register save area. */
11458 if (frame->nsseregs)
11460 /* The only ABI that has saved SSE registers (Win64) also has a
11461 16-byte aligned default stack, and thus we don't need to be
11462 within the re-aligned local stack frame to save them. In case
11463 incoming stack boundary is aligned to less than 16 bytes,
11464 unaligned move of SSE register will be emitted, so there is
11465 no point to round up the SSE register save area outside the
11466 re-aligned local stack frame to 16 bytes. */
11467 if (ix86_incoming_stack_boundary >= 128)
11468 offset = ROUND_UP (offset, 16);
11469 offset += frame->nsseregs * 16;
11471 frame->sse_reg_save_offset = offset;
11473 /* The re-aligned stack starts here. Values before this point are not
11474 directly comparable with values below this point. In order to make
11475 sure that no value happens to be the same before and after, force
11476 the alignment computation below to add a non-zero value. */
11477 if (stack_realign_fp)
11478 offset = ROUND_UP (offset, stack_alignment_needed)
/* Reserve the register-save area for varargs, if any.  */;
11481 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11482 offset += frame->va_arg_size;
11484 /* Align start of frame for local function. */
11485 if (stack_realign_fp
11486 || offset != frame->sse_reg_save_offset
11489 || cfun->calls_alloca
11490 || ix86_current_function_calls_tls_descriptor)
11491 offset = ROUND_UP (offset, stack_alignment_needed);
11493 /* Frame pointer points here. */
11494 frame->frame_pointer_offset = offset;
11498 /* Add outgoing arguments area. Can be skipped if we eliminated
11499 all the function calls as dead code.
11500 Skipping is however impossible when function calls alloca. Alloca
11501 expander assumes that last crtl->outgoing_args_size
11502 of stack frame are unused. */
11503 if (ACCUMULATE_OUTGOING_ARGS
11504 && (!crtl->is_leaf || cfun->calls_alloca
11505 || ix86_current_function_calls_tls_descriptor))
11507 offset += crtl->outgoing_args_size;
11508 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11511 frame->outgoing_arguments_size = 0;
11513 /* Align stack boundary. Only needed if we're calling another function
11514 or using alloca. */
11515 if (!crtl->is_leaf || cfun->calls_alloca
11516 || ix86_current_function_calls_tls_descriptor)
11517 offset = ROUND_UP (offset, preferred_alignment);
11519 /* We've reached end of stack frame. */
11520 frame->stack_pointer_offset = offset;
11522 /* Size prologue needs to allocate. */
11523 to_allocate = offset - frame->sse_reg_save_offset;
/* Moves beat pushes only when there is something to move and the
   64-bit displacement limit is not exceeded.  */
11525 if ((!to_allocate && frame->nregs <= 1)
11526 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
11527 frame->save_regs_using_mov = false;
/* The red zone lets small leaf frames live below %rsp untouched;
   record how much of the frame fits there.  */
11529 if (ix86_using_red_zone ()
11530 && crtl->sp_is_unchanging
11532 && !ix86_pc_thunk_call_expanded
11533 && !ix86_current_function_calls_tls_descriptor)
11535 frame->red_zone_size = to_allocate;
11536 if (frame->save_regs_using_mov)
11537 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11538 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11539 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11542 frame->red_zone_size = 0;
11543 frame->stack_pointer_offset -= frame->red_zone_size;
11545 /* The SEH frame pointer location is near the bottom of the frame.
11546 This is enforced by the fact that the difference between the
11547 stack pointer and the frame pointer is limited to 240 bytes in
11548 the unwind data structure. */
11551 HOST_WIDE_INT diff;
11553 /* If we can leave the frame pointer where it is, do so. Also, returns
11554 the establisher frame for __builtin_frame_address (0). */
11555 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11556 if (diff <= SEH_MAX_FRAME_SIZE
11557 && (diff > 240 || (diff & 15) != 0)
11558 && !crtl->accesses_prior_frames)
11560 /* Ideally we'd determine what portion of the local stack frame
11561 (within the constraint of the lowest 240) is most heavily used.
11562 But without that complication, simply bias the frame pointer
11563 by 128 bytes so as to maximize the amount of the local stack
11564 frame that is addressable with 8-bit offsets. */
11565 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11570 /* This is semi-inlined memory_address_length, but simplified
11571 since we know that we're always dealing with reg+offset, and
11572 to avoid having to create and discard all that rtl.  Returns the
   encoded length in bytes of a REGNO+OFFSET memory operand. */
11575 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11581 /* EBP and R13 cannot be encoded without an offset. */
11582 len = (regno == BP_REG || regno == R13_REG);
/* Offsets in [-128, 127] fit the short disp8 encoding.  */
11584 else if (IN_RANGE (offset, -128, 127))
11587 /* ESP and R12 must be encoded with a SIB byte. */
11588 if (regno == SP_REG || regno == R12_REG)
11594 /* Return an RTX that points to CFA_OFFSET within the stack frame.
11595 The valid base registers are taken from CFUN->MACHINE->FS. */
11598 choose_baseaddr (HOST_WIDE_INT cfa_offset)
11600 const struct machine_function *m = cfun->machine;
11601 rtx base_reg = NULL;
11602 HOST_WIDE_INT base_offset = 0;
/* Fast prologue/epilogue: pick for scheduling freedom, not size.  */
11604 if (m->use_fast_prologue_epilogue)
11606 /* Choose the base register most likely to allow the most scheduling
11607 opportunities. Generally FP is valid throughout the function,
11608 while DRAP must be reloaded within the epilogue. But choose either
11609 over the SP due to increased encoding size. */
11611 if (m->fs.fp_valid)
11613 base_reg = hard_frame_pointer_rtx;
11614 base_offset = m->fs.fp_offset - cfa_offset;
11616 else if (m->fs.drap_valid)
11618 base_reg = crtl->drap_reg;
/* The DRAP register points directly at the CFA.  */
11619 base_offset = 0 - cfa_offset;
11621 else if (m->fs.sp_valid)
11623 base_reg = stack_pointer_rtx;
11624 base_offset = m->fs.sp_offset - cfa_offset;
/* Otherwise optimize for encoding size: try each valid base and keep
   the one with the shortest address encoding.  */
11629 HOST_WIDE_INT toffset;
11630 int len = 16, tlen;
11632 /* Choose the base register with the smallest address encoding.
11633 With a tie, choose FP > DRAP > SP. */
11634 if (m->fs.sp_valid)
11636 base_reg = stack_pointer_rtx;
11637 base_offset = m->fs.sp_offset - cfa_offset;
11638 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11640 if (m->fs.drap_valid)
11642 toffset = 0 - cfa_offset;
11643 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11646 base_reg = crtl->drap_reg;
11647 base_offset = toffset;
11651 if (m->fs.fp_valid)
11653 toffset = m->fs.fp_offset - cfa_offset;
11654 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11657 base_reg = hard_frame_pointer_rtx;
11658 base_offset = toffset;
/* At least one base register must have been valid.  */
11663 gcc_assert (base_reg != NULL);
11665 return plus_constant (Pmode, base_reg, base_offset);
11668 /* Emit code to save registers in the prologue, using push insns. */
11671 ix86_emit_save_regs (void)
11673 unsigned int regno;
/* Walk hard registers from highest to lowest so pops in the epilogue
   can run in the opposite order.  */
11676 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11677 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11679 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
/* Mark each push so dwarf2out emits CFI for it.  */
11680 RTX_FRAME_RELATED_P (insn) = 1;
11684 /* Emit a single register save at CFA - CFA_OFFSET, using a MOV of
   MODE from hard register REGNO, plus the unwind note describing it. */
11687 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11688 HOST_WIDE_INT cfa_offset)
11690 struct machine_function *m = cfun->machine;
11691 rtx reg = gen_rtx_REG (mode, regno);
11692 rtx unspec = NULL_RTX;
11693 rtx mem, addr, base, insn;
11694 unsigned int align;
11696 addr = choose_baseaddr (cfa_offset);
11697 mem = gen_frame_mem (mode, addr);
11699 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
11700 align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
11701 set_mem_align (mem, align);
11703 /* SSE saves are not within re-aligned local stack frame.
11704 In case INCOMING_STACK_BOUNDARY is misaligned, we have
11705 to emit unaligned store. */
11706 if (mode == V4SFmode && align < 128)
11707 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
11709 insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg));
11710 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip the offset to find the base register actually used.  */
11713 if (GET_CODE (base) == PLUS)
11714 base = XEXP (base, 0);
11715 gcc_checking_assert (REG_P (base));
11717 /* When saving registers into a re-aligned local stack frame, avoid
11718 any tricky guessing by dwarf2out. */
11719 if (m->fs.realigned)
11721 gcc_checking_assert (stack_realign_drap);
11723 if (regno == REGNO (crtl->drap_reg))
11725 /* A bit of a hack. We force the DRAP register to be saved in
11726 the re-aligned stack frame, which provides us with a copy
11727 of the CFA that will last past the prologue. Install it. */
11728 gcc_checking_assert (cfun->machine->fs.fp_valid);
11729 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11730 cfun->machine->fs.fp_offset - cfa_offset);
11731 mem = gen_rtx_MEM (mode, addr);
11732 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11736 /* The frame pointer is a stable reference within the
11737 aligned frame. Use it. */
11738 gcc_checking_assert (cfun->machine->fs.fp_valid);
11739 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11740 cfun->machine->fs.fp_offset - cfa_offset);
11741 mem = gen_rtx_MEM (mode, addr);
11742 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11746 /* The memory may not be relative to the current CFA register,
11747 which means that we may need to generate a new pattern for
11748 use by the unwind info. */
11749 else if (base != m->fs.cfa_reg)
11751 addr = plus_constant (Pmode, m->fs.cfa_reg,
11752 m->fs.cfa_offset - cfa_offset);
11753 mem = gen_rtx_MEM (mode, addr);
11754 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11757 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11760 /* Emit code to save registers using MOV insns.
11761 First register is stored at CFA - CFA_OFFSET. */
11763 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11765 unsigned int regno;
/* Save each GP register one word further down the frame.  */
11767 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11768 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11770 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11771 cfa_offset -= UNITS_PER_WORD;
11775 /* Emit code to save SSE registers using MOV insns.
11776 First register is stored at CFA - CFA_OFFSET. */
11778 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11780 unsigned int regno;
/* SSE saves are 16 bytes each (V4SFmode slots).  */
11782 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11783 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11785 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11786 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11790 static GTY(()) rtx queued_cfa_restores;
11792 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
11793 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11794 Don't add the note if the previously saved value will be left untouched
11795 within stack red-zone till return, as unwinders can find the same value
11796 in the register and on the stack. */
11799 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
/* Inside the red zone (and without shrink wrapping) the stale stack
   copy survives to the return, so no note is necessary.  */
11801 if (!crtl->shrink_wrapped
11802 && cfa_offset <= cfun->machine->fs.red_zone_offset)
/* With an insn in hand, attach the note immediately ...  */
11807 add_reg_note (insn, REG_CFA_RESTORE, reg);
11808 RTX_FRAME_RELATED_P (insn) = 1;
/* ... otherwise queue it for the next stack-manipulation insn.  */
11811 queued_cfa_restores
11812 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11815 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
11818 ix86_add_queued_cfa_restore_notes (rtx insn)
11821 if (!queued_cfa_restores)
/* Splice the whole queued list in front of INSN's existing notes.  */
11823 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11825 XEXP (last, 1) = REG_NOTES (insn);
11826 REG_NOTES (insn) = queued_cfa_restores;
11827 queued_cfa_restores = NULL_RTX;
11828 RTX_FRAME_RELATED_P (insn) = 1;
11831 /* Expand prologue or epilogue stack adjustment.
11832 The pattern exist to put a dependency on all ebp-based memory accesses.
11833 STYLE should be negative if instructions should be marked as frame related,
11834 zero if %r11 register is live and cannot be freely used and positive
   otherwise.  If SET_CFA, also update the tracked CFA state. */
11838 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11839 int style, bool set_cfa)
11841 struct machine_function *m = cfun->machine;
11843 bool add_frame_related_expr = false;
/* Pick the add pattern; a 64-bit offset too wide for an immediate
   must go through a scratch register first.  */
11845 if (Pmode == SImode)
11846 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11847 else if (x86_64_immediate_operand (offset, DImode))
11848 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
11852 /* r11 is used by indirect sibcall return as well, set before the
11853 epilogue and used after the epilogue. */
11855 tmp = gen_rtx_REG (DImode, R11_REG);
11858 gcc_assert (src != hard_frame_pointer_rtx
11859 && dest != hard_frame_pointer_rtx);
11860 tmp = hard_frame_pointer_rtx;
11862 insn = emit_insn (gen_rtx_SET (tmp, offset));
11864 add_frame_related_expr = true;
11866 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11869 insn = emit_insn (insn);
/* Flush any REG_CFA_RESTORE notes queued by earlier restores.  */
11871 ix86_add_queued_cfa_restore_notes (insn);
/* Track the CFA move and emit the matching REG_CFA_ADJUST_CFA note.  */
11877 gcc_assert (m->fs.cfa_reg == src);
11878 m->fs.cfa_offset += INTVAL (offset);
11879 m->fs.cfa_reg = dest;
11881 r = gen_rtx_PLUS (Pmode, src, offset);
11882 r = gen_rtx_SET (dest, r);
11883 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11884 RTX_FRAME_RELATED_P (insn) = 1;
11886 else if (style < 0)
11888 RTX_FRAME_RELATED_P (insn) = 1;
11889 if (add_frame_related_expr)
11891 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11892 r = gen_rtx_SET (dest, r);
/* The scratch-register sequence obscures the real adjustment;
   spell it out for dwarf2out.  */
11893 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* When the stack pointer is the destination, propagate validity and
   offset from whichever register sourced the adjustment.  */
11897 if (dest == stack_pointer_rtx)
11899 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11900 bool valid = m->fs.sp_valid;
11902 if (src == hard_frame_pointer_rtx)
11904 valid = m->fs.fp_valid;
11905 ooffset = m->fs.fp_offset;
11907 else if (src == crtl->drap_reg)
11909 valid = m->fs.drap_valid;
11914 /* Else there are two possibilities: SP itself, which we set
11915 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
11916 taken care of this by hand along the eh_return path. */
11917 gcc_checking_assert (src == stack_pointer_rtx
11918 || offset == const0_rtx);
11921 m->fs.sp_offset = ooffset - INTVAL (offset);
11922 m->fs.sp_valid = valid;
11926 /* Find an available register to be used as dynamic realign argument
11927 pointer register. Such a register will be written in prologue and
11928 used in begin of body, so it must not be
11929 1. parameter passing register.
11931 We reuse static-chain register if it is available. Otherwise, we
11932 use DI for i386 and R13 for x86-64. We chose R13 since it has
   longer encoding.
11935 Return: the regno of chosen register. */
11937 static unsigned int
11938 find_drap_reg (void)
11940 tree decl = cfun->decl;
11944 /* Use R13 for nested function or function need static chain.
11945 Since function with tail call may use any caller-saved
11946 registers in epilogue, DRAP must not use caller-saved
11947 register in such case. */
11948 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11955 /* Use DI for nested function or function need static chain.
11956 Since function with tail call may use any caller-saved
11957 registers in epilogue, DRAP must not use caller-saved
11958 register in such case. */
11959 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11962 /* Reuse static chain register if it isn't used for parameter
   passing. */
11964 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11966 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
/* fastcall/thiscall conventions claim ecx for arguments, so the
   static chain register is only free outside them.  */
11967 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11974 /* Handle a "force_align_arg_pointer" attribute; warn and drop the
   attribute when applied to an unsupported declaration kind. */
11977 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11978 tree, int, bool *no_add_attrs)
11980 if (TREE_CODE (*node) != FUNCTION_TYPE
11981 && TREE_CODE (*node) != METHOD_TYPE
11982 && TREE_CODE (*node) != FIELD_DECL
11983 && TREE_CODE (*node) != TYPE_DECL)
11985 warning (OPT_Wattributes, "%qE attribute only applies to functions",
/* Tell the attribute machinery not to attach the bogus attribute.  */
11987 *no_add_attrs = true;
11993 /* Return minimum incoming stack alignment, in bits.  SIBCALL selects
   the variant used for sibcall legality checks. */
11995 static unsigned int
11996 ix86_minimum_incoming_stack_boundary (bool sibcall)
11998 unsigned int incoming_stack_boundary;
12000 /* Prefer the one specified at command line. */
12001 if (ix86_user_incoming_stack_boundary)
12002 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
12003 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
12004 if -mstackrealign is used, it isn't used for sibcall check and
12005 estimated stack alignment is 128bit. */
12007 && ix86_force_align_arg_pointer
12008 && crtl->stack_alignment_estimated == 128)
12009 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12011 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
12013 /* Incoming stack alignment can be changed on individual functions
12014 via force_align_arg_pointer attribute. We use the smallest
12015 incoming stack boundary. */
12016 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12017 && lookup_attribute (ix86_force_align_arg_pointer_string,
12018 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12019 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12021 /* The incoming stack frame has to be aligned at least at
12022 parm_stack_boundary. */
12023 if (incoming_stack_boundary < crtl->parm_stack_boundary)
12024 incoming_stack_boundary = crtl->parm_stack_boundary;
12026 /* Stack at entrance of main is aligned by runtime. We use the
12027 smallest incoming stack boundary. */
12028 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12029 && DECL_NAME (current_function_decl)
12030 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12031 && DECL_FILE_SCOPE_P (current_function_decl))
12032 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12034 return incoming_stack_boundary;
12037 /* Update incoming stack boundary and estimated stack alignment. */
12040 ix86_update_stack_boundary (void)
12042 ix86_incoming_stack_boundary
12043 = ix86_minimum_incoming_stack_boundary (false);
12045 /* x86_64 vararg needs 16byte stack alignment for register save
   area. */
12049 && crtl->stack_alignment_estimated < 128)
12050 crtl->stack_alignment_estimated = 128;
12052 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
12053 if (ix86_tls_descriptor_calls_expanded_in_cfun
12054 && crtl->preferred_stack_boundary < 128)
12055 crtl->preferred_stack_boundary = 128;
12058 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12059 needed or an rtx for DRAP otherwise. */
12062 ix86_get_drap_rtx (void)
/* Without accumulated outgoing args, a DRAP is always required when
   stack realignment happens.  */
12064 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
12065 crtl->need_drap = true;
12067 if (stack_realign_drap)
12069 /* Assign DRAP to vDRAP and returns vDRAP */
12070 unsigned int regno = find_drap_reg ();
12073 rtx_insn *seq, *insn;
12075 arg_ptr = gen_rtx_REG (Pmode, regno);
12076 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry, so
   the body references the pseudo, not the scratch hard reg.  */
12079 drap_vreg = copy_to_reg (arg_ptr);
12080 seq = get_insns ();
12083 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
/* Let dwarf2out know where the virtual DRAP copy lives.  */
12086 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12087 RTX_FRAME_RELATED_P (insn) = 1;
12095 /* Handle the TARGET_INTERNAL_ARG_POINTER hook: the default virtual
   incoming-arguments pointer is used. */
12098 ix86_internal_arg_pointer (void)
12100 return virtual_incoming_args_rtx;
/* Bookkeeping for a temporary register handed out by
   get_scratch_register_on_entry; released by
   release_scratch_register_on_entry.  NOTE(review): the member
   declarations and closing brace are elided from this extraction --
   presumably at least an rtx `reg' field and a saved-p flag; verify
   against the full file.  */
12103 struct scratch_reg {
12108 /* Return a short-lived scratch register for use on function entry.
12109 In 32-bit mode, it is valid only after the registers are saved
12110 in the prologue. This register must be released by means of
12111 release_scratch_register_on_entry once it is dead. */
/* NOTE(review): this extraction elides the return-type line, braces,
   the `regno' variable declaration and most of the per-branch regno
   assignments, plus the fastcall_p/thiscall_p/drap_regno declaration
   heads.  Only comments were added; confirm elided lines against the
   full file.  */
12114 get_scratch_register_on_entry (struct scratch_reg *sr)
12122 /* We always use R11 in 64-bit mode. */
/* 32-bit path: pick a register that is not used for argument passing,
   the static chain, or DRAP, preferring one that needs no save.  */
12127 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12129 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12131 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12132 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12133 int regparm = ix86_function_regparm (fntype, decl);
12135 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12137 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12138 for the static chain register. */
12139 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12140 && drap_regno != AX_REG)
12142 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12143 for the static chain register. */
12144 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12146 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12148 /* ecx is the static chain register. */
12149 else if (regparm < 3 && !fastcall_p && !thiscall_p
12151 && drap_regno != CX_REG)
12153 else if (ix86_save_reg (BX_REG, true))
12155 /* esi is the static chain register. */
12156 else if (!(regparm == 3 && static_chain_p)
12157 && ix86_save_reg (SI_REG, true))
12159 else if (ix86_save_reg (DI_REG, true))
/* Fallback: no already-saved register is usable; pick AX (or DX if
   DRAP occupies AX) and push/pop it around the use.  */
12163 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12168 sr->reg = gen_rtx_REG (Pmode, regno);
/* The push that saves the scratch register is part of the prologue,
   hence frame-related for unwind info.  */
12171 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12172 RTX_FRAME_RELATED_P (insn) = 1;
12176 /* Release a scratch register obtained from the preceding function. */
/* Pops the scratch register back (when it had to be saved) and emits a
   REG_FRAME_RELATED_EXPR note describing the SP adjustment, since the
   unwinder machinery does not understand pop directly.  (Return-type
   line, braces and the `if (sr->saved)'-style guard are elided in this
   extraction.)  */
12179 release_scratch_register_on_entry (struct scratch_reg *sr)
12183 struct machine_function *m = cfun->machine;
12184 rtx x, insn = emit_insn (gen_pop (sr->reg));
12186 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12187 RTX_FRAME_RELATED_P (insn) = 1;
12188 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12189 x = gen_rtx_SET (stack_pointer_rtx, x);
12190 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
/* Keep the frame-state tracking in sync with the emitted pop.  */
12191 m->fs.sp_offset -= UNITS_PER_WORD;
/* Distance between consecutive stack probes, derived from the target's
   stack-checking configuration.  */
12195 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
12197 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
/* NOTE(review): braces, the return-type line, several `if'/`else'
   heads and a few argument lines are elided from this extraction; the
   control structure below must be read against the full file.  Only
   comments were added.  */
12200 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12202 /* We skip the probe for the first interval + a small dope of 4 words and
12203 probe that many bytes past the specified size to maintain a protection
12204 area at the botton of the stack. */
12205 const int dope = 4 * UNITS_PER_WORD;
12206 rtx size_rtx = GEN_INT (size), last;
12208 /* See if we have a constant small number of probes to generate. If so,
12209 that's the easy case. The run-time loop is made up of 9 insns in the
12210 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12211 for n # of intervals. */
12212 if (size <= 4 * PROBE_INTERVAL)
12214 HOST_WIDE_INT i, adjust;
12215 bool first_probe = true;
12217 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12218 values of N from 1 until it exceeds SIZE. If only one probe is
12219 needed, this will not generate any code. Then adjust and probe
12220 to PROBE_INTERVAL + SIZE. */
12221 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* First iteration also skips over the initial interval + dope.  */
12225 adjust = 2 * PROBE_INTERVAL + dope;
12226 first_probe = false;
12229 adjust = PROBE_INTERVAL;
12231 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12232 plus_constant (Pmode, stack_pointer_rtx,
12234 emit_stack_probe (stack_pointer_rtx);
12238 adjust = size + PROBE_INTERVAL + dope;
12240 adjust = size + PROBE_INTERVAL - i;
12242 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12243 plus_constant (Pmode, stack_pointer_rtx,
12245 emit_stack_probe (stack_pointer_rtx);
12247 /* Adjust back to account for the additional first interval. */
12248 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12249 plus_constant (Pmode, stack_pointer_rtx,
12250 PROBE_INTERVAL + dope)));
12253 /* Otherwise, do the same as above, but in a loop. Note that we must be
12254 extra careful with variables wrapping around because we might be at
12255 the very top (or the very bottom) of the address space and we have
12256 to be able to handle this case properly; in particular, we use an
12257 equality test for the loop condition. */
12260 HOST_WIDE_INT rounded_size;
12261 struct scratch_reg sr;
12263 get_scratch_register_on_entry (&sr);
12266 /* Step 1: round SIZE to the previous multiple of the interval. */
12268 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12271 /* Step 2: compute initial and final value of the loop counter. */
12273 /* SP = SP_0 + PROBE_INTERVAL. */
12274 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12275 plus_constant (Pmode, stack_pointer_rtx,
12276 - (PROBE_INTERVAL + dope))));
12278 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
/* Small offsets fit in the plus_constant form; larger ones need the
   constant loaded into the scratch register first.  */
12279 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12280 emit_insn (gen_rtx_SET (sr.reg,
12281 plus_constant (Pmode, stack_pointer_rtx,
12285 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12286 emit_insn (gen_rtx_SET (sr.reg,
12287 gen_rtx_PLUS (Pmode, sr.reg,
12288 stack_pointer_rtx)));
12292 /* Step 3: the loop
12296 SP = SP + PROBE_INTERVAL
12299 while (SP != LAST_ADDR)
12301 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12302 values of N from 1 until it is equal to ROUNDED_SIZE. */
12304 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12307 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12308 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12310 if (size != rounded_size)
12312 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12313 plus_constant (Pmode, stack_pointer_rtx,
12314 rounded_size - size)));
12315 emit_stack_probe (stack_pointer_rtx);
12318 /* Adjust back to account for the additional first interval. */
12319 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12320 plus_constant (Pmode, stack_pointer_rtx,
12321 PROBE_INTERVAL + dope)));
12323 release_scratch_register_on_entry (&sr);
12326 /* Even if the stack pointer isn't the CFA register, we need to correctly
12327 describe the adjustments made to it, in particular differentiate the
12328 frame-related ones from the frame-unrelated ones. */
12331 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12332 XVECEXP (expr, 0, 0)
12333 = gen_rtx_SET (stack_pointer_rtx,
12334 plus_constant (Pmode, stack_pointer_rtx, -size));
12335 XVECEXP (expr, 0, 1)
12336 = gen_rtx_SET (stack_pointer_rtx,
12337 plus_constant (Pmode, stack_pointer_rtx,
12338 PROBE_INTERVAL + dope + size));
12339 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12340 RTX_FRAME_RELATED_P (last) = 1;
/* Net effect of the whole sequence is an SP decrease of SIZE bytes.  */
12342 cfun->machine->fs.sp_offset += size;
12345 /* Make sure nothing is scheduled before we are done. */
12346 emit_insn (gen_blockage ());
12349 /* Adjust the stack pointer up to REG while probing it. */
/* Assembly-output counterpart of the moving-SP probe loop: emits the
   sub/or/cmp/jne sequence directly as text.  NOTE(review): the
   return-type line, braces, the `xops'/`loop_lab' declarations and the
   line setting xops[1] to REG for the compare are elided in this
   extraction.  */
12352 output_adjust_stack_and_probe (rtx reg)
12354 static int labelno = 0;
12358 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12361 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12363 /* SP = SP + PROBE_INTERVAL. */
12364 xops[0] = stack_pointer_rtx;
12365 xops[1] = GEN_INT (PROBE_INTERVAL);
12366 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe at SP: OR of zero touches the location without changing it.  */
12369 xops[1] = const0_rtx;
12370 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12372 /* Test if SP == LAST_ADDR. */
12373 xops[0] = stack_pointer_rtx;
12375 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
/* Loop back until SP reaches the final probe address.  */
12378 fputs ("\tjne\t", asm_out_file);
12379 assemble_name_raw (asm_out_file, loop_lab);
12380 fputc ('\n', asm_out_file);
12385 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12386 inclusive. These are offsets from the current stack pointer. */
/* Unlike ix86_adjust_stack_and_probe, this probes without moving SP,
   using a scratch register as a negative offset from SP.  NOTE(review):
   braces, the return-type line and a few argument lines are elided in
   this extraction; only comments were added.  */
12389 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12391 /* See if we have a constant small number of probes to generate. If so,
12392 that's the easy case. The run-time loop is made up of 6 insns in the
12393 generic case while the compile-time loop is made up of n insns for n #
12395 if (size <= 6 * PROBE_INTERVAL)
12399 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12400 it exceeds SIZE. If only one probe is needed, this will not
12401 generate any code. Then probe at FIRST + SIZE. */
12402 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12403 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12406 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12410 /* Otherwise, do the same as above, but in a loop. Note that we must be
12411 extra careful with variables wrapping around because we might be at
12412 the very top (or the very bottom) of the address space and we have
12413 to be able to handle this case properly; in particular, we use an
12414 equality test for the loop condition. */
12417 HOST_WIDE_INT rounded_size, last;
12418 struct scratch_reg sr;
12420 get_scratch_register_on_entry (&sr);
12423 /* Step 1: round SIZE to the previous multiple of the interval. */
12425 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12428 /* Step 2: compute initial and final value of the loop counter. */
12430 /* TEST_OFFSET = FIRST. */
/* Offsets are kept negated so addresses are SP + sr.reg (below SP).  */
12431 emit_move_insn (sr.reg, GEN_INT (-first));
12433 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12434 last = first + rounded_size;
12437 /* Step 3: the loop
12441 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12444 while (TEST_ADDR != LAST_ADDR)
12446 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12447 until it is equal to ROUNDED_SIZE. */
12449 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12452 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12453 that SIZE is equal to ROUNDED_SIZE. */
12455 if (size != rounded_size)
12456 emit_stack_probe (plus_constant (Pmode,
12457 gen_rtx_PLUS (Pmode,
12460 rounded_size - size));
12462 release_scratch_register_on_entry (&sr);
12465 /* Make sure nothing is scheduled before we are done. */
12466 emit_insn (gen_blockage ());
12469 /* Probe a range of stack addresses from REG to END, inclusive. These are
12470 offsets from the current stack pointer. */
/* Assembly-output counterpart of ix86_emit_probe_stack_range's loop:
   emits the sub/or/cmp/jne text for the probe loop.  NOTE(review): the
   return-type line, braces, the `xops'/`loop_lab' declarations and the
   lines loading REG/END into xops for the sub and cmp are elided in
   this extraction.  */
12473 output_probe_stack_range (rtx reg, rtx end)
12475 static int labelno = 0;
12479 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12482 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12484 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12486 xops[1] = GEN_INT (PROBE_INTERVAL);
12487 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12489 /* Probe at TEST_ADDR. */
/* OR of zero into [SP + offset] touches the page without altering it.  */
12490 xops[0] = stack_pointer_rtx;
12492 xops[2] = const0_rtx;
12493 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12495 /* Test if TEST_ADDR == LAST_ADDR. */
12498 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12501 fputs ("\tjne\t", asm_out_file);
12502 assemble_name_raw (asm_out_file, loop_lab);
12503 fputc ('\n', asm_out_file);
12508 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
12509 to be generated in correct form. */
/* NOTE(review): braces, the return-type line, the head of the large
   frame-pointer-elision `if', several `return' statements and loop
   bodies are elided from this extraction; the control flow must be
   read against the full file.  Only comments were added.  */
12511 ix86_finalize_stack_realign_flags (void)
12513 /* Check if stack realign is really needed after reload, and
12514 stores result in cfun */
12515 unsigned int incoming_stack_boundary
12516 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12517 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12518 unsigned int stack_realign
12519 = (incoming_stack_boundary
12520 < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
12521 ? crtl->max_used_stack_slot_alignment
12522 : crtl->stack_alignment_needed));
/* Once finalized, the decision must not change between passes.  */
12524 if (crtl->stack_realign_finalized)
12526 /* After stack_realign_needed is finalized, we can't no longer
12528 gcc_assert (crtl->stack_realign_needed == stack_realign);
12532 /* If the only reason for frame_pointer_needed is that we conservatively
12533 assumed stack realignment might be needed, but in the end nothing that
12534 needed the stack alignment had been spilled, clear frame_pointer_needed
12535 and say we don't need stack realignment. */
12537 && frame_pointer_needed
12539 && flag_omit_frame_pointer
12540 && crtl->sp_is_unchanging
12541 && !ix86_current_function_calls_tls_descriptor
12542 && !crtl->accesses_prior_frames
12543 && !cfun->calls_alloca
12544 && !crtl->calls_eh_return
12545 /* See ira_setup_eliminable_regset for the rationale. */
12546 && !(STACK_CHECK_MOVING_SP
12547 && flag_stack_check
12549 && cfun->can_throw_non_call_exceptions)
12550 && !ix86_frame_pointer_required ()
12551 && get_frame_size () == 0
12552 && ix86_nsaved_sseregs () == 0
12553 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12555 HARD_REG_SET set_up_by_prologue, prologue_used;
/* Scan every insn: if any genuinely needs a stack frame, keep the
   conservative realignment decision and finalize immediately.  */
12558 CLEAR_HARD_REG_SET (prologue_used);
12559 CLEAR_HARD_REG_SET (set_up_by_prologue);
12560 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12561 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12562 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12563 HARD_FRAME_POINTER_REGNUM);
12564 FOR_EACH_BB_FN (bb, cfun)
12567 FOR_BB_INSNS (bb, insn)
12568 if (NONDEBUG_INSN_P (insn)
12569 && requires_stack_frame_p (insn, prologue_used,
12570 set_up_by_prologue))
12572 crtl->stack_realign_needed = stack_realign;
12573 crtl->stack_realign_finalized = true;
12578 /* If drap has been set, but it actually isn't live at the start
12579 of the function, there is no reason to set it up. */
12580 if (crtl->drap_reg)
12582 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12583 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
12585 crtl->drap_reg = NULL_RTX;
12586 crtl->need_drap = false;
12590 cfun->machine->no_drap_save_restore = true;
/* Nothing needed the frame: drop the frame pointer and realignment,
   clamp all alignment fields to the incoming boundary, and rescan
   dataflow so register liveness reflects the simpler prologue.  */
12592 frame_pointer_needed = false;
12593 stack_realign = false;
12594 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12595 crtl->stack_alignment_needed = incoming_stack_boundary;
12596 crtl->stack_alignment_estimated = incoming_stack_boundary;
12597 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12598 crtl->preferred_stack_boundary = incoming_stack_boundary;
12599 df_finish_pass (true);
12600 df_scan_alloc (NULL);
12602 df_compute_regs_ever_live (true);
12606 crtl->stack_realign_needed = stack_realign;
12607 crtl->stack_realign_finalized = true;
12610 /* Delete SET_GOT right after entry block if it is allocated to reg. */
/* Used by the prologue when it must emit its own SET_GOT for mcount
   profiling: removes a previously-emitted duplicate assigned to REG.
   (Return-type line and braces elided in this extraction.)  */
12613 ix86_elim_entry_set_got (rtx reg)
12615 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12616 rtx_insn *c_insn = BB_HEAD (bb);
/* BB_HEAD may be a note/label; advance to the first real insn.  */
12617 if (!NONDEBUG_INSN_P (c_insn))
12618 c_insn = next_nonnote_nondebug_insn (c_insn);
12619 if (c_insn && NONJUMP_INSN_P (c_insn))
12621 rtx pat = PATTERN (c_insn);
12622 if (GET_CODE (pat) == PARALLEL)
12624 rtx vec = XVECEXP (pat, 0, 0);
/* Match a (set reg (unspec ... UNSPEC_SET_GOT)) for the same hard reg.  */
12625 if (GET_CODE (vec) == SET
12626 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12627 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12628 delete_insn (c_insn);
12633 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): this extraction elides a large number of lines from
   this function -- braces, `else' heads, variable declarations (insn,
   t, push, mov, r10, ...), several `if' condition heads (e.g. the SEH
   test before SEH_MAX_FRAME_SIZE, the int_registers_saved push path,
   the TARGET_SEH guard at the end) and parts of multi-line calls.  Do
   not edit from this view; consult the full file.  Only comments were
   added below.

   Overall shape (as visible here): finalize realignment flags, set up
   initial CFA/frame-state tracking, compute the frame layout, handle
   the ms_hook / static-chain-on-stack special entries, set up DRAP and
   realign if needed, save integer/SSE registers, allocate (and
   possibly probe) the stack frame, establish the frame pointer, then
   emit the profiling SET_GOT, vDRAP setup, and scheduling blockages.  */
12636 ix86_expand_prologue (void)
12638 struct machine_function *m = cfun->machine;
12640 struct ix86_frame frame;
12641 HOST_WIDE_INT allocate;
12642 bool int_registers_saved;
12643 bool sse_registers_saved;
12644 rtx static_chain = NULL_RTX;
12646 ix86_finalize_stack_realign_flags ();
12648 /* DRAP should not coexist with stack_realign_fp */
12649 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12651 memset (&m->fs, 0, sizeof (m->fs));
12653 /* Initialize CFA state for before the prologue. */
12654 m->fs.cfa_reg = stack_pointer_rtx;
12655 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12657 /* Track SP offset to the CFA. We continue tracking this after we've
12658 swapped the CFA register away from SP. In the case of re-alignment
12659 this is fudged; we're interested to offsets within the local frame. */
12660 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12661 m->fs.sp_valid = true;
12663 ix86_compute_frame_layout (&frame);
12665 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12667 /* We should have already generated an error for any use of
12668 ms_hook on a nested function. */
12669 gcc_checking_assert (!ix86_static_chain_on_stack);
12671 /* Check if profiling is active and we shall use profiling before
12672 prologue variant. If so sorry. */
12673 if (crtl->profile && flag_fentry != 0)
12674 sorry ("ms_hook_prologue attribute isn%'t compatible "
12675 "with -mfentry for 32-bit");
12677 /* In ix86_asm_output_function_label we emitted:
12678 8b ff movl.s %edi,%edi
12680 8b ec movl.s %esp,%ebp
12682 This matches the hookable function prologue in Win32 API
12683 functions in Microsoft Windows XP Service Pack 2 and newer.
12684 Wine uses this to enable Windows apps to hook the Win32 API
12685 functions provided by Wine.
12687 What that means is that we've already set up the frame pointer. */
12689 if (frame_pointer_needed
12690 && !(crtl->drap_reg && crtl->stack_realign_needed))
12694 /* We've decided to use the frame pointer already set up.
12695 Describe this to the unwinder by pretending that both
12696 push and mov insns happen right here.
12698 Putting the unwind info here at the end of the ms_hook
12699 is done so that we can make absolutely certain we get
12700 the required byte sequence at the start of the function,
12701 rather than relying on an assembler that can produce
12702 the exact encoding required.
12704 However it does mean (in the unpatched case) that we have
12705 a 1 insn window where the asynchronous unwind info is
12706 incorrect. However, if we placed the unwind info at
12707 its correct location we would have incorrect unwind info
12708 in the patched case. Which is probably all moot since
12709 I don't expect Wine generates dwarf2 unwind info for the
12710 system libraries that use this feature. */
12712 insn = emit_insn (gen_blockage ());
12714 push = gen_push (hard_frame_pointer_rtx);
12715 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12716 stack_pointer_rtx);
12717 RTX_FRAME_RELATED_P (push) = 1;
12718 RTX_FRAME_RELATED_P (mov) = 1;
12720 RTX_FRAME_RELATED_P (insn) = 1;
12721 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12722 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12724 /* Note that gen_push incremented m->fs.cfa_offset, even
12725 though we didn't emit the push insn here. */
12726 m->fs.cfa_reg = hard_frame_pointer_rtx;
12727 m->fs.fp_offset = m->fs.cfa_offset;
12728 m->fs.fp_valid = true;
12732 /* The frame pointer is not needed so pop %ebp again.
12733 This leaves us with a pristine state. */
12734 emit_insn (gen_pop (hard_frame_pointer_rtx));
12738 /* The first insn of a function that accepts its static chain on the
12739 stack is to push the register that would be filled in by a direct
12740 call. This insn will be skipped by the trampoline. */
12741 else if (ix86_static_chain_on_stack)
12743 static_chain = ix86_static_chain (cfun->decl, false);
12744 insn = emit_insn (gen_push (static_chain));
12745 emit_insn (gen_blockage ());
12747 /* We don't want to interpret this push insn as a register save,
12748 only as a stack adjustment. The real copy of the register as
12749 a save will be done later, if needed. */
12750 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12751 t = gen_rtx_SET (stack_pointer_rtx, t);
12752 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12753 RTX_FRAME_RELATED_P (insn) = 1;
12756 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
12757 of DRAP is needed and stack realignment is really needed after reload */
12758 if (stack_realign_drap)
12760 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12762 /* Only need to push parameter pointer reg if it is caller saved. */
12763 if (!call_used_regs[REGNO (crtl->drap_reg)])
12765 /* Push arg pointer reg */
12766 insn = emit_insn (gen_push (crtl->drap_reg));
12767 RTX_FRAME_RELATED_P (insn) = 1;
12770 /* Grab the argument pointer. */
12771 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12772 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12773 RTX_FRAME_RELATED_P (insn) = 1;
12774 m->fs.cfa_reg = crtl->drap_reg;
12775 m->fs.cfa_offset = 0;
12777 /* Align the stack. */
12778 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12780 GEN_INT (-align_bytes)));
12781 RTX_FRAME_RELATED_P (insn) = 1;
12783 /* Replicate the return address on the stack so that return
12784 address can be reached via (argp - 1) slot. This is needed
12785 to implement macro RETURN_ADDR_RTX and intrinsic function
12786 expand_builtin_return_addr etc. */
12787 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12788 t = gen_frame_mem (word_mode, t);
12789 insn = emit_insn (gen_push (t));
12790 RTX_FRAME_RELATED_P (insn) = 1;
12792 /* For the purposes of frame and register save area addressing,
12793 we've started over with a new frame. */
12794 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12795 m->fs.realigned = true;
12799 /* Replicate static chain on the stack so that static chain
12800 can be reached via (argp - 2) slot. This is needed for
12801 nested function with stack realignment. */
12802 insn = emit_insn (gen_push (static_chain));
12803 RTX_FRAME_RELATED_P (insn) = 1;
12807 int_registers_saved = (frame.nregs == 0);
12808 sse_registers_saved = (frame.nsseregs == 0);
12810 if (frame_pointer_needed && !m->fs.fp_valid)
12812 /* Note: AT&T enter does NOT have reversed args. Enter is probably
12813 slower on all targets. Also sdb doesn't like it. */
12814 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
12815 RTX_FRAME_RELATED_P (insn) = 1;
12817 /* Push registers now, before setting the frame pointer
12819 if (!int_registers_saved
12821 && !frame.save_regs_using_mov)
12823 ix86_emit_save_regs ();
12824 int_registers_saved = true;
12825 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12828 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
12830 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12831 RTX_FRAME_RELATED_P (insn) = 1;
12833 if (m->fs.cfa_reg == stack_pointer_rtx)
12834 m->fs.cfa_reg = hard_frame_pointer_rtx;
12835 m->fs.fp_offset = m->fs.sp_offset;
12836 m->fs.fp_valid = true;
12840 if (!int_registers_saved)
12842 /* If saving registers via PUSH, do so now. */
12843 if (!frame.save_regs_using_mov)
12845 ix86_emit_save_regs ();
12846 int_registers_saved = true;
12847 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12850 /* When using red zone we may start register saving before allocating
12851 the stack frame saving one cycle of the prologue. However, avoid
12852 doing this if we have to probe the stack; at least on x86_64 the
12853 stack probe can turn into a call that clobbers a red zone location. */
12854 else if (ix86_using_red_zone ()
12855 && (! TARGET_STACK_PROBE
12856 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
12858 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
12859 int_registers_saved = true;
12863 if (stack_realign_fp)
12865 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12866 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
12868 /* The computation of the size of the re-aligned stack frame means
12869 that we must allocate the size of the register save area before
12870 performing the actual alignment. Otherwise we cannot guarantee
12871 that there's enough storage above the realignment point. */
12872 if (m->fs.sp_offset != frame.sse_reg_save_offset)
12873 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12874 GEN_INT (m->fs.sp_offset
12875 - frame.sse_reg_save_offset),
12878 /* Align the stack. */
12879 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12881 GEN_INT (-align_bytes)));
12883 /* For the purposes of register save area addressing, the stack
12884 pointer is no longer valid. As for the value of sp_offset,
12885 see ix86_compute_frame_layout, which we need to match in order
12886 to pass verification of stack_pointer_offset at the end. */
12887 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
12888 m->fs.sp_valid = false;
12891 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12893 if (flag_stack_usage_info)
12895 /* We start to count from ARG_POINTER. */
12896 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
12898 /* If it was realigned, take into account the fake frame. */
12899 if (stack_realign_drap)
12901 if (ix86_static_chain_on_stack)
12902 stack_size += UNITS_PER_WORD;
12904 if (!call_used_regs[REGNO (crtl->drap_reg)])
12905 stack_size += UNITS_PER_WORD;
12907 /* This over-estimates by 1 minimal-stack-alignment-unit but
12908 mitigates that by counting in the new return address slot. */
12909 current_function_dynamic_stack_size
12910 += crtl->stack_alignment_needed / BITS_PER_UNIT;
12913 current_function_static_stack_size = stack_size;
12916 /* On SEH target with very large frame size, allocate an area to save
12917 SSE registers (as the very large allocation won't be described). */
12919 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
12920 && !sse_registers_saved)
12922 HOST_WIDE_INT sse_size =
12923 frame.sse_reg_save_offset - frame.reg_save_offset;
12925 gcc_assert (int_registers_saved);
12927 /* No need to do stack checking as the area will be immediately
12929 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12930 GEN_INT (-sse_size), -1,
12931 m->fs.cfa_reg == stack_pointer_rtx);
12932 allocate -= sse_size;
12933 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
12934 sse_registers_saved = true;
12937 /* The stack has already been decremented by the instruction calling us
12938 so probe if the size is non-negative to preserve the protection area. */
12939 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
12941 /* We expect the registers to be saved when probes are used. */
12942 gcc_assert (int_registers_saved);
12944 if (STACK_CHECK_MOVING_SP)
12946 if (!(crtl->is_leaf && !cfun->calls_alloca
12947 && allocate <= PROBE_INTERVAL))
12949 ix86_adjust_stack_and_probe (allocate);
12955 HOST_WIDE_INT size = allocate;
12957 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
12958 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
12960 if (TARGET_STACK_PROBE)
12962 if (crtl->is_leaf && !cfun->calls_alloca)
12964 if (size > PROBE_INTERVAL)
12965 ix86_emit_probe_stack_range (0, size);
12968 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
12972 if (crtl->is_leaf && !cfun->calls_alloca)
12974 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
12975 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
12976 size - STACK_CHECK_PROTECT);
12979 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
12986 else if (!ix86_target_stack_probe ()
12987 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
12989 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12990 GEN_INT (-allocate), -1,
12991 m->fs.cfa_reg == stack_pointer_rtx);
/* Large-allocation path: call the stack-allocation worker (e.g.
   ___chkstk) with the size in EAX, preserving EAX/R10 if live.  */
12995 rtx eax = gen_rtx_REG (Pmode, AX_REG);
12997 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
12998 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
12999 bool eax_live = ix86_eax_live_at_start_p ();
13000 bool r10_live = false;
13003 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13007 insn = emit_insn (gen_push (eax));
13008 allocate -= UNITS_PER_WORD;
13009 /* Note that SEH directives need to continue tracking the stack
13010 pointer even after the frame pointer has been set up. */
13011 if (sp_is_cfa_reg || TARGET_SEH)
13014 m->fs.cfa_offset += UNITS_PER_WORD;
13015 RTX_FRAME_RELATED_P (insn) = 1;
13016 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13017 gen_rtx_SET (stack_pointer_rtx,
13018 plus_constant (Pmode, stack_pointer_rtx,
13019 -UNITS_PER_WORD)));
13025 r10 = gen_rtx_REG (Pmode, R10_REG);
13026 insn = emit_insn (gen_push (r10));
13027 allocate -= UNITS_PER_WORD;
13028 if (sp_is_cfa_reg || TARGET_SEH)
13031 m->fs.cfa_offset += UNITS_PER_WORD;
13032 RTX_FRAME_RELATED_P (insn) = 1;
13033 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13034 gen_rtx_SET (stack_pointer_rtx,
13035 plus_constant (Pmode, stack_pointer_rtx,
13036 -UNITS_PER_WORD)));
13040 emit_move_insn (eax, GEN_INT (allocate));
13041 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13043 /* Use the fact that AX still contains ALLOCATE. */
13044 adjust_stack_insn = (Pmode == DImode
13045 ? gen_pro_epilogue_adjust_stack_di_sub
13046 : gen_pro_epilogue_adjust_stack_si_sub);
13048 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13049 stack_pointer_rtx, eax));
13051 if (sp_is_cfa_reg || TARGET_SEH)
13054 m->fs.cfa_offset += allocate;
13055 RTX_FRAME_RELATED_P (insn) = 1;
13056 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13057 gen_rtx_SET (stack_pointer_rtx,
13058 plus_constant (Pmode, stack_pointer_rtx,
13061 m->fs.sp_offset += allocate;
13063 /* Use stack_pointer_rtx for relative addressing so that code
13064 works for realigned stack, too. */
13065 if (r10_live && eax_live)
13067 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13068 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13069 gen_frame_mem (word_mode, t));
13070 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13071 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13072 gen_frame_mem (word_mode, t));
13074 else if (eax_live || r10_live)
13076 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13077 emit_move_insn (gen_rtx_REG (word_mode,
13078 (eax_live ? AX_REG : R10_REG)),
13079 gen_frame_mem (word_mode, t));
13082 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13084 /* If we havn't already set up the frame pointer, do so now. */
13085 if (frame_pointer_needed && !m->fs.fp_valid)
13087 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13088 GEN_INT (frame.stack_pointer_offset
13089 - frame.hard_frame_pointer_offset));
13090 insn = emit_insn (insn);
13091 RTX_FRAME_RELATED_P (insn) = 1;
13092 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13094 if (m->fs.cfa_reg == stack_pointer_rtx)
13095 m->fs.cfa_reg = hard_frame_pointer_rtx;
13096 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13097 m->fs.fp_valid = true;
13100 if (!int_registers_saved)
13101 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13102 if (!sse_registers_saved)
13103 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13105 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13107 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13109 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13110 insn = emit_insn (gen_set_got (pic));
13111 RTX_FRAME_RELATED_P (insn) = 1;
13112 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13113 emit_insn (gen_prologue_use (pic));
13114 /* Deleting already emmitted SET_GOT if exist and allocated to
13115 REAL_PIC_OFFSET_TABLE_REGNUM. */
13116 ix86_elim_entry_set_got (pic);
13119 if (crtl->drap_reg && !crtl->stack_realign_needed)
13121 /* vDRAP is setup but after reload it turns out stack realign
13122 isn't necessary, here we will emit prologue to setup DRAP
13123 without stack realign adjustment */
13124 t = choose_baseaddr (0);
13125 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13128 /* Prevent instructions from being scheduled into register save push
13129 sequence when access to the redzone area is done through frame pointer.
13130 The offset between the frame pointer and the stack pointer is calculated
13131 relative to the value of the stack pointer at the end of the function
13132 prologue, and moving instructions that access redzone area via frame
13133 pointer inside push sequence violates this assumption. */
13134 if (frame_pointer_needed && frame.red_zone_size)
13135 emit_insn (gen_memory_blockage ());
13137 /* Emit cld instruction if stringops are used in the function. */
13138 if (TARGET_CLD && ix86_current_function_needs_cld)
13139 emit_insn (gen_cld ());
13141 /* SEH requires that the prologue end within 256 bytes of the start of
13142 the function. Prevent instruction schedules that would extend that.
13143 Further, prevent alloca modifications to the stack pointer from being
13144 combined with prologue modifications. */
13146 emit_insn (gen_prologue_use (stack_pointer_rtx));
13149 /* Emit code to restore REG using a POP insn. */
/* NOTE(review): this excerpt has gaps (the embedded original line
   numbers skip values), so the return type, braces and possibly other
   statements are not visible here.  Visible behavior: emit a pop of
   REG, attach a CFA-restore note at the pre-pop sp_offset, decrement
   the tracked stack-pointer offset by one word, then keep the unwind
   state (cfun->machine->fs) consistent for the special cases below.  */
13152 ix86_emit_restore_reg_using_pop (rtx reg)
13154 struct machine_function *m = cfun->machine;
13155 rtx_insn *insn = emit_insn (gen_pop (reg));
13157 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13158 m->fs.sp_offset -= UNITS_PER_WORD;
/* Case 1: we just popped the DRAP register itself — the CFA reverts
   from a memory expression to the DRAP register.  */
13160 if (m->fs.cfa_reg == crtl->drap_reg
13161 && REGNO (reg) == REGNO (crtl->drap_reg))
13163 /* Previously we'd represented the CFA as an expression
13164 like *(%ebp - 8). We've just popped that value from
13165 the stack, which means we need to reset the CFA to
13166 the drap register. This will remain until we restore
13167 the stack pointer. */
13168 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13169 RTX_FRAME_RELATED_P (insn) = 1;
13171 /* This means that the DRAP register is valid for addressing too. */
13172 m->fs.drap_valid = true;
/* Case 2: the CFA is the stack pointer — record the implicit
   sp += UNITS_PER_WORD performed by the pop so the unwinder tracks it.  */
13176 if (m->fs.cfa_reg == stack_pointer_rtx)
13178 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13179 x = gen_rtx_SET (stack_pointer_rtx, x);
13180 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13181 RTX_FRAME_RELATED_P (insn) = 1;
13183 m->fs.cfa_offset -= UNITS_PER_WORD;
13186 /* When the frame pointer is the CFA, and we pop it, we are
13187 swapping back to the stack pointer as the CFA. This happens
13188 for stack frames that don't allocate other data, so we assume
13189 the stack pointer is now pointing at the return address, i.e.
13190 the function entry state, which makes the offset be 1 word. */
/* Case 3: REG is the hard frame pointer — it is no longer valid for
   addressing, and if it was also the CFA the CFA moves back to sp.  */
13191 if (reg == hard_frame_pointer_rtx)
13193 m->fs.fp_valid = false;
13194 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13196 m->fs.cfa_reg = stack_pointer_rtx;
13197 m->fs.cfa_offset -= UNITS_PER_WORD;
13199 add_reg_note (insn, REG_CFA_DEF_CFA,
13200 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13201 GEN_INT (m->fs.cfa_offset)));
13202 RTX_FRAME_RELATED_P (insn) = 1;
13207 /* Emit code to restore saved registers using POP insns. */
/* Iterates over all hard registers and pops every general register the
   prologue saved (ix86_save_reg queried with maybe_eh_return=false);
   per-register CFA bookkeeping is done by
   ix86_emit_restore_reg_using_pop.  NOTE(review): the return-type line
   and braces are missing from this excerpt.  */
13210 ix86_emit_restore_regs_using_pop (void)
13212 unsigned int regno;
13214 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13215 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
13216 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13219 /* Emit code and notes for the LEAVE instruction. */
/* LEAVE sets sp = fp and pops the saved frame pointer, so after it the
   stack pointer is valid at fp_offset - UNITS_PER_WORD and the frame
   pointer is not.  If the frame pointer was the CFA, the CFA moves to
   the stack pointer and a REG_CFA_DEF_CFA note is attached.  Queued
   CFA-restore notes are flushed onto this insn.  */
13222 ix86_emit_leave (void)
13224 struct machine_function *m = cfun->machine;
13225 rtx_insn *insn = emit_insn (ix86_gen_leave ());
13227 ix86_add_queued_cfa_restore_notes (insn);
/* LEAVE requires a valid frame pointer to restore sp from.  */
13229 gcc_assert (m->fs.fp_valid);
13230 m->fs.sp_valid = true;
13231 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13232 m->fs.fp_valid = false;
13234 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13236 m->fs.cfa_reg = stack_pointer_rtx;
13237 m->fs.cfa_offset = m->fs.sp_offset;
13239 add_reg_note (insn, REG_CFA_DEF_CFA,
/* NOTE(review): the second argument of this plus_constant call is cut
   off by a gap in the excerpt (original line 13241 is missing).  */
13240 plus_constant (Pmode, stack_pointer_rtx,
13242 RTX_FRAME_RELATED_P (insn) = 1;
13244 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13248 /* Emit code to restore saved registers using MOV insns.
13249 First register is restored from CFA - CFA_OFFSET. */
/* MOV-based restore of the call-saved general registers.  Each saved
   register is loaded from choose_baseaddr (cfa_offset); cfa_offset then
   steps down by one word per register.  MAYBE_EH_RETURN is forwarded to
   ix86_save_reg so the eh_return path restores eax/edx as well.
   Restore notes are queued (first arg NULL) rather than attached here.  */
13251 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13252 bool maybe_eh_return)
13254 struct machine_function *m = cfun->machine;
13255 unsigned int regno;
13257 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13258 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13260 rtx reg = gen_rtx_REG (word_mode, regno);
13264 mem = choose_baseaddr (cfa_offset);
13265 mem = gen_frame_mem (word_mode, mem);
13266 insn = emit_move_insn (reg, mem);
/* Same DRAP special case as in ix86_emit_restore_reg_using_pop: once
   the DRAP register's saved value is reloaded, it becomes the CFA.  */
13268 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13270 /* Previously we'd represented the CFA as an expression
13271 like *(%ebp - 8). We've just popped that value from
13272 the stack, which means we need to reset the CFA to
13273 the drap register. This will remain until we restore
13274 the stack pointer. */
13275 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13276 RTX_FRAME_RELATED_P (insn) = 1;
13278 /* This means that the DRAP register is valid for addressing. */
13279 m->fs.drap_valid = true;
13282 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13284 cfa_offset -= UNITS_PER_WORD;
13288 /* Emit code to restore saved registers using MOV insns.
13289 First register is restored from CFA - CFA_OFFSET. */
/* SSE counterpart of ix86_emit_restore_regs_using_mov (the header
   comment above is copy-pasted from the GPR variant): reloads each
   saved SSE register as a V4SFmode value, stepping cfa_offset down by
   16 bytes per register.  Alignment of the save slot is capped at
   INCOMING_STACK_BOUNDARY; when that is under the mode's alignment an
   unaligned load via an UNSPEC pattern is used instead of a plain SET.  */
13291 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13292 bool maybe_eh_return)
13294 unsigned int regno;
13296 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13297 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13299 rtx reg = gen_rtx_REG (V4SFmode, regno);
13301 unsigned int align;
13303 mem = choose_baseaddr (cfa_offset);
13304 mem = gen_rtx_MEM (V4SFmode, mem);
13306 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
13307 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
13308 set_mem_align (mem, align);
13310 /* SSE saves are not within re-aligned local stack frame.
13311 In case INCOMING_STACK_BOUNDARY is misaligned, we have
13312 to emit unaligned load. */
/* NOTE(review): the condition selecting between the unaligned and
   aligned load is missing from this excerpt (original lines
   13313-13314); only the two emit paths are visible.  */
13315 rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
13317 emit_insn (gen_rtx_SET (reg, unspec));
13320 emit_insn (gen_rtx_SET (reg, mem));
13322 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13324 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13328 /* Restore function stack, frame, and registers. */
/* Expand the epilogue.  STYLE: 1 = normal return, 2 = eh_return,
   0 = sibcall (no return insn emitted; state rolled back for the next
   epilogue).  The function validates the frame state recorded by the
   prologue, decides between pop-based and mov-based register restore,
   deallocates the frame, undoes DRAP realignment, and finally emits
   the appropriate return insn.  NOTE(review): this excerpt has gaps,
   so several conditions and braces are not visible.  */
13331 ix86_expand_epilogue (int style)
13333 struct machine_function *m = cfun->machine;
13334 struct machine_frame_state frame_state_save = m->fs;
13335 struct ix86_frame frame;
13336 bool restore_regs_via_mov;
13339 ix86_finalize_stack_realign_flags ();
13340 ix86_compute_frame_layout (&frame);
/* Sanity-check the frame state left by the prologue: sp and/or fp must
   be valid and agree with the computed frame layout; DRAP is never
   valid on entry to the epilogue.  */
13342 m->fs.sp_valid = (!frame_pointer_needed
13343 || (crtl->sp_is_unchanging
13344 && !stack_realign_fp));
13345 gcc_assert (!m->fs.sp_valid
13346 || m->fs.sp_offset == frame.stack_pointer_offset);
13348 /* The FP must be valid if the frame pointer is present. */
13349 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13350 gcc_assert (!m->fs.fp_valid
13351 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13353 /* We must have *some* valid pointer to the stack frame. */
13354 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13356 /* The DRAP is never valid at this point. */
13357 gcc_assert (!m->fs.drap_valid);
13359 /* See the comment about red zone and frame
13360 pointer usage in ix86_expand_prologue. */
13361 if (frame_pointer_needed && frame.red_zone_size)
13362 emit_insn (gen_memory_blockage ());
13364 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13365 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13367 /* Determine the CFA offset of the end of the red-zone. */
13368 m->fs.red_zone_offset = 0;
13369 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13371 /* The red-zone begins below the return address. */
13372 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
13374 /* When the register save area is in the aligned portion of
13375 the stack, determine the maximum runtime displacement that
13376 matches up with the aligned frame. */
13377 if (stack_realign_drap)
13378 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13382 /* Special care must be taken for the normal return case of a function
13383 using eh_return: the eax and edx registers are marked as saved, but
13384 not restored along this path. Adjust the save location to match. */
13385 if (crtl->calls_eh_return && style != 2)
13386 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Choose between mov-based and pop-based register restore.  The chain
   of conditions below weighs ABI requirements (EH return, SEH) against
   code-size/speed heuristics.  */
13388 /* EH_RETURN requires the use of moves to function properly. */
13389 if (crtl->calls_eh_return)
13390 restore_regs_via_mov = true;
13391 /* SEH requires the use of pops to identify the epilogue. */
13392 else if (TARGET_SEH)
13393 restore_regs_via_mov = false;
13394 /* If we're only restoring one register and sp is not valid then
13395 using a move instruction to restore the register since it's
13396 less work than reloading sp and popping the register. */
13397 else if (!m->fs.sp_valid && frame.nregs <= 1)
13398 restore_regs_via_mov = true;
13399 else if (TARGET_EPILOGUE_USING_MOVE
13400 && cfun->machine->use_fast_prologue_epilogue
13401 && (frame.nregs > 1
13402 || m->fs.sp_offset != frame.reg_save_offset))
13403 restore_regs_via_mov = true;
13404 else if (frame_pointer_needed
13406 && m->fs.sp_offset != frame.reg_save_offset)
13407 restore_regs_via_mov = true;
13408 else if (frame_pointer_needed
13409 && TARGET_USE_LEAVE
13410 && cfun->machine->use_fast_prologue_epilogue
13411 && frame.nregs == 1)
13412 restore_regs_via_mov = true;
13414 restore_regs_via_mov = false;
13416 if (restore_regs_via_mov || frame.nsseregs)
13418 /* Ensure that the entire register save area is addressable via
13419 the stack pointer, if we will restore via sp. */
13421 && m->fs.sp_offset > 0x7fffffff
13422 && !(m->fs.fp_valid || m->fs.drap_valid)
13423 && (frame.nsseregs + frame.nregs) != 0)
13425 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13426 GEN_INT (m->fs.sp_offset
13427 - frame.sse_reg_save_offset),
13429 m->fs.cfa_reg == stack_pointer_rtx);
13433 /* If there are any SSE registers to restore, then we have to do it
13434 via moves, since there's obviously no pop for SSE regs. */
13435 if (frame.nsseregs)
13436 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13439 if (restore_regs_via_mov)
13444 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13446 /* eh_return epilogues need %ecx added to the stack pointer. */
/* eh_return path: EH_RETURN_STACKADJ_RTX (sa) holds the extra stack
   adjustment requested by __builtin_eh_return.  */
13449 rtx sa = EH_RETURN_STACKADJ_RTX;
13452 /* Stack align doesn't work with eh_return. */
13453 gcc_assert (!stack_realign_drap);
13454 /* Neither does regparm nested functions. */
13455 gcc_assert (!ix86_static_chain_on_stack);
13457 if (frame_pointer_needed)
13459 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13460 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13461 emit_insn (gen_rtx_SET (sa, t));
13463 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13464 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13466 /* Note that we use SA as a temporary CFA, as the return
13467 address is at the proper place relative to it. We
13468 pretend this happens at the FP restore insn because
13469 prior to this insn the FP would be stored at the wrong
13470 offset relative to SA, and after this insn we have no
13471 other reasonable register to use for the CFA. We don't
13472 bother resetting the CFA to the SP for the duration of
13473 the return insn. */
13474 add_reg_note (insn, REG_CFA_DEF_CFA,
13475 plus_constant (Pmode, sa, UNITS_PER_WORD));
13476 ix86_add_queued_cfa_restore_notes (insn);
13477 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13478 RTX_FRAME_RELATED_P (insn) = 1;
13480 m->fs.cfa_reg = sa;
13481 m->fs.cfa_offset = UNITS_PER_WORD;
13482 m->fs.fp_valid = false;
13484 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13485 const0_rtx, style, false);
13489 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13490 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13491 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13492 ix86_add_queued_cfa_restore_notes (insn);
13494 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13495 if (m->fs.cfa_offset != UNITS_PER_WORD)
13497 m->fs.cfa_offset = UNITS_PER_WORD;
13498 add_reg_note (insn, REG_CFA_DEF_CFA,
13499 plus_constant (Pmode, stack_pointer_rtx,
13501 RTX_FRAME_RELATED_P (insn) = 1;
13504 m->fs.sp_offset = UNITS_PER_WORD;
13505 m->fs.sp_valid = true;
13510 /* SEH requires that the prologue end within 256 bytes of the start of
13511 the function. Prevent instruction schedules that would extend that.
13512 Further, prevent alloca modifications to the stack pointer from being
13513 combined with prologue modifications. */
13516 /* Prevent a catch region from being adjacent to the standard
13517 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
13518 several other flags that would be interesting to test are
13520 if (flag_non_call_exceptions)
13521 emit_insn (gen_nops (const1_rtx));
13523 emit_insn (gen_blockage ());
/* Pop-based restore: first deallocate down to the register save area,
   then pop each saved general register.  */
13526 /* First step is to deallocate the stack frame so that we can
13527 pop the registers. Also do it on SEH target for very large
13528 frame as the emitted instructions aren't allowed by the ABI in
13530 if (!m->fs.sp_valid
13532 && (m->fs.sp_offset - frame.reg_save_offset
13533 >= SEH_MAX_FRAME_SIZE)))
13535 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13536 GEN_INT (m->fs.fp_offset
13537 - frame.reg_save_offset),
13540 else if (m->fs.sp_offset != frame.reg_save_offset)
13542 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13543 GEN_INT (m->fs.sp_offset
13544 - frame.reg_save_offset),
13546 m->fs.cfa_reg == stack_pointer_rtx);
13549 ix86_emit_restore_regs_using_pop ();
/* Get rid of the frame pointer: a single pop when sp already points at
   its save slot, LEAVE when profitable, otherwise an explicit sp = fp
   adjustment followed by a pop.  */
13552 /* If we used a stack pointer and haven't already got rid of it,
13554 if (m->fs.fp_valid)
13556 /* If the stack pointer is valid and pointing at the frame
13557 pointer store address, then we only need a pop. */
13558 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
13559 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13560 /* Leave results in shorter dependency chains on CPUs that are
13561 able to grok it fast. */
13562 else if (TARGET_USE_LEAVE
13563 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13564 || !cfun->machine->use_fast_prologue_epilogue)
13565 ix86_emit_leave ();
13568 pro_epilogue_adjust_stack (stack_pointer_rtx,
13569 hard_frame_pointer_rtx,
13570 const0_rtx, style, !using_drap);
13571 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP epilogue: recompute sp from the DRAP register, undoing stack
   realignment, and re-anchor the CFA on the stack pointer.  */
13577 int param_ptr_offset = UNITS_PER_WORD;
13580 gcc_assert (stack_realign_drap);
13582 if (ix86_static_chain_on_stack)
13583 param_ptr_offset += UNITS_PER_WORD;
13584 if (!call_used_regs[REGNO (crtl->drap_reg)])
13585 param_ptr_offset += UNITS_PER_WORD;
13587 insn = emit_insn (gen_rtx_SET
13588 (stack_pointer_rtx,
13589 gen_rtx_PLUS (Pmode,
13591 GEN_INT (-param_ptr_offset))));
13592 m->fs.cfa_reg = stack_pointer_rtx;
13593 m->fs.cfa_offset = param_ptr_offset;
13594 m->fs.sp_offset = param_ptr_offset;
13595 m->fs.realigned = false;
13597 add_reg_note (insn, REG_CFA_DEF_CFA,
13598 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13599 GEN_INT (param_ptr_offset)));
13600 RTX_FRAME_RELATED_P (insn) = 1;
13602 if (!call_used_regs[REGNO (crtl->drap_reg)])
13603 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
13606 /* At this point the stack pointer must be valid, and we must have
13607 restored all of the registers. We may not have deallocated the
13608 entire stack frame. We've delayed this until now because it may
13609 be possible to merge the local stack deallocation with the
13610 deallocation forced by ix86_static_chain_on_stack. */
13611 gcc_assert (m->fs.sp_valid);
13612 gcc_assert (!m->fs.fp_valid);
13613 gcc_assert (!m->fs.realigned);
13614 if (m->fs.sp_offset != UNITS_PER_WORD)
13616 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13617 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
13621 ix86_add_queued_cfa_restore_notes (get_last_insn ());
13623 /* Sibcall epilogues don't want a return instruction. */
13626 m->fs = frame_state_save;
/* Return-insn emission.  Callee-pop ("pascal"-style) returns use
   `ret n`; more than 64K of args forces the pop-%ecx / add / indirect
   jump sequence (32-bit only).  */
13630 if (crtl->args.pops_args && crtl->args.size)
13632 rtx popc = GEN_INT (crtl->args.pops_args);
13634 /* i386 can only pop 64K bytes. If asked to pop more, pop return
13635 address, do explicit add, and jump indirectly to the caller. */
13637 if (crtl->args.pops_args >= 65536)
13639 rtx ecx = gen_rtx_REG (SImode, CX_REG);
13642 /* There is no "pascal" calling convention in any 64bit ABI. */
13643 gcc_assert (!TARGET_64BIT);
13645 insn = emit_insn (gen_pop (ecx));
13646 m->fs.cfa_offset -= UNITS_PER_WORD;
13647 m->fs.sp_offset -= UNITS_PER_WORD;
13649 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13650 x = gen_rtx_SET (stack_pointer_rtx, x);
13651 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
/* The return address now lives in %ecx; tell the unwinder.  */
13652 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
13653 RTX_FRAME_RELATED_P (insn) = 1;
13655 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13657 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
13660 emit_jump_insn (gen_simple_return_pop_internal (popc));
13663 emit_jump_insn (gen_simple_return_internal ());
13665 /* Restore the state back to the state from the prologue,
13666 so that it's correct for the next epilogue. */
13667 m->fs = frame_state_save;
13670 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook.  Restores the PIC register's hard
   regno (the function body may have renumbered it), then works around
   Mach-O's inability to have labels at the end of an object: if the
   last real insn is a (deleted-)label, emit a trailing nop; otherwise
   neutralize deleted debug labels so -g and -g0 codegen match.
   NOTE(review): several lines of this function, including parts of the
   surrounding conditions, are missing from this excerpt.  */
13673 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
13675 if (pic_offset_table_rtx
13676 && !ix86_use_pseudo_pic_reg ())
13677 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
13679 /* Mach-O doesn't support labels at the end of objects, so if
13680 it looks like we might want one, insert a NOP. */
13682 rtx_insn *insn = get_last_insn ();
13683 rtx_insn *deleted_debug_label = NULL;
/* Walk backwards past notes, remembering any deleted debug label.  */
13686 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
13688 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
13689 notes only, instead set their CODE_LABEL_NUMBER to -1,
13690 otherwise there would be code generation differences
13691 in between -g and -g0. */
13692 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13693 deleted_debug_label = insn;
13694 insn = PREV_INSN (insn);
13699 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
13700 fputs ("\tnop\n", file);
13701 else if (deleted_debug_label)
13702 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
13703 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13704 CODE_LABEL_NUMBER (insn) = -1;
13710 /* Return a scratch register to use in the split stack prologue. The
13711 split stack prologue is used for -fsplit-stack. It is the first
13712 instructions in the function, even before the regular prologue.
13713 The scratch register can be any caller-saved register which is not
13714 used for parameters or for the static chain. */
/* Returns a hard-register number usable as scratch, or INVALID_REGNUM
   (with a sorry() diagnostic) when regparm/fastcall/thiscall plus a
   static chain leave no caller-saved register free.  NOTE(review):
   the excerpt is missing the 64-bit early-out and the specific regno
   return values (gaps at original lines 13718-13722, 13741-13757).  */
13716 static unsigned int
13717 split_stack_prologue_scratch_regno (void)
13723 bool is_fastcall, is_thiscall;
13726 is_fastcall = (lookup_attribute ("fastcall",
13727 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13729 is_thiscall = (lookup_attribute ("thiscall",
13730 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13732 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
/* fastcall uses both %ecx and %edx for arguments, so a nested
   function (static chain) leaves nothing free.  */
13736 if (DECL_STATIC_CHAIN (cfun->decl))
13738 sorry ("-fsplit-stack does not support fastcall with "
13739 "nested function");
13740 return INVALID_REGNUM;
13744 else if (is_thiscall)
13746 if (!DECL_STATIC_CHAIN (cfun->decl))
13750 else if (regparm < 3)
13752 if (!DECL_STATIC_CHAIN (cfun->decl))
13758 sorry ("-fsplit-stack does not support 2 register "
13759 "parameters for a nested function");
13760 return INVALID_REGNUM;
13767 /* FIXME: We could make this work by pushing a register
13768 around the addition and comparison. */
13769 sorry ("-fsplit-stack does not support 3 register parameters");
13770 return INVALID_REGNUM;
13775 /* A SYMBOL_REF for the function which allocates new stackspace for
/* Lazily-created, GC-rooted SYMBOL_REFs; initialized on first use in
   ix86_expand_split_stack_prologue ("__morestack" and, for the large
   code model, "__morestack_large_model").  */
13778 static GTY(()) rtx split_stack_fn;
13780 /* A SYMBOL_REF for the more stack function when using the large
13783 static GTY(()) rtx split_stack_fn_large;
13785 /* Handle -fsplit-stack. These are the first instructions in the
13786 function, even before the regular prologue. */
/* Emits the -fsplit-stack check: compare sp (minus the frame size when
   it exceeds SPLIT_STACK_AVAILABLE) against the TCB stack-boundary
   slot; if there is enough room, branch past the __morestack call.
   Otherwise call __morestack (or __morestack_large_model for the
   64-bit large code model) to obtain a new stack segment, then issue
   the special split_stack_return.  Also materializes the varargs
   scratch pointer when the function uses va_start.  NOTE(review):
   several lines are missing from this excerpt (gaps in the embedded
   numbering), including some conditions and closing braces.  */
13789 ix86_expand_split_stack_prologue (void)
13791 struct ix86_frame frame;
13792 HOST_WIDE_INT allocate;
13793 unsigned HOST_WIDE_INT args_size;
13794 rtx_code_label *label;
13795 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
13796 rtx scratch_reg = NULL_RTX;
13797 rtx_code_label *varargs_label = NULL;
13800 gcc_assert (flag_split_stack && reload_completed);
13802 ix86_finalize_stack_realign_flags ();
13803 ix86_compute_frame_layout (&frame);
13804 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
13806 /* This is the label we will branch to if we have enough stack
13807 space. We expect the basic block reordering pass to reverse this
13808 branch if optimizing, so that we branch in the unlikely case. */
13809 label = gen_label_rtx ();
13811 /* We need to compare the stack pointer minus the frame size with
13812 the stack boundary in the TCB. The stack boundary always gives
13813 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
13814 can compare directly. Otherwise we need to do an addition. */
13816 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13817 UNSPEC_STACK_CHECK);
13818 limit = gen_rtx_CONST (Pmode, limit);
13819 limit = gen_rtx_MEM (Pmode, limit);
13820 if (allocate < SPLIT_STACK_AVAILABLE)
13821 current = stack_pointer_rtx;
13824 unsigned int scratch_regno;
13827 /* We need a scratch register to hold the stack pointer minus
13828 the required frame size. Since this is the very start of the
13829 function, the scratch register can be any caller-saved
13830 register which is not used for parameters. */
13831 offset = GEN_INT (- allocate);
13832 scratch_regno = split_stack_prologue_scratch_regno ();
13833 if (scratch_regno == INVALID_REGNUM)
13835 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13836 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
13838 /* We don't use ix86_gen_add3 in this case because it will
13839 want to split to lea, but when not optimizing the insn
13840 will not be split after this point. */
13841 emit_insn (gen_rtx_SET (scratch_reg,
13842 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* Offset doesn't fit in an immediate: load it, then add sp.  */
13847 emit_move_insn (scratch_reg, offset);
13848 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
13849 stack_pointer_rtx));
13851 current = scratch_reg;
13854 ix86_expand_branch (GEU, current, limit, label);
13855 jump_insn = get_last_insn ();
13856 JUMP_LABEL (jump_insn) = label;
13858 /* Mark the jump as very likely to be taken. */
13859 add_int_reg_note (jump_insn, REG_BR_PROB,
13860 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
13862 if (split_stack_fn == NULL_RTX)
13864 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
13865 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
13867 fn = split_stack_fn;
13869 /* Get more stack space. We pass in the desired stack space and the
13870 size of the arguments to copy to the new stack. In 32-bit mode
13871 we push the parameters; __morestack will return on a new stack
13872 anyhow. In 64-bit mode we pass the parameters in r10 and
13874 allocate_rtx = GEN_INT (allocate);
13875 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
13876 call_fusage = NULL_RTX;
13881 reg10 = gen_rtx_REG (Pmode, R10_REG);
13882 reg11 = gen_rtx_REG (Pmode, R11_REG);
13884 /* If this function uses a static chain, it will be in %r10.
13885 Preserve it across the call to __morestack. */
13886 if (DECL_STATIC_CHAIN (cfun->decl))
13890 rax = gen_rtx_REG (word_mode, AX_REG);
13891 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
13892 use_reg (&call_fusage, rax);
13895 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
13898 HOST_WIDE_INT argval;
13900 gcc_assert (Pmode == DImode);
13901 /* When using the large model we need to load the address
13902 into a register, and we've run out of registers. So we
13903 switch to a different calling convention, and we call a
13904 different function: __morestack_large. We pass the
13905 argument size in the upper 32 bits of r10 and pass the
13906 frame size in the lower 32 bits. */
13907 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
13908 gcc_assert ((args_size & 0xffffffff) == args_size);
13910 if (split_stack_fn_large == NULL_RTX)
13912 split_stack_fn_large =
13913 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
13914 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
13916 if (ix86_cmodel == CM_LARGE_PIC)
13918 rtx_code_label *label;
/* Large PIC: compute the GOT base into r10/r11, then load the
   callee address from its GOT slot.  */
13921 label = gen_label_rtx ();
13922 emit_label (label);
13923 LABEL_PRESERVE_P (label) = 1;
13924 emit_insn (gen_set_rip_rex64 (reg10, label));
13925 emit_insn (gen_set_got_offset_rex64 (reg11, label));
13926 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
13927 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
13929 x = gen_rtx_CONST (Pmode, x);
13930 emit_move_insn (reg11, x);
13931 x = gen_rtx_PLUS (Pmode, reg10, reg11);
13932 x = gen_const_mem (Pmode, x);
13933 emit_move_insn (reg11, x);
13936 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size in the high 32 bits and allocate in the low 32.
   The double shift avoids UB when HOST_WIDE_INT shifts by 32.  */
13940 argval = ((args_size << 16) << 16) + allocate;
13941 emit_move_insn (reg10, GEN_INT (argval));
13945 emit_move_insn (reg10, allocate_rtx);
13946 emit_move_insn (reg11, GEN_INT (args_size));
13947 use_reg (&call_fusage, reg11);
13950 use_reg (&call_fusage, reg10);
/* 32-bit: parameters are pushed; __morestack returns on a new stack.  */
13954 emit_insn (gen_push (GEN_INT (args_size)));
13955 emit_insn (gen_push (allocate_rtx));
13957 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
13958 GEN_INT (UNITS_PER_WORD), constm1_rtx,
13960 add_function_usage_to (call_insn, call_fusage);
13962 /* In order to make call/return prediction work right, we now need
13963 to execute a return instruction. See
13964 libgcc/config/i386/morestack.S for the details on how this works.
13966 For flow purposes gcc must not see this as a return
13967 instruction--we need control flow to continue at the subsequent
13968 label. Therefore, we use an unspec. */
13969 gcc_assert (crtl->args.pops_args < 65536);
13970 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
13972 /* If we are in 64-bit mode and this function uses a static chain,
13973 we saved %r10 in %rax before calling _morestack. */
13974 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
13975 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13976 gen_rtx_REG (word_mode, AX_REG))
13978 /* If this function calls va_start, we need to store a pointer to
13979 the arguments on the old stack, because they may not have been
13980 all copied to the new stack. At this point the old stack can be
13981 found at the frame pointer value used by __morestack, because
13982 __morestack has set that up before calling back to us. Here we
13983 store that pointer in a scratch register, and in
13984 ix86_expand_prologue we store the scratch register in a stack
13986 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13988 unsigned int scratch_regno;
13992 scratch_regno = split_stack_prologue_scratch_regno ();
13993 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13994 frame_reg = gen_rtx_REG (Pmode, BP_REG);
13998 return address within this function
13999 return address of caller of this function
14001 So we add three words to get to the stack arguments.
14005 return address within this function
14006 first argument to __morestack
14007 second argument to __morestack
14008 return address of caller of this function
14010 So we add five words to get to the stack arguments.
14012 words = TARGET_64BIT ? 3 : 5;
14013 emit_insn (gen_rtx_SET (scratch_reg,
14014 gen_rtx_PLUS (Pmode, frame_reg,
14015 GEN_INT (words * UNITS_PER_WORD))));
14017 varargs_label = gen_label_rtx ();
14018 emit_jump_insn (gen_jump (varargs_label));
14019 JUMP_LABEL (get_last_insn ()) = varargs_label;
/* Fall-through target for the "enough stack" fast path.  */
14024 emit_label (label);
14025 LABEL_NUSES (label) = 1;
14027 /* If this function calls va_start, we now have to set the scratch
14028 register for the case where we do not call __morestack. In this
14029 case we need to set it based on the stack pointer. */
14030 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14032 emit_insn (gen_rtx_SET (scratch_reg,
14033 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14034 GEN_INT (UNITS_PER_WORD))));
14036 emit_label (varargs_label);
14037 LABEL_NUSES (varargs_label) = 1;
14041 /* We may have to tell the dataflow pass that the split stack prologue
14042 is initializing a scratch register. */
/* TARGET_EXTRA_LIVE_ON_ENTRY hook: when the split-stack varargs
   pointer is in use, mark the chosen scratch register live on entry
   so dataflow does not consider its prologue definition dead.
   NOTE(review): return-type line and braces are missing here.  */
14045 ix86_live_on_entry (bitmap regs)
14047 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14049 gcc_assert (flag_split_stack);
14050 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14054 /* Extract the parts of an RTL expression that is a valid memory address
14055 for an instruction. Return 0 if the structure of the address is
14056 grossly off. Return -1 if the address contains ASHIFT, so it is not
14057 strictly valid, but still used for computing length of lea instruction. */
/* Decomposes ADDR into OUT->{base,index,disp,scale,seg} following the
   x86 base + index*scale + disp addressing form, then canonicalizes:
   swap base/index for sp/fp indexes with scale 1, force a zero disp
   when %ebp/%r13 is the base, rewrite reg*2 as reg+reg, and reject a
   scaled index with neither base nor displacement.  NOTE(review): the
   excerpt has many gaps (error `return 0`s, closing braces, and parts
   of the PLUS-flattening loop are not visible).  */
14060 ix86_decompose_address (rtx addr, struct ix86_address *out)
14062 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14063 rtx base_reg, index_reg;
14064 HOST_WIDE_INT scale = 1;
14065 rtx scale_rtx = NULL_RTX;
14068 addr_space_t seg = ADDR_SPACE_GENERIC;
14070 /* Allow zero-extended SImode addresses,
14071 they will be emitted with addr32 prefix. */
14072 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14074 if (GET_CODE (addr) == ZERO_EXTEND
14075 && GET_MODE (XEXP (addr, 0)) == SImode)
14077 addr = XEXP (addr, 0);
14078 if (CONST_INT_P (addr))
14081 else if (GET_CODE (addr) == AND
14082 && const_32bit_mask (XEXP (addr, 1), DImode))
14084 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
14085 if (addr == NULL_RTX)
14088 if (CONST_INT_P (addr))
14093 /* Allow SImode subregs of DImode addresses,
14094 they will be emitted with addr32 prefix. */
14095 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14097 if (SUBREG_P (addr)
14098 && GET_MODE (SUBREG_REG (addr)) == DImode)
14100 addr = SUBREG_REG (addr);
14101 if (CONST_INT_P (addr))
/* Dispatch on the top-level code of ADDR: plain (SUBREG) register,
   PLUS of up to four addends, MULT/ASHIFT index, or displacement.  */
14108 else if (SUBREG_P (addr))
14110 if (REG_P (SUBREG_REG (addr)))
14115 else if (GET_CODE (addr) == PLUS)
14117 rtx addends[4], op;
/* Flatten the (possibly nested) PLUS into addends[], then classify
   each addend as index*scale, UNSPEC segment override, register,
   or displacement.  */
14125 addends[n++] = XEXP (op, 1);
14128 while (GET_CODE (op) == PLUS);
14133 for (i = n; i >= 0; --i)
14136 switch (GET_CODE (op))
14141 index = XEXP (op, 0);
14142 scale_rtx = XEXP (op, 1);
14148 index = XEXP (op, 0);
14149 tmp = XEXP (op, 1);
14150 if (!CONST_INT_P (tmp))
14152 scale = INTVAL (tmp);
14153 if ((unsigned HOST_WIDE_INT) scale > 3)
14155 scale = 1 << scale;
14160 if (GET_CODE (op) != UNSPEC)
/* UNSPEC_TP marks a thread-pointer reference; map it to the TLS
   segment register when direct segment references are enabled.  */
14165 if (XINT (op, 1) == UNSPEC_TP
14166 && TARGET_TLS_DIRECT_SEG_REFS
14167 && seg == ADDR_SPACE_GENERIC)
14168 seg = DEFAULT_TLS_SEG_REG;
14174 if (!REG_P (SUBREG_REG (op)))
14201 else if (GET_CODE (addr) == MULT)
14203 index = XEXP (addr, 0); /* index*scale */
14204 scale_rtx = XEXP (addr, 1);
14206 else if (GET_CODE (addr) == ASHIFT)
14208 /* We're called for lea too, which implements ashift on occasion. */
14209 index = XEXP (addr, 0);
14210 tmp = XEXP (addr, 1);
14211 if (!CONST_INT_P (tmp))
14213 scale = INTVAL (tmp);
14214 if ((unsigned HOST_WIDE_INT) scale > 3)
14216 scale = 1 << scale;
14220 disp = addr; /* displacement */
14226 else if (SUBREG_P (index)
14227 && REG_P (SUBREG_REG (index)))
14233 /* Extract the integral value of scale. */
14236 if (!CONST_INT_P (scale_rtx))
14238 scale = INTVAL (scale_rtx);
/* base_reg/index_reg strip a possible SUBREG so the canonicalization
   checks below can compare against hard registers.  */
14241 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14242 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14244 /* Avoid useless 0 displacement. */
14245 if (disp == const0_rtx && (base || index))
14248 /* Allow arg pointer and stack pointer as index if there is not scaling. */
14249 if (base_reg && index_reg && scale == 1
14250 && (index_reg == arg_pointer_rtx
14251 || index_reg == frame_pointer_rtx
14252 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
14254 std::swap (base, index);
14255 std::swap (base_reg, index_reg);
14258 /* Special case: %ebp cannot be encoded as a base without a displacement.
14262 && (base_reg == hard_frame_pointer_rtx
14263 || base_reg == frame_pointer_rtx
14264 || base_reg == arg_pointer_rtx
14265 || (REG_P (base_reg)
14266 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
14267 || REGNO (base_reg) == R13_REG))))
14270 /* Special case: on K6, [%esi] makes the instruction vector decoded.
14271 Avoid this by transforming to [%esi+0].
14272 Reload calls address legitimization without cfun defined, so we need
14273 to test cfun for being non-NULL. */
14274 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14275 && base_reg && !index_reg && !disp
14276 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
14279 /* Special case: encode reg+reg instead of reg*2. */
14280 if (!base && index && scale == 2)
14281 base = index, base_reg = index_reg, scale = 1;
14283 /* Special case: scaling cannot be encoded without base or displacement. */
14284 if (!base && !disp && index && scale != 1)
14288 out->index = index;
14290 out->scale = scale;
14296 /* Return cost of the memory address x.
14297 For i386, it is better to use a complex address than let gcc copy
14298 the address into a reg and make a new pseudo. But not if the address
14299 requires two regs - that would mean more pseudos with longer
14302 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14304 struct ix86_address parts;
14306 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the register tests below see the underlying hard or
   pseudo register.  */
14310 if (parts.base && SUBREG_P (parts.base))
14311 parts.base = SUBREG_REG (parts.base);
14312 if (parts.index && SUBREG_P (parts.index))
14313 parts.index = SUBREG_REG (parts.index);
14315 /* Attempt to minimize number of registers in the address by increasing
14316 address cost for each used register. We don't increase address cost
14317 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
14318 is not invariant itself it most likely means that base or index is not
14319 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14320 which is not profitable for x86. */
/* Charge for a base register, unless it is the PIC register (compared by
   REGNO once the RTL pass has a pic_offset_table_rtx).  */
14322 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14323 && (current_pass->type == GIMPLE_PASS
14324 || !pic_offset_table_rtx
14325 || !REG_P (parts.base)
14326 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
/* Likewise charge for an index register that is not the PIC register.  */
14330 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14331 && (current_pass->type == GIMPLE_PASS
14332 || !pic_offset_table_rtx
14333 || !REG_P (parts.index)
14334 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14337 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
14338 since its predecode logic can't detect the length of instructions
14339 and it degenerates to vector decoded. Increase cost of such
14340 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
14341 to split such addresses or even refuse such addresses at all.
14343 Following addressing modes are affected:
14348 The first and last case may be avoidable by explicitly coding the zero in
14349 memory address, but I don't have AMD-K6 machine handy to check this
/* The three disjuncts match the affected ModR/M-00_xxx_100b address shapes
   listed above: scaled index without displacement, scaled index with
   displacement but no base, and base+index without displacement.  */
14353 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14354 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14355 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14361 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
14362 this is used to form addresses to local data when -fPIC is in
/* Recognizes only the UNSPEC_MACHOPIC_OFFSET wrapper; DISP must already be
   the inner expression of a CONST.  */
14366 darwin_local_data_pic (rtx disp)
14368 return (GET_CODE (disp) == UNSPEC
14369 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14372 /* Determine if a given RTX is a valid constant. We already know this
14373 satisfies CONSTANT_P. */
14376 ix86_legitimate_constant_p (machine_mode, rtx x)
14378 /* Pointer bounds constants are not valid. */
14379 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14382 switch (GET_CODE (x))
/* Inside a CONST: strip a "sym + const_int" addend before examining
   the symbolic part.  */
14387 if (GET_CODE (x) == PLUS)
14389 if (!CONST_INT_P (XEXP (x, 1)))
14394 if (TARGET_MACHO && darwin_local_data_pic (x))
14397 /* Only some unspecs are valid as "constants". */
14398 if (GET_CODE (x) == UNSPEC)
14399 switch (XINT (x, 1))
14402 case UNSPEC_GOTOFF:
14403 case UNSPEC_PLTOFF:
14404 return TARGET_64BIT;
/* TLS offsets are constant only when the symbol's TLS model matches
   the relocation being requested.  */
14406 case UNSPEC_NTPOFF:
14407 x = XVECEXP (x, 0, 0);
14408 return (GET_CODE (x) == SYMBOL_REF
14409 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14410 case UNSPEC_DTPOFF:
14411 x = XVECEXP (x, 0, 0);
14412 return (GET_CODE (x) == SYMBOL_REF
14413 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14418 /* We must have drilled down to a symbol. */
14419 if (GET_CODE (x) == LABEL_REF)
14421 if (GET_CODE (x) != SYMBOL_REF)
14426 /* TLS symbols are never valid. */
14427 if (SYMBOL_REF_TLS_MODEL (x))
14430 /* DLLIMPORT symbols are never valid. */
14431 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14432 && SYMBOL_REF_DLLIMPORT_P (x))
14436 /* mdynamic-no-pic */
14437 if (MACHO_DYNAMIC_NO_PIC_P)
14438 return machopic_symbol_defined_p (x);
/* Wide integer and (presumably) vector constants: only the forms the SSE
   move patterns can materialize are allowed in 32-bit mode.  */
14442 case CONST_WIDE_INT:
14443 if (!TARGET_64BIT && !standard_sse_constant_p (x))
14448 if (!standard_sse_constant_p (x))
14455 /* Otherwise we handle everything else in the move patterns. */
14459 /* Determine if it's legal to put X into the constant pool. This
14460 is not possible for the address of thread-local symbols, which
14461 is checked above. */
14464 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14466 /* We can always put integral constants and vectors in memory. */
14467 switch (GET_CODE (x))
14470 case CONST_WIDE_INT:
/* Anything else may be forced to memory only if it is a legitimate
   constant; note the inverted sense of the hook's return value.  */
14478 return !ix86_legitimate_constant_p (mode, x);
14481 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
/* Only meaningful for targets with dllimport decl attributes; any other
   RTX (or non-SYMBOL_REF) answers false.  */
14485 is_imported_p (rtx x)
14487 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
14488 || GET_CODE (x) != SYMBOL_REF)
14491 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
14495 /* Nonzero if the constant value X is a legitimate general operand
14496 when generating PIC code. It is given that flag_pic is on and
14497 that X satisfies CONSTANT_P. */
14500 legitimate_pic_operand_p (rtx x)
14504 switch (GET_CODE (x))
/* For a CONST, peel off an optional "+ const_int" addend and look at
   the inner expression.  */
14507 inner = XEXP (x, 0);
14508 if (GET_CODE (inner) == PLUS
14509 && CONST_INT_P (XEXP (inner, 1)))
14510 inner = XEXP (inner, 0);
14512 /* Only some unspecs are valid as "constants". */
14513 if (GET_CODE (inner) == UNSPEC)
14514 switch (XINT (inner, 1))
14517 case UNSPEC_GOTOFF:
14518 case UNSPEC_PLTOFF:
14519 return TARGET_64BIT;
/* Local-exec TLS offsets are acceptable when wrapping a SYMBOL_REF
   with the matching TLS model.  */
14521 x = XVECEXP (inner, 0, 0);
14522 return (GET_CODE (x) == SYMBOL_REF
14523 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14524 case UNSPEC_MACHOPIC_OFFSET:
14525 return legitimate_pic_address_disp_p (x);
/* Default: defer to the displacement legitimacy check.  */
14533 return legitimate_pic_address_disp_p (x);
14540 /* Determine if a given CONST RTX is a valid memory displacement
14544 legitimate_pic_address_disp_p (rtx disp)
14548 /* In 64bit mode we can allow direct addresses of symbols and labels
14549 when they are not dynamic symbols. */
14552 rtx op0 = disp, op1;
14554 switch (GET_CODE (disp))
/* CONST: expect "something + small const_int"; the +/-16MB bound keeps
   the offset within the 32-bit PC-relative reach with room to spare.  */
14560 if (GET_CODE (XEXP (disp, 0)) != PLUS)
14562 op0 = XEXP (XEXP (disp, 0), 0);
14563 op1 = XEXP (XEXP (disp, 0), 1);
14564 if (!CONST_INT_P (op1)
14565 || INTVAL (op1) >= 16*1024*1024
14566 || INTVAL (op1) < -16*1024*1024)
14568 if (GET_CODE (op0) == LABEL_REF)
14570 if (GET_CODE (op0) == CONST
14571 && GET_CODE (XEXP (op0, 0)) == UNSPEC
14572 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
14574 if (GET_CODE (op0) == UNSPEC
14575 && XINT (op0, 1) == UNSPEC_PCREL)
14577 if (GET_CODE (op0) != SYMBOL_REF)
14582 /* TLS references should always be enclosed in UNSPEC.
14583 The dllimported symbol needs always to be resolved. */
14584 if (SYMBOL_REF_TLS_MODEL (op0)
14585 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
14590 if (is_imported_p (op0))
/* PE-COFF path: imported symbols are OK (resolved via the import
   table); far or non-local symbols are rejected below.  */
14593 if (SYMBOL_REF_FAR_ADDR_P (op0)
14594 || !SYMBOL_REF_LOCAL_P (op0))
14597 /* Function-symbols need to be resolved only for
14599 For the small-model we don't need to resolve anything
14601 if ((ix86_cmodel != CM_LARGE_PIC
14602 && SYMBOL_REF_FUNCTION_P (op0))
14603 || ix86_cmodel == CM_SMALL_PIC)
14605 /* Non-external symbols don't need to be resolved for
14606 large, and medium-model. */
14607 if ((ix86_cmodel == CM_LARGE_PIC
14608 || ix86_cmodel == CM_MEDIUM_PIC)
14609 && !SYMBOL_REF_EXTERNAL_P (op0))
/* ELF path: local (or PIE copy-relocated non-weak data) symbols are
   directly addressable outside the large model.  */
14612 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
14613 && (SYMBOL_REF_LOCAL_P (op0)
14614 || (HAVE_LD_PIE_COPYRELOC
14616 && !SYMBOL_REF_WEAK (op0)
14617 && !SYMBOL_REF_FUNCTION_P (op0)))
14618 && ix86_cmodel != CM_LARGE_PIC)
14626 if (GET_CODE (disp) != CONST)
14628 disp = XEXP (disp, 0);
/* 64-bit: bare PLUS displacements are rejected; only the GOT/PLT
   unspecs wrapping a symbol or label are allowed.  */
14632 /* We are unsafe to allow PLUS expressions. This limit allowed distance
14633 of GOT tables. We should not need these anyway. */
14634 if (GET_CODE (disp) != UNSPEC
14635 || (XINT (disp, 1) != UNSPEC_GOTPCREL
14636 && XINT (disp, 1) != UNSPEC_GOTOFF
14637 && XINT (disp, 1) != UNSPEC_PCREL
14638 && XINT (disp, 1) != UNSPEC_PLTOFF))
14641 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
14642 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit: strip an optional const_int addend, then require one of the
   known PIC/TLS unspecs.  */
14648 if (GET_CODE (disp) == PLUS)
14650 if (!CONST_INT_P (XEXP (disp, 1)))
14652 disp = XEXP (disp, 0);
14656 if (TARGET_MACHO && darwin_local_data_pic (disp))
14659 if (GET_CODE (disp) != UNSPEC)
14662 switch (XINT (disp, 1))
14667 /* We need to check for both symbols and labels because VxWorks loads
14668 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
14670 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14671 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
14672 case UNSPEC_GOTOFF:
14673 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
14674 While ABI specify also 32bit relocation but we don't produce it in
14675 small PIC model at all. */
14676 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14677 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
14679 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* The remaining cases are TLS unspecs; each requires the wrapped
   SYMBOL_REF to carry the matching TLS access model.  */
14681 case UNSPEC_GOTTPOFF:
14682 case UNSPEC_GOTNTPOFF:
14683 case UNSPEC_INDNTPOFF:
14686 disp = XVECEXP (disp, 0, 0);
14687 return (GET_CODE (disp) == SYMBOL_REF
14688 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
14689 case UNSPEC_NTPOFF:
14690 disp = XVECEXP (disp, 0, 0);
14691 return (GET_CODE (disp) == SYMBOL_REF
14692 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
14693 case UNSPEC_DTPOFF:
14694 disp = XVECEXP (disp, 0, 0);
14695 return (GET_CODE (disp) == SYMBOL_REF
14696 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
14702 /* Determine if op is suitable RTX for an address register.
14703 Return naked register if a register or a register subreg is
14704 found, otherwise return NULL_RTX. */
14707 ix86_validate_address_register (rtx op)
14709 machine_mode mode = GET_MODE (op);
14711 /* Only SImode or DImode registers can form the address. */
14712 if (mode != SImode && mode != DImode)
14717 else if (SUBREG_P (op))
14719 rtx reg = SUBREG_REG (op);
/* Re-check the inner register's own mode, not the SUBREG's.  */
14724 mode = GET_MODE (reg);
14726 /* Don't allow SUBREGs that span more than a word. It can
14727 lead to spill failures when the register is one word out
14728 of a two word structure. */
14729 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14732 /* Allow only SUBREGs of non-eliminable hard registers. */
14733 if (register_no_elim_operand (reg, mode))
14737 /* Op is not a register. */
14741 /* Recognizes RTL expressions that are valid memory addresses for an
14742 instruction. The MODE argument is the machine mode for the MEM
14743 expression that wants to use this address.
14745 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
14746 convert common non-canonical forms to canonical form so that they will
14750 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
14752 struct ix86_address parts;
14753 rtx base, index, disp;
14754 HOST_WIDE_INT scale;
14757 if (ix86_decompose_address (addr, &parts) <= 0)
14758 /* Decomposition failed. */
14762 index = parts.index;
14764 scale = parts.scale;
14767 /* Validate base register. */
14770 rtx reg = ix86_validate_address_register (base);
14772 if (reg == NULL_RTX)
/* STRICT selects the reload-time register-class checks; nonstrict
   additionally accepts pseudos.  */
14775 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
14776 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
14777 /* Base is not valid. */
14781 /* Validate index register. */
14784 rtx reg = ix86_validate_address_register (index);
14786 if (reg == NULL_RTX)
14789 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
14790 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
14791 /* Index is not valid. */
14795 /* Index and base should have the same mode. */
14797 && GET_MODE (base) != GET_MODE (index))
14800 /* Address override works only on the (%reg) part of %fs:(%reg). */
14801 if (seg != ADDR_SPACE_GENERIC
14802 && ((base && GET_MODE (base) != word_mode)
14803 || (index && GET_MODE (index) != word_mode)))
14806 /* Validate scale factor. */
14810 /* Scale without index. */
/* Hardware encodes only scale factors 1, 2, 4 and 8.  */
14813 if (scale != 2 && scale != 4 && scale != 8)
14814 /* Scale is not a valid multiplier. */
14818 /* Validate displacement. */
/* Symbolic displacement wrapped in a CONST/UNSPEC: dispatch on which
   relocation unspec it carries.  */
14821 if (GET_CODE (disp) == CONST
14822 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14823 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
14824 switch (XINT (XEXP (disp, 0), 1))
14826 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
14827 used. While ABI specify also 32bit relocations, we don't produce
14828 them at all and use IP relative instead. */
14830 case UNSPEC_GOTOFF:
14831 gcc_assert (flag_pic);
14833 goto is_legitimate_pic;
14835 /* 64bit address unspec. */
14838 case UNSPEC_GOTPCREL:
14840 gcc_assert (flag_pic);
14841 goto is_legitimate_pic;
14843 case UNSPEC_GOTTPOFF:
14844 case UNSPEC_GOTNTPOFF:
14845 case UNSPEC_INDNTPOFF:
14846 case UNSPEC_NTPOFF:
14847 case UNSPEC_DTPOFF:
14850 case UNSPEC_STACK_CHECK:
14851 gcc_assert (flag_split_stack);
14855 /* Invalid address unspec. */
14859 else if (SYMBOLIC_CONST (disp)
14863 && MACHOPIC_INDIRECT
14864 && !machopic_operand_p (disp)
/* is_legitimate_pic: in 64-bit mode a register-relative address may
   only add a @dtpoff/@ntpoff constant, never a PIC reference.  */
14870 if (TARGET_64BIT && (index || base))
14872 /* foo@dtpoff(%rX) is ok. */
14873 if (GET_CODE (disp) != CONST
14874 || GET_CODE (XEXP (disp, 0)) != PLUS
14875 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
14876 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
14877 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
14878 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
14879 /* Non-constant pic memory reference. */
14882 else if ((!TARGET_MACHO || flag_pic)
14883 && ! legitimate_pic_address_disp_p (disp))
14884 /* Displacement is an invalid pic construct. */
14887 else if (MACHO_DYNAMIC_NO_PIC_P
14888 && !ix86_legitimate_constant_p (Pmode, disp))
14889 /* displacement must be referenced via non_lazy_pointer */
14893 /* This code used to verify that a symbolic pic displacement
14894 includes the pic_offset_table_rtx register.
14896 While this is good idea, unfortunately these constructs may
14897 be created by "adds using lea" optimization for incorrect
14906 This code is nonsensical, but results in addressing
14907 GOT table with pic_offset_table_rtx base. We can't
14908 just refuse it easily, since it gets matched by
14909 "addsi3" pattern, that later gets split to lea in the
14910 case output register differs from input. While this
14911 can be handled by separate addsi pattern for this case
14912 that never results in lea, this seems to be easier and
14913 correct fix for crash to disable this test. */
14915 else if (GET_CODE (disp) != LABEL_REF
14916 && !CONST_INT_P (disp)
14917 && (GET_CODE (disp) != CONST
14918 || !ix86_legitimate_constant_p (Pmode, disp))
14919 && (GET_CODE (disp) != SYMBOL_REF
14920 || !ix86_legitimate_constant_p (Pmode, disp)))
14921 /* Displacement is not constant. */
14923 else if (TARGET_64BIT
14924 && !x86_64_immediate_operand (disp, VOIDmode))
14925 /* Displacement is out of range. */
14927 /* In x32 mode, constant addresses are sign extended to 64bit, so
14928 we have to prevent addresses from 0x80000000 to 0xffffffff. */
14929 else if (TARGET_X32 && !(index || base)
14930 && CONST_INT_P (disp)
14931 && val_signbit_known_set_p (SImode, INTVAL (disp)))
14935 /* Everything looks valid. */
14939 /* Determine if a given RTX is a valid constant address. */
/* A constant address is a CONSTANT_P RTX that also passes the strict
   (reload-time) address legitimacy check.  */
14942 constant_address_p (rtx x)
14944 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
14947 /* Return a unique alias set for the GOT. */
/* Lazily created on first use; the static SET caches the allocated
   alias set across calls.  */
14949 static alias_set_type
14950 ix86_GOT_alias_set (void)
14952 static alias_set_type set = -1;
14954 set = new_alias_set ();
14958 /* Return a legitimate reference for ORIG (an address) using the
14959 register REG. If REG is 0, a new pseudo is generated.
14961 There are two types of references that must be handled:
14963 1. Global data references must load the address from the GOT, via
14964 the PIC reg. An insn is emitted to do this load, and the reg is
14967 2. Static data references, constant pool addresses, and code labels
14968 compute the address as an offset from the GOT, whose base is in
14969 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
14970 differentiate them from global data objects. The returned
14971 address is the PIC reg + an unspec constant.
14973 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
14974 reg also appears in the address. */
14977 legitimize_pic_address (rtx orig, rtx reg)
14980 rtx new_rtx = orig;
14983 if (TARGET_MACHO && !TARGET_64BIT)
14986 reg = gen_reg_rtx (Pmode);
14987 /* Use the generic Mach-O PIC machinery. */
14988 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* On 64-bit PE-COFF with dllimport attributes, try the __imp_ indirection
   first.  */
14992 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14994 rtx tmp = legitimize_pe_coff_symbol (addr, true);
14999 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
15001 else if (TARGET_64BIT && !TARGET_PECOFF
15002 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
15005 /* This symbol may be referenced via a displacement from the PIC
15006 base address (@GOTOFF). */
15008 if (GET_CODE (addr) == CONST)
15009 addr = XEXP (addr, 0);
15010 if (GET_CODE (addr) == PLUS)
15012 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15014 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15017 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15018 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
/* Materialize the @GOTOFF constant in a temporary, then add the PIC
   register to form the final address.  */
15020 tmpreg = gen_reg_rtx (Pmode);
15023 emit_move_insn (tmpreg, new_rtx);
15027 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
15028 tmpreg, 1, OPTAB_DIRECT);
15032 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
15034 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
15036 /* This symbol may be referenced via a displacement from the PIC
15037 base address (@GOTOFF). */
15039 if (GET_CODE (addr) == CONST)
15040 addr = XEXP (addr, 0);
15041 if (GET_CODE (addr) == PLUS)
15043 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15045 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15048 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15049 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15050 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15054 emit_move_insn (reg, new_rtx);
/* Non-TLS symbols (and, on VxWorks, labels) that cannot use @GOTOFF.  */
15058 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15059 /* We can't use @GOTOFF for text labels on VxWorks;
15060 see gotoff_operand. */
15061 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15063 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15067 /* For x64 PE-COFF there is no GOT table. So we use address
15069 if (TARGET_64BIT && TARGET_PECOFF)
15071 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15072 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15075 reg = gen_reg_rtx (Pmode);
15076 emit_move_insn (reg, new_rtx);
15079 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit small/medium PIC: load the address through a RIP-relative
   GOT slot (@GOTPCREL).  */
15081 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
15082 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15083 new_rtx = gen_const_mem (Pmode, new_rtx);
15084 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15087 reg = gen_reg_rtx (Pmode);
15088 /* Use directly gen_movsi, otherwise the address is loaded
15089 into register for CSE. We don't want to CSE this addresses,
15090 instead we CSE addresses from the GOT table, so skip this. */
15091 emit_insn (gen_movsi (reg, new_rtx));
15096 /* This symbol must be referenced via a load from the
15097 Global Offset Table (@GOT). */
15099 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15100 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15102 new_rtx = force_reg (Pmode, new_rtx);
15103 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15104 new_rtx = gen_const_mem (Pmode, new_rtx);
15105 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15108 reg = gen_reg_rtx (Pmode);
15109 emit_move_insn (reg, new_rtx);
/* Remaining cases: constants and composite addresses.  */
15115 if (CONST_INT_P (addr)
15116 && !x86_64_immediate_operand (addr, VOIDmode))
15120 emit_move_insn (reg, addr);
15124 new_rtx = force_reg (Pmode, addr);
15126 else if (GET_CODE (addr) == CONST)
15128 addr = XEXP (addr, 0);
15130 /* We must match stuff we generate before. Assume the only
15131 unspecs that can get here are ours. Not that we could do
15132 anything with them anyway.... */
15133 if (GET_CODE (addr) == UNSPEC
15134 || (GET_CODE (addr) == PLUS
15135 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15137 gcc_assert (GET_CODE (addr) == PLUS);
15139 if (GET_CODE (addr) == PLUS)
15141 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15143 /* Check first to see if this is a constant offset from a @GOTOFF
15144 symbol reference. */
15145 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
15146 && CONST_INT_P (op1))
15150 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15152 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15153 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15154 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15158 emit_move_insn (reg, new_rtx);
/* Large constant offsets (outside +/-16MB) must be forced into a
   register on 64-bit targets.  */
15164 if (INTVAL (op1) < -16*1024*1024
15165 || INTVAL (op1) >= 16*1024*1024)
15167 if (!x86_64_immediate_operand (op1, Pmode))
15168 op1 = force_reg (Pmode, op1);
15169 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands recursively and recombine.  */
15175 rtx base = legitimize_pic_address (op0, reg);
15176 machine_mode mode = GET_MODE (base);
15178 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15180 if (CONST_INT_P (new_rtx))
15182 if (INTVAL (new_rtx) < -16*1024*1024
15183 || INTVAL (new_rtx) >= 16*1024*1024)
15185 if (!x86_64_immediate_operand (new_rtx, mode))
15186 new_rtx = force_reg (mode, new_rtx);
15188 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15191 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15195 /* For %rip addressing, we have to use just disp32, not
15198 && (GET_CODE (base) == SYMBOL_REF
15199 || GET_CODE (base) == LABEL_REF))
15200 base = force_reg (mode, base);
15201 if (GET_CODE (new_rtx) == PLUS
15202 && CONSTANT_P (XEXP (new_rtx, 1)))
15204 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15205 new_rtx = XEXP (new_rtx, 1);
15207 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15215 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15218 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15220 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* On x32 the thread pointer unspec is SImode but callers may need DImode;
   only that widening combination is expected here.  */
15222 if (GET_MODE (tp) != tp_mode)
15224 gcc_assert (GET_MODE (tp) == SImode);
15225 gcc_assert (tp_mode == DImode);
15227 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15231 tp = copy_to_mode_reg (tp_mode, tp);
15236 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15238 static GTY(()) rtx ix86_tls_symbol;
15241 ix86_tls_get_addr (void)
15243 if (!ix86_tls_symbol)
/* GNU TLS on 32-bit uses the triple-underscore entry point.  */
15246 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15247 ? "___tls_get_addr" : "__tls_get_addr");
15249 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
/* In the large PIC model the call target itself must be formed as
   PIC register + @PLTOFF-style unspec.  */
15252 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15254 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15256 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15257 gen_rtx_CONST (Pmode, unspec));
15260 return ix86_tls_symbol;
15263 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15265 static GTY(()) rtx ix86_tls_module_base_symbol;
15268 ix86_tls_module_base (void)
15270 if (!ix86_tls_module_base_symbol)
15272 ix86_tls_module_base_symbol
15273 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
/* Mark the symbol as a global-dynamic TLS reference so later checks
   treat it like any other GD-model symbol.  */
15275 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15276 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15279 return ix86_tls_module_base_symbol;
15282 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15283 false if we expect this to be used for a memory address and true if
15284 we expect to load the address into a register. */
15287 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15289 rtx dest, base, off;
15290 rtx pic = NULL_RTX, tp = NULL_RTX;
15291 machine_mode tp_mode = Pmode;
15294 /* Fall back to global dynamic model if tool chain cannot support local
15296 if (TARGET_SUN_TLS && !TARGET_64BIT
15297 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15298 && model == TLS_MODEL_LOCAL_DYNAMIC)
15299 model = TLS_MODEL_GLOBAL_DYNAMIC;
15303 case TLS_MODEL_GLOBAL_DYNAMIC:
15304 dest = gen_reg_rtx (Pmode);
/* 32-bit needs a PIC register for the __tls_get_addr call; reuse the
   global one when PIC, otherwise materialize a scratch GOT pointer.  */
15308 if (flag_pic && !TARGET_PECOFF)
15309 pic = pic_offset_table_rtx;
15312 pic = gen_reg_rtx (Pmode);
15313 emit_insn (gen_set_got (pic));
15317 if (TARGET_GNU2_TLS)
/* GNU2 (TLSDESC) sequence: the descriptor call yields an offset that
   is then added to the thread pointer.  */
15320 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15322 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15324 tp = get_thread_pointer (Pmode, true);
15325 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15327 if (GET_MODE (x) != Pmode)
15328 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15330 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Classic GNU TLS: emit a __tls_get_addr libcall returning in %rax.  */
15334 rtx caddr = ix86_tls_get_addr ();
15338 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15343 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15344 insns = get_insns ();
15347 if (GET_MODE (x) != Pmode)
15348 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15350 RTL_CONST_CALL_P (insns) = 1;
15351 emit_libcall_block (insns, dest, rax, x);
15354 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15358 case TLS_MODEL_LOCAL_DYNAMIC:
15359 base = gen_reg_rtx (Pmode);
15364 pic = pic_offset_table_rtx;
15367 pic = gen_reg_rtx (Pmode);
15368 emit_insn (gen_set_got (pic));
15372 if (TARGET_GNU2_TLS)
15374 rtx tmp = ix86_tls_module_base ();
15377 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15379 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15381 tp = get_thread_pointer (Pmode, true);
15382 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15383 gen_rtx_MINUS (Pmode, tmp, tp));
15387 rtx caddr = ix86_tls_get_addr ();
15391 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15397 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15398 insns = get_insns ();
15401 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15402 share the LD_BASE result with other LD model accesses. */
15403 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15404 UNSPEC_TLS_LD_BASE);
15406 RTL_CONST_CALL_P (insns) = 1;
15407 emit_libcall_block (insns, base, rax, eqv);
15410 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* Add the per-symbol @dtpoff offset to the module base.  */
15413 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15414 off = gen_rtx_CONST (Pmode, off);
15416 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15418 if (TARGET_GNU2_TLS)
15420 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15422 if (GET_MODE (x) != Pmode)
15423 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15425 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15429 case TLS_MODEL_INITIAL_EXEC:
15432 if (TARGET_SUN_TLS && !TARGET_X32)
15434 /* The Sun linker took the AMD64 TLS spec literally
15435 and can only handle %rax as destination of the
15436 initial executable code sequence. */
15438 dest = gen_reg_rtx (DImode);
15439 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15443 /* Generate DImode references to avoid %fs:(%reg32)
15444 problems and linker IE->LE relaxation bug. */
15447 type = UNSPEC_GOTNTPOFF;
/* Select the relocation unspec: @gotntpoff with a PIC register under
   GNU TLS, @gottpoff otherwise, @indntpoff for non-PIC non-GNU.  */
15451 pic = pic_offset_table_rtx;
15452 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15454 else if (!TARGET_ANY_GNU_TLS)
15456 pic = gen_reg_rtx (Pmode);
15457 emit_insn (gen_set_got (pic));
15458 type = UNSPEC_GOTTPOFF;
15463 type = UNSPEC_INDNTPOFF;
15466 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15467 off = gen_rtx_CONST (tp_mode, off);
15469 off = gen_rtx_PLUS (tp_mode, pic, off);
15470 off = gen_const_mem (tp_mode, off);
15471 set_mem_alias_set (off, ix86_GOT_alias_set ());
15473 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15475 base = get_thread_pointer (tp_mode,
15476 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15477 off = force_reg (tp_mode, off);
15478 return gen_rtx_PLUS (tp_mode, base, off);
/* Non-GNU TLS: the loaded value is the offset *below* the thread
   pointer, hence the subtraction.  */
15482 base = get_thread_pointer (Pmode, true);
15483 dest = gen_reg_rtx (Pmode);
15484 emit_insn (ix86_gen_sub3 (dest, base, off));
15488 case TLS_MODEL_LOCAL_EXEC:
15489 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15490 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15491 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15492 off = gen_rtx_CONST (Pmode, off);
15494 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15496 base = get_thread_pointer (Pmode,
15497 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15498 return gen_rtx_PLUS (Pmode, base, off);
15502 base = get_thread_pointer (Pmode, true);
15503 dest = gen_reg_rtx (Pmode);
15504 emit_insn (ix86_gen_sub3 (dest, base, off));
15509 gcc_unreachable ();
15515 /* Create or return the unique __imp_DECL dllimport symbol corresponding
15516 to symbol DECL if BEIMPORT is true. Otherwise create or return the
15517 unique refptr-DECL symbol corresponding to symbol DECL. */
/* Hash traits for the decl -> import-symbol cache; entries are kept
   alive only while the source decl is GC-marked.  */
15519 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
15521 static inline hashval_t hash (tree_map *m) { return m->hash; }
15523 equal (tree_map *a, tree_map *b)
15525 return a->base.from == b->base.from;
15529 keep_cache_entry (tree_map *&m)
15531 return ggc_marked_p (m->base.from);
15535 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
/* Return (creating and caching on first use) the artificial VAR_DECL
   holding the "__imp_DECL" pointer (BEIMPORT) or "refptr.DECL" stub.  */
15538 get_dllimport_decl (tree decl, bool beimport)
15540 struct tree_map *h, in;
15542 const char *prefix;
15543 size_t namelen, prefixlen;
15548 if (!dllimport_map)
15549 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
15551 in.hash = htab_hash_pointer (decl);
15552 in.base.from = decl;
15553 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
/* Cache miss: build a read-only external pointer decl for the stub.  */
15558 *loc = h = ggc_alloc<tree_map> ();
15560 h->base.from = decl;
15561 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
15562 VAR_DECL, NULL, ptr_type_node);
15563 DECL_ARTIFICIAL (to) = 1;
15564 DECL_IGNORED_P (to) = 1;
15565 DECL_EXTERNAL (to) = 1;
15566 TREE_READONLY (to) = 1;
15568 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
15569 name = targetm.strip_name_encoding (name);
/* The "*" prefix suppresses further user_label_prefix decoration;
   fastcall names and no-prefix targets skip the extra underscore.  */
15571 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
15572 ? "*__imp_" : "*__imp__";
15574 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
15575 namelen = strlen (name);
15576 prefixlen = strlen (prefix);
15577 imp_name = (char *) alloca (namelen + prefixlen + 1);
15578 memcpy (imp_name, prefix, prefixlen);
15579 memcpy (imp_name + prefixlen, name, namelen + 1);
15581 name = ggc_alloc_string (imp_name, namelen + prefixlen);
15582 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
15583 SET_SYMBOL_REF_DECL (rtl, to);
15584 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
15587 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
15588 #ifdef SUB_TARGET_RECORD_STUB
15589 SUB_TARGET_RECORD_STUB (name);
/* The decl's RTL is a load through the stub pointer, aliased with
   the GOT so it does not conflict with ordinary memory.  */
15593 rtl = gen_const_mem (Pmode, rtl);
15594 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
15596 SET_DECL_RTL (to, rtl);
15597 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
15602 /* Expand SYMBOL into its corresponding far-address symbol.
15603 WANT_REG is true if we require the result be a register. */
15606 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
/* SYMBOL must carry a decl; the refptr stub decl provides the RTL.  */
15611 gcc_assert (SYMBOL_REF_DECL (symbol));
15612 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
15614 x = DECL_RTL (imp_decl);
15616 x = force_reg (Pmode, x);
15620 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
15621 true if we require the result be a register. */
15624 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* Same shape as legitimize_pe_coff_extern_decl, but requests the
   __imp_ import-table stub (beimport == true).  */
15629 gcc_assert (SYMBOL_REF_DECL (symbol));
15630 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
15632 x = DECL_RTL (imp_decl);
15634 x = force_reg (Pmode, x);
15638 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
15639 is true if we require the result be a register. */
15642 legitimize_pe_coff_symbol (rtx addr, bool inreg)
15644 if (!TARGET_PECOFF)
/* First, dllimport'ed symbols — bare or inside a CONST(sym + offset);
   in the latter case re-attach the offset to the converted stub.  */
15647 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15649 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
15650 return legitimize_dllimport_symbol (addr, inreg);
15651 if (GET_CODE (addr) == CONST
15652 && GET_CODE (XEXP (addr, 0)) == PLUS
15653 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15654 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
15656 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
15657 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* Outside medium/large models, external decls go through a refptr
   stub instead; same bare-vs-offset handling as above.  */
15661 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
15663 if (GET_CODE (addr) == SYMBOL_REF
15664 && !is_imported_p (addr)
15665 && SYMBOL_REF_EXTERNAL_P (addr)
15666 && SYMBOL_REF_DECL (addr))
15667 return legitimize_pe_coff_extern_decl (addr, inreg);
15669 if (GET_CODE (addr) == CONST
15670 && GET_CODE (XEXP (addr, 0)) == PLUS
15671 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15672 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
15673 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
15674 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
15676 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
15677 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15682 /* Try machine-dependent ways of modifying an illegitimate address
15683 to be legitimate. If we find one, return the new, valid address.
15684 This macro is used in only one place: `memory_address' in explow.c.
15686 OLDX is the address as it was before break_out_memory_refs was called.
15687 In some cases it is useful to look at this to decide what needs to be done.
15689 It is always safe for this macro to do nothing. It exists to recognize
15690 opportunities to optimize the output.
15692 For the 80386, we handle X+REG by loading X into a register R and
15693 using R+REG. R will go in a general reg and indexing will be used.
15694 However, if REG is a broken-out memory address or multiplication,
15695 nothing needs to be done because REG can certainly go in a general reg.
15697 When -fpic is used, special handling is needed for symbolic references.
15698 See comments by legitimize_pic_address in i386.c for details. */
15701 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
15703 bool changed = false;
15706 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
15708 return legitimize_tls_address (x, (enum tls_model) log, false);
15709 if (GET_CODE (x) == CONST
15710 && GET_CODE (XEXP (x, 0)) == PLUS
15711 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
15712 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
15714 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
15715 (enum tls_model) log, false);
15716 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
15719 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15721 rtx tmp = legitimize_pe_coff_symbol (x, true);
15726 if (flag_pic && SYMBOLIC_CONST (x))
15727 return legitimize_pic_address (x, 0);
15730 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
15731 return machopic_indirect_data_reference (x, 0);
15734 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
15735 if (GET_CODE (x) == ASHIFT
15736 && CONST_INT_P (XEXP (x, 1))
15737 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
15740 log = INTVAL (XEXP (x, 1));
15741 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
15742 GEN_INT (1 << log));
15745 if (GET_CODE (x) == PLUS)
15747 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15749 if (GET_CODE (XEXP (x, 0)) == ASHIFT
15750 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15751 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
15754 log = INTVAL (XEXP (XEXP (x, 0), 1));
15755 XEXP (x, 0) = gen_rtx_MULT (Pmode,
15756 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
15757 GEN_INT (1 << log));
15760 if (GET_CODE (XEXP (x, 1)) == ASHIFT
15761 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
15762 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
15765 log = INTVAL (XEXP (XEXP (x, 1), 1));
15766 XEXP (x, 1) = gen_rtx_MULT (Pmode,
15767 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
15768 GEN_INT (1 << log));
15771 /* Put multiply first if it isn't already. */
15772 if (GET_CODE (XEXP (x, 1)) == MULT)
15774 std::swap (XEXP (x, 0), XEXP (x, 1));
15778 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
15779 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
15780 created by virtual register instantiation, register elimination, and
15781 similar optimizations. */
15782 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
15785 x = gen_rtx_PLUS (Pmode,
15786 gen_rtx_PLUS (Pmode, XEXP (x, 0),
15787 XEXP (XEXP (x, 1), 0)),
15788 XEXP (XEXP (x, 1), 1));
15792 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
15793 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
15794 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
15795 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15796 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
15797 && CONSTANT_P (XEXP (x, 1)))
15800 rtx other = NULL_RTX;
15802 if (CONST_INT_P (XEXP (x, 1)))
15804 constant = XEXP (x, 1);
15805 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
15807 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
15809 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
15810 other = XEXP (x, 1);
15818 x = gen_rtx_PLUS (Pmode,
15819 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
15820 XEXP (XEXP (XEXP (x, 0), 1), 0)),
15821 plus_constant (Pmode, other,
15822 INTVAL (constant)));
15826 if (changed && ix86_legitimate_address_p (mode, x, false))
15829 if (GET_CODE (XEXP (x, 0)) == MULT)
15832 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
15835 if (GET_CODE (XEXP (x, 1)) == MULT)
15838 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
15842 && REG_P (XEXP (x, 1))
15843 && REG_P (XEXP (x, 0)))
15846 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
15849 x = legitimize_pic_address (x, 0);
15852 if (changed && ix86_legitimate_address_p (mode, x, false))
15855 if (REG_P (XEXP (x, 0)))
15857 rtx temp = gen_reg_rtx (Pmode);
15858 rtx val = force_operand (XEXP (x, 1), temp);
15861 val = convert_to_mode (Pmode, val, 1);
15862 emit_move_insn (temp, val);
15865 XEXP (x, 1) = temp;
15869 else if (REG_P (XEXP (x, 1)))
15871 rtx temp = gen_reg_rtx (Pmode);
15872 rtx val = force_operand (XEXP (x, 0), temp);
15875 val = convert_to_mode (Pmode, val, 1);
15876 emit_move_insn (temp, val);
15879 XEXP (x, 0) = temp;
15887 /* Print an integer constant expression in assembler syntax. Addition
15888 and subtraction are the only arithmetic that may appear in these
15889 expressions. FILE is the stdio stream to write to, X is the rtx, and
15890 CODE is the operand print code from the output string. */
15893 output_pic_addr_const (FILE *file, rtx x, int code)
15897 switch (GET_CODE (x))
15900 gcc_assert (flag_pic);
15905 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
15906 output_addr_const (file, x);
15909 const char *name = XSTR (x, 0);
15911 /* Mark the decl as referenced so that cgraph will
15912 output the function. */
15913 if (SYMBOL_REF_DECL (x))
15914 mark_decl_referenced (SYMBOL_REF_DECL (x));
15917 if (MACHOPIC_INDIRECT
15918 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
15919 name = machopic_indirection_name (x, /*stub_p=*/true);
15921 assemble_name (file, name);
15923 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
15924 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
15925 fputs ("@PLT", file);
15932 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
15933 assemble_name (asm_out_file, buf);
15937 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15941 /* This used to output parentheses around the expression,
15942 but that does not work on the 386 (either ATT or BSD assembler). */
15943 output_pic_addr_const (file, XEXP (x, 0), code);
15947 /* We can't handle floating point constants;
15948 TARGET_PRINT_OPERAND must handle them. */
15949 output_operand_lossage ("floating constant misused");
15953 /* Some assemblers need integer constants to appear first. */
15954 if (CONST_INT_P (XEXP (x, 0)))
15956 output_pic_addr_const (file, XEXP (x, 0), code);
15958 output_pic_addr_const (file, XEXP (x, 1), code);
15962 gcc_assert (CONST_INT_P (XEXP (x, 1)));
15963 output_pic_addr_const (file, XEXP (x, 1), code);
15965 output_pic_addr_const (file, XEXP (x, 0), code);
15971 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
15972 output_pic_addr_const (file, XEXP (x, 0), code);
15974 output_pic_addr_const (file, XEXP (x, 1), code);
15976 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
15980 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
15982 bool f = i386_asm_output_addr_const_extra (file, x);
15987 gcc_assert (XVECLEN (x, 0) == 1);
15988 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
15989 switch (XINT (x, 1))
15992 fputs ("@GOT", file);
15994 case UNSPEC_GOTOFF:
15995 fputs ("@GOTOFF", file);
15997 case UNSPEC_PLTOFF:
15998 fputs ("@PLTOFF", file);
16001 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16002 "(%rip)" : "[rip]", file);
16004 case UNSPEC_GOTPCREL:
16005 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16006 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
16008 case UNSPEC_GOTTPOFF:
16009 /* FIXME: This might be @TPOFF in Sun ld too. */
16010 fputs ("@gottpoff", file);
16013 fputs ("@tpoff", file);
16015 case UNSPEC_NTPOFF:
16017 fputs ("@tpoff", file);
16019 fputs ("@ntpoff", file);
16021 case UNSPEC_DTPOFF:
16022 fputs ("@dtpoff", file);
16024 case UNSPEC_GOTNTPOFF:
16026 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16027 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16029 fputs ("@gotntpoff", file);
16031 case UNSPEC_INDNTPOFF:
16032 fputs ("@indntpoff", file);
16035 case UNSPEC_MACHOPIC_OFFSET:
16037 machopic_output_function_base_name (file);
16041 output_operand_lossage ("invalid UNSPEC as operand");
16047 output_operand_lossage ("invalid expression as operand");
16051 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16052 We need to emit DTP-relative relocations. */
16054 static void ATTRIBUTE_UNUSED
16055 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16057 fputs (ASM_LONG, file);
16058 output_addr_const (file, x);
16059 fputs ("@dtpoff", file);
16065 fputs (", 0", file);
16068 gcc_unreachable ();
16072 /* Return true if X is a representation of the PIC register. This copes
16073 with calls from ix86_find_base_term, where the register might have
16074 been replaced by a cselib value. */
16077 ix86_pic_register_p (rtx x)
16079 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16080 return (pic_offset_table_rtx
16081 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16082 else if (!REG_P (x))
16084 else if (pic_offset_table_rtx)
16086 if (REGNO (x) == REGNO (pic_offset_table_rtx))
16088 if (HARD_REGISTER_P (x)
16089 && !HARD_REGISTER_P (pic_offset_table_rtx)
16090 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
16095 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16098 /* Helper function for ix86_delegitimize_address.
16099 Attempt to delegitimize TLS local-exec accesses. */
16102 ix86_delegitimize_tls_address (rtx orig_x)
16104 rtx x = orig_x, unspec;
16105 struct ix86_address addr;
16107 if (!TARGET_TLS_DIRECT_SEG_REFS)
16111 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
16113 if (ix86_decompose_address (x, &addr) == 0
16114 || addr.seg != DEFAULT_TLS_SEG_REG
16115 || addr.disp == NULL_RTX
16116 || GET_CODE (addr.disp) != CONST)
16118 unspec = XEXP (addr.disp, 0);
16119 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16120 unspec = XEXP (unspec, 0);
16121 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16123 x = XVECEXP (unspec, 0, 0);
16124 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16125 if (unspec != XEXP (addr.disp, 0))
16126 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16129 rtx idx = addr.index;
16130 if (addr.scale != 1)
16131 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16132 x = gen_rtx_PLUS (Pmode, idx, x);
16135 x = gen_rtx_PLUS (Pmode, addr.base, x);
16136 if (MEM_P (orig_x))
16137 x = replace_equiv_address_nv (orig_x, x);
16141 /* In the name of slightly smaller debug output, and to cater to
16142 general assembler lossage, recognize PIC+GOTOFF and turn it back
16143 into a direct symbol reference.
16145 On Darwin, this is necessary to avoid a crash, because Darwin
16146 has a different PIC label for each routine but the DWARF debugging
16147 information is not associated with any particular routine, so it's
16148 necessary to remove references to the PIC label from RTL stored by
16149 the DWARF output code. */
16152 ix86_delegitimize_address (rtx x)
16154 rtx orig_x = delegitimize_mem_from_attrs (x);
16155 /* addend is NULL or some rtx if x is something+GOTOFF where
16156 something doesn't include the PIC register. */
16157 rtx addend = NULL_RTX;
16158 /* reg_addend is NULL or a multiple of some register. */
16159 rtx reg_addend = NULL_RTX;
16160 /* const_addend is NULL or a const_int. */
16161 rtx const_addend = NULL_RTX;
16162 /* This is the result, or NULL. */
16163 rtx result = NULL_RTX;
16172 if (GET_CODE (x) == CONST
16173 && GET_CODE (XEXP (x, 0)) == PLUS
16174 && GET_MODE (XEXP (x, 0)) == Pmode
16175 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16176 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16177 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16179 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16180 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16181 if (MEM_P (orig_x))
16182 x = replace_equiv_address_nv (orig_x, x);
16186 if (GET_CODE (x) == CONST
16187 && GET_CODE (XEXP (x, 0)) == UNSPEC
16188 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16189 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16190 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16192 x = XVECEXP (XEXP (x, 0), 0, 0);
16193 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16195 x = simplify_gen_subreg (GET_MODE (orig_x), x,
16203 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16204 return ix86_delegitimize_tls_address (orig_x);
16206 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16207 and -mcmodel=medium -fpic. */
16210 if (GET_CODE (x) != PLUS
16211 || GET_CODE (XEXP (x, 1)) != CONST)
16212 return ix86_delegitimize_tls_address (orig_x);
16214 if (ix86_pic_register_p (XEXP (x, 0)))
16215 /* %ebx + GOT/GOTOFF */
16217 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16219 /* %ebx + %reg * scale + GOT/GOTOFF */
16220 reg_addend = XEXP (x, 0);
16221 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16222 reg_addend = XEXP (reg_addend, 1);
16223 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16224 reg_addend = XEXP (reg_addend, 0);
16227 reg_addend = NULL_RTX;
16228 addend = XEXP (x, 0);
16232 addend = XEXP (x, 0);
16234 x = XEXP (XEXP (x, 1), 0);
16235 if (GET_CODE (x) == PLUS
16236 && CONST_INT_P (XEXP (x, 1)))
16238 const_addend = XEXP (x, 1);
16242 if (GET_CODE (x) == UNSPEC
16243 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16244 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16245 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16246 && !MEM_P (orig_x) && !addend)))
16247 result = XVECEXP (x, 0, 0);
16249 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16250 && !MEM_P (orig_x))
16251 result = XVECEXP (x, 0, 0);
16254 return ix86_delegitimize_tls_address (orig_x);
16257 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16259 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16262 /* If the rest of original X doesn't involve the PIC register, add
16263 addend and subtract pic_offset_table_rtx. This can happen e.g.
16265 leal (%ebx, %ecx, 4), %ecx
16267 movl foo@GOTOFF(%ecx), %edx
16268 in which case we return (%ecx - %ebx) + foo
16269 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16270 and reload has completed. */
16271 if (pic_offset_table_rtx
16272 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16273 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16274 pic_offset_table_rtx),
16276 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
16278 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16279 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16280 result = gen_rtx_PLUS (Pmode, tmp, result);
16285 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16287 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
16288 if (result == NULL_RTX)
16294 /* If X is a machine specific address (i.e. a symbol or label being
16295 referenced as a displacement from the GOT implemented using an
16296 UNSPEC), then return the base term. Otherwise return X. */
16299 ix86_find_base_term (rtx x)
16305 if (GET_CODE (x) != CONST)
16307 term = XEXP (x, 0);
16308 if (GET_CODE (term) == PLUS
16309 && CONST_INT_P (XEXP (term, 1)))
16310 term = XEXP (term, 0);
16311 if (GET_CODE (term) != UNSPEC
16312 || (XINT (term, 1) != UNSPEC_GOTPCREL
16313 && XINT (term, 1) != UNSPEC_PCREL))
16316 return XVECEXP (term, 0, 0);
16319 return ix86_delegitimize_address (x);
16323 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16324 bool fp, FILE *file)
16326 const char *suffix;
16328 if (mode == CCFPmode || mode == CCFPUmode)
16330 code = ix86_fp_compare_code_to_integer (code);
16334 code = reverse_condition (code);
16385 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16389 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
16390 Those same assemblers have the same but opposite lossage on cmov. */
16391 if (mode == CCmode)
16392 suffix = fp ? "nbe" : "a";
16394 gcc_unreachable ();
16410 gcc_unreachable ();
16414 if (mode == CCmode)
16416 else if (mode == CCCmode)
16417 suffix = fp ? "b" : "c";
16419 gcc_unreachable ();
16435 gcc_unreachable ();
16439 if (mode == CCmode)
16441 else if (mode == CCCmode)
16442 suffix = fp ? "nb" : "nc";
16444 gcc_unreachable ();
16447 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
16451 if (mode == CCmode)
16454 gcc_unreachable ();
16457 suffix = fp ? "u" : "p";
16460 suffix = fp ? "nu" : "np";
16463 gcc_unreachable ();
16465 fputs (suffix, file);
16468 /* Print the name of register X to FILE based on its machine mode and number.
16469 If CODE is 'w', pretend the mode is HImode.
16470 If CODE is 'b', pretend the mode is QImode.
16471 If CODE is 'k', pretend the mode is SImode.
16472 If CODE is 'q', pretend the mode is DImode.
16473 If CODE is 'x', pretend the mode is V4SFmode.
16474 If CODE is 't', pretend the mode is V8SFmode.
16475 If CODE is 'g', pretend the mode is V16SFmode.
16476 If CODE is 'h', pretend the reg is the 'high' byte register.
16477 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
16478 If CODE is 'd', duplicate the operand for AVX instruction.
16482 print_reg (rtx x, int code, FILE *file)
16486 unsigned int regno;
16489 if (ASSEMBLER_DIALECT == ASM_ATT)
16494 gcc_assert (TARGET_64BIT);
16495 fputs ("rip", file);
16499 if (code == 'y' && STACK_TOP_P (x))
16501 fputs ("st(0)", file);
16507 else if (code == 'b')
16509 else if (code == 'k')
16511 else if (code == 'q')
16513 else if (code == 'h')
16515 else if (code == 'x')
16517 else if (code == 't')
16519 else if (code == 'g')
16522 msize = GET_MODE_SIZE (GET_MODE (x));
16524 regno = true_regnum (x);
16526 gcc_assert (regno != ARG_POINTER_REGNUM
16527 && regno != FRAME_POINTER_REGNUM
16528 && regno != FLAGS_REG
16529 && regno != FPSR_REG
16530 && regno != FPCR_REG);
16532 duplicated = code == 'd' && TARGET_AVX;
16538 if (LEGACY_INT_REGNO_P (regno))
16539 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
16544 reg = hi_reg_name[regno];
16547 if (regno >= ARRAY_SIZE (qi_reg_name))
16549 reg = qi_reg_name[regno];
16552 if (regno >= ARRAY_SIZE (qi_high_reg_name))
16554 reg = qi_high_reg_name[regno];
16558 if (SSE_REGNO_P (regno))
16560 gcc_assert (!duplicated);
16561 putc (msize == 32 ? 'y' : 'z', file);
16562 reg = hi_reg_name[regno] + 1;
16567 gcc_unreachable ();
16572 /* Irritatingly, AMD extended registers use
16573 different naming convention: "r%d[bwd]" */
16574 if (REX_INT_REGNO_P (regno))
16576 gcc_assert (TARGET_64BIT);
16580 error ("extended registers have no high halves");
16595 error ("unsupported operand size for extended register");
16603 if (ASSEMBLER_DIALECT == ASM_ATT)
16604 fprintf (file, ", %%%s", reg);
16606 fprintf (file, ", %s", reg);
16610 /* Meaning of CODE:
16611 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
16612 C -- print opcode suffix for set/cmov insn.
16613 c -- like C, but print reversed condition
16614 F,f -- likewise, but for floating-point.
16615 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
16617 R -- print embeded rounding and sae.
16618 r -- print only sae.
16619 z -- print the opcode suffix for the size of the current operand.
16620 Z -- likewise, with special suffixes for x87 instructions.
16621 * -- print a star (in certain assembler syntax)
16622 A -- print an absolute memory reference.
16623 E -- print address with DImode register names if TARGET_64BIT.
16624 w -- print the operand as if it's a "word" (HImode) even if it isn't.
16625 s -- print a shift double count, followed by the assemblers argument
16627 b -- print the QImode name of the register for the indicated operand.
16628 %b0 would print %al if operands[0] is reg 0.
16629 w -- likewise, print the HImode name of the register.
16630 k -- likewise, print the SImode name of the register.
16631 q -- likewise, print the DImode name of the register.
16632 x -- likewise, print the V4SFmode name of the register.
16633 t -- likewise, print the V8SFmode name of the register.
16634 g -- likewise, print the V16SFmode name of the register.
16635 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
16636 y -- print "st(0)" instead of "st" as a register.
16637 d -- print duplicated register operand for AVX instruction.
16638 D -- print condition for SSE cmp instruction.
16639 P -- if PIC, print an @PLT suffix.
16640 p -- print raw symbol name.
16641 X -- don't print any sort of PIC '@' suffix for a symbol.
16642 & -- print some in-use local-dynamic symbol name.
16643 H -- print a memory address offset by 8; used for sse high-parts
16644 Y -- print condition for XOP pcom* instruction.
16645 + -- print a branch hint as 'cs' or 'ds' prefix
16646 ; -- print a semicolon (after prefixes due to bug in older gas).
16647 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
16648 @ -- print a segment register of thread base pointer load
16649 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
16650 ! -- print MPX prefix for jxx/call/ret instructions if required.
16654 ix86_print_operand (FILE *file, rtx x, int code)
16661 switch (ASSEMBLER_DIALECT)
16668 /* Intel syntax. For absolute addresses, registers should not
16669 be surrounded by braces. */
16673 ix86_print_operand (file, x, 0);
16680 gcc_unreachable ();
16683 ix86_print_operand (file, x, 0);
16687 /* Wrap address in an UNSPEC to declare special handling. */
16689 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
16691 output_address (VOIDmode, x);
16695 if (ASSEMBLER_DIALECT == ASM_ATT)
16700 if (ASSEMBLER_DIALECT == ASM_ATT)
16705 if (ASSEMBLER_DIALECT == ASM_ATT)
16710 if (ASSEMBLER_DIALECT == ASM_ATT)
16715 if (ASSEMBLER_DIALECT == ASM_ATT)
16720 if (ASSEMBLER_DIALECT == ASM_ATT)
16725 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16726 if (ASSEMBLER_DIALECT != ASM_ATT)
16729 switch (GET_MODE_SIZE (GET_MODE (x)))
16744 output_operand_lossage
16745 ("invalid operand size for operand code 'O'");
16754 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16756 /* Opcodes don't get size suffixes if using Intel opcodes. */
16757 if (ASSEMBLER_DIALECT == ASM_INTEL)
16760 switch (GET_MODE_SIZE (GET_MODE (x)))
16779 output_operand_lossage
16780 ("invalid operand size for operand code 'z'");
16785 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16787 (0, "non-integer operand used with operand code 'z'");
16791 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
16792 if (ASSEMBLER_DIALECT == ASM_INTEL)
16795 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16797 switch (GET_MODE_SIZE (GET_MODE (x)))
16800 #ifdef HAVE_AS_IX86_FILDS
16810 #ifdef HAVE_AS_IX86_FILDQ
16813 fputs ("ll", file);
16821 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16823 /* 387 opcodes don't get size suffixes
16824 if the operands are registers. */
16825 if (STACK_REG_P (x))
16828 switch (GET_MODE_SIZE (GET_MODE (x)))
16849 output_operand_lossage
16850 ("invalid operand type used with operand code 'Z'");
16854 output_operand_lossage
16855 ("invalid operand size for operand code 'Z'");
16874 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
16876 ix86_print_operand (file, x, 0);
16877 fputs (", ", file);
16882 switch (GET_CODE (x))
16885 fputs ("neq", file);
16888 fputs ("eq", file);
16892 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
16896 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
16900 fputs ("le", file);
16904 fputs ("lt", file);
16907 fputs ("unord", file);
16910 fputs ("ord", file);
16913 fputs ("ueq", file);
16916 fputs ("nlt", file);
16919 fputs ("nle", file);
16922 fputs ("ule", file);
16925 fputs ("ult", file);
16928 fputs ("une", file);
16931 output_operand_lossage ("operand is not a condition code, "
16932 "invalid operand code 'Y'");
16938 /* Little bit of braindamage here. The SSE compare instructions
16939 does use completely different names for the comparisons that the
16940 fp conditional moves. */
16941 switch (GET_CODE (x))
16946 fputs ("eq_us", file);
16950 fputs ("eq", file);
16955 fputs ("nge", file);
16959 fputs ("lt", file);
16964 fputs ("ngt", file);
16968 fputs ("le", file);
16971 fputs ("unord", file);
16976 fputs ("neq_oq", file);
16980 fputs ("neq", file);
16985 fputs ("ge", file);
16989 fputs ("nlt", file);
16994 fputs ("gt", file);
16998 fputs ("nle", file);
17001 fputs ("ord", file);
17004 output_operand_lossage ("operand is not a condition code, "
17005 "invalid operand code 'D'");
17012 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17013 if (ASSEMBLER_DIALECT == ASM_ATT)
17019 if (!COMPARISON_P (x))
17021 output_operand_lossage ("operand is not a condition code, "
17022 "invalid operand code '%c'", code);
17025 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17026 code == 'c' || code == 'f',
17027 code == 'F' || code == 'f',
17032 if (!offsettable_memref_p (x))
17034 output_operand_lossage ("operand is not an offsettable memory "
17035 "reference, invalid operand code 'H'");
17038 /* It doesn't actually matter what mode we use here, as we're
17039 only going to use this for printing. */
17040 x = adjust_address_nv (x, DImode, 8);
17041 /* Output 'qword ptr' for intel assembler dialect. */
17042 if (ASSEMBLER_DIALECT == ASM_INTEL)
17047 gcc_assert (CONST_INT_P (x));
17049 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17050 #ifdef HAVE_AS_IX86_HLE
17051 fputs ("xacquire ", file);
17053 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17055 else if (INTVAL (x) & IX86_HLE_RELEASE)
17056 #ifdef HAVE_AS_IX86_HLE
17057 fputs ("xrelease ", file);
17059 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17061 /* We do not want to print value of the operand. */
17065 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17066 fputs ("{z}", file);
17070 gcc_assert (CONST_INT_P (x));
17071 gcc_assert (INTVAL (x) == ROUND_SAE);
17073 if (ASSEMBLER_DIALECT == ASM_INTEL)
17074 fputs (", ", file);
17076 fputs ("{sae}", file);
17078 if (ASSEMBLER_DIALECT == ASM_ATT)
17079 fputs (", ", file);
17084 gcc_assert (CONST_INT_P (x));
17086 if (ASSEMBLER_DIALECT == ASM_INTEL)
17087 fputs (", ", file);
17089 switch (INTVAL (x))
17091 case ROUND_NEAREST_INT | ROUND_SAE:
17092 fputs ("{rn-sae}", file);
17094 case ROUND_NEG_INF | ROUND_SAE:
17095 fputs ("{rd-sae}", file);
17097 case ROUND_POS_INF | ROUND_SAE:
17098 fputs ("{ru-sae}", file);
17100 case ROUND_ZERO | ROUND_SAE:
17101 fputs ("{rz-sae}", file);
17104 gcc_unreachable ();
17107 if (ASSEMBLER_DIALECT == ASM_ATT)
17108 fputs (", ", file);
17113 if (ASSEMBLER_DIALECT == ASM_ATT)
17119 const char *name = get_some_local_dynamic_name ();
17121 output_operand_lossage ("'%%&' used without any "
17122 "local dynamic TLS references");
17124 assemble_name (file, name);
17133 || optimize_function_for_size_p (cfun)
17134 || !TARGET_BRANCH_PREDICTION_HINTS)
17137 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17140 int pred_val = XINT (x, 0);
17142 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17143 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17145 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17147 = final_forward_branch_p (current_output_insn) == 0;
17149 /* Emit hints only in the case default branch prediction
17150 heuristics would fail. */
17151 if (taken != cputaken)
17153 /* We use 3e (DS) prefix for taken branches and
17154 2e (CS) prefix for not taken branches. */
17156 fputs ("ds ; ", file);
17158 fputs ("cs ; ", file);
17166 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17172 if (ASSEMBLER_DIALECT == ASM_ATT)
17175 /* The kernel uses a different segment register for performance
17176 reasons; a system call would not have to trash the userspace
17177 segment register, which would be expensive. */
17178 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
17179 fputs ("fs", file);
17181 fputs ("gs", file);
17185 putc (TARGET_AVX2 ? 'i' : 'f', file);
17189 if (TARGET_64BIT && Pmode != word_mode)
17190 fputs ("addr32 ", file);
17194 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17195 fputs ("bnd ", file);
17199 output_operand_lossage ("invalid operand code '%c'", code);
17204 print_reg (x, code, file);
17206 else if (MEM_P (x))
17208 rtx addr = XEXP (x, 0);
17210 /* No `byte ptr' prefix for call instructions ... */
17211 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17213 machine_mode mode = GET_MODE (x);
17216 /* Check for explicit size override codes. */
17219 else if (code == 'w')
17221 else if (code == 'k')
17223 else if (code == 'q')
17225 else if (code == 'x')
17227 else if (mode == BLKmode)
17228 /* ... or BLKmode operands, when not overridden. */
17231 switch (GET_MODE_SIZE (mode))
17233 case 1: size = "BYTE"; break;
17234 case 2: size = "WORD"; break;
17235 case 4: size = "DWORD"; break;
17236 case 8: size = "QWORD"; break;
17237 case 12: size = "TBYTE"; break;
17239 if (mode == XFmode)
17244 case 32: size = "YMMWORD"; break;
17245 case 64: size = "ZMMWORD"; break;
17247 gcc_unreachable ();
17251 fputs (size, file);
17252 fputs (" PTR ", file);
17256 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17257 output_operand_lossage ("invalid constraints for operand");
17259 ix86_print_operand_address_as
17260 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
17263 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17267 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17269 if (ASSEMBLER_DIALECT == ASM_ATT)
17271 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17273 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17274 (unsigned long long) (int) l);
17276 fprintf (file, "0x%08x", (unsigned int) l);
17279 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17283 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17285 if (ASSEMBLER_DIALECT == ASM_ATT)
17287 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17290 /* These float cases don't actually occur as immediate operands. */
17291 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17295 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17296 fputs (dstr, file);
17301 /* We have patterns that allow zero sets of memory, for instance.
17302 In 64-bit mode, we should probably support all 8-byte vectors,
17303 since we can in fact encode that into an immediate. */
17304 if (GET_CODE (x) == CONST_VECTOR)
17306 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
17310 if (code != 'P' && code != 'p')
17312 if (CONST_INT_P (x))
17314 if (ASSEMBLER_DIALECT == ASM_ATT)
17317 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17318 || GET_CODE (x) == LABEL_REF)
17320 if (ASSEMBLER_DIALECT == ASM_ATT)
17323 fputs ("OFFSET FLAT:", file);
17326 if (CONST_INT_P (x))
17327 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17328 else if (flag_pic || MACHOPIC_INDIRECT)
17329 output_pic_addr_const (file, x, code);
17331 output_addr_const (file, x);
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true iff CODE is
   one of the punctuation characters this backend accepts as a bare
   operand-code in asm templates ('@', '*', '+', '&', ';', '~', '^', '!').  */
17336 ix86_print_operand_punct_valid_p (unsigned char code)
17338 return (code == '@' || code == '*' || code == '+' || code == '&'
17339 || code == ';' || code == '~' || code == '^' || code == '!');
17342 /* Print a memory operand whose address is ADDR. */
/* Emits the textual form of address ADDR to FILE in address space AS
   (generic, %fs or %gs), honoring ASSEMBLER_DIALECT (AT&T vs. Intel).
   NO_RIP suppresses RIP-relative forms (used for 'p'/'P' operand codes).
   NOTE(review): interior lines of this function appear elided in this
   copy (declarations of ok/code/vsib, braces, several branches); the
   comments below describe only the visible code — confirm against the
   complete source.  */
17345 ix86_print_operand_address_as (FILE *file, rtx addr,
17346 addr_space_t as, bool no_rip)
17348 struct ix86_address parts;
17349 rtx base, index, disp;
      /* VSIB addresses wrap the real address in an UNSPEC carrying the
	 vector index and scale as extra operands.  */
17355 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
17357 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17358 gcc_assert (parts.index == NULL_RTX);
17359 parts.index = XVECEXP (addr, 0, 1);
17360 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
17361 addr = XVECEXP (addr, 0, 0);
17364 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
17366 gcc_assert (TARGET_64BIT);
17367 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      /* MPX bound-make address: base and index come from the UNSPEC.  */
17370 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
17372 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
17373 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
17374 if (parts.base != NULL_RTX)
17376 parts.index = parts.base;
17379 parts.base = XVECEXP (addr, 0, 0);
17380 addr = XVECEXP (addr, 0, 0);
      /* MPX bound-load/store address.  */
17382 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
17384 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17385 gcc_assert (parts.index == NULL_RTX);
17386 parts.index = XVECEXP (addr, 0, 1);
17387 addr = XVECEXP (addr, 0, 0);
17390 ok = ix86_decompose_address (addr, &parts);
17395 index = parts.index;
17397 scale = parts.scale;
17399 if (ADDR_SPACE_GENERIC_P (as))
17402 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
      /* Non-generic address space: emit the segment override prefix.  */
17404 if (!ADDR_SPACE_GENERIC_P (as))
17406 const char *string;
17408 if (as == ADDR_SPACE_SEG_FS)
17409 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
17410 else if (as == ADDR_SPACE_SEG_GS)
17411 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
17413 gcc_unreachable ();
17414 fputs (string, file);
17417 /* Use one byte shorter RIP relative addressing for 64bit mode. */
17418 if (TARGET_64BIT && !base && !index && !no_rip)
17422 if (GET_CODE (disp) == CONST
17423 && GET_CODE (XEXP (disp, 0)) == PLUS
17424 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17425 symbol = XEXP (XEXP (disp, 0), 0);
      /* Only non-TLS symbols and labels may be addressed RIP-relative.  */
17427 if (GET_CODE (symbol) == LABEL_REF
17428 || (GET_CODE (symbol) == SYMBOL_REF
17429 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
17433 if (!base && !index)
17435 /* Displacement only requires special attention. */
17436 if (CONST_INT_P (disp))
17438 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC)
17439 fputs ("ds:", file);
17440 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
17443 output_pic_addr_const (file, disp, 0);
17445 output_addr_const (file, disp);
17449 /* Print SImode register names to force addr32 prefix. */
17450 if (SImode_address_operand (addr, VOIDmode))
17454 gcc_assert (TARGET_64BIT);
17455 switch (GET_CODE (addr))
17458 gcc_assert (GET_MODE (addr) == SImode);
17459 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
17463 gcc_assert (GET_MODE (addr) == DImode);
17466 gcc_unreachable ();
17469 gcc_assert (!code);
17475 && CONST_INT_P (disp)
17476 && INTVAL (disp) < -16*1024*1024)
17478 /* X32 runs in 64-bit mode, where displacement, DISP, in
17479 address DISP(%r64), is encoded as 32-bit immediate sign-
17480 extended from 32-bit to 64-bit. For -0x40000300(%r64),
17481 address is %r64 + 0xffffffffbffffd00. When %r64 <
17482 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
17483 which is invalid for x32. The correct address is %r64
17484 - 0x40000300 == 0xf7ffdd64. To properly encode
17485 -0x40000300(%r64) for x32, we zero-extend negative
17486 displacement by forcing addr32 prefix which truncates
17487 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
17488 zero-extend all negative displacements, including -1(%rsp).
17489 However, for small negative displacements, sign-extension
17490 won't cause overflow. We only zero-extend negative
17491 displacements if they < -16*1024*1024, which is also used
17492 to check legitimate address displacements for PIC. */
      /* AT&T syntax: disp(base,index,scale).  */
17496 if (ASSEMBLER_DIALECT == ASM_ATT)
17501 output_pic_addr_const (file, disp, 0);
17502 else if (GET_CODE (disp) == LABEL_REF)
17503 output_asm_label (disp);
17505 output_addr_const (file, disp);
17510 print_reg (base, code, file);
17514 print_reg (index, vsib ? 0 : code, file);
17515 if (scale != 1 || vsib)
17516 fprintf (file, ",%d", scale);
      /* Intel syntax: [base+index*scale+disp], symbol printed first with
	 the integral offset pulled out and appended signed.  */
17522 rtx offset = NULL_RTX;
17526 /* Pull out the offset of a symbol; print any symbol itself. */
17527 if (GET_CODE (disp) == CONST
17528 && GET_CODE (XEXP (disp, 0)) == PLUS
17529 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17531 offset = XEXP (XEXP (disp, 0), 1);
17532 disp = gen_rtx_CONST (VOIDmode,
17533 XEXP (XEXP (disp, 0), 0));
17537 output_pic_addr_const (file, disp, 0);
17538 else if (GET_CODE (disp) == LABEL_REF)
17539 output_asm_label (disp);
17540 else if (CONST_INT_P (disp))
17543 output_addr_const (file, disp);
17549 print_reg (base, code, file);
17552 if (INTVAL (offset) >= 0)
17554 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17565 print_reg (index, vsib ? 0 : code, file);
17566 if (scale != 1 || vsib)
17567 fprintf (file, "*%d", scale);
/* Implement TARGET_PRINT_OPERAND_ADDRESS: print ADDR to FILE in the
   generic address space, allowing RIP-relative forms.  The mode
   argument is unused by this target.  */
17575 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
17577 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
17580 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Handles TLS- and Mach-O-specific UNSPEC address constants that the
   generic constant printer cannot: emits the operand followed by the
   appropriate relocation suffix (@gottpoff, @tpoff, @ntpoff, @dtpoff,
   @gotntpoff, @indntpoff), or the split-stack TLS offset.
   NOTE(review): interior lines (return statements, TARGET_64BIT tests
   selecting between suffixes, case labels) appear elided in this copy;
   comments reflect only the visible code.  */
17583 i386_asm_output_addr_const_extra (FILE *file, rtx x)
17587 if (GET_CODE (x) != UNSPEC)
17590 op = XVECEXP (x, 0, 0);
17591 switch (XINT (x, 1))
17593 case UNSPEC_GOTTPOFF:
17594 output_addr_const (file, op);
17595 /* FIXME: This might be @TPOFF in Sun ld. */
17596 fputs ("@gottpoff", file);
17599 output_addr_const (file, op);
17600 fputs ("@tpoff", file);
17602 case UNSPEC_NTPOFF:
17603 output_addr_const (file, op);
17605 fputs ("@tpoff", file);
17607 fputs ("@ntpoff", file);
17609 case UNSPEC_DTPOFF:
17610 output_addr_const (file, op);
17611 fputs ("@dtpoff", file);
17613 case UNSPEC_GOTNTPOFF:
17614 output_addr_const (file, op);
17616 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17617 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
17619 fputs ("@gotntpoff", file);
17621 case UNSPEC_INDNTPOFF:
17622 output_addr_const (file, op);
17623 fputs ("@indntpoff", file);
17626 case UNSPEC_MACHOPIC_OFFSET:
17627 output_addr_const (file, op);
17629 machopic_output_function_base_name (file);
17633 case UNSPEC_STACK_CHECK:
      /* -fsplit-stack: print the thread-local stack-limit slot as a
	 segment-relative address (%fs on 64-bit, %gs on 32-bit).  */
17637 gcc_assert (flag_split_stack);
17639 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
17640 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
17642 gcc_unreachable ();
17645 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
17656 /* Split one or more double-mode RTL references into pairs of half-mode
17657 references. The RTL can be REG, offsettable MEM, integer constant, or
17658 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
17659 split and "num" is its length. lo_half and hi_half are output arrays
17660 that parallel "operands". */
/* NOTE(review): the switch selecting HALF_MODE from MODE and the MEM_P
   test guarding the adjust_address path appear elided in this copy.  */
17663 split_double_mode (machine_mode mode, rtx operands[],
17664 int num, rtx lo_half[], rtx hi_half[])
17666 machine_mode half_mode;
      /* TImode splits into DImode halves; DImode into SImode halves.  */
17672 half_mode = DImode;
17675 half_mode = SImode;
17678 gcc_unreachable ();
17681 byte = GET_MODE_SIZE (half_mode);
17685 rtx op = operands[num];
17687 /* simplify_subreg refuse to split volatile memory addresses,
17688 but we still have to handle it. */
17691 lo_half[num] = adjust_address (op, half_mode, 0);
17692 hi_half[num] = adjust_address (op, half_mode, byte);
17696 lo_half[num] = simplify_gen_subreg (half_mode, op,
17697 GET_MODE (op) == VOIDmode
17698 ? mode : GET_MODE (op), 0);
17699 hi_half[num] = simplify_gen_subreg (half_mode, op,
17700 GET_MODE (op) == VOIDmode
17701 ? mode : GET_MODE (op), byte);
17706 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
17707 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
17708 is the expression of the binary operation. The output may either be
17709 emitted here, or returned to the caller, like all output_* functions.
17711 There is no guarantee that the operands are the same mode, as they
17712 might be within FLOAT or FLOAT_EXTEND expressions. */
17714 #ifndef SYSV386_COMPAT
17715 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
17716 wants to fix the assemblers because that causes incompatibility
17717 with gcc. No-one wants to fix gcc because that causes
17718 incompatibility with assemblers... You can use the option of
17719 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
17720 #define SYSV386_COMPAT 1
/* NOTE(review): substantial interior lines (declarations of p/ssep,
   mnemonic selection per opcode, the commutative/non-commutative split,
   and the final strcat/return) appear elided in this copy; comments
   below annotate only the visible fragments.  */
17724 output_387_binary_op (rtx insn, rtx *operands)
17726 static char buf[40];
17729 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
17731 /* Even if we do not want to check the inputs, this documents input
17732 constraints. Which helps in understanding the following code. */
      /* x87 form requires dest to equal one source, the other source to
	 be a stack reg or memory, and one operand at the stack top.  */
17735 if (STACK_REG_P (operands[0])
17736 && ((REG_P (operands[1])
17737 && REGNO (operands[0]) == REGNO (operands[1])
17738 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
17739 || (REG_P (operands[2])
17740 && REGNO (operands[0]) == REGNO (operands[2])
17741 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
17742 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
17745 gcc_assert (is_sse);
      /* Select base mnemonic by operation; integer-mode operands use the
	 fi* (integer) variants.  */
17748 switch (GET_CODE (operands[3]))
17751 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17752 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17760 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17761 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17769 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17770 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17778 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17779 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17787 gcc_unreachable ();
      /* SSE path: 3-operand AVX form vs. 2-operand legacy form, with the
	 ss/sd suffix chosen by SFmode vs. DFmode.  */
17794 strcpy (buf, ssep);
17795 if (GET_MODE (operands[0]) == SFmode)
17796 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
17798 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
17802 strcpy (buf, ssep + 1);
17803 if (GET_MODE (operands[0]) == SFmode)
17804 strcat (buf, "ss\t{%2, %0|%0, %2}");
17806 strcat (buf, "sd\t{%2, %0|%0, %2}");
      /* x87 path: pick the operand-order/pop suffix.  */
17812 switch (GET_CODE (operands[3]))
      /* Commutative ops (PLUS/MULT): canonicalize so dest == operands[1].  */
17816 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
17817 std::swap (operands[1], operands[2]);
17819 /* know operands[0] == operands[1]. */
17821 if (MEM_P (operands[2]))
17827 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17829 if (STACK_TOP_P (operands[0]))
17830 /* How is it that we are storing to a dead operand[2]?
17831 Well, presumably operands[1] is dead too. We can't
17832 store the result to st(0) as st(0) gets popped on this
17833 instruction. Instead store to operands[2] (which I
17834 think has to be st(1)). st(1) will be popped later.
17835 gcc <= 2.8.1 didn't have this check and generated
17836 assembly code that the Unixware assembler rejected. */
17837 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17839 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17843 if (STACK_TOP_P (operands[0]))
17844 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17846 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
      /* Non-commutative ops (MINUS/DIV).  */
17851 if (MEM_P (operands[1]))
17857 if (MEM_P (operands[2]))
17863 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17866 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
17867 derived assemblers, confusingly reverse the direction of
17868 the operation for fsub{r} and fdiv{r} when the
17869 destination register is not st(0). The Intel assembler
17870 doesn't have this brain damage. Read !SYSV386_COMPAT to
17871 figure out what the hardware really does. */
17872 if (STACK_TOP_P (operands[0]))
17873 p = "{p\t%0, %2|rp\t%2, %0}";
17875 p = "{rp\t%2, %0|p\t%0, %2}";
17877 if (STACK_TOP_P (operands[0]))
17878 /* As above for fmul/fadd, we can't store to st(0). */
17879 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17881 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17886 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17889 if (STACK_TOP_P (operands[0]))
17890 p = "{rp\t%0, %1|p\t%1, %0}";
17892 p = "{p\t%1, %0|rp\t%0, %1}";
17894 if (STACK_TOP_P (operands[0]))
17895 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
17897 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
17902 if (STACK_TOP_P (operands[0]))
17904 if (STACK_TOP_P (operands[1]))
17905 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17907 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
17910 else if (STACK_TOP_P (operands[1]))
17913 p = "{\t%1, %0|r\t%0, %1}";
17915 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
17921 p = "{r\t%2, %0|\t%0, %2}";
17923 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17929 gcc_unreachable ();
17936 /* Check if a 256bit AVX register is referenced inside of EXP. */
/* Looks through one SUBREG, then returns true iff EXP is a register
   whose mode satisfies VALID_AVX256_REG_OR_OI_MODE.  Used as the
   predicate for the vzeroupper mode-switching machinery below.  */
17939 ix86_check_avx256_register (const_rtx exp)
17941 if (SUBREG_P (exp))
17942 exp = SUBREG_REG (exp);
17944 return (REG_P (exp)
17945 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
17948 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Computes the AVX upper-128 state INSN requires: DIRTY if it reads or
   writes a 256-bit register, CLEAN before a call passing no 256-bit
   arguments, ANY otherwise.
   NOTE(review): the CALL_P (insn) test that presumably guards the
   function-usage loop is elided in this copy — confirm upstream.  */
17951 ix86_avx_u128_mode_needed (rtx_insn *insn)
17957 /* Needed mode is set to AVX_U128_CLEAN if there are
17958 no 256bit modes used in function arguments. */
17959 for (link = CALL_INSN_FUNCTION_USAGE (insn);
17961 link = XEXP (link, 1))
17963 if (GET_CODE (XEXP (link, 0)) == USE)
17965 rtx arg = XEXP (XEXP (link, 0), 0);
17967 if (ix86_check_avx256_register (arg))
17968 return AVX_U128_DIRTY;
17972 return AVX_U128_CLEAN;
17975 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
17976 changes state only when a 256bit register is written to, but we need
17977 to prevent the compiler from moving optimal insertion point above
17978 eventual read from 256bit register. */
17979 subrtx_iterator::array_type array;
17980 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
17981 if (ix86_check_avx256_register (*iter))
17982 return AVX_U128_DIRTY;
17984 return AVX_U128_ANY;
17987 /* Return mode that i387 must be switched into
17988 prior to the execution of insn. */
/* Maps INSN to the x87 control-word state it needs: UNINITIALIZED after
   calls/asm (control word unknown), ANY when unconstrained, otherwise
   the rounding mode from the insn's i387_cw attribute.
   NOTE(review): the CALL_P test and the per-mode TARGET_* gating between
   the attribute checks are elided in this copy.  */
17991 ix86_i387_mode_needed (int entity, rtx_insn *insn)
17993 enum attr_i387_cw mode;
17995 /* The mode UNINITIALIZED is used to store control word after a
17996 function call or ASM pattern. The mode ANY specify that function
17997 has no requirements on the control word and make no changes in the
17998 bits we are interested in. */
18001 || (NONJUMP_INSN_P (insn)
18002 && (asm_noperands (PATTERN (insn)) >= 0
18003 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
18004 return I387_CW_UNINITIALIZED;
18006 if (recog_memoized (insn) < 0)
18007 return I387_CW_ANY;
18009 mode = get_attr_i387_cw (insn);
18014 if (mode == I387_CW_TRUNC)
18019 if (mode == I387_CW_FLOOR)
18024 if (mode == I387_CW_CEIL)
18029 if (mode == I387_CW_MASK_PM)
18034 gcc_unreachable ();
18037 return I387_CW_ANY;
18040 /* Return mode that entity must be switched into
18041 prior to the execution of insn. */
/* TARGET_MODE_NEEDED hook: dispatches on ENTITY to the AVX_U128 or
   i387 control-word helpers above (the switch/case labels are elided
   in this copy).  */
18044 ix86_mode_needed (int entity, rtx_insn *insn)
18049 return ix86_avx_u128_mode_needed (insn);
18054 return ix86_i387_mode_needed (entity, insn);
18056 gcc_unreachable ();
18061 /* Check if a 256bit AVX register is referenced in stores. */
/* note_stores callback: sets *(bool *)DATA when DEST is a 256-bit AVX
   register.  (The assignment to *used is elided in this copy.)  */
18064 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18066 if (ix86_check_avx256_register (dest))
18068 bool *used = (bool *) data;
18073 /* Calculate mode of upper 128bit AVX registers after the insn. */
/* CLEAN after vzeroupper/vzeroall; after a call (guard elided in this
   copy), CLEAN unless a 256-bit register is set; otherwise the incoming
   MODE is preserved.  */
18076 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18078 rtx pat = PATTERN (insn);
18080 if (vzeroupper_operation (pat, VOIDmode)
18081 || vzeroall_operation (pat, VOIDmode))
18082 return AVX_U128_CLEAN;
18084 /* We know that state is clean after CALL insn if there are no
18085 256bit registers used in the function return register. */
18088 bool avx_reg256_found = false;
18089 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18091 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18094 /* Otherwise, return current mode. Remember that if insn
18095 references AVX 256bit registers, the mode was already changed
18096 to DIRTY from MODE_NEEDED. */
18100 /* Return the mode that an insn results in. */
/* TARGET_MODE_AFTER hook: for the AVX_U128 entity delegates to
   ix86_avx_u128_mode_after; other entities (cases elided in this copy)
   presumably pass MODE through.  */
18103 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18108 return ix86_avx_u128_mode_after (mode, insn);
18115 gcc_unreachable ();
/* AVX upper-128 state assumed on function entry: DIRTY if any incoming
   argument arrives in a 256-bit AVX register, CLEAN otherwise.  */
18120 ix86_avx_u128_mode_entry (void)
18124 /* Entry mode is set to AVX_U128_DIRTY if there are
18125 256bit modes used in function arguments. */
18126 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18127 arg = TREE_CHAIN (arg))
18129 rtx incoming = DECL_INCOMING_RTL (arg);
18131 if (incoming && ix86_check_avx256_register (incoming))
18132 return AVX_U128_DIRTY;
18135 return AVX_U128_CLEAN;
18138 /* Return a mode that ENTITY is assumed to be
18139 switched to at function entry. */
/* TARGET_MODE_ENTRY hook: AVX_U128 state from the arguments; the x87
   control word is unconstrained (I387_CW_ANY) on entry.  Switch labels
   are elided in this copy.  */
18142 ix86_mode_entry (int entity)
18147 return ix86_avx_u128_mode_entry ();
18152 return I387_CW_ANY;
18154 gcc_unreachable ();
/* AVX upper-128 state assumed on function exit: DIRTY if the return
   value lives in a 256-bit AVX register, CLEAN otherwise.  */
18159 ix86_avx_u128_mode_exit (void)
18161 rtx reg = crtl->return_rtx;
18163 /* Exit mode is set to AVX_U128_DIRTY if there are
18164 256bit modes used in the function return register. */
18165 if (reg && ix86_check_avx256_register (reg))
18166 return AVX_U128_DIRTY;
18168 return AVX_U128_CLEAN;
18171 /* Return a mode that ENTITY is assumed to be
18172 switched to at function exit. */
/* TARGET_MODE_EXIT hook: mirror of ix86_mode_entry for function exit.
   Switch labels are elided in this copy.  */
18175 ix86_mode_exit (int entity)
18180 return ix86_avx_u128_mode_exit ();
18185 return I387_CW_ANY;
18187 gcc_unreachable ();
/* TARGET_MODE_PRIORITY hook.  Body elided in this copy — presumably
   returns N unchanged (modes are already ordered by priority); the
   first (entity) parameter is unused.  Confirm against full source.  */
18192 ix86_mode_priority (int, int n)
18197 /* Output code to initialize control word copies used by trunc?f?i and
18198 rounding patterns. CURRENT_MODE is set to current control word,
18199 while NEW_MODE is set to new control word. */
/* Stores the current x87 control word (fnstcw) into SLOT_CW_STORED,
   copies it to a register, ORs/ANDs in the rounding-control bits for
   MODE, and stores the modified word into the per-mode stack slot so
   fldcw can load it later.  Two codegen strategies: plain and/or on
   HImode (64-bit, partial-reg-stall, or -Os targets) vs. insvsi bitfield
   insertion.  NOTE(review): the switch headers/braces and some case
   labels are elided in this copy.  */
18202 emit_i387_cw_initialization (int mode)
18204 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18207 enum ix86_stack_slot slot;
18209 rtx reg = gen_reg_rtx (HImode);
18211 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18212 emit_move_insn (reg, copy_rtx (stored_mode));
18214 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18215 || optimize_insn_for_size_p ())
18219 case I387_CW_TRUNC:
18220 /* round toward zero (truncate) */
18221 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18222 slot = SLOT_CW_TRUNC;
18225 case I387_CW_FLOOR:
18226 /* round down toward -oo */
18227 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18228 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18229 slot = SLOT_CW_FLOOR;
18233 /* round up toward +oo */
18234 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18235 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18236 slot = SLOT_CW_CEIL;
18239 case I387_CW_MASK_PM:
18240 /* mask precision exception for nearbyint() */
18241 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18242 slot = SLOT_CW_MASK_PM;
18246 gcc_unreachable ();
      /* Bitfield-insert strategy: write the 2-bit RC field directly.  */
18253 case I387_CW_TRUNC:
18254 /* round toward zero (truncate) */
18255 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18256 slot = SLOT_CW_TRUNC;
18259 case I387_CW_FLOOR:
18260 /* round down toward -oo */
18261 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18262 slot = SLOT_CW_FLOOR;
18266 /* round up toward +oo */
18267 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18268 slot = SLOT_CW_CEIL;
18271 case I387_CW_MASK_PM:
18272 /* mask precision exception for nearbyint() */
18273 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18274 slot = SLOT_CW_MASK_PM;
18278 gcc_unreachable ();
18282 gcc_assert (slot < MAX_386_STACK_LOCALS);
18284 new_mode = assign_386_stack_local (HImode, slot);
18285 emit_move_insn (new_mode, reg);
18288 /* Emit vzeroupper. */
/* Emits a vzeroupper, unless a call-saved SSE register is live at the
   insertion point (vzeroupper would destroy its upper half); in that
   case the early-return paths (elided here) cancel the insertion.  */
18291 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18295 /* Cancel automatic vzeroupper insertion if there are
18296 live call-saved SSE registers at the insertion point. */
18298 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18299 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18303 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18304 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18307 emit_insn (gen_avx_vzeroupper ());
18310 /* Generate one or more insns to set ENTITY to MODE. */
18312 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
18313 is the set of hard registers live at the point where the insn(s)
18314 are to be inserted. */
/* TARGET_MODE_EMIT hook: vzeroupper for AVX_U128 transitions to CLEAN,
   fldcw setup for i387 control-word modes.  Switch labels elided.  */
18317 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18318 HARD_REG_SET regs_live)
18323 if (mode == AVX_U128_CLEAN)
18324 ix86_avx_emit_vzeroupper (regs_live);
18330 if (mode != I387_CW_ANY
18331 && mode != I387_CW_UNINITIALIZED)
18332 emit_i387_cw_initialization (mode);
18335 gcc_unreachable ();
18339 /* Output code for INSN to convert a float to a signed int. OPERANDS
18340 are the insn operands. The output may be [HSD]Imode and the input
18341 operand may be [SDX]Fmode. */
/* Emits fisttp (SSE3 truncating pop) or fist/fistp, loading a custom
   control word (operand 3) beforehand and restoring the old one
   (operand 2) afterwards when a specific rounding mode is required.  */
18344 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
18346 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18347 int dimode_p = GET_MODE (operands[0]) == DImode;
18348 int round_mode = get_attr_i387_cw (insn);
18350 /* Jump through a hoop or two for DImode, since the hardware has no
18351 non-popping instruction. We used to do this a different way, but
18352 that was somewhat fragile and broke with post-reload splitters. */
18353 if ((dimode_p || fisttp) && !stack_top_dies)
18354 output_asm_insn ("fld\t%y1", operands);
18356 gcc_assert (STACK_TOP_P (operands[1]));
18357 gcc_assert (MEM_P (operands[0]));
18358 gcc_assert (GET_MODE (operands[1]) != TFmode);
18361 output_asm_insn ("fisttp%Z0\t%0", operands);
18364 if (round_mode != I387_CW_ANY)
18365 output_asm_insn ("fldcw\t%3", operands);
18366 if (stack_top_dies || dimode_p)
18367 output_asm_insn ("fistp%Z0\t%0", operands);
18369 output_asm_insn ("fist%Z0\t%0", operands);
18370 if (round_mode != I387_CW_ANY)
18371 output_asm_insn ("fldcw\t%2", operands);
18377 /* Output code for x87 ffreep insn. The OPNO argument, which may only
18378 have the values zero or one, indicates the ffreep insn's operand
18379 from the OPERANDS array. */
18381 static const char *
18382 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
18384 if (TARGET_USE_FFREEP)
18385 #ifdef HAVE_AS_IX86_FFREEP
18386 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
      /* Assembler lacks ffreep: emit its raw encoding (0xdf 0xc0+reg)
	 via .short.  The buffer is static, so the returned template
	 stays valid after return (output templates are consumed
	 immediately by the caller).  */
18389 static char retval[32];
18390 int regno = REGNO (operands[opno]);
18392 gcc_assert (STACK_REGNO_P (regno));
18394 regno -= FIRST_STACK_REG;
18396 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      /* Fallback when ffreep is not desirable: plain fstp pops st(0).  */
18401 return opno ? "fstp\t%y1" : "fstp\t%y0";
18405 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
18406 should be used. UNORDERED_P is true when fucom should be used. */
/* Chooses among SSE [u]comis[sd], x87 ftst/fcompp/fcomip families, and
   a table of fcom/ficom variants indexed by (eflags_p, int-mode,
   unordered_p, stack-top-dies).  NOTE(review): several guard lines and
   the final table lookup/return are elided in this copy.  */
18409 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
18411 int stack_top_dies;
18412 rtx cmp_op0, cmp_op1;
18413 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
18417 cmp_op0 = operands[0];
18418 cmp_op1 = operands[1];
18422 cmp_op0 = operands[1];
18423 cmp_op1 = operands[2];
      /* SSE path: scalar compare setting EFLAGS directly.  */
18428 if (GET_MODE (operands[0]) == SFmode)
18430 return "%vucomiss\t{%1, %0|%0, %1}";
18432 return "%vcomiss\t{%1, %0|%0, %1}";
18435 return "%vucomisd\t{%1, %0|%0, %1}";
18437 return "%vcomisd\t{%1, %0|%0, %1}";
18440 gcc_assert (STACK_TOP_P (cmp_op0));
18442 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
      /* Compare against zero: ftst examines st(0) only.  */
18444 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
18446 if (stack_top_dies)
18448 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
18449 return output_387_ffreep (operands, 1);
18452 return "ftst\n\tfnstsw\t%0";
18455 if (STACK_REG_P (cmp_op1)
18457 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
18458 && REGNO (cmp_op1) != FIRST_STACK_REG)
18460 /* If both the top of the 387 stack dies, and the other operand
18461 is also a stack register that dies, then this must be a
18462 `fcompp' float compare */
18466 /* There is no double popping fcomi variant. Fortunately,
18467 eflags is immune from the fstp's cc clobbering. */
18469 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
18471 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
18472 return output_387_ffreep (operands, 0);
18477 return "fucompp\n\tfnstsw\t%0";
18479 return "fcompp\n\tfnstsw\t%0";
18484 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
18486 static const char * const alt[16] =
18488 "fcom%Z2\t%y2\n\tfnstsw\t%0",
18489 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
18490 "fucom%Z2\t%y2\n\tfnstsw\t%0",
18491 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
18493 "ficom%Z2\t%y2\n\tfnstsw\t%0",
18494 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
18498 "fcomi\t{%y1, %0|%0, %y1}",
18499 "fcomip\t{%y1, %0|%0, %y1}",
18500 "fucomi\t{%y1, %0|%0, %y1}",
18501 "fucomip\t{%y1, %0|%0, %y1}",
18512 mask = eflags_p << 3;
18513 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
18514 mask |= unordered_p << 1;
18515 mask |= stack_top_dies;
18517 gcc_assert (mask < 16);
/* Emit one absolute jump-table entry: ".long L<value>" (or ".quad" —
   the guard selecting ASM_QUAD is partially elided in this copy; on
   !TARGET_64BIT the quad path asserts unreachable).  */
18526 ix86_output_addr_vec_elt (FILE *file, int value)
18528 const char *directive = ASM_LONG;
18532 directive = ASM_QUAD;
18534 gcc_assert (!TARGET_64BIT);
18537 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one PIC/relative jump-table entry: a label difference
   (L<value>-L<rel>), an @GOTOFF reference, a Mach-O pic-base
   difference, or a GOT-relative expression, depending on target.  */
18541 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
18543 const char *directive = ASM_LONG;
18546 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
18547 directive = ASM_QUAD;
18549 gcc_assert (!TARGET_64BIT);
18551 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
18552 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18553 fprintf (file, "%s%s%d-%s%d\n",
18554 directive, LPREFIX, value, LPREFIX, rel);
18555 else if (HAVE_AS_GOTOFF_IN_DATA)
18556 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
18558 else if (TARGET_MACHO)
18560 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
18561 machopic_output_function_base_name (file);
18566 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
18567 GOT_SYMBOL_NAME, LPREFIX, value);
18570 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Zeroes DEST after reload.  Sub-word registers are widened to SImode
   to avoid the 66h prefix; when xor is preferable (size or
   !TARGET_USE_MOV0) the set is wrapped in a PARALLEL with a FLAGS_REG
   clobber, since xor clobbers EFLAGS while mov does not.  */
18574 ix86_expand_clear (rtx dest)
18578 /* We play register width games, which are only valid after reload. */
18579 gcc_assert (reload_completed);
18581 /* Avoid HImode and its attendant prefix byte. */
18582 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
18583 dest = gen_rtx_REG (SImode, REGNO (dest));
18584 tmp = gen_rtx_SET (dest, const0_rtx);
18586 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
18588 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18589 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
18595 /* X is an unchanging MEM. If it is a constant pool reference, return
18596 the constant pool rtx, else NULL. */
18599 maybe_get_pool_constant (rtx x)
      /* Strip PIC/GOT wrapping from the address first, then test for a
	 constant-pool symbol.  */
18601 x = ix86_delegitimize_address (XEXP (x, 0));
18603 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
18604 return get_pool_constant (x);
/* Expand a scalar move op0 := op1 in MODE, legitimizing TLS symbols,
   dllimport/PE-COFF symbols, and PIC references, and forcing operands
   into registers/memory where the target's move patterns require it.
   NOTE(review): many interior lines are elided in this copy
   (declarations of op0/op1, several else branches, the Mach-O
   !TARGET_MACHO split, brace structure); comments below annotate only
   the visible code.  */
18610 ix86_expand_move (machine_mode mode, rtx operands[])
18613 enum tls_model model;
      /* Plain SYMBOL_REF source: handle TLS first.  */
18618 if (GET_CODE (op1) == SYMBOL_REF)
18622 model = SYMBOL_REF_TLS_MODEL (op1);
18625 op1 = legitimize_tls_address (op1, model, true);
18626 op1 = force_operand (op1, op0);
18629 op1 = convert_to_mode (mode, op1, 1);
18631 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
      /* (const (plus (symbol_ref ...) (const_int ...))): legitimize the
	 symbol, then re-add the addend.  */
18634 else if (GET_CODE (op1) == CONST
18635 && GET_CODE (XEXP (op1, 0)) == PLUS
18636 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
18638 rtx addend = XEXP (XEXP (op1, 0), 1);
18639 rtx symbol = XEXP (XEXP (op1, 0), 0);
18642 model = SYMBOL_REF_TLS_MODEL (symbol);
18644 tmp = legitimize_tls_address (symbol, model, true);
18646 tmp = legitimize_pe_coff_symbol (symbol, true);
18650 tmp = force_operand (tmp, NULL);
18651 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
18652 op0, 1, OPTAB_DIRECT);
18655 op1 = convert_to_mode (mode, tmp, 1);
      /* PIC (or Mach-O indirect) symbolic operands.  */
18659 if ((flag_pic || MACHOPIC_INDIRECT)
18660 && symbolic_operand (op1, mode))
18662 if (TARGET_MACHO && !TARGET_64BIT)
18665 /* dynamic-no-pic */
18666 if (MACHOPIC_INDIRECT)
18668 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
18669 ? op0 : gen_reg_rtx (Pmode);
18670 op1 = machopic_indirect_data_reference (op1, temp);
18672 op1 = machopic_legitimize_pic_address (op1, mode,
18673 temp == op1 ? 0 : temp);
18675 if (op0 != op1 && GET_CODE (op0) != MEM)
18677 rtx insn = gen_rtx_SET (op0, op1);
18681 if (GET_CODE (op0) == MEM)
18682 op1 = force_reg (Pmode, op1);
18686 if (GET_CODE (temp) != REG)
18687 temp = gen_reg_rtx (Pmode);
18688 temp = legitimize_pic_address (op1, temp);
18693 /* dynamic-no-pic */
18699 op1 = force_reg (mode, op1);
18700 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
18702 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
18703 op1 = legitimize_pic_address (op1, reg);
18706 op1 = convert_to_mode (mode, op1, 1);
      /* Memory-to-memory and push constraints.  */
18713 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
18714 || !push_operand (op0, mode))
18716 op1 = force_reg (mode, op1);
18718 if (push_operand (op0, mode)
18719 && ! general_no_elim_operand (op1, mode))
18720 op1 = copy_to_mode_reg (mode, op1);
18722 /* Force large constants in 64bit compilation into register
18723 to get them CSEed. */
18724 if (can_create_pseudo_p ()
18725 && (mode == DImode) && TARGET_64BIT
18726 && immediate_operand (op1, mode)
18727 && !x86_64_zext_immediate_operand (op1, VOIDmode)
18728 && !register_operand (op0, mode)
18730 op1 = copy_to_mode_reg (mode, op1);
18732 if (can_create_pseudo_p ()
18733 && CONST_DOUBLE_P (op1))
18735 /* If we are loading a floating point constant to a register,
18736 force the value to memory now, since we'll get better code
18737 out the back end. */
18739 op1 = validize_mem (force_const_mem (mode, op1));
18740 if (!register_operand (op0, mode))
18742 rtx temp = gen_reg_rtx (mode);
18743 emit_insn (gen_rtx_SET (temp, op1));
18744 emit_move_insn (op0, temp);
18750 emit_insn (gen_rtx_SET (op0, op1));
/* Expand a vector move op0 := op1 in MODE: resolve pushes, force
   non-zero constants to the constant pool, and route misaligned SSE
   memory operands through ix86_expand_vector_move_misalign.  */
18754 ix86_expand_vector_move (machine_mode mode, rtx operands[])
18756 rtx op0 = operands[0], op1 = operands[1];
18757 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
18758 psABI since the biggest alignment is 4 byte for IA MCU psABI. */
18759 unsigned int align = (TARGET_IAMCU
18760 ? GET_MODE_BITSIZE (mode)
18761 : GET_MODE_ALIGNMENT (mode));
18763 if (push_operand (op0, VOIDmode))
18764 op0 = emit_move_resolve_push (mode, op0);
18766 /* Force constants other than zero into memory. We do not know how
18767 the instructions used to build constants modify the upper 64 bits
18768 of the register, once we have that information we may be able
18769 to handle some of them more efficiently. */
18770 if (can_create_pseudo_p ()
18771 && register_operand (op0, mode)
18772 && (CONSTANT_P (op1)
18774 && CONSTANT_P (SUBREG_REG (op1))))
18775 && !standard_sse_constant_p (op1))
18776 op1 = validize_mem (force_const_mem (mode, op1));
18778 /* We need to check memory alignment for SSE mode since attribute
18779 can make operands unaligned. */
18780 if (can_create_pseudo_p ()
18781 && SSE_REG_MODE_P (mode)
18782 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
18783 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
      /* Misaligned SSE operand: delegate to the misalign expander, which
	 cannot take constants or mem/mem pairs.  */
18787 /* ix86_expand_vector_move_misalign() does not like constants ... */
18788 if (CONSTANT_P (op1)
18790 && CONSTANT_P (SUBREG_REG (op1))))
18791 op1 = validize_mem (force_const_mem (mode, op1));
18793 /* ... nor both arguments in memory. */
18794 if (!register_operand (op0, mode)
18795 && !register_operand (op1, mode))
18796 op1 = force_reg (mode, op1);
18798 tmp[0] = op0; tmp[1] = op1;
18799 ix86_expand_vector_move_misalign (mode, tmp);
18803 /* Make operand1 a register if it isn't already. */
18804 if (can_create_pseudo_p ()
18805 && !register_operand (op0, mode)
18806 && !register_operand (op1, mode))
18808 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
18812 emit_insn (gen_rtx_SET (op0, op1));
18815 /* Split 32-byte AVX unaligned load and store if needed. */
/* Selects per-mode extract/load/store generators, then either splits the
   32-byte access into two 16-byte halves (when the split tunings and
   optimize-for-speed apply) or emits a single unaligned load/store.
   NOTE(review): line-sampled extract — the `case` labels of the mode
   switch and the MEM_P (op1) branch header are missing from view.  */
18818 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
18821 rtx (*extract) (rtx, rtx, rtx);
18822 rtx (*load_unaligned) (rtx, rtx);
18823 rtx (*store_unaligned) (rtx, rtx);
18826 switch (GET_MODE (op0))
18829 gcc_unreachable ();
18831 extract = gen_avx_vextractf128v32qi;
18832 load_unaligned = gen_avx_loaddquv32qi;
18833 store_unaligned = gen_avx_storedquv32qi;
18837 extract = gen_avx_vextractf128v8sf;
18838 load_unaligned = gen_avx_loadups256;
18839 store_unaligned = gen_avx_storeups256;
18843 extract = gen_avx_vextractf128v4df;
18844 load_unaligned = gen_avx_loadupd256;
18845 store_unaligned = gen_avx_storeupd256;
/* Load side: split into two 16-byte loads plus a VEC_CONCAT.  */
18852 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
18853 && optimize_insn_for_speed_p ())
18855 rtx r = gen_reg_rtx (mode);
18856 m = adjust_address (op1, mode, 0);
18857 emit_move_insn (r, m);
18858 m = adjust_address (op1, mode, 16);
18859 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
18860 emit_move_insn (op0, r);
18862 /* Normal *mov<mode>_internal pattern will handle
18863 unaligned loads just fine if misaligned_operand
18864 is true, and without the UNSPEC it can be combined
18865 with arithmetic instructions. */
18866 else if (misaligned_operand (op1, GET_MODE (op1)))
18867 emit_insn (gen_rtx_SET (op0, op1));
18869 emit_insn (load_unaligned (op0, op1));
/* Store side: two 128-bit extracts, or one unaligned store.  */
18871 else if (MEM_P (op0))
18873 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
18874 && optimize_insn_for_speed_p ())
18876 m = adjust_address (op0, mode, 0);
18877 emit_insn (extract (m, op1, const0_rtx));
18878 m = adjust_address (op0, mode, 16);
18879 emit_insn (extract (m, op1, const1_rtx));
18882 emit_insn (store_unaligned (op0, op1));
18885 gcc_unreachable ();
18888 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
18889 straight to ix86_expand_vector_move. */
18890 /* Code generation for scalar reg-reg moves of single and double precision data:
18891 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
18895 if (x86_sse_partial_reg_dependency == true)
18900 Code generation for scalar loads of double precision data:
18901 if (x86_sse_split_regs == true)
18902 movlpd mem, reg (gas syntax)
18906 Code generation for unaligned packed loads of single precision data
18907 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
18908 if (x86_sse_unaligned_move_optimal)
18911 if (x86_sse_partial_reg_dependency == true)
18923 Code generation for unaligned packed loads of double precision data
18924 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
18925 if (x86_sse_unaligned_move_optimal)
18928 if (x86_sse_split_regs == true)
/* Dispatches on vector size (64-byte AVX-512, 32-byte AVX, 16-byte SSE)
   and on load vs. store, choosing between whole unaligned moves and
   split half-register sequences according to the tuning flags above.
   NOTE(review): heavily line-sampled — the op0/op1 initialization, many
   `case`/`break` lines and brace lines are missing from this view; do not
   infer control structure from what is visible alone.  */
18941 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
18943 rtx op0, op1, orig_op0 = NULL_RTX, m;
18944 rtx (*load_unaligned) (rtx, rtx);
18945 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte (AVX-512) vectors: integer modes are punned to V16SImode.  */
18950 if (GET_MODE_SIZE (mode) == 64)
18952 switch (GET_MODE_CLASS (mode))
18954 case MODE_VECTOR_INT:
18956 if (GET_MODE (op0) != V16SImode)
18961 op0 = gen_reg_rtx (V16SImode);
18964 op0 = gen_lowpart (V16SImode, op0);
18966 op1 = gen_lowpart (V16SImode, op1);
18969 case MODE_VECTOR_FLOAT:
18970 switch (GET_MODE (op0))
18973 gcc_unreachable ();
18975 load_unaligned = gen_avx512f_loaddquv16si;
18976 store_unaligned = gen_avx512f_storedquv16si;
18979 load_unaligned = gen_avx512f_loadups512;
18980 store_unaligned = gen_avx512f_storeups512;
18983 load_unaligned = gen_avx512f_loadupd512;
18984 store_unaligned = gen_avx512f_storeupd512;
18989 emit_insn (load_unaligned (op0, op1));
18990 else if (MEM_P (op0))
18991 emit_insn (store_unaligned (op0, op1));
18993 gcc_unreachable ();
18995 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18999 gcc_unreachable ();
/* 32-byte (AVX) vectors: integer modes punned to V32QImode; float modes
   delegated to the 256-bit split helper.  */
19006 && GET_MODE_SIZE (mode) == 32)
19008 switch (GET_MODE_CLASS (mode))
19010 case MODE_VECTOR_INT:
19012 if (GET_MODE (op0) != V32QImode)
19017 op0 = gen_reg_rtx (V32QImode);
19020 op0 = gen_lowpart (V32QImode, op0);
19022 op1 = gen_lowpart (V32QImode, op1);
19025 case MODE_VECTOR_FLOAT:
19026 ix86_avx256_split_vector_move_misalign (op0, op1);
19028 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19032 gcc_unreachable ();
19040 /* Normal *mov<mode>_internal pattern will handle
19041 unaligned loads just fine if misaligned_operand
19042 is true, and without the UNSPEC it can be combined
19043 with arithmetic instructions. */
19045 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
19046 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
19047 && misaligned_operand (op1, GET_MODE (op1)))
19048 emit_insn (gen_rtx_SET (op0, op1));
19049 /* ??? If we have typed data, then it would appear that using
19050 movdqu is the only way to get unaligned data loaded with
/* 16-byte SSE loads: movdqu for integer vectors ...  */
19052 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19054 if (GET_MODE (op0) != V16QImode)
19057 op0 = gen_reg_rtx (V16QImode);
19059 op1 = gen_lowpart (V16QImode, op1);
19060 /* We will eventually emit movups based on insn attributes. */
19061 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
19063 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
/* ... movupd or a loadlpd/loadhpd pair for V2DF, per tuning.  */
19065 else if (TARGET_SSE2 && mode == V2DFmode)
19070 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19071 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19072 || optimize_insn_for_size_p ())
19074 /* We will eventually emit movups based on insn attributes. */
19075 emit_insn (gen_sse2_loadupd (op0, op1));
19079 /* When SSE registers are split into halves, we can avoid
19080 writing to the top half twice. */
19081 if (TARGET_SSE_SPLIT_REGS)
19083 emit_clobber (op0);
19088 /* ??? Not sure about the best option for the Intel chips.
19089 The following would seem to satisfy; the register is
19090 entirely cleared, breaking the dependency chain. We
19091 then store to the upper half, with a dependency depth
19092 of one. A rumor has it that Intel recommends two movsd
19093 followed by an unpacklpd, but this is unconfirmed. And
19094 given that the dependency depth of the unpacklpd would
19095 still be one, I'm not sure why this would be better. */
19096 zero = CONST0_RTX (V2DFmode);
19099 m = adjust_address (op1, DFmode, 0);
19100 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19101 m = adjust_address (op1, DFmode, 8);
19102 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* ... movups or loadlps/loadhps pair for single-precision.  */
19109 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19110 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19111 || optimize_insn_for_size_p ())
19113 if (GET_MODE (op0) != V4SFmode)
19116 op0 = gen_reg_rtx (V4SFmode);
19118 op1 = gen_lowpart (V4SFmode, op1);
19119 emit_insn (gen_sse_loadups (op0, op1));
19121 emit_move_insn (orig_op0,
19122 gen_lowpart (GET_MODE (orig_op0), op0));
19126 if (mode != V4SFmode)
19127 t = gen_reg_rtx (V4SFmode);
19131 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19132 emit_move_insn (t, CONST0_RTX (V4SFmode));
19136 m = adjust_address (op1, V2SFmode, 0);
19137 emit_insn (gen_sse_loadlps (t, t, m));
19138 m = adjust_address (op1, V2SFmode, 8);
19139 emit_insn (gen_sse_loadhps (t, t, m));
19140 if (mode != V4SFmode)
19141 emit_move_insn (op0, gen_lowpart (mode, t));
/* 16-byte SSE stores mirror the load strategies above.  */
19144 else if (MEM_P (op0))
19146 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19148 op0 = gen_lowpart (V16QImode, op0);
19149 op1 = gen_lowpart (V16QImode, op1);
19150 /* We will eventually emit movups based on insn attributes. */
19151 emit_insn (gen_sse2_storedquv16qi (op0, op1));
19153 else if (TARGET_SSE2 && mode == V2DFmode)
19156 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19157 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19158 || optimize_insn_for_size_p ())
19159 /* We will eventually emit movups based on insn attributes. */
19160 emit_insn (gen_sse2_storeupd (op0, op1));
19163 m = adjust_address (op0, DFmode, 0);
19164 emit_insn (gen_sse2_storelpd (m, op1));
19165 m = adjust_address (op0, DFmode, 8);
19166 emit_insn (gen_sse2_storehpd (m, op1));
19171 if (mode != V4SFmode)
19172 op1 = gen_lowpart (V4SFmode, op1);
19175 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19176 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19177 || optimize_insn_for_size_p ())
19179 op0 = gen_lowpart (V4SFmode, op0);
19180 emit_insn (gen_sse_storeups (op0, op1));
19184 m = adjust_address (op0, V2SFmode, 0);
19185 emit_insn (gen_sse_storelps (m, op1));
19186 m = adjust_address (op0, V2SFmode, 8);
19187 emit_insn (gen_sse_storehps (m, op1));
19192 gcc_unreachable ();
19195 /* Helper function of ix86_fixup_binary_operands to canonicalize
19196 operand order. Returns true if the operands should be swapped. */
/* Priority order (visible here): dst-matching source first, immediates
   second, then memory references second.  NOTE(review): the return
   statements and final memory-priority test are missing from this
   sampled view.  */
19199 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19202 rtx dst = operands[0];
19203 rtx src1 = operands[1];
19204 rtx src2 = operands[2];
19206 /* If the operation is not commutative, we can't do anything. */
19207 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19210 /* Highest priority is that src1 should match dst. */
19211 if (rtx_equal_p (dst, src1))
19213 if (rtx_equal_p (dst, src2))
19216 /* Next highest priority is that immediate constants come second. */
19217 if (immediate_operand (src2, mode))
19219 if (immediate_operand (src1, mode))
19222 /* Lowest priority is that memory references should come second. */
19232 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19233 destination to use for the operation. If different from the true
19234 destination in operands[0], a copy operation will be required. */
19237 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19240 rtx dst = operands[0];
19241 rtx src1 = operands[1];
19242 rtx src2 = operands[2];
19244 /* Canonicalize operand order. */
19245 if (ix86_swap_binary_operands_p (code, mode, operands))
19247 /* It is invalid to swap operands of different modes. */
19248 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19250 std::swap (src1, src2);
19253 /* Both source operands cannot be in memory. */
19254 if (MEM_P (src1) && MEM_P (src2))
19256 /* Optimization: Only read from memory once. */
19257 if (rtx_equal_p (src1, src2))
19259 src2 = force_reg (mode, src2);
19262 else if (rtx_equal_p (dst, src1))
19263 src2 = force_reg (mode, src2);
19265 src1 = force_reg (mode, src1);
19268 /* If the destination is memory, and we do not have matching source
19269 operands, do things in registers. */
19270 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19271 dst = gen_reg_rtx (mode);
19273 /* Source 1 cannot be a constant. */
19274 if (CONSTANT_P (src1))
19275 src1 = force_reg (mode, src1);
19277 /* Source 1 cannot be a non-matching memory. */
19278 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19279 src1 = force_reg (mode, src1);
19281 /* Improve address combine. */
/* NOTE(review): the PLUS/code guard of this condition is missing from
   the sampled view.  */
19283 && GET_MODE_CLASS (mode) == MODE_INT
19285 src2 = force_reg (mode, src2);
/* Write back the (possibly register-forced) sources; dst is returned.  */
19287 operands[1] = src1;
19288 operands[2] = src2;
19292 /* Similarly, but assume that the destination has already been
19293 set up properly. */
/* Thin wrapper: asserts that fixing up did not need a fresh
   destination register.  */
19296 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19297 machine_mode mode, rtx operands[])
19299 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19300 gcc_assert (dst == operands[0]);
19303 /* Attempt to expand a binary operator. Make the expansion closer to the
19304 actual machine, then just general_operand, which will allow 3 separate
19305 memory references (one output, two input) in a single insn. */
19308 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19311 rtx src1, src2, dst, op, clob;
19313 dst = ix86_fixup_binary_operands (code, mode, operands);
19314 src1 = operands[1];
19315 src2 = operands[2];
19317 /* Emit the instruction. */
19319 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19321 if (reload_completed
19323 && !rtx_equal_p (dst, src1))
19325 /* This is going to be an LEA; avoid splitting it later. */
/* Otherwise attach the flags clobber every ALU insn needs.  */
19330 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19331 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19334 /* Fix up the destination if needed. */
19335 if (dst != operands[0])
19336 emit_move_insn (operands[0], dst);
19339 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19340 the given OPERANDS. */
19343 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19346 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19347 if (SUBREG_P (operands[1]))
19352 else if (SUBREG_P (operands[2]))
19357 /* Optimize (__m128i) d | (__m128i) e and similar code
19358 when d and e are float vectors into float vector logical
19359 insn. In C/C++ without using intrinsics there is no other way
19360 to express vector logical operation on float vectors than
19361 to cast them temporarily to integer vectors. */
/* NOTE(review): the leading TARGET_SSE-style guard of this condition is
   missing from the sampled view.  */
19363 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19364 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19365 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19366 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19367 && SUBREG_BYTE (op1) == 0
19368 && (GET_CODE (op2) == CONST_VECTOR
19369 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19370 && SUBREG_BYTE (op2) == 0))
19371 && can_create_pseudo_p ())
19374 switch (GET_MODE (SUBREG_REG (op1)))
/* Perform the logical op in the float-vector mode, then copy the
   lowpart back to the integer-mode destination.  */
19382 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19383 if (GET_CODE (op2) == CONST_VECTOR)
19385 op2 = gen_lowpart (GET_MODE (dst), op2);
19386 op2 = force_reg (GET_MODE (dst), op2);
19391 op2 = SUBREG_REG (operands[2]);
19392 if (!vector_operand (op2, GET_MODE (dst)))
19393 op2 = force_reg (GET_MODE (dst), op2);
19395 op1 = SUBREG_REG (op1);
19396 if (!vector_operand (op1, GET_MODE (dst)))
19397 op1 = force_reg (GET_MODE (dst), op1);
19398 emit_insn (gen_rtx_SET (dst,
19399 gen_rtx_fmt_ee (code, GET_MODE (dst),
19401 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Generic path: force non-vector operands to registers and emit.  */
19407 if (!vector_operand (operands[1], mode))
19408 operands[1] = force_reg (mode, operands[1]);
19409 if (!vector_operand (operands[2], mode))
19410 operands[2] = force_reg (mode, operands[2]);
19411 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19412 emit_insn (gen_rtx_SET (operands[0],
19413 gen_rtx_fmt_ee (code, mode, operands[1],
19417 /* Return TRUE or FALSE depending on whether the binary operator meets the
19418 appropriate constraints. */
19421 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19424 rtx dst = operands[0];
19425 rtx src1 = operands[1];
19426 rtx src2 = operands[2];
19428 /* Both source operands cannot be in memory. */
19429 if (MEM_P (src1) && MEM_P (src2))
19432 /* Canonicalize operand order for commutative operators. */
19433 if (ix86_swap_binary_operands_p (code, mode, operands))
19434 std::swap (src1, src2);
19436 /* If the destination is memory, we must have a matching source operand. */
19437 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19440 /* Source 1 cannot be a constant. */
19441 if (CONSTANT_P (src1))
19444 /* Source 1 cannot be a non-matching memory. */
19445 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19446 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19447 return (code == AND
/* NOTE(review): the HImode/SImode arm of this mode test is missing from
   the sampled view.  */
19450 || (TARGET_64BIT && mode == DImode))
19451 && satisfies_constraint_L (src2));
19456 /* Attempt to expand a unary operator. Make the expansion closer to the
19457 actual machine, then just general_operand, which will allow 2 separate
19458 memory references (one output, one input) in a single insn. */
19461 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19464 bool matching_memory = false;
19465 rtx src, dst, op, clob;
19470 /* If the destination is memory, and we do not have matching source
19471 operands, do things in registers. */
19474 if (rtx_equal_p (dst, src))
19475 matching_memory = true;
19477 dst = gen_reg_rtx (mode);
19480 /* When source operand is memory, destination must match. */
19481 if (MEM_P (src) && !matching_memory)
19482 src = force_reg (mode, src);
19484 /* Emit the instruction. */
19486 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
/* Attach the flags clobber (NOT is the exception in the full file;
   that branch is not visible in this sampled view).  */
19492 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19493 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19496 /* Fix up the destination if needed. */
19497 if (dst != operands[0])
19498 emit_move_insn (operands[0], dst);
19501 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
19502 divisor are within the range [0-255]. */
19505 ix86_split_idivmod (machine_mode mode, rtx operands[],
19508 rtx_code_label *end_label, *qimode_label;
19509 rtx insn, div, mod;
19510 rtx scratch, tmp0, tmp1, tmp2;
19511 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
19512 rtx (*gen_zero_extend) (rtx, rtx);
19513 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Select SImode or DImode generator variants.  NOTE(review): the
   switch header and case labels are missing from this sampled view.  */
19518 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
19519 gen_test_ccno_1 = gen_testsi_ccno_1;
19520 gen_zero_extend = gen_zero_extendqisi2;
19523 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
19524 gen_test_ccno_1 = gen_testdi_ccno_1;
19525 gen_zero_extend = gen_zero_extendqidi2;
19528 gcc_unreachable ();
19531 end_label = gen_label_rtx ();
19532 qimode_label = gen_label_rtx ();
19534 scratch = gen_reg_rtx (mode);
19536 /* Use 8bit unsigned divimod if dividend and divisor are within
19537 the range [0-255]. */
19538 emit_move_insn (scratch, operands[2]);
19539 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
19540 scratch, 1, OPTAB_DIRECT)
19541 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
19542 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
19543 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
19544 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
19545 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
19547 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
/* 50/50 branch prediction: no information either way.  */
19548 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19549 JUMP_LABEL (insn) = qimode_label;
19551 /* Generate original signed/unsigned divimod. */
19552 div = gen_divmod4_1 (operands[0], operands[1],
19553 operands[2], operands[3]);
19556 /* Branch to the end. */
19557 emit_jump_insn (gen_jump (end_label));
19560 /* Generate 8bit unsigned divide. */
19561 emit_label (qimode_label);
19562 /* Don't use operands[0] for result of 8bit divide since not all
19563 registers support QImode ZERO_EXTRACT. */
19564 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
19565 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
19566 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
19567 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* Build REG_EQUAL notes describing the signed / unsigned results.  */
19571 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
19572 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
19576 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
19577 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
19580 /* Extract remainder from AH. */
19581 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
19582 if (REG_P (operands[1]))
19583 insn = emit_move_insn (operands[1], tmp1);
19586 /* Need a new scratch register since the old one has result
19588 scratch = gen_reg_rtx (mode);
19589 emit_move_insn (scratch, tmp1);
19590 insn = emit_move_insn (operands[1], scratch);
19592 set_unique_reg_note (insn, REG_EQUAL, mod);
19594 /* Zero extend quotient from AL. */
19595 tmp1 = gen_lowpart (QImode, tmp0);
19596 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
19597 set_unique_reg_note (insn, REG_EQUAL, div);
19599 emit_label (end_label);
19602 #define LEA_MAX_STALL (3)
19603 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
19605 /* Increase given DISTANCE in half-cycles according to
19606 dependencies between PREV and NEXT instructions.
19607 Add 1 half-cycle if there is no dependency and
19608 go to next cycle if there is some dependecy. */
19610 static unsigned int
19611 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
/* NOTE(review): the df_ref use/def declarations are missing from this
   sampled view.  */
19615 if (!prev || !next)
19616 return distance + (distance & 1) + 2;
19618 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
19619 return distance + 1;
/* Any shared register between PREV's defs and NEXT's uses means a
   dependency: round up to the next full cycle.  */
19621 FOR_EACH_INSN_USE (use, next)
19622 FOR_EACH_INSN_DEF (def, prev)
19623 if (!DF_REF_IS_ARTIFICIAL (def)
19624 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
19625 return distance + (distance & 1) + 2;
19627 return distance + 1;
19630 /* Function checks if instruction INSN defines register number
19631 REGNO1 or REGNO2. */
19634 insn_defines_reg (unsigned int regno1, unsigned int regno2,
/* Scan INSN's dataflow defs for either register number; the true/false
   returns are missing from this sampled view.  */
19639 FOR_EACH_INSN_DEF (def, insn)
19640 if (DF_REF_REG_DEF_P (def)
19641 && !DF_REF_IS_ARTIFICIAL (def)
19642 && (regno1 == DF_REF_REGNO (def)
19643 || regno2 == DF_REF_REGNO (def)))
19649 /* Function checks if instruction INSN uses register number
19650 REGNO as a part of address expression. */
19653 insn_uses_reg_mem (unsigned int regno, rtx insn)
/* DF_REF_REG_MEM_P selects uses inside memory addresses only.  */
19657 FOR_EACH_INSN_USE (use, insn)
19658 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
19664 /* Search backward for non-agu definition of register number REGNO1
19665 or register number REGNO2 in basic block starting from instruction
19666 START up to head of basic block or instruction INSN.
19668 Function puts true value into *FOUND var if definition was found
19669 and false otherwise.
19671 Distance in half-cycles between START and found instruction or head
19672 of BB is added to DISTANCE and returned. */
19675 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
19676 rtx_insn *insn, int distance,
19677 rtx_insn *start, bool *found)
19679 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
19680 rtx_insn *prev = start;
19681 rtx_insn *next = NULL;
/* Walk backwards while within the search threshold.  NOTE(review): the
   loop header's other conditions are missing from this sampled view.  */
19687 && distance < LEA_SEARCH_THRESHOLD)
19689 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
19691 distance = increase_distance (prev, next, distance);
19692 if (insn_defines_reg (regno1, regno2, prev))
19694 if (recog_memoized (prev) < 0
19695 || get_attr_type (prev) != TYPE_LEA)
/* A non-LEA definition counts; an LEA def ends the search without
   a find (tail of that branch is not visible here).  */
19704 if (prev == BB_HEAD (bb))
19707 prev = PREV_INSN (prev);
19713 /* Search backward for non-agu definition of register number REGNO1
19714 or register number REGNO2 in INSN's basic block until
19715 1. Pass LEA_SEARCH_THRESHOLD instructions, or
19716 2. Reach neighbor BBs boundary, or
19717 3. Reach agu definition.
19718 Returns the distance between the non-agu definition point and INSN.
19719 If no definition point, returns -1. */
19722 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
19725 basic_block bb = BLOCK_FOR_INSN (insn);
19727 bool found = false;
19729 if (insn != BB_HEAD (bb))
19730 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
19731 distance, PREV_INSN (insn),
/* Not found locally: look into predecessor blocks, special-casing a
   self-loop ("simple_loop") versus scanning each predecessor and
   keeping the shortest positive distance.  */
19734 if (!found && distance < LEA_SEARCH_THRESHOLD)
19738 bool simple_loop = false;
19740 FOR_EACH_EDGE (e, ei, bb->preds)
19743 simple_loop = true;
19748 distance = distance_non_agu_define_in_bb (regno1, regno2,
19750 BB_END (bb), &found);
19753 int shortest_dist = -1;
19754 bool found_in_bb = false;
19756 FOR_EACH_EDGE (e, ei, bb->preds)
19759 = distance_non_agu_define_in_bb (regno1, regno2,
19765 if (shortest_dist < 0)
19766 shortest_dist = bb_dist;
19767 else if (bb_dist > 0)
19768 shortest_dist = MIN (bb_dist, shortest_dist);
19774 distance = shortest_dist;
19778 /* get_attr_type may modify recog data. We want to make sure
19779 that recog data is valid for instruction INSN, on which
19780 distance_non_agu_define is called. INSN is unchanged here. */
19781 extract_insn_cached (insn);
/* Convert half-cycles to cycles.  */
19786 return distance >> 1;
19789 /* Return the distance in half-cycles between INSN and the next
19790 insn that uses register number REGNO in memory address added
19791 to DISTANCE. Return -1 if REGNO0 is set.
19793 Put true value into *FOUND if register usage was found and
19795 Put true value into *REDEFINED if register redefinition was
19796 found and false otherwise. */
19799 distance_agu_use_in_bb (unsigned int regno,
19800 rtx_insn *insn, int distance, rtx_insn *start,
19801 bool *found, bool *redefined)
19803 basic_block bb = NULL;
19804 rtx_insn *next = start;
19805 rtx_insn *prev = NULL;
19808 *redefined = false;
19810 if (start != NULL_RTX)
19812 bb = BLOCK_FOR_INSN (start);
19813 if (start != BB_HEAD (bb))
19814 /* If insn and start belong to the same bb, set prev to insn,
19815 so the call to increase_distance will increase the distance
19816 between insns by 1. */
/* Forward scan, bounded by the search threshold.  NOTE(review): the
   loop header's other conditions are missing from this sampled view.  */
19822 && distance < LEA_SEARCH_THRESHOLD)
19824 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
19826 distance = increase_distance(prev, next, distance);
19827 if (insn_uses_reg_mem (regno, next))
19829 /* Return DISTANCE if OP0 is used in memory
19830 address in NEXT. */
19835 if (insn_defines_reg (regno, INVALID_REGNUM, next))
19837 /* Return -1 if OP0 is set in NEXT. */
19845 if (next == BB_END (bb))
19848 next = NEXT_INSN (next);
19854 /* Return the distance between INSN and the next insn that uses
19855 register number REGNO0 in memory address. Return -1 if no such
19856 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
19859 distance_agu_use (unsigned int regno0, rtx_insn *insn)
19861 basic_block bb = BLOCK_FOR_INSN (insn);
19863 bool found = false;
19864 bool redefined = false;
19866 if (insn != BB_END (bb))
19867 distance = distance_agu_use_in_bb (regno0, insn, distance,
19869 &found, &redefined);
/* Mirror of distance_non_agu_define, but scanning successor blocks
   forward instead of predecessors backward.  */
19871 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
19875 bool simple_loop = false;
19877 FOR_EACH_EDGE (e, ei, bb->succs)
19880 simple_loop = true;
19885 distance = distance_agu_use_in_bb (regno0, insn,
19886 distance, BB_HEAD (bb),
19887 &found, &redefined);
19890 int shortest_dist = -1;
19891 bool found_in_bb = false;
19892 bool redefined_in_bb = false;
19894 FOR_EACH_EDGE (e, ei, bb->succs)
19897 = distance_agu_use_in_bb (regno0, insn,
19898 distance, BB_HEAD (e->dest),
19899 &found_in_bb, &redefined_in_bb);
19902 if (shortest_dist < 0)
19903 shortest_dist = bb_dist;
19904 else if (bb_dist > 0)
19905 shortest_dist = MIN (bb_dist, shortest_dist);
19911 distance = shortest_dist;
19915 if (!found || redefined)
/* Convert half-cycles to cycles.  */
19918 return distance >> 1;
19921 /* Define this macro to tune LEA priority vs ADD, it take effect when
19922 there is a dilemma of choicing LEA or ADD
19923 Negative value: ADD is more preferred than LEA
19925 Positive value: LEA is more preferred than ADD*/
19926 #define IX86_LEA_PRIORITY 0
19928 /* Return true if usage of lea INSN has performance advantage
19929 over a sequence of instructions. Instructions sequence has
19930 SPLIT_COST cycles higher latency than lea latency. */
19933 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
19934 unsigned int regno2, int split_cost, bool has_scale)
19936 int dist_define, dist_use;
19938 /* For Silvermont if using a 2-source or 3-source LEA for
19939 non-destructive destination purposes, or due to wanting
19940 ability to use SCALE, the use of LEA is justified. */
19941 if (TARGET_SILVERMONT || TARGET_INTEL)
/* NOTE(review): the has_scale early-return of the Silvermont branch is
   missing from this sampled view.  */
19945 if (split_cost < 1)
19947 if (regno0 == regno1 || regno0 == regno2)
/* Generic path: compare backward define distance against forward
   AGU-use distance.  */
19952 dist_define = distance_non_agu_define (regno1, regno2, insn);
19953 dist_use = distance_agu_use (regno0, insn);
19955 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
19957 /* If there is no non AGU operand definition, no AGU
19958 operand usage and split cost is 0 then both lea
19959 and non lea variants have same priority. Currently
19960 we prefer lea for 64 bit code and non lea on 32 bit
19962 if (dist_use < 0 && split_cost == 0)
19963 return TARGET_64BIT || IX86_LEA_PRIORITY;
19968 /* With longer definitions distance lea is more preferable.
19969 Here we change it to take into account splitting cost and
19971 dist_define += split_cost + IX86_LEA_PRIORITY;
19973 /* If there is no use in memory addess then we just check
19974 that split cost exceeds AGU stall. */
19976 return dist_define > LEA_MAX_STALL;
19978 /* If this insn has both backward non-agu dependence and forward
19979 agu dependence, the one with short distance takes effect. */
19980 return dist_define >= dist_use;
19983 /* Return true if it is legal to clobber flags by INSN and
19984 false otherwise. */
19987 ix86_ok_to_clobber_flags (rtx_insn *insn)
19989 basic_block bb = BLOCK_FOR_INSN (insn);
/* Scan forward from INSN: any later use of FLAGS_REG before a def
   makes clobbering illegal; a redefinition makes it legal.
   NOTE(review): the scan-loop header is missing from this sampled
   view.  */
19995 if (NONDEBUG_INSN_P (insn))
19997 FOR_EACH_INSN_USE (use, insn)
19998 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
20001 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
20005 if (insn == BB_END (bb))
20008 insn = NEXT_INSN (insn);
/* Reached block end: flags may be clobbered iff not live-out.  */
20011 live = df_get_live_out(bb);
20012 return !REGNO_REG_SET_P (live, FLAGS_REG);
20015 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20016 move and add to avoid AGU stalls. */
20019 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20021 unsigned int regno0, regno1, regno2;
20023 /* Check if we need to optimize. */
20024 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20027 /* Check it is correct to split here. */
20028 if (!ix86_ok_to_clobber_flags(insn))
20031 regno0 = true_regnum (operands[0]);
20032 regno1 = true_regnum (operands[1]);
20033 regno2 = true_regnum (operands[2]);
20035 /* We need to split only adds with non destructive
20036 destination operand. */
20037 if (regno0 == regno1 || regno0 == regno2)
/* split_cost of 1: the extra mov needed by the split sequence.  */
20040 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20043 /* Return true if we should emit lea instruction instead of mov
20047 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20049 unsigned int regno0, regno1;
20051 /* Check if we need to optimize. */
20052 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20055 /* Use lea for reg to reg moves only. */
20056 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20059 regno0 = true_regnum (operands[0]);
20060 regno1 = true_regnum (operands[1]);
/* Zero split_cost: mov and lea have identical operation counts.  */
20062 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20065 /* Return true if we need to split lea into a sequence of
20066 instructions to avoid AGU stalls. */
20069 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20071 unsigned int regno0, regno1, regno2;
20073 struct ix86_address parts;
20076 /* Check we need to optimize. */
20077 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20080 /* The "at least two components" test below might not catch simple
20081 move or zero extension insns if parts.base is non-NULL and parts.disp
20082 is const0_rtx as the only components in the address, e.g. if the
20083 register is %rbp or %r13. As this test is much cheaper and moves or
20084 zero extensions are the common case, do this check first. */
20085 if (REG_P (operands[1])
20086 || (SImode_address_operand (operands[1], VOIDmode)
20087 && REG_P (XEXP (operands[1], 0))))
20090 /* Check if it is OK to split here. */
20091 if (!ix86_ok_to_clobber_flags (insn))
20094 ok = ix86_decompose_address (operands[1], &parts);
20097 /* There should be at least two components in the address. */
20098 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20099 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20102 /* We should not split into add if non legitimate pic
20103 operand is used as displacement. */
20104 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20107 regno0 = true_regnum (operands[0]) ;
20108 regno1 = INVALID_REGNUM;
20109 regno2 = INVALID_REGNUM;
20112 regno1 = true_regnum (parts.base);
20114 regno2 = true_regnum (parts.index);
20118 /* Compute how many cycles we will add to execution time
20119 if split lea into a sequence of instructions. */
20120 if (parts.base || parts.index)
20122 /* Have to use mov instruction if non desctructive
20123 destination form is used. */
20124 if (regno1 != regno0 && regno2 != regno0)
20127 /* Have to add index to base if both exist. */
20128 if (parts.base && parts.index)
20131 /* Have to use shift and adds if scale is 2 or greater. */
20132 if (parts.scale > 1)
20134 if (regno0 != regno1)
20136 else if (regno2 == regno0)
20139 split_cost += parts.scale;
20142 /* Have to use add instruction with immediate if
20143 disp is non zero. */
20144 if (parts.disp && parts.disp != const0_rtx)
20147 /* Subtract the price of lea. */
/* Final verdict: split only when LEA does not outperform the
   accumulated split_cost (scale handling counts as has_scale).  */
20151 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20155 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20156 matches destination. RTX includes clobber of FLAGS_REG. */
/* Emit dst := dst CODE src in MODE as a two-element PARALLEL:
   the SET itself plus a clobber of the x86 flags register, matching
   the standard binary-op insn patterns of this port.  */
20159 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20164 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20165 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20167 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20170 /* Return true if regno1 def is nearest to the insn. */
/* Walk backwards from INSN to the head of its basic block looking for
   the most recent definition of REGNO1 or REGNO2; the function's name
   says the result is true when REGNO1's def is the nearer one.  */
20173 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20175 rtx_insn *prev = insn;
20176 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20180 while (prev && prev != start)
/* Skip notes, debug insns and other non-instructions.  */
20182 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20184 prev = PREV_INSN (prev);
20187 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20189 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20191 prev = PREV_INSN (prev);
20194 /* None of the regs is defined in the bb. */
20198 /* Split lea instructions into a sequence of instructions
20199 which are executed on ALU to avoid AGU stalls.
20200 It is assumed that it is allowed to clobber flags register
20201 at lea position. */
20204 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20206 unsigned int regno0, regno1, regno2;
20207 struct ix86_address parts;
20211 ok = ix86_decompose_address (operands[1], &parts);
/* All arithmetic below is emitted on the lowpart of the destination in
   MODE; base/index/disp are likewise narrowed before use.  */
20214 target = gen_lowpart (mode, operands[0]);
20216 regno0 = true_regnum (target);
20217 regno1 = INVALID_REGNUM;
20218 regno2 = INVALID_REGNUM;
20222 parts.base = gen_lowpart (mode, parts.base);
20223 regno1 = true_regnum (parts.base);
20228 parts.index = gen_lowpart (mode, parts.index);
20229 regno2 = true_regnum (parts.index);
20233 parts.disp = gen_lowpart (mode, parts.disp);
20235 if (parts.scale > 1)
20237 /* Case r1 = r1 + ... */
20238 if (regno1 == regno0)
20240 /* If we have a case r1 = r1 + C * r2 then we
20241 should use multiplication which is very
20242 expensive. Assume cost model is wrong if we
20243 have such case here. */
20244 gcc_assert (regno2 != regno0);
/* Emit SCALE repeated additions of the index instead of a multiply.  */
20246 for (adds = parts.scale; adds > 0; adds--)
20247 ix86_emit_binop (PLUS, mode, target, parts.index);
20251 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20252 if (regno0 != regno2)
20253 emit_insn (gen_rtx_SET (target, parts.index));
20255 /* Use shift for scaling (scale is a power of two here). */
20256 ix86_emit_binop (ASHIFT, mode, target,
20257 GEN_INT (exact_log2 (parts.scale)));
20260 ix86_emit_binop (PLUS, mode, target, parts.base);
20262 if (parts.disp && parts.disp != const0_rtx)
20263 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* No base and no index: the address is just a displacement.  */
20266 else if (!parts.base && !parts.index)
20268 gcc_assert(parts.disp);
20269 emit_insn (gen_rtx_SET (target, parts.disp));
20275 if (regno0 != regno2)
20276 emit_insn (gen_rtx_SET (target, parts.index));
20278 else if (!parts.index)
20280 if (regno0 != regno1)
20281 emit_insn (gen_rtx_SET (target, parts.base));
20285 if (regno0 == regno1)
20287 else if (regno0 == regno2)
20293 /* Find better operand for SET instruction, depending
20294 on which definition is farther from the insn. */
20295 if (find_nearest_reg_def (insn, regno1, regno2))
20296 tmp = parts.index, tmp1 = parts.base;
20298 tmp = parts.base, tmp1 = parts.index;
20300 emit_insn (gen_rtx_SET (target, tmp));
20302 if (parts.disp && parts.disp != const0_rtx)
20303 ix86_emit_binop (PLUS, mode, target, parts.disp);
20305 ix86_emit_binop (PLUS, mode, target, tmp1);
20309 ix86_emit_binop (PLUS, mode, target, tmp);
20312 if (parts.disp && parts.disp != const0_rtx)
20313 ix86_emit_binop (PLUS, mode, target, parts.disp);
20317 /* Return true if it is ok to optimize an ADD operation to LEA
20318 operation to avoid flag register consumption. For most processors,
20319 ADD is faster than LEA. For the processors like BONNELL, if the
20320 destination register of LEA holds an actual address which will be
20321 used soon, LEA is better and otherwise ADD is better. */
20324 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20326 unsigned int regno0 = true_regnum (operands[0]);
20327 unsigned int regno1 = true_regnum (operands[1]);
20328 unsigned int regno2 = true_regnum (operands[2]);
20330 /* If a = b + c, (a!=b && a!=c), must use lea form.  ADD would
   require an extra move since it is destructive.  */
20331 if (regno0 != regno1 && regno0 != regno2)
20334 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
/* Otherwise defer to the AGU/ALU cost model.  */
20337 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20340 /* Return true if destination reg of SET_BODY is shift count of
/* Helper for ix86_dep_by_shift_count: works on insn bodies (patterns),
   recursing through PARALLELs on both the SET and USE sides.  */
20344 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20350 /* Retrieve destination of SET_BODY. */
20351 switch (GET_CODE (set_body))
20354 set_dest = SET_DEST (set_body);
20355 if (!set_dest || !REG_P (set_dest))
20359 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20360 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20368 /* Retrieve shift count of USE_BODY. */
20369 switch (GET_CODE (use_body))
20372 shift_rtx = XEXP (use_body, 1);
20375 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20376 if (ix86_dep_by_shift_count_body (set_body,
20377 XVECEXP (use_body, 0, i)))
/* Only shift and rotate codes have a shift-count operand.  */
20385 && (GET_CODE (shift_rtx) == ASHIFT
20386 || GET_CODE (shift_rtx) == LSHIFTRT
20387 || GET_CODE (shift_rtx) == ASHIFTRT
20388 || GET_CODE (shift_rtx) == ROTATE
20389 || GET_CODE (shift_rtx) == ROTATERT))
20391 rtx shift_count = XEXP (shift_rtx, 1);
20393 /* Return true if shift count is dest of SET_BODY. */
20394 if (REG_P (shift_count))
20396 /* Add check since it can be invoked before register
20397 allocation in pre-reload schedule. */
20398 if (reload_completed
20399 && true_regnum (set_dest) == true_regnum (shift_count))
20401 else if (REGNO(set_dest) == REGNO(shift_count))
20409 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: compare the PATTERNs of the two insns.  */
20413 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20415 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20416 PATTERN (use_insn));
20419 /* Return TRUE or FALSE depending on whether the unary operator meets the
20420 appropriate constraints. */
20423 ix86_unary_operator_ok (enum rtx_code,
20427 /* If one of operands is memory, source and destination must match,
   since x86 unary insns operate on a single read/write operand.  */
20428 if ((MEM_P (operands[0])
20429 || MEM_P (operands[1]))
20430 && ! rtx_equal_p (operands[0], operands[1]))
20435 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20436 are ok, keeping in mind the possible movddup alternative. */
20439 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
/* Memory destination: must match the operand selected by HIGH.  */
20441 if (MEM_P (operands[0]))
20442 return rtx_equal_p (operands[0], operands[1 + high]);
/* Both sources in memory is only OK when movddup (SSE3) can be used,
   i.e. both sources are the same location.  */
20443 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20444 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20448 /* Post-reload splitter for converting an SF or DFmode value in an
20449 SSE register into an unsigned SImode. */
20452 ix86_split_convert_uns_si_sse (rtx operands[])
20454 machine_mode vecmode;
20455 rtx value, large, zero_or_two31, input, two31, x;
20457 large = operands[1];
20458 zero_or_two31 = operands[2];
20459 input = operands[3];
20460 two31 = operands[4];
20461 vecmode = GET_MODE (large);
20462 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
20464 /* Load up the value into the low element. We must ensure that the other
20465 elements are valid floats -- zero is the easiest such value. */
20468 if (vecmode == V4SFmode)
20469 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
20471 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
20475 input = gen_rtx_REG (vecmode, REGNO (input));
20476 emit_move_insn (value, CONST0_RTX (vecmode));
20477 if (vecmode == V4SFmode)
20478 emit_insn (gen_sse_movss (value, value, input));
20480 emit_insn (gen_sse2_movsd (value, value, input));
20483 emit_move_insn (large, two31);
20484 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2**31 <= value) ? all-ones : zero, as an FP compare mask.  */
20486 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
20487 emit_insn (gen_rtx_SET (large, x));
/* zero_or_two31 = mask ? 2**31 : 0.0.  */
20489 x = gen_rtx_AND (vecmode, zero_or_two31, large);
20490 emit_insn (gen_rtx_SET (zero_or_two31, x));
/* value -= (value was >= 2**31) ? 2**31 : 0 so it fits signed range.  */
20492 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
20493 emit_insn (gen_rtx_SET (value, x));
/* Turn the compare mask into 0x80000000 per lane for the final xor.  */
20495 large = gen_rtx_REG (V4SImode, REGNO (large));
20496 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
20498 x = gen_rtx_REG (V4SImode, REGNO (value));
20499 if (vecmode == V4SFmode)
20500 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
20502 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* Re-add the sign bit where 2**31 had been subtracted.  */
20505 emit_insn (gen_xorv4si3 (value, value, large));
20508 /* Convert an unsigned DImode value into a DFmode, using only SSE.
20509 Expects the 64-bit DImode to be supplied in a pair of integral
20510 registers. Requires SSE2; will use SSE3 if available. For x86_32,
20511 -mfpmath=sse, !optimize_size only. */
20514 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
20516 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
20517 rtx int_xmm, fp_xmm;
20518 rtx biases, exponents;
/* Get the 64-bit integer INPUT into the low half of an SSE register,
   choosing the cheapest method the target supports.  */
20521 int_xmm = gen_reg_rtx (V4SImode);
20522 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
20523 emit_insn (gen_movdi_to_sse (int_xmm, input));
20524 else if (TARGET_SSE_SPLIT_REGS)
20526 emit_clobber (int_xmm);
20527 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
20531 x = gen_reg_rtx (V2DImode);
20532 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
20533 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words that, when interleaved with the input halves, form
   the doubles 0x1.0p52 + lo and 0x1.0p84 + hi (see comment below).  */
20536 x = gen_rtx_CONST_VECTOR (V4SImode,
20537 gen_rtvec (4, GEN_INT (0x43300000UL),
20538 GEN_INT (0x45300000UL),
20539 const0_rtx, const0_rtx));
20540 exponents = validize_mem (force_const_mem (V4SImode, x));
20542 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
20543 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents))
20545 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
20546 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
20547 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
20548 (0x1.0p84 + double(fp_value_hi_xmm)).
20549 Note these exponents differ by 32. */
20551 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
20553 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
20554 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
20555 real_ldexp (&bias_lo_rvt, &dconst1, 52);
20556 real_ldexp (&bias_hi_rvt, &dconst1, 84);
20557 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
20558 x = const_double_from_real_value (bias_hi_rvt, DFmode);
20559 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
20560 biases = validize_mem (force_const_mem (V2DFmode, biases));
20561 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
20563 /* Add the upper and lower DFmode values together.  haddpd does it in
   one insn when SSE3 is available; otherwise shuffle and add.  */
20565 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
20568 x = copy_to_mode_reg (V2DFmode, fp_xmm);
20569 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
20570 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
20573 ix86_expand_vector_extract (false, target, fp_xmm, 0);
20576 /* Not used, but eases macroization of patterns. */
/* Stub that must never be called; it only exists so the pattern
   macroization in the machine description has a uniform name.  */
20578 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
20580 gcc_unreachable ();
20583 /* Convert an unsigned SImode value into a DFmode. Only currently used
20584 for SSE, but applicable anywhere. */
20587 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
20589 REAL_VALUE_TYPE TWO31r;
/* Bias the unsigned input by -2**31 so it fits in signed SImode...  */
20592 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
20593 NULL, 1, OPTAB_DIRECT);
20595 fp = gen_reg_rtx (DFmode);
20596 emit_insn (gen_floatsidf2 (fp, x));
/* ... convert signed, then add 2**31 back in DFmode (exact in double).  */
20598 real_ldexp (&TWO31r, &dconst1, 31);
20599 x = const_double_from_real_value (TWO31r, DFmode);
20601 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
20603 emit_move_insn (target, x);
20606 /* Convert a signed DImode value into a DFmode. Only used for SSE in
20607 32-bit mode; otherwise we have a direct convert instruction. */
20610 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
20612 REAL_VALUE_TYPE TWO32r;
20613 rtx fp_lo, fp_hi, x;
20615 fp_lo = gen_reg_rtx (DFmode);
20616 fp_hi = gen_reg_rtx (DFmode);
/* Convert the signed high word, scale it by 2**32 ...  */
20618 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
20620 real_ldexp (&TWO32r, &dconst1, 32);
20621 x = const_double_from_real_value (TWO32r, DFmode);
20622 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* ... convert the low word as unsigned, and sum the two parts.  */
20624 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
20626 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
20629 emit_move_insn (target, x);
20632 /* Convert an unsigned SImode value into a SFmode, using only SSE.
20633 For x86_32, -mfpmath=sse, !optimize_size only. */
20635 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
20637 REAL_VALUE_TYPE ONE16r;
20638 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit input into 16-bit halves; each half converts to
   SFmode exactly, and hi * 2**16 + lo reassembles the value.  */
20640 real_ldexp (&ONE16r, &dconst1, 16);
20641 x = const_double_from_real_value (ONE16r, SFmode);
20642 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
20643 NULL, 0, OPTAB_DIRECT);
20644 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
20645 NULL, 0, OPTAB_DIRECT);
20646 fp_hi = gen_reg_rtx (SFmode);
20647 fp_lo = gen_reg_rtx (SFmode);
20648 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
20649 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
20650 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
20652 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
20654 if (!rtx_equal_p (target, fp_hi))
20655 emit_move_insn (target, fp_hi);
20658 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
20659 a vector of unsigned ints VAL to vector of floats TARGET. */
20662 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
20665 REAL_VALUE_TYPE TWO16r;
20666 machine_mode intmode = GET_MODE (val);
20667 machine_mode fltmode = GET_MODE (target);
20668 rtx (*cvt) (rtx, rtx);
/* Pick the signed int->float conversion for the vector width.  */
20670 if (intmode == V4SImode)
20671 cvt = gen_floatv4siv4sf2;
20673 cvt = gen_floatv8siv8sf2;
/* Same 16/16 split trick as the scalar version: convert the low and
   high halves separately, then combine as hi * 2**16 + lo.  */
20674 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
20675 tmp[0] = force_reg (intmode, tmp[0]);
20676 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
20678 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
20679 NULL_RTX, 1, OPTAB_DIRECT);
20680 tmp[3] = gen_reg_rtx (fltmode);
20681 emit_insn (cvt (tmp[3], tmp[1]));
20682 tmp[4] = gen_reg_rtx (fltmode);
20683 emit_insn (cvt (tmp[4], tmp[2]));
20684 real_ldexp (&TWO16r, &dconst1, 16);
20685 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
20686 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
20687 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
20689 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
20691 if (tmp[7] != target)
20692 emit_move_insn (target, tmp[7])
20695 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
20696 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
20697 This is done by doing just signed conversion if < 0x1p31, and otherwise by
20698 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
20701 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
20703 REAL_VALUE_TYPE TWO31r;
20704 rtx two31r, tmp[4];
20705 machine_mode mode = GET_MODE (val);
20706 machine_mode scalarmode = GET_MODE_INNER (mode);
20707 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
20708 rtx (*cmp) (rtx, rtx, rtx, rtx);
20711 for (i = 0; i < 3; i++)
20712 tmp[i] = gen_reg_rtx (mode);
20713 real_ldexp (&TWO31r, &dconst1, 31);
20714 two31r = const_double_from_real_value (TWO31r, scalarmode);
20715 two31r = ix86_build_const_vector (mode, 1, two31r);
20716 two31r = force_reg (mode, two31r);
/* Select the mask-producing FP compare for the element mode/width.  */
20719 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
20720 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
20721 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
20722 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
20723 default: gcc_unreachable ();
/* tmp[0] = all-ones mask where 0x1p31 <= val.  */
20725 tmp[3] = gen_rtx_LE (mode, two31r, val);
20726 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
/* tmp[1] = 0x1p31 in the lanes that need the subtraction, else 0.  */
20727 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
/* *XORP gets 0x80000000 in those same lanes, to be xored in later.  */
20729 if (intmode == V4SImode || TARGET_AVX2)
20730 *xorp = expand_simple_binop (intmode, ASHIFT,
20731 gen_lowpart (intmode, tmp[0]),
20732 GEN_INT (31), NULL_RTX, 0,
20736 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
20737 two31 = ix86_build_const_vector (intmode, 1, two31);
20738 *xorp = expand_simple_binop (intmode, AND,
20739 gen_lowpart (intmode, tmp[0]),
20740 two31, NULL_RTX, 0,
/* Return val - (val >= 0x1p31 ? 0x1p31 : 0), now in signed range.  */
20743 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
20747 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
20748 then replicate the value for all elements of the vector
20752 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
20756 machine_mode scalar_mode;
20779 n_elt = GET_MODE_NUNITS (mode);
20780 v = rtvec_alloc (n_elt);
20781 scalar_mode = GET_MODE_INNER (mode);
/* Element 0 is always VALUE; the rest are VALUE when VECT (replicate)
   or zero otherwise.  */
20783 RTVEC_ELT (v, 0) = value;
20785 for (i = 1; i < n_elt; ++i)
20786 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
20788 return gen_rtx_CONST_VECTOR (mode, v);
/* Unsupported vector mode.  */
20791 gcc_unreachable ();
20795 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
20796 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
20797 for an SSE register. If VECT is true, then replicate the mask for
20798 all elements of the vector register. If INVERT is true, then create
20799 a mask excluding the sign bit. */
20802 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
20804 machine_mode vec_mode, imode;
/* VOIDmode here means "scalar": return the mask in the inner mode.  */
20832 vec_mode = VOIDmode;
20837 gcc_unreachable ();
20840 machine_mode inner_mode = GET_MODE_INNER (mode);
/* Build the bare sign bit of the element as a wide int ...  */
20841 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
20842 GET_MODE_BITSIZE (inner_mode));
/* ... complemented when INVERT asks for the everything-but-sign mask.  */
20844 w = wi::bit_not (w);
20846 /* Force this value into the low part of a fp vector constant. */
20847 mask = immed_wide_int_const (w, imode);
20848 mask = gen_lowpart (inner_mode, mask);
20850 if (vec_mode == VOIDmode)
20851 return force_reg (inner_mode, mask);
20853 v = ix86_build_const_vector (vec_mode, vect, mask);
20854 return force_reg (vec_mode, v);
20857 /* Generate code for floating point ABS or NEG. */
20860 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
20863 rtx mask, set, dst, src;
20864 bool use_sse = false;
20865 bool vector_mode = VECTOR_MODE_P (mode);
20866 machine_mode vmode = mode;
20870 else if (mode == TFmode)
20872 else if (TARGET_SSE_MATH)
20874 use_sse = SSE_FLOAT_MODE_P (mode);
20875 if (mode == SFmode)
20877 else if (mode == DFmode)
20881 /* NEG and ABS performed with SSE use bitwise mask operations.
20882 Create the appropriate mask now (sign bit for NEG, inverted for ABS). */
20884 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
20891 set = gen_rtx_fmt_e (code, mode, src);
20892 set = gen_rtx_SET (dst, set);
/* The SSE form carries a USE of the mask constant; the x87 form also
   clobbers the flags register.  */
20899 use = gen_rtx_USE (VOIDmode, mask);
20901 par = gen_rtvec (2, set, use);
20904 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20905 par = gen_rtvec (3, set, use, clob);
20907 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
20913 /* Expand a copysign operation. Special case operand 0 being a constant. */
20916 ix86_expand_copysign (rtx operands[])
20918 machine_mode mode, vmode;
20919 rtx dest, op0, op1, mask, nmask;
20921 dest = operands[0];
20925 mode = GET_MODE (dest);
20927 if (mode == SFmode)
20929 else if (mode == DFmode)
/* Constant magnitude: one mask suffices (the _const insn variants).  */
20934 if (CONST_DOUBLE_P (op0))
20936 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Strip the sign of the constant; the sign comes from op1 anyway.  */
20938 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
20939 op0 = simplify_unary_operation (ABS, mode, op0, mode);
20941 if (mode == SFmode || mode == DFmode)
20943 if (op0 == CONST0_RTX (mode))
20944 op0 = CONST0_RTX (vmode)
20947 rtx v = ix86_build_const_vector (vmode, false, op0);
20949 op0 = force_reg (vmode, v);
20952 else if (op0 != CONST0_RTX (mode))
20953 op0 = force_reg (mode, op0);
20955 mask = ix86_build_signbit_mask (vmode, 0, 0);
20957 if (mode == SFmode)
20958 copysign_insn = gen_copysignsf3_const;
20959 else if (mode == DFmode)
20960 copysign_insn = gen_copysigndf3_const;
20962 copysign_insn = gen_copysigntf3_const;
20964 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
20968 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
20970 nmask = ix86_build_signbit_mask (vmode, 0, 1);
20971 mask = ix86_build_signbit_mask (vmode, 0, 0);
20973 if (mode == SFmode)
20974 copysign_insn = gen_copysignsf3_var;
20975 else if (mode == DFmode)
20976 copysign_insn = gen_copysigndf3_var;
20978 copysign_insn = gen_copysigntf3_var;
20980 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
20984 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
20985 be a constant, and so has already been expanded into a vector constant. */
20988 ix86_split_copysign_const (rtx operands[])
20990 machine_mode mode, vmode;
20991 rtx dest, op0, mask, x;
20993 dest = operands[0];
20995 mask = operands[3];
20997 mode = GET_MODE (dest);
20998 vmode = GET_MODE (mask);
/* dest = (op1 & signbit-mask) | constant-magnitude.  */
21000 dest = simplify_gen_subreg (vmode, dest, mode, 0);
21001 x = gen_rtx_AND (vmode, dest, mask);
21002 emit_insn (gen_rtx_SET (dest, x));
/* ORing in zero is a no-op, so skip it for a zero magnitude.  */
21004 if (op0 != CONST0_RTX (vmode))
21006 x = gen_rtx_IOR (vmode, dest, op0);
21007 emit_insn (gen_rtx_SET (dest, x));
21011 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
21012 so we have to do two masks. */
21015 ix86_split_copysign_var (rtx operands[])
21017 machine_mode mode, vmode;
21018 rtx dest, scratch, op0, op1, mask, nmask, x;
21020 dest = operands[0];
21021 scratch = operands[1];
21024 nmask = operands[4];
21025 mask = operands[5];
21027 mode = GET_MODE (dest);
21028 vmode = GET_MODE (mask);
21030 if (rtx_equal_p (op0, op1))
21032 /* Shouldn't happen often (it's useless, obviously), but when it does
21033 we'd generate incorrect code if we continue below. */
21034 emit_move_insn (dest, op0);
/* The cases below are keyed to the register-allocation alternatives
   of the copysign*_var pattern; see the asserts on each path.  */
21038 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21040 gcc_assert (REGNO (op1) == REGNO (scratch));
21042 x = gen_rtx_AND (vmode, scratch, mask);
21043 emit_insn (gen_rtx_SET (scratch, x));
21046 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21047 x = gen_rtx_NOT (vmode, dest);
21048 x = gen_rtx_AND (vmode, x, op0);
21049 emit_insn (gen_rtx_SET (dest, x));
21053 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21055 x = gen_rtx_AND (vmode, scratch, mask);
21057 else /* alternative 2,4 */
21059 gcc_assert (REGNO (mask) == REGNO (scratch));
21060 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
21061 x = gen_rtx_AND (vmode, scratch, op1);
21063 emit_insn (gen_rtx_SET (scratch, x));
21065 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21067 dest = simplify_gen_subreg (vmode, op0, mode, 0);
21068 x = gen_rtx_AND (vmode, dest, nmask);
21070 else /* alternative 3,4 */
21072 gcc_assert (REGNO (nmask) == REGNO (dest));
21074 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21075 x = gen_rtx_AND (vmode, dest, op0);
21077 emit_insn (gen_rtx_SET (dest, x));
/* Combine the masked magnitude and the masked sign.  */
21080 x = gen_rtx_IOR (vmode, dest, scratch);
21081 emit_insn (gen_rtx_SET (dest, x));
21084 /* Return TRUE or FALSE depending on whether the first SET in INSN
21085 has source and destination with matching CC modes, and that the
21086 CC mode is at least as constrained as REQ_MODE. */
21089 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21092 machine_mode set_mode;
21094 set = PATTERN (insn);
21095 if (GET_CODE (set) == PARALLEL)
21096 set = XVECEXP (set, 0, 0);
21097 gcc_assert (GET_CODE (set) == SET);
21098 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21100 set_mode = GET_MODE (SET_DEST (set));
/* The cases below implement the partial order of CC modes: a compare
   computed in SET_MODE can satisfy REQ_MODE when SET_MODE carries at
   least the flag bits REQ_MODE needs.  */
21104 if (req_mode != CCNOmode
21105 && (req_mode != CCmode
21106 || XEXP (SET_SRC (set), 1) != const0_rtx))
21110 if (req_mode == CCGCmode)
21114 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21118 if (req_mode == CCZmode)
21129 if (set_mode != req_mode)
21134 gcc_unreachable ();
21137 return GET_MODE (SET_SRC (set)) == set_mode;
21140 /* Generate insn patterns to do an integer compare of OPERANDS. */
21143 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21145 machine_mode cmpmode;
21148 cmpmode = SELECT_CC_MODE (code, op0, op1);
21149 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21151 /* This is very simple, but making the interface the same as in the
21152 FP case makes the rest of the code easier. */
21153 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21154 emit_insn (gen_rtx_SET (flags, tmp));
21156 /* Return the test that should be put into the flags user, i.e.
21157 the bcc, scc, or cmov instruction. */
21158 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21161 /* Figure out whether to use ordered or unordered fp comparisons.
21162 Return the appropriate mode to use. */
/* Return the CC mode for an FP compare: unordered (CCFPU) when IEEE
   conformance is required, plain CCFP otherwise.  */
21165 ix86_fp_compare_mode (enum rtx_code)
21167 /* ??? In order to make all comparisons reversible, we do all comparisons
21168 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21169 all forms trapping and nontrapping comparisons, we can make inequality
21170 comparisons trapping again, since it results in better code when using
21171 FCOM based compares. */
21172 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Choose the narrowest CC mode that can implement comparison CODE of
   OP0 and OP1; narrower modes give the combiner more freedom.  */
21176 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21178 machine_mode mode = GET_MODE (op0);
21180 if (SCALAR_FLOAT_MODE_P (mode))
21182 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21183 return ix86_fp_compare_mode (code);
21188 /* Only zero flag is needed. */
21189 case EQ: /* ZF=0 */
21190 case NE: /* ZF!=0 */
21192 /* Codes needing carry flag. */
21193 case GEU: /* CF=0 */
21194 case LTU: /* CF=1 */
21195 /* Detect overflow checks. They need just the carry flag. */
21196 if (GET_CODE (op0) == PLUS
21197 && (rtx_equal_p (op1, XEXP (op0, 0))
21198 || rtx_equal_p (op1, XEXP (op0, 1))))
21202 case GTU: /* CF=0 & ZF=0 */
21203 case LEU: /* CF=1 | ZF=1 */
21205 /* Codes possibly doable only with sign flag when
21206 comparing against zero. */
21207 case GE: /* SF=OF or SF=0 */
21208 case LT: /* SF<>OF or SF=1 */
21209 if (op1 == const0_rtx)
21212 /* For other cases Carry flag is not required. */
21214 /* Codes doable only with sign flag when comparing
21215 against zero, but we miss jump instruction for it
21216 so we need to use relational tests against overflow
21217 that thus needs to be zero. */
21218 case GT: /* ZF=0 & SF=OF */
21219 case LE: /* ZF=1 | SF<>OF */
21220 if (op1 == const0_rtx)
21224 /* strcmp pattern do (use flags) and combine may ask us for proper
21229 gcc_unreachable ();
21233 /* Return the fixed registers used for condition codes. */
/* Implements the TARGET_FIXED_CONDITION_CODE_REGS hook.  The body is
   elided in this excerpt; presumably it stores the flags register
   number(s) through P1/P2 — confirm against the full source.  */
21236 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21243 /* If two condition code modes are compatible, return a condition code
21244 mode which is compatible with both. Otherwise, return
static machine_mode
21248 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
/* Non-CC modes can never be merged with CC modes.  */
21253 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC merge into the stricter CCGC.  */
21256 if ((m1 == CCGCmode && m2 == CCGOCmode)
21257 || (m1 == CCGOCmode && m2 == CCGCmode))
/* CCZ is weaker than both, so the other mode wins.  */
21260 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
21262 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
21268 gcc_unreachable ();
21300 /* These are only compatible with themselves, which we already
21307 /* Return a comparison we can do and that it is equivalent to
21308 swap_condition (code) apart possibly from orderedness.
21309 But, never change orderedness if TARGET_IEEE_FP, returning
21310 UNKNOWN in that case if necessary. */
static enum rtx_code
21313 ix86_fp_swap_condition (enum rtx_code code)
/* For these four codes a plain swap would flip orderedness (ordered
   <-> unordered), which is only allowed when not honoring IEEE.  */
21317 case GT: /* GTU - CF=0 & ZF=0 */
21318 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21319 case GE: /* GEU - CF=0 */
21320 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21321 case UNLT: /* LTU - CF=1 */
21322 return TARGET_IEEE_FP ? UNKNOWN : GT;
21323 case UNLE: /* LEU - CF=1 | ZF=1 */
21324 return TARGET_IEEE_FP ? UNKNOWN : GE;
/* All other codes swap without changing orderedness.  */
21326 return swap_condition (code);
21330 /* Return cost of comparison CODE using the best strategy for performance.
21331 All following functions do use number of instructions as a cost metrics.
21332 In future this should be tweaked to compute bytes for optimize_size and
21333 take into account performance of various instructions on various CPUs. */
21336 ix86_fp_comparison_cost (enum rtx_code code)
21340 /* The cost of code using bit-twiddling on %ah. */
/* IEEE-conforming sequences need extra instructions to handle NaNs.  */
21357 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21361 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21364 gcc_unreachable ();
/* FCOMI and SAHF strategies are cheaper than the arithmetic one, but
   their advantage shrinks when the arithmetic sequence is short.  */
21367 switch (ix86_fp_comparison_strategy (code))
21369 case IX86_FPCMP_COMI:
21370 return arith_cost > 4 ? 3 : 2;
21371 case IX86_FPCMP_SAHF:
21372 return arith_cost > 4 ? 4 : 3;
21378 /* Return strategy to use for floating-point. We assume that fcomi is always
21379 preferable where available, since that is also true when looking at size
21380 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
enum ix86_fpcmp_strategy
21383 ix86_fp_comparison_strategy (enum rtx_code)
21385 /* Do fcomi/sahf based test when profitable. */
21388 return IX86_FPCMP_COMI;
21390 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21391 return IX86_FPCMP_SAHF;
/* Fall back to fnstsw + bit-twiddling on %ah.  */
21393 return IX86_FPCMP_ARITH;
21396 /* Swap, force into registers, or otherwise massage the two operands
21397 to a fp comparison. The operands are updated in place; the new
21398 comparison code is returned. */
static enum rtx_code
21401 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21403 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
21404 rtx op0 = *pop0, op1 = *pop1;
21405 machine_mode op_mode = GET_MODE (op0);
21406 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21408 /* All of the unordered compare instructions only work on registers.
21409 The same is true of the fcomi compare instructions. The XFmode
21410 compare instructions require registers except when comparing
21411 against zero or when converting operand 1 from fixed point to
   floating point.  */
21415 && (fpcmp_mode == CCFPUmode
21416 || (op_mode == XFmode
21417 && ! (standard_80387_constant_p (op0) == 1
21418 || standard_80387_constant_p (op1) == 1)
21419 && GET_CODE (op1) != FLOAT)
21420 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
21422 op0 = force_reg (op_mode, op0);
21423 op1 = force_reg (op_mode, op1);
21427 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21428 things around if they appear profitable, otherwise force op0
21429 into a register. */
21431 if (standard_80387_constant_p (op0) == 0
21433 && ! (standard_80387_constant_p (op1) == 0
21436 enum rtx_code new_code = ix86_fp_swap_condition (code);
/* Only swap when an orderedness-preserving swapped code exists.  */
21437 if (new_code != UNKNOWN)
21439 std::swap (op0, op1);
21445 op0 = force_reg (op_mode, op0);
21447 if (CONSTANT_P (op1))
21449 int tmp = standard_80387_constant_p (op1);
/* Non-standard x87 constants must be loaded from the constant pool.  */
21451 op1 = validize_mem (force_const_mem (op_mode, op1));
21455 op1 = force_reg (op_mode, op1);
21458 op1 = force_reg (op_mode, op1);
21462 /* Try to rearrange the comparison to make it cheaper. */
21463 if (ix86_fp_comparison_cost (code)
21464 > ix86_fp_comparison_cost (swap_condition (code))
21465 && (REG_P (op1) || can_create_pseudo_p ()))
21467 std::swap (op0, op1);
21468 code = swap_condition (code);
21470 op0 = force_reg (op_mode, op0);
21478 /* Convert comparison codes we use to represent FP comparison to integer
21479 code that will result in proper branch. Return UNKNOWN if no such code
/* Body elided in this excerpt — presumably a switch mapping FP
   comparison codes to integer condition codes; confirm against the
   full source.  */
21483 ix86_fp_compare_code_to_integer (enum rtx_code code)
21512 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
/* NOTE(review): interior lines (braces, some arms) are elided in this
   extraction; comments below describe only what is visible.  */
21515 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
21517 machine_mode fpcmp_mode, intcmp_mode;
21520 fpcmp_mode = ix86_fp_compare_mode (code);
21521 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
21523 /* Do fcomi/sahf based test when profitable. */
/* Three strategies: COMI sets EFLAGS directly; SAHF copies the FPU status
   word (via a scratch HImode reg) into flags; ARITH reads the status word
   with fnstsw and does bit tests on AH by hand.  */
21524 switch (ix86_fp_comparison_strategy (code))
21526 case IX86_FPCMP_COMI:
21527 intcmp_mode = fpcmp_mode;
21528 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21529 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21533 case IX86_FPCMP_SAHF:
21534 intcmp_mode = fpcmp_mode;
21535 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21536 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
/* The compare and the scratch clobber are emitted as one PARALLEL so the
   scratch stays tied to this insn.  */
21539 scratch = gen_reg_rtx (HImode);
21540 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
21541 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
21544 case IX86_FPCMP_ARITH:
21545 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
21546 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21547 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
21549 scratch = gen_reg_rtx (HImode);
21550 emit_insn (gen_rtx_SET (scratch, tmp2));
21552 /* In the unordered case, we have to check C2 for NaN's, which
21553 doesn't happen to work out to anything nice combination-wise.
21554 So do some bit twiddling on the value we've got in AH to come
21555 up with an appropriate set of condition codes. */
/* Masks below select x87 status flags as they appear in AH after fnstsw:
   0x01 = C0, 0x04 = C2, 0x40 = C3; 0x45 = C3|C2|C0 (per Intel SDM).  */
21557 intcmp_mode = CCNOmode;
21562 if (code == GT || !TARGET_IEEE_FP)
21564 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21569 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21570 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21571 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
21572 intcmp_mode = CCmode;
21578 if (code == LT && TARGET_IEEE_FP)
21580 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21581 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
21582 intcmp_mode = CCmode;
21587 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
21593 if (code == GE || !TARGET_IEEE_FP)
21595 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
21600 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21601 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
21607 if (code == LE && TARGET_IEEE_FP)
21609 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21610 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21611 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21612 intcmp_mode = CCmode;
21617 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21623 if (code == EQ && TARGET_IEEE_FP)
21625 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21626 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21627 intcmp_mode = CCmode;
21632 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21638 if (code == NE && TARGET_IEEE_FP)
21640 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21641 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
21647 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21653 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21657 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21662 gcc_unreachable ();
21670 /* Return the test that should be put into the flags user, i.e.
21671 the bcc, scc, or cmov instruction. */
21672 return gen_rtx_fmt_ee (code, VOIDmode,
21673 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a comparison to the right expander: CC-mode operands are already
   flags and can be used directly; scalar FP goes through the x87/SSE FP
   path (decimal FP is asserted away); everything else is an integer
   compare.  Returns the comparison rtx for the flags user.  */
21678 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
21682 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
21683 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
21685 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
21687 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
21688 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21691 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL for comparison CODE of OP0/OP1.
   Handles vector-int equality via ptest, plain modes via a single
   compare+jump, and double-word (DImode on 32-bit, TImode on 64-bit)
   compares by splitting into hi/lo word compares with up to three jumps.
   NOTE(review): several interior lines are elided in this extraction.  */
21697 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
21699 machine_mode mode = GET_MODE (op0);
21702 /* Handle special case - vector comparison with boolean result, transform
21703 it using ptest instruction. */
21704 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21706 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
21707 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
21709 gcc_assert (code == EQ || code == NE);
21710 /* Generate XOR since we can't check that one operand is zero vector. */
21711 tmp = gen_reg_rtx (mode);
21712 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
21713 tmp = gen_lowpart (p_mode, tmp);
21714 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
21715 gen_rtx_UNSPEC (CCmode,
21716 gen_rtvec (2, tmp, tmp),
21718 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
21719 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21720 gen_rtx_LABEL_REF (VOIDmode, label),
21722 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
/* Simple case: one compare, one conditional jump.  */
21735 tmp = ix86_expand_compare (code, op0, op1);
21736 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21737 gen_rtx_LABEL_REF (VOIDmode, label),
21739 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
21745 /* For 32-bit target DI comparison may be performed on
21746 SSE registers. To allow this we should avoid split
21747 to SI mode which is achieved by doing xor in DI mode
21748 and then comparing with zero (which is recognized by
21749 STV pass). We don't compare using xor when optimizing
21751 if (!optimize_insn_for_size_p ()
21753 && (code == EQ || code == NE))
21755 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
21759 /* Expand DImode branch into multiple compare+branch. */
21762 rtx_code_label *label2;
21763 enum rtx_code code1, code2, code3;
21764 machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
21766 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
21768 std::swap (op0, op1);
21769 code = swap_condition (code);
21772 split_double_mode (mode, &op0, 1, lo+0, hi+0);
21773 split_double_mode (mode, &op1, 1, lo+1, hi+1);
21775 submode = mode == DImode ? SImode : DImode;
21777 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
21778 avoid two branches. This costs one extra insn, so disable when
21779 optimizing for size. */
21781 if ((code == EQ || code == NE)
21782 && (!optimize_insn_for_size_p ()
21783 || hi[1] == const0_rtx || lo[1] == const0_rtx))
21788 if (hi[1] != const0_rtx)
21789 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
21790 NULL_RTX, 0, OPTAB_WIDEN);
21793 if (lo[1] != const0_rtx)
21794 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
21795 NULL_RTX, 0, OPTAB_WIDEN);
21797 tmp = expand_binop (submode, ior_optab, xor1, xor0,
21798 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the single-word OR-of-XORs against zero.  */
21800 ix86_expand_branch (code, tmp, const0_rtx, label);
21804 /* Otherwise, if we are doing less-than or greater-or-equal-than,
21805 op1 is a constant and the low word is zero, then we can just
21806 examine the high word. Similarly for low word -1 and
21807 less-or-equal-than or greater-than. */
21809 if (CONST_INT_P (hi[1]))
21812 case LT: case LTU: case GE: case GEU:
21813 if (lo[1] == const0_rtx)
21815 ix86_expand_branch (code, hi[0], hi[1], label);
21819 case LE: case LEU: case GT: case GTU:
21820 if (lo[1] == constm1_rtx)
21822 ix86_expand_branch (code, hi[0], hi[1], label);
21830 /* Otherwise, we need two or three jumps. */
21832 label2 = gen_label_rtx ();
21835 code2 = swap_condition (code);
21836 code3 = unsigned_condition (code);
21840 case LT: case GT: case LTU: case GTU:
21843 case LE: code1 = LT; code2 = GT; break;
21844 case GE: code1 = GT; code2 = LT; break;
21845 case LEU: code1 = LTU; code2 = GTU; break;
21846 case GEU: code1 = GTU; code2 = LTU; break;
21848 case EQ: code1 = UNKNOWN; code2 = NE; break;
21849 case NE: code2 = UNKNOWN; break;
21852 gcc_unreachable ();
21857 * if (hi(a) < hi(b)) goto true;
21858 * if (hi(a) > hi(b)) goto false;
21859 * if (lo(a) < lo(b)) goto true;
21863 if (code1 != UNKNOWN)
21864 ix86_expand_branch (code1, hi[0], hi[1], label);
21865 if (code2 != UNKNOWN)
21866 ix86_expand_branch (code2, hi[0], hi[1], label2);
/* Low words are always compared unsigned regardless of CODE.  */
21868 ix86_expand_branch (code3, lo[0], lo[1], label);
21870 if (code2 != UNKNOWN)
21871 emit_label (label2);
21876 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
21881 /* Split branch based on floating point condition. */
/* TARGET1/TARGET2 are the taken/fallthrough destinations; exactly one is
   pc_rtx.  If the fallthrough is first, reverse the (possibly unordered)
   condition and swap targets so the emitted jump has pc_rtx second.
   Attaches a REG_BR_PROB note when split_branch_probability is known.  */
21883 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
21884 rtx target1, rtx target2, rtx tmp)
21889 if (target2 != pc_rtx)
21891 std::swap (target1, target2);
21892 code = reverse_condition_maybe_unordered (code);
21895 condition = ix86_expand_fp_compare (code, op1, op2,
21898 i = emit_jump_insn (gen_rtx_SET
21900 gen_rtx_IF_THEN_ELSE (VOIDmode,
21901 condition, target1, target2)));
21902 if (split_branch_probability >= 0)
21903 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Expand a setcc: store the QImode truth value of comparison CODE of
   OP0/OP1 into DEST.  The comparison rtx is retargeted to QImode so the
   setcc patterns match.  */
21907 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21911 gcc_assert (GET_MODE (dest) == QImode);
21913 ret = ix86_expand_compare (code, op0, op1);
21914 PUT_MODE (ret, QImode);
21915 emit_insn (gen_rtx_SET (dest, ret));
21918 /* Expand comparison setting or clearing carry flag. Return true when
21919 successful and set pop for the operation. */
/* On success *POP is an LTU/GEU comparison of the flags register, i.e. a
   pure carry-flag test usable by sbb/adc style sequences.
   NOTE(review): interior braces/returns are elided in this extraction.  */
21921 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
21923 machine_mode mode =
21924 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
21926 /* Do not handle double-mode compares that go through special path. */
21927 if (mode == (TARGET_64BIT ? TImode : DImode))
21930 if (SCALAR_FLOAT_MODE_P (mode))
21933 rtx_insn *compare_seq;
21935 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21937 /* Shortcut: following common codes never translate
21938 into carry flag compares. */
21939 if (code == EQ || code == NE || code == UNEQ || code == LTGT
21940 || code == ORDERED || code == UNORDERED)
21943 /* These comparisons require zero flag; swap operands so they won't. */
21944 if ((code == GT || code == UNLE || code == LE || code == UNGT)
21945 && !TARGET_IEEE_FP)
21947 std::swap (op0, op1);
21948 code = swap_condition (code);
21951 /* Try to expand the comparison and verify that we end up with
21952 carry flag based comparison. This fails to be true only when
21953 we decide to expand comparison using arithmetic that is not
21954 too common scenario. */
21956 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21957 compare_seq = get_insns ();
21960 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
21961 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
21962 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
21964 code = GET_CODE (compare_op);
21966 if (code != LTU && code != GEU)
/* Keep the generated FP compare only if it really is a carry test.  */
21969 emit_insn (compare_seq);
21974 if (!INTEGRAL_MODE_P (mode))
21983 /* Convert a==0 into (unsigned)a<1. */
21986 if (op1 != const0_rtx)
21989 code = (code == EQ ? LTU : GEU);
21992 /* Convert a>b into b<a or a>=b-1. */
21995 if (CONST_INT_P (op1))
21997 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
21998 /* Bail out on overflow. We still can swap operands but that
21999 would force loading of the constant into register. */
22000 if (op1 == const0_rtx
22001 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
22003 code = (code == GTU ? GEU : LTU);
22007 std::swap (op0, op1);
22008 code = (code == GTU ? LTU : GEU);
22012 /* Convert a>=0 into (unsigned)a<0x80000000. */
22015 if (mode == DImode || op1 != const0_rtx)
22017 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22018 code = (code == LT ? GEU : LTU);
22022 if (mode == DImode || op1 != constm1_rtx)
22024 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22025 code = (code == LE ? GEU : LTU);
22031 /* Swapping operands may cause constant to appear as first operand. */
22032 if (!nonimmediate_operand (op0, VOIDmode))
22034 if (!can_create_pseudo_p ())
22036 op0 = force_reg (mode, op0);
22038 *pop = ix86_expand_compare (code, op0, op1);
22039 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3].  Tries, in order: sbb-based
   constant sequences when both arms are constants, setcc+lea, setcc+and/add,
   a recursive constant-load + mask trick, and finally a plain cmov.
   Returns nonzero on success.
   NOTE(review): many interior lines (braces, else-arms, returns) are elided
   in this extraction; comments describe only the visible structure.  */
22044 ix86_expand_int_movcc (rtx operands[])
22046 enum rtx_code code = GET_CODE (operands[1]), compare_code;
22047 rtx_insn *compare_seq;
22049 machine_mode mode = GET_MODE (operands[0]);
22050 bool sign_bit_compare_p = false;
22051 rtx op0 = XEXP (operands[1], 0);
22052 rtx op1 = XEXP (operands[1], 1);
22054 if (GET_MODE (op0) == TImode
22055 || (GET_MODE (op0) == DImode
22060 compare_op = ix86_expand_compare (code, op0, op1);
22061 compare_seq = get_insns ();
22064 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (and x>-1 / x<=-1) are sign-bit tests: doable with a shift.  */
22066 if ((op1 == const0_rtx && (code == GE || code == LT))
22067 || (op1 == constm1_rtx && (code == GT || code == LE)))
22068 sign_bit_compare_p = true;
22070 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22071 HImode insns, we'd be swallowed in word prefix ops. */
22073 if ((mode != HImode || TARGET_FAST_PREFIX)
22074 && (mode != (TARGET_64BIT ? TImode : DImode))
22075 && CONST_INT_P (operands[2])
22076 && CONST_INT_P (operands[3]))
22078 rtx out = operands[0];
22079 HOST_WIDE_INT ct = INTVAL (operands[2]);
22080 HOST_WIDE_INT cf = INTVAL (operands[3]);
22081 HOST_WIDE_INT diff;
22084 /* Sign bit compares are better done using shifts than we do by using
22086 if (sign_bit_compare_p
22087 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22089 /* Detect overlap between destination and compare sources. */
22092 if (!sign_bit_compare_p)
22095 bool fpcmp = false;
22097 compare_code = GET_CODE (compare_op);
22099 flags = XEXP (compare_op, 0);
22101 if (GET_MODE (flags) == CCFPmode
22102 || GET_MODE (flags) == CCFPUmode)
22106 = ix86_fp_compare_code_to_integer (compare_code);
22109 /* To simplify rest of code, restrict to the GEU case. */
22110 if (compare_code == LTU)
22112 std::swap (ct, cf);
22113 compare_code = reverse_condition (compare_code);
22114 code = reverse_condition (code);
22119 PUT_CODE (compare_op,
22120 reverse_condition_maybe_unordered
22121 (GET_CODE (compare_op)));
22123 PUT_CODE (compare_op,
22124 reverse_condition (GET_CODE (compare_op)));
22128 if (reg_overlap_mentioned_p (out, op0)
22129 || reg_overlap_mentioned_p (out, op1))
22130 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
22132 if (mode == DImode)
22133 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22135 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22136 flags, compare_op));
22140 if (code == GT || code == GE)
22141 code = reverse_condition (code);
22144 std::swap (ct, cf);
22147 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22160 tmp = expand_simple_binop (mode, PLUS,
22162 copy_rtx (tmp), 1, OPTAB_DIRECT);
22173 tmp = expand_simple_binop (mode, IOR,
22175 copy_rtx (tmp), 1, OPTAB_DIRECT);
22177 else if (diff == -1 && ct)
22187 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22189 tmp = expand_simple_binop (mode, PLUS,
22190 copy_rtx (tmp), GEN_INT (cf),
22191 copy_rtx (tmp), 1, OPTAB_DIRECT);
22199 * andl cf - ct, dest
22209 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22212 tmp = expand_simple_binop (mode, AND,
22214 gen_int_mode (cf - ct, mode),
22215 copy_rtx (tmp), 1, OPTAB_DIRECT);
22217 tmp = expand_simple_binop (mode, PLUS,
22218 copy_rtx (tmp), GEN_INT (ct),
22219 copy_rtx (tmp), 1, OPTAB_DIRECT);
22222 if (!rtx_equal_p (tmp, out))
22223 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22230 machine_mode cmp_mode = GET_MODE (op0);
22231 enum rtx_code new_code;
22233 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22235 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22237 /* We may be reversing unordered compare to normal compare, that
22238 is not valid in general (we may convert non-trapping condition
22239 to trapping one), however on i386 we currently emit all
22240 comparisons unordered. */
22241 new_code = reverse_condition_maybe_unordered (code);
22244 new_code = ix86_reverse_condition (code, cmp_mode);
22245 if (new_code != UNKNOWN)
22247 std::swap (ct, cf);
22253 compare_code = UNKNOWN;
22254 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22255 && CONST_INT_P (op1))
22257 if (op1 == const0_rtx
22258 && (code == LT || code == GE))
22259 compare_code = code;
22260 else if (op1 == constm1_rtx)
22264 else if (code == GT)
22269 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22270 if (compare_code != UNKNOWN
22271 && GET_MODE (op0) == GET_MODE (out)
22272 && (cf == -1 || ct == -1))
22274 /* If lea code below could be used, only optimize
22275 if it results in a 2 insn sequence. */
22277 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22278 || diff == 3 || diff == 5 || diff == 9)
22279 || (compare_code == LT && ct == -1)
22280 || (compare_code == GE && cf == -1))
22283 * notl op1 (if necessary)
22291 code = reverse_condition (code);
22294 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22296 out = expand_simple_binop (mode, IOR,
22298 out, 1, OPTAB_DIRECT);
22299 if (out != operands[0])
22300 emit_move_insn (operands[0], out);
/* diff in {1,2,4,8,3,5,9} means ct-cf fits an lea scale/index form.  */
22307 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22308 || diff == 3 || diff == 5 || diff == 9)
22309 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22311 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22317 * lea cf(dest*(ct-cf)),dest
22321 * This also catches the degenerate setcc-only case.
22327 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22330 /* On x86_64 the lea instruction operates on Pmode, so we need
22331 to get arithmetics done in proper mode to match. */
22333 tmp = copy_rtx (out);
22337 out1 = copy_rtx (out);
22338 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22342 tmp = gen_rtx_PLUS (mode, tmp, out1);
22348 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22351 if (!rtx_equal_p (tmp, out))
22354 out = force_operand (tmp, copy_rtx (out));
22356 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22358 if (!rtx_equal_p (out, operands[0]))
22359 emit_move_insn (operands[0], copy_rtx (out));
22365 * General case: Jumpful:
22366 * xorl dest,dest cmpl op1, op2
22367 * cmpl op1, op2 movl ct, dest
22368 * setcc dest jcc 1f
22369 * decl dest movl cf, dest
22370 * andl (cf-ct),dest 1:
22373 * Size 20. Size 14.
22375 * This is reasonably steep, but branch mispredict costs are
22376 * high on modern cpus, so consider failing only if optimizing
22380 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22381 && BRANCH_COST (optimize_insn_for_speed_p (),
22386 machine_mode cmp_mode = GET_MODE (op0);
22387 enum rtx_code new_code;
22389 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22391 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22393 /* We may be reversing unordered compare to normal compare,
22394 that is not valid in general (we may convert non-trapping
22395 condition to trapping one), however on i386 we currently
22396 emit all comparisons unordered. */
22397 new_code = reverse_condition_maybe_unordered (code);
22401 new_code = ix86_reverse_condition (code, cmp_mode);
22402 if (compare_code != UNKNOWN && new_code != UNKNOWN)
22403 compare_code = reverse_condition (compare_code);
22406 if (new_code != UNKNOWN)
22414 if (compare_code != UNKNOWN)
22416 /* notl op1 (if needed)
22421 For x < 0 (resp. x <= -1) there will be no notl,
22422 so if possible swap the constants to get rid of the
22424 True/false will be -1/0 while code below (store flag
22425 followed by decrement) is 0/-1, so the constants need
22426 to be exchanged once more. */
22428 if (compare_code == GE || !cf)
22430 code = reverse_condition (code);
22434 std::swap (ct, cf);
22436 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22440 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22442 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
22444 copy_rtx (out), 1, OPTAB_DIRECT);
22447 out = expand_simple_binop (mode, AND, copy_rtx (out),
22448 gen_int_mode (cf - ct, mode),
22449 copy_rtx (out), 1, OPTAB_DIRECT);
22451 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
22452 copy_rtx (out), 1, OPTAB_DIRECT);
22453 if (!rtx_equal_p (out, operands[0]))
22454 emit_move_insn (operands[0], copy_rtx (out));
22460 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22462 /* Try a few things more with specific constants and a variable. */
22465 rtx var, orig_out, out, tmp;
22467 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
22470 /* If one of the two operands is an interesting constant, load a
22471 constant with the above and mask it in with a logical operation. */
22473 if (CONST_INT_P (operands[2]))
22476 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
22477 operands[3] = constm1_rtx, op = and_optab;
22478 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
22479 operands[3] = const0_rtx, op = ior_optab;
22483 else if (CONST_INT_P (operands[3]))
22486 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
22487 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard below tests operands[3], but by symmetry with
   the mirror branch above (which tests the OTHER operand against the
   sentinel) it should plausibly be "operands[2] != const0_rtx" — the
   constant being loaded here is operands[2].  Verify against upstream
   before changing.  */
22488 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
22489 operands[2] = const0_rtx, op = ior_optab;
22496 orig_out = operands[0];
22497 tmp = gen_reg_rtx (mode);
22500 /* Recurse to get the constant loaded. */
22501 if (!ix86_expand_int_movcc (operands))
22504 /* Mask in the interesting variable. */
22505 out = expand_binop (mode, op, var, tmp, orig_out, 0,
22507 if (!rtx_equal_p (out, orig_out))
22508 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
22514 * For comparison with above,
/* Fallback: a real cmov.  Force both arms into cmov-compatible operands.  */
22524 if (! nonimmediate_operand (operands[2], mode))
22525 operands[2] = force_reg (mode, operands[2]);
22526 if (! nonimmediate_operand (operands[3], mode))
22527 operands[3] = force_reg (mode, operands[3]);
22529 if (! register_operand (operands[2], VOIDmode)
22531 || ! register_operand (operands[3], VOIDmode)))
22532 operands[2] = force_reg (mode, operands[2]);
22535 && ! register_operand (operands[3], VOIDmode))
22536 operands[3] = force_reg (mode, operands[3]);
22538 emit_insn (compare_seq);
22539 emit_insn (gen_rtx_SET (operands[0],
22540 gen_rtx_IF_THEN_ELSE (mode,
22541 compare_op, operands[2],
22546 /* Swap, force into registers, or otherwise massage the two operands
22547 to an sse comparison with a mask result. Thus we differ a bit from
22548 ix86_prepare_fp_compare_args which expects to produce a flags result.
22550 The DEST operand exists to help determine whether to commute commutative
22551 operators. The POP0/POP1 operands are updated in place. The new
22552 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): the switch skeleton (case labels) is elided in this
   extraction; only the per-group comments and actions remain visible.  */
22554 static enum rtx_code
22555 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
22556 rtx *pop0, rtx *pop1)
22562 /* AVX supports all the needed comparisons. */
22565 /* We have no LTGT as an operator. We could implement it with
22566 NE & ORDERED, but this requires an extra temporary. It's
22567 not clear that it's worth it. */
22574 /* These are supported directly. */
22581 /* AVX has 3 operand comparisons, no need to swap anything. */
22584 /* For commutative operators, try to canonicalize the destination
22585 operand to be first in the comparison - this helps reload to
22586 avoid extra moves. */
22587 if (!dest || !rtx_equal_p (dest, *pop1))
22595 /* These are not supported directly before AVX, and furthermore
22596 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
22597 comparison operands to transform into something that is
22599 std::swap (*pop0, *pop1);
22600 code = swap_condition (code);
22604 gcc_unreachable ();
22610 /* Detect conditional moves that exactly match min/max operational
22611 semantics. Note that this is IEEE safe, as long as we don't
22612 interchange the operands.
22614 Returns FALSE if this conditional move doesn't match a MIN/MAX,
22615 and TRUE if the operation is successful and instructions are emitted. */
22618 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
22619 rtx cmp_op1, rtx if_true, rtx if_false)
/* UNGE is handled by viewing it as LT with the arms swapped.  */
22627 else if (code == UNGE)
22628 std::swap (if_true, if_false);
22632 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
22634 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
22639 mode = GET_MODE (dest);
22641 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
22642 but MODE may be a vector mode and thus not appropriate. */
22643 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-strict: wrap in an UNSPEC so the operand order (and thus NaN /
   signed-zero behavior of minss/maxss) is preserved.  */
22645 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
22648 if_true = force_reg (mode, if_true);
22649 v = gen_rtvec (2, if_true, if_false);
22650 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math: a plain SMIN/SMAX is fine.  */
22654 code = is_min ? SMIN : SMAX;
22655 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
22658 emit_insn (gen_rtx_SET (dest, tmp));
22662 /* Expand an sse vector comparison. Return the register with the result. */
/* DEST gets the comparison mask; OP_TRUE/OP_FALSE are only used to decide
   whether DEST must be replaced by a fresh register (overlap check).
   For 512-bit operands the result is an integer mask mode (AVX-512).  */
22665 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
22666 rtx op_true, rtx op_false)
22668 machine_mode mode = GET_MODE (dest);
22669 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
22671 /* In general case result of comparison can differ from operands' type. */
22672 machine_mode cmp_mode;
22674 /* In AVX512F the result of comparison is an integer mask. */
22675 bool maskcmp = false;
22678 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
/* Mask mode: one bit per element, e.g. HImode for 16 elements.  */
22680 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
22681 gcc_assert (cmp_mode != BLKmode);
22686 cmp_mode = cmp_ops_mode;
22689 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
22690 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
22691 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
22694 || (op_true && reg_overlap_mentioned_p (dest, op_true))
22695 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
22696 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
22698 /* Compare patterns for int modes are unspec in AVX512F only. */
22699 if (maskcmp && (code == GT || code == EQ))
22701 rtx (*gen)(rtx, rtx, rtx);
22703 switch (cmp_ops_mode)
22706 gcc_assert (TARGET_AVX512BW);
22707 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
22710 gcc_assert (TARGET_AVX512BW);
22711 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
22714 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
22717 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
22725 emit_insn (gen (dest, cmp_op0, cmp_op1));
/* Generic path: emit the comparison as a plain rtx SET.  */
22729 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
22731 if (cmp_mode != mode && !maskcmp)
22733 x = force_reg (cmp_ops_mode, x);
22734 convert_move (dest, x, false);
22737 emit_insn (gen_rtx_SET (dest, x));
22742 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
22743 operations. This is used for both scalar and vector conditional moves. */
/* Strategy ladder (visible here): pass-through for all-ones/zero arms,
   single AND / ANDN / IOR shortcuts, XOP vector cmov, SSE4.1/AVX blend or
   AVX-512 mask blend, and finally the generic and/andn/or triple.
   NOTE(review): some guard lines are elided in this extraction.  */
22746 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
22748 machine_mode mode = GET_MODE (dest);
22749 machine_mode cmpmode = GET_MODE (cmp);
22751 /* In AVX512F the result of comparison is an integer mask. */
22752 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
22756 /* If we have an integer mask and FP value then we need
22757 to cast mask to FP mode. */
22758 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
22760 cmp = force_reg (cmpmode, cmp);
22761 cmp = gen_rtx_SUBREG (mode, cmp, 0);
/* TRUE arm all-ones and FALSE arm zero: the mask itself is the result.  */
22764 if (vector_all_ones_operand (op_true, mode)
22765 && rtx_equal_p (op_false, CONST0_RTX (mode))
22768 emit_insn (gen_rtx_SET (dest, cmp))
22770 else if (op_false == CONST0_RTX (mode)
22773 op_true = force_reg (mode, op_true);
22774 x = gen_rtx_AND (mode, cmp, op_true);
22775 emit_insn (gen_rtx_SET (dest, x));
22777 else if (op_true == CONST0_RTX (mode)
22780 op_false = force_reg (mode, op_false);
22781 x = gen_rtx_NOT (mode, cmp);
22782 x = gen_rtx_AND (mode, x, op_false);
22783 emit_insn (gen_rtx_SET (dest, x));
22785 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
22788 op_false = force_reg (mode, op_false);
22789 x = gen_rtx_IOR (mode, cmp, op_false);
22790 emit_insn (gen_rtx_SET (dest, x));
22792 else if (TARGET_XOP
22795 op_true = force_reg (mode, op_true);
22797 if (!nonimmediate_operand (op_false, mode))
22798 op_false = force_reg (mode, op_false);
/* XOP vpcmov: a true 3-operand vector conditional move.  */
22800 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
22806 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22809 if (!nonimmediate_operand (op_true, mode))
22810 op_true = force_reg (mode, op_true);
22812 op_false = force_reg (mode, op_false);
22818 gen = gen_sse4_1_blendvps;
22822 gen = gen_sse4_1_blendvpd;
/* Integer blends only exist in QImode granularity; punning other
   element widths through V16QI/V32QI is fine since the mask is a
   full-element mask.  */
22830 gen = gen_sse4_1_pblendvb;
22831 if (mode != V16QImode)
22832 d = gen_reg_rtx (V16QImode);
22833 op_false = gen_lowpart (V16QImode, op_false);
22834 op_true = gen_lowpart (V16QImode, op_true);
22835 cmp = gen_lowpart (V16QImode, cmp);
22840 gen = gen_avx_blendvps256;
22844 gen = gen_avx_blendvpd256;
22852 gen = gen_avx2_pblendvb;
22853 if (mode != V32QImode)
22854 d = gen_reg_rtx (V32QImode);
22855 op_false = gen_lowpart (V32QImode, op_false);
22856 op_true = gen_lowpart (V32QImode, op_true);
22857 cmp = gen_lowpart (V32QImode, cmp);
22862 gen = gen_avx512bw_blendmv64qi;
22865 gen = gen_avx512bw_blendmv32hi;
22868 gen = gen_avx512f_blendmv16si;
22871 gen = gen_avx512f_blendmv8di;
22874 gen = gen_avx512f_blendmv8df;
22877 gen = gen_avx512f_blendmv16sf;
22886 emit_insn (gen (d, op_false, op_true, cmp));
22888 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* Generic fallback: dest = (op_true & cmp) | (op_false & ~cmp).  */
22892 op_true = force_reg (mode, op_true);
22894 t2 = gen_reg_rtx (mode);
22896 t3 = gen_reg_rtx (mode);
22900 x = gen_rtx_AND (mode, op_true, cmp);
22901 emit_insn (gen_rtx_SET (t2, x));
22903 x = gen_rtx_NOT (mode, cmp);
22904 x = gen_rtx_AND (mode, x, op_false);
22905 emit_insn (gen_rtx_SET (t3, x));
22907 x = gen_rtx_IOR (mode, t3, t2);
22908 emit_insn (gen_rtx_SET (dest, x));
22913 /* Expand a floating-point conditional move. Return true if successful. */
22916 ix86_expand_fp_movcc (rtx operands[])
22918 machine_mode mode = GET_MODE (operands[0]);
22919 enum rtx_code code = GET_CODE (operands[1]);
22920 rtx tmp, compare_op;
22921 rtx op0 = XEXP (operands[1], 0);
22922 rtx op1 = XEXP (operands[1], 1);
/* SSE path: try min/max recognition first, otherwise compare to a mask
   and blend via ix86_expand_sse_movcc.  */
22924 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22926 machine_mode cmode;
22928 /* Since we've no cmove for sse registers, don't force bad register
22929 allocation just to gain access to it. Deny movcc when the
22930 comparison mode doesn't match the move mode. */
22931 cmode = GET_MODE (op0);
22932 if (cmode == VOIDmode)
22933 cmode = GET_MODE (op1);
22937 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
22938 if (code == UNKNOWN)
22941 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
22942 operands[2], operands[3]))
22945 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
22946 operands[2], operands[3]);
22947 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
22951 if (GET_MODE (op0) == TImode
22952 || (GET_MODE (op0) == DImode
22956 /* The floating point conditional move instructions don't directly
22957 support conditions resulting from a signed integer comparison. */
22959 compare_op = ix86_expand_compare (code, op0, op1);
22960 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce an fcmov-incompatible condition to a QImode setcc, then
   test that byte against zero.  */
22962 tmp = gen_reg_rtx (QImode);
22963 ix86_expand_setcc (tmp, code, op0, op1);
22965 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
22968 emit_insn (gen_rtx_SET (operands[0],
22969 gen_rtx_IF_THEN_ELSE (mode, compare_op,
22970 operands[2], operands[3])));
22975 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
/* NOTE(review): the code->immediate mapping body is elided in this
   extraction; only the signature and terminal gcc_unreachable remain.  */
22978 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
22999 gcc_unreachable ();
23003 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
/* NOTE(review): the code->immediate mapping body is elided in this
   extraction; only the signature and terminal gcc_unreachable remain.  */
23006 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
23023 gcc_unreachable ();
23027 /* Return immediate value to be used in UNSPEC_PCMP
23028 for comparison CODE in MODE. */
/* Dispatches to the FP or integer immediate table based on MODE.  */
23031 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
23033 if (FLOAT_MODE_P (mode))
23034 return ix86_fp_cmp_code_to_pcmp_immediate (code);
23035 return ix86_int_cmp_code_to_pcmp_immediate (code);
23038 /* Expand AVX-512 vector comparison. */
/* operands[0] (a mask-mode reg) = operands[2] CMP operands[3], emitted as
   an UNSPEC_PCMP / UNSPEC_UNSIGNED_PCMP with the comparison encoded as an
   immediate (third UNSPEC element).  */
23041 ix86_expand_mask_vec_cmp (rtx operands[])
23043 machine_mode mask_mode = GET_MODE (operands[0]);
23044 machine_mode cmp_mode = GET_MODE (operands[2]);
23045 enum rtx_code code = GET_CODE (operands[1]);
23046 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
/* Unsigned comparison codes use the unsigned pcmp unspec.  */
23056 unspec_code = UNSPEC_UNSIGNED_PCMP;
23060 unspec_code = UNSPEC_PCMP;
23063 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23066 emit_insn (gen_rtx_SET (operands[0], unspec));
23071 /* Expand fp vector comparison. */
/* Expands operands[0] = operands[2] <op> operands[3] for FP vectors.
   Unordered/ordered compound codes (visible below as the ORDERED/NE and
   UNORDERED/EQ pairs) are synthesized from two SSE compares combined with
   a logic op; simple codes go straight to ix86_expand_sse_cmp.
   NOTE(review): several control-flow lines (switch labels, returns) are
   missing from this extract.  */
23074 ix86_expand_fp_vec_cmp (rtx operands[])
23076 enum rtx_code code = GET_CODE (operands[1]);
/* May swap/canonicalize the operands; UNKNOWN means a swap happened
   that requires emitting the inverse.  */
23079 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23080 &operands[2], &operands[3]);
23081 if (code == UNKNOWN)
23084 switch (GET_CODE (operands[1]))
23087 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23088 operands[3], NULL, NULL);
23089 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23090 operands[3], NULL, NULL);
23094 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23095 operands[3], NULL, NULL);
23096 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23097 operands[3], NULL, NULL);
23101 gcc_unreachable ();
/* Combine the two partial results into the final predicate.  */
23103 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23107 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23108 operands[1], operands[2]);
23110 if (operands[0] != cmp)
23111 emit_move_insn (operands[0], cmp);
/* Expand an integer vector comparison DEST = COP0 <CODE> COP1 using SSE/AVX
   compares.  Only EQ/GT/GTU exist in hardware, so the code is first
   canonicalized (reversing or swapping as needed); unsigned compares are
   rewritten into signed ones by biasing both operands by the sign bit, or
   via unsigned saturating subtraction.  *NEGATE is set when the caller must
   invert the result.  OP_TRUE/OP_FALSE are passed through to the movcc
   helper.  NOTE(review): many switch labels and condition lines are missing
   from this extract; comments below describe only the visible logic.  */
23117 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23118 rtx op_true, rtx op_false, bool *negate)
23120 machine_mode data_mode = GET_MODE (dest);
23121 machine_mode mode = GET_MODE (cop0);
23126 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23128 && (mode == V16QImode || mode == V8HImode
23129 || mode == V4SImode || mode == V2DImode))
23133 /* Canonicalize the comparison to EQ, GT, GTU. */
23144 code = reverse_condition (code);
23150 code = reverse_condition (code);
/* GT/GTU only exist in one direction; swap operands for LT/LTU forms.  */
23156 std::swap (cop0, cop1);
23157 code = swap_condition (code);
23161 gcc_unreachable ();
23164 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23165 if (mode == V2DImode)
23170 /* SSE4.1 supports EQ. */
23171 if (!TARGET_SSE4_1)
23177 /* SSE4.2 supports GT/GTU. */
23178 if (!TARGET_SSE4_2)
23183 gcc_unreachable ();
23187 /* Unsigned parallel compare is not supported by the hardware.
23188 Play some tricks to turn this into a signed comparison
23192 cop0 = force_reg (mode, cop0);
23204 rtx (*gen_sub3) (rtx, rtx, rtx);
23208 case V16SImode: gen_sub3 = gen_subv16si3; break;
23209 case V8DImode: gen_sub3 = gen_subv8di3; break;
23210 case V8SImode: gen_sub3 = gen_subv8si3; break;
23211 case V4DImode: gen_sub3 = gen_subv4di3; break;
23212 case V4SImode: gen_sub3 = gen_subv4si3; break;
23213 case V2DImode: gen_sub3 = gen_subv2di3; break;
23215 gcc_unreachable ();
23217 /* Subtract (-(INT MAX) - 1) from both operands to make
23219 mask = ix86_build_signbit_mask (mode, true, false);
23220 t1 = gen_reg_rtx (mode);
23221 emit_insn (gen_sub3 (t1, cop0, mask));
23223 t2 = gen_reg_rtx (mode);
23224 emit_insn (gen_sub3 (t2, cop1, mask));
23238 /* Perform a parallel unsigned saturating subtraction. */
23239 x = gen_reg_rtx (mode);
23240 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
/* After the saturating subtract the compare degenerates to a test
   against zero; record that the sense flipped.  */
23244 cop1 = CONST0_RTX (mode);
23246 *negate = !*negate;
23250 gcc_unreachable ();
23256 std::swap (op_true, op_false);
23258 /* Allow the comparison to be done in one mode, but the movcc to
23259 happen in another mode. */
23260 if (data_mode == mode)
23262 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23263 op_true, op_false);
/* Same-size punning between compare mode and data mode is safe.  */
23267 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23268 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23269 op_true, op_false);
23270 if (GET_MODE (x) == mode)
23271 x = gen_lowpart (data_mode, x);
23277 /* Expand integer vector comparison. */
/* operands[0] = operands[2] <operands[1]> operands[3].  Delegates to
   ix86_expand_int_sse_cmp; when that reports NEGATE, the result is
   inverted by comparing it against zero with EQ.  */
23280 ix86_expand_int_vec_cmp (rtx operands[])
23282 rtx_code code = GET_CODE (operands[1]);
23283 bool negate = false;
23284 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23285 operands[3], NULL, NULL, &negate);
/* A second EQ-vs-zero pass must never itself request negation.  */
23291 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23292 CONST0_RTX (GET_MODE (cmp)),
23293 NULL, NULL, &negate);
23295 gcc_assert (!negate);
23297 if (operands[0] != cmp)
23298 emit_move_insn (operands[0], cmp);
23303 /* Expand a floating-point vector conditional move; a vcond operation
23304 rather than a movcc operation. */
/* operands[0] = (operands[4] <operands[3]> operands[5]) ? operands[1]
   : operands[2].  Compound unordered codes are built from two compares
   (ORDERED+NE, UNORDERED+EQ below) merged with a logic op; min/max
   patterns are tried first for the simple codes.
   NOTE(review): switch labels and returns are missing from this extract.  */
23307 ix86_expand_fp_vcond (rtx operands[])
23309 enum rtx_code code = GET_CODE (operands[3]);
23312 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23313 &operands[4], &operands[5]);
23314 if (code == UNKNOWN)
23317 switch (GET_CODE (operands[3]))
23320 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23321 operands[5], operands[0], operands[0]);
23322 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23323 operands[5], operands[1], operands[2]);
23327 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23328 operands[5], operands[0], operands[0]);
23329 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
23330 operands[5], operands[1], operands[2]);
23334 gcc_unreachable ();
23336 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23338 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Prefer a single min/max instruction when the select matches one.  */
23342 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
23343 operands[5], operands[1], operands[2]))
23346 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
23347 operands[1], operands[2]);
23348 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23352 /* Expand a signed/unsigned integral vector conditional move. */
/* operands[0] = (operands[4] <operands[3]> operands[5]) ? operands[1]
   : operands[2].  First tries the shift trick for x<0 selects of 0/1/-1,
   then falls back to compare + blend via ix86_expand_sse_movcc.  */
23355 ix86_expand_int_vcond (rtx operands[])
23357 machine_mode data_mode = GET_MODE (operands[0]);
23358 machine_mode mode = GET_MODE (operands[4]);
23359 enum rtx_code code = GET_CODE (operands[3]);
23360 bool negate = false;
23363 cop0 = operands[4];
23364 cop1 = operands[5];
23366 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
23367 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
23368 if ((code == LT || code == GE)
23369 && data_mode == mode
23370 && cop1 == CONST0_RTX (mode)
23371 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
23372 && GET_MODE_UNIT_SIZE (data_mode) > 1
23373 && GET_MODE_UNIT_SIZE (data_mode) <= 8
23374 && (GET_MODE_SIZE (data_mode) == 16
23375 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
23377 rtx negop = operands[2 - (code == LT)];
/* Shift count that isolates the sign bit of each element.  */
23378 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
23379 if (negop == CONST1_RTX (data_mode))
/* x < 0 ? 1 : 0 — logical shift brings the sign bit down to bit 0.  */
23381 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
23382 operands[0], 1, OPTAB_DIRECT);
23383 if (res != operands[0])
23384 emit_move_insn (operands[0], res);
23387 else if (GET_MODE_INNER (data_mode) != DImode
23388 && vector_all_ones_operand (negop, data_mode))
/* x < 0 ? -1 : 0 — arithmetic shift replicates the sign bit.  */
23390 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
23391 operands[0], 0, OPTAB_DIRECT);
23392 if (res != operands[0])
23393 emit_move_insn (operands[0], res);
/* General case: legitimize operands, then compare-and-blend.  */
23398 if (!nonimmediate_operand (cop1, mode))
23399 cop1 = force_reg (mode, cop1);
23400 if (!general_operand (operands[1], data_mode))
23401 operands[1] = force_reg (data_mode, operands[1]);
23402 if (!general_operand (operands[2], data_mode))
23403 operands[2] = force_reg (data_mode, operands[2]);
23405 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
23406 operands[1], operands[2], &negate);
/* If the compare was negated, swap the selected arms to compensate.  */
23411 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
23412 operands[2-negate]);
23416 /* AVX512F does support 64-byte integer vector operations,
23417 thus the longest vector we are faced with is V64QImode. */
23418 #define MAX_VECT_LEN 64
/* Descriptor for a (constant) vector permutation being expanded.  */
23420 struct expand_vec_perm_d
/* Destination and the one or two source operands.  */
23422 rtx target, op0, op1;
/* Element selector: perm[i] indexes into the concatenation op0|op1.  */
23423 unsigned char perm[MAX_VECT_LEN];
/* Vector mode of the operands.  */
23424 machine_mode vmode;
/* Number of elements actually used in PERM.  */
23425 unsigned char nelt;
/* True when op0 == op1 (single-input shuffle).  */
23426 bool one_operand_p;
/* Try to expand a permutation with a single AVX-512 VPERMI2 (two-source
   variable permute) instruction.  Picks the generator matching the vector
   mode and available ISA level; for FP modes the mask uses the integer
   mode of the same size (MASKMODE).  Returns via the (not fully visible)
   tail whether a generator was found and emitted.
   NOTE(review): most switch case labels and the early-exit/return lines
   are missing from this extract.  */
23431 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
23432 struct expand_vec_perm_d *d)
23434 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23435 expander, so args are either in d, or in op0, op1 etc. */
23436 machine_mode mode = GET_MODE (d ? d->op0 : op0);
23437 machine_mode maskmode = mode;
23438 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23443 if (TARGET_AVX512VL && TARGET_AVX512BW)
23444 gen = gen_avx512vl_vpermi2varv8hi3;
23447 if (TARGET_AVX512VL && TARGET_AVX512BW)
23448 gen = gen_avx512vl_vpermi2varv16hi3;
23451 if (TARGET_AVX512VBMI)
23452 gen = gen_avx512bw_vpermi2varv64qi3;
23455 if (TARGET_AVX512BW)
23456 gen = gen_avx512bw_vpermi2varv32hi3;
23459 if (TARGET_AVX512VL)
23460 gen = gen_avx512vl_vpermi2varv4si3;
23463 if (TARGET_AVX512VL)
23464 gen = gen_avx512vl_vpermi2varv8si3;
23467 if (TARGET_AVX512F)
23468 gen = gen_avx512f_vpermi2varv16si3;
23471 if (TARGET_AVX512VL)
23473 gen = gen_avx512vl_vpermi2varv4sf3;
23474 maskmode = V4SImode;
23478 if (TARGET_AVX512VL)
23480 gen = gen_avx512vl_vpermi2varv8sf3;
23481 maskmode = V8SImode;
23485 if (TARGET_AVX512F)
23487 gen = gen_avx512f_vpermi2varv16sf3;
23488 maskmode = V16SImode;
23492 if (TARGET_AVX512VL)
23493 gen = gen_avx512vl_vpermi2varv2di3;
23496 if (TARGET_AVX512VL)
23497 gen = gen_avx512vl_vpermi2varv4di3;
23500 if (TARGET_AVX512F)
23501 gen = gen_avx512f_vpermi2varv8di3;
23504 if (TARGET_AVX512VL)
23506 gen = gen_avx512vl_vpermi2varv2df3;
23507 maskmode = V2DImode;
23511 if (TARGET_AVX512VL)
23513 gen = gen_avx512vl_vpermi2varv4df3;
23514 maskmode = V4DImode;
23518 if (TARGET_AVX512F)
23520 gen = gen_avx512f_vpermi2varv8df3;
23521 maskmode = V8DImode;
23531 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23532 expander, so args are either in d, or in op0, op1 etc. */
23536 target = d->target;
/* Constant case: materialize the selector from d->perm.  */
23539 for (int i = 0; i < d->nelt; ++i)
23540 vec[i] = GEN_INT (d->perm[i]);
23541 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
23544 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
23548 /* Expand a variable vector permutation. */
/* operands[0] = destination, operands[1]/[2] = sources, operands[3] =
   variable selector.  Strategy ladder: VPERMI2 if AVX-512 allows it;
   VPERMD/VPERMPS (with mask widening tricks) for 256-bit modes; XOP
   VPPERM or SSSE3 PSHUFB (plus a merge via int vcond) for 128-bit modes.
   NOTE(review): this extract is missing many lines (returns, switch
   labels, some assignments); comments annotate only the visible code.  */
23551 ix86_expand_vec_perm (rtx operands[])
23553 rtx target = operands[0];
23554 rtx op0 = operands[1];
23555 rtx op1 = operands[2];
23556 rtx mask = operands[3];
23557 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
23558 machine_mode mode = GET_MODE (op0);
23559 machine_mode maskmode = GET_MODE (mask);
23561 bool one_operand_shuffle = rtx_equal_p (op0, op1);
23563 /* Number of elements in the vector. */
23564 w = GET_MODE_NUNITS (mode);
23565 e = GET_MODE_UNIT_SIZE (mode);
23566 gcc_assert (w <= 64);
/* Fast path: a single VPERMI2 instruction when the ISA provides it.  */
23568 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
23573 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
23575 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
23576 an constant shuffle operand. With a tiny bit of effort we can
23577 use VPERMD instead. A re-interpretation stall for V4DFmode is
23578 unfortunate but there's no avoiding it.
23579 Similarly for V16HImode we don't have instructions for variable
23580 shuffling, while for V32QImode we can use after preparing suitable
23581 masks vpshufb; vpshufb; vpermq; vpor. */
23583 if (mode == V16HImode)
23585 maskmode = mode = V32QImode;
23591 maskmode = mode = V8SImode;
23595 t1 = gen_reg_rtx (maskmode);
23597 /* Replicate the low bits of the V4DImode mask into V8SImode:
23599 t1 = { A A B B C C D D }. */
23600 for (i = 0; i < w / 2; ++i)
23601 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
23602 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23603 vt = force_reg (maskmode, vt);
23604 mask = gen_lowpart (maskmode, mask);
23605 if (maskmode == V8SImode)
23606 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
23608 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
23610 /* Multiply the shuffle indicies by two. */
23611 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
23614 /* Add one to the odd shuffle indicies:
23615 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
23616 for (i = 0; i < w / 2; ++i)
23618 vec[i * 2] = const0_rtx;
23619 vec[i * 2 + 1] = const1_rtx;
23621 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23622 vt = validize_mem (force_const_mem (maskmode, vt));
23623 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
23626 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
23627 operands[3] = mask = t1;
23628 target = gen_reg_rtx (mode);
23629 op0 = gen_lowpart (mode, op0);
23630 op1 = gen_lowpart (mode, op1);
23636 /* The VPERMD and VPERMPS instructions already properly ignore
23637 the high bits of the shuffle elements. No need for us to
23638 perform an AND ourselves. */
23639 if (one_operand_shuffle)
23641 emit_insn (gen_avx2_permvarv8si (target, op0, mask))
23642 if (target != operands[0])
23643 emit_move_insn (operands[0],
23644 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-input V8SI: permute each source, merge later.  */
23648 t1 = gen_reg_rtx (V8SImode);
23649 t2 = gen_reg_rtx (V8SImode);
23650 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
23651 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
23657 mask = gen_lowpart (V8SImode, mask);
23658 if (one_operand_shuffle)
23659 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
23662 t1 = gen_reg_rtx (V8SFmode);
23663 t2 = gen_reg_rtx (V8SFmode);
23664 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
23665 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
23671 /* By combining the two 128-bit input vectors into one 256-bit
23672 input vector, we can use VPERMD and VPERMPS for the full
23673 two-operand shuffle. */
23674 t1 = gen_reg_rtx (V8SImode);
23675 t2 = gen_reg_rtx (V8SImode);
23676 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
23677 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23678 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
23679 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
23683 t1 = gen_reg_rtx (V8SFmode);
23684 t2 = gen_reg_rtx (V8SImode);
23685 mask = gen_lowpart (V4SImode, mask);
23686 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
23687 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23688 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
23689 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* V32QI path: build lane-aware PSHUFB control masks.  */
23693 t1 = gen_reg_rtx (V32QImode);
23694 t2 = gen_reg_rtx (V32QImode);
23695 t3 = gen_reg_rtx (V32QImode);
23696 vt2 = GEN_INT (-128);
23697 for (i = 0; i < 32; i++)
23699 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23700 vt = force_reg (V32QImode, vt);
23701 for (i = 0; i < 32; i++)
23702 vec[i] = i < 16 ? vt2 : const0_rtx;
23703 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23704 vt2 = force_reg (V32QImode, vt2);
23705 /* From mask create two adjusted masks, which contain the same
23706 bits as mask in the low 7 bits of each vector element.
23707 The first mask will have the most significant bit clear
23708 if it requests element from the same 128-bit lane
23709 and MSB set if it requests element from the other 128-bit lane.
23710 The second mask will have the opposite values of the MSB,
23711 and additionally will have its 128-bit lanes swapped.
23712 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
23713 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
23714 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
23715 stands for other 12 bytes. */
23716 /* The bit whether element is from the same lane or the other
23717 lane is bit 4, so shift it up by 3 to the MSB position. */
23718 t5 = gen_reg_rtx (V4DImode);
23719 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
23721 /* Clear MSB bits from the mask just in case it had them set. */
23722 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
23723 /* After this t1 will have MSB set for elements from other lane. */
23724 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
23725 /* Clear bits other than MSB. */
23726 emit_insn (gen_andv32qi3 (t1, t1, vt));
23727 /* Or in the lower bits from mask into t3. */
23728 emit_insn (gen_iorv32qi3 (t3, t1, t2));
23729 /* And invert MSB bits in t1, so MSB is set for elements from the same
23731 emit_insn (gen_xorv32qi3 (t1, t1, vt));
23732 /* Swap 128-bit lanes in t3. */
23733 t6 = gen_reg_rtx (V4DImode);
23734 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
23735 const2_rtx, GEN_INT (3),
23736 const0_rtx, const1_rtx));
23737 /* And or in the lower bits from mask into t1. */
23738 emit_insn (gen_iorv32qi3 (t1, t1, t2));
23739 if (one_operand_shuffle)
23741 /* Each of these shuffles will put 0s in places where
23742 element from the other 128-bit lane is needed, otherwise
23743 will shuffle in the requested value. */
23744 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
23745 gen_lowpart (V32QImode, t6)));
23746 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
23747 /* For t3 the 128-bit lanes are swapped again. */
23748 t7 = gen_reg_rtx (V4DImode);
23749 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
23750 const2_rtx, GEN_INT (3),
23751 const0_rtx, const1_rtx));
23752 /* And oring both together leads to the result. */
23753 emit_insn (gen_iorv32qi3 (target, t1,
23754 gen_lowpart (V32QImode, t7)));
23755 if (target != operands[0])
23756 emit_move_insn (operands[0],
23757 gen_lowpart (GET_MODE (operands[0]), target));
23761 t4 = gen_reg_rtx (V32QImode);
23762 /* Similarly to the above one_operand_shuffle code,
23763 just for repeated twice for each operand. merge_two:
23764 code will merge the two results together. */
23765 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
23766 gen_lowpart (V32QImode, t6)));
23767 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
23768 gen_lowpart (V32QImode, t6)));
23769 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
23770 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
23771 t7 = gen_reg_rtx (V4DImode);
23772 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
23773 const2_rtx, GEN_INT (3),
23774 const0_rtx, const1_rtx));
23775 t8 = gen_reg_rtx (V4DImode);
23776 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
23777 const2_rtx, GEN_INT (3),
23778 const0_rtx, const1_rtx));
23779 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
23780 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* 128-bit paths below (XOP VPPERM / SSSE3 PSHUFB).  */
23786 gcc_assert (GET_MODE_SIZE (mode) <= 16);
23793 /* The XOP VPPERM insn supports three inputs. By ignoring the
23794 one_operand_shuffle special case, we avoid creating another
23795 set of constant vectors in memory. */
23796 one_operand_shuffle = false;
23798 /* mask = mask & {2*w-1, ...} */
23799 vt = GEN_INT (2*w - 1);
23803 /* mask = mask & {w-1, ...} */
23804 vt = GEN_INT (w - 1);
23807 for (i = 0; i < w; i++)
23809 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23810 mask = expand_simple_binop (maskmode, AND, mask, vt,
23811 NULL_RTX, 0, OPTAB_DIRECT);
23813 /* For non-QImode operations, convert the word permutation control
23814 into a byte permutation control. */
23815 if (mode != V16QImode)
23817 mask = expand_simple_binop (maskmode, ASHIFT, mask,
23818 GEN_INT (exact_log2 (e)),
23819 NULL_RTX, 0, OPTAB_DIRECT);
23821 /* Convert mask to vector of chars. */
23822 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
23824 /* Replicate each of the input bytes into byte positions:
23825 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
23826 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
23827 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
23828 for (i = 0; i < 16; ++i)
23829 vec[i] = GEN_INT (i/e * e);
23830 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23831 vt = validize_mem (force_const_mem (V16QImode, vt));
23833 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
23835 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
23837 /* Convert it into the byte positions by doing
23838 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
23839 for (i = 0; i < 16; ++i)
23840 vec[i] = GEN_INT (i % e);
23841 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23842 vt = validize_mem (force_const_mem (V16QImode, vt));
23843 emit_insn (gen_addv16qi3 (mask, mask, vt));
23846 /* The actual shuffle operations all operate on V16QImode. */
23847 op0 = gen_lowpart (V16QImode, op0);
23848 op1 = gen_lowpart (V16QImode, op1);
23852 if (GET_MODE (target) != V16QImode)
23853 target = gen_reg_rtx (V16QImode);
23854 emit_insn (gen_xop_pperm (target, op0, op1, mask));
23855 if (target != operands[0])
23856 emit_move_insn (operands[0],
23857 gen_lowpart (GET_MODE (operands[0]), target));
23859 else if (one_operand_shuffle)
23861 if (GET_MODE (target) != V16QImode)
23862 target = gen_reg_rtx (V16QImode);
23863 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
23864 if (target != operands[0])
23865 emit_move_insn (operands[0],
23866 gen_lowpart (GET_MODE (operands[0]), target));
23873 /* Shuffle the two input vectors independently. */
23874 t1 = gen_reg_rtx (V16QImode);
23875 t2 = gen_reg_rtx (V16QImode);
23876 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
23877 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
23880 /* Then merge them together. The key is whether any given control
23881 element contained a bit set that indicates the second word. */
23882 mask = operands[3];
23884 if (maskmode == V2DImode && !TARGET_SSE4_1)
23886 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
23887 more shuffle to convert the V2DI input mask into a V4SI
23888 input mask. At which point the masking that expand_int_vcond
23889 will work as desired. */
23890 rtx t3 = gen_reg_rtx (V4SImode);
23891 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
23892 const0_rtx, const0_rtx,
23893 const2_rtx, const2_rtx));
23895 maskmode = V4SImode;
23899 for (i = 0; i < w; i++)
23901 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23902 vt = force_reg (maskmode, vt);
23903 mask = expand_simple_binop (maskmode, AND, mask, vt,
23904 NULL_RTX, 0, OPTAB_DIRECT);
23906 if (GET_MODE (target) != mode)
23907 target = gen_reg_rtx (mode);
/* Select between the two shuffled halves via an integer vcond.  */
23909 xops[1] = gen_lowpart (mode, t2);
23910 xops[2] = gen_lowpart (mode, t1);
23911 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
23914 ok = ix86_expand_int_vcond (xops);
23916 if (target != operands[0])
23917 emit_move_insn (operands[0],
23918 gen_lowpart (GET_MODE (operands[0]), target));
23922 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
23923 true if we should do zero extension, else sign extension. HIGH_P is
23924 true if we want the N/2 high elements, else the low elements. */
/* Two strategies are visible below: dedicated extend insns
   (PMOVZX/PMOVSX family and AVX2/AVX-512 equivalents) when available,
   otherwise classic interleave (PUNPCKL/H) with zeros or a sign vector.
   NOTE(review): the condition lines choosing between zero/sign variants
   and several case labels are missing from this extract.  */
23927 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
23929 machine_mode imode = GET_MODE (src);
23934 rtx (*unpack)(rtx, rtx);
23935 rtx (*extract)(rtx, rtx) = NULL;
23936 machine_mode halfmode = BLKmode;
23942 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
23944 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
23945 halfmode = V32QImode;
23947 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
23951 unpack = gen_avx2_zero_extendv16qiv16hi2;
23953 unpack = gen_avx2_sign_extendv16qiv16hi2;
23954 halfmode = V16QImode;
23956 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
23960 unpack = gen_avx512f_zero_extendv16hiv16si2;
23962 unpack = gen_avx512f_sign_extendv16hiv16si2;
23963 halfmode = V16HImode;
23965 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
23969 unpack = gen_avx2_zero_extendv8hiv8si2;
23971 unpack = gen_avx2_sign_extendv8hiv8si2;
23972 halfmode = V8HImode;
23974 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
23978 unpack = gen_avx512f_zero_extendv8siv8di2;
23980 unpack = gen_avx512f_sign_extendv8siv8di2;
23981 halfmode = V8SImode;
23983 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
23987 unpack = gen_avx2_zero_extendv4siv4di2;
23989 unpack = gen_avx2_sign_extendv4siv4di2;
23990 halfmode = V4SImode;
23992 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
23996 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
23998 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
24002 unpack = gen_sse4_1_zero_extendv4hiv4si2;
24004 unpack = gen_sse4_1_sign_extendv4hiv4si2;
24008 unpack = gen_sse4_1_zero_extendv2siv2di2;
24010 unpack = gen_sse4_1_sign_extendv2siv2di2;
24013 gcc_unreachable ();
/* Wide (>= 32-byte) sources: extract the requested half first.  */
24016 if (GET_MODE_SIZE (imode) >= 32)
24018 tmp = gen_reg_rtx (halfmode);
24019 emit_insn (extract (tmp, src));
24023 /* Shift higher 8 bytes to lower 8 bytes. */
24024 tmp = gen_reg_rtx (V1TImode);
24025 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
24027 tmp = gen_lowpart (imode, tmp);
24032 emit_insn (unpack (dest, tmp));
/* Fallback: interleave path for targets without the extend insns.  */
24036 rtx (*unpack)(rtx, rtx, rtx);
24042 unpack = gen_vec_interleave_highv16qi;
24044 unpack = gen_vec_interleave_lowv16qi;
24048 unpack = gen_vec_interleave_highv8hi;
24050 unpack = gen_vec_interleave_lowv8hi;
24054 unpack = gen_vec_interleave_highv4si;
24056 unpack = gen_vec_interleave_lowv4si;
24059 gcc_unreachable ();
/* Zero extension interleaves with zero; sign extension interleaves
   with a 0 > src compare result (the sign mask).  */
24063 tmp = force_reg (imode, CONST0_RTX (imode));
24065 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24066 src, pc_rtx, pc_rtx);
24068 rtx tmp2 = gen_reg_rtx (imode);
24069 emit_insn (unpack (tmp2, src, tmp));
24070 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
24074 /* Expand conditional increment or decrement using adb/sbb instructions.
24075 The default case using setcc followed by the conditional move can be
24076 done by generic code. */
/* operands[0] = operands[2] +/- (operands[1] ? 1 : 0) — only handles
   increments/decrements by +/-1, via the carry flag and adc/sbb.
   Returns via the (not fully visible) early exits when the pattern
   doesn't apply.  NOTE(review): several lines (returns, switch labels,
   the fcmov/fpcmp branch structure) are missing from this extract.  */
24078 ix86_expand_int_addcc (rtx operands[])
24080 enum rtx_code code = GET_CODE (operands[1]);
24082 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24084 rtx val = const0_rtx;
24085 bool fpcmp = false;
24087 rtx op0 = XEXP (operands[1], 0);
24088 rtx op1 = XEXP (operands[1], 1);
/* Only +/-1 adjustments can be done with a bare adc/sbb.  */
24090 if (operands[3] != const1_rtx
24091 && operands[3] != constm1_rtx)
/* Comparison must be expressible through the carry flag.  */
24093 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24095 code = GET_CODE (compare_op);
24097 flags = XEXP (compare_op, 0);
24099 if (GET_MODE (flags) == CCFPmode
24100 || GET_MODE (flags) == CCFPUmode)
24103 code = ix86_fp_compare_code_to_integer (code);
24110 PUT_CODE (compare_op,
24111 reverse_condition_maybe_unordered
24112 (GET_CODE (compare_op)));
24114 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24117 mode = GET_MODE (operands[0]);
24119 /* Construct either adc or sbb insn. */
24120 if ((code == LTU) == (operands[3] == constm1_rtx))
24125 insn = gen_subqi3_carry;
24128 insn = gen_subhi3_carry;
24131 insn = gen_subsi3_carry;
24134 insn = gen_subdi3_carry;
24137 gcc_unreachable ();
24145 insn = gen_addqi3_carry;
24148 insn = gen_addhi3_carry;
24151 insn = gen_addsi3_carry;
24154 insn = gen_adddi3_carry;
24157 gcc_unreachable ();
24160 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24166 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24167 but works for floating pointer parameters and nonoffsetable memories.
24168 For pushes, it returns just stack offsets; the values will be saved
24169 in the right order. Maximally three parts are generated. */
/* Fills PARTS[] with the SImode (32-bit) or DImode (64-bit) pieces of
   OPERAND in mode MODE and returns the piece count via the (not fully
   visible) tail.  Handles registers, offsettable memory, pushes, and
   constants (punning FP constants through real_to_target).
   NOTE(review): the 32-bit/64-bit branch structure, several returns and
   some case labels are missing from this extract.  */
24172 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
/* XFmode occupies 3 SImode words on 32-bit; other modes size/4.  */
24177 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24179 size = (GET_MODE_SIZE (mode) + 4) / 8;
24181 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24182 gcc_assert (size >= 2 && size <= 4);
24184 /* Optimize constant pool reference to immediates. This is used by fp
24185 moves, that force all constants to memory to allow combining. */
24186 if (MEM_P (operand) && MEM_READONLY_P (operand))
24188 rtx tmp = maybe_get_pool_constant (operand);
24193 if (MEM_P (operand) && !offsettable_memref_p (operand))
24195 /* The only non-offsetable memories we handle are pushes. */
24196 int ok = push_operand (operand, VOIDmode);
24200 operand = copy_rtx (operand);
24201 PUT_MODE (operand, word_mode);
24202 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24206 if (GET_CODE (operand) == CONST_VECTOR)
24208 machine_mode imode = int_mode_for_mode (mode);
24209 /* Caution: if we looked through a constant pool memory above,
24210 the operand may actually have a different mode now. That's
24211 ok, since we want to pun this all the way back to an integer. */
24212 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24213 gcc_assert (operand != NULL);
24219 if (mode == DImode)
24220 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24225 if (REG_P (operand))
/* Hard-register split only valid after register allocation.  */
24227 gcc_assert (reload_completed);
24228 for (i = 0; i < size; i++)
24229 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24231 else if (offsettable_memref_p (operand))
24233 operand = adjust_address (operand, SImode, 0);
24234 parts[0] = operand;
24235 for (i = 1; i < size; i++)
24236 parts[i] = adjust_address (operand, SImode, 4 * i);
24238 else if (CONST_DOUBLE_P (operand))
24240 const REAL_VALUE_TYPE *r;
24243 r = CONST_DOUBLE_REAL_VALUE (operand);
24247 real_to_target (l, r, mode);
24248 parts[3] = gen_int_mode (l[3], SImode);
24249 parts[2] = gen_int_mode (l[2], SImode);
24252 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24253 long double may not be 80-bit. */
24254 real_to_target (l, r, mode);
24255 parts[2] = gen_int_mode (l[2], SImode);
24258 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24261 gcc_unreachable ();
24263 parts[1] = gen_int_mode (l[1], SImode);
24264 parts[0] = gen_int_mode (l[0], SImode);
24267 gcc_unreachable ();
/* 64-bit target: pieces are DImode (plus SImode/DImode upper half).  */
24272 if (mode == TImode)
24273 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24274 if (mode == XFmode || mode == TFmode)
24276 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
24277 if (REG_P (operand))
24279 gcc_assert (reload_completed);
24280 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24281 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24283 else if (offsettable_memref_p (operand))
24285 operand = adjust_address (operand, DImode, 0);
24286 parts[0] = operand;
24287 parts[1] = adjust_address (operand, upper_mode, 8);
24289 else if (CONST_DOUBLE_P (operand))
24293 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24295 /* real_to_target puts 32-bit pieces in each long. */
24298 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
24299 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
24302 if (upper_mode == SImode)
24303 parts[1] = gen_int_mode (l[2], SImode);
24307 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
24308 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
24312 gcc_unreachable ();
24319 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
24320 Return false when normal moves are needed; true when all required
24321 insns have been emitted. Operands 2-4 contain the input values
24322 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): the operand indices in the comment above look stale --
   the code below stores destination parts in operands[2 + i] and source
   parts in operands[6 + i]; confirm against the i386.md splitters that
   consume them.  */
24325 ix86_split_long_move (rtx operands[])
24330 int collisions = 0;
24331 machine_mode mode = GET_MODE (operands[0]);
24332 bool collisionparts[4];
24334 /* The DFmode expanders may ask us to move double.
24335 For 64bit target this is single move. By hiding the fact
24336 here we simplify i386.md splitters. */
24337 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
24339 /* Optimize constant pool reference to immediates. This is used by
24340 fp moves, that force all constants to memory to allow combining. */
24342 if (MEM_P (operands[1])
24343 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
24344 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
24345 operands[1] = get_pool_constant (XEXP (operands[1], 0));
/* An 8-byte move on a 64-bit target is a single word move: retype a
   push to word_mode, otherwise move via DImode lowparts.  */
24346 if (push_operand (operands[0], VOIDmode))
24348 operands[0] = copy_rtx (operands[0]);
24349 PUT_MODE (operands[0], word_mode);
24352 operands[0] = gen_lowpart (DImode, operands[0]);
24353 operands[1] = gen_lowpart (DImode, operands[1]);
24354 emit_move_insn (operands[0], operands[1]);
24358 /* The only non-offsettable memory we handle is push. */
24359 if (push_operand (operands[0], VOIDmode))
24362 gcc_assert (!MEM_P (operands[0])
24363 || offsettable_memref_p (operands[0]));
/* Split both operands into their word-sized parts; the part arrays
   hold up to four pieces.  */
24365 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24366 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24368 /* When emitting push, take care for source operands on the stack. */
24369 if (push && MEM_P (operands[1])
24370 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24372 rtx src_base = XEXP (part[1][nparts - 1], 0);
24374 /* Compensate for the stack decrement by 4. */
24375 if (!TARGET_64BIT && nparts == 3
24376 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
24377 src_base = plus_constant (Pmode, src_base, 4);
24379 /* src_base refers to the stack pointer and is
24380 automatically decreased by emitted push. */
24381 for (i = 0; i < nparts; i++)
24382 part[1][i] = change_address (part[1][i],
24383 GET_MODE (part[1][i]), src_base);
24386 /* We need to do copy in the right order in case an address register
24387 of the source overlaps the destination. */
24388 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
24392 for (i = 0; i < nparts; i++)
24395 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
24396 if (collisionparts[i])
24400 /* Collision in the middle part can be handled by reordering. */
24401 if (collisions == 1 && nparts == 3 && collisionparts [1])
24403 std::swap (part[0][1], part[0][2]);
24404 std::swap (part[1][1], part[1][2]);
24406 else if (collisions == 1
24408 && (collisionparts [1] || collisionparts [2]))
24410 if (collisionparts [1])
24412 std::swap (part[0][1], part[0][2]);
24413 std::swap (part[1][1], part[1][2]);
24417 std::swap (part[0][2], part[0][3]);
24418 std::swap (part[1][2], part[1][3]);
24422 /* If there are more collisions, we can't handle it by reordering.
24423 Do an lea to the last part and use only one colliding move. */
24424 else if (collisions > 1)
24426 rtx base, addr, tls_base = NULL_RTX;
24430 base = part[0][nparts - 1];
24432 /* Handle the case when the last part isn't valid for lea.
24433 Happens in 64-bit mode storing the 12-byte XFmode. */
24434 if (GET_MODE (base) != Pmode)
24435 base = gen_rtx_REG (Pmode, REGNO (base));
24437 addr = XEXP (part[1][0], 0);
24438 if (TARGET_TLS_DIRECT_SEG_REFS)
24440 struct ix86_address parts;
24441 int ok = ix86_decompose_address (addr, &parts);
24443 if (parts.seg == DEFAULT_TLS_SEG_REG)
24445 /* It is not valid to use %gs: or %fs: in
24446 lea though, so we need to remove it from the
24447 address used for lea and add it to each individual
24448 memory loads instead. */
24449 addr = copy_rtx (addr);
/* Walk the PLUS chain of the address and strip out the UNSPEC_TP
   (thread-pointer) term; it is re-added to BASE further below.  */
24451 while (GET_CODE (*x) == PLUS)
24453 for (i = 0; i < 2; i++)
24455 rtx u = XEXP (*x, i);
24456 if (GET_CODE (u) == ZERO_EXTEND)
24458 if (GET_CODE (u) == UNSPEC
24459 && XINT (u, 1) == UNSPEC_TP)
24461 tls_base = XEXP (*x, i);
24462 *x = XEXP (*x, 1 - i);
24470 gcc_assert (tls_base);
24473 emit_insn (gen_rtx_SET (base, addr));
24475 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
24476 part[1][0] = replace_equiv_address (part[1][0], base);
24477 for (i = 1; i < nparts; i++)
24480 base = copy_rtx (base);
24481 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
24482 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push case: emit parts high to low; XFmode with 128-bit long double
   needs an extra 4-byte stack adjustment before the third part.  */
24493 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
24494 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
24495 stack_pointer_rtx, GEN_INT (-4)));
24496 emit_move_insn (part[0][2], part[1][2]);
24498 else if (nparts == 4)
24500 emit_move_insn (part[0][3], part[1][3]);
24501 emit_move_insn (part[0][2], part[1][2]);
24506 /* In 64bit mode we don't have 32bit push available. In case this is
24507 register, it is OK - we will just use larger counterpart. We also
24508 retype memory - these come from attempt to avoid REX prefix on
24509 moving of second half of TFmode value. */
24510 if (GET_MODE (part[1][1]) == SImode)
24512 switch (GET_CODE (part[1][1]))
24515 part[1][1] = adjust_address (part[1][1], DImode, 0);
24519 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
24523 gcc_unreachable ();
24526 if (GET_MODE (part[1][0]) == SImode)
24527 part[1][0] = part[1][1];
24530 emit_move_insn (part[0][1], part[1][1]);
24531 emit_move_insn (part[0][0], part[1][0]);
24535 /* Choose correct order to not overwrite the source before it is copied. */
24536 if ((REG_P (part[0][0])
24537 && REG_P (part[1][1])
24538 && (REGNO (part[0][0]) == REGNO (part[1][1])
24540 && REGNO (part[0][0]) == REGNO (part[1][2]))
24542 && REGNO (part[0][0]) == REGNO (part[1][3]))))
24544 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy backwards (highest part first) when the low destination
   register overlaps a later source part; forwards otherwise.  */
24546 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
24548 operands[2 + i] = part[0][j];
24549 operands[6 + i] = part[1][j];
24554 for (i = 0; i < nparts; i++)
24556 operands[2 + i] = part[0][i];
24557 operands[6 + i] = part[1][i];
24561 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
24562 if (optimize_insn_for_size_p ())
24564 for (j = 0; j < nparts - 1; j++)
24565 if (CONST_INT_P (operands[6 + j])
24566 && operands[6 + j] != const0_rtx
24567 && REG_P (operands[2 + j]))
24568 for (i = j; i < nparts - 1; i++)
24569 if (CONST_INT_P (operands[7 + i])
24570 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
24571 operands[7 + i] = operands[2 + j];
/* Finally emit the part moves in the order chosen above.  */
24574 for (i = 0; i < nparts; i++)
24575 emit_move_insn (operands[2 + i], operands[6 + i]);
24580 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
24581 left shift by a constant, either using a single shift or
24582 a sequence of add instructions. */
24585 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
24587 rtx (*insn)(rtx, rtx, rtx);
/* Prefer repeated self-addition (operand += operand shifts left by one)
   when the cost model says COUNT adds beat one shift-by-constant and we
   are not optimizing for size.  */
24590 || (count * ix86_cost->add <= ix86_cost->shift_const
24591 && !optimize_insn_for_size_p ()))
/* MODE is the double-word mode being split, so OPERAND itself is a
   half-sized piece: DImode deliberately selects the SImode generators
   (and TImode the DImode ones).  Not a typo.  */
24593 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
24594 while (count-- > 0)
24595 emit_insn (insn (operand, operand, operand));
24599 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24600 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift into word-sized operations.
   OPERANDS[0..2] are destination, source and shift count; SCRATCH,
   together with TARGET_CMOVE, enables the cmove-based fixup for
   variable counts.  MODE is the full double-word mode being split
   (DImode selects the 32-bit generators, otherwise the 64-bit ones --
   presumably TImode; TODO confirm).  */
24605 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
24607 rtx (*gen_ashl3)(rtx, rtx, rtx);
24608 rtx (*gen_shld)(rtx, rtx, rtx);
24609 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24611 rtx low[2], high[2];
/* Constant shift count: fully expand here, the count is known.  */
24614 if (CONST_INT_P (operands[2]))
24616 split_double_mode (mode, operands, 2, low, high);
24617 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count of at least a half word: low half becomes zero and the old
   low half, further shifted, becomes the high half.  */
24619 if (count >= half_width)
24621 emit_move_insn (high[0], low[1]);
24622 emit_move_insn (low[0], const0_rtx);
24624 if (count > half_width)
24625 ix86_expand_ashl_const (high[0], count - half_width, mode);
24629 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24631 if (!rtx_equal_p (operands[0], operands[1]))
24632 emit_move_insn (operands[0], operands[1]);
24634 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
24635 ix86_expand_ashl_const (low[0], count, mode);
24640 split_double_mode (mode, operands, 1, low, high);
24642 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
/* Special case 1 << N for a variable N.  */
24644 if (operands[1] == const1_rtx)
24646 /* Assuming we've chosen a QImode capable registers, then 1 << N
24647 can be done with two 32/64-bit shifts, no branches, no cmoves. */
24648 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
24650 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Test the half-width bit of the count and set exactly one of the
   byte registers from the Z flag (EQ into low, NE into high).  */
24652 ix86_expand_clear (low[0]);
24653 ix86_expand_clear (high[0]);
24654 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
24656 d = gen_lowpart (QImode, low[0]);
24657 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24658 s = gen_rtx_EQ (QImode, flags, const0_rtx);
24659 emit_insn (gen_rtx_SET (d, s));
24661 d = gen_lowpart (QImode, high[0]);
24662 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24663 s = gen_rtx_NE (QImode, flags, const0_rtx);
24664 emit_insn (gen_rtx_SET (d, s));
24667 /* Otherwise, we can get the same results by manually performing
24668 a bit extract operation on bit 5/6, and then performing the two
24669 shifts. The two methods of getting 0/1 into low/high are exactly
24670 the same size. Avoiding the shift in the bit extract case helps
24671 pentium4 a bit; no one else seems to care much either way. */
24674 machine_mode half_mode;
24675 rtx (*gen_lshr3)(rtx, rtx, rtx);
24676 rtx (*gen_and3)(rtx, rtx, rtx);
24677 rtx (*gen_xor3)(rtx, rtx, rtx);
24678 HOST_WIDE_INT bits;
24681 if (mode == DImode)
24683 half_mode = SImode;
24684 gen_lshr3 = gen_lshrsi3;
24685 gen_and3 = gen_andsi3;
24686 gen_xor3 = gen_xorsi3;
24691 half_mode = DImode;
24692 gen_lshr3 = gen_lshrdi3;
24693 gen_and3 = gen_anddi3;
24694 gen_xor3 = gen_xordi3;
/* Avoid a partial-register stall from the narrow lowpart read when
   the tuning says it matters and we are not optimizing for size.  */
24698 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
24699 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
24701 x = gen_lowpart (half_mode, operands[2]);
24702 emit_insn (gen_rtx_SET (high[0], x));
24704 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
24705 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
24706 emit_move_insn (low[0], high[0]);
24707 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
24710 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24711 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
/* Special case -1 << N for a variable N.  */
24715 if (operands[1] == constm1_rtx)
24717 /* For -1 << N, we can avoid the shld instruction, because we
24718 know that we're shifting 0...31/63 ones into a -1. */
24719 emit_move_insn (low[0], constm1_rtx);
24720 if (optimize_insn_for_size_p ())
24721 emit_move_insn (high[0], low[0]);
24723 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld into the high half, shift the low
   half, then fix up the result when the count reaches half_width.  */
24727 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24729 if (!rtx_equal_p (operands[0], operands[1]))
24730 emit_move_insn (operands[0], operands[1]);
24732 split_double_mode (mode, operands, 1, low, high);
24733 emit_insn (gen_shld (high[0], low[0], operands[2]));
24736 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24738 if (TARGET_CMOVE && scratch)
24740 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24741 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24743 ix86_expand_clear (scratch);
24744 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
24748 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24749 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24751 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into word-sized
   operations.  Mirrors ix86_split_ashl; the sign is propagated from
   the high half.  */
24756 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
24758 rtx (*gen_ashr3)(rtx, rtx, rtx)
24759 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
24760 rtx (*gen_shrd)(rtx, rtx, rtx);
24761 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24763 rtx low[2], high[2];
24766 if (CONST_INT_P (operands[2]))
24768 split_double_mode (mode, operands, 2, low, high);
24769 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Full-width - 1 shift: both halves become the sign mask.  */
24771 if (count == GET_MODE_BITSIZE (mode) - 1)
24773 emit_move_insn (high[0], high[1]);
24774 emit_insn (gen_ashr3 (high[0], high[0],
24775 GEN_INT (half_width - 1)));
24776 emit_move_insn (low[0], high[0]);
/* Count of at least a half word: low gets the old high half, high
   gets the sign mask, then shift low by the remainder.  */
24779 else if (count >= half_width)
24781 emit_move_insn (low[0], high[1]);
24782 emit_move_insn (high[0], low[0]);
24783 emit_insn (gen_ashr3 (high[0], high[0],
24784 GEN_INT (half_width - 1)));
24786 if (count > half_width)
24787 emit_insn (gen_ashr3 (low[0], low[0],
24788 GEN_INT (count - half_width)));
24792 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24794 if (!rtx_equal_p (operands[0], operands[1]))
24795 emit_move_insn (operands[0], operands[1]);
24797 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24798 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd into the low half, arithmetic shift of the
   high half, then fix up when the count reaches half_width.  */
24803 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24805 if (!rtx_equal_p (operands[0], operands[1]))
24806 emit_move_insn (operands[0], operands[1]);
24808 split_double_mode (mode, operands, 1, low, high);
24810 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24811 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
24813 if (TARGET_CMOVE && scratch)
24815 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24816 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
/* SCRATCH holds the sign mask (high >> (half_width - 1)) used by the
   cmove-based adjustment.  */
24818 emit_move_insn (scratch, high[0]);
24819 emit_insn (gen_ashr3 (scratch, scratch,
24820 GEN_INT (half_width - 1)));
24821 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24826 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
24827 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
24829 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into word-sized operations.
   Like ix86_split_ashr but zero-fills the high half instead of
   propagating the sign.  */
24835 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
24837 rtx (*gen_lshr3)(rtx, rtx, rtx)
24838 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
24839 rtx (*gen_shrd)(rtx, rtx, rtx);
24840 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24842 rtx low[2], high[2];
24845 if (CONST_INT_P (operands[2]))
24847 split_double_mode (mode, operands, 2, low, high);
24848 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count of at least a half word: low gets the old high half, high
   becomes zero, then shift low by the remainder.  */
24850 if (count >= half_width)
24852 emit_move_insn (low[0], high[1]);
24853 ix86_expand_clear (high[0]);
24855 if (count > half_width)
24856 emit_insn (gen_lshr3 (low[0], low[0],
24857 GEN_INT (count - half_width)));
24861 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24863 if (!rtx_equal_p (operands[0], operands[1]))
24864 emit_move_insn (operands[0], operands[1]);
24866 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24867 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd into the low half, logical shift of the high
   half, then fix up when the count reaches half_width.  */
24872 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24874 if (!rtx_equal_p (operands[0], operands[1]))
24875 emit_move_insn (operands[0], operands[1]);
24877 split_double_mode (mode, operands, 1, low, high);
24879 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24880 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
24882 if (TARGET_CMOVE && scratch)
24884 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24885 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24887 ix86_expand_clear (scratch);
24888 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24893 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24894 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24896 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
24901 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is scaled by REG_BR_PROB_BASE; callers below pass values such as
   REG_BR_PROB_BASE * 90 / 100.  The last emitted insn must be a jump.  */
24903 predict_jump (int prob)
24905 rtx insn = get_last_insn ();
24906 gcc_assert (JUMP_P (insn));
24907 add_int_reg_note (insn, REG_BR_PROB, prob);
24910 /* Helper function for the string operations below. Test VARIABLE whether
24911 it is aligned to VALUE bytes. If true, jump to the label. */
/* Emits (VARIABLE & VALUE) and a conditional jump on the result being
   zero; returns the label so the caller can emit the "bit set" code and
   then place the label after it.  The EPILOGUE flag appears to select
   the branch probability (50% vs 90%) -- the controlling 'if' falls
   outside this excerpt; confirm.  */
24912 static rtx_code_label *
24913 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
24915 rtx_code_label *label = gen_label_rtx ();
24916 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
24917 if (GET_MODE (variable) == DImode)
24918 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
24920 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
24921 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
24924 predict_jump (REG_BR_PROB_BASE * 50 / 100);
24926 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24930 /* Adjust COUNTER by the VALUE. */
/* Note: this emits an add of -VALUE, i.e. it DECREASES the counter
   register by VALUE.  */
24932 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
24934 rtx (*gen_add)(rtx, rtx, rtx)
24935 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
24937 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
24940 /* Zero extend possibly SImode EXP to Pmode register. */
/* The result is always a fresh (or forced) Pmode register, suitable for
   use in address arithmetic.  */
24942 ix86_zero_extend_to_Pmode (rtx exp)
24944 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
24947 /* Divide COUNTREG by SCALE. */
/* A constant count is folded at compile time; a register count is
   divided by emitting a logical right shift, so SCALE must be a power
   of two on that path (exact_log2).  */
24949 scale_counter (rtx countreg, int scale)
24955 if (CONST_INT_P (countreg))
24956 return GEN_INT (INTVAL (countreg) / scale);
24957 gcc_assert (REG_P (countreg));
24959 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
24960 GEN_INT (exact_log2 (scale)),
24961 NULL, 1, OPTAB_DIRECT);
24965 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
24966 DImode for constant loop counts. */
/* A non-VOIDmode expression keeps its own mode; a constant gets DImode
   only when it does not fit in 32 bits on a 64-bit target.  */
24968 static machine_mode
24969 counter_mode (rtx count_exp)
24971 if (GET_MODE (count_exp) != VOIDmode)
24972 return GET_MODE (count_exp);
24973 if (!CONST_INT_P (count_exp))
24975 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
24980 /* Copy the address to a Pmode register. This is used for x32 to
24981 truncate DImode TLS address to a SImode register. */
/* On x32 (Pmode == SImode) a DImode address is copied into a DImode
   register and returned as its SImode lowpart SUBREG; otherwise the
   address is simply copied and marked REG_POINTER.  */
24984 ix86_copy_addr_to_reg (rtx addr)
24987 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
24989 reg = copy_addr_to_reg (addr);
24990 REG_POINTER (reg) = 1;
24995 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
24996 reg = copy_to_mode_reg (DImode, addr);
24997 REG_POINTER (reg) = 1;
24998 return gen_rtx_SUBREG (SImode, reg, 0);
25002 /* When ISSETMEM is FALSE, output simple loop to move memory pointed to by SRCPTR
25003 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
25004 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
25005 memory by VALUE (supposed to be in MODE).
25007 The size is rounded down to whole number of chunk size moved at once.
25008 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
25012 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
25013 rtx destptr, rtx srcptr, rtx value,
25014 rtx count, machine_mode mode, int unroll,
25015 int expected_size, bool issetmem)
25017 rtx_code_label *out_label, *top_label;
25019 machine_mode iter_mode = counter_mode (count);
25020 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
25021 rtx piece_size = GEN_INT (piece_size_n);
25022 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
25026 top_label = gen_label_rtx ();
25027 out_label = gen_label_rtx ();
25028 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the unrolled chunk.  */
25030 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
25031 NULL, 1, OPTAB_DIRECT);
25032 /* Those two should combine. */
25033 if (piece_size == const1_rtx)
25035 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
25037 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25039 emit_move_insn (iter, const0_rtx);
25041 emit_label (top_label);
25043 tmp = convert_modes (Pmode, iter_mode, iter, true);
25045 /* This assert could be relaxed - in this case we'll need to compute
25046 smallest power of two, containing in PIECE_SIZE_N and pass it to
25048 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
25049 destmem = offset_address (destmem, tmp, piece_size_n);
25050 destmem = adjust_address (destmem, mode, 0);
25054 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
25055 srcmem = adjust_address (srcmem, mode, 0);
25057 /* When unrolling for chips that reorder memory reads and writes,
25058 we can save registers by using single temporary.
25059 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" below deliberately disables this branch;
   the all-loads-then-all-stores variant further down is always used.  */
25060 if (!TARGET_64BIT && 0)
25062 for (i = 0; i < unroll; i++)
25067 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25069 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25071 emit_move_insn (destmem, srcmem);
25077 gcc_assert (unroll <= 4);
/* Load all UNROLL pieces into temporaries first, then store them, to
   decouple the reads from the writes.  */
25078 for (i = 0; i < unroll; i++)
25080 tmpreg[i] = gen_reg_rtx (mode);
25084 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25086 emit_move_insn (tmpreg[i], srcmem);
25088 for (i = 0; i < unroll; i++)
25093 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25095 emit_move_insn (destmem, tmpreg[i]);
/* Setmem variant: store VALUE to each of the UNROLL chunks.  */
25100 for (i = 0; i < unroll; i++)
25104 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25105 emit_move_insn (destmem, value);
25108 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25109 true, OPTAB_LIB_WIDEN);
25111 emit_move_insn (iter, tmp);
25113 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the expected trip count when
   the caller supplied one; otherwise assume the loop mostly repeats.  */
25115 if (expected_size != -1)
25117 expected_size /= GET_MODE_SIZE (mode) * unroll;
25118 if (expected_size == 0)
25120 else if (expected_size > REG_BR_PROB_BASE)
25121 predict_jump (REG_BR_PROB_BASE - 1);
25123 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25126 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointer registers past the copied region.  */
25127 iter = ix86_zero_extend_to_Pmode (iter);
25128 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25129 true, OPTAB_LIB_WIDEN);
25130 if (tmp != destptr)
25131 emit_move_insn (destptr, tmp);
25134 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25135 true, OPTAB_LIB_WIDEN);
25137 emit_move_insn (srcptr, tmp);
25139 emit_label (out_label);
25142 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25143 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25144 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25145 For setmem case, VALUE is ORIG_VALUE promoted to a wider size.
25146 ORIG_VALUE is the original value passed to memset to fill the memory with.
25147 Other arguments have same meaning as for previous function. */
25150 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25151 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25153 machine_mode mode, bool issetmem)
25158 HOST_WIDE_INT rounded_count;
25160 /* If possible, it is shorter to use rep movs.
25161 TODO: Maybe it is better to move this logic to decide_alg. */
/* A byte count that is a known multiple of 4 (and, for setmem, a zero
   fill value) can use the word-sized string op instead.  */
25162 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25163 && (!issetmem || orig_value == const0_rtx))
25166 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25167 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* COUNTREG counts MODE-sized chunks, not bytes.  */
25169 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25170 GET_MODE_SIZE (mode)));
/* DESTEXP/SRCEXP describe the final pointer values (ptr + count *
   chunk-size) for the rep patterns.  */
25171 if (mode != QImode)
25173 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25174 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25175 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25178 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
25179 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25182 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25183 destmem = shallow_copy_rtx (destmem);
25184 set_mem_size (destmem, rounded_count);
25186 else if (MEM_SIZE_KNOWN_P (destmem))
25187 clear_mem_size (destmem);
25191 value = force_reg (mode, gen_lowpart (mode, value));
25192 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
25196 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25197 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25198 if (mode != QImode)
25200 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25201 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25202 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25205 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25206 if (CONST_INT_P (count))
25209 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25210 srcmem = shallow_copy_rtx (srcmem);
25211 set_mem_size (srcmem, rounded_count);
25215 if (MEM_SIZE_KNOWN_P (srcmem))
25216 clear_mem_size (srcmem);
25218 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25223 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25225 SRC is passed by pointer to be updated on return.
25226 Return value is updated DST. */
/* Both pointer registers DESTPTR/SRCPTR are advanced past the copied
   region as a side effect.  SIZE_TO_MOVE must be a multiple of the
   chosen piece size (asserted below).  */
25228 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25229 HOST_WIDE_INT size_to_move)
25231 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25232 enum insn_code code;
25233 machine_mode move_mode;
25236 /* Find the widest mode in which we could perform moves.
25237 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25238 it until move of such size is supported. */
25239 piece_size = 1 << floor_log2 (size_to_move);
25240 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25241 code = optab_handler (mov_optab, move_mode);
25242 while (code == CODE_FOR_nothing && piece_size > 1)
25245 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25246 code = optab_handler (mov_optab, move_mode);
25249 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25250 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25251 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25253 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25254 move_mode = mode_for_vector (word_mode, nunits);
25255 code = optab_handler (mov_optab, move_mode);
/* No vector move of that width available: fall back to word moves.  */
25256 if (code == CODE_FOR_nothing)
25258 move_mode = word_mode;
25259 piece_size = GET_MODE_SIZE (move_mode);
25260 code = optab_handler (mov_optab, move_mode);
25263 gcc_assert (code != CODE_FOR_nothing);
25265 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25266 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25268 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25269 gcc_assert (size_to_move % piece_size == 0);
25270 adjust = GEN_INT (piece_size);
25271 for (i = 0; i < size_to_move; i += piece_size)
25273 /* We move from memory to memory, so we'll need to do it via
25274 a temporary register. */
25275 tempreg = gen_reg_rtx (move_mode);
25276 emit_insn (GEN_FCN (code) (tempreg, src));
25277 emit_insn (GEN_FCN (code) (dst, tempreg));
/* Bump both pointers by the piece size and rebuild the MEMs.  */
25279 emit_move_insn (destptr,
25280 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25281 emit_move_insn (srcptr,
25282 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25284 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25286 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25290 /* Update DST and SRC rtx. */
25295 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
25297 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25298 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit one unconditional memmov per set bit of the
   residual size, from the largest power of two down.  */
25301 if (CONST_INT_P (count))
25303 HOST_WIDE_INT countval = INTVAL (count);
25304 HOST_WIDE_INT epilogue_size = countval % max_size;
25307 /* For now MAX_SIZE should be a power of 2. This assert could be
25308 relaxed, but it'll require a bit more complicated epilogue
25310 gcc_assert ((max_size & (max_size - 1)) == 0);
25311 for (i = max_size; i >= 1; i >>= 1)
25313 if (epilogue_size & i)
25314 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Large variable residue: mask the count and fall back to a byte loop.  */
25320 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25321 count, 1, OPTAB_DIRECT);
25322 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25323 count, QImode, 1, 4, false);
25327 /* When there are stringops, we can cheaply increase dest and src pointers.
25328 Otherwise we save code size by maintaining offset (zero is readily
25329 available from preceding rep operation) and using x86 addressing modes.
/* Variable residue <= 7 bytes: test bits 4/2/1 of the count and emit a
   guarded 4-, 2- and 1-byte move for each.  */
25331 if (TARGET_SINGLE_STRINGOP)
25335 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25336 src = change_address (srcmem, SImode, srcptr);
25337 dest = change_address (destmem, SImode, destptr);
25338 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25339 emit_label (label);
25340 LABEL_NUSES (label) = 1;
25344 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25345 src = change_address (srcmem, HImode, srcptr);
25346 dest = change_address (destmem, HImode, destptr);
25347 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25348 emit_label (label);
25349 LABEL_NUSES (label) = 1;
25353 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25354 src = change_address (srcmem, QImode, srcptr);
25355 dest = change_address (destmem, QImode, destptr);
25356 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25357 emit_label (label);
25358 LABEL_NUSES (label) = 1;
/* No single-insn string moves: keep a running OFFSET register instead
   of bumping the pointers, and address via base + offset.  */
25363 rtx offset = force_reg (Pmode, const0_rtx);
25368 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25369 src = change_address (srcmem, SImode, srcptr);
25370 dest = change_address (destmem, SImode, destptr);
25371 emit_move_insn (dest, src);
25372 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25373 true, OPTAB_LIB_WIDEN);
25375 emit_move_insn (offset, tmp);
25376 emit_label (label);
25377 LABEL_NUSES (label) = 1;
25381 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25382 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25383 src = change_address (srcmem, HImode, tmp);
25384 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25385 dest = change_address (destmem, HImode, tmp);
25386 emit_move_insn (dest, src);
25387 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25388 true, OPTAB_LIB_WIDEN);
25390 emit_move_insn (offset, tmp);
25391 emit_label (label);
25392 LABEL_NUSES (label) = 1;
25396 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25397 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25398 src = change_address (srcmem, QImode, tmp);
25399 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25400 dest = change_address (destmem, QImode, tmp);
25401 emit_move_insn (dest, src);
25402 emit_label (label);
25403 LABEL_NUSES (label) = 1;
25408 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
25409 with value PROMOTED_VAL.
25410 SRC is passed by pointer to be updated on return.
25411 Return value is updated DST. */
/* Setmem counterpart of emit_memmov: DESTPTR is advanced past the
   filled region as a side effect.  */
25413 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
25414 HOST_WIDE_INT size_to_move)
25416 rtx dst = destmem, adjust;
25417 enum insn_code code;
25418 machine_mode move_mode;
25421 /* Find the widest mode in which we could perform moves.
25422 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25423 it until move of such size is supported. */
25424 move_mode = GET_MODE (promoted_val);
25425 if (move_mode == VOIDmode)
25426 move_mode = QImode;
/* For a short fill, narrow the promoted value to the needed width.  */
25427 if (size_to_move < GET_MODE_SIZE (move_mode))
25429 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
25430 promoted_val = gen_lowpart (move_mode, promoted_val);
25432 piece_size = GET_MODE_SIZE (move_mode);
25433 code = optab_handler (mov_optab, move_mode);
25434 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
25436 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25438 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25439 gcc_assert (size_to_move % piece_size == 0);
25440 adjust = GEN_INT (piece_size);
25441 for (i = 0; i < size_to_move; i += piece_size)
/* Word-sized or smaller pieces can use the strset pattern, which also
   advances DESTPTR; wider (vector) stores bump the pointer manually.  */
25443 if (piece_size <= GET_MODE_SIZE (word_mode))
25445 emit_insn (gen_strset (destptr, dst, promoted_val));
25446 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25451 emit_insn (GEN_FCN (code) (dst, promoted_val));
25453 emit_move_insn (destptr,
25454 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25456 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25460 /* Update DST rtx. */
25463 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Masks COUNT down to the residual byte count and delegates to the
   generic byte-wide set loop.  */
25465 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
25466 rtx count, int max_size)
25469 expand_simple_binop (counter_mode (count), AND, count,
25470 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
25471 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
25472 gen_lowpart (QImode, value), count, QImode,
25473 1, max_size / 2, true);
25476 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* VALUE is the word-promoted fill value; VEC_VALUE, when non-NULL, is a
   wider vector-promoted copy used for pieces larger than VALUE's mode.  */
25478 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
25479 rtx count, int max_size)
/* Constant count: one unconditional memset per set bit of the residual
   size, largest first.  */
25483 if (CONST_INT_P (count))
25485 HOST_WIDE_INT countval = INTVAL (count);
25486 HOST_WIDE_INT epilogue_size = countval % max_size;
25489 /* For now MAX_SIZE should be a power of 2. This assert could be
25490 relaxed, but it'll require a bit more complicated epilogue
25492 gcc_assert ((max_size & (max_size - 1)) == 0);
25493 for (i = max_size; i >= 1; i >>= 1)
25495 if (epilogue_size & i)
25497 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25498 destmem = emit_memset (destmem, destptr, vec_value, i);
25500 destmem = emit_memset (destmem, destptr, value, i);
/* Large variable residue: fall back to the byte loop.  */
25507 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residue: test bits 16/8/4/2/1 of COUNT and emit a guarded
   strset sequence for each; 64-bit targets use DImode stores where the
   32-bit path needs several SImode ones.  */
25512 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
25515 dest = change_address (destmem, DImode, destptr);
25516 emit_insn (gen_strset (destptr, dest, value));
25517 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
25518 emit_insn (gen_strset (destptr, dest, value));
25522 dest = change_address (destmem, SImode, destptr);
25523 emit_insn (gen_strset (destptr, dest, value));
25524 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25525 emit_insn (gen_strset (destptr, dest, value));
25526 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
25527 emit_insn (gen_strset (destptr, dest, value));
25528 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
25529 emit_insn (gen_strset (destptr, dest, value));
25531 emit_label (label);
25532 LABEL_NUSES (label) = 1;
25536 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
25539 dest = change_address (destmem, DImode, destptr);
25540 emit_insn (gen_strset (destptr, dest, value));
25544 dest = change_address (destmem, SImode, destptr);
25545 emit_insn (gen_strset (destptr, dest, value));
25546 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25547 emit_insn (gen_strset (destptr, dest, value));
25549 emit_label (label);
25550 LABEL_NUSES (label) = 1;
25554 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25555 dest = change_address (destmem, SImode, destptr);
25556 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
25557 emit_label (label);
25558 LABEL_NUSES (label) = 1;
25562 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25563 dest = change_address (destmem, HImode, destptr);
25564 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
25565 emit_label (label);
25566 LABEL_NUSES (label) = 1;
25570 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25571 dest = change_address (destmem, QImode, destptr);
25572 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
25573 emit_label (label);
25574 LABEL_NUSES (label) = 1;
25578 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
25579 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
25580 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
25582 Return value is updated DESTMEM. */
25584 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
25585 rtx destptr, rtx srcptr, rtx value,
25586 rtx vec_value, rtx count, int align,
25587 int desired_alignment, bool issetmem)
/* For each power of two between ALIGN and DESIRED_ALIGNMENT, conditionally
   emit an I-byte set/copy so DESTPTR becomes aligned to the next level.  */
25590 for (i = 1; i < desired_alignment; i <<= 1)
/* Branch taken when DESTPTR is already aligned to 2*I — skip this chunk.  */
25594 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
25597 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25598 destmem = emit_memset (destmem, destptr, vec_value, i);
25600 destmem = emit_memset (destmem, destptr, value, i);
25603 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Account for the bytes just handled.  */
25604 ix86_adjust_counter (count, i);
25605 emit_label (label);
25606 LABEL_NUSES (label) = 1;
/* After this step the destination is known aligned to 2*I bytes.  */
25607 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
25613 /* Test if COUNT&SIZE is nonzero and if so, expand movmem
25614 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
25615 and jump to DONE_LABEL. */
/* The trick: emit possibly-overlapping moves — SIZE bytes from the start of
   the block and SIZE bytes ending at its last byte — which together cover
   any length in SIZE..2*SIZE-1.  */
25617 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
25618 rtx destptr, rtx srcptr,
25619 rtx value, rtx vec_value,
25620 rtx count, int size,
25621 rtx done_label, bool issetmem)
25623 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
25624 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
25628 /* If we do not have vector value to copy, we must reduce size. */
25633 if (GET_MODE (value) == VOIDmode && size > 8)
25635 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
25636 mode = GET_MODE (value);
25639 mode = GET_MODE (vec_value), value = vec_value;
25643 /* Choose appropriate vector mode. */
25645 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
25646 else if (size >= 16)
25647 mode = TARGET_SSE ? V16QImode : DImode;
25648 srcmem = change_address (srcmem, mode, srcptr);
25650 destmem = change_address (destmem, mode, destptr);
25651 modesize = GEN_INT (GET_MODE_SIZE (mode));
25652 gcc_assert (GET_MODE_SIZE (mode) <= size);
/* First pass: set/copy SIZE bytes starting at the beginning.  */
25653 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25656 emit_move_insn (destmem, gen_lowpart (mode, value));
25659 emit_move_insn (destmem, srcmem);
25660 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25662 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode))
/* Rewind addresses to COUNT - SIZE relative to the already-advanced
   pointers (hence the -2*SIZE adjustment).  */
25665 destmem = offset_address (destmem, count, 1);
25666 destmem = offset_address (destmem, GEN_INT (-2 * size),
25667 GET_MODE_SIZE (mode));
25670 srcmem = offset_address (srcmem, count, 1);
25671 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
25672 GET_MODE_SIZE (mode));
/* Second pass: set/copy the last SIZE bytes, overlapping the first pass
   when COUNT < 2*SIZE.  */
25674 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25677 emit_move_insn (destmem, gen_lowpart (mode, value));
25680 emit_move_insn (destmem, srcmem);
25681 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25683 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25685 emit_jump_insn (gen_jump (done_label));
25688 emit_label (label);
25689 LABEL_NUSES (label) = 1;
25692 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
25693 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
25694 bytes and last SIZE bytes adjusting DESTPTR/SRCPTR/COUNT in a way we can
25695 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
25696 DONE_LABEL is a label after the whole copying sequence. The label is created
25697 on demand if *DONE_LABEL is NULL.
25698 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
25699 bounds after the initial copies.
25701 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
25702 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
25703 we will dispatch to a library call for large blocks.
25705 In pseudocode we do:
25709 Assume that SIZE is 4. Bigger sizes are handled analogously
25712 copy 4 bytes from SRCPTR to DESTPTR
25713 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
25718 copy 1 byte from SRCPTR to DESTPTR
25721 copy 2 bytes from SRCPTR to DESTPTR
25722 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
25727 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
25728 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
25730 OLD_DESTPTR = DESTPTR;
25731 Align DESTPTR up to DESIRED_ALIGN
25732 SRCPTR += DESTPTR - OLD_DESTPTR
25733 COUNT -= DEST_PTR - OLD_DESTPTR
25735 Round COUNT down to multiple of SIZE
25736 << optional caller supplied zero size guard is here >>
25737 << optional caller supplied dynamic check is here >>
25738 << caller supplied main copy loop is here >>
25743 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
25744 rtx *destptr, rtx *srcptr,
25746 rtx value, rtx vec_value,
25748 rtx_code_label **done_label,
25752 unsigned HOST_WIDE_INT *min_size,
25753 bool dynamic_check,
25756 rtx_code_label *loop_label = NULL, *label;
25759 int prolog_size = 0;
25762 /* Chose proper value to copy. */
25763 if (issetmem && VECTOR_MODE_P (mode))
25764 mode_value = vec_value;
25766 mode_value = value;
25767 gcc_assert (GET_MODE_SIZE (mode) <= size);
25769 /* See if block is big or small, handle small blocks. */
25770 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
25773 loop_label = gen_label_rtx ();
25776 *done_label = gen_label_rtx ();
/* Dispatch: blocks of at least SIZE bytes go to the main path below.  */
25778 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
25782 /* Handle sizes > 3. */
25783 for (;size2 > 2; size2 >>= 1)
25784 expand_small_movmem_or_setmem (destmem, srcmem,
25788 size2, *done_label, issetmem);
25789 /* Nothing to copy? Jump to DONE_LABEL if so */
25790 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
25793 /* Do a byte copy. */
25794 destmem = change_address (destmem, QImode, *destptr);
25796 emit_move_insn (destmem, gen_lowpart (QImode, value));
25799 srcmem = change_address (srcmem, QImode, *srcptr);
25800 emit_move_insn (destmem, srcmem);
25803 /* Handle sizes 2 and 3. */
25804 label = ix86_expand_aligntest (*count, 2, false);
/* Overlapping HImode move at COUNT - 2 covers both the 2 and 3 byte cases.  */
25805 destmem = change_address (destmem, HImode, *destptr);
25806 destmem = offset_address (destmem, *count, 1);
25807 destmem = offset_address (destmem, GEN_INT (-2), 2);
25809 emit_move_insn (destmem, gen_lowpart (HImode, value));
25812 srcmem = change_address (srcmem, HImode, *srcptr);
25813 srcmem = offset_address (srcmem, *count, 1);
25814 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
25815 emit_move_insn (destmem, srcmem);
25818 emit_label (label);
25819 LABEL_NUSES (label) = 1;
25820 emit_jump_insn (gen_jump (*done_label));
/* Here the block is known to be at least SIZE bytes long.  */
25824 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
25825 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
25827 /* Start memcpy for COUNT >= SIZE. */
25830 emit_label (loop_label);
25831 LABEL_NUSES (loop_label) = 1;
25834 /* Copy first desired_align bytes. */
25836 srcmem = change_address (srcmem, mode, *srcptr);
25837 destmem = change_address (destmem, mode, *destptr);
25838 modesize = GEN_INT (GET_MODE_SIZE (mode));
/* Emit enough MODE-sized moves to cover DESIRED_ALIGN - ALIGN bytes.  */
25839 for (n = 0; prolog_size < desired_align - align; n++)
25842 emit_move_insn (destmem, mode_value);
25845 emit_move_insn (destmem, srcmem);
25846 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25848 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25849 prolog_size += GET_MODE_SIZE (mode);
25853 /* Copy last SIZE bytes. */
/* Addresses are relative to the already-advanced prologue pointers, hence
   the extra -prolog_size adjustment.  */
25854 destmem = offset_address (destmem, *count, 1);
25855 destmem = offset_address (destmem,
25856 GEN_INT (-size - prolog_size),
25859 emit_move_insn (destmem, mode_value);
25862 srcmem = offset_address (srcmem, *count, 1);
25863 srcmem = offset_address (srcmem,
25864 GEN_INT (-size - prolog_size),
25866 emit_move_insn (destmem, srcmem);
25868 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
25870 destmem = offset_address (destmem, modesize, 1);
25872 emit_move_insn (destmem, mode_value);
25875 srcmem = offset_address (srcmem, modesize, 1);
25876 emit_move_insn (destmem, srcmem);
25880 /* Align destination. */
25881 if (desired_align > 1 && desired_align > align)
25883 rtx saveddest = *destptr;
25885 gcc_assert (desired_align <= size);
25886 /* Align destptr up, place it to new register. */
25887 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
25888 GEN_INT (prolog_size),
25889 NULL_RTX, 1, OPTAB_DIRECT);
/* Preserve the REG_POINTER property across the copy.  */
25890 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
25891 REG_POINTER (*destptr) = 1;
/* Round down to DESIRED_ALIGN: destptr = (destptr + prolog_size) & -align.  */
25892 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
25893 GEN_INT (-desired_align),
25894 *destptr, 1, OPTAB_DIRECT);
25895 /* See how many bytes we skipped. */
25896 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
25898 saveddest, 1, OPTAB_DIRECT);
25899 /* Adjust srcptr and count. */
25901 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
25902 saveddest, *srcptr, 1, OPTAB_DIRECT);
25903 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25904 saveddest, *count, 1, OPTAB_DIRECT);
25905 /* We copied at most size + prolog_size. */
25906 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
25908 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
25912 /* Our loops always round down the block size, but for dispatch to
25913 library we need precise value. */
25915 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
25916 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
25920 gcc_assert (prolog_size == 0);
25921 /* Decrease count, so we won't end up copying last word twice. */
25922 if (!CONST_INT_P (*count))
25923 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25924 constm1_rtx, *count, 1, OPTAB_DIRECT);
25926 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
25927 (unsigned HOST_WIDE_INT)size));
25929 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
25934 /* This function is like the previous one, except here we know how many bytes
25935 need to be copied. That allows us to update alignment not only of DST, which
25936 is returned, but also of SRC, which is passed as a pointer for that
25939 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
25940 rtx srcreg, rtx value, rtx vec_value,
25941 int desired_align, int align_bytes,
25945 rtx orig_dst = dst;
25946 rtx orig_src = NULL;
25947 int piece_size = 1;
25948 int copied_bytes = 0;
25952 gcc_assert (srcp != NULL);
/* Emit set/copy pieces for each set bit of ALIGN_BYTES, smallest first,
   until the destination reaches DESIRED_ALIGN.  */
25957 for (piece_size = 1;
25958 piece_size <= desired_align && copied_bytes < align_bytes;
25961 if (align_bytes & piece_size)
25965 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
25966 dst = emit_memset (dst, destreg, vec_value, piece_size);
25968 dst = emit_memset (dst, destreg, value, piece_size);
25971 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
25972 copied_bytes += piece_size;
/* Record the destination's new, stronger alignment and shrunken size.  */
25975 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
25976 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25977 if (MEM_SIZE_KNOWN_P (orig_dst))
25978 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
/* If the source's misalignment is known, derive the alignment it gains
   from the same ALIGN_BYTES advance and record it too.  */
25982 int src_align_bytes = get_mem_align_offset (src, desired_align
25984 if (src_align_bytes >= 0)
25985 src_align_bytes = desired_align - src_align_bytes;
25986 if (src_align_bytes >= 0)
25988 unsigned int src_align;
25989 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
25991 if ((src_align_bytes & (src_align - 1))
25992 == (align_bytes & (src_align - 1)))
25995 if (src_align > (unsigned int) desired_align)
25996 src_align = desired_align;
25997 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
25998 set_mem_align (src, src_align * BITS_PER_UNIT);
26000 if (MEM_SIZE_KNOWN_P (orig_src))
26001 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
26008 /* Return true if ALG can be used in current context.
26009 Assume we expand memset if MEMSET is true. */
/* HAVE_AS is true when a non-default address space is involved.  */
26011 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
26013 if (alg == no_stringop)
/* Vector loop needs at least SSE (AVX widens it further).  */
26015 if (alg == vector_loop)
26016 return TARGET_SSE || TARGET_AVX;
26017 /* Algorithms using the rep prefix want at least edi and ecx;
26018 additionally, memset wants eax and memcpy wants esi. Don't
26019 consider such algorithms if the user has appropriated those
26020 registers for their own purposes, or if we have a non-default
26021 address space, since some string insns cannot override the segment. */
26022 if (alg == rep_prefix_1_byte
26023 || alg == rep_prefix_4_byte
26024 || alg == rep_prefix_8_byte)
26028 if (fixed_regs[CX_REG]
26029 || fixed_regs[DI_REG]
26030 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
26036 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Chooses among loop / unrolled loop / vector loop / rep-prefix variants /
   libcall, honoring -mstringop-strategy, per-CPU cost tables, optimize-for-
   size, and register availability.  RECUR guards against unbounded recursion
   when re-deciding with a synthetic expected size.  */
26037 static enum stringop_alg
26038 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
26039 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
26040 bool memset, bool zero_memset, bool have_as,
26041 int *dynamic_check, bool *noalign, bool recur)
26043 const struct stringop_algs *algs;
26044 bool optimize_for_speed;
26046 const struct processor_costs *cost;
26048 bool any_alg_usable_p = false;
26051 *dynamic_check = -1;
26053 /* Even if the string operation call is cold, we still might spend a lot
26054 of time processing large blocks. */
26055 if (optimize_function_for_size_p (cfun)
26056 || (optimize_insn_for_size_p ()
26058 || (expected_size != -1 && expected_size < 256))))
26059 optimize_for_speed = false;
26061 optimize_for_speed = true;
26063 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
/* Cost tables are indexed by [32-bit, 64-bit].  */
26065 algs = &cost->memset[TARGET_64BIT != 0];
26067 algs = &cost->memcpy[TARGET_64BIT != 0];
26069 /* See maximal size for user defined algorithm. */
26070 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26072 enum stringop_alg candidate = algs->size[i].alg;
26073 bool usable = alg_usable_p (candidate, memset, have_as);
26074 any_alg_usable_p |= usable;
26076 if (candidate != libcall && candidate && usable)
26077 max = algs->size[i].max;
26080 /* If expected size is not known but max size is small enough
26081 so inline version is a win, set expected size into
26083 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26084 && expected_size == -1)
26085 expected_size = min_size / 2 + max_size / 2;
26087 /* If user specified the algorithm, honor it if possible. */
26088 if (ix86_stringop_alg != no_stringop
26089 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26090 return ix86_stringop_alg;
26091 /* rep; movq or rep; movl is the smallest variant. */
26092 else if (!optimize_for_speed)
/* Byte variant when count is unknown/unaligned or the fill value is
   nonzero; word-sized rep otherwise.  */
26095 if (!count || (count & 3) || (memset && !zero_memset))
26096 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26097 ? rep_prefix_1_byte : loop_1_byte;
26099 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26100 ? rep_prefix_4_byte : loop;
26102 /* Very tiny blocks are best handled via the loop, REP is expensive to
26104 else if (expected_size != -1 && expected_size < 4)
26105 return loop_1_byte;
26106 else if (expected_size != -1)
/* Walk the cost table and pick the entry covering EXPECTED_SIZE.  */
26108 enum stringop_alg alg = libcall;
26109 bool alg_noalign = false;
26110 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26112 /* We get here if the algorithms that were not libcall-based
26113 were rep-prefix based and we are unable to use rep prefixes
26114 based on global register usage. Break out of the loop and
26115 use the heuristic below. */
26116 if (algs->size[i].max == 0)
26118 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26120 enum stringop_alg candidate = algs->size[i].alg;
26122 if (candidate != libcall
26123 && alg_usable_p (candidate, memset, have_as))
26126 alg_noalign = algs->size[i].noalign;
26128 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26129 last non-libcall inline algorithm. */
26130 if (TARGET_INLINE_ALL_STRINGOPS)
26132 /* When the current size is best to be copied by a libcall,
26133 but we are still forced to inline, run the heuristic below
26134 that will pick code for medium sized blocks. */
26135 if (alg != libcall)
26137 *noalign = alg_noalign;
26140 else if (!any_alg_usable_p)
26143 else if (alg_usable_p (candidate, memset, have_as))
26145 *noalign = algs->size[i].noalign;
26151 /* When asked to inline the call anyway, try to pick meaningful choice.
26152 We look for maximal size of block that is faster to copy by hand and
26153 take blocks of at most of that size guessing that average size will
26154 be roughly half of the block.
26156 If this turns out to be bad, we might simply specify the preferred
26157 choice in ix86_costs. */
26158 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26159 && (algs->unknown_size == libcall
26160 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26162 enum stringop_alg alg;
26163 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
26165 /* If there aren't any usable algorithms or if recursing already,
26166 then recursing on smaller sizes or same size isn't going to
26167 find anything. Just return the simple byte-at-a-time copy loop. */
26168 if (!any_alg_usable_p || recur)
26170 /* Pick something reasonable. */
26171 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
26172 *dynamic_check = 128;
26173 return loop_1_byte;
/* Recurse once with the synthetic expected size; RECUR=true stops
   further recursion.  */
26175 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
26176 zero_memset, have_as, dynamic_check, noalign, true);
26177 gcc_assert (*dynamic_check == -1);
26178 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26179 *dynamic_check = max;
26181 gcc_assert (alg != libcall);
26184 return (alg_usable_p (algs->unknown_size, memset, have_as)
26185 ? algs->unknown_size : libcall);
26188 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26189 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26191 decide_alignment (int align,
26192 enum stringop_alg alg,
26194 machine_mode move_mode)
26196 int desired_align = 0;
26198 gcc_assert (alg != no_stringop);
26200 if (alg == libcall)
26202 if (move_mode == VOIDmode)
/* Default: align the destination to the width of the move mode.  */
26205 desired_align = GET_MODE_SIZE (move_mode);
26206 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
26207 copying whole cacheline at once. */
26208 if (TARGET_PENTIUMPRO
26209 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
/* Never request less than the known alignment, and don't bother
   aligning blocks expected to be tiny.  */
26214 if (desired_align < align)
26215 desired_align = align;
26216 if (expected_size != -1 && expected_size < 4)
26217 desired_align = align;
26219 return desired_align;
26223 /* Helper function for memcpy. For QImode value 0xXY produce
26224 0xXYXYXYXY of wide specified by MODE. This is essentially
26225 a * 0x10101010, but we can do slightly better than
26226 synth_mult by unwinding the sequence by hand on CPUs with
26229 promote_duplicated_reg (machine_mode mode, rtx val)
26231 machine_mode valmode = GET_MODE (val);
/* Number of shift+or steps: 3 for DImode (8->16->32->64), 2 for SImode.  */
26233 int nops = mode == DImode ? 3 : 2;
26235 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26236 if (val == const0_rtx)
26237 return copy_to_mode_reg (mode, CONST0_RTX (mode));
/* Constant byte: compute the replicated pattern at compile time.  */
26238 if (CONST_INT_P (val))
26240 HOST_WIDE_INT v = INTVAL (val) & 255;
26244 if (mode == DImode)
26245 v |= (v << 16) << 16;
26246 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26249 if (valmode == VOIDmode)
26251 if (valmode != QImode)
26252 val = gen_lowpart (QImode, val);
26253 if (mode == QImode)
26255 if (!TARGET_PARTIAL_REG_STALL)
/* Compare multiply cost against the shift/or unrolled sequence; use a
   multiply by the replicated-ones constant when it is cheaper.  */
26257 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26258 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26259 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26260 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26262 rtx reg = convert_modes (mode, QImode, val, true);
26263 tmp = promote_duplicated_reg (mode, const1_rtx);
26264 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
26269 rtx reg = convert_modes (mode, QImode, val, true);
/* Duplicate the low byte into bits 8..15 via an insv pattern when partial
   register stalls are not a concern.  */
26271 if (!TARGET_PARTIAL_REG_STALL)
26272 if (mode == SImode)
26273 emit_insn (gen_insvsi_1 (reg, reg));
26275 emit_insn (gen_insvdi_1 (reg, reg));
26278 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26279 NULL, 1, OPTAB_DIRECT);
26281 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Widen 16-bit pattern to 32 bits, then (for DImode) to 64 bits.  */
26283 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26284 NULL, 1, OPTAB_DIRECT);
26285 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26286 if (mode == SImode)
26288 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26289 NULL, 1, OPTAB_DIRECT);
26290 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26295 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
26296 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
26297 alignment from ALIGN to DESIRED_ALIGN. */
26299 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
/* Pick the widest integer mode actually required by either the main loop
   chunk size or the alignment prologue, and replicate VAL to it.  */
26305 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26306 promoted_val = promote_duplicated_reg (DImode, val);
26307 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26308 promoted_val = promote_duplicated_reg (SImode, val);
26309 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26310 promoted_val = promote_duplicated_reg (HImode, val);
26312 promoted_val = val;
26314 return promoted_val;
26317 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
26318 operations when profitable. The code depends upon architecture, block size
26319 and alignment, but always has one of the following overall structures:
26321 Aligned move sequence:
26323 1) Prologue guard: Conditional that jumps up to epilogues for small
26324 blocks that can be handled by epilogue alone. This is faster
26325 but also needed for correctness, since prologue assume the block
26326 is larger than the desired alignment.
26328 Optional dynamic check for size and libcall for large
26329 blocks is emitted here too, with -minline-stringops-dynamically.
26331 2) Prologue: copy first few bytes in order to get destination
26332 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26333 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26334 copied. We emit either a jump tree on power of two sized
26335 blocks, or a byte loop.
26337 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26338 with specified algorithm.
26340 4) Epilogue: code copying tail of the block that is too small to be
26341 handled by main body (or up to size guarded by prologue guard).
26343 Misaligned move sequence
26345 1) misaligned move prologue/epilogue containing:
26346 a) Prologue handling small memory blocks and jumping to done_label
26347 (skipped if blocks are known to be large enough)
26348 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
26349 needed by single possibly misaligned move
26350 (skipped if alignment is not needed)
26351 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
26353 2) Zero size guard dispatching to done_label, if needed
26355 3) dispatch to library call, if needed,
26357 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26358 with specified algorithm. */
26360 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26361 rtx align_exp, rtx expected_align_exp,
26362 rtx expected_size_exp, rtx min_size_exp,
26363 rtx max_size_exp, rtx probable_max_size_exp,
26368 rtx_code_label *label = NULL;
26370 rtx_code_label *jump_around_label = NULL;
26371 HOST_WIDE_INT align = 1;
26372 unsigned HOST_WIDE_INT count = 0;
26373 HOST_WIDE_INT expected_size = -1;
26374 int size_needed = 0, epilogue_size_needed;
26375 int desired_align = 0, align_bytes = 0;
26376 enum stringop_alg alg;
26377 rtx promoted_val = NULL;
26378 rtx vec_promoted_val = NULL;
26379 bool force_loopy_epilogue = false;
26381 bool need_zero_guard = false;
26383 machine_mode move_mode = VOIDmode;
26384 int unroll_factor = 1;
26385 /* TODO: Once value ranges are available, fill in proper data. */
26386 unsigned HOST_WIDE_INT min_size = 0;
26387 unsigned HOST_WIDE_INT max_size = -1;
26388 unsigned HOST_WIDE_INT probable_max_size = -1;
26389 bool misaligned_prologue_used = false;
26392 if (CONST_INT_P (align_exp))
26393 align = INTVAL (align_exp);
26394 /* i386 can do misaligned access on reasonably increased cost. */
26395 if (CONST_INT_P (expected_align_exp)
26396 && INTVAL (expected_align_exp) > align)
26397 align = INTVAL (expected_align_exp);
26398 /* ALIGN is the minimum of destination and source alignment, but we care here
26399 just about destination alignment. */
26401 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26402 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26404 if (CONST_INT_P (count_exp))
26406 min_size = max_size = probable_max_size = count = expected_size
26407 = INTVAL (count_exp);
26408 /* When COUNT is 0, there is nothing to do. */
26415 min_size = INTVAL (min_size_exp);
26417 max_size = INTVAL (max_size_exp);
26418 if (probable_max_size_exp)
26419 probable_max_size = INTVAL (probable_max_size_exp);
26420 if (CONST_INT_P (expected_size_exp))
26421 expected_size = INTVAL (expected_size_exp);
26424 /* Make sure we don't need to care about overflow later on. */
26425 if (count > (HOST_WIDE_INT_1U << 30))
26428 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
26430 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
26432 /* Step 0: Decide on preferred algorithm, desired alignment and
26433 size of chunks to be copied by main loop. */
26434 alg = decide_alg (count, expected_size, min_size, probable_max_size,
26436 issetmem && val_exp == const0_rtx, have_as,
26437 &dynamic_check, &noalign, false);
26438 if (alg == libcall)
26440 gcc_assert (alg != no_stringop);
26442 /* For now vector-version of memset is generated only for memory zeroing, as
26443 creating of promoted vector value is very cheap in this case. */
26444 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
26445 alg = unrolled_loop;
26448 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
26449 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
26451 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
26454 move_mode = word_mode;
26460 gcc_unreachable ();
26462 need_zero_guard = true;
26463 move_mode = QImode;
26466 need_zero_guard = true;
26468 case unrolled_loop:
26469 need_zero_guard = true;
26470 unroll_factor = (TARGET_64BIT ? 4 : 2);
26473 need_zero_guard = true;
26475 /* Find the widest supported mode. */
26476 move_mode = word_mode;
26477 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
26478 != CODE_FOR_nothing)
26479 move_mode = GET_MODE_WIDER_MODE (move_mode);
26481 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26482 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26483 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26485 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26486 move_mode = mode_for_vector (word_mode, nunits);
26487 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
26488 move_mode = word_mode;
26490 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
26492 case rep_prefix_8_byte:
26493 move_mode = DImode;
26495 case rep_prefix_4_byte:
26496 move_mode = SImode;
26498 case rep_prefix_1_byte:
26499 move_mode = QImode;
26502 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
26503 epilogue_size_needed = size_needed;
26505 /* If we are going to call any library calls conditionally, make sure any
26506 pending stack adjustment happen before the first conditional branch,
26507 otherwise they will be emitted before the library call only and won't
26508 happen from the other branches. */
26509 if (dynamic_check != -1)
26510 do_pending_stack_adjust ();
26512 desired_align = decide_alignment (align, alg, expected_size, move_mode);
26513 if (!TARGET_ALIGN_STRINGOPS || noalign)
26514 align = desired_align;
26516 /* Step 1: Prologue guard. */
26518 /* Alignment code needs count to be in register. */
26519 if (CONST_INT_P (count_exp) && desired_align > align)
26521 if (INTVAL (count_exp) > desired_align
26522 && INTVAL (count_exp) > size_needed)
26525 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
26526 if (align_bytes <= 0)
26529 align_bytes = desired_align - align_bytes;
26531 if (align_bytes == 0)
26532 count_exp = force_reg (counter_mode (count_exp), count_exp);
26534 gcc_assert (desired_align >= 1 && align >= 1);
26536 /* Misaligned move sequences handle both prologue and epilogue at once.
26537 Default code generation results in a smaller code for large alignments
26538 and also avoids redundant job when sizes are known precisely. */
26539 misaligned_prologue_used
26540 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
26541 && MAX (desired_align, epilogue_size_needed) <= 32
26542 && desired_align <= epilogue_size_needed
26543 && ((desired_align > align && !align_bytes)
26544 || (!count && epilogue_size_needed > 1)));
26546 /* Do the cheap promotion to allow better CSE across the
26547 main loop and epilogue (ie one load of the big constant in the
26549 For now the misaligned move sequences do not have fast path
26550 without broadcasting. */
26551 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
26553 if (alg == vector_loop)
26555 gcc_assert (val_exp == const0_rtx);
26556 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
26557 promoted_val = promote_duplicated_reg_to_size (val_exp,
26558 GET_MODE_SIZE (word_mode),
26559 desired_align, align);
26563 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26564 desired_align, align);
26567 /* Misaligned move sequences handles both prologues and epilogues at once.
26568 Default code generation results in smaller code for large alignments and
26569 also avoids redundant job when sizes are known precisely. */
26570 if (misaligned_prologue_used)
26572 /* Misaligned move prologue handled small blocks by itself. */
26573 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
26574 (dst, src, &destreg, &srcreg,
26575 move_mode, promoted_val, vec_promoted_val,
26577 &jump_around_label,
26578 desired_align < align
26579 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
26580 desired_align, align, &min_size, dynamic_check, issetmem);
26582 src = change_address (src, BLKmode, srcreg);
26583 dst = change_address (dst, BLKmode, destreg);
26584 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26585 epilogue_size_needed = 0;
26586 if (need_zero_guard
26587 && min_size < (unsigned HOST_WIDE_INT) size_needed)
26589 /* It is possible that we copied enough so the main loop will not
26591 gcc_assert (size_needed > 1);
26592 if (jump_around_label == NULL_RTX)
26593 jump_around_label = gen_label_rtx ();
26594 emit_cmp_and_jump_insns (count_exp,
26595 GEN_INT (size_needed),
26596 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
26597 if (expected_size == -1
26598 || expected_size < (desired_align - align) / 2 + size_needed)
26599 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26601 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26604 /* Ensure that alignment prologue won't copy past end of block. */
26605 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
26607 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
26608 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
26609 Make sure it is power of 2. */
26610 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
26612 /* To improve performance of small blocks, we jump around the VAL
26613 promoting mode. This mean that if the promoted VAL is not constant,
26614 we might not use it in the epilogue and have to use byte
26616 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
26617 force_loopy_epilogue = true;
26618 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26619 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26621 /* If main algorithm works on QImode, no epilogue is needed.
26622 For small sizes just don't align anything. */
26623 if (size_needed == 1)
26624 desired_align = align;
26629 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26631 label = gen_label_rtx ();
26632 emit_cmp_and_jump_insns (count_exp,
26633 GEN_INT (epilogue_size_needed),
26634 LTU, 0, counter_mode (count_exp), 1, label);
26635 if (expected_size == -1 || expected_size < epilogue_size_needed)
26636 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26638 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26642 /* Emit code to decide on runtime whether library call or inline should be
26644 if (dynamic_check != -1)
26646 if (!issetmem && CONST_INT_P (count_exp))
26648 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
26650 emit_block_move_via_libcall (dst, src, count_exp, false);
26651 count_exp = const0_rtx;
26657 rtx_code_label *hot_label = gen_label_rtx ();
26658 if (jump_around_label == NULL_RTX)
26659 jump_around_label = gen_label_rtx ();
26660 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
26661 LEU, 0, counter_mode (count_exp),
26663 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26665 set_storage_via_libcall (dst, count_exp, val_exp, false);
26667 emit_block_move_via_libcall (dst, src, count_exp, false);
26668 emit_jump (jump_around_label);
26669 emit_label (hot_label);
26673 /* Step 2: Alignment prologue. */
26674 /* Do the expensive promotion once we branched off the small blocks. */
26675 if (issetmem && !promoted_val)
26676 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26677 desired_align, align);
26679 if (desired_align > align && !misaligned_prologue_used)
26681 if (align_bytes == 0)
26683 /* Except for the first move in prologue, we no longer know
26684 constant offset in aliasing info. It don't seems to worth
26685 the pain to maintain it for the first move, so throw away
26687 dst = change_address (dst, BLKmode, destreg);
26689 src = change_address (src, BLKmode, srcreg);
26690 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
26691 promoted_val, vec_promoted_val,
26692 count_exp, align, desired_align,
26694 /* At most desired_align - align bytes are copied. */
26695 if (min_size < (unsigned)(desired_align - align))
26698 min_size -= desired_align - align;
26702 /* If we know how many bytes need to be stored before dst is
26703 sufficiently aligned, maintain aliasing info accurately. */
26704 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
26712 count_exp = plus_constant (counter_mode (count_exp),
26713 count_exp, -align_bytes);
26714 count -= align_bytes;
26715 min_size -= align_bytes;
26716 max_size -= align_bytes;
26718 if (need_zero_guard
26719 && min_size < (unsigned HOST_WIDE_INT) size_needed
26720 && (count < (unsigned HOST_WIDE_INT) size_needed
26721 || (align_bytes == 0
26722 && count < ((unsigned HOST_WIDE_INT) size_needed
26723 + desired_align - align))))
26725 /* It is possible that we copied enough so the main loop will not
26727 gcc_assert (size_needed > 1);
26728 if (label == NULL_RTX)
26729 label = gen_label_rtx ();
26730 emit_cmp_and_jump_insns (count_exp,
26731 GEN_INT (size_needed),
26732 LTU, 0, counter_mode (count_exp), 1, label);
26733 if (expected_size == -1
26734 || expected_size < (desired_align - align) / 2 + size_needed)
26735 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26737 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26740 if (label && size_needed == 1)
26742 emit_label (label);
26743 LABEL_NUSES (label) = 1;
26745 epilogue_size_needed = 1;
26747 promoted_val = val_exp;
26749 else if (label == NULL_RTX && !misaligned_prologue_used)
26750 epilogue_size_needed = size_needed;
26752 /* Step 3: Main loop. */
26759 gcc_unreachable ();
26762 case unrolled_loop:
26763 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
26764 count_exp, move_mode, unroll_factor,
26765 expected_size, issetmem);
26768 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
26769 vec_promoted_val, count_exp, move_mode,
26770 unroll_factor, expected_size, issetmem);
26772 case rep_prefix_8_byte:
26773 case rep_prefix_4_byte:
26774 case rep_prefix_1_byte:
26775 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
26776 val_exp, count_exp, move_mode, issetmem);
26779 /* Adjust properly the offset of src and dest memory for aliasing. */
26780 if (CONST_INT_P (count_exp))
26783 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
26784 (count / size_needed) * size_needed);
26785 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
26786 (count / size_needed) * size_needed);
26791 src = change_address (src, BLKmode, srcreg);
26792 dst = change_address (dst, BLKmode, destreg);
26795 /* Step 4: Epilogue to copy the remaining bytes. */
26799 /* When the main loop is done, COUNT_EXP might hold original count,
26800 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
26801 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
26802 bytes. Compensate if needed. */
26804 if (size_needed < epilogue_size_needed)
26807 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
26808 GEN_INT (size_needed - 1), count_exp, 1,
26810 if (tmp != count_exp)
26811 emit_move_insn (count_exp, tmp);
26813 emit_label (label);
26814 LABEL_NUSES (label) = 1;
26817 if (count_exp != const0_rtx && epilogue_size_needed > 1)
26819 if (force_loopy_epilogue)
26820 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
26821 epilogue_size_needed);
26825 expand_setmem_epilogue (dst, destreg, promoted_val,
26826 vec_promoted_val, count_exp,
26827 epilogue_size_needed);
26829 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
26830 epilogue_size_needed);
26833 if (jump_around_label)
26834 emit_label (jump_around_label);
26839 /* Expand the appropriate insns for doing strlen if not just doing
26842 out = result, initialized with the start address
26843 align_rtx = alignment of the address.
26844 scratch = scratch register, initialized with the startaddress when
26845 not aligned, otherwise undefined
26847 This is just the body. It needs the initializations mentioned above and
26848 some address computing at the end. These things are done in i386.md. */
26851 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
26855 rtx_code_label *align_2_label = NULL;
26856 rtx_code_label *align_3_label = NULL;
26857 rtx_code_label *align_4_label = gen_label_rtx ();
26858 rtx_code_label *end_0_label = gen_label_rtx ();
26860 rtx tmpreg = gen_reg_rtx (SImode);
26861 rtx scratch = gen_reg_rtx (SImode);
26865 if (CONST_INT_P (align_rtx))
26866 align = INTVAL (align_rtx);
26868 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
26870 /* Is there a known alignment and is it less than 4? */
26873 rtx scratch1 = gen_reg_rtx (Pmode);
26874 emit_move_insn (scratch1, out);
26875 /* Is there a known alignment and is it not 2? */
26878 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
26879 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
26881 /* Leave just the 3 lower bits. */
26882 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
26883 NULL_RTX, 0, OPTAB_WIDEN);
26885 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26886 Pmode, 1, align_4_label);
26887 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
26888 Pmode, 1, align_2_label);
26889 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
26890 Pmode, 1, align_3_label);
26894 /* Since the alignment is 2, we have to check 2 or 0 bytes;
26895 check if is aligned to 4 - byte. */
26897 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
26898 NULL_RTX, 0, OPTAB_WIDEN);
26900 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26901 Pmode, 1, align_4_label);
26904 mem = change_address (src, QImode, out);
26906 /* Now compare the bytes. */
26908 /* Compare the first n unaligned byte on a byte per byte basis. */
26909 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
26910 QImode, 1, end_0_label);
26912 /* Increment the address. */
26913 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26915 /* Not needed with an alignment of 2 */
26918 emit_label (align_2_label);
26920 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26923 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26925 emit_label (align_3_label);
26928 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26931 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26934 /* Generate loop to check 4 bytes at a time. It is not a good idea to
26935 align this loop. It gives only huge programs, but does not help to
26937 emit_label (align_4_label);
26939 mem = change_address (src, SImode, out);
26940 emit_move_insn (scratch, mem);
26941 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
26943 /* This formula yields a nonzero result iff one of the bytes is zero.
26944 This saves three branches inside loop and many cycles. */
26946 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
26947 emit_insn (gen_one_cmplsi2 (scratch, scratch));
26948 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
26949 emit_insn (gen_andsi3 (tmpreg, tmpreg,
26950 gen_int_mode (0x80808080, SImode)));
26951 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
26956 rtx reg = gen_reg_rtx (SImode);
26957 rtx reg2 = gen_reg_rtx (Pmode);
26958 emit_move_insn (reg, tmpreg);
26959 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
26961 /* If zero is not in the first two bytes, move two bytes forward. */
26962 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26963 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26964 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26965 emit_insn (gen_rtx_SET (tmpreg,
26966 gen_rtx_IF_THEN_ELSE (SImode, tmp,
26969 /* Emit lea manually to avoid clobbering of flags. */
26970 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
26972 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26973 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26974 emit_insn (gen_rtx_SET (out,
26975 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
26981 rtx_code_label *end_2_label = gen_label_rtx ();
26982 /* Is zero in the first two bytes? */
26984 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26985 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26986 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
26987 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26988 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
26990 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
26991 JUMP_LABEL (tmp) = end_2_label;
26993 /* Not in the first two. Move two bytes forward. */
26994 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
26995 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
26997 emit_label (end_2_label);
27001 /* Avoid branch in fixing the byte. */
27002 tmpreg = gen_lowpart (QImode, tmpreg);
27003 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
27004 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
27005 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
27006 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
27008 emit_label (end_0_label);
27011 /* Expand strlen. */
/* NOTE(review): line-sampled extract; gaps in the numeric prefixes mark
   dropped original lines (braces, early returns, declarations).  */
27014 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
27016 rtx addr, scratch1, scratch2, scratch3, scratch4;
27018 /* The generic case of strlen expander is long. Avoid it's
27019 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
27021 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27022 && !TARGET_INLINE_ALL_STRINGOPS
27023 && !optimize_insn_for_size_p ()
27024 && (!CONST_INT_P (align) || INTVAL (align) < 4))
27027 addr = force_reg (Pmode, XEXP (src, 0));
27028 scratch1 = gen_reg_rtx (Pmode);
/* Fast path: use the unrolled SImode scanner when the end-of-string
   character is NUL and we are optimizing for speed.  */
27030 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27031 && !optimize_insn_for_size_p ())
27033 /* Well it seems that some optimizer does not combine a call like
27034 foo(strlen(bar), strlen(bar));
27035 when the move and the subtraction is done here. It does calculate
27036 the length just once when these instructions are done inside of
27037 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
27038 often used and I use one fewer register for the lifetime of
27039 output_strlen_unroll() this is better. */
27041 emit_move_insn (out, addr);
27043 ix86_expand_strlensi_unroll_1 (out, src, align);
27045 /* strlensi_unroll_1 returns the address of the zero at the end of
27046 the string, like memchr(), so compute the length by subtracting
27047 the start address. */
27048 emit_insn (ix86_gen_sub3 (out, out, addr));
/* Slow path: repne scasb via UNSPEC_SCAS, which needs eax/ecx/edi.  */
27054 /* Can't use this if the user has appropriated eax, ecx, or edi. */
27055 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
27057 /* Can't use this for non-default address spaces. */
27058 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
27061 scratch2 = gen_reg_rtx (Pmode);
27062 scratch3 = gen_reg_rtx (Pmode);
27063 scratch4 = force_reg (Pmode, constm1_rtx);
27065 emit_move_insn (scratch3, addr);
27066 eoschar = force_reg (QImode, eoschar);
27068 src = replace_equiv_address_nv (src, scratch3);
27070 /* If .md starts supporting :P, this can be done in .md. */
27071 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27072 scratch4), UNSPEC_SCAS);
27073 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~scan_result - 1 (scas counts down from -1).  */
27074 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27075 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27080 /* For given symbol (function) construct code to compute address of it's PLT
27081 entry in large x86-64 PIC model. */
/* NOTE(review): line-sampled extract; the return type line, braces and
   local declarations were dropped by the extraction.  */
27083 construct_plt_address (rtx symbol)
/* Only valid for SYMBOL_REFs in the large PIC, 64-bit, non-PECOFF model.  */
27087 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27088 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27089 gcc_assert (Pmode == DImode);
/* tmp = PLTOFF(symbol) + PIC register, i.e. the PLT slot address.  */
27091 tmp = gen_reg_rtx (Pmode);
27092 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27094 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27095 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call; FNADDR is the (MEM of the) callee address, RETVAL the
   value destination or NULL, POP the stack-pop amount, SIBCALL whether
   this is a tail call.
   NOTE(review): line-sampled extract; gaps in the numeric prefixes mark
   dropped original lines (braces, else-arms, condition fragments).  */
27100 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27102 rtx pop, bool sibcall)
27105 rtx use = NULL, call;
27106 unsigned int vec_len = 0;
27108 if (pop == const0_rtx)
27110 gcc_assert (!TARGET_64BIT || !pop)
27112 if (TARGET_MACHO && !TARGET_64BIT)
27115 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27116 fnaddr = machopic_indirect_call_target (fnaddr);
27121 /* Static functions and indirect calls don't need the pic register. Also,
27122 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
27123 it an indirect call. */
27124 rtx addr = XEXP (fnaddr, 0);
27126 && GET_CODE (addr) == SYMBOL_REF
27127 && !SYMBOL_REF_LOCAL_P (addr))
27130 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27131 || !lookup_attribute ("noplt",
27132 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27135 || (ix86_cmodel == CM_LARGE_PIC
27136 && DEFAULT_ABI != MS_ABI))
/* Record the PIC register as used by the call so it stays live.  */
27138 use_reg (&use, gen_rtx_REG (Pmode,
27139 REAL_PIC_OFFSET_TABLE_REGNUM));
27140 if (ix86_use_pseudo_pic_reg ())
27141 emit_move_insn (gen_rtx_REG (Pmode,
27142 REAL_PIC_OFFSET_TABLE_REGNUM),
27143 pic_offset_table_rtx);
27146 else if (!TARGET_PECOFF && !TARGET_MACHO)
/* Build a GOT-based indirect callee address for the no-PLT case.  */
27150 fnaddr = gen_rtx_UNSPEC (Pmode,
27151 gen_rtvec (1, addr),
27153 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27157 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27159 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27160 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27163 fnaddr = gen_const_mem (Pmode, fnaddr);
27164 /* Pmode may not be the same as word_mode for x32, which
27165 doesn't support indirect branch via 32-bit memory slot.
27166 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27167 indirect branch via x32 GOT slot is OK. */
27168 if (GET_MODE (fnaddr) != word_mode)
27169 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27170 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27175 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27176 parameters passed in vector registers. */
27178 && (INTVAL (callarg2) > 0
27179 || (INTVAL (callarg2) == 0
27180 && (TARGET_SSE || !flag_skip_rax_setup))))
/* AL carries the number of vector registers used by a varargs call.  */
27182 rtx al = gen_rtx_REG (QImode, AX_REG);
27183 emit_move_insn (al, callarg2);
27184 use_reg (&use, al);
27187 if (ix86_cmodel == CM_LARGE_PIC
27190 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27191 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27192 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27193 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27194 branch via x32 GOT slot is OK. */
27195 else if (!(TARGET_X32
27197 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27198 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27200 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27201 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
/* Force an unsupported callee address into a register.  */
27203 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27204 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27207 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27211 /* We should add bounds as destination register in case
27212 pointer with bounds may be returned. */
27213 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27215 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27216 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27217 if (GET_CODE (retval) == PARALLEL)
27219 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27220 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27221 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27222 retval = chkp_join_splitted_slot (retval, par);
27226 retval = gen_rtx_PARALLEL (VOIDmode,
27227 gen_rtvec (3, retval, b0, b1));
27228 chkp_put_regs_to_expr_list (retval);
27232 call = gen_rtx_SET (retval, call);
27234 vec[vec_len++] = call;
/* Attach the callee-pops stack adjustment to the call PARALLEL.  */
27238 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27239 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27240 vec[vec_len++] = pop;
27243 if (TARGET_64BIT_MS_ABI
27244 && (!callarg2 || INTVAL (callarg2) != -2))
/* MS ABI callees clobber extra registers relative to the SysV ABI;
   add explicit clobbers for them.  */
27246 int const cregs_size
27247 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
27250 for (i = 0; i < cregs_size; i++)
27252 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27253 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27255 clobber_reg (&use, gen_rtx_REG (mode, regno));
27260 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27261 call = emit_call_insn (call);
27263 CALL_INSN_FUNCTION_USAGE (call) = use;
27268 /* Return true if the function being called was marked with attribute "noplt"
27269 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
27270 handle the non-PIC case in the backend because there is no easy interface
27271 for the front-end to force non-PLT calls to use the GOT. This is currently
27272 used only with 64-bit ELF targets to call the function marked "noplt"
/* NOTE(review): line-sampled extract; the early "return false;" and final
   return lines were dropped by the extraction.  */
27276 ix86_nopic_noplt_attribute_p (rtx call_op)
/* noplt-through-GOT is only implemented for non-PIC 64-bit ELF with a
   small/medium code model and a non-local symbol.  */
27278 if (flag_pic || ix86_cmodel == CM_LARGE
27279 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27280 || SYMBOL_REF_LOCAL_P (call_op))
27283 tree symbol_decl = SYMBOL_REF_DECL (call_op);
27286 || (symbol_decl != NULL_TREE
27287 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27293 /* Output the assembly for a call instruction. */
/* NOTE(review): line-sampled extract; braces, else-arms and the trailing
   nop emission were dropped by the extraction.  */
27296 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
27298 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
27299 bool seh_nop_p = false;
/* Sibling calls become jumps; noplt symbols go through the GOT slot.  */
27302 if (SIBLING_CALL_P (insn))
27306 if (ix86_nopic_noplt_attribute_p (call_op))
27307 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
27309 xasm = "%!jmp\t%P0";
27311 /* SEH epilogue detection requires the indirect branch case
27312 to include REX.W. */
27313 else if (TARGET_SEH)
27314 xasm = "%!rex.W jmp\t%A0";
27316 xasm = "%!jmp\t%A0";
27318 output_asm_insn (xasm, &call_op);
27322 /* SEH unwinding can require an extra nop to be emitted in several
27323 circumstances. Determine if we have one of those. */
27328 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
27330 /* If we get to another real insn, we don't need the nop. */
27334 /* If we get to the epilogue note, prevent a catch region from
27335 being adjacent to the standard epilogue sequence. If non-
27336 call-exceptions, we'll have done this during epilogue emission. */
27337 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
27338 && !flag_non_call_exceptions
27339 && !can_throw_internal (insn))
27346 /* If we didn't find a real insn following the call, prevent the
27347 unwinder from looking into the next function. */
/* Non-sibling case: plain call, with the same noplt GOT indirection.  */
27354 if (ix86_nopic_noplt_attribute_p (call_op))
27355 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
27357 xasm = "%!call\t%P0";
27360 xasm = "%!call\t%A0";
27362 output_asm_insn (xasm, &call_op);
27370 /* Clear stack slot assignments remembered from previous functions.
27371 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* NOTE(review): line-sampled extract; the opening brace and the final
   "return f;" were dropped by the extraction.  */
27374 static struct machine_function *
27375 ix86_init_machine_status (void)
27377 struct machine_function *f;
/* GC-allocated and zero-initialized; only non-zero defaults are set.  */
27379 f = ggc_cleared_alloc<machine_function> ();
27380 f->use_fast_prologue_epilogue_nregs = -1;
27381 f->call_abi = ix86_abi;
27386 /* Return a MEM corresponding to a stack slot with mode MODE.
27387 Allocate a new slot if necessary.
27389 The RTL for a function can have several slots available: N is
27390 which slot to use. */
/* NOTE(review): line-sampled extract; return-type line and braces were
   dropped by the extraction.  */
27393 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
27395 struct stack_local_entry *s;
27397 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Reuse a previously-allocated slot with the same (mode, n) key.  */
27399 for (s = ix86_stack_locals; s; s = s->next)
27400 if (s->mode == mode && s->n == n)
27401 return validize_mem (copy_rtx (s->rtl));
/* Otherwise allocate a fresh slot and push it on the per-function list.  */
27403 s = ggc_alloc<stack_local_entry> ();
27406 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
27408 s->next = ix86_stack_locals;
27409 ix86_stack_locals = s;
27410 return validize_mem (copy_rtx (s->rtl));
/* Instantiate virtual registers in the RTL of every remembered stack
   slot (TARGET_INSTANTIATE_DECLS hook).
   NOTE(review): line-sampled extract; return-type line and braces were
   dropped by the extraction.  */
27414 ix86_instantiate_decls (void)
27416 struct stack_local_entry *s;
27418 for (s = ix86_stack_locals; s; s = s->next)
27419 if (s->rtl != NULL_RTX)
27420 instantiate_decl_rtl (s->rtl);
27423 /* Return the number used for encoding REG, in the range 0..7. */
/* NOTE(review): line-sampled extract; the general-register cases between
   original lines 27428 and 27450 were dropped by the extraction.  */
27426 reg_encoded_number (rtx reg)
27428 unsigned regno = REGNO (reg);
/* Each register file encodes as the offset from its first member.  */
27450 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
27451 return regno - FIRST_STACK_REG;
27452 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
27453 return regno - FIRST_SSE_REG;
27454 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
27455 return regno - FIRST_MMX_REG;
27456 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
27457 return regno - FIRST_REX_SSE_REG;
27458 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
27459 return regno - FIRST_REX_INT_REG;
27460 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
27461 return regno - FIRST_MASK_REG;
27462 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
27463 return regno - FIRST_BND_REG;
27467 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
27468 in its encoding if it could be relevant for ROP mitigation, otherwise
27469 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
27470 used for calculating it into them. */
/* NOTE(review): line-sampled extract; the switch body and operand
   extraction lines were largely dropped by the extraction.  */
27473 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
27474 int *popno0 = 0, int *popno1 = 0)
/* Inline asm has no usable modrm attribute information.  */
27476 if (asm_noperands (PATTERN (insn)) >= 0)
27478 int has_modrm = get_attr_modrm (insn);
27481 enum attr_modrm_class cls = get_attr_modrm_class (insn);
27485 case MODRM_CLASS_OP02:
27486 gcc_assert (noperands >= 3);
27495 case MODRM_CLASS_OP01:
27496 gcc_assert (noperands >= 2);
/* Register-register form: mod=11, reg=enc1, r/m=enc0.  */
27508 if (REG_P (op0) && REG_P (op1))
27510 int enc0 = reg_encoded_number (op0);
27511 int enc1 = reg_encoded_number (op1);
27512 return 0xc0 + (enc1 << 3) + enc0;
27517 /* Check whether x86 address PARTS is a pc-relative address. */
/* NOTE(review): line-sampled extract; braces and return statements were
   dropped by the extraction.  */
27520 rip_relative_addr_p (struct ix86_address *parts)
27522 rtx base, index, disp;
27524 base = parts->base;
27525 index = parts->index;
27526 disp = parts->disp;
/* Only a bare displacement (no base, no index) can be RIP-relative.  */
27528 if (disp && !base && !index)
/* Strip a CONST wrapper and a constant addend to reach the symbol.  */
27534 if (GET_CODE (disp) == CONST)
27535 symbol = XEXP (disp, 0);
27536 if (GET_CODE (symbol) == PLUS
27537 && CONST_INT_P (XEXP (symbol, 1)))
27538 symbol = XEXP (symbol, 0);
/* Labels, non-TLS symbols, and these PC-relative unspecs qualify.  */
27540 if (GET_CODE (symbol) == LABEL_REF
27541 || (GET_CODE (symbol) == SYMBOL_REF
27542 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
27543 || (GET_CODE (symbol) == UNSPEC
27544 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
27545 || XINT (symbol, 1) == UNSPEC_PCREL
27546 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
27553 /* Calculate the length of the memory address in the instruction encoding.
27554 Includes addr32 prefix, does not include the one-byte modrm, opcode,
27555 or other prefixes. We never generate addr32 prefix for LEA insn. */
/* NOTE(review): line-sampled extract; braces, "len += ..." accumulation
   lines and the final return were dropped by the extraction.  */
27558 memory_address_length (rtx addr, bool lea)
27560 struct ix86_address parts;
27561 rtx base, index, disp;
/* Auto-modify addresses have a fixed known encoding cost handled early.  */
27565 if (GET_CODE (addr) == PRE_DEC
27566 || GET_CODE (addr) == POST_INC
27567 || GET_CODE (addr) == PRE_MODIFY
27568 || GET_CODE (addr) == POST_MODIFY)
27571 ok = ix86_decompose_address (addr, &parts);
/* One byte for a segment-override prefix in non-generic addr spaces.  */
27574 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
27576 /* If this is not LEA instruction, add the length of addr32 prefix. */
27577 if (TARGET_64BIT && !lea
27578 && (SImode_address_operand (addr, VOIDmode)
27579 || (parts.base && GET_MODE (parts.base) == SImode)
27580 || (parts.index && GET_MODE (parts.index) == SImode)))
27584 index = parts.index;
27587 if (base && SUBREG_P (base))
27588 base = SUBREG_REG (base);
27589 if (index && SUBREG_P (index))
27590 index = SUBREG_REG (index);
27592 gcc_assert (base == NULL_RTX || REG_P (base));
27593 gcc_assert (index == NULL_RTX || REG_P (index));
27596 - esp as the base always wants an index,
27597 - ebp as the base always wants a displacement,
27598 - r12 as the base always wants an index,
27599 - r13 as the base always wants a displacement. */
27601 /* Register Indirect. */
27602 if (base && !index && !disp)
27604 /* esp (for its index) and ebp (for its displacement) need
27605 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
27607 if (base == arg_pointer_rtx
27608 || base == frame_pointer_rtx
27609 || REGNO (base) == SP_REG
27610 || REGNO (base) == BP_REG
27611 || REGNO (base) == R12_REG
27612 || REGNO (base) == R13_REG)
27616 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
27617 is not disp32, but disp32(%rip), so for disp32
27618 SIB byte is needed, unless print_operand_address
27619 optimizes it into disp32(%rip) or (%rip) is implied
27621 else if (disp && !base && !index)
27624 if (rip_relative_addr_p (&parts))
27629 /* Find the length of the displacement constant. */
/* Constraint K is a signed 8-bit immediate -> 1-byte disp8 form.  */
27632 if (base && satisfies_constraint_K (disp))
27637 /* ebp always wants a displacement. Similarly r13. */
27638 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
27641 /* An index requires the two-byte modrm form.... */
27643 /* ...like esp (or r12), which always wants an index. */
27644 || base == arg_pointer_rtx
27645 || base == frame_pointer_rtx
27646 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
27653 /* Compute default value for "length_immediate" attribute. When SHORTFORM
27654 is set, expect that insn have 8bit immediate alternative. */
/* NOTE(review): line-sampled extract; the switch cases returning per-mode
   immediate sizes were dropped by the extraction.  */
27656 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
27660 extract_insn_cached (insn);
/* Find the (single) constant operand, scanning from the last operand.  */
27661 for (i = recog_data.n_operands - 1; i >= 0; --i)
27662 if (CONSTANT_P (recog_data.operand[i]))
27664 enum attr_mode mode = get_attr_mode (insn);
27667 if (shortform && CONST_INT_P (recog_data.operand[i]))
27669 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the insn's operand mode before the imm8 range test.  */
27676 ival = trunc_int_for_mode (ival, HImode);
27679 ival = trunc_int_for_mode (ival, SImode);
27684 if (IN_RANGE (ival, -128, 127))
27701 /* Immediates for DImode instructions are encoded
27702 as 32bit sign extended values. */
27707 fatal_insn ("unknown insn mode", insn);
27713 /* Compute default value for "length_address" attribute. */
/* NOTE(review): line-sampled extract; braces, "continue"/"return" lines
   and the fallback return were dropped by the extraction.  */
27715 ix86_attr_length_address_default (rtx_insn *insn)
/* LEA takes its address from SET_SRC rather than from a MEM operand.  */
27719 if (get_attr_type (insn) == TYPE_LEA)
27721 rtx set = PATTERN (insn), addr;
27723 if (GET_CODE (set) == PARALLEL)
27724 set = XVECEXP (set, 0, 0);
27726 gcc_assert (GET_CODE (set) == SET);
27728 addr = SET_SRC (set);
27730 return memory_address_length (addr, true);
27733 extract_insn_cached (insn);
27734 for (i = recog_data.n_operands - 1; i >= 0; --i)
27736 rtx op = recog_data.operand[i];
27739 constrain_operands_cached (insn, reload_completed);
27740 if (which_alternative != -1)
/* Walk the constraint string to the chosen alternative.  */
27742 const char *constraints = recog_data.constraints[i];
27743 int alt = which_alternative;
27745 while (*constraints == '=' || *constraints == '+')
27748 while (*constraints++ != ',')
27750 /* Skip ignored operands. */
27751 if (*constraints == 'X')
27755 int len = memory_address_length (XEXP (op, 0), false);
27757 /* Account for segment prefix for non-default addr spaces. */
27758 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
27767 /* Compute default value for "length_vex" attribute. It includes
27768 2 or 3 byte VEX prefix and 1 opcode byte. */
/* NOTE(review): line-sampled extract; the "return 3;"/"return 4;" result
   lines and braces were dropped by the extraction.  */
27771 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
27776 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
27777 byte VEX prefix. */
27778 if (!has_0f_opcode || has_vex_w)
27781 /* We can always use 2 byte VEX prefix in 32bit. */
27785 extract_insn_cached (insn);
/* In 64-bit mode, scan operands for anything forcing the 3-byte form.  */
27787 for (i = recog_data.n_operands - 1; i >= 0; --i)
27788 if (REG_P (recog_data.operand[i]))
27790 /* REX.W bit uses 3 byte VEX prefix. */
27791 if (GET_MODE (recog_data.operand[i]) == DImode
27792 && GENERAL_REG_P (recog_data.operand[i]))
27797 /* REX.X or REX.B bits use 3 byte VEX prefix. */
27798 if (MEM_P (recog_data.operand[i])
27799 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
27806 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): line-sampled extract; the "return N;" lines between the
   case groups and the default case were dropped by the extraction, so
   the per-group issue widths are not visible here.  */
27809 ix86_issue_rate (void)
27813 case PROCESSOR_PENTIUM:
27814 case PROCESSOR_LAKEMONT:
27815 case PROCESSOR_BONNELL:
27816 case PROCESSOR_SILVERMONT:
27817 case PROCESSOR_KNL:
27818 case PROCESSOR_INTEL:
27820 case PROCESSOR_BTVER2:
27821 case PROCESSOR_PENTIUM4:
27822 case PROCESSOR_NOCONA:
27825 case PROCESSOR_PENTIUMPRO:
27826 case PROCESSOR_ATHLON:
27828 case PROCESSOR_AMDFAM10:
27829 case PROCESSOR_GENERIC:
27830 case PROCESSOR_BTVER1:
27833 case PROCESSOR_BDVER1:
27834 case PROCESSOR_BDVER2:
27835 case PROCESSOR_BDVER3:
27836 case PROCESSOR_BDVER4:
27837 case PROCESSOR_ZNVER1:
27838 case PROCESSOR_CORE2:
27839 case PROCESSOR_NEHALEM:
27840 case PROCESSOR_SANDYBRIDGE:
27841 case PROCESSOR_HASWELL:
27849 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
27850 by DEP_INSN and nothing set by DEP_INSN. */
27853 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
27857 /* Simplify the test for uninteresting insns. */
27858 if (insn_type != TYPE_SETCC
27859 && insn_type != TYPE_ICMOV
27860 && insn_type != TYPE_FCMOV
27861 && insn_type != TYPE_IBR)
27864 if ((set = single_set (dep_insn)) != 0)
27866 set = SET_DEST (set);
27869 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
27870 && XVECLEN (PATTERN (dep_insn), 0) == 2
27871 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
27872 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
27874 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
27875 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
27880 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
27883 /* This test is true if the dependent insn reads the flags but
27884 not any other potentially set register. */
27885 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
27888 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
27894 /* Return true iff USE_INSN has a memory address with operands set by
/* NOTE(review): the tail of the comment ("... SET_INSN") and the
   function's declarations/braces/final return are missing from this
   extract.  Visible logic: scan USE_INSN's operands for a MEM and test
   whether its address expression is modified by SET_INSN (the classic
   address-generation-interlock test).  */
27898 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
27901 extract_insn_cached (use_insn);
27902 for (i = recog_data.n_operands - 1; i >= 0; --i)
27903 if (MEM_P (recog_data.operand[i]))
27905 rtx addr = XEXP (recog_data.operand[i], 0);
/* Only the first MEM operand found is examined -- the loop returns here.  */
27906 return modified_in_p (addr, set_insn) != 0;
27911 /* Helper function for exact_store_load_dependency.
27912 Return true if addr is found in insn. */
/* NOTE(review): lines are missing between the rtx_equal_p test and the
   format walk (likely early-outs for leaf rtx codes) and inside the
   switch (the 'e' / 'E' case labels and returns).  Visible logic: a
   standard recursive RTL walk over GET_RTX_FORMAT, recursing into
   'e' (expression) and 'E' (vector) slots looking for ADDR.  */
27914 exact_dependency_1 (rtx addr, rtx insn)
27916 enum rtx_code code;
27917 const char *format_ptr;
27920 code = GET_CODE (insn);
27924 if (rtx_equal_p (addr, insn))
27939 format_ptr = GET_RTX_FORMAT (code);
27940 for (i = 0; i < GET_RTX_LENGTH (code); i++)
27942 switch (*format_ptr++)
27945 if (exact_dependency_1 (addr, XEXP (insn, i)))
27949 for (j = 0; j < XVECLEN (insn, i); j++)
27950 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
27958 /* Return true if there exists exact dependency for store & load, i.e.
27959 the same memory address is used in them. */
/* NOTE(review): the null checks after each single_set call and the
   return statements are missing from this extract.  Visible logic:
   STORE must be a single_set with a MEM destination; the search then
   looks for that exact MEM inside LOAD's source expression.  */
27961 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
27965 set1 = single_set (store);
27968 if (!MEM_P (SET_DEST (set1)))
27970 set2 = single_set (load);
27973 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* Target hook TARGET_SCHED_ADJUST_COST: tweak the scheduler's latency
   COST of the dependence LINK from DEP_INSN to INSN, per-CPU.
   NOTE(review): many interior lines (returns, cost assignments, braces,
   the switch header on ix86_tune) are missing from this extract -- the
   gaps in the 27xxx/28xxx numbering mark them.  Do not treat the body
   as complete; comments below describe only what is visible.  */
27979 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27981 enum attr_type insn_type, dep_insn_type;
27982 enum attr_memory memory;
27984 int dep_insn_code_number;
27986 /* Anti and output dependencies have zero cost on all CPUs. */
27987 if (REG_NOTE_KIND (link) != 0)
27990 dep_insn_code_number = recog_memoized (dep_insn);
27992 /* If we can't recognize the insns, we can't really do anything. */
27993 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
27996 insn_type = get_attr_type (insn);
27997 dep_insn_type = get_attr_type (dep_insn);
28001 case PROCESSOR_PENTIUM:
28002 case PROCESSOR_LAKEMONT:
28003 /* Address Generation Interlock adds a cycle of latency. */
28004 if (insn_type == TYPE_LEA)
28006 rtx addr = PATTERN (insn);
28008 if (GET_CODE (addr) == PARALLEL)
28009 addr = XVECEXP (addr, 0, 0);
28011 gcc_assert (GET_CODE (addr) == SET);
28013 addr = SET_SRC (addr);
/* LEA's "address" is its SET_SRC, so the AGI test is done directly
   on that expression rather than via ix86_agi_dependent.  */
28014 if (modified_in_p (addr, dep_insn))
28017 else if (ix86_agi_dependent (dep_insn, insn))
28020 /* ??? Compares pair with jump/setcc. */
28021 if (ix86_flags_dependent (insn, dep_insn, insn_type))
28024 /* Floating point stores require value to be ready one cycle earlier. */
28025 if (insn_type == TYPE_FMOV
28026 && get_attr_memory (insn) == MEMORY_STORE
28027 && !ix86_agi_dependent (dep_insn, insn))
28031 case PROCESSOR_PENTIUMPRO:
28032 /* INT->FP conversion is expensive. */
28033 if (get_attr_fp_int_src (dep_insn))
28036 /* There is one cycle extra latency between an FP op and a store. */
28037 if (insn_type == TYPE_FMOV
28038 && (set = single_set (dep_insn)) != NULL_RTX
28039 && (set2 = single_set (insn)) != NULL_RTX
28040 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
28041 && MEM_P (SET_DEST (set2)))
28044 memory = get_attr_memory (insn);
28046 /* Show ability of reorder buffer to hide latency of load by executing
28047 in parallel with previous instruction in case
28048 previous instruction is not needed to compute the address. */
28049 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28050 && !ix86_agi_dependent (dep_insn, insn))
28052 /* Claim moves to take one cycle, as core can issue one load
28053 at time and the next load can start cycle later. */
28054 if (dep_insn_type == TYPE_IMOV
28055 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label(s) for the block below are missing;
   from context it handles another OoO family with a stack engine.  */
28063 /* The esp dependency is resolved before
28064 the instruction is really finished. */
28065 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28066 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28069 /* INT->FP conversion is expensive. */
28070 if (get_attr_fp_int_src (dep_insn))
28073 memory = get_attr_memory (insn);
28075 /* Show ability of reorder buffer to hide latency of load by executing
28076 in parallel with previous instruction in case
28077 previous instruction is not needed to compute the address. */
28078 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28079 && !ix86_agi_dependent (dep_insn, insn))
28081 /* Claim moves to take one cycle, as core can issue one load
28082 at time and the next load can start cycle later. */
28083 if (dep_insn_type == TYPE_IMOV
28084 || dep_insn_type == TYPE_FMOV)
28093 case PROCESSOR_AMDFAM10:
28094 case PROCESSOR_BDVER1:
28095 case PROCESSOR_BDVER2:
28096 case PROCESSOR_BDVER3:
28097 case PROCESSOR_BDVER4:
28098 case PROCESSOR_ZNVER1:
28099 case PROCESSOR_BTVER1:
28100 case PROCESSOR_BTVER2:
28101 case PROCESSOR_GENERIC:
28102 /* Stack engine allows to execute push&pop instructions in parall. */
28103 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28104 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28108 case PROCESSOR_ATHLON:
28110 memory = get_attr_memory (insn);
28112 /* Show ability of reorder buffer to hide latency of load by executing
28113 in parallel with previous instruction in case
28114 previous instruction is not needed to compute the address. */
28115 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28116 && !ix86_agi_dependent (dep_insn, insn))
28118 enum attr_unit unit = get_attr_unit (insn);
28121 /* Because of the difference between the length of integer and
28122 floating unit pipeline preparation stages, the memory operands
28123 for floating point are cheaper.
28125 ??? For Athlon it the difference is most probably 2. */
28126 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
28129 loadcost = TARGET_ATHLON ? 2 : 0;
28131 if (cost >= loadcost)
28138 case PROCESSOR_CORE2:
28139 case PROCESSOR_NEHALEM:
28140 case PROCESSOR_SANDYBRIDGE:
28141 case PROCESSOR_HASWELL:
28142 /* Stack engine allows to execute push&pop instructions in parall. */
28143 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28144 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28147 memory = get_attr_memory (insn);
28149 /* Show ability of reorder buffer to hide latency of load by executing
28150 in parallel with previous instruction in case
28151 previous instruction is not needed to compute the address. */
28152 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28153 && !ix86_agi_dependent (dep_insn, insn))
28162 case PROCESSOR_SILVERMONT:
28163 case PROCESSOR_KNL:
28164 case PROCESSOR_INTEL:
/* In-order Atom-class tuning only applies post-reload; pre-reload the
   cost is left alone (the skipped line presumably breaks/returns).  */
28165 if (!reload_completed)
28168 /* Increase cost of integer loads. */
28169 memory = get_attr_memory (dep_insn);
28170 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28172 enum attr_unit unit = get_attr_unit (dep_insn);
28173 if (unit == UNIT_INTEGER && cost == 1)
28175 if (memory == MEMORY_LOAD)
28179 /* Increase cost of ld/st for short int types only
28180 because of store forwarding issue. */
28181 rtx set = single_set (dep_insn);
28182 if (set && (GET_MODE (SET_DEST (set)) == QImode
28183 || GET_MODE (SET_DEST (set)) == HImode))
28185 /* Increase cost of store/load insn if exact
28186 dependence exists and it is load insn. */
28187 enum attr_memory insn_memory = get_attr_memory (insn);
28188 if (insn_memory == MEMORY_LOAD
28189 && exact_store_load_dependency (dep_insn, insn))
28203 /* How many alternative schedules to try. This should be as wide as the
28204 scheduling freedom in the DFA, but no wider. Making this value too
28205 large results extra work for the scheduler. */
/* NOTE(review): the switch header and the per-group `return N;` lines
   are missing from this extract (numbering gaps).  */
28208 ia32_multipass_dfa_lookahead (void)
28212 case PROCESSOR_PENTIUM:
28213 case PROCESSOR_LAKEMONT:
28216 case PROCESSOR_PENTIUMPRO:
28220 case PROCESSOR_BDVER1:
28221 case PROCESSOR_BDVER2:
28222 case PROCESSOR_BDVER3:
28223 case PROCESSOR_BDVER4:
28224 /* We use lookahead value 4 for BD both before and after reload
28225 schedules. Plan is to have value 8 included for O3. */
28228 case PROCESSOR_CORE2:
28229 case PROCESSOR_NEHALEM:
28230 case PROCESSOR_SANDYBRIDGE:
28231 case PROCESSOR_HASWELL:
28232 case PROCESSOR_BONNELL:
28233 case PROCESSOR_SILVERMONT:
28234 case PROCESSOR_KNL:
28235 case PROCESSOR_INTEL:
28236 /* Generally, we want haifa-sched:max_issue() to look ahead as far
28237 as many instructions can be executed on a cycle, i.e.,
28238 issue_rate. I wonder why tuning for many CPUs does not do this. */
28239 if (reload_completed)
28240 return ix86_issue_rate ();
28241 /* Don't use lookahead for pre-reload schedule to save compile time. */
28249 /* Return true if target platform supports macro-fusion. */
/* Trivial predicate wrapping the tune flag; the signature line between
   the comment and the name is missing from this extract.  */
28252 ix86_macro_fusion_p ()
28254 return TARGET_FUSE_CMP_AND_BRANCH;
28257 /* Check whether current microarchitecture support macro fusion
28258 for insn pair "CONDGEN + CONDJMP". Refer to
28259 "Intel Architectures Optimization Reference Manual". */
/* NOTE(review): numerous interior lines (returns, the flag-check body
   at 28330+, the MEM_P test on DEST, the unsigned-jump codes for the
   INCDEC case, closing braces) are missing from this extract.  The
   visible skeleton: validate CONDJMP is a conditional jump, CONDGEN is
   a test/cmp/incdec/alu, find the COMPARE set (possibly inside a
   PARALLEL together with an ALU set), reject MEM-IMM compares and
   RIP-relative addresses, then apply per-tune fusion restrictions.  */
28262 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
28265 enum rtx_code ccode;
28266 rtx compare_set = NULL_RTX, test_if, cond;
28267 rtx alu_set = NULL_RTX, addr = NULL_RTX;
28269 if (!any_condjump_p (condjmp))
28272 if (get_attr_type (condgen) != TYPE_TEST
28273 && get_attr_type (condgen) != TYPE_ICMP
28274 && get_attr_type (condgen) != TYPE_INCDEC
28275 && get_attr_type (condgen) != TYPE_ALU)
28278 compare_set = single_set (condgen);
28279 if (compare_set == NULL_RTX
28280 && !TARGET_FUSE_ALU_AND_BRANCH)
28283 if (compare_set == NULL_RTX)
/* CONDGEN is not a single_set: look through its PARALLEL for the
   COMPARE (flags) set and remember any sibling ALU set.  */
28286 rtx pat = PATTERN (condgen);
28287 for (i = 0; i < XVECLEN (pat, 0); i++)
28288 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28290 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
28291 if (GET_CODE (set_src) == COMPARE)
28292 compare_set = XVECEXP (pat, 0, i);
28294 alu_set = XVECEXP (pat, 0, i);
28297 if (compare_set == NULL_RTX)
28299 src = SET_SRC (compare_set);
28300 if (GET_CODE (src) != COMPARE)
28303 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
28305 if ((MEM_P (XEXP (src, 0))
28306 && CONST_INT_P (XEXP (src, 1)))
28307 || (MEM_P (XEXP (src, 1))
28308 && CONST_INT_P (XEXP (src, 0))))
28311 /* No fusion for RIP-relative address. */
28312 if (MEM_P (XEXP (src, 0)))
28313 addr = XEXP (XEXP (src, 0), 0);
28314 else if (MEM_P (XEXP (src, 1)))
28315 addr = XEXP (XEXP (src, 1), 0);
28318 ix86_address parts;
28319 int ok = ix86_decompose_address (addr, &parts);
28322 if (rip_relative_addr_p (&parts))
28326 test_if = SET_SRC (pc_set (condjmp));
28327 cond = XEXP (test_if, 0);
28328 ccode = GET_CODE (cond);
28329 /* Check whether conditional jump use Sign or Overflow Flags. */
28330 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
28337 /* Return true for TYPE_TEST and TYPE_ICMP. */
28338 if (get_attr_type (condgen) == TYPE_TEST
28339 || get_attr_type (condgen) == TYPE_ICMP)
28342 /* The following is the case that macro-fusion for alu + jmp. */
28343 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
28346 /* No fusion for alu op with memory destination operand. */
28347 dest = SET_DEST (alu_set)
28351 /* Macro-fusion for inc/dec + unsigned conditional jump is not
28353 if (get_attr_type (condgen) == TYPE_INCDEC
28363 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
28364 execution. It is applied if
28365 (1) IMUL instruction is on the top of list;
28366 (2) There exists the only producer of independent IMUL instruction in
28368 Return index of IMUL producer if it was found and -1 otherwise. */
/* NOTE(review): early-return lines, the `insn = ready[i];` assignment
   in the search loop, and the bookkeeping that records/validates the
   candidate index are missing from this extract (numbering gaps).  */
28370 do_reorder_for_imul (rtx_insn **ready, int n_ready)
28373 rtx set, insn1, insn2;
28374 sd_iterator_def sd_it;
/* BONNELL-only heuristic.  */
28379 if (!TARGET_BONNELL)
28382 /* Check that IMUL instruction is on the top of ready list. */
28383 insn = ready[n_ready - 1];
28384 set = single_set (insn);
28387 if (!(GET_CODE (SET_SRC (set)) == MULT
28388 && GET_MODE (SET_SRC (set)) == SImode))
28391 /* Search for producer of independent IMUL instruction. */
28392 for (i = n_ready - 2; i >= 0; i--)
28395 if (!NONDEBUG_INSN_P (insn))
28397 /* Skip IMUL instruction. */
28398 insn2 = PATTERN (insn);
28399 if (GET_CODE (insn2) == PARALLEL)
28400 insn2 = XVECEXP (insn2, 0, 0)
28401 if (GET_CODE (insn2) == SET
28402 && GET_CODE (SET_SRC (insn2)) == MULT
28403 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk forward dependences of the candidate producer looking for a
   consumer that is itself an SImode IMUL.  */
28406 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
28409 con = DEP_CON (dep);
28410 if (!NONDEBUG_INSN_P (con))
28412 insn1 = PATTERN (con);
28413 if (GET_CODE (insn1) == PARALLEL)
28414 insn1 = XVECEXP (insn1, 0, 0);
28416 if (GET_CODE (insn1) == SET
28417 && GET_CODE (SET_SRC (insn1)) == MULT
28418 && GET_MODE (SET_SRC (insn1)) == SImode)
28420 sd_iterator_def sd_it1;
28422 /* Check if there is no other dependee for IMUL. */
28424 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
28427 pro = DEP_PRO (dep1);
28428 if (!NONDEBUG_INSN_P (pro))
28443 /* Try to find the best candidate on the top of ready list if two insns
28444 have the same priority - candidate is best if its dependees were
28445 scheduled earlier. Applied for Silvermont only.
28446 Return true if top 2 insns must be interchanged. */
/* NOTE(review): the clock1/clock2 declarations/initializers and several
   early `return false;` lines are missing from this extract.  */
28448 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
28450 rtx_insn *top = ready[n_ready - 1];
28451 rtx_insn *next = ready[n_ready - 2];
28453 sd_iterator_def sd_it;
/* INSN_TICK of a producer = cycle at which it was scheduled; used
   below to decide whose inputs became ready earlier.  */
28457 #define INSN_TICK(INSN) (HID (INSN)->tick)
28459 if (!TARGET_SILVERMONT && !TARGET_INTEL)
28462 if (!NONDEBUG_INSN_P (top))
28464 if (!NONJUMP_INSN_P (top))
28466 if (!NONDEBUG_INSN_P (next))
28468 if (!NONJUMP_INSN_P (next))
28470 set = single_set (top);
28473 set = single_set (next);
28477 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
28479 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
28481 /* Determine winner more precise. */
28482 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
28485 pro = DEP_PRO (dep);
28486 if (!NONDEBUG_INSN_P (pro))
28488 if (INSN_TICK (pro) > clock1)
28489 clock1 = INSN_TICK (pro);
28491 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
28494 pro = DEP_PRO (dep);
28495 if (!NONDEBUG_INSN_P (pro))
28497 if (INSN_TICK (pro) > clock2)
28498 clock2 = INSN_TICK (pro);
28501 if (clock1 == clock2)
28503 /* Determine winner - load must win. */
28504 enum attr_memory memory1, memory2;
28505 memory1 = get_attr_memory (top);
28506 memory2 = get_attr_memory (next);
28507 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
/* Otherwise: swap iff NEXT's inputs were ready strictly earlier.  */
28510 return (bool) (clock2 < clock1);
28516 /* Perform possible reodering of ready list for Atom/Silvermont only.
28517 Return issue rate. */
/* Target hook TARGET_SCHED_REORDER.  NOTE(review): the declarations of
   i/index/insn, the `goto`/return paths for the early exits, the
   n_ready<=1 check body, and the sel_sched_p() condition at 28557 are
   missing from this extract.  */
28519 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
28520 int *pn_ready, int clock_var)
28522 int issue_rate = -1;
28523 int n_ready = *pn_ready;
28528 /* Set up issue rate. */
28529 issue_rate = ix86_issue_rate ();
28531 /* Do reodering for BONNELL/SILVERMONT only. */
28532 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
28535 /* Nothing to do if ready list contains only 1 instruction. */
28539 /* Do reodering for post-reload scheduler only. */
28540 if (!reload_completed)
28543 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
28545 if (sched_verbose > 1)
28546 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
28547 INSN_UID (ready[index]));
28549 /* Put IMUL producer (ready[index]) at the top of ready list. */
28550 insn = ready[index];
28551 for (i = index; i < n_ready - 1; i++)
28552 ready[i] = ready[i + 1];
28553 ready[n_ready - 1] = insn;
28557 /* Skip selective scheduling since HID is not populated in it. */
28560 && swap_top_of_ready_list (ready, n_ready))
28562 if (sched_verbose > 1)
28563 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
28564 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
28565 /* Swap 2 top elements of ready list. */
28566 insn = ready[n_ready - 1];
28567 ready[n_ready - 1] = ready[n_ready - 2];
28568 ready[n_ready - 2] = insn;
/* Forward declaration (the `static bool` line is missing from this
   extract); defined later in the file.  */
28574 ix86_class_likely_spilled_p (reg_class_t);
28576 /* Returns true if lhs of insn is HW function argument register and set up
28577 is_spilled to true if it is likely spilled HW register. */
/* NOTE(review): the early `return false;` lines after the guards and
   the final returns are missing (numbering gaps).  */
28579 insn_is_function_arg (rtx insn, bool* is_spilled)
28583 if (!NONDEBUG_INSN_P (insn))
28585 /* Call instructions are not movable, ignore it. */
28588 insn = PATTERN (insn);
28589 if (GET_CODE (insn) == PARALLEL)
28590 insn = XVECEXP (insn, 0, 0);
28591 if (GET_CODE (insn) != SET)
28593 dst = SET_DEST (insn);
28594 if (REG_P (dst) && HARD_REGISTER_P (dst)
28595 && ix86_function_arg_regno_p (REGNO (dst)))
28597 /* Is it likely spilled HW register? */
28598 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28599 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28600 *is_spilled = true;
28606 /* Add output dependencies for chain of function adjacent arguments if only
28607 there is a move to likely spilled HW register. Return first argument
28608 if at least one dependence was added or NULL otherwise. */
/* NOTE(review): the loop headers/braces and several break/return lines
   are missing from this extract; only the skeleton of the backward
   scan from CALL toward HEAD is visible.  */
28610 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28613 rtx_insn *last = call;
28614 rtx_insn *first_arg = NULL;
28615 bool is_spilled = false;
/* Move HEAD one insn back so the scan below may include HEAD itself.  */
28617 head = PREV_INSN (head);
28619 /* Find nearest to call argument passing instruction. */
28622 last = PREV_INSN (last);
28625 if (!NONDEBUG_INSN_P (last))
28627 if (insn_is_function_arg (last, &is_spilled))
28635 insn = PREV_INSN (last);
28636 if (!INSN_P (insn))
28640 if (!NONDEBUG_INSN_P (insn))
28645 if (insn_is_function_arg (insn, &is_spilled))
28647 /* Add output depdendence between two function arguments if chain
28648 of output arguments contains likely spilled HW registers. */
28650 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28651 first_arg = last = insn;
28661 /* Add output or anti dependency from insn to first_arg to restrict its code
/* (continuation of the comment is missing: "... motion" presumably).
   NOTE(review): the guard/return lines after single_set and the REG_P
   test on TMP are missing from this extract.  */
28664 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28669 /* Add anti dependencies for bounds stores. */
/* MPX bound-store (BNDSTX) insns get an anti dependence so they stay
   after the argument move.  */
28671 && GET_CODE (PATTERN (insn)) == PARALLEL
28672 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28673 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28675 add_dependence (first_arg, insn, REG_DEP_ANTI);
28679 set = single_set (insn);
28682 tmp = SET_DEST (set);
28685 /* Add output dependency to the first function argument. */
28686 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28689 /* Add anti dependency. */
28690 add_dependence (first_arg, insn, REG_DEP_ANTI);
28693 /* Avoid cross block motion of function argument through adding dependency
28694 from the first non-jump instruction in bb. */
/* NOTE(review): the loop construct wrapping the backward walk over BB
   and the `return`/break after a dependence is added are missing.  */
28696 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28698 rtx_insn *insn = BB_END (bb);
28702 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28704 rtx set = single_set (insn);
28707 avoid_func_arg_motion (arg, insn);
28711 if (insn == BB_HEAD (bb))
28713 insn = PREV_INSN (insn);
28717 /* Hook for pre-reload schedule - avoid motion of function arguments
28718 passed in likely spilled HW registers. */
/* Target hook TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK; pre-reload
   only (bails if reload_completed).  NOTE(review): the early return,
   the `if (first_arg)` guard around the region handling, and the edge
   iterator declarations are missing from this extract.  */
28720 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28723 rtx_insn *first_arg = NULL;
28724 if (reload_completed)
28726 while (head != tail && DEBUG_INSN_P (head))
28727 head = NEXT_INSN (head);
28728 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28729 if (INSN_P (insn) && CALL_P (insn))
28731 first_arg = add_parameter_dependencies (insn, head);
28734 /* Add dependee for first argument to predecessors if only
28735 region contains more than one block. */
28736 basic_block bb = BLOCK_FOR_INSN (insn);
28737 int rgn = CONTAINING_RGN (bb->index);
28738 int nr_blks = RGN_NR_BLOCKS (rgn);
28739 /* Skip trivial regions and region head blocks that can have
28740 predecessors outside of region. */
28741 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28746 /* Regions are SCCs with the exception of selective
28747 scheduling with pipelining of outer blocks enabled.
28748 So also check that immediate predecessors of a non-head
28749 block are in the same region. */
28750 FOR_EACH_EDGE (e, ei, bb->preds)
28752 /* Avoid creating of loop-carried dependencies through
28753 using topological ordering in the region. */
28754 if (rgn == CONTAINING_RGN (e->src->index)
28755 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28756 add_dependee_for_func_arg (first_arg, e->src);
28764 else if (first_arg)
28765 avoid_func_arg_motion (first_arg, insn);
28768 /* Hook for pre-reload schedule - set priority of moves from likely spilled
28769 HW registers to maximum, to schedule them at soon as possible. These are
28770 moves from function argument registers at the top of the function entry
28771 and moves from function return value registers after call. */
/* Target hook TARGET_SCHED_ADJUST_PRIORITY.  NOTE(review): the
   `return priority;` fall-through paths and the REG_P guard on TMP
   are missing from this extract.  */
28773 ix86_adjust_priority (rtx_insn *insn, int priority)
28777 if (reload_completed)
28780 if (!NONDEBUG_INSN_P (insn))
28783 set = single_set (insn);
28786 rtx tmp = SET_SRC (set);
28788 && HARD_REGISTER_P (tmp)
28789 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28790 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
/* Boost to the scheduler's maximum so the copy-out happens ASAP.  */
28791 return current_sched_info->sched_max_insns_priority;
28797 /* Model decoder of Core 2/i7.
28798 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
28799 track the instruction fetch block boundaries and make sure that long
28800 (9+ bytes) instructions are assigned to D0. */
28802 /* Maximum length of an insn that can be handled by
28803 a secondary decoder unit. '8' for Core 2/i7. */
28804 static int core2i7_secondary_decoder_max_insn_size;
28806 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
28807 '16' for Core 2/i7. */
28808 static int core2i7_ifetch_block_size;
28810 /* Maximum number of instructions decoder can handle per cycle.
28811 '6' for Core 2/i7. */
28812 static int core2i7_ifetch_block_max_insns;
/* Mutable/const pointer typedefs for the per-round multipass state
   (struct definition not visible in this extract).  */
28814 typedef struct ix86_first_cycle_multipass_data_ *
28815 ix86_first_cycle_multipass_data_t;
28816 typedef const struct ix86_first_cycle_multipass_data_ *
28817 const_ix86_first_cycle_multipass_data_t;
28819 /* A variable to store target state across calls to max_issue within
/* (rest of comment missing: presumably "one cycle").  */
28821 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
28822 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
28824 /* Initialize DATA. */
/* Hook TARGET_SCHED_FIRST_CYCLE_MULTIPASS_INIT: zero the ifetch-block
   counters and leave the ready_try-change bitmap unallocated (it is
   lazily allocated in the issue hook).  */
28826 core2i7_first_cycle_multipass_init (void *_data)
28828 ix86_first_cycle_multipass_data_t data
28829 = (ix86_first_cycle_multipass_data_t) _data;
28831 data->ifetch_block_len = 0;
28832 data->ifetch_block_n_insns = 0;
28833 data->ready_try_change = NULL;
28834 data->ready_try_change_size = 0;
28837 /* Advancing the cycle; reset ifetch block counts. */
28839 core2i7_dfa_post_advance_cycle (void)
28841 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
/* A cycle can never have over-issued relative to the decoder width.  */
28843 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28845 data->ifetch_block_len = 0;
28846 data->ifetch_block_n_insns = 0;
/* Forward declaration; used by the filter/issue hooks below.  */
28849 static int min_insn_size (rtx_insn *);
28851 /* Filter out insns from ready_try that the core will not be able to issue
28852 on current cycle due to decoder. */
/* NOTE(review): the loop over the ready list (indexing n_ready down or
   up) and intermediate braces are missing from this extract; only the
   per-element filtering logic is visible.  */
28854 core2i7_first_cycle_multipass_filter_ready_try
28855 (const_ix86_first_cycle_multipass_data_t data,
28856 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
/* Already-masked entries are skipped.  */
28863 if (ready_try[n_ready])
28866 insn = get_ready_element (n_ready);
28867 insn_size = min_insn_size (insn);
28869 if (/* If this is a too long an insn for a secondary decoder ... */
28870 (!first_cycle_insn_p
28871 && insn_size > core2i7_secondary_decoder_max_insn_size)
28872 /* ... or it would not fit into the ifetch block ... */
28873 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
28874 /* ... or the decoder is full already ... */
28875 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
28876 /* ... mask the insn out. */
28878 ready_try[n_ready] = 1;
/* Record the mask so backtracking can undo it.  */
28880 if (data->ready_try_change)
28881 bitmap_set_bit (data->ready_try_change, n_ready);
28886 /* Prepare for a new round of multipass lookahead scheduling. */
/* Hook TARGET_SCHED_FIRST_CYCLE_MULTIPASS_BEGIN: seed this round's
   state from the saved end-of-previous-round state, then pre-filter
   the ready list against the decoder model.  */
28888 core2i7_first_cycle_multipass_begin (void *_data,
28889 signed char *ready_try, int n_ready,
28890 bool first_cycle_insn_p)
28892 ix86_first_cycle_multipass_data_t data
28893 = (ix86_first_cycle_multipass_data_t) _data;
28894 const_ix86_first_cycle_multipass_data_t prev_data
28895 = ix86_first_cycle_multipass_data;
28897 /* Restore the state from the end of the previous round. */
28898 data->ifetch_block_len = prev_data->ifetch_block_len;
28899 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
28901 /* Filter instructions that cannot be issued on current cycle due to
28902 decoder restrictions. */
28903 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28904 first_cycle_insn_p);
28907 /* INSN is being issued in current solution. Account for its impact on
28908 the decoder model. */
28910 core2i7_first_cycle_multipass_issue (void *_data,
28911 signed char *ready_try, int n_ready,
28912 rtx_insn *insn, const void *_prev_data)
28914 ix86_first_cycle_multipass_data_t data
28915 = (ix86_first_cycle_multipass_data_t) _data;
28916 const_ix86_first_cycle_multipass_data_t prev_data
28917 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
28919 int insn_size = min_insn_size (insn);
28921 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
28922 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
/* The filter hook must have prevented any overrun.  */
28923 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
28924 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28926 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
28927 if (!data->ready_try_change)
28929 data->ready_try_change = sbitmap_alloc (n_ready);
28930 data->ready_try_change_size = n_ready;
28932 else if (data->ready_try_change_size < n_ready)
28934 data->ready_try_change = sbitmap_resize (data->ready_try_change,
/* NOTE(review): the remaining sbitmap_resize arguments (new size, and
   the fill value) are on a missing line.  */
28936 data->ready_try_change_size = n_ready;
28938 bitmap_clear (data->ready_try_change);
28940 /* Filter out insns from ready_try that the core will not be able to issue
28941 on current cycle due to decoder. */
28942 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28946 /* Revert the effect on ready_try. */
/* NOTE(review): the loop body that clears ready_try[i] for every bit
   recorded in ready_try_change is on missing lines.  */
28948 core2i7_first_cycle_multipass_backtrack (const void *_data,
28949 signed char *ready_try,
28950 int n_ready ATTRIBUTE_UNUSED)
28952 const_ix86_first_cycle_multipass_data_t data
28953 = (const_ix86_first_cycle_multipass_data_t) _data;
28954 unsigned int i = 0;
28955 sbitmap_iterator sbi;
28957 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
28958 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
28964 /* Save the result of multipass lookahead scheduling for the next round. */
/* Copies this round's ifetch-block state into the global
   ix86_first_cycle_multipass_data so the next round's begin hook can
   restore it (a guard line around the copy is missing here).  */
28966 core2i7_first_cycle_multipass_end (const void *_data)
28968 const_ix86_first_cycle_multipass_data_t data
28969 = (const_ix86_first_cycle_multipass_data_t) _data;
28970 ix86_first_cycle_multipass_data_t next_data
28971 = ix86_first_cycle_multipass_data;
28975 next_data->ifetch_block_len = data->ifetch_block_len;
28976 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
28980 /* Deallocate target data. */
/* Frees the lazily-allocated ready_try_change bitmap and resets the
   bookkeeping so the state can be reused.  */
28982 core2i7_first_cycle_multipass_fini (void *_data)
28984 ix86_first_cycle_multipass_data_t data
28985 = (ix86_first_cycle_multipass_data_t) _data;
28987 if (data->ready_try_change)
28989 sbitmap_free (data->ready_try_change);
28990 data->ready_try_change = NULL;
28991 data->ready_try_change_size = 0;
28995 /* Prepare for scheduling pass. */
/* Hook TARGET_SCHED_INIT_GLOBAL.  NOTE(review): the switch header on
   ix86_tune, the `break;` after the Core branch, and the `default:`
   label before the NULL assignments are on missing lines.  */
28997 ix86_sched_init_global (FILE *, int, int)
28999 /* Install scheduling hooks for current CPU. Some of these hooks are used
29000 in time-critical parts of the scheduler, so we only set them up when
29001 they are actually used. */
29004 case PROCESSOR_CORE2:
29005 case PROCESSOR_NEHALEM:
29006 case PROCESSOR_SANDYBRIDGE:
29007 case PROCESSOR_HASWELL:
29008 /* Do not perform multipass scheduling for pre-reload schedule
29009 to save compile time. */
29010 if (reload_completed)
29012 targetm.sched.dfa_post_advance_cycle
29013 = core2i7_dfa_post_advance_cycle;
29014 targetm.sched.first_cycle_multipass_init
29015 = core2i7_first_cycle_multipass_init;
29016 targetm.sched.first_cycle_multipass_begin
29017 = core2i7_first_cycle_multipass_begin;
29018 targetm.sched.first_cycle_multipass_issue
29019 = core2i7_first_cycle_multipass_issue;
29020 targetm.sched.first_cycle_multipass_backtrack
29021 = core2i7_first_cycle_multipass_backtrack;
29022 targetm.sched.first_cycle_multipass_end
29023 = core2i7_first_cycle_multipass_end;
29024 targetm.sched.first_cycle_multipass_fini
29025 = core2i7_first_cycle_multipass_fini;
29027 /* Set decoder parameters. */
29028 core2i7_secondary_decoder_max_insn_size = 8;
29029 core2i7_ifetch_block_size = 16;
29030 core2i7_ifetch_block_max_insns = 6;
29033 /* ... Fall through ... */
/* Non-Core tunings (and the pre-reload Core case falling through)
   clear every multipass hook.  */
29035 targetm.sched.dfa_post_advance_cycle = NULL;
29036 targetm.sched.first_cycle_multipass_init = NULL;
29037 targetm.sched.first_cycle_multipass_begin = NULL;
29038 targetm.sched.first_cycle_multipass_issue = NULL;
29039 targetm.sched.first_cycle_multipass_backtrack = NULL;
29040 targetm.sched.first_cycle_multipass_end = NULL;
29041 targetm.sched.first_cycle_multipass_fini = NULL;
29047 /* Compute the alignment given to a constant that is being placed in memory.
29048 EXP is the constant and ALIGN is the alignment that the object would
29050 The value of this function is used instead of that alignment to align
/* (rest of comment and the `return 64/128;` and final `return align;`
   lines are missing from this extract.)  */
29054 ix86_constant_alignment (tree exp, int align)
29056 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
29057 || TREE_CODE (exp) == INTEGER_CST)
29059 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
29061 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment to speed up block copies.  */
29064 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
29065 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
29066 return BITS_PER_WORD;
29071 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
29072 the data type, and ALIGN is the alignment that the object would
29073 ordinarily have. */
/* NOTE(review): the `return align;` lines and the case that caps the
   alignment at 32 bits are on missing lines; only the type-class
   dispatch skeleton is visible.  */
29076 iamcu_alignment (tree type, int align)
29078 enum machine_mode mode;
/* Alignments <= 32 bits, or user-specified ones, are left alone.  */
29080 if (align < 32 || TYPE_USER_ALIGN (type))
29083 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
29085 mode = TYPE_MODE (strip_array_types (type));
29086 switch (GET_MODE_CLASS (mode))
29089 case MODE_COMPLEX_INT:
29090 case MODE_COMPLEX_FLOAT:
29092 case MODE_DECIMAL_FLOAT:
29099 /* Compute the alignment for a static variable.
29100 TYPE is the data type, and ALIGN is the alignment that
29101 the object would ordinarily have. The value of this function is used
29102 instead of that alignment to align the object. */
29105 ix86_data_alignment (tree type, int align, bool opt)
/* NOTE(review): many original lines (braces, declarations, returns)
   are missing from this excerpt; comments describe only the visible
   logic.  OPT selects whether optional (non-ABI) boosts apply.  */
29107 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
29108 for symbols from other compilation units or symbols that don't need
29109 to bind locally. In order to preserve some ABI compatibility with
29110 those compilers, ensure we don't decrease alignment from what we
/* Compatibility cap, in bits, clamped to what the object file format
   can express.  */
29113 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
29115 /* A data structure, equal or greater than the size of a cache line
29116 (64 bytes in the Pentium 4 and other recent Intel processors, including
29117 processors based on Intel Core microarchitecture) should be aligned
29118 so that its base address is a multiple of a cache line size. */
/* prefetch_block is in bytes; "* 8" converts to bits.  The declared
   variable this initializes is on a dropped line.  */
29121 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29123 if (max_align < BITS_PER_WORD)
29124 max_align = BITS_PER_WORD;
/* -malign-data= policy: "abi" disables the optional boosts (OPT =
   false), "compat" caps the cache-line boost at a word, "cacheline"
   keeps the full boost.  */
29126 switch (ix86_align_data_type)
29128 case ix86_align_data_type_abi: opt = false; break;
29129 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29130 case ix86_align_data_type_cacheline: break;
/* Intel MCU psABI caps alignment instead of boosting it (the guard
   condition for this call is on a dropped line).  */
29134 align = iamcu_alignment (type, align);
/* GCC 4.8 compatibility: keep boosting sufficiently large aggregates
   whose size (TYPE_SIZE, in bits) is a compile-time constant.  */
29137 && AGGREGATE_TYPE_P (type)
29138 && TYPE_SIZE (type)
29139 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
29141 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
29142 && align < max_align_compat)
29143 align = max_align_compat;
29144 if (wi::geu_p (TYPE_SIZE (type), max_align)
29145 && align < max_align)
29149 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29150 to 16byte boundary. */
/* With OPT the 16-byte rule is extended to all aggregates; without it
   only true arrays (the ABI requirement) are affected.  */
29153 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
29154 && TYPE_SIZE (type)
29155 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29156 && wi::geu_p (TYPE_SIZE (type), 128)
/* Per-type boosts: DFmode data to 64-bit alignment, 128-bit-aligned
   modes to 128 (return statements not shown in this excerpt).  */
29164 if (TREE_CODE (type) == ARRAY_TYPE)
29166 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29168 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29171 else if (TREE_CODE (type) == COMPLEX_TYPE)
29174 if (TYPE_MODE (type) == DCmode && align < 64)
29176 if ((TYPE_MODE (type) == XCmode
29177 || TYPE_MODE (type) == TCmode) && align < 128)
/* Records/unions: decide from the mode of the first field only.  */
29180 else if ((TREE_CODE (type) == RECORD_TYPE
29181 || TREE_CODE (type) == UNION_TYPE
29182 || TREE_CODE (type) == QUAL_UNION_TYPE)
29183 && TYPE_FIELDS (type))
29185 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29187 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29190 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29191 || TREE_CODE (type) == INTEGER_TYPE)
29193 if (TYPE_MODE (type) == DFmode && align < 64)
29195 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29202 /* Compute the alignment for a local variable or a stack slot. EXP is
29203 the data type or decl itself, MODE is the widest mode available and
29204 ALIGN is the alignment that the object would ordinarily have. The
29205 value of this macro is used instead of that alignment to align the
29209 ix86_local_alignment (tree exp, machine_mode mode,
29210 unsigned int align)
/* NOTE(review): braces, local declarations and return statements are
   missing from this excerpt; comments describe only visible logic.  */
/* EXP may be either a decl or a type; derive the type either way (the
   type-only branch is on dropped lines).  */
29214 if (exp && DECL_P (exp))
29216 type = TREE_TYPE (exp);
29225 /* Don't do dynamic stack realignment for long long objects with
29226 -mpreferred-stack-boundary=2. */
/* ...unless the user explicitly requested the alignment on the type or
   the decl.  */
29229 && ix86_preferred_stack_boundary < 64
29230 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
29231 && (!type || !TYPE_USER_ALIGN (type))
29232 && (!decl || !DECL_USER_ALIGN (decl)))
29235 /* If TYPE is NULL, we are allocating a stack slot for caller-save
29236 register in MODE. We will return the largest alignment of XF
29240 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
29241 align = GET_MODE_ALIGNMENT (DFmode);
29245 /* Don't increase alignment for Intel MCU psABI. */
29249 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29250 to 16byte boundary. Exact wording is:
29252 An array uses the same alignment as its elements, except that a local or
29253 global array variable of length at least 16 bytes or
29254 a C99 variable-length array variable always has alignment of at least 16 bytes.
29256 This was added to allow use of aligned SSE instructions at arrays. This
29257 rule is meant for static storage (where compiler can not do the analysis
29258 by itself). We follow it for automatic variables only when convenient.
29259 We fully control everything in the function compiled and functions from
29260 other unit can not rely on the alignment.
29262 Exclude va_list type. It is the common case of local array where
29263 we can not benefit from the alignment.
29265 TODO: Probably one should optimize for size only when var is not escaping. */
29266 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
/* The 16-byte boost applies to aggregates of known, constant size of
   at least 16 bytes -- except the va_list type excluded above.  */
29269 if (AGGREGATE_TYPE_P (type)
29270 && (va_list_type_node == NULL_TREE
29271 || (TYPE_MAIN_VARIANT (type)
29272 != TYPE_MAIN_VARIANT (va_list_type_node)))
29273 && TYPE_SIZE (type)
29274 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29275 && wi::geu_p (TYPE_SIZE (type), 16)
/* Per-type boosts mirroring ix86_data_alignment: DFmode data to 64
   bits, 128-bit-aligned modes to 128 (returns not shown here).  */
29279 if (TREE_CODE (type) == ARRAY_TYPE)
29281 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29283 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29286 else if (TREE_CODE (type) == COMPLEX_TYPE)
29288 if (TYPE_MODE (type) == DCmode && align < 64)
29290 if ((TYPE_MODE (type) == XCmode
29291 || TYPE_MODE (type) == TCmode) && align < 128)
/* Records/unions: decide from the mode of the first field only.  */
29294 else if ((TREE_CODE (type) == RECORD_TYPE
29295 || TREE_CODE (type) == UNION_TYPE
29296 || TREE_CODE (type) == QUAL_UNION_TYPE)
29297 && TYPE_FIELDS (type))
29299 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29301 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29304 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29305 || TREE_CODE (type) == INTEGER_TYPE)
29308 if (TYPE_MODE (type) == DFmode && align < 64)
29310 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29316 /* Compute the minimum required alignment for dynamic stack realignment
29317 purposes for a local variable, parameter or a stack slot. EXP is
29318 the data type or decl itself, MODE is its mode and ALIGN is the
29319 alignment that the object would ordinarily have. */
29322 ix86_minimum_alignment (tree exp, machine_mode mode,
29323 unsigned int align)
/* EXP may be a decl or a type; pick up the type either way (parts of
   this branch are on dropped lines in this excerpt).  */
29327 if (exp && DECL_P (exp))
29329 type = TREE_TYPE (exp);
/* Only the 32-bit case with 64-bit alignment and a preferred stack
   boundary below 64 needs adjusting; otherwise keep ALIGN as-is (the
   early-return statement itself is not visible here).  */
29338 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29341 /* Don't do dynamic stack realignment for long long objects with
29342 -mpreferred-stack-boundary=2. */
29343 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29344 && (!type || !TYPE_USER_ALIGN (type))
29345 && (!decl || !DECL_USER_ALIGN (decl)))
/* The STV pass relies on DImode alignment that this path gives up, so
   it must be disabled when we get here.  */
29347 gcc_checking_assert (!TARGET_STV);
29354 /* Find a location for the static chain incoming to a nested function.
29355 This is a register, unless all free registers are used by arguments. */
29358 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
/* NOTE(review): the regno assignments and several braces/returns are
   missing from this excerpt; comments describe only visible logic.  */
29362 /* While this function won't be called by the middle-end when a static
29363 chain isn't needed, it's also used throughout the backend so it's
29364 easiest to keep this check centralized. */
29365 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
29370 /* We always use R10 in 64-bit mode. */
29375 const_tree fntype, fndecl;
29378 /* By default in 32-bit mode we use ECX to pass the static chain. */
/* FNDECL_OR_TYPE is either a FUNCTION_DECL (which supplies both the
   decl and its type) or a bare function type (no decl available).  */
29381 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29383 fntype = TREE_TYPE (fndecl_or_type);
29384 fndecl = fndecl_or_type;
29388 fntype = fndecl_or_type;
/* The calling convention decides which registers remain free to carry
   the static chain.  */
29392 ccvt = ix86_get_callcvt (fntype);
29393 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29395 /* Fastcall functions use ecx/edx for arguments, which leaves
29396 us with EAX for the static chain.
29397 Thiscall functions use ecx for arguments, which also
29398 leaves us with EAX for the static chain. */
29401 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29403 /* Thiscall functions use ecx for arguments, which leaves
29404 us with EAX and EDX for the static chain.
29405 We are using for abi-compatibility EAX. */
29408 else if (ix86_function_regparm (fntype, fndecl) == 3)
29410 /* For regparm 3, we have no free call-clobbered registers in
29411 which to store the static chain. In order to implement this,
29412 we have the trampoline push the static chain to the stack.
29413 However, we can't push a value below the return address when
29414 we call the nested function directly, so we have to use an
29415 alternate entry point. For this we use ESI, and have the
29416 alternate entry point push ESI, so that things appear the
29417 same once we're executing the nested function. */
/* Record that the current function receives its chain on the stack so
   prologue/epilogue generation can compensate.  */
29420 if (fndecl == current_function_decl)
29421 ix86_static_chain_on_stack = true;
/* The chain lives in a stack slot at arg_pointer - 8.  */
29422 return gen_frame_mem (SImode,
29423 plus_constant (Pmode,
29424 arg_pointer_rtx, -8));
/* Common exit: the chain is in a hard register.  */
29430 return gen_rtx_REG (Pmode, regno);
29433 /* Emit RTL insns to initialize the variable parts of a trampoline.
29434 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29435 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29436 to be passed to the target function. */
29439 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
/* NOTE(review): declarations, offset updates and several braces are
   missing from this excerpt; opcode bytes are documented as encoded in
   the visible lines (stored little-endian).  */
29445 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29451 /* Load the function address to r11. Try to load address using
29452 the shorter movl instead of movabs. We may want to support
29453 movq for kernel mode, but kernel does not use trampolines at
29454 the moment. FNADDR is a 32bit address and may not be in
29455 DImode when ptr_mode == SImode. Always use movl in this
29457 if (ptr_mode == SImode
29458 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
/* Non-immediate address: force it into a register first.  */
29460 fnaddr = copy_addr_to_reg (fnaddr);
/* Bytes 41 bb: REX.B + movl $imm32, %r11d (zero-extends into r11).  */
29462 mem = adjust_address (m_tramp, HImode, offset);
29463 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29465 mem = adjust_address (m_tramp, SImode, offset + 2);
29466 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* Bytes 49 bb: REX.W+B + movabs $imm64, %r11.  */
29471 mem = adjust_address (m_tramp, HImode, offset);
29472 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29474 mem = adjust_address (m_tramp, DImode, offset + 2);
29475 emit_move_insn (mem, fnaddr);
29479 /* Load static chain using movabs to r10. Use the shorter movl
29480 instead of movabs when ptr_mode == SImode. */
29481 if (ptr_mode == SImode)
/* Write the two-byte opcode, then the chain value immediately after
   (the opcode selection lines are dropped from this excerpt).  */
29492 mem = adjust_address (m_tramp, HImode, offset);
29493 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29495 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29496 emit_move_insn (mem, chain_value);
29499 /* Jump to r11; the last (unused) byte is a nop, only there to
29500 pad the write out to a single 32-bit store. */
29501 mem = adjust_address (m_tramp, SImode, offset);
29502 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
/* ---- 32-bit (ia32) trampoline below. ---- */
29509 /* Depending on the static chain location, either load a register
29510 with a constant, or push the constant to the stack. All of the
29511 instructions are the same size. */
29512 chain = ix86_static_chain (fndecl, true);
/* movl $imm32, %reg uses the b8+reg encoding; the case labels for the
   chain register are on dropped lines (presumably AX vs. CX).  */
29515 switch (REGNO (chain))
29518 opcode = 0xb8; break;
29520 opcode = 0xb9; break;
29522 gcc_unreachable ();
29528 mem = adjust_address (m_tramp, QImode, offset);
29529 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29531 mem = adjust_address (m_tramp, SImode, offset + 1);
29532 emit_move_insn (mem, chain_value);
/* 0xe9 = jmp rel32 to the nested function.  */
29535 mem = adjust_address (m_tramp, QImode, offset);
29536 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29538 mem = adjust_address (m_tramp, SImode, offset + 1);
29540 /* Compute offset from the end of the jmp to the target function.
29541 In the case in which the trampoline stores the static chain on
29542 the stack, we need to skip the first insn which pushes the
29543 (call-saved) register static chain; this push is 1 byte. */
29545 disp = expand_binop (SImode, sub_optab, fnaddr,
29546 plus_constant (Pmode, XEXP (m_tramp, 0),
29547 offset - (MEM_P (chain) ? 1 : 0)),
29548 NULL_RTX, 1, OPTAB_DIRECT);
29549 emit_move_insn (mem, disp);
/* Sanity check: the bytes written must fit the declared trampoline
   size.  */
29552 gcc_assert (offset <= TRAMPOLINE_SIZE);
29554 #ifdef HAVE_ENABLE_EXECUTE_STACK
29555 #ifdef CHECK_EXECUTE_STACK_ENABLED
29556 if (CHECK_EXECUTE_STACK_ENABLED)
/* On targets that need it, make the trampoline's memory executable
   via a runtime libcall.  */
29558 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29559 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
29563 /* The following file contains several enumerations and data structures
29564 built from the definitions in i386-builtin-types.def. */
29566 #include "i386-builtin-types.inc"
29568 /* Table for the ix86 builtin non-function types. */
29569 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29571 /* Retrieve an element from the above table, building some of
29572 the types lazily. */
29575 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29577 unsigned int index;
29580 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
/* Cache lookup; the cache-hit early return is on lines dropped from
   this excerpt.  */
29582 type = ix86_builtin_type_tab[(int) tcode];
/* Primitive type codes are pre-seeded elsewhere; anything built here
   is a vector or pointer type derived from another entry.  */
29586 gcc_assert (tcode > IX86_BT_LAST_PRIM);
29587 if (tcode <= IX86_BT_LAST_VECT)
/* Vector type: the element type code and machine mode come from the
   parallel _vect_base/_vect_mode lookup tables.  */
29591 index = tcode - IX86_BT_LAST_PRIM - 1;
29592 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29593 mode = ix86_builtin_type_vect_mode[index];
29595 type = build_vector_type_for_mode (itype, mode);
/* Pointer type: codes up to IX86_BT_LAST_PTR are plain pointers; the
   remaining codes are pointers to const.  */
29601 index = tcode - IX86_BT_LAST_VECT - 1;
29602 if (tcode <= IX86_BT_LAST_PTR)
29603 quals = TYPE_UNQUALIFIED;
29605 quals = TYPE_QUAL_CONST;
29607 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29608 if (quals != TYPE_UNQUALIFIED)
29609 itype = build_qualified_type (itype, quals);
29611 type = build_pointer_type (itype);
/* Memoize the freshly built type for subsequent lookups.  */
29614 ix86_builtin_type_tab[(int) tcode] = type;
29618 /* Table for the ix86 builtin function types. */
29619 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29621 /* Retrieve an element from the above table, building some of
29622 the types lazily. */
29625 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29629 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
/* Cache lookup; the cache-hit early return is on lines dropped from
   this excerpt.  */
29631 type = ix86_builtin_func_type_tab[(int) tcode];
29635 if (tcode <= IX86_BT_LAST_FUNC)
/* Non-alias code: the descriptor for TCODE occupies [start, after) in
   ix86_builtin_func_args; slot START holds the return type code, the
   remaining slots hold the parameter type codes.  */
29637 unsigned start = ix86_builtin_func_start[(int) tcode];
29638 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29639 tree rtype, atype, args = void_list_node;
29642 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Cons the argument list back to front so it ends up in source order,
   terminated by void_list_node (i.e. a non-varargs prototype).  */
29643 for (i = after - 1; i > start; --i)
29645 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29646 args = tree_cons (NULL, atype, args);
29649 type = build_function_type (rtype, args);
/* Alias code: reuse the type node of the builtin it aliases.  */
29653 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29654 enum ix86_builtin_func_type icode;
29656 icode = ix86_builtin_func_alias_base[index];
29657 type = ix86_get_builtin_func_type (icode);
/* Memoize the result for subsequent lookups.  */
29660 ix86_builtin_func_type_tab[(int) tcode] = type;
29665 /* Codes for all the SSE/MMX builtins. */
29668 IX86_BUILTIN_ADDPS,
29669 IX86_BUILTIN_ADDSS,
29670 IX86_BUILTIN_DIVPS,
29671 IX86_BUILTIN_DIVSS,
29672 IX86_BUILTIN_MULPS,
29673 IX86_BUILTIN_MULSS,
29674 IX86_BUILTIN_SUBPS,
29675 IX86_BUILTIN_SUBSS,
29677 IX86_BUILTIN_CMPEQPS,
29678 IX86_BUILTIN_CMPLTPS,
29679 IX86_BUILTIN_CMPLEPS,
29680 IX86_BUILTIN_CMPGTPS,
29681 IX86_BUILTIN_CMPGEPS,
29682 IX86_BUILTIN_CMPNEQPS,
29683 IX86_BUILTIN_CMPNLTPS,
29684 IX86_BUILTIN_CMPNLEPS,
29685 IX86_BUILTIN_CMPNGTPS,
29686 IX86_BUILTIN_CMPNGEPS,
29687 IX86_BUILTIN_CMPORDPS,
29688 IX86_BUILTIN_CMPUNORDPS,
29689 IX86_BUILTIN_CMPEQSS,
29690 IX86_BUILTIN_CMPLTSS,
29691 IX86_BUILTIN_CMPLESS,
29692 IX86_BUILTIN_CMPNEQSS,
29693 IX86_BUILTIN_CMPNLTSS,
29694 IX86_BUILTIN_CMPNLESS,
29695 IX86_BUILTIN_CMPORDSS,
29696 IX86_BUILTIN_CMPUNORDSS,
29698 IX86_BUILTIN_COMIEQSS,
29699 IX86_BUILTIN_COMILTSS,
29700 IX86_BUILTIN_COMILESS,
29701 IX86_BUILTIN_COMIGTSS,
29702 IX86_BUILTIN_COMIGESS,
29703 IX86_BUILTIN_COMINEQSS,
29704 IX86_BUILTIN_UCOMIEQSS,
29705 IX86_BUILTIN_UCOMILTSS,
29706 IX86_BUILTIN_UCOMILESS,
29707 IX86_BUILTIN_UCOMIGTSS,
29708 IX86_BUILTIN_UCOMIGESS,
29709 IX86_BUILTIN_UCOMINEQSS,
29711 IX86_BUILTIN_CVTPI2PS,
29712 IX86_BUILTIN_CVTPS2PI,
29713 IX86_BUILTIN_CVTSI2SS,
29714 IX86_BUILTIN_CVTSI642SS,
29715 IX86_BUILTIN_CVTSS2SI,
29716 IX86_BUILTIN_CVTSS2SI64,
29717 IX86_BUILTIN_CVTTPS2PI,
29718 IX86_BUILTIN_CVTTSS2SI,
29719 IX86_BUILTIN_CVTTSS2SI64,
29721 IX86_BUILTIN_MAXPS,
29722 IX86_BUILTIN_MAXSS,
29723 IX86_BUILTIN_MINPS,
29724 IX86_BUILTIN_MINSS,
29726 IX86_BUILTIN_LOADUPS,
29727 IX86_BUILTIN_STOREUPS,
29728 IX86_BUILTIN_MOVSS,
29730 IX86_BUILTIN_MOVHLPS,
29731 IX86_BUILTIN_MOVLHPS,
29732 IX86_BUILTIN_LOADHPS,
29733 IX86_BUILTIN_LOADLPS,
29734 IX86_BUILTIN_STOREHPS,
29735 IX86_BUILTIN_STORELPS,
29737 IX86_BUILTIN_MASKMOVQ,
29738 IX86_BUILTIN_MOVMSKPS,
29739 IX86_BUILTIN_PMOVMSKB,
29741 IX86_BUILTIN_MOVNTPS,
29742 IX86_BUILTIN_MOVNTQ,
29744 IX86_BUILTIN_LOADDQU,
29745 IX86_BUILTIN_STOREDQU,
29747 IX86_BUILTIN_PACKSSWB,
29748 IX86_BUILTIN_PACKSSDW,
29749 IX86_BUILTIN_PACKUSWB,
29751 IX86_BUILTIN_PADDB,
29752 IX86_BUILTIN_PADDW,
29753 IX86_BUILTIN_PADDD,
29754 IX86_BUILTIN_PADDQ,
29755 IX86_BUILTIN_PADDSB,
29756 IX86_BUILTIN_PADDSW,
29757 IX86_BUILTIN_PADDUSB,
29758 IX86_BUILTIN_PADDUSW,
29759 IX86_BUILTIN_PSUBB,
29760 IX86_BUILTIN_PSUBW,
29761 IX86_BUILTIN_PSUBD,
29762 IX86_BUILTIN_PSUBQ,
29763 IX86_BUILTIN_PSUBSB,
29764 IX86_BUILTIN_PSUBSW,
29765 IX86_BUILTIN_PSUBUSB,
29766 IX86_BUILTIN_PSUBUSW,
29769 IX86_BUILTIN_PANDN,
29773 IX86_BUILTIN_PAVGB,
29774 IX86_BUILTIN_PAVGW,
29776 IX86_BUILTIN_PCMPEQB,
29777 IX86_BUILTIN_PCMPEQW,
29778 IX86_BUILTIN_PCMPEQD,
29779 IX86_BUILTIN_PCMPGTB,
29780 IX86_BUILTIN_PCMPGTW,
29781 IX86_BUILTIN_PCMPGTD,
29783 IX86_BUILTIN_PMADDWD,
29785 IX86_BUILTIN_PMAXSW,
29786 IX86_BUILTIN_PMAXUB,
29787 IX86_BUILTIN_PMINSW,
29788 IX86_BUILTIN_PMINUB,
29790 IX86_BUILTIN_PMULHUW,
29791 IX86_BUILTIN_PMULHW,
29792 IX86_BUILTIN_PMULLW,
29794 IX86_BUILTIN_PSADBW,
29795 IX86_BUILTIN_PSHUFW,
29797 IX86_BUILTIN_PSLLW,
29798 IX86_BUILTIN_PSLLD,
29799 IX86_BUILTIN_PSLLQ,
29800 IX86_BUILTIN_PSRAW,
29801 IX86_BUILTIN_PSRAD,
29802 IX86_BUILTIN_PSRLW,
29803 IX86_BUILTIN_PSRLD,
29804 IX86_BUILTIN_PSRLQ,
29805 IX86_BUILTIN_PSLLWI,
29806 IX86_BUILTIN_PSLLDI,
29807 IX86_BUILTIN_PSLLQI,
29808 IX86_BUILTIN_PSRAWI,
29809 IX86_BUILTIN_PSRADI,
29810 IX86_BUILTIN_PSRLWI,
29811 IX86_BUILTIN_PSRLDI,
29812 IX86_BUILTIN_PSRLQI,
29814 IX86_BUILTIN_PUNPCKHBW,
29815 IX86_BUILTIN_PUNPCKHWD,
29816 IX86_BUILTIN_PUNPCKHDQ,
29817 IX86_BUILTIN_PUNPCKLBW,
29818 IX86_BUILTIN_PUNPCKLWD,
29819 IX86_BUILTIN_PUNPCKLDQ,
29821 IX86_BUILTIN_SHUFPS,
29823 IX86_BUILTIN_RCPPS,
29824 IX86_BUILTIN_RCPSS,
29825 IX86_BUILTIN_RSQRTPS,
29826 IX86_BUILTIN_RSQRTPS_NR,
29827 IX86_BUILTIN_RSQRTSS,
29828 IX86_BUILTIN_RSQRTF,
29829 IX86_BUILTIN_SQRTPS,
29830 IX86_BUILTIN_SQRTPS_NR,
29831 IX86_BUILTIN_SQRTSS,
29833 IX86_BUILTIN_UNPCKHPS,
29834 IX86_BUILTIN_UNPCKLPS,
29836 IX86_BUILTIN_ANDPS,
29837 IX86_BUILTIN_ANDNPS,
29839 IX86_BUILTIN_XORPS,
29842 IX86_BUILTIN_LDMXCSR,
29843 IX86_BUILTIN_STMXCSR,
29844 IX86_BUILTIN_SFENCE,
29846 IX86_BUILTIN_FXSAVE,
29847 IX86_BUILTIN_FXRSTOR,
29848 IX86_BUILTIN_FXSAVE64,
29849 IX86_BUILTIN_FXRSTOR64,
29851 IX86_BUILTIN_XSAVE,
29852 IX86_BUILTIN_XRSTOR,
29853 IX86_BUILTIN_XSAVE64,
29854 IX86_BUILTIN_XRSTOR64,
29856 IX86_BUILTIN_XSAVEOPT,
29857 IX86_BUILTIN_XSAVEOPT64,
29859 IX86_BUILTIN_XSAVEC,
29860 IX86_BUILTIN_XSAVEC64,
29862 IX86_BUILTIN_XSAVES,
29863 IX86_BUILTIN_XRSTORS,
29864 IX86_BUILTIN_XSAVES64,
29865 IX86_BUILTIN_XRSTORS64,
29867 /* 3DNow! Original */
29868 IX86_BUILTIN_FEMMS,
29869 IX86_BUILTIN_PAVGUSB,
29870 IX86_BUILTIN_PF2ID,
29871 IX86_BUILTIN_PFACC,
29872 IX86_BUILTIN_PFADD,
29873 IX86_BUILTIN_PFCMPEQ,
29874 IX86_BUILTIN_PFCMPGE,
29875 IX86_BUILTIN_PFCMPGT,
29876 IX86_BUILTIN_PFMAX,
29877 IX86_BUILTIN_PFMIN,
29878 IX86_BUILTIN_PFMUL,
29879 IX86_BUILTIN_PFRCP,
29880 IX86_BUILTIN_PFRCPIT1,
29881 IX86_BUILTIN_PFRCPIT2,
29882 IX86_BUILTIN_PFRSQIT1,
29883 IX86_BUILTIN_PFRSQRT,
29884 IX86_BUILTIN_PFSUB,
29885 IX86_BUILTIN_PFSUBR,
29886 IX86_BUILTIN_PI2FD,
29887 IX86_BUILTIN_PMULHRW,
29889 /* 3DNow! Athlon Extensions */
29890 IX86_BUILTIN_PF2IW,
29891 IX86_BUILTIN_PFNACC,
29892 IX86_BUILTIN_PFPNACC,
29893 IX86_BUILTIN_PI2FW,
29894 IX86_BUILTIN_PSWAPDSI,
29895 IX86_BUILTIN_PSWAPDSF,
29898 IX86_BUILTIN_ADDPD,
29899 IX86_BUILTIN_ADDSD,
29900 IX86_BUILTIN_DIVPD,
29901 IX86_BUILTIN_DIVSD,
29902 IX86_BUILTIN_MULPD,
29903 IX86_BUILTIN_MULSD,
29904 IX86_BUILTIN_SUBPD,
29905 IX86_BUILTIN_SUBSD,
29907 IX86_BUILTIN_CMPEQPD,
29908 IX86_BUILTIN_CMPLTPD,
29909 IX86_BUILTIN_CMPLEPD,
29910 IX86_BUILTIN_CMPGTPD,
29911 IX86_BUILTIN_CMPGEPD,
29912 IX86_BUILTIN_CMPNEQPD,
29913 IX86_BUILTIN_CMPNLTPD,
29914 IX86_BUILTIN_CMPNLEPD,
29915 IX86_BUILTIN_CMPNGTPD,
29916 IX86_BUILTIN_CMPNGEPD,
29917 IX86_BUILTIN_CMPORDPD,
29918 IX86_BUILTIN_CMPUNORDPD,
29919 IX86_BUILTIN_CMPEQSD,
29920 IX86_BUILTIN_CMPLTSD,
29921 IX86_BUILTIN_CMPLESD,
29922 IX86_BUILTIN_CMPNEQSD,
29923 IX86_BUILTIN_CMPNLTSD,
29924 IX86_BUILTIN_CMPNLESD,
29925 IX86_BUILTIN_CMPORDSD,
29926 IX86_BUILTIN_CMPUNORDSD,
29928 IX86_BUILTIN_COMIEQSD,
29929 IX86_BUILTIN_COMILTSD,
29930 IX86_BUILTIN_COMILESD,
29931 IX86_BUILTIN_COMIGTSD,
29932 IX86_BUILTIN_COMIGESD,
29933 IX86_BUILTIN_COMINEQSD,
29934 IX86_BUILTIN_UCOMIEQSD,
29935 IX86_BUILTIN_UCOMILTSD,
29936 IX86_BUILTIN_UCOMILESD,
29937 IX86_BUILTIN_UCOMIGTSD,
29938 IX86_BUILTIN_UCOMIGESD,
29939 IX86_BUILTIN_UCOMINEQSD,
29941 IX86_BUILTIN_MAXPD,
29942 IX86_BUILTIN_MAXSD,
29943 IX86_BUILTIN_MINPD,
29944 IX86_BUILTIN_MINSD,
29946 IX86_BUILTIN_ANDPD,
29947 IX86_BUILTIN_ANDNPD,
29949 IX86_BUILTIN_XORPD,
29951 IX86_BUILTIN_SQRTPD,
29952 IX86_BUILTIN_SQRTSD,
29954 IX86_BUILTIN_UNPCKHPD,
29955 IX86_BUILTIN_UNPCKLPD,
29957 IX86_BUILTIN_SHUFPD,
29959 IX86_BUILTIN_LOADUPD,
29960 IX86_BUILTIN_STOREUPD,
29961 IX86_BUILTIN_MOVSD,
29963 IX86_BUILTIN_LOADHPD,
29964 IX86_BUILTIN_LOADLPD,
29966 IX86_BUILTIN_CVTDQ2PD,
29967 IX86_BUILTIN_CVTDQ2PS,
29969 IX86_BUILTIN_CVTPD2DQ,
29970 IX86_BUILTIN_CVTPD2PI,
29971 IX86_BUILTIN_CVTPD2PS,
29972 IX86_BUILTIN_CVTTPD2DQ,
29973 IX86_BUILTIN_CVTTPD2PI,
29975 IX86_BUILTIN_CVTPI2PD,
29976 IX86_BUILTIN_CVTSI2SD,
29977 IX86_BUILTIN_CVTSI642SD,
29979 IX86_BUILTIN_CVTSD2SI,
29980 IX86_BUILTIN_CVTSD2SI64,
29981 IX86_BUILTIN_CVTSD2SS,
29982 IX86_BUILTIN_CVTSS2SD,
29983 IX86_BUILTIN_CVTTSD2SI,
29984 IX86_BUILTIN_CVTTSD2SI64,
29986 IX86_BUILTIN_CVTPS2DQ,
29987 IX86_BUILTIN_CVTPS2PD,
29988 IX86_BUILTIN_CVTTPS2DQ,
29990 IX86_BUILTIN_MOVNTI,
29991 IX86_BUILTIN_MOVNTI64,
29992 IX86_BUILTIN_MOVNTPD,
29993 IX86_BUILTIN_MOVNTDQ,
29995 IX86_BUILTIN_MOVQ128,
29998 IX86_BUILTIN_MASKMOVDQU,
29999 IX86_BUILTIN_MOVMSKPD,
30000 IX86_BUILTIN_PMOVMSKB128,
30002 IX86_BUILTIN_PACKSSWB128,
30003 IX86_BUILTIN_PACKSSDW128,
30004 IX86_BUILTIN_PACKUSWB128,
30006 IX86_BUILTIN_PADDB128,
30007 IX86_BUILTIN_PADDW128,
30008 IX86_BUILTIN_PADDD128,
30009 IX86_BUILTIN_PADDQ128,
30010 IX86_BUILTIN_PADDSB128,
30011 IX86_BUILTIN_PADDSW128,
30012 IX86_BUILTIN_PADDUSB128,
30013 IX86_BUILTIN_PADDUSW128,
30014 IX86_BUILTIN_PSUBB128,
30015 IX86_BUILTIN_PSUBW128,
30016 IX86_BUILTIN_PSUBD128,
30017 IX86_BUILTIN_PSUBQ128,
30018 IX86_BUILTIN_PSUBSB128,
30019 IX86_BUILTIN_PSUBSW128,
30020 IX86_BUILTIN_PSUBUSB128,
30021 IX86_BUILTIN_PSUBUSW128,
30023 IX86_BUILTIN_PAND128,
30024 IX86_BUILTIN_PANDN128,
30025 IX86_BUILTIN_POR128,
30026 IX86_BUILTIN_PXOR128,
30028 IX86_BUILTIN_PAVGB128,
30029 IX86_BUILTIN_PAVGW128,
30031 IX86_BUILTIN_PCMPEQB128,
30032 IX86_BUILTIN_PCMPEQW128,
30033 IX86_BUILTIN_PCMPEQD128,
30034 IX86_BUILTIN_PCMPGTB128,
30035 IX86_BUILTIN_PCMPGTW128,
30036 IX86_BUILTIN_PCMPGTD128,
30038 IX86_BUILTIN_PMADDWD128,
30040 IX86_BUILTIN_PMAXSW128,
30041 IX86_BUILTIN_PMAXUB128,
30042 IX86_BUILTIN_PMINSW128,
30043 IX86_BUILTIN_PMINUB128,
30045 IX86_BUILTIN_PMULUDQ,
30046 IX86_BUILTIN_PMULUDQ128,
30047 IX86_BUILTIN_PMULHUW128,
30048 IX86_BUILTIN_PMULHW128,
30049 IX86_BUILTIN_PMULLW128,
30051 IX86_BUILTIN_PSADBW128,
30052 IX86_BUILTIN_PSHUFHW,
30053 IX86_BUILTIN_PSHUFLW,
30054 IX86_BUILTIN_PSHUFD,
30056 IX86_BUILTIN_PSLLDQI128,
30057 IX86_BUILTIN_PSLLWI128,
30058 IX86_BUILTIN_PSLLDI128,
30059 IX86_BUILTIN_PSLLQI128,
30060 IX86_BUILTIN_PSRAWI128,
30061 IX86_BUILTIN_PSRADI128,
30062 IX86_BUILTIN_PSRLDQI128,
30063 IX86_BUILTIN_PSRLWI128,
30064 IX86_BUILTIN_PSRLDI128,
30065 IX86_BUILTIN_PSRLQI128,
30067 IX86_BUILTIN_PSLLDQ128,
30068 IX86_BUILTIN_PSLLW128,
30069 IX86_BUILTIN_PSLLD128,
30070 IX86_BUILTIN_PSLLQ128,
30071 IX86_BUILTIN_PSRAW128,
30072 IX86_BUILTIN_PSRAD128,
30073 IX86_BUILTIN_PSRLW128,
30074 IX86_BUILTIN_PSRLD128,
30075 IX86_BUILTIN_PSRLQ128,
30077 IX86_BUILTIN_PUNPCKHBW128,
30078 IX86_BUILTIN_PUNPCKHWD128,
30079 IX86_BUILTIN_PUNPCKHDQ128,
30080 IX86_BUILTIN_PUNPCKHQDQ128,
30081 IX86_BUILTIN_PUNPCKLBW128,
30082 IX86_BUILTIN_PUNPCKLWD128,
30083 IX86_BUILTIN_PUNPCKLDQ128,
30084 IX86_BUILTIN_PUNPCKLQDQ128,
30086 IX86_BUILTIN_CLFLUSH,
30087 IX86_BUILTIN_MFENCE,
30088 IX86_BUILTIN_LFENCE,
30089 IX86_BUILTIN_PAUSE,
30091 IX86_BUILTIN_FNSTENV,
30092 IX86_BUILTIN_FLDENV,
30093 IX86_BUILTIN_FNSTSW,
30094 IX86_BUILTIN_FNCLEX,
30096 IX86_BUILTIN_BSRSI,
30097 IX86_BUILTIN_BSRDI,
30098 IX86_BUILTIN_RDPMC,
30099 IX86_BUILTIN_RDTSC,
30100 IX86_BUILTIN_RDTSCP,
30101 IX86_BUILTIN_ROLQI,
30102 IX86_BUILTIN_ROLHI,
30103 IX86_BUILTIN_RORQI,
30104 IX86_BUILTIN_RORHI,
30107 IX86_BUILTIN_ADDSUBPS,
30108 IX86_BUILTIN_HADDPS,
30109 IX86_BUILTIN_HSUBPS,
30110 IX86_BUILTIN_MOVSHDUP,
30111 IX86_BUILTIN_MOVSLDUP,
30112 IX86_BUILTIN_ADDSUBPD,
30113 IX86_BUILTIN_HADDPD,
30114 IX86_BUILTIN_HSUBPD,
30115 IX86_BUILTIN_LDDQU,
30117 IX86_BUILTIN_MONITOR,
30118 IX86_BUILTIN_MWAIT,
30119 IX86_BUILTIN_CLZERO,
30122 IX86_BUILTIN_PHADDW,
30123 IX86_BUILTIN_PHADDD,
30124 IX86_BUILTIN_PHADDSW,
30125 IX86_BUILTIN_PHSUBW,
30126 IX86_BUILTIN_PHSUBD,
30127 IX86_BUILTIN_PHSUBSW,
30128 IX86_BUILTIN_PMADDUBSW,
30129 IX86_BUILTIN_PMULHRSW,
30130 IX86_BUILTIN_PSHUFB,
30131 IX86_BUILTIN_PSIGNB,
30132 IX86_BUILTIN_PSIGNW,
30133 IX86_BUILTIN_PSIGND,
30134 IX86_BUILTIN_PALIGNR,
30135 IX86_BUILTIN_PABSB,
30136 IX86_BUILTIN_PABSW,
30137 IX86_BUILTIN_PABSD,
30139 IX86_BUILTIN_PHADDW128,
30140 IX86_BUILTIN_PHADDD128,
30141 IX86_BUILTIN_PHADDSW128,
30142 IX86_BUILTIN_PHSUBW128,
30143 IX86_BUILTIN_PHSUBD128,
30144 IX86_BUILTIN_PHSUBSW128,
30145 IX86_BUILTIN_PMADDUBSW128,
30146 IX86_BUILTIN_PMULHRSW128,
30147 IX86_BUILTIN_PSHUFB128,
30148 IX86_BUILTIN_PSIGNB128,
30149 IX86_BUILTIN_PSIGNW128,
30150 IX86_BUILTIN_PSIGND128,
30151 IX86_BUILTIN_PALIGNR128,
30152 IX86_BUILTIN_PABSB128,
30153 IX86_BUILTIN_PABSW128,
30154 IX86_BUILTIN_PABSD128,
30156 /* AMDFAM10 - SSE4A New Instructions. */
30157 IX86_BUILTIN_MOVNTSD,
30158 IX86_BUILTIN_MOVNTSS,
30159 IX86_BUILTIN_EXTRQI,
30160 IX86_BUILTIN_EXTRQ,
30161 IX86_BUILTIN_INSERTQI,
30162 IX86_BUILTIN_INSERTQ,
30165 IX86_BUILTIN_BLENDPD,
30166 IX86_BUILTIN_BLENDPS,
30167 IX86_BUILTIN_BLENDVPD,
30168 IX86_BUILTIN_BLENDVPS,
30169 IX86_BUILTIN_PBLENDVB128,
30170 IX86_BUILTIN_PBLENDW128,
30175 IX86_BUILTIN_INSERTPS128,
30177 IX86_BUILTIN_MOVNTDQA,
30178 IX86_BUILTIN_MPSADBW128,
30179 IX86_BUILTIN_PACKUSDW128,
30180 IX86_BUILTIN_PCMPEQQ,
30181 IX86_BUILTIN_PHMINPOSUW128,
30183 IX86_BUILTIN_PMAXSB128,
30184 IX86_BUILTIN_PMAXSD128,
30185 IX86_BUILTIN_PMAXUD128,
30186 IX86_BUILTIN_PMAXUW128,
30188 IX86_BUILTIN_PMINSB128,
30189 IX86_BUILTIN_PMINSD128,
30190 IX86_BUILTIN_PMINUD128,
30191 IX86_BUILTIN_PMINUW128,
30193 IX86_BUILTIN_PMOVSXBW128,
30194 IX86_BUILTIN_PMOVSXBD128,
30195 IX86_BUILTIN_PMOVSXBQ128,
30196 IX86_BUILTIN_PMOVSXWD128,
30197 IX86_BUILTIN_PMOVSXWQ128,
30198 IX86_BUILTIN_PMOVSXDQ128,
30200 IX86_BUILTIN_PMOVZXBW128,
30201 IX86_BUILTIN_PMOVZXBD128,
30202 IX86_BUILTIN_PMOVZXBQ128,
30203 IX86_BUILTIN_PMOVZXWD128,
30204 IX86_BUILTIN_PMOVZXWQ128,
30205 IX86_BUILTIN_PMOVZXDQ128,
30207 IX86_BUILTIN_PMULDQ128,
30208 IX86_BUILTIN_PMULLD128,
30210 IX86_BUILTIN_ROUNDSD,
30211 IX86_BUILTIN_ROUNDSS,
30213 IX86_BUILTIN_ROUNDPD,
30214 IX86_BUILTIN_ROUNDPS,
30216 IX86_BUILTIN_FLOORPD,
30217 IX86_BUILTIN_CEILPD,
30218 IX86_BUILTIN_TRUNCPD,
30219 IX86_BUILTIN_RINTPD,
30220 IX86_BUILTIN_ROUNDPD_AZ,
30222 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
30223 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
30224 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
30226 IX86_BUILTIN_FLOORPS,
30227 IX86_BUILTIN_CEILPS,
30228 IX86_BUILTIN_TRUNCPS,
30229 IX86_BUILTIN_RINTPS,
30230 IX86_BUILTIN_ROUNDPS_AZ,
30232 IX86_BUILTIN_FLOORPS_SFIX,
30233 IX86_BUILTIN_CEILPS_SFIX,
30234 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
30236 IX86_BUILTIN_PTESTZ,
30237 IX86_BUILTIN_PTESTC,
30238 IX86_BUILTIN_PTESTNZC,
30240 IX86_BUILTIN_VEC_INIT_V2SI,
30241 IX86_BUILTIN_VEC_INIT_V4HI,
30242 IX86_BUILTIN_VEC_INIT_V8QI,
30243 IX86_BUILTIN_VEC_EXT_V2DF,
30244 IX86_BUILTIN_VEC_EXT_V2DI,
30245 IX86_BUILTIN_VEC_EXT_V4SF,
30246 IX86_BUILTIN_VEC_EXT_V4SI,
30247 IX86_BUILTIN_VEC_EXT_V8HI,
30248 IX86_BUILTIN_VEC_EXT_V2SI,
30249 IX86_BUILTIN_VEC_EXT_V4HI,
30250 IX86_BUILTIN_VEC_EXT_V16QI,
30251 IX86_BUILTIN_VEC_SET_V2DI,
30252 IX86_BUILTIN_VEC_SET_V4SF,
30253 IX86_BUILTIN_VEC_SET_V4SI,
30254 IX86_BUILTIN_VEC_SET_V8HI,
30255 IX86_BUILTIN_VEC_SET_V4HI,
30256 IX86_BUILTIN_VEC_SET_V16QI,
30258 IX86_BUILTIN_VEC_PACK_SFIX,
30259 IX86_BUILTIN_VEC_PACK_SFIX256,
30262 IX86_BUILTIN_CRC32QI,
30263 IX86_BUILTIN_CRC32HI,
30264 IX86_BUILTIN_CRC32SI,
30265 IX86_BUILTIN_CRC32DI,
30267 IX86_BUILTIN_PCMPESTRI128,
30268 IX86_BUILTIN_PCMPESTRM128,
30269 IX86_BUILTIN_PCMPESTRA128,
30270 IX86_BUILTIN_PCMPESTRC128,
30271 IX86_BUILTIN_PCMPESTRO128,
30272 IX86_BUILTIN_PCMPESTRS128,
30273 IX86_BUILTIN_PCMPESTRZ128,
30274 IX86_BUILTIN_PCMPISTRI128,
30275 IX86_BUILTIN_PCMPISTRM128,
30276 IX86_BUILTIN_PCMPISTRA128,
30277 IX86_BUILTIN_PCMPISTRC128,
30278 IX86_BUILTIN_PCMPISTRO128,
30279 IX86_BUILTIN_PCMPISTRS128,
30280 IX86_BUILTIN_PCMPISTRZ128,
30282 IX86_BUILTIN_PCMPGTQ,
30284 /* AES instructions */
30285 IX86_BUILTIN_AESENC128,
30286 IX86_BUILTIN_AESENCLAST128,
30287 IX86_BUILTIN_AESDEC128,
30288 IX86_BUILTIN_AESDECLAST128,
30289 IX86_BUILTIN_AESIMC128,
30290 IX86_BUILTIN_AESKEYGENASSIST128,
30292 /* PCLMUL instruction */
30293 IX86_BUILTIN_PCLMULQDQ128,
30296 IX86_BUILTIN_ADDPD256,
30297 IX86_BUILTIN_ADDPS256,
30298 IX86_BUILTIN_ADDSUBPD256,
30299 IX86_BUILTIN_ADDSUBPS256,
30300 IX86_BUILTIN_ANDPD256,
30301 IX86_BUILTIN_ANDPS256,
30302 IX86_BUILTIN_ANDNPD256,
30303 IX86_BUILTIN_ANDNPS256,
30304 IX86_BUILTIN_BLENDPD256,
30305 IX86_BUILTIN_BLENDPS256,
30306 IX86_BUILTIN_BLENDVPD256,
30307 IX86_BUILTIN_BLENDVPS256,
30308 IX86_BUILTIN_DIVPD256,
30309 IX86_BUILTIN_DIVPS256,
30310 IX86_BUILTIN_DPPS256,
30311 IX86_BUILTIN_HADDPD256,
30312 IX86_BUILTIN_HADDPS256,
30313 IX86_BUILTIN_HSUBPD256,
30314 IX86_BUILTIN_HSUBPS256,
30315 IX86_BUILTIN_MAXPD256,
30316 IX86_BUILTIN_MAXPS256,
30317 IX86_BUILTIN_MINPD256,
30318 IX86_BUILTIN_MINPS256,
30319 IX86_BUILTIN_MULPD256,
30320 IX86_BUILTIN_MULPS256,
30321 IX86_BUILTIN_ORPD256,
30322 IX86_BUILTIN_ORPS256,
30323 IX86_BUILTIN_SHUFPD256,
30324 IX86_BUILTIN_SHUFPS256,
30325 IX86_BUILTIN_SUBPD256,
30326 IX86_BUILTIN_SUBPS256,
30327 IX86_BUILTIN_XORPD256,
30328 IX86_BUILTIN_XORPS256,
30329 IX86_BUILTIN_CMPSD,
30330 IX86_BUILTIN_CMPSS,
30331 IX86_BUILTIN_CMPPD,
30332 IX86_BUILTIN_CMPPS,
30333 IX86_BUILTIN_CMPPD256,
30334 IX86_BUILTIN_CMPPS256,
30335 IX86_BUILTIN_CVTDQ2PD256,
30336 IX86_BUILTIN_CVTDQ2PS256,
30337 IX86_BUILTIN_CVTPD2PS256,
30338 IX86_BUILTIN_CVTPS2DQ256,
30339 IX86_BUILTIN_CVTPS2PD256,
30340 IX86_BUILTIN_CVTTPD2DQ256,
30341 IX86_BUILTIN_CVTPD2DQ256,
30342 IX86_BUILTIN_CVTTPS2DQ256,
30343 IX86_BUILTIN_EXTRACTF128PD256,
30344 IX86_BUILTIN_EXTRACTF128PS256,
30345 IX86_BUILTIN_EXTRACTF128SI256,
30346 IX86_BUILTIN_VZEROALL,
30347 IX86_BUILTIN_VZEROUPPER,
30348 IX86_BUILTIN_VPERMILVARPD,
30349 IX86_BUILTIN_VPERMILVARPS,
30350 IX86_BUILTIN_VPERMILVARPD256,
30351 IX86_BUILTIN_VPERMILVARPS256,
30352 IX86_BUILTIN_VPERMILPD,
30353 IX86_BUILTIN_VPERMILPS,
30354 IX86_BUILTIN_VPERMILPD256,
30355 IX86_BUILTIN_VPERMILPS256,
30356 IX86_BUILTIN_VPERMIL2PD,
30357 IX86_BUILTIN_VPERMIL2PS,
30358 IX86_BUILTIN_VPERMIL2PD256,
30359 IX86_BUILTIN_VPERMIL2PS256,
30360 IX86_BUILTIN_VPERM2F128PD256,
30361 IX86_BUILTIN_VPERM2F128PS256,
30362 IX86_BUILTIN_VPERM2F128SI256,
30363 IX86_BUILTIN_VBROADCASTSS,
30364 IX86_BUILTIN_VBROADCASTSD256,
30365 IX86_BUILTIN_VBROADCASTSS256,
30366 IX86_BUILTIN_VBROADCASTPD256,
30367 IX86_BUILTIN_VBROADCASTPS256,
30368 IX86_BUILTIN_VINSERTF128PD256,
30369 IX86_BUILTIN_VINSERTF128PS256,
30370 IX86_BUILTIN_VINSERTF128SI256,
30371 IX86_BUILTIN_LOADUPD256,
30372 IX86_BUILTIN_LOADUPS256,
30373 IX86_BUILTIN_STOREUPD256,
30374 IX86_BUILTIN_STOREUPS256,
30375 IX86_BUILTIN_LDDQU256,
30376 IX86_BUILTIN_MOVNTDQ256,
30377 IX86_BUILTIN_MOVNTPD256,
30378 IX86_BUILTIN_MOVNTPS256,
30379 IX86_BUILTIN_LOADDQU256,
30380 IX86_BUILTIN_STOREDQU256,
30381 IX86_BUILTIN_MASKLOADPD,
30382 IX86_BUILTIN_MASKLOADPS,
30383 IX86_BUILTIN_MASKSTOREPD,
30384 IX86_BUILTIN_MASKSTOREPS,
30385 IX86_BUILTIN_MASKLOADPD256,
30386 IX86_BUILTIN_MASKLOADPS256,
30387 IX86_BUILTIN_MASKSTOREPD256,
30388 IX86_BUILTIN_MASKSTOREPS256,
30389 IX86_BUILTIN_MOVSHDUP256,
30390 IX86_BUILTIN_MOVSLDUP256,
30391 IX86_BUILTIN_MOVDDUP256,
30393 IX86_BUILTIN_SQRTPD256,
30394 IX86_BUILTIN_SQRTPS256,
30395 IX86_BUILTIN_SQRTPS_NR256,
30396 IX86_BUILTIN_RSQRTPS256,
30397 IX86_BUILTIN_RSQRTPS_NR256,
30399 IX86_BUILTIN_RCPPS256,
30401 IX86_BUILTIN_ROUNDPD256,
30402 IX86_BUILTIN_ROUNDPS256,
30404 IX86_BUILTIN_FLOORPD256,
30405 IX86_BUILTIN_CEILPD256,
30406 IX86_BUILTIN_TRUNCPD256,
30407 IX86_BUILTIN_RINTPD256,
30408 IX86_BUILTIN_ROUNDPD_AZ256,
30410 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
30411 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
30412 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
30414 IX86_BUILTIN_FLOORPS256,
30415 IX86_BUILTIN_CEILPS256,
30416 IX86_BUILTIN_TRUNCPS256,
30417 IX86_BUILTIN_RINTPS256,
30418 IX86_BUILTIN_ROUNDPS_AZ256,
30420 IX86_BUILTIN_FLOORPS_SFIX256,
30421 IX86_BUILTIN_CEILPS_SFIX256,
30422 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
30424 IX86_BUILTIN_UNPCKHPD256,
30425 IX86_BUILTIN_UNPCKLPD256,
30426 IX86_BUILTIN_UNPCKHPS256,
30427 IX86_BUILTIN_UNPCKLPS256,
30429 IX86_BUILTIN_SI256_SI,
30430 IX86_BUILTIN_PS256_PS,
30431 IX86_BUILTIN_PD256_PD,
30432 IX86_BUILTIN_SI_SI256,
30433 IX86_BUILTIN_PS_PS256,
30434 IX86_BUILTIN_PD_PD256,
30436 IX86_BUILTIN_VTESTZPD,
30437 IX86_BUILTIN_VTESTCPD,
30438 IX86_BUILTIN_VTESTNZCPD,
30439 IX86_BUILTIN_VTESTZPS,
30440 IX86_BUILTIN_VTESTCPS,
30441 IX86_BUILTIN_VTESTNZCPS,
30442 IX86_BUILTIN_VTESTZPD256,
30443 IX86_BUILTIN_VTESTCPD256,
30444 IX86_BUILTIN_VTESTNZCPD256,
30445 IX86_BUILTIN_VTESTZPS256,
30446 IX86_BUILTIN_VTESTCPS256,
30447 IX86_BUILTIN_VTESTNZCPS256,
30448 IX86_BUILTIN_PTESTZ256,
30449 IX86_BUILTIN_PTESTC256,
30450 IX86_BUILTIN_PTESTNZC256,
30452 IX86_BUILTIN_MOVMSKPD256,
30453 IX86_BUILTIN_MOVMSKPS256,
30456 IX86_BUILTIN_MPSADBW256,
30457 IX86_BUILTIN_PABSB256,
30458 IX86_BUILTIN_PABSW256,
30459 IX86_BUILTIN_PABSD256,
30460 IX86_BUILTIN_PACKSSDW256,
30461 IX86_BUILTIN_PACKSSWB256,
30462 IX86_BUILTIN_PACKUSDW256,
30463 IX86_BUILTIN_PACKUSWB256,
30464 IX86_BUILTIN_PADDB256,
30465 IX86_BUILTIN_PADDW256,
30466 IX86_BUILTIN_PADDD256,
30467 IX86_BUILTIN_PADDQ256,
30468 IX86_BUILTIN_PADDSB256,
30469 IX86_BUILTIN_PADDSW256,
30470 IX86_BUILTIN_PADDUSB256,
30471 IX86_BUILTIN_PADDUSW256,
30472 IX86_BUILTIN_PALIGNR256,
30473 IX86_BUILTIN_AND256I,
30474 IX86_BUILTIN_ANDNOT256I,
30475 IX86_BUILTIN_PAVGB256,
30476 IX86_BUILTIN_PAVGW256,
30477 IX86_BUILTIN_PBLENDVB256,
30478 IX86_BUILTIN_PBLENDVW256,
30479 IX86_BUILTIN_PCMPEQB256,
30480 IX86_BUILTIN_PCMPEQW256,
30481 IX86_BUILTIN_PCMPEQD256,
30482 IX86_BUILTIN_PCMPEQQ256,
30483 IX86_BUILTIN_PCMPGTB256,
30484 IX86_BUILTIN_PCMPGTW256,
30485 IX86_BUILTIN_PCMPGTD256,
30486 IX86_BUILTIN_PCMPGTQ256,
30487 IX86_BUILTIN_PHADDW256,
30488 IX86_BUILTIN_PHADDD256,
30489 IX86_BUILTIN_PHADDSW256,
30490 IX86_BUILTIN_PHSUBW256,
30491 IX86_BUILTIN_PHSUBD256,
30492 IX86_BUILTIN_PHSUBSW256,
30493 IX86_BUILTIN_PMADDUBSW256,
30494 IX86_BUILTIN_PMADDWD256,
30495 IX86_BUILTIN_PMAXSB256,
30496 IX86_BUILTIN_PMAXSW256,
30497 IX86_BUILTIN_PMAXSD256,
30498 IX86_BUILTIN_PMAXUB256,
30499 IX86_BUILTIN_PMAXUW256,
30500 IX86_BUILTIN_PMAXUD256,
30501 IX86_BUILTIN_PMINSB256,
30502 IX86_BUILTIN_PMINSW256,
30503 IX86_BUILTIN_PMINSD256,
30504 IX86_BUILTIN_PMINUB256,
30505 IX86_BUILTIN_PMINUW256,
30506 IX86_BUILTIN_PMINUD256,
30507 IX86_BUILTIN_PMOVMSKB256,
30508 IX86_BUILTIN_PMOVSXBW256,
30509 IX86_BUILTIN_PMOVSXBD256,
30510 IX86_BUILTIN_PMOVSXBQ256,
30511 IX86_BUILTIN_PMOVSXWD256,
30512 IX86_BUILTIN_PMOVSXWQ256,
30513 IX86_BUILTIN_PMOVSXDQ256,
30514 IX86_BUILTIN_PMOVZXBW256,
30515 IX86_BUILTIN_PMOVZXBD256,
30516 IX86_BUILTIN_PMOVZXBQ256,
30517 IX86_BUILTIN_PMOVZXWD256,
30518 IX86_BUILTIN_PMOVZXWQ256,
30519 IX86_BUILTIN_PMOVZXDQ256,
30520 IX86_BUILTIN_PMULDQ256,
30521 IX86_BUILTIN_PMULHRSW256,
30522 IX86_BUILTIN_PMULHUW256,
30523 IX86_BUILTIN_PMULHW256,
30524 IX86_BUILTIN_PMULLW256,
30525 IX86_BUILTIN_PMULLD256,
30526 IX86_BUILTIN_PMULUDQ256,
30527 IX86_BUILTIN_POR256,
30528 IX86_BUILTIN_PSADBW256,
30529 IX86_BUILTIN_PSHUFB256,
30530 IX86_BUILTIN_PSHUFD256,
30531 IX86_BUILTIN_PSHUFHW256,
30532 IX86_BUILTIN_PSHUFLW256,
30533 IX86_BUILTIN_PSIGNB256,
30534 IX86_BUILTIN_PSIGNW256,
30535 IX86_BUILTIN_PSIGND256,
30536 IX86_BUILTIN_PSLLDQI256,
30537 IX86_BUILTIN_PSLLWI256,
30538 IX86_BUILTIN_PSLLW256,
30539 IX86_BUILTIN_PSLLDI256,
30540 IX86_BUILTIN_PSLLD256,
30541 IX86_BUILTIN_PSLLQI256,
30542 IX86_BUILTIN_PSLLQ256,
30543 IX86_BUILTIN_PSRAWI256,
30544 IX86_BUILTIN_PSRAW256,
30545 IX86_BUILTIN_PSRADI256,
30546 IX86_BUILTIN_PSRAD256,
30547 IX86_BUILTIN_PSRLDQI256,
30548 IX86_BUILTIN_PSRLWI256,
30549 IX86_BUILTIN_PSRLW256,
30550 IX86_BUILTIN_PSRLDI256,
30551 IX86_BUILTIN_PSRLD256,
30552 IX86_BUILTIN_PSRLQI256,
30553 IX86_BUILTIN_PSRLQ256,
30554 IX86_BUILTIN_PSUBB256,
30555 IX86_BUILTIN_PSUBW256,
30556 IX86_BUILTIN_PSUBD256,
30557 IX86_BUILTIN_PSUBQ256,
30558 IX86_BUILTIN_PSUBSB256,
30559 IX86_BUILTIN_PSUBSW256,
30560 IX86_BUILTIN_PSUBUSB256,
30561 IX86_BUILTIN_PSUBUSW256,
30562 IX86_BUILTIN_PUNPCKHBW256,
30563 IX86_BUILTIN_PUNPCKHWD256,
30564 IX86_BUILTIN_PUNPCKHDQ256,
30565 IX86_BUILTIN_PUNPCKHQDQ256,
30566 IX86_BUILTIN_PUNPCKLBW256,
30567 IX86_BUILTIN_PUNPCKLWD256,
30568 IX86_BUILTIN_PUNPCKLDQ256,
30569 IX86_BUILTIN_PUNPCKLQDQ256,
30570 IX86_BUILTIN_PXOR256,
30571 IX86_BUILTIN_MOVNTDQA256,
30572 IX86_BUILTIN_VBROADCASTSS_PS,
30573 IX86_BUILTIN_VBROADCASTSS_PS256,
30574 IX86_BUILTIN_VBROADCASTSD_PD256,
30575 IX86_BUILTIN_VBROADCASTSI256,
30576 IX86_BUILTIN_PBLENDD256,
30577 IX86_BUILTIN_PBLENDD128,
30578 IX86_BUILTIN_PBROADCASTB256,
30579 IX86_BUILTIN_PBROADCASTW256,
30580 IX86_BUILTIN_PBROADCASTD256,
30581 IX86_BUILTIN_PBROADCASTQ256,
30582 IX86_BUILTIN_PBROADCASTB128,
30583 IX86_BUILTIN_PBROADCASTW128,
30584 IX86_BUILTIN_PBROADCASTD128,
30585 IX86_BUILTIN_PBROADCASTQ128,
30586 IX86_BUILTIN_VPERMVARSI256,
30587 IX86_BUILTIN_VPERMDF256,
30588 IX86_BUILTIN_VPERMVARSF256,
30589 IX86_BUILTIN_VPERMDI256,
30590 IX86_BUILTIN_VPERMTI256,
30591 IX86_BUILTIN_VEXTRACT128I256,
30592 IX86_BUILTIN_VINSERT128I256,
30593 IX86_BUILTIN_MASKLOADD,
30594 IX86_BUILTIN_MASKLOADQ,
30595 IX86_BUILTIN_MASKLOADD256,
30596 IX86_BUILTIN_MASKLOADQ256,
30597 IX86_BUILTIN_MASKSTORED,
30598 IX86_BUILTIN_MASKSTOREQ,
30599 IX86_BUILTIN_MASKSTORED256,
30600 IX86_BUILTIN_MASKSTOREQ256,
30601 IX86_BUILTIN_PSLLVV4DI,
30602 IX86_BUILTIN_PSLLVV2DI,
30603 IX86_BUILTIN_PSLLVV8SI,
30604 IX86_BUILTIN_PSLLVV4SI,
30605 IX86_BUILTIN_PSRAVV8SI,
30606 IX86_BUILTIN_PSRAVV4SI,
30607 IX86_BUILTIN_PSRLVV4DI,
30608 IX86_BUILTIN_PSRLVV2DI,
30609 IX86_BUILTIN_PSRLVV8SI,
30610 IX86_BUILTIN_PSRLVV4SI,
30612 IX86_BUILTIN_GATHERSIV2DF,
30613 IX86_BUILTIN_GATHERSIV4DF,
30614 IX86_BUILTIN_GATHERDIV2DF,
30615 IX86_BUILTIN_GATHERDIV4DF,
30616 IX86_BUILTIN_GATHERSIV4SF,
30617 IX86_BUILTIN_GATHERSIV8SF,
30618 IX86_BUILTIN_GATHERDIV4SF,
30619 IX86_BUILTIN_GATHERDIV8SF,
30620 IX86_BUILTIN_GATHERSIV2DI,
30621 IX86_BUILTIN_GATHERSIV4DI,
30622 IX86_BUILTIN_GATHERDIV2DI,
30623 IX86_BUILTIN_GATHERDIV4DI,
30624 IX86_BUILTIN_GATHERSIV4SI,
30625 IX86_BUILTIN_GATHERSIV8SI,
30626 IX86_BUILTIN_GATHERDIV4SI,
30627 IX86_BUILTIN_GATHERDIV8SI,
30630 IX86_BUILTIN_SI512_SI256,
30631 IX86_BUILTIN_PD512_PD256,
30632 IX86_BUILTIN_PS512_PS256,
30633 IX86_BUILTIN_SI512_SI,
30634 IX86_BUILTIN_PD512_PD,
30635 IX86_BUILTIN_PS512_PS,
30636 IX86_BUILTIN_ADDPD512,
30637 IX86_BUILTIN_ADDPS512,
30638 IX86_BUILTIN_ADDSD_ROUND,
30639 IX86_BUILTIN_ADDSS_ROUND,
30640 IX86_BUILTIN_ALIGND512,
30641 IX86_BUILTIN_ALIGNQ512,
30642 IX86_BUILTIN_BLENDMD512,
30643 IX86_BUILTIN_BLENDMPD512,
30644 IX86_BUILTIN_BLENDMPS512,
30645 IX86_BUILTIN_BLENDMQ512,
30646 IX86_BUILTIN_BROADCASTF32X4_512,
30647 IX86_BUILTIN_BROADCASTF64X4_512,
30648 IX86_BUILTIN_BROADCASTI32X4_512,
30649 IX86_BUILTIN_BROADCASTI64X4_512,
30650 IX86_BUILTIN_BROADCASTSD512,
30651 IX86_BUILTIN_BROADCASTSS512,
30652 IX86_BUILTIN_CMPD512,
30653 IX86_BUILTIN_CMPPD512,
30654 IX86_BUILTIN_CMPPS512,
30655 IX86_BUILTIN_CMPQ512,
30656 IX86_BUILTIN_CMPSD_MASK,
30657 IX86_BUILTIN_CMPSS_MASK,
30658 IX86_BUILTIN_COMIDF,
30659 IX86_BUILTIN_COMISF,
30660 IX86_BUILTIN_COMPRESSPD512,
30661 IX86_BUILTIN_COMPRESSPDSTORE512,
30662 IX86_BUILTIN_COMPRESSPS512,
30663 IX86_BUILTIN_COMPRESSPSSTORE512,
30664 IX86_BUILTIN_CVTDQ2PD512,
30665 IX86_BUILTIN_CVTDQ2PS512,
30666 IX86_BUILTIN_CVTPD2DQ512,
30667 IX86_BUILTIN_CVTPD2PS512,
30668 IX86_BUILTIN_CVTPD2UDQ512,
30669 IX86_BUILTIN_CVTPH2PS512,
30670 IX86_BUILTIN_CVTPS2DQ512,
30671 IX86_BUILTIN_CVTPS2PD512,
30672 IX86_BUILTIN_CVTPS2PH512,
30673 IX86_BUILTIN_CVTPS2UDQ512,
30674 IX86_BUILTIN_CVTSD2SS_ROUND,
30675 IX86_BUILTIN_CVTSI2SD64,
30676 IX86_BUILTIN_CVTSI2SS32,
30677 IX86_BUILTIN_CVTSI2SS64,
30678 IX86_BUILTIN_CVTSS2SD_ROUND,
30679 IX86_BUILTIN_CVTTPD2DQ512,
30680 IX86_BUILTIN_CVTTPD2UDQ512,
30681 IX86_BUILTIN_CVTTPS2DQ512,
30682 IX86_BUILTIN_CVTTPS2UDQ512,
30683 IX86_BUILTIN_CVTUDQ2PD512,
30684 IX86_BUILTIN_CVTUDQ2PS512,
30685 IX86_BUILTIN_CVTUSI2SD32,
30686 IX86_BUILTIN_CVTUSI2SD64,
30687 IX86_BUILTIN_CVTUSI2SS32,
30688 IX86_BUILTIN_CVTUSI2SS64,
30689 IX86_BUILTIN_DIVPD512,
30690 IX86_BUILTIN_DIVPS512,
30691 IX86_BUILTIN_DIVSD_ROUND,
30692 IX86_BUILTIN_DIVSS_ROUND,
30693 IX86_BUILTIN_EXPANDPD512,
30694 IX86_BUILTIN_EXPANDPD512Z,
30695 IX86_BUILTIN_EXPANDPDLOAD512,
30696 IX86_BUILTIN_EXPANDPDLOAD512Z,
30697 IX86_BUILTIN_EXPANDPS512,
30698 IX86_BUILTIN_EXPANDPS512Z,
30699 IX86_BUILTIN_EXPANDPSLOAD512,
30700 IX86_BUILTIN_EXPANDPSLOAD512Z,
30701 IX86_BUILTIN_EXTRACTF32X4,
30702 IX86_BUILTIN_EXTRACTF64X4,
30703 IX86_BUILTIN_EXTRACTI32X4,
30704 IX86_BUILTIN_EXTRACTI64X4,
30705 IX86_BUILTIN_FIXUPIMMPD512_MASK,
30706 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
30707 IX86_BUILTIN_FIXUPIMMPS512_MASK,
30708 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
30709 IX86_BUILTIN_FIXUPIMMSD128_MASK,
30710 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
30711 IX86_BUILTIN_FIXUPIMMSS128_MASK,
30712 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
30713 IX86_BUILTIN_GETEXPPD512,
30714 IX86_BUILTIN_GETEXPPS512,
30715 IX86_BUILTIN_GETEXPSD128,
30716 IX86_BUILTIN_GETEXPSS128,
30717 IX86_BUILTIN_GETMANTPD512,
30718 IX86_BUILTIN_GETMANTPS512,
30719 IX86_BUILTIN_GETMANTSD128,
30720 IX86_BUILTIN_GETMANTSS128,
30721 IX86_BUILTIN_INSERTF32X4,
30722 IX86_BUILTIN_INSERTF64X4,
30723 IX86_BUILTIN_INSERTI32X4,
30724 IX86_BUILTIN_INSERTI64X4,
30725 IX86_BUILTIN_LOADAPD512,
30726 IX86_BUILTIN_LOADAPS512,
30727 IX86_BUILTIN_LOADDQUDI512,
30728 IX86_BUILTIN_LOADDQUSI512,
30729 IX86_BUILTIN_LOADUPD512,
30730 IX86_BUILTIN_LOADUPS512,
30731 IX86_BUILTIN_MAXPD512,
30732 IX86_BUILTIN_MAXPS512,
30733 IX86_BUILTIN_MAXSD_ROUND,
30734 IX86_BUILTIN_MAXSS_ROUND,
30735 IX86_BUILTIN_MINPD512,
30736 IX86_BUILTIN_MINPS512,
30737 IX86_BUILTIN_MINSD_ROUND,
30738 IX86_BUILTIN_MINSS_ROUND,
30739 IX86_BUILTIN_MOVAPD512,
30740 IX86_BUILTIN_MOVAPS512,
30741 IX86_BUILTIN_MOVDDUP512,
30742 IX86_BUILTIN_MOVDQA32LOAD512,
30743 IX86_BUILTIN_MOVDQA32STORE512,
30744 IX86_BUILTIN_MOVDQA32_512,
30745 IX86_BUILTIN_MOVDQA64LOAD512,
30746 IX86_BUILTIN_MOVDQA64STORE512,
30747 IX86_BUILTIN_MOVDQA64_512,
30748 IX86_BUILTIN_MOVNTDQ512,
30749 IX86_BUILTIN_MOVNTDQA512,
30750 IX86_BUILTIN_MOVNTPD512,
30751 IX86_BUILTIN_MOVNTPS512,
30752 IX86_BUILTIN_MOVSHDUP512,
30753 IX86_BUILTIN_MOVSLDUP512,
30754 IX86_BUILTIN_MULPD512,
30755 IX86_BUILTIN_MULPS512,
30756 IX86_BUILTIN_MULSD_ROUND,
30757 IX86_BUILTIN_MULSS_ROUND,
30758 IX86_BUILTIN_PABSD512,
30759 IX86_BUILTIN_PABSQ512,
30760 IX86_BUILTIN_PADDD512,
30761 IX86_BUILTIN_PADDQ512,
30762 IX86_BUILTIN_PANDD512,
30763 IX86_BUILTIN_PANDND512,
30764 IX86_BUILTIN_PANDNQ512,
30765 IX86_BUILTIN_PANDQ512,
30766 IX86_BUILTIN_PBROADCASTD512,
30767 IX86_BUILTIN_PBROADCASTD512_GPR,
30768 IX86_BUILTIN_PBROADCASTMB512,
30769 IX86_BUILTIN_PBROADCASTMW512,
30770 IX86_BUILTIN_PBROADCASTQ512,
30771 IX86_BUILTIN_PBROADCASTQ512_GPR,
30772 IX86_BUILTIN_PCMPEQD512_MASK,
30773 IX86_BUILTIN_PCMPEQQ512_MASK,
30774 IX86_BUILTIN_PCMPGTD512_MASK,
30775 IX86_BUILTIN_PCMPGTQ512_MASK,
30776 IX86_BUILTIN_PCOMPRESSD512,
30777 IX86_BUILTIN_PCOMPRESSDSTORE512,
30778 IX86_BUILTIN_PCOMPRESSQ512,
30779 IX86_BUILTIN_PCOMPRESSQSTORE512,
30780 IX86_BUILTIN_PEXPANDD512,
30781 IX86_BUILTIN_PEXPANDD512Z,
30782 IX86_BUILTIN_PEXPANDDLOAD512,
30783 IX86_BUILTIN_PEXPANDDLOAD512Z,
30784 IX86_BUILTIN_PEXPANDQ512,
30785 IX86_BUILTIN_PEXPANDQ512Z,
30786 IX86_BUILTIN_PEXPANDQLOAD512,
30787 IX86_BUILTIN_PEXPANDQLOAD512Z,
30788 IX86_BUILTIN_PMAXSD512,
30789 IX86_BUILTIN_PMAXSQ512,
30790 IX86_BUILTIN_PMAXUD512,
30791 IX86_BUILTIN_PMAXUQ512,
30792 IX86_BUILTIN_PMINSD512,
30793 IX86_BUILTIN_PMINSQ512,
30794 IX86_BUILTIN_PMINUD512,
30795 IX86_BUILTIN_PMINUQ512,
30796 IX86_BUILTIN_PMOVDB512,
30797 IX86_BUILTIN_PMOVDB512_MEM,
30798 IX86_BUILTIN_PMOVDW512,
30799 IX86_BUILTIN_PMOVDW512_MEM,
30800 IX86_BUILTIN_PMOVQB512,
30801 IX86_BUILTIN_PMOVQB512_MEM,
30802 IX86_BUILTIN_PMOVQD512,
30803 IX86_BUILTIN_PMOVQD512_MEM,
30804 IX86_BUILTIN_PMOVQW512,
30805 IX86_BUILTIN_PMOVQW512_MEM,
30806 IX86_BUILTIN_PMOVSDB512,
30807 IX86_BUILTIN_PMOVSDB512_MEM,
30808 IX86_BUILTIN_PMOVSDW512,
30809 IX86_BUILTIN_PMOVSDW512_MEM,
30810 IX86_BUILTIN_PMOVSQB512,
30811 IX86_BUILTIN_PMOVSQB512_MEM,
30812 IX86_BUILTIN_PMOVSQD512,
30813 IX86_BUILTIN_PMOVSQD512_MEM,
30814 IX86_BUILTIN_PMOVSQW512,
30815 IX86_BUILTIN_PMOVSQW512_MEM,
30816 IX86_BUILTIN_PMOVSXBD512,
30817 IX86_BUILTIN_PMOVSXBQ512,
30818 IX86_BUILTIN_PMOVSXDQ512,
30819 IX86_BUILTIN_PMOVSXWD512,
30820 IX86_BUILTIN_PMOVSXWQ512,
30821 IX86_BUILTIN_PMOVUSDB512,
30822 IX86_BUILTIN_PMOVUSDB512_MEM,
30823 IX86_BUILTIN_PMOVUSDW512,
30824 IX86_BUILTIN_PMOVUSDW512_MEM,
30825 IX86_BUILTIN_PMOVUSQB512,
30826 IX86_BUILTIN_PMOVUSQB512_MEM,
30827 IX86_BUILTIN_PMOVUSQD512,
30828 IX86_BUILTIN_PMOVUSQD512_MEM,
30829 IX86_BUILTIN_PMOVUSQW512,
30830 IX86_BUILTIN_PMOVUSQW512_MEM,
30831 IX86_BUILTIN_PMOVZXBD512,
30832 IX86_BUILTIN_PMOVZXBQ512,
30833 IX86_BUILTIN_PMOVZXDQ512,
30834 IX86_BUILTIN_PMOVZXWD512,
30835 IX86_BUILTIN_PMOVZXWQ512,
30836 IX86_BUILTIN_PMULDQ512,
30837 IX86_BUILTIN_PMULLD512,
30838 IX86_BUILTIN_PMULUDQ512,
30839 IX86_BUILTIN_PORD512,
30840 IX86_BUILTIN_PORQ512,
30841 IX86_BUILTIN_PROLD512,
30842 IX86_BUILTIN_PROLQ512,
30843 IX86_BUILTIN_PROLVD512,
30844 IX86_BUILTIN_PROLVQ512,
30845 IX86_BUILTIN_PRORD512,
30846 IX86_BUILTIN_PRORQ512,
30847 IX86_BUILTIN_PRORVD512,
30848 IX86_BUILTIN_PRORVQ512,
30849 IX86_BUILTIN_PSHUFD512,
30850 IX86_BUILTIN_PSLLD512,
30851 IX86_BUILTIN_PSLLDI512,
30852 IX86_BUILTIN_PSLLQ512,
30853 IX86_BUILTIN_PSLLQI512,
30854 IX86_BUILTIN_PSLLVV16SI,
30855 IX86_BUILTIN_PSLLVV8DI,
30856 IX86_BUILTIN_PSRAD512,
30857 IX86_BUILTIN_PSRADI512,
30858 IX86_BUILTIN_PSRAQ512,
30859 IX86_BUILTIN_PSRAQI512,
30860 IX86_BUILTIN_PSRAVV16SI,
30861 IX86_BUILTIN_PSRAVV8DI,
30862 IX86_BUILTIN_PSRLD512,
30863 IX86_BUILTIN_PSRLDI512,
30864 IX86_BUILTIN_PSRLQ512,
30865 IX86_BUILTIN_PSRLQI512,
30866 IX86_BUILTIN_PSRLVV16SI,
30867 IX86_BUILTIN_PSRLVV8DI,
30868 IX86_BUILTIN_PSUBD512,
30869 IX86_BUILTIN_PSUBQ512,
30870 IX86_BUILTIN_PTESTMD512,
30871 IX86_BUILTIN_PTESTMQ512,
30872 IX86_BUILTIN_PTESTNMD512,
30873 IX86_BUILTIN_PTESTNMQ512,
30874 IX86_BUILTIN_PUNPCKHDQ512,
30875 IX86_BUILTIN_PUNPCKHQDQ512,
30876 IX86_BUILTIN_PUNPCKLDQ512,
30877 IX86_BUILTIN_PUNPCKLQDQ512,
30878 IX86_BUILTIN_PXORD512,
30879 IX86_BUILTIN_PXORQ512,
30880 IX86_BUILTIN_RCP14PD512,
30881 IX86_BUILTIN_RCP14PS512,
30882 IX86_BUILTIN_RCP14SD,
30883 IX86_BUILTIN_RCP14SS,
30884 IX86_BUILTIN_RNDSCALEPD,
30885 IX86_BUILTIN_RNDSCALEPS,
30886 IX86_BUILTIN_RNDSCALESD,
30887 IX86_BUILTIN_RNDSCALESS,
30888 IX86_BUILTIN_RSQRT14PD512,
30889 IX86_BUILTIN_RSQRT14PS512,
30890 IX86_BUILTIN_RSQRT14SD,
30891 IX86_BUILTIN_RSQRT14SS,
30892 IX86_BUILTIN_SCALEFPD512,
30893 IX86_BUILTIN_SCALEFPS512,
30894 IX86_BUILTIN_SCALEFSD,
30895 IX86_BUILTIN_SCALEFSS,
30896 IX86_BUILTIN_SHUFPD512,
30897 IX86_BUILTIN_SHUFPS512,
30898 IX86_BUILTIN_SHUF_F32x4,
30899 IX86_BUILTIN_SHUF_F64x2,
30900 IX86_BUILTIN_SHUF_I32x4,
30901 IX86_BUILTIN_SHUF_I64x2,
30902 IX86_BUILTIN_SQRTPD512,
30903 IX86_BUILTIN_SQRTPD512_MASK,
30904 IX86_BUILTIN_SQRTPS512_MASK,
30905 IX86_BUILTIN_SQRTPS_NR512,
30906 IX86_BUILTIN_SQRTSD_ROUND,
30907 IX86_BUILTIN_SQRTSS_ROUND,
30908 IX86_BUILTIN_STOREAPD512,
30909 IX86_BUILTIN_STOREAPS512,
30910 IX86_BUILTIN_STOREDQUDI512,
30911 IX86_BUILTIN_STOREDQUSI512,
30912 IX86_BUILTIN_STOREUPD512,
30913 IX86_BUILTIN_STOREUPS512,
30914 IX86_BUILTIN_SUBPD512,
30915 IX86_BUILTIN_SUBPS512,
30916 IX86_BUILTIN_SUBSD_ROUND,
30917 IX86_BUILTIN_SUBSS_ROUND,
30918 IX86_BUILTIN_UCMPD512,
30919 IX86_BUILTIN_UCMPQ512,
30920 IX86_BUILTIN_UNPCKHPD512,
30921 IX86_BUILTIN_UNPCKHPS512,
30922 IX86_BUILTIN_UNPCKLPD512,
30923 IX86_BUILTIN_UNPCKLPS512,
30924 IX86_BUILTIN_VCVTSD2SI32,
30925 IX86_BUILTIN_VCVTSD2SI64,
30926 IX86_BUILTIN_VCVTSD2USI32,
30927 IX86_BUILTIN_VCVTSD2USI64,
30928 IX86_BUILTIN_VCVTSS2SI32,
30929 IX86_BUILTIN_VCVTSS2SI64,
30930 IX86_BUILTIN_VCVTSS2USI32,
30931 IX86_BUILTIN_VCVTSS2USI64,
30932 IX86_BUILTIN_VCVTTSD2SI32,
30933 IX86_BUILTIN_VCVTTSD2SI64,
30934 IX86_BUILTIN_VCVTTSD2USI32,
30935 IX86_BUILTIN_VCVTTSD2USI64,
30936 IX86_BUILTIN_VCVTTSS2SI32,
30937 IX86_BUILTIN_VCVTTSS2SI64,
30938 IX86_BUILTIN_VCVTTSS2USI32,
30939 IX86_BUILTIN_VCVTTSS2USI64,
30940 IX86_BUILTIN_VFMADDPD512_MASK,
30941 IX86_BUILTIN_VFMADDPD512_MASK3,
30942 IX86_BUILTIN_VFMADDPD512_MASKZ,
30943 IX86_BUILTIN_VFMADDPS512_MASK,
30944 IX86_BUILTIN_VFMADDPS512_MASK3,
30945 IX86_BUILTIN_VFMADDPS512_MASKZ,
30946 IX86_BUILTIN_VFMADDSD3_ROUND,
30947 IX86_BUILTIN_VFMADDSS3_ROUND,
30948 IX86_BUILTIN_VFMADDSUBPD512_MASK,
30949 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
30950 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
30951 IX86_BUILTIN_VFMADDSUBPS512_MASK,
30952 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
30953 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
30954 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
30955 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
30956 IX86_BUILTIN_VFMSUBPD512_MASK3,
30957 IX86_BUILTIN_VFMSUBPS512_MASK3,
30958 IX86_BUILTIN_VFMSUBSD3_MASK3,
30959 IX86_BUILTIN_VFMSUBSS3_MASK3,
30960 IX86_BUILTIN_VFNMADDPD512_MASK,
30961 IX86_BUILTIN_VFNMADDPS512_MASK,
30962 IX86_BUILTIN_VFNMSUBPD512_MASK,
30963 IX86_BUILTIN_VFNMSUBPD512_MASK3,
30964 IX86_BUILTIN_VFNMSUBPS512_MASK,
30965 IX86_BUILTIN_VFNMSUBPS512_MASK3,
30966 IX86_BUILTIN_VPCLZCNTD512,
30967 IX86_BUILTIN_VPCLZCNTQ512,
30968 IX86_BUILTIN_VPCONFLICTD512,
30969 IX86_BUILTIN_VPCONFLICTQ512,
30970 IX86_BUILTIN_VPERMDF512,
30971 IX86_BUILTIN_VPERMDI512,
30972 IX86_BUILTIN_VPERMI2VARD512,
30973 IX86_BUILTIN_VPERMI2VARPD512,
30974 IX86_BUILTIN_VPERMI2VARPS512,
30975 IX86_BUILTIN_VPERMI2VARQ512,
30976 IX86_BUILTIN_VPERMILPD512,
30977 IX86_BUILTIN_VPERMILPS512,
30978 IX86_BUILTIN_VPERMILVARPD512,
30979 IX86_BUILTIN_VPERMILVARPS512,
30980 IX86_BUILTIN_VPERMT2VARD512,
30981 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
30982 IX86_BUILTIN_VPERMT2VARPD512,
30983 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
30984 IX86_BUILTIN_VPERMT2VARPS512,
30985 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
30986 IX86_BUILTIN_VPERMT2VARQ512,
30987 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
30988 IX86_BUILTIN_VPERMVARDF512,
30989 IX86_BUILTIN_VPERMVARDI512,
30990 IX86_BUILTIN_VPERMVARSF512,
30991 IX86_BUILTIN_VPERMVARSI512,
30992 IX86_BUILTIN_VTERNLOGD512_MASK,
30993 IX86_BUILTIN_VTERNLOGD512_MASKZ,
30994 IX86_BUILTIN_VTERNLOGQ512_MASK,
30995 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
30997 /* Mask arithmetic operations */
30998 IX86_BUILTIN_KAND16,
30999 IX86_BUILTIN_KANDN16,
31000 IX86_BUILTIN_KNOT16,
31001 IX86_BUILTIN_KOR16,
31002 IX86_BUILTIN_KORTESTC16,
31003 IX86_BUILTIN_KORTESTZ16,
31004 IX86_BUILTIN_KUNPCKBW,
31005 IX86_BUILTIN_KXNOR16,
31006 IX86_BUILTIN_KXOR16,
31007 IX86_BUILTIN_KMOV16,
31010 IX86_BUILTIN_PMOVUSQD256_MEM,
31011 IX86_BUILTIN_PMOVUSQD128_MEM,
31012 IX86_BUILTIN_PMOVSQD256_MEM,
31013 IX86_BUILTIN_PMOVSQD128_MEM,
31014 IX86_BUILTIN_PMOVQD256_MEM,
31015 IX86_BUILTIN_PMOVQD128_MEM,
31016 IX86_BUILTIN_PMOVUSQW256_MEM,
31017 IX86_BUILTIN_PMOVUSQW128_MEM,
31018 IX86_BUILTIN_PMOVSQW256_MEM,
31019 IX86_BUILTIN_PMOVSQW128_MEM,
31020 IX86_BUILTIN_PMOVQW256_MEM,
31021 IX86_BUILTIN_PMOVQW128_MEM,
31022 IX86_BUILTIN_PMOVUSQB256_MEM,
31023 IX86_BUILTIN_PMOVUSQB128_MEM,
31024 IX86_BUILTIN_PMOVSQB256_MEM,
31025 IX86_BUILTIN_PMOVSQB128_MEM,
31026 IX86_BUILTIN_PMOVQB256_MEM,
31027 IX86_BUILTIN_PMOVQB128_MEM,
31028 IX86_BUILTIN_PMOVUSDW256_MEM,
31029 IX86_BUILTIN_PMOVUSDW128_MEM,
31030 IX86_BUILTIN_PMOVSDW256_MEM,
31031 IX86_BUILTIN_PMOVSDW128_MEM,
31032 IX86_BUILTIN_PMOVDW256_MEM,
31033 IX86_BUILTIN_PMOVDW128_MEM,
31034 IX86_BUILTIN_PMOVUSDB256_MEM,
31035 IX86_BUILTIN_PMOVUSDB128_MEM,
31036 IX86_BUILTIN_PMOVSDB256_MEM,
31037 IX86_BUILTIN_PMOVSDB128_MEM,
31038 IX86_BUILTIN_PMOVDB256_MEM,
31039 IX86_BUILTIN_PMOVDB128_MEM,
31040 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
31041 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
31042 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
31043 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
31044 IX86_BUILTIN_MOVDQA64STORE256_MASK,
31045 IX86_BUILTIN_MOVDQA64STORE128_MASK,
31046 IX86_BUILTIN_MOVDQA32STORE256_MASK,
31047 IX86_BUILTIN_MOVDQA32STORE128_MASK,
31048 IX86_BUILTIN_LOADAPD256_MASK,
31049 IX86_BUILTIN_LOADAPD128_MASK,
31050 IX86_BUILTIN_LOADAPS256_MASK,
31051 IX86_BUILTIN_LOADAPS128_MASK,
31052 IX86_BUILTIN_STOREAPD256_MASK,
31053 IX86_BUILTIN_STOREAPD128_MASK,
31054 IX86_BUILTIN_STOREAPS256_MASK,
31055 IX86_BUILTIN_STOREAPS128_MASK,
31056 IX86_BUILTIN_LOADUPD256_MASK,
31057 IX86_BUILTIN_LOADUPD128_MASK,
31058 IX86_BUILTIN_LOADUPS256_MASK,
31059 IX86_BUILTIN_LOADUPS128_MASK,
31060 IX86_BUILTIN_STOREUPD256_MASK,
31061 IX86_BUILTIN_STOREUPD128_MASK,
31062 IX86_BUILTIN_STOREUPS256_MASK,
31063 IX86_BUILTIN_STOREUPS128_MASK,
31064 IX86_BUILTIN_LOADDQUDI256_MASK,
31065 IX86_BUILTIN_LOADDQUDI128_MASK,
31066 IX86_BUILTIN_LOADDQUSI256_MASK,
31067 IX86_BUILTIN_LOADDQUSI128_MASK,
31068 IX86_BUILTIN_LOADDQUHI256_MASK,
31069 IX86_BUILTIN_LOADDQUHI128_MASK,
31070 IX86_BUILTIN_LOADDQUQI256_MASK,
31071 IX86_BUILTIN_LOADDQUQI128_MASK,
31072 IX86_BUILTIN_STOREDQUDI256_MASK,
31073 IX86_BUILTIN_STOREDQUDI128_MASK,
31074 IX86_BUILTIN_STOREDQUSI256_MASK,
31075 IX86_BUILTIN_STOREDQUSI128_MASK,
31076 IX86_BUILTIN_STOREDQUHI256_MASK,
31077 IX86_BUILTIN_STOREDQUHI128_MASK,
31078 IX86_BUILTIN_STOREDQUQI256_MASK,
31079 IX86_BUILTIN_STOREDQUQI128_MASK,
31080 IX86_BUILTIN_COMPRESSPDSTORE256,
31081 IX86_BUILTIN_COMPRESSPDSTORE128,
31082 IX86_BUILTIN_COMPRESSPSSTORE256,
31083 IX86_BUILTIN_COMPRESSPSSTORE128,
31084 IX86_BUILTIN_PCOMPRESSQSTORE256,
31085 IX86_BUILTIN_PCOMPRESSQSTORE128,
31086 IX86_BUILTIN_PCOMPRESSDSTORE256,
31087 IX86_BUILTIN_PCOMPRESSDSTORE128,
31088 IX86_BUILTIN_EXPANDPDLOAD256,
31089 IX86_BUILTIN_EXPANDPDLOAD128,
31090 IX86_BUILTIN_EXPANDPSLOAD256,
31091 IX86_BUILTIN_EXPANDPSLOAD128,
31092 IX86_BUILTIN_PEXPANDQLOAD256,
31093 IX86_BUILTIN_PEXPANDQLOAD128,
31094 IX86_BUILTIN_PEXPANDDLOAD256,
31095 IX86_BUILTIN_PEXPANDDLOAD128,
31096 IX86_BUILTIN_EXPANDPDLOAD256Z,
31097 IX86_BUILTIN_EXPANDPDLOAD128Z,
31098 IX86_BUILTIN_EXPANDPSLOAD256Z,
31099 IX86_BUILTIN_EXPANDPSLOAD128Z,
31100 IX86_BUILTIN_PEXPANDQLOAD256Z,
31101 IX86_BUILTIN_PEXPANDQLOAD128Z,
31102 IX86_BUILTIN_PEXPANDDLOAD256Z,
31103 IX86_BUILTIN_PEXPANDDLOAD128Z,
31104 IX86_BUILTIN_PALIGNR256_MASK,
31105 IX86_BUILTIN_PALIGNR128_MASK,
31106 IX86_BUILTIN_MOVDQA64_256_MASK,
31107 IX86_BUILTIN_MOVDQA64_128_MASK,
31108 IX86_BUILTIN_MOVDQA32_256_MASK,
31109 IX86_BUILTIN_MOVDQA32_128_MASK,
31110 IX86_BUILTIN_MOVAPD256_MASK,
31111 IX86_BUILTIN_MOVAPD128_MASK,
31112 IX86_BUILTIN_MOVAPS256_MASK,
31113 IX86_BUILTIN_MOVAPS128_MASK,
31114 IX86_BUILTIN_MOVDQUHI256_MASK,
31115 IX86_BUILTIN_MOVDQUHI128_MASK,
31116 IX86_BUILTIN_MOVDQUQI256_MASK,
31117 IX86_BUILTIN_MOVDQUQI128_MASK,
31118 IX86_BUILTIN_MINPS128_MASK,
31119 IX86_BUILTIN_MAXPS128_MASK,
31120 IX86_BUILTIN_MINPD128_MASK,
31121 IX86_BUILTIN_MAXPD128_MASK,
31122 IX86_BUILTIN_MAXPD256_MASK,
31123 IX86_BUILTIN_MAXPS256_MASK,
31124 IX86_BUILTIN_MINPD256_MASK,
31125 IX86_BUILTIN_MINPS256_MASK,
31126 IX86_BUILTIN_MULPS128_MASK,
31127 IX86_BUILTIN_DIVPS128_MASK,
31128 IX86_BUILTIN_MULPD128_MASK,
31129 IX86_BUILTIN_DIVPD128_MASK,
31130 IX86_BUILTIN_DIVPD256_MASK,
31131 IX86_BUILTIN_DIVPS256_MASK,
31132 IX86_BUILTIN_MULPD256_MASK,
31133 IX86_BUILTIN_MULPS256_MASK,
31134 IX86_BUILTIN_ADDPD128_MASK,
31135 IX86_BUILTIN_ADDPD256_MASK,
31136 IX86_BUILTIN_ADDPS128_MASK,
31137 IX86_BUILTIN_ADDPS256_MASK,
31138 IX86_BUILTIN_SUBPD128_MASK,
31139 IX86_BUILTIN_SUBPD256_MASK,
31140 IX86_BUILTIN_SUBPS128_MASK,
31141 IX86_BUILTIN_SUBPS256_MASK,
31142 IX86_BUILTIN_XORPD256_MASK,
31143 IX86_BUILTIN_XORPD128_MASK,
31144 IX86_BUILTIN_XORPS256_MASK,
31145 IX86_BUILTIN_XORPS128_MASK,
31146 IX86_BUILTIN_ORPD256_MASK,
31147 IX86_BUILTIN_ORPD128_MASK,
31148 IX86_BUILTIN_ORPS256_MASK,
31149 IX86_BUILTIN_ORPS128_MASK,
31150 IX86_BUILTIN_BROADCASTF32x2_256,
31151 IX86_BUILTIN_BROADCASTI32x2_256,
31152 IX86_BUILTIN_BROADCASTI32x2_128,
31153 IX86_BUILTIN_BROADCASTF64X2_256,
31154 IX86_BUILTIN_BROADCASTI64X2_256,
31155 IX86_BUILTIN_BROADCASTF32X4_256,
31156 IX86_BUILTIN_BROADCASTI32X4_256,
31157 IX86_BUILTIN_EXTRACTF32X4_256,
31158 IX86_BUILTIN_EXTRACTI32X4_256,
31159 IX86_BUILTIN_DBPSADBW256,
31160 IX86_BUILTIN_DBPSADBW128,
31161 IX86_BUILTIN_CVTTPD2QQ256,
31162 IX86_BUILTIN_CVTTPD2QQ128,
31163 IX86_BUILTIN_CVTTPD2UQQ256,
31164 IX86_BUILTIN_CVTTPD2UQQ128,
31165 IX86_BUILTIN_CVTPD2QQ256,
31166 IX86_BUILTIN_CVTPD2QQ128,
31167 IX86_BUILTIN_CVTPD2UQQ256,
31168 IX86_BUILTIN_CVTPD2UQQ128,
31169 IX86_BUILTIN_CVTPD2UDQ256_MASK,
31170 IX86_BUILTIN_CVTPD2UDQ128_MASK,
31171 IX86_BUILTIN_CVTTPS2QQ256,
31172 IX86_BUILTIN_CVTTPS2QQ128,
31173 IX86_BUILTIN_CVTTPS2UQQ256,
31174 IX86_BUILTIN_CVTTPS2UQQ128,
31175 IX86_BUILTIN_CVTTPS2DQ256_MASK,
31176 IX86_BUILTIN_CVTTPS2DQ128_MASK,
31177 IX86_BUILTIN_CVTTPS2UDQ256,
31178 IX86_BUILTIN_CVTTPS2UDQ128,
31179 IX86_BUILTIN_CVTTPD2DQ256_MASK,
31180 IX86_BUILTIN_CVTTPD2DQ128_MASK,
31181 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
31182 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
31183 IX86_BUILTIN_CVTPD2DQ256_MASK,
31184 IX86_BUILTIN_CVTPD2DQ128_MASK,
31185 IX86_BUILTIN_CVTDQ2PD256_MASK,
31186 IX86_BUILTIN_CVTDQ2PD128_MASK,
31187 IX86_BUILTIN_CVTUDQ2PD256_MASK,
31188 IX86_BUILTIN_CVTUDQ2PD128_MASK,
31189 IX86_BUILTIN_CVTDQ2PS256_MASK,
31190 IX86_BUILTIN_CVTDQ2PS128_MASK,
31191 IX86_BUILTIN_CVTUDQ2PS256_MASK,
31192 IX86_BUILTIN_CVTUDQ2PS128_MASK,
31193 IX86_BUILTIN_CVTPS2PD256_MASK,
31194 IX86_BUILTIN_CVTPS2PD128_MASK,
31195 IX86_BUILTIN_PBROADCASTB256_MASK,
31196 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
31197 IX86_BUILTIN_PBROADCASTB128_MASK,
31198 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
31199 IX86_BUILTIN_PBROADCASTW256_MASK,
31200 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
31201 IX86_BUILTIN_PBROADCASTW128_MASK,
31202 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
31203 IX86_BUILTIN_PBROADCASTD256_MASK,
31204 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
31205 IX86_BUILTIN_PBROADCASTD128_MASK,
31206 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
31207 IX86_BUILTIN_PBROADCASTQ256_MASK,
31208 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
31209 IX86_BUILTIN_PBROADCASTQ128_MASK,
31210 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
31211 IX86_BUILTIN_BROADCASTSS256,
31212 IX86_BUILTIN_BROADCASTSS128,
31213 IX86_BUILTIN_BROADCASTSD256,
31214 IX86_BUILTIN_EXTRACTF64X2_256,
31215 IX86_BUILTIN_EXTRACTI64X2_256,
31216 IX86_BUILTIN_INSERTF32X4_256,
31217 IX86_BUILTIN_INSERTI32X4_256,
31218 IX86_BUILTIN_PMOVSXBW256_MASK,
31219 IX86_BUILTIN_PMOVSXBW128_MASK,
31220 IX86_BUILTIN_PMOVSXBD256_MASK,
31221 IX86_BUILTIN_PMOVSXBD128_MASK,
31222 IX86_BUILTIN_PMOVSXBQ256_MASK,
31223 IX86_BUILTIN_PMOVSXBQ128_MASK,
31224 IX86_BUILTIN_PMOVSXWD256_MASK,
31225 IX86_BUILTIN_PMOVSXWD128_MASK,
31226 IX86_BUILTIN_PMOVSXWQ256_MASK,
31227 IX86_BUILTIN_PMOVSXWQ128_MASK,
31228 IX86_BUILTIN_PMOVSXDQ256_MASK,
31229 IX86_BUILTIN_PMOVSXDQ128_MASK,
31230 IX86_BUILTIN_PMOVZXBW256_MASK,
31231 IX86_BUILTIN_PMOVZXBW128_MASK,
31232 IX86_BUILTIN_PMOVZXBD256_MASK,
31233 IX86_BUILTIN_PMOVZXBD128_MASK,
31234 IX86_BUILTIN_PMOVZXBQ256_MASK,
31235 IX86_BUILTIN_PMOVZXBQ128_MASK,
31236 IX86_BUILTIN_PMOVZXWD256_MASK,
31237 IX86_BUILTIN_PMOVZXWD128_MASK,
31238 IX86_BUILTIN_PMOVZXWQ256_MASK,
31239 IX86_BUILTIN_PMOVZXWQ128_MASK,
31240 IX86_BUILTIN_PMOVZXDQ256_MASK,
31241 IX86_BUILTIN_PMOVZXDQ128_MASK,
31242 IX86_BUILTIN_REDUCEPD256_MASK,
31243 IX86_BUILTIN_REDUCEPD128_MASK,
31244 IX86_BUILTIN_REDUCEPS256_MASK,
31245 IX86_BUILTIN_REDUCEPS128_MASK,
31246 IX86_BUILTIN_REDUCESD_MASK,
31247 IX86_BUILTIN_REDUCESS_MASK,
31248 IX86_BUILTIN_VPERMVARHI256_MASK,
31249 IX86_BUILTIN_VPERMVARHI128_MASK,
31250 IX86_BUILTIN_VPERMT2VARHI256,
31251 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
31252 IX86_BUILTIN_VPERMT2VARHI128,
31253 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
31254 IX86_BUILTIN_VPERMI2VARHI256,
31255 IX86_BUILTIN_VPERMI2VARHI128,
31256 IX86_BUILTIN_RCP14PD256,
31257 IX86_BUILTIN_RCP14PD128,
31258 IX86_BUILTIN_RCP14PS256,
31259 IX86_BUILTIN_RCP14PS128,
31260 IX86_BUILTIN_RSQRT14PD256_MASK,
31261 IX86_BUILTIN_RSQRT14PD128_MASK,
31262 IX86_BUILTIN_RSQRT14PS256_MASK,
31263 IX86_BUILTIN_RSQRT14PS128_MASK,
31264 IX86_BUILTIN_SQRTPD256_MASK,
31265 IX86_BUILTIN_SQRTPD128_MASK,
31266 IX86_BUILTIN_SQRTPS256_MASK,
31267 IX86_BUILTIN_SQRTPS128_MASK,
31268 IX86_BUILTIN_PADDB128_MASK,
31269 IX86_BUILTIN_PADDW128_MASK,
31270 IX86_BUILTIN_PADDD128_MASK,
31271 IX86_BUILTIN_PADDQ128_MASK,
31272 IX86_BUILTIN_PSUBB128_MASK,
31273 IX86_BUILTIN_PSUBW128_MASK,
31274 IX86_BUILTIN_PSUBD128_MASK,
31275 IX86_BUILTIN_PSUBQ128_MASK,
31276 IX86_BUILTIN_PADDSB128_MASK,
31277 IX86_BUILTIN_PADDSW128_MASK,
31278 IX86_BUILTIN_PSUBSB128_MASK,
31279 IX86_BUILTIN_PSUBSW128_MASK,
31280 IX86_BUILTIN_PADDUSB128_MASK,
31281 IX86_BUILTIN_PADDUSW128_MASK,
31282 IX86_BUILTIN_PSUBUSB128_MASK,
31283 IX86_BUILTIN_PSUBUSW128_MASK,
31284 IX86_BUILTIN_PADDB256_MASK,
31285 IX86_BUILTIN_PADDW256_MASK,
31286 IX86_BUILTIN_PADDD256_MASK,
31287 IX86_BUILTIN_PADDQ256_MASK,
31288 IX86_BUILTIN_PADDSB256_MASK,
31289 IX86_BUILTIN_PADDSW256_MASK,
31290 IX86_BUILTIN_PADDUSB256_MASK,
31291 IX86_BUILTIN_PADDUSW256_MASK,
31292 IX86_BUILTIN_PSUBB256_MASK,
31293 IX86_BUILTIN_PSUBW256_MASK,
31294 IX86_BUILTIN_PSUBD256_MASK,
31295 IX86_BUILTIN_PSUBQ256_MASK,
31296 IX86_BUILTIN_PSUBSB256_MASK,
31297 IX86_BUILTIN_PSUBSW256_MASK,
31298 IX86_BUILTIN_PSUBUSB256_MASK,
31299 IX86_BUILTIN_PSUBUSW256_MASK,
31300 IX86_BUILTIN_SHUF_F64x2_256,
31301 IX86_BUILTIN_SHUF_I64x2_256,
31302 IX86_BUILTIN_SHUF_I32x4_256,
31303 IX86_BUILTIN_SHUF_F32x4_256,
31304 IX86_BUILTIN_PMOVWB128,
31305 IX86_BUILTIN_PMOVWB256,
31306 IX86_BUILTIN_PMOVSWB128,
31307 IX86_BUILTIN_PMOVSWB256,
31308 IX86_BUILTIN_PMOVUSWB128,
31309 IX86_BUILTIN_PMOVUSWB256,
31310 IX86_BUILTIN_PMOVDB128,
31311 IX86_BUILTIN_PMOVDB256,
31312 IX86_BUILTIN_PMOVSDB128,
31313 IX86_BUILTIN_PMOVSDB256,
31314 IX86_BUILTIN_PMOVUSDB128,
31315 IX86_BUILTIN_PMOVUSDB256,
31316 IX86_BUILTIN_PMOVDW128,
31317 IX86_BUILTIN_PMOVDW256,
31318 IX86_BUILTIN_PMOVSDW128,
31319 IX86_BUILTIN_PMOVSDW256,
31320 IX86_BUILTIN_PMOVUSDW128,
31321 IX86_BUILTIN_PMOVUSDW256,
31322 IX86_BUILTIN_PMOVQB128,
31323 IX86_BUILTIN_PMOVQB256,
31324 IX86_BUILTIN_PMOVSQB128,
31325 IX86_BUILTIN_PMOVSQB256,
31326 IX86_BUILTIN_PMOVUSQB128,
31327 IX86_BUILTIN_PMOVUSQB256,
31328 IX86_BUILTIN_PMOVQW128,
31329 IX86_BUILTIN_PMOVQW256,
31330 IX86_BUILTIN_PMOVSQW128,
31331 IX86_BUILTIN_PMOVSQW256,
31332 IX86_BUILTIN_PMOVUSQW128,
31333 IX86_BUILTIN_PMOVUSQW256,
31334 IX86_BUILTIN_PMOVQD128,
31335 IX86_BUILTIN_PMOVQD256,
31336 IX86_BUILTIN_PMOVSQD128,
31337 IX86_BUILTIN_PMOVSQD256,
31338 IX86_BUILTIN_PMOVUSQD128,
31339 IX86_BUILTIN_PMOVUSQD256,
31340 IX86_BUILTIN_RANGEPD256,
31341 IX86_BUILTIN_RANGEPD128,
31342 IX86_BUILTIN_RANGEPS256,
31343 IX86_BUILTIN_RANGEPS128,
31344 IX86_BUILTIN_GETEXPPS256,
31345 IX86_BUILTIN_GETEXPPD256,
31346 IX86_BUILTIN_GETEXPPS128,
31347 IX86_BUILTIN_GETEXPPD128,
31348 IX86_BUILTIN_FIXUPIMMPD256_MASK,
31349 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
31350 IX86_BUILTIN_FIXUPIMMPS256_MASK,
31351 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
31352 IX86_BUILTIN_FIXUPIMMPD128_MASK,
31353 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
31354 IX86_BUILTIN_FIXUPIMMPS128_MASK,
31355 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
31356 IX86_BUILTIN_PABSQ256,
31357 IX86_BUILTIN_PABSQ128,
31358 IX86_BUILTIN_PABSD256_MASK,
31359 IX86_BUILTIN_PABSD128_MASK,
31360 IX86_BUILTIN_PMULHRSW256_MASK,
31361 IX86_BUILTIN_PMULHRSW128_MASK,
31362 IX86_BUILTIN_PMULHUW128_MASK,
31363 IX86_BUILTIN_PMULHUW256_MASK,
31364 IX86_BUILTIN_PMULHW256_MASK,
31365 IX86_BUILTIN_PMULHW128_MASK,
31366 IX86_BUILTIN_PMULLW256_MASK,
31367 IX86_BUILTIN_PMULLW128_MASK,
31368 IX86_BUILTIN_PMULLQ256,
31369 IX86_BUILTIN_PMULLQ128,
31370 IX86_BUILTIN_ANDPD256_MASK,
31371 IX86_BUILTIN_ANDPD128_MASK,
31372 IX86_BUILTIN_ANDPS256_MASK,
31373 IX86_BUILTIN_ANDPS128_MASK,
31374 IX86_BUILTIN_ANDNPD256_MASK,
31375 IX86_BUILTIN_ANDNPD128_MASK,
31376 IX86_BUILTIN_ANDNPS256_MASK,
31377 IX86_BUILTIN_ANDNPS128_MASK,
31378 IX86_BUILTIN_PSLLWI128_MASK,
31379 IX86_BUILTIN_PSLLDI128_MASK,
31380 IX86_BUILTIN_PSLLQI128_MASK,
31381 IX86_BUILTIN_PSLLW128_MASK,
31382 IX86_BUILTIN_PSLLD128_MASK,
31383 IX86_BUILTIN_PSLLQ128_MASK,
31384 IX86_BUILTIN_PSLLWI256_MASK ,
31385 IX86_BUILTIN_PSLLW256_MASK,
31386 IX86_BUILTIN_PSLLDI256_MASK,
31387 IX86_BUILTIN_PSLLD256_MASK,
31388 IX86_BUILTIN_PSLLQI256_MASK,
31389 IX86_BUILTIN_PSLLQ256_MASK,
31390 IX86_BUILTIN_PSRADI128_MASK,
31391 IX86_BUILTIN_PSRAD128_MASK,
31392 IX86_BUILTIN_PSRADI256_MASK,
31393 IX86_BUILTIN_PSRAD256_MASK,
31394 IX86_BUILTIN_PSRAQI128_MASK,
31395 IX86_BUILTIN_PSRAQ128_MASK,
31396 IX86_BUILTIN_PSRAQI256_MASK,
31397 IX86_BUILTIN_PSRAQ256_MASK,
31398 IX86_BUILTIN_PANDD256,
31399 IX86_BUILTIN_PANDD128,
31400 IX86_BUILTIN_PSRLDI128_MASK,
31401 IX86_BUILTIN_PSRLD128_MASK,
31402 IX86_BUILTIN_PSRLDI256_MASK,
31403 IX86_BUILTIN_PSRLD256_MASK,
31404 IX86_BUILTIN_PSRLQI128_MASK,
31405 IX86_BUILTIN_PSRLQ128_MASK,
31406 IX86_BUILTIN_PSRLQI256_MASK,
31407 IX86_BUILTIN_PSRLQ256_MASK,
31408 IX86_BUILTIN_PANDQ256,
31409 IX86_BUILTIN_PANDQ128,
31410 IX86_BUILTIN_PANDND256,
31411 IX86_BUILTIN_PANDND128,
31412 IX86_BUILTIN_PANDNQ256,
31413 IX86_BUILTIN_PANDNQ128,
31414 IX86_BUILTIN_PORD256,
31415 IX86_BUILTIN_PORD128,
31416 IX86_BUILTIN_PORQ256,
31417 IX86_BUILTIN_PORQ128,
31418 IX86_BUILTIN_PXORD256,
31419 IX86_BUILTIN_PXORD128,
31420 IX86_BUILTIN_PXORQ256,
31421 IX86_BUILTIN_PXORQ128,
31422 IX86_BUILTIN_PACKSSWB256_MASK,
31423 IX86_BUILTIN_PACKSSWB128_MASK,
31424 IX86_BUILTIN_PACKUSWB256_MASK,
31425 IX86_BUILTIN_PACKUSWB128_MASK,
31426 IX86_BUILTIN_RNDSCALEPS256,
31427 IX86_BUILTIN_RNDSCALEPD256,
31428 IX86_BUILTIN_RNDSCALEPS128,
31429 IX86_BUILTIN_RNDSCALEPD128,
31430 IX86_BUILTIN_VTERNLOGQ256_MASK,
31431 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
31432 IX86_BUILTIN_VTERNLOGD256_MASK,
31433 IX86_BUILTIN_VTERNLOGD256_MASKZ,
31434 IX86_BUILTIN_VTERNLOGQ128_MASK,
31435 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
31436 IX86_BUILTIN_VTERNLOGD128_MASK,
31437 IX86_BUILTIN_VTERNLOGD128_MASKZ,
31438 IX86_BUILTIN_SCALEFPD256,
31439 IX86_BUILTIN_SCALEFPS256,
31440 IX86_BUILTIN_SCALEFPD128,
31441 IX86_BUILTIN_SCALEFPS128,
31442 IX86_BUILTIN_VFMADDPD256_MASK,
31443 IX86_BUILTIN_VFMADDPD256_MASK3,
31444 IX86_BUILTIN_VFMADDPD256_MASKZ,
31445 IX86_BUILTIN_VFMADDPD128_MASK,
31446 IX86_BUILTIN_VFMADDPD128_MASK3,
31447 IX86_BUILTIN_VFMADDPD128_MASKZ,
31448 IX86_BUILTIN_VFMADDPS256_MASK,
31449 IX86_BUILTIN_VFMADDPS256_MASK3,
31450 IX86_BUILTIN_VFMADDPS256_MASKZ,
31451 IX86_BUILTIN_VFMADDPS128_MASK,
31452 IX86_BUILTIN_VFMADDPS128_MASK3,
31453 IX86_BUILTIN_VFMADDPS128_MASKZ,
31454 IX86_BUILTIN_VFMSUBPD256_MASK3,
31455 IX86_BUILTIN_VFMSUBPD128_MASK3,
31456 IX86_BUILTIN_VFMSUBPS256_MASK3,
31457 IX86_BUILTIN_VFMSUBPS128_MASK3,
31458 IX86_BUILTIN_VFNMADDPD256_MASK,
31459 IX86_BUILTIN_VFNMADDPD128_MASK,
31460 IX86_BUILTIN_VFNMADDPS256_MASK,
31461 IX86_BUILTIN_VFNMADDPS128_MASK,
31462 IX86_BUILTIN_VFNMSUBPD256_MASK,
31463 IX86_BUILTIN_VFNMSUBPD256_MASK3,
31464 IX86_BUILTIN_VFNMSUBPD128_MASK,
31465 IX86_BUILTIN_VFNMSUBPD128_MASK3,
31466 IX86_BUILTIN_VFNMSUBPS256_MASK,
31467 IX86_BUILTIN_VFNMSUBPS256_MASK3,
31468 IX86_BUILTIN_VFNMSUBPS128_MASK,
31469 IX86_BUILTIN_VFNMSUBPS128_MASK3,
31470 IX86_BUILTIN_VFMADDSUBPD256_MASK,
31471 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
31472 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
31473 IX86_BUILTIN_VFMADDSUBPD128_MASK,
31474 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
31475 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
31476 IX86_BUILTIN_VFMADDSUBPS256_MASK,
31477 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
31478 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
31479 IX86_BUILTIN_VFMADDSUBPS128_MASK,
31480 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
31481 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
31482 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
31483 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
31484 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
31485 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
31486 IX86_BUILTIN_INSERTF64X2_256,
31487 IX86_BUILTIN_INSERTI64X2_256,
31488 IX86_BUILTIN_PSRAVV16HI,
31489 IX86_BUILTIN_PSRAVV8HI,
31490 IX86_BUILTIN_PMADDUBSW256_MASK,
31491 IX86_BUILTIN_PMADDUBSW128_MASK,
31492 IX86_BUILTIN_PMADDWD256_MASK,
31493 IX86_BUILTIN_PMADDWD128_MASK,
31494 IX86_BUILTIN_PSRLVV16HI,
31495 IX86_BUILTIN_PSRLVV8HI,
31496 IX86_BUILTIN_CVTPS2DQ256_MASK,
31497 IX86_BUILTIN_CVTPS2DQ128_MASK,
31498 IX86_BUILTIN_CVTPS2UDQ256,
31499 IX86_BUILTIN_CVTPS2UDQ128,
31500 IX86_BUILTIN_CVTPS2QQ256,
31501 IX86_BUILTIN_CVTPS2QQ128,
31502 IX86_BUILTIN_CVTPS2UQQ256,
31503 IX86_BUILTIN_CVTPS2UQQ128,
31504 IX86_BUILTIN_GETMANTPS256,
31505 IX86_BUILTIN_GETMANTPS128,
31506 IX86_BUILTIN_GETMANTPD256,
31507 IX86_BUILTIN_GETMANTPD128,
31508 IX86_BUILTIN_MOVDDUP256_MASK,
31509 IX86_BUILTIN_MOVDDUP128_MASK,
31510 IX86_BUILTIN_MOVSHDUP256_MASK,
31511 IX86_BUILTIN_MOVSHDUP128_MASK,
31512 IX86_BUILTIN_MOVSLDUP256_MASK,
31513 IX86_BUILTIN_MOVSLDUP128_MASK,
31514 IX86_BUILTIN_CVTQQ2PS256,
31515 IX86_BUILTIN_CVTQQ2PS128,
31516 IX86_BUILTIN_CVTUQQ2PS256,
31517 IX86_BUILTIN_CVTUQQ2PS128,
31518 IX86_BUILTIN_CVTQQ2PD256,
31519 IX86_BUILTIN_CVTQQ2PD128,
31520 IX86_BUILTIN_CVTUQQ2PD256,
31521 IX86_BUILTIN_CVTUQQ2PD128,
31522 IX86_BUILTIN_VPERMT2VARQ256,
31523 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
31524 IX86_BUILTIN_VPERMT2VARD256,
31525 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
31526 IX86_BUILTIN_VPERMI2VARQ256,
31527 IX86_BUILTIN_VPERMI2VARD256,
31528 IX86_BUILTIN_VPERMT2VARPD256,
31529 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
31530 IX86_BUILTIN_VPERMT2VARPS256,
31531 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
31532 IX86_BUILTIN_VPERMI2VARPD256,
31533 IX86_BUILTIN_VPERMI2VARPS256,
31534 IX86_BUILTIN_VPERMT2VARQ128,
31535 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
31536 IX86_BUILTIN_VPERMT2VARD128,
31537 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
31538 IX86_BUILTIN_VPERMI2VARQ128,
31539 IX86_BUILTIN_VPERMI2VARD128,
31540 IX86_BUILTIN_VPERMT2VARPD128,
31541 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
31542 IX86_BUILTIN_VPERMT2VARPS128,
31543 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
31544 IX86_BUILTIN_VPERMI2VARPD128,
31545 IX86_BUILTIN_VPERMI2VARPS128,
31546 IX86_BUILTIN_PSHUFB256_MASK,
31547 IX86_BUILTIN_PSHUFB128_MASK,
31548 IX86_BUILTIN_PSHUFHW256_MASK,
31549 IX86_BUILTIN_PSHUFHW128_MASK,
31550 IX86_BUILTIN_PSHUFLW256_MASK,
31551 IX86_BUILTIN_PSHUFLW128_MASK,
31552 IX86_BUILTIN_PSHUFD256_MASK,
31553 IX86_BUILTIN_PSHUFD128_MASK,
31554 IX86_BUILTIN_SHUFPD256_MASK,
31555 IX86_BUILTIN_SHUFPD128_MASK,
31556 IX86_BUILTIN_SHUFPS256_MASK,
31557 IX86_BUILTIN_SHUFPS128_MASK,
31558 IX86_BUILTIN_PROLVQ256,
31559 IX86_BUILTIN_PROLVQ128,
31560 IX86_BUILTIN_PROLQ256,
31561 IX86_BUILTIN_PROLQ128,
31562 IX86_BUILTIN_PRORVQ256,
31563 IX86_BUILTIN_PRORVQ128,
31564 IX86_BUILTIN_PRORQ256,
31565 IX86_BUILTIN_PRORQ128,
31566 IX86_BUILTIN_PSRAVQ128,
31567 IX86_BUILTIN_PSRAVQ256,
31568 IX86_BUILTIN_PSLLVV4DI_MASK,
31569 IX86_BUILTIN_PSLLVV2DI_MASK,
31570 IX86_BUILTIN_PSLLVV8SI_MASK,
31571 IX86_BUILTIN_PSLLVV4SI_MASK,
31572 IX86_BUILTIN_PSRAVV8SI_MASK,
31573 IX86_BUILTIN_PSRAVV4SI_MASK,
31574 IX86_BUILTIN_PSRLVV4DI_MASK,
31575 IX86_BUILTIN_PSRLVV2DI_MASK,
31576 IX86_BUILTIN_PSRLVV8SI_MASK,
31577 IX86_BUILTIN_PSRLVV4SI_MASK,
31578 IX86_BUILTIN_PSRAWI256_MASK,
31579 IX86_BUILTIN_PSRAW256_MASK,
31580 IX86_BUILTIN_PSRAWI128_MASK,
31581 IX86_BUILTIN_PSRAW128_MASK,
31582 IX86_BUILTIN_PSRLWI256_MASK,
31583 IX86_BUILTIN_PSRLW256_MASK,
31584 IX86_BUILTIN_PSRLWI128_MASK,
31585 IX86_BUILTIN_PSRLW128_MASK,
31586 IX86_BUILTIN_PRORVD256,
31587 IX86_BUILTIN_PROLVD256,
31588 IX86_BUILTIN_PRORD256,
31589 IX86_BUILTIN_PROLD256,
31590 IX86_BUILTIN_PRORVD128,
31591 IX86_BUILTIN_PROLVD128,
31592 IX86_BUILTIN_PRORD128,
31593 IX86_BUILTIN_PROLD128,
31594 IX86_BUILTIN_FPCLASSPD256,
31595 IX86_BUILTIN_FPCLASSPD128,
31596 IX86_BUILTIN_FPCLASSSD,
31597 IX86_BUILTIN_FPCLASSPS256,
31598 IX86_BUILTIN_FPCLASSPS128,
31599 IX86_BUILTIN_FPCLASSSS,
31600 IX86_BUILTIN_CVTB2MASK128,
31601 IX86_BUILTIN_CVTB2MASK256,
31602 IX86_BUILTIN_CVTW2MASK128,
31603 IX86_BUILTIN_CVTW2MASK256,
31604 IX86_BUILTIN_CVTD2MASK128,
31605 IX86_BUILTIN_CVTD2MASK256,
31606 IX86_BUILTIN_CVTQ2MASK128,
31607 IX86_BUILTIN_CVTQ2MASK256,
31608 IX86_BUILTIN_CVTMASK2B128,
31609 IX86_BUILTIN_CVTMASK2B256,
31610 IX86_BUILTIN_CVTMASK2W128,
31611 IX86_BUILTIN_CVTMASK2W256,
31612 IX86_BUILTIN_CVTMASK2D128,
31613 IX86_BUILTIN_CVTMASK2D256,
31614 IX86_BUILTIN_CVTMASK2Q128,
31615 IX86_BUILTIN_CVTMASK2Q256,
31616 IX86_BUILTIN_PCMPEQB128_MASK,
31617 IX86_BUILTIN_PCMPEQB256_MASK,
31618 IX86_BUILTIN_PCMPEQW128_MASK,
31619 IX86_BUILTIN_PCMPEQW256_MASK,
31620 IX86_BUILTIN_PCMPEQD128_MASK,
31621 IX86_BUILTIN_PCMPEQD256_MASK,
31622 IX86_BUILTIN_PCMPEQQ128_MASK,
31623 IX86_BUILTIN_PCMPEQQ256_MASK,
31624 IX86_BUILTIN_PCMPGTB128_MASK,
31625 IX86_BUILTIN_PCMPGTB256_MASK,
31626 IX86_BUILTIN_PCMPGTW128_MASK,
31627 IX86_BUILTIN_PCMPGTW256_MASK,
31628 IX86_BUILTIN_PCMPGTD128_MASK,
31629 IX86_BUILTIN_PCMPGTD256_MASK,
31630 IX86_BUILTIN_PCMPGTQ128_MASK,
31631 IX86_BUILTIN_PCMPGTQ256_MASK,
31632 IX86_BUILTIN_PTESTMB128,
31633 IX86_BUILTIN_PTESTMB256,
31634 IX86_BUILTIN_PTESTMW128,
31635 IX86_BUILTIN_PTESTMW256,
31636 IX86_BUILTIN_PTESTMD128,
31637 IX86_BUILTIN_PTESTMD256,
31638 IX86_BUILTIN_PTESTMQ128,
31639 IX86_BUILTIN_PTESTMQ256,
31640 IX86_BUILTIN_PTESTNMB128,
31641 IX86_BUILTIN_PTESTNMB256,
31642 IX86_BUILTIN_PTESTNMW128,
31643 IX86_BUILTIN_PTESTNMW256,
31644 IX86_BUILTIN_PTESTNMD128,
31645 IX86_BUILTIN_PTESTNMD256,
31646 IX86_BUILTIN_PTESTNMQ128,
31647 IX86_BUILTIN_PTESTNMQ256,
31648 IX86_BUILTIN_PBROADCASTMB128,
31649 IX86_BUILTIN_PBROADCASTMB256,
31650 IX86_BUILTIN_PBROADCASTMW128,
31651 IX86_BUILTIN_PBROADCASTMW256,
31652 IX86_BUILTIN_COMPRESSPD256,
31653 IX86_BUILTIN_COMPRESSPD128,
31654 IX86_BUILTIN_COMPRESSPS256,
31655 IX86_BUILTIN_COMPRESSPS128,
31656 IX86_BUILTIN_PCOMPRESSQ256,
31657 IX86_BUILTIN_PCOMPRESSQ128,
31658 IX86_BUILTIN_PCOMPRESSD256,
31659 IX86_BUILTIN_PCOMPRESSD128,
31660 IX86_BUILTIN_EXPANDPD256,
31661 IX86_BUILTIN_EXPANDPD128,
31662 IX86_BUILTIN_EXPANDPS256,
31663 IX86_BUILTIN_EXPANDPS128,
31664 IX86_BUILTIN_PEXPANDQ256,
31665 IX86_BUILTIN_PEXPANDQ128,
31666 IX86_BUILTIN_PEXPANDD256,
31667 IX86_BUILTIN_PEXPANDD128,
31668 IX86_BUILTIN_EXPANDPD256Z,
31669 IX86_BUILTIN_EXPANDPD128Z,
31670 IX86_BUILTIN_EXPANDPS256Z,
31671 IX86_BUILTIN_EXPANDPS128Z,
31672 IX86_BUILTIN_PEXPANDQ256Z,
31673 IX86_BUILTIN_PEXPANDQ128Z,
31674 IX86_BUILTIN_PEXPANDD256Z,
31675 IX86_BUILTIN_PEXPANDD128Z,
31676 IX86_BUILTIN_PMAXSD256_MASK,
31677 IX86_BUILTIN_PMINSD256_MASK,
31678 IX86_BUILTIN_PMAXUD256_MASK,
31679 IX86_BUILTIN_PMINUD256_MASK,
31680 IX86_BUILTIN_PMAXSD128_MASK,
31681 IX86_BUILTIN_PMINSD128_MASK,
31682 IX86_BUILTIN_PMAXUD128_MASK,
31683 IX86_BUILTIN_PMINUD128_MASK,
31684 IX86_BUILTIN_PMAXSQ256_MASK,
31685 IX86_BUILTIN_PMINSQ256_MASK,
31686 IX86_BUILTIN_PMAXUQ256_MASK,
31687 IX86_BUILTIN_PMINUQ256_MASK,
31688 IX86_BUILTIN_PMAXSQ128_MASK,
31689 IX86_BUILTIN_PMINSQ128_MASK,
31690 IX86_BUILTIN_PMAXUQ128_MASK,
31691 IX86_BUILTIN_PMINUQ128_MASK,
31692 IX86_BUILTIN_PMINSB256_MASK,
31693 IX86_BUILTIN_PMINUB256_MASK,
31694 IX86_BUILTIN_PMAXSB256_MASK,
31695 IX86_BUILTIN_PMAXUB256_MASK,
31696 IX86_BUILTIN_PMINSB128_MASK,
31697 IX86_BUILTIN_PMINUB128_MASK,
31698 IX86_BUILTIN_PMAXSB128_MASK,
31699 IX86_BUILTIN_PMAXUB128_MASK,
31700 IX86_BUILTIN_PMINSW256_MASK,
31701 IX86_BUILTIN_PMINUW256_MASK,
31702 IX86_BUILTIN_PMAXSW256_MASK,
31703 IX86_BUILTIN_PMAXUW256_MASK,
31704 IX86_BUILTIN_PMINSW128_MASK,
31705 IX86_BUILTIN_PMINUW128_MASK,
31706 IX86_BUILTIN_PMAXSW128_MASK,
31707 IX86_BUILTIN_PMAXUW128_MASK,
31708 IX86_BUILTIN_VPCONFLICTQ256,
31709 IX86_BUILTIN_VPCONFLICTD256,
31710 IX86_BUILTIN_VPCLZCNTQ256,
31711 IX86_BUILTIN_VPCLZCNTD256,
31712 IX86_BUILTIN_UNPCKHPD256_MASK,
31713 IX86_BUILTIN_UNPCKHPD128_MASK,
31714 IX86_BUILTIN_UNPCKHPS256_MASK,
31715 IX86_BUILTIN_UNPCKHPS128_MASK,
31716 IX86_BUILTIN_UNPCKLPD256_MASK,
31717 IX86_BUILTIN_UNPCKLPD128_MASK,
31718 IX86_BUILTIN_UNPCKLPS256_MASK,
31719 IX86_BUILTIN_VPCONFLICTQ128,
31720 IX86_BUILTIN_VPCONFLICTD128,
31721 IX86_BUILTIN_VPCLZCNTQ128,
31722 IX86_BUILTIN_VPCLZCNTD128,
31723 IX86_BUILTIN_UNPCKLPS128_MASK,
31724 IX86_BUILTIN_ALIGND256,
31725 IX86_BUILTIN_ALIGNQ256,
31726 IX86_BUILTIN_ALIGND128,
31727 IX86_BUILTIN_ALIGNQ128,
31728 IX86_BUILTIN_CVTPS2PH256_MASK,
31729 IX86_BUILTIN_CVTPS2PH_MASK,
31730 IX86_BUILTIN_CVTPH2PS_MASK,
31731 IX86_BUILTIN_CVTPH2PS256_MASK,
31732 IX86_BUILTIN_PUNPCKHDQ128_MASK,
31733 IX86_BUILTIN_PUNPCKHDQ256_MASK,
31734 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
31735 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
31736 IX86_BUILTIN_PUNPCKLDQ128_MASK,
31737 IX86_BUILTIN_PUNPCKLDQ256_MASK,
31738 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
31739 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
31740 IX86_BUILTIN_PUNPCKHBW128_MASK,
31741 IX86_BUILTIN_PUNPCKHBW256_MASK,
31742 IX86_BUILTIN_PUNPCKHWD128_MASK,
31743 IX86_BUILTIN_PUNPCKHWD256_MASK,
31744 IX86_BUILTIN_PUNPCKLBW128_MASK,
31745 IX86_BUILTIN_PUNPCKLBW256_MASK,
31746 IX86_BUILTIN_PUNPCKLWD128_MASK,
31747 IX86_BUILTIN_PUNPCKLWD256_MASK,
31748 IX86_BUILTIN_PSLLVV16HI,
31749 IX86_BUILTIN_PSLLVV8HI,
31750 IX86_BUILTIN_PACKSSDW256_MASK,
31751 IX86_BUILTIN_PACKSSDW128_MASK,
31752 IX86_BUILTIN_PACKUSDW256_MASK,
31753 IX86_BUILTIN_PACKUSDW128_MASK,
31754 IX86_BUILTIN_PAVGB256_MASK,
31755 IX86_BUILTIN_PAVGW256_MASK,
31756 IX86_BUILTIN_PAVGB128_MASK,
31757 IX86_BUILTIN_PAVGW128_MASK,
31758 IX86_BUILTIN_VPERMVARSF256_MASK,
31759 IX86_BUILTIN_VPERMVARDF256_MASK,
31760 IX86_BUILTIN_VPERMDF256_MASK,
31761 IX86_BUILTIN_PABSB256_MASK,
31762 IX86_BUILTIN_PABSB128_MASK,
31763 IX86_BUILTIN_PABSW256_MASK,
31764 IX86_BUILTIN_PABSW128_MASK,
31765 IX86_BUILTIN_VPERMILVARPD_MASK,
31766 IX86_BUILTIN_VPERMILVARPS_MASK,
31767 IX86_BUILTIN_VPERMILVARPD256_MASK,
31768 IX86_BUILTIN_VPERMILVARPS256_MASK,
31769 IX86_BUILTIN_VPERMILPD_MASK,
31770 IX86_BUILTIN_VPERMILPS_MASK,
31771 IX86_BUILTIN_VPERMILPD256_MASK,
31772 IX86_BUILTIN_VPERMILPS256_MASK,
31773 IX86_BUILTIN_BLENDMQ256,
31774 IX86_BUILTIN_BLENDMD256,
31775 IX86_BUILTIN_BLENDMPD256,
31776 IX86_BUILTIN_BLENDMPS256,
31777 IX86_BUILTIN_BLENDMQ128,
31778 IX86_BUILTIN_BLENDMD128,
31779 IX86_BUILTIN_BLENDMPD128,
31780 IX86_BUILTIN_BLENDMPS128,
31781 IX86_BUILTIN_BLENDMW256,
31782 IX86_BUILTIN_BLENDMB256,
31783 IX86_BUILTIN_BLENDMW128,
31784 IX86_BUILTIN_BLENDMB128,
31785 IX86_BUILTIN_PMULLD256_MASK,
31786 IX86_BUILTIN_PMULLD128_MASK,
31787 IX86_BUILTIN_PMULUDQ256_MASK,
31788 IX86_BUILTIN_PMULDQ256_MASK,
31789 IX86_BUILTIN_PMULDQ128_MASK,
31790 IX86_BUILTIN_PMULUDQ128_MASK,
31791 IX86_BUILTIN_CVTPD2PS256_MASK,
31792 IX86_BUILTIN_CVTPD2PS_MASK,
31793 IX86_BUILTIN_VPERMVARSI256_MASK,
31794 IX86_BUILTIN_VPERMVARDI256_MASK,
31795 IX86_BUILTIN_VPERMDI256_MASK,
31796 IX86_BUILTIN_CMPQ256,
31797 IX86_BUILTIN_CMPD256,
31798 IX86_BUILTIN_UCMPQ256,
31799 IX86_BUILTIN_UCMPD256,
31800 IX86_BUILTIN_CMPB256,
31801 IX86_BUILTIN_CMPW256,
31802 IX86_BUILTIN_UCMPB256,
31803 IX86_BUILTIN_UCMPW256,
31804 IX86_BUILTIN_CMPPD256_MASK,
31805 IX86_BUILTIN_CMPPS256_MASK,
31806 IX86_BUILTIN_CMPQ128,
31807 IX86_BUILTIN_CMPD128,
31808 IX86_BUILTIN_UCMPQ128,
31809 IX86_BUILTIN_UCMPD128,
31810 IX86_BUILTIN_CMPB128,
31811 IX86_BUILTIN_CMPW128,
31812 IX86_BUILTIN_UCMPB128,
31813 IX86_BUILTIN_UCMPW128,
31814 IX86_BUILTIN_CMPPD128_MASK,
31815 IX86_BUILTIN_CMPPS128_MASK,
31817 IX86_BUILTIN_GATHER3SIV8SF,
31818 IX86_BUILTIN_GATHER3SIV4SF,
31819 IX86_BUILTIN_GATHER3SIV4DF,
31820 IX86_BUILTIN_GATHER3SIV2DF,
31821 IX86_BUILTIN_GATHER3DIV8SF,
31822 IX86_BUILTIN_GATHER3DIV4SF,
31823 IX86_BUILTIN_GATHER3DIV4DF,
31824 IX86_BUILTIN_GATHER3DIV2DF,
31825 IX86_BUILTIN_GATHER3SIV8SI,
31826 IX86_BUILTIN_GATHER3SIV4SI,
31827 IX86_BUILTIN_GATHER3SIV4DI,
31828 IX86_BUILTIN_GATHER3SIV2DI,
31829 IX86_BUILTIN_GATHER3DIV8SI,
31830 IX86_BUILTIN_GATHER3DIV4SI,
31831 IX86_BUILTIN_GATHER3DIV4DI,
31832 IX86_BUILTIN_GATHER3DIV2DI,
31833 IX86_BUILTIN_SCATTERSIV8SF,
31834 IX86_BUILTIN_SCATTERSIV4SF,
31835 IX86_BUILTIN_SCATTERSIV4DF,
31836 IX86_BUILTIN_SCATTERSIV2DF,
31837 IX86_BUILTIN_SCATTERDIV8SF,
31838 IX86_BUILTIN_SCATTERDIV4SF,
31839 IX86_BUILTIN_SCATTERDIV4DF,
31840 IX86_BUILTIN_SCATTERDIV2DF,
31841 IX86_BUILTIN_SCATTERSIV8SI,
31842 IX86_BUILTIN_SCATTERSIV4SI,
31843 IX86_BUILTIN_SCATTERSIV4DI,
31844 IX86_BUILTIN_SCATTERSIV2DI,
31845 IX86_BUILTIN_SCATTERDIV8SI,
31846 IX86_BUILTIN_SCATTERDIV4SI,
31847 IX86_BUILTIN_SCATTERDIV4DI,
31848 IX86_BUILTIN_SCATTERDIV2DI,
31851 IX86_BUILTIN_RANGESD128,
31852 IX86_BUILTIN_RANGESS128,
31853 IX86_BUILTIN_KUNPCKWD,
31854 IX86_BUILTIN_KUNPCKDQ,
31855 IX86_BUILTIN_BROADCASTF32x2_512,
31856 IX86_BUILTIN_BROADCASTI32x2_512,
31857 IX86_BUILTIN_BROADCASTF64X2_512,
31858 IX86_BUILTIN_BROADCASTI64X2_512,
31859 IX86_BUILTIN_BROADCASTF32X8_512,
31860 IX86_BUILTIN_BROADCASTI32X8_512,
31861 IX86_BUILTIN_EXTRACTF64X2_512,
31862 IX86_BUILTIN_EXTRACTF32X8,
31863 IX86_BUILTIN_EXTRACTI64X2_512,
31864 IX86_BUILTIN_EXTRACTI32X8,
31865 IX86_BUILTIN_REDUCEPD512_MASK,
31866 IX86_BUILTIN_REDUCEPS512_MASK,
31867 IX86_BUILTIN_PMULLQ512,
31868 IX86_BUILTIN_XORPD512,
31869 IX86_BUILTIN_XORPS512,
31870 IX86_BUILTIN_ORPD512,
31871 IX86_BUILTIN_ORPS512,
31872 IX86_BUILTIN_ANDPD512,
31873 IX86_BUILTIN_ANDPS512,
31874 IX86_BUILTIN_ANDNPD512,
31875 IX86_BUILTIN_ANDNPS512,
31876 IX86_BUILTIN_INSERTF32X8,
31877 IX86_BUILTIN_INSERTI32X8,
31878 IX86_BUILTIN_INSERTF64X2_512,
31879 IX86_BUILTIN_INSERTI64X2_512,
31880 IX86_BUILTIN_FPCLASSPD512,
31881 IX86_BUILTIN_FPCLASSPS512,
31882 IX86_BUILTIN_CVTD2MASK512,
31883 IX86_BUILTIN_CVTQ2MASK512,
31884 IX86_BUILTIN_CVTMASK2D512,
31885 IX86_BUILTIN_CVTMASK2Q512,
31886 IX86_BUILTIN_CVTPD2QQ512,
31887 IX86_BUILTIN_CVTPS2QQ512,
31888 IX86_BUILTIN_CVTPD2UQQ512,
31889 IX86_BUILTIN_CVTPS2UQQ512,
31890 IX86_BUILTIN_CVTQQ2PS512,
31891 IX86_BUILTIN_CVTUQQ2PS512,
31892 IX86_BUILTIN_CVTQQ2PD512,
31893 IX86_BUILTIN_CVTUQQ2PD512,
31894 IX86_BUILTIN_CVTTPS2QQ512,
31895 IX86_BUILTIN_CVTTPS2UQQ512,
31896 IX86_BUILTIN_CVTTPD2QQ512,
31897 IX86_BUILTIN_CVTTPD2UQQ512,
31898 IX86_BUILTIN_RANGEPS512,
31899 IX86_BUILTIN_RANGEPD512,
31902 IX86_BUILTIN_PACKUSDW512,
31903 IX86_BUILTIN_PACKSSDW512,
31904 IX86_BUILTIN_LOADDQUHI512_MASK,
31905 IX86_BUILTIN_LOADDQUQI512_MASK,
31906 IX86_BUILTIN_PSLLDQ512,
31907 IX86_BUILTIN_PSRLDQ512,
31908 IX86_BUILTIN_STOREDQUHI512_MASK,
31909 IX86_BUILTIN_STOREDQUQI512_MASK,
31910 IX86_BUILTIN_PALIGNR512,
31911 IX86_BUILTIN_PALIGNR512_MASK,
31912 IX86_BUILTIN_MOVDQUHI512_MASK,
31913 IX86_BUILTIN_MOVDQUQI512_MASK,
31914 IX86_BUILTIN_PSADBW512,
31915 IX86_BUILTIN_DBPSADBW512,
31916 IX86_BUILTIN_PBROADCASTB512,
31917 IX86_BUILTIN_PBROADCASTB512_GPR,
31918 IX86_BUILTIN_PBROADCASTW512,
31919 IX86_BUILTIN_PBROADCASTW512_GPR,
31920 IX86_BUILTIN_PMOVSXBW512_MASK,
31921 IX86_BUILTIN_PMOVZXBW512_MASK,
31922 IX86_BUILTIN_VPERMVARHI512_MASK,
31923 IX86_BUILTIN_VPERMT2VARHI512,
31924 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
31925 IX86_BUILTIN_VPERMI2VARHI512,
31926 IX86_BUILTIN_PAVGB512,
31927 IX86_BUILTIN_PAVGW512,
31928 IX86_BUILTIN_PADDB512,
31929 IX86_BUILTIN_PSUBB512,
31930 IX86_BUILTIN_PSUBSB512,
31931 IX86_BUILTIN_PADDSB512,
31932 IX86_BUILTIN_PSUBUSB512,
31933 IX86_BUILTIN_PADDUSB512,
31934 IX86_BUILTIN_PSUBW512,
31935 IX86_BUILTIN_PADDW512,
31936 IX86_BUILTIN_PSUBSW512,
31937 IX86_BUILTIN_PADDSW512,
31938 IX86_BUILTIN_PSUBUSW512,
31939 IX86_BUILTIN_PADDUSW512,
31940 IX86_BUILTIN_PMAXUW512,
31941 IX86_BUILTIN_PMAXSW512,
31942 IX86_BUILTIN_PMINUW512,
31943 IX86_BUILTIN_PMINSW512,
31944 IX86_BUILTIN_PMAXUB512,
31945 IX86_BUILTIN_PMAXSB512,
31946 IX86_BUILTIN_PMINUB512,
31947 IX86_BUILTIN_PMINSB512,
31948 IX86_BUILTIN_PMOVWB512,
31949 IX86_BUILTIN_PMOVSWB512,
31950 IX86_BUILTIN_PMOVUSWB512,
31951 IX86_BUILTIN_PMULHRSW512_MASK,
31952 IX86_BUILTIN_PMULHUW512_MASK,
31953 IX86_BUILTIN_PMULHW512_MASK,
31954 IX86_BUILTIN_PMULLW512_MASK,
31955 IX86_BUILTIN_PSLLWI512_MASK,
31956 IX86_BUILTIN_PSLLW512_MASK,
31957 IX86_BUILTIN_PACKSSWB512,
31958 IX86_BUILTIN_PACKUSWB512,
31959 IX86_BUILTIN_PSRAVV32HI,
31960 IX86_BUILTIN_PMADDUBSW512_MASK,
31961 IX86_BUILTIN_PMADDWD512_MASK,
31962 IX86_BUILTIN_PSRLVV32HI,
31963 IX86_BUILTIN_PUNPCKHBW512,
31964 IX86_BUILTIN_PUNPCKHWD512,
31965 IX86_BUILTIN_PUNPCKLBW512,
31966 IX86_BUILTIN_PUNPCKLWD512,
31967 IX86_BUILTIN_PSHUFB512,
31968 IX86_BUILTIN_PSHUFHW512,
31969 IX86_BUILTIN_PSHUFLW512,
31970 IX86_BUILTIN_PSRAWI512,
31971 IX86_BUILTIN_PSRAW512,
31972 IX86_BUILTIN_PSRLWI512,
31973 IX86_BUILTIN_PSRLW512,
31974 IX86_BUILTIN_CVTB2MASK512,
31975 IX86_BUILTIN_CVTW2MASK512,
31976 IX86_BUILTIN_CVTMASK2B512,
31977 IX86_BUILTIN_CVTMASK2W512,
31978 IX86_BUILTIN_PCMPEQB512_MASK,
31979 IX86_BUILTIN_PCMPEQW512_MASK,
31980 IX86_BUILTIN_PCMPGTB512_MASK,
31981 IX86_BUILTIN_PCMPGTW512_MASK,
31982 IX86_BUILTIN_PTESTMB512,
31983 IX86_BUILTIN_PTESTMW512,
31984 IX86_BUILTIN_PTESTNMB512,
31985 IX86_BUILTIN_PTESTNMW512,
31986 IX86_BUILTIN_PSLLVV32HI,
31987 IX86_BUILTIN_PABSB512,
31988 IX86_BUILTIN_PABSW512,
31989 IX86_BUILTIN_BLENDMW512,
31990 IX86_BUILTIN_BLENDMB512,
31991 IX86_BUILTIN_CMPB512,
31992 IX86_BUILTIN_CMPW512,
31993 IX86_BUILTIN_UCMPB512,
31994 IX86_BUILTIN_UCMPW512,
31996 /* Alternate 4 and 8 element gather/scatter for the vectorizer
31997 where all operands are 32-byte or 64-byte wide respectively. */
31998 IX86_BUILTIN_GATHERALTSIV4DF,
31999 IX86_BUILTIN_GATHERALTDIV8SF,
32000 IX86_BUILTIN_GATHERALTSIV4DI,
32001 IX86_BUILTIN_GATHERALTDIV8SI,
32002 IX86_BUILTIN_GATHER3ALTDIV16SF,
32003 IX86_BUILTIN_GATHER3ALTDIV16SI,
32004 IX86_BUILTIN_GATHER3ALTSIV4DF,
32005 IX86_BUILTIN_GATHER3ALTDIV8SF,
32006 IX86_BUILTIN_GATHER3ALTSIV4DI,
32007 IX86_BUILTIN_GATHER3ALTDIV8SI,
32008 IX86_BUILTIN_GATHER3ALTSIV8DF,
32009 IX86_BUILTIN_GATHER3ALTSIV8DI,
32010 IX86_BUILTIN_GATHER3DIV16SF,
32011 IX86_BUILTIN_GATHER3DIV16SI,
32012 IX86_BUILTIN_GATHER3DIV8DF,
32013 IX86_BUILTIN_GATHER3DIV8DI,
32014 IX86_BUILTIN_GATHER3SIV16SF,
32015 IX86_BUILTIN_GATHER3SIV16SI,
32016 IX86_BUILTIN_GATHER3SIV8DF,
32017 IX86_BUILTIN_GATHER3SIV8DI,
32018 IX86_BUILTIN_SCATTERALTSIV8DF,
32019 IX86_BUILTIN_SCATTERALTDIV16SF,
32020 IX86_BUILTIN_SCATTERALTSIV8DI,
32021 IX86_BUILTIN_SCATTERALTDIV16SI,
32022 IX86_BUILTIN_SCATTERDIV16SF,
32023 IX86_BUILTIN_SCATTERDIV16SI,
32024 IX86_BUILTIN_SCATTERDIV8DF,
32025 IX86_BUILTIN_SCATTERDIV8DI,
32026 IX86_BUILTIN_SCATTERSIV16SF,
32027 IX86_BUILTIN_SCATTERSIV16SI,
32028 IX86_BUILTIN_SCATTERSIV8DF,
32029 IX86_BUILTIN_SCATTERSIV8DI,
32032 IX86_BUILTIN_GATHERPFQPD,
32033 IX86_BUILTIN_GATHERPFDPS,
32034 IX86_BUILTIN_GATHERPFDPD,
32035 IX86_BUILTIN_GATHERPFQPS,
32036 IX86_BUILTIN_SCATTERPFDPD,
32037 IX86_BUILTIN_SCATTERPFDPS,
32038 IX86_BUILTIN_SCATTERPFQPD,
32039 IX86_BUILTIN_SCATTERPFQPS,
32042 IX86_BUILTIN_EXP2PD_MASK,
32043 IX86_BUILTIN_EXP2PS_MASK,
32044 IX86_BUILTIN_EXP2PS,
32045 IX86_BUILTIN_RCP28PD,
32046 IX86_BUILTIN_RCP28PS,
32047 IX86_BUILTIN_RCP28SD,
32048 IX86_BUILTIN_RCP28SS,
32049 IX86_BUILTIN_RSQRT28PD,
32050 IX86_BUILTIN_RSQRT28PS,
32051 IX86_BUILTIN_RSQRT28SD,
32052 IX86_BUILTIN_RSQRT28SS,
32055 IX86_BUILTIN_VPMADD52LUQ512,
32056 IX86_BUILTIN_VPMADD52HUQ512,
32057 IX86_BUILTIN_VPMADD52LUQ256,
32058 IX86_BUILTIN_VPMADD52HUQ256,
32059 IX86_BUILTIN_VPMADD52LUQ128,
32060 IX86_BUILTIN_VPMADD52HUQ128,
32061 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
32062 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
32063 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
32064 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
32065 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
32066 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
32069 IX86_BUILTIN_VPMULTISHIFTQB512,
32070 IX86_BUILTIN_VPMULTISHIFTQB256,
32071 IX86_BUILTIN_VPMULTISHIFTQB128,
32072 IX86_BUILTIN_VPERMVARQI512_MASK,
32073 IX86_BUILTIN_VPERMT2VARQI512,
32074 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
32075 IX86_BUILTIN_VPERMI2VARQI512,
32076 IX86_BUILTIN_VPERMVARQI256_MASK,
32077 IX86_BUILTIN_VPERMVARQI128_MASK,
32078 IX86_BUILTIN_VPERMT2VARQI256,
32079 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
32080 IX86_BUILTIN_VPERMT2VARQI128,
32081 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
32082 IX86_BUILTIN_VPERMI2VARQI256,
32083 IX86_BUILTIN_VPERMI2VARQI128,
32085 /* SHA builtins. */
32086 IX86_BUILTIN_SHA1MSG1,
32087 IX86_BUILTIN_SHA1MSG2,
32088 IX86_BUILTIN_SHA1NEXTE,
32089 IX86_BUILTIN_SHA1RNDS4,
32090 IX86_BUILTIN_SHA256MSG1,
32091 IX86_BUILTIN_SHA256MSG2,
32092 IX86_BUILTIN_SHA256RNDS2,
32094 /* CLWB instructions. */
32097 /* PCOMMIT instructions. */
32098 IX86_BUILTIN_PCOMMIT,
32100 /* CLFLUSHOPT instructions. */
32101 IX86_BUILTIN_CLFLUSHOPT,
32103 /* TFmode support builtins. */
32105 IX86_BUILTIN_HUGE_VALQ,
32106 IX86_BUILTIN_FABSQ,
32107 IX86_BUILTIN_COPYSIGNQ,
32109 /* Vectorizer support builtins. */
32110 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
32111 IX86_BUILTIN_CPYSGNPS,
32112 IX86_BUILTIN_CPYSGNPD,
32113 IX86_BUILTIN_CPYSGNPS256,
32114 IX86_BUILTIN_CPYSGNPS512,
32115 IX86_BUILTIN_CPYSGNPD256,
32116 IX86_BUILTIN_CPYSGNPD512,
32117 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
32118 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
32121 /* FMA4 instructions. */
32122 IX86_BUILTIN_VFMADDSS,
32123 IX86_BUILTIN_VFMADDSD,
32124 IX86_BUILTIN_VFMADDPS,
32125 IX86_BUILTIN_VFMADDPD,
32126 IX86_BUILTIN_VFMADDPS256,
32127 IX86_BUILTIN_VFMADDPD256,
32128 IX86_BUILTIN_VFMADDSUBPS,
32129 IX86_BUILTIN_VFMADDSUBPD,
32130 IX86_BUILTIN_VFMADDSUBPS256,
32131 IX86_BUILTIN_VFMADDSUBPD256,
32133 /* FMA3 instructions. */
32134 IX86_BUILTIN_VFMADDSS3,
32135 IX86_BUILTIN_VFMADDSD3,
32137 /* XOP instructions. */
32138 IX86_BUILTIN_VPCMOV,
32139 IX86_BUILTIN_VPCMOV_V2DI,
32140 IX86_BUILTIN_VPCMOV_V4SI,
32141 IX86_BUILTIN_VPCMOV_V8HI,
32142 IX86_BUILTIN_VPCMOV_V16QI,
32143 IX86_BUILTIN_VPCMOV_V4SF,
32144 IX86_BUILTIN_VPCMOV_V2DF,
32145 IX86_BUILTIN_VPCMOV256,
32146 IX86_BUILTIN_VPCMOV_V4DI256,
32147 IX86_BUILTIN_VPCMOV_V8SI256,
32148 IX86_BUILTIN_VPCMOV_V16HI256,
32149 IX86_BUILTIN_VPCMOV_V32QI256,
32150 IX86_BUILTIN_VPCMOV_V8SF256,
32151 IX86_BUILTIN_VPCMOV_V4DF256,
32153 IX86_BUILTIN_VPPERM,
32155 IX86_BUILTIN_VPMACSSWW,
32156 IX86_BUILTIN_VPMACSWW,
32157 IX86_BUILTIN_VPMACSSWD,
32158 IX86_BUILTIN_VPMACSWD,
32159 IX86_BUILTIN_VPMACSSDD,
32160 IX86_BUILTIN_VPMACSDD,
32161 IX86_BUILTIN_VPMACSSDQL,
32162 IX86_BUILTIN_VPMACSSDQH,
32163 IX86_BUILTIN_VPMACSDQL,
32164 IX86_BUILTIN_VPMACSDQH,
32165 IX86_BUILTIN_VPMADCSSWD,
32166 IX86_BUILTIN_VPMADCSWD,
32168 IX86_BUILTIN_VPHADDBW,
32169 IX86_BUILTIN_VPHADDBD,
32170 IX86_BUILTIN_VPHADDBQ,
32171 IX86_BUILTIN_VPHADDWD,
32172 IX86_BUILTIN_VPHADDWQ,
32173 IX86_BUILTIN_VPHADDDQ,
32174 IX86_BUILTIN_VPHADDUBW,
32175 IX86_BUILTIN_VPHADDUBD,
32176 IX86_BUILTIN_VPHADDUBQ,
32177 IX86_BUILTIN_VPHADDUWD,
32178 IX86_BUILTIN_VPHADDUWQ,
32179 IX86_BUILTIN_VPHADDUDQ,
32180 IX86_BUILTIN_VPHSUBBW,
32181 IX86_BUILTIN_VPHSUBWD,
32182 IX86_BUILTIN_VPHSUBDQ,
32184 IX86_BUILTIN_VPROTB,
32185 IX86_BUILTIN_VPROTW,
32186 IX86_BUILTIN_VPROTD,
32187 IX86_BUILTIN_VPROTQ,
32188 IX86_BUILTIN_VPROTB_IMM,
32189 IX86_BUILTIN_VPROTW_IMM,
32190 IX86_BUILTIN_VPROTD_IMM,
32191 IX86_BUILTIN_VPROTQ_IMM,
32193 IX86_BUILTIN_VPSHLB,
32194 IX86_BUILTIN_VPSHLW,
32195 IX86_BUILTIN_VPSHLD,
32196 IX86_BUILTIN_VPSHLQ,
32197 IX86_BUILTIN_VPSHAB,
32198 IX86_BUILTIN_VPSHAW,
32199 IX86_BUILTIN_VPSHAD,
32200 IX86_BUILTIN_VPSHAQ,
32202 IX86_BUILTIN_VFRCZSS,
32203 IX86_BUILTIN_VFRCZSD,
32204 IX86_BUILTIN_VFRCZPS,
32205 IX86_BUILTIN_VFRCZPD,
32206 IX86_BUILTIN_VFRCZPS256,
32207 IX86_BUILTIN_VFRCZPD256,
32209 IX86_BUILTIN_VPCOMEQUB,
32210 IX86_BUILTIN_VPCOMNEUB,
32211 IX86_BUILTIN_VPCOMLTUB,
32212 IX86_BUILTIN_VPCOMLEUB,
32213 IX86_BUILTIN_VPCOMGTUB,
32214 IX86_BUILTIN_VPCOMGEUB,
32215 IX86_BUILTIN_VPCOMFALSEUB,
32216 IX86_BUILTIN_VPCOMTRUEUB,
32218 IX86_BUILTIN_VPCOMEQUW,
32219 IX86_BUILTIN_VPCOMNEUW,
32220 IX86_BUILTIN_VPCOMLTUW,
32221 IX86_BUILTIN_VPCOMLEUW,
32222 IX86_BUILTIN_VPCOMGTUW,
32223 IX86_BUILTIN_VPCOMGEUW,
32224 IX86_BUILTIN_VPCOMFALSEUW,
32225 IX86_BUILTIN_VPCOMTRUEUW,
32227 IX86_BUILTIN_VPCOMEQUD,
32228 IX86_BUILTIN_VPCOMNEUD,
32229 IX86_BUILTIN_VPCOMLTUD,
32230 IX86_BUILTIN_VPCOMLEUD,
32231 IX86_BUILTIN_VPCOMGTUD,
32232 IX86_BUILTIN_VPCOMGEUD,
32233 IX86_BUILTIN_VPCOMFALSEUD,
32234 IX86_BUILTIN_VPCOMTRUEUD,
32236 IX86_BUILTIN_VPCOMEQUQ,
32237 IX86_BUILTIN_VPCOMNEUQ,
32238 IX86_BUILTIN_VPCOMLTUQ,
32239 IX86_BUILTIN_VPCOMLEUQ,
32240 IX86_BUILTIN_VPCOMGTUQ,
32241 IX86_BUILTIN_VPCOMGEUQ,
32242 IX86_BUILTIN_VPCOMFALSEUQ,
32243 IX86_BUILTIN_VPCOMTRUEUQ,
32245 IX86_BUILTIN_VPCOMEQB,
32246 IX86_BUILTIN_VPCOMNEB,
32247 IX86_BUILTIN_VPCOMLTB,
32248 IX86_BUILTIN_VPCOMLEB,
32249 IX86_BUILTIN_VPCOMGTB,
32250 IX86_BUILTIN_VPCOMGEB,
32251 IX86_BUILTIN_VPCOMFALSEB,
32252 IX86_BUILTIN_VPCOMTRUEB,
32254 IX86_BUILTIN_VPCOMEQW,
32255 IX86_BUILTIN_VPCOMNEW,
32256 IX86_BUILTIN_VPCOMLTW,
32257 IX86_BUILTIN_VPCOMLEW,
32258 IX86_BUILTIN_VPCOMGTW,
32259 IX86_BUILTIN_VPCOMGEW,
32260 IX86_BUILTIN_VPCOMFALSEW,
32261 IX86_BUILTIN_VPCOMTRUEW,
32263 IX86_BUILTIN_VPCOMEQD,
32264 IX86_BUILTIN_VPCOMNED,
32265 IX86_BUILTIN_VPCOMLTD,
32266 IX86_BUILTIN_VPCOMLED,
32267 IX86_BUILTIN_VPCOMGTD,
32268 IX86_BUILTIN_VPCOMGED,
32269 IX86_BUILTIN_VPCOMFALSED,
32270 IX86_BUILTIN_VPCOMTRUED,
32272 IX86_BUILTIN_VPCOMEQQ,
32273 IX86_BUILTIN_VPCOMNEQ,
32274 IX86_BUILTIN_VPCOMLTQ,
32275 IX86_BUILTIN_VPCOMLEQ,
32276 IX86_BUILTIN_VPCOMGTQ,
32277 IX86_BUILTIN_VPCOMGEQ,
32278 IX86_BUILTIN_VPCOMFALSEQ,
32279 IX86_BUILTIN_VPCOMTRUEQ,
32281 /* LWP instructions. */
32282 IX86_BUILTIN_LLWPCB,
32283 IX86_BUILTIN_SLWPCB,
32284 IX86_BUILTIN_LWPVAL32,
32285 IX86_BUILTIN_LWPVAL64,
32286 IX86_BUILTIN_LWPINS32,
32287 IX86_BUILTIN_LWPINS64,
32292 IX86_BUILTIN_XBEGIN,
32294 IX86_BUILTIN_XABORT,
32295 IX86_BUILTIN_XTEST,
32298 IX86_BUILTIN_BNDMK,
32299 IX86_BUILTIN_BNDSTX,
32300 IX86_BUILTIN_BNDLDX,
32301 IX86_BUILTIN_BNDCL,
32302 IX86_BUILTIN_BNDCU,
32303 IX86_BUILTIN_BNDRET,
32304 IX86_BUILTIN_BNDNARROW,
32305 IX86_BUILTIN_BNDINT,
32306 IX86_BUILTIN_SIZEOF,
32307 IX86_BUILTIN_BNDLOWER,
32308 IX86_BUILTIN_BNDUPPER,
32310 /* BMI instructions. */
32311 IX86_BUILTIN_BEXTR32,
32312 IX86_BUILTIN_BEXTR64,
32315 /* TBM instructions. */
32316 IX86_BUILTIN_BEXTRI32,
32317 IX86_BUILTIN_BEXTRI64,
32319 /* BMI2 instructions. */
32320 IX86_BUILTIN_BZHI32,
32321 IX86_BUILTIN_BZHI64,
32322 IX86_BUILTIN_PDEP32,
32323 IX86_BUILTIN_PDEP64,
32324 IX86_BUILTIN_PEXT32,
32325 IX86_BUILTIN_PEXT64,
32327 /* ADX instructions. */
32328 IX86_BUILTIN_ADDCARRYX32,
32329 IX86_BUILTIN_ADDCARRYX64,
32331 /* SBB instructions. */
32332 IX86_BUILTIN_SBB32,
32333 IX86_BUILTIN_SBB64,
32335 /* FSGSBASE instructions. */
32336 IX86_BUILTIN_RDFSBASE32,
32337 IX86_BUILTIN_RDFSBASE64,
32338 IX86_BUILTIN_RDGSBASE32,
32339 IX86_BUILTIN_RDGSBASE64,
32340 IX86_BUILTIN_WRFSBASE32,
32341 IX86_BUILTIN_WRFSBASE64,
32342 IX86_BUILTIN_WRGSBASE32,
32343 IX86_BUILTIN_WRGSBASE64,
32345 /* RDRND instructions. */
32346 IX86_BUILTIN_RDRAND16_STEP,
32347 IX86_BUILTIN_RDRAND32_STEP,
32348 IX86_BUILTIN_RDRAND64_STEP,
32350 /* RDSEED instructions. */
32351 IX86_BUILTIN_RDSEED16_STEP,
32352 IX86_BUILTIN_RDSEED32_STEP,
32353 IX86_BUILTIN_RDSEED64_STEP,
32355 /* F16C instructions. */
32356 IX86_BUILTIN_CVTPH2PS,
32357 IX86_BUILTIN_CVTPH2PS256,
32358 IX86_BUILTIN_CVTPS2PH,
32359 IX86_BUILTIN_CVTPS2PH256,
32361 /* MONITORX and MWAITX instrucions. */
32362 IX86_BUILTIN_MONITORX,
32363 IX86_BUILTIN_MWAITX,
32365 /* CFString built-in for darwin */
32366 IX86_BUILTIN_CFSTRING,
32368 /* Builtins to get CPU type and supported features. */
32369 IX86_BUILTIN_CPU_INIT,
32370 IX86_BUILTIN_CPU_IS,
32371 IX86_BUILTIN_CPU_SUPPORTS,
32373 /* Read/write FLAGS register built-ins. */
32374 IX86_BUILTIN_READ_FLAGS,
32375 IX86_BUILTIN_WRITE_FLAGS,
32377 /* PKU instructions. */
32378 IX86_BUILTIN_RDPKRU,
32379 IX86_BUILTIN_WRPKRU,
32384 /* Table for the ix86 builtin decls. */
32385 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
32387 /* Table of all of the builtin functions that are possible with different ISA's
32388 but are waiting to be built until a function is declared to use that
32390 struct builtin_isa {
32391 const char *name; /* function name */
32392 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
32393 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
32394 bool const_p; /* true if the declaration is constant */
32395 bool leaf_p; /* true if the declaration has leaf attribute */
32396 bool nothrow_p; /* true if the declaration has nothrow attribute */
/* True when the builtin has been recorded in this table but its decl has
   not been built yet; set in the deferred path of def_builtin and cleared
   once the decl is created (def_builtin / ix86_add_new_builtins).  */
32397 bool set_and_not_built_p;
/* Per-builtin deferred-creation records, indexed like ix86_builtins.  */
32400 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
32402 /* Bits that can still enable any inclusion of a builtin. */
/* Union of the ISA masks of all builtins whose creation was deferred;
   ix86_add_new_builtins uses it as a quick "anything to do?" filter.  */
32403 static HOST_WIDE_INT deferred_isa_values = 0;
32405 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
32406 of which isa_flags to use in the ix86_builtins_isa array. Stores the
32407 function decl in the ix86_builtins array. Returns the function decl or
32408 NULL_TREE, if the builtin was not added.
32410 If the front end has a special hook for builtin functions, delay adding
32411 builtin functions that aren't in the current ISA until the ISA is changed
32412 with function specific optimization. Doing so, can save about 300K for the
32413 default compiler. When the builtin is expanded, check at that time whether
32416 If the front end doesn't have a special hook, record all builtins, even if
32417 it isn't an instruction set in the current ISA in case the user uses
32418 function specific options for a different ISA, so that we don't get scope
32419 errors if a builtin is added in the middle of a function scope. */
32422 def_builtin (HOST_WIDE_INT mask, const char *name,
32423 enum ix86_builtin_func_type tcode,
32424 enum ix86_builtins code)
/* Result decl; remains NULL_TREE for 64-bit-only builtins on a 32-bit
   target and when creation is deferred below.  */
32426 tree decl = NULL_TREE;
/* Builtins whose MASK includes OPTION_MASK_ISA_64BIT only exist when
   compiling for a 64-bit target.  */
32428 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
/* Record the full ISA requirement so it can be re-checked later
   (e.g. when the builtin is expanded).  */
32430 ix86_builtins_isa[(int) code].isa = mask;
/* OPTION_MASK_ISA_64BIT was only a gating condition, not an ISA bit
   that has to be enabled, so strip it before comparing with
   ix86_isa_flags.  */
32432 mask &= ~OPTION_MASK_ISA_64BIT;
/* Build the decl right away when the builtin is already part of the
   current ISA, or when the front end cannot add builtins later via an
   extended-scope hook.  NOTE(review): the first operand of this
   condition is not visible in this excerpt — confirm against the full
   source.  */
32434 || (mask & ix86_isa_flags) != 0
32435 || (lang_hooks.builtin_function
32436 == lang_hooks.builtin_function_ext_scope))
32439 tree type = ix86_get_builtin_func_type (tcode);
32440 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32442 ix86_builtins[(int) code] = decl;
32443 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
/* Deferred path: remember everything ix86_add_new_builtins needs to
   create the decl once the ISA becomes enabled.  */
32447 /* Just a MASK where set_and_not_built_p == true can potentially
32448 include a builtin. */
32449 deferred_isa_values |= mask;
32450 ix86_builtins[(int) code] = NULL_TREE;
32451 ix86_builtins_isa[(int) code].tcode = tcode;
32452 ix86_builtins_isa[(int) code].name = name;
32453 ix86_builtins_isa[(int) code].leaf_p = false;
32454 ix86_builtins_isa[(int) code].nothrow_p = false;
32455 ix86_builtins_isa[(int) code].const_p = false;
32456 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
32463 /* Like def_builtin, but also marks the function decl "const". */
32466 def_builtin_const (HOST_WIDE_INT mask, const char *name,
32467 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32469 tree decl = def_builtin (mask, name, tcode, code);
/* Mark the decl pure/const for the middle end.  NOTE(review): decl can
   be NULL_TREE when def_builtin deferred creation; the guard for that
   case is not visible in this excerpt — confirm it exists.  */
32471 TREE_READONLY (decl) = 1;
/* Record const-ness so ix86_add_new_builtins re-applies it when a
   deferred decl is finally built.  */
32473 ix86_builtins_isa[(int) code].const_p = true;
32478 /* Add any new builtin functions for a given ISA that may not have been
32479 declared. This saves a bit of space compared to adding all of the
32480 declarations to the tree, even if we didn't use them. */
32483 ix86_add_new_builtins (HOST_WIDE_INT isa)
/* Fast exit: nothing deferred depends on any bit of ISA.  */
32485 if ((isa & deferred_isa_values) == 0)
32488 /* Bits in ISA value can be removed from potential isa values. */
32489 deferred_isa_values &= ~isa;
/* Temporarily clear the active target pragma so the decls created
   below are not given function-specific target attributes; restored
   at the end of this function.  */
32492 tree saved_current_target_pragma = current_target_pragma;
32493 current_target_pragma = NULL_TREE;
/* Scan every builtin slot for deferred entries enabled by ISA.  */
32495 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
32497 if ((ix86_builtins_isa[i].isa & isa) != 0
32498 && ix86_builtins_isa[i].set_and_not_built_p)
32502 /* Don't define the builtin again. */
32503 ix86_builtins_isa[i].set_and_not_built_p = false;
32505 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
/* Use the extended-scope variant so the decl is visible even when
   we are in the middle of a function scope.  */
32506 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
32507 type, i, BUILT_IN_MD, NULL,
32510 ix86_builtins[i] = decl;
/* Re-apply the attributes that were recorded when the builtin was
   deferred (see def_builtin and friends).  */
32511 if (ix86_builtins_isa[i].const_p)
32512 TREE_READONLY (decl) = 1;
32513 if (ix86_builtins_isa[i].leaf_p)
32514 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32516 if (ix86_builtins_isa[i].nothrow_p)
32517 TREE_NOTHROW (decl) = 1;
32521 current_target_pragma = saved_current_target_pragma;
32524 /* Bits for builtin_description.flag. */
32526 /* Set when we don't support the comparison natively, and should
32527 swap_comparison in order to support it. */
32528 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the bdesc_* tables below: everything needed to declare and
   expand a machine-specific builtin.  */
32530 struct builtin_description
32532 const HOST_WIDE_INT mask; /* OPTION_MASK_ISA_* bits gating the builtin */
32533 const enum insn_code icode; /* insn pattern used to expand it */
32534 const char *const name; /* user-visible __builtin_ia32_* name */
32535 const enum ix86_builtins code; /* IX86_BUILTIN_* index */
32536 const enum rtx_code comparison; /* comparison code, or UNKNOWN */
/* Scalar SSE/SSE2 ordered (comi*) and unordered (ucomi*) compare builtins.
   The rtx_code column holds the comparison performed; the trailing 0 is the
   flag field (no operand swapping needed for any of these).  */
32540 static const struct builtin_description bdesc_comi[] =
32542 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
32543 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
32544 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
32545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
32546 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
32547 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
32548 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
32549 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
32550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
32551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
32552 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
32553 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
32554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
32555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
32556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
32557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
32558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
32559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
32560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
32561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
32562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
32563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
32564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
32565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 packed-compare-explicit-length string builtins (pcmpestri/pcmpestrm).
   The last column selects which EFLAGS condition the *i[acosz] variants
   test (CCAmode = CF==0 && ZF==0, CCCmode = CF, etc.); 0 means the builtin
   returns the index/mask result instead of a flag.  */
32568 static const struct builtin_description bdesc_pcmpestr[] =
32571 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
32572 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
32573 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
32574 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
32575 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
32576 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
32577 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 packed-compare-implicit-length string builtins (pcmpistri/pcmpistrm).
   Same layout as bdesc_pcmpestr: the last column is the CC mode tested by
   the flag-returning variants, 0 for the index/mask forms.  */
32580 static const struct builtin_description bdesc_pcmpistr[] =
32583 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
32584 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
32585 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
32586 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
32587 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
32588 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
32589 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
32592 /* Special builtins with variable number of arguments. */
32593 static const struct builtin_description bdesc_special_args[] =
32595 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
32596 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
32597 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
32599 /* 80387 (for use internally for atomic compound assignment). */
32600 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
32601 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
32602 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
32603 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
32606 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32609 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32611 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
32612 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
32613 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
32614 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32615 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32616 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32617 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32618 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32619 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32621 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32622 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32623 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32624 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32625 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32626 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32627 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32628 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32631 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32632 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32633 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32635 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32636 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32637 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32638 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32640 /* SSE or 3DNow!A */
32641 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32642 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
32645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
32649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
32651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
32652 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
32653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
32654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32660 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32663 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
32666 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32667 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
32671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
32673 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32674 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32675 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
32677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
32679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
32685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
32688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
32692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
32693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
32694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
32695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
32696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
32697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
32698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
32701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
32702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
32703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
32704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
32705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
32706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
32707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
32708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
32709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
32712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
32733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
32734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
32735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
32736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32760 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
32761 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
32762 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
32763 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
32764 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
32765 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
32768 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32769 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32770 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32771 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32772 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32773 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32774 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32775 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32778 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32779 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
32780 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
32783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
32784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
32785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
32786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
32789 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
32790 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
32791 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
32792 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32805 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32806 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32807 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32825 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
32826 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
32827 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
32828 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32885 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
32887 /* RDPKRU and WRPKRU. */
32888 { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32889 { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }
32892 /* Builtins with variable number of arguments. */
32893 static const struct builtin_description bdesc_args[] =
32895 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
32896 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
32897 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
32898 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32899 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32900 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32901 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32904 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32905 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32906 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32907 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32908 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32909 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32911 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32913 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32914 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32915 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32918 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32920 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32921 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32923 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32924 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32925 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32926 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32929 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32930 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32932 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32939 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
32940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
32942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
32944 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
32948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32956 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32958 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32968 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32969 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32970 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32971 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32973 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32974 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32975 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32976 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32977 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32978 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32979 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32980 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32981 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32982 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32983 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32984 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32985 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32986 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32987 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32990 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32991 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32992 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
32993 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32994 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32995 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
32999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33000 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33002 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
33005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
33006 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
33007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
33008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
33009 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
33011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33013 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33014 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33015 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
33023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
33024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
33025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
33029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
33030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
33031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
33033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
33035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
33036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
33037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
33039 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
33040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
33041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33043 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33044 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33048 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33050 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33051 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33053 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33058 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33059 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
33062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
33063 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
33065 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
33067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33071 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
33072 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
33074 /* SSE MMX or 3Dnow!A */
33075 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33076 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33077 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33079 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33080 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33081 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33082 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33084 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
33085 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
33087 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
33090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
33093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
33094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
33096 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
33098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
33101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
33106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33108 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33109 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
33113 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33115 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33116 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33117 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33118 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
33129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33146 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33150 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33152 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33153 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33155 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33158 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33159 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33161 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33163 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33164 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33165 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33166 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33167 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33168 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33169 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33170 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33181 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33182 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
33184 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33186 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33187 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33199 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33200 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33201 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33204 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33205 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33206 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33207 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33208 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33209 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33210 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33211 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33217 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
33220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
33221 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
33225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
33226 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
33227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
33228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
33230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33231 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33232 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33233 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33234 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33235 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33236 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33239 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33240 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33241 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33242 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33243 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33244 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33246 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33247 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33248 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33249 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
33252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33253 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33255 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
33257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
/* SSE2 MMX: 64-bit (V1DI) add/sub in MMX registers.  */
33260 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33261 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
/* SSE3: duplicate-moves and horizontal add/sub.  */
33264 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
33265 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33267 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33268 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33269 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33270 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33271 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33272 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
/* SSSE3: each operation comes in a 128-bit (XMM) and a 64-bit (MMX)
   flavor, listed in pairs.  */
33275 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33276 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
33277 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33278 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
33279 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33280 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
33282 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33283 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33284 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33285 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33286 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33287 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33288 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33289 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33290 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33291 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33292 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33293 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33294 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
33295 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
33296 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33297 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33298 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33299 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33300 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33301 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33302 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33303 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33304 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33305 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
/* SSSE3 palignr: the immediate shift count is converted, hence the
   *_INT_CONVERT prototype flags.  */
33308 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
33309 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
/* SSE4.1: blends, dot products, insert, mpsadbw.  */
33312 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33313 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33314 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
33315 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
33316 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33317 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33318 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33319 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
33320 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
33321 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
/* SSE4.1 sign/zero extensions (pmovsx*/pmovzx*).  */
33323 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33324 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33325 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33326 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33327 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33328 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33329 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33330 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33331 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33332 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33333 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33334 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33335 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
/* SSE4.1 pack, compare, min/max, multiply.  */
33337 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33338 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33339 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33340 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33341 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33342 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33343 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33344 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33345 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33346 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33347 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33348 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* SSE4.1 rounding and ptest; OPTION_MASK_ISA_ROUND is presumably an
   alias/derivative of the SSE4.1 mask — confirm against its #define
   earlier in this file.  Floor/ceil/trunc/rint reuse the roundpd/ps
   patterns with a ROUND_* immediate stored in the rtx-code slot.  */
33351 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33352 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33353 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33354 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33356 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
33357 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
33358 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
33359 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
33361 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33362 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33364 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33365 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33367 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
33368 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
33369 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
33370 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
33372 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
33373 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
33375 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33376 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
/* ptest: the rtx-code slot selects which flag the result comes from
   (EQ -> ZF for ptestz, LTU -> CF for ptestc, GTU for ptestnzc).  */
33378 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33379 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33380 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
/* SSE4.2; the CRC32 builtins additionally require the CRC32 mask, and
   crc32di only exists in 64-bit mode.  */
33383 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33384 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
33385 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
33386 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33387 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* SSE4A (AMD).  */
33390 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
33391 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
33392 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
33393 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES and PCLMUL.  The name field is 0 here; presumably these builtins
   are registered by name elsewhere (only SSE2 register support is
   needed, hence the SSE2 mask) — confirm against the registration
   code in this file.  */
33396 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
33397 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33399 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33400 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33401 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33402 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33405 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX: 256-bit FP arithmetic and logic.  */
33408 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33409 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33410 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33412 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33413 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33414 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33415 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33416 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33418 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33422 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33423 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33424 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33425 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33426 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33427 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33428 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33429 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33430 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33431 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33432 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33433 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* AVX variable and immediate permutes.  */
33435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
33436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
33437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
33438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
/* AVX blends, dot product, shuffles, compares, extract/insert.  */
33440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
33443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
33444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33447 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33448 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33449 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
33454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
33455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
/* AVX conversions.  */
33456 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
33457 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
33458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
33459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
33461 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33463 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
/* AVX lane permutes and 128-bit insert.  */
33464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33468 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
33472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
33473 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
/* AVX duplicate moves, sqrt, reciprocal approximations.  */
33475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33479 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33481 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33483 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33485 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
/* AVX rounding; floor/ceil/trunc/rint reuse the round patterns with a
   ROUND_* immediate in the rtx-code slot, as in the SSE4.1 section.  */
33487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
33491 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
33492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
33493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
33495 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33496 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33498 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33501 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
33502 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
33503 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
33504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
33506 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
33507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
33509 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33510 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
/* AVX unpacks and 128<->256-bit casts.  */
33512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33517 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33518 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33520 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
33521 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
33522 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
/* AVX vtest/ptest: rtx-code slot selects the flag tested, as with the
   SSE4.1 ptest entries (EQ -> ZF, LTU -> CF, GTU -> !ZF && !CF).  */
33524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33533 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
/* AVX movemask and copysign.  */
33540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
33541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
33543 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33544 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33546 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33549 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
33550 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
33551 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
33552 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
33553 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33554 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33555 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33556 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33557 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33558 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33559 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33560 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33561 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33562 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33563 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33564 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33565 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
33566 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33567 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33568 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33569 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33570 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
33571 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
33572 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33573 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33574 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33581 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33582 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33583 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33584 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33585 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33586 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33587 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
33588 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33589 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33590 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33591 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33592 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33593 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33594 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33595 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33596 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33597 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33598 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33599 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
33601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33612 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33613 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33614 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33615 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33616 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33617 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33618 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33619 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33620 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
33624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33630 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33631 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33632 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33633 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33634 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33635 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33636 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33637 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33638 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33639 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33640 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33641 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33642 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33643 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33644 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33645 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33646 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33647 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33648 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33649 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33650 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33651 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33652 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33653 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33654 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33655 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33658 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33660 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33661 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33663 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33670 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
33671 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
33672 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
33682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
33683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
33684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
33685 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33687 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33696 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33699 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33700 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33701 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33704 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33705 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33708 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
33709 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
33710 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
33711 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
33714 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33715 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33716 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33717 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33718 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33719 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
33723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
33724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
33725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
33726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
33727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
33728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
33736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
33738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
/* AVX512F: masked compress/expand of FP vectors, int<->FP conversions,
   128/256-bit lane extract/insert, and masked full-register moves.
   Row layout: { ISA mask, insn code, builtin name, builtin enum,
   comparison code (UNKNOWN if none), function type }.  */
33743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
33746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
/* Scalar unsigned-int-to-double conversion (32-bit GPR source).  */
33747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
/* _mask rows merge into the passthrough operand; _maskz rows zero
   masked-off elements instead.  */
33748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
/* vextract/vinsert of 128-bit (32x4) and 256-bit (64x4) lanes; the INT
   operand is the immediate lane index.  */
33752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
33753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
33754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
33755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
33756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
33757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
33758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
33759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
/* Masked register-to-register moves and FP duplicate shuffles.  */
33760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
/* AVX512F masked integer arithmetic and logic on V16SI/V8DI: abs, add,
   and/andnot, broadcasts, equality/greater-than compares (producing a
   mask), integer compress/expand, and signed/unsigned min/max.
   Mask width tracks element count: UHI (16-bit) for V16SI, UQI (8-bit)
   for V8DI.  */
33767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
/* Broadcasts from vector element or GPR; the AVX512CD rows broadcast
   the opmask register itself into vector elements.  */
33775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
33777 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33778 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
/* Compares return an opmask (UHI/UQI), not a vector.  */
33781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
/* AVX512F masked narrowing (pmov: plain truncate, "s" signed saturate,
   "us" unsigned saturate) and widening (pmovsx/pmovzx) conversions
   between integer element widths.  */
33799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
/* Signed-saturating truncations.  */
33804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
/* Sign extensions.  */
33809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
/* Unsigned-saturating truncations.  */
33814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
/* Zero extensions.  */
33819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
/* AVX512F masked multiplies, or/xor, rotates (immediate and variable),
   dword shuffle, shifts (by xmm count, by immediate, and per-element
   variable), subtract, vector test-to-mask, and interleaves.  */
33824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
/* Rotates: "rol"/"ror" take an immediate count, "rolv"/"rorv" take a
   per-element count vector.  */
33829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
/* Shifts: same insn pattern is reused for the xmm-count (V4SI/V2DI
   operand) and immediate (INT operand) builtin forms; the ftype
   distinguishes them.  */
33838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
/* ptestm/ptestnm produce an opmask result.  */
33858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
/* AVX512F rcp14/rsqrt14 approximations (packed masked forms plus
   unmasked scalar forms), FP shuffles, 128-bit-lane shuffles
   (shuf_*x4/x2), unsigned compares to mask, and FP unpacks.  */
33868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
/* vpcmpu with immediate predicate (the INT operand).  */
33882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
/* AVX512CD leading-zero-count and conflict detection, then AVX512F
   permutes: immediate (perm*), two-source index-in-the-middle
   (vpermi2var), two-source index-first (vpermt2var, with _maskz
   zeroing variants), vpermilps/pd, cross-lane permvar, and the
   three-input bitwise ternary-logic ops (vpternlog).  */
33888 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33889 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33890 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33891 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
/* Note the ftype operand order for vpermt2var: index vector first.  */
33902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
/* vpternlog: the INT operand is the 8-bit truth-table immediate.  */
33914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
/* Unmasked 512-bit FP math: copysign, sqrt, AVX512ER 2^x approximation,
   and round-to-packed-sfix forms.  The last two rows pass a ROUND_*
   code through the comparison-code slot instead of UNKNOWN, consumed
   by the _ROUND ftype expansion.  */
33919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
33920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
33921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
33922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33923 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
33925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33928 /* Mask arithmetic operations */
/* 16-bit opmask (k-register) builtins: and/andn/not/or, kortest
   carry/zero, byte unpack, xnor/xor, and kmov.  All operate on UHI
   (16-bit mask) values.  */
33929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
33932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
/* SHA-NI builtins (sha1/sha256 message schedule and round helpers).
   NOTE(review): these rows carry OPTION_MASK_ISA_SSE2 and a zero name
   field — presumably the SHA ISA gate and the builtin name are applied
   at the registration site rather than in this table; confirm against
   the def_builtin code outside this chunk.  */
33941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
33950 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
33951 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
33952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33960 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
33961 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
33963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33988 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33989 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33990 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33991 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33992 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33993 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33994 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33995 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33996 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33997 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33998 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33999 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
34000 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
34001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
34004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
34005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
34006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
34007 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34008 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34009 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34010 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34011 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34012 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34013 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34014 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34017 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34018 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34019 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34020 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
34032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
34033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
34034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
34035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
34036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
34037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
34038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
34039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
34040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
34041 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
34042 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
34043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
34045 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
34046 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
34047 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34048 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
34049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
34051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
34053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
34054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
34055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
34057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
34060 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
34061 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
34062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
34063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
34064 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34076 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34077 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34088 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34089 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34090 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34091 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34092 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34093 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34094 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34095 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34096 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34097 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34098 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34099 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34100 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34101 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
  /* rcp14/rsqrt14 and sqrt, masked 128/256-bit forms (AVX512VL).  */
34102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
  /* Masked integer add/sub, including signed (pXXXs) and unsigned (pXXXus)
     saturating forms; byte/word variants additionally require AVX512BW.  */
34114 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34115 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34118 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34119 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34122 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34123 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34126 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34127 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34128 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34129 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34130 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34131 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34134 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34135 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34136 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34137 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34143 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34144 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34152 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34153 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34154 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34155 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34186 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34187 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34188 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34189 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34206 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34207 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34208 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34209 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34210 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34211 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34212 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34213 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34214 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34215 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34216 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34217 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34218 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34219 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34220 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34221 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34222 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34223 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34224 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34227 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34268 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34269 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34270 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34271 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34332 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
34333 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
34334 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34335 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34336 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
34337 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
34338 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
34339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
34340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34346 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34347 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34348 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34349 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34360 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34361 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34362 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34363 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34364 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34365 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34366 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34367 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34392 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34393 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34394 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34395 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34396 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34397 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34426 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34427 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34440 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
34441 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
34442 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
34443 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
34444 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
34445 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
34446 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
34447 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
34448 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
34449 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
34450 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
34451 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
34452 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
34453 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
34454 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
34455 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
34456 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
34457 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
34458 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
34459 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
34460 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34461 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34462 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34463 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34464 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34465 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34478 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34494 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34495 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34496 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
34497 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
34498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34538 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34542 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34543 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34544 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34554 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34555 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34556 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34557 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34565 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34566 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34567 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34568 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
34573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
34574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
34575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
34576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
34577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
34578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34586 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34587 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34588 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34589 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34590 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34591 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34592 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34593 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34594 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34595 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34596 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34597 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34598 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34599 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34600 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34601 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34602 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34629 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34630 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
34638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
34639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
34651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
34652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34656 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34657 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34658 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34659 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
34661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
34664 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34665 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34666 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
34667 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
34668 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
34669 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
34670 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
34671 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
34672 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
34673 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
34674 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34675 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
34676 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34677 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34678 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34679 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34680 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34681 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34682 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34683 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI},
34684 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34685 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
34686 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
34687 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
34688 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
34689 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
34690 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
34691 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
34692 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
34693 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
34694 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
34697 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
34698 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
34699 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34700 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34701 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34702 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34703 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
34704 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
34705 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34706 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34707 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
34708 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
34709 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
34710 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
34711 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
34712 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
34713 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34714 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34715 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34716 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34717 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34718 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34719 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34720 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34721 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34722 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34723 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34724 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34725 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34726 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34727 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34728 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34729 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34730 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34731 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34732 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34733 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34734 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34735 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34736 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34737 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34738 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34739 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34740 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34741 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34742 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34743 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34744 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34745 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34746 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34747 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34748 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34749 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34750 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34751 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34752 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34753 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
34754 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
34755 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34756 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34757 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34758 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34759 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34760 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34761 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34762 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34763 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34764 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34765 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34766 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34767 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
34768 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
34769 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
34770 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
34771 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34772 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34773 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34774 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34775 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34776 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34777 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34778 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34779 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34790 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34791 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34792 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34793 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34794 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34795 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34796 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34797 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34798 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34799 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34800 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34801 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34804 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34805 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34806 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34807 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34808 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34809 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34810 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34811 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34812 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34813 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34814 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34815 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34816 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34817 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34818 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34821 /* Builtins with rounding support. */
34822 static const struct builtin_description bdesc_round_args[] =
34825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
34830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
34831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
34832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
34833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
34834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
34835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
34838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
34840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
34842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
34844 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
34845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
34846 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
34847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
34848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34853 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
34854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
34855 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
34856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34905 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34907 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34909 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34911 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34913 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34915 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34917 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34919 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
34927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
34928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34946 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34947 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34948 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34949 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34950 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34951 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34952 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34953 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34954 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34955 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34958 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34959 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34960 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34961 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34962 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34963 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34964 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34965 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34966 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34967 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34968 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34969 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34970 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34971 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34972 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
34973 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
34976 /* Builtins for MPX.  */
34977 static const struct builtin_description bdesc_mpx[] =
34979 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
34980 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34981 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34984 /* Const builtins for MPX. */
34985 static const struct builtin_description bdesc_mpx_const[] =
34987 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
34988 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
34989 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
34990 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
34991 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
34992 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
34993 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
34994 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
34997 /* FMA4 and XOP. */
/* Shorthand aliases mapping the operand shapes of FMA4/XOP builtins to
   the ix86 function-type enumerators used in the bdesc_multi_arg table
   below.  The suffix encodes element type (SF/DF/SI/DI/HI/QI), a "2"
   marks the 256-bit variant, and trailing tags mark an extra operand:
   _IMM (immediate shift/rotate count), _CMP (comparison code),
   _TF (XOP condition).  */
/* Four-operand forms: two sources, a selector vector and an immediate.  */
34998 #define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
34999 #define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
35000 #define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
35001 #define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
/* Three-operand forms (e.g. fused multiply-add, vpcmov).  */
35002 #define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
35003 #define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
35004 #define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
35005 #define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
35006 #define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
35007 #define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
35008 #define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
35009 #define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
35010 #define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
35011 #define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
35012 #define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
35013 #define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
35014 #define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
35015 #define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
/* Two-operand forms, including immediate, comparison-code and
   XOP-condition (TF) variants.  */
35016 #define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
35017 #define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
35018 #define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
35019 #define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
35020 #define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
35021 #define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
35022 #define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
35023 #define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
35024 #define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
35025 #define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
35026 #define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
35027 #define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
35028 #define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
35029 #define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
35030 #define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
35031 #define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
35032 #define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
35033 #define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
35034 #define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
35035 #define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
/* One-operand forms; the _DI/_SI/_HI-suffixed QI/HI/SI variants are
   widening conversions (result wider than source).  */
35036 #define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
35037 #define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
35038 #define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
35039 #define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
35040 #define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
35041 #define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
35042 #define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
35043 #define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
35044 #define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
35045 #define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
35046 #define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
35047 #define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
35048 #define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
35049 #define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
35051 static const struct builtin_description bdesc_multi_arg[] =
35053 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
35054 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
35055 UNKNOWN, (int)MULTI_ARG_3_SF },
35056 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
35057 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
35058 UNKNOWN, (int)MULTI_ARG_3_DF },
35060 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
35061 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
35062 UNKNOWN, (int)MULTI_ARG_3_SF },
35063 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
35064 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
35065 UNKNOWN, (int)MULTI_ARG_3_DF },
35067 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
35068 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
35069 UNKNOWN, (int)MULTI_ARG_3_SF },
35070 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
35071 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
35072 UNKNOWN, (int)MULTI_ARG_3_DF },
35073 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
35074 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
35075 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35076 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
35077 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
35078 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35080 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
35081 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
35082 UNKNOWN, (int)MULTI_ARG_3_SF },
35083 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
35084 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
35085 UNKNOWN, (int)MULTI_ARG_3_DF },
35086 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
35087 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
35088 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35089 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
35090 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
35091 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
35094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
35095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
35096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
35097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
35098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
35099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
35101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
35104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
35105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
35106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
35107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
35109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
35111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
35126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
35127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
35128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
35129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
35130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
35131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
35132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
35134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
35135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
35136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
35138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
35139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
35141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
35142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
35143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
35144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
35145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
35146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
35148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
35165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
35168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
35169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
35170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
35172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
35173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
35176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
35177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
35178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
35180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
35181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
35184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
35185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
35186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
35188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
35192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
35193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
35194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
35196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
35197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
35200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
35201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
35202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
35204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
35205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
35208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
35209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
35210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
35212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
35213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
35216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
35217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
35218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
35220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
35224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
35225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
35226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
35228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
35247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
35248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
35249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
35253 /* TM vector builtins. */
35255 /* Reuse the existing x86-specific `struct builtin_description' because
35256 it is convenient; add casts so the generic TM builtin codes and types fit. */
35257 static const struct builtin_description bdesc_tm[] =
35259 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35260 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35261 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35262 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35263 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35264 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35265 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35267 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35268 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35269 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35270 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35271 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35272 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35273 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35275 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35276 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35277 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35278 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35279 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35280 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35281 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35283 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
35284 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
35285 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
35288 /* Initialize the transactional memory vector load/store builtins. */
35291 ix86_init_tm_builtins (void)
35293 enum ix86_builtin_func_type ftype;
35294 const struct builtin_description *d;
35297 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
35298 tree attrs_log, attrs_type_log;
35303 /* If there are no builtins defined, we must be compiling in a
35304 language without trans-mem support. */
35305 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
35308 /* Use whatever attributes a normal TM load has. */
35309 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
35310 attrs_load = DECL_ATTRIBUTES (decl);
35311 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35312 /* Use whatever attributes a normal TM store has. */
35313 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
35314 attrs_store = DECL_ATTRIBUTES (decl);
35315 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35316 /* Use whatever attributes a normal TM log has. */
35317 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
35318 attrs_log = DECL_ATTRIBUTES (decl);
35319 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35321 for (i = 0, d = bdesc_tm;
35322 i < ARRAY_SIZE (bdesc_tm);
35325 if ((d->mask & ix86_isa_flags) != 0
35326 || (lang_hooks.builtin_function
35327 == lang_hooks.builtin_function_ext_scope))
35329 tree type, attrs, attrs_type;
35330 enum built_in_function code = (enum built_in_function) d->code;
35332 ftype = (enum ix86_builtin_func_type) d->flag;
35333 type = ix86_get_builtin_func_type (ftype);
35335 if (BUILTIN_TM_LOAD_P (code))
35337 attrs = attrs_load;
35338 attrs_type = attrs_type_load;
35340 else if (BUILTIN_TM_STORE_P (code))
35342 attrs = attrs_store;
35343 attrs_type = attrs_type_store;
35348 attrs_type = attrs_type_log;
35350 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
35351 /* The builtin without the prefix for
35352 calling it directly. */
35353 d->name + strlen ("__builtin_"),
35355 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
35356 set the TYPE_ATTRIBUTES. */
35357 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
35359 set_builtin_decl (code, decl, false);
35364 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
35365 in the current target ISA to allow the user to compile particular modules
35366 with different target specific options that differ from the command line
35369 ix86_init_mmx_sse_builtins (void)
35371 const struct builtin_description * d;
35372 enum ix86_builtin_func_type ftype;
35375 /* Add all special builtins with variable number of operands. */
35376 for (i = 0, d = bdesc_special_args;
35377 i < ARRAY_SIZE (bdesc_special_args);
35383 ftype = (enum ix86_builtin_func_type) d->flag;
35384 def_builtin (d->mask, d->name, ftype, d->code);
35387 /* Add all builtins with variable number of operands. */
35388 for (i = 0, d = bdesc_args;
35389 i < ARRAY_SIZE (bdesc_args);
35395 ftype = (enum ix86_builtin_func_type) d->flag;
35396 def_builtin_const (d->mask, d->name, ftype, d->code);
35399 /* Add all builtins with rounding. */
35400 for (i = 0, d = bdesc_round_args;
35401 i < ARRAY_SIZE (bdesc_round_args);
35407 ftype = (enum ix86_builtin_func_type) d->flag;
35408 def_builtin_const (d->mask, d->name, ftype, d->code);
35411 /* pcmpestr[im] insns. */
35412 for (i = 0, d = bdesc_pcmpestr;
35413 i < ARRAY_SIZE (bdesc_pcmpestr);
35416 if (d->code == IX86_BUILTIN_PCMPESTRM128)
35417 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
35419 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
35420 def_builtin_const (d->mask, d->name, ftype, d->code);
35423 /* pcmpistr[im] insns. */
35424 for (i = 0, d = bdesc_pcmpistr;
35425 i < ARRAY_SIZE (bdesc_pcmpistr);
35428 if (d->code == IX86_BUILTIN_PCMPISTRM128)
35429 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
35431 ftype = INT_FTYPE_V16QI_V16QI_INT;
35432 def_builtin_const (d->mask, d->name, ftype, d->code);
35435 /* comi/ucomi insns. */
35436 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
35438 if (d->mask == OPTION_MASK_ISA_SSE2)
35439 ftype = INT_FTYPE_V2DF_V2DF;
35441 ftype = INT_FTYPE_V4SF_V4SF;
35442 def_builtin_const (d->mask, d->name, ftype, d->code);
35446 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
35447 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
35448 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
35449 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
35451 /* SSE or 3DNow!A */
35452 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35453 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
35454 IX86_BUILTIN_MASKMOVQ);
35457 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
35458 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
35460 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
35461 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
35462 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
35463 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
35466 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
35467 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
35468 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
35469 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
35472 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
35473 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
35474 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
35475 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
35476 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
35477 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
35478 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
35479 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
35480 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
35481 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
35482 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
35483 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
35486 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
35487 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
35490 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
35491 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
35492 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
35493 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
35494 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
35495 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
35496 IX86_BUILTIN_RDRAND64_STEP);
35499 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
35500 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
35501 IX86_BUILTIN_GATHERSIV2DF);
35503 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
35504 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
35505 IX86_BUILTIN_GATHERSIV4DF);
35507 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
35508 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
35509 IX86_BUILTIN_GATHERDIV2DF);
35511 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
35512 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
35513 IX86_BUILTIN_GATHERDIV4DF);
35515 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
35516 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
35517 IX86_BUILTIN_GATHERSIV4SF);
35519 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
35520 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
35521 IX86_BUILTIN_GATHERSIV8SF);
35523 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
35524 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
35525 IX86_BUILTIN_GATHERDIV4SF);
35527 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
35528 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
35529 IX86_BUILTIN_GATHERDIV8SF);
35531 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
35532 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
35533 IX86_BUILTIN_GATHERSIV2DI);
35535 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
35536 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
35537 IX86_BUILTIN_GATHERSIV4DI);
35539 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
35540 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
35541 IX86_BUILTIN_GATHERDIV2DI);
35543 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
35544 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
35545 IX86_BUILTIN_GATHERDIV4DI);
35547 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
35548 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
35549 IX86_BUILTIN_GATHERSIV4SI);
35551 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
35552 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
35553 IX86_BUILTIN_GATHERSIV8SI);
35555 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
35556 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
35557 IX86_BUILTIN_GATHERDIV4SI);
35559 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
35560 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
35561 IX86_BUILTIN_GATHERDIV8SI);
35563 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
35564 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
35565 IX86_BUILTIN_GATHERALTSIV4DF);
35567 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
35568 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
35569 IX86_BUILTIN_GATHERALTDIV8SF);
35571 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
35572 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
35573 IX86_BUILTIN_GATHERALTSIV4DI);
35575 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
35576 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
35577 IX86_BUILTIN_GATHERALTDIV8SI);
35580 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
35581 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
35582 IX86_BUILTIN_GATHER3SIV16SF);
35584 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
35585 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
35586 IX86_BUILTIN_GATHER3SIV8DF);
35588 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
35589 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
35590 IX86_BUILTIN_GATHER3DIV16SF);
35592 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
35593 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
35594 IX86_BUILTIN_GATHER3DIV8DF);
35596 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
35597 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
35598 IX86_BUILTIN_GATHER3SIV16SI);
35600 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
35601 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
35602 IX86_BUILTIN_GATHER3SIV8DI);
35604 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
35605 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
35606 IX86_BUILTIN_GATHER3DIV16SI);
35608 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
35609 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
35610 IX86_BUILTIN_GATHER3DIV8DI);
35612 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
35613 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
35614 IX86_BUILTIN_GATHER3ALTSIV8DF);
35616 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
35617 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
35618 IX86_BUILTIN_GATHER3ALTDIV16SF);
35620 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
35621 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
35622 IX86_BUILTIN_GATHER3ALTSIV8DI);
35624 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
35625 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
35626 IX86_BUILTIN_GATHER3ALTDIV16SI);
35628 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
35629 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
35630 IX86_BUILTIN_SCATTERSIV16SF);
35632 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
35633 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
35634 IX86_BUILTIN_SCATTERSIV8DF);
35636 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
35637 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
35638 IX86_BUILTIN_SCATTERDIV16SF);
35640 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
35641 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
35642 IX86_BUILTIN_SCATTERDIV8DF);
35644 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
35645 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
35646 IX86_BUILTIN_SCATTERSIV16SI);
35648 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
35649 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
35650 IX86_BUILTIN_SCATTERSIV8DI);
35652 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
35653 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
35654 IX86_BUILTIN_SCATTERDIV16SI);
35656 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
35657 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
35658 IX86_BUILTIN_SCATTERDIV8DI);
35661 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
35662 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
35663 IX86_BUILTIN_GATHER3SIV2DF);
35665 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
35666 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
35667 IX86_BUILTIN_GATHER3SIV4DF);
35669 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
35670 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
35671 IX86_BUILTIN_GATHER3DIV2DF);
35673 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
35674 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
35675 IX86_BUILTIN_GATHER3DIV4DF);
35677 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
35678 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
35679 IX86_BUILTIN_GATHER3SIV4SF);
35681 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
35682 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
35683 IX86_BUILTIN_GATHER3SIV8SF);
35685 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
35686 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
35687 IX86_BUILTIN_GATHER3DIV4SF);
35689 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
35690 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
35691 IX86_BUILTIN_GATHER3DIV8SF);
35693 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
35694 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
35695 IX86_BUILTIN_GATHER3SIV2DI);
35697 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
35698 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
35699 IX86_BUILTIN_GATHER3SIV4DI);
35701 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
35702 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
35703 IX86_BUILTIN_GATHER3DIV2DI);
35705 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
35706 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
35707 IX86_BUILTIN_GATHER3DIV4DI);
35709 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
35710 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
35711 IX86_BUILTIN_GATHER3SIV4SI);
35713 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
35714 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
35715 IX86_BUILTIN_GATHER3SIV8SI);
35717 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
35718 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
35719 IX86_BUILTIN_GATHER3DIV4SI);
35721 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
35722 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
35723 IX86_BUILTIN_GATHER3DIV8SI);
35725 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
35726 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
35727 IX86_BUILTIN_GATHER3ALTSIV4DF);
35729 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
35730 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
35731 IX86_BUILTIN_GATHER3ALTDIV8SF);
35733 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
35734 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
35735 IX86_BUILTIN_GATHER3ALTSIV4DI);
35737 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
35738 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
35739 IX86_BUILTIN_GATHER3ALTDIV8SI);
35741 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
35742 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
35743 IX86_BUILTIN_SCATTERSIV8SF);
35745 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
35746 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
35747 IX86_BUILTIN_SCATTERSIV4SF);
35749 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
35750 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
35751 IX86_BUILTIN_SCATTERSIV4DF);
35753 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
35754 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
35755 IX86_BUILTIN_SCATTERSIV2DF);
35757 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
35758 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
35759 IX86_BUILTIN_SCATTERDIV8SF);
35761 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
35762 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
35763 IX86_BUILTIN_SCATTERDIV4SF);
35765 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
35766 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
35767 IX86_BUILTIN_SCATTERDIV4DF);
35769 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
35770 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
35771 IX86_BUILTIN_SCATTERDIV2DF);
35773 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
35774 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
35775 IX86_BUILTIN_SCATTERSIV8SI);
35777 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
35778 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
35779 IX86_BUILTIN_SCATTERSIV4SI);
35781 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
35782 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
35783 IX86_BUILTIN_SCATTERSIV4DI);
35785 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
35786 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
35787 IX86_BUILTIN_SCATTERSIV2DI);
35789 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
35790 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
35791 IX86_BUILTIN_SCATTERDIV8SI);
35793 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
35794 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
35795 IX86_BUILTIN_SCATTERDIV4SI);
35797 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
35798 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
35799 IX86_BUILTIN_SCATTERDIV4DI);
35801 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
35802 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
35803 IX86_BUILTIN_SCATTERDIV2DI);
35804 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
35805 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
35806 IX86_BUILTIN_SCATTERALTSIV8DF);
35808 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
35809 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
35810 IX86_BUILTIN_SCATTERALTDIV16SF);
35812 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
35813 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
35814 IX86_BUILTIN_SCATTERALTSIV8DI);
35816 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
35817 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
35818 IX86_BUILTIN_SCATTERALTDIV16SI);
35821 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
35822 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35823 IX86_BUILTIN_GATHERPFDPD);
35824 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
35825 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35826 IX86_BUILTIN_GATHERPFDPS);
35827 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
35828 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35829 IX86_BUILTIN_GATHERPFQPD);
35830 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
35831 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35832 IX86_BUILTIN_GATHERPFQPS);
35833 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
35834 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35835 IX86_BUILTIN_SCATTERPFDPD);
35836 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
35837 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35838 IX86_BUILTIN_SCATTERPFDPS);
35839 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
35840 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35841 IX86_BUILTIN_SCATTERPFQPD);
35842 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
35843 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35844 IX86_BUILTIN_SCATTERPFQPS);
35847 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
35848 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
35849 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
35850 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
35851 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
35852 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
35853 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
35854 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
35855 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
35856 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
35857 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
35858 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
35859 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
35860 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
35863 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
35864 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
35866 /* MMX access to the vec_init patterns. */
35867 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
35868 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
35870 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
35871 V4HI_FTYPE_HI_HI_HI_HI,
35872 IX86_BUILTIN_VEC_INIT_V4HI);
35874 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
35875 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
35876 IX86_BUILTIN_VEC_INIT_V8QI);
35878 /* Access to the vec_extract patterns. */
35879 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
35880 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
35881 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
35882 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
35883 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
35884 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
35885 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
35886 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
35887 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
35888 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
35890 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35891 "__builtin_ia32_vec_ext_v4hi",
35892 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
35894 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
35895 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
35897 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
35898 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
35900 /* Access to the vec_set patterns. */
35901 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
35902 "__builtin_ia32_vec_set_v2di",
35903 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
35905 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
35906 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
35908 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
35909 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
35911 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
35912 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
35914 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35915 "__builtin_ia32_vec_set_v4hi",
35916 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
35918 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
35919 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
35922 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
35923 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
35924 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
35925 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
35926 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
35927 "__builtin_ia32_rdseed_di_step",
35928 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
35931 def_builtin (0, "__builtin_ia32_addcarryx_u32",
35932 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
35933 def_builtin (OPTION_MASK_ISA_64BIT,
35934 "__builtin_ia32_addcarryx_u64",
35935 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35936 IX86_BUILTIN_ADDCARRYX64);
35939 def_builtin (0, "__builtin_ia32_sbb_u32",
35940 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
35941 def_builtin (OPTION_MASK_ISA_64BIT,
35942 "__builtin_ia32_sbb_u64",
35943 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35944 IX86_BUILTIN_SBB64);
35946 /* Read/write FLAGS. */
35947 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
35948 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35949 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
35950 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35951 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
35952 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
35953 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
35954 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
35957 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
35958 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
35961 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
35962 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
35964 /* MONITORX and MWAITX. */
35965 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
35966 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
35967 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
35968 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
35971 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
35972 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
35974 /* Add FMA4 multi-arg argument instructions */
35975 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
35980 ftype = (enum ix86_builtin_func_type) d->flag;
35981 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register all Intel MPX (Memory Protection Extensions) builtins by
   walking the bdesc_mpx (side-effecting) and bdesc_mpx_const (const)
   descriptor tables, creating one builtin per entry via def_builtin /
   def_builtin_const.
   NOTE(review): this extract is missing several original lines
   (return type, braces, loop increments, if-conditions); comments
   below describe only what the visible lines show.  */
35986 ix86_init_mpx_builtins ()
35988 const struct builtin_description * d;
35989 enum ix86_builtin_func_type ftype;
/* First pass: ordinary (non-const) MPX builtins.  */
35993 for (i = 0, d = bdesc_mpx;
35994 i < ARRAY_SIZE (bdesc_mpx);
/* d->flag encodes the function prototype as an ix86_builtin_func_type.  */
36000 ftype = (enum ix86_builtin_func_type) d->flag;
36001 decl = def_builtin (d->mask, d->name, ftype, d->code);
36003 /* With no leaf and nothrow flags for MPX builtins
36004 abnormal edges may follow its call when setjmp
36005 presents in the function. Since we may have a lot
36006 of MPX builtins calls it causes lots of useless
36007 edges and enormous PHI nodes. To avoid this we mark
36008 MPX builtins as leaf and nothrow. */
36011 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
36013 TREE_NOTHROW (decl) = 1;
/* Record leaf/nothrow in the ISA table too, presumably so the flags can
   be re-applied when the decl is materialized lazily -- the branch that
   selects between the two paths is not visible here; confirm upstream.  */
36017 ix86_builtins_isa[(int)d->code].leaf_p = true;
36018 ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* Second pass: const MPX builtins -- same leaf/nothrow marking.  */
36022 for (i = 0, d = bdesc_mpx_const;
36023 i < ARRAY_SIZE (bdesc_mpx_const);
36029 ftype = (enum ix86_builtin_func_type) d->flag;
36030 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
36034 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
36036 TREE_NOTHROW (decl) = 1;
36040 ix86_builtins_isa[(int)d->code].leaf_p = true;
36041 ix86_builtins_isa[(int)d->code].nothrow_p = true;
36046 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
36047 to return a pointer to VERSION_DECL if the outcome of the expression
36048 formed by PREDICATE_CHAIN is true. This function will be called during
36049 version dispatch to decide which function version to execute. It returns
36050 the basic block at the end, to which more conditions can be added. */
/* See the block comment above: builds, inside the dispatcher function
   FUNCTION_DECL, the GIMPLE for
       if (pred1 && pred2 && ...) return &version_decl;
   and wires up the resulting basic blocks.
   NOTE(review): this extract is missing the return-type line, braces,
   some declarations (gseq, e12, e23, bb assignments) and the matching
   pop_cfun/return -- comments describe only the visible statements.  */
36053 add_condition_to_bb (tree function_decl, tree version_decl,
36054 tree predicate_chain, basic_block new_bb)
36056 gimple *return_stmt;
36057 tree convert_expr, result_var;
36058 gimple *convert_stmt;
36059 gimple *call_cond_stmt;
36060 gimple *if_else_stmt;
36062 basic_block bb1, bb2, bb3;
36065 tree cond_var, and_expr_var = NULL_TREE;
36068 tree predicate_decl, predicate_arg;
/* All GIMPLE below is built in the context of the dispatcher's cfun.  */
36070 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36072 gcc_assert (new_bb != NULL);
36073 gseq = bb_seq (new_bb);
/* Build "return (void *) &version_decl;" up front; it is emitted either
   unconditionally (no predicates) or on the true arm of the condition.  */
36076 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36077 build_fold_addr_expr (version_decl));
36078 result_var = create_tmp_var (ptr_type_node);
36079 convert_stmt = gimple_build_assign (result_var, convert_expr);
36080 return_stmt = gimple_build_return (result_var);
/* No predicates: this is the default version -- return it directly.  */
36082 if (predicate_chain == NULL_TREE)
36084 gimple_seq_add_stmt (&gseq, convert_stmt);
36085 gimple_seq_add_stmt (&gseq, return_stmt);
36086 set_bb_seq (new_bb, gseq);
36087 gimple_set_bb (convert_stmt, new_bb);
36088 gimple_set_bb (return_stmt, new_bb);
/* One predicate per chain link: TREE_PURPOSE is the predicate builtin
   (e.g. __builtin_cpu_is), TREE_VALUE its string argument.  */
36093 while (predicate_chain != NULL)
36095 cond_var = create_tmp_var (integer_type_node);
36096 predicate_decl = TREE_PURPOSE (predicate_chain);
36097 predicate_arg = TREE_VALUE (predicate_chain);
36098 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36099 gimple_call_set_lhs (call_cond_stmt, cond_var);
36101 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36102 gimple_set_bb (call_cond_stmt, new_bb);
36103 gimple_seq_add_stmt (&gseq, call_cond_stmt);
36105 predicate_chain = TREE_CHAIN (predicate_chain);
/* First predicate seeds the accumulator; later ones are AND-ed in.  */
36107 if (and_expr_var == NULL)
36108 and_expr_var = cond_var;
36111 gimple *assign_stmt;
36112 /* Use MIN_EXPR to check that no predicate result is zero:
36113 and_expr_var = min_expr <cond_var, and_expr_var>.  */
36114 assign_stmt = gimple_build_assign (and_expr_var,
36115 build2 (MIN_EXPR, integer_type_node,
36116 cond_var, and_expr_var));
36118 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
36119 gimple_set_bb (assign_stmt, new_bb);
36120 gimple_seq_add_stmt (&gseq, assign_stmt);
/* if (and_expr_var > 0) -- i.e. every predicate returned nonzero.  */
36124 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
36126 NULL_TREE, NULL_TREE);
36127 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
36128 gimple_set_bb (if_else_stmt, new_bb);
36129 gimple_seq_add_stmt (&gseq, if_else_stmt);
36131 gimple_seq_add_stmt (&gseq, convert_stmt);
36132 gimple_seq_add_stmt (&gseq, return_stmt);
36133 set_bb_seq (new_bb, gseq);
/* Split after the condition: bb1 ends in the GIMPLE_COND, and the
   fallthrough edge becomes the TRUE edge into the return block.  */
36136 e12 = split_block (bb1, if_else_stmt);
36138 e12->flags &= ~EDGE_FALLTHRU;
36139 e12->flags |= EDGE_TRUE_VALUE;
/* Split again after the return; bb3 receives the remaining statements
   and is where the next version's condition will be appended.  */
36141 e23 = split_block (bb2, return_stmt);
36143 gimple_set_bb (convert_stmt, bb2);
36144 gimple_set_bb (return_stmt, bb2);
/* Predicate false: fall through to the next condition block.  */
36147 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
/* The return block exits the function.  */
36150 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
36157 /* This parses the attribute arguments to target in DECL and determines
36158 the right builtin to use to match the platform specification.
36159 It returns the priority value for this version decl. If PREDICATE_LIST
36160 is not NULL, it stores the list of cpu features that need to be checked
36161 before dispatching this function. */
/* See the block comment above.  Parses DECL's target("...") attribute
   string, maps any arch= clause to a __builtin_cpu_is argument and each
   feature name to a __builtin_cpu_supports argument (collected into
   *PREDICATE_LIST when non-NULL), and returns the dispatch priority.
   NOTE(review): this extract is missing many original lines (the
   feature_priority enumerator list, case 'break;'s, 'else' arms,
   closing braces, return statements); comments cover only visible code.  */
36163 static unsigned int
36164 get_builtin_code_for_version (tree decl, tree *predicate_list)
36167 struct cl_target_option cur_target;
36169 struct cl_target_option *new_target;
36170 const char *arg_str = NULL;
36171 const char *attrs_str = NULL;
36172 char *tok_str = NULL;
36175 /* Priority of i386 features, greater value is higher priority. This is
36176 used to decide the order in which function dispatch must happen. For
36177 instance, a version specialized for SSE4.2 should be checked for dispatch
36178 before a version for SSE3, as SSE4.2 implies SSE3. */
36179 enum feature_priority
36212 enum feature_priority priority = P_ZERO;
36214 /* These are the target attribute strings for which a dispatcher is
36215 available, from fold_builtin_cpu. */
36217 static struct _feature_list
36219 const char *const name;
36220 const enum feature_priority priority;
36222 const feature_list[] =
36228 {"sse4a", P_SSE4_A},
36229 {"ssse3", P_SSSE3},
36230 {"sse4.1", P_SSE4_1},
36231 {"sse4.2", P_SSE4_2},
36232 {"popcnt", P_POPCNT},
36234 {"pclmul", P_PCLMUL},
36242 {"avx512f", P_AVX512F}
36246 static unsigned int NUM_FEATURES
36247 = sizeof (feature_list) / sizeof (struct _feature_list);
36251 tree predicate_chain = NULL_TREE;
36252 tree predicate_decl, predicate_arg;
/* Fetch the string payload of the target("...") attribute.  */
36254 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36255 gcc_assert (attrs != NULL);
36257 attrs = TREE_VALUE (TREE_VALUE (attrs));
36259 gcc_assert (TREE_CODE (attrs) == STRING_CST);
36260 attrs_str = TREE_STRING_POINTER (attrs);
36262 /* Return priority zero for default function. */
36263 if (strcmp (attrs_str, "default") == 0)
36266 /* Handle arch= if specified. For priority, set it to be 1 more than
36267 the best instruction set the processor can handle. For instance, if
36268 there is a version for atom and a version for ssse3 (the highest ISA
36269 priority for atom), the atom version must be checked for dispatch
36270 before the ssse3 version. */
36271 if (strstr (attrs_str, "arch=") != NULL)
/* Re-parse the attribute against global_options to recover the
   processor enum; save/restore so global state is left untouched.  */
36273 cl_target_option_save (&cur_target, &global_options);
36274 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
36275 &global_options_set);
36277 gcc_assert (target_node);
36278 new_target = TREE_TARGET_OPTION (target_node);
36279 gcc_assert (new_target);
36281 if (new_target->arch_specified && new_target->arch > 0)
/* Map each processor to the __builtin_cpu_is name string and the
   priority of its best ISA.  ('break;' lines are not visible in this
   extract but each case is an independent arm.)  */
36283 switch (new_target->arch)
36285 case PROCESSOR_CORE2:
36287 priority = P_PROC_SSSE3;
36289 case PROCESSOR_NEHALEM:
/* Nehalem with AES is marketed as Westmere.  */
36290 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
36291 arg_str = "westmere";
36293 /* We translate "arch=corei7" and "arch=nehalem" to
36294 "corei7" so that it will be mapped to M_INTEL_COREI7
36295 as cpu type to cover all M_INTEL_COREI7_XXXs. */
36296 arg_str = "corei7";
36297 priority = P_PROC_SSE4_2;
36299 case PROCESSOR_SANDYBRIDGE:
/* F16C distinguishes Ivy Bridge from Sandy Bridge.  */
36300 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
36301 arg_str = "ivybridge";
36303 arg_str = "sandybridge";
36304 priority = P_PROC_AVX;
36306 case PROCESSOR_HASWELL:
/* Discriminate the Haswell family by successive ISA additions:
   AVX512VL => Skylake-AVX512, XSAVES => Skylake, ADX => Broadwell.  */
36307 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
36308 arg_str = "skylake-avx512";
36309 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
36310 arg_str = "skylake";
36311 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
36312 arg_str = "broadwell";
36314 arg_str = "haswell";
36315 priority = P_PROC_AVX2;
36317 case PROCESSOR_BONNELL:
36318 arg_str = "bonnell";
36319 priority = P_PROC_SSSE3;
36321 case PROCESSOR_KNL:
36323 priority = P_PROC_AVX512F;
36325 case PROCESSOR_SILVERMONT:
36326 arg_str = "silvermont";
36327 priority = P_PROC_SSE4_2;
36329 case PROCESSOR_AMDFAM10:
36330 arg_str = "amdfam10h";
36331 priority = P_PROC_SSE4_A;
36333 case PROCESSOR_BTVER1:
36334 arg_str = "btver1";
36335 priority = P_PROC_SSE4_A;
36337 case PROCESSOR_BTVER2:
36338 arg_str = "btver2";
36339 priority = P_PROC_BMI;
36341 case PROCESSOR_BDVER1:
36342 arg_str = "bdver1";
36343 priority = P_PROC_XOP;
36345 case PROCESSOR_BDVER2:
36346 arg_str = "bdver2";
36347 priority = P_PROC_FMA;
36349 case PROCESSOR_BDVER3:
36350 arg_str = "bdver3";
36351 priority = P_PROC_FMA;
36353 case PROCESSOR_BDVER4:
36354 arg_str = "bdver4";
36355 priority = P_PROC_AVX2;
36357 case PROCESSOR_ZNVER1:
36358 arg_str = "znver1";
36359 priority = P_PROC_AVX2;
/* Undo the option mutation done for attribute parsing above.  */
36364 cl_target_option_restore (&global_options, &cur_target);
36366 if (predicate_list && arg_str == NULL)
36368 error_at (DECL_SOURCE_LOCATION (decl),
36369 "No dispatcher found for the versioning attributes");
/* Chain a __builtin_cpu_is ("<arch>") predicate for the dispatcher.  */
36373 if (predicate_list)
36375 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
36376 /* For a C string literal the length includes the trailing NULL. */
36377 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
36378 predicate_chain = tree_cons (predicate_decl, predicate_arg,
36383 /* Process feature name. */
/* Tokenize a writable copy of the attribute string on ','.
   NOTE(review): strtok is not reentrant -- fine only if this runs
   single-threaded with no interleaved strtok user; confirm.  */
36384 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
36385 strcpy (tok_str, attrs_str);
36386 token = strtok (tok_str, ",");
36387 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
36389 while (token != NULL)
36391 /* Do not process "arch=" */
36392 if (strncmp (token, "arch=", 5) == 0)
36394 token = strtok (NULL, ",");
/* Linear lookup of the token in the supported-feature table.  */
36397 for (i = 0; i < NUM_FEATURES; ++i)
36399 if (strcmp (token, feature_list[i].name) == 0)
36401 if (predicate_list)
36403 predicate_arg = build_string_literal (
36404 strlen (feature_list[i].name) + 1,
36405 feature_list[i].name);
36406 predicate_chain = tree_cons (predicate_decl, predicate_arg,
36409 /* Find the maximum priority feature. */
36410 if (feature_list[i].priority > priority)
36411 priority = feature_list[i].priority;
/* Loop fell off the end of the table: unknown feature name.  */
36416 if (predicate_list && i == NUM_FEATURES)
36418 error_at (DECL_SOURCE_LOCATION (decl),
36419 "No dispatcher found for %s", token);
36422 token = strtok (NULL, ",");
36426 if (predicate_list && predicate_chain == NULL_TREE)
36428 error_at (DECL_SOURCE_LOCATION (decl),
36429 "No dispatcher found for the versioning attributes : %s",
36433 else if (predicate_list)
/* Predicates were consed in reverse; restore source order.  */
36435 predicate_chain = nreverse (predicate_chain);
36436 *predicate_list = predicate_chain;
36442 /* This compares the priority of target features in function DECL1
36443 and DECL2. It returns positive value if DECL1 is higher priority,
36444 negative value if DECL2 is higher priority and 0 if they are the
/* See the block comment above: qsort-style comparison of the dispatch
   priorities of two function-version decls.  Both priorities come from
   get_builtin_code_for_version with a NULL predicate list (priority
   query only), and the casts to int make the difference safe since the
   priority values are small.  */
36448 ix86_compare_version_priority (tree decl1, tree decl2)
36450 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
36451 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
36453 return (int)priority1 - (int)priority2;
36456 /* V1 and V2 point to function versions with different priorities
36457 based on the target ISA. This function compares their priorities. */
/* qsort comparator over _function_version_info records: orders by
   DESCENDING dispatch_priority (higher-priority versions first).  The
   local typedef mirrors the struct declared in
   dispatch_function_versions, which owns the array being sorted.
   NOTE(review): the subtraction is on unsigned ints and relies on the
   priorities being small enough that the difference fits in int after
   implicit conversion -- confirm priority range stays well below
   INT_MAX.  */
36460 feature_compare (const void *v1, const void *v2)
36462 typedef struct _function_version_info
36465 tree predicate_chain;
36466 unsigned int dispatch_priority;
36467 } function_version_info;
36469 const function_version_info c1 = *(const function_version_info *)v1;
36470 const function_version_info c2 = *(const function_version_info *)v2;
36471 return (c2.dispatch_priority - c1.dispatch_priority);
36474 /* This function generates the dispatch function for
36475 multi-versioned functions. DISPATCH_DECL is the function which will
36476 contain the dispatch logic. FNDECLS are the function choices for
36477 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
36478 in DISPATCH_DECL in which the dispatch code is generated. */
/* See the block comment above.  Fills DISPATCH_DECL (the IFUNC
   resolver) with: a call to __builtin_cpu_init, then one
   predicate-guarded "return &version" block per non-default version in
   descending priority order, and finally the default version.
   NOTE(review): this extract is missing lines (return type, braces,
   the fndecls_p parameter line, loop-body increments, pop_cfun);
   comments describe only the visible statements.  */
36481 dispatch_function_versions (tree dispatch_decl,
36483 basic_block *empty_bb)
36486 gimple *ifunc_cpu_init_stmt;
36490 vec<tree> *fndecls;
36491 unsigned int num_versions = 0;
36492 unsigned int actual_versions = 0;
/* Per-version record; must stay layout-compatible with the local
   typedef inside feature_compare, which sorts this array.  */
36495 struct _function_version_info
36498 tree predicate_chain;
36499 unsigned int dispatch_priority;
36500 }*function_version_info;
36502 gcc_assert (dispatch_decl != NULL
36503 && fndecls_p != NULL
36504 && empty_bb != NULL);
36506 /*fndecls_p is actually a vector. */
36507 fndecls = static_cast<vec<tree> *> (fndecls_p);
36509 /* At least one more version other than the default. */
36510 num_versions = fndecls->length ();
36511 gcc_assert (num_versions >= 2);
/* Room for every non-default version (the default is handled last,
   outside the sorted array).  */
36513 function_version_info = (struct _function_version_info *)
36514 XNEWVEC (struct _function_version_info, (num_versions - 1));
36516 /* The first version in the vector is the default decl. */
36517 default_decl = (*fndecls)[0];
36519 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
36521 gseq = bb_seq (*empty_bb);
36522 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
36523 constructors, so explicitly call __builtin_cpu_init here. */
36524 ifunc_cpu_init_stmt = gimple_build_call_vec (
36525 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
36526 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
36527 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
36528 set_bb_seq (*empty_bb, gseq);
/* Collect priority and predicate chain for every non-default version
   (index 0 is the default and is skipped).  */
36533 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
36535 tree version_decl = ele;
36536 tree predicate_chain = NULL_TREE;
36537 unsigned int priority;
36538 /* Get attribute string, parse it and find the right predicate decl.
36539 The predicate function could be a lengthy combination of many
36540 features, like arch-type and various isa-variants. */
36541 priority = get_builtin_code_for_version (version_decl,
/* No predicates means this is another default-like version; the
   handling branch is not visible in this extract.  */
36544 if (predicate_chain == NULL_TREE)
36547 function_version_info [actual_versions].version_decl = version_decl;
36548 function_version_info [actual_versions].predicate_chain
36550 function_version_info [actual_versions].dispatch_priority = priority;
36554 /* Sort the versions according to descending order of dispatch priority. The
36555 priority is based on the ISA. This is not a perfect solution. There
36556 could still be ambiguity. If more than one function version is suitable
36557 to execute, which one should be dispatched? In future, allow the user
36558 to specify a dispatch priority next to the version. */
36559 qsort (function_version_info, actual_versions,
36560 sizeof (struct _function_version_info), feature_compare);
/* Emit one predicate-guarded block per version; each call returns the
   basic block where the next condition should be appended.  */
36562 for (i = 0; i < actual_versions; ++i)
36563 *empty_bb = add_condition_to_bb (dispatch_decl,
36564 function_version_info[i].version_decl,
36565 function_version_info[i].predicate_chain,
36568 /* dispatch default version at the end. */
36569 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
36572 free (function_version_info);
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  Plain strcmp over the pointed-to
   strings (arguments are char** as qsort passes element addresses).  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
36587 /* ARGLIST is the argument to target attribute. This function tokenizes
36588 the comma separated arguments, sorts them and returns a string which
36589 is a unique identifier for the comma separated arguments. It also
36590 replaces non-identifier characters "=,-" with "_". */
36593 sorted_attr_string (tree arglist)
36596 size_t str_len_sum = 0;
36597 char **args = NULL;
36598 char *attr_str, *ret_str;
36600 unsigned int argnum = 1;
36603 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36605 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36606 size_t len = strlen (str);
36607 str_len_sum += len + 1;
36608 if (arg != arglist)
36610 for (i = 0; i < strlen (str); i++)
36615 attr_str = XNEWVEC (char, str_len_sum);
36617 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36619 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36620 size_t len = strlen (str);
36621 memcpy (attr_str + str_len_sum, str, len);
36622 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
36623 str_len_sum += len + 1;
36626 /* Replace "=,-" with "_". */
36627 for (i = 0; i < strlen (attr_str); i++)
36628 if (attr_str[i] == '=' || attr_str[i]== '-')
36634 args = XNEWVEC (char *, argnum);
36637 attr = strtok (attr_str, ",");
36638 while (attr != NULL)
36642 attr = strtok (NULL, ",");
36645 qsort (args, argnum, sizeof (char *), attr_strcmp);
36647 ret_str = XNEWVEC (char, str_len_sum);
36649 for (i = 0; i < argnum; i++)
36651 size_t len = strlen (args[i]);
36652 memcpy (ret_str + str_len_sum, args[i], len);
36653 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
36654 str_len_sum += len + 1;
36658 XDELETEVEC (attr_str);
36662 /* This function changes the assembler name for functions that are
36663 versions. If DECL is a function version and has a "target"
36664 attribute, it appends the attribute string to its assembler name. */
36667 ix86_mangle_function_version_assembler_name (tree decl, tree id)
36670 const char *orig_name, *version_string;
36671 char *attr_str, *assembler_name;
36673 if (DECL_DECLARED_INLINE_P (decl)
36674 && lookup_attribute ("gnu_inline",
36675 DECL_ATTRIBUTES (decl)))
36676 error_at (DECL_SOURCE_LOCATION (decl),
36677 "Function versions cannot be marked as gnu_inline,"
36678 " bodies have to be generated");
36680 if (DECL_VIRTUAL_P (decl)
36681 || DECL_VINDEX (decl))
36682 sorry ("Virtual function multiversioning not supported");
36684 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36686 /* target attribute string cannot be NULL. */
36687 gcc_assert (version_attr != NULL_TREE);
36689 orig_name = IDENTIFIER_POINTER (id);
36691 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
36693 if (strcmp (version_string, "default") == 0)
36696 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
36697 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
36699 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
36701 /* Allow assembler name to be modified if already set. */
36702 if (DECL_ASSEMBLER_NAME_SET_P (decl))
36703 SET_DECL_RTL (decl, NULL);
36705 tree ret = get_identifier (assembler_name);
36706 XDELETEVEC (attr_str);
36707 XDELETEVEC (assembler_name);
36711 /* This function returns true if FN1 and FN2 are versions of the same function,
36712 that is, the target strings of the function decls are different. This assumes
36713 that FN1 and FN2 have the same signature. */
36716 ix86_function_versions (tree fn1, tree fn2)
36719 char *target1, *target2;
36722 if (TREE_CODE (fn1) != FUNCTION_DECL
36723 || TREE_CODE (fn2) != FUNCTION_DECL)
36726 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
36727 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
36729 /* At least one function decl should have the target attribute specified. */
36730 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
36733 /* Diagnose missing target attribute if one of the decls is already
36734 multi-versioned. */
36735 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
36737 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
36739 if (attr2 != NULL_TREE)
36741 std::swap (fn1, fn2);
36744 error_at (DECL_SOURCE_LOCATION (fn2),
36745 "missing %<target%> attribute for multi-versioned %D",
36747 inform (DECL_SOURCE_LOCATION (fn1),
36748 "previous declaration of %D", fn1);
36749 /* Prevent diagnosing of the same error multiple times. */
36750 DECL_ATTRIBUTES (fn2)
36751 = tree_cons (get_identifier ("target"),
36752 copy_node (TREE_VALUE (attr1)),
36753 DECL_ATTRIBUTES (fn2));
36758 target1 = sorted_attr_string (TREE_VALUE (attr1));
36759 target2 = sorted_attr_string (TREE_VALUE (attr2));
36761 /* The sorted target strings must be different for fn1 and fn2
36763 if (strcmp (target1, target2) == 0)
36768 XDELETEVEC (target1);
36769 XDELETEVEC (target2);
36775 ix86_mangle_decl_assembler_name (tree decl, tree id)
36777 /* For function version, add the target suffix to the assembler name. */
36778 if (TREE_CODE (decl) == FUNCTION_DECL
36779 && DECL_FUNCTION_VERSIONED (decl))
36780 id = ix86_mangle_function_version_assembler_name (decl, id);
36781 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
36782 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
36788 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
36789 is true, append the full path name of the source file. */
36792 make_name (tree decl, const char *suffix, bool make_unique)
36794 char *global_var_name;
36797 const char *unique_name = NULL;
36799 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36801 /* Get a unique name that can be used globally without any chances
36802 of collision at link time. */
36804 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
36806 name_len = strlen (name) + strlen (suffix) + 2;
36809 name_len += strlen (unique_name) + 1;
36810 global_var_name = XNEWVEC (char, name_len);
36812 /* Use '.' to concatenate names as it is demangler friendly. */
36814 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
36817 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
36819 return global_var_name;
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  /* Non-public versions need a unique (file-qualified) dispatcher name.  */
  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
				   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* This will be of type IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif
36862 /* Returns true if decl is multi-versioned and DECL is the default function,
36863 that is it is not tagged with target specific optimization. */
36866 is_function_default_version (const tree decl)
36868 if (TREE_CODE (decl) != FUNCTION_DECL
36869 || !DECL_FUNCTION_VERSIONED (decl))
36871 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36873 attr = TREE_VALUE (TREE_VALUE (attr));
36874 return (TREE_CODE (attr) == STRING_CST
36875 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
36878 /* Make a dispatcher declaration for the multi-versioned function DECL.
36879 Calls to DECL function will be replaced with calls to the dispatcher
36880 by the front-end. Returns the decl of the dispatcher function. */
36883 ix86_get_function_versions_dispatcher (void *decl)
36885 tree fn = (tree) decl;
36886 struct cgraph_node *node = NULL;
36887 struct cgraph_node *default_node = NULL;
36888 struct cgraph_function_version_info *node_v = NULL;
36889 struct cgraph_function_version_info *first_v = NULL;
36891 tree dispatch_decl = NULL;
36893 struct cgraph_function_version_info *default_version_info = NULL;
36895 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36897 node = cgraph_node::get (fn);
36898 gcc_assert (node != NULL);
36900 node_v = node->function_version ();
36901 gcc_assert (node_v != NULL);
36903 if (node_v->dispatcher_resolver != NULL)
36904 return node_v->dispatcher_resolver;
36906 /* Find the default version and make it the first node. */
36908 /* Go to the beginning of the chain. */
36909 while (first_v->prev != NULL)
36910 first_v = first_v->prev;
36911 default_version_info = first_v;
36912 while (default_version_info != NULL)
36914 if (is_function_default_version
36915 (default_version_info->this_node->decl))
36917 default_version_info = default_version_info->next;
36920 /* If there is no default node, just return NULL. */
36921 if (default_version_info == NULL)
36924 /* Make default info the first node. */
36925 if (first_v != default_version_info)
36927 default_version_info->prev->next = default_version_info->next;
36928 if (default_version_info->next)
36929 default_version_info->next->prev = default_version_info->prev;
36930 first_v->prev = default_version_info;
36931 default_version_info->next = first_v;
36932 default_version_info->prev = NULL;
36935 default_node = default_version_info->this_node;
36937 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36938 if (targetm.has_ifunc_p ())
36940 struct cgraph_function_version_info *it_v = NULL;
36941 struct cgraph_node *dispatcher_node = NULL;
36942 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36944 /* Right now, the dispatching is done via ifunc. */
36945 dispatch_decl = make_dispatcher_decl (default_node->decl);
36947 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36948 gcc_assert (dispatcher_node != NULL);
36949 dispatcher_node->dispatcher_function = 1;
36950 dispatcher_version_info
36951 = dispatcher_node->insert_new_function_version ();
36952 dispatcher_version_info->next = default_version_info;
36953 dispatcher_node->definition = 1;
36955 /* Set the dispatcher for all the versions. */
36956 it_v = default_version_info;
36957 while (it_v != NULL)
36959 it_v->dispatcher_resolver = dispatch_decl;
36966 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36967 "multiversioning needs ifunc which is not supported "
36971 return dispatch_decl;
36974 /* Make the resolver function decl to dispatch the versions of
36975 a multi-versioned function, DEFAULT_DECL. Create an
36976 empty basic block in the resolver and store the pointer in
36977 EMPTY_BB. Return the decl of the resolver function. */
36980 make_resolver_func (const tree default_decl,
36981 const tree dispatch_decl,
36982 basic_block *empty_bb)
36984 char *resolver_name;
36985 tree decl, type, decl_name, t;
36986 bool is_uniq = false;
36988 /* IFUNC's have to be globally visible. So, if the default_decl is
36989 not, then the name of the IFUNC should be made unique. */
36990 if (TREE_PUBLIC (default_decl) == 0)
36993 /* Append the filename to the resolver function if the versions are
36994 not externally visible. This is because the resolver function has
36995 to be externally visible for the loader to find it. So, appending
36996 the filename will prevent conflicts with a resolver function from
36997 another module which is based on the same version name. */
36998 resolver_name = make_name (default_decl, "resolver", is_uniq);
37000 /* The resolver function should return a (void *). */
37001 type = build_function_type_list (ptr_type_node, NULL_TREE);
37003 decl = build_fn_decl (resolver_name, type);
37004 decl_name = get_identifier (resolver_name);
37005 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37007 DECL_NAME (decl) = decl_name;
37008 TREE_USED (decl) = 1;
37009 DECL_ARTIFICIAL (decl) = 1;
37010 DECL_IGNORED_P (decl) = 0;
37011 /* IFUNC resolvers have to be externally visible. */
37012 TREE_PUBLIC (decl) = 1;
37013 DECL_UNINLINABLE (decl) = 1;
37015 /* Resolver is not external, body is generated. */
37016 DECL_EXTERNAL (decl) = 0;
37017 DECL_EXTERNAL (dispatch_decl) = 0;
37019 DECL_CONTEXT (decl) = NULL_TREE;
37020 DECL_INITIAL (decl) = make_node (BLOCK);
37021 DECL_STATIC_CONSTRUCTOR (decl) = 0;
37023 if (DECL_COMDAT_GROUP (default_decl)
37024 || TREE_PUBLIC (default_decl))
37026 /* In this case, each translation unit with a call to this
37027 versioned function will put out a resolver. Ensure it
37028 is comdat to keep just one copy. */
37029 DECL_COMDAT (decl) = 1;
37030 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
37032 /* Build result decl and add to function_decl. */
37033 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37034 DECL_ARTIFICIAL (t) = 1;
37035 DECL_IGNORED_P (t) = 1;
37036 DECL_RESULT (decl) = t;
37038 gimplify_function_tree (decl);
37039 push_cfun (DECL_STRUCT_FUNCTION (decl));
37040 *empty_bb = init_lowered_empty_function (decl, false, 0);
37042 cgraph_node::add_new_function (decl, true);
37043 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37047 gcc_assert (dispatch_decl != NULL);
37048 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37049 DECL_ATTRIBUTES (dispatch_decl)
37050 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37052 /* Create the alias for dispatch to resolver here. */
37053 /*cgraph_create_function_alias (dispatch_decl, decl);*/
37054 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37055 XDELETEVEC (resolver_name);
37059 /* Generate the dispatching code body to dispatch multi-versioned function
37060 DECL. The target hook is called to process the "target" attributes and
37061 provide the code to dispatch the right function at run-time. NODE points
37062 to the dispatcher decl whose body will be created. */
37065 ix86_generate_version_dispatcher_body (void *node_p)
37067 tree resolver_decl;
37068 basic_block empty_bb;
37069 tree default_ver_decl;
37070 struct cgraph_node *versn;
37071 struct cgraph_node *node;
37073 struct cgraph_function_version_info *node_version_info = NULL;
37074 struct cgraph_function_version_info *versn_info = NULL;
37076 node = (cgraph_node *)node_p;
37078 node_version_info = node->function_version ();
37079 gcc_assert (node->dispatcher_function
37080 && node_version_info != NULL);
37082 if (node_version_info->dispatcher_resolver)
37083 return node_version_info->dispatcher_resolver;
37085 /* The first version in the chain corresponds to the default version. */
37086 default_ver_decl = node_version_info->next->this_node->decl;
37088 /* node is going to be an alias, so remove the finalized bit. */
37089 node->definition = false;
37091 resolver_decl = make_resolver_func (default_ver_decl,
37092 node->decl, &empty_bb);
37094 node_version_info->dispatcher_resolver = resolver_decl;
37096 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
37098 auto_vec<tree, 2> fn_ver_vec;
37100 for (versn_info = node_version_info->next; versn_info;
37101 versn_info = versn_info->next)
37103 versn = versn_info->this_node;
37104 /* Check for virtual functions here again, as by this time it should
37105 have been determined if this function needs a vtable index or
37106 not. This happens for methods in derived classes that override
37107 virtual methods in base classes but are not explicitly marked as
37109 if (DECL_VINDEX (versn->decl))
37110 sorry ("Virtual function multiversioning not supported");
37112 fn_ver_vec.safe_push (versn->decl);
37115 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
37116 cgraph_edge::rebuild_edges ();
37118 return resolver_decl;
37120 /* This builds the processor_model struct type defined in
37121 libgcc/config/i386/cpuinfo.c */
37124 build_processor_model_struct (void)
37126 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
37128 tree field = NULL_TREE, field_chain = NULL_TREE;
37130 tree type = make_node (RECORD_TYPE);
37132 /* The first 3 fields are unsigned int. */
37133 for (i = 0; i < 3; ++i)
37135 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37136 get_identifier (field_name[i]), unsigned_type_node);
37137 if (field_chain != NULL_TREE)
37138 DECL_CHAIN (field) = field_chain;
37139 field_chain = field;
37142 /* The last field is an array of unsigned integers of size one. */
37143 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37144 get_identifier (field_name[3]),
37145 build_array_type (unsigned_type_node,
37146 build_index_type (size_one_node)));
37147 if (field_chain != NULL_TREE)
37148 DECL_CHAIN (field) = field_chain;
37149 field_chain = field;
37151 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
37155 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
37158 make_var_decl (tree type, const char *name)
37162 new_decl = build_decl (UNKNOWN_LOCATION,
37164 get_identifier(name),
37167 DECL_EXTERNAL (new_decl) = 1;
37168 TREE_STATIC (new_decl) = 1;
37169 TREE_PUBLIC (new_decl) = 1;
37170 DECL_INITIAL (new_decl) = 0;
37171 DECL_ARTIFICIAL (new_decl) = 0;
37172 DECL_PRESERVE_P (new_decl) = 1;
37174 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
37175 assemble_variable (new_decl, 0, 0, 0);
37180 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
37181 into an integer defined in libgcc/config/i386/cpuinfo.c */
37184 fold_builtin_cpu (tree fndecl, tree *args)
37187 enum ix86_builtins fn_code = (enum ix86_builtins)
37188 DECL_FUNCTION_CODE (fndecl);
37189 tree param_string_cst = NULL;
37191 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
37192 enum processor_features
37225 /* These are the values for vendor types and cpu types and subtypes
37226 in cpuinfo.c. Cpu types and subtypes should be subtracted by
37227 the corresponding start value. */
37228 enum processor_model
37238 M_INTEL_SILVERMONT,
37242 M_CPU_SUBTYPE_START,
37243 M_INTEL_COREI7_NEHALEM,
37244 M_INTEL_COREI7_WESTMERE,
37245 M_INTEL_COREI7_SANDYBRIDGE,
37246 M_AMDFAM10H_BARCELONA,
37247 M_AMDFAM10H_SHANGHAI,
37248 M_AMDFAM10H_ISTANBUL,
37249 M_AMDFAM15H_BDVER1,
37250 M_AMDFAM15H_BDVER2,
37251 M_AMDFAM15H_BDVER3,
37252 M_AMDFAM15H_BDVER4,
37253 M_AMDFAM17H_ZNVER1,
37254 M_INTEL_COREI7_IVYBRIDGE,
37255 M_INTEL_COREI7_HASWELL,
37256 M_INTEL_COREI7_BROADWELL,
37257 M_INTEL_COREI7_SKYLAKE,
37258 M_INTEL_COREI7_SKYLAKE_AVX512
37261 static struct _arch_names_table
37263 const char *const name;
37264 const enum processor_model model;
37266 const arch_names_table[] =
37269 {"intel", M_INTEL},
37270 {"atom", M_INTEL_BONNELL},
37271 {"slm", M_INTEL_SILVERMONT},
37272 {"core2", M_INTEL_CORE2},
37273 {"corei7", M_INTEL_COREI7},
37274 {"nehalem", M_INTEL_COREI7_NEHALEM},
37275 {"westmere", M_INTEL_COREI7_WESTMERE},
37276 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
37277 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
37278 {"haswell", M_INTEL_COREI7_HASWELL},
37279 {"broadwell", M_INTEL_COREI7_BROADWELL},
37280 {"skylake", M_INTEL_COREI7_SKYLAKE},
37281 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
37282 {"bonnell", M_INTEL_BONNELL},
37283 {"silvermont", M_INTEL_SILVERMONT},
37284 {"knl", M_INTEL_KNL},
37285 {"amdfam10h", M_AMDFAM10H},
37286 {"barcelona", M_AMDFAM10H_BARCELONA},
37287 {"shanghai", M_AMDFAM10H_SHANGHAI},
37288 {"istanbul", M_AMDFAM10H_ISTANBUL},
37289 {"btver1", M_AMD_BTVER1},
37290 {"amdfam15h", M_AMDFAM15H},
37291 {"bdver1", M_AMDFAM15H_BDVER1},
37292 {"bdver2", M_AMDFAM15H_BDVER2},
37293 {"bdver3", M_AMDFAM15H_BDVER3},
37294 {"bdver4", M_AMDFAM15H_BDVER4},
37295 {"btver2", M_AMD_BTVER2},
37296 {"znver1", M_AMDFAM17H_ZNVER1},
37299 static struct _isa_names_table
37301 const char *const name;
37302 const enum processor_features feature;
37304 const isa_names_table[] =
37308 {"popcnt", F_POPCNT},
37312 {"ssse3", F_SSSE3},
37313 {"sse4a", F_SSE4_A},
37314 {"sse4.1", F_SSE4_1},
37315 {"sse4.2", F_SSE4_2},
37321 {"avx512f", F_AVX512F},
37325 {"pclmul", F_PCLMUL},
37326 {"avx512vl",F_AVX512VL},
37327 {"avx512bw",F_AVX512BW},
37328 {"avx512dq",F_AVX512DQ},
37329 {"avx512cd",F_AVX512CD},
37330 {"avx512er",F_AVX512ER},
37331 {"avx512pf",F_AVX512PF},
37332 {"avx512vbmi",F_AVX512VBMI},
37333 {"avx512ifma",F_AVX512IFMA},
37336 tree __processor_model_type = build_processor_model_struct ();
37337 tree __cpu_model_var = make_var_decl (__processor_model_type,
37341 varpool_node::add (__cpu_model_var);
37343 gcc_assert ((args != NULL) && (*args != NULL));
37345 param_string_cst = *args;
37346 while (param_string_cst
37347 && TREE_CODE (param_string_cst) != STRING_CST)
37349 /* *args must be a expr that can contain other EXPRS leading to a
37351 if (!EXPR_P (param_string_cst))
37353 error ("Parameter to builtin must be a string constant or literal");
37354 return integer_zero_node;
37356 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
37359 gcc_assert (param_string_cst);
37361 if (fn_code == IX86_BUILTIN_CPU_IS)
37367 unsigned int field_val = 0;
37368 unsigned int NUM_ARCH_NAMES
37369 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
37371 for (i = 0; i < NUM_ARCH_NAMES; i++)
37372 if (strcmp (arch_names_table[i].name,
37373 TREE_STRING_POINTER (param_string_cst)) == 0)
37376 if (i == NUM_ARCH_NAMES)
37378 error ("Parameter to builtin not valid: %s",
37379 TREE_STRING_POINTER (param_string_cst));
37380 return integer_zero_node;
37383 field = TYPE_FIELDS (__processor_model_type);
37384 field_val = arch_names_table[i].model;
37386 /* CPU types are stored in the next field. */
37387 if (field_val > M_CPU_TYPE_START
37388 && field_val < M_CPU_SUBTYPE_START)
37390 field = DECL_CHAIN (field);
37391 field_val -= M_CPU_TYPE_START;
37394 /* CPU subtypes are stored in the next field. */
37395 if (field_val > M_CPU_SUBTYPE_START)
37397 field = DECL_CHAIN ( DECL_CHAIN (field));
37398 field_val -= M_CPU_SUBTYPE_START;
37401 /* Get the appropriate field in __cpu_model. */
37402 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37405 /* Check the value. */
37406 final = build2 (EQ_EXPR, unsigned_type_node, ref,
37407 build_int_cstu (unsigned_type_node, field_val));
37408 return build1 (CONVERT_EXPR, integer_type_node, final);
37410 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37417 unsigned int field_val = 0;
37418 unsigned int NUM_ISA_NAMES
37419 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
37421 for (i = 0; i < NUM_ISA_NAMES; i++)
37422 if (strcmp (isa_names_table[i].name,
37423 TREE_STRING_POINTER (param_string_cst)) == 0)
37426 if (i == NUM_ISA_NAMES)
37428 error ("Parameter to builtin not valid: %s",
37429 TREE_STRING_POINTER (param_string_cst));
37430 return integer_zero_node;
37433 field = TYPE_FIELDS (__processor_model_type);
37434 /* Get the last field, which is __cpu_features. */
37435 while (DECL_CHAIN (field))
37436 field = DECL_CHAIN (field);
37438 /* Get the appropriate field: __cpu_model.__cpu_features */
37439 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37442 /* Access the 0th element of __cpu_features array. */
37443 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
37444 integer_zero_node, NULL_TREE, NULL_TREE);
37446 field_val = (1 << isa_names_table[i].feature);
37447 /* Return __cpu_model.__cpu_features[0] & field_val */
37448 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
37449 build_int_cstu (unsigned_type_node, field_val));
37450 return build1 (CONVERT_EXPR, integer_type_node, final);
37452 gcc_unreachable ();
37456 ix86_fold_builtin (tree fndecl, int n_args,
37457 tree *args, bool ignore ATTRIBUTE_UNUSED)
37459 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
37461 enum ix86_builtins fn_code = (enum ix86_builtins)
37462 DECL_FUNCTION_CODE (fndecl);
37463 if (fn_code == IX86_BUILTIN_CPU_IS
37464 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37466 gcc_assert (n_args == 1);
37467 return fold_builtin_cpu (fndecl, args);
37471 #ifdef SUBTARGET_FOLD_BUILTIN
37472 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
37478 /* Make builtins to detect cpu type and features supported. NAME is
37479 the builtin name, CODE is the builtin code, and FTYPE is the function
37480 type of the builtin. */
37483 make_cpu_type_builtin (const char* name, int code,
37484 enum ix86_builtin_func_type ftype, bool is_const)
37489 type = ix86_get_builtin_func_type (ftype);
37490 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
37492 gcc_assert (decl != NULL_TREE);
37493 ix86_builtins[(int) code] = decl;
37494 TREE_READONLY (decl) = is_const;
37497 /* Make builtins to get CPU type and features supported. The created
37500 __builtin_cpu_init (), to detect cpu type and features,
37501 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
37502 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
37506 ix86_init_platform_type_builtins (void)
37508 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
37509 INT_FTYPE_VOID, false);
37510 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
37511 INT_FTYPE_PCCHAR, true);
37512 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
37513 INT_FTYPE_PCCHAR, true);
37516 /* Internal method for ix86_init_builtins. */
37519 ix86_init_builtins_va_builtins_abi (void)
37521 tree ms_va_ref, sysv_va_ref;
37522 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
37523 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
37524 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
37525 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
37529 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
37530 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
37531 ms_va_ref = build_reference_type (ms_va_list_type_node);
37533 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
37536 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37537 fnvoid_va_start_ms =
37538 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37539 fnvoid_va_end_sysv =
37540 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
37541 fnvoid_va_start_sysv =
37542 build_varargs_function_type_list (void_type_node, sysv_va_ref,
37544 fnvoid_va_copy_ms =
37545 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
37547 fnvoid_va_copy_sysv =
37548 build_function_type_list (void_type_node, sysv_va_ref,
37549 sysv_va_ref, NULL_TREE);
37551 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
37552 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
37553 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
37554 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
37555 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
37556 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
37557 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
37558 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37559 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
37560 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37561 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
37562 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37566 ix86_init_builtin_types (void)
37568 tree float128_type_node, float80_type_node;
37570 /* The __float80 type. */
37571 float80_type_node = long_double_type_node;
37572 if (TYPE_MODE (float80_type_node) != XFmode)
37574 /* The __float80 type. */
37575 float80_type_node = make_node (REAL_TYPE);
37577 TYPE_PRECISION (float80_type_node) = 80;
37578 layout_type (float80_type_node);
37580 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
37582 /* The __float128 type. */
37583 float128_type_node = make_node (REAL_TYPE);
37584 TYPE_PRECISION (float128_type_node) = 128;
37585 layout_type (float128_type_node);
37586 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
37588 /* This macro is built by i386-builtin-types.awk. */
37589 DEFINE_BUILTIN_PRIMITIVE_TYPES;
37593 ix86_init_builtins (void)
37597 ix86_init_builtin_types ();
37599 /* Builtins to get CPU type and features. */
37600 ix86_init_platform_type_builtins ();
37602 /* TFmode support builtins. */
37603 def_builtin_const (0, "__builtin_infq",
37604 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
37605 def_builtin_const (0, "__builtin_huge_valq",
37606 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
37608 /* We will expand them to normal call if SSE isn't available since
37609 they are used by libgcc. */
37610 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
37611 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
37612 BUILT_IN_MD, "__fabstf2", NULL_TREE);
37613 TREE_READONLY (t) = 1;
37614 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
37616 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
37617 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
37618 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
37619 TREE_READONLY (t) = 1;
37620 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
37622 ix86_init_tm_builtins ();
37623 ix86_init_mmx_sse_builtins ();
37624 ix86_init_mpx_builtins ();
37627 ix86_init_builtins_va_builtins_abi ();
37629 #ifdef SUBTARGET_INIT_BUILTINS
37630 SUBTARGET_INIT_BUILTINS;
37634 /* Return the ix86 builtin for CODE. */
37637 ix86_builtin_decl (unsigned code, bool)
37639 if (code >= IX86_BUILTIN_MAX)
37640 return error_mark_node;
37642 return ix86_builtins[code];
37645 /* Errors in the source file can cause expand_expr to return const0_rtx
37646 where we expect a vector. To avoid crashing, use one of the vector
37647 clear instructions. */
37649 safe_vector_operand (rtx x, machine_mode mode)
37651 if (x == const0_rtx)
37652 x = CONST0_RTX (mode);
37656 /* Fixup modeless constants to fit required mode. */
/* A VOIDmode rtx (e.g. a CONST_INT) carries no mode of its own;
   zero-extend it into MODE so it can be used as a typed operand.  */
37658 fixup_modeless_constant (rtx x, machine_mode mode)
37660 if (GET_MODE (x) == VOIDmode)
37661 x = convert_to_mode (mode, x, 1);
37665 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin EXP via ICODE into TARGET.  Operands
   are coerced to the modes the insn pattern demands; TARGET is
   re-created as a fresh pseudo when it does not fit operand 0.
   NOTE(review): the trailing pat-check/emit/return lines of this
   function are not visible in this chunk.  */
37668 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
37671 tree arg0 = CALL_EXPR_ARG (exp, 0);
37672 tree arg1 = CALL_EXPR_ARG (exp, 1);
37673 rtx op0 = expand_normal (arg0);
37674 rtx op1 = expand_normal (arg1);
37675 machine_mode tmode = insn_data[icode].operand[0].mode;
37676 machine_mode mode0 = insn_data[icode].operand[1].mode;
37677 machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
37679 if (VECTOR_MODE_P (mode0))
37680 op0 = safe_vector_operand (op0, mode0);
37681 if (VECTOR_MODE_P (mode1))
37682 op1 = safe_vector_operand (op1, mode1);
37684 if (optimize || !target
37685 || GET_MODE (target) != tmode
37686 || !insn_data[icode].operand[0].predicate (target, tmode))
37687 target = gen_reg_rtx (tmode);
/* A SImode operand feeding a TImode operand slot: load it into the
   low element of a V4SI with movd, then view that vector as TI.  */
37689 if (GET_MODE (op1) == SImode && mode1 == TImode)
37691 rtx x = gen_reg_rtx (V4SImode);
37692 emit_insn (gen_sse2_loadd (x, op1));
37693 op1 = gen_lowpart (TImode, x);
37696 if (!insn_data[icode].operand[1].predicate (op0, mode0))
37697 op0 = copy_to_mode_reg (mode0, op0);
37698 if (!insn_data[icode].operand[2].predicate (op1, mode1))
37699 op1 = copy_to_mode_reg (mode1, op1);
37701 pat = GEN_FCN (icode) (target, op0, op1);
37710 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand an XOP/multi-argument builtin.  M_TYPE selects the argument
   count and flavor (comparison, last-arg-immediate, ...); SUB_CODE is
   the rtx comparison/sub-operation code for COMP/TF variants.
   NOTE(review): many case bodies (nargs assignments, break statements,
   braces) are elided in this subsampled view.  */
37713 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
37714 enum ix86_builtin_func_type m_type,
37715 enum rtx_code sub_code)
37720 bool comparison_p = false;
37722 bool last_arg_constant = false;
37723 int num_memory = 0;
37729 machine_mode tmode = insn_data[icode].operand[0].mode;
/* 4-argument forms whose final argument must be an immediate.  */
37733 case MULTI_ARG_4_DF2_DI_I:
37734 case MULTI_ARG_4_DF2_DI_I1:
37735 case MULTI_ARG_4_SF2_SI_I:
37736 case MULTI_ARG_4_SF2_SI_I1:
37738 last_arg_constant = true;
/* Plain 3-argument forms.  */
37741 case MULTI_ARG_3_SF:
37742 case MULTI_ARG_3_DF:
37743 case MULTI_ARG_3_SF2:
37744 case MULTI_ARG_3_DF2:
37745 case MULTI_ARG_3_DI:
37746 case MULTI_ARG_3_SI:
37747 case MULTI_ARG_3_SI_DI:
37748 case MULTI_ARG_3_HI:
37749 case MULTI_ARG_3_HI_SI:
37750 case MULTI_ARG_3_QI:
37751 case MULTI_ARG_3_DI2:
37752 case MULTI_ARG_3_SI2:
37753 case MULTI_ARG_3_HI2:
37754 case MULTI_ARG_3_QI2:
/* Plain 2-argument forms.  */
37758 case MULTI_ARG_2_SF:
37759 case MULTI_ARG_2_DF:
37760 case MULTI_ARG_2_DI:
37761 case MULTI_ARG_2_SI:
37762 case MULTI_ARG_2_HI:
37763 case MULTI_ARG_2_QI:
/* 2-argument forms with an immediate second argument.  */
37767 case MULTI_ARG_2_DI_IMM:
37768 case MULTI_ARG_2_SI_IMM:
37769 case MULTI_ARG_2_HI_IMM:
37770 case MULTI_ARG_2_QI_IMM:
37772 last_arg_constant = true;
/* 1-argument forms.  */
37775 case MULTI_ARG_1_SF:
37776 case MULTI_ARG_1_DF:
37777 case MULTI_ARG_1_SF2:
37778 case MULTI_ARG_1_DF2:
37779 case MULTI_ARG_1_DI:
37780 case MULTI_ARG_1_SI:
37781 case MULTI_ARG_1_HI:
37782 case MULTI_ARG_1_QI:
37783 case MULTI_ARG_1_SI_DI:
37784 case MULTI_ARG_1_HI_DI:
37785 case MULTI_ARG_1_HI_SI:
37786 case MULTI_ARG_1_QI_DI:
37787 case MULTI_ARG_1_QI_SI:
37788 case MULTI_ARG_1_QI_HI:
/* Comparison forms: the insn pattern takes an extra comparison rtx
   operand (see the adjust/cmp_op handling below).  */
37792 case MULTI_ARG_2_DI_CMP:
37793 case MULTI_ARG_2_SI_CMP:
37794 case MULTI_ARG_2_HI_CMP:
37795 case MULTI_ARG_2_QI_CMP:
37797 comparison_p = true;
/* TF ("true/false") forms: sub_code is passed as an immediate.  */
37800 case MULTI_ARG_2_SF_TF:
37801 case MULTI_ARG_2_DF_TF:
37802 case MULTI_ARG_2_DI_TF:
37803 case MULTI_ARG_2_SI_TF:
37804 case MULTI_ARG_2_HI_TF:
37805 case MULTI_ARG_2_QI_TF:
37811 gcc_unreachable ();
37814 if (optimize || !target
37815 || GET_MODE (target) != tmode
37816 || !insn_data[icode].operand[0].predicate (target, tmode))
37817 target = gen_reg_rtx (tmode);
37819 gcc_assert (nargs <= 4);
37821 for (i = 0; i < nargs; i++)
37823 tree arg = CALL_EXPR_ARG (exp, i);
37824 rtx op = expand_normal (arg);
/* Comparison patterns put the comparison rtx in operand 1, shifting
   the real arguments one slot to the right.  */
37825 int adjust = (comparison_p) ? 1 : 0;
37826 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
37828 if (last_arg_constant && i == nargs - 1)
37830 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
37832 enum insn_code new_icode = icode;
37835 case CODE_FOR_xop_vpermil2v2df3:
37836 case CODE_FOR_xop_vpermil2v4sf3:
37837 case CODE_FOR_xop_vpermil2v4df3:
37838 case CODE_FOR_xop_vpermil2v8sf3:
37839 error ("the last argument must be a 2-bit immediate");
37840 return gen_reg_rtx (tmode);
/* Non-constant rotate counts: fall back to the generic rotate
   patterns instead of the XOP immediate forms.  */
37841 case CODE_FOR_xop_rotlv2di3:
37842 new_icode = CODE_FOR_rotlv2di3;
37844 case CODE_FOR_xop_rotlv4si3:
37845 new_icode = CODE_FOR_rotlv4si3;
37847 case CODE_FOR_xop_rotlv8hi3:
37848 new_icode = CODE_FOR_rotlv8hi3;
37850 case CODE_FOR_xop_rotlv16qi3:
37851 new_icode = CODE_FOR_rotlv16qi3;
/* A constant rotate count is reduced modulo the element width.  */
37853 if (CONST_INT_P (op))
37855 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
37856 op = GEN_INT (INTVAL (op) & mask);
37857 gcc_checking_assert
37858 (insn_data[icode].operand[i + 1].predicate (op, mode));
/* The replacement pattern must agree with the original on modes
   and predicates, so operands can be reused as-is.  */
37862 gcc_checking_assert
37864 && insn_data[new_icode].operand[0].mode == tmode
37865 && insn_data[new_icode].operand[1].mode == tmode
37866 && insn_data[new_icode].operand[2].mode == mode
37867 && insn_data[new_icode].operand[0].predicate
37868 == insn_data[icode].operand[0].predicate
37869 && insn_data[new_icode].operand[1].predicate
37870 == insn_data[icode].operand[1].predicate);
37876 gcc_unreachable ();
37883 if (VECTOR_MODE_P (mode))
37884 op = safe_vector_operand (op, mode);
37886 /* If we aren't optimizing, only allow one memory operand to be
37888 if (memory_operand (op, mode))
37891 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
37894 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
37896 op = force_reg (mode, op);
37900 args[i].mode = mode;
/* Dispatch on nargs / comparison_p to call the generator with the
   right operand list.  */
37906 pat = GEN_FCN (icode) (target, args[0].op);
37911 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37912 GEN_INT ((int)sub_code));
37913 else if (! comparison_p)
37914 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37917 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
37921 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
37926 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37930 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
37934 gcc_unreachable ();
37944 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
37945 insns with vec_merge. */
/* Expand a scalar unary builtin whose pattern is a vec_merge taking
   the source vector twice (operand 2 duplicates operand 1).
   NOTE(review): the line initializing op1 before its predicate check
   is not visible in this chunk — confirm against the full file.  */
37948 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
37952 tree arg0 = CALL_EXPR_ARG (exp, 0);
37953 rtx op1, op0 = expand_normal (arg0);
37954 machine_mode tmode = insn_data[icode].operand[0].mode;
37955 machine_mode mode0 = insn_data[icode].operand[1].mode;
37957 if (optimize || !target
37958 || GET_MODE (target) != tmode
37959 || !insn_data[icode].operand[0].predicate (target, tmode))
37960 target = gen_reg_rtx (tmode);
37962 if (VECTOR_MODE_P (mode0))
37963 op0 = safe_vector_operand (op0, mode0);
37965 if ((optimize && !register_operand (op0, mode0))
37966 || !insn_data[icode].operand[1].predicate (op0, mode0))
37967 op0 = copy_to_mode_reg (mode0, op0);
37970 if (!insn_data[icode].operand[2].predicate (op1, mode0))
37971 op1 = copy_to_mode_reg (mode0, op1)
37973 pat = GEN_FCN (icode) (target, op0, op1);
37980 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D into TARGET.  When
   SWAP is set the operands are exchanged so an unavailable comparison
   can be emitted as its mirrored form.  The comparison rtx (op2) is
   built from d->comparison and passed as the pattern's last operand.  */
37983 ix86_expand_sse_compare (const struct builtin_description *d,
37984 tree exp, rtx target, bool swap)
37987 tree arg0 = CALL_EXPR_ARG (exp, 0);
37988 tree arg1 = CALL_EXPR_ARG (exp, 1);
37989 rtx op0 = expand_normal (arg0);
37990 rtx op1 = expand_normal (arg1);
37992 machine_mode tmode = insn_data[d->icode].operand[0].mode;
37993 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
37994 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
37995 enum rtx_code comparison = d->comparison;
37997 if (VECTOR_MODE_P (mode0))
37998 op0 = safe_vector_operand (op0, mode0);
37999 if (VECTOR_MODE_P (mode1))
38000 op1 = safe_vector_operand (op1, mode1);
38002 /* Swap operands if we have a comparison that isn't available in
38005 std::swap (op0, op1);
38007 if (optimize || !target
38008 || GET_MODE (target) != tmode
38009 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38010 target = gen_reg_rtx (tmode);
38012 if ((optimize && !register_operand (op0, mode0))
38013 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
38014 op0 = copy_to_mode_reg (mode0, op0);
38015 if ((optimize && !register_operand (op1, mode1))
38016 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
38017 op1 = copy_to_mode_reg (mode1, op1)
38019 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
38020 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38027 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comis/ucomis builtin: emit the flags-setting comparison,
   then materialize the predicate d->comparison into the low byte of
   a zeroed SImode pseudo via a STRICT_LOW_PART set, returning the
   SImode register.  */
38030 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
38034 tree arg0 = CALL_EXPR_ARG (exp, 0);
38035 tree arg1 = CALL_EXPR_ARG (exp, 1);
38036 rtx op0 = expand_normal (arg0);
38037 rtx op1 = expand_normal (arg1);
38038 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38039 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38040 enum rtx_code comparison = d->comparison;
38042 if (VECTOR_MODE_P (mode0))
38043 op0 = safe_vector_operand (op0, mode0);
38044 if (VECTOR_MODE_P (mode1))
38045 op1 = safe_vector_operand (op1, mode1);
38047 /* Swap operands if we have a comparison that isn't available in
38049 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
38050 std::swap (op0, op1);
/* Zero the result register first, then write only its low QImode
   part; the rest stays zero so the full SImode value is 0 or 1.  */
38052 target = gen_reg_rtx (SImode);
38053 emit_move_insn (target, const0_rtx);
38054 target = gen_rtx_SUBREG (QImode, target, 0);
38056 if ((optimize && !register_operand (op0, mode0))
38057 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38058 op0 = copy_to_mode_reg (mode0, op0);
38059 if ((optimize && !register_operand (op1, mode1))
38060 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38061 op1 = copy_to_mode_reg (mode1, op1);
38063 pat = GEN_FCN (d->icode) (op0, op1);
38067 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38068 gen_rtx_fmt_ee (comparison, QImode,
38072 return SUBREG_REG (target);
38075 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
/* Expand a round builtin: one vector input plus an immediate rounding
   mode.  The immediate is taken from d->comparison (reused here as the
   rounding-control constant rather than an rtx_code).  */
38078 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
38082 tree arg0 = CALL_EXPR_ARG (exp, 0);
38083 rtx op1, op0 = expand_normal (arg0);
38084 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38085 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38087 if (optimize || target == 0
38088 || GET_MODE (target) != tmode
38089 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38090 target = gen_reg_rtx (tmode);
38092 if (VECTOR_MODE_P (mode0))
38093 op0 = safe_vector_operand (op0, mode0);
38095 if ((optimize && !register_operand (op0, mode0))
38096 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38097 op0 = copy_to_mode_reg (mode0, op0);
/* d->comparison holds the rounding-mode immediate for round insns.  */
38099 op1 = GEN_INT (d->comparison);
38101 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Expand a round-and-pack-to-signed-int builtin: two vector inputs
   and an immediate rounding mode (again smuggled in d->comparison).  */
38109 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
38110 tree exp, rtx target)
38113 tree arg0 = CALL_EXPR_ARG (exp, 0);
38114 tree arg1 = CALL_EXPR_ARG (exp, 1);
38115 rtx op0 = expand_normal (arg0);
38116 rtx op1 = expand_normal (arg1);
38118 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38119 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38120 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
38122 if (optimize || target == 0
38123 || GET_MODE (target) != tmode
38124 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38125 target = gen_reg_rtx (tmode);
38127 op0 = safe_vector_operand (op0, mode0);
38128 op1 = safe_vector_operand (op1, mode1);
/* NOTE(review): predicates of operand[0]/operand[1] are used to check
   op0/op1 while their modes come from operand[1]/operand[2] — looks
   off-by-one; confirm against the insn pattern before changing.  */
38130 if ((optimize && !register_operand (op0, mode0))
38131 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38132 op0 = copy_to_mode_reg (mode0, op0);
38133 if ((optimize && !register_operand (op1, mode1))
38134 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38135 op1 = copy_to_mode_reg (mode1, op1);
38137 op2 = GEN_INT (d->comparison);
38139 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38146 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest builtin: emit the flags-setting ptest, then set the
   low byte of a zeroed SImode pseudo from the flags predicate
   d->comparison (same STRICT_LOW_PART technique as the comi case).  */
38149 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
38153 tree arg0 = CALL_EXPR_ARG (exp, 0);
38154 tree arg1 = CALL_EXPR_ARG (exp, 1);
38155 rtx op0 = expand_normal (arg0);
38156 rtx op1 = expand_normal (arg1);
38157 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38158 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38159 enum rtx_code comparison = d->comparison;
38161 if (VECTOR_MODE_P (mode0))
38162 op0 = safe_vector_operand (op0, mode0);
38163 if (VECTOR_MODE_P (mode1))
38164 op1 = safe_vector_operand (op1, mode1);
/* Zero the SImode result, then write only its low byte below.  */
38166 target = gen_reg_rtx (SImode);
38167 emit_move_insn (target, const0_rtx);
38168 target = gen_rtx_SUBREG (QImode, target, 0);
38170 if ((optimize && !register_operand (op0, mode0))
38171 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38172 op0 = copy_to_mode_reg (mode0, op0);
38173 if ((optimize && !register_operand (op1, mode1))
38174 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38175 op1 = copy_to_mode_reg (mode1, op1);
38177 pat = GEN_FCN (d->icode) (op0, op1);
38181 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38182 gen_rtx_fmt_ee (comparison, QImode,
38186 return SUBREG_REG (target);
38189 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a pcmpestri/pcmpestrm builtin.  The pattern has two outputs
   (index in tmode0, mask in tmode1); the one the specific builtin
   returns becomes TARGET and the other goes to a scratch pseudo.
   The CC-flag-returning variants (d->flag nonzero) instead test a
   flags bit and return 0/1 via the zero-extended low-byte trick.  */
38192 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
38193 tree exp, rtx target)
38196 tree arg0 = CALL_EXPR_ARG (exp, 0);
38197 tree arg1 = CALL_EXPR_ARG (exp, 1);
38198 tree arg2 = CALL_EXPR_ARG (exp, 2);
38199 tree arg3 = CALL_EXPR_ARG (exp, 3);
38200 tree arg4 = CALL_EXPR_ARG (exp, 4);
38201 rtx scratch0, scratch1;
38202 rtx op0 = expand_normal (arg0);
38203 rtx op1 = expand_normal (arg1);
38204 rtx op2 = expand_normal (arg2);
38205 rtx op3 = expand_normal (arg3);
38206 rtx op4 = expand_normal (arg4);
/* Operand layout: 0/1 outputs, 2/4 vectors, 3/5 explicit lengths,
   6 the control immediate.  */
38207 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
38209 tmode0 = insn_data[d->icode].operand[0].mode;
38210 tmode1 = insn_data[d->icode].operand[1].mode;
38211 modev2 = insn_data[d->icode].operand[2].mode;
38212 modei3 = insn_data[d->icode].operand[3].mode;
38213 modev4 = insn_data[d->icode].operand[4].mode;
38214 modei5 = insn_data[d->icode].operand[5].mode;
38215 modeimm = insn_data[d->icode].operand[6].mode;
38217 if (VECTOR_MODE_P (modev2))
38218 op0 = safe_vector_operand (op0, modev2);
38219 if (VECTOR_MODE_P (modev4))
38220 op2 = safe_vector_operand (op2, modev4);
38222 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38223 op0 = copy_to_mode_reg (modev2, op0);
38224 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
38225 op1 = copy_to_mode_reg (modei3, op1);
38226 if ((optimize && !register_operand (op2, modev4))
38227 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
38228 op2 = copy_to_mode_reg (modev4, op2);
38229 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
38230 op3 = copy_to_mode_reg (modei5, op3);
/* The control argument must be a compile-time 8-bit immediate.  */
38232 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
38234 error ("the fifth argument must be an 8-bit immediate");
/* pcmpestri returns the index output (operand 0).  */
38238 if (d->code == IX86_BUILTIN_PCMPESTRI128)
38240 if (optimize || !target
38241 || GET_MODE (target) != tmode0
38242 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38243 target = gen_reg_rtx (tmode0);
38245 scratch1 = gen_reg_rtx (tmode1);
38247 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm returns the mask output (operand 1).  */
38249 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
38251 if (optimize || !target
38252 || GET_MODE (target) != tmode1
38253 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38254 target = gen_reg_rtx (tmode1);
38256 scratch0 = gen_reg_rtx (tmode0);
38258 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-returning variants: both insn outputs are scratch; the result
   comes from the CC register bit selected by d->flag.  */
38262 gcc_assert (d->flag);
38264 scratch0 = gen_reg_rtx (tmode0);
38265 scratch1 = gen_reg_rtx (tmode1);
38267 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
38277 target = gen_reg_rtx (SImode);
38278 emit_move_insn (target, const0_rtx);
38279 target = gen_rtx_SUBREG (QImode, target, 0);
38282 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38283 gen_rtx_fmt_ee (EQ, QImode,
38284 gen_rtx_REG ((machine_mode) d->flag,
38287 return SUBREG_REG (target);
38294 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand a pcmpistri/pcmpistrm builtin.  Like the pcmpestr case but
   with implicit (NUL-terminated) lengths, so only two vector operands
   plus the control immediate.  Operand 0 is the index output, operand
   1 the mask output; flag variants return a CC bit as 0/1.  */
38297 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
38298 tree exp, rtx target)
38301 tree arg0 = CALL_EXPR_ARG (exp, 0);
38302 tree arg1 = CALL_EXPR_ARG (exp, 1);
38303 tree arg2 = CALL_EXPR_ARG (exp, 2);
38304 rtx scratch0, scratch1;
38305 rtx op0 = expand_normal (arg0);
38306 rtx op1 = expand_normal (arg1);
38307 rtx op2 = expand_normal (arg2);
38308 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
38310 tmode0 = insn_data[d->icode].operand[0].mode;
38311 tmode1 = insn_data[d->icode].operand[1].mode;
38312 modev2 = insn_data[d->icode].operand[2].mode;
38313 modev3 = insn_data[d->icode].operand[3].mode;
38314 modeimm = insn_data[d->icode].operand[4].mode;
38316 if (VECTOR_MODE_P (modev2))
38317 op0 = safe_vector_operand (op0, modev2);
38318 if (VECTOR_MODE_P (modev3))
38319 op1 = safe_vector_operand (op1, modev3);
38321 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38322 op0 = copy_to_mode_reg (modev2, op0);
38323 if ((optimize && !register_operand (op1, modev3))
38324 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
38325 op1 = copy_to_mode_reg (modev3, op1);
/* The control argument must be a compile-time 8-bit immediate.  */
38327 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
38329 error ("the third argument must be an 8-bit immediate");
/* pcmpistri returns the index output (operand 0).  */
38333 if (d->code == IX86_BUILTIN_PCMPISTRI128)
38335 if (optimize || !target
38336 || GET_MODE (target) != tmode0
38337 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38338 target = gen_reg_rtx (tmode0);
38340 scratch1 = gen_reg_rtx (tmode1);
38342 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm returns the mask output (operand 1).  */
38344 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
38346 if (optimize || !target
38347 || GET_MODE (target) != tmode1
38348 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38349 target = gen_reg_rtx (tmode1);
38351 scratch0 = gen_reg_rtx (tmode0);
38353 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-returning variants: both outputs are scratch; test the CC
   register bit selected by d->flag and return it as 0/1.  */
38357 gcc_assert (d->flag);
38359 scratch0 = gen_reg_rtx (tmode0);
38360 scratch1 = gen_reg_rtx (tmode1);
38362 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
38372 target = gen_reg_rtx (SImode);
38373 emit_move_insn (target, const0_rtx);
38374 target = gen_rtx_SUBREG (QImode, target, 0);
38377 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38378 gen_rtx_fmt_ee (EQ, QImode,
38379 gen_rtx_REG ((machine_mode) d->flag,
38382 return SUBREG_REG (target);
38388 /* Subroutine of ix86_expand_builtin to take care of insns with
38389 variable number of operands. */
38392 ix86_expand_args_builtin (const struct builtin_description *d,
38393 tree exp, rtx target)
38395 rtx pat, real_target;
38396 unsigned int i, nargs;
38397 unsigned int nargs_constant = 0;
38398 unsigned int mask_pos = 0;
38399 int num_memory = 0;
38405 bool last_arg_count = false;
38406 enum insn_code icode = d->icode;
38407 const struct insn_data_d *insn_p = &insn_data[icode];
38408 machine_mode tmode = insn_p->operand[0].mode;
38409 machine_mode rmode = VOIDmode;
38411 enum rtx_code comparison = d->comparison;
38413 switch ((enum ix86_builtin_func_type) d->flag)
38415 case V2DF_FTYPE_V2DF_ROUND:
38416 case V4DF_FTYPE_V4DF_ROUND:
38417 case V4SF_FTYPE_V4SF_ROUND:
38418 case V8SF_FTYPE_V8SF_ROUND:
38419 case V4SI_FTYPE_V4SF_ROUND:
38420 case V8SI_FTYPE_V8SF_ROUND:
38421 return ix86_expand_sse_round (d, exp, target);
38422 case V4SI_FTYPE_V2DF_V2DF_ROUND:
38423 case V8SI_FTYPE_V4DF_V4DF_ROUND:
38424 case V16SI_FTYPE_V8DF_V8DF_ROUND:
38425 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
38426 case INT_FTYPE_V8SF_V8SF_PTEST:
38427 case INT_FTYPE_V4DI_V4DI_PTEST:
38428 case INT_FTYPE_V4DF_V4DF_PTEST:
38429 case INT_FTYPE_V4SF_V4SF_PTEST:
38430 case INT_FTYPE_V2DI_V2DI_PTEST:
38431 case INT_FTYPE_V2DF_V2DF_PTEST:
38432 return ix86_expand_sse_ptest (d, exp, target);
38433 case FLOAT128_FTYPE_FLOAT128:
38434 case FLOAT_FTYPE_FLOAT:
38435 case INT_FTYPE_INT:
38436 case UINT64_FTYPE_INT:
38437 case UINT16_FTYPE_UINT16:
38438 case INT64_FTYPE_INT64:
38439 case INT64_FTYPE_V4SF:
38440 case INT64_FTYPE_V2DF:
38441 case INT_FTYPE_V16QI:
38442 case INT_FTYPE_V8QI:
38443 case INT_FTYPE_V8SF:
38444 case INT_FTYPE_V4DF:
38445 case INT_FTYPE_V4SF:
38446 case INT_FTYPE_V2DF:
38447 case INT_FTYPE_V32QI:
38448 case V16QI_FTYPE_V16QI:
38449 case V8SI_FTYPE_V8SF:
38450 case V8SI_FTYPE_V4SI:
38451 case V8HI_FTYPE_V8HI:
38452 case V8HI_FTYPE_V16QI:
38453 case V8QI_FTYPE_V8QI:
38454 case V8SF_FTYPE_V8SF:
38455 case V8SF_FTYPE_V8SI:
38456 case V8SF_FTYPE_V4SF:
38457 case V8SF_FTYPE_V8HI:
38458 case V4SI_FTYPE_V4SI:
38459 case V4SI_FTYPE_V16QI:
38460 case V4SI_FTYPE_V4SF:
38461 case V4SI_FTYPE_V8SI:
38462 case V4SI_FTYPE_V8HI:
38463 case V4SI_FTYPE_V4DF:
38464 case V4SI_FTYPE_V2DF:
38465 case V4HI_FTYPE_V4HI:
38466 case V4DF_FTYPE_V4DF:
38467 case V4DF_FTYPE_V4SI:
38468 case V4DF_FTYPE_V4SF:
38469 case V4DF_FTYPE_V2DF:
38470 case V4SF_FTYPE_V4SF:
38471 case V4SF_FTYPE_V4SI:
38472 case V4SF_FTYPE_V8SF:
38473 case V4SF_FTYPE_V4DF:
38474 case V4SF_FTYPE_V8HI:
38475 case V4SF_FTYPE_V2DF:
38476 case V2DI_FTYPE_V2DI:
38477 case V2DI_FTYPE_V16QI:
38478 case V2DI_FTYPE_V8HI:
38479 case V2DI_FTYPE_V4SI:
38480 case V2DF_FTYPE_V2DF:
38481 case V2DF_FTYPE_V4SI:
38482 case V2DF_FTYPE_V4DF:
38483 case V2DF_FTYPE_V4SF:
38484 case V2DF_FTYPE_V2SI:
38485 case V2SI_FTYPE_V2SI:
38486 case V2SI_FTYPE_V4SF:
38487 case V2SI_FTYPE_V2SF:
38488 case V2SI_FTYPE_V2DF:
38489 case V2SF_FTYPE_V2SF:
38490 case V2SF_FTYPE_V2SI:
38491 case V32QI_FTYPE_V32QI:
38492 case V32QI_FTYPE_V16QI:
38493 case V16HI_FTYPE_V16HI:
38494 case V16HI_FTYPE_V8HI:
38495 case V8SI_FTYPE_V8SI:
38496 case V16HI_FTYPE_V16QI:
38497 case V8SI_FTYPE_V16QI:
38498 case V4DI_FTYPE_V16QI:
38499 case V8SI_FTYPE_V8HI:
38500 case V4DI_FTYPE_V8HI:
38501 case V4DI_FTYPE_V4SI:
38502 case V4DI_FTYPE_V2DI:
38503 case UHI_FTYPE_UHI:
38504 case UHI_FTYPE_V16QI:
38505 case USI_FTYPE_V32QI:
38506 case UDI_FTYPE_V64QI:
38507 case V16QI_FTYPE_UHI:
38508 case V32QI_FTYPE_USI:
38509 case V64QI_FTYPE_UDI:
38510 case V8HI_FTYPE_UQI:
38511 case V16HI_FTYPE_UHI:
38512 case V32HI_FTYPE_USI:
38513 case V4SI_FTYPE_UQI:
38514 case V8SI_FTYPE_UQI:
38515 case V4SI_FTYPE_UHI:
38516 case V8SI_FTYPE_UHI:
38517 case UQI_FTYPE_V8HI:
38518 case UHI_FTYPE_V16HI:
38519 case USI_FTYPE_V32HI:
38520 case UQI_FTYPE_V4SI:
38521 case UQI_FTYPE_V8SI:
38522 case UHI_FTYPE_V16SI:
38523 case UQI_FTYPE_V2DI:
38524 case UQI_FTYPE_V4DI:
38525 case UQI_FTYPE_V8DI:
38526 case V16SI_FTYPE_UHI:
38527 case V2DI_FTYPE_UQI:
38528 case V4DI_FTYPE_UQI:
38529 case V16SI_FTYPE_INT:
38530 case V16SF_FTYPE_V8SF:
38531 case V16SI_FTYPE_V8SI:
38532 case V16SF_FTYPE_V4SF:
38533 case V16SI_FTYPE_V4SI:
38534 case V16SF_FTYPE_V16SF:
38535 case V8DI_FTYPE_UQI:
38536 case V8DF_FTYPE_V4DF:
38537 case V8DF_FTYPE_V2DF:
38538 case V8DF_FTYPE_V8DF:
38541 case V4SF_FTYPE_V4SF_VEC_MERGE:
38542 case V2DF_FTYPE_V2DF_VEC_MERGE:
38543 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
38544 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
38545 case V16QI_FTYPE_V16QI_V16QI:
38546 case V16QI_FTYPE_V8HI_V8HI:
38547 case V16SF_FTYPE_V16SF_V16SF:
38548 case V8QI_FTYPE_V8QI_V8QI:
38549 case V8QI_FTYPE_V4HI_V4HI:
38550 case V8HI_FTYPE_V8HI_V8HI:
38551 case V8HI_FTYPE_V16QI_V16QI:
38552 case V8HI_FTYPE_V4SI_V4SI:
38553 case V8SF_FTYPE_V8SF_V8SF:
38554 case V8SF_FTYPE_V8SF_V8SI:
38555 case V8DF_FTYPE_V8DF_V8DF:
38556 case V4SI_FTYPE_V4SI_V4SI:
38557 case V4SI_FTYPE_V8HI_V8HI:
38558 case V4SI_FTYPE_V2DF_V2DF:
38559 case V4HI_FTYPE_V4HI_V4HI:
38560 case V4HI_FTYPE_V8QI_V8QI:
38561 case V4HI_FTYPE_V2SI_V2SI:
38562 case V4DF_FTYPE_V4DF_V4DF:
38563 case V4DF_FTYPE_V4DF_V4DI:
38564 case V4SF_FTYPE_V4SF_V4SF:
38565 case V4SF_FTYPE_V4SF_V4SI:
38566 case V4SF_FTYPE_V4SF_V2SI:
38567 case V4SF_FTYPE_V4SF_V2DF:
38568 case V4SF_FTYPE_V4SF_UINT:
38569 case V4SF_FTYPE_V4SF_DI:
38570 case V4SF_FTYPE_V4SF_SI:
38571 case V2DI_FTYPE_V2DI_V2DI:
38572 case V2DI_FTYPE_V16QI_V16QI:
38573 case V2DI_FTYPE_V4SI_V4SI:
38574 case V2DI_FTYPE_V2DI_V16QI:
38575 case V2SI_FTYPE_V2SI_V2SI:
38576 case V2SI_FTYPE_V4HI_V4HI:
38577 case V2SI_FTYPE_V2SF_V2SF:
38578 case V2DF_FTYPE_V2DF_V2DF:
38579 case V2DF_FTYPE_V2DF_V4SF:
38580 case V2DF_FTYPE_V2DF_V2DI:
38581 case V2DF_FTYPE_V2DF_DI:
38582 case V2DF_FTYPE_V2DF_SI:
38583 case V2DF_FTYPE_V2DF_UINT:
38584 case V2SF_FTYPE_V2SF_V2SF:
38585 case V1DI_FTYPE_V1DI_V1DI:
38586 case V1DI_FTYPE_V8QI_V8QI:
38587 case V1DI_FTYPE_V2SI_V2SI:
38588 case V32QI_FTYPE_V16HI_V16HI:
38589 case V16HI_FTYPE_V8SI_V8SI:
38590 case V32QI_FTYPE_V32QI_V32QI:
38591 case V16HI_FTYPE_V32QI_V32QI:
38592 case V16HI_FTYPE_V16HI_V16HI:
38593 case V8SI_FTYPE_V4DF_V4DF:
38594 case V8SI_FTYPE_V8SI_V8SI:
38595 case V8SI_FTYPE_V16HI_V16HI:
38596 case V4DI_FTYPE_V4DI_V4DI:
38597 case V4DI_FTYPE_V8SI_V8SI:
38598 case V8DI_FTYPE_V64QI_V64QI:
38599 if (comparison == UNKNOWN)
38600 return ix86_expand_binop_builtin (icode, exp, target);
38603 case V4SF_FTYPE_V4SF_V4SF_SWAP:
38604 case V2DF_FTYPE_V2DF_V2DF_SWAP:
38605 gcc_assert (comparison != UNKNOWN);
38609 case V16HI_FTYPE_V16HI_V8HI_COUNT:
38610 case V16HI_FTYPE_V16HI_SI_COUNT:
38611 case V8SI_FTYPE_V8SI_V4SI_COUNT:
38612 case V8SI_FTYPE_V8SI_SI_COUNT:
38613 case V4DI_FTYPE_V4DI_V2DI_COUNT:
38614 case V4DI_FTYPE_V4DI_INT_COUNT:
38615 case V8HI_FTYPE_V8HI_V8HI_COUNT:
38616 case V8HI_FTYPE_V8HI_SI_COUNT:
38617 case V4SI_FTYPE_V4SI_V4SI_COUNT:
38618 case V4SI_FTYPE_V4SI_SI_COUNT:
38619 case V4HI_FTYPE_V4HI_V4HI_COUNT:
38620 case V4HI_FTYPE_V4HI_SI_COUNT:
38621 case V2DI_FTYPE_V2DI_V2DI_COUNT:
38622 case V2DI_FTYPE_V2DI_SI_COUNT:
38623 case V2SI_FTYPE_V2SI_V2SI_COUNT:
38624 case V2SI_FTYPE_V2SI_SI_COUNT:
38625 case V1DI_FTYPE_V1DI_V1DI_COUNT:
38626 case V1DI_FTYPE_V1DI_SI_COUNT:
38628 last_arg_count = true;
38630 case UINT64_FTYPE_UINT64_UINT64:
38631 case UINT_FTYPE_UINT_UINT:
38632 case UINT_FTYPE_UINT_USHORT:
38633 case UINT_FTYPE_UINT_UCHAR:
38634 case UINT16_FTYPE_UINT16_INT:
38635 case UINT8_FTYPE_UINT8_INT:
38636 case UHI_FTYPE_UHI_UHI:
38637 case USI_FTYPE_USI_USI:
38638 case UDI_FTYPE_UDI_UDI:
38639 case V16SI_FTYPE_V8DF_V8DF:
38642 case V2DI_FTYPE_V2DI_INT_CONVERT:
38645 nargs_constant = 1;
38647 case V4DI_FTYPE_V4DI_INT_CONVERT:
38650 nargs_constant = 1;
38652 case V8DI_FTYPE_V8DI_INT_CONVERT:
38655 nargs_constant = 1;
38657 case V8HI_FTYPE_V8HI_INT:
38658 case V8HI_FTYPE_V8SF_INT:
38659 case V16HI_FTYPE_V16SF_INT:
38660 case V8HI_FTYPE_V4SF_INT:
38661 case V8SF_FTYPE_V8SF_INT:
38662 case V4SF_FTYPE_V16SF_INT:
38663 case V16SF_FTYPE_V16SF_INT:
38664 case V4SI_FTYPE_V4SI_INT:
38665 case V4SI_FTYPE_V8SI_INT:
38666 case V4HI_FTYPE_V4HI_INT:
38667 case V4DF_FTYPE_V4DF_INT:
38668 case V4DF_FTYPE_V8DF_INT:
38669 case V4SF_FTYPE_V4SF_INT:
38670 case V4SF_FTYPE_V8SF_INT:
38671 case V2DI_FTYPE_V2DI_INT:
38672 case V2DF_FTYPE_V2DF_INT:
38673 case V2DF_FTYPE_V4DF_INT:
38674 case V16HI_FTYPE_V16HI_INT:
38675 case V8SI_FTYPE_V8SI_INT:
38676 case V16SI_FTYPE_V16SI_INT:
38677 case V4SI_FTYPE_V16SI_INT:
38678 case V4DI_FTYPE_V4DI_INT:
38679 case V2DI_FTYPE_V4DI_INT:
38680 case V4DI_FTYPE_V8DI_INT:
38681 case QI_FTYPE_V4SF_INT:
38682 case QI_FTYPE_V2DF_INT:
38684 nargs_constant = 1;
38686 case V16QI_FTYPE_V16QI_V16QI_V16QI:
38687 case V8SF_FTYPE_V8SF_V8SF_V8SF:
38688 case V4DF_FTYPE_V4DF_V4DF_V4DF:
38689 case V4SF_FTYPE_V4SF_V4SF_V4SF:
38690 case V2DF_FTYPE_V2DF_V2DF_V2DF:
38691 case V32QI_FTYPE_V32QI_V32QI_V32QI:
38692 case UHI_FTYPE_V16SI_V16SI_UHI:
38693 case UQI_FTYPE_V8DI_V8DI_UQI:
38694 case V16HI_FTYPE_V16SI_V16HI_UHI:
38695 case V16QI_FTYPE_V16SI_V16QI_UHI:
38696 case V16QI_FTYPE_V8DI_V16QI_UQI:
38697 case V16SF_FTYPE_V16SF_V16SF_UHI:
38698 case V16SF_FTYPE_V4SF_V16SF_UHI:
38699 case V16SI_FTYPE_SI_V16SI_UHI:
38700 case V16SI_FTYPE_V16HI_V16SI_UHI:
38701 case V16SI_FTYPE_V16QI_V16SI_UHI:
38702 case V8SF_FTYPE_V4SF_V8SF_UQI:
38703 case V4DF_FTYPE_V2DF_V4DF_UQI:
38704 case V8SI_FTYPE_V4SI_V8SI_UQI:
38705 case V8SI_FTYPE_SI_V8SI_UQI:
38706 case V4SI_FTYPE_V4SI_V4SI_UQI:
38707 case V4SI_FTYPE_SI_V4SI_UQI:
38708 case V4DI_FTYPE_V2DI_V4DI_UQI:
38709 case V4DI_FTYPE_DI_V4DI_UQI:
38710 case V2DI_FTYPE_V2DI_V2DI_UQI:
38711 case V2DI_FTYPE_DI_V2DI_UQI:
38712 case V64QI_FTYPE_V64QI_V64QI_UDI:
38713 case V64QI_FTYPE_V16QI_V64QI_UDI:
38714 case V64QI_FTYPE_QI_V64QI_UDI:
38715 case V32QI_FTYPE_V32QI_V32QI_USI:
38716 case V32QI_FTYPE_V16QI_V32QI_USI:
38717 case V32QI_FTYPE_QI_V32QI_USI:
38718 case V16QI_FTYPE_V16QI_V16QI_UHI:
38719 case V16QI_FTYPE_QI_V16QI_UHI:
38720 case V32HI_FTYPE_V8HI_V32HI_USI:
38721 case V32HI_FTYPE_HI_V32HI_USI:
38722 case V16HI_FTYPE_V8HI_V16HI_UHI:
38723 case V16HI_FTYPE_HI_V16HI_UHI:
38724 case V8HI_FTYPE_V8HI_V8HI_UQI:
38725 case V8HI_FTYPE_HI_V8HI_UQI:
38726 case V8SF_FTYPE_V8HI_V8SF_UQI:
38727 case V4SF_FTYPE_V8HI_V4SF_UQI:
38728 case V8SI_FTYPE_V8SF_V8SI_UQI:
38729 case V4SI_FTYPE_V4SF_V4SI_UQI:
38730 case V4DI_FTYPE_V4SF_V4DI_UQI:
38731 case V2DI_FTYPE_V4SF_V2DI_UQI:
38732 case V4SF_FTYPE_V4DI_V4SF_UQI:
38733 case V4SF_FTYPE_V2DI_V4SF_UQI:
38734 case V4DF_FTYPE_V4DI_V4DF_UQI:
38735 case V2DF_FTYPE_V2DI_V2DF_UQI:
38736 case V16QI_FTYPE_V8HI_V16QI_UQI:
38737 case V16QI_FTYPE_V16HI_V16QI_UHI:
38738 case V16QI_FTYPE_V4SI_V16QI_UQI:
38739 case V16QI_FTYPE_V8SI_V16QI_UQI:
38740 case V8HI_FTYPE_V4SI_V8HI_UQI:
38741 case V8HI_FTYPE_V8SI_V8HI_UQI:
38742 case V16QI_FTYPE_V2DI_V16QI_UQI:
38743 case V16QI_FTYPE_V4DI_V16QI_UQI:
38744 case V8HI_FTYPE_V2DI_V8HI_UQI:
38745 case V8HI_FTYPE_V4DI_V8HI_UQI:
38746 case V4SI_FTYPE_V2DI_V4SI_UQI:
38747 case V4SI_FTYPE_V4DI_V4SI_UQI:
38748 case V32QI_FTYPE_V32HI_V32QI_USI:
38749 case UHI_FTYPE_V16QI_V16QI_UHI:
38750 case USI_FTYPE_V32QI_V32QI_USI:
38751 case UDI_FTYPE_V64QI_V64QI_UDI:
38752 case UQI_FTYPE_V8HI_V8HI_UQI:
38753 case UHI_FTYPE_V16HI_V16HI_UHI:
38754 case USI_FTYPE_V32HI_V32HI_USI:
38755 case UQI_FTYPE_V4SI_V4SI_UQI:
38756 case UQI_FTYPE_V8SI_V8SI_UQI:
38757 case UQI_FTYPE_V2DI_V2DI_UQI:
38758 case UQI_FTYPE_V4DI_V4DI_UQI:
38759 case V4SF_FTYPE_V2DF_V4SF_UQI:
38760 case V4SF_FTYPE_V4DF_V4SF_UQI:
38761 case V16SI_FTYPE_V16SI_V16SI_UHI:
38762 case V16SI_FTYPE_V4SI_V16SI_UHI:
38763 case V2DI_FTYPE_V4SI_V2DI_UQI:
38764 case V2DI_FTYPE_V8HI_V2DI_UQI:
38765 case V2DI_FTYPE_V16QI_V2DI_UQI:
38766 case V4DI_FTYPE_V4DI_V4DI_UQI:
38767 case V4DI_FTYPE_V4SI_V4DI_UQI:
38768 case V4DI_FTYPE_V8HI_V4DI_UQI:
38769 case V4DI_FTYPE_V16QI_V4DI_UQI:
38770 case V4DI_FTYPE_V4DF_V4DI_UQI:
38771 case V2DI_FTYPE_V2DF_V2DI_UQI:
38772 case V4SI_FTYPE_V4DF_V4SI_UQI:
38773 case V4SI_FTYPE_V2DF_V4SI_UQI:
38774 case V4SI_FTYPE_V8HI_V4SI_UQI:
38775 case V4SI_FTYPE_V16QI_V4SI_UQI:
38776 case V4DI_FTYPE_V4DI_V4DI_V4DI:
38777 case V8DF_FTYPE_V2DF_V8DF_UQI:
38778 case V8DF_FTYPE_V4DF_V8DF_UQI:
38779 case V8DF_FTYPE_V8DF_V8DF_UQI:
38780 case V8SF_FTYPE_V8SF_V8SF_UQI:
38781 case V8SF_FTYPE_V8SI_V8SF_UQI:
38782 case V4DF_FTYPE_V4DF_V4DF_UQI:
38783 case V4SF_FTYPE_V4SF_V4SF_UQI:
38784 case V2DF_FTYPE_V2DF_V2DF_UQI:
38785 case V2DF_FTYPE_V4SF_V2DF_UQI:
38786 case V2DF_FTYPE_V4SI_V2DF_UQI:
38787 case V4SF_FTYPE_V4SI_V4SF_UQI:
38788 case V4DF_FTYPE_V4SF_V4DF_UQI:
38789 case V4DF_FTYPE_V4SI_V4DF_UQI:
38790 case V8SI_FTYPE_V8SI_V8SI_UQI:
38791 case V8SI_FTYPE_V8HI_V8SI_UQI:
38792 case V8SI_FTYPE_V16QI_V8SI_UQI:
38793 case V8DF_FTYPE_V8SI_V8DF_UQI:
38794 case V8DI_FTYPE_DI_V8DI_UQI:
38795 case V16SF_FTYPE_V8SF_V16SF_UHI:
38796 case V16SI_FTYPE_V8SI_V16SI_UHI:
38797 case V16HI_FTYPE_V16HI_V16HI_UHI:
38798 case V8HI_FTYPE_V16QI_V8HI_UQI:
38799 case V16HI_FTYPE_V16QI_V16HI_UHI:
38800 case V32HI_FTYPE_V32HI_V32HI_USI:
38801 case V32HI_FTYPE_V32QI_V32HI_USI:
38802 case V8DI_FTYPE_V16QI_V8DI_UQI:
38803 case V8DI_FTYPE_V2DI_V8DI_UQI:
38804 case V8DI_FTYPE_V4DI_V8DI_UQI:
38805 case V8DI_FTYPE_V8DI_V8DI_UQI:
38806 case V8DI_FTYPE_V8HI_V8DI_UQI:
38807 case V8DI_FTYPE_V8SI_V8DI_UQI:
38808 case V8HI_FTYPE_V8DI_V8HI_UQI:
38809 case V8SI_FTYPE_V8DI_V8SI_UQI:
38810 case V4SI_FTYPE_V4SI_V4SI_V4SI:
38813 case V32QI_FTYPE_V32QI_V32QI_INT:
38814 case V16HI_FTYPE_V16HI_V16HI_INT:
38815 case V16QI_FTYPE_V16QI_V16QI_INT:
38816 case V4DI_FTYPE_V4DI_V4DI_INT:
38817 case V8HI_FTYPE_V8HI_V8HI_INT:
38818 case V8SI_FTYPE_V8SI_V8SI_INT:
38819 case V8SI_FTYPE_V8SI_V4SI_INT:
38820 case V8SF_FTYPE_V8SF_V8SF_INT:
38821 case V8SF_FTYPE_V8SF_V4SF_INT:
38822 case V4SI_FTYPE_V4SI_V4SI_INT:
38823 case V4DF_FTYPE_V4DF_V4DF_INT:
38824 case V16SF_FTYPE_V16SF_V16SF_INT:
38825 case V16SF_FTYPE_V16SF_V4SF_INT:
38826 case V16SI_FTYPE_V16SI_V4SI_INT:
38827 case V4DF_FTYPE_V4DF_V2DF_INT:
38828 case V4SF_FTYPE_V4SF_V4SF_INT:
38829 case V2DI_FTYPE_V2DI_V2DI_INT:
38830 case V4DI_FTYPE_V4DI_V2DI_INT:
38831 case V2DF_FTYPE_V2DF_V2DF_INT:
38832 case UQI_FTYPE_V8DI_V8UDI_INT:
38833 case UQI_FTYPE_V8DF_V8DF_INT:
38834 case UQI_FTYPE_V2DF_V2DF_INT:
38835 case UQI_FTYPE_V4SF_V4SF_INT:
38836 case UHI_FTYPE_V16SI_V16SI_INT:
38837 case UHI_FTYPE_V16SF_V16SF_INT:
38839 nargs_constant = 1;
38841 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
38844 nargs_constant = 1;
38846 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
38849 nargs_constant = 1;
38851 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
38854 nargs_constant = 1;
38856 case V2DI_FTYPE_V2DI_UINT_UINT:
38858 nargs_constant = 2;
38860 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
38863 nargs_constant = 1;
38865 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
38869 nargs_constant = 1;
38871 case QI_FTYPE_V8DF_INT_UQI:
38872 case QI_FTYPE_V4DF_INT_UQI:
38873 case QI_FTYPE_V2DF_INT_UQI:
38874 case HI_FTYPE_V16SF_INT_UHI:
38875 case QI_FTYPE_V8SF_INT_UQI:
38876 case QI_FTYPE_V4SF_INT_UQI:
38879 nargs_constant = 1;
38881 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
38885 nargs_constant = 1;
38887 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
38891 nargs_constant = 1;
38893 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
38894 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
38895 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
38896 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
38897 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
38898 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
38899 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
38900 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
38901 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
38902 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
38903 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
38904 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
38905 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
38906 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
38907 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
38908 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
38909 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
38910 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
38911 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
38912 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
38913 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
38914 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
38915 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
38916 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
38917 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
38918 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
38919 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
38920 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
38921 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
38922 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
38923 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
38924 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
38925 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
38926 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
38927 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
38928 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
38929 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
38930 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
38931 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
38932 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
38933 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
38934 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
38935 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
38936 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
38937 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
38938 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
38939 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
38940 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
38941 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
38942 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
38943 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
38946 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
38947 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
38948 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
38949 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
38950 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
38952 nargs_constant = 1;
38954 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
38955 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
38956 case QI_FTYPE_V4DF_V4DF_INT_UQI:
38957 case QI_FTYPE_V8SF_V8SF_INT_UQI:
38958 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
38959 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
38960 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
38961 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
38962 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
38963 case USI_FTYPE_V32QI_V32QI_INT_USI:
38964 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
38965 case USI_FTYPE_V32HI_V32HI_INT_USI:
38966 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
38967 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
38970 nargs_constant = 1;
38972 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
38974 nargs_constant = 2;
38976 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
38977 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
38980 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
38981 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
38984 nargs_constant = 1;
38986 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
38987 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
38988 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
38989 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
38990 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
38991 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
38992 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
38993 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
38994 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
38995 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
38996 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
38997 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
38998 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
38999 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
39000 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
39001 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
39002 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
39003 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
39004 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
39005 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
39006 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
39007 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
39008 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
39009 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
39010 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
39011 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
39012 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
39013 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
39014 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
39015 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
39018 nargs_constant = 1;
39020 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
39021 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
39022 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
39023 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
39024 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
39025 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
39026 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
39027 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
39028 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
39029 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
39030 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
39031 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
39032 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
39033 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
39034 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
39035 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
39036 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
39037 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
39038 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
39039 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
39040 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
39041 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
39042 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
39043 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
39044 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
39045 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
39046 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
39049 nargs_constant = 1;
39051 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
39052 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
39053 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
39054 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
39055 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
39056 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
39057 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
39058 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
39059 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
39060 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
39064 nargs_constant = 1;
39068 gcc_unreachable ();
39071 gcc_assert (nargs <= ARRAY_SIZE (args));
39073 if (comparison != UNKNOWN)
39075 gcc_assert (nargs == 2);
39076 return ix86_expand_sse_compare (d, exp, target, swap);
39079 if (rmode == VOIDmode || rmode == tmode)
39083 || GET_MODE (target) != tmode
39084 || !insn_p->operand[0].predicate (target, tmode))
39085 target = gen_reg_rtx (tmode);
39086 real_target = target;
39090 real_target = gen_reg_rtx (tmode);
39091 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
39094 for (i = 0; i < nargs; i++)
39096 tree arg = CALL_EXPR_ARG (exp, i);
39097 rtx op = expand_normal (arg);
39098 machine_mode mode = insn_p->operand[i + 1].mode;
39099 bool match = insn_p->operand[i + 1].predicate (op, mode);
39101 if (last_arg_count && (i + 1) == nargs)
39103 /* SIMD shift insns take either an 8-bit immediate or
39104 register as count. But builtin functions take int as
39105 count. If count doesn't match, we put it in register. */
39108 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
39109 if (!insn_p->operand[i + 1].predicate (op, mode))
39110 op = copy_to_reg (op);
39113 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39114 (!mask_pos && (nargs - i) <= nargs_constant))
39119 case CODE_FOR_avx_vinsertf128v4di:
39120 case CODE_FOR_avx_vextractf128v4di:
39121 error ("the last argument must be an 1-bit immediate");
39124 case CODE_FOR_avx512f_cmpv8di3_mask:
39125 case CODE_FOR_avx512f_cmpv16si3_mask:
39126 case CODE_FOR_avx512f_ucmpv8di3_mask:
39127 case CODE_FOR_avx512f_ucmpv16si3_mask:
39128 case CODE_FOR_avx512vl_cmpv4di3_mask:
39129 case CODE_FOR_avx512vl_cmpv8si3_mask:
39130 case CODE_FOR_avx512vl_ucmpv4di3_mask:
39131 case CODE_FOR_avx512vl_ucmpv8si3_mask:
39132 case CODE_FOR_avx512vl_cmpv2di3_mask:
39133 case CODE_FOR_avx512vl_cmpv4si3_mask:
39134 case CODE_FOR_avx512vl_ucmpv2di3_mask:
39135 case CODE_FOR_avx512vl_ucmpv4si3_mask:
39136 error ("the last argument must be a 3-bit immediate");
39139 case CODE_FOR_sse4_1_roundsd:
39140 case CODE_FOR_sse4_1_roundss:
39142 case CODE_FOR_sse4_1_roundpd:
39143 case CODE_FOR_sse4_1_roundps:
39144 case CODE_FOR_avx_roundpd256:
39145 case CODE_FOR_avx_roundps256:
39147 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
39148 case CODE_FOR_sse4_1_roundps_sfix:
39149 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
39150 case CODE_FOR_avx_roundps_sfix256:
39152 case CODE_FOR_sse4_1_blendps:
39153 case CODE_FOR_avx_blendpd256:
39154 case CODE_FOR_avx_vpermilv4df:
39155 case CODE_FOR_avx_vpermilv4df_mask:
39156 case CODE_FOR_avx512f_getmantv8df_mask:
39157 case CODE_FOR_avx512f_getmantv16sf_mask:
39158 case CODE_FOR_avx512vl_getmantv8sf_mask:
39159 case CODE_FOR_avx512vl_getmantv4df_mask:
39160 case CODE_FOR_avx512vl_getmantv4sf_mask:
39161 case CODE_FOR_avx512vl_getmantv2df_mask:
39162 case CODE_FOR_avx512dq_rangepv8df_mask_round:
39163 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
39164 case CODE_FOR_avx512dq_rangepv4df_mask:
39165 case CODE_FOR_avx512dq_rangepv8sf_mask:
39166 case CODE_FOR_avx512dq_rangepv2df_mask:
39167 case CODE_FOR_avx512dq_rangepv4sf_mask:
39168 case CODE_FOR_avx_shufpd256_mask:
39169 error ("the last argument must be a 4-bit immediate");
39172 case CODE_FOR_sha1rnds4:
39173 case CODE_FOR_sse4_1_blendpd:
39174 case CODE_FOR_avx_vpermilv2df:
39175 case CODE_FOR_avx_vpermilv2df_mask:
39176 case CODE_FOR_xop_vpermil2v2df3:
39177 case CODE_FOR_xop_vpermil2v4sf3:
39178 case CODE_FOR_xop_vpermil2v4df3:
39179 case CODE_FOR_xop_vpermil2v8sf3:
39180 case CODE_FOR_avx512f_vinsertf32x4_mask:
39181 case CODE_FOR_avx512f_vinserti32x4_mask:
39182 case CODE_FOR_avx512f_vextractf32x4_mask:
39183 case CODE_FOR_avx512f_vextracti32x4_mask:
39184 case CODE_FOR_sse2_shufpd:
39185 case CODE_FOR_sse2_shufpd_mask:
39186 case CODE_FOR_avx512dq_shuf_f64x2_mask:
39187 case CODE_FOR_avx512dq_shuf_i64x2_mask:
39188 case CODE_FOR_avx512vl_shuf_i32x4_mask:
39189 case CODE_FOR_avx512vl_shuf_f32x4_mask:
39190 error ("the last argument must be a 2-bit immediate");
39193 case CODE_FOR_avx_vextractf128v4df:
39194 case CODE_FOR_avx_vextractf128v8sf:
39195 case CODE_FOR_avx_vextractf128v8si:
39196 case CODE_FOR_avx_vinsertf128v4df:
39197 case CODE_FOR_avx_vinsertf128v8sf:
39198 case CODE_FOR_avx_vinsertf128v8si:
39199 case CODE_FOR_avx512f_vinsertf64x4_mask:
39200 case CODE_FOR_avx512f_vinserti64x4_mask:
39201 case CODE_FOR_avx512f_vextractf64x4_mask:
39202 case CODE_FOR_avx512f_vextracti64x4_mask:
39203 case CODE_FOR_avx512dq_vinsertf32x8_mask:
39204 case CODE_FOR_avx512dq_vinserti32x8_mask:
39205 case CODE_FOR_avx512vl_vinsertv4df:
39206 case CODE_FOR_avx512vl_vinsertv4di:
39207 case CODE_FOR_avx512vl_vinsertv8sf:
39208 case CODE_FOR_avx512vl_vinsertv8si:
39209 error ("the last argument must be a 1-bit immediate");
39212 case CODE_FOR_avx_vmcmpv2df3:
39213 case CODE_FOR_avx_vmcmpv4sf3:
39214 case CODE_FOR_avx_cmpv2df3:
39215 case CODE_FOR_avx_cmpv4sf3:
39216 case CODE_FOR_avx_cmpv4df3:
39217 case CODE_FOR_avx_cmpv8sf3:
39218 case CODE_FOR_avx512f_cmpv8df3_mask:
39219 case CODE_FOR_avx512f_cmpv16sf3_mask:
39220 case CODE_FOR_avx512f_vmcmpv2df3_mask:
39221 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
39222 error ("the last argument must be a 5-bit immediate");
39226 switch (nargs_constant)
39229 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39230 (!mask_pos && (nargs - i) == nargs_constant))
39232 error ("the next to last argument must be an 8-bit immediate");
39236 error ("the last argument must be an 8-bit immediate");
39239 gcc_unreachable ();
39246 if (VECTOR_MODE_P (mode))
39247 op = safe_vector_operand (op, mode);
39249 /* If we aren't optimizing, only allow one memory operand to
39251 if (memory_operand (op, mode))
39254 op = fixup_modeless_constant (op, mode);
39256 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39258 if (optimize || !match || num_memory > 1)
39259 op = copy_to_mode_reg (mode, op);
39263 op = copy_to_reg (op);
39264 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39269 args[i].mode = mode;
39275 pat = GEN_FCN (icode) (real_target, args[0].op);
39278 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
39281 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39285 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39286 args[2].op, args[3].op);
39289 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39290 args[2].op, args[3].op, args[4].op);
39292 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39293 args[2].op, args[3].op, args[4].op,
39297 gcc_unreachable ();
39307 /* Transform pattern of following layout:
39310 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
39318 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
39322 (parallel [ A B ... ]) */
/* NOTE(review): the numeric prefixes on these lines are original file line
   numbers and are not consecutive — several statements of this function
   (return type, braces, loop init, the final "return pat") are not visible
   in this excerpt.  Comments below describe only what is shown.  */
/* Strip the UNSPEC_EMBEDDED_ROUNDING marker from PAT (an insn or a bare
   PARALLEL pattern) and return the pattern without it.  */
39325 ix86_erase_embedded_rounding (rtx pat)
/* Accept a full insn and reduce to its pattern.  */
39327 if (GET_CODE (pat) == INSN)
39328 pat = PATTERN (pat);
39330 gcc_assert (GET_CODE (pat) == PARALLEL);
/* Special case: a two-element PARALLEL must be [SET; rounding-unspec].  */
39332 if (XVECLEN (pat, 0) == 2)
39334 rtx p0 = XVECEXP (pat, 0, 0);
39335 rtx p1 = XVECEXP (pat, 0, 1);
39337 gcc_assert (GET_CODE (p0) == SET
39338 && GET_CODE (p1) == UNSPEC
39339 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
/* General case: copy every element except the rounding unspec into RES,
   then rebuild the PARALLEL from the survivors.  */
39345 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
39349 for (; i < XVECLEN (pat, 0); ++i)
39351 rtx elem = XVECEXP (pat, 0, i);
39352 if (GET_CODE (elem) != UNSPEC
39353 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
39357 /* No more than 1 occurrence was removed.  */
39358 gcc_assert (j >= XVECLEN (pat, 0) - 1);
39360 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
39364 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
/* NOTE(review): sampled excerpt — the embedded original line numbers skip,
   so the return type, braces and several error-path statements of this
   function are not visible here.  */
/* Expands a 4-argument comi/ucomi builtin with an embedded-rounding
   operand: args 0/1 are the operands to compare, arg 2 selects the
   comparison (index into the tables below), arg 3 is the rounding mode.
   Returns an SImode register holding the 0/1 comparison result.  */
39367 ix86_expand_sse_comi_round (const struct builtin_description *d,
39368 tree exp, rtx target)
39371 tree arg0 = CALL_EXPR_ARG (exp, 0);
39372 tree arg1 = CALL_EXPR_ARG (exp, 1);
39373 tree arg2 = CALL_EXPR_ARG (exp, 2);
39374 tree arg3 = CALL_EXPR_ARG (exp, 3);
39375 rtx op0 = expand_normal (arg0);
39376 rtx op1 = expand_normal (arg1);
39377 rtx op2 = expand_normal (arg2);
39378 rtx op3 = expand_normal (arg3);
39379 enum insn_code icode = d->icode;
39380 const struct insn_data_d *insn_p = &insn_data[icode];
39381 machine_mode mode0 = insn_p->operand[0].mode;
39382 machine_mode mode1 = insn_p->operand[1].mode;
39383 enum rtx_code comparison = UNEQ;
39384 bool need_ucomi = false;
39386 /* See avxintrin.h for values.  */
/* Maps the 5-bit immediate in arg 2 to an RTL comparison code ...  */
39387 enum rtx_code comi_comparisons[32] =
39389 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
39390 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
39391 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
/* ... and to whether the unordered (ucomi) variant is required.  */
39393 bool need_ucomi_values[32] =
39395 true, false, false, true, true, false, false, true,
39396 true, false, false, true, true, false, false, true,
39397 false, true, true, false, false, true, true, false,
39398 false, true, true, false, false, true, true, false
/* Validate the comparison selector: must be a constant in [0, 32).  */
39401 if (!CONST_INT_P (op2))
39403 error ("the third argument must be comparison constant");
39406 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
39408 error ("incorrect comparison mode");
/* Validate the rounding operand against the insn's own predicate.  */
39412 if (!insn_p->operand[2].predicate (op3, SImode))
39414 error ("incorrect rounding operand");
39418 comparison = comi_comparisons[INTVAL (op2)];
39419 need_ucomi = need_ucomi_values[INTVAL (op2)];
39421 if (VECTOR_MODE_P (mode0))
39422 op0 = safe_vector_operand (op0, mode0);
39423 if (VECTOR_MODE_P (mode1))
39424 op1 = safe_vector_operand (op1, mode1);
/* Result lives in an SImode pseudo, zeroed first; only the low QImode
   part is written by the comparison below.  */
39426 target = gen_reg_rtx (SImode);
39427 emit_move_insn (target, const0_rtx);
39428 target = gen_rtx_SUBREG (QImode, target, 0);
/* Force operands into registers when the predicates demand it.  */
39430 if ((optimize && !register_operand (op0, mode0))
39431 || !insn_p->operand[0].predicate (op0, mode0))
39432 op0 = copy_to_mode_reg (mode0, op0);
39433 if ((optimize && !register_operand (op1, mode1))
39434 || !insn_p->operand[1].predicate (op1, mode1))
39435 op1 = copy_to_mode_reg (mode1, op1);
/* Switch to the unordered-compare insn when the table says so.  */
39438 icode = icode == CODE_FOR_sse_comi_round
39439 ? CODE_FOR_sse_ucomi_round
39440 : CODE_FOR_sse2_ucomi_round;
39442 pat = GEN_FCN (icode) (op0, op1, op3);
39446 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
39447 if (INTVAL (op3) == NO_ROUND)
/* No-round: drop the now-redundant embedded-rounding marker.  */
39449 pat = ix86_erase_embedded_rounding (pat);
39453 set_dst = SET_DEST (pat);
/* PARALLEL case: the SET is the first element.  */
39457 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
39458 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
/* Write the flag comparison into the low byte of TARGET only.  */
39462 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39463 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo underlying the QImode subreg.  */
39467 return SUBREG_REG (target);
/* NOTE(review): sampled excerpt — embedded original line numbers skip, so
   the leading comment, return type, braces, many "break"/"nargs = N"
   statements and the final "return target" are not visible here.  */
/* Expand a builtin D (call EXP, optional TARGET) whose last argument is an
   embedded-rounding immediate.  Dispatches on the builtin's function type
   to learn the argument count and how many trailing constants it takes,
   validates the immediates, loads the operands, emits the insn, and strips
   the rounding marker when the rounding operand is NO_ROUND.  */
39471 ix86_expand_round_builtin (const struct builtin_description *d,
39472 tree exp, rtx target)
39475 unsigned int i, nargs;
39481 enum insn_code icode = d->icode;
39482 const struct insn_data_d *insn_p = &insn_data[icode];
39483 machine_mode tmode = insn_p->operand[0].mode;
39484 unsigned int nargs_constant = 0;
/* Set when the rounding operand is NO_ROUND, so the embedded-rounding
   unspec must be erased from the emitted pattern.  */
39485 unsigned int redundant_embed_rnd = 0;
/* Classify by function type: each group fixes nargs (not visible in this
   excerpt) and nargs_constant.  */
39487 switch ((enum ix86_builtin_func_type) d->flag)
39489 case UINT64_FTYPE_V2DF_INT:
39490 case UINT64_FTYPE_V4SF_INT:
39491 case UINT_FTYPE_V2DF_INT:
39492 case UINT_FTYPE_V4SF_INT:
39493 case INT64_FTYPE_V2DF_INT:
39494 case INT64_FTYPE_V4SF_INT:
39495 case INT_FTYPE_V2DF_INT:
39496 case INT_FTYPE_V4SF_INT:
39499 case V4SF_FTYPE_V4SF_UINT_INT:
39500 case V4SF_FTYPE_V4SF_UINT64_INT:
39501 case V2DF_FTYPE_V2DF_UINT64_INT:
39502 case V4SF_FTYPE_V4SF_INT_INT:
39503 case V4SF_FTYPE_V4SF_INT64_INT:
39504 case V2DF_FTYPE_V2DF_INT64_INT:
39505 case V4SF_FTYPE_V4SF_V4SF_INT:
39506 case V2DF_FTYPE_V2DF_V2DF_INT:
39507 case V4SF_FTYPE_V4SF_V2DF_INT:
39508 case V2DF_FTYPE_V2DF_V4SF_INT:
39511 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
39512 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
39513 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
39514 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
39515 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
39516 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
39517 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
39518 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
39519 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
39520 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
39521 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
39522 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
39523 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
39524 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
39527 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
39528 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
39529 nargs_constant = 2;
/* comi variants are handled wholly by the dedicated helper.  */
39532 case INT_FTYPE_V4SF_V4SF_INT_INT:
39533 case INT_FTYPE_V2DF_V2DF_INT_INT:
39534 return ix86_expand_sse_comi_round (d, exp, target);
39535 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
39536 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
39537 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
39538 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
39539 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
39540 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
39543 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
39544 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
39545 nargs_constant = 4;
39548 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
39549 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
39550 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
39551 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
39552 nargs_constant = 3;
39555 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
39556 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
39557 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
39558 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
39560 nargs_constant = 4;
39562 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
39563 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
39564 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
39565 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
39567 nargs_constant = 3;
39570 gcc_unreachable ();
39572 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Make sure TARGET is a fresh register of the right mode when the one we
   were handed is unusable.  */
39576 || GET_MODE (target) != tmode
39577 || !insn_p->operand[0].predicate (target, tmode))
39578 target = gen_reg_rtx (tmode);
/* Expand each call argument and fix it up for the insn's predicates.  */
39580 for (i = 0; i < nargs; i++)
39582 tree arg = CALL_EXPR_ARG (exp, i);
39583 rtx op = expand_normal (arg);
39584 machine_mode mode = insn_p->operand[i + 1].mode;
39585 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* First trailing-constant position: report a per-insn width when the
   immediate does not satisfy its predicate.  */
39587 if (i == nargs - nargs_constant)
39593 case CODE_FOR_avx512f_getmantv8df_mask_round:
39594 case CODE_FOR_avx512f_getmantv16sf_mask_round:
39595 case CODE_FOR_avx512f_vgetmantv2df_round:
39596 case CODE_FOR_avx512f_vgetmantv4sf_round:
39597 error ("the immediate argument must be a 4-bit immediate");
39599 case CODE_FOR_avx512f_cmpv8df3_mask_round:
39600 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
39601 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
39602 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
39603 error ("the immediate argument must be a 5-bit immediate");
39606 error ("the immediate argument must be an 8-bit immediate");
/* Last argument is always the rounding-mode operand.  */
39611 else if (i == nargs-1)
39613 if (!insn_p->operand[nargs].predicate (op, SImode))
39615 error ("incorrect rounding operand");
39619 /* If there is no rounding use normal version of the pattern.  */
39620 if (INTVAL (op) == NO_ROUND)
39621 redundant_embed_rnd = 1;
/* Ordinary (non-immediate) operand handling.  */
39625 if (VECTOR_MODE_P (mode))
39626 op = safe_vector_operand (op, mode);
39628 op = fixup_modeless_constant (op, mode);
39630 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39632 if (optimize || !match)
39633 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: load into a register and reinterpret via subreg.  */
39637 op = copy_to_reg (op);
39638 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39643 args[i].mode = mode;
/* Emit the insn with the operand count this builtin requires.  */
39649 pat = GEN_FCN (icode) (target, args[0].op);
39652 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
39655 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39659 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39660 args[2].op, args[3].op);
39663 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39664 args[2].op, args[3].op, args[4].op);
39666 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39667 args[2].op, args[3].op, args[4].op,
39671 gcc_unreachable ();
/* NO_ROUND: strip the embedded-rounding unspec before emitting.  */
39677 if (redundant_embed_rnd)
39678 pat = ix86_erase_embedded_rounding (pat);
39684 /* Subroutine of ix86_expand_builtin to take care of special insns
39685 with variable number of operands. */
39688 ix86_expand_special_args_builtin (const struct builtin_description *d,
39689 tree exp, rtx target)
39693 unsigned int i, nargs, arg_adjust, memory;
39694 bool aligned_mem = false;
39700 enum insn_code icode = d->icode;
39701 bool last_arg_constant = false;
39702 const struct insn_data_d *insn_p = &insn_data[icode];
39703 machine_mode tmode = insn_p->operand[0].mode;
39704 enum { load, store } klass;
39706 switch ((enum ix86_builtin_func_type) d->flag)
39708 case VOID_FTYPE_VOID:
39709 emit_insn (GEN_FCN (icode) (target));
39711 case VOID_FTYPE_UINT64:
39712 case VOID_FTYPE_UNSIGNED:
39718 case INT_FTYPE_VOID:
39719 case USHORT_FTYPE_VOID:
39720 case UINT64_FTYPE_VOID:
39721 case UNSIGNED_FTYPE_VOID:
39726 case UINT64_FTYPE_PUNSIGNED:
39727 case V2DI_FTYPE_PV2DI:
39728 case V4DI_FTYPE_PV4DI:
39729 case V32QI_FTYPE_PCCHAR:
39730 case V16QI_FTYPE_PCCHAR:
39731 case V8SF_FTYPE_PCV4SF:
39732 case V8SF_FTYPE_PCFLOAT:
39733 case V4SF_FTYPE_PCFLOAT:
39734 case V4DF_FTYPE_PCV2DF:
39735 case V4DF_FTYPE_PCDOUBLE:
39736 case V2DF_FTYPE_PCDOUBLE:
39737 case VOID_FTYPE_PVOID:
39738 case V8DI_FTYPE_PV8DI:
39744 case CODE_FOR_sse4_1_movntdqa:
39745 case CODE_FOR_avx2_movntdqa:
39746 case CODE_FOR_avx512f_movntdqa:
39747 aligned_mem = true;
39753 case VOID_FTYPE_PV2SF_V4SF:
39754 case VOID_FTYPE_PV8DI_V8DI:
39755 case VOID_FTYPE_PV4DI_V4DI:
39756 case VOID_FTYPE_PV2DI_V2DI:
39757 case VOID_FTYPE_PCHAR_V32QI:
39758 case VOID_FTYPE_PCHAR_V16QI:
39759 case VOID_FTYPE_PFLOAT_V16SF:
39760 case VOID_FTYPE_PFLOAT_V8SF:
39761 case VOID_FTYPE_PFLOAT_V4SF:
39762 case VOID_FTYPE_PDOUBLE_V8DF:
39763 case VOID_FTYPE_PDOUBLE_V4DF:
39764 case VOID_FTYPE_PDOUBLE_V2DF:
39765 case VOID_FTYPE_PLONGLONG_LONGLONG:
39766 case VOID_FTYPE_PULONGLONG_ULONGLONG:
39767 case VOID_FTYPE_PINT_INT:
39770 /* Reserve memory operand for target. */
39771 memory = ARRAY_SIZE (args);
39774 /* These builtins and instructions require the memory
39775 to be properly aligned. */
39776 case CODE_FOR_avx_movntv4di:
39777 case CODE_FOR_sse2_movntv2di:
39778 case CODE_FOR_avx_movntv8sf:
39779 case CODE_FOR_sse_movntv4sf:
39780 case CODE_FOR_sse4a_vmmovntv4sf:
39781 case CODE_FOR_avx_movntv4df:
39782 case CODE_FOR_sse2_movntv2df:
39783 case CODE_FOR_sse4a_vmmovntv2df:
39784 case CODE_FOR_sse2_movntidi:
39785 case CODE_FOR_sse_movntq:
39786 case CODE_FOR_sse2_movntisi:
39787 case CODE_FOR_avx512f_movntv16sf:
39788 case CODE_FOR_avx512f_movntv8df:
39789 case CODE_FOR_avx512f_movntv8di:
39790 aligned_mem = true;
39796 case V4SF_FTYPE_V4SF_PCV2SF:
39797 case V2DF_FTYPE_V2DF_PCDOUBLE:
39802 case V8SF_FTYPE_PCV8SF_V8SI:
39803 case V4DF_FTYPE_PCV4DF_V4DI:
39804 case V4SF_FTYPE_PCV4SF_V4SI:
39805 case V2DF_FTYPE_PCV2DF_V2DI:
39806 case V8SI_FTYPE_PCV8SI_V8SI:
39807 case V4DI_FTYPE_PCV4DI_V4DI:
39808 case V4SI_FTYPE_PCV4SI_V4SI:
39809 case V2DI_FTYPE_PCV2DI_V2DI:
39814 case VOID_FTYPE_PV8DF_V8DF_UQI:
39815 case VOID_FTYPE_PV4DF_V4DF_UQI:
39816 case VOID_FTYPE_PV2DF_V2DF_UQI:
39817 case VOID_FTYPE_PV16SF_V16SF_UHI:
39818 case VOID_FTYPE_PV8SF_V8SF_UQI:
39819 case VOID_FTYPE_PV4SF_V4SF_UQI:
39820 case VOID_FTYPE_PV8DI_V8DI_UQI:
39821 case VOID_FTYPE_PV4DI_V4DI_UQI:
39822 case VOID_FTYPE_PV2DI_V2DI_UQI:
39823 case VOID_FTYPE_PV16SI_V16SI_UHI:
39824 case VOID_FTYPE_PV8SI_V8SI_UQI:
39825 case VOID_FTYPE_PV4SI_V4SI_UQI:
39828 /* These builtins and instructions require the memory
39829 to be properly aligned. */
39830 case CODE_FOR_avx512f_storev16sf_mask:
39831 case CODE_FOR_avx512f_storev16si_mask:
39832 case CODE_FOR_avx512f_storev8df_mask:
39833 case CODE_FOR_avx512f_storev8di_mask:
39834 case CODE_FOR_avx512vl_storev8sf_mask:
39835 case CODE_FOR_avx512vl_storev8si_mask:
39836 case CODE_FOR_avx512vl_storev4df_mask:
39837 case CODE_FOR_avx512vl_storev4di_mask:
39838 case CODE_FOR_avx512vl_storev4sf_mask:
39839 case CODE_FOR_avx512vl_storev4si_mask:
39840 case CODE_FOR_avx512vl_storev2df_mask:
39841 case CODE_FOR_avx512vl_storev2di_mask:
39842 aligned_mem = true;
39848 case VOID_FTYPE_PV8SF_V8SI_V8SF:
39849 case VOID_FTYPE_PV4DF_V4DI_V4DF:
39850 case VOID_FTYPE_PV4SF_V4SI_V4SF:
39851 case VOID_FTYPE_PV2DF_V2DI_V2DF:
39852 case VOID_FTYPE_PV8SI_V8SI_V8SI:
39853 case VOID_FTYPE_PV4DI_V4DI_V4DI:
39854 case VOID_FTYPE_PV4SI_V4SI_V4SI:
39855 case VOID_FTYPE_PV2DI_V2DI_V2DI:
39856 case VOID_FTYPE_PV8SI_V8DI_UQI:
39857 case VOID_FTYPE_PV8HI_V8DI_UQI:
39858 case VOID_FTYPE_PV16HI_V16SI_UHI:
39859 case VOID_FTYPE_PV16QI_V8DI_UQI:
39860 case VOID_FTYPE_PV16QI_V16SI_UHI:
39861 case VOID_FTYPE_PV4SI_V4DI_UQI:
39862 case VOID_FTYPE_PV4SI_V2DI_UQI:
39863 case VOID_FTYPE_PV8HI_V4DI_UQI:
39864 case VOID_FTYPE_PV8HI_V2DI_UQI:
39865 case VOID_FTYPE_PV8HI_V8SI_UQI:
39866 case VOID_FTYPE_PV8HI_V4SI_UQI:
39867 case VOID_FTYPE_PV16QI_V4DI_UQI:
39868 case VOID_FTYPE_PV16QI_V2DI_UQI:
39869 case VOID_FTYPE_PV16QI_V8SI_UQI:
39870 case VOID_FTYPE_PV16QI_V4SI_UQI:
39871 case VOID_FTYPE_PV8HI_V8HI_UQI:
39872 case VOID_FTYPE_PV16HI_V16HI_UHI:
39873 case VOID_FTYPE_PV32HI_V32HI_USI:
39874 case VOID_FTYPE_PV16QI_V16QI_UHI:
39875 case VOID_FTYPE_PV32QI_V32QI_USI:
39876 case VOID_FTYPE_PV64QI_V64QI_UDI:
39879 /* Reserve memory operand for target. */
39880 memory = ARRAY_SIZE (args);
39882 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
39883 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
39884 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
39885 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
39886 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
39887 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
39888 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
39889 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
39890 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
39891 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
39892 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
39893 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
39894 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
39895 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
39896 case V32HI_FTYPE_PCV32HI_V32HI_USI:
39897 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
39898 case V32QI_FTYPE_PCV32QI_V32QI_USI:
39899 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
39905 /* These builtins and instructions require the memory
39906 to be properly aligned. */
39907 case CODE_FOR_avx512f_loadv16sf_mask:
39908 case CODE_FOR_avx512f_loadv16si_mask:
39909 case CODE_FOR_avx512f_loadv8df_mask:
39910 case CODE_FOR_avx512f_loadv8di_mask:
39911 case CODE_FOR_avx512vl_loadv8sf_mask:
39912 case CODE_FOR_avx512vl_loadv8si_mask:
39913 case CODE_FOR_avx512vl_loadv4df_mask:
39914 case CODE_FOR_avx512vl_loadv4di_mask:
39915 case CODE_FOR_avx512vl_loadv4sf_mask:
39916 case CODE_FOR_avx512vl_loadv4si_mask:
39917 case CODE_FOR_avx512vl_loadv2df_mask:
39918 case CODE_FOR_avx512vl_loadv2di_mask:
39919 case CODE_FOR_avx512bw_loadv64qi_mask:
39920 case CODE_FOR_avx512vl_loadv32qi_mask:
39921 case CODE_FOR_avx512vl_loadv16qi_mask:
39922 case CODE_FOR_avx512bw_loadv32hi_mask:
39923 case CODE_FOR_avx512vl_loadv16hi_mask:
39924 case CODE_FOR_avx512vl_loadv8hi_mask:
39925 aligned_mem = true;
39931 case VOID_FTYPE_UINT_UINT_UINT:
39932 case VOID_FTYPE_UINT64_UINT_UINT:
39933 case UCHAR_FTYPE_UINT_UINT_UINT:
39934 case UCHAR_FTYPE_UINT64_UINT_UINT:
39937 memory = ARRAY_SIZE (args);
39938 last_arg_constant = true;
39941 gcc_unreachable ();
39944 gcc_assert (nargs <= ARRAY_SIZE (args));
39946 if (klass == store)
39948 arg = CALL_EXPR_ARG (exp, 0);
39949 op = expand_normal (arg);
39950 gcc_assert (target == 0);
39953 op = ix86_zero_extend_to_Pmode (op);
39954 target = gen_rtx_MEM (tmode, op);
39955 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
39956 on it. Try to improve it using get_pointer_alignment,
39957 and if the special builtin is one that requires strict
39958 mode alignment, also from it's GET_MODE_ALIGNMENT.
39959 Failure to do so could lead to ix86_legitimate_combined_insn
39960 rejecting all changes to such insns. */
39961 unsigned int align = get_pointer_alignment (arg);
39962 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
39963 align = GET_MODE_ALIGNMENT (tmode);
39964 if (MEM_ALIGN (target) < align)
39965 set_mem_align (target, align);
39968 target = force_reg (tmode, op);
39976 || !register_operand (target, tmode)
39977 || GET_MODE (target) != tmode)
39978 target = gen_reg_rtx (tmode);
39981 for (i = 0; i < nargs; i++)
39983 machine_mode mode = insn_p->operand[i + 1].mode;
39986 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
39987 op = expand_normal (arg);
39988 match = insn_p->operand[i + 1].predicate (op, mode);
39990 if (last_arg_constant && (i + 1) == nargs)
39994 if (icode == CODE_FOR_lwp_lwpvalsi3
39995 || icode == CODE_FOR_lwp_lwpinssi3
39996 || icode == CODE_FOR_lwp_lwpvaldi3
39997 || icode == CODE_FOR_lwp_lwpinsdi3)
39998 error ("the last argument must be a 32-bit immediate");
40000 error ("the last argument must be an 8-bit immediate");
40008 /* This must be the memory operand. */
40009 op = ix86_zero_extend_to_Pmode (op);
40010 op = gen_rtx_MEM (mode, op);
40011 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
40012 on it. Try to improve it using get_pointer_alignment,
40013 and if the special builtin is one that requires strict
40014 mode alignment, also from it's GET_MODE_ALIGNMENT.
40015 Failure to do so could lead to ix86_legitimate_combined_insn
40016 rejecting all changes to such insns. */
40017 unsigned int align = get_pointer_alignment (arg);
40018 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
40019 align = GET_MODE_ALIGNMENT (mode);
40020 if (MEM_ALIGN (op) < align)
40021 set_mem_align (op, align);
40025 /* This must be register. */
40026 if (VECTOR_MODE_P (mode))
40027 op = safe_vector_operand (op, mode);
40029 op = fixup_modeless_constant (op, mode);
40031 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
40032 op = copy_to_mode_reg (mode, op);
40035 op = copy_to_reg (op);
40036 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
40042 args[i].mode = mode;
40048 pat = GEN_FCN (icode) (target);
40051 pat = GEN_FCN (icode) (target, args[0].op);
40054 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
40057 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
40060 gcc_unreachable ();
40066 return klass == store ? 0 : target;
40069 /* Return the integer constant in ARG. Constrain it to be in the range
40070 of the subparts of VEC_TYPE; issue an error if not. */
40073 get_element_number (tree vec_type, tree arg)
40075 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
40077 if (!tree_fits_uhwi_p (arg)
40078 || (elt = tree_to_uhwi (arg), elt > max))
40080 error ("selector must be an integer constant in the range 0..%wi", max);
40087 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40088 ix86_expand_vector_init. We DO have language-level syntax for this, in
40089 the form of (type){ init-list }. Except that since we can't place emms
40090 instructions from inside the compiler, we can't allow the use of MMX
40091 registers unless the user explicitly asks for it. So we do *not* define
40092 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
40093 we have builtins invoked by mmintrin.h that gives us license to emit
40094 these sorts of instructions. */
40097 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
40099 machine_mode tmode = TYPE_MODE (type);
40100 machine_mode inner_mode = GET_MODE_INNER (tmode);
40101 int i, n_elt = GET_MODE_NUNITS (tmode);
40102 rtvec v = rtvec_alloc (n_elt);
40104 gcc_assert (VECTOR_MODE_P (tmode));
40105 gcc_assert (call_expr_nargs (exp) == n_elt);
40107 for (i = 0; i < n_elt; ++i)
40109 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
40110 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
40113 if (!target || !register_operand (target, tmode))
40114 target = gen_reg_rtx (tmode);
40116 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
40120 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40121 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
40122 had a language-level syntax for referencing vector elements. */
40125 ix86_expand_vec_ext_builtin (tree exp, rtx target)
40127 machine_mode tmode, mode0;
40132 arg0 = CALL_EXPR_ARG (exp, 0);
40133 arg1 = CALL_EXPR_ARG (exp, 1);
40135 op0 = expand_normal (arg0);
40136 elt = get_element_number (TREE_TYPE (arg0), arg1);
40138 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40139 mode0 = TYPE_MODE (TREE_TYPE (arg0));
40140 gcc_assert (VECTOR_MODE_P (mode0));
40142 op0 = force_reg (mode0, op0);
40144 if (optimize || !target || !register_operand (target, tmode))
40145 target = gen_reg_rtx (tmode);
40147 ix86_expand_vector_extract (true, target, op0, elt);
40152 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40153 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
40154 a language-level syntax for referencing vector elements. */
40157 ix86_expand_vec_set_builtin (tree exp)
40159 machine_mode tmode, mode1;
40160 tree arg0, arg1, arg2;
40162 rtx op0, op1, target;
40164 arg0 = CALL_EXPR_ARG (exp, 0);
40165 arg1 = CALL_EXPR_ARG (exp, 1);
40166 arg2 = CALL_EXPR_ARG (exp, 2);
40168 tmode = TYPE_MODE (TREE_TYPE (arg0));
40169 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40170 gcc_assert (VECTOR_MODE_P (tmode));
40172 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
40173 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
40174 elt = get_element_number (TREE_TYPE (arg0), arg2);
40176 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
40177 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
40179 op0 = force_reg (tmode, op0);
40180 op1 = force_reg (mode1, op1);
40182 /* OP0 is the source of these builtin functions and shouldn't be
40183 modified. Create a copy, use it and return it as target. */
40184 target = gen_reg_rtx (tmode);
40185 emit_move_insn (target, op0);
40186 ix86_expand_vector_set (true, target, op1, elt);
40191 /* Emit conditional move of SRC to DST with condition
40194 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
40200 t = ix86_expand_compare (code, op1, op2);
40201 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
40206 rtx_code_label *nomove = gen_label_rtx ();
40207 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
40208 const0_rtx, GET_MODE (op1), 1, nomove);
40209 emit_move_insn (dst, src);
40210 emit_label (nomove);
40214 /* Choose max of DST and SRC and put it to DST. */
40216 ix86_emit_move_max (rtx dst, rtx src)
40218 ix86_emit_cmove (dst, src, LTU, dst, src);
40221 /* Expand an expression EXP that calls a built-in function,
40222 with result going to TARGET if that's convenient
40223 (and in mode MODE if that's convenient).
40224 SUBTARGET may be used as the target for computing one of EXP's operands.
40225 IGNORE is nonzero if the value is to be ignored. */
40228 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
40229 machine_mode mode, int ignore)
40231 const struct builtin_description *d;
40233 enum insn_code icode;
40234 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
40235 tree arg0, arg1, arg2, arg3, arg4;
40236 rtx op0, op1, op2, op3, op4, pat, insn;
40237 machine_mode mode0, mode1, mode2, mode3, mode4;
40238 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
40240 /* For CPU builtins that can be folded, fold first and expand the fold. */
40243 case IX86_BUILTIN_CPU_INIT:
40245 /* Make it call __cpu_indicator_init in libgcc. */
40246 tree call_expr, fndecl, type;
40247 type = build_function_type_list (integer_type_node, NULL_TREE);
40248 fndecl = build_fn_decl ("__cpu_indicator_init", type);
40249 call_expr = build_call_expr (fndecl, 0);
40250 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
40252 case IX86_BUILTIN_CPU_IS:
40253 case IX86_BUILTIN_CPU_SUPPORTS:
40255 tree arg0 = CALL_EXPR_ARG (exp, 0);
40256 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
40257 gcc_assert (fold_expr != NULL_TREE);
40258 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
40262 /* Determine whether the builtin function is available under the current ISA.
40263 Originally the builtin was not created if it wasn't applicable to the
40264 current ISA based on the command line switches. With function specific
40265 options, we need to check in the context of the function making the call
40266 whether it is supported. */
40267 if (ix86_builtins_isa[fcode].isa
40268 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
40270 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
40271 NULL, (enum fpmath_unit) 0, false);
40274 error ("%qE needs unknown isa option", fndecl);
40277 gcc_assert (opts != NULL);
40278 error ("%qE needs isa option %s", fndecl, opts);
40286 case IX86_BUILTIN_BNDMK:
40288 || GET_MODE (target) != BNDmode
40289 || !register_operand (target, BNDmode))
40290 target = gen_reg_rtx (BNDmode);
40292 arg0 = CALL_EXPR_ARG (exp, 0);
40293 arg1 = CALL_EXPR_ARG (exp, 1);
40295 op0 = expand_normal (arg0);
40296 op1 = expand_normal (arg1);
40298 if (!register_operand (op0, Pmode))
40299 op0 = ix86_zero_extend_to_Pmode (op0);
40300 if (!register_operand (op1, Pmode))
40301 op1 = ix86_zero_extend_to_Pmode (op1);
40303 /* Builtin arg1 is size of block but instruction op1 should
40305 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
40306 NULL_RTX, 1, OPTAB_DIRECT);
40308 emit_insn (BNDmode == BND64mode
40309 ? gen_bnd64_mk (target, op0, op1)
40310 : gen_bnd32_mk (target, op0, op1));
40313 case IX86_BUILTIN_BNDSTX:
40314 arg0 = CALL_EXPR_ARG (exp, 0);
40315 arg1 = CALL_EXPR_ARG (exp, 1);
40316 arg2 = CALL_EXPR_ARG (exp, 2);
40318 op0 = expand_normal (arg0);
40319 op1 = expand_normal (arg1);
40320 op2 = expand_normal (arg2);
40322 if (!register_operand (op0, Pmode))
40323 op0 = ix86_zero_extend_to_Pmode (op0);
40324 if (!register_operand (op1, BNDmode))
40325 op1 = copy_to_mode_reg (BNDmode, op1);
40326 if (!register_operand (op2, Pmode))
40327 op2 = ix86_zero_extend_to_Pmode (op2);
40329 emit_insn (BNDmode == BND64mode
40330 ? gen_bnd64_stx (op2, op0, op1)
40331 : gen_bnd32_stx (op2, op0, op1));
40334 case IX86_BUILTIN_BNDLDX:
40336 || GET_MODE (target) != BNDmode
40337 || !register_operand (target, BNDmode))
40338 target = gen_reg_rtx (BNDmode);
40340 arg0 = CALL_EXPR_ARG (exp, 0);
40341 arg1 = CALL_EXPR_ARG (exp, 1);
40343 op0 = expand_normal (arg0);
40344 op1 = expand_normal (arg1);
40346 if (!register_operand (op0, Pmode))
40347 op0 = ix86_zero_extend_to_Pmode (op0);
40348 if (!register_operand (op1, Pmode))
40349 op1 = ix86_zero_extend_to_Pmode (op1);
40351 emit_insn (BNDmode == BND64mode
40352 ? gen_bnd64_ldx (target, op0, op1)
40353 : gen_bnd32_ldx (target, op0, op1));
40356 case IX86_BUILTIN_BNDCL:
40357 arg0 = CALL_EXPR_ARG (exp, 0);
40358 arg1 = CALL_EXPR_ARG (exp, 1);
40360 op0 = expand_normal (arg0);
40361 op1 = expand_normal (arg1);
40363 if (!register_operand (op0, Pmode))
40364 op0 = ix86_zero_extend_to_Pmode (op0);
40365 if (!register_operand (op1, BNDmode))
40366 op1 = copy_to_mode_reg (BNDmode, op1);
40368 emit_insn (BNDmode == BND64mode
40369 ? gen_bnd64_cl (op1, op0)
40370 : gen_bnd32_cl (op1, op0));
40373 case IX86_BUILTIN_BNDCU:
40374 arg0 = CALL_EXPR_ARG (exp, 0);
40375 arg1 = CALL_EXPR_ARG (exp, 1);
40377 op0 = expand_normal (arg0);
40378 op1 = expand_normal (arg1);
40380 if (!register_operand (op0, Pmode))
40381 op0 = ix86_zero_extend_to_Pmode (op0);
40382 if (!register_operand (op1, BNDmode))
40383 op1 = copy_to_mode_reg (BNDmode, op1);
40385 emit_insn (BNDmode == BND64mode
40386 ? gen_bnd64_cu (op1, op0)
40387 : gen_bnd32_cu (op1, op0));
40390 case IX86_BUILTIN_BNDRET:
40391 arg0 = CALL_EXPR_ARG (exp, 0);
40392 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
40393 target = chkp_get_rtl_bounds (arg0);
40395 /* If no bounds were specified for returned value,
40396 then use INIT bounds. It usually happens when
40397 some built-in function is expanded. */
40400 rtx t1 = gen_reg_rtx (Pmode);
40401 rtx t2 = gen_reg_rtx (Pmode);
40402 target = gen_reg_rtx (BNDmode);
40403 emit_move_insn (t1, const0_rtx);
40404 emit_move_insn (t2, constm1_rtx);
40405 emit_insn (BNDmode == BND64mode
40406 ? gen_bnd64_mk (target, t1, t2)
40407 : gen_bnd32_mk (target, t1, t2));
40410 gcc_assert (target && REG_P (target));
40413 case IX86_BUILTIN_BNDNARROW:
40415 rtx m1, m1h1, m1h2, lb, ub, t1;
40417 /* Return value and lb. */
40418 arg0 = CALL_EXPR_ARG (exp, 0);
40420 arg1 = CALL_EXPR_ARG (exp, 1);
40422 arg2 = CALL_EXPR_ARG (exp, 2);
40424 lb = expand_normal (arg0);
40425 op1 = expand_normal (arg1);
40426 op2 = expand_normal (arg2);
40428 /* Size was passed but we need to use (size - 1) as for bndmk. */
40429 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
40430 NULL_RTX, 1, OPTAB_DIRECT);
40432 /* Add LB to size and inverse to get UB. */
40433 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
40434 op2, 1, OPTAB_DIRECT);
40435 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
40437 if (!register_operand (lb, Pmode))
40438 lb = ix86_zero_extend_to_Pmode (lb);
40439 if (!register_operand (ub, Pmode))
40440 ub = ix86_zero_extend_to_Pmode (ub);
40442 /* We need to move bounds to memory before any computations. */
40447 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
40448 emit_move_insn (m1, op1);
40451 /* Generate mem expression to be used for access to LB and UB. */
40452 m1h1 = adjust_address (m1, Pmode, 0);
40453 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
40455 t1 = gen_reg_rtx (Pmode);
40458 emit_move_insn (t1, m1h1);
40459 ix86_emit_move_max (t1, lb);
40460 emit_move_insn (m1h1, t1);
40462 /* Compute UB. UB is stored in 1's complement form. Therefore
40463 we also use max here. */
40464 emit_move_insn (t1, m1h2);
40465 ix86_emit_move_max (t1, ub);
40466 emit_move_insn (m1h2, t1);
40468 op2 = gen_reg_rtx (BNDmode);
40469 emit_move_insn (op2, m1);
40471 return chkp_join_splitted_slot (lb, op2);
40474 case IX86_BUILTIN_BNDINT:
40476 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
40479 || GET_MODE (target) != BNDmode
40480 || !register_operand (target, BNDmode))
40481 target = gen_reg_rtx (BNDmode);
40483 arg0 = CALL_EXPR_ARG (exp, 0);
40484 arg1 = CALL_EXPR_ARG (exp, 1);
40486 op0 = expand_normal (arg0);
40487 op1 = expand_normal (arg1);
40489 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
40490 rh1 = adjust_address (res, Pmode, 0);
40491 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
40493 /* Put first bounds to temporaries. */
40494 lb1 = gen_reg_rtx (Pmode);
40495 ub1 = gen_reg_rtx (Pmode);
40498 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
40499 emit_move_insn (ub1, adjust_address (op0, Pmode,
40500 GET_MODE_SIZE (Pmode)));
40504 emit_move_insn (res, op0);
40505 emit_move_insn (lb1, rh1);
40506 emit_move_insn (ub1, rh2);
40509 /* Put second bounds to temporaries. */
40510 lb2 = gen_reg_rtx (Pmode);
40511 ub2 = gen_reg_rtx (Pmode);
40514 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
40515 emit_move_insn (ub2, adjust_address (op1, Pmode,
40516 GET_MODE_SIZE (Pmode)));
40520 emit_move_insn (res, op1);
40521 emit_move_insn (lb2, rh1);
40522 emit_move_insn (ub2, rh2);
40526 ix86_emit_move_max (lb1, lb2);
40527 emit_move_insn (rh1, lb1);
40529 /* Compute UB. UB is stored in 1's complement form. Therefore
40530 we also use max here. */
40531 ix86_emit_move_max (ub1, ub2);
40532 emit_move_insn (rh2, ub1);
40534 emit_move_insn (target, res);
40539 case IX86_BUILTIN_SIZEOF:
40545 || GET_MODE (target) != Pmode
40546 || !register_operand (target, Pmode))
40547 target = gen_reg_rtx (Pmode);
40549 arg0 = CALL_EXPR_ARG (exp, 0);
40550 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
40552 name = DECL_ASSEMBLER_NAME (arg0);
40553 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
40555 emit_insn (Pmode == SImode
40556 ? gen_move_size_reloc_si (target, symbol)
40557 : gen_move_size_reloc_di (target, symbol));
40562 case IX86_BUILTIN_BNDLOWER:
40567 || GET_MODE (target) != Pmode
40568 || !register_operand (target, Pmode))
40569 target = gen_reg_rtx (Pmode);
40571 arg0 = CALL_EXPR_ARG (exp, 0);
40572 op0 = expand_normal (arg0);
40574 /* We need to move bounds to memory first. */
40579 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40580 emit_move_insn (mem, op0);
40583 /* Generate mem expression to access LB and load it. */
40584 hmem = adjust_address (mem, Pmode, 0);
40585 emit_move_insn (target, hmem);
40590 case IX86_BUILTIN_BNDUPPER:
40592 rtx mem, hmem, res;
40595 || GET_MODE (target) != Pmode
40596 || !register_operand (target, Pmode))
40597 target = gen_reg_rtx (Pmode);
40599 arg0 = CALL_EXPR_ARG (exp, 0);
40600 op0 = expand_normal (arg0);
40602 /* We need to move bounds to memory first. */
40607 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40608 emit_move_insn (mem, op0);
40611 /* Generate mem expression to access UB. */
40612 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
40614 /* We need to inverse all bits of UB. */
40615 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
40618 emit_move_insn (target, res);
40623 case IX86_BUILTIN_MASKMOVQ:
40624 case IX86_BUILTIN_MASKMOVDQU:
40625 icode = (fcode == IX86_BUILTIN_MASKMOVQ
40626 ? CODE_FOR_mmx_maskmovq
40627 : CODE_FOR_sse2_maskmovdqu);
40628 /* Note the arg order is different from the operand order. */
40629 arg1 = CALL_EXPR_ARG (exp, 0);
40630 arg2 = CALL_EXPR_ARG (exp, 1);
40631 arg0 = CALL_EXPR_ARG (exp, 2);
40632 op0 = expand_normal (arg0);
40633 op1 = expand_normal (arg1);
40634 op2 = expand_normal (arg2);
40635 mode0 = insn_data[icode].operand[0].mode;
40636 mode1 = insn_data[icode].operand[1].mode;
40637 mode2 = insn_data[icode].operand[2].mode;
40639 op0 = ix86_zero_extend_to_Pmode (op0);
40640 op0 = gen_rtx_MEM (mode1, op0);
40642 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40643 op0 = copy_to_mode_reg (mode0, op0);
40644 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40645 op1 = copy_to_mode_reg (mode1, op1);
40646 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40647 op2 = copy_to_mode_reg (mode2, op2);
40648 pat = GEN_FCN (icode) (op0, op1, op2);
40654 case IX86_BUILTIN_LDMXCSR:
40655 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
40656 target = assign_386_stack_local (SImode, SLOT_TEMP);
40657 emit_move_insn (target, op0);
40658 emit_insn (gen_sse_ldmxcsr (target));
40661 case IX86_BUILTIN_STMXCSR:
40662 target = assign_386_stack_local (SImode, SLOT_TEMP);
40663 emit_insn (gen_sse_stmxcsr (target));
40664 return copy_to_mode_reg (SImode, target);
40666 case IX86_BUILTIN_CLFLUSH:
40667 arg0 = CALL_EXPR_ARG (exp, 0);
40668 op0 = expand_normal (arg0);
40669 icode = CODE_FOR_sse2_clflush;
40670 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40671 op0 = ix86_zero_extend_to_Pmode (op0);
40673 emit_insn (gen_sse2_clflush (op0));
40676 case IX86_BUILTIN_CLWB:
40677 arg0 = CALL_EXPR_ARG (exp, 0);
40678 op0 = expand_normal (arg0);
40679 icode = CODE_FOR_clwb;
40680 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40681 op0 = ix86_zero_extend_to_Pmode (op0);
40683 emit_insn (gen_clwb (op0));
40686 case IX86_BUILTIN_CLFLUSHOPT:
40687 arg0 = CALL_EXPR_ARG (exp, 0);
40688 op0 = expand_normal (arg0);
40689 icode = CODE_FOR_clflushopt;
40690 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40691 op0 = ix86_zero_extend_to_Pmode (op0);
40693 emit_insn (gen_clflushopt (op0));
40696 case IX86_BUILTIN_MONITOR:
40697 case IX86_BUILTIN_MONITORX:
40698 arg0 = CALL_EXPR_ARG (exp, 0);
40699 arg1 = CALL_EXPR_ARG (exp, 1);
40700 arg2 = CALL_EXPR_ARG (exp, 2);
40701 op0 = expand_normal (arg0);
40702 op1 = expand_normal (arg1);
40703 op2 = expand_normal (arg2);
40705 op0 = ix86_zero_extend_to_Pmode (op0);
40707 op1 = copy_to_mode_reg (SImode, op1);
40709 op2 = copy_to_mode_reg (SImode, op2);
40711 emit_insn (fcode == IX86_BUILTIN_MONITOR
40712 ? ix86_gen_monitor (op0, op1, op2)
40713 : ix86_gen_monitorx (op0, op1, op2));
40716 case IX86_BUILTIN_MWAIT:
40717 arg0 = CALL_EXPR_ARG (exp, 0);
40718 arg1 = CALL_EXPR_ARG (exp, 1);
40719 op0 = expand_normal (arg0);
40720 op1 = expand_normal (arg1);
40722 op0 = copy_to_mode_reg (SImode, op0);
40724 op1 = copy_to_mode_reg (SImode, op1);
40725 emit_insn (gen_sse3_mwait (op0, op1));
40728 case IX86_BUILTIN_MWAITX:
40729 arg0 = CALL_EXPR_ARG (exp, 0);
40730 arg1 = CALL_EXPR_ARG (exp, 1);
40731 arg2 = CALL_EXPR_ARG (exp, 2);
40732 op0 = expand_normal (arg0);
40733 op1 = expand_normal (arg1);
40734 op2 = expand_normal (arg2);
40736 op0 = copy_to_mode_reg (SImode, op0);
40738 op1 = copy_to_mode_reg (SImode, op1);
40740 op2 = copy_to_mode_reg (SImode, op2);
40741 emit_insn (gen_mwaitx (op0, op1, op2));
40744 case IX86_BUILTIN_CLZERO:
40745 arg0 = CALL_EXPR_ARG (exp, 0);
40746 op0 = expand_normal (arg0);
40748 op0 = ix86_zero_extend_to_Pmode (op0);
40749 emit_insn (ix86_gen_clzero (op0));
40752 case IX86_BUILTIN_VEC_INIT_V2SI:
40753 case IX86_BUILTIN_VEC_INIT_V4HI:
40754 case IX86_BUILTIN_VEC_INIT_V8QI:
40755 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
40757 case IX86_BUILTIN_VEC_EXT_V2DF:
40758 case IX86_BUILTIN_VEC_EXT_V2DI:
40759 case IX86_BUILTIN_VEC_EXT_V4SF:
40760 case IX86_BUILTIN_VEC_EXT_V4SI:
40761 case IX86_BUILTIN_VEC_EXT_V8HI:
40762 case IX86_BUILTIN_VEC_EXT_V2SI:
40763 case IX86_BUILTIN_VEC_EXT_V4HI:
40764 case IX86_BUILTIN_VEC_EXT_V16QI:
40765 return ix86_expand_vec_ext_builtin (exp, target);
40767 case IX86_BUILTIN_VEC_SET_V2DI:
40768 case IX86_BUILTIN_VEC_SET_V4SF:
40769 case IX86_BUILTIN_VEC_SET_V4SI:
40770 case IX86_BUILTIN_VEC_SET_V8HI:
40771 case IX86_BUILTIN_VEC_SET_V4HI:
40772 case IX86_BUILTIN_VEC_SET_V16QI:
40773 return ix86_expand_vec_set_builtin (exp);
40775 case IX86_BUILTIN_INFQ:
40776 case IX86_BUILTIN_HUGE_VALQ:
40778 REAL_VALUE_TYPE inf;
40782 tmp = const_double_from_real_value (inf, mode);
40784 tmp = validize_mem (force_const_mem (mode, tmp));
40787 target = gen_reg_rtx (mode);
40789 emit_move_insn (target, tmp);
40793 case IX86_BUILTIN_RDPMC:
40794 case IX86_BUILTIN_RDTSC:
40795 case IX86_BUILTIN_RDTSCP:
40797 op0 = gen_reg_rtx (DImode);
40798 op1 = gen_reg_rtx (DImode);
40800 if (fcode == IX86_BUILTIN_RDPMC)
40802 arg0 = CALL_EXPR_ARG (exp, 0);
40803 op2 = expand_normal (arg0);
40804 if (!register_operand (op2, SImode))
40805 op2 = copy_to_mode_reg (SImode, op2);
40807 insn = (TARGET_64BIT
40808 ? gen_rdpmc_rex64 (op0, op1, op2)
40809 : gen_rdpmc (op0, op2));
40812 else if (fcode == IX86_BUILTIN_RDTSC)
40814 insn = (TARGET_64BIT
40815 ? gen_rdtsc_rex64 (op0, op1)
40816 : gen_rdtsc (op0));
40821 op2 = gen_reg_rtx (SImode);
40823 insn = (TARGET_64BIT
40824 ? gen_rdtscp_rex64 (op0, op1, op2)
40825 : gen_rdtscp (op0, op2));
40828 arg0 = CALL_EXPR_ARG (exp, 0);
40829 op4 = expand_normal (arg0);
40830 if (!address_operand (op4, VOIDmode))
40832 op4 = convert_memory_address (Pmode, op4);
40833 op4 = copy_addr_to_reg (op4);
40835 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
40840 /* mode is VOIDmode if __builtin_rd* has been called
40842 if (mode == VOIDmode)
40844 target = gen_reg_rtx (mode);
40849 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
40850 op1, 1, OPTAB_DIRECT);
40851 op0 = expand_simple_binop (DImode, IOR, op0, op1,
40852 op0, 1, OPTAB_DIRECT);
40855 emit_move_insn (target, op0);
40858 case IX86_BUILTIN_FXSAVE:
40859 case IX86_BUILTIN_FXRSTOR:
40860 case IX86_BUILTIN_FXSAVE64:
40861 case IX86_BUILTIN_FXRSTOR64:
40862 case IX86_BUILTIN_FNSTENV:
40863 case IX86_BUILTIN_FLDENV:
40867 case IX86_BUILTIN_FXSAVE:
40868 icode = CODE_FOR_fxsave;
40870 case IX86_BUILTIN_FXRSTOR:
40871 icode = CODE_FOR_fxrstor;
40873 case IX86_BUILTIN_FXSAVE64:
40874 icode = CODE_FOR_fxsave64;
40876 case IX86_BUILTIN_FXRSTOR64:
40877 icode = CODE_FOR_fxrstor64;
40879 case IX86_BUILTIN_FNSTENV:
40880 icode = CODE_FOR_fnstenv;
40882 case IX86_BUILTIN_FLDENV:
40883 icode = CODE_FOR_fldenv;
40886 gcc_unreachable ();
40889 arg0 = CALL_EXPR_ARG (exp, 0);
40890 op0 = expand_normal (arg0);
40892 if (!address_operand (op0, VOIDmode))
40894 op0 = convert_memory_address (Pmode, op0);
40895 op0 = copy_addr_to_reg (op0);
40897 op0 = gen_rtx_MEM (mode0, op0);
40899 pat = GEN_FCN (icode) (op0);
40904 case IX86_BUILTIN_XSAVE:
40905 case IX86_BUILTIN_XRSTOR:
40906 case IX86_BUILTIN_XSAVE64:
40907 case IX86_BUILTIN_XRSTOR64:
40908 case IX86_BUILTIN_XSAVEOPT:
40909 case IX86_BUILTIN_XSAVEOPT64:
40910 case IX86_BUILTIN_XSAVES:
40911 case IX86_BUILTIN_XRSTORS:
40912 case IX86_BUILTIN_XSAVES64:
40913 case IX86_BUILTIN_XRSTORS64:
40914 case IX86_BUILTIN_XSAVEC:
40915 case IX86_BUILTIN_XSAVEC64:
40916 arg0 = CALL_EXPR_ARG (exp, 0);
40917 arg1 = CALL_EXPR_ARG (exp, 1);
40918 op0 = expand_normal (arg0);
40919 op1 = expand_normal (arg1);
40921 if (!address_operand (op0, VOIDmode))
40923 op0 = convert_memory_address (Pmode, op0);
40924 op0 = copy_addr_to_reg (op0);
40926 op0 = gen_rtx_MEM (BLKmode, op0);
40928 op1 = force_reg (DImode, op1);
40932 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
40933 NULL, 1, OPTAB_DIRECT);
40936 case IX86_BUILTIN_XSAVE:
40937 icode = CODE_FOR_xsave_rex64;
40939 case IX86_BUILTIN_XRSTOR:
40940 icode = CODE_FOR_xrstor_rex64;
40942 case IX86_BUILTIN_XSAVE64:
40943 icode = CODE_FOR_xsave64;
40945 case IX86_BUILTIN_XRSTOR64:
40946 icode = CODE_FOR_xrstor64;
40948 case IX86_BUILTIN_XSAVEOPT:
40949 icode = CODE_FOR_xsaveopt_rex64;
40951 case IX86_BUILTIN_XSAVEOPT64:
40952 icode = CODE_FOR_xsaveopt64;
40954 case IX86_BUILTIN_XSAVES:
40955 icode = CODE_FOR_xsaves_rex64;
40957 case IX86_BUILTIN_XRSTORS:
40958 icode = CODE_FOR_xrstors_rex64;
40960 case IX86_BUILTIN_XSAVES64:
40961 icode = CODE_FOR_xsaves64;
40963 case IX86_BUILTIN_XRSTORS64:
40964 icode = CODE_FOR_xrstors64;
40966 case IX86_BUILTIN_XSAVEC:
40967 icode = CODE_FOR_xsavec_rex64;
40969 case IX86_BUILTIN_XSAVEC64:
40970 icode = CODE_FOR_xsavec64;
40973 gcc_unreachable ();
40976 op2 = gen_lowpart (SImode, op2);
40977 op1 = gen_lowpart (SImode, op1);
40978 pat = GEN_FCN (icode) (op0, op1, op2);
40984 case IX86_BUILTIN_XSAVE:
40985 icode = CODE_FOR_xsave;
40987 case IX86_BUILTIN_XRSTOR:
40988 icode = CODE_FOR_xrstor;
40990 case IX86_BUILTIN_XSAVEOPT:
40991 icode = CODE_FOR_xsaveopt;
40993 case IX86_BUILTIN_XSAVES:
40994 icode = CODE_FOR_xsaves;
40996 case IX86_BUILTIN_XRSTORS:
40997 icode = CODE_FOR_xrstors;
40999 case IX86_BUILTIN_XSAVEC:
41000 icode = CODE_FOR_xsavec;
41003 gcc_unreachable ();
41005 pat = GEN_FCN (icode) (op0, op1);
41012 case IX86_BUILTIN_LLWPCB:
41013 arg0 = CALL_EXPR_ARG (exp, 0);
41014 op0 = expand_normal (arg0);
41015 icode = CODE_FOR_lwp_llwpcb;
41016 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41017 op0 = ix86_zero_extend_to_Pmode (op0);
41018 emit_insn (gen_lwp_llwpcb (op0));
41021 case IX86_BUILTIN_SLWPCB:
41022 icode = CODE_FOR_lwp_slwpcb;
41024 || !insn_data[icode].operand[0].predicate (target, Pmode))
41025 target = gen_reg_rtx (Pmode);
41026 emit_insn (gen_lwp_slwpcb (target));
41029 case IX86_BUILTIN_BEXTRI32:
41030 case IX86_BUILTIN_BEXTRI64:
41031 arg0 = CALL_EXPR_ARG (exp, 0);
41032 arg1 = CALL_EXPR_ARG (exp, 1);
41033 op0 = expand_normal (arg0);
41034 op1 = expand_normal (arg1);
41035 icode = (fcode == IX86_BUILTIN_BEXTRI32
41036 ? CODE_FOR_tbm_bextri_si
41037 : CODE_FOR_tbm_bextri_di);
41038 if (!CONST_INT_P (op1))
41040 error ("last argument must be an immediate");
41045 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
41046 unsigned char lsb_index = INTVAL (op1) & 0xFF;
41047 op1 = GEN_INT (length);
41048 op2 = GEN_INT (lsb_index);
41049 pat = GEN_FCN (icode) (target, op0, op1, op2);
41055 case IX86_BUILTIN_RDRAND16_STEP:
41056 icode = CODE_FOR_rdrandhi_1;
41060 case IX86_BUILTIN_RDRAND32_STEP:
41061 icode = CODE_FOR_rdrandsi_1;
41065 case IX86_BUILTIN_RDRAND64_STEP:
41066 icode = CODE_FOR_rdranddi_1;
41070 op0 = gen_reg_rtx (mode0);
41071 emit_insn (GEN_FCN (icode) (op0));
41073 arg0 = CALL_EXPR_ARG (exp, 0);
41074 op1 = expand_normal (arg0);
41075 if (!address_operand (op1, VOIDmode))
41077 op1 = convert_memory_address (Pmode, op1);
41078 op1 = copy_addr_to_reg (op1);
41080 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41082 op1 = gen_reg_rtx (SImode);
41083 emit_move_insn (op1, CONST1_RTX (SImode));
41085 /* Emit SImode conditional move. */
41086 if (mode0 == HImode)
41088 op2 = gen_reg_rtx (SImode);
41089 emit_insn (gen_zero_extendhisi2 (op2, op0));
41091 else if (mode0 == SImode)
41094 op2 = gen_rtx_SUBREG (SImode, op0, 0);
41097 || !register_operand (target, SImode))
41098 target = gen_reg_rtx (SImode);
41100 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
41102 emit_insn (gen_rtx_SET (target,
41103 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
41106 case IX86_BUILTIN_RDSEED16_STEP:
41107 icode = CODE_FOR_rdseedhi_1;
41111 case IX86_BUILTIN_RDSEED32_STEP:
41112 icode = CODE_FOR_rdseedsi_1;
41116 case IX86_BUILTIN_RDSEED64_STEP:
41117 icode = CODE_FOR_rdseeddi_1;
41121 op0 = gen_reg_rtx (mode0);
41122 emit_insn (GEN_FCN (icode) (op0));
41124 arg0 = CALL_EXPR_ARG (exp, 0);
41125 op1 = expand_normal (arg0);
41126 if (!address_operand (op1, VOIDmode))
41128 op1 = convert_memory_address (Pmode, op1);
41129 op1 = copy_addr_to_reg (op1);
41131 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41133 op2 = gen_reg_rtx (QImode);
41135 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
41137 emit_insn (gen_rtx_SET (op2, pat));
41140 || !register_operand (target, SImode))
41141 target = gen_reg_rtx (SImode);
41143 emit_insn (gen_zero_extendqisi2 (target, op2));
41146 case IX86_BUILTIN_SBB32:
41147 icode = CODE_FOR_subborrowsi;
41151 case IX86_BUILTIN_SBB64:
41152 icode = CODE_FOR_subborrowdi;
41156 case IX86_BUILTIN_ADDCARRYX32:
41157 icode = CODE_FOR_addcarrysi;
41161 case IX86_BUILTIN_ADDCARRYX64:
41162 icode = CODE_FOR_addcarrydi;
41166 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
41167 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
41168 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
41169 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
41171 op1 = expand_normal (arg0);
41172 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
41174 op2 = expand_normal (arg1);
41175 if (!register_operand (op2, mode0))
41176 op2 = copy_to_mode_reg (mode0, op2);
41178 op3 = expand_normal (arg2);
41179 if (!register_operand (op3, mode0))
41180 op3 = copy_to_mode_reg (mode0, op3);
41182 op4 = expand_normal (arg3);
41183 if (!address_operand (op4, VOIDmode))
41185 op4 = convert_memory_address (Pmode, op4);
41186 op4 = copy_addr_to_reg (op4);
41189 /* Generate CF from input operand. */
41190 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
41192 /* Generate instruction that consumes CF. */
41193 op0 = gen_reg_rtx (mode0);
41195 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
41196 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
41197 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
41199 /* Return current CF value. */
41201 target = gen_reg_rtx (QImode);
41203 PUT_MODE (pat, QImode);
41204 emit_insn (gen_rtx_SET (target, pat));
41206 /* Store the result. */
41207 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
41211 case IX86_BUILTIN_READ_FLAGS:
41212 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
41215 || target == NULL_RTX
41216 || !nonimmediate_operand (target, word_mode)
41217 || GET_MODE (target) != word_mode)
41218 target = gen_reg_rtx (word_mode);
41220 emit_insn (gen_pop (target));
41223 case IX86_BUILTIN_WRITE_FLAGS:
41225 arg0 = CALL_EXPR_ARG (exp, 0);
41226 op0 = expand_normal (arg0);
41227 if (!general_no_elim_operand (op0, word_mode))
41228 op0 = copy_to_mode_reg (word_mode, op0);
41230 emit_insn (gen_push (op0));
41231 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
41234 case IX86_BUILTIN_KORTESTC16:
41235 icode = CODE_FOR_kortestchi;
41240 case IX86_BUILTIN_KORTESTZ16:
41241 icode = CODE_FOR_kortestzhi;
41246 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
41247 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
41248 op0 = expand_normal (arg0);
41249 op1 = expand_normal (arg1);
41251 op0 = copy_to_reg (op0);
41252 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41253 op1 = copy_to_reg (op1);
41254 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
41256 target = gen_reg_rtx (QImode);
41257 emit_insn (gen_rtx_SET (target, const0_rtx));
41259 /* Emit kortest. */
41260 emit_insn (GEN_FCN (icode) (op0, op1));
41261 /* And use setcc to return result from flags. */
41262 ix86_expand_setcc (target, EQ,
41263 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
41266 case IX86_BUILTIN_GATHERSIV2DF:
41267 icode = CODE_FOR_avx2_gathersiv2df;
41269 case IX86_BUILTIN_GATHERSIV4DF:
41270 icode = CODE_FOR_avx2_gathersiv4df;
41272 case IX86_BUILTIN_GATHERDIV2DF:
41273 icode = CODE_FOR_avx2_gatherdiv2df;
41275 case IX86_BUILTIN_GATHERDIV4DF:
41276 icode = CODE_FOR_avx2_gatherdiv4df;
41278 case IX86_BUILTIN_GATHERSIV4SF:
41279 icode = CODE_FOR_avx2_gathersiv4sf;
41281 case IX86_BUILTIN_GATHERSIV8SF:
41282 icode = CODE_FOR_avx2_gathersiv8sf;
41284 case IX86_BUILTIN_GATHERDIV4SF:
41285 icode = CODE_FOR_avx2_gatherdiv4sf;
41287 case IX86_BUILTIN_GATHERDIV8SF:
41288 icode = CODE_FOR_avx2_gatherdiv8sf;
41290 case IX86_BUILTIN_GATHERSIV2DI:
41291 icode = CODE_FOR_avx2_gathersiv2di;
41293 case IX86_BUILTIN_GATHERSIV4DI:
41294 icode = CODE_FOR_avx2_gathersiv4di;
41296 case IX86_BUILTIN_GATHERDIV2DI:
41297 icode = CODE_FOR_avx2_gatherdiv2di;
41299 case IX86_BUILTIN_GATHERDIV4DI:
41300 icode = CODE_FOR_avx2_gatherdiv4di;
41302 case IX86_BUILTIN_GATHERSIV4SI:
41303 icode = CODE_FOR_avx2_gathersiv4si;
41305 case IX86_BUILTIN_GATHERSIV8SI:
41306 icode = CODE_FOR_avx2_gathersiv8si;
41308 case IX86_BUILTIN_GATHERDIV4SI:
41309 icode = CODE_FOR_avx2_gatherdiv4si;
41311 case IX86_BUILTIN_GATHERDIV8SI:
41312 icode = CODE_FOR_avx2_gatherdiv8si;
41314 case IX86_BUILTIN_GATHERALTSIV4DF:
41315 icode = CODE_FOR_avx2_gathersiv4df;
41317 case IX86_BUILTIN_GATHERALTDIV8SF:
41318 icode = CODE_FOR_avx2_gatherdiv8sf;
41320 case IX86_BUILTIN_GATHERALTSIV4DI:
41321 icode = CODE_FOR_avx2_gathersiv4di;
41323 case IX86_BUILTIN_GATHERALTDIV8SI:
41324 icode = CODE_FOR_avx2_gatherdiv8si;
41326 case IX86_BUILTIN_GATHER3SIV16SF:
41327 icode = CODE_FOR_avx512f_gathersiv16sf;
41329 case IX86_BUILTIN_GATHER3SIV8DF:
41330 icode = CODE_FOR_avx512f_gathersiv8df;
41332 case IX86_BUILTIN_GATHER3DIV16SF:
41333 icode = CODE_FOR_avx512f_gatherdiv16sf;
41335 case IX86_BUILTIN_GATHER3DIV8DF:
41336 icode = CODE_FOR_avx512f_gatherdiv8df;
41338 case IX86_BUILTIN_GATHER3SIV16SI:
41339 icode = CODE_FOR_avx512f_gathersiv16si;
41341 case IX86_BUILTIN_GATHER3SIV8DI:
41342 icode = CODE_FOR_avx512f_gathersiv8di;
41344 case IX86_BUILTIN_GATHER3DIV16SI:
41345 icode = CODE_FOR_avx512f_gatherdiv16si;
41347 case IX86_BUILTIN_GATHER3DIV8DI:
41348 icode = CODE_FOR_avx512f_gatherdiv8di;
41350 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41351 icode = CODE_FOR_avx512f_gathersiv8df;
41353 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41354 icode = CODE_FOR_avx512f_gatherdiv16sf;
41356 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41357 icode = CODE_FOR_avx512f_gathersiv8di;
41359 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41360 icode = CODE_FOR_avx512f_gatherdiv16si;
41362 case IX86_BUILTIN_GATHER3SIV2DF:
41363 icode = CODE_FOR_avx512vl_gathersiv2df;
41365 case IX86_BUILTIN_GATHER3SIV4DF:
41366 icode = CODE_FOR_avx512vl_gathersiv4df;
41368 case IX86_BUILTIN_GATHER3DIV2DF:
41369 icode = CODE_FOR_avx512vl_gatherdiv2df;
41371 case IX86_BUILTIN_GATHER3DIV4DF:
41372 icode = CODE_FOR_avx512vl_gatherdiv4df;
41374 case IX86_BUILTIN_GATHER3SIV4SF:
41375 icode = CODE_FOR_avx512vl_gathersiv4sf;
41377 case IX86_BUILTIN_GATHER3SIV8SF:
41378 icode = CODE_FOR_avx512vl_gathersiv8sf;
41380 case IX86_BUILTIN_GATHER3DIV4SF:
41381 icode = CODE_FOR_avx512vl_gatherdiv4sf;
41383 case IX86_BUILTIN_GATHER3DIV8SF:
41384 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41386 case IX86_BUILTIN_GATHER3SIV2DI:
41387 icode = CODE_FOR_avx512vl_gathersiv2di;
41389 case IX86_BUILTIN_GATHER3SIV4DI:
41390 icode = CODE_FOR_avx512vl_gathersiv4di;
41392 case IX86_BUILTIN_GATHER3DIV2DI:
41393 icode = CODE_FOR_avx512vl_gatherdiv2di;
41395 case IX86_BUILTIN_GATHER3DIV4DI:
41396 icode = CODE_FOR_avx512vl_gatherdiv4di;
41398 case IX86_BUILTIN_GATHER3SIV4SI:
41399 icode = CODE_FOR_avx512vl_gathersiv4si;
41401 case IX86_BUILTIN_GATHER3SIV8SI:
41402 icode = CODE_FOR_avx512vl_gathersiv8si;
41404 case IX86_BUILTIN_GATHER3DIV4SI:
41405 icode = CODE_FOR_avx512vl_gatherdiv4si;
41407 case IX86_BUILTIN_GATHER3DIV8SI:
41408 icode = CODE_FOR_avx512vl_gatherdiv8si;
41410 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41411 icode = CODE_FOR_avx512vl_gathersiv4df;
41413 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41414 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41416 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41417 icode = CODE_FOR_avx512vl_gathersiv4di;
41419 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41420 icode = CODE_FOR_avx512vl_gatherdiv8si;
41422 case IX86_BUILTIN_SCATTERSIV16SF:
41423 icode = CODE_FOR_avx512f_scattersiv16sf;
41425 case IX86_BUILTIN_SCATTERSIV8DF:
41426 icode = CODE_FOR_avx512f_scattersiv8df;
41428 case IX86_BUILTIN_SCATTERDIV16SF:
41429 icode = CODE_FOR_avx512f_scatterdiv16sf;
41431 case IX86_BUILTIN_SCATTERDIV8DF:
41432 icode = CODE_FOR_avx512f_scatterdiv8df;
41434 case IX86_BUILTIN_SCATTERSIV16SI:
41435 icode = CODE_FOR_avx512f_scattersiv16si;
41437 case IX86_BUILTIN_SCATTERSIV8DI:
41438 icode = CODE_FOR_avx512f_scattersiv8di;
41440 case IX86_BUILTIN_SCATTERDIV16SI:
41441 icode = CODE_FOR_avx512f_scatterdiv16si;
41443 case IX86_BUILTIN_SCATTERDIV8DI:
41444 icode = CODE_FOR_avx512f_scatterdiv8di;
41446 case IX86_BUILTIN_SCATTERSIV8SF:
41447 icode = CODE_FOR_avx512vl_scattersiv8sf;
41449 case IX86_BUILTIN_SCATTERSIV4SF:
41450 icode = CODE_FOR_avx512vl_scattersiv4sf;
41452 case IX86_BUILTIN_SCATTERSIV4DF:
41453 icode = CODE_FOR_avx512vl_scattersiv4df;
41455 case IX86_BUILTIN_SCATTERSIV2DF:
41456 icode = CODE_FOR_avx512vl_scattersiv2df;
41458 case IX86_BUILTIN_SCATTERDIV8SF:
41459 icode = CODE_FOR_avx512vl_scatterdiv8sf;
41461 case IX86_BUILTIN_SCATTERDIV4SF:
41462 icode = CODE_FOR_avx512vl_scatterdiv4sf;
41464 case IX86_BUILTIN_SCATTERDIV4DF:
41465 icode = CODE_FOR_avx512vl_scatterdiv4df;
41467 case IX86_BUILTIN_SCATTERDIV2DF:
41468 icode = CODE_FOR_avx512vl_scatterdiv2df;
41470 case IX86_BUILTIN_SCATTERSIV8SI:
41471 icode = CODE_FOR_avx512vl_scattersiv8si;
41473 case IX86_BUILTIN_SCATTERSIV4SI:
41474 icode = CODE_FOR_avx512vl_scattersiv4si;
41476 case IX86_BUILTIN_SCATTERSIV4DI:
41477 icode = CODE_FOR_avx512vl_scattersiv4di;
41479 case IX86_BUILTIN_SCATTERSIV2DI:
41480 icode = CODE_FOR_avx512vl_scattersiv2di;
41482 case IX86_BUILTIN_SCATTERDIV8SI:
41483 icode = CODE_FOR_avx512vl_scatterdiv8si;
41485 case IX86_BUILTIN_SCATTERDIV4SI:
41486 icode = CODE_FOR_avx512vl_scatterdiv4si;
41488 case IX86_BUILTIN_SCATTERDIV4DI:
41489 icode = CODE_FOR_avx512vl_scatterdiv4di;
41491 case IX86_BUILTIN_SCATTERDIV2DI:
41492 icode = CODE_FOR_avx512vl_scatterdiv2di;
41494 case IX86_BUILTIN_GATHERPFDPD:
41495 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
41496 goto vec_prefetch_gen;
41497 case IX86_BUILTIN_SCATTERALTSIV8DF:
41498 icode = CODE_FOR_avx512f_scattersiv8df;
41500 case IX86_BUILTIN_SCATTERALTDIV16SF:
41501 icode = CODE_FOR_avx512f_scatterdiv16sf;
41503 case IX86_BUILTIN_SCATTERALTSIV8DI:
41504 icode = CODE_FOR_avx512f_scattersiv8di;
41506 case IX86_BUILTIN_SCATTERALTDIV16SI:
41507 icode = CODE_FOR_avx512f_scatterdiv16si;
41509 case IX86_BUILTIN_GATHERPFDPS:
41510 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
41511 goto vec_prefetch_gen;
41512 case IX86_BUILTIN_GATHERPFQPD:
41513 icode = CODE_FOR_avx512pf_gatherpfv8didf;
41514 goto vec_prefetch_gen;
41515 case IX86_BUILTIN_GATHERPFQPS:
41516 icode = CODE_FOR_avx512pf_gatherpfv8disf;
41517 goto vec_prefetch_gen;
41518 case IX86_BUILTIN_SCATTERPFDPD:
41519 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
41520 goto vec_prefetch_gen;
41521 case IX86_BUILTIN_SCATTERPFDPS:
41522 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
41523 goto vec_prefetch_gen;
41524 case IX86_BUILTIN_SCATTERPFQPD:
41525 icode = CODE_FOR_avx512pf_scatterpfv8didf;
41526 goto vec_prefetch_gen;
41527 case IX86_BUILTIN_SCATTERPFQPS:
41528 icode = CODE_FOR_avx512pf_scatterpfv8disf;
41529 goto vec_prefetch_gen;
41533 rtx (*gen) (rtx, rtx);
41535 arg0 = CALL_EXPR_ARG (exp, 0);
41536 arg1 = CALL_EXPR_ARG (exp, 1);
41537 arg2 = CALL_EXPR_ARG (exp, 2);
41538 arg3 = CALL_EXPR_ARG (exp, 3);
41539 arg4 = CALL_EXPR_ARG (exp, 4);
41540 op0 = expand_normal (arg0);
41541 op1 = expand_normal (arg1);
41542 op2 = expand_normal (arg2);
41543 op3 = expand_normal (arg3);
41544 op4 = expand_normal (arg4);
41545 /* Note the arg order is different from the operand order. */
41546 mode0 = insn_data[icode].operand[1].mode;
41547 mode2 = insn_data[icode].operand[3].mode;
41548 mode3 = insn_data[icode].operand[4].mode;
41549 mode4 = insn_data[icode].operand[5].mode;
41551 if (target == NULL_RTX
41552 || GET_MODE (target) != insn_data[icode].operand[0].mode
41553 || !insn_data[icode].operand[0].predicate (target,
41554 GET_MODE (target)))
41555 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
41557 subtarget = target;
41561 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41562 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41563 half = gen_reg_rtx (V8SImode);
41564 if (!nonimmediate_operand (op2, V16SImode))
41565 op2 = copy_to_mode_reg (V16SImode, op2);
41566 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41569 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41570 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41571 case IX86_BUILTIN_GATHERALTSIV4DF:
41572 case IX86_BUILTIN_GATHERALTSIV4DI:
41573 half = gen_reg_rtx (V4SImode);
41574 if (!nonimmediate_operand (op2, V8SImode))
41575 op2 = copy_to_mode_reg (V8SImode, op2);
41576 emit_insn (gen_vec_extract_lo_v8si (half, op2));
41579 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41580 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41581 half = gen_reg_rtx (mode0);
41582 if (mode0 == V8SFmode)
41583 gen = gen_vec_extract_lo_v16sf;
41585 gen = gen_vec_extract_lo_v16si;
41586 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41587 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41588 emit_insn (gen (half, op0));
41590 if (GET_MODE (op3) != VOIDmode)
41592 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41593 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41594 emit_insn (gen (half, op3));
41598 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41599 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41600 case IX86_BUILTIN_GATHERALTDIV8SF:
41601 case IX86_BUILTIN_GATHERALTDIV8SI:
41602 half = gen_reg_rtx (mode0);
41603 if (mode0 == V4SFmode)
41604 gen = gen_vec_extract_lo_v8sf;
41606 gen = gen_vec_extract_lo_v8si;
41607 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41608 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41609 emit_insn (gen (half, op0));
41611 if (GET_MODE (op3) != VOIDmode)
41613 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41614 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41615 emit_insn (gen (half, op3));
41623 /* Force memory operand only with base register here. But we
41624 don't want to do it on memory operand for other builtin
41626 op1 = ix86_zero_extend_to_Pmode (op1);
41628 if (!insn_data[icode].operand[1].predicate (op0, mode0))
41629 op0 = copy_to_mode_reg (mode0, op0);
41630 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
41631 op1 = copy_to_mode_reg (Pmode, op1);
41632 if (!insn_data[icode].operand[3].predicate (op2, mode2))
41633 op2 = copy_to_mode_reg (mode2, op2);
41635 op3 = fixup_modeless_constant (op3, mode3);
41637 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
41639 if (!insn_data[icode].operand[4].predicate (op3, mode3))
41640 op3 = copy_to_mode_reg (mode3, op3);
41644 op3 = copy_to_reg (op3);
41645 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
41647 if (!insn_data[icode].operand[5].predicate (op4, mode4))
41649 error ("the last argument must be scale 1, 2, 4, 8");
41653 /* Optimize. If mask is known to have all high bits set,
41654 replace op0 with pc_rtx to signal that the instruction
41655 overwrites the whole destination and doesn't use its
41656 previous contents. */
41659 if (TREE_CODE (arg3) == INTEGER_CST)
41661 if (integer_all_onesp (arg3))
41664 else if (TREE_CODE (arg3) == VECTOR_CST)
41666 unsigned int negative = 0;
41667 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
41669 tree cst = VECTOR_CST_ELT (arg3, i);
41670 if (TREE_CODE (cst) == INTEGER_CST
41671 && tree_int_cst_sign_bit (cst))
41673 else if (TREE_CODE (cst) == REAL_CST
41674 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
41677 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
41680 else if (TREE_CODE (arg3) == SSA_NAME
41681 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
41683 /* Recognize also when mask is like:
41684 __v2df src = _mm_setzero_pd ();
41685 __v2df mask = _mm_cmpeq_pd (src, src);
41687 __v8sf src = _mm256_setzero_ps ();
41688 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
41689 as that is a cheaper way to load all ones into
41690 a register than having to load a constant from
41692 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
41693 if (is_gimple_call (def_stmt))
41695 tree fndecl = gimple_call_fndecl (def_stmt);
41697 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
41698 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
41700 case IX86_BUILTIN_CMPPD:
41701 case IX86_BUILTIN_CMPPS:
41702 case IX86_BUILTIN_CMPPD256:
41703 case IX86_BUILTIN_CMPPS256:
41704 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
41707 case IX86_BUILTIN_CMPEQPD:
41708 case IX86_BUILTIN_CMPEQPS:
41709 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
41710 && initializer_zerop (gimple_call_arg (def_stmt,
41721 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
41728 case IX86_BUILTIN_GATHER3DIV16SF:
41729 if (target == NULL_RTX)
41730 target = gen_reg_rtx (V8SFmode);
41731 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
41733 case IX86_BUILTIN_GATHER3DIV16SI:
41734 if (target == NULL_RTX)
41735 target = gen_reg_rtx (V8SImode);
41736 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
41738 case IX86_BUILTIN_GATHER3DIV8SF:
41739 case IX86_BUILTIN_GATHERDIV8SF:
41740 if (target == NULL_RTX)
41741 target = gen_reg_rtx (V4SFmode);
41742 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
41744 case IX86_BUILTIN_GATHER3DIV8SI:
41745 case IX86_BUILTIN_GATHERDIV8SI:
41746 if (target == NULL_RTX)
41747 target = gen_reg_rtx (V4SImode);
41748 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
41751 target = subtarget;
41757 arg0 = CALL_EXPR_ARG (exp, 0);
41758 arg1 = CALL_EXPR_ARG (exp, 1);
41759 arg2 = CALL_EXPR_ARG (exp, 2);
41760 arg3 = CALL_EXPR_ARG (exp, 3);
41761 arg4 = CALL_EXPR_ARG (exp, 4);
41762 op0 = expand_normal (arg0);
41763 op1 = expand_normal (arg1);
41764 op2 = expand_normal (arg2);
41765 op3 = expand_normal (arg3);
41766 op4 = expand_normal (arg4);
41767 mode1 = insn_data[icode].operand[1].mode;
41768 mode2 = insn_data[icode].operand[2].mode;
41769 mode3 = insn_data[icode].operand[3].mode;
41770 mode4 = insn_data[icode].operand[4].mode;
41772 /* Scatter instruction stores operand op3 to memory with
41773 indices from op2 and scale from op4 under writemask op1.
41774 If index operand op2 has more elements than source operand
41775 op3 one needs to use only its low half. And vice versa. */
41778 case IX86_BUILTIN_SCATTERALTSIV8DF:
41779 case IX86_BUILTIN_SCATTERALTSIV8DI:
41780 half = gen_reg_rtx (V8SImode);
41781 if (!nonimmediate_operand (op2, V16SImode))
41782 op2 = copy_to_mode_reg (V16SImode, op2);
41783 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41786 case IX86_BUILTIN_SCATTERALTDIV16SF:
41787 case IX86_BUILTIN_SCATTERALTDIV16SI:
41788 half = gen_reg_rtx (mode3);
41789 if (mode3 == V8SFmode)
41790 gen = gen_vec_extract_lo_v16sf;
41792 gen = gen_vec_extract_lo_v16si;
41793 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41794 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41795 emit_insn (gen (half, op3));
41802 /* Force memory operand only with base register here. But we
41803 don't want to do it on memory operand for other builtin
41805 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
41807 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41808 op0 = copy_to_mode_reg (Pmode, op0);
41810 op1 = fixup_modeless_constant (op1, mode1);
41812 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
41814 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41815 op1 = copy_to_mode_reg (mode1, op1);
41819 op1 = copy_to_reg (op1);
41820 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
41823 if (!insn_data[icode].operand[2].predicate (op2, mode2))
41824 op2 = copy_to_mode_reg (mode2, op2);
41826 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41827 op3 = copy_to_mode_reg (mode3, op3);
41829 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41831 error ("the last argument must be scale 1, 2, 4, 8");
41835 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41843 arg0 = CALL_EXPR_ARG (exp, 0);
41844 arg1 = CALL_EXPR_ARG (exp, 1);
41845 arg2 = CALL_EXPR_ARG (exp, 2);
41846 arg3 = CALL_EXPR_ARG (exp, 3);
41847 arg4 = CALL_EXPR_ARG (exp, 4);
41848 op0 = expand_normal (arg0);
41849 op1 = expand_normal (arg1);
41850 op2 = expand_normal (arg2);
41851 op3 = expand_normal (arg3);
41852 op4 = expand_normal (arg4);
41853 mode0 = insn_data[icode].operand[0].mode;
41854 mode1 = insn_data[icode].operand[1].mode;
41855 mode3 = insn_data[icode].operand[3].mode;
41856 mode4 = insn_data[icode].operand[4].mode;
41858 op0 = fixup_modeless_constant (op0, mode0);
41860 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
41862 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41863 op0 = copy_to_mode_reg (mode0, op0);
41867 op0 = copy_to_reg (op0);
41868 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41871 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41872 op1 = copy_to_mode_reg (mode1, op1);
41874 /* Force memory operand only with base register here. But we
41875 don't want to do it on memory operand for other builtin
41877 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
41879 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
41880 op2 = copy_to_mode_reg (Pmode, op2);
41882 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41884 error ("the forth argument must be scale 1, 2, 4, 8");
41888 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41890 error ("incorrect hint operand");
41894 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41902 case IX86_BUILTIN_XABORT:
41903 icode = CODE_FOR_xabort;
41904 arg0 = CALL_EXPR_ARG (exp, 0);
41905 op0 = expand_normal (arg0);
41906 mode0 = insn_data[icode].operand[0].mode;
41907 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41909 error ("the xabort's argument must be an 8-bit immediate");
41912 emit_insn (gen_xabort (op0));
41919 for (i = 0, d = bdesc_special_args;
41920 i < ARRAY_SIZE (bdesc_special_args);
41922 if (d->code == fcode)
41923 return ix86_expand_special_args_builtin (d, exp, target);
41925 for (i = 0, d = bdesc_args;
41926 i < ARRAY_SIZE (bdesc_args);
41928 if (d->code == fcode)
41931 case IX86_BUILTIN_FABSQ:
41932 case IX86_BUILTIN_COPYSIGNQ:
41934 /* Emit a normal call if SSE isn't available. */
41935 return expand_call (exp, target, ignore);
41937 return ix86_expand_args_builtin (d, exp, target);
41940 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
41941 if (d->code == fcode)
41942 return ix86_expand_sse_comi (d, exp, target);
41944 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
41945 if (d->code == fcode)
41946 return ix86_expand_round_builtin (d, exp, target);
41948 for (i = 0, d = bdesc_pcmpestr;
41949 i < ARRAY_SIZE (bdesc_pcmpestr);
41951 if (d->code == fcode)
41952 return ix86_expand_sse_pcmpestr (d, exp, target);
41954 for (i = 0, d = bdesc_pcmpistr;
41955 i < ARRAY_SIZE (bdesc_pcmpistr);
41957 if (d->code == fcode)
41958 return ix86_expand_sse_pcmpistr (d, exp, target);
41960 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
41961 if (d->code == fcode)
41962 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
41963 (enum ix86_builtin_func_type)
41964 d->flag, d->comparison);
41966 gcc_unreachable ();
41969 /* This returns the target-specific builtin with code CODE if
41970 current_function_decl has visibility on this builtin, which is checked
41971 using isa flags. Returns NULL_TREE otherwise. */
41973 static tree ix86_get_builtin (enum ix86_builtins code)
41975 struct cl_target_option *opts;
41976 tree target_tree = NULL_TREE;
41978 /* Determine the isa flags of current_function_decl.  */
41980 if (current_function_decl)
41981 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
/* No per-function target attribute: fall back to the command-line
   default target options.  */
41983 if (target_tree == NULL)
41984 target_tree = target_option_default_node;
41986 opts = TREE_TARGET_OPTION (target_tree);
/* Hand back the builtin decl only when the ISA it requires is enabled
   in the selected target options; otherwise the caller gets NULL_TREE
   (the fall-through return is elided in this excerpt).  */
41988 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
41989 return ix86_builtin_decl (code, true);
41994 /* Return function decl for target specific builtin
41995 for given MPX builtin passed in FCODE. */
41997 ix86_builtin_mpx_function (unsigned fcode)
/* Map each generic Pointer Bounds Checker builtin (BUILT_IN_CHKP_*)
   to the corresponding x86 MPX builtin decl.  The switch header on
   FCODE is elided in this excerpt.  */
42001 case BUILT_IN_CHKP_BNDMK:
42002 return ix86_builtins[IX86_BUILTIN_BNDMK];
42004 case BUILT_IN_CHKP_BNDSTX:
42005 return ix86_builtins[IX86_BUILTIN_BNDSTX];
42007 case BUILT_IN_CHKP_BNDLDX:
42008 return ix86_builtins[IX86_BUILTIN_BNDLDX];
42010 case BUILT_IN_CHKP_BNDCL:
42011 return ix86_builtins[IX86_BUILTIN_BNDCL];
42013 case BUILT_IN_CHKP_BNDCU:
42014 return ix86_builtins[IX86_BUILTIN_BNDCU];
42016 case BUILT_IN_CHKP_BNDRET:
42017 return ix86_builtins[IX86_BUILTIN_BNDRET];
42019 case BUILT_IN_CHKP_INTERSECT:
42020 return ix86_builtins[IX86_BUILTIN_BNDINT];
42022 case BUILT_IN_CHKP_NARROW:
42023 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
42025 case BUILT_IN_CHKP_SIZEOF:
42026 return ix86_builtins[IX86_BUILTIN_SIZEOF];
42028 case BUILT_IN_CHKP_EXTRACT_LOWER:
42029 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
42031 case BUILT_IN_CHKP_EXTRACT_UPPER:
42032 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
/* Any other FCODE is a caller bug.  */
42038 gcc_unreachable ();
42041 /* Helper function for ix86_load_bounds and ix86_store_bounds.
42043 Return an address to be used to load/store bounds for pointer
42046 SLOT_NO is an integer constant holding number of a target
42047 dependent special slot to be used in case SLOT is not a memory.
42049 SPECIAL_BASE is a pointer to be used as a base of fake address
42050 to access special slots in Bounds Table. SPECIAL_BASE[-1],
42051 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
42054 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
42058 /* NULL slot means we pass bounds for pointer not passed to the
42059 function at all. Register slot means we pass pointer in a
42060 register. In both these cases bounds are passed via Bounds
42061 Table. Since we do not have actual pointer stored in memory,
42062 we have to use fake addresses to access Bounds Table. We
42063 start with (special_base - sizeof (void*)) and decrease this
42064 address by pointer size to get addresses for other slots. */
42065 if (!slot || REG_P (slot))
/* Fake slots must be addressed by a constant slot number.  */
42067 gcc_assert (CONST_INT_P (slot_no));
/* Slot N lives at SPECIAL_BASE - (N + 1) * sizeof (void *).  */
42068 addr = plus_constant (Pmode, special_base,
42069 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
42071 /* If pointer is passed in a memory then its address is used to
42072 access Bounds Table. */
42073 else if (MEM_P (slot))
42075 addr = XEXP (slot, 0);
/* BNDLDX/BNDSTX addressing needs the base in a register.  */
42076 if (!register_operand (addr, Pmode))
42077 addr = copy_addr_to_reg (addr);
/* SLOT must be NULL, a REG, or a MEM — nothing else is valid.  */
42080 gcc_unreachable ();
42085 /* Expand pass uses this hook to load bounds for function parameter
42086 PTR passed in SLOT in case its bounds are not passed in a register.
42088 If SLOT is a memory, then bounds are loaded as for regular pointer
42089 loaded from memory. PTR may be NULL in case SLOT is a memory.
42090 In such case value of PTR (if required) may be loaded from SLOT.
42092 If SLOT is NULL or a register then SLOT_NO is an integer constant
42093 holding number of the target dependent special slot which should be
42094 used to obtain bounds.
42096 Return loaded bounds. */
42099 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
/* Result register for the loaded bounds, in the target's bounds
   mode (BND32 or BND64).  */
42101 rtx reg = gen_reg_rtx (BNDmode);
42104 /* Get address to be used to access Bounds Table. Special slots start
42105 at the location of return address of the current function. */
42106 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
42108 /* Load pointer value from a memory if we don't have it. */
/* NOTE(review): the enclosing `if (!ptr)` guard is elided in this
   excerpt; the assert applies only when PTR was not supplied.  */
42111 gcc_assert (MEM_P (slot));
42112 ptr = copy_addr_to_reg (slot);
/* BNDLDX wants the pointer operand as a Pmode register.  */
42115 if (!register_operand (ptr, Pmode))
42116 ptr = ix86_zero_extend_to_Pmode (ptr);
/* Emit the bounds-load (BNDLDX) in the width matching BNDmode.  */
42118 emit_insn (BNDmode == BND64mode
42119 ? gen_bnd64_ldx (reg, addr, ptr)
42120 : gen_bnd32_ldx (reg, addr, ptr));
42125 /* Expand pass uses this hook to store BOUNDS for call argument PTR
42126 passed in SLOT in case BOUNDS are not passed in a register.
42128 If SLOT is a memory, then BOUNDS are stored as for regular pointer
42129 stored in memory. PTR may be NULL in case SLOT is a memory.
42130 In such case value of PTR (if required) may be loaded from SLOT.
42132 If SLOT is NULL or a register then SLOT_NO is an integer constant
42133 holding number of the target dependent special slot which should be
42134 used to store BOUNDS. */
42137 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
42141 /* Get address to be used to access Bounds Table. Special slots start
42142 at the location of return address of a called function. */
42143 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
42145 /* Load pointer value from a memory if we don't have it. */
/* NOTE(review): the enclosing `if (!ptr)` guard is elided in this
   excerpt; the assert applies only when PTR was not supplied.  */
42148 gcc_assert (MEM_P (slot));
42149 ptr = copy_addr_to_reg (slot);
/* BNDSTX wants the pointer operand as a Pmode register.  */
42152 if (!register_operand (ptr, Pmode))
42153 ptr = ix86_zero_extend_to_Pmode (ptr);
/* BOUNDS must already be in a bounds mode; force it into a BND
   register for the store pattern.  */
42155 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
42156 if (!register_operand (bounds, BNDmode))
42157 bounds = copy_to_mode_reg (BNDmode, bounds);
/* Emit the bounds-store (BNDSTX) in the width matching BNDmode.  */
42159 emit_insn (BNDmode == BND64mode
42160 ? gen_bnd64_stx (addr, ptr, bounds)
42161 : gen_bnd32_stx (addr, ptr, bounds));
42164 /* Load and return bounds returned by function in SLOT. */
42167 ix86_load_returned_bounds (rtx slot)
/* Returned bounds always arrive in a bounds register; copy them into
   a fresh BNDmode pseudo (returned via RES, elided in this excerpt).  */
42171 gcc_assert (REG_P (slot));
42172 res = gen_reg_rtx (BNDmode);
42173 emit_move_insn (res, slot);
42178 /* Store BOUNDS returned by function into SLOT. */
42181 ix86_store_returned_bounds (rtx slot, rtx bounds)
/* SLOT is the hard bounds register used for the return value; just
   move BOUNDS into it.  */
42183 gcc_assert (REG_P (slot));
42184 emit_move_insn (slot, bounds);
42187 /* Returns a function decl for a vectorized version of the combined function
42188 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
42189 if it is not available. */
42192 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
42195 machine_mode in_mode, out_mode;
/* Both sides must be vector types, else no vectorized form exists.  */
42198 if (TREE_CODE (type_out) != VECTOR_TYPE
42199 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element mode and lane count of result and argument vectors; these
   select which width variant of each builtin to return.  */
42202 out_mode = TYPE_MODE (TREE_TYPE (type_out));
42203 out_n = TYPE_VECTOR_SUBPARTS (type_out);
42204 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42205 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* NOTE(review): the CASE_CFN_* switch labels are elided throughout
   this excerpt; each group below is identified by the builtins it
   returns.  EXP2: only the 16-lane SFmode form exists.  */
42210 if (out_mode == SFmode && in_mode == SFmode)
42212 if (out_n == 16 && in_n == 16)
42213 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
/* FLOOR converted-and-packed-to-int variants (FLOORP*_..._SFIX).  */
42220 /* The round insn does not trap on denormals. */
42221 if (flag_trapping_math || !TARGET_ROUND)
42224 if (out_mode == SImode && in_mode == DFmode)
42226 if (out_n == 4 && in_n == 2)
42227 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
42228 else if (out_n == 8 && in_n == 4)
42229 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
42230 else if (out_n == 16 && in_n == 8)
42231 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
42233 if (out_mode == SImode && in_mode == SFmode)
42235 if (out_n == 4 && in_n == 4)
42236 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
42237 else if (out_n == 8 && in_n == 8)
42238 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
/* CEIL converted-and-packed-to-int variants (CEILP*_..._SFIX).  */
42245 /* The round insn does not trap on denormals. */
42246 if (flag_trapping_math || !TARGET_ROUND)
42249 if (out_mode == SImode && in_mode == DFmode)
42251 if (out_n == 4 && in_n == 2)
42252 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
42253 else if (out_n == 8 && in_n == 4)
42254 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
42255 else if (out_n == 16 && in_n == 8)
42256 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
42258 if (out_mode == SImode && in_mode == SFmode)
42260 if (out_n == 4 && in_n == 4)
42261 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
42262 else if (out_n == 8 && in_n == 8)
42263 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
/* Plain float-to-int conversion (VEC_PACK_SFIX / CVTPS2DQ).  */
42270 if (out_mode == SImode && in_mode == DFmode)
42272 if (out_n == 4 && in_n == 2)
42273 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
42274 else if (out_n == 8 && in_n == 4)
42275 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
42277 if (out_mode == SImode && in_mode == SFmode)
42279 if (out_n == 4 && in_n == 4)
42280 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
42281 else if (out_n == 8 && in_n == 8)
42282 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
/* ROUND-away-from-zero then convert to int (ROUNDP*_AZ_*_SFIX).  */
42289 /* The round insn does not trap on denormals. */
42290 if (flag_trapping_math || !TARGET_ROUND)
42293 if (out_mode == SImode && in_mode == DFmode)
42295 if (out_n == 4 && in_n == 2)
42296 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
42297 else if (out_n == 8 && in_n == 4)
42298 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
42299 else if (out_n == 16 && in_n == 8)
42300 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
42302 if (out_mode == SImode && in_mode == SFmode)
42304 if (out_n == 4 && in_n == 4)
42305 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
42306 else if (out_n == 8 && in_n == 8)
42307 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
/* FLOOR keeping the floating-point element type (FLOORP*).  */
42312 /* The round insn does not trap on denormals. */
42313 if (flag_trapping_math || !TARGET_ROUND)
42316 if (out_mode == DFmode && in_mode == DFmode)
42318 if (out_n == 2 && in_n == 2)
42319 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
42320 else if (out_n == 4 && in_n == 4)
42321 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
42323 if (out_mode == SFmode && in_mode == SFmode)
42325 if (out_n == 4 && in_n == 4)
42326 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
42327 else if (out_n == 8 && in_n == 8)
42328 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
/* CEIL keeping the floating-point element type (CEILP*).  */
42333 /* The round insn does not trap on denormals. */
42334 if (flag_trapping_math || !TARGET_ROUND)
42337 if (out_mode == DFmode && in_mode == DFmode)
42339 if (out_n == 2 && in_n == 2)
42340 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
42341 else if (out_n == 4 && in_n == 4)
42342 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
42344 if (out_mode == SFmode && in_mode == SFmode)
42346 if (out_n == 4 && in_n == 4)
42347 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
42348 else if (out_n == 8 && in_n == 8)
42349 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
/* TRUNC keeping the floating-point element type (TRUNCP*).  */
42354 /* The round insn does not trap on denormals. */
42355 if (flag_trapping_math || !TARGET_ROUND)
42358 if (out_mode == DFmode && in_mode == DFmode)
42360 if (out_n == 2 && in_n == 2)
42361 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
42362 else if (out_n == 4 && in_n == 4)
42363 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
42365 if (out_mode == SFmode && in_mode == SFmode)
42367 if (out_n == 4 && in_n == 4)
42368 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
42369 else if (out_n == 8 && in_n == 8)
42370 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
/* RINT keeping the floating-point element type (RINTP*).  */
42375 /* The round insn does not trap on denormals. */
42376 if (flag_trapping_math || !TARGET_ROUND)
42379 if (out_mode == DFmode && in_mode == DFmode)
42381 if (out_n == 2 && in_n == 2)
42382 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
42383 else if (out_n == 4 && in_n == 4)
42384 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
42386 if (out_mode == SFmode && in_mode == SFmode)
42388 if (out_n == 4 && in_n == 4)
42389 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
42390 else if (out_n == 8 && in_n == 8)
42391 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
/* Fused multiply-add (VFMADDP*); no denormal/trapping guard here.  */
42396 if (out_mode == DFmode && in_mode == DFmode)
42398 if (out_n == 2 && in_n == 2)
42399 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
42400 if (out_n == 4 && in_n == 4)
42401 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
42403 if (out_mode == SFmode && in_mode == SFmode)
42405 if (out_n == 4 && in_n == 4)
42406 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
42407 if (out_n == 8 && in_n == 8)
42408 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
42416 /* Dispatch to a handler for a vectorization library. */
42417 if (ix86_veclib_handler)
42418 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
/* NOTE(review): elided excerpt — the embedded original line numbers jump,
   so lines between them (early returns, braces, scale checks) are missing.
   Comments describe only the visible code.  Maps a scalar math builtin FN
   taking/returning vectors TYPE_IN/TYPE_OUT to an external SVML routine
   ("vmls*" for 4 x SFmode, "vmld*" for 2 x DFmode).  */
42423 /* Handler for an SVML-style interface to
42424 a library with vectorized intrinsics. */
42427 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
42430 tree fntype, new_fndecl, args;
42433 machine_mode el_mode, in_mode;
42436 /* The SVML is suitable for unsafe math only. */
42437 if (!flag_unsafe_math_optimizations)
42440 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42441 n = TYPE_VECTOR_SUBPARTS (type_out);
42442 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42443 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Element mode and lane count of input and output must agree; only
   2 x DFmode and 4 x SFmode are supported (checks partly elided).  */
42444 if (el_mode != in_mode
42468 if ((el_mode != DFmode || n != 2)
42469 && (el_mode != SFmode || n != 4))
42477 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42478 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl))
/* log/logf get the irregular SVML names "vmldLn2"/"vmlsLn4"; everything
   else is derived from the builtin name past the "__builtin_" prefix
   (bname+10), with the vector width suffix appended.  */;
42480 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
42481 strcpy (name, "vmlsLn4");
42482 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
42483 strcpy (name, "vmldLn2");
42486 sprintf (name, "vmls%s", bname+10);
42487 name[strlen (name)-1] = '4';
42490 sprintf (name, "vmld%s2", bname+10);
42492 /* Convert to uppercase. */
/* Unary builtins get (type_out)(type_in); binary ones also take a second
   type_in argument (the selecting condition is elided here).  */
42496 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42500 fntype = build_function_type_list (type_out, type_in, NULL);
42502 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42504 /* Build a function declaration for the vectorized function. */
42505 new_fndecl = build_decl (BUILTINS_LOCATION,
42506 FUNCTION_DECL, get_identifier (name), fntype);
42507 TREE_PUBLIC (new_fndecl) = 1;
42508 DECL_EXTERNAL (new_fndecl) = 1;
42509 DECL_IS_NOVOPS (new_fndecl) = 1;
42510 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): elided excerpt (embedded line numbers jump); comments cover
   only the visible code.  Sibling of ix86_veclibabi_svml: maps FN to an
   external ACML "__vr..*" vector routine declaration.  */
42515 /* Handler for an ACML-style interface to
42516 a library with vectorized intrinsics. */
42519 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
/* The ".." in the template is patched to the width/type infix; the
   routine-specific suffix is written at name + 7 below.  */
42521 char name[20] = "__vr.._";
42522 tree fntype, new_fndecl, args;
42525 machine_mode el_mode, in_mode;
42528 /* The ACML is 64bits only and suitable for unsafe math only as
42529 it does not correctly support parts of IEEE with the required
42530 precision such as denormals. */
42532 || !flag_unsafe_math_optimizations)
42535 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42536 n = TYPE_VECTOR_SUBPARTS (type_out);
42537 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42538 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Only 2 x DFmode and 4 x SFmode vectors are handled (the per-case
   name infix assignments are elided here).  */
42539 if (el_mode != in_mode
42551 if (el_mode == DFmode && n == 2)
42556 else if (el_mode == SFmode && n == 4)
42569 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42570 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* bname+10 skips the "__builtin_" prefix of the scalar builtin name.  */
42571 sprintf (name + 7, "%s", bname+10);
42574 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42578 fntype = build_function_type_list (type_out, type_in, NULL);
42580 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42582 /* Build a function declaration for the vectorized function. */
42583 new_fndecl = build_decl (BUILTINS_LOCATION,
42584 FUNCTION_DECL, get_identifier (name), fntype);
42585 TREE_PUBLIC (new_fndecl) = 1;
42586 DECL_EXTERNAL (new_fndecl) = 1;
42587 DECL_IS_NOVOPS (new_fndecl) = 1;
42588 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): elided excerpt (embedded line numbers jump) — switch case
   labels, breaks and early "return NULL_TREE" lines are missing.  Comments
   describe only the visible code.  Implements the vectorizer's gather hook:
   picks the IX86_BUILTIN_GATHER* code matching the memory vector mode and
   SImode/DImode index width.  */
42593 /* Returns a decl of a function that implements gather load with
42594 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
42595 Return NULL_TREE if it is not available. */
42598 ix86_vectorize_builtin_gather (const_tree mem_vectype,
42599 const_tree index_type, int scale)
42602 enum ix86_builtins code;
/* Index must be an SImode or DImode integer/pointer type.  */
42607 if ((TREE_CODE (index_type) != INTEGER_TYPE
42608 && !POINTER_TYPE_P (index_type))
42609 || (TYPE_MODE (index_type) != SImode
42610 && TYPE_MODE (index_type) != DImode))
42613 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42616 /* v*gather* insn sign extends index to pointer mode. */
42617 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42618 && TYPE_UNSIGNED (index_type))
/* Scale must be a power of two (range check partly elided).  */
42623 || (scale & (scale - 1)) != 0)
42626 si = TYPE_MODE (index_type) == SImode;
/* For each 128/256-bit vector mode prefer the AVX-512VL GATHER3 form
   when available, else the plain AVX2 form; 512-bit modes require
   AVX-512F.  ALT variants pair mismatched index/data lane counts.  */
42627 switch (TYPE_MODE (mem_vectype))
42630 if (TARGET_AVX512VL)
42631 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
42633 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
42636 if (TARGET_AVX512VL)
42637 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
42639 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
42642 if (TARGET_AVX512VL)
42643 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
42645 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
42648 if (TARGET_AVX512VL)
42649 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
42651 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
42654 if (TARGET_AVX512VL)
42655 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
42657 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
42660 if (TARGET_AVX512VL)
42661 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
42663 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
42666 if (TARGET_AVX512VL)
42667 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
42669 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
42672 if (TARGET_AVX512VL)
42673 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
42675 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
42678 if (TARGET_AVX512F)
42679 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
42684 if (TARGET_AVX512F)
42685 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
42690 if (TARGET_AVX512F)
42691 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
42696 if (TARGET_AVX512F)
42697 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
/* Materialize the builtin decl on demand (lazy builtin creation).  */
42705 return ix86_get_builtin (code);
/* NOTE(review): elided excerpt (embedded line numbers jump); comments cover
   only the visible code.  Implements the vectorizer's scatter-store hook,
   the mirror of ix86_vectorize_builtin_gather above: only the four 512-bit
   AVX-512F vector modes are handled.  */
42708 /* Returns a decl of a function that implements scatter store with
42709 register type VECTYPE and index type INDEX_TYPE and SCALE.
42710 Return NULL_TREE if it is not available. */
42713 ix86_vectorize_builtin_scatter (const_tree vectype,
42714 const_tree index_type, int scale)
42717 enum ix86_builtins code;
42719 if (!TARGET_AVX512F)
/* Index must be an SImode or DImode integer/pointer type.  */
42722 if ((TREE_CODE (index_type) != INTEGER_TYPE
42723 && !POINTER_TYPE_P (index_type))
42724 || (TYPE_MODE (index_type) != SImode
42725 && TYPE_MODE (index_type) != DImode))
42728 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42731 /* v*scatter* insn sign extends index to pointer mode. */
42732 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42733 && TYPE_UNSIGNED (index_type))
42736 /* Scale can be 1, 2, 4 or 8. */
42739 || (scale & (scale - 1)) != 0)
42742 si = TYPE_MODE (index_type) == SImode;
42743 switch (TYPE_MODE (vectype))
42746 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
42749 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
42752 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
42755 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
/* Use ix86_get_builtin so the builtin decl is materialized on demand,
   matching ix86_vectorize_builtin_gather above.  Indexing ix86_builtins[]
   directly bypasses lazy builtin creation and can return NULL_TREE even
   when the builtin is available.  */
42761 return ix86_get_builtin (code);
/* NOTE(review): fragment — the function header lines are elided; presumably
   this is the rsqrt-safety predicate (use_rsqrt_p?) — confirm against the
   full file.  Visible code: rsqrt is allowed only with SSE math and the
   unsafe/finite/no-trapping math flag combination.  */
42764 /* Return true if it is safe to use the rsqrt optabs to optimize
42770 return (TARGET_SSE_MATH
42771 && flag_finite_math_only
42772 && !flag_trapping_math
42773 && flag_unsafe_math_optimizations);
/* NOTE(review): elided excerpt — default case / closing lines missing.
   Maps a vector sqrt builtin to its Newton-Raphson rsqrt counterpart.  */
42776 /* Returns a code for a target-specific builtin that implements
42777 reciprocal of the function, or NULL_TREE if not available. */
42780 ix86_builtin_reciprocal (tree fndecl)
42782 switch (DECL_FUNCTION_CODE (fndecl))
42784 /* Vectorized version of sqrt to rsqrt conversion. */
42785 case IX86_BUILTIN_SQRTPS_NR:
42786 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
42788 case IX86_BUILTIN_SQRTPS_NR256:
42789 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
/* NOTE(review): elided excerpt (embedded line numbers jump) — switch case
   labels, some loop bodies and the final return are missing.  Comments
   describe only the visible code.  Validates a PARALLEL of CONST_INTs as a
   vpermilps/vpermilpd lane permutation and reconstructs the imm8 mask.  */
42796 /* Helper for avx_vpermilps256_operand et al. This is also used by
42797 the expansion functions to turn the parallel back into a mask.
42798 The return value is 0 for no match and the imm8+1 for a match. */
42801 avx_vpermilp_parallel (rtx par, machine_mode mode)
42803 unsigned i, nelt = GET_MODE_NUNITS (mode);
42805 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
42807 if (XVECLEN (par, 0) != (int) nelt)
42810 /* Validate that all of the elements are constants, and not totally
42811 out of range. Copy the data into an integral array to make the
42812 subsequent checks easier. */
42813 for (i = 0; i < nelt; ++i)
42815 rtx er = XVECEXP (par, 0, i);
42816 unsigned HOST_WIDE_INT ei;
42818 if (!CONST_INT_P (er))
42829 /* In the 512-bit DFmode case, we can only move elements within
42830 a 128-bit lane. First fill the second part of the mask,
/* Each pair of DF elements must stay inside its own 128-bit lane;
   the lane-relative bit is accumulated into MASK.  */
42832 for (i = 4; i < 6; ++i)
42834 if (ipar[i] < 4 || ipar[i] >= 6)
42836 mask |= (ipar[i] - 4) << i;
42838 for (i = 6; i < 8; ++i)
42842 mask |= (ipar[i] - 6) << i;
42847 /* In the 256-bit DFmode case, we can only move elements within
42849 for (i = 0; i < 2; ++i)
42853 mask |= ipar[i] << i;
42855 for (i = 2; i < 4; ++i)
42859 mask |= (ipar[i] - 2) << i;
42864 /* In 512 bit SFmode case, permutation in the upper 256 bits
42865 must mirror the permutation in the lower 256-bits. */
42866 for (i = 0; i < 8; ++i)
42867 if (ipar[i] + 8 != ipar[i + 8])
42872 /* In 256 bit SFmode case, we have full freedom of
42873 movement within the low 128-bit lane, but the high 128-bit
42874 lane must mirror the exact same pattern. */
42875 for (i = 0; i < 4; ++i)
42876 if (ipar[i] + 4 != ipar[i + 4])
42883 /* In the 128-bit case, we've full freedom in the placement of
42884 the elements from the source operand. */
42885 for (i = 0; i < nelt; ++i)
42886 mask |= ipar[i] << (i * (nelt / 2));
42890 gcc_unreachable ();
42893 /* Make sure success has a non-zero value by adding one. */
/* NOTE(review): elided excerpt — early returns and mask-adjustment lines
   are missing.  Validates a PARALLEL as a vperm2f128 128-bit-lane selection
   and rebuilds the imm8.  */
42897 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
42898 the expansion functions to turn the parallel back into a mask.
42899 The return value is 0 for no match and the imm8+1 for a match. */
42902 avx_vperm2f128_parallel (rtx par, machine_mode mode)
42904 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
42906 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
42908 if (XVECLEN (par, 0) != (int) nelt)
42911 /* Validate that all of the elements are constants, and not totally
42912 out of range. Copy the data into an integral array to make the
42913 subsequent checks easier. */
42914 for (i = 0; i < nelt; ++i)
42916 rtx er = XVECEXP (par, 0, i);
42917 unsigned HOST_WIDE_INT ei;
42919 if (!CONST_INT_P (er))
/* Element indices select from the 2*nelt concatenated input lanes.  */
42922 if (ei >= 2 * nelt)
42927 /* Validate that the halves of the permute are halves. */
42928 for (i = 0; i < nelt2 - 1; ++i)
42929 if (ipar[i] + 1 != ipar[i + 1])
42931 for (i = nelt2; i < nelt - 1; ++i)
42932 if (ipar[i] + 1 != ipar[i + 1])
42935 /* Reconstruct the mask. */
42936 for (i = 0; i < 2; ++i)
42938 unsigned e = ipar[i * nelt2];
42942 mask |= e << (i * 4);
42945 /* Make sure success has a non-zero value by adding one. */
/* NOTE(review): elided excerpt — the returned priority constants are
   missing; only the classification conditions are visible.  Encodes IRA
   allocation preferences for hard registers.  */
42949 /* Return a register priority for hard reg REGNO. */
42951 ix86_register_priority (int hard_regno)
42953 /* ebp and r13 as the base always wants a displacement, r12 as the
42954 base always wants an index. So discourage their usage in an
42956 if (hard_regno == R12_REG || hard_regno == R13_REG)
42958 if (hard_regno == BP_REG)
42960 /* New x86-64 int registers result in bigger code size. Discourage
42962 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
42964 /* New x86-64 SSE registers result in bigger code size. Discourage
42966 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
42968 /* Usage of AX register results in smaller code. Prefer it. */
42969 if (hard_regno == AX_REG)
/* NOTE(review): elided excerpt (embedded line numbers jump) — several
   return statements and closing braces are missing.  Comments describe only
   the visible code.  */
42974 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
42976 Put float CONST_DOUBLE in the constant pool instead of fp regs.
42977 QImode must go into class Q_REGS.
42978 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
42979 movdf to do mem-to-mem moves through integer regs. */
42982 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
42984 machine_mode mode = GET_MODE (x);
42986 /* We're only allowed to return a subclass of CLASS. Many of the
42987 following checks fail for NO_REGS, so eliminate that early. */
42988 if (regclass == NO_REGS)
42991 /* All classes can load zeros. */
42992 if (x == CONST0_RTX (mode))
42995 /* Force constants into memory if we are loading a (nonzero) constant into
42996 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
42997 instructions to load from a constant. */
42999 && (MAYBE_MMX_CLASS_P (regclass)
43000 || MAYBE_SSE_CLASS_P (regclass)
43001 || MAYBE_MASK_CLASS_P (regclass)))
43004 /* Prefer SSE regs only, if we can use them for math. */
43005 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
43006 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
43008 /* Floating-point constants need more complex checks. */
43009 if (CONST_DOUBLE_P (x))
43011 /* General regs can load everything. */
43012 if (reg_class_subset_p (regclass, GENERAL_REGS))
43015 /* Floats can load 0 and 1 plus some others. Note that we eliminated
43016 zero above. We only want to wind up preferring 80387 registers if
43017 we plan on doing computation with them. */
43019 && standard_80387_constant_p (x) > 0)
43021 /* Limit class to non-sse. */
43022 if (regclass == FLOAT_SSE_REGS)
43024 if (regclass == FP_TOP_SSE_REGS)
43026 if (regclass == FP_SECOND_SSE_REGS)
43027 return FP_SECOND_REG;
43028 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
43035 /* Generally when we see PLUS here, it's the function invariant
43036 (plus soft-fp const_int). Which can only be computed into general
43038 if (GET_CODE (x) == PLUS)
43039 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
43041 /* QImode constants are easy to load, but non-constant QImode data
43042 must go into Q_REGS. */
43043 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
43045 if (reg_class_subset_p (regclass, Q_REGS))
43047 if (reg_class_subset_p (Q_REGS, regclass))
/* NOTE(review): elided excerpt — some return statements are missing.
   Output counterpart of ix86_preferred_reload_class: restrict output
   reloads to the register bank the math is done in.  */
43055 /* Discourage putting floating-point values in SSE registers unless
43056 SSE math is being used, and likewise for the 387 registers. */
43058 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
43060 machine_mode mode = GET_MODE (x);
43062 /* Restrict the output reload class to the register bank that we are doing
43063 math on. If we would like not to return a subset of CLASS, reject this
43064 alternative: if reload cannot do this, it will still use its choice. */
43065 mode = GET_MODE (x);
43066 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
43067 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
43069 if (X87_FLOAT_MODE_P (mode))
43071 if (regclass == FP_TOP_SSE_REGS)
43073 else if (regclass == FP_SECOND_SSE_REGS)
43074 return FP_SECOND_REG;
43076 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* NOTE(review): elided excerpt (embedded line numbers jump) — the hook's
   comment header, several conditions and returns are missing.  Implements
   TARGET_SECONDARY_RELOAD: decides when a reload needs a scratch register
   or an intermediate class.  */
43083 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
43084 machine_mode mode, secondary_reload_info *sri)
43086 /* Double-word spills from general registers to non-offsettable memory
43087 references (zero-extended addresses) require special handling. */
43090 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
43091 && INTEGER_CLASS_P (rclass)
43092 && !offsettable_memref_p (x))
43095 ? CODE_FOR_reload_noff_load
43096 : CODE_FOR_reload_noff_store);
43097 /* Add the cost of moving address to a temporary. */
43098 sri->extra_cost = 1;
43103 /* QImode spills from non-QI registers require
43104 intermediate register on 32bit targets. */
43106 && (MAYBE_MASK_CLASS_P (rclass)
43107 || (!TARGET_64BIT && !in_p
43108 && INTEGER_CLASS_P (rclass)
43109 && MAYBE_NON_Q_CLASS_P (rclass))))
43118 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
43119 regno = true_regnum (x);
43121 /* Return Q_REGS if the operand is in memory. */
43126 /* This condition handles corner case where an expression involving
43127 pointers gets vectorized. We're trying to use the address of a
43128 stack slot as a vector initializer.
43130 (set (reg:V2DI 74 [ vect_cst_.2 ])
43131 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
43133 Eventually frame gets turned into sp+offset like this:
43135 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43136 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43137 (const_int 392 [0x188]))))
43139 That later gets turned into:
43141 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43142 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43143 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
43145 We'll have the following reload recorded:
43147 Reload 0: reload_in (DI) =
43148 (plus:DI (reg/f:DI 7 sp)
43149 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
43150 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43151 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
43152 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
43153 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43154 reload_reg_rtx: (reg:V2DI 22 xmm1)
43156 Which isn't going to work since SSE instructions can't handle scalar
43157 additions. Returning GENERAL_REGS forces the addition into integer
43158 register and reload can handle subsequent reloads without problems. */
43160 if (in_p && GET_CODE (x) == PLUS
43161 && SSE_CLASS_P (rclass)
43162 && SCALAR_INT_MODE_P (mode))
43163 return GENERAL_REGS;
/* NOTE(review): fragment — the switch header, most case labels and the
   return values are elided; only two case labels are visible.  */
43168 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
43171 ix86_class_likely_spilled_p (reg_class_t rclass)
43182 case SSE_FIRST_REG:
43184 case FP_SECOND_REG:
/* NOTE(review): elided excerpt — several return statements are missing.
   Predicate: does a MODE move between CLASS1 and CLASS2 have to go
   through memory?  Inlined so register_move_cost stays cheap.  */
43195 /* If we are copying between general and FP registers, we need a memory
43196 location. The same is true for SSE and MMX registers.
43198 To optimize register_move_cost performance, allow inline variant.
43200 The macro can't work reliably when one of the CLASSES is class containing
43201 registers from multiple units (SSE, MMX, integer). We avoid this by never
43202 combining those units in single alternative in the machine description.
43203 Ensure that this constraint holds to avoid unexpected surprises.
43205 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
43206 enforce these sanity checks. */
43209 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43210 machine_mode mode, int strict)
43212 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Sanity check: mixed-unit classes must not reach here (see header
   comment); under strict mode this is an assertion failure.  */
43214 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
43215 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
43216 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
43217 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
43218 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
43219 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
43221 gcc_assert (!strict || lra_in_progress);
43225 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
43228 /* Between mask and general, we have moves no larger than word size. */
43229 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
43230 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
43233 /* ??? This is a lie. We do have moves between mmx/general, and for
43234 mmx/sse2. But by saying we need secondary memory we discourage the
43235 register allocator from using the mmx registers unless needed. */
43236 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
43239 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43241 /* SSE1 doesn't have any direct moves from other classes. */
43245 /* If the target says that inter-unit moves are more expensive
43246 than moving through memory, then don't generate them. */
43247 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
43248 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
43251 /* Between SSE and general, we have moves no larger than word size. */
43252 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed (above),
   used as the SECONDARY_MEMORY_NEEDED entry point.  */
43260 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43261 machine_mode mode, int strict)
43263 return inline_secondary_memory_needed (class1, class2, mode, strict);
/* NOTE(review): elided excerpt — the non-integer/complex return paths are
   missing.  Registers needed to hold MODE in RCLASS: XFmode/XCmode have
   irregular sizes (10 bytes padded differently on 32/64-bit).  */
43266 /* Implement the TARGET_CLASS_MAX_NREGS hook.
43268 On the 80386, this is the size of MODE in words,
43269 except in the FP regs, where a single reg is always enough. */
43271 static unsigned char
43272 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
43274 if (MAYBE_INTEGER_CLASS_P (rclass))
43276 if (mode == XFmode)
43277 return (TARGET_64BIT ? 2 : 3);
43278 else if (mode == XCmode)
43279 return (TARGET_64BIT ? 4 : 6);
43281 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
43285 if (COMPLEX_MODE_P (mode))
/* NOTE(review): elided excerpt — the return statements are missing; only
   the classification conditions are visible.  */
43292 /* Return true if the registers in CLASS cannot represent the change from
43293 modes FROM to TO. */
43296 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
43297 enum reg_class regclass)
43302 /* x87 registers can't do subreg at all, as all values are reformatted
43303 to extended precision. */
43304 if (MAYBE_FLOAT_CLASS_P (regclass))
43307 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
43309 /* Vector registers do not support QI or HImode loads. If we don't
43310 disallow a change to these modes, reload will assume it's ok to
43311 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
43312 the vec_dupv4hi pattern. */
43313 if (GET_MODE_SIZE (from) < 4)
/* NOTE(review): elided excerpt (embedded line numbers jump) — index
   computations, case labels and some branches are missing.  Comments
   describe only the visible code.  */
43320 /* Return the cost of moving data of mode M between a
43321 register and memory. A value of 2 is the default; this cost is
43322 relative to those in `REGISTER_MOVE_COST'.
43324 This function is used extensively by register_move_cost that is used to
43325 build tables at startup. Make it inline in this case.
43326 When IN is 2, return maximum of in and out move cost.
43328 If moving between registers and memory is more expensive than
43329 between two registers, you should define this macro to express the
43332 Model also increased moving costs of QImode registers in non
43336 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
/* x87: cost comes from the fp_load/fp_store tables (IN == 2 takes the
   max of both directions).  */
43340 if (FLOAT_CLASS_P (regclass))
43358 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
43359 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
43361 if (SSE_CLASS_P (regclass))
43364 switch (GET_MODE_SIZE (mode))
43379 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
43380 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
43382 if (MMX_CLASS_P (regclass))
43385 switch (GET_MODE_SIZE (mode))
43397 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
43398 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: byte moves are special-cased for partial-register
   stall avoidance (movzbl load instead of a plain byte load).  */
43400 switch (GET_MODE_SIZE (mode))
43403 if (Q_CLASS_P (regclass) || TARGET_64BIT)
43406 return ix86_cost->int_store[0];
43407 if (TARGET_PARTIAL_REG_DEPENDENCY
43408 && optimize_function_for_speed_p (cfun))
43409 cost = ix86_cost->movzbl_load;
43411 cost = ix86_cost->int_load[0];
43413 return MAX (cost, ix86_cost->int_store[0]);
43419 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
43421 return ix86_cost->movzbl_load;
43423 return ix86_cost->int_store[0] + 4;
43428 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
43429 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
43431 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
43432 if (mode == TFmode)
43435 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
43437 cost = ix86_cost->int_load[2];
43439 cost = ix86_cost->int_store[2];
43440 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
/* TARGET_MEMORY_MOVE_COST wrapper: forwards to the inline worker with
   IN collapsed to 0/1 (the "2 = max of both" encoding is internal).  */
43445 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
43448 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
/* NOTE(review): elided excerpt — some branches and the final return are
   missing.  Comments describe only the visible code.  */
43452 /* Return the cost of moving data from a register in class CLASS1 to
43453 one in class CLASS2.
43455 It is not required that the cost always equal 2 when FROM is the same as TO;
43456 on some machines it is expensive to move between registers if they are not
43457 general registers. */
43460 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
43461 reg_class_t class2_i)
43463 enum reg_class class1 = (enum reg_class) class1_i;
43464 enum reg_class class2 = (enum reg_class) class2_i;
43466 /* In case we require secondary memory, compute cost of the store followed
43467 by load. In order to avoid bad register allocation choices, we need
43468 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
43470 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2: charge the max of load and store cost for each side.  */
43474 cost += inline_memory_move_cost (mode, class1, 2);
43475 cost += inline_memory_move_cost (mode, class2, 2);
43477 /* In case of copying from general_purpose_register we may emit multiple
43478 stores followed by single load causing memory size mismatch stall.
43479 Count this as arbitrarily high cost of 20. */
43480 if (targetm.class_max_nregs (class1, mode)
43481 > targetm.class_max_nregs (class2, mode))
43484 /* In the case of FP/MMX moves, the registers actually overlap, and we
43485 have to switch modes in order to treat them differently. */
43486 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
43487 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
43493 /* Moves between SSE/MMX and integer unit are expensive. */
43494 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
43495 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43497 /* ??? By keeping returned value relatively high, we limit the number
43498 of moves between integer and MMX/SSE registers for all targets.
43499 Additionally, high value prevents problem with x86_modes_tieable_p(),
43500 where integer modes in MMX/SSE registers are not tieable
43501 because of missing QImode and HImode moves to, from or between
43502 MMX/SSE registers. */
43503 return MAX (8, ix86_cost->mmxsse_to_integer);
43505 if (MAYBE_FLOAT_CLASS_P (class1))
43506 return ix86_cost->fp_move;
43507 if (MAYBE_SSE_CLASS_P (class1))
43508 return ix86_cost->sse_move;
43509 if (MAYBE_MMX_CLASS_P (class1))
43510 return ix86_cost->mmx_move;
/* NOTE(review): elided excerpt (embedded line numbers jump) — several
   returns, conditions and braces are missing.  Comments describe only the
   visible code.  Per-register-file validity of MODE in hard reg REGNO.  */
43514 /* Return TRUE if hard register REGNO can hold a value of machine-mode
43518 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
43520 /* Flags and only flags can only hold CCmode values. */
43521 if (CC_REGNO_P (regno))
43522 return GET_MODE_CLASS (mode) == MODE_CC;
43523 if (GET_MODE_CLASS (mode) == MODE_CC
43524 || GET_MODE_CLASS (mode) == MODE_RANDOM
43525 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
43527 if (STACK_REGNO_P (regno))
43528 return VALID_FP_MODE_P (mode);
43529 if (MASK_REGNO_P (regno))
43530 return (VALID_MASK_REG_MODE (mode)
43531 || (TARGET_AVX512BW
43532 && VALID_MASK_AVX512BW_MODE (mode)));
43533 if (BND_REGNO_P (regno))
43534 return VALID_BND_REG_MODE (mode);
43535 if (SSE_REGNO_P (regno))
43537 /* We implement the move patterns for all vector modes into and
43538 out of SSE registers, even when no operation instructions
43541 /* For AVX-512 we allow, regardless of regno:
43543 - any of 512-bit wide vector mode
43544 - any scalar mode. */
43547 || VALID_AVX512F_REG_MODE (mode)
43548 || VALID_AVX512F_SCALAR_MODE (mode)))
43551 /* TODO check for QI/HI scalars. */
43552 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
43553 if (TARGET_AVX512VL
43556 || VALID_AVX256_REG_MODE (mode)
43557 || VALID_AVX512VL_128_REG_MODE (mode)))
43560 /* xmm16-xmm31 are only available for AVX-512. */
43561 if (EXT_REX_SSE_REGNO_P (regno))
43564 /* OImode and AVX modes are available only when AVX is enabled. */
43565 return ((TARGET_AVX
43566 && VALID_AVX256_REG_OR_OI_MODE (mode))
43567 || VALID_SSE_REG_MODE (mode)
43568 || VALID_SSE2_REG_MODE (mode)
43569 || VALID_MMX_REG_MODE (mode)
43570 || VALID_MMX_REG_MODE_3DNOW (mode));
43572 if (MMX_REGNO_P (regno))
43574 /* We implement the move patterns for 3DNOW modes even in MMX mode,
43575 so if the register is available at all, then we can move data of
43576 the given mode into or out of it. */
43577 return (VALID_MMX_REG_MODE (mode)
43578 || VALID_MMX_REG_MODE_3DNOW (mode));
43581 if (mode == QImode)
43583 /* Take care for QImode values - they can be in non-QI regs,
43584 but then they do cause partial register stalls. */
43585 if (ANY_QI_REGNO_P (regno))
43587 if (!TARGET_PARTIAL_REG_STALL)
43589 /* LRA checks if the hard register is OK for the given mode.
43590 QImode values can live in non-QI regs, so we allow all
43592 if (lra_in_progress)
43594 return !can_create_pseudo_p ();
43596 /* We handle both integer and floats in the general purpose registers. */
43597 else if (VALID_INT_MODE_P (mode))
43599 else if (VALID_FP_MODE_P (mode))
43601 else if (VALID_DFP_MODE_P (mode))
43603 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
43604 on to use that value in smaller contexts, this can easily force a
43605 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
43606 supporting DImode, allow it. */
43607 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
/* NOTE(review): fragment — the switch and case labels are elided; only
   two of the per-mode returns are visible.  */
43613 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
43614 tieable integer mode. */
43617 ix86_tieable_integer_mode_p (machine_mode mode)
43626 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
43629 return TARGET_64BIT;
/* NOTE(review): elided excerpt — some returns and the final fallthrough
   are missing.  Symmetric-looking but direction-sensitive: asks whether
   every class that can hold MODE2 can also hold MODE1.  */
43636 /* Return true if MODE1 is accessible in a register that can hold MODE2
43637 without copying. That is, all register classes that can hold MODE2
43638 can also hold MODE1. */
43641 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
43643 if (mode1 == mode2)
43646 if (ix86_tieable_integer_mode_p (mode1)
43647 && ix86_tieable_integer_mode_p (mode2))
43650 /* MODE2 being XFmode implies fp stack or general regs, which means we
43651 can tie any smaller floating point modes to it. Note that we do not
43652 tie this with TFmode. */
43653 if (mode2 == XFmode)
43654 return mode1 == SFmode || mode1 == DFmode;
43656 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
43657 that we can tie it with SFmode. */
43658 if (mode2 == DFmode)
43659 return mode1 == SFmode;
43661 /* If MODE2 is only appropriate for an SSE register, then tie with
43662 any other mode acceptable to SSE registers. */
43663 if (GET_MODE_SIZE (mode2) == 32
43664 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43665 return (GET_MODE_SIZE (mode1) == 32
43666 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43667 if (GET_MODE_SIZE (mode2) == 16
43668 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43669 return (GET_MODE_SIZE (mode1) == 16
43670 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43672 /* If MODE2 is appropriate for an MMX register, then tie
43673 with any other mode acceptable to MMX registers. */
43674 if (GET_MODE_SIZE (mode2) == 8
43675 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
43676 return (GET_MODE_SIZE (mode1) == 8
43677 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
/* NOTE(review): elided excerpt — case labels and breaks are missing.
   Picks the widest chunk size a reg-reg copy of MODE can use and charges
   one insn per chunk.  */
43682 /* Return the cost of moving between two registers of mode MODE. */
43685 ix86_set_reg_reg_cost (machine_mode mode)
43687 unsigned int units = UNITS_PER_WORD;
43689 switch (GET_MODE_CLASS (mode))
43695 units = GET_MODE_SIZE (CCmode);
/* Scalar float: a single-insn full-width move exists only when the
   matching FP unit (x87/SSE/SSE2) is enabled for that mode.  */
43699 if ((TARGET_SSE && mode == TFmode)
43700 || (TARGET_80387 && mode == XFmode)
43701 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
43702 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
43703 units = GET_MODE_SIZE (mode);
43706 case MODE_COMPLEX_FLOAT:
43707 if ((TARGET_SSE && mode == TCmode)
43708 || (TARGET_80387 && mode == XCmode)
43709 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
43710 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
43711 units = GET_MODE_SIZE (mode);
43714 case MODE_VECTOR_INT:
43715 case MODE_VECTOR_FLOAT:
43716 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43717 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43718 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43719 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43720 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
43721 units = GET_MODE_SIZE (mode);
43724 /* Return the cost of moving between two registers of mode MODE,
43725 assuming that the move will be in pieces of at most UNITS bytes. */
43726 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
/* Implementation of the TARGET_RTX_COSTS hook for x86.  Estimates the
   cost of rtx X (with mode MODE, appearing as operand OPNO of
   OUTER_CODE) into *TOTAL.  When SPEED is true the per-processor cost
   table ix86_cost is used; otherwise the size-optimizing table
   ix86_size_cost.  NOTE(review): the case labels of the main switch are
   not visible in this excerpt; the section comments below describe the
   visible bodies only.  */
43729 /* Compute a (partial) cost for rtx X. Return true if the complete
43730 cost has been computed, and false if subexpressions should be
43731 scanned. In either case, *TOTAL contains the cost result. */
43734 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
43735 int *total, bool speed)
43738 enum rtx_code code = GET_CODE (x);
43739 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
43740 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* A SET between a register destination and a register-or-zero source
   is costed as a plain reg-reg move of the destination's mode.  */
43745 if (register_operand (SET_DEST (x), VOIDmode)
43746 && reg_or_0_operand (SET_SRC (x), VOIDmode))
43748 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Constants: 64-bit immediates that need movabs or cannot be
   zero-extended are more expensive; PIC-local symbols are cheap.  */
43757 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
43759 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
43761 else if (flag_pic && SYMBOLIC_CONST (x)
43763 && (GET_CODE (x) == LABEL_REF
43764 || (GET_CODE (x) == SYMBOL_REF
43765 && SYMBOL_REF_LOCAL_P (x))))
43766 /* Use 0 cost for CONST to improve its propagation. */
43767 && (TARGET_64BIT || GET_CODE (x) != CONST))
43773 case CONST_WIDE_INT:
/* FP constants: ones the x87/SSE can materialize directly (fldz,
   xorps, ...) are cheap; anything else ends up as a memory load.  */
43778 switch (standard_80387_constant_p (x))
43783 default: /* Other constants */
43790 if (SSE_FLOAT_MODE_P (mode))
43793 switch (standard_sse_constant_p (x))
43797 case 1: /* 0: xor eliminates false dependency */
43800 default: /* -1: cmp contains false dependency */
43805 /* Fall back to (MEM (SYMBOL_REF)), since that's where
43806 it'll probably end up. Add a penalty for size. */
43807 *total = (COSTS_N_INSNS (1)
43808 + (flag_pic != 0 && !TARGET_64BIT)
43809 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
/* Zero / sign extension.  */
43813 /* The zero extensions is often completely free on x86_64, so make
43814 it as cheap as possible. */
43815 if (TARGET_64BIT && mode == DImode
43816 && GET_MODE (XEXP (x, 0)) == SImode)
43818 else if (TARGET_ZERO_EXTEND_WITH_AND)
43819 *total = cost->add;
43821 *total = cost->movzx;
43825 *total = cost->movsx;
/* Left shift by a small constant on a sub-word scalar may be done
   with add (shift by 1) or lea (shift by 2 or 3).  */
43829 if (SCALAR_INT_MODE_P (mode)
43830 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
43831 && CONST_INT_P (XEXP (x, 1)))
43833 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43836 *total = cost->add;
43839 if ((value == 2 || value == 3)
43840 && cost->lea <= cost->shift_const)
43842 *total = cost->lea;
/* Vector shifts.  */
43852 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43854 /* ??? Should be SSE vector operation cost. */
43855 /* At least for published AMD latencies, this really is the same
43856 as the latency for a simple fpu operation like fabs. */
43857 /* V*QImode is emulated with 1-11 insns. */
43858 if (mode == V16QImode || mode == V32QImode)
43861 if (TARGET_XOP && mode == V16QImode)
43863 /* For XOP we use vpshab, which requires a broadcast of the
43864 value to the variable shift insn. For constants this
43865 means a V16Q const in mem; even when we can perform the
43866 shift with one insn set the cost to prefer paddb. */
43867 if (CONSTANT_P (XEXP (x, 1)))
43869 *total = (cost->fabs
43870 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
43871 + (speed ? 2 : COSTS_N_BYTES (16)));
43876 else if (TARGET_SSSE3)
43878 *total = cost->fabs * count;
43881 *total = cost->fabs;
/* Double-word scalar shifts: emulated with a pair of shifts, plus
   extra insns for shifts that cross the word boundary or are
   variable.  */
43883 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43885 if (CONST_INT_P (XEXP (x, 1)))
43887 if (INTVAL (XEXP (x, 1)) > 32)
43888 *total = cost->shift_const + COSTS_N_INSNS (2);
43890 *total = cost->shift_const * 2;
43894 if (GET_CODE (XEXP (x, 1)) == AND)
43895 *total = cost->shift_var * 2;
43897 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
/* Single-word shifts: constant vs. variable shift count; an AND
   wrapped in a SUBREG is the shift-count truncation pattern and
   costs nothing extra.  */
43902 if (CONST_INT_P (XEXP (x, 1)))
43903 *total = cost->shift_const;
43904 else if (SUBREG_P (XEXP (x, 1))
43905 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
43907 /* Return the cost after shift-and truncation. */
43908 *total = cost->shift_var;
43912 *total = cost->shift_var;
/* Fused multiply-add: costed as fmul; negation of operands 0 and 2
   is folded into the FMS/FNMA/FNMS forms for free.  */
43920 gcc_assert (FLOAT_MODE_P (mode));
43921 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
43923 /* ??? SSE scalar/vector cost should be used here. */
43924 /* ??? Bald assumption that fma has the same cost as fmul. */
43925 *total = cost->fmul;
43926 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
43928 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
43930 if (GET_CODE (sub) == NEG)
43931 sub = XEXP (sub, 0);
43932 *total += rtx_cost (sub, mode, FMA, 0, speed);
43935 if (GET_CODE (sub) == NEG)
43936 sub = XEXP (sub, 0);
43937 *total += rtx_cost (sub, mode, FMA, 2, speed);
/* Multiplication: FP forms all use cost->fmul; emulated integer
   vector multiplies are costed by their expansion length.  */
43942 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43944 /* ??? SSE scalar cost should be used here. */
43945 *total = cost->fmul;
43948 else if (X87_FLOAT_MODE_P (mode))
43950 *total = cost->fmul;
43953 else if (FLOAT_MODE_P (mode))
43955 /* ??? SSE vector cost should be used here. */
43956 *total = cost->fmul;
43959 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43961 /* V*QImode is emulated with 7-13 insns. */
43962 if (mode == V16QImode || mode == V32QImode)
43965 if (TARGET_XOP && mode == V16QImode)
43967 else if (TARGET_SSSE3)
43969 *total = cost->fmul * 2 + cost->fabs * extra;
43971 /* V*DImode is emulated with 5-8 insns. */
43972 else if (mode == V2DImode || mode == V4DImode)
43974 if (TARGET_XOP && mode == V2DImode)
43975 *total = cost->fmul * 2 + cost->fabs * 3;
43977 *total = cost->fmul * 3 + cost->fabs * 5;
43979 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
43980 insns, including two PMULUDQ. */
43981 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
43982 *total = cost->fmul * 2 + cost->fabs * 5;
43984 *total = cost->fmul;
/* Scalar integer multiply: cost grows with the population count of a
   constant multiplier (nbits), and widening multiplies are costed in
   the narrower inner mode.  */
43989 rtx op0 = XEXP (x, 0);
43990 rtx op1 = XEXP (x, 1);
43992 if (CONST_INT_P (XEXP (x, 1)))
43994 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43995 for (nbits = 0; value != 0; value &= value - 1)
43999 /* This is arbitrary. */
44002 /* Compute costs correctly for widening multiplication. */
44003 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
44004 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
44005 == GET_MODE_SIZE (mode))
44007 int is_mulwiden = 0;
44008 machine_mode inner_mode = GET_MODE (op0);
44010 if (GET_CODE (op0) == GET_CODE (op1))
44011 is_mulwiden = 1, op1 = XEXP (op1, 0);
44012 else if (CONST_INT_P (op1))
44014 if (GET_CODE (op0) == SIGN_EXTEND)
44015 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
44018 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
44022 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
44025 *total = (cost->mult_init[MODE_INDEX (mode)]
44026 + nbits * cost->mult_bit
44027 + rtx_cost (op0, mode, outer_code, opno, speed)
44028 + rtx_cost (op1, mode, outer_code, opno, speed));
/* Division / modulus.  */
44037 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44038 /* ??? SSE cost should be used here. */
44039 *total = cost->fdiv;
44040 else if (X87_FLOAT_MODE_P (mode))
44041 *total = cost->fdiv;
44042 else if (FLOAT_MODE_P (mode))
44043 /* ??? SSE vector cost should be used here. */
44044 *total = cost->fdiv;
44046 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize the address-like shapes (base + index*scale
   [+ disp]) that lea can compute in one insn.  */
44050 if (GET_MODE_CLASS (mode) == MODE_INT
44051 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
44053 if (GET_CODE (XEXP (x, 0)) == PLUS
44054 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
44055 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
44056 && CONSTANT_P (XEXP (x, 1)))
44058 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
44059 if (val == 2 || val == 4 || val == 8)
44061 *total = cost->lea;
44062 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44063 outer_code, opno, speed);
44064 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
44065 outer_code, opno, speed);
44066 *total += rtx_cost (XEXP (x, 1), mode,
44067 outer_code, opno, speed);
44071 else if (GET_CODE (XEXP (x, 0)) == MULT
44072 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
44074 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
44075 if (val == 2 || val == 4 || val == 8)
44077 *total = cost->lea;
44078 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44079 outer_code, opno, speed);
44080 *total += rtx_cost (XEXP (x, 1), mode,
44081 outer_code, opno, speed);
44085 else if (GET_CODE (XEXP (x, 0)) == PLUS)
44087 *total = cost->lea;
44088 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44089 outer_code, opno, speed);
44090 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44091 outer_code, opno, speed);
44092 *total += rtx_cost (XEXP (x, 1), mode,
44093 outer_code, opno, speed);
/* FP add/sub.  */
44100 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44102 /* ??? SSE cost should be used here. */
44103 *total = cost->fadd;
44106 else if (X87_FLOAT_MODE_P (mode))
44108 *total = cost->fadd;
44111 else if (FLOAT_MODE_P (mode))
44113 /* ??? SSE vector cost should be used here. */
44114 *total = cost->fadd;
/* Double-word logical/arith ops: two adds, with sub-costs doubled
   for operands that are not already DImode.  */
44122 if (GET_MODE_CLASS (mode) == MODE_INT
44123 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44125 *total = (cost->add * 2
44126 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
44127 << (GET_MODE (XEXP (x, 0)) != DImode))
44128 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
44129 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* FP negation.  */
44135 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44137 /* ??? SSE cost should be used here. */
44138 *total = cost->fchs;
44141 else if (X87_FLOAT_MODE_P (mode))
44143 *total = cost->fchs;
44146 else if (FLOAT_MODE_P (mode))
44148 /* ??? SSE vector cost should be used here. */
44149 *total = cost->fchs;
/* NOT / one's complement.  */
44155 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
44157 /* ??? Should be SSE vector operation cost. */
44158 /* At least for published AMD latencies, this really is the same
44159 as the latency for a simple fpu operation like fabs. */
44160 *total = cost->fabs;
44162 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44163 *total = cost->add * 2;
44165 *total = cost->add;
/* COMPARE: a single-bit ZERO_EXTRACT against zero maps to test[bwl];
   a comparison already embedded in the operand is free.  */
44169 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
44170 && XEXP (XEXP (x, 0), 1) == const1_rtx
44171 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
44172 && XEXP (x, 1) == const0_rtx)
44174 /* This kind of construct is implemented using test[bwl].
44175 Treat it as if we had an AND. */
44176 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
44177 *total = (cost->add
44178 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
44180 + rtx_cost (const1_rtx, mode, outer_code, opno, speed))
44184 /* The embedded comparison operand is completely free. */
44185 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
44186 && XEXP (x, 1) == const0_rtx)
44192 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS and SQRT.  */
44197 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44198 /* ??? SSE cost should be used here. */
44199 *total = cost->fabs;
44200 else if (X87_FLOAT_MODE_P (mode))
44201 *total = cost->fabs;
44202 else if (FLOAT_MODE_P (mode))
44203 /* ??? SSE vector cost should be used here. */
44204 *total = cost->fabs;
44208 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44209 /* ??? SSE cost should be used here. */
44210 *total = cost->fsqrt;
44211 else if (X87_FLOAT_MODE_P (mode))
44212 *total = cost->fsqrt;
44213 else if (FLOAT_MODE_P (mode))
44214 /* ??? SSE vector cost should be used here. */
44215 *total = cost->fsqrt;
/* UNSPEC_TP is the thread-pointer access.  */
44219 if (XINT (x, 1) == UNSPEC_TP)
/* Vector shuffles/duplicates: assume one recognizable insn; an
   AVX-512 masked operation costs the same as the unmasked form when
   the mask is in a register.  */
44225 case VEC_DUPLICATE:
44226 /* ??? Assume all of these vector manipulation patterns are
44227 recognizable. In which case they all pretty much have the
44229 *total = cost->fabs;
44232 mask = XEXP (x, 2);
44233 /* This is masked instruction, assume the same cost,
44234 as nonmasked variant. */
44235 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
44236 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
44238 *total = cost->fabs;
/* Counter used to generate unique local labels (L<n>$lz, LPC$<n>) for
   the Mach-O stubs emitted below.  NOTE(review): this block is inside a
   TARGET_MACHO region whose #if is not visible in this excerpt.  */
44248 static int current_machopic_label_num;
44250 /* Given a symbol name and its associated stub, write out the
44251 definition of the stub. */
/* Emit the Darwin/Mach-O lazy-binding stub for SYMB into FILE, using
   label names derived from STUB.  Three flavors are produced: the AT&T
   self-modifying stub (MACHOPIC_ATT_STUB), a PIC stub that obtains the
   PC via a get_pc_thunk call (MACHOPIC_PURE), and a non-PIC stub.
   32-bit only.  */
44254 machopic_output_stub (FILE *file, const char *symb, const char *stub)
44256 unsigned int length;
44257 char *binder_name, *symbol_name, lazy_ptr_name[32];
44258 int label = ++current_machopic_label_num;
44260 /* For 64-bit we shouldn't get here. */
44261 gcc_assert (!TARGET_64BIT);
44263 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
44264 symb = targetm.strip_name_encoding (symb);
44266 length = strlen (stub);
44267 binder_name = XALLOCAVEC (char, length + 32);
44268 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
44270 length = strlen (symb);
44271 symbol_name = XALLOCAVEC (char, length + 32);
44272 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
44274 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section matching the code model; the lazy-pointer
   section chosen further below must stay in sync with this choice.  */
44276 if (MACHOPIC_ATT_STUB)
44277 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
44278 else if (MACHOPIC_PURE)
44279 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
44281 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
44283 fprintf (file, "%s:\n", stub);
44284 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44286 if (MACHOPIC_ATT_STUB)
/* Self-modifying stub: dyld overwrites these hlt bytes.  */
44288 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
44290 else if (MACHOPIC_PURE)
44293 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44294 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
44295 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
44296 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
44297 label, lazy_ptr_name, label);
44298 fprintf (file, "\tjmp\t*%%ecx\n");
44301 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
44303 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
44304 it needs no stub-binding-helper. */
44305 if (MACHOPIC_ATT_STUB)
/* Binder trampoline: pushes the lazy pointer address and jumps to
   dyld_stub_binding_helper for lazy resolution.  */
44308 fprintf (file, "%s:\n", binder_name);
44312 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
44313 fprintf (file, "\tpushl\t%%ecx\n");
44316 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
44318 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
44320 /* N.B. Keep the correspondence of these
44321 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
44322 old-pic/new-pic/non-pic stubs; altering this will break
44323 compatibility with existing dylibs. */
44326 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44327 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
44330 /* 16-byte -mdynamic-no-pic stub. */
44331 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
/* Finally the lazy pointer itself, initialized to the binder so the
   first call goes through lazy binding.  */
44333 fprintf (file, "%s:\n", lazy_ptr_name);
44334 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44335 fprintf (file, ASM_LONG "%s\n", binder_name);
44337 #endif /* TARGET_MACHO */
44339 /* Order the registers for register allocator. */
/* Fill reg_alloc_order with the preferred allocation order: call-used
   general registers first, then call-saved ones, then FP/vector
   register files.  The x87 stack registers are placed before the SSE
   registers only when x87 math is in use (!TARGET_SSE_MATH), otherwise
   after everything else.  */
44342 x86_order_regs_for_local_alloc (void)
44347 /* First allocate the local general purpose registers. */
44348 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44349 if (GENERAL_REGNO_P (i) && call_used_regs[i])
44350 reg_alloc_order [pos++] = i;
44352 /* Global general purpose registers. */
44353 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44354 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
44355 reg_alloc_order [pos++] = i;
44357 /* x87 registers come first in case we are doing FP math
44359 if (!TARGET_SSE_MATH)
44360 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44361 reg_alloc_order [pos++] = i;
44363 /* SSE registers. */
44364 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
44365 reg_alloc_order [pos++] = i;
44366 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
44367 reg_alloc_order [pos++] = i;
44369 /* Extended REX SSE registers. */
44370 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
44371 reg_alloc_order [pos++] = i;
44373 /* Mask register. */
44374 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
44375 reg_alloc_order [pos++] = i;
44377 /* MPX bound registers. */
44378 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
44379 reg_alloc_order [pos++] = i;
44381 /* x87 registers. */
44382 if (TARGET_SSE_MATH)
44383 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44384 reg_alloc_order [pos++] = i;
/* MMX registers go last among the real registers.  */
44386 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
44387 reg_alloc_order [pos++] = i;
44389 /* Initialize the rest of array as we do not allocate some registers
44391 while (pos < FIRST_PSEUDO_REGISTER)
44392 reg_alloc_order [pos++] = 0;
44395 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
44396 in struct attribute_spec handler. */
/* Validates the attribute: it must be on a function (or related decl),
   is 32-bit only, and its single argument must be the integer constant
   0 or 1.  On any violation a -Wattributes warning is issued and
   *NO_ADD_ATTRS is set so the attribute is dropped.  */
44398 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
44401 bool *no_add_attrs)
44403 if (TREE_CODE (*node) != FUNCTION_TYPE
44404 && TREE_CODE (*node) != METHOD_TYPE
44405 && TREE_CODE (*node) != FIELD_DECL
44406 && TREE_CODE (*node) != TYPE_DECL)
44408 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44410 *no_add_attrs = true;
/* NOTE(review): the 64-bit check guarding this warning is not visible
   in this excerpt.  */
44415 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
44417 *no_add_attrs = true;
44420 if (is_attribute_p ("callee_pop_aggregate_return", name))
44424 cst = TREE_VALUE (args);
44425 if (TREE_CODE (cst) != INTEGER_CST)
44427 warning (OPT_Wattributes,
44428 "%qE attribute requires an integer constant argument",
44430 *no_add_attrs = true;
44432 else if (compare_tree_int (cst, 0) != 0
44433 && compare_tree_int (cst, 1) != 0)
44435 warning (OPT_Wattributes,
44436 "argument to %qE attribute is neither zero, nor one",
44438 *no_add_attrs = true;
44447 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
44448 struct attribute_spec.handler. */
/* Validates that the attribute is applied to a function type and that
   ms_abi and sysv_abi are not both present; errors/warns and sets
   *NO_ADD_ATTRS otherwise.  */
44450 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
44451 bool *no_add_attrs)
44453 if (TREE_CODE (*node) != FUNCTION_TYPE
44454 && TREE_CODE (*node) != METHOD_TYPE
44455 && TREE_CODE (*node) != FIELD_DECL
44456 && TREE_CODE (*node) != TYPE_DECL)
44458 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44460 *no_add_attrs = true;
44464 /* Can combine regparm with all attributes but fastcall. */
44465 if (is_attribute_p ("ms_abi", name))
44467 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
44469 error ("ms_abi and sysv_abi attributes are not compatible");
44474 else if (is_attribute_p ("sysv_abi", name))
44476 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
44478 error ("ms_abi and sysv_abi attributes are not compatible");
44487 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
44488 struct attribute_spec.handler. */
/* Accepts the attribute only on record/union types (or a TYPE_DECL for
   one), and rejects combining ms_struct with gcc_struct on the same
   type; warns and sets *NO_ADD_ATTRS otherwise.  */
44490 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
44491 bool *no_add_attrs)
44494 if (DECL_P (*node))
44496 if (TREE_CODE (*node) == TYPE_DECL)
44497 type = &TREE_TYPE (*node);
44502 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
44504 warning (OPT_Wattributes, "%qE attribute ignored",
44506 *no_add_attrs = true;
44509 else if ((is_attribute_p ("ms_struct", name)
44510 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
44511 || ((is_attribute_p ("gcc_struct", name)
44512 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
44514 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
44516 *no_add_attrs = true;
/* Attribute handler that simply requires the attribute to be placed on
   a FUNCTION_DECL; otherwise warns and drops it via *NO_ADD_ATTRS.  */
44523 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
44524 bool *no_add_attrs)
44526 if (TREE_CODE (*node) != FUNCTION_DECL)
44528 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44530 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bit-field layout: either
   -mms-bitfields is on and the type is not marked gcc_struct, or the
   type is explicitly marked ms_struct.  */
44536 ix86_ms_bitfield_layout_p (const_tree record_type)
44538 return ((TARGET_MS_BITFIELD_LAYOUT
44539 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
44540 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
44543 /* Returns an expression indicating where the this parameter is
44544 located on entry to the FUNCTION. */
/* Returns a REG or MEM rtx for the incoming `this' pointer.  AGGR is
   nonzero when the function returns an aggregate in memory, which
   shifts `this' one slot/register later.  64-bit: a parameter register
   per the active ABI.  32-bit: a register for regparm/fastcall/thiscall
   conventions, otherwise the stack slot at 4(%esp).  */
44547 x86_this_parameter (tree function)
44549 tree type = TREE_TYPE (function);
44550 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
44555 const int *parm_regs;
44557 if (ix86_function_type_abi (type) == MS_ABI)
44558 parm_regs = x86_64_ms_abi_int_parameter_registers;
44560 parm_regs = x86_64_int_parameter_registers;
/* parm_regs[aggr]: skip the first register when it holds the hidden
   aggregate-return pointer.  */
44561 return gen_rtx_REG (Pmode, parm_regs[aggr]);
44564 nregs = ix86_function_regparm (type, function);
44566 if (nregs > 0 && !stdarg_p (type))
44569 unsigned int ccvt = ix86_get_callcvt (type);
44571 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44572 regno = aggr ? DX_REG : CX_REG;
44573 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
/* NOTE(review): the thiscall/aggr handling lines between here and
   the MEM fallback are not visible in this excerpt.  */
44577 return gen_rtx_MEM (SImode,
44578 plus_constant (Pmode, stack_pointer_rtx, 4));
44587 return gen_rtx_MEM (SImode,
44588 plus_constant (Pmode,
44589 stack_pointer_rtx, 4));
44592 return gen_rtx_REG (SImode, regno);
/* Default: `this' lives on the stack just above the return address.  */
44595 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
44599 /* Determine whether x86_output_mi_thunk can succeed. */
/* Predicate for TARGET_ASM_CAN_OUTPUT_MI_THUNK: 64-bit always works;
   32-bit needs a scratch register free of regparm arguments for the
   vcall offset and for PIC references to non-local functions.  */
44602 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
44603 const_tree function)
44605 /* 64-bit can handle anything. */
44609 /* For 32-bit, everything's fine if we have one free register. */
44610 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
44613 /* Need a free register for vcall_offset. */
44617 /* Need a free register for GOT references. */
44618 if (flag_pic && !targetm.binds_local_p (function))
44621 /* Otherwise ok. */
44625 /* Output the assembler code for a thunk function. THUNK_DECL is the
44626 declaration for the thunk function itself, FUNCTION is the decl for
44627 the target function. DELTA is an immediate constant offset to be
44628 added to THIS. If VCALL_OFFSET is nonzero, the word at
44629 *(*this + vcall_offset) should be added to THIS. */
/* Emits RTL for the thunk (adjust `this' by DELTA and optionally by
   the vtable word at VCALL_OFFSET, then tail-call FUNCTION) and runs a
   minimal final pass to print it into FILE.  */
44632 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
44633 HOST_WIDE_INT vcall_offset, tree function)
44635 rtx this_param = x86_this_parameter (function);
44636 rtx this_reg, tmp, fnaddr;
44637 unsigned int tmp_regno;
/* Choose a scratch register the calling convention leaves free:
   r10 on 64-bit; on 32-bit eax/edx/ecx depending on fastcall or
   thiscall using the others for arguments.  */
44641 tmp_regno = R10_REG;
44644 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
44645 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44646 tmp_regno = AX_REG;
44647 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44648 tmp_regno = DX_REG;
44650 tmp_regno = CX_REG;
44653 emit_note (NOTE_INSN_PROLOGUE_END);
44655 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
44656 pull it in now and let DELTA benefit. */
44657 if (REG_P (this_param))
44658 this_reg = this_param;
44659 else if (vcall_offset)
44661 /* Put the this parameter into %eax. */
44662 this_reg = gen_rtx_REG (Pmode, AX_REG);
44663 emit_move_insn (this_reg, this_param);
44666 this_reg = NULL_RTX;
44668 /* Adjust the this parameter by a fixed constant. */
44671 rtx delta_rtx = GEN_INT (delta);
44672 rtx delta_dst = this_reg ? this_reg : this_param;
/* A 64-bit delta that is not a valid immediate must be loaded into
   the scratch register first.  */
44676 if (!x86_64_general_operand (delta_rtx, Pmode))
44678 tmp = gen_rtx_REG (Pmode, tmp_regno);
44679 emit_move_insn (tmp, delta_rtx);
44684 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
44687 /* Adjust the this parameter by a value stored in the vtable. */
44690 rtx vcall_addr, vcall_mem, this_mem;
44692 tmp = gen_rtx_REG (Pmode, tmp_regno);
/* Load the vtable pointer; zero-extend when pointers are narrower
   than Pmode (x32).  */
44694 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
44695 if (Pmode != ptr_mode)
44696 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
44697 emit_move_insn (tmp, this_mem);
44699 /* Adjust the this parameter. */
44700 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
44702 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
44704 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
44705 emit_move_insn (tmp2, GEN_INT (vcall_offset));
44706 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
44709 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
44710 if (Pmode != ptr_mode)
44711 emit_insn (gen_addsi_1_zext (this_reg,
44712 gen_rtx_REG (ptr_mode,
44716 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
44719 /* If necessary, drop THIS back to its stack slot. */
44720 if (this_reg && this_reg != this_param)
44721 emit_move_insn (this_param, this_reg);
/* Form the address of FUNCTION: direct when local/non-PIC, through
   the GOT (GOTPCREL on 64-bit, ebx-relative GOT on 32-bit) or the
   Mach-O indirection otherwise.  */
44723 fnaddr = XEXP (DECL_RTL (function), 0);
44726 if (!flag_pic || targetm.binds_local_p (function)
44731 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
44732 tmp = gen_rtx_CONST (Pmode, tmp);
44733 fnaddr = gen_const_mem (Pmode, tmp);
44738 if (!flag_pic || targetm.binds_local_p (function))
44741 else if (TARGET_MACHO)
44743 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
44744 fnaddr = XEXP (fnaddr, 0);
44746 #endif /* TARGET_MACHO */
44749 tmp = gen_rtx_REG (Pmode, CX_REG);
44750 output_set_got (tmp, NULL_RTX);
44752 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
44753 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
44754 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
44755 fnaddr = gen_const_mem (Pmode, fnaddr);
44759 /* Our sibling call patterns do not allow memories, because we have no
44760 predicate that can distinguish between frame and non-frame memory.
44761 For our purposes here, we can get away with (ab)using a jump pattern,
44762 because we're going to do no optimization. */
44763 if (MEM_P (fnaddr))
44765 if (sibcall_insn_operand (fnaddr, word_mode))
44767 fnaddr = XEXP (DECL_RTL (function), 0);
44768 tmp = gen_rtx_MEM (QImode, fnaddr);
44769 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44770 tmp = emit_call_insn (tmp);
44771 SIBLING_CALL_P (tmp) = 1;
44774 emit_jump_insn (gen_indirect_jump (fnaddr));
44778 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
44780 // CM_LARGE_PIC always uses pseudo PIC register which is
44781 // uninitialized. Since FUNCTION is local and calling it
44782 // doesn't go through PLT, we use scratch register %r11 as
44783 // PIC register and initialize it here.
44784 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
44785 ix86_init_large_pic_reg (tmp_regno);
44786 fnaddr = legitimize_pic_address (fnaddr,
44787 gen_rtx_REG (Pmode, tmp_regno));
/* If the address still is not a valid sibcall operand, move it into
   the scratch register (zero-extending to word_mode if needed).  */
44790 if (!sibcall_insn_operand (fnaddr, word_mode))
44792 tmp = gen_rtx_REG (word_mode, tmp_regno);
44793 if (GET_MODE (fnaddr) != word_mode)
44794 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
44795 emit_move_insn (tmp, fnaddr);
44799 tmp = gen_rtx_MEM (QImode, fnaddr);
44800 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44801 tmp = emit_call_insn (tmp);
44802 SIBLING_CALL_P (tmp) = 1;
44806 /* Emit just enough of rest_of_compilation to get the insns emitted.
44807 Note that use_thunk calls assemble_start_function et al. */
44808 insn = get_insns ();
44809 shorten_branches (insn);
44810 final_start_function (insn, file, 1);
44811 final (insn, file, 1);
44812 final_end_function ();
/* TARGET_ASM_FILE_START: emit the standard file prologue plus x86
   specifics — .code16gcc for 16-bit output, Darwin setup, optional
   .version / __fltused directives, and Intel syntax selection.  */
44816 x86_file_start (void)
44818 default_file_start ();
44820 fputs ("\t.code16gcc\n", asm_out_file);
44822 darwin_file_start ();
44824 if (X86_FILE_START_VERSION_DIRECTIVE)
44825 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
44826 if (X86_FILE_START_FLTUSED)
44827 fputs ("\t.global\t__fltused\n", asm_out_file);
44828 if (ix86_asm_dialect == ASM_INTEL)
44829 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: returns the alignment for FIELD given the
   COMPUTED default.  On 32-bit without -malign-double, integer and
   double/complex-double fields are capped at 32 bits (the traditional
   i386 struct layout); IA MCU has its own rule.  */
44833 x86_field_alignment (tree field, int computed)
44836 tree type = TREE_TYPE (field);
44838 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
44841 return iamcu_alignment (type, computed);
/* Look through arrays so an array of doubles is capped like a
   scalar double.  */
44842 mode = TYPE_MODE (strip_array_types (type));
44843 if (mode == DFmode || mode == DCmode
44844 || GET_MODE_CLASS (mode) == MODE_INT
44845 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
44846 return MIN (32, computed);
44850 /* Print call to TARGET to FILE. */
/* Helper for the profiler hook: emit either a 5-byte nop (so
   -mnop-mcount call sites can be patched later) or a call to TARGET,
   both carrying the local label "1:" referenced by __mcount_loc.  */
44853 x86_print_call_or_nop (FILE *file, const char *target)
44855 if (flag_nop_mcount)
44856 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
44858 fprintf (file, "1:\tcall\t%s\n", target);
44861 /* Output assembler code to FILE to increment profiler label # LABELNO
44862 for profiling a function entry. */
/* Emits the mcount call (before or after the prologue per -mfentry),
   loading the per-function counter label when profile counters are
   enabled, and indirecting through the GOT for PIC.  With
   -mrecord-mcount the call site is also recorded in __mcount_loc.  */
44864 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
44866 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
/* 64-bit path: counter address in %r11, call possibly via GOTPCREL.  */
44870 #ifndef NO_PROFILE_COUNTERS
44871 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
44874 if (!TARGET_PECOFF && flag_pic)
44875 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
44877 x86_print_call_or_nop (file, mcount_name);
/* 32-bit PIC path: GOT-relative counter and call through @GOT.  */
44881 #ifndef NO_PROFILE_COUNTERS
44882 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
44885 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
/* 32-bit non-PIC path.  */
44889 #ifndef NO_PROFILE_COUNTERS
44890 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
44893 x86_print_call_or_nop (file, mcount_name);
44896 if (flag_record_mcount)
44898 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
44899 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
44900 fprintf (file, "\t.previous\n");
44904 /* We don't have exact information about the insn sizes, but we may assume
44905 quite safely that we are informed about all 1 byte insns and memory
44906 address sizes. This is enough to eliminate unnecessary padding in
/* Conservative lower bound on the byte length of INSN, used by the K8
   jump-mispredict padding pass below.  Non-insns and alignment unspecs
   count as zero; symbolic calls are sized by get_attr_length; other
   insns use attribute lengths with pessimistic fallbacks for asm.  */
44910 min_insn_size (rtx_insn *insn)
44914 if (!INSN_P (insn) || !active_insn_p (insn))
44917 /* Discard alignments we've emit and jump instructions. */
44918 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
44919 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
44922 /* Important case - calls are always 5 bytes.
44923 It is common to have many calls in the row. */
44925 && symbolic_reference_mentioned_p (PATTERN (insn))
44926 && !SIBLING_CALL_P (insn))
44928 len = get_attr_length (insn);
44932 /* For normal instructions we rely on get_attr_length being exact,
44933 with a few exceptions. */
44934 if (!JUMP_P (insn))
44936 enum attr_type type = get_attr_type (insn);
/* Inline asm length is only an estimate; treat it as minimal.  */
44941 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
44942 || asm_noperands (PATTERN (insn)) >= 0)
44949 /* Otherwise trust get_attr_length. */
/* Jumps: base the bound on the address-length attribute, adjusted
   when a symbolic reference implies a longer encoding.  */
44953 l = get_attr_length_address (insn);
44954 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
44963 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
44965 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Machine-specific pass: scan the insn stream with a sliding window
   [START, INSN], tracking the window's byte size (NBYTES, via
   min_insn_size) and jump count (NJUMPS).  Whenever a 4th jump would
   land within the same 16-byte code window, emit a `pad' insn before
   it so at most 3 jumps share any 16-byte page.  */
44969 ix86_avoid_jump_mispredicts (void)
44971 rtx_insn *insn, *start = get_insns ();
44972 int nbytes = 0, njumps = 0;
44973 bool isjump = false;
44975 /* Look for all minimal intervals of instructions containing 4 jumps.
44976 The intervals are bounded by START and INSN. NBYTES is the total
44977 size of instructions in the interval including INSN and not including
44978 START. When the NBYTES is smaller than 16 bytes, it is possible
44979 that the end of START and INSN ends up in the same 16byte page.
44981 The smallest offset in the page INSN can start is the case where START
44982 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
44983 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
44985 Don't consider asm goto as jump, while it can contain a jump, it doesn't
44986 have to, control transfer to label(s) can be performed through other
44987 means, and also we estimate minimum length of all asm stmts as 0. */
44988 for (insn = start; insn; insn = NEXT_INSN (insn))
44992 if (LABEL_P (insn))
44994 int align = label_to_alignment (insn);
44995 int max_skip = label_to_max_skip (insn);
44999 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
45000 already in the current 16 byte page, because otherwise
45001 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
45002 bytes to reach 16 byte boundary. */
45004 || (align <= 3 && max_skip != (1 << align) - 1))
45007 fprintf (dump_file, "Label %i with max_skip %i\n",
45008 INSN_UID (insn), max_skip);
/* An aligned label resets part of the window: shrink it from the
   front until the label's skip cannot straddle the page.  */
45011 while (nbytes + max_skip >= 16)
45013 start = NEXT_INSN (start);
45014 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45016 njumps--, isjump = true;
45019 nbytes -= min_insn_size (start);
45025 min_size = min_insn_size (insn);
45026 nbytes += min_size;
45028 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
45029 INSN_UID (insn), min_size);
45030 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Keep at most 3 jumps in the window by advancing START.  */
45038 start = NEXT_INSN (start);
45039 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45041 njumps--, isjump = true;
45044 nbytes -= min_insn_size (start);
45046 gcc_assert (njumps >= 0);
45048 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
45049 INSN_UID (start), INSN_UID (insn), nbytes);
45051 if (njumps == 3 && isjump && nbytes < 16)
45053 int padsize = 15 - nbytes + min_insn_size (insn);
45056 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
45057 INSN_UID (insn), padsize);
45058 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
45064 /* AMD Athlon works faster
45065 when RET is not destination of conditional jump or directly preceded
45066 by other jump instruction. We avoid the penalty by inserting NOP just
45067 before the RET instructions in such cases. */
/* NOTE(review): interior lines are elided in this excerpt; comments only,
   code byte-identical.  Walks each predecessor of the exit block and
   replaces vulnerable returns (presumably with a long-return form, see
   gen_simple_return_internal_long below).  */
45069 ix86_pad_returns (void)
45074 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45076 basic_block bb = e->src;
45077 rtx_insn *ret = BB_END (bb);
45079 bool replace = false;
/* Skip non-return block ends, and blocks optimized for size where the
   extra padding is not worth it.  */
45081 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
45082 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
45084 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
45085 if (active_insn_p (prev) || LABEL_P (prev))
45087 if (prev && LABEL_P (prev))
/* A label directly before RET: check whether any non-fallthru edge
   (i.e. a jump) targets it.  */
45092 FOR_EACH_EDGE (e, ei, bb->preds)
45093 if (EDGE_FREQUENCY (e) && e->src->index >= 0
45094 && !(e->flags & EDGE_FALLTHRU))
45102 prev = prev_active_insn (ret);
45104 && ((JUMP_P (prev) && any_condjump_p (prev))
45107 /* Empty functions get branch mispredict even when
45108 the jump destination is not visible to us. */
45109 if (!prev && !optimize_function_for_size_p (cfun))
45114 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
45120 /* Count the minimum number of instructions in BB. Return 4 if the
45121 number of instructions >= 4. */
/* NOTE(review): interior lines elided in this excerpt; comments only.
   Counts real (non-debug, non-USE, non-CLOBBER) insns, capping at 4.  */
45124 ix86_count_insn_bb (basic_block bb)
45127 int insn_count = 0;
45129 /* Count number of instructions in this block. Return 4 if the number
45130 of instructions >= 4. */
45131 FOR_BB_INSNS (bb, insn)
45133 /* Only happen in exit blocks. */
/* Return insns themselves are presumably excluded from the count —
   elided condition, confirm against full source.  */
45135 && ANY_RETURN_P (PATTERN (insn)))
45138 if (NONDEBUG_INSN_P (insn)
45139 && GET_CODE (PATTERN (insn)) != USE
45140 && GET_CODE (PATTERN (insn)) != CLOBBER)
/* Early out once the cap of 4 is reached.  */
45143 if (insn_count >= 4)
45152 /* Count the minimum number of instructions in code path in BB.
45153 Return 4 if the number of instructions >= 4. */
/* NOTE(review): interior lines elided; comments only.  Computes the
   minimum path length (in insns, saturated at 4) from entry through at
   most one predecessor block into BB.  */
45156 ix86_count_insn (basic_block bb)
45160 int min_prev_count;
45162 /* Only bother counting instructions along paths with no
45163 more than 2 basic blocks between entry and exit. Given
45164 that BB has an edge to exit, determine if a predecessor
45165 of BB has an edge from entry. If so, compute the number
45166 of instructions in the predecessor block. If there
45167 happen to be multiple such blocks, compute the minimum. */
45168 min_prev_count = 4;
45169 FOR_EACH_EDGE (e, ei, bb->preds)
45172 edge_iterator prev_ei;
/* BB directly reachable from entry: no predecessor insns on this path.  */
45174 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45176 min_prev_count = 0;
/* Otherwise consider predecessors that are themselves entered from the
   function entry block.  */
45179 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
45181 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45183 int count = ix86_count_insn_bb (e->src);
45184 if (count < min_prev_count)
45185 min_prev_count = count;
/* Add BB's own insns unless already saturated.  */
45191 if (min_prev_count < 4)
45192 min_prev_count += ix86_count_insn_bb (bb);
45194 return min_prev_count;
45197 /* Pad short function to 4 instructions. */
/* NOTE(review): interior lines elided; comments only.  For each return,
   if the shortest path through the function is under 4 insns, emit NOPs
   before the epilogue to pad it (TARGET_PAD_SHORT_FUNCTION tuning).  */
45200 ix86_pad_short_function (void)
45205 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45207 rtx_insn *ret = BB_END (e->src);
45208 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
45210 int insn_count = ix86_count_insn (e->src);
45212 /* Pad short function. */
45213 if (insn_count < 4)
45215 rtx_insn *insn = ret;
45217 /* Find epilogue. */
/* Walk backwards to the NOTE_INSN_EPILOGUE_BEG marker so the NOPs land
   before the epilogue sequence.  */
45220 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
45221 insn = PREV_INSN (insn);
45226 /* Two NOPs count as one instruction. */
45227 insn_count = 2 * (4 - insn_count);
45228 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
45234 /* Fix up a Windows system unwinder issue. If an EH region falls through into
45235 the epilogue, the Windows system unwinder will apply epilogue logic and
45236 produce incorrect offsets. This can be avoided by adding a nop between
45237 the last insn that can throw and the first insn of the epilogue. */
/* NOTE(review): interior lines elided; comments only.  */
45240 ix86_seh_fixup_eh_fallthru (void)
45245 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45247 rtx_insn *insn, *next;
45249 /* Find the beginning of the epilogue. */
45250 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
45251 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
45256 /* We only care about preceding insns that can throw. */
45257 insn = prev_active_insn (insn);
45258 if (insn == NULL || !can_throw_internal (insn))
45261 /* Do not separate calls from their debug information. */
/* Skip forward over var-location / call-arg-location notes so the NOP
   goes after them, keeping the notes attached to the call.  */
45262 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
45264 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
45265 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
/* A single NOP separates the throwing insn from the epilogue.  */
45270 emit_insn_after (gen_nops (const1_rtx), insn);
45274 /* Given a register number BASE, the lowest of a group of registers, update
45275 regsets IN and OUT with the registers that should be avoided in input
45276 and output operands respectively when trying to avoid generating a modr/m
45277 byte for -fmitigate-rop. */
/* NOTE(review): brace lines are elided in this excerpt; code otherwise
   byte-identical.  BASE+0/+1 are risky as outputs, BASE+2/+3 as inputs —
   these encode modr/m byte values the ROP mitigation wants to avoid.  */
45280 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
45282 SET_HARD_REG_BIT (out, base);
45283 SET_HARD_REG_BIT (out, base + 1);
45284 SET_HARD_REG_BIT (in, base + 2);
45285 SET_HARD_REG_BIT (in, base + 3);
45288 /* Called if -fmitigate_rop is in effect. Try to rewrite instructions so
45289 that certain encodings of modr/m bytes do not occur. */
/* NOTE(review): interior lines are elided in this excerpt (embedded
   numbering gaps); code left byte-identical, comments only.
   Two phases: (1) use the regrename infrastructure to rename whole def-use
   chains away from "risky" registers; (2) for insns still containing a bad
   modr/m byte, locally copy one operand into a safe register.  */
45291 ix86_mitigate_rop (void)
45293 HARD_REG_SET input_risky;
45294 HARD_REG_SET output_risky;
45295 HARD_REG_SET inout_risky;
/* Build the sets of registers whose encodings produce unwanted modr/m
   bytes, for input and output operand positions respectively.  */
45297 CLEAR_HARD_REG_SET (output_risky);
45298 CLEAR_HARD_REG_SET (input_risky);
45299 SET_HARD_REG_BIT (output_risky, AX_REG);
45300 SET_HARD_REG_BIT (output_risky, CX_REG);
45301 SET_HARD_REG_BIT (input_risky, BX_REG);
45302 SET_HARD_REG_BIT (input_risky, DX_REG);
45303 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
45304 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
45305 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
45306 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
45307 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
45308 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
45309 COPY_HARD_REG_SET (inout_risky, input_risky);
45310 IOR_HARD_REG_SET (inout_risky, output_risky);
45312 df_note_add_problem ();
45313 /* Fix up what stack-regs did. */
45314 df_insn_rescan_all ();
/* Phase 1: collect rename candidates via the regrename pass machinery.  */
45317 regrename_init (true);
45318 regrename_analyze (NULL);
45320 auto_vec<du_head_p> cands;
45322 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
45324 if (!NONDEBUG_INSN_P (insn))
45327 if (GET_CODE (PATTERN (insn)) == USE
45328 || GET_CODE (PATTERN (insn)) == CLOBBER)
45331 extract_insn (insn);
/* ix86_get_modrm_for_rop identifies the operands (opno0/opno1) that make
   up the modr/m byte for this insn.  */
45334 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45335 recog_data.n_operands, &opno0,
45338 if (!ix86_rop_should_change_byte_p (modrm))
45341 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
45343 /* This happens when regrename has to fail a block. */
45344 if (!info->op_info)
45347 if (info->op_info[opno0].n_chains != 0)
45349 gcc_assert (info->op_info[opno0].n_chains == 1);
/* target_data_1/2 count how often the chain appears in an output/input
   risky position; a chain enters CANDS once, on first sighting.  */
45351 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
45352 if (op0c->target_data_1 + op0c->target_data_2 == 0
45353 && !op0c->cannot_rename)
45354 cands.safe_push (op0c);
45356 op0c->target_data_1++;
45358 if (info->op_info[opno1].n_chains != 0)
45360 gcc_assert (info->op_info[opno1].n_chains == 1);
45362 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
45363 if (op1c->target_data_1 + op1c->target_data_2 == 0
45364 && !op1c->cannot_rename)
45365 cands.safe_push (op1c);
45367 op1c->target_data_2++;
/* Rename each candidate chain into a register outside the risky sets
   its uses require.  */
45373 FOR_EACH_VEC_ELT (cands, i, head)
45375 int old_reg, best_reg;
45376 HARD_REG_SET unavailable;
45378 CLEAR_HARD_REG_SET (unavailable);
45379 if (head->target_data_1)
45380 IOR_HARD_REG_SET (unavailable, output_risky);
45381 if (head->target_data_2)
45382 IOR_HARD_REG_SET (unavailable, input_risky);
45385 reg_class superclass = regrename_find_superclass (head, &n_uses,
45387 old_reg = head->regno;
45388 best_reg = find_rename_reg (head, superclass, &unavailable,
45390 bool ok = regrename_do_replace (head, best_reg);
45393 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
45394 reg_names[best_reg], reg_class_names[superclass]);
45398 regrename_finish ();
/* Phase 2: scan blocks backwards with live-register simulation and patch
   the remaining offenders with an extra register copy.  */
45405 INIT_REG_SET (&live);
45407 FOR_EACH_BB_FN (bb, cfun)
45411 COPY_REG_SET (&live, DF_LR_OUT (bb));
45412 df_simulate_initialize_backwards (bb, &live);
45414 FOR_BB_INSNS_REVERSE (bb, insn)
45416 if (!NONDEBUG_INSN_P (insn))
45419 df_simulate_one_insn_backwards (bb, insn, &live);
45421 if (GET_CODE (PATTERN (insn)) == USE
45422 || GET_CODE (PATTERN (insn)) == CLOBBER)
45425 extract_insn (insn);
45426 constrain_operands_cached (insn, reload_completed);
45428 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45429 recog_data.n_operands, &opno0,
45432 || !ix86_rop_should_change_byte_p (modrm)
45436 rtx oldreg = recog_data.operand[opno1];
45437 preprocess_constraints (insn);
45438 const operand_alternative *alt = which_op_alt ();
/* Bail out if any earlyclobber operand overlaps the register we would
   copy — the copy inserted before the insn would be clobbered.  */
45441 for (i = 0; i < recog_data.n_operands; i++)
45443 && alt[i].earlyclobber
45444 && reg_overlap_mentioned_p (recog_data.operand[i],
45448 if (i < recog_data.n_operands)
45452 fprintf (dump_file,
45453 "attempting to fix modrm byte in insn %d:"
45454 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
45455 reg_class_names[alt[opno1].cl]);
/* Pick a replacement: not live, not the old reg, call-used, not fixed,
   not output-risky, and inside the operand's register class.  */
45457 HARD_REG_SET unavailable;
45458 REG_SET_TO_HARD_REG_SET (unavailable, &live);
45459 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
45460 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
45461 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
45462 IOR_HARD_REG_SET (unavailable, output_risky);
45463 IOR_COMPL_HARD_REG_SET (unavailable,
45464 reg_class_contents[alt[opno1].cl]);
45466 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
45467 if (!TEST_HARD_REG_BIT (unavailable, i))
45469 if (i == FIRST_PSEUDO_REGISTER)
45472 fprintf (dump_file, ", none available\n");
45476 fprintf (dump_file, " -> %d\n", i);
/* Substitute the new register in the insn and emit a copy from the old
   register just before it.  */
45477 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
45478 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
45479 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
45484 /* Implement machine specific optimizations. We implement padding of returns
45485 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header line is elided in this excerpt —
   presumably this is the machine-dependent reorg hook (ix86_reorg);
   confirm against full source.  Dispatches the per-tuning fixup passes
   defined above.  */
45489 /* We are freeing block_for_insn in the toplev to keep compatibility
45490 with old MDEP_REORGS that are not CFG based. Recompute it now. */
45491 compute_bb_for_insn ();
45493 if (flag_mitigate_rop)
45494 ix86_mitigate_rop ();
/* Windows SEH needs the EH-fallthru-into-epilogue nop fixup.  */
45496 if (TARGET_SEH && current_function_has_exception_handlers ())
45497 ix86_seh_fixup_eh_fallthru ();
45499 if (optimize && optimize_function_for_speed_p (cfun))
45501 if (TARGET_PAD_SHORT_FUNCTION)
45502 ix86_pad_short_function ();
45503 else if (TARGET_PAD_RETURNS)
45504 ix86_pad_returns ();
45505 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
45506 if (TARGET_FOUR_JUMP_LIMIT)
45507 ix86_avoid_jump_mispredicts ();
45512 /* Return nonzero when QImode register that must be represented via REX prefix
/* NOTE(review): several lines elided (return statements, brace lines);
   comments only.  Scans INSN's operands for a general register whose
   number is outside the legacy QImode-addressable set.  */
45515 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
45518 extract_insn_cached (insn);
45519 for (i = 0; i < recog_data.n_operands; i++)
45520 if (GENERAL_REG_P (recog_data.operand[i])
45521 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
45526 /* Return true when INSN mentions register that must be encoded using REX
/* NOTE(review): several lines elided; comments only.  Walks every
   sub-rtx of the pattern (or of a bare rtx) looking for REX-only
   integer or SSE hard registers.  */
45529 x86_extended_reg_mentioned_p (rtx insn)
45531 subrtx_iterator::array_type array;
45532 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
45534 const_rtx x = *iter;
45536 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
45542 /* If profitable, negate (without causing overflow) integer constant
45543 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): interior lines elided (mode switch, return paths);
   comments only.  */
45545 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
45549 if (!CONST_INT_P (*loc))
45555 /* DImode x86_64 constants must fit in 32 bits. */
45556 gcc_assert (x86_64_immediate_operand (*loc, mode));
45567 gcc_unreachable ();
45570 /* Avoid overflows. */
/* The sign-bit value (e.g. INT_MIN) cannot be negated in MODE.  */
45571 if (mode_signbit_p (mode, *loc))
45574 val = INTVAL (*loc);
45576 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
45577 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
45578 if ((val < 0 && val != -128)
45581 *loc = GEN_INT (-val);
45588 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
45589 optabs would emit if we didn't have TFmode patterns. */
/* NOTE(review): a few lines elided; comments only.  Standard unsigned->FP
   idiom: if the value is non-negative, a signed conversion is exact;
   otherwise convert (x >> 1) | (x & 1) and double the result, which
   rounds correctly for values with the sign bit set.  */
45592 x86_emit_floatuns (rtx operands[2])
45594 rtx_code_label *neglab, *donelab;
45595 rtx i0, i1, f0, in, out;
45596 machine_mode mode, inmode;
45598 inmode = GET_MODE (operands[1]);
45599 gcc_assert (inmode == SImode || inmode == DImode);
45602 in = force_reg (inmode, operands[1]);
45603 mode = GET_MODE (out);
45604 neglab = gen_label_rtx ();
45605 donelab = gen_label_rtx ();
45606 f0 = gen_reg_rtx (mode);
/* Branch to NEGLAB when the input, viewed as signed, is negative.  */
45608 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
45610 expand_float (out, in, 0);
45612 emit_jump_insn (gen_jump (donelab));
45615 emit_label (neglab);
/* Halve with the low bit folded back in so rounding is preserved.  */
45617 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
45619 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
45621 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
45623 expand_float (f0, i0, 0);
/* OUT = f0 + f0 restores the halved magnitude.  */
45625 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
45627 emit_label (donelab);
45630 static bool canonicalize_perm (struct expand_vec_perm_d *d);
45631 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
45632 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
45633 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
45635 /* Get a vector mode of the same size as the original but with elements
45636 twice as wide. This is only guaranteed to apply to integral vectors. */
/* NOTE(review): brace/return lines elided; comments only.  The asserts
   verify the genmodes.c ordering assumption: the next wider vector mode
   has half the units at the same total size.  */
45638 static inline machine_mode
45639 get_mode_wider_vector (machine_mode o)
45641 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
45642 machine_mode n = GET_MODE_WIDER_MODE (o);
45643 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
45644 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
45648 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
45649 fill target with val via vec_duplicate. */
/* NOTE(review): interior lines elided; comments only.  Emits
   TARGET = VEC_DUPLICATE(VAL); if no insn pattern matches, retries with
   VAL forced into a register, and reports success via recog.  */
45652 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
45658 /* First attempt to recognize VAL as-is. */
45659 dup = gen_rtx_VEC_DUPLICATE (mode, val);
45660 insn = emit_insn (gen_rtx_SET (target, dup));
45661 if (recog_memoized (insn) < 0)
45664 /* If that fails, force VAL into a register. */
45667 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
45668 seq = get_insns ();
/* The register-forcing sequence is inserted before the duplicate insn,
   then recognition is retried.  */
45671 emit_insn_before (seq, insn);
45673 ok = recog_memoized (insn) >= 0;
45679 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45680 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): this excerpt elides the mode switch labels and many
   interior lines; code byte-identical, comments only.  Strategies vary
   by mode: direct vec_duplicate, MMX truncate trick, pshufb-style
   permutation, widening recursion, or concatenating two half-width
   broadcasts.  */
45683 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
45684 rtx target, rtx val)
45708 return ix86_vector_duplicate_value (mode, target, val);
45713 if (TARGET_SSE || TARGET_3DNOW_A)
/* MMX HImode path: duplicate via TRUNCATE of the SImode value.  */
45717 val = gen_lowpart (SImode, val);
45718 x = gen_rtx_TRUNCATE (HImode, val);
45719 x = gen_rtx_VEC_DUPLICATE (mode, x);
45720 emit_insn (gen_rtx_SET (target, x));
45732 return ix86_vector_duplicate_value (mode, target, val);
/* Permutation-based broadcast: build a one-operand vec_perm that
   replicates element 0.  */
45736 struct expand_vec_perm_d dperm;
45740 memset (&dperm, 0, sizeof (dperm));
45741 dperm.target = target;
45742 dperm.vmode = mode;
45743 dperm.nelt = GET_MODE_NUNITS (mode);
45744 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
45745 dperm.one_operand_p = true;
45747 /* Extend to SImode using a paradoxical SUBREG. */
45748 tmp1 = gen_reg_rtx (SImode);
45749 emit_move_insn (tmp1, gen_lowpart (SImode, val));
45751 /* Insert the SImode value as low element of a V4SImode vector. */
45752 tmp2 = gen_reg_rtx (V4SImode);
45753 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
45754 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
45756 ok = (expand_vec_perm_1 (&dperm)
45757 || expand_vec_perm_broadcast_1 (&dperm));
45765 return ix86_vector_duplicate_value (mode, target, val);
45772 /* Replicate the value once into the next wider mode and recurse. */
45774 machine_mode smode, wsmode, wvmode;
45777 smode = GET_MODE_INNER (mode);
45778 wvmode = get_mode_wider_vector (mode);
45779 wsmode = GET_MODE_INNER (wvmode);
/* Pack two copies of VAL into one wider scalar: (VAL << bits) | VAL.  */
45781 val = convert_modes (wsmode, smode, val, true);
45782 x = expand_simple_binop (wsmode, ASHIFT, val,
45783 GEN_INT (GET_MODE_BITSIZE (smode)),
45784 NULL_RTX, 1, OPTAB_LIB_WIDEN);
45785 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
45787 x = gen_reg_rtx (wvmode);
45788 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
45790 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
45797 return ix86_vector_duplicate_value (mode, target, val);
/* 256-bit QI/HI: broadcast into a 128-bit half, then concat it with
   itself.  */
45800 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
45801 rtx x = gen_reg_rtx (hvmode);
45803 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45806 x = gen_rtx_VEC_CONCAT (mode, x, x);
45807 emit_insn (gen_rtx_SET (target, x));
/* 512-bit QI/HI: direct duplicate with AVX512BW, else concat halves.  */
45813 if (TARGET_AVX512BW)
45814 return ix86_vector_duplicate_value (mode, target, val);
45817 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
45818 rtx x = gen_reg_rtx (hvmode);
45820 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45823 x = gen_rtx_VEC_CONCAT (mode, x, x);
45824 emit_insn (gen_rtx_SET (target, x));
45833 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45834 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): the mode switch labels and several lines are elided in
   this excerpt; code byte-identical, comments only.  */
45838 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
45839 rtx target, rtx var, int one_var)
45841 machine_mode vsimode;
45844 bool use_vector_set = false;
/* Decide per mode whether a zero + vector_set sequence is available.  */
45849 /* For SSE4.1, we normally use vector set. But if the second
45850 element is zero and inter-unit moves are OK, we use movq
45852 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
45853 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
45859 use_vector_set = TARGET_SSE4_1;
45862 use_vector_set = TARGET_SSE2;
45865 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
45872 use_vector_set = TARGET_AVX;
45875 /* Use ix86_expand_vector_set in 64bit mode only. */
45876 use_vector_set = TARGET_AVX && TARGET_64BIT;
45882 if (use_vector_set)
/* Zero TARGET, then insert VAR at ONE_VAR.  */
45884 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
45885 var = force_reg (GET_MODE_INNER (mode), var);
45886 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element path: concat VAR with a zero scalar.  */
45902 var = force_reg (GET_MODE_INNER (mode), var);
45903 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
45904 emit_insn (gen_rtx_SET (target, x));
/* V4SF/V4SI path: merge VAR into element 0 of a zero vector, then
   shuffle it into position ONE_VAR.  */
45909 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
45910 new_target = gen_reg_rtx (mode);
45912 new_target = target;
45913 var = force_reg (GET_MODE_INNER (mode), var);
45914 x = gen_rtx_VEC_DUPLICATE (mode, var);
45915 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
45916 emit_insn (gen_rtx_SET (new_target, x));
45919 /* We need to shuffle the value to the correct position, so
45920 create a new pseudo to store the intermediate result. */
45922 /* With SSE2, we can use the integer shuffle insns. */
45923 if (mode != V4SFmode && TARGET_SSE2)
45925 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
45927 GEN_INT (one_var == 1 ? 0 : 1),
45928 GEN_INT (one_var == 2 ? 0 : 1),
45929 GEN_INT (one_var == 3 ? 0 : 1)));
45930 if (target != new_target)
45931 emit_move_insn (target, new_target);
45935 /* Otherwise convert the intermediate result to V4SFmode and
45936 use the SSE1 shuffle instructions. */
45937 if (mode != V4SFmode)
45939 tmp = gen_reg_rtx (V4SFmode);
45940 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
45945 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
45947 GEN_INT (one_var == 1 ? 0 : 1),
45948 GEN_INT (one_var == 2 ? 0+4 : 1+4),
45949 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
45951 if (mode != V4SFmode)
45952 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
45953 else if (tmp != target)
45954 emit_move_insn (target, tmp);
45956 else if (target != new_target)
45957 emit_move_insn (target, new_target);
/* Narrow-element path: zero-extend to SImode and recurse in the
   matching SImode vector mode.  */
45962 vsimode = V4SImode;
45968 vsimode = V2SImode;
45974 /* Zero extend the variable element to SImode and recurse. */
45975 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
45977 x = gen_reg_rtx (vsimode);
45978 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
45980 gcc_unreachable ();
45982 emit_move_insn (target, gen_lowpart (mode, x));
45990 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45991 consisting of the values in VALS. It is known that all elements
45992 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): mode switch labels and several lines elided in this
   excerpt; code byte-identical, comments only.  Loads a constant vector
   (with ONE_VAR zeroed) from the pool, then overwrites element ONE_VAR
   with the variable value.  */
45995 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
45996 rtx target, rtx vals, int one_var)
45998 rtx var = XVECEXP (vals, 0, one_var);
45999 machine_mode wmode;
/* Build the constant pool vector with the variable slot zeroed.  */
46002 const_vec = copy_rtx (vals);
46003 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
46004 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
46012 /* For the two element vectors, it's just as easy to use
46013 the general case. */
46017 /* Use ix86_expand_vector_set in 64bit mode only. */
46040 /* There's no way to set one QImode entry easily. Combine
46041 the variable value with its adjacent constant value, and
46042 promote to an HImode set. */
46043 x = XVECEXP (vals, 0, one_var ^ 1);
/* Place the variable byte in the high or low half of the HImode pair
   depending on whether ONE_VAR is odd or even.  */
46046 var = convert_modes (HImode, QImode, var, true);
46047 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
46048 NULL_RTX, 1, OPTAB_LIB_WIDEN);
46049 x = GEN_INT (INTVAL (x) & 0xff);
46053 var = convert_modes (HImode, QImode, var, true);
46054 x = gen_int_mode (INTVAL (x) << 8, HImode);
46056 if (x != const0_rtx)
46057 var = expand_simple_binop (HImode, IOR, var, x, var,
46058 1, OPTAB_LIB_WIDEN);
46060 x = gen_reg_rtx (wmode);
46061 emit_move_insn (x, gen_lowpart (wmode, const_vec));
46062 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
46064 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the constant vector, then insert VAR.  */
46071 emit_move_insn (target, const_vec);
46072 ix86_expand_vector_set (mmx_ok, target, var, one_var);
46076 /* A subroutine of ix86_expand_vector_init_general. Use vector
46077 concatenate to handle the most general case: all values variable,
46078 and none identical. */
/* NOTE(review): the n-switch labels and many lines are elided in this
   excerpt; code byte-identical, comments only.  Recursively pairs up
   OPS into half-width vectors (first[], second[], third[]) and concats
   them until a single MODE vector remains.  */
46081 ix86_expand_vector_init_concat (machine_mode mode,
46082 rtx target, rtx *ops, int n)
46084 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
46085 rtx first[16], second[8], third[4];
46137 gcc_unreachable ();
/* Base case n == 2: concat two registers directly.  */
46140 if (!register_operand (ops[1], cmode))
46141 ops[1] = force_reg (cmode, ops[1]);
46142 if (!register_operand (ops[0], cmode))
46143 ops[0] = force_reg (cmode, ops[0]);
46144 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
46164 gcc_unreachable ();
46188 gcc_unreachable ();
46206 gcc_unreachable ();
46211 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Build the first level of pairs in reverse order.  */
46214 for (; i > 0; i -= 2, j--)
46216 first[j] = gen_reg_rtx (cmode);
46217 v = gen_rtvec (2, ops[i - 1], ops[i]);
46218 ix86_expand_vector_init (false, first[j],
46219 gen_rtx_PARALLEL (cmode, v));
/* Three-level recursion (n large): first -> second -> third -> target.  */
46225 gcc_assert (hmode != VOIDmode);
46226 gcc_assert (gmode != VOIDmode);
46227 for (i = j = 0; i < n; i += 2, j++)
46229 second[j] = gen_reg_rtx (hmode);
46230 ix86_expand_vector_init_concat (hmode, second [j],
46234 for (i = j = 0; i < n; i += 2, j++)
46236 third[j] = gen_reg_rtx (gmode);
46237 ix86_expand_vector_init_concat (gmode, third[j],
46241 ix86_expand_vector_init_concat (mode, target, third, n);
/* Two-level recursion: first -> second -> target.  */
46245 gcc_assert (hmode != VOIDmode);
46246 for (i = j = 0; i < n; i += 2, j++)
46248 second[j] = gen_reg_rtx (hmode);
46249 ix86_expand_vector_init_concat (hmode, second [j],
46253 ix86_expand_vector_init_concat (mode, target, second, n);
/* One-level recursion: concat the first-level pairs directly.  */
46256 ix86_expand_vector_init_concat (mode, target, first, n);
46260 gcc_unreachable ();
46264 /* A subroutine of ix86_expand_vector_init_general. Use vector
46265 interleave to handle the most general case: all values variable,
46266 and none identical. */
/* NOTE(review): mode switch labels and several lines are elided in this
   excerpt; code byte-identical, comments only.  Loads element pairs into
   vectors, then repeatedly interleaves the low halves at progressively
   wider element modes until one MODE vector remains.  */
46269 ix86_expand_vector_init_interleave (machine_mode mode,
46270 rtx target, rtx *ops, int n)
46272 machine_mode first_imode, second_imode, third_imode, inner_mode;
46275 rtx (*gen_load_even) (rtx, rtx, rtx);
46276 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
46277 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Select insn generators and the interleave mode ladder per MODE.  */
46282 gen_load_even = gen_vec_setv8hi;
46283 gen_interleave_first_low = gen_vec_interleave_lowv4si;
46284 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46285 inner_mode = HImode;
46286 first_imode = V4SImode;
46287 second_imode = V2DImode;
46288 third_imode = VOIDmode;
46291 gen_load_even = gen_vec_setv16qi;
46292 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
46293 gen_interleave_second_low = gen_vec_interleave_lowv4si;
46294 inner_mode = QImode;
46295 first_imode = V8HImode;
46296 second_imode = V4SImode;
46297 third_imode = V2DImode;
46300 gcc_unreachable ();
46303 for (i = 0; i < n; i++)
46305 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
46306 op0 = gen_reg_rtx (SImode);
46307 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
46309 /* Insert the SImode value as low element of V4SImode vector. */
46310 op1 = gen_reg_rtx (V4SImode);
46311 op0 = gen_rtx_VEC_MERGE (V4SImode,
46312 gen_rtx_VEC_DUPLICATE (V4SImode,
46314 CONST0_RTX (V4SImode),
46316 emit_insn (gen_rtx_SET (op1, op0));
46318 /* Cast the V4SImode vector back to a vector in orignal mode. */
46319 op0 = gen_reg_rtx (mode);
46320 emit_move_insn (op0, gen_lowpart (mode, op1));
46322 /* Load even elements into the second position. */
46323 emit_insn (gen_load_even (op0,
46324 force_reg (inner_mode,
46328 /* Cast vector to FIRST_IMODE vector. */
46329 ops[i] = gen_reg_rtx (first_imode);
46330 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
46333 /* Interleave low FIRST_IMODE vectors. */
46334 for (i = j = 0; i < n; i += 2, j++)
46336 op0 = gen_reg_rtx (first_imode);
46337 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
46339 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
46340 ops[j] = gen_reg_rtx (second_imode);
46341 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
46344 /* Interleave low SECOND_IMODE vectors. */
46345 switch (second_imode)
/* V4SI case performs one more interleave level, then falls through to
   the V2DI final combine (elided label — confirm against full source).  */
46348 for (i = j = 0; i < n / 2; i += 2, j++)
46350 op0 = gen_reg_rtx (second_imode);
46351 emit_insn (gen_interleave_second_low (op0, ops[i],
46354 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
46356 ops[j] = gen_reg_rtx (third_imode);
46357 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
46359 second_imode = V2DImode;
46360 gen_interleave_second_low = gen_vec_interleave_lowv2di;
/* Final V2DI interleave produces the full-width result.  */
46364 op0 = gen_reg_rtx (second_imode);
46365 emit_insn (gen_interleave_second_low (op0, ops[0],
46368 /* Cast the SECOND_IMODE vector back to a vector on original
46370 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
46374 gcc_unreachable ();
46378 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
46379 all values variable, and none identical. */
/* NOTE(review): mode switch labels and many interior lines are elided in
   this excerpt; code byte-identical, comments only.  Dispatch: concat
   for wide-element modes; interleave for QI/HI vectors (halves/quarters
   for 256/512-bit); word-building fallback for the remaining modes.  */
46382 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
46383 rtx target, rtx vals)
46385 rtx ops[64], op0, op1, op2, op3, op4, op5;
46386 machine_mode half_mode = VOIDmode;
46387 machine_mode quarter_mode = VOIDmode;
46394 if (!mmx_ok && !TARGET_SSE)
/* Concat path: gather all elements and pair-concat recursively.  */
46410 n = GET_MODE_NUNITS (mode);
46411 for (i = 0; i < n; i++)
46412 ops[i] = XVECEXP (vals, 0, i);
46413 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI path: interleave each 128-bit half, then concat.  */
46417 half_mode = V16QImode;
46421 half_mode = V8HImode;
46425 n = GET_MODE_NUNITS (mode);
46426 for (i = 0; i < n; i++)
46427 ops[i] = XVECEXP (vals, 0, i);
46428 op0 = gen_reg_rtx (half_mode);
46429 op1 = gen_reg_rtx (half_mode);
46430 ix86_expand_vector_init_interleave (half_mode, op0, ops,
46432 ix86_expand_vector_init_interleave (half_mode, op1,
46433 &ops [n >> 1], n >> 2);
46434 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI path: interleave four quarters, concat pairwise.  */
46438 quarter_mode = V16QImode;
46439 half_mode = V32QImode;
46443 quarter_mode = V8HImode;
46444 half_mode = V16HImode;
46448 n = GET_MODE_NUNITS (mode);
46449 for (i = 0; i < n; i++)
46450 ops[i] = XVECEXP (vals, 0, i);
46451 op0 = gen_reg_rtx (quarter_mode);
46452 op1 = gen_reg_rtx (quarter_mode);
46453 op2 = gen_reg_rtx (quarter_mode);
46454 op3 = gen_reg_rtx (quarter_mode);
46455 op4 = gen_reg_rtx (half_mode);
46456 op5 = gen_reg_rtx (half_mode);
46457 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
46459 ix86_expand_vector_init_interleave (quarter_mode, op1,
46460 &ops [n >> 2], n >> 3);
46461 ix86_expand_vector_init_interleave (quarter_mode, op2,
46462 &ops [n >> 1], n >> 3);
46463 ix86_expand_vector_init_interleave (quarter_mode, op3,
46464 &ops [(n >> 1) | (n >> 2)], n >> 3);
46465 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
46466 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
46467 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
46471 if (!TARGET_SSE4_1)
46479 /* Don't use ix86_expand_vector_init_interleave if we can't
46480 move from GPR to SSE register directly. */
46481 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
46484 n = GET_MODE_NUNITS (mode);
46485 for (i = 0; i < n; i++)
46486 ops[i] = XVECEXP (vals, 0, i);
46487 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
46495 gcc_unreachable ();
/* Word-building fallback: pack elements into word_mode integers with
   shift/IOR, then assemble the vector from those words.  */
46499 int i, j, n_elts, n_words, n_elt_per_word;
46500 machine_mode inner_mode;
46501 rtx words[4], shift;
46503 inner_mode = GET_MODE_INNER (mode);
46504 n_elts = GET_MODE_NUNITS (mode);
46505 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46506 n_elt_per_word = n_elts / n_words;
46507 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
46509 for (i = 0; i < n_words; ++i)
46511 rtx word = NULL_RTX;
/* Elements are folded in from the highest index down so the first
   element ends up in the low bits.  */
46513 for (j = 0; j < n_elt_per_word; ++j)
46515 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
46516 elt = convert_modes (word_mode, inner_mode, elt, true);
46522 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
46523 word, 1, OPTAB_LIB_WIDEN);
46524 word = expand_simple_binop (word_mode, IOR, word, elt,
46525 word, 1, OPTAB_LIB_WIDEN);
46533 emit_move_insn (target, gen_lowpart (mode, words[0]));
46534 else if (n_words == 2)
/* Two words: clobber TARGET and fill its low/high word parts.  */
46536 rtx tmp = gen_reg_rtx (mode);
46537 emit_clobber (tmp);
46538 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
46539 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
46540 emit_move_insn (target, tmp);
46542 else if (n_words == 4)
/* Four words: recurse as a V4SI init.  */
46544 rtx tmp = gen_reg_rtx (V4SImode);
46545 gcc_assert (word_mode == SImode);
46546 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
46547 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
46548 emit_move_insn (target, gen_lowpart (mode, tmp));
46551 gcc_unreachable ();
46555 /* Initialize vector TARGET via VALS. Suppress the use of MMX
46556 instructions unless MMX_OK is true. */
/* NOTE(review): a few lines elided; comments only.  Entry point that
   classifies VALS (all-constant, all-same, one-variable, general) and
   delegates to the matching helper above.  */
46559 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
46561 machine_mode mode = GET_MODE (target);
46562 machine_mode inner_mode = GET_MODE_INNER (mode);
46563 int n_elts = GET_MODE_NUNITS (mode);
46564 int n_var = 0, one_var = -1;
46565 bool all_same = true, all_const_zero = true;
/* Classify elements: count variables, remember the last variable index,
   and track all-zero / all-identical.  */
46569 for (i = 0; i < n_elts; ++i)
46571 x = XVECEXP (vals, 0, i);
46572 if (!(CONST_SCALAR_INT_P (x)
46573 || CONST_DOUBLE_P (x)
46574 || CONST_FIXED_P (x)))
46575 n_var++, one_var = i;
46576 else if (x != CONST0_RTX (inner_mode))
46577 all_const_zero = false;
46578 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
46582 /* Constants are best loaded from the constant pool. */
46585 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
46589 /* If all values are identical, broadcast the value. */
46591 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
46592 XVECEXP (vals, 0, 0)))
46595 /* Values where only one field is non-constant are best loaded from
46596 the pool and overwritten via move later. */
46600 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
46601 XVECEXP (vals, 0, one_var),
46605 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Fall back to the fully general expansion.  */
46609 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  MMX_OK permits
   use of MMX instructions.
   NOTE(review): sampled listing -- the mode-dispatch switch has many
   elided case labels/lines; comments describe only visible code.  */
46613 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
46615 machine_mode mode = GET_MODE (target);
46616 machine_mode inner_mode = GET_MODE_INNER (mode);
46617 machine_mode half_mode;
46618 bool use_vec_merge = false;
/* Tables of 256-bit lane extract/insert generators, indexed by
   [element-mode row][hi/lo half].  */
46620 static rtx (*gen_extract[6][2]) (rtx, rtx)
46622 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
46623 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
46624 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
46625 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
46626 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
46627 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
46629 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
46631 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
46632 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
46633 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
46634 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
46635 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
46636 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
46639 machine_mode mmode = VOIDmode;
46640 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
/* Two-element vectors: extract the other element and re-CONCAT.  */
46648 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46649 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
46651 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46653 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46654 emit_insn (gen_rtx_SET (target, tmp));
46660 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
46664 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46665 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
46667 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46669 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46670 emit_insn (gen_rtx_SET (target, tmp));
46677 /* For the two element vectors, we implement a VEC_CONCAT with
46678 the extraction of the other element. */
46680 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
46681 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
46684 op0 = val, op1 = tmp;
46686 op0 = tmp, op1 = val;
46688 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
46689 emit_insn (gen_rtx_SET (target, tmp));
46694 use_vec_merge = TARGET_SSE4_1;
46701 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle-based insertion, per target element.  */
46705 /* tmp = target = A B C D */
46706 tmp = copy_to_reg (target);
46707 /* target = A A B B */
46708 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
46709 /* target = X A B B */
46710 ix86_expand_vector_set (false, target, val, 0);
46711 /* target = A X C D */
46712 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46713 const1_rtx, const0_rtx,
46714 GEN_INT (2+4), GEN_INT (3+4)));
46718 /* tmp = target = A B C D */
46719 tmp = copy_to_reg (target);
46720 /* tmp = X B C D */
46721 ix86_expand_vector_set (false, tmp, val, 0);
46722 /* target = A B X D */
46723 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46724 const0_rtx, const1_rtx,
46725 GEN_INT (0+4), GEN_INT (3+4)));
46729 /* tmp = target = A B C D */
46730 tmp = copy_to_reg (target);
46731 /* tmp = X B C D */
46732 ix86_expand_vector_set (false, tmp, val, 0);
46733 /* target = A B X D */
46734 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46735 const0_rtx, const1_rtx,
46736 GEN_INT (2+4), GEN_INT (0+4)));
46740 gcc_unreachable ();
46745 use_vec_merge = TARGET_SSE4_1;
46749 /* Element 0 handled by vec_merge below. */
46752 use_vec_merge = true;
46758 /* With SSE2, use integer shuffles to swap element 0 and ELT,
46759 store into element 0, then shuffle them back. */
46763 order[0] = GEN_INT (elt);
46764 order[1] = const1_rtx;
46765 order[2] = const2_rtx;
46766 order[3] = GEN_INT (3);
46767 order[elt] = const0_rtx;
46769 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46770 order[1], order[2], order[3]));
46772 ix86_expand_vector_set (false, target, val, 0);
46774 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46775 order[1], order[2], order[3]));
46779 /* For SSE1, we have to reuse the V4SF code. */
46780 rtx t = gen_reg_rtx (V4SFmode);
46781 emit_move_insn (t, gen_lowpart (V4SFmode, target));
46782 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
46783 emit_move_insn (target, gen_lowpart (mode, t));
46788 use_vec_merge = TARGET_SSE2;
46791 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
46795 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: pick the 128-bit half mode, then extract/insert.  */
46802 half_mode = V16QImode;
46808 half_mode = V8HImode;
46814 half_mode = V4SImode;
46820 half_mode = V2DImode;
46826 half_mode = V4SFmode;
46832 half_mode = V2DFmode;
46838 /* Compute offset. */
46842 gcc_assert (i <= 1);
46844 /* Extract the half. */
46845 tmp = gen_reg_rtx (half_mode);
46846 emit_insn (gen_extract[j][i] (tmp, target));
46848 /* Put val in tmp at elt. */
46849 ix86_expand_vector_set (false, tmp, val, elt);
46852 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: select the AVX-512 masked-blend generator.  */
46856 if (TARGET_AVX512F)
46859 gen_blendm = gen_avx512f_blendmv8df;
46864 if (TARGET_AVX512F)
46867 gen_blendm = gen_avx512f_blendmv8di;
46872 if (TARGET_AVX512F)
46875 gen_blendm = gen_avx512f_blendmv16sf;
46880 if (TARGET_AVX512F)
46883 gen_blendm = gen_avx512f_blendmv16si;
46888 if (TARGET_AVX512F && TARGET_AVX512BW)
46891 gen_blendm = gen_avx512bw_blendmv32hi;
46896 if (TARGET_AVX512F && TARGET_AVX512BW)
46899 gen_blendm = gen_avx512bw_blendmv64qi;
/* Emit: AVX-512 blendm, VEC_MERGE, or memory round-trip fallback.  */
46907 if (mmode != VOIDmode)
46909 tmp = gen_reg_rtx (mode);
46910 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
46911 emit_insn (gen_blendm (target, tmp, target,
46913 gen_int_mode (1 << elt, mmode))));
46915 else if (use_vec_merge)
46917 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
46918 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
46919 emit_insn (gen_rtx_SET (target, tmp));
/* Fallback: spill to a stack slot, overwrite the element, reload.  */
46923 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
46925 emit_move_insn (mem, target);
46927 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
46928 emit_move_insn (tmp, val);
46930 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  MMX_OK permits
   MMX instructions.
   NOTE(review): sampled listing -- many case labels and lines of the
   original mode switch are elided.  */
46935 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
46937 machine_mode mode = GET_MODE (vec);
46938 machine_mode inner_mode = GET_MODE_INNER (mode);
46939 bool use_vec_extr = false;
46952 use_vec_extr = true;
46956 use_vec_extr = TARGET_SSE4_1;
/* V4SF: move the wanted element to lane 0 via shufps/interleave.  */
46968 tmp = gen_reg_rtx (mode);
46969 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
46970 GEN_INT (elt), GEN_INT (elt),
46971 GEN_INT (elt+4), GEN_INT (elt+4)));
46975 tmp = gen_reg_rtx (mode);
46976 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
46980 gcc_unreachable ();
46983 use_vec_extr = true;
46988 use_vec_extr = TARGET_SSE4_1;
/* V4SI: pshufd the wanted element into lane 0.  */
47002 tmp = gen_reg_rtx (mode);
47003 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
47004 GEN_INT (elt), GEN_INT (elt),
47005 GEN_INT (elt), GEN_INT (elt)));
47009 tmp = gen_reg_rtx (mode);
47010 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
47014 gcc_unreachable ();
47017 use_vec_extr = true;
47022 /* For SSE1, we have to reuse the V4SF code. */
47023 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
47024 gen_lowpart (V4SFmode, vec), elt);
47030 use_vec_extr = TARGET_SSE2;
47033 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
47037 use_vec_extr = TARGET_SSE4_1;
/* Wide (256/512-bit) modes: extract the containing half into a
   narrower register, then recurse with the masked element index.  */
47043 tmp = gen_reg_rtx (V4SFmode);
47045 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
47047 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
47048 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47056 tmp = gen_reg_rtx (V2DFmode);
47058 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
47060 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
47061 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47069 tmp = gen_reg_rtx (V16QImode);
47071 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
47073 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
47074 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47082 tmp = gen_reg_rtx (V8HImode);
47084 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
47086 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
47087 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47095 tmp = gen_reg_rtx (V4SImode);
47097 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
47099 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
47100 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47108 tmp = gen_reg_rtx (V2DImode);
47110 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
47112 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
47113 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47119 if (TARGET_AVX512BW)
47121 tmp = gen_reg_rtx (V16HImode);
47123 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
47125 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
47126 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47132 if (TARGET_AVX512BW)
47134 tmp = gen_reg_rtx (V32QImode);
47136 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
47138 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
47139 ix86_expand_vector_extract (false, target, tmp, elt & 31);
47145 tmp = gen_reg_rtx (V8SFmode);
47147 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
47149 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
47150 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47154 tmp = gen_reg_rtx (V4DFmode);
47156 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
47158 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
47159 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47163 tmp = gen_reg_rtx (V8SImode);
47165 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
47167 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
47168 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47172 tmp = gen_reg_rtx (V4DImode);
47174 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
47176 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
47177 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47181 /* ??? Could extract the appropriate HImode element and shift. */
/* VEC_SELECT path when a direct extract instruction is available.  */
47188 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
47189 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
47191 /* Let the rtl optimizers know about the zero extension performed. */
47192 if (inner_mode == QImode || inner_mode == HImode)
47194 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
47195 target = gen_lowpart (SImode, target);
47198 emit_insn (gen_rtx_SET (target, tmp));
/* Fallback: spill the vector to memory and load the element.  */
47202 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
47204 emit_move_insn (mem, vec);
47206 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
47207 emit_move_insn (target, tmp);
47211 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
47212 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
47213 The upper bits of DEST are undefined, though they shouldn't cause
47214 exceptions (some bits from src or all zeros are ok). */
/* NOTE(review): sampled listing -- case labels of the mode switch are
   elided; per-ISA strategies below are shuffle/shift based.  */
47217 emit_reduc_half (rtx dest, rtx src, int i)
47220 switch (GET_MODE (src))
47224 tem = gen_sse_movhlps (dest, src, src);
47226 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
47227 GEN_INT (1 + 4), GEN_INT (1 + 4));
47230 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 128-bit integer modes: shift the V1TI view right by I bits.  */
47236 d = gen_reg_rtx (V1TImode);
47237 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
47242 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
47244 tem = gen_avx_shufps256 (dest, src, src,
47245 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
47249 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
47251 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* AVX2: permute the upper 128-bit lane down, or shift V2TI view.  */
47259 if (GET_MODE (dest) != V4DImode)
47260 d = gen_reg_rtx (V4DImode);
47261 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
47262 gen_lowpart (V4DImode, src),
47267 d = gen_reg_rtx (V2TImode);
47268 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* AVX-512: shuffle 128-bit lanes (shuf_i32x4) or elements (pshufd).  */
47279 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
47280 gen_lowpart (V16SImode, src),
47281 gen_lowpart (V16SImode, src),
47282 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
47283 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
47284 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
47285 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
47286 GEN_INT (0xC), GEN_INT (0xD),
47287 GEN_INT (0xE), GEN_INT (0xF),
47288 GEN_INT (0x10), GEN_INT (0x11),
47289 GEN_INT (0x12), GEN_INT (0x13),
47290 GEN_INT (0x14), GEN_INT (0x15),
47291 GEN_INT (0x16), GEN_INT (0x17));
47293 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
47294 gen_lowpart (V16SImode, src),
47295 GEN_INT (i == 128 ? 0x2 : 0x1),
47299 GEN_INT (i == 128 ? 0x6 : 0x5),
47303 GEN_INT (i == 128 ? 0xA : 0x9),
47307 GEN_INT (i == 128 ? 0xE : 0xD),
47313 gcc_unreachable ();
47317 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
47320 /* Expand a vector reduction. FN is the binary pattern to reduce;
47321 DEST is the destination; IN is the input vector. */
/* NOTE(review): sampled listing -- a few lines are elided.  */
47324 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
47326 rtx half, dst, vec = in;
47327 machine_mode mode = GET_MODE (in);
47330 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
47332 && mode == V8HImode
47333 && fn == gen_uminv8hi3
47335 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Log2 reduction: repeatedly fold the upper half onto the lower half
   with FN until one element's worth of bits remains.  */
47339 for (i = GET_MODE_BITSIZE (mode);
47340 i > GET_MODE_UNIT_BITSIZE (mode);
47343 half = gen_reg_rtx (mode);
47344 emit_reduc_half (half, vec, i);
47345 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
47348 dst = gen_reg_rtx (mode);
47349 emit_insn (fn (dst, half, vec));
47354 /* Target hook for scalar_mode_supported_p. */
/* Accepts decimal-float modes per the default hook; TFmode handling is
   elided in this sampled listing; otherwise defers to the default.  */
47356 ix86_scalar_mode_supported_p (machine_mode mode)
47358 if (DECIMAL_FLOAT_MODE_P (mode))
47359 return default_decimal_float_supported_p ();
47360 else if (mode == TFmode)
47363 return default_scalar_mode_supported_p (mode);
47366 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled ISA extension covers it;
   checks run from SSE through AVX-512 to MMX/3DNow.  */
47368 ix86_vector_mode_supported_p (machine_mode mode)
47370 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
47372 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
47374 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
47376 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
47378 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
47380 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
47385 /* Implement target hook libgcc_floating_mode_supported_p. */
/* NOTE(review): sampled listing -- the TFmode special-casing around the
   IX86_*_LIBGCC_TFMODE configuration macros is only partially visible.  */
47387 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
47397 #ifdef IX86_NO_LIBGCC_TFMODE
47399 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
47400 return TARGET_LONG_DOUBLE_128;
47410 /* Target hook for c_mode_for_suffix. */
/* Maps a literal-suffix character to a machine mode; the body is elided
   in this sampled listing.  */
47411 static machine_mode
47412 ix86_c_mode_for_suffix (char suffix)
47422 /* Worker function for TARGET_MD_ASM_ADJUST.
47424 We implement asm flag outputs, and maintain source compatibility
47425 with the old cc0-based compiler. */
/* NOTE(review): sampled listing -- some lines are elided.  Parses
   "=@cc<cond>" asm output constraints into flags-register conditions.  */
47428 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
47429 vec<const char *> &constraints,
47430 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
/* Every asm clobbers the x87 status word.  */
47432 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
47433 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
47435 bool saw_asm_flag = false;
47438 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
47440 const char *con = constraints[i];
47441 if (strncmp (con, "=@cc", 4) != 0)
47444 if (strchr (con, ',') != NULL)
47446 error ("alternatives not allowed in asm flag output");
47450 bool invert = false;
47452 invert = true, con++;
/* Decode the condition mnemonic into a CC mode and rtx code.  */
47454 machine_mode mode = CCmode;
47455 rtx_code code = UNKNOWN;
47461 mode = CCAmode, code = EQ;
47462 else if (con[1] == 'e' && con[2] == 0)
47463 mode = CCCmode, code = NE;
47467 mode = CCCmode, code = EQ;
47468 else if (con[1] == 'e' && con[2] == 0)
47469 mode = CCAmode, code = NE;
47473 mode = CCCmode, code = EQ;
47477 mode = CCZmode, code = EQ;
47481 mode = CCGCmode, code = GT;
47482 else if (con[1] == 'e' && con[2] == 0)
47483 mode = CCGCmode, code = GE;
47487 mode = CCGCmode, code = LT;
47488 else if (con[1] == 'e' && con[2] == 0)
47489 mode = CCGCmode, code = LE;
47493 mode = CCOmode, code = EQ;
47497 mode = CCPmode, code = EQ;
47501 mode = CCSmode, code = EQ;
47505 mode = CCZmode, code = EQ;
47508 if (code == UNKNOWN)
47510 error ("unknown asm flag output %qs", constraints[i]);
47514 code = reverse_condition (code);
47516 rtx dest = outputs[i];
47519 /* This is the first asm flag output. Here we put the flags
47520 register in as the real output and adjust the condition to
47522 constraints[i] = "=Bf";
47523 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
47524 saw_asm_flag = true;
47528 /* We don't need the flags register as output twice. */
47529 constraints[i] = "=X";
47530 outputs[i] = gen_rtx_SCRATCH (SImode);
/* Materialize the condition as a QImode setcc of the flags reg.  */
47533 rtx x = gen_rtx_REG (mode, FLAGS_REG);
47534 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
47536 machine_mode dest_mode = GET_MODE (dest);
47537 if (!SCALAR_INT_MODE_P (dest_mode))
47539 error ("invalid type for asm flag output");
47543 if (dest_mode == DImode && !TARGET_64BIT)
47544 dest_mode = SImode;
/* Widen the QImode flag value to the declared output mode.  */
47546 if (dest_mode != QImode)
47548 rtx destqi = gen_reg_rtx (QImode);
47549 emit_insn (gen_rtx_SET (destqi, x));
47551 if (TARGET_ZERO_EXTEND_WITH_AND
47552 && optimize_function_for_speed_p (cfun))
47554 x = force_reg (dest_mode, const0_rtx);
47556 emit_insn (gen_movstrictqi
47557 (gen_lowpart (QImode, x), destqi));
47560 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
47563 if (dest_mode != GET_MODE (dest))
47565 rtx tmp = gen_reg_rtx (SImode);
47567 emit_insn (gen_rtx_SET (tmp, x));
47568 emit_insn (gen_zero_extendsidi2 (dest, tmp));
47571 emit_insn (gen_rtx_SET (dest, x));
47573 rtx_insn *seq = get_insns ();
47580 /* If we had no asm flag outputs, clobber the flags. */
47581 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
47582 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
47587 /* Implements target vector targetm.asm.encode_section_info. */
/* After default encoding, flags symbols placed in the large-data
   section so addressing code can use far addresses.  */
47589 static void ATTRIBUTE_UNUSED
47590 ix86_encode_section_info (tree decl, rtx rtl, int first)
47592 default_encode_section_info (decl, rtl, first);
47594 if (ix86_in_large_data_p (decl))
47595 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
47598 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the unordered-aware reversal; integer CC
   modes use the plain one.  */
47601 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
47603 return (mode != CCFPmode && mode != CCFPUmode
47604 ? reverse_condition (code)
47605 : reverse_condition_maybe_unordered (code));
47608 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template; pops the source with fstp when
   OPERANDS[1] dies in this insn (REG_DEAD note present).
   NOTE(review): sampled listing -- some lines are elided.  */
47612 output_387_reg_move (rtx insn, rtx *operands)
47614 if (REG_P (operands[0]))
47616 if (REG_P (operands[1])
47617 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47619 if (REGNO (operands[0]) == FIRST_STACK_REG)
47620 return output_387_ffreep (operands, 0);
47621 return "fstp\t%y0";
47623 if (STACK_TOP_P (operands[0]))
47624 return "fld%Z1\t%y1";
47627 else if (MEM_P (operands[0]))
47629 gcc_assert (REG_P (operands[1]));
47630 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47631 return "fstp%Z0\t%y0";
47634 /* There is no non-popping store to memory for XFmode.
47635 So if we need one, follow the store with a load. */
47636 if (GET_MODE (operands[0]) == XFmode)
47637 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
47639 return "fst%Z0\t%y0";
47646 /* Output code to perform a conditional jump to LABEL, if C2 flag in
47647 FP status register is set. */
/* Reads the x87 status word with fnstsw; transfers it to EFLAGS either
   via sahf (when available/profitable) or a test of bit 0x04.  */
47650 ix86_emit_fp_unordered_jump (rtx label)
47652 rtx reg = gen_reg_rtx (HImode);
47655 emit_insn (gen_x86_fnstsw_1 (reg));
47657 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
47659 emit_insn (gen_x86_sahf_1 (reg));
47661 temp = gen_rtx_REG (CCmode, FLAGS_REG);
47662 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
47666 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
47668 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
47669 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
47672 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
47673 gen_rtx_LABEL_REF (VOIDmode, label),
47675 temp = gen_rtx_SET (pc_rtx, temp);
47677 emit_jump_insn (temp);
/* Branch predicted mostly not-taken (10%).  */
47678 predict_jump (REG_BR_PROB_BASE * 10 / 100);
47681 /* Output code to perform a log1p XFmode calculation. */
/* Uses fyl2xp1 directly for |op1| below ~0.2929 (its accuracy range);
   otherwise computes log2(1 + op1) via fyl2x, both scaled by ln(2).  */
47683 void ix86_emit_i387_log1p (rtx op0, rtx op1)
47685 rtx_code_label *label1 = gen_label_rtx ();
47686 rtx_code_label *label2 = gen_label_rtx ();
47688 rtx tmp = gen_reg_rtx (XFmode);
47689 rtx tmp2 = gen_reg_rtx (XFmode);
47692 emit_insn (gen_absxf2 (tmp, op1));
47693 test = gen_rtx_GE (VOIDmode, tmp,
47694 const_double_from_real_value (
/* Threshold is sqrt(2)/2 - 1 magnitude bound for fyl2xp1 accuracy.  */
47695 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
47697 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
47699 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47700 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
47701 emit_jump (label2);
47703 emit_label (label1);
47704 emit_move_insn (tmp, CONST1_RTX (XFmode));
47705 emit_insn (gen_addxf3 (tmp, op1, tmp));
47706 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47707 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
47709 emit_label (label2);
47712 /* Emit code for round calculation. */
/* Implements round(a) = sgn(a) * floor(|a| + 0.5) on the x87, using
   fxam to recover the sign.  NOTE(review): sampled listing -- mode
   switch case labels are elided.  */
47713 void ix86_emit_i387_round (rtx op0, rtx op1)
47715 machine_mode inmode = GET_MODE (op1);
47716 machine_mode outmode = GET_MODE (op0);
47717 rtx e1, e2, res, tmp, tmp1, half;
47718 rtx scratch = gen_reg_rtx (HImode);
47719 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
47720 rtx_code_label *jump_label = gen_label_rtx ();
47722 rtx (*gen_abs) (rtx, rtx);
47723 rtx (*gen_neg) (rtx, rtx);
/* Pick abs generator from the input FP mode.  */
47728 gen_abs = gen_abssf2;
47731 gen_abs = gen_absdf2;
47734 gen_abs = gen_absxf2;
47737 gcc_unreachable ();
/* Pick neg generator from the output mode (FP or integer).  */
47743 gen_neg = gen_negsf2;
47746 gen_neg = gen_negdf2;
47749 gen_neg = gen_negxf2;
47752 gen_neg = gen_neghi2;
47755 gen_neg = gen_negsi2;
47758 gen_neg = gen_negdi2;
47761 gcc_unreachable ();
47764 e1 = gen_reg_rtx (inmode);
47765 e2 = gen_reg_rtx (inmode);
47766 res = gen_reg_rtx (outmode);
47768 half = const_double_from_real_value (dconsthalf, inmode);
47770 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
47772 /* scratch = fxam(op1) */
47773 emit_insn (gen_rtx_SET (scratch,
47774 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
47776 /* e1 = fabs(op1) */
47777 emit_insn (gen_abs (e1, op1));
47779 /* e2 = e1 + 0.5 */
47780 half = force_reg (inmode, half);
47781 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
47783 /* res = floor(e2) */
47784 if (inmode != XFmode)
47786 tmp1 = gen_reg_rtx (XFmode);
47788 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* FP output narrower than XFmode: round in XF then truncate (noop).  */
47798 rtx tmp0 = gen_reg_rtx (XFmode);
47800 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
47802 emit_insn (gen_rtx_SET (res,
47803 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
47804 UNSPEC_TRUNC_NOOP)));
47808 emit_insn (gen_frndintxf2_floor (res, tmp1));
47811 emit_insn (gen_lfloorxfhi2 (res, tmp1));
47814 emit_insn (gen_lfloorxfsi2 (res, tmp1));
47817 emit_insn (gen_lfloorxfdi2 (res, tmp1));
47820 gcc_unreachable ();
47823 /* flags = signbit(a) */
47824 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
47826 /* if (flags) then res = -res */
47827 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
47828 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
47829 gen_rtx_LABEL_REF (VOIDmode, jump_label),
47831 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
47832 predict_jump (REG_BR_PROB_BASE * 50 / 100);
47833 JUMP_LABEL (insn) = jump_label;
47835 emit_insn (gen_neg (res, res));
47837 emit_label (jump_label);
47838 LABEL_NUSES (jump_label) = 1;
47840 emit_move_insn (op0, res);
47843 /* Output code to perform a Newton-Rhapson approximation of a single precision
47844 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson refinement of the hardware reciprocal estimate:
   a/b ~= a * (2*rcp(b) - b*rcp(b)^2).  */
47846 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
47848 rtx x0, x1, e0, e1;
47850 x0 = gen_reg_rtx (mode);
47851 e0 = gen_reg_rtx (mode);
47852 e1 = gen_reg_rtx (mode);
47853 x1 = gen_reg_rtx (mode);
47855 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
47857 b = force_reg (mode, b);
47859 /* x0 = rcp(b) estimate */
/* 512-bit modes use the AVX-512 rcp14 unspec; others the SSE rcp.  */
47860 if (mode == V16SFmode || mode == V8DFmode)
47861 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47864 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47868 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
47871 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
47874 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
47877 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
47880 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
47883 /* Output code to perform a Newton-Rhapson approximation of a
47884 single precision floating point [reciprocal] square root. */
/* One refinement step of rsqrt estimate; RECIP selects rsqrt vs sqrt
   result.  NOTE(review): sampled listing -- some lines elided.  */
47886 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
47888 rtx x0, e0, e1, e2, e3, mthree, mhalf;
47892 x0 = gen_reg_rtx (mode);
47893 e0 = gen_reg_rtx (mode);
47894 e1 = gen_reg_rtx (mode);
47895 e2 = gen_reg_rtx (mode);
47896 e3 = gen_reg_rtx (mode);
/* Constants -3.0 and -0.5 used by the refinement formula.  */
47898 real_from_integer (&r, VOIDmode, -3, SIGNED);
47899 mthree = const_double_from_real_value (r, SFmode);
47901 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
47902 mhalf = const_double_from_real_value (r, SFmode);
47903 unspec = UNSPEC_RSQRT;
47905 if (VECTOR_MODE_P (mode))
47907 mthree = ix86_build_const_vector (mode, true, mthree);
47908 mhalf = ix86_build_const_vector (mode, true, mhalf);
47909 /* There is no 512-bit rsqrt. There is however rsqrt14. */
47910 if (GET_MODE_SIZE (mode) == 64)
47911 unspec = UNSPEC_RSQRT14;
47914 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
47915 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
47917 a = force_reg (mode, a);
47919 /* x0 = rsqrt(a) estimate */
47920 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
47923 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
47926 rtx zero = force_reg (mode, CONST0_RTX(mode));
47929 /* Handle masked compare. */
47930 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
47932 mask = gen_reg_rtx (HImode);
47933 /* Imm value 0x4 corresponds to not-equal comparison. */
47934 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
47935 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
47939 mask = gen_reg_rtx (mode);
47940 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
47941 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
47946 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
47948 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
47951 mthree = force_reg (mode, mthree);
47952 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
47954 mhalf = force_reg (mode, mhalf);
47956 /* e3 = -.5 * x0 */
47957 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
47959 /* e3 = -.5 * e0 */
47960 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
47961 /* ret = e2 * e3 */
47962 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
47965 #ifdef TARGET_SOLARIS
47966 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Special-cases .eh_frame ("@unwind" marker for old Binutils) and
   COMDAT sections, then defers to the generic ELF handler.  */
47969 i386_solaris_elf_named_section (const char *name, unsigned int flags,
47972 /* With Binutils 2.15, the "@unwind" marker must be specified on
47973 every occurrence of the ".eh_frame" section, not just the first
47976 && strcmp (name, ".eh_frame") == 0)
47978 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
47979 flags & SECTION_WRITE ? "aw" : "a");
47984 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
47986 solaris_elf_asm_comdat_section (name, flags, decl);
47991 default_elf_asm_named_section (name, flags, decl);
47993 #endif /* TARGET_SOLARIS */
47995 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* NOTE(review): sampled listing -- the returns of "g"/"e" are elided;
   only the guard and mode switch skeleton are visible.  */
47997 static const char *
47998 ix86_mangle_type (const_tree type)
48000 type = TYPE_MAIN_VARIANT (type);
48002 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
48003 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
48006 switch (TYPE_MODE (type))
48009 /* __float128 is "g". */
48012 /* "long double" or __float80 is "e". */
48019 /* For 32-bit code we can save PIC register setup by using
48020 __stack_chk_fail_local hidden function instead of calling
48021 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
48022 register, so it is better to call __stack_chk_fail directly. */
48024 static tree ATTRIBUTE_UNUSED
48025 ix86_stack_protect_fail (void)
48027 return TARGET_64BIT
48028 ? default_external_stack_protect_fail ()
48029 : default_hidden_stack_protect_fail ();
48032 /* Select a format to encode pointers in exception handling data. CODE
48033 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
48034 true if the symbol may be affected by dynamic relocations.
48036 ??? All x86 object file formats are capable of representing this.
48037 After all, the relocation needed is the same as for the call insn.
48038 Whether or not a particular assembler allows us to enter such, I
48039 guess we'll have to see. */
/* NOTE(review): the PIC-branch guard before the sdata8 default is
   elided in this sampled listing.  */
48041 asm_preferred_eh_data_format (int code, int global)
48045 int type = DW_EH_PE_sdata8;
48047 || ix86_cmodel == CM_SMALL_PIC
48048 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
48049 type = DW_EH_PE_sdata4;
48050 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: small/medium code models can use 4-byte absolute data.  */
48052 if (ix86_cmodel == CM_SMALL
48053 || (ix86_cmodel == CM_MEDIUM && code))
48054 return DW_EH_PE_udata4;
48055 return DW_EH_PE_absptr;
48058 /* Expand copysign from SIGN to the positive value ABS_VALUE
48059 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & signbit-mask).  Builds a sign-bit mask
   when none is supplied.  NOTE(review): some lines elided.  */
48062 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
48064 machine_mode mode = GET_MODE (sign);
48065 rtx sgn = gen_reg_rtx (mode);
48066 if (mask == NULL_RTX)
48068 machine_mode vmode;
48070 if (mode == SFmode)
48072 else if (mode == DFmode)
48077 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
48078 if (!VECTOR_MODE_P (mode))
48080 /* We need to generate a scalar mode mask in this case. */
48081 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48082 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48083 mask = gen_reg_rtx (mode);
48084 emit_insn (gen_rtx_SET (mask, tmp));
/* Caller-supplied mask is inverted before use.  */
48088 mask = gen_rtx_NOT (mode, mask);
48089 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
48090 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
48093 /* Expand fabs (OP0) and return a new rtx that holds the result. The
48094 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Computes |op0| by ANDing with an all-but-signbit mask; the mask is
   optionally returned through SMASK for reuse by the caller.  */
48097 ix86_expand_sse_fabs (rtx op0, rtx *smask)
48099 machine_mode vmode, mode = GET_MODE (op0);
48102 xa = gen_reg_rtx (mode);
48103 if (mode == SFmode)
48105 else if (mode == DFmode)
48109 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
48110 if (!VECTOR_MODE_P (mode))
48112 /* We need to generate a scalar mode mask in this case. */
48113 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48114 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48115 mask = gen_reg_rtx (mode);
48116 emit_insn (gen_rtx_SET (mask, tmp));
48118 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
48126 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
48127 swapping the operands if SWAP_OPERANDS is true. The expanded
48128 code is a forward jump to a newly created label in case the
48129 comparison is true. The generated label rtx is returned. */
48130 static rtx_code_label *
48131 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
48132 bool swap_operands)
48134 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
48135 rtx_code_label *label;
48139 std::swap (op0, op1);
48141 label = gen_label_rtx ();
/* Emit the FP compare into the flags register, then the conditional
   jump to the fresh label.  */
48142 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
48143 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
48144 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
48145 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
48146 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
48147 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
48148 JUMP_LABEL (tmp) = label;
48153 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
48154 using comparison code CODE. Operands are swapped for the comparison if
48155 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
48157 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
48158 bool swap_operands)
48160 rtx (*insn)(rtx, rtx, rtx, rtx);
48161 machine_mode mode = GET_MODE (op0);
/* The result is an all-ones / all-zeros bitmask in an SSE register,
   suitable for subsequent AND/ANDN masking.  */
48162 rtx mask = gen_reg_rtx (mode);
48165 std::swap (op0, op1);
/* Pick the cmpsd/cmpss-style setcc expander matching the scalar mode.  */
48167 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
48169 emit_insn (insn (mask, op0, op1,
48170 gen_rtx_fmt_ee (code, mode, op0, op1)));
48174 /* Generate and return a rtx of mode MODE for 2**n where n is the number
48175 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
48177 ix86_gen_TWO52 (machine_mode mode)
48179 REAL_VALUE_TYPE TWO52r;
/* 2**52 for double (52-bit mantissa), 2**23 for float (23-bit mantissa);
   adding/subtracting this constant rounds away the fraction bits.  */
48182 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
48183 TWO52 = const_double_from_real_value (TWO52r, mode);
/* Load the constant into a register so it can feed SSE arithmetic.  */
48184 TWO52 = force_reg (mode, TWO52);
48189 /* Expand SSE sequence for computing lround from OP1 storing
48192 ix86_expand_lround (rtx op0, rtx op1)
48194 /* C code for the stuff we're doing below:
48195 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
48198 machine_mode mode = GET_MODE (op1);
48199 const struct real_format *fmt;
48200 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48203 /* load nextafter (0.5, 0.0) */
/* nextafter (0.5, 0.0) == 0.5 - 2**(-mantissa_bits - 1); using the value
   just below 0.5 avoids rounding 0.5-epsilon cases upward.  */
48204 fmt = REAL_MODE_FORMAT (mode);
48205 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48206 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48208 /* adj = copysign (0.5, op1) */
48209 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
48210 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
48212 /* adj = op1 + adj */
48213 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
48215 /* op0 = (imode)adj */
/* Truncating fix conversion produces the final integer result.  */
48216 expand_fix (op0, adj, 0);
48219 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
48222 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
48224 /* C code for the stuff we're doing below (for do_floor):
48226 xi -= (double)xi > op1 ? 1 : 0;
48229 machine_mode fmode = GET_MODE (op1);
48230 machine_mode imode = GET_MODE (op0);
48231 rtx ireg, freg, tmp;
48232 rtx_code_label *label;
48234 /* reg = (long)op1 */
48235 ireg = gen_reg_rtx (imode);
48236 expand_fix (ireg, op1, 0);
48238 /* freg = (double)reg */
48239 freg = gen_reg_rtx (fmode);
48240 expand_float (freg, ireg, 0);
48242 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Truncation rounds toward zero; compensate by one when the rounded-back
   value overshoots (floor) or undershoots (ceil, via swapped operands).  */
48243 label = ix86_expand_sse_compare_and_jump (UNLE,
48244 freg, op1, !do_floor);
48245 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
48246 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
48247 emit_move_insn (ireg, tmp);
48249 emit_label (label);
48250 LABEL_NUSES (label) = 1;
48252 emit_move_insn (op0, ireg);
48255 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
48256 result in OPERAND0. */
48258 ix86_expand_rint (rtx operand0, rtx operand1)
48260 /* C code for the stuff we're doing below:
48261 xa = fabs (operand1);
48262 if (!isless (xa, 2**52))
48264 xa = xa + 2**52 - 2**52;
48265 return copysign (xa, operand1);
48267 machine_mode mode = GET_MODE (operand0);
48268 rtx res, xa, TWO52, mask;
48269 rtx_code_label *label;
48271 res = gen_reg_rtx (mode);
48272 emit_move_insn (res, operand1);
48274 /* xa = abs (operand1) */
48275 xa = ix86_expand_sse_fabs (res, &mask);
48277 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (or NaN, caught by the unordered UNLE) are already
   integral; skip the rounding arithmetic for them.  */
48278 TWO52 = ix86_gen_TWO52 (mode);
48279 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding then subtracting 2**52 forces rounding to integer in the
   current rounding mode.  */
48281 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48282 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign so -0.0 and negative inputs round correctly.  */
48284 ix86_sse_copysign_to_positive (res, xa, res, mask);
48286 emit_label (label);
48287 LABEL_NUSES (label) = 1;
48289 emit_move_insn (operand0, res);
48292 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48295 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
48297 /* C code for the stuff we expand below.
48298 double xa = fabs (x), x2;
48299 if (!isless (xa, TWO52))
48301 xa = xa + TWO52 - TWO52;
48302 x2 = copysign (xa, x);
48311 machine_mode mode = GET_MODE (operand0);
48312 rtx xa, TWO52, tmp, one, res, mask;
48313 rtx_code_label *label;
48315 TWO52 = ix86_gen_TWO52 (mode);
48317 /* Temporary for holding the result, initialized to the input
48318 operand to ease control flow. */
48319 res = gen_reg_rtx (mode);
48320 emit_move_insn (res, operand1);
48322 /* xa = abs (operand1) */
48323 xa = ix86_expand_sse_fabs (res, &mask);
48325 /* if (!isless (xa, TWO52)) goto label; */
/* Large values and NaNs are already integral; leave them untouched.  */
48326 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48328 /* xa = xa + TWO52 - TWO52; */
48329 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48330 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
48332 /* xa = copysign (xa, operand1) */
48333 ix86_sse_copysign_to_positive (xa, xa, res, mask);
48335 /* generate 1.0 or -1.0 */
/* floor subtracts 1, ceil adds 1 -- encoded as subtracting -1.  */
48336 one = force_reg (mode,
48337 const_double_from_real_value (do_floor
48338 ? dconst1 : dconstm1, mode));
48340 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
48341 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
/* AND the all-ones/all-zeros mask with +-1.0 to get the adjustment.  */
48342 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48343 /* We always need to subtract here to preserve signed zero. */
48344 tmp = expand_simple_binop (mode, MINUS,
48345 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48346 emit_move_insn (res, tmp);
48348 emit_label (label);
48349 LABEL_NUSES (label) = 1;
48351 emit_move_insn (operand0, res);
48354 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48357 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
48359 /* C code for the stuff we expand below.
48360 double xa = fabs (x), x2;
48361 if (!isless (xa, TWO52))
48363 x2 = (double)(long)x;
48370 if (HONOR_SIGNED_ZEROS (mode))
48371 return copysign (x2, x);
48374 machine_mode mode = GET_MODE (operand0);
48375 rtx xa, xi, TWO52, tmp, one, res, mask;
48376 rtx_code_label *label;
48378 TWO52 = ix86_gen_TWO52 (mode);
48380 /* Temporary for holding the result, initialized to the input
48381 operand to ease control flow. */
48382 res = gen_reg_rtx (mode);
48383 emit_move_insn (res, operand1);
48385 /* xa = abs (operand1) */
48386 xa = ix86_expand_sse_fabs (res, &mask);
48388 /* if (!isless (xa, TWO52)) goto label; */
48389 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48391 /* xa = (double)(long)x */
/* Round via integer conversion (truncation toward zero); this variant
   relies on the wide cvttsd2si/cvtsi2sd round trip.  */
48392 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48393 expand_fix (xi, res, 0);
48394 expand_float (xa, xi, 0);
48397 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48399 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* Truncation rounds toward zero; adjust by one where that differs from
   floor (overshoot) or ceil (undershoot, via swapped compare).  */
48400 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48401 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48402 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
48403 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48404 emit_move_insn (res, tmp);
/* The int round trip loses the sign of zero; restore it when it matters.  */
48406 if (HONOR_SIGNED_ZEROS (mode))
48407 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48409 emit_label (label);
48410 LABEL_NUSES (label) = 1;
48412 emit_move_insn (operand0, res);
48415 /* Expand SSE sequence for computing round from OPERAND1 storing
48416 into OPERAND0. Sequence that works without relying on DImode truncation
48417 via cvttsd2siq that is only available on 64bit targets. */
48419 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
48421 /* C code for the stuff we expand below.
48422 double xa = fabs (x), xa2, x2;
48423 if (!isless (xa, TWO52))
48425 Using the absolute value and copying back sign makes
48426 -0.0 -> -0.0 correct.
48427 xa2 = xa + TWO52 - TWO52;
48432 else if (dxa > 0.5)
48434 x2 = copysign (xa2, x);
48437 machine_mode mode = GET_MODE (operand0);
48438 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
48439 rtx_code_label *label;
48441 TWO52 = ix86_gen_TWO52 (mode);
48443 /* Temporary for holding the result, initialized to the input
48444 operand to ease control flow. */
48445 res = gen_reg_rtx (mode);
48446 emit_move_insn (res, operand1);
48448 /* xa = abs (operand1) */
48449 xa = ix86_expand_sse_fabs (res, &mask);
48451 /* if (!isless (xa, TWO52)) goto label; */
48452 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48454 /* xa2 = xa + TWO52 - TWO52; */
48455 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48456 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
48458 /* dxa = xa2 - xa; */
48459 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
48461 /* generate 0.5, 1.0 and -0.5 */
48462 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
48463 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
48464 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* Note: the original code allocated a fresh register into TMP here
   (tmp = gen_reg_rtx (mode);) that was immediately overwritten by the
   ix86_expand_sse_compare_mask call below -- a dead store, removed.  */
48469 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
48470 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
48471 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48472 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48473 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
48474 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
48475 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48476 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48478 /* res = copysign (xa2, operand1) */
48479 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
48481 emit_label (label);
48482 LABEL_NUSES (label) = 1;
48484 emit_move_insn (operand0, res);
48487 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48490 ix86_expand_trunc (rtx operand0, rtx operand1)
48492 /* C code for SSE variant we expand below.
48493 double xa = fabs (x), x2;
48494 if (!isless (xa, TWO52))
48496 x2 = (double)(long)x;
48497 if (HONOR_SIGNED_ZEROS (mode))
48498 return copysign (x2, x);
48501 machine_mode mode = GET_MODE (operand0);
48502 rtx xa, xi, TWO52, res, mask;
48503 rtx_code_label *label;
48505 TWO52 = ix86_gen_TWO52 (mode);
48507 /* Temporary for holding the result, initialized to the input
48508 operand to ease control flow. */
48509 res = gen_reg_rtx (mode);
48510 emit_move_insn (res, operand1);
48512 /* xa = abs (operand1) */
48513 xa = ix86_expand_sse_fabs (res, &mask);
48515 /* if (!isless (xa, TWO52)) goto label; */
48516 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48518 /* x = (double)(long)x */
/* fix/float round trip truncates toward zero, which is exactly trunc.  */
48519 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48520 expand_fix (xi, res, 0);
48521 expand_float (res, xi, 0);
/* The round trip maps -0.0 to +0.0; restore the sign when required.  */
48523 if (HONOR_SIGNED_ZEROS (mode))
48524 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48526 emit_label (label);
48527 LABEL_NUSES (label) = 1;
48529 emit_move_insn (operand0, res);
48532 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48535 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
48537 machine_mode mode = GET_MODE (operand0);
48538 rtx xa, mask, TWO52, one, res, smask, tmp;
48539 rtx_code_label *label;
48541 /* C code for SSE variant we expand below.
48542 double xa = fabs (x), x2;
48543 if (!isless (xa, TWO52))
48545 xa2 = xa + TWO52 - TWO52;
48549 x2 = copysign (xa2, x);
48553 TWO52 = ix86_gen_TWO52 (mode);
48555 /* Temporary for holding the result, initialized to the input
48556 operand to ease control flow. */
48557 res = gen_reg_rtx (mode);
48558 emit_move_insn (res, operand1);
48560 /* xa = abs (operand1) */
48561 xa = ix86_expand_sse_fabs (res, &smask);
48563 /* if (!isless (xa, TWO52)) goto label; */
48564 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48566 /* res = xa + TWO52 - TWO52; */
/* Round |x| to integer via the 2**52 trick (avoids 64-bit cvttsd2si,
   which is unavailable on 32-bit targets).  */
48567 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48568 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
48569 emit_move_insn (res, tmp);
48572 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48574 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* Round-to-nearest may overshoot |x|; subtract 1 to get truncation.  */
48575 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
48576 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
48577 tmp = expand_simple_binop (mode, MINUS,
48578 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
48579 emit_move_insn (res, tmp);
48581 /* res = copysign (res, operand1) */
48582 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
48584 emit_label (label);
48585 LABEL_NUSES (label) = 1;
48587 emit_move_insn (operand0, res);
48590 /* Expand SSE sequence for computing round from OPERAND1 storing
48593 ix86_expand_round (rtx operand0, rtx operand1)
48595 /* C code for the stuff we're doing below:
48596 double xa = fabs (x);
48597 if (!isless (xa, TWO52))
48599 xa = (double)(long)(xa + nextafter (0.5, 0.0));
48600 return copysign (xa, x);
48602 machine_mode mode = GET_MODE (operand0);
48603 rtx res, TWO52, xa, xi, half, mask;
48604 rtx_code_label *label;
48605 const struct real_format *fmt;
48606 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48608 /* Temporary for holding the result, initialized to the input
48609 operand to ease control flow. */
48610 res = gen_reg_rtx (mode);
48611 emit_move_insn (res, operand1);
48613 TWO52 = ix86_gen_TWO52 (mode);
48614 xa = ix86_expand_sse_fabs (res, &mask);
48615 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48617 /* load nextafter (0.5, 0.0) */
/* Use the value just below 0.5 so that x + 0.5 does not round up past
   the next integer for inputs slightly below .5 boundaries.  */
48618 fmt = REAL_MODE_FORMAT (mode);
48619 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48620 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48622 /* xa = xa + 0.5 */
48623 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
48624 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
48626 /* xa = (double)(int64_t)xa */
/* Truncate toward zero; since xa is non-negative this is floor(|x|+0.5).  */
48627 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48628 expand_fix (xi, xa, 0);
48629 expand_float (xa, xi, 0);
48631 /* res = copysign (xa, operand1) */
48632 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
48634 emit_label (label);
48635 LABEL_NUSES (label) = 1;
48637 emit_move_insn (operand0, res);
48640 /* Expand SSE sequence for computing round
48641 from OP1 storing into OP0 using sse4 round insn. */
48643 ix86_expand_round_sse4 (rtx op0, rtx op1)
48645 machine_mode mode = GET_MODE (op0);
48646 rtx e1, e2, res, half;
48647 const struct real_format *fmt;
48648 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48649 rtx (*gen_copysign) (rtx, rtx, rtx);
48650 rtx (*gen_round) (rtx, rtx, rtx);
/* Dispatch on the scalar mode; only SFmode and DFmode are supported.  */
48655 gen_copysign = gen_copysignsf3;
48656 gen_round = gen_sse4_1_roundsf2;
48659 gen_copysign = gen_copysigndf3;
48660 gen_round = gen_sse4_1_rounddf2;
48663 gcc_unreachable ();
48666 /* round (a) = trunc (a + copysign (0.5, a)) */
48668 /* load nextafter (0.5, 0.0) */
/* Value just below 0.5 prevents spuriously rounding up at boundaries.  */
48669 fmt = REAL_MODE_FORMAT (mode);
48670 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48671 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48672 half = const_double_from_real_value (pred_half, mode);
48674 /* e1 = copysign (0.5, op1) */
48675 e1 = gen_reg_rtx (mode);
48676 emit_insn (gen_copysign (e1, half, op1));
48678 /* e2 = op1 + e1 */
48679 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
48681 /* res = trunc (e2) */
/* ROUND_TRUNC selects the roundsd/roundss truncation rounding mode.  */
48682 res = gen_reg_rtx (mode);
48683 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
48685 emit_move_insn (op0, res);
48689 /* Table of valid machine attributes. */
/* NOTE(review): this excerpt is elided; the second structure-initializer
   line of several entries (carrying the affects_type_identity flag) is
   missing between the visible lines.  */
48690 static const struct attribute_spec ix86_attribute_table[] =
48692 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
48693 affects_type_identity } */
48694 /* Stdcall attribute says callee is responsible for popping arguments
48695 if they are not variable. */
48696 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48698 /* Fastcall attribute says callee is responsible for popping arguments
48699 if they are not variable. */
48700 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48702 /* Thiscall attribute says callee is responsible for popping arguments
48703 if they are not variable. */
48704 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48706 /* Cdecl attribute says the callee is a normal C declaration */
48707 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48709 /* Regparm attribute specifies how many integer arguments are to be
48710 passed in registers. */
48711 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
48713 /* Sseregparm attribute says we are using x86_64 calling conventions
48714 for FP arguments. */
48715 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48717 /* The transactional memory builtins are implicitly regparm or fastcall
48718 depending on the ABI. Override the generic do-nothing attribute that
48719 these builtins were declared with. */
48720 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
48722 /* force_align_arg_pointer says this function realigns the stack at entry. */
48723 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
48724 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
48725 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
48726 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
48727 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
48728 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
48731 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48733 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48735 #ifdef SUBTARGET_ATTRIBUTE_TABLE
48736 SUBTARGET_ATTRIBUTE_TABLE,
48738 /* ms_abi and sysv_abi calling convention function attributes. */
48739 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48740 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48741 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
48743 { "callee_pop_aggregate_return", 1, 1, false, true, true,
48744 ix86_handle_callee_pop_aggregate_return, true },
/* Sentinel entry terminating the table.  */
48746 { NULL, 0, 0, false, false, false, NULL, false }
48749 /* Implement targetm.vectorize.builtin_vectorization_cost. */
48751 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
48756 switch (type_of_cost)
48759 return ix86_cost->scalar_stmt_cost;
48762 return ix86_cost->scalar_load_cost;
48765 return ix86_cost->scalar_store_cost;
48768 return ix86_cost->vec_stmt_cost;
48771 return ix86_cost->vec_align_load_cost;
48774 return ix86_cost->vec_store_cost;
48776 case vec_to_scalar:
48777 return ix86_cost->vec_to_scalar_cost;
48779 case scalar_to_vec:
48780 return ix86_cost->scalar_to_vec_cost;
/* NOTE(review): unaligned stores are charged the unaligned LOAD cost;
   the cost table apparently has no separate unaligned-store entry --
   confirm against the processor_costs definition.  */
48782 case unaligned_load:
48783 case unaligned_store:
48784 return ix86_cost->vec_unalign_load_cost;
48786 case cond_branch_taken:
48787 return ix86_cost->cond_taken_branch_cost;
48789 case cond_branch_not_taken:
48790 return ix86_cost->cond_not_taken_branch_cost;
48793 case vec_promote_demote:
48794 return ix86_cost->vec_stmt_cost;
48796 case vec_construct:
/* Building a vector from scalars is modeled as roughly one statement
   per pair of elements, plus one.  */
48797 elements = TYPE_VECTOR_SUBPARTS (vectype);
48798 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
48801 gcc_unreachable ();
48805 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
48806 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
48807 insn every time. */
/* GTY(()) marks it for the garbage collector since it lives across passes.  */
48809 static GTY(()) rtx_insn *vselect_insn;
48811 /* Initialize vselect_insn. */
48814 init_vselect_insn (void)
/* Build a maximally-sized placeholder pattern; callers later shrink the
   PARALLEL and substitute real operands/modes in place.  */
48819 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
48820 for (i = 0; i < MAX_VECT_LEN; ++i)
48821 XVECEXP (x, 0, i) = const0_rtx;
48822 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
48824 x = gen_rtx_SET (const0_rtx, x);
/* Emit once so the insn has a body recog can be run against.  */
48826 vselect_insn = emit_insn (x);
48830 /* Construct (set target (vec_select op0 (parallel perm))) and
48831 return true if that's a valid instruction in the active ISA. */
48834 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
48835 unsigned nelt, bool testing_p)
48838 rtx x, save_vconcat;
48841 if (vselect_insn == NULL_RTX)
48842 init_vselect_insn ();
/* Patch the cached insn in place: shrink the PARALLEL to NELT entries
   and fill in the requested permutation indices.  */
48844 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
48845 PUT_NUM_ELEM (XVEC (x, 0), nelt);
48846 for (i = 0; i < nelt; ++i)
48847 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
48848 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48849 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
48850 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
48851 SET_DEST (PATTERN (vselect_insn)) = target;
/* Ask recog whether the patched pattern matches a real instruction.  */
48852 icode = recog_memoized (vselect_insn);
48854 if (icode >= 0 && !testing_p)
48855 emit_insn (copy_rtx (PATTERN (vselect_insn)));
/* Restore the cached insn to its neutral state for the next caller.  */
48857 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
48858 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
48859 INSN_CODE (vselect_insn) = -1;
48864 /* Similar, but generate a vec_concat from op0 and op1 as well. */
48867 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
48868 const unsigned char *perm, unsigned nelt,
48871 machine_mode v2mode;
48875 if (vselect_insn == NULL_RTX)
48876 init_vselect_insn ();
/* The concatenation of two vectors has the double-width vector mode.  */
48878 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
48879 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48880 PUT_MODE (x, v2mode);
48883 ok = expand_vselect (target, x, perm, nelt, testing_p);
/* Reset the cached vec_concat operands before returning.  */
48884 XEXP (x, 0) = const0_rtx;
48885 XEXP (x, 1) = const0_rtx;
48889 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48890 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
/* NOTE(review): this excerpt is heavily elided; many original lines
   (braces, returns, case labels) are missing between the lines below.  */
48893 expand_vec_perm_blend (struct expand_vec_perm_d *d)
48895 machine_mode mmode, vmode = d->vmode;
48896 unsigned i, mask, nelt = d->nelt;
48897 rtx target, op0, op1, maskop, x;
48898 rtx rperm[32], vperm;
/* A blend needs two operands; reject single-operand permutations and
   ISA/mode combinations without a blend instruction.  */
48900 if (d->one_operand_p)
48902 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
48903 && (TARGET_AVX512BW
48904 || GET_MODE_UNIT_SIZE (vmode) >= 4)
48906 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48908 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48910 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48915 /* This is a blend, not a permute. Elements must stay in their
48916 respective lanes. */
48917 for (i = 0; i < nelt; ++i)
48919 unsigned e = d->perm[i];
48920 if (!(e == i || e == i + nelt))
48927 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
48928 decision should be extracted elsewhere, so that we only try that
48929 sequence once all budget==3 options have been tried. */
48930 target = d->target;
/* Build the per-element selection mask: bit set selects from op1.  */
48949 for (i = 0; i < nelt; ++i)
48950 mask |= (d->perm[i] >= nelt) << i;
48954 for (i = 0; i < 2; ++i)
48955 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
48960 for (i = 0; i < 4; ++i)
48961 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
48966 /* See if bytes move in pairs so we can use pblendw with
48967 an immediate argument, rather than pblendvb with a vector
48969 for (i = 0; i < 16; i += 2)
48970 if (d->perm[i] + 1 != d->perm[i + 1])
48973 for (i = 0; i < nelt; ++i)
48974 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
/* Variable blend: build a constant vector mask for pblendvb.  */
48977 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
48978 vperm = force_reg (vmode, vperm);
48980 if (GET_MODE_SIZE (vmode) == 16)
48981 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
48983 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
48984 if (target != d->target)
48985 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48989 for (i = 0; i < 8; ++i)
48990 mask |= (d->perm[i * 2] >= 16) << i;
48995 target = gen_reg_rtx (vmode);
48996 op0 = gen_lowpart (vmode, op0);
48997 op1 = gen_lowpart (vmode, op1);
49001 /* See if bytes move in pairs. If not, vpblendvb must be used. */
49002 for (i = 0; i < 32; i += 2)
49003 if (d->perm[i] + 1 != d->perm[i + 1])
49005 /* See if bytes move in quadruplets. If yes, vpblendd
49006 with immediate can be used. */
49007 for (i = 0; i < 32; i += 4)
49008 if (d->perm[i] + 2 != d->perm[i + 2])
49012 /* See if bytes move the same in both lanes. If yes,
49013 vpblendw with immediate can be used. */
49014 for (i = 0; i < 16; i += 2)
49015 if (d->perm[i] + 16 != d->perm[i + 16])
49018 /* Use vpblendw. */
49019 for (i = 0; i < 16; ++i)
49020 mask |= (d->perm[i * 2] >= 32) << i;
49025 /* Use vpblendd. */
49026 for (i = 0; i < 8; ++i)
49027 mask |= (d->perm[i * 4] >= 32) << i;
49032 /* See if words move in pairs. If yes, vpblendd can be used. */
49033 for (i = 0; i < 16; i += 2)
49034 if (d->perm[i] + 1 != d->perm[i + 1])
49038 /* See if words move the same in both lanes. If not,
49039 vpblendvb must be used. */
49040 for (i = 0; i < 8; i++)
49041 if (d->perm[i] + 8 != d->perm[i + 8])
49043 /* Use vpblendvb. */
49044 for (i = 0; i < 32; ++i)
49045 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
49049 target = gen_reg_rtx (vmode);
49050 op0 = gen_lowpart (vmode, op0);
49051 op1 = gen_lowpart (vmode, op1);
49052 goto finish_pblendvb;
49055 /* Use vpblendw. */
49056 for (i = 0; i < 16; ++i)
49057 mask |= (d->perm[i] >= 16) << i;
49061 /* Use vpblendd. */
49062 for (i = 0; i < 8; ++i)
49063 mask |= (d->perm[i * 2] >= 16) << i;
49068 /* Use vpblendd. */
49069 for (i = 0; i < 4; ++i)
49070 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
49075 gcc_unreachable ();
/* AVX-512 masked blends take the mask in a mask register; the older
   forms take an immediate.  */
49098 if (mmode != VOIDmode)
49099 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
49101 maskop = GEN_INT (mask);
49103 /* This matches five different patterns with the different modes. */
49104 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
49105 x = gen_rtx_SET (target, x);
49107 if (target != d->target)
49108 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49113 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49114 in terms of the variable form of vpermilps.
49116 Note that we will have already failed the immediate input vpermilps,
49117 which requires that the high and low part shuffle be identical; the
49118 variable form doesn't require that. */
49121 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
49123 rtx rperm[8], vperm;
49126 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
49129 /* We can only permute within the 128-bit lane. */
49130 for (i = 0; i < 8; ++i)
49132 unsigned e = d->perm[i];
/* Element from the other 128-bit lane -- vpermilps cannot cross lanes.  */
49133 if (i < 4 ? e >= 4 : e < 4)
49140 for (i = 0; i < 8; ++i)
49142 unsigned e = d->perm[i];
49144 /* Within each 128-bit lane, the elements of op0 are numbered
49145 from 0 and the elements of op1 are numbered from 4. */
49151 rperm[i] = GEN_INT (e);
/* Build the variable control vector and emit vpermilps.  */
49154 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
49155 vperm = force_reg (V8SImode, vperm);
49156 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
49161 /* Return true if permutation D can be performed as VMODE permutation
49165 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
49167 unsigned int i, j, chunk;
/* Both modes must be integer vectors of the same total size, and VMODE
   must have fewer (i.e. wider) elements than D's mode.  */
49169 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
49170 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
49171 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
49174 if (GET_MODE_NUNITS (vmode) >= d->nelt)
/* Each CHUNK-sized group must be aligned and move as a contiguous unit
   for the permutation to be expressible in the wider element mode.  */
49177 chunk = d->nelt / GET_MODE_NUNITS (vmode);
49178 for (i = 0; i < d->nelt; i += chunk)
49179 if (d->perm[i] & (chunk - 1))
49182 for (j = 1; j < chunk; ++j)
49183 if (d->perm[i] + j != d->perm[i + j])
49189 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49190 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
/* NOTE(review): this excerpt is heavily elided; many original lines
   (braces, returns, assignments) are missing between the lines below.  */
49193 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
49195 unsigned i, nelt, eltsz, mask;
49196 unsigned char perm[64];
49197 machine_mode vmode = V16QImode;
49198 rtx rperm[64], vperm, target, op0, op1;
49202 if (!d->one_operand_p)
/* Two-operand permutations need XOP vpperm (16 bytes) or vperm2i128.  */
49204 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
49207 && valid_perm_using_mode_p (V2TImode, d))
49212 /* Use vperm2i128 insn. The pattern uses
49213 V4DImode instead of V2TImode. */
49214 target = d->target;
49215 if (d->vmode != V4DImode)
49216 target = gen_reg_rtx (V4DImode);
49217 op0 = gen_lowpart (V4DImode, d->op0);
49218 op1 = gen_lowpart (V4DImode, d->op1);
/* Encode which 128-bit half of each operand goes to each result half.  */
49220 = GEN_INT ((d->perm[0] / (nelt / 2))
49221 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
49222 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
49223 if (target != d->target)
49224 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49232 if (GET_MODE_SIZE (d->vmode) == 16)
49237 else if (GET_MODE_SIZE (d->vmode) == 32)
49242 /* V4DImode should be already handled through
49243 expand_vselect by vpermq instruction. */
49244 gcc_assert (d->vmode != V4DImode);
49247 if (d->vmode == V8SImode
49248 || d->vmode == V16HImode
49249 || d->vmode == V32QImode)
49251 /* First see if vpermq can be used for
49252 V8SImode/V16HImode/V32QImode. */
49253 if (valid_perm_using_mode_p (V4DImode, d))
49255 for (i = 0; i < 4; i++)
49256 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
49259 target = gen_reg_rtx (V4DImode);
49260 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
49263 emit_move_insn (d->target,
49264 gen_lowpart (d->vmode, target));
49270 /* Next see if vpermd can be used. */
49271 if (valid_perm_using_mode_p (V8SImode, d))
49274 /* Or if vpermps can be used. */
49275 else if (d->vmode == V8SFmode)
49278 if (vmode == V32QImode)
49280 /* vpshufb only works intra lanes, it is not
49281 possible to shuffle bytes in between the lanes. */
49282 for (i = 0; i < nelt; ++i)
49283 if ((d->perm[i] ^ i) & (nelt / 2))
49287 else if (GET_MODE_SIZE (d->vmode) == 64)
49289 if (!TARGET_AVX512BW)
49292 /* If vpermq didn't work, vpshufb won't work either. */
49293 if (d->vmode == V8DFmode || d->vmode == V8DImode)
49297 if (d->vmode == V16SImode
49298 || d->vmode == V32HImode
49299 || d->vmode == V64QImode)
49301 /* First see if vpermq can be used for
49302 V16SImode/V32HImode/V64QImode. */
49303 if (valid_perm_using_mode_p (V8DImode, d))
49305 for (i = 0; i < 8; i++)
49306 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
49309 target = gen_reg_rtx (V8DImode);
49310 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
49313 emit_move_insn (d->target,
49314 gen_lowpart (d->vmode, target));
49320 /* Next see if vpermd can be used. */
49321 if (valid_perm_using_mode_p (V16SImode, d))
49324 /* Or if vpermps can be used. */
49325 else if (d->vmode == V16SFmode)
49327 if (vmode == V64QImode)
49329 /* vpshufb only works intra lanes, it is not
49330 possible to shuffle bytes in between the lanes. */
49331 for (i = 0; i < nelt; ++i)
49332 if ((d->perm[i] ^ i) & (nelt / 4))
/* Build the control vector for the chosen instruction.  */
49343 if (vmode == V8SImode)
49344 for (i = 0; i < 8; ++i)
49345 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
49346 else if (vmode == V16SImode)
49347 for (i = 0; i < 16; ++i)
49348 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
49351 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
/* Index mask: two-operand selects span 2*nelt; one-operand byte shuffles
   are limited to a single lane's worth of elements.  */
49352 if (!d->one_operand_p)
49353 mask = 2 * nelt - 1;
49354 else if (vmode == V16QImode)
49356 else if (vmode == V64QImode)
49357 mask = nelt / 4 - 1;
49359 mask = nelt / 2 - 1;
49361 for (i = 0; i < nelt; ++i)
49363 unsigned j, e = d->perm[i] & mask;
/* Expand each element index into its constituent byte indices.  */
49364 for (j = 0; j < eltsz; ++j)
49365 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
49369 vperm = gen_rtx_CONST_VECTOR (vmode,
49370 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
49371 vperm = force_reg (vmode, vperm);
49373 target = d->target;
49374 if (d->vmode != vmode)
49375 target = gen_reg_rtx (vmode);
49376 op0 = gen_lowpart (vmode, d->op0);
49377 if (d->one_operand_p)
/* Dispatch to the concrete shuffle/permute builder for VMODE.  */
49379 if (vmode == V16QImode)
49380 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
49381 else if (vmode == V32QImode)
49382 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
49383 else if (vmode == V64QImode)
49384 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
49385 else if (vmode == V8SFmode)
49386 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
49387 else if (vmode == V8SImode)
49388 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
49389 else if (vmode == V16SFmode)
49390 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
49391 else if (vmode == V16SImode)
49392 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
49394 gcc_unreachable ();
49398 op1 = gen_lowpart (vmode, d->op1);
49399 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
49401 if (target != d->target)
49402 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49407 /* For V*[QHS]Imode permutations, check if the same permutation
49408 can't be performed in a 2x, 4x or 8x wider inner mode. */
/* NOTE(review): this chunk is elided — intervening source lines (braces,
   local declarations, early returns) are missing; verify against the
   full file before relying on control flow shown here.
   On success, fills *ND with the equivalent narrower-element-count
   permutation (elements paired into wider integer elements) and
   presumably returns true; returns false if D's mode has no wider
   counterpart or elements are not pairable.  */
49411 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
49412 struct expand_vec_perm_d *nd)
49415 enum machine_mode mode = VOIDmode;
/* Map each integer vector mode to the mode with elements twice as wide.  */
49419 case V16QImode: mode = V8HImode; break;
49420 case V32QImode: mode = V16HImode; break;
49421 case V64QImode: mode = V32HImode; break;
49422 case V8HImode: mode = V4SImode; break;
49423 case V16HImode: mode = V8SImode; break;
49424 case V32HImode: mode = V16SImode; break;
49425 case V4SImode: mode = V2DImode; break;
49426 case V8SImode: mode = V4DImode; break;
49427 case V16SImode: mode = V8DImode; break;
49428 default: return false;
/* Every even/odd index pair must select an aligned, consecutive pair of
   source elements, otherwise the permutation cannot be widened.  */
49430 for (i = 0; i < d->nelt; i += 2)
49431 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
49434 nd->nelt = d->nelt / 2;
49435 for (i = 0; i < nd->nelt; i++)
49436 nd->perm[i] = d->perm[2 * i] / 2;
/* Recurse to widen further (4x, 8x) until DImode elements are reached.  */
49437 if (GET_MODE_INNER (mode) != DImode)
49438 canonicalize_vector_int_perm (nd, nd);
49441 nd->one_operand_p = d->one_operand_p;
49442 nd->testing_p = d->testing_p;
49443 if (d->op0 == d->op1)
49444 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
49447 nd->op0 = gen_lowpart (nd->vmode, d->op0);
49448 nd->op1 = gen_lowpart (nd->vmode, d->op1);
/* When only testing, use a raw virtual register to avoid allocating
   a real pseudo; otherwise create a fresh pseudo for the result.  */
49451 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
49453 nd->target = gen_reg_rtx (nd->vmode);
49458 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
49459 in a single instruction. */
/* NOTE(review): chunk is elided — many interior lines (braces, returns,
   switch/case openers for the broadcast dispatch) are missing here;
   confirm against the full file.  */
49462 expand_vec_perm_1 (struct expand_vec_perm_d *d)
49464 unsigned i, nelt = d->nelt;
49465 struct expand_vec_perm_d nd;
49467 /* Check plain VEC_SELECT first, because AVX has instructions that could
49468 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
49469 input where SEL+CONCAT may not. */
49470 if (d->one_operand_p)
49472 int mask = nelt - 1;
49473 bool identity_perm = true;
49474 bool broadcast_perm = true;
/* Classify the (masked) permutation: identity and/or broadcast-of-elt-0.  */
49476 for (i = 0; i < nelt; i++)
49478 nd.perm[i] = d->perm[i] & mask;
49479 if (nd.perm[i] != i)
49480 identity_perm = false;
49482 broadcast_perm = false;
/* Identity: a plain move suffices.  */
49488 emit_move_insn (d->target, d->op0);
49491 else if (broadcast_perm && TARGET_AVX2)
49493 /* Use vpbroadcast{b,w,d}. */
49494 rtx (*gen) (rtx, rtx) = NULL;
/* Pick the broadcast expander by mode; 512-bit variants need AVX512BW/F.  */
49498 if (TARGET_AVX512BW)
49499 gen = gen_avx512bw_vec_dupv64qi_1;
49502 gen = gen_avx2_pbroadcastv32qi_1;
49505 if (TARGET_AVX512BW)
49506 gen = gen_avx512bw_vec_dupv32hi_1;
49509 gen = gen_avx2_pbroadcastv16hi_1;
49512 if (TARGET_AVX512F)
49513 gen = gen_avx512f_vec_dupv16si_1;
49516 gen = gen_avx2_pbroadcastv8si_1;
49519 gen = gen_avx2_pbroadcastv16qi;
49522 gen = gen_avx2_pbroadcastv8hi;
49525 if (TARGET_AVX512F)
49526 gen = gen_avx512f_vec_dupv16sf_1;
49529 gen = gen_avx2_vec_dupv8sf_1;
49532 if (TARGET_AVX512F)
49533 gen = gen_avx512f_vec_dupv8df_1;
49536 if (TARGET_AVX512F)
49537 gen = gen_avx512f_vec_dupv8di_1;
49539 /* For other modes prefer other shuffles this function creates. */
49545 emit_insn (gen (d->target, d->op0));
/* Try a single VEC_SELECT with the masked permutation.  */
49550 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
49553 /* There are plenty of patterns in sse.md that are written for
49554 SEL+CONCAT and are not replicated for a single op.  Perhaps
49555 that should be changed, to avoid the nastiness here. */
49557 /* Recognize interleave style patterns, which means incrementing
49558 every other permutation operand. */
49559 for (i = 0; i < nelt; i += 2)
49561 nd.perm[i] = d->perm[i] & mask;
49562 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
49564 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49568 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
49571 for (i = 0; i < nelt; i += 4)
49573 nd.perm[i + 0] = d->perm[i + 0] & mask;
49574 nd.perm[i + 1] = d->perm[i + 1] & mask;
49575 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
49576 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
49579 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49585 /* Finally, try the fully general two operand permute. */
49586 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
49590 /* Recognize interleave style patterns with reversed operands. */
49591 if (!d->one_operand_p)
49593 for (i = 0; i < nelt; ++i)
49595 unsigned e = d->perm[i];
49603 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
49608 /* Try the SSE4.1 blend variable merge instructions. */
49609 if (expand_vec_perm_blend (d))
49612 /* Try one of the AVX vpermil variable permutations.  */
49613 if (expand_vec_perm_vpermil (d))
49616 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
49617 vpshufb, vpermd, vpermps or vpermq variable permutation. */
49618 if (expand_vec_perm_pshufb (d))
49621 /* Try the AVX2 vpalignr instruction.  */
49622 if (expand_vec_perm_palignr (d, true))
49625 /* Try the AVX512F vpermi2 instructions.  */
49626 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
49629 /* See if we can get the same permutation in different vector integer
/* Last resort: fold pairs into wider integer elements and retry.  */
49631 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
49634 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
49640 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
49641 in terms of a pair of pshuflw + pshufhw instructions. */
/* NOTE(review): chunk is elided — lines such as the return statements and
   the function's opening brace are missing; verify against full file.
   Only handles V8HImode single-operand permutations whose low four and
   high four elements stay within their own 64-bit half.  */
49644 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
49646 unsigned char perm2[MAX_VECT_LEN];
49650 if (d->vmode != V8HImode || !d->one_operand_p)
49653 /* The two permutations only operate in 64-bit lanes.  */
49654 for (i = 0; i < 4; ++i)
49655 if (d->perm[i] >= 4)
49657 for (i = 4; i < 8; ++i)
49658 if (d->perm[i] < 4)
49664 /* Emit the pshuflw.  */
49665 memcpy (perm2, d->perm, 4);
49666 for (i = 4; i < 8; ++i)
49668 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p)
49671 /* Emit the pshufhw.  */
/* Second shuffle reads d->target (result of the pshuflw) in place.  */
49672 memcpy (perm2 + 4, d->perm + 4, 4);
49673 for (i = 0; i < 4; ++i)
49675 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
49681 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
49682 the permutation using the SSSE3 palignr instruction.  This succeeds
49683 when all of the elements in PERM fit within one vector and we merely
49684 need to shift them down so that a single vector permutation has a
49685 chance to succeed.  If SINGLE_INSN_ONLY_P, succeed if only
49686 the vpalignr instruction itself can perform the requested permutation. */
/* NOTE(review): chunk is elided — interior lines (braces, min/max updates,
   returns) are missing; confirm control flow against the full file.  */
49689 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
49691 unsigned i, nelt = d->nelt;
49692 unsigned min, max, minswap, maxswap;
49693 bool in_order, ok, swap = false;
49695 struct expand_vec_perm_d dcopy;
49697 /* Even with AVX, palignr only operates on 128-bit vectors,
49698 in AVX2 palignr operates on both 128-bit lanes. */
49699 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
49700 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
/* Track the element-index range both as-is and with operands swapped.  */
49705 minswap = 2 * nelt;
49707 for (i = 0; i < nelt; ++i)
49709 unsigned e = d->perm[i];
49710 unsigned eswap = d->perm[i] ^ nelt;
/* For 32-byte vectors fold both 128-bit lanes into one index space,
   since AVX2 palignr shifts each lane independently.  */
49711 if (GET_MODE_SIZE (d->vmode) == 32)
49713 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
49714 eswap = e ^ (nelt / 2);
49720 if (eswap < minswap)
49722 if (eswap > maxswap)
49726 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
49728 if (d->one_operand_p
49730 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
49731 ? nelt / 2 : nelt))
49738 /* Given that we have SSSE3, we know we'll be able to implement the
49739 single operand permutation after the palignr with pshufb for
49740 128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
49742 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
/* Operand swap chosen: exchange op0/op1 and flip the high index bit.  */
49748 dcopy.op0 = d->op1;
49749 dcopy.op1 = d->op0;
49750 for (i = 0; i < nelt; ++i)
49751 dcopy.perm[i] ^= nelt;
/* Rebase the permutation by MIN and check whether the palignr alone
   already produces the requested order.  */
49755 for (i = 0; i < nelt; ++i)
49757 unsigned e = dcopy.perm[i];
49758 if (GET_MODE_SIZE (d->vmode) == 32
49760 && (e & (nelt / 2 - 1)) < min)
49761 e = e - min - (nelt / 2);
49768 dcopy.one_operand_p = true;
49770 if (single_insn_only_p && !in_order)
49773 /* For AVX2, test whether we can permute the result in one instruction.  */
49778 dcopy.op1 = dcopy.op0;
49779 return expand_vec_perm_1 (&dcopy);
/* Emit the byte-granularity alignment shift itself.  */
49782 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
49783 if (GET_MODE_SIZE (d->vmode) == 16)
49785 target = gen_reg_rtx (TImode);
49786 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
49787 gen_lowpart (TImode, dcopy.op0), shift));
49791 target = gen_reg_rtx (V2TImode);
49792 emit_insn (gen_avx2_palignrv2ti (target,
49793 gen_lowpart (V2TImode, dcopy.op1),
49794 gen_lowpart (V2TImode, dcopy.op0),
49798 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
49800 /* Test for the degenerate case where the alignment by itself
49801 produces the desired permutation. */
49804 emit_move_insn (d->target, dcopy.op0);
49808 ok = expand_vec_perm_1 (&dcopy);
49809 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
49814 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
49815 the permutation using the SSE4_1 pblendv instruction.  Potentially
49816 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
/* NOTE(review): chunk is elided — returns and braces between the visible
   lines are missing; verify branch targets against the full file.  */
49819 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
49821 unsigned i, which, nelt = d->nelt;
49822 struct expand_vec_perm_d dcopy, dcopy1;
49823 machine_mode vmode = d->vmode;
49826 /* Use the same checks as in expand_vec_perm_blend.  */
49827 if (d->one_operand_p)
49829 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
49831 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
49833 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
49838 /* Figure out where permutation elements stay not in their
49839 respective lanes.  */
/* WHICH accumulates bit 1 if any out-of-lane element comes from op0,
   bit 2 if any comes from op1.  */
49840 for (i = 0, which = 0; i < nelt; ++i)
49842 unsigned e = d->perm[i];
49844 which |= (e < nelt ? 1 : 2);
49846 /* We can pblend the part where elements stay not in their
49847 respective lanes only when these elements are all in one
49848 half of a permutation.
49849 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
49850 lanes, but both 8 and 9 >= 8
49851 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
49852 respective lanes and 8 >= 8, but 2 not.  */
49853 if (which != 1 && which != 2)
49855 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
49858 /* First we apply one operand permutation to the part where
49859 elements stay not in their respective lanes.  */
49862 dcopy.op0 = dcopy.op1 = d->op1;
49864 dcopy.op0 = dcopy.op1 = d->op0;
49866 dcopy.target = gen_reg_rtx (vmode);
49867 dcopy.one_operand_p = true;
49869 for (i = 0; i < nelt; ++i)
49870 dcopy.perm[i] = d->perm[i] & (nelt - 1);
49872 ok = expand_vec_perm_1 (&dcopy);
49873 if (GET_MODE_SIZE (vmode) != 16 && !ok)
49880 /* Next we put permuted elements into their positions.  */
/* dcopy1 is the blend: each lane takes either the original element (i)
   or the shuffled one (nelt + i).  */
49883 dcopy1.op1 = dcopy.target;
49885 dcopy1.op0 = dcopy.target;
49887 for (i = 0; i < nelt; ++i)
49888 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
49890 ok = expand_vec_perm_blend (&dcopy1);
49896 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
49898 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
49899 a two vector permutation into a single vector permutation by using
49900 an interleave operation to merge the vectors. */
/* NOTE(review): chunk is elided — numerous interior lines (braces,
   returns, dremap/dfinal initialization) are missing; confirm against
   the full file before editing.  */
49903 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
49905 struct expand_vec_perm_d dremap, dfinal;
49906 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
49907 unsigned HOST_WIDE_INT contents;
49908 unsigned char remap[2 * MAX_VECT_LEN];
49910 bool ok, same_halves = false;
49912 if (GET_MODE_SIZE (d->vmode) == 16)
49914 if (d->one_operand_p)
49917 else if (GET_MODE_SIZE (d->vmode) == 32)
49921 /* For 32-byte modes allow even d->one_operand_p.
49922 The lack of cross-lane shuffling in some instructions
49923 might prevent a single insn shuffle.  */
49925 dfinal.testing_p = true;
49926 /* If expand_vec_perm_interleave3 can expand this into
49927 a 3 insn sequence, give up and let it be expanded as
49928 3 insn sequence.  While that is one insn longer,
49929 it doesn't need a memory operand and in the common
49930 case that both interleave low and high permutations
49931 with the same operands are adjacent needs 4 insns
49932 for both after CSE.  */
49933 if (expand_vec_perm_interleave3 (&dfinal))
49939 /* Examine from whence the elements come.  */
/* CONTENTS is a bitmask over the 2*nelt possible source elements.  */
49941 for (i = 0; i < nelt; ++i)
49942 contents |= HOST_WIDE_INT_1U << d->perm[i];
49944 memset (remap, 0xff, sizeof (remap));
49947 if (GET_MODE_SIZE (d->vmode) == 16)
49949 unsigned HOST_WIDE_INT h1, h2, h3, h4;
49951 /* Split the two input vectors into 4 halves.  */
49952 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
49957 /* If the elements from the low halves use interleave low, and similarly
49958 for interleave high.  If the elements are from mis-matched halves, we
49959 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
49960 if ((contents & (h1 | h3)) == contents)
49963 for (i = 0; i < nelt2; ++i)
49966 remap[i + nelt] = i * 2 + 1;
49967 dremap.perm[i * 2] = i;
49968 dremap.perm[i * 2 + 1] = i + nelt;
/* Pre-SSE2 has no integer unpack; use the float form instead.  */
49970 if (!TARGET_SSE2 && d->vmode == V4SImode)
49971 dremap.vmode = V4SFmode;
49973 else if ((contents & (h2 | h4)) == contents)
49976 for (i = 0; i < nelt2; ++i)
49978 remap[i + nelt2] = i * 2;
49979 remap[i + nelt + nelt2] = i * 2 + 1;
49980 dremap.perm[i * 2] = i + nelt2;
49981 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
49983 if (!TARGET_SSE2 && d->vmode == V4SImode)
49984 dremap.vmode = V4SFmode;
49986 else if ((contents & (h1 | h4)) == contents)
49989 for (i = 0; i < nelt2; ++i)
49992 remap[i + nelt + nelt2] = i + nelt2;
49993 dremap.perm[i] = i;
49994 dremap.perm[i + nelt2] = i + nelt + nelt2;
/* Mis-matched halves: fall back to a V2DImode {0,3} shuffle.  */
49999 dremap.vmode = V2DImode;
50001 dremap.perm[0] = 0;
50002 dremap.perm[1] = 3;
50005 else if ((contents & (h2 | h3)) == contents)
50008 for (i = 0; i < nelt2; ++i)
50010 remap[i + nelt2] = i;
50011 remap[i + nelt] = i + nelt2;
50012 dremap.perm[i] = i + nelt2;
50013 dremap.perm[i + nelt2] = i + nelt;
50018 dremap.vmode = V2DImode;
50020 dremap.perm[0] = 1;
50021 dremap.perm[1] = 2;
/* 32-byte vectors: work on quarters instead of halves.  */
50029 unsigned int nelt4 = nelt / 4, nzcnt = 0;
50030 unsigned HOST_WIDE_INT q[8];
50031 unsigned int nonzero_halves[4];
50033 /* Split the two input vectors into 8 quarters.  */
50034 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
50035 for (i = 1; i < 8; ++i)
50036 q[i] = q[0] << (nelt4 * i);
50037 for (i = 0; i < 4; ++i)
50038 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
50040 nonzero_halves[nzcnt] = i;
50046 gcc_assert (d->one_operand_p);
50047 nonzero_halves[1] = nonzero_halves[0];
50048 same_halves = true;
50050 else if (d->one_operand_p)
50052 gcc_assert (nonzero_halves[0] == 0);
50053 gcc_assert (nonzero_halves[1] == 1);
50058 if (d->perm[0] / nelt2 == nonzero_halves[1])
50060 /* Attempt to increase the likelihood that dfinal
50061 shuffle will be intra-lane.  */
50062 std::swap (nonzero_halves[0], nonzero_halves[1]);
50065 /* vperm2f128 or vperm2i128.  */
50066 for (i = 0; i < nelt2; ++i)
50068 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
50069 remap[i + nonzero_halves[0] * nelt2] = i;
50070 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
50071 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
50074 if (d->vmode != V8SFmode
50075 && d->vmode != V4DFmode
50076 && d->vmode != V8SImode)
50078 dremap.vmode = V8SImode;
50080 for (i = 0; i < 4; ++i)
50082 dremap.perm[i] = i + nonzero_halves[0] * 4;
50083 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
50087 else if (d->one_operand_p)
50089 else if (TARGET_AVX2
50090 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
/* All elements from even quarters: interleave-low style remap.  */
50093 for (i = 0; i < nelt4; ++i)
50096 remap[i + nelt] = i * 2 + 1;
50097 remap[i + nelt2] = i * 2 + nelt2;
50098 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
50099 dremap.perm[i * 2] = i;
50100 dremap.perm[i * 2 + 1] = i + nelt;
50101 dremap.perm[i * 2 + nelt2] = i + nelt2;
50102 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
50105 else if (TARGET_AVX2
50106 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
/* All elements from odd quarters: interleave-high style remap.  */
50109 for (i = 0; i < nelt4; ++i)
50111 remap[i + nelt4] = i * 2;
50112 remap[i + nelt + nelt4] = i * 2 + 1;
50113 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
50114 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
50115 dremap.perm[i * 2] = i + nelt4;
50116 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
50117 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
50118 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
50125 /* Use the remapping array set up above to move the elements from their
50126 swizzled locations into their final destinations.  */
50128 for (i = 0; i < nelt; ++i)
50130 unsigned e = remap[d->perm[i]];
50131 gcc_assert (e < nelt);
50132 /* If same_halves is true, both halves of the remapped vector are the
50133 same.  Avoid cross-lane accesses if possible.  */
50134 if (same_halves && i >= nelt2)
50136 gcc_assert (e < nelt2);
50137 dfinal.perm[i] = e + nelt2;
50140 dfinal.perm[i] = e;
50144 dremap.target = gen_reg_rtx (dremap.vmode);
50145 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50147 dfinal.op1 = dfinal.op0;
50148 dfinal.one_operand_p = true;
50150 /* Test if the final remap can be done with a single insn.  For V4SFmode or
50151 V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
50153 ok = expand_vec_perm_1 (&dfinal);
50154 seq = get_insns ();
50163 if (dremap.vmode != dfinal.vmode)
50165 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
50166 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
50169 ok = expand_vec_perm_1 (&dremap);
50176 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
50177 a single vector cross-lane permutation into vpermq followed
50178 by any of the single insn permutations.  */
/* NOTE(review): chunk is elided — braces, returns and parts of the
   initialization are missing; verify against the full file.
   Only applies (per visible guard) to one-operand V32QImode/V16HImode.  */
50181 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
50183 struct expand_vec_perm_d dremap, dfinal;
50184 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
50185 unsigned contents[2];
50189 && (d->vmode == V32QImode || d->vmode == V16HImode)
50190 && d->one_operand_p))
/* contents[k] records which source quarters feed destination half k.  */
50195 for (i = 0; i < nelt2; ++i)
50197 contents[0] |= 1u << (d->perm[i] / nelt4);
50198 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Each destination half may draw from at most two source quarters,
   since vpermq can place only two quarters per half.  */
50201 for (i = 0; i < 2; ++i)
50203 unsigned int cnt = 0;
50204 for (j = 0; j < 4; ++j)
50205 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* Build the V4DImode vpermq that gathers the needed quarters.  */
50213 dremap.vmode = V4DImode;
50215 dremap.target = gen_reg_rtx (V4DImode);
50216 dremap.op0 = gen_lowpart (V4DImode, d->op0);
50217 dremap.op1 = dremap.op0;
50218 dremap.one_operand_p = true;
50219 for (i = 0; i < 2; ++i)
50221 unsigned int cnt = 0;
50222 for (j = 0; j < 4; ++j)
50223 if ((contents[i] & (1u << j)) != 0)
50224 dremap.perm[2 * i + cnt++] = j;
50225 for (; cnt < 2; ++cnt)
50226 dremap.perm[2 * i + cnt] = 0;
/* Build the in-lane follow-up permutation over the gathered data.  */
50230 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50231 dfinal.op1 = dfinal.op0;
50232 dfinal.one_operand_p = true;
50233 for (i = 0, j = 0; i < nelt; ++i)
50237 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
50238 if ((d->perm[i] / nelt4) == dremap.perm[j])
50240 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
50241 dfinal.perm[i] |= nelt4;
50243 gcc_unreachable ();
50246 ok = expand_vec_perm_1 (&dremap);
50249 ok = expand_vec_perm_1 (&dfinal);
50255 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
50256 a vector permutation using two instructions, vperm2f128 resp.
50257 vperm2i128 followed by any single in-lane permutation. */
/* NOTE(review): chunk is elided — the loop body braces, continue/return
   statements and dfirst setup lines are missing; verify against the
   full file.  */
50260 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
50262 struct expand_vec_perm_d dfirst, dsecond;
50263 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
50267 || GET_MODE_SIZE (d->vmode) != 32
50268 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
50272 dsecond.one_operand_p = false;
50273 dsecond.testing_p = true;
50275 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
50276 immediate.  For perm < 16 the second permutation uses
50277 d->op0 as first operand, for perm >= 16 it uses d->op1
50278 as first operand.  The second operand is the result of
/* Exhaustively try every useful vperm2[fi]128 lane selection.  */
50280 for (perm = 0; perm < 32; perm++)
50282 /* Ignore permutations which do not move anything cross-lane.  */
50285 /* The second shuffle for e.g. V4DFmode has
50286 0123 and ABCD operands.
50287 Ignore AB23, as 23 is already in the second lane
50288 of the first operand.  */
50289 if ((perm & 0xc) == (1 << 2)) continue;
50290 /* And 01CD, as 01 is in the first lane of the first
50292 if ((perm & 3) == 0) continue;
50293 /* And 4567, as then the vperm2[fi]128 doesn't change
50294 anything on the original 4567 second operand.  */
50295 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
50299 /* The second shuffle for e.g. V4DFmode has
50300 4567 and ABCD operands.
50301 Ignore AB67, as 67 is already in the second lane
50302 of the first operand.  */
50303 if ((perm & 0xc) == (3 << 2)) continue;
50304 /* And 45CD, as 45 is in the first lane of the first
50306 if ((perm & 3) == 2) continue;
50307 /* And 0123, as then the vperm2[fi]128 doesn't change
50308 anything on the original 0123 first operand.  */
50309 if ((perm & 0xf) == (1 << 2)) continue;
/* Build the candidate second-stage permutation for this PERM.  */
50312 for (i = 0; i < nelt; i++)
50314 j = d->perm[i] / nelt2;
50315 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
50316 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
50317 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
50318 dsecond.perm[i] = d->perm[i] & (nelt - 1);
50326 ok = expand_vec_perm_1 (&dsecond);
50337 /* Found a usable second shuffle.  dfirst will be
50338 vperm2f128 on d->op0 and d->op1.  */
50339 dsecond.testing_p = false;
50341 dfirst.target = gen_reg_rtx (d->vmode);
50342 for (i = 0; i < nelt; i++)
50343 dfirst.perm[i] = (i & (nelt2 - 1))
50344 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
50346 canonicalize_perm (&dfirst);
50347 ok = expand_vec_perm_1 (&dfirst);
50350 /* And dsecond is some single insn shuffle, taking
50351 d->op0 and result of vperm2f128 (if perm < 16) or
50352 d->op1 and result of vperm2f128 (otherwise).  */
50354 dsecond.op0 = dsecond.op1;
50355 dsecond.op1 = dfirst.target;
50357 ok = expand_vec_perm_1 (&dsecond);
50363 /* For one operand, the only useful vperm2f128 permutation is 0x01
50365 if (d->one_operand_p)
50372 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
50373 a two vector permutation using 2 intra-lane interleave insns
50374 and cross-lane shuffle for 32-byte vectors.  */
/* NOTE(review): chunk is elided — the mode-dispatch switch openers,
   returns and braces are missing; verify against the full file.
   Matches only interleave-shaped permutations: pairs (p0+i/2, p0+i/2+nelt)
   with p0 == 0 (low) or nelt/2 (high).  */
50377 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
50380 rtx (*gen) (rtx, rtx, rtx);
50382 if (d->one_operand_p)
50384 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
50386 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
50392 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
50394 for (i = 0; i < nelt; i += 2)
50395 if (d->perm[i] != d->perm[0] + i / 2
50396 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Select the interleave high/low expander for the vector mode.  */
50406 gen = gen_vec_interleave_highv32qi;
50408 gen = gen_vec_interleave_lowv32qi;
50412 gen = gen_vec_interleave_highv16hi;
50414 gen = gen_vec_interleave_lowv16hi;
50418 gen = gen_vec_interleave_highv8si;
50420 gen = gen_vec_interleave_lowv8si;
50424 gen = gen_vec_interleave_highv4di;
50426 gen = gen_vec_interleave_lowv4di;
50430 gen = gen_vec_interleave_highv8sf;
50432 gen = gen_vec_interleave_lowv8sf;
50436 gen = gen_vec_interleave_highv4df;
50438 gen = gen_vec_interleave_lowv4df;
50441 gcc_unreachable ();
50444 emit_insn (gen (d->target, d->op0, d->op1));
50448 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
50449 a single vector permutation using a single intra-lane vector
50450 permutation, vperm2f128 swapping the lanes and vblend* insn blending
50451 the non-swapped and swapped vectors together.  */
/* NOTE(review): chunk is elided — returns, braces and msk updates are
   missing; verify against the full file.  Guard shows this applies
   only to one-operand V8SFmode/V4DFmode.  */
50454 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
50456 struct expand_vec_perm_d dfirst, dsecond;
50457 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
50460 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
50464 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
50465 || !d->one_operand_p)
/* 0xff marks a dfirst slot not yet assigned by the analysis below.  */
50469 for (i = 0; i < nelt; i++)
50470 dfirst.perm[i] = 0xff;
50471 for (i = 0, msk = 0; i < nelt; i++)
/* Place each requested element into the lane it can reach without a
   cross-lane move after the vperm2f128 lane swap.  */
50473 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
50474 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
50476 dfirst.perm[j] = d->perm[i];
/* Fill unassigned slots with the identity.  */
50480 for (i = 0; i < nelt; i++)
50481 if (dfirst.perm[i] == 0xff)
50482 dfirst.perm[i] = i;
50485 dfirst.target = gen_reg_rtx (dfirst.vmode);
50488 ok = expand_vec_perm_1 (&dfirst);
50489 seq = get_insns ();
/* Second stage: swap the two 128-bit lanes of dfirst's result.  */
50501 dsecond.op0 = dfirst.target;
50502 dsecond.op1 = dfirst.target;
50503 dsecond.one_operand_p = true;
50504 dsecond.target = gen_reg_rtx (dsecond.vmode);
50505 for (i = 0; i < nelt; i++)
50506 dsecond.perm[i] = i ^ nelt2;
50508 ok = expand_vec_perm_1 (&dsecond);
/* Blend non-swapped and swapped vectors under mask MSK.  */
50511 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
50512 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
50516 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
50517 permutation using two vperm2f128, followed by a vshufpd insn blending
50518 the two vectors together.  */
/* NOTE(review): chunk is elided — dfirst/dsecond/dthird initialization
   and the return paths are missing; verify against the full file.
   V4DFmode only per the visible guard.  */
50521 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
50523 struct expand_vec_perm_d dfirst, dsecond, dthird;
50526 if (!TARGET_AVX || (d->vmode != V4DFmode))
/* dfirst gathers the even-indexed targets' 128-bit pairs, dsecond the
   odd-indexed ones; ~1 rounds each element index down to its pair.  */
50536 dfirst.perm[0] = (d->perm[0] & ~1);
50537 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
50538 dfirst.perm[2] = (d->perm[2] & ~1);
50539 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
50540 dsecond.perm[0] = (d->perm[1] & ~1);
50541 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
50542 dsecond.perm[2] = (d->perm[3] & ~1);
50543 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
/* dthird is the final vshufpd: pick low/high double from each pair.  */
50544 dthird.perm[0] = (d->perm[0] % 2);
50545 dthird.perm[1] = (d->perm[1] % 2) + 4;
50546 dthird.perm[2] = (d->perm[2] % 2) + 2;
50547 dthird.perm[3] = (d->perm[3] % 2) + 6;
50549 dfirst.target = gen_reg_rtx (dfirst.vmode);
50550 dsecond.target = gen_reg_rtx (dsecond.vmode);
50551 dthird.op0 = dfirst.target;
50552 dthird.op1 = dsecond.target;
50553 dthird.one_operand_p = false;
50555 canonicalize_perm (&dfirst);
50556 canonicalize_perm (&dsecond);
50558 ok = expand_vec_perm_1 (&dfirst)
50559 && expand_vec_perm_1 (&dsecond)
50560 && expand_vec_perm_1 (&dthird);
50567 /* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
50568 permutation with two pshufb insns and an ior.  We should have already
50569 failed all two instruction sequences.  */
/* NOTE(review): chunk is elided — braces, nelt initialization and the
   testing_p early-return are missing; verify against the full file.
   128-bit SSSE3 vectors only, two distinct operands.  */
50572 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
50574 rtx rperm[2][16], vperm, l, h, op, m128;
50575 unsigned int i, nelt, eltsz;
50577 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
50579 gcc_assert (!d->one_operand_p);
50585 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50587 /* Generate two permutation masks.  If the required element is within
50588 the given vector it is shuffled into the proper lane.  If the required
50589 element is in the other vector, force a zero into the lane by setting
50590 bit 7 in the permutation mask.  */
50591 m128 = GEN_INT (-128);
50592 for (i = 0; i < nelt; ++i)
50594 unsigned j, e = d->perm[i];
50595 unsigned which = (e >= nelt);
/* Expand element-level index E into byte-level pshufb control bytes.  */
50599 for (j = 0; j < eltsz; ++j)
50601 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
50602 rperm[1-which][i*eltsz + j] = m128;
/* pshufb of op0 with mask 0 — zeroes where op1 should supply data.  */
50606 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
50607 vperm = force_reg (V16QImode, vperm);
50609 l = gen_reg_rtx (V16QImode);
50610 op = gen_lowpart (V16QImode, d->op0);
50611 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
/* pshufb of op1 with the complementary mask.  */
50613 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
50614 vperm = force_reg (V16QImode, vperm);
50616 h = gen_reg_rtx (V16QImode);
50617 op = gen_lowpart (V16QImode, d->op1);
50618 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* OR the two halves together; copy into the real target if needed.  */
50621 if (d->vmode != V16QImode)
50622 op = gen_reg_rtx (V16QImode);
50623 emit_insn (gen_iorv16qi3 (op, l, h));
50624 if (op != d->target)
50625 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50630 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
50631 with two vpshufb insns, vpermq and vpor.  We should have already failed
50632 all two or three instruction sequences.  */
/* NOTE(review): chunk is elided — function opening, testing_p handling
   and nelt initialization are missing; verify against the full file.
   One-operand V32QImode/V16HImode only per the visible guard.  */
50635 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
50637 rtx rperm[2][32], vperm, l, h, hp, op, m128;
50638 unsigned int i, nelt, eltsz;
50641 || !d->one_operand_p
50642 || (d->vmode != V32QImode && d->vmode != V16HImode)
50649 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50651 /* Generate two permutation masks.  If the required element is within
50652 the same lane, it is shuffled in.  If the required element from the
50653 other lane, force a zero by setting bit 7 in the permutation mask.
50654 In the other mask the mask has non-negative elements if element
50655 is requested from the other lane, but also moved to the other lane,
50656 so that the result of vpshufb can have the two V2TImode halves
50658 m128 = GEN_INT (-128);
50659 for (i = 0; i < nelt; ++i)
/* WHICH is 0 for same-lane elements, else the byte offset (in mask
   units) used to deposit the element into the opposite lane.  */
50661 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50662 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
50664 for (j = 0; j < eltsz; ++j)
50666 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
50667 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
/* vpshufb for the cross-lane elements (mask 1).  */
50671 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50672 vperm = force_reg (V32QImode, vperm);
50674 h = gen_reg_rtx (V32QImode);
50675 op = gen_lowpart (V32QImode, d->op0);
50676 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50678 /* Swap the 128-byte lanes of h into hp.  */
50679 hp = gen_reg_rtx (V4DImode);
50680 op = gen_lowpart (V4DImode, h);
50681 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
/* vpshufb for the in-lane elements (mask 0).  */
50684 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50685 vperm = force_reg (V32QImode, vperm);
50687 l = gen_reg_rtx (V32QImode);
50688 op = gen_lowpart (V32QImode, d->op0);
50689 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* OR in-lane and lane-swapped results; copy out if target differs.  */
50692 if (d->vmode != V32QImode)
50693 op = gen_reg_rtx (V32QImode);
50694 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
50695 if (op != d->target)
50696 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50701 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
50702 and extract-odd permutations of two V32QImode and V16QImode operand
50703 with two vpshufb insns, vpor and vpermq.  We should have already
50704 failed all two or three instruction sequences.  */
/* NOTE(review): chunk is elided — function opening, nelt initialization
   and the testing_p early-return are missing; verify against the full
   file.  Two-operand V32QImode/V16HImode only per the visible guard.  */
50707 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
50709 rtx rperm[2][32], vperm, l, h, ior, op, m128;
50710 unsigned int i, nelt, eltsz;
50713 || d->one_operand_p
50714 || (d->vmode != V32QImode && d->vmode != V16HImode)
/* Reject anything that is not a pure extract-even/odd pattern.  */
50717 for (i = 0; i < d->nelt; ++i)
50718 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
50725 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50727 /* Generate two permutation masks.  In the first permutation mask
50728 the first quarter will contain indexes for the first half
50729 of the op0, the second quarter will contain bit 7 set, third quarter
50730 will contain indexes for the second half of the op0 and the
50731 last quarter bit 7 set.  In the second permutation mask
50732 the first quarter will contain bit 7 set, the second quarter
50733 indexes for the first half of the op1, the third quarter bit 7 set
50734 and last quarter indexes for the second half of the op1.
50735 I.e. the first mask e.g. for V32QImode extract even will be:
50736 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
50737 (all values masked with 0xf except for -128) and second mask
50738 for extract even will be
50739 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
50740 m128 = GEN_INT (-128);
50741 for (i = 0; i < nelt; ++i)
50743 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50744 unsigned which = d->perm[i] >= nelt;
/* XORV swaps the middle two quarters so each operand's data lands in
   distinct quarters of the OR'd result.  */
50745 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
50747 for (j = 0; j < eltsz; ++j)
50749 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
50750 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
/* vpshufb op0 with mask 0.  */
50754 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50755 vperm = force_reg (V32QImode, vperm);
50757 l = gen_reg_rtx (V32QImode);
50758 op = gen_lowpart (V32QImode, d->op0);
50759 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* vpshufb op1 with mask 1, then vpor the two results together.  */
50761 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50762 vperm = force_reg (V32QImode, vperm);
50764 h = gen_reg_rtx (V32QImode);
50765 op = gen_lowpart (V32QImode, d->op1);
50766 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50768 ior = gen_reg_rtx (V32QImode);
50769 emit_insn (gen_iorv32qi3 (ior, l, h));
50771 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
50772 op = gen_reg_rtx (V4DImode);
50773 ior = gen_lowpart (V4DImode, ior);
50774 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
50775 const1_rtx, GEN_INT (3)));
50776 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50781 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
50782    and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
50783    with two "and" and "pack" or two "shift" and "pack" insns.  We should
50784    have already failed all two instruction sequences.  */
50787 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
50789   rtx op, dop0, dop1, t, rperm[16];
50790   unsigned i, odd, c, s, nelt = d->nelt;
50791   bool end_perm = false;
50792   machine_mode half_mode;
/* Insn generators selected per mode below: mask, pack and shift.  */
50793   rtx (*gen_and) (rtx, rtx, rtx);
50794   rtx (*gen_pack) (rtx, rtx, rtx);
50795   rtx (*gen_shift) (rtx, rtx, rtx);
50797   if (d->one_operand_p)
50803       /* Required for "pack".  */
50804       if (!TARGET_SSE4_1)
50808       half_mode = V4SImode;
50809       gen_and = gen_andv4si3;
50810       gen_pack = gen_sse4_1_packusdw;
50811       gen_shift = gen_lshrv4si3;
50814       /* No check as all instructions are SSE2.  */
50817       half_mode = V8HImode;
50818       gen_and = gen_andv8hi3;
50819       gen_pack = gen_sse2_packuswb;
50820       gen_shift = gen_lshrv8hi3;
50827       half_mode = V8SImode;
50828       gen_and = gen_andv8si3;
50829       gen_pack = gen_avx2_packusdw;
50830       gen_shift = gen_lshrv8si3;
50838       half_mode = V16HImode;
50839       gen_and = gen_andv16hi3;
50840       gen_pack = gen_avx2_packuswb;
50841       gen_shift = gen_lshrv16hi3;
50845       /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
50846 	 general shuffles.  */
50850   /* Check that permutation is even or odd.  */
50855   for (i = 1; i < nelt; ++i)
50856     if (d->perm[i] != 2 * i + odd)
50862   dop0 = gen_reg_rtx (half_mode);
50863   dop1 = gen_reg_rtx (half_mode);
/* Extract-even: mask off the high half of each wide element with AND,
   then pack the narrowed results.  */
50866       for (i = 0; i < nelt / 2; i++)
50867 	rperm[i] = GEN_INT (c);
50868       t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
50869       t = force_reg (half_mode, t);
50870       emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
50871       emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
/* Extract-odd: logical shift right brings odd elements into the low
   half of each wide element before packing.  */
50875       emit_insn (gen_shift (dop0,
50876 			    gen_lowpart (half_mode, d->op0),
50878       emit_insn (gen_shift (dop1,
50879 			    gen_lowpart (half_mode, d->op1),
50882   /* In AVX2 for 256 bit case we need to permute pack result.  */
50883   if (TARGET_AVX2 && end_perm)
50885       op = gen_reg_rtx (d->vmode);
50886       t = gen_reg_rtx (V4DImode);
50887       emit_insn (gen_pack (op, dop0, dop1));
/* 256-bit packs operate per 128-bit lane, so fix up lane order with
   a vpermq afterwards.  */
50888       emit_insn (gen_avx2_permv4di_1 (t,
50889 				      gen_lowpart (V4DImode, op),
50894       emit_move_insn (d->target, gen_lowpart (d->vmode, t));
50897     emit_insn (gen_pack (d->target, dop0, dop1));
50902 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
50903    and extract-odd permutations of two V64QI operands
50904    with two "shifts", two "truncs" and one "concat" insns for "odd"
50905    and two "truncs" and one concat insn for "even."
50906    Have already failed all two instruction sequences.  */
50909 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
50911   rtx t1, t2, t3, t4;
50912   unsigned i, odd, nelt = d->nelt;
/* AVX-512BW only; vpmovwb truncation is used below.  */
50914   if (!TARGET_AVX512BW
50915       || d->one_operand_p
50916       || d->vmode != V64QImode)
50919   /* Check that permutation is even or odd.  */
50924   for (i = 1; i < nelt; ++i)
50925     if (d->perm[i] != 2 * i + odd)
/* Odd extraction: shift each 16-bit element right so the odd byte
   lands in the low half before truncating.  */
50934       t1 = gen_reg_rtx (V32HImode);
50935       t2 = gen_reg_rtx (V32HImode);
50936       emit_insn (gen_lshrv32hi3 (t1,
50937 				 gen_lowpart (V32HImode, d->op0),
50939       emit_insn (gen_lshrv32hi3 (t2,
50940 				 gen_lowpart (V32HImode, d->op1),
/* Even extraction: the low byte of each word is already in place.  */
50945       t1 = gen_lowpart (V32HImode, d->op0);
50946       t2 = gen_lowpart (V32HImode, d->op1);
/* Truncate each word vector to bytes, then concatenate the halves.  */
50949   t3 = gen_reg_rtx (V32QImode);
50950   t4 = gen_reg_rtx (V32QImode);
50951   emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
50952   emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
50953   emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
50958 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
50959    and extract-odd permutations.  */
50962 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
50964   rtx t1, t2, t3, t4, t5;
/* V4DF case: split into lane halves, then unpack.  */
50971       t1 = gen_reg_rtx (V4DFmode);
50972       t2 = gen_reg_rtx (V4DFmode);
50974       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
50975       emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
50976       emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
50978       /* Now an unpck[lh]pd will produce the result required.  */
50980 	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
50982 	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* V8SF case: 0xdd selects odd positions, 0x88 selects even.  */
50988       int mask = odd ? 0xdd : 0x88;
50992       t1 = gen_reg_rtx (V8SFmode);
50993       t2 = gen_reg_rtx (V8SFmode);
50994       t3 = gen_reg_rtx (V8SFmode);
50996       /* Shuffle within the 128-bit lanes to produce:
50997 	 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
50998       emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
51001       /* Shuffle the lanes around to produce:
51002 	 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
51003       emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
51006       /* Shuffle within the 128-bit lanes to produce:
51007 	 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
51008       emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
51010       /* Shuffle within the 128-bit lanes to produce:
51011 	 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
51012       emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
51014       /* Shuffle the lanes around to produce:
51015 	 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
51016       emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
51025       /* These are always directly implementable by expand_vec_perm_1.  */
51026       gcc_unreachable ();
/* Narrow integer modes: prefer pack sequence, then pshufb.  */
51030 	return expand_vec_perm_even_odd_pack (d);
51031       else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
51032 	return expand_vec_perm_pshufb2 (d);
51037 	  /* We need 2*log2(N)-1 operations to achieve odd/even
51038 	     with interleave. */
51039 	  t1 = gen_reg_rtx (V8HImode);
51040 	  t2 = gen_reg_rtx (V8HImode);
51041 	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
51042 	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
51043 	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
51044 	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
51046 	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
51048 	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
51054       return expand_vec_perm_even_odd_pack (d);
51058       return expand_vec_perm_even_odd_pack (d);
/* V64QI: AVX-512BW shift/truncate/concat sequence.  */
51061       return expand_vec_perm_even_odd_trunc (d);
/* V4DI: retry the permutation in V4DF, where vperm2f128/unpck apply.  */
51066 	  struct expand_vec_perm_d d_copy = *d;
51067 	  d_copy.vmode = V4DFmode;
/* In testing mode use a scratch hard-reg placeholder; no insns are
   actually emitted then.  */
51069 	    d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
51071 	    d_copy.target = gen_reg_rtx (V4DFmode);
51072 	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
51073 	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
51074 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51077 		emit_move_insn (d->target,
51078 				gen_lowpart (V4DImode, d_copy.target));
51087       t1 = gen_reg_rtx (V4DImode);
51088       t2 = gen_reg_rtx (V4DImode);
51090       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
51091       emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
51092       emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
51094       /* Now an vpunpck[lh]qdq will produce the result required.  */
51096 	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
51098 	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* V8SI: first try the V8SF float path, fall back to integer insns.  */
51105 	  struct expand_vec_perm_d d_copy = *d;
51106 	  d_copy.vmode = V8SFmode;
51108 	    d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
51110 	    d_copy.target = gen_reg_rtx (V8SFmode);
51111 	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
51112 	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
51113 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51116 		emit_move_insn (d->target,
51117 				gen_lowpart (V8SImode, d_copy.target));
51126       t1 = gen_reg_rtx (V8SImode);
51127       t2 = gen_reg_rtx (V8SImode);
51128       t3 = gen_reg_rtx (V4DImode);
51129       t4 = gen_reg_rtx (V4DImode);
51130       t5 = gen_reg_rtx (V4DImode);
51132       /* Shuffle the lanes around into
51133 	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
51134       emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
51135 				    gen_lowpart (V4DImode, d->op1),
51137       emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
51138 				    gen_lowpart (V4DImode, d->op1),
51141       /* Swap the 2nd and 3rd position in each lane into
51142 	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
51143       emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
51144 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51145       emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
51146 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51148       /* Now an vpunpck[lh]qdq will produce
51149 	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
51151 	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
51152 					   gen_lowpart (V4DImode, t2));
51154 	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
51155 					  gen_lowpart (V4DImode, t2));
51157       emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
51161       gcc_unreachable ();
51167 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
51168    extract-even and extract-odd permutations.  */
51171 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
51173   unsigned i, odd, nelt = d->nelt;
/* ODD is taken from the first permutation index; only 0 (extract-even)
   or 1 (extract-odd) can match.  */
51176   if (odd != 0 && odd != 1)
/* Every element must continue the 2*i+odd progression.  */
51179   for (i = 1; i < nelt; ++i)
51180     if (d->perm[i] != 2 * i + odd)
51183   return expand_vec_perm_even_odd_1 (d, odd);
51186 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
51187    permutations.  We assume that expand_vec_perm_1 has already failed.  */
51190 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
51192   unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
51193   machine_mode vmode = d->vmode;
51194   unsigned char perm2[4];
51195   rtx op0 = d->op0, dest;
51202       /* These are special-cased in sse.md so that we can optionally
51203 	 use the vbroadcast instruction.  They expand to two insns
51204 	 if the input happens to be in a register.  */
51205       gcc_unreachable ();
51211       /* These are always implementable using standard shuffle patterns.  */
51212       gcc_unreachable ();
51216       /* These can be implemented via interleave.  We save one insn by
51217 	 stopping once we have promoted to V4SImode and then use pshufd.  */
/* Pick low or high interleave depending on which half ELT is in,
   then repeatedly widen until the element fills an SImode slot.  */
51223 	  rtx (*gen) (rtx, rtx, rtx)
51224 	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
51225 				 : gen_vec_interleave_lowv8hi;
51229 	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
51230 				       : gen_vec_interleave_highv8hi;
51235 	  dest = gen_reg_rtx (vmode);
51236 	  emit_insn (gen (dest, op0, op0));
51237 	  vmode = get_mode_wider_vector (vmode);
51238 	  op0 = gen_lowpart (vmode, dest);
51240       while (vmode != V4SImode);
/* Finish with a pshufd replicating the surviving SImode element.  */
51242       memset (perm2, elt, 4);
51243       dest = gen_reg_rtx (V4SImode);
51244       ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
51247 	emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
51255       /* For AVX2 broadcasts of the first element vpbroadcast* or
51256 	 vpermq should be used by expand_vec_perm_1.  */
51257       gcc_assert (!TARGET_AVX2 || d->perm[0]);
51261       gcc_unreachable ();
51265 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
51266    broadcast permutations.  */
51269 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
51271   unsigned i, elt, nelt = d->nelt;
/* Broadcast is only meaningful with a single source operand.  */
51273   if (!d->one_operand_p)
/* All indices must equal the first one for this to be a broadcast.  */
51277   for (i = 1; i < nelt; ++i)
51278     if (d->perm[i] != elt)
51281   return expand_vec_perm_broadcast_1 (d);
51284 /* Implement arbitrary permutations of two V64QImode operands
51285    will 2 vpermi2w, 2 vpshufb and one vpor instruction.  */
51287 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
/* Requires AVX-512BW (vpermi2w / 512-bit vpshufb).  */
51289   if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
51295   struct expand_vec_perm_d ds[2];
51296   rtx rperm[128], vperm, target0, target1;
51297   unsigned int i, nelt;
51298   machine_mode vmode;
/* Build two V32HImode sub-permutations over the same operands.  */
51303   for (i = 0; i < 2; i++)
51306       ds[i].vmode = V32HImode;
51308       ds[i].target = gen_reg_rtx (V32HImode);
51309       ds[i].op0 = gen_lowpart (V32HImode, d->op0);
51310       ds[i].op1 = gen_lowpart (V32HImode, d->op1);
51313   /* Prepare permutations such that the first one takes care of
51314      putting the even bytes into the right positions or one higher
51315      positions (ds[0]) and the second one takes care of
51316      putting the odd bytes into the right positions or one below
51319   for (i = 0; i < nelt; i++)
/* Word-level index: each vpermi2w moves the 16-bit word containing
   the wanted byte; the following vpshufb picks the correct byte.  */
51321       ds[i & 1].perm[i / 2] = d->perm[i] / 2;
/* rperm[0..63] is the byte mask for ds[0]'s result, rperm[64..127]
   for ds[1]'s; -1 slots are zeroed so the final vpor merges them.  */
51324 	  rperm[i] = constm1_rtx;
51325 	  rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51329 	  rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51330 	  rperm[i + 64] = constm1_rtx;
/* Expand both word-level permutations (expected to be single insns).  */
51334   bool ok = expand_vec_perm_1 (&ds[0]);
51336   ds[0].target = gen_lowpart (V64QImode, ds[0].target);
51338   ok = expand_vec_perm_1 (&ds[1]);
51340   ds[1].target = gen_lowpart (V64QImode, ds[1].target);
/* Byte-shuffle each intermediate result, then OR them together.  */
51342   vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
51343   vperm = force_reg (vmode, vperm);
51344   target0 = gen_reg_rtx (V64QImode);
51345   emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
51347   vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
51348   vperm = force_reg (vmode, vperm);
51349   target1 = gen_reg_rtx (V64QImode);
51350   emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
51352   emit_insn (gen_iorv64qi3 (d->target, target0, target1));
51356 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
51357    with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
51358    all the shorter instruction sequences.  */
51361 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
51363   rtx rperm[4][32], vperm, l[2], h[2], op, m128;
51364   unsigned int i, nelt, eltsz;
/* Only two-operand V32QImode/V16HImode permutations are handled.  */
51368       || d->one_operand_p
51369       || (d->vmode != V32QImode && d->vmode != V16HImode))
51376   eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51378   /* Generate 4 permutation masks.  If the required element is within
51379      the same lane, it is shuffled in.  If the required element from the
51380      other lane, force a zero by setting bit 7 in the permutation mask.
51381      In the other mask the mask has non-negative elements if element
51382      is requested from the other lane, but also moved to the other lane,
51383      so that the result of vpshufb can have the two V2TImode halves
51385   m128 = GEN_INT (-128);
/* Start with all four masks fully zeroing (-128 = bit 7 set).  */
51386   for (i = 0; i < 32; ++i)
51388       rperm[0][i] = m128;
51389       rperm[1][i] = m128;
51390       rperm[2][i] = m128;
51391       rperm[3][i] = m128;
51397   for (i = 0; i < nelt; ++i)
51399       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
/* Nonzero when the wanted element lives in the other 128-bit lane.  */
51400       unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
/* Mask selection: bit 1 = source operand (op0/op1), bit 0 = whether
   a cross-lane fixup (vpermq) is needed afterwards.  */
51401       unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
51403       for (j = 0; j < eltsz; ++j)
51404 	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
51405       used[which] = true;
/* Emit the cross-lane vpshufb's (masks 1 and 3) where used.  */
51408   for (i = 0; i < 2; ++i)
51410       if (!used[2 * i + 1])
51415       vperm = gen_rtx_CONST_VECTOR (V32QImode,
51416 				    gen_rtvec_v (32, rperm[2 * i + 1]));
51417       vperm = force_reg (V32QImode, vperm);
51418       h[i] = gen_reg_rtx (V32QImode);
51419       op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51420       emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
51423   /* Swap the 128-byte lanes of h[X].  */
51424   for (i = 0; i < 2; ++i)
51426       if (h[i] == NULL_RTX)
51428       op = gen_reg_rtx (V4DImode);
51429       emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
51430 				      const2_rtx, GEN_INT (3), const0_rtx,
51432       h[i] = gen_lowpart (V32QImode, op);
/* Emit the same-lane vpshufb's (masks 0 and 2) where used.  */
51435   for (i = 0; i < 2; ++i)
51442       vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
51443       vperm = force_reg (V32QImode, vperm);
51444       l[i] = gen_reg_rtx (V32QImode);
51445       op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51446       emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* Merge same-lane and cross-lane parts per operand with vpor.  */
51449   for (i = 0; i < 2; ++i)
51453 	  op = gen_reg_rtx (V32QImode);
51454 	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
51461   gcc_assert (l[0] && l[1]);
/* Final vpor combines the two operands' contributions.  */
51463   if (d->vmode != V32QImode)
51464     op = gen_reg_rtx (V32QImode);
51465   emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
51466   if (op != d->target)
51467     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51471 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
51472    With all of the interface bits taken care of, perform the expansion
51473    in D and return true on success.  */
51476 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Strategies are tried strictly in order of increasing insn count.  */
51478   /* Try a single instruction expansion.  */
51479   if (expand_vec_perm_1 (d))
51482   /* Try sequences of two instructions.  */
51484   if (expand_vec_perm_pshuflw_pshufhw (d))
51487   if (expand_vec_perm_palignr (d, false))
51490   if (expand_vec_perm_interleave2 (d))
51493   if (expand_vec_perm_broadcast (d))
51496   if (expand_vec_perm_vpermq_perm_1 (d))
51499   if (expand_vec_perm_vperm2f128 (d))
51502   if (expand_vec_perm_pblendv (d))
51505   /* Try sequences of three instructions.  */
51507   if (expand_vec_perm_even_odd_pack (d))
51510   if (expand_vec_perm_2vperm2f128_vshuf (d))
51513   if (expand_vec_perm_pshufb2 (d))
51516   if (expand_vec_perm_interleave3 (d))
51519   if (expand_vec_perm_vperm2f128_vblend (d))
51522   /* Try sequences of four instructions.  */
51524   if (expand_vec_perm_even_odd_trunc (d))
51526   if (expand_vec_perm_vpshufb2_vpermq (d))
51529   if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
51532   if (expand_vec_perm_vpermi2_vpshub2 (d))
51535   /* ??? Look for narrow permutations whose element orderings would
51536      allow the promotion to a wider mode.  */
51538   /* ??? Look for sequences of interleave or a wider permute that place
51539      the data into the correct lanes for a half-vector shuffle like
51540      pshuf[lh]w or vpermilps.  */
51542   /* ??? Look for sequences of interleave that produce the desired results.
51543      The combinatorics of punpck[lh] get pretty ugly... */
51545   if (expand_vec_perm_even_odd (d))
51548   /* Even longer sequences.  */
51549   if (expand_vec_perm_vpshufb4_vpermq2 (d))
51552   /* See if we can get the same permutation in different vector integer
/* Last resort: canonicalize to another integer vector mode and retry
   the single-insn expander on the canonical form.  */
51554   struct expand_vec_perm_d nd;
51555   if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
51558 	emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
51565 /* If a permutation only uses one operand, make it clear.  Returns true
51566    if the permutation references both operands.  */
51569 canonicalize_perm (struct expand_vec_perm_d *d)
51571   int i, which, nelt = d->nelt;
/* WHICH accumulates bit 1 for op0 references (< nelt) and bit 2 for
   op1 references (>= nelt).  */
51573   for (i = which = 0; i < nelt; ++i)
51574       which |= (d->perm[i] < nelt ? 1 : 2);
51576   d->one_operand_p = true;
/* Both operands referenced but not identical: genuinely two-operand.  */
51583       if (!rtx_equal_p (d->op0, d->op1))
51585 	  d->one_operand_p = false;
51588       /* The elements of PERM do not suggest that only the first operand
51589 	 is used, but both operands are identical.  Allow easier matching
51590 	 of the permutation by folding the permutation into the single
/* Fold indices into the first operand's range.  */
51595       for (i = 0; i < nelt; ++i)
51596 	d->perm[i] &= nelt - 1;
51605   return (which == 3);
/* Expand a constant-selector vector permutation: operands[0] = target,
   operands[1..2] = inputs, operands[3] = CONST_VECTOR selector.  */
51609 ix86_expand_vec_perm_const (rtx operands[4])
51611   struct expand_vec_perm_d d;
51612   unsigned char perm[MAX_VECT_LEN];
51617   d.target = operands[0];
51618   d.op0 = operands[1];
51619   d.op1 = operands[2];
51622   d.vmode = GET_MODE (d.target);
51623   gcc_assert (VECTOR_MODE_P (d.vmode));
51624   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51625   d.testing_p = false;
51627   gcc_assert (GET_CODE (sel) == CONST_VECTOR);
51628   gcc_assert (XVECLEN (sel, 0) == nelt);
51629   gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Copy the selector, wrapping indices into [0, 2*nelt).  PERM keeps an
   unmodified copy for the retry below.  */
51631   for (i = 0; i < nelt; ++i)
51633       rtx e = XVECEXP (sel, 0, i);
51634       int ei = INTVAL (e) & (2 * nelt - 1);
51639   two_args = canonicalize_perm (&d);
51641   if (ix86_expand_vec_perm_const_1 (&d))
51644   /* If the selector says both arguments are needed, but the operands are the
51645      same, the above tried to expand with one_operand_p and flattened selector.
51646      If that didn't work, retry without one_operand_p; we succeeded with that
51648   if (two_args && d.one_operand_p)
51650       d.one_operand_p = false;
/* Restore the unflattened selector before retrying.  */
51651       memcpy (d.perm, perm, sizeof (perm));
51652       return ix86_expand_vec_perm_const_1 (&d);
51658 /* Implement targetm.vectorize.vec_perm_const_ok.  */
51661 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
51662 				  const unsigned char *sel)
51664   struct expand_vec_perm_d d;
51665   unsigned int i, nelt, which;
51669   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
/* TESTING_P: the expanders below must emit no insns in this mode.  */
51670   d.testing_p = true;
51672   /* Given sufficient ISA support we can just return true here
51673      for selected vector modes.  */
51680       if (TARGET_AVX512F)
51681 	/* All implementable with a single vpermi2 insn.  */
51685       if (TARGET_AVX512BW)
51686 	/* All implementable with a single vpermi2 insn.  */
51690       if (TARGET_AVX512BW)
51691 	/* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn.  */
51698       if (TARGET_AVX512VL)
51699 	/* All implementable with a single vpermi2 insn.  */
51704       /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
51709       /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
51716       /* All implementable with a single vpperm insn.  */
51719       /* All implementable with 2 pshufb + 1 ior.  */
51725       /* All implementable with shufpd or unpck[lh]pd.  */
51731   /* Extract the values from the vector CST into the permutation
51733   memcpy (d.perm, sel, nelt);
51734   for (i = which = 0; i < nelt; ++i)
51736       unsigned char e = d.perm[i];
51737       gcc_assert (e < 2 * nelt);
51738       which |= (e < nelt ? 1 : 2);
51741   /* For all elements from second vector, fold the elements to first.  */
51743     for (i = 0; i < nelt; ++i)
51746   /* Check whether the mask can be applied to the vector type.  */
51747   d.one_operand_p = (which != 3);
51749   /* Implementable with shufps or pshufd.  */
51750   if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
51753   /* Otherwise we have to go through the motions and see if we can
51754      figure out how to generate the requested permutation.  */
/* Use raw placeholder regs beyond LAST_VIRTUAL_REGISTER; nothing is
   emitted in testing mode, so real pseudos are not needed.  */
51755   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
51756   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
51757   if (!d.one_operand_p)
51758     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
51761   ret = ix86_expand_vec_perm_const_1 (&d);
/* Expand an extract-even (ODD == 0) or extract-odd (ODD == 1)
   permutation of OP0/OP1 into TARG.  */
51768 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
51770   struct expand_vec_perm_d d;
51776   d.vmode = GET_MODE (targ);
51777   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51778   d.one_operand_p = false;
51779   d.testing_p = false;
/* Build the even/odd selector directly.  */
51781   for (i = 0; i < nelt; ++i)
51782     d.perm[i] = i * 2 + odd;
51784   /* We'll either be able to implement the permutation directly...  */
51785   if (expand_vec_perm_1 (&d))
51788   /* ... or we use the special-case patterns.  */
51789   expand_vec_perm_even_odd_1 (&d, odd);
/* Expand an interleave (low half when !HIGH_P, high half when HIGH_P)
   of OP0 and OP1 into TARG via the const-permutation machinery.  */
51793 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
51795   struct expand_vec_perm_d d;
51796   unsigned i, nelt, base;
51802   d.vmode = GET_MODE (targ);
51803   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51804   d.one_operand_p = false;
51805   d.testing_p = false;
/* Selector alternates op0 element, op1 element, starting at BASE.  */
51807   base = high_p ? nelt / 2 : 0;
51808   for (i = 0; i < nelt / 2; ++i)
51810       d.perm[i * 2] = i + base;
51811       d.perm[i * 2 + 1] = i + base + nelt;
51814   /* Note that for AVX this isn't one instruction.  */
51815   ok = ix86_expand_vec_perm_const_1 (&d);
51820 /* Expand a vector operation CODE for a V*QImode in terms of the
51821    same operation on V*HImode.  */
51824 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
51826   machine_mode qimode = GET_MODE (dest);
51827   machine_mode himode;
51828   rtx (*gen_il) (rtx, rtx, rtx);
51829   rtx (*gen_ih) (rtx, rtx, rtx);
51830   rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
51831   struct expand_vec_perm_d d;
51832   bool ok, full_interleave;
51833   bool uns_p = false;
/* Pick interleave generators per QI vector width.  */
51840       gen_il = gen_vec_interleave_lowv16qi;
51841       gen_ih = gen_vec_interleave_highv16qi;
51844       himode = V16HImode;
51845       gen_il = gen_avx2_interleave_lowv32qi;
51846       gen_ih = gen_avx2_interleave_highv32qi;
51849       himode = V32HImode;
51850       gen_il = gen_avx512bw_interleave_lowv64qi;
51851       gen_ih = gen_avx512bw_interleave_highv64qi;
51854       gcc_unreachable ();
51857       op2_l = op2_h = op2;
51861       /* Unpack data such that we've got a source byte in each low byte of
51862 	 each word.  We don't care what goes into the high byte of each word.
51863 	 Rather than trying to get zero in there, most convenient is to let
51864 	 it be a copy of the low byte.  */
51865       op2_l = gen_reg_rtx (qimode);
51866       op2_h = gen_reg_rtx (qimode);
51867       emit_insn (gen_il (op2_l, op2, op2));
51868       emit_insn (gen_ih (op2_h, op2, op2));
51871       op1_l = gen_reg_rtx (qimode);
51872       op1_h = gen_reg_rtx (qimode);
51873       emit_insn (gen_il (op1_l, op1, op1));
51874       emit_insn (gen_ih (op1_h, op1, op1));
51875       full_interleave = qimode == V16QImode;
/* Shift path: widen via sign/zero extension instead of interleave.  */
51883       op1_l = gen_reg_rtx (himode);
51884       op1_h = gen_reg_rtx (himode);
51885       ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
51886       ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
51887       full_interleave = true;
51890       gcc_unreachable ();
51893   /* Perform the operation.  */
51894   res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
51896   res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
51898   gcc_assert (res_l && res_h);
51900   /* Merge the data back into the right place.  */
51902   d.op0 = gen_lowpart (qimode, res_l);
51903   d.op1 = gen_lowpart (qimode, res_h);
51905   d.nelt = GET_MODE_NUNITS (qimode);
51906   d.one_operand_p = false;
51907   d.testing_p = false;
51909   if (full_interleave)
51911       /* For SSE2, we used an full interleave, so the desired
51912 	 results are in the even elements.  */
51913       for (i = 0; i < 64; ++i)
51918       /* For AVX, the interleave used above was not cross-lane.  So the
51919 	 extraction is evens but with the second and third quarter swapped.
51920 	 Happily, that is even one insn shorter than even extraction.  */
51921       for (i = 0; i < 64; ++i)
51922 	d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
51925   ok = ix86_expand_vec_perm_const_1 (&d);
/* Record a REG_EQUAL note so later passes see the QImode operation.  */
51928   set_unique_reg_note (get_last_insn (), REG_EQUAL,
51929 		       gen_rtx_fmt_ee (code, qimode, op1, op2));
51932 /* Helper function of ix86_expand_mul_widen_evenodd.  Return true
51933    if op is CONST_VECTOR with all odd elements equal to their
51934    preceding element.  */
51937 const_vector_equal_evenodd_p (rtx op)
51939   machine_mode mode = GET_MODE (op);
51940   int i, nunits = GET_MODE_NUNITS (mode);
51941   if (GET_CODE (op) != CONST_VECTOR
51942       || nunits != CONST_VECTOR_NUNITS (op))
/* Pointer equality suffices: CONST_VECTOR elements are shared rtxes.  */
51944   for (i = 0; i < nunits; i += 2)
51945     if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (ODD_P false) or odd (ODD_P
   true) SImode elements of OP1/OP2 into DEST.  UNS_P selects unsigned
   vs. signed multiplication.  */
51951 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
51952 			       bool uns_p, bool odd_p)
51954   machine_mode mode = GET_MODE (op1);
51955   machine_mode wmode = GET_MODE (dest);
51957   rtx orig_op1 = op1, orig_op2 = op2;
51959   if (!nonimmediate_operand (op1, mode))
51960     op1 = force_reg (mode, op1);
51961   if (!nonimmediate_operand (op2, mode))
51962     op2 = force_reg (mode, op2);
51964   /* We only play even/odd games with vectors of SImode.  */
51965   gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
51967   /* If we're looking for the odd results, shift those members down to
51968      the even slots.  For some cpus this is faster than a PSHUFD.  */
51971       /* For XOP use vpmacsdqh, but only for smult, as it is only
51973       if (TARGET_XOP && mode == V4SImode && !uns_p)
/* vpmacsdqh multiply-accumulates; add to a zero accumulator.  */
51975 	  x = force_reg (wmode, CONST0_RTX (wmode));
51976 	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Shift by one element width; skip the shift when a constant operand
   already has equal even/odd pairs.  */
51980       x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
51981       if (!const_vector_equal_evenodd_p (orig_op1))
51982 	op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
51983 			    x, NULL, 1, OPTAB_DIRECT);
51984       if (!const_vector_equal_evenodd_p (orig_op2))
51985 	op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
51986 			    x, NULL, 1, OPTAB_DIRECT);
51987       op1 = gen_lowpart (mode, op1);
51988       op2 = gen_lowpart (mode, op2);
/* Dispatch on vector width to the matching widen-mult-even pattern.  */
51991   if (mode == V16SImode)
51994 	x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
51996 	x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
51998   else if (mode == V8SImode)
52001 	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
52003 	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
52006     x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
52007   else if (TARGET_SSE4_1)
52008     x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
52011       rtx s1, s2, t0, t1, t2;
52013       /* The easiest way to implement this without PMULDQ is to go through
52014 	 the motions as if we are performing a full 64-bit multiply.  With
52015 	 the exception that we need to do less shuffling of the elements.  */
52017       /* Compute the sign-extension, aka highparts, of the two operands.  */
52018       s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
52019 				op1, pc_rtx, pc_rtx);
52020       s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
52021 				op2, pc_rtx, pc_rtx);
52023       /* Multiply LO(A) * HI(B), and vice-versa.  */
52024       t1 = gen_reg_rtx (wmode);
52025       t2 = gen_reg_rtx (wmode);
52026       emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
52027       emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
52029       /* Multiply LO(A) * LO(B).  */
52030       t0 = gen_reg_rtx (wmode);
52031       emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
52033       /* Combine and shift the highparts into place.  */
52034       t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
52035       t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
52038       /* Combine high and low parts.  */
52039       force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (HIGH_P false) or high
   (HIGH_P true) half of OP1/OP2 into DEST.  UNS_P selects unsigned
   vs. signed multiplication.  */
52046 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
52047 			    bool uns_p, bool high_p)
52049   machine_mode wmode = GET_MODE (dest);
52050   machine_mode mode = GET_MODE (op1);
52051   rtx t1, t2, t3, t4, mask;
52056       t1 = gen_reg_rtx (mode);
52057       t2 = gen_reg_rtx (mode);
52058       if (TARGET_XOP && !uns_p)
52060 	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
52061 	     shuffle the elements once so that all elements are in the right
52062 	     place for immediate use: { A C B D }.  */
52063 	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
52064 					const1_rtx, GEN_INT (3)));
52065 	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
52066 					const1_rtx, GEN_INT (3)));
52070 	  /* Put the elements into place for the multiply.  */
52071 	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
52072 	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
/* After the shuffles the even/odd multiply computes the hi/lo part.  */
52075       ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
52079       /* Shuffle the elements between the lanes.  After this we
52080 	 have { A B E F | C D G H } for each operand.  */
52081       t1 = gen_reg_rtx (V4DImode);
52082       t2 = gen_reg_rtx (V4DImode);
52083       emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
52084 				      const0_rtx, const2_rtx,
52085 				      const1_rtx, GEN_INT (3)));
52086       emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
52087 				      const0_rtx, const2_rtx,
52088 				      const1_rtx, GEN_INT (3)));
52090       /* Shuffle the elements within the lanes.  After this we
52091 	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
52092       t3 = gen_reg_rtx (V8SImode);
52093       t4 = gen_reg_rtx (V8SImode);
/* pshufd immediate: replicate elements 2,2,3,3 (high) or 0,0,1,1 (low).  */
52094       mask = GEN_INT (high_p
52095 		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
52096 		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
52097       emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
52098       emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
52100       ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* HImode path: combine a full low multiply with a highpart multiply,
   then interleave the two halves.  */
52105       t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
52106 			 uns_p, OPTAB_DIRECT);
52107       t2 = expand_binop (mode,
52108 			 uns_p ? umul_highpart_optab : smul_highpart_optab,
52109 			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
52110       gcc_assert (t1 && t2);
52112       t3 = gen_reg_rtx (mode);
52113       ix86_expand_vec_interleave (t3, t1, t2, high_p);
52114       emit_move_insn (dest, gen_lowpart (wmode, t3));
/* QImode path: widen via unpack, then multiply in the wider mode.  */
52122       t1 = gen_reg_rtx (wmode);
52123       t2 = gen_reg_rtx (wmode);
52124       ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
52125       ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
52127       emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
52131       gcc_unreachable ();
/* Expand a V4SImode multiply OP0 = OP1 * OP2 using only SSE2, built
   from two widening even/odd unsigned multiplies whose low halves are
   shuffled back together with an interleave.  */
52136 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
52138 rtx res_1, res_2, res_3, res_4;
52140 res_1 = gen_reg_rtx (V4SImode);
52141 res_2 = gen_reg_rtx (V4SImode);
52142 res_3 = gen_reg_rtx (V2DImode);
52143 res_4 = gen_reg_rtx (V2DImode);
52144 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
52145 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
52147 /* Move the results in element 2 down to element 1; we don't care
52148 what goes in elements 2 and 3. Then we can merge the parts
52149 back together with an interleave.
52151 Note that two other sequences were tried:
52152 (1) Use interleaves at the start instead of psrldq, which allows
52153 us to use a single shufps to merge things back at the end.
52154 (2) Use shufps here to combine the two vectors, then pshufd to
52155 put the elements in the correct order.
52156 In both cases the cost of the reformatting stall was too high
52157 and the overall sequence slower. */
52159 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
52160 const0_rtx, const2_rtx,
52161 const0_rtx, const0_rtx));
52162 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
52163 const0_rtx, const2_rtx,
52164 const0_rtx, const0_rtx));
/* res_1 is reused here to hold the emitted insn so a REG_EQUAL note
   describing the whole multiply can be attached to it.  */
52165 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
52167 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand a DImode-element vector multiply OP0 = OP1 * OP2 for
   V2DI/V4DI/V8DI.  Prefers the single-insn AVX512DQ multiply, then an
   XOP sequence for V2DI, and finally a generic schoolbook expansion
   built from widening even unsigned multiplies and shifts.  */
52171 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
52173 machine_mode mode = GET_MODE (op0);
52174 rtx t1, t2, t3, t4, t5, t6;
52176 if (TARGET_AVX512DQ && mode == V8DImode)
52177 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
52178 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
52179 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
52180 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
52181 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
52182 else if (TARGET_XOP && mode == V2DImode)
52184 /* op1: A,B,C,D, op2: E,F,G,H */
52185 op1 = gen_lowpart (V4SImode, op1);
52186 op2 = gen_lowpart (V4SImode, op2);
52188 t1 = gen_reg_rtx (V4SImode);
52189 t2 = gen_reg_rtx (V4SImode);
52190 t3 = gen_reg_rtx (V2DImode);
52191 t4 = gen_reg_rtx (V2DImode);
52194 emit_insn (gen_sse2_pshufd_1 (t1, op1,
52200 /* t2: (B*E),(A*F),(D*G),(C*H) */
52201 emit_insn (gen_mulv4si3 (t2, t1, op2));
52203 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
52204 emit_insn (gen_xop_phadddq (t3, t2));
52206 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
52207 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
52209 /* Multiply lower parts and add all */
52210 t5 = gen_reg_rtx (V2DImode);
52211 emit_insn (gen_vec_widen_umult_even_v4si (t5,
52212 gen_lowpart (V4SImode, op1),
52213 gen_lowpart (V4SImode, op2)));
52214 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic path: pick the even-widening multiply for the element
   count of MODE, then combine low and cross products manually.  */
52219 machine_mode nmode;
52220 rtx (*umul) (rtx, rtx, rtx);
52222 if (mode == V2DImode)
52224 umul = gen_vec_widen_umult_even_v4si;
52227 else if (mode == V4DImode)
52229 umul = gen_vec_widen_umult_even_v8si;
52232 else if (mode == V8DImode)
52234 umul = gen_vec_widen_umult_even_v16si;
52238 gcc_unreachable ();
52241 /* Multiply low parts. */
52242 t1 = gen_reg_rtx (mode);
52243 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
52245 /* Shift input vectors right 32 bits so we can multiply high parts. */
52247 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
52248 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
52250 /* Multiply high parts by low parts. */
52251 t4 = gen_reg_rtx (mode);
52252 t5 = gen_reg_rtx (mode);
52253 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
52254 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
52256 /* Combine and shift the highparts back. */
52257 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
52258 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
52260 /* Combine high and low parts. */
52261 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
/* Attach a REG_EQUAL note so the optimizers see the whole sequence
   as a single multiply.  */
52264 set_unique_reg_note (get_last_insn (), REG_EQUAL,
52265 gen_rtx_MULT (mode, op1, op2));
52268 /* Return 1 if control transfer instruction INSN
52269 should be encoded with bnd prefix.
52270 If insn is NULL then return 1 when control
52271 transfer instructions should be prefixed with
52272 bnd by default for current function. */
52275 ix86_bnd_prefixed_insn_p (rtx insn)
52277 /* For call insns check special flag. */
52278 if (insn && CALL_P (insn))
52280 rtx call = get_call_rtx_from (insn);
52282 return CALL_EXPR_WITH_BOUNDS_P (call);
52285 /* All other insns are prefixed only if function is instrumented. */
52286 return chkp_function_instrumented_p (current_function_decl);
52289 /* Calculate integer abs() using only SSE2 instructions. */
52292 ix86_expand_sse2_abs (rtx target, rtx input)
52294 machine_mode mode = GET_MODE (target);
52299 /* For 32-bit signed integer X, the best way to calculate the absolute
52300 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
52302 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
52303 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
52304 NULL, 0, OPTAB_DIRECT);
52305 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
52306 NULL, 0, OPTAB_DIRECT);
52307 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
52308 target, 0, OPTAB_DIRECT);
52311 /* For 16-bit signed integer X, the best way to calculate the absolute
52312 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
52314 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52316 x = expand_simple_binop (mode, SMAX, tmp0, input,
52317 target, 0, OPTAB_DIRECT);
52320 /* For 8-bit signed integer X, the best way to calculate the absolute
52321 value of X is min ((unsigned char) X, (unsigned char) (-X)),
52322 as SSE2 provides the PMINUB insn. */
52324 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52326 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
52327 target, 0, OPTAB_DIRECT);
52331 gcc_unreachable ();
/* The expanders above may have placed the result somewhere other than
   TARGET; copy it there if so.  */
52335 emit_move_insn (target, x);
52338 /* Expand an extract from a vector register through pextr insn.
52339 Return true if successful. */
52342 ix86_expand_pextr (rtx *operands)
52344 rtx dst = operands[0];
52345 rtx src = operands[1];
/* operands[2] is the bit-size of the field, operands[3] the bit
   position of the field within SRC.  */
52347 unsigned int size = INTVAL (operands[2]);
52348 unsigned int pos = INTVAL (operands[3]);
52350 if (SUBREG_P (dst))
52352 /* Reject non-lowpart subregs. */
52353 if (SUBREG_BYTE (dst) > 0)
52355 dst = SUBREG_REG (dst);
/* Fold a SUBREG source into an adjusted bit position.  */
52358 if (SUBREG_P (src))
52360 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
52361 src = SUBREG_REG (src);
52364 switch (GET_MODE (src))
52373 machine_mode srcmode, dstmode;
52376 dstmode = mode_for_size (size, MODE_INT, 0);
/* Choose the vector mode matching the extraction width; the byte
   and dword/qword forms need SSE4.1 (pextrb/pextrd/pextrq).  */
52381 if (!TARGET_SSE4_1)
52383 srcmode = V16QImode;
52389 srcmode = V8HImode;
52393 if (!TARGET_SSE4_1)
52395 srcmode = V4SImode;
52399 gcc_assert (TARGET_64BIT);
52400 if (!TARGET_SSE4_1)
52402 srcmode = V2DImode;
52409 /* Reject extractions from misaligned positions. */
52410 if (pos & (size-1))
52413 if (GET_MODE (dst) == dstmode)
52416 d = gen_reg_rtx (dstmode);
52418 /* Construct insn pattern. */
52419 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
52420 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
52422 /* Let the rtl optimizers know about the zero extension performed. */
52423 if (dstmode == QImode || dstmode == HImode)
52425 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
52426 d = gen_lowpart (SImode, d);
52429 emit_insn (gen_rtx_SET (d, pat));
52432 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52441 /* Expand an insert into a vector register through pinsr insn.
52442 Return true if successful. */
52445 ix86_expand_pinsr (rtx *operands)
52447 rtx dst = operands[0];
52448 rtx src = operands[3];
/* operands[1] is the bit-size of the field, operands[2] the bit
   position of the field within DST.  */
52450 unsigned int size = INTVAL (operands[1]);
52451 unsigned int pos = INTVAL (operands[2]);
/* Fold a SUBREG destination into an adjusted bit position.  */
52453 if (SUBREG_P (dst))
52455 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
52456 dst = SUBREG_REG (dst);
52459 switch (GET_MODE (dst))
52468 machine_mode srcmode, dstmode;
52469 rtx (*pinsr)(rtx, rtx, rtx, rtx);
52472 srcmode = mode_for_size (size, MODE_INT, 0);
/* Select the vector mode and pinsr generator for the insertion
   width; byte and dword/qword variants require SSE4.1.  */
52477 if (!TARGET_SSE4_1)
52479 dstmode = V16QImode;
52480 pinsr = gen_sse4_1_pinsrb;
52486 dstmode = V8HImode;
52487 pinsr = gen_sse2_pinsrw;
52491 if (!TARGET_SSE4_1)
52493 dstmode = V4SImode;
52494 pinsr = gen_sse4_1_pinsrd;
52498 gcc_assert (TARGET_64BIT);
52499 if (!TARGET_SSE4_1)
52501 dstmode = V2DImode;
52502 pinsr = gen_sse4_1_pinsrq;
52509 /* Reject insertions to misaligned positions. */
52510 if (pos & (size-1))
52513 if (SUBREG_P (src))
52515 unsigned int srcpos = SUBREG_BYTE (src);
/* A non-lowpart SUBREG source is first extracted into a fresh
   register via ix86_expand_pextr.  */
52521 extr_ops[0] = gen_reg_rtx (srcmode);
52522 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
52523 extr_ops[2] = GEN_INT (size);
52524 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
52526 if (!ix86_expand_pextr (extr_ops))
52532 src = gen_lowpart (srcmode, SUBREG_REG (src));
52535 if (GET_MODE (dst) == dstmode)
52538 d = gen_reg_rtx (dstmode);
52540 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
52541 gen_lowpart (srcmode, src),
52542 GEN_INT (1 << (pos / size))));
52544 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52553 /* This function returns the calling abi specific va_list type node.
52554 It returns the FNDECL specific va_list type. */
52557 ix86_fn_abi_va_list (tree fndecl)
52560 return va_list_type_node;
52561 gcc_assert (fndecl != NULL_TREE);
/* Pick the MS or SysV va_list depending on FNDECL's calling ABI.  */
52563 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
52564 return ms_va_list_type_node;
52566 return sysv_va_list_type_node;
52569 /* Returns the canonical va_list type specified by TYPE. If there
52570 is no valid TYPE provided, it return NULL_TREE. */
52573 ix86_canonical_va_list_type (tree type)
52577 /* Resolve references and pointers to va_list type. */
52578 if (TREE_CODE (type) == MEM_REF)
52579 type = TREE_TYPE (type);
52580 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
52581 type = TREE_TYPE (type);
52582 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
52583 type = TREE_TYPE (type);
/* The same unwrap-and-compare check is performed below against each
   candidate in turn: the generic va_list, then the SysV va_list,
   then the MS va_list type node.  */
52585 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
52587 wtype = va_list_type_node;
52588 gcc_assert (wtype != NULL_TREE);
52590 if (TREE_CODE (wtype) == ARRAY_TYPE)
52592 /* If va_list is an array type, the argument may have decayed
52593 to a pointer type, e.g. by being passed to another function.
52594 In that case, unwrap both types so that we can compare the
52595 underlying records. */
52596 if (TREE_CODE (htype) == ARRAY_TYPE
52597 || POINTER_TYPE_P (htype))
52599 wtype = TREE_TYPE (wtype);
52600 htype = TREE_TYPE (htype);
52603 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52604 return va_list_type_node;
52605 wtype = sysv_va_list_type_node;
52606 gcc_assert (wtype != NULL_TREE);
52608 if (TREE_CODE (wtype) == ARRAY_TYPE)
52610 /* If va_list is an array type, the argument may have decayed
52611 to a pointer type, e.g. by being passed to another function.
52612 In that case, unwrap both types so that we can compare the
52613 underlying records. */
52614 if (TREE_CODE (htype) == ARRAY_TYPE
52615 || POINTER_TYPE_P (htype))
52617 wtype = TREE_TYPE (wtype);
52618 htype = TREE_TYPE (htype);
52621 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52622 return sysv_va_list_type_node;
52623 wtype = ms_va_list_type_node;
52624 gcc_assert (wtype != NULL_TREE);
52626 if (TREE_CODE (wtype) == ARRAY_TYPE)
52628 /* If va_list is an array type, the argument may have decayed
52629 to a pointer type, e.g. by being passed to another function.
52630 In that case, unwrap both types so that we can compare the
52631 underlying records. */
52632 if (TREE_CODE (htype) == ARRAY_TYPE
52633 || POINTER_TYPE_P (htype))
52635 wtype = TREE_TYPE (wtype);
52636 htype = TREE_TYPE (htype);
52639 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52640 return ms_va_list_type_node;
/* Not one of the target va_list types; defer to the generic hook.  */
52643 return std_canonical_va_list_type (type);
52646 /* Iterate through the target-specific builtin types for va_list.
52647 IDX denotes the iterator, *PTREE is set to the result type of
52648 the va_list builtin, and *PNAME to its internal type.
52649 Returns zero if there is no element for this index, otherwise
52650 IDX should be increased upon the next call.
52651 Note, do not iterate a base builtin's name like __builtin_va_list.
52652 Used from c_common_nodes_and_builtins. */
52655 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
52665 *ptree = ms_va_list_type_node;
52666 *pname = "__builtin_ms_va_list";
52670 *ptree = sysv_va_list_type_node;
52671 *pname = "__builtin_sysv_va_list";
/* Target hook registrations and tuning constants for the BD/AMD
   dispatch-window scheduler implemented below.  */
52679 #undef TARGET_SCHED_DISPATCH
52680 #define TARGET_SCHED_DISPATCH has_dispatch
52681 #undef TARGET_SCHED_DISPATCH_DO
52682 #define TARGET_SCHED_DISPATCH_DO do_dispatch
52683 #undef TARGET_SCHED_REASSOCIATION_WIDTH
52684 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
52685 #undef TARGET_SCHED_REORDER
52686 #define TARGET_SCHED_REORDER ix86_sched_reorder
52687 #undef TARGET_SCHED_ADJUST_PRIORITY
52688 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
52689 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
52690 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
52691 ix86_dependencies_evaluation_hook
52693 /* The size of the dispatch window is the total number of bytes of
52694 object code allowed in a window. */
52695 #define DISPATCH_WINDOW_SIZE 16
52697 /* Number of dispatch windows considered for scheduling. */
52698 #define MAX_DISPATCH_WINDOWS 3
52700 /* Maximum number of instructions in a window. */
52703 /* Maximum number of immediate operands in a window. */
52706 /* Maximum number of immediate bits allowed in a window. */
52707 #define MAX_IMM_SIZE 128
52709 /* Maximum number of 32 bit immediates allowed in a window. */
52710 #define MAX_IMM_32 4
52712 /* Maximum number of 64 bit immediates allowed in a window. */
52713 #define MAX_IMM_64 2
52715 /* Maximum total of loads or prefetches allowed in a window. */
52718 /* Maximum total of stores allowed in a window. */
52719 #define MAX_STORE 1
52725 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
52726 enum dispatch_group {
52741 /* Number of allowable groups in a dispatch window. It is an array
52742 indexed by dispatch_group enum. 100 is used as a big number,
52743 because the number of these kind of operations does not have any
52744 effect in dispatch window, but we need them for other reasons in
52746 static unsigned int num_allowable_groups[disp_last] = {
52747 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
/* Human-readable names for dispatch_group values, used by the debug
   dump routines below.  */
52750 char group_name[disp_last + 1][16] = {
52751 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
52752 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
52753 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
52756 /* Instruction path. */
52759 path_single, /* Single micro op. */
52760 path_double, /* Double micro op. */
52761 path_multi, /* Instructions with more than 2 micro ops. */
52765 /* sched_insn_info defines a window to the instructions scheduled in
52766 the basic block. It contains a pointer to the insn_info table and
52767 the instruction scheduled.
52769 Windows are allocated for each basic block and are linked
52771 typedef struct sched_insn_info_s {
52773 enum dispatch_group group;
52774 enum insn_path path;
52779 /* Linked list of dispatch windows. This is a two way list of
52780 dispatch windows of a basic block. It contains information about
52781 the number of uops in the window and the total number of
52782 instructions and of bytes in the object code for this dispatch
52784 typedef struct dispatch_windows_s {
52785 int num_insn; /* Number of insn in the window. */
52786 int num_uops; /* Number of uops in the window. */
52787 int window_size; /* Number of bytes in the window. */
52788 int window_num; /* Window number, either 0 or 1. */
52789 int num_imm; /* Number of immediates in an insn. */
52790 int num_imm_32; /* Number of 32 bit immediates in an insn. */
52791 int num_imm_64; /* Number of 64 bit immediates in an insn. */
52792 int imm_size; /* Total immediates in the window. */
52793 int num_loads; /* Total memory loads in the window. */
52794 int num_stores; /* Total memory stores in the window. */
52795 int violation; /* Violation exists in window. */
52796 sched_insn_info *window; /* Pointer to the window. */
52797 struct dispatch_windows_s *next;
52798 struct dispatch_windows_s *prev;
52799 } dispatch_windows;
52801 /* Immediate values used in an insn. */
52802 typedef struct imm_info_s
/* Heads of the two-window dispatch list for the current block.  */
52809 static dispatch_windows *dispatch_window_list;
52810 static dispatch_windows *dispatch_window_list1;
52812 /* Get dispatch group of insn.  Classifies INSN by its memory
   attribute: store, load, load+store, or no memory group.  */
52814 static enum dispatch_group
52815 get_mem_group (rtx_insn *insn)
52817 enum attr_memory memory;
/* Unrecognized insns carry no attributes; treat as no group.  */
52819 if (INSN_CODE (insn) < 0)
52820 return disp_no_group;
52821 memory = get_attr_memory (insn);
52822 if (memory == MEMORY_STORE)
52825 if (memory == MEMORY_LOAD)
52828 if (memory == MEMORY_BOTH)
52829 return disp_load_store;
52831 return disp_no_group;
52834 /* Return true if insn is a compare instruction. */
52837 is_cmp (rtx_insn *insn)
52839 enum attr_type type;
/* Check both the insn's type attribute and the raw pattern for a
   COMPARE rtx.  */
52841 type = get_attr_type (insn);
52842 return (type == TYPE_TEST
52843 || type == TYPE_ICMP
52844 || type == TYPE_FCMP
52845 || GET_CODE (PATTERN (insn)) == COMPARE);
52848 /* Return true if a dispatch violation encountered.  Reads the
   violation flag of the most recent window in the list.  */
52851 dispatch_violation (void)
52853 if (dispatch_window_list->next)
52854 return dispatch_window_list->next->violation;
52855 return dispatch_window_list->violation;
52858 /* Return true if insn is a branch instruction (a call or a jump). */
52861 is_branch (rtx_insn *insn)
52863 return (CALL_P (insn) || JUMP_P (insn));
52866 /* Return true if insn is a prefetch instruction (a non-jump insn
   whose pattern is a PREFETCH rtx). */
52869 is_prefetch (rtx_insn *insn)
52871 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
52874 /* This function initializes a dispatch window and the list container holding a
52875 pointer to the window.  WINDOW_NUM (0 or 1) selects which of the two
   global windows is reset. */
52878 init_window (int window_num)
52881 dispatch_windows *new_list;
52883 if (window_num == 0)
52884 new_list = dispatch_window_list;
52886 new_list = dispatch_window_list1;
/* Reset all counters and unlink the window from its neighbors.  */
52888 new_list->num_insn = 0;
52889 new_list->num_uops = 0;
52890 new_list->window_size = 0;
52891 new_list->next = NULL;
52892 new_list->prev = NULL;
52893 new_list->window_num = window_num;
52894 new_list->num_imm = 0;
52895 new_list->num_imm_32 = 0;
52896 new_list->num_imm_64 = 0;
52897 new_list->imm_size = 0;
52898 new_list->num_loads = 0;
52899 new_list->num_stores = 0;
52900 new_list->violation = false;
/* Clear every per-insn slot in the window.  */
52902 for (i = 0; i < MAX_INSN; i++)
52904 new_list->window[i].insn = NULL;
52905 new_list->window[i].group = disp_no_group;
52906 new_list->window[i].path = no_path;
52907 new_list->window[i].byte_len = 0;
52908 new_list->window[i].imm_bytes = 0;
52913 /* This function allocates and initializes a dispatch window and the
52914 list container holding a pointer to the window. */
52916 static dispatch_windows *
52917 allocate_window (void)
52919 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
/* One extra slot beyond MAX_INSN is allocated for the insn array.  */
52920 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
52925 /* This routine initializes the dispatch scheduling information. It
52926 initiates building dispatch scheduler tables and constructs the
52927 first dispatch window. */
52930 init_dispatch_sched (void)
52932 /* Allocate a dispatch list and a window. */
52933 dispatch_window_list = allocate_window ();
52934 dispatch_window_list1 = allocate_window ();
52939 /* This function returns true if a branch is detected. End of a basic block
52940 does not have to be a branch, but here we assume only branches end a
   window. */
52944 is_end_basic_block (enum dispatch_group group)
52946 return group == disp_branch;
52949 /* This function is called when the end of a window processing is reached. */
52952 process_end_window (void)
52954 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
52955 if (dispatch_window_list->next)
52957 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
/* The two windows together may hold at most 48 bytes of code.  */
52958 gcc_assert (dispatch_window_list->window_size
52959 + dispatch_window_list1->window_size <= 48);
52965 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
52966 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
52967 for 48 bytes of instructions. Note that these windows are not dispatch
52968 windows that their sizes are DISPATCH_WINDOW_SIZE. */
52970 static dispatch_windows *
52971 allocate_next_window (int window_num)
52973 if (window_num == 0)
52975 if (dispatch_window_list->next)
52978 return dispatch_window_list;
/* Window 1: link it after window 0 and return it.  */
52981 dispatch_window_list->next = dispatch_window_list1;
52982 dispatch_window_list1->prev = dispatch_window_list;
52984 return dispatch_window_list1;
52987 /* Compute number of immediate operands of an instruction.  Walks every
   sub-rtx of IN_RTX and tallies counts into IMM_VALUES. */
52990 find_constant (rtx in_rtx, imm_info *imm_values)
52992 if (INSN_P (in_rtx))
52993 in_rtx = PATTERN (in_rtx);
52994 subrtx_iterator::array_type array;
52995 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
52996 if (const_rtx x = *iter)
52997 switch (GET_CODE (x))
/* A constant fitting in 32 bits counts as imm32, else imm64.  */
53002 (imm_values->imm)++;
53003 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
53004 (imm_values->imm32)++;
53006 (imm_values->imm64)++;
53010 case CONST_WIDE_INT:
53011 (imm_values->imm)++;
53012 (imm_values->imm64)++;
/* Normal labels count as 32-bit immediates.  */
53016 if (LABEL_KIND (x) == LABEL_NORMAL)
53018 (imm_values->imm)++;
53019 (imm_values->imm32)++;
53028 /* Return total size of immediate operands of an instruction along with number
53029 of corresponding immediate-operands. It initializes its parameters to zero
53030 before calling FIND_CONSTANT.
53031 INSN is the input instruction. IMM is the total of immediates.
53032 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
   bit immediates. */
53036 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
53038 imm_info imm_values = {0, 0, 0};
53040 find_constant (insn, &imm_values);
53041 *imm = imm_values.imm;
53042 *imm32 = imm_values.imm32;
53043 *imm64 = imm_values.imm64;
/* Return value is total immediate size in bytes.  */
53044 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
53047 /* This function indicates if an operand of an instruction is an
   immediate. */
53051 has_immediate (rtx_insn *insn)
53053 int num_imm_operand;
53054 int num_imm32_operand;
53055 int num_imm64_operand;
/* Nonzero immediate size means the insn has at least one immediate.  */
53058 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53059 &num_imm64_operand);
53063 /* Return single or double path for instructions, based on the
   amdfam10_decode attribute of INSN. */
53065 static enum insn_path
53066 get_insn_path (rtx_insn *insn)
53068 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
53070 if ((int)path == 0)
53071 return path_single;
53073 if ((int)path == 1)
53074 return path_double;
53079 /* Return insn dispatch group.  Memory classification (via
   get_mem_group) is consulted first; branches, immediates and
   prefetches are classified afterwards. */
53081 static enum dispatch_group
53082 get_insn_group (rtx_insn *insn)
53084 enum dispatch_group group = get_mem_group (insn);
53088 if (is_branch (insn))
53089 return disp_branch;
53094 if (has_immediate (insn))
53097 if (is_prefetch (insn))
53098 return disp_prefetch;
53100 return disp_no_group;
53103 /* Count number of GROUP restricted instructions in a dispatch
53104 window WINDOW_LIST. */
53107 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
53109 enum dispatch_group group = get_insn_group (insn);
53111 int num_imm_operand;
53112 int num_imm32_operand;
53113 int num_imm64_operand;
53115 if (group == disp_no_group)
/* For immediates, check every per-window immediate budget: total
   size, operand count, and the 32/64-bit interaction limits.  */
53118 if (group == disp_imm)
53120 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53121 &num_imm64_operand);
53122 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
53123 || num_imm_operand + window_list->num_imm > MAX_IMM
53124 || (num_imm32_operand > 0
53125 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
53126 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
53127 || (num_imm64_operand > 0
53128 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
53129 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
53130 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
53131 && num_imm64_operand > 0
53132 && ((window_list->num_imm_64 > 0
53133 && window_list->num_insn >= 2)
53134 || window_list->num_insn >= 3)))
/* For memory groups, check the window's load/store budgets.  */
53140 if ((group == disp_load_store
53141 && (window_list->num_loads >= MAX_LOAD
53142 || window_list->num_stores >= MAX_STORE))
53143 || ((group == disp_load
53144 || group == disp_prefetch)
53145 && window_list->num_loads >= MAX_LOAD)
53146 || (group == disp_store
53147 && window_list->num_stores >= MAX_STORE))
53153 /* This function returns true if insn satisfies dispatch rules on the
53154 last window scheduled. */
53157 fits_dispatch_window (rtx_insn *insn)
53159 dispatch_windows *window_list = dispatch_window_list;
53160 dispatch_windows *window_list_next = dispatch_window_list->next;
53161 unsigned int num_restrict;
53162 enum dispatch_group group = get_insn_group (insn);
53163 enum insn_path path = get_insn_path (insn);
53166 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
53167 instructions should be given the lowest priority in the
53168 scheduling process in Haifa scheduler to make sure they will be
53169 scheduled in the same dispatch window as the reference to them. */
53170 if (group == disp_jcc || group == disp_cmp)
53173 /* Check nonrestricted. */
53174 if (group == disp_no_group || group == disp_branch)
53177 /* Get last dispatch window. */
53178 if (window_list_next)
53179 window_list = window_list_next;
53181 if (window_list->window_num == 1)
53183 sum = window_list->prev->window_size + window_list->window_size;
53186 || (min_insn_size (insn) + sum) >= 48)
53187 /* Window 1 is full. Go for next window. */
/* Check the group-specific restriction count against the budget.  */
53191 num_restrict = count_num_restricted (insn, window_list);
53193 if (num_restrict > num_allowable_groups[group])
53196 /* See if it fits in the first window. */
53197 if (window_list->window_num == 0)
53199 /* The first window should have only single and double path
53201 if (path == path_double
53202 && (window_list->num_uops + 2) > MAX_INSN)
53204 else if (path != path_single)
53210 /* Add an instruction INSN with NUM_UOPS micro-operations to the
53211 dispatch window WINDOW_LIST. */
53214 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
53216 int byte_len = min_insn_size (insn);
53217 int num_insn = window_list->num_insn;
53219 sched_insn_info *window = window_list->window;
53220 enum dispatch_group group = get_insn_group (insn);
53221 enum insn_path path = get_insn_path (insn);
53222 int num_imm_operand;
53223 int num_imm32_operand;
53224 int num_imm64_operand;
/* Record a violation if the insn breaks the dispatch rules (compares
   are exempt, see fits_dispatch_window).  */
53226 if (!window_list->violation && group != disp_cmp
53227 && !fits_dispatch_window (insn))
53228 window_list->violation = true;
53230 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53231 &num_imm64_operand);
53233 /* Initialize window with new instruction. */
53234 window[num_insn].insn = insn;
53235 window[num_insn].byte_len = byte_len;
53236 window[num_insn].group = group;
53237 window[num_insn].path = path;
53238 window[num_insn].imm_bytes = imm_size;
/* Update the window's aggregate counters.  */
53240 window_list->window_size += byte_len;
53241 window_list->num_insn = num_insn + 1;
53242 window_list->num_uops = window_list->num_uops + num_uops;
53243 window_list->imm_size += imm_size;
53244 window_list->num_imm += num_imm_operand;
53245 window_list->num_imm_32 += num_imm32_operand;
53246 window_list->num_imm_64 += num_imm64_operand;
53248 if (group == disp_store)
53249 window_list->num_stores += 1;
53250 else if (group == disp_load
53251 || group == disp_prefetch)
53252 window_list->num_loads += 1;
53253 else if (group == disp_load_store)
53255 window_list->num_stores += 1;
53256 window_list->num_loads += 1;
53260 /* Adds a scheduled instruction, INSN, to the current dispatch window.
53261 If the total bytes of instructions or the number of instructions in
53262 the window exceed allowable, it allocates a new window. */
53265 add_to_dispatch_window (rtx_insn *insn)
53268 dispatch_windows *window_list;
53269 dispatch_windows *next_list;
53270 dispatch_windows *window0_list;
53271 enum insn_path path;
53272 enum dispatch_group insn_group;
/* Unrecognized insns are not tracked.  */
53280 if (INSN_CODE (insn) < 0)
53283 byte_len = min_insn_size (insn);
53284 window_list = dispatch_window_list;
53285 next_list = window_list->next;
53286 path = get_insn_path (insn);
53287 insn_group = get_insn_group (insn);
53289 /* Get the last dispatch window. */
53291 window_list = dispatch_window_list->next;
/* Map the decode path to a uop count; path_multi uses the enum's
   numeric value directly.  */
53293 if (path == path_single)
53295 else if (path == path_double)
53298 insn_num_uops = (int) path;
53300 /* If current window is full, get a new window.
53301 Window number zero is full, if MAX_INSN uops are scheduled in it.
53302 Window number one is full, if window zero's bytes plus window
53303 one's bytes is 32, or if the bytes of the new instruction added
53304 to the total makes it greater than 48, or it has already MAX_INSN
53305 instructions in it. */
53306 num_insn = window_list->num_insn;
53307 num_uops = window_list->num_uops;
53308 window_num = window_list->window_num;
53309 insn_fits = fits_dispatch_window (insn);
53311 if (num_insn >= MAX_INSN
53312 || num_uops + insn_num_uops > MAX_INSN
/* Flip between window 0 and window 1.  */
53315 window_num = ~window_num & 1;
53316 window_list = allocate_next_window (window_num);
53319 if (window_num == 0)
53321 add_insn_window (insn, window_list, insn_num_uops);
53322 if (window_list->num_insn >= MAX_INSN
53323 && insn_group == disp_branch)
53325 process_end_window ();
53329 else if (window_num == 1)
53331 window0_list = window_list->prev;
53332 sum = window0_list->window_size + window_list->window_size;
53334 || (byte_len + sum) >= 48)
53336 process_end_window ();
53337 window_list = dispatch_window_list;
53340 add_insn_window (insn, window_list, insn_num_uops);
53343 gcc_unreachable ();
53345 if (is_end_basic_block (insn_group))
53347 /* End of basic block is reached do end-basic-block process. */
53348 process_end_window ();
53353 /* Print the dispatch window, WINDOW_NUM, to FILE. */
53355 DEBUG_FUNCTION static void
53356 debug_dispatch_window_file (FILE *file, int window_num)
53358 dispatch_windows *list;
53361 if (window_num == 0)
53362 list = dispatch_window_list;
53364 list = dispatch_window_list1;
/* Dump the window's aggregate counters, then each occupied slot.  */
53366 fprintf (file, "Window #%d:\n", list->window_num);
53367 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
53368 list->num_insn, list->num_uops, list->window_size);
53369 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53370 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
53372 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
53374 fprintf (file, " insn info:\n");
53376 for (i = 0; i < MAX_INSN; i++)
53378 if (!list->window[i].insn)
53380 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
53381 i, group_name[list->window[i].group],
53382 i, (void *)list->window[i].insn,
53383 i, list->window[i].path,
53384 i, list->window[i].byte_len,
53385 i, list->window[i].imm_bytes);
53389 /* Print to stdout a dispatch window. */
53391 DEBUG_FUNCTION void
53392 debug_dispatch_window (int window_num)
53394 debug_dispatch_window_file (stdout, window_num);
53397 /* Print INSN dispatch information to FILE. */
53399 DEBUG_FUNCTION static void
53400 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
53403 enum insn_path path;
53404 enum dispatch_group group;
53406 int num_imm_operand;
53407 int num_imm32_operand;
53408 int num_imm64_operand;
/* Unrecognized insns carry no dispatch info; the early-exit statement is
   elided in this listing.  */
53410 if (INSN_CODE (insn) < 0)
53413 byte_len = min_insn_size (insn);
53414 path = get_insn_path (insn);
53415 group = get_insn_group (insn);
53416 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53417 &num_imm64_operand);
53419 fprintf (file, " insn info:\n");
53420 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
53421 group_name[group], path, byte_len);
53422 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53423 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
53426 /* Print to STDOUT the status of the ready list with respect to
53427 dispatch windows. */
53429 DEBUG_FUNCTION void
53430 debug_ready_dispatch (void)
53433 int no_ready = number_in_ready ();
53435 fprintf (stdout, "Number of ready: %d\n", no_ready);
53437 for (i = 0; i < no_ready; i++)
53438 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
53441 /* This routine is the driver of the dispatch scheduler. */
53444 do_dispatch (rtx_insn *insn, int mode)
/* MODE selects the action: initialize the scheduler state or add INSN
   to the current dispatch window.  */
53446 if (mode == DISPATCH_INIT)
53447 init_dispatch_sched ();
53448 else if (mode == ADD_TO_DISPATCH_WINDOW)
53449 add_to_dispatch_window (insn);
53452 /* Return TRUE if Dispatch Scheduling is supported. */
/* Dispatch scheduling is only active on AMD bdver1-4/znver1 with
   -mdispatch-scheduler; ACTION selects the query (the switch header and
   some cases are elided in this listing).  */
53455 has_dispatch (rtx_insn *insn, int action)
53457 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
53458 || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
53464 case IS_DISPATCH_ON:
53469 return is_cmp (insn);
53471 case DISPATCH_VIOLATION:
53472 return dispatch_violation ();
53474 case FITS_DISPATCH_WINDOW:
53475 return fits_dispatch_window (insn);
53481 /* Implementation of reassociation_width target hook used by
53482 reassoc phase to identify parallelism level in reassociated
53483 tree. Statements tree_code is passed in OPC. Arguments type
53486 Currently parallel reassociation is enabled for Atom
53487 processors only and we set reassociation width to be 2
53488 because Atom may issue up to 2 instructions per cycle.
53490 Return value should be fixed if parallel reassociation is
53491 enabled for other processors. */
53494 ix86_reassociation_width (unsigned int, machine_mode mode)
/* NOTE(review): the return statements taken inside these branches are
   elided in this listing (gaps in the embedded numbering).  */
53497 if (VECTOR_MODE_P (mode))
53499 if (TARGET_VECTOR_PARALLEL_EXECUTION)
53506 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
53508 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
/* 64-bit Haswell gets width 4 for FP reassociation, otherwise 2.  */
53509 return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
53514 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
53515 place emms and femms instructions. */
53517 static machine_mode
53518 ix86_preferred_simd_mode (machine_mode mode)
/* Per-element-mode choice of vector mode; the switch header and case
   labels are elided in this listing.  Each arm widens the vector with
   the available ISA: AVX-512 > AVX (unless -mprefer-avx128) > SSE.  */
53526 return TARGET_AVX512BW ? V64QImode :
53527 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
53529 return TARGET_AVX512BW ? V32HImode :
53530 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
53532 return TARGET_AVX512F ? V16SImode :
53533 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
53535 return TARGET_AVX512F ? V8DImode :
53536 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
/* Float / double arms; the returned modes on each branch are elided.  */
53539 if (TARGET_AVX512F)
53541 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
53547 if (!TARGET_VECTORIZE_DOUBLE)
53549 else if (TARGET_AVX512F)
53551 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
53553 else if (TARGET_SSE2)
53562 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
53563 vectors. If AVX512F is enabled then try vectorizing with 512bit,
53564 256bit and 128bit vectors. */
53566 static unsigned int
53567 ix86_autovectorize_vector_sizes (void)
/* Bitmask of candidate vector sizes in bytes (0 = only the default).  */
53569 return TARGET_AVX512F ? 64 | 32 | 16 :
53570 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
53573 /* Implementation of targetm.vectorize.get_mask_mode. */
53575 static machine_mode
53576 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
53578 unsigned elem_size = vector_size / nunits;
/* AVX-512 style scalar masks: one bit per element held in a mask
   register, so the mask mode is the smallest integer mode with NUNITS
   bits.  AVX512BW additionally covers 1- and 2-byte elements.  */
53580 /* Scalar mask case. */
53581 if ((TARGET_AVX512F && vector_size == 64)
53582 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
53584 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
53585 return smallest_mode_for_size (nunits, MODE_INT);
/* Otherwise fall back to a vector-of-integers mask matching the data
   vector's element size and count.  */
53588 machine_mode elem_mode
53589 = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
53591 gcc_assert (elem_size * nunits == vector_size);
53593 return mode_for_vector (elem_mode, nunits);
53598 /* Return class of registers which could be used for pseudo of MODE
53599 and of class RCLASS for spilling instead of memory. Return NO_REGS
53600 if it is not possible or non-profitable. */
53602 ix86_spill_class (reg_class_t rclass, machine_mode mode)
/* Allow spilling 32-bit (and, on 64-bit targets, 64-bit) integer pseudos
   to SSE registers instead of memory when the tuning flag is on and MMX
   is off; the NO_REGS fallthrough return is elided in this listing.  */
53604 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
53605 && (mode == SImode || (TARGET_64BIT && mode == DImode))
53606 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
53607 return ALL_SSE_REGS;
53611 /* Implement targetm.vectorize.init_cost. */
/* Allocates the 3-slot accumulator (prologue/body/epilogue) used by the
   other cost hooks; the return of COST is elided in this listing.  */
53614 ix86_init_cost (struct loop *)
53616 unsigned *cost = XNEWVEC (unsigned, 3);
53617 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
53621 /* Implement targetm.vectorize.add_stmt_cost. */
53624 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
53625 struct _stmt_vec_info *stmt_info, int misalign,
53626 enum vect_cost_model_location where)
53628 unsigned *cost = (unsigned *) data;
53629 unsigned retval = 0;
53631 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
53632 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
53634 /* Statements in an inner loop relative to the loop being
53635 vectorized are weighted more heavily. The value here is
53636 arbitrary and could potentially be improved with analysis. */
53637 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
53638 count *= 50; /* FIXME. */
53640 retval = (unsigned) (count * stmt_cost);
53642 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
53643 for Silvermont as it has out of order integer pipeline and can execute
53644 2 scalar instruction per tick, but has in order SIMD pipeline. */
53645 if (TARGET_SILVERMONT || TARGET_INTEL)
53646 if (stmt_info && stmt_info->stmt)
53648 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
/* *17/10 is the integer approximation of the 1.7 factor above.  */
53649 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
53650 retval = (retval * 17) / 10;
53653 cost[where] += retval;
53658 /* Implement targetm.vectorize.finish_cost. */
/* Copies the accumulated per-phase costs out to the caller.  */
53661 ix86_finish_cost (void *data, unsigned *prologue_cost,
53662 unsigned *body_cost, unsigned *epilogue_cost)
53664 unsigned *cost = (unsigned *) data;
53665 *prologue_cost = cost[vect_prologue];
53666 *body_cost = cost[vect_body];
53667 *epilogue_cost = cost[vect_epilogue];
53670 /* Implement targetm.vectorize.destroy_cost_data. */
/* Body (freeing DATA) is elided in this listing.  */
53673 ix86_destroy_cost_data (void *data)
53678 /* Validate target specific memory model bits in VAL. */
/* VAL combines a standard memmodel with the x86 HLE prefix bits.
   Each invalid combination is diagnosed and a safe SEQ_CST-based model
   returned instead; the final "VAL is fine" return is elided here.  */
53680 static unsigned HOST_WIDE_INT
53681 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
53683 enum memmodel model = memmodel_from_int (val);
/* Reject unknown bits outside the HLE/model mask, and ACQUIRE+RELEASE
   together (the middle line of this condition is elided).  */
53686 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
53688 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
53690 warning (OPT_Winvalid_memory_model,
53691 "Unknown architecture specific memory model");
53692 return MEMMODEL_SEQ_CST;
53694 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
/* HLE_ACQUIRE requires at least an acquire-strength model.  */
53695 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
53697 warning (OPT_Winvalid_memory_model,
53698 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
53699 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
/* HLE_RELEASE requires at least a release-strength model.  */
53701 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
53703 warning (OPT_Winvalid_memory_model,
53704 "HLE_RELEASE not used with RELEASE or stronger memory model");
53705 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
53710 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
53711 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
53712 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
53713 or number of vecsize_mangle variants that should be emitted. */
53716 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
53717 struct cgraph_simd_clone *clonei,
53718 tree base_type, int num)
/* A user-specified simdlen must be a power of two in [2, 16];
   the "return 0" rejecting it is elided in this listing.  */
53722 if (clonei->simdlen
53723 && (clonei->simdlen < 2
53724 || clonei->simdlen > 16
53725 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
53727 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53728 "unsupported simdlen %d", clonei->simdlen);
/* Only scalar int/float-ish return types are supported; the accepted
   switch cases and rejection return are elided in this listing.  */
53732 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
53733 if (TREE_CODE (ret_type) != VOID_TYPE)
53734 switch (TYPE_MODE (ret_type))
53746 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53747 "unsupported return type %qT for simd\n", ret_type);
/* Same per-argument check.  */
53754 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
53755 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
53756 switch (TYPE_MODE (TREE_TYPE (t)))
53768 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53769 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
/* Choose the ISA mangle letter: 'b' = SSE2/xmm, 'c' = AVX/ymm-float,
   'd' = AVX2.  Non-exported functions get just one variant matching the
   current ISA; exported ones emit the "bcd" set indexed by NUM.  */
53773 if (clonei->cilk_elemental)
53775 /* Parse here processor clause. If not present, default to 'b'. */
53776 clonei->vecsize_mangle = 'b';
53778 else if (!TREE_PUBLIC (node->decl))
53780 /* If the function isn't exported, we can pick up just one ISA
53783 clonei->vecsize_mangle = 'd';
53784 else if (TARGET_AVX)
53785 clonei->vecsize_mangle = 'c';
53787 clonei->vecsize_mangle = 'b';
53792 clonei->vecsize_mangle = "bcd"[num];
53795 switch (clonei->vecsize_mangle)
53798 clonei->vecsize_int = 128;
53799 clonei->vecsize_float = 128;
53802 clonei->vecsize_int = 128;
53803 clonei->vecsize_float = 256;
53806 clonei->vecsize_int = 256;
53807 clonei->vecsize_float = 256;
/* Derive simdlen from vector size / element size when unspecified,
   capping at 16 lanes.  */
53810 if (clonei->simdlen == 0)
53812 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
53813 clonei->simdlen = clonei->vecsize_int;
53815 clonei->simdlen = clonei->vecsize_float;
53816 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
53817 if (clonei->simdlen > 16)
53818 clonei->simdlen = 16;
53823 /* Add target attribute to SIMD clone NODE if needed. */
53826 ix86_simd_clone_adjust (struct cgraph_node *node)
53828 const char *str = NULL;
53829 gcc_assert (node->decl == cfun->decl);
/* Map the clone's mangle letter to a target-attribute string (the case
   labels and string assignments are elided in this listing).  */
53830 switch (node->simdclone->vecsize_mangle)
53845 gcc_unreachable ();
/* Apply the attribute and re-establish the per-function target state.  */
53850 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
53851 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
53854 ix86_reset_previous_fndecl ();
53855 ix86_set_current_function (node->decl);
53858 /* If SIMD clone NODE can't be used in a vectorized loop
53859 in current function, return -1, otherwise return a badness of using it
53860 (0 if it is most desirable from vecsize_mangle point of view, 1
53861 slightly less desirable, etc.). */
53864 ix86_simd_clone_usable (struct cgraph_node *node)
/* Badness depends on the clone's ISA versus the current target; the case
   labels and several returns are elided in this listing.  */
53866 switch (node->simdclone->vecsize_mangle)
53873 return TARGET_AVX2 ? 2 : 1;
53877 return TARGET_AVX2 ? 1 : 0;
53884 gcc_unreachable ();
53888 /* This function adjusts the unroll factor based on
53889 the hardware capabilities. For ex, bdver3 has
53890 a loop buffer which makes unrolling of smaller
53891 loops less important. This function decides the
53892 unroll factor using number of memory references
53893 (value 32 is used) as a heuristic. */
53896 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
53901 unsigned mem_count = 0;
/* Without the tuning flag, the incoming NUNROLL is returned unchanged
   (the return statement is elided in this listing).  */
53903 if (!TARGET_ADJUST_UNROLL)
53906 /* Count the number of memory references within the loop body.
53907 This value determines the unrolling factor for bdver3 and bdver4
53909 subrtx_iterator::array_type array;
53910 bbs = get_loop_body (loop);
53911 for (i = 0; i < loop->num_nodes; i++)
53912 FOR_BB_INSNS (bbs[i], insn)
53913 if (NONDEBUG_INSN_P (insn))
53914 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
53915 if (const_rtx x = *iter)
/* NOTE(review): the MEM test and mem_count increment between these
   lines are elided; N_WORDS presumably feeds the count.  */
53918 machine_mode mode = GET_MODE (x);
53919 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Unroll so that roughly 32 memory references fit in the loop buffer.  */
53927 if (mem_count && mem_count <=32)
53928 return 32/mem_count;
53934 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
53937 ix86_float_exceptions_rounding_supported_p (void)
53939 /* For x87 floating point with standard excess precision handling,
53940 there is no adddf3 pattern (since x87 floating point only has
53941 XFmode operations) so the default hook implementation gets this
53943 return TARGET_80387 || TARGET_SSE_MATH;
53946 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
/* Builds the HOLD / CLEAR / UPDATE trees used to save, clear and restore
   the FP environment around an atomic compound assignment, covering both
   the x87 (FNSTENV/FNCLEX/FNSTSW/FLDENV) and SSE (STMXCSR/LDMXCSR)
   environments as enabled.  Early exit when neither FP unit is in use
   (the return statement after this test is elided in the listing).  */
53949 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
53951 if (!TARGET_80387 && !TARGET_SSE_MATH)
53953 tree exceptions_var = create_tmp_var_raw (integer_type_node);
/* x87 leg: save the 28-byte env (7 unsigned words), clear exceptions,
   and on update read the status word and reload the saved env.
   The enclosing "if (TARGET_80387)" line is elided in this listing.  */
53956 tree fenv_index_type = build_index_type (size_int (6));
53957 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
53958 tree fenv_var = create_tmp_var_raw (fenv_type);
53959 TREE_ADDRESSABLE (fenv_var) = 1;
53960 tree fenv_ptr = build_pointer_type (fenv_type);
53961 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
53962 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
53963 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
53964 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
53965 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
53966 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
53967 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
53968 tree hold_fnclex = build_call_expr (fnclex, 0);
53969 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
53970 NULL_TREE, NULL_TREE);
53971 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
53973 *clear = build_call_expr (fnclex, 0);
53974 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
53975 tree fnstsw_call = build_call_expr (fnstsw, 0);
53976 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
53977 sw_var, fnstsw_call);
53978 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
53979 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
53980 exceptions_var, exceptions_x87);
53981 *update = build2 (COMPOUND_EXPR, integer_type_node,
53982 sw_mod, update_mod);
53983 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
53984 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
/* SSE leg: save MXCSR, mask all exceptions (0x1f80) and clear flag bits
   (mask 0xffffffc0) for the held copy, then on update OR the raised
   flags into EXCEPTIONS_VAR and restore the original MXCSR.  */
53986 if (TARGET_SSE_MATH)
53988 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
53989 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
53990 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
53991 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
53992 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
53993 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
53994 mxcsr_orig_var, stmxcsr_hold_call);
53995 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
53997 build_int_cst (unsigned_type_node, 0x1f80));
53998 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
53999 build_int_cst (unsigned_type_node, 0xffffffc0));
54000 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
54001 mxcsr_mod_var, hold_mod_val);
54002 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
54003 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
54004 hold_assign_orig, hold_assign_mod);
54005 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
54006 ldmxcsr_hold_call);
/* Chain onto any x87 trees already built (the else-arms that install
   these trees directly are elided in this listing).  */
54008 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
54011 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
54013 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
54014 ldmxcsr_clear_call);
54016 *clear = ldmxcsr_clear_call;
54017 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
54018 tree exceptions_sse = fold_convert (integer_type_node,
54019 stxmcsr_update_call);
54022 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
54023 exceptions_var, exceptions_sse);
54024 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
54025 exceptions_var, exceptions_mod);
54026 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
54027 exceptions_assign);
54030 *update = build2 (MODIFY_EXPR, integer_type_node,
54031 exceptions_var, exceptions_sse);
54032 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
54033 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54034 ldmxcsr_update_call);
/* Finally re-raise the collected exceptions atomically.  */
54036 tree atomic_feraiseexcept
54037 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
54038 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
54039 1, exceptions_var);
54040 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54041 atomic_feraiseexcept_call);
54044 /* Return mode to be used for bounds or VOIDmode
54045 if bounds are not supported. */
54047 static enum machine_mode
54048 ix86_mpx_bound_mode ()
/* The MPX-enabled path and both return statements are elided in this
   listing; only the diagnostic for the unsupported case survives.  */
54050 /* Do not support pointer checker if MPX
54054 if (flag_check_pointer_bounds)
54055 warning (0, "Pointer Checker requires MPX support on this target."
54056 " Use -mmpx options to enable MPX.");
54063 /* Return constant used to statically initialize constant bounds.
54065 This function is used to create special bound values. For now
54066 only INIT bounds and NONE bounds are expected. More special
54067 values may be added later. */
54070 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
/* Bounds are stored as a complex of two pointer-sized ints; INIT is
   (0, -1) and NONE is (-1, 0), with UB stored inverted.  */
54072 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
54073 : build_zero_cst (pointer_sized_int_node);
54074 tree high = ub ? build_zero_cst (pointer_sized_int_node)
54075 : build_minus_one_cst (pointer_sized_int_node);
54077 /* This function is supposed to be used to create INIT and
54078 NONE bounds only. */
54079 gcc_assert ((lb == 0 && ub == -1)
54080 || (lb == -1 && ub == 0));
54082 return build_complex (NULL, low, high);
54085 /* Generate a list of statements STMTS to initialize pointer bounds
54086 variable VAR with bounds LB and UB. Return the number of generated
54090 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
54092 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
54093 tree lhs, modify, var_p;
/* The upper bound is stored one's-complemented (MPX convention).  */
54095 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
54096 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
/* Store LB into the first pointer-sized slot of VAR...  */
54098 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
54099 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
54100 append_to_statement_list (modify, stmts);
/* ...and ~UB into the second slot.  The return count is elided in
   this listing.  */
54102 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
54103 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
54104 TYPE_SIZE_UNIT (pointer_sized_int_node)));
54105 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
54106 append_to_statement_list (modify, stmts);
54111 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
54112 /* For i386, common symbol is local only for non-PIE binaries. For
54113 x86-64, common symbol is local only for non-PIE binaries or linker
54114 supports copy reloc in PIE binaries. */
54117 ix86_binds_local_p (const_tree exp)
/* Delegates to the generic helper; the middle of the argument list
   (the common-symbol-locality condition) is elided in this listing.  */
54119 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
54122 && HAVE_LD_PIE_COPYRELOC != 0)));
54126 /* If MEM is in the form of [base+offset], extract the two parts
54127 of address and set to BASE and OFFSET, otherwise return false. */
54130 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
54134 gcc_assert (MEM_P (mem));
54136 addr = XEXP (mem, 0);
/* Strip a CONST wrapper so (const (plus ...)) is handled too.  */
54138 if (GET_CODE (addr) == CONST)
54139 addr = XEXP (addr, 0);
/* Bare register or symbol: offset 0 (the *base assignment and return
   true are partially elided in this listing).  */
54141 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
54144 *offset = const0_rtx;
/* (plus (reg|symbol) (const_int)): split into the two parts.  The final
   "return false" for all other shapes is elided.  */
54148 if (GET_CODE (addr) == PLUS
54149 && (REG_P (XEXP (addr, 0))
54150 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
54151 && CONST_INT_P (XEXP (addr, 1)))
54153 *base = XEXP (addr, 0);
54154 *offset = XEXP (addr, 1);
54161 /* Given OPERANDS of consecutive load/store, check if we can merge
54162 them into move multiple. LOAD is true if they are load instructions.
54163 MODE is the mode of memory operands. */
54166 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
54167 enum machine_mode mode)
54169 HOST_WIDE_INT offval_1, offval_2, msize;
54170 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
/* For loads, operands are (reg, mem, reg, mem); for stores the roles
   are swapped.  The if/else lines around these assignments are elided
   in this listing.  */
54174 mem_1 = operands[1];
54175 mem_2 = operands[3];
54176 reg_1 = operands[0];
54177 reg_2 = operands[2];
54181 mem_1 = operands[0];
54182 mem_2 = operands[2];
54183 reg_1 = operands[1];
54184 reg_2 = operands[3];
54187 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
/* NOTE(review): both registers must be the SAME hard register here —
   the "return false" taken on mismatch is elided.  */
54189 if (REGNO (reg_1) != REGNO (reg_2))
54192 /* Check if the addresses are in the form of [base+offset]. */
54193 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
54195 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
54198 /* Check if the bases are the same. */
54199 if (!rtx_equal_p (base_1, base_2))
54202 offval_1 = INTVAL (offset_1);
54203 offval_2 = INTVAL (offset_2);
54204 msize = GET_MODE_SIZE (mode);
54205 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
54206 if (offval_1 + msize != offval_2)
54212 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
/* The switch header and case labels on OP are elided in this listing;
   each surviving condition gates an optab on SSE math and -ffast-math
   style flags, and only for speed optimization.  */
54215 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
54216 optimization_type opt_type)
54230 return opt_type == OPTIMIZE_FOR_SPEED;
54233 if (SSE_FLOAT_MODE_P (mode1)
54235 && !flag_trapping_math
54237 return opt_type == OPTIMIZE_FOR_SPEED;
54243 if (SSE_FLOAT_MODE_P (mode1)
54245 && !flag_trapping_math
54248 return opt_type == OPTIMIZE_FOR_SPEED;
/* Reciprocal square root is additionally gated on use_rsqrt_p().  */
54251 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
54258 /* Address space support.
54260 This is not "far pointers" in the 16-bit sense, but an easy way
54261 to use %fs and %gs segment prefixes. Therefore:
54263 (a) All address spaces have the same modes,
54264 (b) All address spaces have the same address forms,
54265 (c) While %fs and %gs are technically subsets of the generic
54266 address space, they are probably not subsets of each other.
54267 (d) Since we have no access to the segment base register values
54268 without resorting to a system call, we cannot convert a
54269 non-default address space to a default address space.
54270 Therefore we do not claim %fs or %gs are subsets of generic.
54272 Therefore we can (mostly) use the default hooks. */
54274 /* All use of segmentation is assumed to make address 0 valid. */
54277 ix86_addr_space_zero_address_valid (addr_space_t as)
54279 return as != ADDR_SPACE_GENERIC;
54281 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
54282 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
54284 /* Initialize the GCC target structure. */
54285 #undef TARGET_RETURN_IN_MEMORY
54286 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
54288 #undef TARGET_LEGITIMIZE_ADDRESS
54289 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
54291 #undef TARGET_ATTRIBUTE_TABLE
54292 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
54293 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
54294 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
54295 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54296 # undef TARGET_MERGE_DECL_ATTRIBUTES
54297 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
54300 #undef TARGET_COMP_TYPE_ATTRIBUTES
54301 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
54303 #undef TARGET_INIT_BUILTINS
54304 #define TARGET_INIT_BUILTINS ix86_init_builtins
54305 #undef TARGET_BUILTIN_DECL
54306 #define TARGET_BUILTIN_DECL ix86_builtin_decl
54307 #undef TARGET_EXPAND_BUILTIN
54308 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
54310 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
54311 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
54312 ix86_builtin_vectorized_function
54314 #undef TARGET_VECTORIZE_BUILTIN_GATHER
54315 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
54317 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
54318 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
54320 #undef TARGET_BUILTIN_RECIPROCAL
54321 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
54323 #undef TARGET_ASM_FUNCTION_EPILOGUE
54324 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
54326 #undef TARGET_ENCODE_SECTION_INFO
54327 #ifndef SUBTARGET_ENCODE_SECTION_INFO
54328 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
54330 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
54333 #undef TARGET_ASM_OPEN_PAREN
54334 #define TARGET_ASM_OPEN_PAREN ""
54335 #undef TARGET_ASM_CLOSE_PAREN
54336 #define TARGET_ASM_CLOSE_PAREN ""
54338 #undef TARGET_ASM_BYTE_OP
54339 #define TARGET_ASM_BYTE_OP ASM_BYTE
54341 #undef TARGET_ASM_ALIGNED_HI_OP
54342 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
54343 #undef TARGET_ASM_ALIGNED_SI_OP
54344 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
54346 #undef TARGET_ASM_ALIGNED_DI_OP
54347 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
54350 #undef TARGET_PROFILE_BEFORE_PROLOGUE
54351 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
54353 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
54354 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
54356 #undef TARGET_ASM_UNALIGNED_HI_OP
54357 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
54358 #undef TARGET_ASM_UNALIGNED_SI_OP
54359 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
54360 #undef TARGET_ASM_UNALIGNED_DI_OP
54361 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
54363 #undef TARGET_PRINT_OPERAND
54364 #define TARGET_PRINT_OPERAND ix86_print_operand
54365 #undef TARGET_PRINT_OPERAND_ADDRESS
54366 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
54367 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
54368 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
54369 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
54370 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
54372 #undef TARGET_SCHED_INIT_GLOBAL
54373 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
54374 #undef TARGET_SCHED_ADJUST_COST
54375 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
54376 #undef TARGET_SCHED_ISSUE_RATE
54377 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
54378 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
54379 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
54380 ia32_multipass_dfa_lookahead
54381 #undef TARGET_SCHED_MACRO_FUSION_P
54382 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
54383 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
54384 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
54386 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
54387 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
54389 #undef TARGET_MEMMODEL_CHECK
54390 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
54392 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
54393 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
54396 #undef TARGET_HAVE_TLS
54397 #define TARGET_HAVE_TLS true
54399 #undef TARGET_CANNOT_FORCE_CONST_MEM
54400 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
54401 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
54402 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
54404 #undef TARGET_DELEGITIMIZE_ADDRESS
54405 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
54407 #undef TARGET_MS_BITFIELD_LAYOUT_P
54408 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
54411 #undef TARGET_BINDS_LOCAL_P
54412 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
54414 #undef TARGET_BINDS_LOCAL_P
54415 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
54417 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54418 #undef TARGET_BINDS_LOCAL_P
54419 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
54422 #undef TARGET_ASM_OUTPUT_MI_THUNK
54423 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
54424 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
54425 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
54427 #undef TARGET_ASM_FILE_START
54428 #define TARGET_ASM_FILE_START x86_file_start
54430 #undef TARGET_OPTION_OVERRIDE
54431 #define TARGET_OPTION_OVERRIDE ix86_option_override
54433 #undef TARGET_REGISTER_MOVE_COST
54434 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
54435 #undef TARGET_MEMORY_MOVE_COST
54436 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
54437 #undef TARGET_RTX_COSTS
54438 #define TARGET_RTX_COSTS ix86_rtx_costs
54439 #undef TARGET_ADDRESS_COST
54440 #define TARGET_ADDRESS_COST ix86_address_cost
54442 #undef TARGET_FIXED_CONDITION_CODE_REGS
54443 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
54444 #undef TARGET_CC_MODES_COMPATIBLE
54445 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
54447 #undef TARGET_MACHINE_DEPENDENT_REORG
54448 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
54450 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
54451 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
54453 #undef TARGET_BUILD_BUILTIN_VA_LIST
54454 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
54456 #undef TARGET_FOLD_BUILTIN
54457 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
54459 #undef TARGET_COMPARE_VERSION_PRIORITY
54460 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
54462 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
54463 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
54464 ix86_generate_version_dispatcher_body
/* Install the IA-32 implementations of the generic target hooks.  Each
   #undef clears any default supplied by target-def.h (included above)
   before the i386-specific definition is installed.  */

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

/* va_list handling.  */
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

/* Argument passing and calling conventions.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Mode support queries.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Let subtargets supply their own attribute-insertion hook when they
   define SUBTARGET_INSERT_ATTRIBUTES.  NOTE(review): the matching
   #endif was missing from this copy of the file; restored here to keep
   the conditional balanced.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
/* Name mangling, function values, register classes, vectorizer costs
   and per-function option handling.  */

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

/* Register allocation / reload.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Vectorizer hooks.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

/* Per-function target option save/restore (attribute target,
   function multiversioning).  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
/* darwin_rename_builtins is a Darwin-only hook, so this override must
   be limited to Mach-O targets.  NOTE(review): the #if TARGET_MACHO /
   #endif guard was missing from this copy of the file; restored here —
   without it, non-Darwin builds would reference an undefined
   function.  */
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
/* Loop unrolling, SIMD clones, mode-switching and pointer-bounds
   (MPX / chkp) hooks.  */

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

/* OpenMP "declare simd" clone support.  */
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

/* Mode-switching (optimize-mode-switching pass) hooks.  */
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

/* Pointer-bounds checking (MPX) hooks.  */
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

/* 512 bits, for ZMM/AVX-512 operands.  */
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
54754 struct gcc_target targetm = TARGET_INITIALIZER;
54756 #include "gt-i386.h"