1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
;; Enumerators of the "unspec" C enum; each name listed becomes one constant.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
;; Enumerators of the "unspecv" C enum.
88 (define_c_enum "unspecv" [
98 ;; All vector modes including V?TImode, used in move patterns.
;; Modes tagged "TARGET_AVX" take part only when that test holds; the others
;; are unconditional.  Used below by mov<mode>, *mov<mode>_internal,
;; push<mode>1 and movmisalign<mode>.
99 (define_mode_iterator V16
100 [(V32QI "TARGET_AVX") V16QI
101 (V16HI "TARGET_AVX") V8HI
102 (V8SI "TARGET_AVX") V4SI
103 (V4DI "TARGET_AVX") V2DI
104 (V2TI "TARGET_AVX") V1TI
105 (V8SF "TARGET_AVX") V4SF
106 (V4DF "TARGET_AVX") V2DF])
;; Same list as V16 without the V2TI/V1TI entries; here V2DF additionally
;; requires TARGET_SSE2.
109 (define_mode_iterator V
110 [(V32QI "TARGET_AVX") V16QI
111 (V16HI "TARGET_AVX") V8HI
112 (V8SI "TARGET_AVX") V4SI
113 (V4DI "TARGET_AVX") V2DI
114 (V8SF "TARGET_AVX") V4SF
115 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
117 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2); the other entries are
;; unconditional.
118 (define_mode_iterator V_128
119 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
121 ;; All 256bit vector modes
;; No per-mode conditions are attached to any entry.
122 (define_mode_iterator V_256
123 [V32QI V16HI V8SI V4DI V8SF V4DF])
125 ;; All vector float modes
;; V8SF and V4DF need TARGET_AVX, V2DF needs TARGET_SSE2, V4SF is
;; unconditional.  Drives the packed float move, arithmetic, sqrt and
;; min/max patterns below.
126 (define_mode_iterator VF
127 [(V8SF "TARGET_AVX") V4SF
128 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
130 ;; All SFmode vector float modes
;; Used below by the rcp/rsqrt patterns and by the VF1 variants of the
;; div<mode>3 and sqrt<mode>2 expanders.
131 (define_mode_iterator VF1
132 [(V8SF "TARGET_AVX") V4SF])
134 ;; All DFmode vector float modes
;; V2DF carries no condition here (unlike in VF).  Used below by the VF2
;; variants of the div<mode>3 and sqrt<mode>2 expanders.
135 (define_mode_iterator VF2
136 [(V4DF "TARGET_AVX") V2DF])
138 ;; All 128bit vector float modes
;; Used below by the <sse>_vm* patterns, whose mnemonics take the scalar
;; suffix <ssescalarmodesuffix>.
139 (define_mode_iterator VF_128
140 [V4SF (V2DF "TARGET_SSE2")])
142 ;; All 256bit vector float modes
;; NOTE(review): the mode list of VF_256 does not follow this header line
;; here -- confirm the definition is complete.
143 (define_mode_iterator VF_256
146 ;; All vector integer modes
;; The first mode on each row needs TARGET_AVX; the second is unconditional.
147 (define_mode_iterator VI
148 [(V32QI "TARGET_AVX") V16QI
149 (V16HI "TARGET_AVX") V8HI
150 (V8SI "TARGET_AVX") V4SI
151 (V4DI "TARGET_AVX") V2DI])
;; Same mode list as VI, but the first mode on each row is gated on
;; TARGET_AVX2 instead of TARGET_AVX.
153 (define_mode_iterator VI_AVX2
154 [(V32QI "TARGET_AVX2") V16QI
155 (V16HI "TARGET_AVX2") V8HI
156 (V8SI "TARGET_AVX2") V4SI
157 (V4DI "TARGET_AVX2") V2DI])
159 ;; All QImode vector integer modes
;; Used below by the loaddqu, storedqu and lddqu patterns.
160 (define_mode_iterator VI1
161 [(V32QI "TARGET_AVX") V16QI])
163 ;; All DImode vector integer modes
;; Used below by <sse2>_movnt<mode> (the movntdq store).
164 (define_mode_iterator VI8
165 [(V4DI "TARGET_AVX") V2DI])
;; One iterator per element kind (QI, HI, SI, DI).  In each, the first mode
;; requires TARGET_AVX2 and the second is unconditional.
167 (define_mode_iterator VI1_AVX2
168 [(V32QI "TARGET_AVX2") V16QI])
170 (define_mode_iterator VI2_AVX2
171 [(V16HI "TARGET_AVX2") V8HI])
173 (define_mode_iterator VI4_AVX2
174 [(V8SI "TARGET_AVX2") V4SI])
176 (define_mode_iterator VI8_AVX2
177 [(V4DI "TARGET_AVX2") V2DI])
179 ;; ??? We should probably use TImode instead.
;; V2TI requires TARGET_AVX2; V1TI is unconditional.
180 (define_mode_iterator VIMAX_AVX2
181 [(V2TI "TARGET_AVX2") V1TI])
183 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in the second entry: scalar TI instead of V1TI.
184 (define_mode_iterator SSESCALARMODE
185 [(V2TI "TARGET_AVX2") TI])
;; Integer-mode combinations.  The digits in each name match the element
;; kinds listed (1 = QI, 2 = HI, 4 = SI, 8 = DI).  On every row the first
;; mode requires TARGET_AVX2 and the second is unconditional.
187 (define_mode_iterator VI12_AVX2
188 [(V32QI "TARGET_AVX2") V16QI
189 (V16HI "TARGET_AVX2") V8HI])
191 (define_mode_iterator VI24_AVX2
192 [(V16HI "TARGET_AVX2") V8HI
193 (V8SI "TARGET_AVX2") V4SI])
195 (define_mode_iterator VI124_AVX2
196 [(V32QI "TARGET_AVX2") V16QI
197 (V16HI "TARGET_AVX2") V8HI
198 (V8SI "TARGET_AVX2") V4SI])
200 (define_mode_iterator VI248_AVX2
201 [(V16HI "TARGET_AVX2") V8HI
202 (V8SI "TARGET_AVX2") V4SI
203 (V4DI "TARGET_AVX2") V2DI])
205 (define_mode_iterator VI48_AVX2
206 [(V8SI "TARGET_AVX2") V4SI
207 (V4DI "TARGET_AVX2") V2DI])
;; All four integer entries shown require TARGET_AVX2, including V4SI and V2DI.
;; NOTE(review): no opening "[" is visible before the first entry -- confirm
;; the start of the list.
209 (define_mode_iterator V48_AVX2
212 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
213 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Mode -> ISA-name string tables.  In each table the first mode on a row
;; maps to the name on the left and the last mode maps to "avx2", except in
;; avx_avx2, where the float modes map to "avx".

;; <sse2_avx2>: "sse2" or "avx2".
215 (define_mode_attr sse2_avx2
216 [(V16QI "sse2") (V32QI "avx2")
217 (V8HI "sse2") (V16HI "avx2")
218 (V4SI "sse2") (V8SI "avx2")
219 (V2DI "sse2") (V4DI "avx2")
220 (V1TI "sse2") (V2TI "avx2")])
;; <ssse3_avx2>: "ssse3" or "avx2"; also lists V4HI and scalar TI.
222 (define_mode_attr ssse3_avx2
223 [(V16QI "ssse3") (V32QI "avx2")
224 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
225 (V4SI "ssse3") (V8SI "avx2")
226 (V2DI "ssse3") (V4DI "avx2")
227 (TI "ssse3") (V2TI "avx2")])
;; <sse4_1_avx2>: "sse4_1" or "avx2".
229 (define_mode_attr sse4_1_avx2
230 [(V16QI "sse4_1") (V32QI "avx2")
231 (V8HI "sse4_1") (V16HI "avx2")
232 (V4SI "sse4_1") (V8SI "avx2")
233 (V2DI "sse4_1") (V4DI "avx2")])
;; <avx_avx2>: "avx" for the float modes, "avx2" for the integer modes.
235 (define_mode_attr avx_avx2
236 [(V4SF "avx") (V2DF "avx")
237 (V8SF "avx") (V4DF "avx")
238 (V4SI "avx2") (V2DI "avx2")
239 (V8SI "avx2") (V4DI "avx2")])
;; <vec_avx2>: "vec" or "avx2".
241 (define_mode_attr vec_avx2
242 [(V16QI "vec") (V32QI "avx2")
243 (V8HI "vec") (V16HI "avx2")
244 (V4SI "vec") (V8SI "avx2")
245 (V2DI "vec") (V4DI "avx2")])
;; <ssedoublemode>: mode with the same element count and the next wider
;; integer element (HI -> SI, QI -> HI).
247 (define_mode_attr ssedoublemode
248 [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
249 (V32QI "V32HI") (V16QI "V16HI")])
;; <ssebytemode>: the QI-element mode named for each DI-element mode.
251 (define_mode_attr ssebytemode
252 [(V4DI "V32QI") (V2DI "V16QI")])
254 ;; All 128bit vector integer modes
;; Neither VI_128 nor VI_256 attaches a per-mode condition.
255 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
257 ;; All 256bit vector integer modes
258 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
260 ;; Random 128bit vector integer mode combinations
;; Unconditional subsets of the VI_128 modes.  Note that VI128_128 lists
;; V16QI, V8HI and V2DI (no V4SI).
261 (define_mode_iterator VI12_128 [V16QI V8HI])
262 (define_mode_iterator VI14_128 [V16QI V4SI])
263 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
264 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
265 (define_mode_iterator VI24_128 [V8HI V4SI])
266 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
267 (define_mode_iterator VI48_128 [V4SI V2DI])
269 ;; Random 256bit vector integer mode combinations
;; Unconditional subsets of the VI_256 modes.
270 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
271 (define_mode_iterator VI48_256 [V8SI V4DI])
273 ;; Int-float size matches
;; Each iterator pairs one integer mode with the float mode that has the
;; same element count and element width.
274 (define_mode_iterator VI4F_128 [V4SI V4SF])
275 (define_mode_iterator VI8F_128 [V2DI V2DF])
276 (define_mode_iterator VI4F_256 [V8SI V8SF])
277 (define_mode_iterator VI8F_256 [V4DI V4DF])
279 ;; Mapping from float mode to required SSE level
;; <sse> is substituted into pattern names below, e.g. <sse>_rcp<mode>2.
280 (define_mode_attr sse
281 [(SF "sse") (DF "sse2")
282 (V4SF "sse") (V2DF "sse2")
283 (V8SF "avx") (V4DF "avx")])
;; <sse2>: name prefix for the QI/DI integer vector modes, used below by
;; <sse2>_loaddqu, <sse2>_storedqu and <sse2>_movnt<mode>.
285 (define_mode_attr sse2
286 [(V16QI "sse2") (V32QI "avx")
287 (V2DI "sse2") (V4DI "avx")])
;; <sse3>: name prefix used below by <sse3>_lddqu<avxsizesuffix>.
289 (define_mode_attr sse3
290 [(V16QI "sse3") (V32QI "avx")])
;; <sse4_1>: "sse4_1" for the first two float modes listed, "avx" for the
;; last two.
292 (define_mode_attr sse4_1
293 [(V4SF "sse4_1") (V2DF "sse4_1")
294 (V8SF "avx") (V4DF "avx")])
;; <avxsizesuffix>: "256" for the first and third rows of modes, the empty
;; string for the others; appended to pattern names such as
;; <sse>_loadu<ssemodesuffix><avxsizesuffix>.
296 (define_mode_attr avxsizesuffix
297 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
298 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
299 (V8SF "256") (V4DF "256")
300 (V4SF "") (V2DF "")])
302 ;; SSE instruction mode
;; Integer vector modes map to the integer mode OI or TI; float vector modes
;; map to themselves.  Used as the insn "mode" attribute value, e.g. in
;; <sse2>_movnt<mode> and <sse3>_lddqu below.
303 (define_mode_attr sseinsnmode
304 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
305 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
306 (V8SF "V8SF") (V4DF "V4DF")
307 (V4SF "V4SF") (V2DF "V2DF")
310 ;; Mapping of vector float and integer modes to the integer vector mode of the same size
;; Float modes map to the integer mode with the same element count and
;; width; integer modes map to themselves.
311 (define_mode_attr sseintvecmode
312 [(V8SF "V8SI") (V4DF "V4DI")
313 (V4SF "V4SI") (V2DF "V2DI")
314 (V8SI "V8SI") (V4DI "V4DI")
315 (V4SI "V4SI") (V2DI "V2DI")
316 (V16HI "V16HI") (V8HI "V8HI")
317 (V32QI "V32QI") (V16QI "V16QI")])
;; Lower-case spelling of the sseintvecmode table (same keys, same targets).
319 (define_mode_attr sseintvecmodelower
320 [(V8SF "v8si") (V4DF "v4di")
321 (V4SF "v4si") (V2DF "v2di")
322 (V8SI "v8si") (V4DI "v4di")
323 (V4SI "v4si") (V2DI "v2di")
324 (V16HI "v16hi") (V8HI "v8hi")
325 (V32QI "v32qi") (V16QI "v16qi")])
327 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice the element count (e.g. V4SI -> V8SI).
328 (define_mode_attr ssedoublevecmode
329 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
330 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
331 (V8SF "V16SF") (V4DF "V8DF")
332 (V4SF "V8SF") (V2DF "V4DF")])
334 ;; Mapping of vector modes to a vector mode of half size
;; Same element type, half the element count (e.g. V8SI -> V4SI).
335 (define_mode_attr ssehalfvecmode
336 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
337 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
338 (V8SF "V4SF") (V4DF "V2DF")
341 ;; Mapping of vector modes to packed single mode of the same size
;; The first mode on each row maps to V8SF and the second to V4SF.  Used
;; below as the "mode" value chosen when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES applies.
342 (define_mode_attr ssePSmode
343 [(V32QI "V8SF") (V16QI "V4SF")
344 (V16HI "V8SF") (V8HI "V4SF")
345 (V8SI "V8SF") (V4SI "V4SF")
346 (V4DI "V8SF") (V2DI "V4SF")
347 (V2TI "V8SF") (V1TI "V4SF")
348 (V8SF "V8SF") (V4SF "V4SF")
349 (V4DF "V8SF") (V2DF "V4SF")])
351 ;; Mapping of vector modes back to the scalar modes
;; Element mode of each vector mode.  Used below as the "mode" attribute of
;; the <sse>_vm* patterns.
352 (define_mode_attr ssescalarmode
353 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
354 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
355 (V8SF "SF") (V4DF "DF")
356 (V4SF "SF") (V2DF "DF")])
358 ;; Number of scalar elements in each vector type
;; The value is the element count spelled in the mode name, as a string.
359 (define_mode_attr ssescalarnum
360 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
361 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
362 (V8SF "8") (V4DF "4")
363 (V4SF "4") (V2DF "2")])
365 ;; SSE prefix for integer vector modes
;; "p" for the integer modes listed, the empty string for the float modes.
366 (define_mode_attr sseintprefix
367 [(V2DI "p") (V2DF "")
370 (V8SI "p") (V8SF "")])
372 ;; SSE scalar suffix for vector modes
;; "ss" for the SF-element modes and V8SI, "sd" for the DF-element modes and
;; V4DI.  Appended to scalar mnemonics below, e.g. mul<ssescalarmodesuffix>.
373 (define_mode_attr ssescalarmodesuffix
375 (V8SF "ss") (V4DF "sd")
376 (V4SF "ss") (V2DF "sd")
377 (V8SI "ss") (V4DI "sd")
380 ;; Pack/unpack vector modes
;; <sseunpackmode>: half as many elements, each twice as wide.
381 (define_mode_attr sseunpackmode
382 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
383 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; <ssepackmode>: the inverse table -- twice as many elements, each half as
;; wide.
385 (define_mode_attr ssepackmode
386 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
387 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
389 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (8-1, 16-1, 32-1, 64-1).
390 (define_mode_attr sserotatemax
391 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
393 ;; Mapping of mode to cast intrinsic name
;; Defined only for the three modes of AVX256MODE2P.
394 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
396 ;; Instruction suffix for sign and zero extensions.
;; A code attribute (keyed by RTL code, not by mode): "sx" for sign_extend,
;; "zx" for zero_extend.
397 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
399 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; The float modes map to the literal "f128"; the integer modes map to
;; "%~128", leaving the i/f choice to the "%~" output escape.
400 (define_mode_attr i128
401 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
402 (V8SI "%~128") (V4DI "%~128")])
;; The three modes for which the castmode attribute is defined.
405 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
407 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for an N-element mode, i.e. one bit per element.
408 (define_mode_attr blendbits
409 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
411 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
413 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
417 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
419 ;; All of these patterns are enabled for SSE1 as well as SSE2.
420 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every V16 mode; expansion is delegated to
;; ix86_expand_vector_move.
422 (define_expand "mov<mode>"
423 [(set (match_operand:V16 0 "nonimmediate_operand")
424 (match_operand:V16 1 "nonimmediate_operand"))]
427 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every V16 mode.  Alternatives: 0 loads an SSE constant
;; (constraint C) into a register, 1 moves register/memory to a register,
;; 2 stores a register to memory.  The condition requires at least one
;; operand to be a register.
;; Output: alternative 0 takes its opcode from standard_sse_constant_opcode.
;; The other alternatives switch on the insn "mode" attribute and emit the
;; unaligned vmovups/vmovupd/vmovdqu when the misaligned_operand test on
;; either operand is part of a true condition, else the aligned
;; %vmovaps/%vmovapd/%vmovdqa.
;; The trailing cond prefers <ssePSmode> for
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and for typeless stores
;; (alternative 2), <sseinsnmode> under TARGET_AVX, and V4SF without SSE2 or
;; when optimizing the function for size.
431 (define_insn "*mov<mode>_internal"
432 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
433 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
435 && (register_operand (operands[0], <MODE>mode)
436 || register_operand (operands[1], <MODE>mode))"
438 switch (which_alternative)
441 return standard_sse_constant_opcode (insn, operands[1]);
444 switch (get_attr_mode (insn))
449 && (misaligned_operand (operands[0], <MODE>mode)
450 || misaligned_operand (operands[1], <MODE>mode)))
451 return "vmovups\t{%1, %0|%0, %1}";
453 return "%vmovaps\t{%1, %0|%0, %1}";
458 && (misaligned_operand (operands[0], <MODE>mode)
459 || misaligned_operand (operands[1], <MODE>mode)))
460 return "vmovupd\t{%1, %0|%0, %1}";
462 return "%vmovapd\t{%1, %0|%0, %1}";
467 && (misaligned_operand (operands[0], <MODE>mode)
468 || misaligned_operand (operands[1], <MODE>mode)))
469 return "vmovdqu\t{%1, %0|%0, %1}";
471 return "%vmovdqa\t{%1, %0|%0, %1}";
480 [(set_attr "type" "sselog1,ssemov,ssemov")
481 (set_attr "prefix" "maybe_vex")
483 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
484 (const_string "<ssePSmode>")
485 (and (eq_attr "alternative" "2")
486 (match_test "TARGET_SSE_TYPELESS_STORES"))
487 (const_string "<ssePSmode>")
488 (match_test "TARGET_AVX")
489 (const_string "<sseinsnmode>")
490 (ior (not (match_test "TARGET_SSE2"))
491 (match_test "optimize_function_for_size_p (cfun)"))
492 (const_string "V4SF")
493 (and (eq_attr "alternative" "0")
494 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
497 (const_string "<sseinsnmode>")))])
;; Emits %vmovq from a V2DI register-or-memory operand into a V2DI register.
;; The (parallel [(const_int 0)]) after operand 1 names element 0 of the
;; source.
499 (define_insn "sse2_movq128"
500 [(set (match_operand:V2DI 0 "register_operand" "=x")
503 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
504 (parallel [(const_int 0)]))
507 "%vmovq\t{%1, %0|%0, %1}"
508 [(set_attr "type" "ssemov")
509 (set_attr "prefix" "maybe_vex")
510 (set_attr "mode" "TI")])
512 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
513 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
514 ;; from memory, we'd prefer to load the memory directly into the %xmm
515 ;; register. To facilitate this happy circumstance, this pattern won't
516 ;; split until after register allocation. If the 64-bit value didn't
517 ;; come from memory, this is the best we can do. This is much better
518 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;; Enabled for !TARGET_64BIT with SSE2 and TARGET_INTER_UNIT_MOVES; splits
;; only once reload has completed.
;; Register source (alternative 0): sse2_loadld puts the SImode subreg at
;; byte 0 into operand 0 and the one at byte 4 into the scratch operand 2,
;; then vec_interleave_lowv4si writes operand 0.
;; Memory source (alternative 1): a single vec_concatv2di of the DImode
;; value with const0_rtx; the scratch is unused (constraint X).
521 (define_insn_and_split "movdi_to_sse"
523 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
524 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
525 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
526 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
528 "&& reload_completed"
531 if (register_operand (operands[1], DImode))
533 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
534 Assemble the 64-bit DImode value in an xmm register. */
535 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
536 gen_rtx_SUBREG (SImode, operands[1], 0)));
537 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
538 gen_rtx_SUBREG (SImode, operands[1], 4)));
539 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
542 else if (memory_operand (operands[1], DImode))
543 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
544 operands[1], const0_rtx));
;; Post-reload rewrite (TARGET_SSE) of a V4SF register load from a
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at byte 0 and used inside a vec_duplicate; operand 2 becomes the
;; V4SF zero constant.
550 [(set (match_operand:V4SF 0 "register_operand")
551 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
552 "TARGET_SSE && reload_completed"
555 (vec_duplicate:V4SF (match_dup 1))
559 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
560 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite (TARGET_SSE2) of a V2DF register load from a
;; zero_extended_scalar_load_operand.  The result is a vec_concat of the
;; DFmode subreg of operand 1 with a DFmode zero (operand 2).
564 [(set (match_operand:V2DF 0 "register_operand")
565 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
566 "TARGET_SSE2 && reload_completed"
567 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
569 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
570 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register in any V16 mode; delegated to
;; ix86_expand_push.
573 (define_expand "push<mode>1"
574 [(match_operand:V16 0 "register_operand")]
577 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every V16 mode; delegated to
;; ix86_expand_vector_move_misalign.
581 (define_expand "movmisalign<mode>"
582 [(set (match_operand:V16 0 "nonimmediate_operand")
583 (match_operand:V16 1 "nonimmediate_operand"))]
586 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned float-vector load: memory operand 1 into register operand 0,
;; VF modes.  The output code switches on the insn "mode" attribute and
;; returns either %vmovups or %vmovu<ssemodesuffix>.  The trailing cond
;; picks <ssePSmode> under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <MODE>
;; under TARGET_AVX, and V4SF when optimizing the function for size.
590 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
591 [(set (match_operand:VF 0 "register_operand" "=x")
593 [(match_operand:VF 1 "memory_operand" "m")]
597 switch (get_attr_mode (insn))
601 return "%vmovups\t{%1, %0|%0, %1}";
603 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
606 [(set_attr "type" "ssemov")
607 (set_attr "movu" "1")
608 (set_attr "prefix" "maybe_vex")
610 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
611 (const_string "<ssePSmode>")
612 (match_test "TARGET_AVX")
613 (const_string "<MODE>")
614 (match_test "optimize_function_for_size_p (cfun)")
615 (const_string "V4SF")
617 (const_string "<MODE>")))])
;; Unaligned float-vector store: register operand 1 into memory operand 0,
;; VF modes.  Same output selection as the loadu pattern.  Unlike the load,
;; the trailing cond also picks <ssePSmode> under TARGET_SSE_TYPELESS_STORES.
619 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
620 [(set (match_operand:VF 0 "memory_operand" "=m")
622 [(match_operand:VF 1 "register_operand" "x")]
626 switch (get_attr_mode (insn))
630 return "%vmovups\t{%1, %0|%0, %1}";
632 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
635 [(set_attr "type" "ssemov")
636 (set_attr "movu" "1")
637 (set_attr "prefix" "maybe_vex")
639 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
640 (match_test "TARGET_SSE_TYPELESS_STORES"))
641 (const_string "<ssePSmode>")
642 (match_test "TARGET_AVX")
643 (const_string "<MODE>")
644 (match_test "optimize_function_for_size_p (cfun)")
645 (const_string "V4SF")
647 (const_string "<MODE>")))])
;; Unaligned integer-vector load: memory operand 1 into register operand 0,
;; VI1 modes.  The output code switches on the insn "mode" attribute and
;; returns either %vmovups or %vmovdqu.  The prefix_data16 value depends on
;; TARGET_AVX.
649 (define_insn "<sse2>_loaddqu<avxsizesuffix>"
650 [(set (match_operand:VI1 0 "register_operand" "=x")
651 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
655 switch (get_attr_mode (insn))
659 return "%vmovups\t{%1, %0|%0, %1}";
661 return "%vmovdqu\t{%1, %0|%0, %1}";
664 [(set_attr "type" "ssemov")
665 (set_attr "movu" "1")
666 (set (attr "prefix_data16")
668 (match_test "TARGET_AVX")
671 (set_attr "prefix" "maybe_vex")
673 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
674 (const_string "<ssePSmode>")
675 (match_test "TARGET_AVX")
676 (const_string "<sseinsnmode>")
677 (match_test "optimize_function_for_size_p (cfun)")
678 (const_string "V4SF")
680 (const_string "<sseinsnmode>")))])
;; Unaligned integer-vector store: register operand 1 into memory operand 0,
;; VI1 modes.  Same output selection as the loaddqu pattern.  The trailing
;; cond also picks <ssePSmode> under TARGET_SSE_TYPELESS_STORES.
682 (define_insn "<sse2>_storedqu<avxsizesuffix>"
683 [(set (match_operand:VI1 0 "memory_operand" "=m")
684 (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
688 switch (get_attr_mode (insn))
692 return "%vmovups\t{%1, %0|%0, %1}";
694 return "%vmovdqu\t{%1, %0|%0, %1}";
697 [(set_attr "type" "ssemov")
698 (set_attr "movu" "1")
699 (set (attr "prefix_data16")
701 (match_test "TARGET_AVX")
704 (set_attr "prefix" "maybe_vex")
706 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
707 (match_test "TARGET_SSE_TYPELESS_STORES"))
708 (const_string "<ssePSmode>")
709 (match_test "TARGET_AVX")
710 (const_string "<sseinsnmode>")
711 (match_test "optimize_function_for_size_p (cfun)")
712 (const_string "V4SF")
714 (const_string "<sseinsnmode>")))])
;; Emits %vlddqu: memory operand 1 into register operand 0, VI1 modes.  The
;; prefix_data16 and prefix_rep values both depend on TARGET_AVX.
716 (define_insn "<sse3>_lddqu<avxsizesuffix>"
717 [(set (match_operand:VI1 0 "register_operand" "=x")
718 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
721 "%vlddqu\t{%1, %0|%0, %1}"
722 [(set_attr "type" "ssemov")
723 (set_attr "movu" "1")
724 (set (attr "prefix_data16")
726 (match_test "TARGET_AVX")
729 (set (attr "prefix_rep")
731 (match_test "TARGET_AVX")
734 (set_attr "prefix" "maybe_vex")
735 (set_attr "mode" "<sseinsnmode>")])
;; Emits movnti from a general register (constraint r) to memory, for the
;; SWI48 modes.  No "prefix" attribute is set here, unlike the vector
;; patterns around it.
737 (define_insn "sse2_movnti<mode>"
738 [(set (match_operand:SWI48 0 "memory_operand" "=m")
739 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
742 "movnti\t{%1, %0|%0, %1}"
743 [(set_attr "type" "ssemov")
744 (set_attr "prefix_data16" "0")
745 (set_attr "mode" "<MODE>")])
;; Emits %vmovnt<ssemodesuffix>: SSE register operand 1 to memory operand 0,
;; VF modes.
747 (define_insn "<sse>_movnt<mode>"
748 [(set (match_operand:VF 0 "memory_operand" "=m")
749 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
752 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
753 [(set_attr "type" "ssemov")
754 (set_attr "prefix" "maybe_vex")
755 (set_attr "mode" "<MODE>")])
;; Emits %vmovntdq: SSE register operand 1 to memory operand 0, VI8 modes.
;; The "type" attribute is "ssecvt" here, whereas the other movnt patterns
;; use "ssemov".
757 (define_insn "<sse2>_movnt<mode>"
758 [(set (match_operand:VI8 0 "memory_operand" "=m")
759 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
762 "%vmovntdq\t{%1, %0|%0, %1}"
763 [(set_attr "type" "ssecvt")
764 (set (attr "prefix_data16")
766 (match_test "TARGET_AVX")
769 (set_attr "prefix" "maybe_vex")
770 (set_attr "mode" "<sseinsnmode>")])
772 ; Expand patterns for non-temporal stores. At the moment, only those
773 ; that directly map to insns are defined; it would be possible to
774 ; define patterns for other modes that would expand to several insns.
776 ;; Modes handled by storent patterns.
;; Per-mode conditions: DI needs SSE2 and a 64-bit target, SI needs SSE2,
;; SF/DF need SSE4A, the V4DI/V8SF/V4DF entries need AVX, V2DI/V2DF need
;; SSE2, and V4SF is unconditional.
777 (define_mode_iterator STORENT_MODE
778 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
779 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
780 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
781 (V8SF "TARGET_AVX") V4SF
782 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Non-temporal store expander: register operand 1 to memory operand 0, one
;; instance per STORENT_MODE mode.
784 (define_expand "storent<mode>"
785 [(set (match_operand:STORENT_MODE 0 "memory_operand")
787 [(match_operand:STORENT_MODE 1 "register_operand")]
791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
793 ;; Parallel floating point arithmetic
795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float-vector expander over VF.  All work is done by
;; ix86_expand_fp_absneg_operator, after which DONE stops the expansion.
797 (define_expand "<code><mode>2"
798 [(set (match_operand:VF 0 "register_operand")
800 (match_operand:VF 1 "register_operand")))]
802 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Matches an absneg_operator (operand 3) applied to operand 1, together
;; with a (use ...) of operand 2; presumably operand 2 is the sign-bit mask
;; -- verify against ix86_expand_fp_absneg_operator.
;; After reload it is split into a single SET of operand 0: XOR of the two
;; inputs when operand 3 is NEG, AND otherwise.  If operand 1 is in memory
;; the two inputs are swapped.  Alternatives 0-1 are noavx, 2-3 are avx.
804 (define_insn_and_split "*absneg<mode>2"
805 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
806 (match_operator:VF 3 "absneg_operator"
807 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
808 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
811 "&& reload_completed"
814 enum rtx_code absneg_op;
820 if (MEM_P (operands[1]))
821 op1 = operands[2], op2 = operands[1];
823 op1 = operands[1], op2 = operands[2];
828 if (rtx_equal_p (operands[0], operands[1]))
834 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
835 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
836 t = gen_rtx_SET (VOIDmode, operands[0], t);
840 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Packed float add/sub expander over VF.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the RTL template is then emitted.
842 (define_expand "<plusminus_insn><mode>3"
843 [(set (match_operand:VF 0 "register_operand")
845 (match_operand:VF 1 "nonimmediate_operand")
846 (match_operand:VF 2 "nonimmediate_operand")))]
848 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float add/sub insn.  Alternative 0 (noavx) is two-operand with
;; operand 1 tied to the destination; alternative 1 (avx) is the
;; three-operand v-prefixed form.  <comm> supplies the commutativity marker
;; for operand 1.
850 (define_insn "*<plusminus_insn><mode>3"
851 [(set (match_operand:VF 0 "register_operand" "=x,x")
853 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
854 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
855 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
857 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
858 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
859 [(set_attr "isa" "noavx,avx")
860 (set_attr "type" "sseadd")
861 (set_attr "prefix" "orig,vex")
862 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed add/sub on VF_128 vectors; the mnemonic takes
;; <ssescalarmodesuffix> and the insn mode is <ssescalarmode>.  Operand 1
;; must be a register, tied to the destination in the noavx alternative.
864 (define_insn "<sse>_vm<plusminus_insn><mode>3"
865 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
868 (match_operand:VF_128 1 "register_operand" "0,x")
869 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
874 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
875 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
876 [(set_attr "isa" "noavx,avx")
877 (set_attr "type" "sseadd")
878 (set_attr "prefix" "orig,vex")
879 (set_attr "mode" "<ssescalarmode>")])
;; Packed float multiply expander over VF.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy with code MULT.
881 (define_expand "mul<mode>3"
882 [(set (match_operand:VF 0 "register_operand")
884 (match_operand:VF 1 "nonimmediate_operand")
885 (match_operand:VF 2 "nonimmediate_operand")))]
887 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed float multiply insn.  Operand 1 is marked commutative ("%").
;; Alternative 0 (noavx) is two-operand; alternative 1 (avx) is
;; three-operand.
889 (define_insn "*mul<mode>3"
890 [(set (match_operand:VF 0 "register_operand" "=x,x")
892 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
893 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
894 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
896 mul<ssemodesuffix>\t{%2, %0|%0, %2}
897 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
898 [(set_attr "isa" "noavx,avx")
899 (set_attr "type" "ssemul")
900 (set_attr "prefix" "orig,vex")
901 (set_attr "btver2_decode" "direct,double")
902 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed multiply on VF_128 vectors (mul<ssescalarmodesuffix>);
;; insn mode is <ssescalarmode>.  Operand 1 must be a register.
904 (define_insn "<sse>_vmmul<mode>3"
905 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
908 (match_operand:VF_128 1 "register_operand" "0,x")
909 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
914 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
915 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
916 [(set_attr "isa" "noavx,avx")
917 (set_attr "type" "ssemul")
918 (set_attr "prefix" "orig,vex")
919 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the DF-element modes (VF2).  It only fixes up the
;; operands; the VF1 variant of div<mode>3 additionally has a
;; software-division path.
921 (define_expand "div<mode>3"
922 [(set (match_operand:VF2 0 "register_operand")
923 (div:VF2 (match_operand:VF2 1 "register_operand")
924 (match_operand:VF2 2 "nonimmediate_operand")))]
926 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the SF-element modes (VF1).  After the operand
;; fixup, ix86_emit_swdivsf is called instead of emitting a div when the
;; guard holds.  The visible guard terms are TARGET_RECIP_VEC_DIV, not
;; optimizing the insn for size, -ffinite-math-only, no trapping math and
;; unsafe math optimizations.
928 (define_expand "div<mode>3"
929 [(set (match_operand:VF1 0 "register_operand")
930 (div:VF1 (match_operand:VF1 1 "register_operand")
931 (match_operand:VF1 2 "nonimmediate_operand")))]
934 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
937 && TARGET_RECIP_VEC_DIV
938 && !optimize_insn_for_size_p ()
939 && flag_finite_math_only && !flag_trapping_math
940 && flag_unsafe_math_optimizations)
942 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed float divide insn over VF.  Operand 1 must be a register and has
;; no commutativity marker.  Alternative 0 is noavx, alternative 1 is avx.
947 (define_insn "<sse>_div<mode>3"
948 [(set (match_operand:VF 0 "register_operand" "=x,x")
950 (match_operand:VF 1 "register_operand" "0,x")
951 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
954 div<ssemodesuffix>\t{%2, %0|%0, %2}
955 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
956 [(set_attr "isa" "noavx,avx")
957 (set_attr "type" "ssediv")
958 (set_attr "prefix" "orig,vex")
959 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed divide on VF_128 vectors (div<ssescalarmodesuffix>);
;; insn mode is <ssescalarmode>.
961 (define_insn "<sse>_vmdiv<mode>3"
962 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
965 (match_operand:VF_128 1 "register_operand" "0,x")
966 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
971 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
972 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
973 [(set_attr "isa" "noavx,avx")
974 (set_attr "type" "ssediv")
975 (set_attr "prefix" "orig,vex")
976 (set_attr "btver2_decode" "direct,double")
977 (set_attr "mode" "<ssescalarmode>")])
;; Emits %vrcpps for the SF-element modes (VF1); the operation is
;; represented as UNSPEC_RCP of operand 1.
979 (define_insn "<sse>_rcp<mode>2"
980 [(set (match_operand:VF1 0 "register_operand" "=x")
982 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
984 "%vrcpps\t{%1, %0|%0, %1}"
985 [(set_attr "type" "sse")
986 (set_attr "atom_sse_attr" "rcp")
987 (set_attr "btver2_sse_attr" "rcp")
988 (set_attr "prefix" "maybe_vex")
989 (set_attr "mode" "<MODE>")])
;; Emits rcpss / vrcpss on V4SF.  Operand 1 is the source of the operation;
;; operand 2 is a second register input, tied to the destination in the
;; noavx alternative and passed as the extra source in the avx form.
991 (define_insn "sse_vmrcpv4sf2"
992 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
994 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
996 (match_operand:V4SF 2 "register_operand" "0,x")
1000 rcpss\t{%1, %0|%0, %1}
1001 vrcpss\t{%1, %2, %0|%0, %2, %1}"
1002 [(set_attr "isa" "noavx,avx")
1003 (set_attr "type" "sse")
1004 (set_attr "atom_sse_attr" "rcp")
1005 (set_attr "btver2_sse_attr" "rcp")
1006 (set_attr "prefix" "orig,vex")
1007 (set_attr "mode" "SF")])
;; Square-root expander for the DF-element modes (VF2); it shows no
;; preparation code, unlike the VF1 variant of sqrt<mode>2.
1009 (define_expand "sqrt<mode>2"
1010 [(set (match_operand:VF2 0 "register_operand")
1011 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Square-root expander for the SF-element modes (VF1).  When the guard
;; holds, ix86_emit_swsqrtsf is called with its last argument false.  The
;; visible guard terms are TARGET_RECIP_VEC_SQRT, not optimizing the insn
;; for size, finite-math-only, no trapping math and unsafe math
;; optimizations.
1014 (define_expand "sqrt<mode>2"
1015 [(set (match_operand:VF1 0 "register_operand")
1016 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1020 && TARGET_RECIP_VEC_SQRT
1021 && !optimize_insn_for_size_p ()
1022 && flag_finite_math_only && !flag_trapping_math
1023 && flag_unsafe_math_optimizations)
1025 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Emits %vsqrt<ssemodesuffix> for all VF modes: register-or-memory source,
;; register destination, single alternative.
1030 (define_insn "<sse>_sqrt<mode>2"
1031 [(set (match_operand:VF 0 "register_operand" "=x")
1032 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
1034 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
1035 [(set_attr "type" "sse")
1036 (set_attr "atom_sse_attr" "sqrt")
1037 (set_attr "btver2_sse_attr" "sqrt")
1038 (set_attr "prefix" "maybe_vex")
1039 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed square root on VF_128 vectors.  Operand 1 is the sqrt
;; source; operand 2 is a second register input, tied to the destination in
;; the noavx alternative and passed as the extra source in the avx form.
1041 (define_insn "<sse>_vmsqrt<mode>2"
1042 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1045 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
1046 (match_operand:VF_128 2 "register_operand" "0,x")
1050 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
1051 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1052 [(set_attr "isa" "noavx,avx")
1053 (set_attr "type" "sse")
1054 (set_attr "atom_sse_attr" "sqrt")
1055 (set_attr "btver2_sse_attr" "sqrt")
1056 (set_attr "prefix" "orig,vex")
1057 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for VF1, represented as UNSPEC_RSQRT.
;; It calls ix86_emit_swsqrtsf with its last argument true (the sqrt<mode>2
;; expander passes false).
1059 (define_expand "rsqrt<mode>2"
1060 [(set (match_operand:VF1 0 "register_operand")
1062 [(match_operand:VF1 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1065 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Emits %vrsqrtps for the SF-element modes (VF1); UNSPEC_RSQRT of operand 1.
1069 (define_insn "<sse>_rsqrt<mode>2"
1070 [(set (match_operand:VF1 0 "register_operand" "=x")
1072 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1074 "%vrsqrtps\t{%1, %0|%0, %1}"
1075 [(set_attr "type" "sse")
1076 (set_attr "prefix" "maybe_vex")
1077 (set_attr "mode" "<MODE>")])
;; Emits rsqrtss / vrsqrtss on V4SF, with the same operand layout as
;; sse_vmrcpv4sf2: operand 1 is the source, operand 2 the second register
;; input.
1079 (define_insn "sse_vmrsqrtv4sf2"
1080 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1082 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1084 (match_operand:V4SF 2 "register_operand" "0,x")
1088 rsqrtss\t{%1, %0|%0, %1}
1089 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1090 [(set_attr "isa" "noavx,avx")
1091 (set_attr "type" "sse")
1092 (set_attr "prefix" "orig,vex")
1093 (set_attr "mode" "SF")])
1095 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1096 ;; isn't really correct, as those rtl operators aren't defined when
1097 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Packed float min/max expander over VF.  Without -ffinite-math-only,
;; operand 1 is first forced into a register, which matches the
;; register_operand required by the non-finite insn *<code><mode>3.  The
;; operands then go through ix86_fixup_binary_operands_no_copy.
1099 (define_expand "<code><mode>3"
1100 [(set (match_operand:VF 0 "register_operand")
1102 (match_operand:VF 1 "nonimmediate_operand")
1103 (match_operand:VF 2 "nonimmediate_operand")))]
1106 if (!flag_finite_math_only)
1107 operands[1] = force_reg (<MODE>mode, operands[1]);
1108 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used when flag_finite_math_only is set.  Operand 1 is
;; marked commutative ("%") and may be in memory.
1111 (define_insn "*<code><mode>3_finite"
1112 [(set (match_operand:VF 0 "register_operand" "=x,x")
1114 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1115 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1116 "TARGET_SSE && flag_finite_math_only
1117 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1119 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1120 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1121 [(set_attr "isa" "noavx,avx")
1122 (set_attr "type" "sseadd")
1123 (set_attr "btver2_sse_attr" "maxmin")
1124 (set_attr "prefix" "orig,vex")
1125 (set_attr "mode" "<MODE>")])
;; Min/max insn used when flag_finite_math_only is clear.  Operand 1 must be
;; a register and carries no commutativity marker, so the operand order is
;; kept.
1127 (define_insn "*<code><mode>3"
1128 [(set (match_operand:VF 0 "register_operand" "=x,x")
1130 (match_operand:VF 1 "register_operand" "0,x")
1131 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1132 "TARGET_SSE && !flag_finite_math_only"
1134 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1135 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1136 [(set_attr "isa" "noavx,avx")
1137 (set_attr "type" "sseadd")
1138 (set_attr "btver2_sse_attr" "maxmin")
1139 (set_attr "prefix" "orig,vex")
1140 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed min/max on VF_128 vectors; insn mode is <ssescalarmode>.
;; The "type" attribute is "sse" here, whereas the packed min/max insns use
;; "sseadd".
1142 (define_insn "<sse>_vm<code><mode>3"
1143 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1146 (match_operand:VF_128 1 "register_operand" "0,x")
1147 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1152 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1153 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1154 [(set_attr "isa" "noavx,avx")
1155 (set_attr "type" "sse")
1156 (set_attr "btver2_sse_attr" "maxmin")
1157 (set_attr "prefix" "orig,vex")
1158 (set_attr "mode" "<ssescalarmode>")])
1160 ;; These versions of the min/max patterns implement exactly the operations
1161 ;; min = (op1 < op2 ? op1 : op2)
1162 ;; max = (!(op1 < op2) ? op1 : op2)
1163 ;; Their operands are not commutative, and thus they may be used in the
1164 ;; presence of -0.0 and NaN.
;; Order-preserving min over VF, emitting min<ssemodesuffix> /
;; vmin<ssemodesuffix>.  Operand 1 must be a register and has no
;; commutativity marker.
1166 (define_insn "*ieee_smin<mode>3"
1167 [(set (match_operand:VF 0 "register_operand" "=x,x")
1169 [(match_operand:VF 1 "register_operand" "0,x")
1170 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1174 min<ssemodesuffix>\t{%2, %0|%0, %2}
1175 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1176 [(set_attr "isa" "noavx,avx")
1177 (set_attr "type" "sseadd")
1178 (set_attr "prefix" "orig,vex")
1179 (set_attr "mode" "<MODE>")])
;; Order-preserving max over VF, emitting max<ssemodesuffix> /
;; vmax<ssemodesuffix>.  Same operand layout as *ieee_smin<mode>3.
1181 (define_insn "*ieee_smax<mode>3"
1182 [(set (match_operand:VF 0 "register_operand" "=x,x")
1184 [(match_operand:VF 1 "register_operand" "0,x")
1185 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1189 max<ssemodesuffix>\t{%2, %0|%0, %2}
1190 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1191 [(set_attr "isa" "noavx,avx")
1192 (set_attr "type" "sseadd")
1193 (set_attr "prefix" "orig,vex")
1194 (set_attr "mode" "<MODE>")])
;; V4DF add/subtract insn, emitted as vaddsubpd.  The RTL combines a result
;; over operands 1 and 2 with (minus op1 op2) on the same operands.
;; Single VEX-only alternative; operand 2 may be a register or memory.
1196 (define_insn "avx_addsubv4df3"
1197 [(set (match_operand:V4DF 0 "register_operand" "=x")
1200 (match_operand:V4DF 1 "register_operand" "x")
1201 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1202 (minus:V4DF (match_dup 1) (match_dup 2))
1205 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1206 [(set_attr "type" "sseadd")
1207 (set_attr "prefix" "vex")
1208 (set_attr "mode" "V4DF")])
;; V2DF add/subtract insn (addsubpd / vaddsubpd).
;; Alternative 0: legacy two-operand form, operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
;; The insn is tagged atom_unit "complex" for scheduling.
1210 (define_insn "sse3_addsubv2df3"
1211 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1214 (match_operand:V2DF 1 "register_operand" "0,x")
1215 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1216 (minus:V2DF (match_dup 1) (match_dup 2))
1220 addsubpd\t{%2, %0|%0, %2}
1221 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1222 [(set_attr "isa" "noavx,avx")
1223 (set_attr "type" "sseadd")
1224 (set_attr "atom_unit" "complex")
1225 (set_attr "prefix" "orig,vex")
1226 (set_attr "mode" "V2DF")])
;; V8SF add/subtract insn, emitted as vaddsubps.  Same shape as
;; avx_addsubv4df3: one VEX-only alternative, operand 2 may be memory.
1228 (define_insn "avx_addsubv8sf3"
1229 [(set (match_operand:V8SF 0 "register_operand" "=x")
1232 (match_operand:V8SF 1 "register_operand" "x")
1233 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1234 (minus:V8SF (match_dup 1) (match_dup 2))
1237 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1238 [(set_attr "type" "sseadd")
1239 (set_attr "prefix" "vex")
1240 (set_attr "mode" "V8SF")])
;; V4SF add/subtract insn (addsubps / vaddsubps).
;; Alternative 0: legacy two-operand form, operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
;; prefix_rep is "1" for the legacy alternative and "*" (default) for the
;; VEX alternative; this feeds the insn length computation.
1242 (define_insn "sse3_addsubv4sf3"
1243 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1246 (match_operand:V4SF 1 "register_operand" "0,x")
1247 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1248 (minus:V4SF (match_dup 1) (match_dup 2))
1252 addsubps\t{%2, %0|%0, %2}
1253 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1254 [(set_attr "isa" "noavx,avx")
1255 (set_attr "type" "sseadd")
1256 (set_attr "prefix" "orig,vex")
1257 (set_attr "prefix_rep" "1,*")
1258 (set_attr "mode" "V4SF")])
;; V4DF horizontal add/subtract (vhaddpd / vhsubpd via <plusminus_mnemonic>).
;; The vec_selects pair up adjacent elements in this order:
;;   op1[0],op1[1]   op2[0],op2[1]   op1[2],op1[3]   op2[2],op2[3]
;; VEX-only, single alternative; operand 2 may be a register or memory.
1260 (define_insn "avx_h<plusminus_insn>v4df3"
1261 [(set (match_operand:V4DF 0 "register_operand" "=x")
1266 (match_operand:V4DF 1 "register_operand" "x")
1267 (parallel [(const_int 0)]))
1268 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1271 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1272 (parallel [(const_int 0)]))
1273 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1276 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1277 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1279 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1280 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1282 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1283 [(set_attr "type" "sseadd")
1284 (set_attr "prefix" "vex")
1285 (set_attr "mode" "V4DF")])
;; Named expander for the V2DF horizontal add.  It builds RTL that selects
;; element 0 and element 1 of operand 1, then element 0 and element 1 of
;; operand 2.  There is no output template here; the generated RTL is
;; presumably recognized by the unnamed *sse3_haddv2df3 insn.
1287 (define_expand "sse3_haddv2df3"
1288 [(set (match_operand:V2DF 0 "register_operand")
1292 (match_operand:V2DF 1 "register_operand")
1293 (parallel [(const_int 0)]))
1294 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1297 (match_operand:V2DF 2 "nonimmediate_operand")
1298 (parallel [(const_int 0)]))
1299 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; Recognizer for haddpd / vhaddpd on V2DF.  The four element selectors are
;; themselves operands (3 and 4 for operand 1, 5 and 6 for operand 2), each
;; restricted to 0..1.  The insn condition only requires the two selectors
;; of each source to differ, so either ordering of a pair is accepted.
;; Alternative 0: legacy form with operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
1302 (define_insn "*sse3_haddv2df3"
1303 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1307 (match_operand:V2DF 1 "register_operand" "0,x")
1308 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1311 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1314 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1315 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1318 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1320 && INTVAL (operands[3]) != INTVAL (operands[4])
1321 && INTVAL (operands[5]) != INTVAL (operands[6])"
1323 haddpd\t{%2, %0|%0, %2}
1324 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1325 [(set_attr "isa" "noavx,avx")
1326 (set_attr "type" "sseadd")
1327 (set_attr "prefix" "orig,vex")
1328 (set_attr "mode" "V2DF")])
;; V2DF horizontal subtract (hsubpd / vhsubpd).  Unlike *sse3_haddv2df3,
;; the element selectors are the fixed constants 0 and 1 for each source.
;; Alternative 0: legacy form with operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
1330 (define_insn "sse3_hsubv2df3"
1331 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1335 (match_operand:V2DF 1 "register_operand" "0,x")
1336 (parallel [(const_int 0)]))
1337 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1340 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1341 (parallel [(const_int 0)]))
1342 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1345 hsubpd\t{%2, %0|%0, %2}
1346 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1347 [(set_attr "isa" "noavx,avx")
1348 (set_attr "type" "sseadd")
1349 (set_attr "prefix" "orig,vex")
1350 (set_attr "mode" "V2DF")])
;; DF-mode result computed from the two elements of one V2DF register.
;; The selectors (operands 2 and 3) are each 0..1 and must differ.
;; The templates pass the same register as both sources:
;; haddpd %0,%0 in the legacy alternative (operand 1 tied to operand 0),
;; vhaddpd %1,%1,%0 in the VEX alternative.
1352 (define_insn "*sse3_haddv2df3_low"
1353 [(set (match_operand:DF 0 "register_operand" "=x,x")
1356 (match_operand:V2DF 1 "register_operand" "0,x")
1357 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1360 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1362 && INTVAL (operands[2]) != INTVAL (operands[3])"
1364 haddpd\t{%0, %0|%0, %0}
1365 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1366 [(set_attr "isa" "noavx,avx")
1367 (set_attr "type" "sseadd1")
1368 (set_attr "prefix" "orig,vex")
1369 (set_attr "mode" "V2DF")])
;; DF-mode horizontal subtract of the two elements of one V2DF register.
;; The selectors are the fixed constants 0 and 1.  As in
;; *sse3_haddv2df3_low, the templates use one register for both sources
;; (hsubpd %0,%0 / vhsubpd %1,%1,%0).
1371 (define_insn "*sse3_hsubv2df3_low"
1372 [(set (match_operand:DF 0 "register_operand" "=x,x")
1375 (match_operand:V2DF 1 "register_operand" "0,x")
1376 (parallel [(const_int 0)]))
1379 (parallel [(const_int 1)]))))]
1382 hsubpd\t{%0, %0|%0, %0}
1383 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1384 [(set_attr "isa" "noavx,avx")
1385 (set_attr "type" "sseadd1")
1386 (set_attr "prefix" "orig,vex")
1387 (set_attr "mode" "V2DF")])
;; V8SF horizontal add/subtract (vhaddps / vhsubps via <plusminus_mnemonic>).
;; The vec_selects pair up adjacent elements in this order:
;;   op1[0],op1[1]  op1[2],op1[3]  op2[0],op2[1]  op2[2],op2[3]
;;   op1[4],op1[5]  op1[6],op1[7]  op2[4],op2[5]  op2[6],op2[7]
;; VEX-only, single alternative; operand 2 may be a register or memory.
1389 (define_insn "avx_h<plusminus_insn>v8sf3"
1390 [(set (match_operand:V8SF 0 "register_operand" "=x")
1396 (match_operand:V8SF 1 "register_operand" "x")
1397 (parallel [(const_int 0)]))
1398 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1400 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1401 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1405 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1406 (parallel [(const_int 0)]))
1407 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1409 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1410 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1414 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1415 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1417 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1418 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1421 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1422 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1424 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1425 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1427 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1428 [(set_attr "type" "sseadd")
1429 (set_attr "prefix" "vex")
1430 (set_attr "mode" "V8SF")])
;; V4SF horizontal add/subtract (haddps / hsubps and their VEX forms).
;; Element pairs, in order: op1[0],op1[1]  op1[2],op1[3]
;;                          op2[0],op2[1]  op2[2],op2[3]
;; Alternative 0: legacy form with operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
;; Tagged atom_unit "complex"; prefix_rep is "1" only for the legacy form.
1432 (define_insn "sse3_h<plusminus_insn>v4sf3"
1433 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1438 (match_operand:V4SF 1 "register_operand" "0,x")
1439 (parallel [(const_int 0)]))
1440 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1442 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1443 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1447 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1448 (parallel [(const_int 0)]))
1449 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1451 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1452 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1455 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1456 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1457 [(set_attr "isa" "noavx,avx")
1458 (set_attr "type" "sseadd")
1459 (set_attr "atom_unit" "complex")
1460 (set_attr "prefix" "orig,vex")
1461 (set_attr "prefix_rep" "1,*")
1462 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4DF vector (operand 1) into operand 0.
;; Sequence emitted: (1) horizontal-add operand 1 with itself into tmp,
;; (2) vperm2f128 of tmp with itself and immediate 1 into tmp2,
;; (3) element-wise add of tmp and tmp2 into operand 0.
;; NOTE(review): the enabling condition is not among the visible lines.
1464 (define_expand "reduc_splus_v4df"
1465 [(match_operand:V4DF 0 "register_operand")
1466 (match_operand:V4DF 1 "register_operand")]
1469 rtx tmp = gen_reg_rtx (V4DFmode);
1470 rtx tmp2 = gen_reg_rtx (V4DFmode);
1471 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1472 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1473 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
;; NOTE(review): the enabling condition is not among the visible lines.
1477 (define_expand "reduc_splus_v2df"
1478 [(match_operand:V2DF 0 "register_operand")
1479 (match_operand:V2DF 1 "register_operand")]
1482 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V8SF vector (operand 1) into operand 0.
;; Sequence emitted: two successive horizontal adds (operand 1 with itself
;; into tmp, then tmp with itself into tmp2), a vperm2f128 of tmp2 with
;; itself and immediate 1 (tmp is reused as its destination), and a final
;; element-wise add of tmp and tmp2.
;; NOTE(review): the enabling condition is not among the visible lines.
1486 (define_expand "reduc_splus_v8sf"
1487 [(match_operand:V8SF 0 "register_operand")
1488 (match_operand:V8SF 1 "register_operand")]
1491 rtx tmp = gen_reg_rtx (V8SFmode);
1492 rtx tmp2 = gen_reg_rtx (V8SFmode);
1493 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1494 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1495 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1496 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  Two strategies are visible:
;;  - two chained sse3_haddv4sf3 insns through a temporary, or
;;  - the generic helper ix86_expand_reduc driven by gen_addv4sf3.
;; NOTE(review): the test that chooses between them is not among the
;; visible lines; presumably it checks for SSE3 availability -- confirm.
1500 (define_expand "reduc_splus_v4sf"
1501 [(match_operand:V4SF 0 "register_operand")
1502 (match_operand:V4SF 1 "register_operand")]
1507 rtx tmp = gen_reg_rtx (V4SFmode);
1508 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1509 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1512 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
;; The 256-bit integer modes require TARGET_AVX2, the 256-bit float modes
;; require TARGET_AVX, and V4SF requires TARGET_SSE.
1516 ;; Modes handled by reduc_sm{in,ax}* patterns.
1517 (define_mode_iterator REDUC_SMINMAX_MODE
1518 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1519 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1520 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1521 (V4SF "TARGET_SSE")])
;; Signed min/max reduction for every mode in REDUC_SMINMAX_MODE.
;; The work is delegated to ix86_expand_reduc, which is handed the
;; generator of the matching element-wise <code><mode>3 pattern.
1523 (define_expand "reduc_<code>_<mode>"
1524 [(smaxmin:REDUC_SMINMAX_MODE
1525 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
1526 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
1529 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Min/max reduction for the VI_256 integer modes, delegated to
;; ix86_expand_reduc with the generator of the element-wise <code><mode>3
;; pattern.
;; NOTE(review): the operator/code-iterator line is not among the visible
;; lines; presumably this is the unsigned counterpart of the pattern above.
1533 (define_expand "reduc_<code>_<mode>"
1535 (match_operand:VI_256 0 "register_operand")
1536 (match_operand:VI_256 1 "register_operand"))]
1539 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction for V8HI, delegated to ix86_expand_reduc
;; with gen_uminv8hi3 as the element-wise combiner.
;; NOTE(review): the enabling condition is not among the visible lines.
1543 (define_expand "reduc_umin_v8hi"
1545 (match_operand:V8HI 0 "register_operand")
1546 (match_operand:V8HI 1 "register_operand"))]
1549 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1553 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1555 ;; Parallel floating point comparisons
1557 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed float compare with an explicit predicate: operand 3 is an
;; immediate accepted by const_0_to_31_operand and is printed as the first
;; AT&T-syntax operand of vcmp<suffix>.  Single alternative; operand 2 may
;; be a register or memory.  length_immediate is 1 for the predicate byte.
1559 (define_insn "avx_cmp<mode>3"
1560 [(set (match_operand:VF 0 "register_operand" "=x")
1562 [(match_operand:VF 1 "register_operand" "x")
1563 (match_operand:VF 2 "nonimmediate_operand" "xm")
1564 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1567 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1568 [(set_attr "type" "ssecmp")
1569 (set_attr "length_immediate" "1")
1570 (set_attr "prefix" "vex")
1571 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") variant of avx_cmp<mode>3 for the 128-bit float modes:
;; the template uses <ssescalarmodesuffix> and the mode attribute is
;; <ssescalarmode>.  Operand 3 is the 0..31 predicate immediate.
1573 (define_insn "avx_vmcmp<mode>3"
1574 [(set (match_operand:VF_128 0 "register_operand" "=x")
1577 [(match_operand:VF_128 1 "register_operand" "x")
1578 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1579 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1584 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1585 [(set_attr "type" "ssecmp")
1586 (set_attr "length_immediate" "1")
1587 (set_attr "prefix" "vex")
1588 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare restricted to commutative comparison
;; codes: the insn condition requires the code of operator 3 to be of
;; class RTX_COMM_COMPARE, and operand 1 is marked "%" so the two inputs
;; may be swapped.  The comparison is printed through the %D3 modifier.
;; Alternative 0: legacy form with operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
1590 (define_insn "*<sse>_maskcmp<mode>3_comm"
1591 [(set (match_operand:VF 0 "register_operand" "=x,x")
1592 (match_operator:VF 3 "sse_comparison_operator"
1593 [(match_operand:VF 1 "register_operand" "%0,x")
1594 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1596 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1598 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1599 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1600 [(set_attr "isa" "noavx,avx")
1601 (set_attr "type" "ssecmp")
1602 (set_attr "length_immediate" "1")
1603 (set_attr "prefix" "orig,vex")
1604 (set_attr "mode" "<MODE>")])
;; General mask-producing packed compare for any code accepted by
;; sse_comparison_operator.  Unlike the _comm variant, operand 1 has no
;; "%" marker, so operand order is fixed.
;; Alternative 0: legacy form with operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
1606 (define_insn "<sse>_maskcmp<mode>3"
1607 [(set (match_operand:VF 0 "register_operand" "=x,x")
1608 (match_operator:VF 3 "sse_comparison_operator"
1609 [(match_operand:VF 1 "register_operand" "0,x")
1610 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1613 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1614 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1615 [(set_attr "isa" "noavx,avx")
1616 (set_attr "type" "ssecmp")
1617 (set_attr "length_immediate" "1")
1618 (set_attr "prefix" "orig,vex")
1619 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") mask-producing compare for the 128-bit float modes, using
;; <ssescalarmodesuffix> in the mnemonic and <ssescalarmode> as insn mode.
;; Alternative 0: legacy form with operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
;; length_immediate is given explicitly ("1") only for the legacy
;; alternative; the VEX alternative uses the default ("*").
1621 (define_insn "<sse>_vmmaskcmp<mode>3"
1622 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1624 (match_operator:VF_128 3 "sse_comparison_operator"
1625 [(match_operand:VF_128 1 "register_operand" "0,x")
1626 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1631 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1632 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1633 [(set_attr "isa" "noavx,avx")
1634 (set_attr "type" "ssecmp")
1635 (set_attr "length_immediate" "1,*")
1636 (set_attr "prefix" "orig,vex")
1637 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets the flags register in CCFP mode.  Element 0 of
;; operand 0 (register) is compared with element 0 of operand 1 (register
;; or memory).  Enabled when SSE_FLOAT_MODE_P holds for the scalar mode.
;; The "%v" in the template and prefix "maybe_vex" let one pattern serve
;; both the legacy and the VEX encoding.  prefix_data16 depends on whether
;; the insn mode is DF.
1639 (define_insn "<sse>_comi"
1640 [(set (reg:CCFP FLAGS_REG)
1643 (match_operand:<ssevecmode> 0 "register_operand" "x")
1644 (parallel [(const_int 0)]))
1646 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1647 (parallel [(const_int 0)]))))]
1648 "SSE_FLOAT_MODE_P (<MODE>mode)"
1649 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1650 [(set_attr "type" "ssecomi")
1651 (set_attr "prefix" "maybe_vex")
1652 (set_attr "prefix_rep" "0")
1653 (set (attr "prefix_data16")
1654 (if_then_else (eq_attr "mode" "DF")
1656 (const_string "0")))
1657 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but the flags register is set in CCFPU mode
;; and the mnemonic is ucomi.  Element 0 of operand 0 is compared with
;; element 0 of operand 1 (register or memory).
1659 (define_insn "<sse>_ucomi"
1660 [(set (reg:CCFPU FLAGS_REG)
1663 (match_operand:<ssevecmode> 0 "register_operand" "x")
1664 (parallel [(const_int 0)]))
1666 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1667 (parallel [(const_int 0)]))))]
1668 "SSE_FLOAT_MODE_P (<MODE>mode)"
1669 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1670 [(set_attr "type" "ssecomi")
1671 (set_attr "prefix" "maybe_vex")
1672 (set_attr "prefix_rep" "0")
1673 (set (attr "prefix_data16")
1674 (if_then_else (eq_attr "mode" "DF")
1676 (const_string "0")))
1677 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 256-bit modes: operand 0 receives
;; operand 1 or operand 2 according to a comparison (operator 3) of the
;; float vectors in operands 4 and 5.  The data mode (V_256) and compare
;; mode (VF_256) iterate independently; the condition requires them to
;; have the same number of elements.  Expansion is done by
;; ix86_expand_fp_vcond, whose boolean result is captured in "ok".
;; NOTE(review): the handling of "ok" is not among the visible lines.
1679 (define_expand "vcond<V_256:mode><VF_256:mode>"
1680 [(set (match_operand:V_256 0 "register_operand")
1682 (match_operator 3 ""
1683 [(match_operand:VF_256 4 "nonimmediate_operand")
1684 (match_operand:VF_256 5 "nonimmediate_operand")])
1685 (match_operand:V_256 1 "general_operand")
1686 (match_operand:V_256 2 "general_operand")))]
1688 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1689 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1691 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vcond expander for V_256/VF_256: the data
;; mode (V_128) and compare mode (VF_128) must have the same number of
;; elements, and expansion is delegated to ix86_expand_fp_vcond.
;; NOTE(review): the handling of "ok" is not among the visible lines.
1696 (define_expand "vcond<V_128:mode><VF_128:mode>"
1697 [(set (match_operand:V_128 0 "register_operand")
1699 (match_operator 3 ""
1700 [(match_operand:VF_128 4 "nonimmediate_operand")
1701 (match_operand:VF_128 5 "nonimmediate_operand")])
1702 (match_operand:V_128 1 "general_operand")
1703 (match_operand:V_128 2 "general_operand")))]
1705 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1706 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1708 bool ok = ix86_expand_fp_vcond (operands);
1713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1715 ;; Parallel floating point logical operations
1717 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed float and-not (andn<suffix> / vandn<suffix>).
;; The output routine builds the assembler string at output time: it
;; chooses a suffix from get_attr_mode (defaulting to <ssemodesuffix>),
;; chooses the legacy or VEX operand template from which_alternative, and
;; formats both into a static 32-byte buffer with snprintf.
;; The "mode" attribute is decided by the trailing cond, in this order:
;;   TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL -> <ssePSmode>
;;   TARGET_AVX                            -> <MODE>
;;   optimizing the function for size      -> V4SF
;;   otherwise                             -> <MODE>
1719 (define_insn "<sse>_andnot<mode>3"
1720 [(set (match_operand:VF 0 "register_operand" "=x,x")
1723 (match_operand:VF 1 "register_operand" "0,x"))
1724 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1727 static char buf[32];
1731 switch (get_attr_mode (insn))
1738 suffix = "<ssemodesuffix>";
1741 switch (which_alternative)
1744 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
1747 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1753 snprintf (buf, sizeof (buf), ops, suffix);
1756 [(set_attr "isa" "noavx,avx")
1757 (set_attr "type" "sselog")
1758 (set_attr "prefix" "orig,vex")
1760 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1761 (const_string "<ssePSmode>")
1762 (match_test "TARGET_AVX")
1763 (const_string "<MODE>")
1764 (match_test "optimize_function_for_size_p (cfun)")
1765 (const_string "V4SF")
1767 (const_string "<MODE>")))])
;; Named expander for the packed float logical operations selected by the
;; <code> iterator.  Both inputs may be register or memory; the operands
;; are legitimized by ix86_fixup_binary_operands_no_copy before the RTL
;; is emitted.
1769 (define_expand "<code><mode>3"
1770 [(set (match_operand:VF 0 "register_operand")
1772 (match_operand:VF 1 "nonimmediate_operand")
1773 (match_operand:VF 2 "nonimmediate_operand")))]
1775 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed float logical operations (<logic><suffix> and its
;; VEX form).  Operand 1 is marked "%" (commutative); the condition also
;; requires ix86_binary_operator_ok.  As in <sse>_andnot<mode>3, the
;; mnemonic is assembled at output time from a suffix picked via
;; get_attr_mode and a template picked via which_alternative, then
;; formatted into a static 32-byte buffer.
;; The "mode" attribute follows the same cond ordering: <ssePSmode> when
;; packed-single is optimal, <MODE> under AVX, V4SF when optimizing for
;; size, otherwise <MODE>.
1777 (define_insn "*<code><mode>3"
1778 [(set (match_operand:VF 0 "register_operand" "=x,x")
1780 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1781 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1782 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1784 static char buf[32];
1788 switch (get_attr_mode (insn))
1795 suffix = "<ssemodesuffix>";
1798 switch (which_alternative)
1801 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1804 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1810 snprintf (buf, sizeof (buf), ops, suffix);
1813 [(set_attr "isa" "noavx,avx")
1814 (set_attr "type" "sselog")
1815 (set_attr "prefix" "orig,vex")
1817 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1818 (const_string "<ssePSmode>")
1819 (match_test "TARGET_AVX")
1820 (const_string "<MODE>")
1821 (match_test "optimize_function_for_size_p (cfun)")
1822 (const_string "V4SF")
1824 (const_string "<MODE>")))])
;; Vector copysign built from three logical operations:
;;   (not mask) AND operand 1, mask AND operand 2, then the inclusive OR
;;   of the two intermediate results (operands 4 and 5) into operand 0.
;; The mask (operand 3) comes from ix86_build_signbit_mask; operands 4 and
;; 5 are fresh pseudo registers of the vector mode.
;; NOTE(review): the destinations of the first two sets are on lines not
;; visible here; presumably they are (match_dup 4) and (match_dup 5).
1826 (define_expand "copysign<mode>3"
1829 (not:VF (match_dup 3))
1830 (match_operand:VF 1 "nonimmediate_operand")))
1832 (and:VF (match_dup 3)
1833 (match_operand:VF 2 "nonimmediate_operand")))
1834 (set (match_operand:VF 0 "register_operand")
1835 (ior:VF (match_dup 4) (match_dup 5)))]
1838 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1840 operands[4] = gen_reg_rtx (<MODE>mode);
1841 operands[5] = gen_reg_rtx (<MODE>mode);
1844 ;; Also define scalar versions. These are used for abs, neg, and
1845 ;; conditional move. Using subregs into vector modes causes register
1846 ;; allocation lossage. These patterns do not allow memory operands
1847 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not implemented with the packed instruction.  All
;; three operands must be registers.  The suffix is "ps" when the insn
;; mode attribute is V4SF, otherwise <ssevecmodesuffix>; the legacy or VEX
;; template is picked from which_alternative and the result is formatted
;; into a static 32-byte buffer.
;; "mode" attribute ordering: V4SF when packed-single is optimal,
;; <ssevecmode> under AVX, V4SF when optimizing for size, otherwise
;; <ssevecmode>.
1849 (define_insn "*andnot<mode>3"
1850 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1853 (match_operand:MODEF 1 "register_operand" "0,x"))
1854 (match_operand:MODEF 2 "register_operand" "x,x")))]
1855 "SSE_FLOAT_MODE_P (<MODE>mode)"
1857 static char buf[32];
1860 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
1862 switch (which_alternative)
1865 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
1868 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1874 snprintf (buf, sizeof (buf), ops, suffix);
1877 [(set_attr "isa" "noavx,avx")
1878 (set_attr "type" "sselog")
1879 (set_attr "prefix" "orig,vex")
1881 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1882 (const_string "V4SF")
1883 (match_test "TARGET_AVX")
1884 (const_string "<ssevecmode>")
1885 (match_test "optimize_function_for_size_p (cfun)")
1886 (const_string "V4SF")
1888 (const_string "<ssevecmode>")))])
;; TFmode and-not: operand 1 is complemented (not:TF) and combined with
;; operand 2, which may be a register or memory.  The mnemonic is "andnps"
;; when the insn mode attribute is V4SF and "pandn" otherwise; the VEX
;; alternative prepends "v".  The string is formatted into a static
;; 32-byte buffer.
;; prefix_data16 is special-cased for alternative 0 in TI mode and "*"
;; otherwise.  The "mode" attribute is V4SF when packed-single is optimal,
;; or when SSE2 is unavailable or the function is optimized for size, and
;; TI by default.
;; NOTE(review): the value chosen under TARGET_AVX and the prefix_data16
;; value for the special case are on lines not visible here.
1890 (define_insn "*andnottf3"
1891 [(set (match_operand:TF 0 "register_operand" "=x,x")
1893 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
1894 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
1897 static char buf[32];
1900 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
1902 switch (which_alternative)
1905 ops = "%s\t{%%2, %%0|%%0, %%2}";
1908 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1914 snprintf (buf, sizeof (buf), ops, tmp);
1917 [(set_attr "isa" "noavx,avx")
1918 (set_attr "type" "sselog")
1919 (set (attr "prefix_data16")
1921 (and (eq_attr "alternative" "0")
1922 (eq_attr "mode" "TI"))
1924 (const_string "*")))
1925 (set_attr "prefix" "orig,vex")
1927 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1928 (const_string "V4SF")
1929 (match_test "TARGET_AVX")
1931 (ior (not (match_test "TARGET_SSE2"))
1932 (match_test "optimize_function_for_size_p (cfun)"))
1933 (const_string "V4SF")
1935 (const_string "TI")))])
;; Scalar (MODEF) logical operations implemented with the packed
;; instruction.  All operands are registers; operand 1 is marked "%"
;; (commutative).  Suffix selection, template selection and the "mode"
;; attribute cond mirror the scalar *andnot<mode>3 pattern: "ps" when the
;; insn mode is V4SF, otherwise <ssevecmodesuffix>.
1937 (define_insn "*<code><mode>3"
1938 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1940 (match_operand:MODEF 1 "register_operand" "%0,x")
1941 (match_operand:MODEF 2 "register_operand" "x,x")))]
1942 "SSE_FLOAT_MODE_P (<MODE>mode)"
1944 static char buf[32];
1947 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
1949 switch (which_alternative)
1952 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1955 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1961 snprintf (buf, sizeof (buf), ops, suffix);
1964 [(set_attr "isa" "noavx,avx")
1965 (set_attr "type" "sselog")
1966 (set_attr "prefix" "orig,vex")
1968 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1969 (const_string "V4SF")
1970 (match_test "TARGET_AVX")
1971 (const_string "<ssevecmode>")
1972 (match_test "optimize_function_for_size_p (cfun)")
1973 (const_string "V4SF")
1975 (const_string "<ssevecmode>")))])
;; Named expander for TFmode logical operations.  Inputs may be register
;; or memory and are legitimized with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the enabling condition is not among the visible lines.
1977 (define_expand "<code>tf3"
1978 [(set (match_operand:TF 0 "register_operand")
1980 (match_operand:TF 1 "nonimmediate_operand")
1981 (match_operand:TF 2 "nonimmediate_operand")))]
1983 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode logical operation insn.  Operand 1 is marked "%" (commutative)
;; and the condition requires ix86_binary_operator_ok.  The mnemonic is
;; "<logic>ps" when the insn mode attribute is V4SF and "p<logic>"
;; otherwise; the VEX alternative prepends "v".  prefix_data16 and the
;; "mode" attribute follow the same scheme as *andnottf3.
;; NOTE(review): the value chosen under TARGET_AVX and the prefix_data16
;; value for the special case are on lines not visible here.
1985 (define_insn "*<code>tf3"
1986 [(set (match_operand:TF 0 "register_operand" "=x,x")
1988 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
1989 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
1991 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
1993 static char buf[32];
1996 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
1998 switch (which_alternative)
2001 ops = "%s\t{%%2, %%0|%%0, %%2}";
2004 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2010 snprintf (buf, sizeof (buf), ops, tmp);
2013 [(set_attr "isa" "noavx,avx")
2014 (set_attr "type" "sselog")
2015 (set (attr "prefix_data16")
2017 (and (eq_attr "alternative" "0")
2018 (eq_attr "mode" "TI"))
2020 (const_string "*")))
2021 (set_attr "prefix" "orig,vex")
2023 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2024 (const_string "V4SF")
2025 (match_test "TARGET_AVX")
2027 (ior (not (match_test "TARGET_SSE2"))
2028 (match_test "optimize_function_for_size_p (cfun)"))
2029 (const_string "V4SF")
2031 (const_string "TI")))])
2033 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2035 ;; FMA floating point multiply/accumulate instructions. These include
2036 ;; scalar versions of the instructions as well as vector versions.
2038 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Mode iterator for the standard-named FMA expanders: the scalar SF and
;; DF entries are conditional on TARGET_SSE_MATH, the four vector modes
;; are unconditional.
2040 ;; The standard names for scalar FMA are only available with SSE math enabled.
2041 (define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
2042 (DF "TARGET_SSE_MATH")
2043 V4SF V2DF V8SF V4DF])
;; Standard-named fused multiply-add expander over FMAMODEM.  It has no
;; preparation code; it only emits the three-input RTL when either FMA or
;; FMA4 is available.  All three inputs may be register or memory.
2045 (define_expand "fma<mode>4"
2046 [(set (match_operand:FMAMODEM 0 "register_operand")
2048 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2049 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2050 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2051 "TARGET_FMA || TARGET_FMA4")
;; Standard-named fused multiply-subtract expander: same as fma<mode>4
;; except that operand 3 (the addend) is wrapped in neg.
2053 (define_expand "fms<mode>4"
2054 [(set (match_operand:FMAMODEM 0 "register_operand")
2056 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2057 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2058 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2059 "TARGET_FMA || TARGET_FMA4")
;; Standard-named negated fused multiply-add expander: operand 1 (the
;; first multiplicand) is wrapped in neg; operand 3 is added unchanged.
2061 (define_expand "fnma<mode>4"
2062 [(set (match_operand:FMAMODEM 0 "register_operand")
2064 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2065 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2066 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2067 "TARGET_FMA || TARGET_FMA4")
;; Standard-named negated fused multiply-subtract expander: both operand 1
;; (the first multiplicand) and operand 3 (the addend) are wrapped in neg.
2069 (define_expand "fnms<mode>4"
2070 [(set (match_operand:FMAMODEM 0 "register_operand")
2072 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2073 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2074 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2075 "TARGET_FMA || TARGET_FMA4")
;; Unlike FMAMODEM, every entry here (scalar SF/DF included) is
;; unconditional.
2077 ;; The builtins for intrinsics are not constrained by SSE math enabled.
2078 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Expander used for the fused multiply-add intrinsic over FMAMODE.  No
;; preparation code; enabled when either FMA or FMA4 is available.
2080 (define_expand "fma4i_fmadd_<mode>"
2081 [(set (match_operand:FMAMODE 0 "register_operand")
2083 (match_operand:FMAMODE 1 "nonimmediate_operand")
2084 (match_operand:FMAMODE 2 "nonimmediate_operand")
2085 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2086 "TARGET_FMA || TARGET_FMA4")
;; Fused multiply-add insn over FMAMODE with five alternatives.
;; Alternatives 0-2 (isa "fma") are the three-operand 132 / 213 / 231
;; encodings; each ties one input to the destination with a "0"
;; constraint (operand 1 in the first two, operand 3 in the third).
;; Alternatives 3-4 (isa "fma4") use the four-operand encoding and differ
;; only in whether operand 3 or operand 2 may be in memory.
2088 (define_insn "*fma_fmadd_<mode>"
2089 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2091 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
2092 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2093 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
2094 "TARGET_FMA || TARGET_FMA4"
2096 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2097 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2098 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2099 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2100 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2101 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2102 (set_attr "type" "ssemuladd")
2103 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract insn over FMAMODE (vfmsub*).  Same five
;; alternatives as *fma_fmadd_<mode>: three FMA 132/213/231 encodings with
;; one input tied to the destination, then two FMA4 four-operand
;; encodings.
;; NOTE(review): the operator wrapping operand 3 is on a line not visible
;; here; presumably neg, matching the fms<mode>4 expander.
2105 (define_insn "*fma_fmsub_<mode>"
2106 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2108 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
2109 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2111 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
2112 "TARGET_FMA || TARGET_FMA4"
2114 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2115 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2116 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2117 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2118 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2119 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2120 (set_attr "type" "ssemuladd")
2121 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add insn over FMAMODE (vfnmadd*).  Same five
;; alternatives as *fma_fmadd_<mode>.
;; NOTE(review): the operator wrapping operand 1 is on a line not visible
;; here; presumably neg, matching the fnma<mode>4 expander.
2123 (define_insn "*fma_fnmadd_<mode>"
2124 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2127 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
2128 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2129 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
2130 "TARGET_FMA || TARGET_FMA4"
2132 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2133 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2134 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2135 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2136 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2137 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2138 (set_attr "type" "ssemuladd")
2139 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract insn over FMAMODE (vfnmsub*).  Same
;; five alternatives as *fma_fmadd_<mode>.
;; NOTE(review): the operators wrapping operands 1 and 3 are on lines not
;; visible here; presumably neg, matching the fnms<mode>4 expander.
2141 (define_insn "*fma_fnmsub_<mode>"
2142 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2145 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
2146 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2148 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
2149 "TARGET_FMA || TARGET_FMA4"
2151 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2152 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2153 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2154 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2155 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2156 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2157 (set_attr "type" "ssemuladd")
2158 (set_attr "mode" "<MODE>")])
2160 ;; FMA parallel floating point multiply addsub and subadd operations.
2162 ;; It would be possible to represent these without the UNSPEC as
2165 ;; (fma op1 op2 op3)
2166 ;; (fma op1 op2 (neg op3))
2169 ;; But this doesn't seem useful in practice.
;; Named expander for the packed multiply add/subtract operation over the
;; VF modes.  The three inputs are wrapped in a bracketed operand vector
;; (an unspec, per the comment preceding this pattern).  No preparation
;; code; enabled when either FMA or FMA4 is available.
2171 (define_expand "fmaddsub_<mode>"
2172 [(set (match_operand:VF 0 "register_operand")
2174 [(match_operand:VF 1 "nonimmediate_operand")
2175 (match_operand:VF 2 "nonimmediate_operand")
2176 (match_operand:VF 3 "nonimmediate_operand")]
2178 "TARGET_FMA || TARGET_FMA4")
;; Packed multiply add/subtract insn over the VF modes (vfmaddsub*).
;; Five alternatives: the three FMA 132/213/231 encodings, each with one
;; input tied to the destination by "0", followed by the two FMA4
;; four-operand encodings (isa "fma,fma,fma,fma4,fma4").
2180 (define_insn "*fma_fmaddsub_<mode>"
2181 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
2183 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
2184 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
2185 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
2187 "TARGET_FMA || TARGET_FMA4"
2189 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2190 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2191 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2192 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2193 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2194 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2195 (set_attr "type" "ssemuladd")
2196 (set_attr "mode" "<MODE>")])
;; Packed multiply subtract/add insn over the VF modes (vfmsubadd*).
;; Same five alternatives as *fma_fmaddsub_<mode>.
;; NOTE(review): operand 3 is wrapped by an operator on a line not
;; visible here; presumably neg, which is what distinguishes this pattern
;; from *fma_fmaddsub_<mode>.
2198 (define_insn "*fma_fmsubadd_<mode>"
2199 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
2201 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
2202 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
2204 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
2206 "TARGET_FMA || TARGET_FMA4"
2208 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2209 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2210 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2211 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2212 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2213 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2214 (set_attr "type" "ssemuladd")
2215 (set_attr "mode" "<MODE>")])
2217 ;; FMA3 floating point scalar intrinsics. These merge result with
2218 ;; high-order elements from the destination register.
;; Named expander for the scalar fused multiply-add intrinsic on the
;; 128-bit float modes.  All three inputs may be register or memory.
;; NOTE(review): the surrounding operator, the trailing operands and the
;; enabling condition are on lines not visible here.
2220 (define_expand "fmai_vmfmadd_<mode>"
2221 [(set (match_operand:VF_128 0 "register_operand")
2224 (match_operand:VF_128 1 "nonimmediate_operand")
2225 (match_operand:VF_128 2 "nonimmediate_operand")
2226 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar fused multiply-add insn for the intrinsic form.  Operand 1 is
;; tied to the destination in both alternatives ("0").  Alternative 0
;; emits the 132 encoding with operand 2 as the register-or-memory source;
;; alternative 1 emits the 213 encoding with operand 3 as the
;; register-or-memory source.  Mnemonics use <ssescalarmodesuffix>.
2231 (define_insn "*fmai_fmadd_<mode>"
2232 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2235 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2236 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
2237 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
2242 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
2243 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
2244 [(set_attr "type" "ssemuladd")
2245 (set_attr "mode" "<MODE>")])
;; Scalar fused multiply-subtract insn for the intrinsic form (vfmsub132 /
;; vfmsub213 with the scalar suffix).  Operand 1 is tied to the
;; destination in both alternatives; the alternatives differ in whether
;; operand 2 or operand 3 may be in memory.
;; NOTE(review): the operator wrapping operand 3 is on a line not visible
;; here; presumably neg.
2247 (define_insn "*fmai_fmsub_<mode>"
2248 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2251 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2252 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
2254 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
2259 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
2260 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
2261 [(set_attr "type" "ssemuladd")
2262 (set_attr "mode" "<MODE>")])
;; Scalar negated fused multiply-add insn for the intrinsic form
;; (vfnmadd132 / vfnmadd213 with the scalar suffix).  Note the operand
;; order in the RTL: operand 2 comes first, inside a wrapping operator,
;; followed by operand 1 (tied to the destination) and operand 3.
;; NOTE(review): the wrapping operator is on a line not visible here;
;; presumably neg.
2264 (define_insn "*fmai_fnmadd_<mode>"
2265 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2269 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
2270 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2271 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
2276 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
2277 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
2278 [(set_attr "type" "ssemuladd")
2279 (set_attr "mode" "<MODE>")])
;; Scalar negated fused multiply-subtract insn for the intrinsic form
;; (vfnmsub132 / vfnmsub213 with the scalar suffix).  As in
;; *fmai_fnmadd_<mode>, operand 2 is listed first inside a wrapping
;; operator, then operand 1 (tied to the destination), then operand 3,
;; which is wrapped as well.
;; NOTE(review): both wrapping operators are on lines not visible here;
;; presumably neg.
2281 (define_insn "*fmai_fnmsub_<mode>"
2282 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2286 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
2287 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2289 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
2294 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
2295 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
2296 [(set_attr "type" "ssemuladd")
2297 (set_attr "mode" "<MODE>")])
2299 ;; FMA4 floating point scalar intrinsics. These write the
2300 ;; entire destination register, with the high-order elements zeroed.
;; Named expander for the FMA4 scalar multiply-add intrinsic on the
;; 128-bit float modes.  The preparation statement supplies operand 4 as
;; the all-zero constant vector of the mode (CONST0_RTX).
;; NOTE(review): where operand 4 is used in the RTL, and the enabling
;; condition, are on lines not visible here.
2302 (define_expand "fma4i_vmfmadd_<mode>"
2303 [(set (match_operand:VF_128 0 "register_operand")
2306 (match_operand:VF_128 1 "nonimmediate_operand")
2307 (match_operand:VF_128 2 "nonimmediate_operand")
2308 (match_operand:VF_128 3 "nonimmediate_operand"))
2312 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar multiply-add insn (four-operand vfmadd with the scalar
;; suffix).  Operand 4 must satisfy const0_operand.  Operand 1 is marked
;; "%" (commutative).  The two alternatives differ only in which source
;; may be in memory: operand 3 in alternative 0, operand 2 in
;; alternative 1.
2314 (define_insn "*fma4i_vmfmadd_<mode>"
2315 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2318 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2319 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2320 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2321 (match_operand:VF_128 4 "const0_operand")
2324 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2325 [(set_attr "type" "ssemuladd")
2326 (set_attr "mode" "<MODE>")])
;; FMA4 scalar multiply-subtract insn: emits the four-operand vfmsub form.
;; Operand 1 is commutative with operand 2 ("%"); at most one of operands
;; 2 and 3 may be in memory.  Operand 4 must be the zero constant.
2328 (define_insn "*fma4i_vmfmsub_<mode>"
2329 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2332 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2333 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2335 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2336 (match_operand:VF_128 4 "const0_operand")
2339 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2340 [(set_attr "type" "ssemuladd")
2341 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negate-multiply-add insn: emits the four-operand vfnmadd
;; form.  Operand 1 carries the "%" commutativity modifier; at most one of
;; operands 2 and 3 may be in memory.  Operand 4 must be the zero constant.
2343 (define_insn "*fma4i_vmfnmadd_<mode>"
2344 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2348 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2349 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2350 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2351 (match_operand:VF_128 4 "const0_operand")
2354 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2355 [(set_attr "type" "ssemuladd")
2356 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negate-multiply-subtract insn: emits the four-operand
;; vfnmsub form.  Operand 1 carries the "%" commutativity modifier; at
;; most one of operands 2 and 3 may be in memory.  Operand 4 must be the
;; zero constant.
2358 (define_insn "*fma4i_vmfnmsub_<mode>"
2359 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2363 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2364 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2366 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2367 (match_operand:VF_128 4 "const0_operand")
2370 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2371 [(set_attr "type" "ssemuladd")
2372 (set_attr "mode" "<MODE>")])
2374 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2376 ;; Parallel single-precision floating point conversion operations
2378 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register "y" or memory) to
;; V2SF and places the result in the V4SF destination.  Operand 1 is tied
;; to the destination (constraint "0"), so the instruction is emitted in
;; two-operand form.
2380 (define_insn "sse_cvtpi2ps"
2381 [(set (match_operand:V4SF 0 "register_operand" "=x")
2384 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2385 (match_operand:V4SF 1 "register_operand" "0")
2388 "cvtpi2ps\t{%2, %0|%0, %2}"
2389 [(set_attr "type" "ssecvt")
2390 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts a V4SF source (SSE register or memory) to integers
;; and writes elements 0 and 1 of the result to a V2SI MMX register ("=y").
;; The conversion itself is an unspec in V4SImode; the "unit" attribute is
;; set to "mmx" because the destination is an MMX register.
2392 (define_insn "sse_cvtps2pi"
2393 [(set (match_operand:V2SI 0 "register_operand" "=y")
2395 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2397 (parallel [(const_int 0) (const_int 1)])))]
2399 "cvtps2pi\t{%1, %0|%0, %1}"
2400 [(set_attr "type" "ssecvt")
2401 (set_attr "unit" "mmx")
2402 (set_attr "mode" "DI")])
;; cvttps2pi: truncating variant of the pattern above, expressed with the
;; standard (fix:V4SI ...) RTL code instead of an unspec.  Elements 0 and 1
;; of the converted vector go to a V2SI MMX register.  "prefix_rep" is
;; explicitly forced to 0 for this instruction.
2404 (define_insn "sse_cvttps2pi"
2405 [(set (match_operand:V2SI 0 "register_operand" "=y")
2407 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2408 (parallel [(const_int 0) (const_int 1)])))]
2410 "cvttps2pi\t{%1, %0|%0, %1}"
2411 [(set_attr "type" "ssecvt")
2412 (set_attr "unit" "mmx")
2413 (set_attr "prefix_rep" "0")
2414 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SFmode and inserts it into a
;; V4SF vector taken from operand 1.
;; Three alternatives:
;;   0: non-AVX, integer source in a general register, dest tied to op 1
;;   1: non-AVX, integer source in memory, dest tied to op 1
;;   2: AVX three-operand form (vcvtsi2ss), source in register or memory
;; The register and memory alternatives are kept separate so that the
;; per-CPU decode attributes can differ between them.
2416 (define_insn "sse_cvtsi2ss"
2417 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2420 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2421 (match_operand:V4SF 1 "register_operand" "0,0,x")
2425 cvtsi2ss\t{%2, %0|%0, %2}
2426 cvtsi2ss\t{%2, %0|%0, %2}
2427 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2428 [(set_attr "isa" "noavx,noavx,avx")
2429 (set_attr "type" "sseicvt")
2430 (set_attr "athlon_decode" "vector,double,*")
2431 (set_attr "amdfam10_decode" "vector,double,*")
2432 (set_attr "bdver1_decode" "double,direct,*")
2433 (set_attr "btver2_decode" "double,double,double")
2434 (set_attr "prefix" "orig,orig,vex")
2435 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit integer variant of sse_cvtsi2ss; the source operand 2
;; is DImode and the pattern is only enabled for TARGET_SSE on 64-bit
;; targets.  Alternatives mirror sse_cvtsi2ss (two non-AVX forms tied to
;; operand 1, one AVX three-operand form).  The legacy alternatives set
;; "prefix_rex" to 1 and the AVX alternative sets "length_vex" to 4.
2437 (define_insn "sse_cvtsi2ssq"
2438 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2441 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2442 (match_operand:V4SF 1 "register_operand" "0,0,x")
2444 "TARGET_SSE && TARGET_64BIT"
2446 cvtsi2ssq\t{%2, %0|%0, %2}
2447 cvtsi2ssq\t{%2, %0|%0, %2}
2448 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2449 [(set_attr "isa" "noavx,noavx,avx")
2450 (set_attr "type" "sseicvt")
2451 (set_attr "athlon_decode" "vector,double,*")
2452 (set_attr "amdfam10_decode" "vector,double,*")
2453 (set_attr "bdver1_decode" "double,direct,*")
2454 (set_attr "btver2_decode" "double,double,double")
2455 (set_attr "length_vex" "*,*,4")
2456 (set_attr "prefix_rex" "1,1,*")
2457 (set_attr "prefix" "orig,orig,vex")
2458 (set_attr "mode" "SF")])
;; cvtss2si (vector-operand form): converts element 0 of the V4SF operand
;; to an SImode general register.  The conversion is wrapped in
;; UNSPEC_FIX_NOTRUNC rather than the RTL "fix" code.  The two alternatives
;; (register / memory source) exist so decode attributes can differ.
;; "%v" in the template lets the same pattern emit the VEX-prefixed form
;; (prefix attribute "maybe_vex").
2460 (define_insn "sse_cvtss2si"
2461 [(set (match_operand:SI 0 "register_operand" "=r,r")
2464 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2465 (parallel [(const_int 0)]))]
2466 UNSPEC_FIX_NOTRUNC))]
2468 "%vcvtss2si\t{%1, %0|%0, %1}"
2469 [(set_attr "type" "sseicvt")
2470 (set_attr "athlon_decode" "double,vector")
2471 (set_attr "bdver1_decode" "double,double")
2472 (set_attr "prefix_rep" "1")
2473 (set_attr "prefix" "maybe_vex")
2474 (set_attr "mode" "SI")])
;; cvtss2si (scalar-operand form): same instruction as sse_cvtss2si, but
;; the source is a plain SFmode operand instead of element 0 of a V4SF
;; vector.  The conversion is an UNSPEC_FIX_NOTRUNC unspec.
2476 (define_insn "sse_cvtss2si_2"
2477 [(set (match_operand:SI 0 "register_operand" "=r,r")
2478 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2479 UNSPEC_FIX_NOTRUNC))]
2481 "%vcvtss2si\t{%1, %0|%0, %1}"
2482 [(set_attr "type" "sseicvt")
2483 (set_attr "athlon_decode" "double,vector")
2484 (set_attr "amdfam10_decode" "double,double")
2485 (set_attr "bdver1_decode" "double,double")
2486 (set_attr "prefix_rep" "1")
2487 (set_attr "prefix" "maybe_vex")
2488 (set_attr "mode" "SI")])
;; cvtss2si with a 64-bit destination (vector-operand form): converts
;; element 0 of the V4SF operand to a DImode general register via
;; UNSPEC_FIX_NOTRUNC.  Only enabled for TARGET_SSE on 64-bit targets.
;; The "{q}" in the template is the AT&T-syntax size suffix.
2490 (define_insn "sse_cvtss2siq"
2491 [(set (match_operand:DI 0 "register_operand" "=r,r")
2494 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2495 (parallel [(const_int 0)]))]
2496 UNSPEC_FIX_NOTRUNC))]
2497 "TARGET_SSE && TARGET_64BIT"
2498 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2499 [(set_attr "type" "sseicvt")
2500 (set_attr "athlon_decode" "double,vector")
2501 (set_attr "bdver1_decode" "double,double")
2502 (set_attr "prefix_rep" "1")
2503 (set_attr "prefix" "maybe_vex")
2504 (set_attr "mode" "DI")])
;; cvtss2si with a 64-bit destination (scalar-operand form): the source is
;; a plain SFmode operand, converted to DImode via UNSPEC_FIX_NOTRUNC.
;; Only enabled for TARGET_SSE on 64-bit targets.
2506 (define_insn "sse_cvtss2siq_2"
2507 [(set (match_operand:DI 0 "register_operand" "=r,r")
2508 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2509 UNSPEC_FIX_NOTRUNC))]
2510 "TARGET_SSE && TARGET_64BIT"
2511 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2512 [(set_attr "type" "sseicvt")
2513 (set_attr "athlon_decode" "double,vector")
2514 (set_attr "amdfam10_decode" "double,double")
2515 (set_attr "bdver1_decode" "double,double")
2516 (set_attr "prefix_rep" "1")
2517 (set_attr "prefix" "maybe_vex")
2518 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of the V4SF operand to an SImode general
;; register using the cvttss2si instruction.  Register and memory sources
;; are separate alternatives so that decode attributes can differ.
2520 (define_insn "sse_cvttss2si"
2521 [(set (match_operand:SI 0 "register_operand" "=r,r")
2524 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2525 (parallel [(const_int 0)]))))]
2527 "%vcvttss2si\t{%1, %0|%0, %1}"
2528 [(set_attr "type" "sseicvt")
2529 (set_attr "athlon_decode" "double,vector")
2530 (set_attr "amdfam10_decode" "double,double")
2531 (set_attr "bdver1_decode" "double,double")
2532 (set_attr "prefix_rep" "1")
2533 (set_attr "prefix" "maybe_vex")
2534 (set_attr "mode" "SI")])
;; cvttss2si with a 64-bit destination: converts element 0 of the V4SF
;; operand to a DImode general register.  Only enabled for TARGET_SSE on
;; 64-bit targets.
2536 (define_insn "sse_cvttss2siq"
2537 [(set (match_operand:DI 0 "register_operand" "=r,r")
2540 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2541 (parallel [(const_int 0)]))))]
2542 "TARGET_SSE && TARGET_64BIT"
2543 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2544 [(set_attr "type" "sseicvt")
2545 (set_attr "athlon_decode" "double,vector")
2546 (set_attr "amdfam10_decode" "double,double")
2547 (set_attr "bdver1_decode" "double,double")
2548 (set_attr "prefix_rep" "1")
2549 (set_attr "prefix" "maybe_vex")
2550 (set_attr "mode" "DI")])
;; Packed integer -> single-precision float conversion (cvtdq2ps) for the
;; VF1 modes.  The source operand uses the integer vector mode that
;; corresponds to the destination float mode (<sseintvecmode>).
2552 (define_insn "float<sseintvecmodelower><mode>2"
2553 [(set (match_operand:VF1 0 "register_operand" "=x")
2555 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2557 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2558 [(set_attr "type" "ssecvt")
2559 (set_attr "prefix" "maybe_vex")
2560 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned packed integer -> single-precision float conversion.
;; Enabled for V4SF under TARGET_SSE2, and for the other VF1 mode only
;; when TARGET_AVX2 is also set.  All of the work is delegated to the C
;; helper ix86_expand_vector_convert_uns_vsivsf (dest, src).
2562 (define_expand "floatuns<sseintvecmodelower><mode>2"
2563 [(match_operand:VF1 0 "register_operand")
2564 (match_operand:<sseintvecmode> 1 "register_operand")]
2565 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2567 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; 256-bit vcvtps2dq: converts a V8SF source (register or memory) to V8SI.
;; The conversion is represented as an UNSPEC_FIX_NOTRUNC unspec.
2571 (define_insn "avx_cvtps2dq256"
2572 [(set (match_operand:V8SI 0 "register_operand" "=x")
2573 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2574 UNSPEC_FIX_NOTRUNC))]
2576 "vcvtps2dq\t{%1, %0|%0, %1}"
2577 [(set_attr "type" "ssecvt")
2578 (set_attr "prefix" "vex")
2579 (set_attr "mode" "OI")])
;; 128-bit cvtps2dq: converts a V4SF source (register or memory) to V4SI
;; via an UNSPEC_FIX_NOTRUNC unspec.  The "prefix_data16" attribute is
;; computed conditionally on TARGET_AVX; "1" is the value on one of the
;; two branches.
2581 (define_insn "sse2_cvtps2dq"
2582 [(set (match_operand:V4SI 0 "register_operand" "=x")
2583 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2584 UNSPEC_FIX_NOTRUNC))]
2586 "%vcvtps2dq\t{%1, %0|%0, %1}"
2587 [(set_attr "type" "ssecvt")
2588 (set (attr "prefix_data16")
2590 (match_test "TARGET_AVX")
2592 (const_string "1")))
2593 (set_attr "prefix" "maybe_vex")
2594 (set_attr "mode" "TI")])
;; Standard-named truncating conversion V8SF -> V8SI, implemented with the
;; 256-bit vcvttps2dq instruction (VEX prefix only).
2596 (define_insn "fix_truncv8sfv8si2"
2597 [(set (match_operand:V8SI 0 "register_operand" "=x")
2598 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2600 "vcvttps2dq\t{%1, %0|%0, %1}"
2601 [(set_attr "type" "ssecvt")
2602 (set_attr "prefix" "vex")
2603 (set_attr "mode" "OI")])
;; Standard-named truncating conversion V4SF -> V4SI, implemented with
;; cvttps2dq (or its VEX form through "%v").  "prefix_rep" and
;; "prefix_data16" are both computed conditionally on TARGET_AVX.
;; NOTE(review): "prefix_data16" is set twice in this attribute list, once
;; conditionally via (set (attr ...)) and once unconditionally to "0" via
;; set_attr.  One of the two looks redundant -- confirm which value the
;; attribute generator actually uses and drop the other.
2605 (define_insn "fix_truncv4sfv4si2"
2606 [(set (match_operand:V4SI 0 "register_operand" "=x")
2607 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2609 "%vcvttps2dq\t{%1, %0|%0, %1}"
2610 [(set_attr "type" "ssecvt")
2611 (set (attr "prefix_rep")
2613 (match_test "TARGET_AVX")
2615 (const_string "1")))
2616 (set (attr "prefix_data16")
2618 (match_test "TARGET_AVX")
2620 (const_string "0")))
2621 (set_attr "prefix_data16" "0")
2622 (set_attr "prefix" "maybe_vex")
2623 (set_attr "mode" "TI")])
;; Unsigned truncating conversion from a VF1 float vector to the matching
;; integer vector mode, built on top of the signed conversion:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts the input (result in
;;      tmp[0]) and also hands back a correction value in tmp[2];
;;   2. the signed fix_trunc pattern converts tmp[0] into tmp[1];
;;   3. the result is tmp[1] XOR tmp[2].
2625 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2626 [(match_operand:<sseintvecmode> 0 "register_operand")
2627 (match_operand:VF1 1 "register_operand")]
2631 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2632 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2633 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2634 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2640 ;; Parallel double-precision floating point conversion operations
2642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: converts a V2SI source to V2DF.  The source is either an MMX
;; register (alternative 0, "y") or memory (alternative 1, "m"); only the
;; MMX-register alternative is tagged with unit "mmx" and prefix_data16 1.
2644 (define_insn "sse2_cvtpi2pd"
2645 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2646 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2648 "cvtpi2pd\t{%1, %0|%0, %1}"
2649 [(set_attr "type" "ssecvt")
2650 (set_attr "unit" "mmx,*")
2651 (set_attr "prefix_data16" "1,*")
2652 (set_attr "mode" "V2DF")])
;; cvtpd2pi: converts a V2DF source (SSE register or memory) to a V2SI
;; value in an MMX register ("=y").  The conversion is represented as an
;; UNSPEC_FIX_NOTRUNC unspec.
2654 (define_insn "sse2_cvtpd2pi"
2655 [(set (match_operand:V2SI 0 "register_operand" "=y")
2656 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2657 UNSPEC_FIX_NOTRUNC))]
2659 "cvtpd2pi\t{%1, %0|%0, %1}"
2660 [(set_attr "type" "ssecvt")
2661 (set_attr "unit" "mmx")
2662 (set_attr "bdver1_decode" "double")
2663 (set_attr "btver2_decode" "direct")
2664 (set_attr "prefix_data16" "1")
2665 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating conversion of a V2DF source (SSE register or
;; memory) to a V2SI value in an MMX register, expressed with the RTL
;; "fix" code.
2667 (define_insn "sse2_cvttpd2pi"
2668 [(set (match_operand:V2SI 0 "register_operand" "=y")
2669 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2671 "cvttpd2pi\t{%1, %0|%0, %1}"
2672 [(set_attr "type" "ssecvt")
2673 (set_attr "unit" "mmx")
2674 (set_attr "bdver1_decode" "double")
2675 (set_attr "prefix_data16" "1")
2676 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SImode operand 2 to DFmode and inserts it into a
;; V2DF vector taken from operand 1.
;; Three alternatives:
;;   0: non-AVX, integer source in a general register, dest tied to op 1
;;   1: non-AVX, integer source in memory, dest tied to op 1
;;   2: AVX three-operand form (vcvtsi2sd), source in register or memory
2678 (define_insn "sse2_cvtsi2sd"
2679 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2682 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2683 (match_operand:V2DF 1 "register_operand" "0,0,x")
2687 cvtsi2sd\t{%2, %0|%0, %2}
2688 cvtsi2sd\t{%2, %0|%0, %2}
2689 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2690 [(set_attr "isa" "noavx,noavx,avx")
2691 (set_attr "type" "sseicvt")
2692 (set_attr "athlon_decode" "double,direct,*")
2693 (set_attr "amdfam10_decode" "vector,double,*")
2694 (set_attr "bdver1_decode" "double,direct,*")
2695 (set_attr "btver2_decode" "double,double,double")
2696 (set_attr "prefix" "orig,orig,vex")
2697 (set_attr "mode" "DF")])
;; cvtsi2sdq: 64-bit integer variant of sse2_cvtsi2sd; the source operand
;; 2 is DImode and the pattern is only enabled for TARGET_SSE2 on 64-bit
;; targets.  The legacy alternatives set "prefix_rex" to 1 and the AVX
;; alternative sets "length_vex" to 4.
2699 (define_insn "sse2_cvtsi2sdq"
2700 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2703 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2704 (match_operand:V2DF 1 "register_operand" "0,0,x")
2706 "TARGET_SSE2 && TARGET_64BIT"
2708 cvtsi2sdq\t{%2, %0|%0, %2}
2709 cvtsi2sdq\t{%2, %0|%0, %2}
2710 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2711 [(set_attr "isa" "noavx,noavx,avx")
2712 (set_attr "type" "sseicvt")
2713 (set_attr "athlon_decode" "double,direct,*")
2714 (set_attr "amdfam10_decode" "vector,double,*")
2715 (set_attr "bdver1_decode" "double,direct,*")
2716 (set_attr "length_vex" "*,*,4")
2717 (set_attr "prefix_rex" "1,1,*")
2718 (set_attr "prefix" "orig,orig,vex")
2719 (set_attr "mode" "DF")])
;; cvtsd2si (vector-operand form): converts element 0 of the V2DF operand
;; to an SImode general register.  The conversion is wrapped in
;; UNSPEC_FIX_NOTRUNC.  Register and memory sources are separate
;; alternatives so that decode attributes can differ.
2721 (define_insn "sse2_cvtsd2si"
2722 [(set (match_operand:SI 0 "register_operand" "=r,r")
2725 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2726 (parallel [(const_int 0)]))]
2727 UNSPEC_FIX_NOTRUNC))]
2729 "%vcvtsd2si\t{%1, %0|%0, %1}"
2730 [(set_attr "type" "sseicvt")
2731 (set_attr "athlon_decode" "double,vector")
2732 (set_attr "bdver1_decode" "double,double")
2733 (set_attr "btver2_decode" "double,double")
2734 (set_attr "prefix_rep" "1")
2735 (set_attr "prefix" "maybe_vex")
2736 (set_attr "mode" "SI")])
;; cvtsd2si (scalar-operand form): same instruction as sse2_cvtsd2si, but
;; the source is a plain DFmode operand instead of element 0 of a V2DF
;; vector.  The conversion is an UNSPEC_FIX_NOTRUNC unspec.
2738 (define_insn "sse2_cvtsd2si_2"
2739 [(set (match_operand:SI 0 "register_operand" "=r,r")
2740 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2741 UNSPEC_FIX_NOTRUNC))]
2743 "%vcvtsd2si\t{%1, %0|%0, %1}"
2744 [(set_attr "type" "sseicvt")
2745 (set_attr "athlon_decode" "double,vector")
2746 (set_attr "amdfam10_decode" "double,double")
2747 (set_attr "bdver1_decode" "double,double")
2748 (set_attr "prefix_rep" "1")
2749 (set_attr "prefix" "maybe_vex")
2750 (set_attr "mode" "SI")])
;; cvtsd2si with a 64-bit destination (vector-operand form): converts
;; element 0 of the V2DF operand to a DImode general register via
;; UNSPEC_FIX_NOTRUNC.  Only enabled for TARGET_SSE2 on 64-bit targets.
2752 (define_insn "sse2_cvtsd2siq"
2753 [(set (match_operand:DI 0 "register_operand" "=r,r")
2756 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2757 (parallel [(const_int 0)]))]
2758 UNSPEC_FIX_NOTRUNC))]
2759 "TARGET_SSE2 && TARGET_64BIT"
2760 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2761 [(set_attr "type" "sseicvt")
2762 (set_attr "athlon_decode" "double,vector")
2763 (set_attr "bdver1_decode" "double,double")
2764 (set_attr "prefix_rep" "1")
2765 (set_attr "prefix" "maybe_vex")
2766 (set_attr "mode" "DI")])
;; cvtsd2si with a 64-bit destination (scalar-operand form): the source is
;; a plain DFmode operand, converted to DImode via UNSPEC_FIX_NOTRUNC.
;; Only enabled for TARGET_SSE2 on 64-bit targets.
2768 (define_insn "sse2_cvtsd2siq_2"
2769 [(set (match_operand:DI 0 "register_operand" "=r,r")
2770 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2771 UNSPEC_FIX_NOTRUNC))]
2772 "TARGET_SSE2 && TARGET_64BIT"
2773 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2774 [(set_attr "type" "sseicvt")
2775 (set_attr "athlon_decode" "double,vector")
2776 (set_attr "amdfam10_decode" "double,double")
2777 (set_attr "bdver1_decode" "double,double")
2778 (set_attr "prefix_rep" "1")
2779 (set_attr "prefix" "maybe_vex")
2780 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand to an SImode general
;; register using the cvttsd2si instruction.  Register and memory sources
;; are separate alternatives so that decode attributes can differ.
2782 (define_insn "sse2_cvttsd2si"
2783 [(set (match_operand:SI 0 "register_operand" "=r,r")
2786 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2787 (parallel [(const_int 0)]))))]
2789 "%vcvttsd2si\t{%1, %0|%0, %1}"
2790 [(set_attr "type" "sseicvt")
2791 (set_attr "athlon_decode" "double,vector")
2792 (set_attr "amdfam10_decode" "double,double")
2793 (set_attr "bdver1_decode" "double,double")
2794 (set_attr "btver2_decode" "double,double")
2795 (set_attr "prefix_rep" "1")
2796 (set_attr "prefix" "maybe_vex")
2797 (set_attr "mode" "SI")])
;; cvttsd2si with a 64-bit destination: converts element 0 of the V2DF
;; operand to a DImode general register.  Only enabled for TARGET_SSE2 on
;; 64-bit targets.
2799 (define_insn "sse2_cvttsd2siq"
2800 [(set (match_operand:DI 0 "register_operand" "=r,r")
2803 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2804 (parallel [(const_int 0)]))))]
2805 "TARGET_SSE2 && TARGET_64BIT"
2806 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2807 [(set_attr "type" "sseicvt")
2808 (set_attr "athlon_decode" "double,vector")
2809 (set_attr "amdfam10_decode" "double,double")
2810 (set_attr "bdver1_decode" "double,double")
2811 (set_attr "prefix_rep" "1")
2812 (set_attr "prefix" "maybe_vex")
2813 (set_attr "mode" "DI")])
;; Standard-named signed conversion V4SI -> V4DF, implemented with the
;; VEX-encoded vcvtdq2pd instruction.
2815 (define_insn "floatv4siv4df2"
2816 [(set (match_operand:V4DF 0 "register_operand" "=x")
2817 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2819 "vcvtdq2pd\t{%1, %0|%0, %1}"
2820 [(set_attr "type" "ssecvt")
2821 (set_attr "prefix" "vex")
2822 (set_attr "mode" "V4DF")])
;; vcvtdq2pd on the low half of a V8SI operand: elements 0..3 are selected
;; and converted to V4DF.
;; NOTE(review): the "%x1" operand modifier presumably prints operand 1
;; with its 128-bit register name -- confirm against ix86_print_operand.
2824 (define_insn "avx_cvtdq2pd256_2"
2825 [(set (match_operand:V4DF 0 "register_operand" "=x")
2828 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2829 (parallel [(const_int 0) (const_int 1)
2830 (const_int 2) (const_int 3)]))))]
2832 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2833 [(set_attr "type" "ssecvt")
2834 (set_attr "prefix" "vex")
2835 (set_attr "mode" "V4DF")])
;; cvtdq2pd: selects elements 0 and 1 of a V4SI operand and converts them
;; to V2DF.
;; NOTE(review): only the Intel-syntax half of the template uses the "%q1"
;; modifier; presumably this is so a memory operand is printed with a
;; 64-bit size -- confirm against ix86_print_operand.
2837 (define_insn "sse2_cvtdq2pd"
2838 [(set (match_operand:V2DF 0 "register_operand" "=x")
2841 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2842 (parallel [(const_int 0) (const_int 1)]))))]
2844 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2845 [(set_attr "type" "ssecvt")
2846 (set_attr "prefix" "maybe_vex")
2847 (set_attr "mode" "V2DF")])
;; 256-bit vcvtpd2dq: converts a V4DF source to V4SI via an
;; UNSPEC_FIX_NOTRUNC unspec.  The "{y}" in the template is the
;; AT&T-syntax suffix that marks the 256-bit source form.
2849 (define_insn "avx_cvtpd2dq256"
2850 [(set (match_operand:V4SI 0 "register_operand" "=x")
2851 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2852 UNSPEC_FIX_NOTRUNC))]
2854 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2855 [(set_attr "type" "ssecvt")
2856 (set_attr "prefix" "vex")
2857 (set_attr "mode" "OI")])
;; Expander for the V4DF -> V4SI conversion whose result is placed in a
;; V8SI destination.  The preparation statement supplies operands[2] as
;; the V4SImode zero vector, which the matching "*avx_cvtpd2dq256_2" insn
;; requires through "const0_operand".
2859 (define_expand "avx_cvtpd2dq256_2"
2860 [(set (match_operand:V8SI 0 "register_operand")
2862 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
2866 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the expander of the same name: vcvtpd2dq of a V4DF
;; source combined with a V4SI zero operand into a V8SI destination.
;; NOTE(review): the destination is printed with "%x0", presumably its
;; 128-bit register name -- confirm against ix86_print_operand.
2868 (define_insn "*avx_cvtpd2dq256_2"
2869 [(set (match_operand:V8SI 0 "register_operand" "=x")
2871 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2873 (match_operand:V4SI 2 "const0_operand")))]
2875 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2876 [(set_attr "type" "ssecvt")
2877 (set_attr "prefix" "vex")
2878 (set_attr "btver2_decode" "vector")
2879 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V2SI conversion whose result is placed in a
;; V4SI destination.  The preparation statement supplies operands[2] as
;; the V2SImode zero vector, which the matching "*sse2_cvtpd2dq" insn
;; requires through "const0_operand".
2881 (define_expand "sse2_cvtpd2dq"
2882 [(set (match_operand:V4SI 0 "register_operand")
2884 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
2888 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the "sse2_cvtpd2dq" expander: converts a V2DF source to
;; V2SI, combined with a V2SI zero operand into a V4SI destination.  The
;; C output code returns either the VEX template (with the AT&T "{x}"
;; suffix) or the legacy cvtpd2dq template.
2890 (define_insn "*sse2_cvtpd2dq"
2891 [(set (match_operand:V4SI 0 "register_operand" "=x")
2893 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2895 (match_operand:V2SI 2 "const0_operand")))]
2899 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2901 return "cvtpd2dq\t{%1, %0|%0, %1}";
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "prefix_rep" "1")
2905 (set_attr "prefix_data16" "0")
2906 (set_attr "prefix" "maybe_vex")
2907 (set_attr "mode" "TI")
2908 (set_attr "amdfam10_decode" "double")
2909 (set_attr "athlon_decode" "vector")
2910 (set_attr "bdver1_decode" "double")])
;; Standard-named truncating conversion V4DF -> V4SI, implemented with the
;; VEX-encoded vcvttpd2dq instruction ("{y}" is the AT&T-syntax suffix for
;; the 256-bit source form).
2912 (define_insn "fix_truncv4dfv4si2"
2913 [(set (match_operand:V4SI 0 "register_operand" "=x")
2914 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2916 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2917 [(set_attr "type" "ssecvt")
2918 (set_attr "prefix" "vex")
2919 (set_attr "mode" "OI")])
;; Expander for the truncating V4DF -> V4SI conversion whose result is
;; placed in a V8SI destination.  operands[2] is set to the V4SImode zero
;; vector required by the matching "*avx_cvttpd2dq256_2" insn.
2921 (define_expand "avx_cvttpd2dq256_2"
2922 [(set (match_operand:V8SI 0 "register_operand")
2924 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
2927 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the expander of the same name: vcvttpd2dq of a V4DF
;; source combined with a V4SI zero operand into a V8SI destination.
;; NOTE(review): the destination is printed with "%x0", presumably its
;; 128-bit register name -- confirm against ix86_print_operand.
2929 (define_insn "*avx_cvttpd2dq256_2"
2930 [(set (match_operand:V8SI 0 "register_operand" "=x")
2932 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2933 (match_operand:V4SI 2 "const0_operand")))]
2935 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2936 [(set_attr "type" "ssecvt")
2937 (set_attr "prefix" "vex")
2938 (set_attr "btver2_decode" "vector")
2939 (set_attr "mode" "OI")])
;; Expander for the truncating V2DF -> V2SI conversion whose result is
;; placed in a V4SI destination.  operands[2] is set to the V2SImode zero
;; vector required by the matching "*sse2_cvttpd2dq" insn.
2941 (define_expand "sse2_cvttpd2dq"
2942 [(set (match_operand:V4SI 0 "register_operand")
2944 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
2947 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the "sse2_cvttpd2dq" expander: truncating conversion of
;; a V2DF source to V2SI, combined with a V2SI zero operand into a V4SI
;; destination.  The C output code returns either the VEX template (with
;; the AT&T "{x}" suffix) or the legacy cvttpd2dq template.
2949 (define_insn "*sse2_cvttpd2dq"
2950 [(set (match_operand:V4SI 0 "register_operand" "=x")
2952 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2953 (match_operand:V2SI 2 "const0_operand")))]
2957 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2959 return "cvttpd2dq\t{%1, %0|%0, %1}";
2961 [(set_attr "type" "ssecvt")
2962 (set_attr "amdfam10_decode" "double")
2963 (set_attr "athlon_decode" "vector")
2964 (set_attr "bdver1_decode" "double")
2965 (set_attr "prefix" "maybe_vex")
2966 (set_attr "mode" "TI")])
;; cvtsd2ss: narrows the V2DF operand 2 with float_truncate and combines
;; the result with the V4SF vector in operand 1.
;; Three alternatives:
;;   0: non-AVX, source in an SSE register, dest tied to operand 1
;;   1: non-AVX, source in memory, dest tied to operand 1
;;   2: AVX three-operand form (vcvtsd2ss), source in register or memory
2968 (define_insn "sse2_cvtsd2ss"
2969 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2972 (float_truncate:V2SF
2973 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2974 (match_operand:V4SF 1 "register_operand" "0,0,x")
2978 cvtsd2ss\t{%2, %0|%0, %2}
2979 cvtsd2ss\t{%2, %0|%0, %2}
2980 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2981 [(set_attr "isa" "noavx,noavx,avx")
2982 (set_attr "type" "ssecvt")
2983 (set_attr "athlon_decode" "vector,double,*")
2984 (set_attr "amdfam10_decode" "vector,double,*")
2985 (set_attr "bdver1_decode" "direct,direct,*")
2986 (set_attr "btver2_decode" "double,double,double")
2987 (set_attr "prefix" "orig,orig,vex")
2988 (set_attr "mode" "SF")])
;; cvtss2sd: takes elements 0 and 1 of the V4SF operand 2 and combines the
;; converted value with the V2DF vector in operand 1.
;; Three alternatives:
;;   0: non-AVX, source in an SSE register, dest tied to operand 1
;;   1: non-AVX, source in memory, dest tied to operand 1
;;   2: AVX three-operand form (vcvtss2sd), source in register or memory
2990 (define_insn "sse2_cvtss2sd"
2991 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2995 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2996 (parallel [(const_int 0) (const_int 1)])))
2997 (match_operand:V2DF 1 "register_operand" "0,0,x")
3001 cvtss2sd\t{%2, %0|%0, %2}
3002 cvtss2sd\t{%2, %0|%0, %2}
3003 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3004 [(set_attr "isa" "noavx,noavx,avx")
3005 (set_attr "type" "ssecvt")
3006 (set_attr "amdfam10_decode" "vector,double,*")
3007 (set_attr "athlon_decode" "direct,direct,*")
3008 (set_attr "bdver1_decode" "direct,direct,*")
3009 (set_attr "btver2_decode" "double,double,double")
3010 (set_attr "prefix" "orig,orig,vex")
3011 (set_attr "mode" "DF")])
;; 256-bit vcvtpd2ps: narrows a V4DF source (register or memory) to V4SF
;; with float_truncate.  "{y}" is the AT&T-syntax suffix for the 256-bit
;; source form.
3013 (define_insn "avx_cvtpd2ps256"
3014 [(set (match_operand:V4SF 0 "register_operand" "=x")
3015 (float_truncate:V4SF
3016 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3018 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3019 [(set_attr "type" "ssecvt")
3020 (set_attr "prefix" "vex")
3021 (set_attr "btver2_decode" "vector")
3022 (set_attr "mode" "V4SF")])
;; Expander for the V2DF -> V2SF narrowing whose result is placed in a
;; V4SF destination.  operands[2] is set to the V2SFmode zero vector
;; required by the matching "*sse2_cvtpd2ps" insn.
3024 (define_expand "sse2_cvtpd2ps"
3025 [(set (match_operand:V4SF 0 "register_operand")
3027 (float_truncate:V2SF
3028 (match_operand:V2DF 1 "nonimmediate_operand"))
3031 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the "sse2_cvtpd2ps" expander: narrows a V2DF source to
;; V2SF, combined with a V2SF zero operand into a V4SF destination.  The
;; C output code returns either the VEX template (with the AT&T "{x}"
;; suffix) or the legacy cvtpd2ps template.
3033 (define_insn "*sse2_cvtpd2ps"
3034 [(set (match_operand:V4SF 0 "register_operand" "=x")
3036 (float_truncate:V2SF
3037 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3038 (match_operand:V2SF 2 "const0_operand")))]
3042 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
3044 return "cvtpd2ps\t{%1, %0|%0, %1}";
3046 [(set_attr "type" "ssecvt")
3047 (set_attr "amdfam10_decode" "double")
3048 (set_attr "athlon_decode" "vector")
3049 (set_attr "bdver1_decode" "double")
3050 (set_attr "prefix_data16" "1")
3051 (set_attr "prefix" "maybe_vex")
3052 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2pd: widens a V4SF source (register or memory) into a
;; V4DF destination.  VEX encoding only.
3054 (define_insn "avx_cvtps2pd256"
3055 [(set (match_operand:V4DF 0 "register_operand" "=x")
3057 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3059 "vcvtps2pd\t{%1, %0|%0, %1}"
3060 [(set_attr "type" "ssecvt")
3061 (set_attr "prefix" "vex")
3062 (set_attr "mode" "V4DF")])
;; vcvtps2pd on the low half of a V8SF operand: elements 0..3 are selected
;; and widened to V4DF.
;; NOTE(review): the "%x1" operand modifier presumably prints operand 1
;; with its 128-bit register name -- confirm against ix86_print_operand.
3064 (define_insn "*avx_cvtps2pd256_2"
3065 [(set (match_operand:V4DF 0 "register_operand" "=x")
3068 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3069 (parallel [(const_int 0) (const_int 1)
3070 (const_int 2) (const_int 3)]))))]
3072 "vcvtps2pd\t{%x1, %0|%0, %x1}"
3073 [(set_attr "type" "ssecvt")
3074 (set_attr "prefix" "vex")
3075 (set_attr "mode" "V4DF")])
;; cvtps2pd: selects elements 0 and 1 of a V4SF operand and widens them to
;; V2DF.
;; NOTE(review): only the Intel-syntax half of the template uses the "%q1"
;; modifier; presumably this is so a memory operand is printed with a
;; 64-bit size -- confirm against ix86_print_operand.
3077 (define_insn "sse2_cvtps2pd"
3078 [(set (match_operand:V2DF 0 "register_operand" "=x")
3081 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3082 (parallel [(const_int 0) (const_int 1)]))))]
3084 "%vcvtps2pd\t{%1, %0|%0, %q1}"
3085 [(set_attr "type" "ssecvt")
3086 (set_attr "amdfam10_decode" "direct")
3087 (set_attr "athlon_decode" "double")
3088 (set_attr "bdver1_decode" "double")
3089 (set_attr "prefix_data16" "0")
3090 (set_attr "prefix" "maybe_vex")
3091 (set_attr "mode" "V2DF")])
;; Widen the high half of a V4SF vector to V2DF, in two steps:
;;   1. a shuffle with indices (6 7 2 3) brings the high elements of
;;      operand 1 into the low positions of a fresh V4SF temporary
;;      (operands[2], allocated in the preparation statement);
;;   2. elements 0 and 1 of that temporary are converted into the V2DF
;;      destination.
3093 (define_expand "vec_unpacks_hi_v4sf"
3098 (match_operand:V4SF 1 "nonimmediate_operand"))
3099 (parallel [(const_int 6) (const_int 7)
3100 (const_int 2) (const_int 3)])))
3101 (set (match_operand:V2DF 0 "register_operand")
3105 (parallel [(const_int 0) (const_int 1)]))))]
3107 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the high half of a V8SF vector to V4DF: elements 4..7 of operand
;; 1 are extracted into a fresh V4SF temporary (operands[2]), which then
;; feeds the second set that produces the V4DF destination.
3109 (define_expand "vec_unpacks_hi_v8sf"
3112 (match_operand:V8SF 1 "nonimmediate_operand")
3113 (parallel [(const_int 4) (const_int 5)
3114 (const_int 6) (const_int 7)])))
3115 (set (match_operand:V4DF 0 "register_operand")
3119 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the low half of a V4SF vector to V2DF: elements 0 and 1 of
;; operand 1 are selected directly, so no temporary is needed.
3121 (define_expand "vec_unpacks_lo_v4sf"
3122 [(set (match_operand:V2DF 0 "register_operand")
3125 (match_operand:V4SF 1 "nonimmediate_operand")
3126 (parallel [(const_int 0) (const_int 1)]))))]
;; Widen the low half of a V8SF vector to V4DF: elements 0..3 of operand 1
;; are selected directly, so no temporary is needed.
3129 (define_expand "vec_unpacks_lo_v8sf"
3130 [(set (match_operand:V4DF 0 "register_operand")
3133 (match_operand:V8SF 1 "nonimmediate_operand")
3134 (parallel [(const_int 0) (const_int 1)
3135 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode that has half as
;; many elements, each twice as wide (e.g. V8HI -> V4SF, V4SI -> V2DF).
;; Used as the result mode of the unpack-and-convert expanders below.
3138 (define_mode_attr sseunpackfltmode
3139 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Signed unpack-high followed by int->float conversion for the VI2_AVX2
;; modes: the high half of operand 1 is first unpacked (signed) into a
;; temporary of mode <sseunpackmode>, and that temporary is then
;; converted with a FLOAT rtx into the <sseunpackfltmode> destination.
3141 (define_expand "vec_unpacks_float_hi_<mode>"
3142 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3143 (match_operand:VI2_AVX2 1 "register_operand")]
3146 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3148 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
3149 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3150 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-low followed by int->float conversion for the VI2_AVX2
;; modes: the low half of operand 1 is unpacked (signed) into a temporary
;; of mode <sseunpackmode>, which is then converted with a FLOAT rtx.
3154 (define_expand "vec_unpacks_float_lo_<mode>"
3155 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3156 (match_operand:VI2_AVX2 1 "register_operand")]
3159 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3161 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
3162 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3163 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-high followed by int->float conversion for the VI2_AVX2
;; modes.  The unpack step is the unsigned one (vec_unpacku_hi); the
;; widened temporary is then converted with an ordinary FLOAT rtx.
3167 (define_expand "vec_unpacku_float_hi_<mode>"
3168 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3169 (match_operand:VI2_AVX2 1 "register_operand")]
3172 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3174 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
3175 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3176 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-low followed by int->float conversion for the VI2_AVX2
;; modes.  The unpack step is the unsigned one (vec_unpacku_lo); the
;; widened temporary is then converted with an ordinary FLOAT rtx.
3180 (define_expand "vec_unpacku_float_lo_<mode>"
3181 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3182 (match_operand:VI2_AVX2 1 "register_operand")]
3185 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3187 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
3188 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3189 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed conversion of the high half of a V4SI vector to V2DF, in two
;; steps:
;;   1. a shuffle with indices (2 3 2 3) copies the high elements of
;;      operand 1 into the low positions of a fresh V4SI temporary
;;      (operands[2]);
;;   2. elements 0 and 1 of the temporary are converted into the V2DF
;;      destination.
3193 (define_expand "vec_unpacks_float_hi_v4si"
3196 (match_operand:V4SI 1 "nonimmediate_operand")
3197 (parallel [(const_int 2) (const_int 3)
3198 (const_int 2) (const_int 3)])))
3199 (set (match_operand:V2DF 0 "register_operand")
3203 (parallel [(const_int 0) (const_int 1)]))))]
3205 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed conversion of the low half of a V4SI vector to V2DF: elements 0
;; and 1 of operand 1 are selected directly, so no temporary is needed.
3207 (define_expand "vec_unpacks_float_lo_v4si"
3208 [(set (match_operand:V2DF 0 "register_operand")
3211 (match_operand:V4SI 1 "nonimmediate_operand")
3212 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed conversion of the high half of a V8SI vector to V4DF: elements
;; 4..7 of operand 1 are extracted into a fresh V4SI temporary
;; (operands[2]), which feeds the second set producing the V4DF result.
3215 (define_expand "vec_unpacks_float_hi_v8si"
3218 (match_operand:V8SI 1 "nonimmediate_operand")
3219 (parallel [(const_int 4) (const_int 5)
3220 (const_int 6) (const_int 7)])))
3221 (set (match_operand:V4DF 0 "register_operand")
3225 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed conversion of the low half of a V8SI vector to V4DF: elements
;; 0..3 of operand 1 are selected directly, so no temporary is needed.
3227 (define_expand "vec_unpacks_float_lo_v8si"
3228 [(set (match_operand:V4DF 0 "register_operand")
3231 (match_operand:V8SI 1 "nonimmediate_operand")
3232 (parallel [(const_int 0) (const_int 1)
3233 (const_int 2) (const_int 3)]))))]
;; Unsigned conversion of the high half of a V4SI vector to V2DF, built
;; from the signed conversion plus a correction:
;;   1. shuffle (2 3 2 3) moves the high elements into a V4SI temporary
;;      (operands[5]);
;;   2. elements 0 and 1 of it are converted into operands[6];
;;   3. operands[7] = (operands[6] < 0)  -- compare against the zero
;;      vector in operands[3];
;;   4. operands[8] = operands[7] AND operands[4], where operands[4] is a
;;      vector built from 2**32 (TWO32r = ldexp (1.0, 32));
;;   5. result = operands[6] + operands[8].
;; Operands 6..8 are fresh V2DF pseudo registers.
3236 (define_expand "vec_unpacku_float_hi_v4si"
3239 (match_operand:V4SI 1 "nonimmediate_operand")
3240 (parallel [(const_int 2) (const_int 3)
3241 (const_int 2) (const_int 3)])))
3246 (parallel [(const_int 0) (const_int 1)]))))
3248 (lt:V2DF (match_dup 6) (match_dup 3)))
3250 (and:V2DF (match_dup 7) (match_dup 4)))
3251 (set (match_operand:V2DF 0 "register_operand")
3252 (plus:V2DF (match_dup 6) (match_dup 8)))]
3255 REAL_VALUE_TYPE TWO32r;
3259 real_ldexp (&TWO32r, &dconst1, 32);
3260 x = const_double_from_real_value (TWO32r, DFmode);
3262 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3263 operands[4] = force_reg (V2DFmode,
3264 ix86_build_const_vector (V2DFmode, 1, x));
3266 operands[5] = gen_reg_rtx (V4SImode);
3268 for (i = 6; i < 9; i++)
3269 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the low half of a V4SI vector to V2DF, built
;; from the signed conversion plus a correction:
;;   1. elements 0 and 1 of operand 1 are converted into operands[5];
;;   2. operands[6] = (operands[5] < 0)  -- compare against the zero
;;      vector in operands[3];
;;   3. operands[7] = operands[6] AND operands[4], where operands[4] is a
;;      vector built from 2**32 (TWO32r = ldexp (1.0, 32));
;;   4. result = operands[5] + operands[7].
;; Operands 5..7 are fresh V2DF pseudo registers.
3272 (define_expand "vec_unpacku_float_lo_v4si"
3276 (match_operand:V4SI 1 "nonimmediate_operand")
3277 (parallel [(const_int 0) (const_int 1)]))))
3279 (lt:V2DF (match_dup 5) (match_dup 3)))
3281 (and:V2DF (match_dup 6) (match_dup 4)))
3282 (set (match_operand:V2DF 0 "register_operand")
3283 (plus:V2DF (match_dup 5) (match_dup 7)))]
3286 REAL_VALUE_TYPE TWO32r;
3290 real_ldexp (&TWO32r, &dconst1, 32);
3291 x = const_double_from_real_value (TWO32r, DFmode);
3293 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3294 operands[4] = force_reg (V2DFmode,
3295 ix86_build_const_vector (V2DFmode, 1, x));
3297 for (i = 5; i < 8; i++)
3298 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the high half of a V8SI vector to V4DF, emitted
;; entirely from C:
;;   tmp[0] = zero vector, tmp[1] = vector built from 2**32
;;   tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = signed conversion of tmp[5] (floatv4siv4df2)
;;   tmp[3] = (tmp[2] < tmp[0])
;;   tmp[4] = tmp[3] AND tmp[1]
;;   result = tmp[2] + tmp[4]
;; Unlike the _lo variant, operand 1 must be a register here.
3301 (define_expand "vec_unpacku_float_hi_v8si"
3302 [(match_operand:V4DF 0 "register_operand")
3303 (match_operand:V8SI 1 "register_operand")]
3306 REAL_VALUE_TYPE TWO32r;
3310 real_ldexp (&TWO32r, &dconst1, 32);
3311 x = const_double_from_real_value (TWO32r, DFmode);
3313 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3314 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3315 tmp[5] = gen_reg_rtx (V4SImode);
3317 for (i = 2; i < 5; i++)
3318 tmp[i] = gen_reg_rtx (V4DFmode);
3319 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3320 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
3321 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3322 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3323 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3324 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned conversion of the low half of a V8SI vector to V4DF, emitted
;; entirely from C:
;;   tmp[0] = zero vector, tmp[1] = vector built from 2**32
;;   tmp[2] = signed conversion of the low half of operand 1
;;            (avx_cvtdq2pd256_2 selects elements 0..3 itself)
;;   tmp[3] = (tmp[2] < tmp[0])
;;   tmp[4] = tmp[3] AND tmp[1]
;;   result = tmp[2] + tmp[4]
3328 (define_expand "vec_unpacku_float_lo_v8si"
3329 [(match_operand:V4DF 0 "register_operand")
3330 (match_operand:V8SI 1 "nonimmediate_operand")]
3333 REAL_VALUE_TYPE TWO32r;
3337 real_ldexp (&TWO32r, &dconst1, 32);
3338 x = const_double_from_real_value (TWO32r, DFmode);
3340 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3341 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3343 for (i = 2; i < 5; i++)
3344 tmp[i] = gen_reg_rtx (V4DFmode);
3345 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3346 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3347 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3348 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3349 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Pack two V4DF vectors into one V8SF vector: each input (operands 1 and
;; 2) is narrowed to V4SF with float_truncate into its own fresh V4SF
;; temporary (operands[3] and operands[4]), and the final set builds the
;; V8SF destination from those temporaries.
3353 (define_expand "vec_pack_trunc_v4df"
3355 (float_truncate:V4SF
3356 (match_operand:V4DF 1 "nonimmediate_operand")))
3358 (float_truncate:V4SF
3359 (match_operand:V4DF 2 "nonimmediate_operand")))
3360 (set (match_operand:V8SF 0 "register_operand")
3366 operands[3] = gen_reg_rtx (V4SFmode);
3367 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF vectors into one V4SF vector.  Two strategies:
;;   * With AVX and no preference for 128-bit AVX: concatenate the inputs
;;     into a V4DF temporary (operand 1 is forced into a register first)
;;     and narrow it with a single avx_cvtpd2ps256.
;;   * Otherwise: narrow each input separately with sse2_cvtpd2ps into a
;;     V4SF temporary and merge the two results with sse_movlhps.
3370 (define_expand "vec_pack_trunc_v2df"
3371 [(match_operand:V4SF 0 "register_operand")
3372 (match_operand:V2DF 1 "nonimmediate_operand")
3373 (match_operand:V2DF 2 "nonimmediate_operand")]
3378 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3380 tmp0 = gen_reg_rtx (V4DFmode);
3381 tmp1 = force_reg (V2DFmode, operands[1]);
3383 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3384 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3388 tmp0 = gen_reg_rtx (V4SFmode);
3389 tmp1 = gen_reg_rtx (V4SFmode);
3391 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3392 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3393 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two V4DF vectors into one V8SI vector with signed truncating
;; conversion: each input is converted to V4SI with fix_truncv4dfv4si2
;; into its own temporary (r1, r2), and the two halves are concatenated
;; into the V8SI destination with avx_vec_concatv8si.
3398 (define_expand "vec_pack_sfix_trunc_v4df"
3399 [(match_operand:V8SI 0 "register_operand")
3400 (match_operand:V4DF 1 "nonimmediate_operand")
3401 (match_operand:V4DF 2 "nonimmediate_operand")]
3406 r1 = gen_reg_rtx (V4SImode);
3407 r2 = gen_reg_rtx (V4SImode);
3409 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3410 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3411 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander for the truncating conversion of two V2DF vectors (operands 1, 2)
;; into one V4SI (operand 0).  With TARGET_AVX && !TARGET_PREFER_AVX128 the
;; inputs are concatenated into a V4DF temporary and converted by
;; fix_truncv4dfv4si2; the other sequence converts each input with
;; sse2_cvttpd2dq and combines the results with vec_interleave_lowv2di
;; applied to V2DI lowparts.
3415 (define_expand "vec_pack_sfix_trunc_v2df"
3416 [(match_operand:V4SI 0 "register_operand")
3417 (match_operand:V2DF 1 "nonimmediate_operand")
3418 (match_operand:V2DF 2 "nonimmediate_operand")]
3423 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3425 tmp0 = gen_reg_rtx (V4DFmode);
3426 tmp1 = force_reg (V2DFmode, operands[1]);
3428 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3429 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3433 tmp0 = gen_reg_rtx (V4SImode);
3434 tmp1 = gen_reg_rtx (V4SImode);
3436 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3437 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3439 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3440 gen_lowpart (V2DImode, tmp0),
3441 gen_lowpart (V2DImode, tmp1)));
;; Maps a DFmode vector mode to the SImode vector mode with twice as many
;; elements (V4DF -> V8SI, V2DF -> V4SI); used as the result mode of
;; vec_pack_ufix_trunc_<mode>.
3446 (define_mode_attr ssepackfltmode
3447 [(V4DF "V8SI") (V2DF "V4SI")])
;; Expander for the unsigned truncating pack of two VF2 vectors
;; (operands 1, 2) into <ssepackfltmode> (operand 0).  Each input is passed
;; through ix86_expand_adjust_ufix_to_sfix_si, which also fills tmp[2] /
;; tmp[3]; the adjusted inputs go through the signed
;; vec_pack_sfix_trunc_<mode>, and the packed value is XORed with the result
;; of ix86_expand_vec_extract_even_odd on tmp[2] / tmp[3] (last argument 0,
;; presumably "even" -- confirm).  When the result mode is V4SI or
;; TARGET_AVX2 holds, the extraction is done directly in <ssepackfltmode>;
;; the other sequence does it in V8SFmode on lowparts.
3449 (define_expand "vec_pack_ufix_trunc_<mode>"
3450 [(match_operand:<ssepackfltmode> 0 "register_operand")
3451 (match_operand:VF2 1 "register_operand")
3452 (match_operand:VF2 2 "register_operand")]
3456 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3457 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3458 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3459 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3460 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3462 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3463 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3467 tmp[5] = gen_reg_rtx (V8SFmode);
3468 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3469 gen_lowpart (V8SFmode, tmp[3]), 0);
3470 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3472 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3473 operands[0], 0, OPTAB_DIRECT);
3474 if (tmp[6] != operands[0])
3475 emit_move_insn (operands[0], tmp[6]);
;; Expander converting two V4DF vectors (operands 1, 2) to V4SI with
;; avx_cvtpd2dq256 and concatenating the two results into the V8SI
;; operand 0 via avx_vec_concatv8si.
3479 (define_expand "vec_pack_sfix_v4df"
3480 [(match_operand:V8SI 0 "register_operand")
3481 (match_operand:V4DF 1 "nonimmediate_operand")
3482 (match_operand:V4DF 2 "nonimmediate_operand")]
3487 r1 = gen_reg_rtx (V4SImode);
3488 r2 = gen_reg_rtx (V4SImode);
3490 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3491 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3492 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander converting two V2DF vectors (operands 1, 2) into one V4SI
;; (operand 0).  With TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are
;; concatenated into a V4DF temporary and converted by avx_cvtpd2dq256; the
;; other sequence converts each input with sse2_cvtpd2dq and combines the
;; results with vec_interleave_lowv2di applied to V2DI lowparts.
3496 (define_expand "vec_pack_sfix_v2df"
3497 [(match_operand:V4SI 0 "register_operand")
3498 (match_operand:V2DF 1 "nonimmediate_operand")
3499 (match_operand:V2DF 2 "nonimmediate_operand")]
3504 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3506 tmp0 = gen_reg_rtx (V4DFmode);
3507 tmp1 = force_reg (V2DFmode, operands[1]);
3509 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3510 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3514 tmp0 = gen_reg_rtx (V4SImode);
3515 tmp1 = gen_reg_rtx (V4SImode);
3517 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3518 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3520 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3521 gen_lowpart (V2DImode, tmp0),
3522 gen_lowpart (V2DImode, tmp1)));
3527 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3529 ;; Parallel single-precision floating point element swizzling
3531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps: the operands are first passed
;; through ix86_fixup_binary_operands, the insn is emitted into the
;; destination it returns, and that destination is copied to operand 0 when
;; the two differ.
3533 (define_expand "sse_movhlps_exp"
3534 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3537 (match_operand:V4SF 1 "nonimmediate_operand")
3538 (match_operand:V4SF 2 "nonimmediate_operand"))
3539 (parallel [(const_int 6)
3545 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3547 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3549 /* Fix up the destination if needed. */
3550 if (dst != operands[0])
3551 emit_move_insn (operands[0], dst);
;; movhlps and its memory forms.  Alternatives 0/1 are the register forms
;; (movhlps / vmovhlps); alternatives 2/3 take operand 2 from offsettable
;; memory and print it with the %H modifier (movlps / vmovlps); alternative
;; 4 stores register operand 2 to a memory destination with %vmovhps.
;; Requires TARGET_SSE and that operands 1 and 2 are not both in memory.
3556 (define_insn "sse_movhlps"
3557 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3560 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3561 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3562 (parallel [(const_int 6)
3566 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3568 movhlps\t{%2, %0|%0, %2}
3569 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3570 movlps\t{%H2, %0|%0, %H2}
3571 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3572 %vmovhps\t{%2, %0|%0, %2}"
3573 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3574 (set_attr "type" "ssemov")
3575 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3576 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps: the operands are first passed
;; through ix86_fixup_binary_operands, the insn is emitted into the
;; destination it returns, and that destination is copied to operand 0 when
;; the two differ.
3578 (define_expand "sse_movlhps_exp"
3579 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3582 (match_operand:V4SF 1 "nonimmediate_operand")
3583 (match_operand:V4SF 2 "nonimmediate_operand"))
3584 (parallel [(const_int 0)
3590 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3592 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3594 /* Fix up the destination if needed. */
3595 if (dst != operands[0])
3596 emit_move_insn (operands[0], dst);
;; movlhps and its memory forms.  Alternatives 0/1 are the register forms
;; (movlhps / vmovlhps); alternatives 2/3 take operand 2 from memory
;; (movhps / vmovhps); alternative 4 stores register operand 2 at %H0 of a
;; memory destination with %vmovlps.  Valid when TARGET_SSE holds and
;; ix86_binary_operator_ok accepts the operands.
;; Operand 2 of alternative 3 is "m", like its noavx twin (alternative 2):
;; vmovhps takes this source from memory only, which is also how the
;; vmovhps alternatives of sse_loadhps and *vec_concatv4sf are constrained.
;; A register constraint there could never produce assemblable output and
;; left AVX without a direct memory-operand alternative.
3601 (define_insn "sse_movlhps"
3602 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3605 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3606 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3607 (parallel [(const_int 0)
3611 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3613 movlhps\t{%2, %0|%0, %2}
3614 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3615 movhps\t{%2, %0|%0, %2}
3616 vmovhps\t{%2, %1, %0|%0, %1, %2}
3617 %vmovlps\t{%2, %H0|%H0, %2}"
3618 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3619 (set_attr "type" "ssemov")
3620 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3621 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3623 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: register operand 1 and register-or-memory operand 2,
;; with selection indices 2,10,3,11 and 6,14,7,15.
3624 (define_insn "avx_unpckhps256"
3625 [(set (match_operand:V8SF 0 "register_operand" "=x")
3628 (match_operand:V8SF 1 "register_operand" "x")
3629 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3630 (parallel [(const_int 2) (const_int 10)
3631 (const_int 3) (const_int 11)
3632 (const_int 6) (const_int 14)
3633 (const_int 7) (const_int 15)])))]
3635 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3636 [(set_attr "type" "sselog")
3637 (set_attr "prefix" "vex")
3638 (set_attr "mode" "V8SF")])
;; Expander built from three selections: one with the avx_unpcklps256
;; indices (0,8,1,9,4,12,5,13), one with the avx_unpckhps256 indices
;; (2,10,3,11,6,14,7,15), and a final one picking elements 4-7 and 12-15
;; for operand 0.  Operands 3 and 4 are fresh V8SF pseudos created in the
;; preparation code.
;; NOTE(review): presumably they carry the two intermediate results --
;; confirm.
3640 (define_expand "vec_interleave_highv8sf"
3644 (match_operand:V8SF 1 "register_operand" "x")
3645 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3646 (parallel [(const_int 0) (const_int 8)
3647 (const_int 1) (const_int 9)
3648 (const_int 4) (const_int 12)
3649 (const_int 5) (const_int 13)])))
3655 (parallel [(const_int 2) (const_int 10)
3656 (const_int 3) (const_int 11)
3657 (const_int 6) (const_int 14)
3658 (const_int 7) (const_int 15)])))
3659 (set (match_operand:V8SF 0 "register_operand")
3664 (parallel [(const_int 4) (const_int 5)
3665 (const_int 6) (const_int 7)
3666 (const_int 12) (const_int 13)
3667 (const_int 14) (const_int 15)])))]
3670 operands[3] = gen_reg_rtx (V8SFmode);
3671 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps on V4SF with selection indices 2,6,3,7.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) is the three-operand form.
3674 (define_insn "vec_interleave_highv4sf"
3675 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3678 (match_operand:V4SF 1 "register_operand" "0,x")
3679 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3680 (parallel [(const_int 2) (const_int 6)
3681 (const_int 3) (const_int 7)])))]
3684 unpckhps\t{%2, %0|%0, %2}
3685 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3686 [(set_attr "isa" "noavx,avx")
3687 (set_attr "type" "sselog")
3688 (set_attr "prefix" "orig,vex")
3689 (set_attr "mode" "V4SF")])
3691 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: register operand 1 and register-or-memory operand 2,
;; with selection indices 0,8,1,9 and 4,12,5,13.
3692 (define_insn "avx_unpcklps256"
3693 [(set (match_operand:V8SF 0 "register_operand" "=x")
3696 (match_operand:V8SF 1 "register_operand" "x")
3697 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3698 (parallel [(const_int 0) (const_int 8)
3699 (const_int 1) (const_int 9)
3700 (const_int 4) (const_int 12)
3701 (const_int 5) (const_int 13)])))]
3703 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3704 [(set_attr "type" "sselog")
3705 (set_attr "prefix" "vex")
3706 (set_attr "mode" "V8SF")])
;; Expander built from three selections: one with the avx_unpcklps256
;; indices (0,8,1,9,4,12,5,13), one with the avx_unpckhps256 indices
;; (2,10,3,11,6,14,7,15), and a final one picking elements 0-3 and 8-11
;; for operand 0.  Operands 3 and 4 are fresh V8SF pseudos created in the
;; preparation code.
;; NOTE(review): presumably they carry the two intermediate results --
;; confirm.
3708 (define_expand "vec_interleave_lowv8sf"
3712 (match_operand:V8SF 1 "register_operand" "x")
3713 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3714 (parallel [(const_int 0) (const_int 8)
3715 (const_int 1) (const_int 9)
3716 (const_int 4) (const_int 12)
3717 (const_int 5) (const_int 13)])))
3723 (parallel [(const_int 2) (const_int 10)
3724 (const_int 3) (const_int 11)
3725 (const_int 6) (const_int 14)
3726 (const_int 7) (const_int 15)])))
3727 (set (match_operand:V8SF 0 "register_operand")
3732 (parallel [(const_int 0) (const_int 1)
3733 (const_int 2) (const_int 3)
3734 (const_int 8) (const_int 9)
3735 (const_int 10) (const_int 11)])))]
3738 operands[3] = gen_reg_rtx (V8SFmode);
3739 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps / vunpcklps on V4SF with selection indices 0,4,1,5.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) is the three-operand form.
3742 (define_insn "vec_interleave_lowv4sf"
3743 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3746 (match_operand:V4SF 1 "register_operand" "0,x")
3747 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3748 (parallel [(const_int 0) (const_int 4)
3749 (const_int 1) (const_int 5)])))]
3752 unpcklps\t{%2, %0|%0, %2}
3753 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3754 [(set_attr "isa" "noavx,avx")
3755 (set_attr "type" "sselog")
3756 (set_attr "prefix" "orig,vex")
3757 (set_attr "mode" "V4SF")])
3759 ;; These are modeled with the same vec_concat as the others so that we
3760 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selection indices 1,1,3,3,5,5,7,7 use every
;; odd-numbered index twice.
3761 (define_insn "avx_movshdup256"
3762 [(set (match_operand:V8SF 0 "register_operand" "=x")
3765 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3767 (parallel [(const_int 1) (const_int 1)
3768 (const_int 3) (const_int 3)
3769 (const_int 5) (const_int 5)
3770 (const_int 7) (const_int 7)])))]
3772 "vmovshdup\t{%1, %0|%0, %1}"
3773 [(set_attr "type" "sse")
3774 (set_attr "prefix" "vex")
3775 (set_attr "mode" "V8SF")])
;; V4SF form of movshdup, emitted as %vmovshdup from a register-or-memory
;; operand 1; the selection list starts at index 1.
3777 (define_insn "sse3_movshdup"
3778 [(set (match_operand:V4SF 0 "register_operand" "=x")
3781 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3783 (parallel [(const_int 1)
3788 "%vmovshdup\t{%1, %0|%0, %1}"
3789 [(set_attr "type" "sse")
3790 (set_attr "prefix_rep" "1")
3791 (set_attr "prefix" "maybe_vex")
3792 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selection indices 0,0,2,2,4,4,6,6 use every
;; even-numbered index twice.
3794 (define_insn "avx_movsldup256"
3795 [(set (match_operand:V8SF 0 "register_operand" "=x")
3798 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3800 (parallel [(const_int 0) (const_int 0)
3801 (const_int 2) (const_int 2)
3802 (const_int 4) (const_int 4)
3803 (const_int 6) (const_int 6)])))]
3805 "vmovsldup\t{%1, %0|%0, %1}"
3806 [(set_attr "type" "sse")
3807 (set_attr "prefix" "vex")
3808 (set_attr "mode" "V8SF")])
;; V4SF form of movsldup, emitted as %vmovsldup from a register-or-memory
;; operand 1; the selection list starts at index 0.
3810 (define_insn "sse3_movsldup"
3811 [(set (match_operand:V4SF 0 "register_operand" "=x")
3814 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3816 (parallel [(const_int 0)
3821 "%vmovsldup\t{%1, %0|%0, %1}"
3822 [(set_attr "type" "sse")
3823 (set_attr "prefix_rep" "1")
3824 (set_attr "prefix" "maybe_vex")
3825 (set_attr "mode" "V4SF")])
;; Expander taking the 8-bit shuffle immediate (operand 3) and splitting it
;; into four 2-bit fields.  Fields 0 and 1 become selectors in 0..3, fields
;; 2 and 3 selectors offset by 8; the same four fields are then repeated
;; with a further +4 (giving +4 and +12) before everything is handed to
;; avx_shufps256_1.
3827 (define_expand "avx_shufps256"
3828 [(match_operand:V8SF 0 "register_operand")
3829 (match_operand:V8SF 1 "register_operand")
3830 (match_operand:V8SF 2 "nonimmediate_operand")
3831 (match_operand:SI 3 "const_int_operand")]
3834 int mask = INTVAL (operands[3]);
3835 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3836 GEN_INT ((mask >> 0) & 3),
3837 GEN_INT ((mask >> 2) & 3),
3838 GEN_INT (((mask >> 4) & 3) + 8),
3839 GEN_INT (((mask >> 6) & 3) + 8),
3840 GEN_INT (((mask >> 0) & 3) + 4),
3841 GEN_INT (((mask >> 2) & 3) + 4),
3842 GEN_INT (((mask >> 4) & 3) + 12),
3843 GEN_INT (((mask >> 6) & 3) + 12)));
3847 ;; Each 2-bit field of the mask selects 2 elements, one per 128-bit lane.
;; The insn condition therefore requires selectors 7..10 to equal selectors
;; 3..6 plus 4.  The output code rebuilds the 8-bit immediate from
;; operands 3..6 (removing the +8 bias of operands 5 and 6) and stores it
;; in operands[3] before printing vshufps.
3848 (define_insn "avx_shufps256_1"
3849 [(set (match_operand:V8SF 0 "register_operand" "=x")
3852 (match_operand:V8SF 1 "register_operand" "x")
3853 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3854 (parallel [(match_operand 3 "const_0_to_3_operand" )
3855 (match_operand 4 "const_0_to_3_operand" )
3856 (match_operand 5 "const_8_to_11_operand" )
3857 (match_operand 6 "const_8_to_11_operand" )
3858 (match_operand 7 "const_4_to_7_operand" )
3859 (match_operand 8 "const_4_to_7_operand" )
3860 (match_operand 9 "const_12_to_15_operand")
3861 (match_operand 10 "const_12_to_15_operand")])))]
3863 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3864 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3865 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3866 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3869 mask = INTVAL (operands[3]);
3870 mask |= INTVAL (operands[4]) << 2;
3871 mask |= (INTVAL (operands[5]) - 8) << 4;
3872 mask |= (INTVAL (operands[6]) - 8) << 6;
3873 operands[3] = GEN_INT (mask);
3875 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3877 [(set_attr "type" "sseshuf")
3878 (set_attr "length_immediate" "1")
3879 (set_attr "prefix" "vex")
3880 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shuffle immediate (operand 3) and splitting it
;; into four 2-bit fields: fields 0 and 1 become selectors in 0..3, fields
;; 2 and 3 selectors offset by 4.  The result is handed to
;; sse_shufps_v4sf.
3882 (define_expand "sse_shufps"
3883 [(match_operand:V4SF 0 "register_operand")
3884 (match_operand:V4SF 1 "register_operand")
3885 (match_operand:V4SF 2 "nonimmediate_operand")
3886 (match_operand:SI 3 "const_int_operand")]
3889 int mask = INTVAL (operands[3]);
3890 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3891 GEN_INT ((mask >> 0) & 3),
3892 GEN_INT ((mask >> 2) & 3),
3893 GEN_INT (((mask >> 4) & 3) + 4),
3894 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps / vshufps for the VI4F_128 modes, expressed as a vec_select from
;; the vec_concat of operands 1 and 2.  Selectors 3 and 4 are in 0..3,
;; selectors 5 and 6 in 4..7.  The output code folds the four selectors
;; back into one 8-bit immediate (removing the +4 bias of operands 5 and 6),
;; stores it in operands[3], and picks the mnemonic by alternative.
3898 (define_insn "sse_shufps_<mode>"
3899 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3900 (vec_select:VI4F_128
3901 (vec_concat:<ssedoublevecmode>
3902 (match_operand:VI4F_128 1 "register_operand" "0,x")
3903 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3904 (parallel [(match_operand 3 "const_0_to_3_operand")
3905 (match_operand 4 "const_0_to_3_operand")
3906 (match_operand 5 "const_4_to_7_operand")
3907 (match_operand 6 "const_4_to_7_operand")])))]
3911 mask |= INTVAL (operands[3]) << 0;
3912 mask |= INTVAL (operands[4]) << 2;
3913 mask |= (INTVAL (operands[5]) - 4) << 4;
3914 mask |= (INTVAL (operands[6]) - 4) << 6;
3915 operands[3] = GEN_INT (mask);
3917 switch (which_alternative)
3920 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3922 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3927 [(set_attr "isa" "noavx,avx")
3928 (set_attr "type" "sseshuf")
3929 (set_attr "length_immediate" "1")
3930 (set_attr "prefix" "orig,vex")
3931 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of the V4SF operand 1 into the V2SF operand 0.
;; Alternative 0 stores to memory with %vmovhps, alternative 1 is the
;; register form (%vmovhlps), and alternative 2 loads from offsettable
;; memory, printing operand 1 with the %H modifier (%vmovlps).
3933 (define_insn "sse_storehps"
3934 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3936 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3937 (parallel [(const_int 2) (const_int 3)])))]
3940 %vmovhps\t{%1, %0|%0, %1}
3941 %vmovhlps\t{%1, %d0|%d0, %1}
3942 %vmovlps\t{%H1, %d0|%d0, %H1}"
3943 [(set_attr "type" "ssemov")
3944 (set_attr "prefix" "maybe_vex")
3945 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps: the operands are first passed
;; through ix86_fixup_binary_operands, the insn is emitted into the
;; destination it returns, and that destination is copied to operand 0 when
;; the two differ.
3947 (define_expand "sse_loadhps_exp"
3948 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3951 (match_operand:V4SF 1 "nonimmediate_operand")
3952 (parallel [(const_int 0) (const_int 1)]))
3953 (match_operand:V2SF 2 "nonimmediate_operand")))]
3956 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3958 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3960 /* Fix up the destination if needed. */
3961 if (dst != operands[0])
3962 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of the V4SF operand 1 with the V2SF operand 2.
;; Alternatives 0/1 take operand 2 from memory (movhps / vmovhps),
;; alternatives 2/3 from a register (movlhps / vmovlhps), and alternative 4
;; stores register operand 2 at %H0 of a memory destination (%vmovlps).
3967 (define_insn "sse_loadhps"
3968 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3971 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3972 (parallel [(const_int 0) (const_int 1)]))
3973 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3976 movhps\t{%2, %0|%0, %2}
3977 vmovhps\t{%2, %1, %0|%0, %1, %2}
3978 movlhps\t{%2, %0|%0, %2}
3979 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3980 %vmovlps\t{%2, %H0|%H0, %2}"
3981 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3982 (set_attr "type" "ssemov")
3983 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3984 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of the V4SF operand 1 into the V2SF operand 0.
;; Alternative 0 stores to memory with %vmovlps, alternative 1 is a
;; register-to-register %vmovaps, and alternative 2 loads from memory with
;; %vmovlps.
3986 (define_insn "sse_storelps"
3987 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3989 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3990 (parallel [(const_int 0) (const_int 1)])))]
3993 %vmovlps\t{%1, %0|%0, %1}
3994 %vmovaps\t{%1, %0|%0, %1}
3995 %vmovlps\t{%1, %d0|%d0, %1}"
3996 [(set_attr "type" "ssemov")
3997 (set_attr "prefix" "maybe_vex")
3998 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps: the operands are first passed
;; through ix86_fixup_binary_operands, the insn is emitted into the
;; destination it returns, and that destination is copied to operand 0 when
;; the two differ.
4000 (define_expand "sse_loadlps_exp"
4001 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4003 (match_operand:V2SF 2 "nonimmediate_operand")
4005 (match_operand:V4SF 1 "nonimmediate_operand")
4006 (parallel [(const_int 2) (const_int 3)]))))]
4009 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4011 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
4013 /* Fix up the destination if needed. */
4014 if (dst != operands[0])
4015 emit_move_insn (operands[0], dst);
;; Combines the V2SF operand 2 with elements 2 and 3 of the V4SF operand 1.
;; Alternatives 0/1 are register forms using shufps / vshufps with the
;; immediate 0xe4, alternatives 2/3 take operand 2 from memory (movlps /
;; vmovlps), and alternative 4 stores register operand 2 to a memory
;; destination (%vmovlps).
4020 (define_insn "sse_loadlps"
4021 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4023 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
4025 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
4026 (parallel [(const_int 2) (const_int 3)]))))]
4029 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
4030 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
4031 movlps\t{%2, %0|%0, %2}
4032 vmovlps\t{%2, %1, %0|%0, %1, %2}
4033 %vmovlps\t{%2, %0|%0, %2}"
4034 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4035 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
4036 (set_attr "length_immediate" "1,1,*,*,*")
4037 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4038 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss / vmovss between V4SF registers.  Alternative 0 (noavx) ties
;; operand 1 to the destination; alternative 1 (avx) is the three-operand
;; form.
4040 (define_insn "sse_movss"
4041 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4043 (match_operand:V4SF 2 "register_operand" " x,x")
4044 (match_operand:V4SF 1 "register_operand" " 0,x")
4048 movss\t{%2, %0|%0, %2}
4049 vmovss\t{%2, %1, %0|%0, %1, %2}"
4050 [(set_attr "isa" "noavx,avx")
4051 (set_attr "type" "ssemov")
4052 (set_attr "prefix" "orig,vex")
4053 (set_attr "mode" "SF")])
;; vbroadcastss with a register source: element 0 of the V4SF register
;; operand 1 is the source for the VF1-mode destination.
4055 (define_insn "avx2_vec_dup<mode>"
4056 [(set (match_operand:VF1 0 "register_operand" "=x")
4059 (match_operand:V4SF 1 "register_operand" "x")
4060 (parallel [(const_int 0)]))))]
4062 "vbroadcastss\t{%1, %0|%0, %1}"
4063 [(set_attr "type" "sselog1")
4064 (set_attr "prefix" "vex")
4065 (set_attr "mode" "<MODE>")])
;; vbroadcastss variant whose source (operand 1) is a V8SF register; element
;; 0 is selected and the source is printed with the %x modifier.
4067 (define_insn "avx2_vec_dupv8sf_1"
4068 [(set (match_operand:V8SF 0 "register_operand" "=x")
4071 (match_operand:V8SF 1 "register_operand" "x")
4072 (parallel [(const_int 0)]))))]
4074 "vbroadcastss\t{%x1, %0|%0, %x1}"
4075 [(set_attr "type" "sselog1")
4076 (set_attr "prefix" "vex")
4077 (set_attr "mode" "V8SF")])
;; Builds a V4SF from the SF operand 1.  Alternative 0 (avx, register
;; source) uses vshufps with immediate 0, alternative 1 (avx, memory source)
;; uses vbroadcastss, and alternative 2 (noavx, source tied to the
;; destination) uses shufps with immediate 0.
4079 (define_insn "vec_dupv4sf"
4080 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
4082 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
4085 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
4086 vbroadcastss\t{%1, %0|%0, %1}
4087 shufps\t{$0, %0, %0|%0, %0, 0}"
4088 [(set_attr "isa" "avx,avx,noavx")
4089 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
4090 (set_attr "length_immediate" "1,0,1")
4091 (set_attr "prefix_extra" "0,1,*")
4092 (set_attr "prefix" "vex,vex,orig")
4093 (set_attr "mode" "V4SF")])
4095 ;; Although insertps takes register source, we prefer
4096 ;; unpcklps with register source since it is shorter.
;; Seven alternatives building a V2SF from two SF operands: unpcklps /
;; vunpcklps for a register operand 2, insertps / vinsertps (immediate
;; 0x10) for a memory operand 2, %vmovss when operand 2 is "C", and two MMX
;; ("y") alternatives using punpckldq and movd.
4097 (define_insn "*vec_concatv2sf_sse4_1"
4098 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
4100 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
4101 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
4104 unpcklps\t{%2, %0|%0, %2}
4105 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4106 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4107 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4108 %vmovss\t{%1, %0|%0, %1}
4109 punpckldq\t{%2, %0|%0, %2}
4110 movd\t{%1, %0|%0, %1}"
4111 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4112 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
4113 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4114 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
4115 (set_attr "length_immediate" "*,*,1,1,*,*,*")
4116 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4117 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
4119 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4120 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4121 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four alternatives building a V2SF from two SF operands: unpcklps for a
;; register operand 2, movss when operand 2 is "C", and the MMX ("y")
;; counterparts punpckldq and movd.
4122 (define_insn "*vec_concatv2sf_sse"
4123 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4125 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4126 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4129 unpcklps\t{%2, %0|%0, %2}
4130 movss\t{%1, %0|%0, %1}
4131 punpckldq\t{%2, %0|%0, %2}
4132 movd\t{%1, %0|%0, %1}"
4133 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4134 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands.  Alternatives 0/1 take operand 2
;; from a register (movlhps / vmovlhps); alternatives 2/3 take it from
;; memory (movhps / vmovhps).
4136 (define_insn "*vec_concatv4sf"
4137 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
4139 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
4140 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
4143 movlhps\t{%2, %0|%0, %2}
4144 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4145 movhps\t{%2, %0|%0, %2}
4146 vmovhps\t{%2, %1, %0|%0, %1, %2}"
4147 [(set_attr "isa" "noavx,avx,noavx,avx")
4148 (set_attr "type" "ssemov")
4149 (set_attr "prefix" "orig,vex,orig,vex")
4150 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Expander for the V_128 modes that delegates to ix86_expand_vector_init
;; with operands 0 and 1 and a first argument of false.
4152 (define_expand "vec_init<mode>"
4153 [(match_operand:V_128 0 "register_operand")
4157 ix86_expand_vector_init (false, operands[0], operands[1]);
4161 ;; Avoid combining registers from different units in a single alternative,
4162 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Eleven alternatives for the VI4F_128 modes, combining a vec_duplicate of
;; the scalar operand 2 with the vector operand 1.  Alternatives 0-3 have
;; operand 1 constrained to "C", 4-7 take it from a register, and 8-10
;; have a memory destination tied to operand 1.  The "type" attribute is
;; sselog for alternatives 0, 6 and 7, imov for 9, fmov for 10 and ssemov
;; for the rest.
4163 (define_insn "vec_set<mode>_0"
4164 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
4165 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
4167 (vec_duplicate:VI4F_128
4168 (match_operand:<ssescalarmode> 2 "general_operand"
4169 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
4170 (match_operand:VI4F_128 1 "vector_move_operand"
4171 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
4175 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
4176 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4177 %vmovd\t{%2, %0|%0, %2}
4178 movss\t{%2, %0|%0, %2}
4179 movss\t{%2, %0|%0, %2}
4180 vmovss\t{%2, %1, %0|%0, %1, %2}
4181 pinsrd\t{$0, %2, %0|%0, %2, 0}
4182 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4186 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
4188 (cond [(eq_attr "alternative" "0,6,7")
4189 (const_string "sselog")
4190 (eq_attr "alternative" "9")
4191 (const_string "imov")
4192 (eq_attr "alternative" "10")
4193 (const_string "fmov")
4195 (const_string "ssemov")))
4196 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
4197 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
4198 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
4199 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
4201 ;; A subset is vec_setv4sf.
;; insertps / vinsertps placing the SF operand 2 into the V4SF operand 1.
;; Operand 3 is a const_int whose exact_log2 must be below
;; GET_MODE_NUNITS (V4SFmode); the output code turns it into the
;; instruction immediate as exact_log2 (operand 3) << 4.
4202 (define_insn "*vec_setv4sf_sse4_1"
4203 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4206 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
4207 (match_operand:V4SF 1 "register_operand" "0,x")
4208 (match_operand:SI 3 "const_int_operand")))]
4210 && ((unsigned) exact_log2 (INTVAL (operands[3]))
4211 < GET_MODE_NUNITS (V4SFmode))"
4213 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4214 switch (which_alternative)
4217 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4219 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4224 [(set_attr "isa" "noavx,avx")
4225 (set_attr "type" "sselog")
4226 (set_attr "prefix_data16" "1,*")
4227 (set_attr "prefix_extra" "1")
4228 (set_attr "length_immediate" "1")
4229 (set_attr "prefix" "orig,vex")
4230 (set_attr "mode" "V4SF")])
;; insertps / vinsertps as an unspec with an 8-bit immediate (operand 3).
;; When operand 2 is in memory, the output code takes the top two immediate
;; bits (imm >> 6) as count_s, masks the immediate to its low six bits and
;; re-addresses operand 2 as an SFmode reference at byte offset count_s * 4.
4232 (define_insn "sse4_1_insertps"
4233 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4234 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
4235 (match_operand:V4SF 1 "register_operand" "0,x")
4236 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
4240 if (MEM_P (operands[2]))
4242 unsigned count_s = INTVAL (operands[3]) >> 6;
4244 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
4245 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
4247 switch (which_alternative)
4250 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4252 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4257 [(set_attr "isa" "noavx,avx")
4258 (set_attr "type" "sselog")
4259 (set_attr "prefix_data16" "1,*")
4260 (set_attr "prefix_extra" "1")
4261 (set_attr "length_immediate" "1")
4262 (set_attr "prefix" "orig,vex")
4263 (set_attr "mode" "V4SF")])
4266 [(set (match_operand:VI4F_128 0 "memory_operand")
4268 (vec_duplicate:VI4F_128
4269 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
4272 "TARGET_SSE && reload_completed"
4275 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander for the V modes: operand 0 is the vector register, operand 1
;; the scalar register and operand 2 a const_int; all work is delegated to
;; ix86_expand_vector_set, which receives INTVAL (operands[2]) as its last
;; argument.
4280 (define_expand "vec_set<mode>"
4281 [(match_operand:V 0 "register_operand")
4282 (match_operand:<ssescalarmode> 1 "register_operand")
4283 (match_operand 2 "const_int_operand")]
4286 ix86_expand_vector_set (false, operands[0], operands[1],
4287 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF.  Matches when TARGET_SSE holds and
;; the operands are not both in memory.  After reload it is split into a
;; plain SFmode move whose source is operand 1 re-expressed either as an
;; SFmode REG with the same register number or through gen_lowpart.
4291 (define_insn_and_split "*vec_extractv4sf_0"
4292 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4294 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4295 (parallel [(const_int 0)])))]
4296 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4298 "&& reload_completed"
4301 rtx op1 = operands[1];
4303 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4305 op1 = gen_lowpart (SFmode, op1);
4306 emit_move_insn (operands[0], op1);
;; Extraction of element 0..3 (operand 2) of a V4SF register.  Alternative
;; 0 (register-or-memory destination) emits %vextractps.  When the
;; destination is an SSE register the insn is split after reload: the
;; destination is viewed as a V4SF register and filled either by
;; sse_shufps_v4sf using the index for every selector or by
;; vec_interleave_highv4sf of operand 1 with itself, depending on the index.
4310 (define_insn_and_split "*sse4_1_extractps"
4311 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4313 (match_operand:V4SF 1 "register_operand" "x,0,x")
4314 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4317 %vextractps\t{%2, %1, %0|%0, %1, %2}
4320 "&& reload_completed && SSE_REG_P (operands[0])"
4323 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4324 switch (INTVAL (operands[2]))
4328 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4329 operands[2], operands[2],
4330 GEN_INT (INTVAL (operands[2]) + 4),
4331 GEN_INT (INTVAL (operands[2]) + 4)));
4334 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4337 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4342 [(set_attr "isa" "*,noavx,avx")
4343 (set_attr "type" "sselog,*,*")
4344 (set_attr "prefix_data16" "1,*,*")
4345 (set_attr "prefix_extra" "1,*,*")
4346 (set_attr "length_immediate" "1,*,*")
4347 (set_attr "prefix" "maybe_vex,*,*")
4348 (set_attr "mode" "V4SF,*,*")])
;; Extraction of element 0..3 (operand 2) from a V4SF in offsettable
;; memory.  After reload it is split into a plain SFmode move from operand 1
;; re-addressed at byte offset index * 4.
4350 (define_insn_and_split "*vec_extract_v4sf_mem"
4351 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4353 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4354 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4357 "&& reload_completed"
4360 int i = INTVAL (operands[2]);
4362 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for the V_256 modes: operand 2 (0 or 1) selects between the
;; vec_extract_lo_<mode> and vec_extract_hi_<mode> generators, and the
;; chosen one is emitted with operands 0 and 1.
4366 (define_expand "avx_vextractf128<mode>"
4367 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
4368 (match_operand:V_256 1 "register_operand")
4369 (match_operand:SI 2 "const_0_to_1_operand")]
4372 rtx (*insn)(rtx, rtx);
4374 switch (INTVAL (operands[2]))
4377 insn = gen_vec_extract_lo_<mode>;
4380 insn = gen_vec_extract_hi_<mode>;
4386 emit_insn (insn (operands[0], operands[1]));
;; Selects elements 0 and 1 of a VI8F_256 vector into <ssehalfvecmode>.
;; Matches with TARGET_AVX when the operands are not both in memory.  After
;; reload it is split into a plain move whose source is operand 1
;; re-expressed in <ssehalfvecmode>, either as a REG with the same register
;; number or through gen_lowpart.
4390 (define_insn_and_split "vec_extract_lo_<mode>"
4391 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4392 (vec_select:<ssehalfvecmode>
4393 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4394 (parallel [(const_int 0) (const_int 1)])))]
4395 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4397 "&& reload_completed"
4400 rtx op1 = operands[1];
4402 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4404 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4405 emit_move_insn (operands[0], op1);
;; Selects elements 2 and 3 of a VI8F_256 register into <ssehalfvecmode>
;; using vextract<i128> with immediate 1; the destination is a register
;; (alternative 0) or memory (alternative 1, "memory" attribute "store").
4409 (define_insn "vec_extract_hi_<mode>"
4410 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4411 (vec_select:<ssehalfvecmode>
4412 (match_operand:VI8F_256 1 "register_operand" "x,x")
4413 (parallel [(const_int 2) (const_int 3)])))]
4415 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4416 [(set_attr "type" "sselog")
4417 (set_attr "prefix_extra" "1")
4418 (set_attr "length_immediate" "1")
4419 (set_attr "memory" "none,store")
4420 (set_attr "prefix" "vex")
4421 (set_attr "mode" "<sseinsnmode>")])
;; Selects elements 0-3 of a VI4F_256 vector into <ssehalfvecmode>.
;; Matches with TARGET_AVX when the operands are not both in memory.  After
;; reload it is split into a plain move whose source is operand 1
;; re-expressed in <ssehalfvecmode>, either as a REG with the same register
;; number or through gen_lowpart.
4423 (define_insn_and_split "vec_extract_lo_<mode>"
4424 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4425 (vec_select:<ssehalfvecmode>
4426 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4427 (parallel [(const_int 0) (const_int 1)
4428 (const_int 2) (const_int 3)])))]
4429 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4431 "&& reload_completed"
4434 rtx op1 = operands[1];
4436 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4438 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4439 emit_move_insn (operands[0], op1);
;; Selects elements 4-7 of a VI4F_256 register into <ssehalfvecmode> using
;; vextract<i128> with immediate 1; the destination is a register
;; (alternative 0) or memory (alternative 1, "memory" attribute "store").
4443 (define_insn "vec_extract_hi_<mode>"
4444 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4445 (vec_select:<ssehalfvecmode>
4446 (match_operand:VI4F_256 1 "register_operand" "x,x")
4447 (parallel [(const_int 4) (const_int 5)
4448 (const_int 6) (const_int 7)])))]
4450 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4451 [(set_attr "type" "sselog")
4452 (set_attr "prefix_extra" "1")
4453 (set_attr "length_immediate" "1")
4454 (set_attr "memory" "none,store")
4455 (set_attr "prefix" "vex")
4456 (set_attr "mode" "<sseinsnmode>")])
;; Selects elements 0-7 of a V16HI vector into V8HI.  Matches with
;; TARGET_AVX when the operands are not both in memory.  After reload it is
;; split into a plain move whose source is operand 1 re-expressed in
;; V8HImode, either as a REG with the same register number or through
;; gen_lowpart.
4458 (define_insn_and_split "vec_extract_lo_v16hi"
4459 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4461 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4462 (parallel [(const_int 0) (const_int 1)
4463 (const_int 2) (const_int 3)
4464 (const_int 4) (const_int 5)
4465 (const_int 6) (const_int 7)])))]
4466 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4468 "&& reload_completed"
4471 rtx op1 = operands[1];
4473 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4475 op1 = gen_lowpart (V8HImode, op1);
4476 emit_move_insn (operands[0], op1);
;; Selects elements 8-15 of a V16HI register into V8HI using vextract%~128
;; with immediate 1; the destination is a register (alternative 0) or
;; memory (alternative 1, "memory" attribute "store").
4480 (define_insn "vec_extract_hi_v16hi"
4481 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4483 (match_operand:V16HI 1 "register_operand" "x,x")
4484 (parallel [(const_int 8) (const_int 9)
4485 (const_int 10) (const_int 11)
4486 (const_int 12) (const_int 13)
4487 (const_int 14) (const_int 15)])))]
4489 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4490 [(set_attr "type" "sselog")
4491 (set_attr "prefix_extra" "1")
4492 (set_attr "length_immediate" "1")
4493 (set_attr "memory" "none,store")
4494 (set_attr "prefix" "vex")
4495 (set_attr "mode" "OI")])
;; Selects elements 0-15 of a V32QI vector into V16QI.  Matches with
;; TARGET_AVX when the operands are not both in memory.  After reload it is
;; split into a plain move whose source is operand 1 re-expressed in
;; V16QImode, either as a REG with the same register number or through
;; gen_lowpart.
4497 (define_insn_and_split "vec_extract_lo_v32qi"
4498 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4500 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4501 (parallel [(const_int 0) (const_int 1)
4502 (const_int 2) (const_int 3)
4503 (const_int 4) (const_int 5)
4504 (const_int 6) (const_int 7)
4505 (const_int 8) (const_int 9)
4506 (const_int 10) (const_int 11)
4507 (const_int 12) (const_int 13)
4508 (const_int 14) (const_int 15)])))]
4509 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4511 "&& reload_completed"
4514 rtx op1 = operands[1];
4516 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4518 op1 = gen_lowpart (V16QImode, op1);
4519 emit_move_insn (operands[0], op1);
;; Selects elements 16-31 of a V32QI register into V16QI using
;; vextract%~128 with immediate 1; the destination is a register
;; (alternative 0) or memory (alternative 1, "memory" attribute "store").
4523 (define_insn "vec_extract_hi_v32qi"
4524 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4526 (match_operand:V32QI 1 "register_operand" "x,x")
4527 (parallel [(const_int 16) (const_int 17)
4528 (const_int 18) (const_int 19)
4529 (const_int 20) (const_int 21)
4530 (const_int 22) (const_int 23)
4531 (const_int 24) (const_int 25)
4532 (const_int 26) (const_int 27)
4533 (const_int 28) (const_int 29)
4534 (const_int 30) (const_int 31)])))]
4536 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4537 [(set_attr "type" "sselog")
4538 (set_attr "prefix_extra" "1")
4539 (set_attr "length_immediate" "1")
4540 (set_attr "memory" "none,store")
4541 (set_attr "prefix" "vex")
4542 (set_attr "mode" "OI")])
4544 ;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with the
;; 128-bit mode of the same element type, which carries no extra condition.
4545 (define_mode_iterator VEC_EXTRACT_MODE
4546 [(V32QI "TARGET_AVX") V16QI
4547 (V16HI "TARGET_AVX") V8HI
4548 (V8SI "TARGET_AVX") V4SI
4549 (V4DI "TARGET_AVX") V2DI
4550 (V8SF "TARGET_AVX") V4SF
4551 (V4DF "TARGET_AVX") V2DF])
;; Expander for extracting one element from a vector.
;;   operand 0: scalar register of the vector's element mode (destination)
;;   operand 1: vector register, any mode in VEC_EXTRACT_MODE
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_extract.
;; NOTE(review): the leading `false' argument is presumably the mmx_ok
;; flag -- confirm against the helper's prototype in i386.c.
4553 (define_expand "vec_extract<mode>"
4554 [(match_operand:<ssescalarmode> 0 "register_operand")
4555 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
4556 (match_operand 2 "const_int_operand")]
4559 ix86_expand_vector_extract (false, operands[0], operands[1],
4560 INTVAL (operands[2]));
4564 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4566 ;; Parallel double-precision floating point element swizzling
4568 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4570 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF: the selection is [1 5 3 7] over operands 1 and 2,
;; where indices 4-7 address operand 2 (as in a concatenation of the two
;; inputs).  That is the odd-numbered double of each input, taken lane
;; by lane.  Operand 2 may be a register or memory.
4571 (define_insn "avx_unpckhpd256"
4572 [(set (match_operand:V4DF 0 "register_operand" "=x")
4575 (match_operand:V4DF 1 "register_operand" "x")
4576 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4577 (parallel [(const_int 1) (const_int 5)
4578 (const_int 3) (const_int 7)])))]
4580 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4581 [(set_attr "type" "sselog")
4582 (set_attr "prefix" "vex")
4583 (set_attr "mode" "V4DF")])
;; Expander for interleaving the high halves of two V4DF vectors.
;; It is built from three selections: [0 4 2 6] and [1 5 3 7] over
;; operands 1 and 2, then a final [2 3 6 7] selection that sets operand 0.
;; Two fresh V4DF pseudos are allocated as operands 3 and 4 for the
;; intermediate values.
;; NOTE(review): the lines naming the destinations of the first two steps
;; and the inputs of the last step are not visible here -- presumably
;; operands 3 and 4; confirm.
4585 (define_expand "vec_interleave_highv4df"
4589 (match_operand:V4DF 1 "register_operand" "x")
4590 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4591 (parallel [(const_int 0) (const_int 4)
4592 (const_int 2) (const_int 6)])))
4598 (parallel [(const_int 1) (const_int 5)
4599 (const_int 3) (const_int 7)])))
4600 (set (match_operand:V4DF 0 "register_operand")
4605 (parallel [(const_int 2) (const_int 3)
4606 (const_int 6) (const_int 7)])))]
4609 operands[3] = gen_reg_rtx (V4DFmode);
4610 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for interleave-high on V2DF (the selection starts at
;; element 1).  Both inputs may be registers or memory.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the
;; combination, operand 2 is forced into a register first.
4614 (define_expand "vec_interleave_highv2df"
4615 [(set (match_operand:V2DF 0 "register_operand")
4618 (match_operand:V2DF 1 "nonimmediate_operand")
4619 (match_operand:V2DF 2 "nonimmediate_operand"))
4620 (parallel [(const_int 1)
4624 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4625 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matching insn for the V2DF interleave-high selection.  Valid for SSE2
;; when ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives, in order:
;;   0  unpckhpd   two-operand, operand 1 tied to the destination (noavx)
;;   1  vunpckhpd  three-operand (avx)
;;   2  movddup    from %H1; operand 1 is offsettable memory and operand 2
;;                 is the same operand (sse3)
;;   3  movlpd     from %H1 of memory operand 1; operand 2 tied to the
;;                 destination (noavx)
;;   4  vmovlpd    from %H1 of memory operand 1, register operand 2 (avx)
;;   5  movhpd     store of register operand 1 into memory operand 0
;; NOTE(review): %H is presumably the operand code that addresses the
;; upper 8 bytes of a memory operand -- confirm in i386.c.
4628 (define_insn "*vec_interleave_highv2df"
4629 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4632 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4633 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4634 (parallel [(const_int 1)
4636 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4638 unpckhpd\t{%2, %0|%0, %2}
4639 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4640 %vmovddup\t{%H1, %0|%0, %H1}
4641 movlpd\t{%H1, %0|%0, %H1}
4642 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4643 %vmovhpd\t{%1, %0|%0, %1}"
4644 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4645 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4646 (set_attr "prefix_data16" "*,*,*,1,*,1")
4647 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4648 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4650 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup.  Operand 1 (register or memory) is the
;; only input operand, and the selection is [0 4 2 6], the same index list
;; the unpcklpd256 patterns use.
4651 (define_expand "avx_movddup256"
4652 [(set (match_operand:V4DF 0 "register_operand")
4655 (match_operand:V4DF 1 "nonimmediate_operand")
4657 (parallel [(const_int 0) (const_int 4)
4658 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit unpack-low of doubles.  The selection is
;; [0 4 2 6] over register operand 1 and register-or-memory operand 2.
4661 (define_expand "avx_unpcklpd256"
4662 [(set (match_operand:V4DF 0 "register_operand")
4665 (match_operand:V4DF 1 "register_operand")
4666 (match_operand:V4DF 2 "nonimmediate_operand"))
4667 (parallel [(const_int 0) (const_int 4)
4668 (const_int 2) (const_int 6)])))]
;; Matching insn for the [0 4 2 6] V4DF selection.  Two alternatives:
;;   0  vunpcklpd  register operand 1 with register-or-memory operand 2
;;   1  vmovddup   memory operand 1 when operand 2 is that same operand
;;                 (constraint "1"), so only %1 appears in the template
4671 (define_insn "*avx_unpcklpd256"
4672 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4675 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4676 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4677 (parallel [(const_int 0) (const_int 4)
4678 (const_int 2) (const_int 6)])))]
4681 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4682 vmovddup\t{%1, %0|%0, %1}"
4683 [(set_attr "type" "sselog")
4684 (set_attr "prefix" "vex")
4685 (set_attr "mode" "V4DF")])
;; Expander for interleaving the low halves of two V4DF vectors.
;; It is built from three selections: [0 4 2 6] and [1 5 3 7] over
;; operands 1 and 2, then a final [0 1 4 5] selection that sets operand 0.
;; Two fresh V4DF pseudos are allocated as operands 3 and 4 for the
;; intermediate values.
;; NOTE(review): the lines naming the destinations of the first two steps
;; and the inputs of the last step are not visible here -- presumably
;; operands 3 and 4; confirm.
4687 (define_expand "vec_interleave_lowv4df"
4691 (match_operand:V4DF 1 "register_operand" "x")
4692 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4693 (parallel [(const_int 0) (const_int 4)
4694 (const_int 2) (const_int 6)])))
4700 (parallel [(const_int 1) (const_int 5)
4701 (const_int 3) (const_int 7)])))
4702 (set (match_operand:V4DF 0 "register_operand")
4707 (parallel [(const_int 0) (const_int 1)
4708 (const_int 4) (const_int 5)])))]
4711 operands[3] = gen_reg_rtx (V4DFmode);
4712 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for interleave-low on V2DF (the selection starts at
;; element 0).  Both inputs may be registers or memory.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the
;; combination, operand 1 is forced into a register first.  (The
;; interleave-high expander forces operand 2 instead.)
4715 (define_expand "vec_interleave_lowv2df"
4716 [(set (match_operand:V2DF 0 "register_operand")
4719 (match_operand:V2DF 1 "nonimmediate_operand")
4720 (match_operand:V2DF 2 "nonimmediate_operand"))
4721 (parallel [(const_int 0)
4725 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4726 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matching insn for the V2DF interleave-low selection.  Valid for SSE2
;; when ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives, in order:
;;   0  unpcklpd   two-operand, operand 1 tied to the destination (noavx)
;;   1  vunpcklpd  three-operand (avx)
;;   2  movddup    from memory operand 1 when operand 2 is the same
;;                 operand (sse3)
;;   3  movhpd     load of memory operand 2; operand 1 tied to the
;;                 destination (noavx)
;;   4  vmovhpd    load of memory operand 2 with register operand 1 (avx)
;;   5  movlpd     store of register operand 2 to %H0 of the offsettable
;;                 memory destination, which operand 1 is tied to
;; NOTE(review): %H is presumably the operand code that addresses the
;; upper 8 bytes of a memory operand -- confirm in i386.c.
4729 (define_insn "*vec_interleave_lowv2df"
4730 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4733 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4734 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4735 (parallel [(const_int 0)
4737 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4739 unpcklpd\t{%2, %0|%0, %2}
4740 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4741 %vmovddup\t{%1, %0|%0, %1}
4742 movhpd\t{%2, %0|%0, %2}
4743 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4744 %vmovlpd\t{%2, %H0|%H0, %2}"
4745 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4746 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4747 (set_attr "prefix_data16" "*,*,*,1,*,1")
4748 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4749 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Split (SSE3, after reload) for a V2DF selection starting at element 0
;; of register operand 1 whose destination is memory.  The register is
;; viewed in DFmode and stored twice, at byte offsets 0 and 8 of
;; operand 0.
;; NOTE(review): the second selection index is not visible here --
;; presumably the pattern duplicates the low double; confirm.
4752 [(set (match_operand:V2DF 0 "memory_operand")
4755 (match_operand:V2DF 1 "register_operand")
4757 (parallel [(const_int 0)
4759 "TARGET_SSE3 && reload_completed"
4762 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4763 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4764 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (SSE3) for a two-index selection from memory operand 1 where the
;; second index equals the first plus 2 (operand 2 is 0 or 1).  It is
;; rewritten as a vec_duplicate of a single DFmode element, read from
;; byte offset INTVAL (operands[2]) * 8 of the original memory operand.
4769 [(set (match_operand:V2DF 0 "register_operand")
4772 (match_operand:V2DF 1 "memory_operand")
4774 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
4775 (match_operand:SI 3 "const_int_operand")])))]
4776 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4777 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4779 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shufpd immediate as operand 3 and turning it into
;; explicit element indices for avx_shufpd256_1.  Each mask bit chooses
;; between two adjacent indices:
;;   bit 1 -> 5 or 4,  bit 2 -> 3 or 2,  bit 3 -> 7 or 6.
;; NOTE(review): the argument carrying the first index (presumably derived
;; from mask bit 0) is on a line not visible here -- confirm.
4782 (define_expand "avx_shufpd256"
4783 [(match_operand:V4DF 0 "register_operand")
4784 (match_operand:V4DF 1 "register_operand")
4785 (match_operand:V4DF 2 "nonimmediate_operand")
4786 (match_operand:SI 3 "const_int_operand")]
4789 int mask = INTVAL (operands[3]);
4790 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4792 GEN_INT (mask & 2 ? 5 : 4),
4793 GEN_INT (mask & 4 ? 3 : 2),
4794 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the selection spelled out as four index operands:
;;   operand 3 in 0..1, operand 4 in 4..5, operand 5 in 2..3,
;;   operand 6 in 6..7.
;; The output code folds them back into the 4-bit immediate: bit 0 is
;; operand 3, bit 1 is operand 4 - 4, bit 2 is operand 5 - 2 and bit 3 is
;; operand 6 - 6.  operands[3] is overwritten with that immediate before
;; the template is returned.
4798 (define_insn "avx_shufpd256_1"
4799 [(set (match_operand:V4DF 0 "register_operand" "=x")
4802 (match_operand:V4DF 1 "register_operand" "x")
4803 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4804 (parallel [(match_operand 3 "const_0_to_1_operand")
4805 (match_operand 4 "const_4_to_5_operand")
4806 (match_operand 5 "const_2_to_3_operand")
4807 (match_operand 6 "const_6_to_7_operand")])))]
4811 mask = INTVAL (operands[3]);
4812 mask |= (INTVAL (operands[4]) - 4) << 1;
4813 mask |= (INTVAL (operands[5]) - 2) << 2;
4814 mask |= (INTVAL (operands[6]) - 6) << 3;
4815 operands[3] = GEN_INT (mask);
4817 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4819 [(set_attr "type" "sseshuf")
4820 (set_attr "length_immediate" "1")
4821 (set_attr "prefix" "vex")
4822 (set_attr "mode" "V4DF")])
;; Expander taking the shufpd immediate as operand 3 and turning it into
;; explicit element indices for sse2_shufpd_v2df.  Mask bit 1 selects
;; index 3 or 2 for the second element.
;; NOTE(review): the argument carrying the first index (presumably derived
;; from mask bit 0) is on a line not visible here -- confirm.
4824 (define_expand "sse2_shufpd"
4825 [(match_operand:V2DF 0 "register_operand")
4826 (match_operand:V2DF 1 "register_operand")
4827 (match_operand:V2DF 2 "nonimmediate_operand")
4828 (match_operand:SI 3 "const_int_operand")]
4831 int mask = INTVAL (operands[3]);
4832 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4834 GEN_INT (mask & 2 ? 3 : 2)));
4838 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; Interleave-high on V4DI, emitted as vpunpckhqdq with register operand 1
;; and register-or-memory operand 2.  The selection starts at element 1.
;; NOTE(review): the remaining selection indices and the insn condition
;; are not visible here -- presumably TARGET_AVX2; confirm.
4839 (define_insn "avx2_interleave_highv4di"
4840 [(set (match_operand:V4DI 0 "register_operand" "=x")
4843 (match_operand:V4DI 1 "register_operand" "x")
4844 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4845 (parallel [(const_int 1)
4850 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4851 [(set_attr "type" "sselog")
4852 (set_attr "prefix" "vex")
4853 (set_attr "mode" "OI")])
;; Interleave-high on V2DI (the selection starts at element 1).
;; Alternative 0 is the two-operand punpckhqdq with operand 1 tied to the
;; destination (noavx); alternative 1 is the three-operand vpunpckhqdq
;; (avx).  Operand 2 may be a register or memory in both.
4855 (define_insn "vec_interleave_highv2di"
4856 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4859 (match_operand:V2DI 1 "register_operand" "0,x")
4860 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4861 (parallel [(const_int 1)
4865 punpckhqdq\t{%2, %0|%0, %2}
4866 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4867 [(set_attr "isa" "noavx,avx")
4868 (set_attr "type" "sselog")
4869 (set_attr "prefix_data16" "1,*")
4870 (set_attr "prefix" "orig,vex")
4871 (set_attr "mode" "TI")])
;; Interleave-low on V4DI, emitted as vpunpcklqdq with register operand 1
;; and register-or-memory operand 2.  The selection starts at element 0.
;; NOTE(review): the remaining selection indices and the insn condition
;; are not visible here -- presumably TARGET_AVX2; confirm.
4873 (define_insn "avx2_interleave_lowv4di"
4874 [(set (match_operand:V4DI 0 "register_operand" "=x")
4877 (match_operand:V4DI 1 "register_operand" "x")
4878 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4879 (parallel [(const_int 0)
4884 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4885 [(set_attr "type" "sselog")
4886 (set_attr "prefix" "vex")
4887 (set_attr "mode" "OI")])
;; Interleave-low on V2DI (the selection starts at element 0).
;; Alternative 0 is the two-operand punpcklqdq with operand 1 tied to the
;; destination (noavx); alternative 1 is the three-operand vpunpcklqdq
;; (avx).  Operand 2 may be a register or memory in both.
4889 (define_insn "vec_interleave_lowv2di"
4890 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4893 (match_operand:V2DI 1 "register_operand" "0,x")
4894 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4895 (parallel [(const_int 0)
4899 punpcklqdq\t{%2, %0|%0, %2}
4900 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4901 [(set_attr "isa" "noavx,avx")
4902 (set_attr "type" "sselog")
4903 (set_attr "prefix_data16" "1,*")
4904 (set_attr "prefix" "orig,vex")
4905 (set_attr "mode" "TI")])
;; shufpd over every mode in VI8F_128.  The pattern is a two-element
;; vec_select from the vec_concat of operands 1 and 2: operand 3 (0 or 1)
;; picks an element of operand 1 and operand 4 (2 or 3) picks an element
;; of operand 2.  The output code rebuilds the 2-bit immediate (bit 0 is
;; operand 3, bit 1 is operand 4 - 2), stores it in operands[3], and then
;; returns the two-operand shufpd or the three-operand vshufpd template
;; according to which_alternative.
4907 (define_insn "sse2_shufpd_<mode>"
4908 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4909 (vec_select:VI8F_128
4910 (vec_concat:<ssedoublevecmode>
4911 (match_operand:VI8F_128 1 "register_operand" "0,x")
4912 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4913 (parallel [(match_operand 3 "const_0_to_1_operand")
4914 (match_operand 4 "const_2_to_3_operand")])))]
4918 mask = INTVAL (operands[3]);
4919 mask |= (INTVAL (operands[4]) - 2) << 1;
4920 operands[3] = GEN_INT (mask);
4922 switch (which_alternative)
4925 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4927 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4932 [(set_attr "isa" "noavx,avx")
4933 (set_attr "type" "sseshuf")
4934 (set_attr "length_immediate" "1")
4935 (set_attr "prefix" "orig,vex")
4936 (set_attr "mode" "V2DF")])
4938 ;; Avoid combining registers from different units in a single alternative,
4939 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;; Six alternatives:
;;   0    movhpd store of a register source to memory
;;   1-2  register-to-register forms (noavx with the source tied to the
;;        destination, then avx); typed sselog1
;;   3-5  offsettable-memory source into an SSE, x87 (*f) or general (r)
;;        register; typed ssemov, fmov and imov respectively
;; prefix_data16 is computed per alternative: the test is alternative 0
;; without TARGET_AVX, and the fallback value is "*".
;; NOTE(review): the templates of alternatives 1 and 3-5 are on lines not
;; visible here -- confirm before relying on this summary.
4940 (define_insn "sse2_storehpd"
4941 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4943 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4944 (parallel [(const_int 1)])))]
4945 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4947 %vmovhpd\t{%1, %0|%0, %1}
4949 vunpckhpd\t{%d1, %0|%0, %d1}
4953 [(set_attr "isa" "*,noavx,avx,*,*,*")
4954 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4955 (set (attr "prefix_data16")
4957 (and (eq_attr "alternative" "0")
4958 (not (match_test "TARGET_AVX")))
4960 (const_string "*")))
4961 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4962 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Split (SSE2, after reload): extracting element 1 of a V2DF memory
;; operand into a DF register becomes a plain DFmode load from byte
;; offset 8 of that memory.
4965 [(set (match_operand:DF 0 "register_operand")
4967 (match_operand:V2DF 1 "memory_operand")
4968 (parallel [(const_int 1)])))]
4969 "TARGET_SSE2 && reload_completed"
4970 [(set (match_dup 0) (match_dup 1))]
4971 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extract element 1 of a V2DF on targets with SSE but without SSE2,
;; using single-precision move instructions:
;;   0  movhps   store of a register source to memory
;;   1  movhlps  register to register
;;   2  movlps   load from %H1 of an offsettable memory source
;; NOTE(review): %H is presumably the operand code that addresses the
;; upper 8 bytes of a memory operand -- confirm in i386.c.
4973 (define_insn "*vec_extractv2df_1_sse"
4974 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4976 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4977 (parallel [(const_int 1)])))]
4978 "!TARGET_SSE2 && TARGET_SSE
4979 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4981 movhps\t{%1, %0|%0, %1}
4982 movhlps\t{%1, %0|%0, %1}
4983 movlps\t{%H1, %0|%0, %H1}"
4984 [(set_attr "type" "ssemov")
4985 (set_attr "mode" "V2SF,V4SF,V2SF")])
4987 ;; Avoid combining registers from different units in a single alternative,
4988 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Five alternatives:
;;   0    movlpd store of a register source to memory
;;   1    SSE register to SSE register
;;   2-4  memory source into an SSE, x87 (*f) or general (r) register;
;;        typed ssemov, fmov and imov respectively
;; NOTE(review): only the template of alternative 0 is visible here; the
;; remaining template lines are not shown -- confirm before relying on
;; this summary.
4989 (define_insn "sse2_storelpd"
4990 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4992 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4993 (parallel [(const_int 0)])))]
4994 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4996 %vmovlpd\t{%1, %0|%0, %1}
5001 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
5002 (set_attr "prefix_data16" "1,*,*,*,*")
5003 (set_attr "prefix" "maybe_vex")
5004 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Split (SSE2, after reload): extracting element 0 of a V2DF register or
;; memory operand into a DF register becomes a plain DFmode move.  The
;; source is operand 1 re-expressed in DFmode (gen_rtx_REG on its register
;; number, or gen_lowpart).
;; NOTE(review): the test choosing between gen_rtx_REG and gen_lowpart is
;; not visible here -- presumably REG_P (op1); confirm.
5007 [(set (match_operand:DF 0 "register_operand")
5009 (match_operand:V2DF 1 "nonimmediate_operand")
5010 (parallel [(const_int 0)])))]
5011 "TARGET_SSE2 && reload_completed"
5014 rtx op1 = operands[1];
5016 op1 = gen_rtx_REG (DFmode, REGNO (op1));
5018 op1 = gen_lowpart (DFmode, op1);
5019 emit_move_insn (operands[0], op1);
;; Extract element 0 of a V2DF on targets with SSE but without SSE2,
;; using single-precision move instructions:
;;   0  movlps  store of a register source to memory
;;   1  movaps  register to register
;;   2  movlps  load from a memory source
5023 (define_insn "*vec_extractv2df_0_sse"
5024 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5026 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5027 (parallel [(const_int 0)])))]
5028 "!TARGET_SSE2 && TARGET_SSE
5029 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5031 movlps\t{%1, %0|%0, %1}
5032 movaps\t{%1, %0|%0, %1}
5033 movlps\t{%1, %0|%0, %1}"
5034 [(set_attr "type" "ssemov")
5035 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  The pattern combines element 0
;; of V2DF operand 1 with DF operand 2.  ix86_fixup_binary_operands
;; legitimizes the operands and returns the destination to use; the real
;; insn is emitted on that destination, and the result is copied to
;; operand 0 afterwards if the two differ.
5037 (define_expand "sse2_loadhpd_exp"
5038 [(set (match_operand:V2DF 0 "nonimmediate_operand")
5041 (match_operand:V2DF 1 "nonimmediate_operand")
5042 (parallel [(const_int 0)]))
5043 (match_operand:DF 2 "nonimmediate_operand")))]
5046 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5048 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
5050 /* Fix up the destination if needed. */
5051 if (dst != operands[0])
5052 emit_move_insn (operands[0], dst);
5057 ;; Avoid combining registers from different units in a single alternative,
5058 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high double of a V2DF: element 0 of operand 1 is kept and
;; DF operand 2 supplies the other element.  Seven alternatives:
;;   0-1  movhpd / vmovhpd    operand 2 in memory
;;   2-3  unpcklpd / vunpcklpd operand 2 in an SSE register
;;   4-6  operand 2 in an SSE, x87 (*f) or general (r) register; typed
;;        ssemov, fmov and imov
;; Operands 1 and 2 may not both be memory.
;; NOTE(review): the constraint strings of operands 0 and 1 and the
;; templates of alternatives 4-6 are on lines not visible here -- confirm
;; before relying on this summary.
5059 (define_insn "sse2_loadhpd"
5060 [(set (match_operand:V2DF 0 "nonimmediate_operand"
5064 (match_operand:V2DF 1 "nonimmediate_operand"
5066 (parallel [(const_int 0)]))
5067 (match_operand:DF 2 "nonimmediate_operand"
5068 " m,m,x,x,x,*f,r")))]
5069 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5071 movhpd\t{%2, %0|%0, %2}
5072 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5073 unpcklpd\t{%2, %0|%0, %2}
5074 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5078 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5079 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
5080 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
5081 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
5082 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Split (SSE2, after reload): when a V2DF memory destination keeps its own
;; element 0 and receives DF register operand 1 as the other element, only
;; the high half changes.  The split is a plain DFmode store of operand 1
;; at byte offset 8 of the destination.
5085 [(set (match_operand:V2DF 0 "memory_operand")
5087 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
5088 (match_operand:DF 1 "register_operand")))]
5089 "TARGET_SSE2 && reload_completed"
5090 [(set (match_dup 0) (match_dup 1))]
5091 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  The pattern combines DF
;; operand 2 with element 1 of V2DF operand 1.  ix86_fixup_binary_operands
;; legitimizes the operands and returns the destination to use; the real
;; insn is emitted on that destination, and the result is copied to
;; operand 0 afterwards if the two differ.
5093 (define_expand "sse2_loadlpd_exp"
5094 [(set (match_operand:V2DF 0 "nonimmediate_operand")
5096 (match_operand:DF 2 "nonimmediate_operand")
5098 (match_operand:V2DF 1 "nonimmediate_operand")
5099 (parallel [(const_int 1)]))))]
5102 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5104 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
5106 /* Fix up the destination if needed. */
5107 if (dst != operands[0])
5108 emit_move_insn (operands[0], dst);
5113 ;; Avoid combining registers from different units in a single alternative,
5114 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low double of a V2DF: DF operand 2 is combined with
;; element 1 of operand 1.  Eleven alternatives:
;;   0     movsd load; operand 2 in memory, operand 1 matches constraint C
;;   1-2   movlpd / vmovlpd, operand 2 in memory
;;   3-4   movsd / vmovsd, operand 2 in an SSE register
;;   5     shufpd $2; operand 2 tied to the destination (the only
;;         alternative typed sselog, and the only one with an immediate)
;;   6-7   movhpd / vmovhpd from %H1 of offsettable memory operand 1
;;   8-10  memory destination with operand 1 tied to it; operand 2 in an
;;         SSE, x87 (*f) or general (r) register (ssemov, fmov, imov)
;; Operands 1 and 2 may not both be memory.
;; NOTE(review): constraint C is presumably the all-zero vector constant,
;; and the templates of alternatives 8-10 are on lines not visible here
;; -- confirm both.
5115 (define_insn "sse2_loadlpd"
5116 [(set (match_operand:V2DF 0 "nonimmediate_operand"
5117 "=x,x,x,x,x,x,x,x,m,m ,m")
5119 (match_operand:DF 2 "nonimmediate_operand"
5120 " m,m,m,x,x,0,0,x,x,*f,r")
5122 (match_operand:V2DF 1 "vector_move_operand"
5123 " C,0,x,0,x,x,o,o,0,0 ,0")
5124 (parallel [(const_int 1)]))))]
5125 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5127 %vmovsd\t{%2, %0|%0, %2}
5128 movlpd\t{%2, %0|%0, %2}
5129 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5130 movsd\t{%2, %0|%0, %2}
5131 vmovsd\t{%2, %1, %0|%0, %1, %2}
5132 shufpd\t{$2, %1, %0|%0, %1, 2}
5133 movhpd\t{%H1, %0|%0, %H1}
5134 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5138 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
5140 (cond [(eq_attr "alternative" "5")
5141 (const_string "sselog")
5142 (eq_attr "alternative" "9")
5143 (const_string "fmov")
5144 (eq_attr "alternative" "10")
5145 (const_string "imov")
5147 (const_string "ssemov")))
5148 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
5149 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
5150 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
5151 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Split (SSE2, after reload): when a V2DF memory destination receives DF
;; register operand 1 and keeps its own element 1, only the low half
;; changes.  The split is a plain DFmode store of operand 1 at byte
;; offset 0 of the destination.
5154 [(set (match_operand:V2DF 0 "memory_operand")
5156 (match_operand:DF 1 "register_operand")
5157 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5158 "TARGET_SSE2 && reload_completed"
5159 [(set (match_dup 0) (match_dup 1))]
5160 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Combine two V2DF operands (operand 2 is listed first in the pattern,
;; then operand 1) with nine alternatives:
;;   0-1  movsd / vmovsd     operand 2 in a register
;;   2-3  movlpd / vmovlpd   operand 2 in memory
;;   4    movlpd store       register operand 2 to a memory destination
;;   5    shufpd $2          operand 2 tied to the destination
;;   6-7  movhps / vmovhps   from %H1 of offsettable memory operand 1
;;   8    movhps store       register operand 1 to %H0 of the destination
;; prefix_data16 is computed: the test is alternatives 2 and 4 without
;; TARGET_AVX, and the fallback value is "*".
;; NOTE(review): the combining RTL operator and the insn condition are on
;; lines not visible here.  From the templates the result appears to take
;; its low double from operand 2 and its high double from operand 1 --
;; confirm.
5162 (define_insn "sse2_movsd"
5163 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
5165 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
5166 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
5170 movsd\t{%2, %0|%0, %2}
5171 vmovsd\t{%2, %1, %0|%0, %1, %2}
5172 movlpd\t{%2, %0|%0, %2}
5173 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5174 %vmovlpd\t{%2, %0|%0, %2}
5175 shufpd\t{$2, %1, %0|%0, %1, 2}
5176 movhps\t{%H1, %0|%0, %H1}
5177 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5178 %vmovhps\t{%1, %H0|%H0, %1}"
5179 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
5182 (eq_attr "alternative" "5")
5183 (const_string "sselog")
5184 (const_string "ssemov")))
5185 (set (attr "prefix_data16")
5187 (and (eq_attr "alternative" "2,4")
5188 (not (match_test "TARGET_AVX")))
5190 (const_string "*")))
5191 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
5192 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
5193 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Build a V2DF from DF operand 1.  Two alternatives:
;;   0  non-AVX form with the input tied to the destination register
;;   1  movddup from a register or memory (sse3)
;; NOTE(review): the RTL operator line, the insn condition and the
;; template of alternative 0 are not visible here -- presumably a
;; vec_duplicate; confirm.
5195 (define_insn "vec_dupv2df"
5196 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
5198 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
5202 %vmovddup\t{%1, %0|%0, %1}"
5203 [(set_attr "isa" "noavx,sse3")
5204 (set_attr "type" "sselog1")
5205 (set_attr "prefix" "orig,maybe_vex")
5206 (set_attr "mode" "V2DF,DF")])
;; Build a V2DF from two DF operands.  Eight alternatives:
;;   0  unpcklpd   operand 1 tied to the destination (sse2_noavx)
;;   1  vunpcklpd  both operands in registers (avx)
;;   2  movddup    memory operand 1 when operand 2 is the same operand (sse3)
;;   3  movhpd     operand 1 tied, operand 2 in memory (sse2_noavx)
;;   4  vmovhpd    register operand 1, memory operand 2 (avx)
;;   5  movsd      load of memory operand 1; operand 2 matches
;;                 constraint C (sse2)
;;   6  movlhps    operand 1 tied, register operand 2 (noavx)
;;   7  movhps     operand 1 tied, memory operand 2 (noavx)
;; Alternatives 0-2 are typed sselog, the rest ssemov.
;; NOTE(review): constraint C is presumably the zero constant, and the
;; RTL operator and insn condition are on lines not visible here --
;; confirm.
5208 (define_insn "*vec_concatv2df"
5209 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
5211 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
5212 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
5215 unpcklpd\t{%2, %0|%0, %2}
5216 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5217 %vmovddup\t{%1, %0|%0, %1}
5218 movhpd\t{%2, %0|%0, %2}
5219 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5220 %vmovsd\t{%1, %0|%0, %1}
5221 movlhps\t{%2, %0|%0, %2}
5222 movhps\t{%2, %0|%0, %2}"
5223 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
5226 (eq_attr "alternative" "0,1,2")
5227 (const_string "sselog")
5228 (const_string "ssemov")))
5229 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
5230 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
5231 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
5233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5235 ;; Parallel integral arithmetic
5237 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for integer vector negation over VI_AVX2.  The preparation
;; statement creates operand 2 as a register holding the all-zero vector
;; of <MODE>mode.
;; NOTE(review): the RTL operator line is not visible here -- presumably
;; operand 1 is subtracted from that zero register; confirm.
5239 (define_expand "neg<mode>2"
5240 [(set (match_operand:VI_AVX2 0 "register_operand")
5243 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
5245 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the integer vector add/subtract patterns over VI_AVX2
;; (name and <CODE> come from the plusminus iterator).  Both inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy is called to
;; legitimize them for the matching insn.
5247 (define_expand "<plusminus_insn><mode>3"
5248 [(set (match_operand:VI_AVX2 0 "register_operand")
5250 (match_operand:VI_AVX2 1 "nonimmediate_operand")
5251 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
5253 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for integer vector add/subtract.  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (noavx);
;; alternative 1 is the three-operand v-prefixed form (avx).  The mnemonic
;; is assembled from <plusminus_mnemonic> and <ssemodesuffix>.  <comm>
;; supplies the constraint modifier for operand 1, and the condition
;; requires SSE2 plus ix86_binary_operator_ok.
5255 (define_insn "*<plusminus_insn><mode>3"
5256 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
5258 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5259 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5260 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5262 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5263 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5264 [(set_attr "isa" "noavx,avx")
5265 (set_attr "type" "sseiadd")
5266 (set_attr "prefix_data16" "1,*")
5267 (set_attr "prefix" "orig,vex")
5268 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the saturating add/subtract patterns (sat_plusminus code
;; iterator) over VI12_AVX2.  Both inputs may be registers or memory;
;; ix86_fixup_binary_operands_no_copy is called to legitimize them for the
;; matching insn.
5270 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
5271 [(set (match_operand:VI12_AVX2 0 "register_operand")
5272 (sat_plusminus:VI12_AVX2
5273 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
5274 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
5276 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for saturating integer vector add/subtract.
;; Alternative 0 is the two-operand form with operand 1 tied to the
;; destination (noavx); alternative 1 is the three-operand v-prefixed form
;; (avx).  The condition requires SSE2 plus ix86_binary_operator_ok.
;; NOTE(review): the "mode" attribute is fixed at TI here, whereas the
;; non-saturating add/subtract insn uses <sseinsnmode>.  If VI12_AVX2
;; includes 256-bit modes this may under-report the insn mode -- confirm
;; whether that is intentional.
5278 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
5279 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
5280 (sat_plusminus:VI12_AVX2
5281 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5282 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5283 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5285 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5286 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5287 [(set_attr "isa" "noavx,avx")
5288 (set_attr "type" "sseiadd")
5289 (set_attr "prefix_data16" "1,*")
5290 (set_attr "prefix" "orig,vex")
5291 (set_attr "mode" "TI")])
;; Expander for multiplication over VI1_AVX2.  Both inputs must be
;; registers, and the expansion is handed to ix86_expand_vecop_qihi with
;; MULT as the operation code.
5293 (define_expand "mul<mode>3"
5294 [(set (match_operand:VI1_AVX2 0 "register_operand")
5295 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
5296 (match_operand:VI1_AVX2 2 "register_operand")))]
5299 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Expander for multiplication over VI2_AVX2.  Both inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy is called to
;; legitimize them for the matching insn.
5303 (define_expand "mul<mode>3"
5304 [(set (match_operand:VI2_AVX2 0 "register_operand")
5305 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
5306 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
5308 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for multiplication over VI2_AVX2, emitted as pmullw
;; (noavx, operand 1 tied to the destination) or vpmullw (avx).  The "%"
;; on operand 1 marks operands 1 and 2 as commutative.  The condition
;; requires SSE2 plus ix86_binary_operator_ok.
5310 (define_insn "*mul<mode>3"
5311 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5312 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5313 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5314 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5316 pmullw\t{%2, %0|%0, %2}
5317 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5318 [(set_attr "isa" "noavx,avx")
5319 (set_attr "type" "sseimul")
5320 (set_attr "prefix_data16" "1,*")
5321 (set_attr "prefix" "orig,vex")
5322 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the high-part multiply over VI2_AVX2.  Both inputs are
;; extended to <ssedoublemode> (sign or zero, per the any_extend iterator),
;; multiplied, and the product is shifted right logically.
;; ix86_fixup_binary_operands_no_copy is called to legitimize the inputs.
;; NOTE(review): the shift count and the outer narrowing operator are on
;; lines not visible here -- confirm.
5324 (define_expand "<s>mul<mode>3_highpart"
5325 [(set (match_operand:VI2_AVX2 0 "register_operand")
5327 (lshiftrt:<ssedoublemode>
5328 (mult:<ssedoublemode>
5329 (any_extend:<ssedoublemode>
5330 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
5331 (any_extend:<ssedoublemode>
5332 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
5335 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the high-part multiply over VI2_AVX2, emitted as
;; pmulh<u>w (noavx, operand 1 tied to the destination) or vpmulh<u>w
;; (avx); <u> is filled in from the any_extend iterator.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.  The condition
;; requires SSE2 plus ix86_binary_operator_ok.
5337 (define_insn "*<s>mul<mode>3_highpart"
5338 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5340 (lshiftrt:<ssedoublemode>
5341 (mult:<ssedoublemode>
5342 (any_extend:<ssedoublemode>
5343 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5344 (any_extend:<ssedoublemode>
5345 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5347 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5349 pmulh<u>w\t{%2, %0|%0, %2}
5350 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5351 [(set_attr "isa" "noavx,avx")
5352 (set_attr "type" "sseimul")
5353 (set_attr "prefix_data16" "1,*")
5354 (set_attr "prefix" "orig,vex")
5355 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of the even-numbered elements
;; (0, 2, 4, 6) of two V8SI vectors, giving a V4DI result.
;; ix86_fixup_binary_operands_no_copy is called to legitimize the inputs.
;; NOTE(review): the extension operator is on lines not visible here --
;; presumably zero_extend, given the "umult" name; confirm.
5357 (define_expand "vec_widen_umult_even_v8si"
5358 [(set (match_operand:V4DI 0 "register_operand")
5362 (match_operand:V8SI 1 "nonimmediate_operand")
5363 (parallel [(const_int 0) (const_int 2)
5364 (const_int 4) (const_int 6)])))
5367 (match_operand:V8SI 2 "nonimmediate_operand")
5368 (parallel [(const_int 0) (const_int 2)
5369 (const_int 4) (const_int 6)])))))]
5371 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for the even-element widening multiply on V8SI, emitted
;; as vpmuludq.  Elements 0, 2, 4 and 6 of each input are selected.  The
;; "%" on operand 1 marks operands 1 and 2 as commutative.  The condition
;; requires AVX2 plus ix86_binary_operator_ok.
5373 (define_insn "*vec_widen_umult_even_v8si"
5374 [(set (match_operand:V4DI 0 "register_operand" "=x")
5378 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5379 (parallel [(const_int 0) (const_int 2)
5380 (const_int 4) (const_int 6)])))
5383 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5384 (parallel [(const_int 0) (const_int 2)
5385 (const_int 4) (const_int 6)])))))]
5386 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5387 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5388 [(set_attr "type" "sseimul")
5389 (set_attr "prefix" "vex")
5390 (set_attr "mode" "OI")])
;; Expander for the widening multiply of the even-numbered elements
;; (0 and 2) of two V4SI vectors, giving a V2DI result.
;; ix86_fixup_binary_operands_no_copy is called to legitimize the inputs.
;; NOTE(review): the extension operator is on lines not visible here --
;; presumably zero_extend, given the "umult" name; confirm.
5392 (define_expand "vec_widen_umult_even_v4si"
5393 [(set (match_operand:V2DI 0 "register_operand")
5397 (match_operand:V4SI 1 "nonimmediate_operand")
5398 (parallel [(const_int 0) (const_int 2)])))
5401 (match_operand:V4SI 2 "nonimmediate_operand")
5402 (parallel [(const_int 0) (const_int 2)])))))]
5404 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for the even-element widening multiply on V4SI, emitted
;; as pmuludq (noavx, operand 1 tied to the destination) or vpmuludq
;; (avx).  Elements 0 and 2 of each input are selected.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.  The condition
;; requires SSE2 plus ix86_binary_operator_ok.
5406 (define_insn "*vec_widen_umult_even_v4si"
5407 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5411 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5412 (parallel [(const_int 0) (const_int 2)])))
5415 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5416 (parallel [(const_int 0) (const_int 2)])))))]
5417 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5419 pmuludq\t{%2, %0|%0, %2}
5420 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5421 [(set_attr "isa" "noavx,avx")
5422 (set_attr "type" "sseimul")
5423 (set_attr "prefix_data16" "1,*")
5424 (set_attr "prefix" "orig,vex")
5425 (set_attr "mode" "TI")])
;; Expander for the widening multiply of the even-numbered elements
;; (0, 2, 4, 6) of two V8SI vectors, giving a V4DI result.
;; ix86_fixup_binary_operands_no_copy is called to legitimize the inputs.
;; NOTE(review): the extension operator is on lines not visible here --
;; presumably sign_extend, given the "smult" name; confirm.
5427 (define_expand "vec_widen_smult_even_v8si"
5428 [(set (match_operand:V4DI 0 "register_operand")
5432 (match_operand:V8SI 1 "nonimmediate_operand")
5433 (parallel [(const_int 0) (const_int 2)
5434 (const_int 4) (const_int 6)])))
5437 (match_operand:V8SI 2 "nonimmediate_operand")
5438 (parallel [(const_int 0) (const_int 2)
5439 (const_int 4) (const_int 6)])))))]
5441 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for the even-element widening multiply on V8SI, emitted
;; as vpmuldq.  Elements 0, 2, 4 and 6 of each input are selected.  The
;; condition requires AVX2 plus ix86_binary_operator_ok.
;; NOTE(review): operand 1 uses constraint "x" without the "%"
;; commutative marker, while the unsigned twin
;; (*vec_widen_umult_even_v8si) uses "%x" under the same predicate and
;; condition.  Without "%" the register allocator cannot swap the inputs
;; when operand 1 is the one in memory -- confirm whether the omission is
;; intentional.
;; NOTE(review): the "isa" attribute names a single value on a
;; one-alternative insn whose condition already requires AVX2; it looks
;; redundant -- confirm.
5443 (define_insn "*vec_widen_smult_even_v8si"
5444 [(set (match_operand:V4DI 0 "register_operand" "=x")
5448 (match_operand:V8SI 1 "nonimmediate_operand" "x")
5449 (parallel [(const_int 0) (const_int 2)
5450 (const_int 4) (const_int 6)])))
5453 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5454 (parallel [(const_int 0) (const_int 2)
5455 (const_int 4) (const_int 6)])))))]
5456 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5457 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5458 [(set_attr "isa" "avx")
5459 (set_attr "type" "sseimul")
5460 (set_attr "prefix_extra" "1")
5461 (set_attr "prefix" "vex")
5462 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; vectors, giving a V2DI result.  ix86_fixup_binary_operands_no_copy is
;; called to legitimize the inputs.
;; NOTE(review): the extension operator and the expander condition are on
;; lines not visible here -- presumably sign_extend under TARGET_SSE4_1,
;; matching the pmuldq insn that follows; confirm.
5464 (define_expand "sse4_1_mulv2siv2di3"
5465 [(set (match_operand:V2DI 0 "register_operand")
5469 (match_operand:V4SI 1 "nonimmediate_operand")
5470 (parallel [(const_int 0) (const_int 2)])))
5473 (match_operand:V4SI 2 "nonimmediate_operand")
5474 (parallel [(const_int 0) (const_int 2)])))))]
5476 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for the widening multiply of elements 0 and 2 of two
;; V4SI vectors, emitted as pmuldq (noavx, operand 1 tied to the
;; destination) or vpmuldq (avx).  The "%" on operand 1 marks operands 1
;; and 2 as commutative.  The condition requires SSE4.1 plus
;; ix86_binary_operator_ok.
5478 (define_insn "*sse4_1_mulv2siv2di3"
5479 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5483 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5484 (parallel [(const_int 0) (const_int 2)])))
5487 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5488 (parallel [(const_int 0) (const_int 2)])))))]
5489 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5491 pmuldq\t{%2, %0|%0, %2}
5492 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5493 [(set_attr "isa" "noavx,avx")
5494 (set_attr "type" "sseimul")
5495 (set_attr "prefix_data16" "1,*")
5496 (set_attr "prefix_extra" "1")
5497 (set_attr "prefix" "orig,vex")
5498 (set_attr "mode" "TI")])
;; Expander for pmaddwd on V16HI inputs with a V8SI result.  The pattern
;; has two halves: one selects the even elements (0, 2, ..., 14) of
;; operands 1 and 2, the other selects the odd elements (1, 3, ..., 15) of
;; the same two operands (match_dup).  ix86_fixup_binary_operands_no_copy
;; is called to legitimize the inputs.
;; NOTE(review): the operators joining the selections are on lines not
;; visible here -- presumably sign-extended products that are added
;; pairwise; confirm.
5500 (define_expand "avx2_pmaddwd"
5501 [(set (match_operand:V8SI 0 "register_operand")
5506 (match_operand:V16HI 1 "nonimmediate_operand")
5507 (parallel [(const_int 0) (const_int 2)
5508 (const_int 4) (const_int 6)
5509 (const_int 8) (const_int 10)
5510 (const_int 12) (const_int 14)])))
5513 (match_operand:V16HI 2 "nonimmediate_operand")
5514 (parallel [(const_int 0) (const_int 2)
5515 (const_int 4) (const_int 6)
5516 (const_int 8) (const_int 10)
5517 (const_int 12) (const_int 14)]))))
5520 (vec_select:V8HI (match_dup 1)
5521 (parallel [(const_int 1) (const_int 3)
5522 (const_int 5) (const_int 7)
5523 (const_int 9) (const_int 11)
5524 (const_int 13) (const_int 15)])))
5526 (vec_select:V8HI (match_dup 2)
5527 (parallel [(const_int 1) (const_int 3)
5528 (const_int 5) (const_int 7)
5529 (const_int 9) (const_int 11)
5530 (const_int 13) (const_int 15)]))))))]
5532 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matching insn for vpmaddwd on V16HI inputs with a V8SI result.  It has
;; the same even/odd selection structure as the avx2_pmaddwd expander.
;; The "%" on operand 1 marks operands 1 and 2 as commutative.  The
;; condition requires AVX2 plus ix86_binary_operator_ok.
5534 (define_insn "*avx2_pmaddwd"
5535 [(set (match_operand:V8SI 0 "register_operand" "=x")
5540 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5541 (parallel [(const_int 0) (const_int 2)
5542 (const_int 4) (const_int 6)
5543 (const_int 8) (const_int 10)
5544 (const_int 12) (const_int 14)])))
5547 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5548 (parallel [(const_int 0) (const_int 2)
5549 (const_int 4) (const_int 6)
5550 (const_int 8) (const_int 10)
5551 (const_int 12) (const_int 14)]))))
5554 (vec_select:V8HI (match_dup 1)
5555 (parallel [(const_int 1) (const_int 3)
5556 (const_int 5) (const_int 7)
5557 (const_int 9) (const_int 11)
5558 (const_int 13) (const_int 15)])))
5560 (vec_select:V8HI (match_dup 2)
5561 (parallel [(const_int 1) (const_int 3)
5562 (const_int 5) (const_int 7)
5563 (const_int 9) (const_int 11)
5564 (const_int 13) (const_int 15)]))))))]
5565 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5566 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5567 [(set_attr "type" "sseiadd")
5568 (set_attr "prefix" "vex")
5569 (set_attr "mode" "OI")])
;; Expander for pmaddwd on V8HI inputs with a V4SI result.  The pattern
;; has two halves: one selects the even elements (0, 2, 4, 6) of
;; operands 1 and 2, the other selects the odd elements (1, 3, 5, 7) of
;; the same two operands (match_dup).  ix86_fixup_binary_operands_no_copy
;; is called to legitimize the inputs.
;; NOTE(review): the operators joining the selections are on lines not
;; visible here -- presumably sign-extended products that are added
;; pairwise; confirm.
5571 (define_expand "sse2_pmaddwd"
5572 [(set (match_operand:V4SI 0 "register_operand")
5577 (match_operand:V8HI 1 "nonimmediate_operand")
5578 (parallel [(const_int 0) (const_int 2)
5579 (const_int 4) (const_int 6)])))
5582 (match_operand:V8HI 2 "nonimmediate_operand")
5583 (parallel [(const_int 0) (const_int 2)
5584 (const_int 4) (const_int 6)]))))
5587 (vec_select:V4HI (match_dup 1)
5588 (parallel [(const_int 1) (const_int 3)
5589 (const_int 5) (const_int 7)])))
5591 (vec_select:V4HI (match_dup 2)
5592 (parallel [(const_int 1) (const_int 3)
5593 (const_int 5) (const_int 7)]))))))]
5595 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for pmaddwd on V8HI inputs with a V4SI result, emitted
;; as pmaddwd (noavx, operand 1 tied to the destination) or vpmaddwd
;; (avx).  It has the same even/odd selection structure as the
;; sse2_pmaddwd expander.  The "%" on operand 1 marks operands 1 and 2 as
;; commutative.  The condition requires SSE2 plus ix86_binary_operator_ok.
5597 (define_insn "*sse2_pmaddwd"
5598 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5603 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5604 (parallel [(const_int 0) (const_int 2)
5605 (const_int 4) (const_int 6)])))
5608 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5609 (parallel [(const_int 0) (const_int 2)
5610 (const_int 4) (const_int 6)]))))
5613 (vec_select:V4HI (match_dup 1)
5614 (parallel [(const_int 1) (const_int 3)
5615 (const_int 5) (const_int 7)])))
5617 (vec_select:V4HI (match_dup 2)
5618 (parallel [(const_int 1) (const_int 3)
5619 (const_int 5) (const_int 7)]))))))]
5620 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5622 pmaddwd\t{%2, %0|%0, %2}
5623 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5624 [(set_attr "isa" "noavx,avx")
5625 (set_attr "type" "sseiadd")
5626 (set_attr "atom_unit" "simul")
5627 (set_attr "prefix_data16" "1,*")
5628 (set_attr "prefix" "orig,vex")
5629 (set_attr "mode" "TI")])
;; Vector multiply expander for the VI4_AVX2 modes.
;; The preparation code shown here does three things: a constant operand 2 is
;; placed in the constant pool (force_const_mem, then validize_mem);
;; ix86_fixup_binary_operands_no_copy (MULT, ...) is applied; and there is a
;; path that calls ix86_expand_sse2_mulv4si3 on operands 0..2.
;; NOTE(review): the conditions that choose between these paths are not
;; visible in this block -- confirm against the full preparation statement.
5631 (define_expand "mul<mode>3"
5632 [(set (match_operand:VI4_AVX2 0 "register_operand")
5634 (match_operand:VI4_AVX2 1 "nonimmediate_operand")
5635 (match_operand:VI4_AVX2 2 "nonimmediate_operand")))]
5640 if (CONSTANT_P (operands[2]))
5641 operands[2] = validize_mem (force_const_mem (<MODE>mode, operands[2]));
5642 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
5646 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Multiply matcher for the VI4_AVX2 modes, emitting pmulld (alternative 0,
;; destination tied to operand 1) or vpmulld (alternative 1, three operands).
;; Enabled for TARGET_SSE4_1 when ix86_binary_operator_ok accepts the operands.
5651 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5652 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5654 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5655 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5656 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5658 pmulld\t{%2, %0|%0, %2}
5659 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5660 [(set_attr "isa" "noavx,avx")
5661 (set_attr "type" "sseimul")
5662 (set_attr "prefix_extra" "1")
5663 (set_attr "prefix" "orig,vex")
5664 (set_attr "btver2_decode" "vector,vector")
5665 (set_attr "mode" "<sseinsnmode>")])
;; Vector multiply expander for the VI8_AVX2 modes; all three operands must
;; be registers.  The expansion is delegated to ix86_expand_sse2_mulvxdi3.
5667 (define_expand "mul<mode>3"
5668 [(set (match_operand:VI8_AVX2 0 "register_operand")
5669 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand")
5670 (match_operand:VI8_AVX2 2 "register_operand")))]
5673 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply ("hi" variant) for the VI124_AVX2 modes, instantiated
;; over the any_extend code iterator.  Operand 0 has the wider
;; <sseunpackmode>; operands 1 and 2 are registers in the narrow mode.
;; The work is delegated to ix86_expand_mul_widen_hilo (its trailing
;; arguments are not visible in this block).
5677 (define_expand "vec_widen_<s>mult_hi_<mode>"
5678 [(match_operand:<sseunpackmode> 0 "register_operand")
5679 (any_extend:<sseunpackmode>
5680 (match_operand:VI124_AVX2 1 "register_operand"))
5681 (match_operand:VI124_AVX2 2 "register_operand")]
5684 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply ("lo" variant) for the VI124_AVX2 modes, instantiated
;; over the any_extend code iterator.  Same operand layout as the "hi"
;; variant: wide <sseunpackmode> destination, two narrow register sources.
;; The work is delegated to ix86_expand_mul_widen_hilo (its trailing
;; arguments are not visible in this block).
5689 (define_expand "vec_widen_<s>mult_lo_<mode>"
5690 [(match_operand:<sseunpackmode> 0 "register_operand")
5691 (any_extend:<sseunpackmode>
5692 (match_operand:VI124_AVX2 1 "register_operand"))
5693 (match_operand:VI124_AVX2 2 "register_operand")]
5696 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
5701 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
5702 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Operand 0 is a V2DI register; operands 1 and 2 are V4SI registers.
;; The expansion is delegated to ix86_expand_mul_widen_evenodd (its trailing
;; arguments are not visible in this block).
5703 (define_expand "vec_widen_smult_even_v4si"
5704 [(match_operand:V2DI 0 "register_operand")
5705 (match_operand:V4SI 1 "register_operand")
5706 (match_operand:V4SI 2 "register_operand")]
5709 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply ("odd" variant) for the VI4_AVX2 modes, instantiated
;; over the any_extend code iterator.  Operand 0 has the wider
;; <sseunpackmode>; operands 1 and 2 are narrow-mode registers.
;; The work is delegated to ix86_expand_mul_widen_evenodd (its trailing
;; arguments are not visible in this block).
5714 (define_expand "vec_widen_<s>mult_odd_<mode>"
5715 [(match_operand:<sseunpackmode> 0 "register_operand")
5716 (any_extend:<sseunpackmode>
5717 (match_operand:VI4_AVX2 1 "register_operand"))
5718 (match_operand:VI4_AVX2 2 "register_operand")]
5721 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Dot-product expander for the VI2_AVX2 modes.  Operands 1 and 2 are the
;; narrow inputs; operands 0 and 3 have the wider <sseunpackmode>.
;; A fresh wide temporary receives the pmaddwd of operands 1 and 2, then
;; operand 0 is set to a PLUS in the wide mode.
;; NOTE(review): the PLUS operands are not visible in this block; presumably
;; the temporary and operand 3 -- confirm.
5726 (define_expand "sdot_prod<mode>"
5727 [(match_operand:<sseunpackmode> 0 "register_operand")
5728 (match_operand:VI2_AVX2 1 "register_operand")
5729 (match_operand:VI2_AVX2 2 "register_operand")
5730 (match_operand:<sseunpackmode> 3 "register_operand")]
5733 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5734 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5735 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5736 gen_rtx_PLUS (<sseunpackmode>mode,
5741 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
5742 ;; back together when madd is available.
;; V4SI inputs (operands 1, 2) with V2DI accumulator (operand 3) and V2DI
;; result (operand 0).  Two chained XOP insns are emitted: xop_pmacsdqh
;; accumulates onto operand 3 into a V2DI temporary, then xop_pmacsdql
;; accumulates onto that temporary into operand 0.
5743 (define_expand "sdot_prodv4si"
5744 [(match_operand:V2DI 0 "register_operand")
5745 (match_operand:V4SI 1 "register_operand")
5746 (match_operand:V4SI 2 "register_operand")
5747 (match_operand:V2DI 3 "register_operand")]
5750 rtx t = gen_reg_rtx (V2DImode);
5751 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
5752 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Shift insn for the VI24_AVX2 modes, emitting psra<suffix> (alternative 0,
;; destination tied to operand 1) or vpsra<suffix> (alternative 1).
;; Operand 2 is an SImode count that may be a register or an immediate
;; (constraint "xN"); the length_immediate attribute depends on whether it
;; is a const_int.
5756 (define_insn "ashr<mode>3"
5757 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5759 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5760 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5763 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5764 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5765 [(set_attr "isa" "noavx,avx")
5766 (set_attr "type" "sseishft")
5767 (set (attr "length_immediate")
5768 (if_then_else (match_operand 2 "const_int_operand")
5770 (const_string "0")))
5771 (set_attr "prefix_data16" "1,*")
5772 (set_attr "prefix" "orig,vex")
5773 (set_attr "mode" "<sseinsnmode>")])
;; Shift insns for the VI248_AVX2 modes, instantiated over the any_lshift
;; code iterator; the mnemonic is p<vshift><suffix> (alternative 0) or its
;; v-prefixed three-operand form (alternative 1).
;; Operand 2 is an SImode count that may be a register or an immediate
;; (constraint "xN"); the length_immediate attribute depends on whether it
;; is a const_int.
5775 (define_insn "<shift_insn><mode>3"
5776 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5777 (any_lshift:VI248_AVX2
5778 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5779 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5782 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5783 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5784 [(set_attr "isa" "noavx,avx")
5785 (set_attr "type" "sseishft")
5786 (set (attr "length_immediate")
5787 (if_then_else (match_operand 2 "const_int_operand")
5789 (const_string "0")))
5790 (set_attr "prefix_data16" "1,*")
5791 (set_attr "prefix" "orig,vex")
5792 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-views operands 0
;; and 1 as V1TImode via gen_lowpart.
5794 (define_expand "vec_shl_<mode>"
5795 [(set (match_operand:VI_128 0 "register_operand")
5797 (match_operand:VI_128 1 "register_operand")
5798 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
5801 operands[0] = gen_lowpart (V1TImode, operands[0]);
5802 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Shift of a VIMAX_AVX2 value by an immediate that satisfies
;; const_0_to_255_mul_8_operand.  The output code divides the immediate by 8
;; before printing, then emits pslldq (alternative 0) or vpslldq
;; (alternative 1) depending on which_alternative.
5805 (define_insn "<sse2_avx2>_ashl<mode>3"
5806 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5808 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5809 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5812 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5814 switch (which_alternative)
5817 return "pslldq\t{%2, %0|%0, %2}";
5819 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5824 [(set_attr "isa" "noavx,avx")
5825 (set_attr "type" "sseishft")
5826 (set_attr "length_immediate" "1")
5827 (set_attr "prefix_data16" "1,*")
5828 (set_attr "prefix" "orig,vex")
5829 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes (counterpart of
;; vec_shl_<mode>).  Operand 2 must satisfy const_0_to_255_mul_8_operand.
;; The preparation code re-views operands 0 and 1 as V1TImode via gen_lowpart.
5831 (define_expand "vec_shr_<mode>"
5832 [(set (match_operand:VI_128 0 "register_operand")
5834 (match_operand:VI_128 1 "register_operand")
5835 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
5838 operands[0] = gen_lowpart (V1TImode, operands[0]);
5839 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Logical right shift (lshiftrt) of a VIMAX_AVX2 value by an immediate that
;; satisfies const_0_to_255_mul_8_operand.  The output code divides the
;; immediate by 8 before printing, then emits psrldq (alternative 0) or
;; vpsrldq (alternative 1) depending on which_alternative.
5842 (define_insn "<sse2_avx2>_lshr<mode>3"
5843 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5844 (lshiftrt:VIMAX_AVX2
5845 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5846 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5849 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5851 switch (which_alternative)
5854 return "psrldq\t{%2, %0|%0, %2}";
5856 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5861 [(set_attr "isa" "noavx,avx")
5862 (set_attr "type" "sseishft")
5863 (set_attr "length_immediate" "1")
5864 (set_attr "atom_unit" "sishuf")
5865 (set_attr "prefix_data16" "1,*")
5866 (set_attr "prefix" "orig,vex")
5867 (set_attr "mode" "<sseinsnmode>")])
;; Binary-operation expander for the VI124_256 modes, named from a code
;; iterator (<code>).  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with <CODE> on the operands.
5870 (define_expand "<code><mode>3"
5871 [(set (match_operand:VI124_256 0 "register_operand")
5873 (match_operand:VI124_256 1 "nonimmediate_operand")
5874 (match_operand:VI124_256 2 "nonimmediate_operand")))]
5876 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 matcher for the VI124_256 modes, emitting the three-operand
;; vp<maxmin_int><suffix>.  Operand 1 is commutative ("%"); only operand 2 may
;; be memory.  Enabled for TARGET_AVX2 when ix86_binary_operator_ok accepts
;; the operands.
5878 (define_insn "*avx2_<code><mode>3"
5879 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5881 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5882 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5883 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5884 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5885 [(set_attr "type" "sseiadd")
5886 (set_attr "prefix_extra" "1")
5887 (set_attr "prefix" "vex")
5888 (set_attr "mode" "OI")])
;; Min/max expander for the VI8_AVX2 modes, implemented as a vector
;; conditional select through ix86_expand_int_vcond.
;; xops[0] is the destination.  For SMAX/UMAX the select arms xops[1],
;; xops[2] are operands 1, 2; otherwise they are swapped.  The comparison
;; (xops[3]) is operand 1 against operand 2, using GTU for UMAX/UMIN and GT
;; otherwise; xops[4] and xops[5] are the compared operands.
5890 (define_expand "<code><mode>3"
5891 [(set (match_operand:VI8_AVX2 0 "register_operand")
5893 (match_operand:VI8_AVX2 1 "register_operand")
5894 (match_operand:VI8_AVX2 2 "register_operand")))]
5901 xops[0] = operands[0];
5903 if (<CODE> == SMAX || <CODE> == UMAX)
5905 xops[1] = operands[1];
5906 xops[2] = operands[2];
5910 xops[1] = operands[2];
5911 xops[2] = operands[1];
5914 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5916 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5917 xops[4] = operands[1];
5918 xops[5] = operands[2];
5920 ok = ix86_expand_int_vcond (xops);
;; Expander for the VI124_128 modes that falls back to a GT-based
;; conditional select.
;; When TARGET_SSE4_1 holds or the mode is V8HImode, the operands are only
;; fixed up with ix86_fixup_binary_operands_no_copy.  On the other visible
;; path both inputs are forced into registers and xops[] is filled in for
;; ix86_expand_int_vcond, with a GT comparison of operand 1 against operand 2.
;; NOTE(review): the test that picks the straight vs. swapped assignment of
;; xops[1]/xops[2] is not visible in this block -- confirm.
5925 (define_expand "<code><mode>3"
5926 [(set (match_operand:VI124_128 0 "register_operand")
5928 (match_operand:VI124_128 1 "nonimmediate_operand")
5929 (match_operand:VI124_128 2 "nonimmediate_operand")))]
5932 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5933 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5939 xops[0] = operands[0];
5940 operands[1] = force_reg (<MODE>mode, operands[1]);
5941 operands[2] = force_reg (<MODE>mode, operands[2]);
5945 xops[1] = operands[1];
5946 xops[2] = operands[2];
5950 xops[1] = operands[2];
5951 xops[2] = operands[1];
5954 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5955 xops[4] = operands[1];
5956 xops[5] = operands[2];
5958 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 matcher for the VI14_128 modes, emitting p<maxmin_int><suffix>
;; (alternative 0, destination tied to operand 1) or the v-prefixed
;; three-operand form (alternative 1).  Enabled for TARGET_SSE4_1 when
;; ix86_binary_operator_ok accepts the operands.
5964 (define_insn "*sse4_1_<code><mode>3"
5965 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5967 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5968 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5969 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5971 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5972 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5973 [(set_attr "isa" "noavx,avx")
5974 (set_attr "type" "sseiadd")
5975 (set_attr "prefix_extra" "1,*")
5976 (set_attr "prefix" "orig,vex")
5977 (set_attr "mode" "TI")])
;; V8HI matcher available from plain SSE2, emitting p<maxmin_int>w
;; (alternative 0, destination tied to operand 1) or vp<maxmin_int>w
;; (alternative 1).  Enabled for TARGET_SSE2 when ix86_binary_operator_ok
;; accepts the operands.
5979 (define_insn "*<code>v8hi3"
5980 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5982 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5983 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5984 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5986 p<maxmin_int>w\t{%2, %0|%0, %2}
5987 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5988 [(set_attr "isa" "noavx,avx")
5989 (set_attr "type" "sseiadd")
5990 (set_attr "prefix_data16" "1,*")
5991 (set_attr "prefix_extra" "*,1")
5992 (set_attr "prefix" "orig,vex")
5993 (set_attr "mode" "TI")])
;; Expander for the VI124_128 modes that falls back to a GTU-based
;; conditional select.  Three visible strategies:
;;  - TARGET_SSE4_1 or V16QImode: only fix up the operands with
;;    ix86_fixup_binary_operands_no_copy.
;;  - UMAX on V8HImode: emit sse2_ussubv8hi3 (op1, op2) into op3, then
;;    addv8hi3 (op3, op2) into the destination.  op3 is the destination
;;    itself unless that is rtx_equal_p to operand 2, in which case a fresh
;;    V8HI register is used.
;;  - otherwise: force both inputs into registers and fill in xops[] for
;;    ix86_expand_int_vcond with a GTU comparison of operand 1 against
;;    operand 2.
;; NOTE(review): the test that picks the straight vs. swapped assignment of
;; xops[1]/xops[2] is not visible in this block -- confirm.
5995 (define_expand "<code><mode>3"
5996 [(set (match_operand:VI124_128 0 "register_operand")
5998 (match_operand:VI124_128 1 "nonimmediate_operand")
5999 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6002 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6003 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6004 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6006 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6007 operands[1] = force_reg (<MODE>mode, operands[1]);
6008 if (rtx_equal_p (op3, op2))
6009 op3 = gen_reg_rtx (V8HImode);
6010 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6011 emit_insn (gen_addv8hi3 (op0, op3, op2));
6019 operands[1] = force_reg (<MODE>mode, operands[1]);
6020 operands[2] = force_reg (<MODE>mode, operands[2]);
6022 xops[0] = operands[0];
6026 xops[1] = operands[1];
6027 xops[2] = operands[2];
6031 xops[1] = operands[2];
6032 xops[2] = operands[1];
6035 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6036 xops[4] = operands[1];
6037 xops[5] = operands[2];
6039 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 matcher for the VI24_128 modes, emitting p<maxmin_int><suffix>
;; (alternative 0, destination tied to operand 1) or the v-prefixed
;; three-operand form (alternative 1).  Enabled for TARGET_SSE4_1 when
;; ix86_binary_operator_ok accepts the operands.
6045 (define_insn "*sse4_1_<code><mode>3"
6046 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6048 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6049 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6050 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6052 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6053 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6054 [(set_attr "isa" "noavx,avx")
6055 (set_attr "type" "sseiadd")
6056 (set_attr "prefix_extra" "1,*")
6057 (set_attr "prefix" "orig,vex")
6058 (set_attr "mode" "TI")])
;; V16QI matcher available from plain SSE2, emitting p<maxmin_int>b
;; (alternative 0, destination tied to operand 1) or vp<maxmin_int>b
;; (alternative 1).  Enabled for TARGET_SSE2 when ix86_binary_operator_ok
;; accepts the operands.
6060 (define_insn "*<code>v16qi3"
6061 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6063 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6064 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6065 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6067 p<maxmin_int>b\t{%2, %0|%0, %2}
6068 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6069 [(set_attr "isa" "noavx,avx")
6070 (set_attr "type" "sseiadd")
6071 (set_attr "prefix_data16" "1,*")
6072 (set_attr "prefix_extra" "*,1")
6073 (set_attr "prefix" "orig,vex")
6074 (set_attr "mode" "TI")])
6076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6078 ;; Parallel integral comparisons
6080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for element-wise equality on the VI_256 modes.  The
;; preparation statement calls ix86_fixup_binary_operands_no_copy with EQ.
6082 (define_expand "avx2_eq<mode>3"
6083 [(set (match_operand:VI_256 0 "register_operand")
6085 (match_operand:VI_256 1 "nonimmediate_operand")
6086 (match_operand:VI_256 2 "nonimmediate_operand")))]
6088 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Equality matcher for the VI_256 modes, emitting the three-operand
;; vpcmpeq<suffix>.  Operand 1 is commutative ("%"); only operand 2 may be
;; memory.  Enabled for TARGET_AVX2 when ix86_binary_operator_ok accepts the
;; operands.
6090 (define_insn "*avx2_eq<mode>3"
6091 [(set (match_operand:VI_256 0 "register_operand" "=x")
6093 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6094 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6095 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6096 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6097 [(set_attr "type" "ssecmp")
6098 (set_attr "prefix_extra" "1")
6099 (set_attr "prefix" "vex")
6100 (set_attr "mode" "OI")])
;; V2DI equality matcher, emitting pcmpeqq (alternative 0, destination tied
;; to operand 1) or vpcmpeqq (alternative 1).  Enabled for TARGET_SSE4_1 when
;; ix86_binary_operator_ok accepts the operands.
6102 (define_insn "*sse4_1_eqv2di3"
6103 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6105 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6106 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6107 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6109 pcmpeqq\t{%2, %0|%0, %2}
6110 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6111 [(set_attr "isa" "noavx,avx")
6112 (set_attr "type" "ssecmp")
6113 (set_attr "prefix_extra" "1")
6114 (set_attr "prefix" "orig,vex")
6115 (set_attr "mode" "TI")])
;; Equality matcher for the VI124_128 modes, emitting pcmpeq<suffix>
;; (alternative 0, destination tied to operand 1) or vpcmpeq<suffix>
;; (alternative 1).  Enabled for TARGET_SSE2 but explicitly disabled when
;; TARGET_XOP is set; also requires ix86_binary_operator_ok.
6117 (define_insn "*sse2_eq<mode>3"
6118 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6120 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6121 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6122 "TARGET_SSE2 && !TARGET_XOP
6123 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6125 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6126 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6127 [(set_attr "isa" "noavx,avx")
6128 (set_attr "type" "ssecmp")
6129 (set_attr "prefix_data16" "1,*")
6130 (set_attr "prefix" "orig,vex")
6131 (set_attr "mode" "TI")])
;; Named expander for element-wise equality on the VI124_128 modes.
;; Available for TARGET_SSE2 when TARGET_XOP is not set; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy with EQ.
6133 (define_expand "sse2_eq<mode>3"
6134 [(set (match_operand:VI124_128 0 "register_operand")
6136 (match_operand:VI124_128 1 "nonimmediate_operand")
6137 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6138 "TARGET_SSE2 && !TARGET_XOP "
6139 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for V2DI element-wise equality.  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy with EQ and V2DImode.
6141 (define_expand "sse4_1_eqv2di3"
6142 [(set (match_operand:V2DI 0 "register_operand")
6144 (match_operand:V2DI 1 "nonimmediate_operand")
6145 (match_operand:V2DI 2 "nonimmediate_operand")))]
6147 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI compare insn emitting pcmpgtq (alternative 0, destination tied to
;; operand 1) or vpcmpgtq (alternative 1).  Unlike the equality patterns,
;; operand 1 must be a register and carries no commutative marker.
6149 (define_insn "sse4_2_gtv2di3"
6150 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6152 (match_operand:V2DI 1 "register_operand" "0,x")
6153 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6156 pcmpgtq\t{%2, %0|%0, %2}
6157 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6158 [(set_attr "isa" "noavx,avx")
6159 (set_attr "type" "ssecmp")
6160 (set_attr "prefix_extra" "1")
6161 (set_attr "prefix" "orig,vex")
6162 (set_attr "mode" "TI")])
;; Compare insn for the VI_256 modes, emitting the three-operand
;; vpcmpgt<suffix>.  Operand 1 must be a register (no commutative marker);
;; operand 2 may be a register or memory.
6164 (define_insn "avx2_gt<mode>3"
6165 [(set (match_operand:VI_256 0 "register_operand" "=x")
6167 (match_operand:VI_256 1 "register_operand" "x")
6168 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6170 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6171 [(set_attr "type" "ssecmp")
6172 (set_attr "prefix_extra" "1")
6173 (set_attr "prefix" "vex")
6174 (set_attr "mode" "OI")])
;; Compare insn for the VI124_128 modes, emitting pcmpgt<suffix>
;; (alternative 0, destination tied to operand 1) or vpcmpgt<suffix>
;; (alternative 1).  Enabled for TARGET_SSE2 when TARGET_XOP is not set.
;; Operand 1 must be a register (no commutative marker).
6176 (define_insn "sse2_gt<mode>3"
6177 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6179 (match_operand:VI124_128 1 "register_operand" "0,x")
6180 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6181 "TARGET_SSE2 && !TARGET_XOP"
6183 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6184 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6185 [(set_attr "isa" "noavx,avx")
6186 (set_attr "type" "ssecmp")
6187 (set_attr "prefix_data16" "1,*")
6188 (set_attr "prefix" "orig,vex")
6189 (set_attr "mode" "TI")])
;; Vector conditional expander: V_256 data (operands 0, 1, 2) with a
;; comparison (operator 3) over VI_256 operands 4 and 5.
;; The visible part of the condition requires the data mode and the
;; comparison mode to have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond.
6191 (define_expand "vcond<V_256:mode><VI_256:mode>"
6192 [(set (match_operand:V_256 0 "register_operand")
6194 (match_operator 3 ""
6195 [(match_operand:VI_256 4 "nonimmediate_operand")
6196 (match_operand:VI_256 5 "general_operand")])
6197 (match_operand:V_256 1)
6198 (match_operand:V_256 2)))]
6200 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6201 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6203 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional expander: V_128 data (operands 0, 1, 2) with a
;; comparison (operator 3) over VI124_128 operands 4 and 5.
;; The visible part of the condition requires the data mode and the
;; comparison mode to have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond.
6208 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6209 [(set (match_operand:V_128 0 "register_operand")
6211 (match_operator 3 ""
6212 [(match_operand:VI124_128 4 "nonimmediate_operand")
6213 (match_operand:VI124_128 5 "general_operand")])
6214 (match_operand:V_128 1)
6215 (match_operand:V_128 2)))]
6217 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6218 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6220 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional expander: VI8F_128 data selected by an if_then_else
;; whose condition (operator 3) compares V2DI operands 4 and 5.
;; Expansion is delegated to ix86_expand_int_vcond.
6225 (define_expand "vcond<VI8F_128:mode>v2di"
6226 [(set (match_operand:VI8F_128 0 "register_operand")
6227 (if_then_else:VI8F_128
6228 (match_operator 3 ""
6229 [(match_operand:V2DI 4 "nonimmediate_operand")
6230 (match_operand:V2DI 5 "general_operand")])
6231 (match_operand:VI8F_128 1)
6232 (match_operand:VI8F_128 2)))]
6235 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 256-bit vcond expander: V_256 data (operands
;; 0, 1, 2) with a comparison (operator 3) over VI_256 operands 4 and 5.
;; Here both compared operands are nonimmediate_operand and the data
;; operands are general_operand.  The visible part of the condition requires
;; equal element counts; expansion is delegated to ix86_expand_int_vcond.
6240 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6241 [(set (match_operand:V_256 0 "register_operand")
6243 (match_operator 3 ""
6244 [(match_operand:VI_256 4 "nonimmediate_operand")
6245 (match_operand:VI_256 5 "nonimmediate_operand")])
6246 (match_operand:V_256 1 "general_operand")
6247 (match_operand:V_256 2 "general_operand")))]
6249 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6250 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6252 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 128-bit vcond expander: V_128 data (operands
;; 0, 1, 2) with a comparison (operator 3) over VI124_128 operands 4 and 5.
;; Here both compared operands are nonimmediate_operand and the data
;; operands are general_operand.  The visible part of the condition requires
;; equal element counts; expansion is delegated to ix86_expand_int_vcond.
6257 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6258 [(set (match_operand:V_128 0 "register_operand")
6260 (match_operator 3 ""
6261 [(match_operand:VI124_128 4 "nonimmediate_operand")
6262 (match_operand:VI124_128 5 "nonimmediate_operand")])
6263 (match_operand:V_128 1 "general_operand")
6264 (match_operand:V_128 2 "general_operand")))]
6266 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6267 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6269 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of vcond<mode>v2di: VI8F_128 data selected by an
;; if_then_else whose condition (operator 3) compares V2DI operands 4 and 5.
;; Both compared operands are nonimmediate_operand and the data operands are
;; general_operand.  Expansion is delegated to ix86_expand_int_vcond.
6274 (define_expand "vcondu<VI8F_128:mode>v2di"
6275 [(set (match_operand:VI8F_128 0 "register_operand")
6276 (if_then_else:VI8F_128
6277 (match_operator 3 ""
6278 [(match_operand:V2DI 4 "nonimmediate_operand")
6279 (match_operand:V2DI 5 "nonimmediate_operand")])
6280 (match_operand:VI8F_128 1 "general_operand")
6281 (match_operand:VI8F_128 2 "general_operand")))]
6284 bool ok = ix86_expand_int_vcond (operands);
;; Modes for the variable vec_perm expander: the six 128-bit modes are
;; listed unconditionally, the six 256-bit modes only under TARGET_AVX2.
6289 (define_mode_iterator VEC_PERM_AVX2
6290 [V16QI V8HI V4SI V2DI V4SF V2DF
6291 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6292 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6293 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable permutation expander.  Operands 0..2 are registers in a
;; VEC_PERM_AVX2 mode; operand 3 is a register in the matching
;; <sseintvecmode>.  Available when any of TARGET_SSSE3, TARGET_AVX or
;; TARGET_XOP is set; expansion is delegated to ix86_expand_vec_perm.
6295 (define_expand "vec_perm<mode>"
6296 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
6297 (match_operand:VEC_PERM_AVX2 1 "register_operand")
6298 (match_operand:VEC_PERM_AVX2 2 "register_operand")
6299 (match_operand:<sseintvecmode> 3 "register_operand")]
6300 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6302 ix86_expand_vec_perm (operands);
;; Modes for the vec_perm_const expander, each guarded by its own target
;; test: V4SF/V4SI/V2DF/V2DI under TARGET_SSE, V16QI/V8HI under TARGET_SSE2,
;; V8SF/V4DF/V8SI/V4DI under TARGET_AVX, V32QI/V16HI under TARGET_AVX2.
6306 (define_mode_iterator VEC_PERM_CONST
6307 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6308 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6309 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6310 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6311 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6312 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Permutation expander for the VEC_PERM_CONST modes.  Operands 0..2 are
;; registers; operand 3 has <sseintvecmode> and no predicate.  The
;; preparation code tests the result of ix86_expand_vec_perm_const
;; (what follows each outcome is not visible in this block).
6314 (define_expand "vec_perm_const<mode>"
6315 [(match_operand:VEC_PERM_CONST 0 "register_operand")
6316 (match_operand:VEC_PERM_CONST 1 "register_operand")
6317 (match_operand:VEC_PERM_CONST 2 "register_operand")
6318 (match_operand:<sseintvecmode> 3)]
6321 if (ix86_expand_vec_perm_const (operands))
6327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6329 ;; Parallel bitwise logical operations
6331 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR of operand 1.
;; The preparation code builds a CONST_VECTOR with every element set to
;; constm1_rtx (one per element of the mode), forces it into a register and
;; stores it in operands[2].
6333 (define_expand "one_cmpl<mode>2"
6334 [(set (match_operand:VI 0 "register_operand")
6335 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
6339 int i, n = GET_MODE_NUNITS (<MODE>mode);
6340 rtvec v = rtvec_alloc (n);
6342 for (i = 0; i < n; ++i)
6343 RTVEC_ELT (v, i) = constm1_rtx;
6345 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for and-not on the VI_AVX2 modes: operand 1 (a register)
;; appears under NOT, operand 2 may be a register or memory.
6348 (define_expand "<sse2_avx2>_andnot<mode>3"
6349 [(set (match_operand:VI_AVX2 0 "register_operand")
6351 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
6352 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
;; And-not matcher for all VI modes; operand 1 appears under NOT.
;; The output code switches on the insn's "mode" attribute (asserting the
;; matching TARGET_* flag in each visible case), then picks a two-operand
;; template for alternative 0 or a "v"-prefixed three-operand template for
;; alternative 1, and formats the final string into a static 32-byte buffer
;; with snprintf.
;; The "mode" attribute is computed by a cond over
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, TARGET_AVX2, TARGET_AVX (V8SF for
;; modes wider than 16 bytes), and a V4SF fallback when SSE2 is unavailable
;; or the function is optimized for size.
6355 (define_insn "*andnot<mode>3"
6356 [(set (match_operand:VI 0 "register_operand" "=x,x")
6358 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6359 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6362 static char buf[32];
6366 switch (get_attr_mode (insn))
6369 gcc_assert (TARGET_AVX2);
6371 gcc_assert (TARGET_SSE2);
6377 gcc_assert (TARGET_AVX);
6379 gcc_assert (TARGET_SSE);
6388 switch (which_alternative)
6391 ops = "%s\t{%%2, %%0|%%0, %%2}";
6394 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6400 snprintf (buf, sizeof (buf), ops, tmp);
6403 [(set_attr "isa" "noavx,avx")
6404 (set_attr "type" "sselog")
6405 (set (attr "prefix_data16")
6407 (and (eq_attr "alternative" "0")
6408 (eq_attr "mode" "TI"))
6410 (const_string "*")))
6411 (set_attr "prefix" "orig,vex")
6413 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6414 (const_string "<ssePSmode>")
6415 (match_test "TARGET_AVX2")
6416 (const_string "<sseinsnmode>")
6417 (match_test "TARGET_AVX")
6419 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6420 (const_string "V8SF")
6421 (const_string "<sseinsnmode>"))
6422 (ior (not (match_test "TARGET_SSE2"))
6423 (match_test "optimize_function_for_size_p (cfun)"))
6424 (const_string "V4SF")
6426 (const_string "<sseinsnmode>")))])
;; Binary-operation expander for all VI modes, named from a code iterator
;; (<code>).  Both inputs accept nonimmediate_or_const_vector_operand;
;; expansion is delegated to ix86_expand_vector_logical_operator.
6428 (define_expand "<code><mode>3"
6429 [(set (match_operand:VI 0 "register_operand")
6431 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
6432 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
6435 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Matcher for the <code> binary operation on all VI modes; operand 1 is
;; commutative ("%").  The visible part of the condition requires
;; ix86_binary_operator_ok.
;; The output code switches on the insn's "mode" attribute (asserting the
;; matching TARGET_* flag in each visible case), then picks a two-operand
;; template for alternative 0 or a "v"-prefixed three-operand template for
;; alternative 1, and formats the final string into a static 32-byte buffer
;; with snprintf.
;; The "mode" attribute is computed by a cond over
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, TARGET_AVX2, TARGET_AVX (V8SF for
;; modes wider than 16 bytes), and a V4SF fallback when SSE2 is unavailable
;; or the function is optimized for size.
6439 (define_insn "*<code><mode>3"
6440 [(set (match_operand:VI 0 "register_operand" "=x,x")
6442 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6443 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6445 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6447 static char buf[32];
6451 switch (get_attr_mode (insn))
6454 gcc_assert (TARGET_AVX2);
6456 gcc_assert (TARGET_SSE2);
6462 gcc_assert (TARGET_AVX);
6464 gcc_assert (TARGET_SSE);
6473 switch (which_alternative)
6476 ops = "%s\t{%%2, %%0|%%0, %%2}";
6479 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6485 snprintf (buf, sizeof (buf), ops, tmp);
6488 [(set_attr "isa" "noavx,avx")
6489 (set_attr "type" "sselog")
6490 (set (attr "prefix_data16")
6492 (and (eq_attr "alternative" "0")
6493 (eq_attr "mode" "TI"))
6495 (const_string "*")))
6496 (set_attr "prefix" "orig,vex")
6498 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6499 (const_string "<ssePSmode>")
6500 (match_test "TARGET_AVX2")
6501 (const_string "<sseinsnmode>")
6502 (match_test "TARGET_AVX")
6504 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6505 (const_string "V8SF")
6506 (const_string "<sseinsnmode>"))
6507 (ior (not (match_test "TARGET_SSE2"))
6508 (match_test "optimize_function_for_size_p (cfun)"))
6509 (const_string "V4SF")
6511 (const_string "<sseinsnmode>")))])
6513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6515 ;; Parallel integral element swizzling
6517 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack expander for the VI248_AVX2 modes.  Both inputs are
;; re-viewed in the narrower-element <ssepackmode> via gen_lowpart, and the
;; result is produced by ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
6519 (define_expand "vec_pack_trunc_<mode>"
6520 [(match_operand:<ssepackmode> 0 "register_operand")
6521 (match_operand:VI248_AVX2 1 "register_operand")
6522 (match_operand:VI248_AVX2 2 "register_operand")]
6525 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6526 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6527 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack insn for the VI1_AVX2 modes: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2, each to
;; <ssehalfvecmode>.  Emits packsswb (alternative 0, destination tied to
;; operand 1) or vpacksswb (alternative 1).
6531 (define_insn "<sse2_avx2>_packsswb"
6532 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6533 (vec_concat:VI1_AVX2
6534 (ss_truncate:<ssehalfvecmode>
6535 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6536 (ss_truncate:<ssehalfvecmode>
6537 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6540 packsswb\t{%2, %0|%0, %2}
6541 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6542 [(set_attr "isa" "noavx,avx")
6543 (set_attr "type" "sselog")
6544 (set_attr "prefix_data16" "1,*")
6545 (set_attr "prefix" "orig,vex")
6546 (set_attr "mode" "<sseinsnmode>")])
;; Pack insn for the VI2_AVX2 modes: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2, each to
;; <ssehalfvecmode>.  Emits packssdw (alternative 0, destination tied to
;; operand 1) or vpackssdw (alternative 1).
6548 (define_insn "<sse2_avx2>_packssdw"
6549 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6550 (vec_concat:VI2_AVX2
6551 (ss_truncate:<ssehalfvecmode>
6552 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6553 (ss_truncate:<ssehalfvecmode>
6554 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6557 packssdw\t{%2, %0|%0, %2}
6558 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6559 [(set_attr "isa" "noavx,avx")
6560 (set_attr "type" "sselog")
6561 (set_attr "prefix_data16" "1,*")
6562 (set_attr "prefix" "orig,vex")
6563 (set_attr "mode" "<sseinsnmode>")])
;; Pack insn for the VI1_AVX2 modes: the result is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of operands 1 and 2, each to
;; <ssehalfvecmode>.  Emits packuswb (alternative 0, destination tied to
;; operand 1) or vpackuswb (alternative 1).
6565 (define_insn "<sse2_avx2>_packuswb"
6566 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6567 (vec_concat:VI1_AVX2
6568 (us_truncate:<ssehalfvecmode>
6569 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6570 (us_truncate:<ssehalfvecmode>
6571 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6574 packuswb\t{%2, %0|%0, %2}
6575 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6576 [(set_attr "isa" "noavx,avx")
6577 (set_attr "type" "sselog")
6578 (set_attr "prefix_data16" "1,*")
6579 (set_attr "prefix" "orig,vex")
6580 (set_attr "mode" "<sseinsnmode>")])
;; V32QI interleave emitting vpunpckhbw.  The selection list pairs indices
;; 8..15 with 40..47 and then 24..31 with 56..63, i.e. element k is followed
;; by element k+32 for each k in those two ranges.
6582 (define_insn "avx2_interleave_highv32qi"
6583 [(set (match_operand:V32QI 0 "register_operand" "=x")
6586 (match_operand:V32QI 1 "register_operand" "x")
6587 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6588 (parallel [(const_int 8) (const_int 40)
6589 (const_int 9) (const_int 41)
6590 (const_int 10) (const_int 42)
6591 (const_int 11) (const_int 43)
6592 (const_int 12) (const_int 44)
6593 (const_int 13) (const_int 45)
6594 (const_int 14) (const_int 46)
6595 (const_int 15) (const_int 47)
6596 (const_int 24) (const_int 56)
6597 (const_int 25) (const_int 57)
6598 (const_int 26) (const_int 58)
6599 (const_int 27) (const_int 59)
6600 (const_int 28) (const_int 60)
6601 (const_int 29) (const_int 61)
6602 (const_int 30) (const_int 62)
6603 (const_int 31) (const_int 63)])))]
6605 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6606 [(set_attr "type" "sselog")
6607 (set_attr "prefix" "vex")
6608 (set_attr "mode" "OI")])
;; V16QI interleave emitting punpckhbw (alternative 0, destination tied to
;; operand 1) or vpunpckhbw (alternative 1).  The selection list pairs
;; indices 8..15 with 24..31 (element k followed by element k+16).
6610 (define_insn "vec_interleave_highv16qi"
6611 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6614 (match_operand:V16QI 1 "register_operand" "0,x")
6615 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6616 (parallel [(const_int 8) (const_int 24)
6617 (const_int 9) (const_int 25)
6618 (const_int 10) (const_int 26)
6619 (const_int 11) (const_int 27)
6620 (const_int 12) (const_int 28)
6621 (const_int 13) (const_int 29)
6622 (const_int 14) (const_int 30)
6623 (const_int 15) (const_int 31)])))]
6626 punpckhbw\t{%2, %0|%0, %2}
6627 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6628 [(set_attr "isa" "noavx,avx")
6629 (set_attr "type" "sselog")
6630 (set_attr "prefix_data16" "1,*")
6631 (set_attr "prefix" "orig,vex")
6632 (set_attr "mode" "TI")])
;; V32QI interleave emitting vpunpcklbw.  The selection list pairs indices
;; 0..7 with 32..39 and then 16..23 with 48..55, i.e. element k is followed
;; by element k+32 for each k in those two ranges.
6634 (define_insn "avx2_interleave_lowv32qi"
6635 [(set (match_operand:V32QI 0 "register_operand" "=x")
6638 (match_operand:V32QI 1 "register_operand" "x")
6639 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6640 (parallel [(const_int 0) (const_int 32)
6641 (const_int 1) (const_int 33)
6642 (const_int 2) (const_int 34)
6643 (const_int 3) (const_int 35)
6644 (const_int 4) (const_int 36)
6645 (const_int 5) (const_int 37)
6646 (const_int 6) (const_int 38)
6647 (const_int 7) (const_int 39)
6648 (const_int 16) (const_int 48)
6649 (const_int 17) (const_int 49)
6650 (const_int 18) (const_int 50)
6651 (const_int 19) (const_int 51)
6652 (const_int 20) (const_int 52)
6653 (const_int 21) (const_int 53)
6654 (const_int 22) (const_int 54)
6655 (const_int 23) (const_int 55)])))]
6657 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6658 [(set_attr "type" "sselog")
6659 (set_attr "prefix" "vex")
6660 (set_attr "mode" "OI")])
;; V16QI interleave emitting punpcklbw (alternative 0, destination tied to
;; operand 1) or vpunpcklbw (alternative 1).  The selection list pairs
;; indices 0..7 with 16..23 (element k followed by element k+16).
6662 (define_insn "vec_interleave_lowv16qi"
6663 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6666 (match_operand:V16QI 1 "register_operand" "0,x")
6667 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6668 (parallel [(const_int 0) (const_int 16)
6669 (const_int 1) (const_int 17)
6670 (const_int 2) (const_int 18)
6671 (const_int 3) (const_int 19)
6672 (const_int 4) (const_int 20)
6673 (const_int 5) (const_int 21)
6674 (const_int 6) (const_int 22)
6675 (const_int 7) (const_int 23)])))]
6678 punpcklbw\t{%2, %0|%0, %2}
6679 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6680 [(set_attr "isa" "noavx,avx")
6681 (set_attr "type" "sselog")
6682 (set_attr "prefix_data16" "1,*")
6683 (set_attr "prefix" "orig,vex")
6684 (set_attr "mode" "TI")])
;; V16HI interleave emitting vpunpckhwd.  The selection list pairs indices
;; 4..7 with 20..23 and then 12..15 with 28..31 (element k followed by
;; element k+16).
6686 (define_insn "avx2_interleave_highv16hi"
6687 [(set (match_operand:V16HI 0 "register_operand" "=x")
6690 (match_operand:V16HI 1 "register_operand" "x")
6691 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6692 (parallel [(const_int 4) (const_int 20)
6693 (const_int 5) (const_int 21)
6694 (const_int 6) (const_int 22)
6695 (const_int 7) (const_int 23)
6696 (const_int 12) (const_int 28)
6697 (const_int 13) (const_int 29)
6698 (const_int 14) (const_int 30)
6699 (const_int 15) (const_int 31)])))]
6701 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6702 [(set_attr "type" "sselog")
6703 (set_attr "prefix" "vex")
6704 (set_attr "mode" "OI")])
;; V8HI interleave emitting punpckhwd (alternative 0, destination tied to
;; operand 1) or vpunpckhwd (alternative 1).  The selection list pairs
;; indices 4..7 with 12..15 (element k followed by element k+8).
6706 (define_insn "vec_interleave_highv8hi"
6707 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6710 (match_operand:V8HI 1 "register_operand" "0,x")
6711 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6712 (parallel [(const_int 4) (const_int 12)
6713 (const_int 5) (const_int 13)
6714 (const_int 6) (const_int 14)
6715 (const_int 7) (const_int 15)])))]
6718 punpckhwd\t{%2, %0|%0, %2}
6719 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6720 [(set_attr "isa" "noavx,avx")
6721 (set_attr "type" "sselog")
6722 (set_attr "prefix_data16" "1,*")
6723 (set_attr "prefix" "orig,vex")
6724 (set_attr "mode" "TI")])
;; V16HI interleave emitting vpunpcklwd.  The selection list pairs indices
;; 0..3 with 16..19 and then 8..11 with 24..27 (element k followed by
;; element k+16).
6726 (define_insn "avx2_interleave_lowv16hi"
6727 [(set (match_operand:V16HI 0 "register_operand" "=x")
6730 (match_operand:V16HI 1 "register_operand" "x")
6731 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6732 (parallel [(const_int 0) (const_int 16)
6733 (const_int 1) (const_int 17)
6734 (const_int 2) (const_int 18)
6735 (const_int 3) (const_int 19)
6736 (const_int 8) (const_int 24)
6737 (const_int 9) (const_int 25)
6738 (const_int 10) (const_int 26)
6739 (const_int 11) (const_int 27)])))]
6741 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6742 [(set_attr "type" "sselog")
6743 (set_attr "prefix" "vex")
6744 (set_attr "mode" "OI")])
;; V8HI low-word interleave: the selector alternates indices 0-3 with 8-11.
;; Two alternatives: two-operand punpcklwd with operand 1 tied to the
;; output (isa "noavx"), and three-operand vpunpcklwd (isa "avx").
6746 (define_insn "vec_interleave_lowv8hi"
6747 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6750 (match_operand:V8HI 1 "register_operand" "0,x")
6751 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6752 (parallel [(const_int 0) (const_int 8)
6753 (const_int 1) (const_int 9)
6754 (const_int 2) (const_int 10)
6755 (const_int 3) (const_int 11)])))]
6758 punpcklwd\t{%2, %0|%0, %2}
6759 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6760 [(set_attr "isa" "noavx,avx")
6761 (set_attr "type" "sselog")
6762 (set_attr "prefix_data16" "1,*")
6763 (set_attr "prefix" "orig,vex")
6764 (set_attr "mode" "TI")])
;; V8SI high-dword interleave, emitted as vpunpckhdq.
;; The selector alternates indices 2, 3, 6, 7 with 10, 11, 14, 15.
;; NOTE(review): presumably the indices address operand 1 followed by
;; operand 2; the enclosing rtx lines are not part of this view -- confirm.
6766 (define_insn "avx2_interleave_highv8si"
6767 [(set (match_operand:V8SI 0 "register_operand" "=x")
6770 (match_operand:V8SI 1 "register_operand" "x")
6771 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6772 (parallel [(const_int 2) (const_int 10)
6773 (const_int 3) (const_int 11)
6774 (const_int 6) (const_int 14)
6775 (const_int 7) (const_int 15)])))]
6777 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6778 [(set_attr "type" "sselog")
6779 (set_attr "prefix" "vex")
6780 (set_attr "mode" "OI")])
;; V4SI high-dword interleave: the selector alternates indices 2, 3 with 6, 7.
;; Two alternatives: two-operand punpckhdq with operand 1 tied to the
;; output (isa "noavx"), and three-operand vpunpckhdq (isa "avx").
6782 (define_insn "vec_interleave_highv4si"
6783 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6786 (match_operand:V4SI 1 "register_operand" "0,x")
6787 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6788 (parallel [(const_int 2) (const_int 6)
6789 (const_int 3) (const_int 7)])))]
6792 punpckhdq\t{%2, %0|%0, %2}
6793 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6794 [(set_attr "isa" "noavx,avx")
6795 (set_attr "type" "sselog")
6796 (set_attr "prefix_data16" "1,*")
6797 (set_attr "prefix" "orig,vex")
6798 (set_attr "mode" "TI")])
;; V8SI low-dword interleave, emitted as vpunpckldq.
;; The selector alternates indices 0, 1, 4, 5 with 8, 9, 12, 13.
;; NOTE(review): presumably the indices address operand 1 followed by
;; operand 2; the enclosing rtx lines are not part of this view -- confirm.
6800 (define_insn "avx2_interleave_lowv8si"
6801 [(set (match_operand:V8SI 0 "register_operand" "=x")
6804 (match_operand:V8SI 1 "register_operand" "x")
6805 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6806 (parallel [(const_int 0) (const_int 8)
6807 (const_int 1) (const_int 9)
6808 (const_int 4) (const_int 12)
6809 (const_int 5) (const_int 13)])))]
6811 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6812 [(set_attr "type" "sselog")
6813 (set_attr "prefix" "vex")
6814 (set_attr "mode" "OI")])
;; V4SI low-dword interleave: the selector alternates indices 0, 1 with 4, 5.
;; Two alternatives: two-operand punpckldq with operand 1 tied to the
;; output (isa "noavx"), and three-operand vpunpckldq (isa "avx").
6816 (define_insn "vec_interleave_lowv4si"
6817 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6820 (match_operand:V4SI 1 "register_operand" "0,x")
6821 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6822 (parallel [(const_int 0) (const_int 4)
6823 (const_int 1) (const_int 5)])))]
6826 punpckldq\t{%2, %0|%0, %2}
6827 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6828 [(set_attr "isa" "noavx,avx")
6829 (set_attr "type" "sselog")
6830 (set_attr "prefix_data16" "1,*")
6831 (set_attr "prefix" "orig,vex")
6832 (set_attr "mode" "TI")])
;; Expander for the VI_256 modes.  It emits avx2_interleave_low<mode> into
;; temporary t1 and avx2_interleave_high<mode> into temporary t2, then
;; combines the two with avx2_permv2ti on their V4DImode lowparts using the
;; selector 1 + (3 << 4).
;; NOTE(review): that selector presumably picks the upper 128-bit half of
;; each temporary -- confirm against the permv2ti immediate encoding.
6834 (define_expand "vec_interleave_high<mode>"
6835 [(match_operand:VI_256 0 "register_operand" "=x")
6836 (match_operand:VI_256 1 "register_operand" "x")
6837 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6840 rtx t1 = gen_reg_rtx (<MODE>mode);
6841 rtx t2 = gen_reg_rtx (<MODE>mode);
6842 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6843 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6844 emit_insn (gen_avx2_permv2ti
6845 (gen_lowpart (V4DImode, operands[0]),
6846 gen_lowpart (V4DImode, t1),
6847 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander for the VI_256 modes.  It emits avx2_interleave_low<mode> into
;; temporary t1 and avx2_interleave_high<mode> into temporary t2, then
;; combines the two with avx2_permv2ti on their V4DImode lowparts using the
;; selector 0 + (2 << 4).
;; NOTE(review): that selector presumably picks the lower 128-bit half of
;; each temporary -- confirm against the permv2ti immediate encoding.
6851 (define_expand "vec_interleave_low<mode>"
6852 [(match_operand:VI_256 0 "register_operand" "=x")
6853 (match_operand:VI_256 1 "register_operand" "x")
6854 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6857 rtx t1 = gen_reg_rtx (<MODE>mode);
6858 rtx t2 = gen_reg_rtx (<MODE>mode);
6859 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6860 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6861 emit_insn (gen_avx2_permv2ti
6862 (gen_lowpart (V4DImode, operands[0]),
6863 gen_lowpart (V4DImode, t1),
6864 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6868 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI are enabled by TARGET_SSE4_1, and
;; V2DI additionally requires TARGET_64BIT.
6869 (define_mode_iterator PINSR_MODE
6870 [(V16QI "TARGET_SSE4_1") V8HI
6871 (V4SI "TARGET_SSE4_1")
6872 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Per-mode name prefix used by the <sse2p4_1>_pinsr<ssemodesuffix> pattern:
;; "sse2" for V8HI and "sse4_1" for V16QI, V4SI and V2DI.
6874 (define_mode_attr sse2p4_1
6875 [(V16QI "sse4_1") (V8HI "sse2")
6876 (V4SI "sse4_1") (V2DI "sse4_1")])
6878 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Merges a duplicate of scalar operand 2 into vector operand 1 under the
;; vec_merge mask in operand 3.  The condition requires exact_log2 of that
;; mask to be below the number of vector elements, and the output code
;; replaces operand 3 with that log2 value before printing it as the
;; immediate.  Alternatives 0-1 are the two-operand pinsr forms (isa
;; "noavx"), 2-3 the three-operand vpinsr forms (isa "avx").  When the
;; scalar mode is narrower than SImode, a template using %k2 is returned.
6879 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6880 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6881 (vec_merge:PINSR_MODE
6882 (vec_duplicate:PINSR_MODE
6883 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6884 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6885 (match_operand:SI 3 "const_int_operand")))]
6887 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6888 < GET_MODE_NUNITS (<MODE>mode))"
6890 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6892 switch (which_alternative)
6895 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6896 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6899 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6901 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6902 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6905 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6910 [(set_attr "isa" "noavx,noavx,avx,avx")
6911 (set_attr "type" "sselog")
6912 (set (attr "prefix_rex")
6914 (and (not (match_test "TARGET_AVX"))
6915 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6917 (const_string "*")))
6918 (set (attr "prefix_data16")
6920 (and (not (match_test "TARGET_AVX"))
6921 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6923 (const_string "*")))
6924 (set (attr "prefix_extra")
6926 (and (not (match_test "TARGET_AVX"))
6927 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6929 (const_string "1")))
6930 (set_attr "length_immediate" "1")
6931 (set_attr "prefix" "orig,orig,vex,vex")
6932 (set_attr "mode" "TI")])
;; Extracts the byte of V16QI operand 1 selected by operand 2 (0-15) into an
;; SWI48 general register; the destination is printed with %k0.
;; NOTE(review): the rtx wrapping the selection is not part of this view.
6934 (define_insn "*sse4_1_pextrb_<mode>"
6935 [(set (match_operand:SWI48 0 "register_operand" "=r")
6938 (match_operand:V16QI 1 "register_operand" "x")
6939 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6941 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6942 [(set_attr "type" "sselog")
6943 (set_attr "prefix_extra" "1")
6944 (set_attr "length_immediate" "1")
6945 (set_attr "prefix" "maybe_vex")
6946 (set_attr "mode" "TI")])
;; Memory-destination form of pextrb: stores the byte of V16QI operand 1
;; selected by operand 2 (0-15) to the QImode memory operand 0.
6948 (define_insn "*sse4_1_pextrb_memory"
6949 [(set (match_operand:QI 0 "memory_operand" "=m")
6951 (match_operand:V16QI 1 "register_operand" "x")
6952 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6954 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6955 [(set_attr "type" "sselog")
6956 (set_attr "prefix_extra" "1")
6957 (set_attr "length_immediate" "1")
6958 (set_attr "prefix" "maybe_vex")
6959 (set_attr "mode" "TI")])
;; Extracts the word of V8HI operand 1 selected by operand 2 (0-7) into an
;; SWI48 general register; the destination is printed with %k0.
;; NOTE(review): the rtx wrapping the selection is not part of this view.
6961 (define_insn "*sse2_pextrw_<mode>"
6962 [(set (match_operand:SWI48 0 "register_operand" "=r")
6965 (match_operand:V8HI 1 "register_operand" "x")
6966 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6968 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6969 [(set_attr "type" "sselog")
6970 (set_attr "prefix_data16" "1")
6971 (set_attr "length_immediate" "1")
6972 (set_attr "prefix" "maybe_vex")
6973 (set_attr "mode" "TI")])
;; Memory-destination form of pextrw: stores the word of V8HI operand 1
;; selected by operand 2 (0-7) to the HImode memory operand 0.  Unlike
;; *sse2_pextrw_<mode>, this pattern sets "prefix_extra".
6975 (define_insn "*sse4_1_pextrw_memory"
6976 [(set (match_operand:HI 0 "memory_operand" "=m")
6978 (match_operand:V8HI 1 "register_operand" "x")
6979 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6981 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6982 [(set_attr "type" "sselog")
6983 (set_attr "prefix_extra" "1")
6984 (set_attr "length_immediate" "1")
6985 (set_attr "prefix" "maybe_vex")
6986 (set_attr "mode" "TI")])
;; Extracts the dword of V4SI operand 1 selected by operand 2 (0-3) into an
;; SImode general register or memory location ("=rm").
6988 (define_insn "*sse4_1_pextrd"
6989 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6991 (match_operand:V4SI 1 "register_operand" "x")
6992 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6994 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6995 [(set_attr "type" "sselog")
6996 (set_attr "prefix_extra" "1")
6997 (set_attr "length_immediate" "1")
6998 (set_attr "prefix" "maybe_vex")
6999 (set_attr "mode" "TI")])
;; Variant of pextrd with a DImode register destination, available only
;; when TARGET_64BIT && TARGET_SSE4_1.  The destination is printed with %k0.
;; NOTE(review): going by the pattern name this is the zero-extending form;
;; the extension rtx itself is not part of this view.
7001 (define_insn "*sse4_1_pextrd_zext"
7002 [(set (match_operand:DI 0 "register_operand" "=r")
7005 (match_operand:V4SI 1 "register_operand" "x")
7006 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7007 "TARGET_64BIT && TARGET_SSE4_1"
7008 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7009 [(set_attr "type" "sselog")
7010 (set_attr "prefix_extra" "1")
7011 (set_attr "length_immediate" "1")
7012 (set_attr "prefix" "maybe_vex")
7013 (set_attr "mode" "TI")])
7015 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extracts the qword of V2DI operand 1 selected by operand 2 (0-1) into a
;; DImode register or memory location; requires TARGET_SSE4_1 and
;; TARGET_64BIT.
7016 (define_insn "*sse4_1_pextrq"
7017 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7019 (match_operand:V2DI 1 "register_operand" "x")
7020 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7021 "TARGET_SSE4_1 && TARGET_64BIT"
7022 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7023 [(set_attr "type" "sselog")
7024 (set_attr "prefix_rex" "1")
7025 (set_attr "prefix_extra" "1")
7026 (set_attr "length_immediate" "1")
7027 (set_attr "prefix" "maybe_vex")
7028 (set_attr "mode" "TI")])
;; Expander: splits the 8-bit immediate in operand 2 into four 2-bit
;; selectors and emits avx2_pshufd_1 with them twice over -- first as is
;; (values 0-3), then with 4 added (values 4-7).
7030 (define_expand "avx2_pshufdv3"
7031 [(match_operand:V8SI 0 "register_operand")
7032 (match_operand:V8SI 1 "nonimmediate_operand")
7033 (match_operand:SI 2 "const_0_to_255_operand")]
7036 int mask = INTVAL (operands[2]);
7037 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7038 GEN_INT ((mask >> 0) & 3),
7039 GEN_INT ((mask >> 2) & 3),
7040 GEN_INT ((mask >> 4) & 3),
7041 GEN_INT ((mask >> 6) & 3),
7042 GEN_INT (((mask >> 0) & 3) + 4),
7043 GEN_INT (((mask >> 2) & 3) + 4),
7044 GEN_INT (((mask >> 4) & 3) + 4),
7045 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI shuffle emitted as vpshufd.  Operands 2-5 are selectors in 0-3 and
;; operands 6-9 selectors in 4-7; the condition requires each of 6-9 to
;; equal the corresponding one of 2-5 plus 4.  The output code packs
;; operands 2-5, two bits each, into one immediate stored back in operand 2.
7049 (define_insn "avx2_pshufd_1"
7050 [(set (match_operand:V8SI 0 "register_operand" "=x")
7052 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7053 (parallel [(match_operand 2 "const_0_to_3_operand")
7054 (match_operand 3 "const_0_to_3_operand")
7055 (match_operand 4 "const_0_to_3_operand")
7056 (match_operand 5 "const_0_to_3_operand")
7057 (match_operand 6 "const_4_to_7_operand")
7058 (match_operand 7 "const_4_to_7_operand")
7059 (match_operand 8 "const_4_to_7_operand")
7060 (match_operand 9 "const_4_to_7_operand")])))]
7062 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7063 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7064 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7065 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7068 mask |= INTVAL (operands[2]) << 0;
7069 mask |= INTVAL (operands[3]) << 2;
7070 mask |= INTVAL (operands[4]) << 4;
7071 mask |= INTVAL (operands[5]) << 6;
7072 operands[2] = GEN_INT (mask);
7074 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7076 [(set_attr "type" "sselog1")
7077 (set_attr "prefix" "vex")
7078 (set_attr "length_immediate" "1")
7079 (set_attr "mode" "OI")])
;; Expander: splits the immediate in operand 2 into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5, 6-7) and emits sse2_pshufd_1 with them.
7081 (define_expand "sse2_pshufd"
7082 [(match_operand:V4SI 0 "register_operand")
7083 (match_operand:V4SI 1 "nonimmediate_operand")
7084 (match_operand:SI 2 "const_int_operand")]
7087 int mask = INTVAL (operands[2]);
7088 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7089 GEN_INT ((mask >> 0) & 3),
7090 GEN_INT ((mask >> 2) & 3),
7091 GEN_INT ((mask >> 4) & 3),
7092 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle emitted as pshufd/vpshufd (%v prefix).  Operands 2-5 are
;; selectors in 0-3; the output code packs them, two bits each, into one
;; immediate stored back in operand 2.
7096 (define_insn "sse2_pshufd_1"
7097 [(set (match_operand:V4SI 0 "register_operand" "=x")
7099 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7100 (parallel [(match_operand 2 "const_0_to_3_operand")
7101 (match_operand 3 "const_0_to_3_operand")
7102 (match_operand 4 "const_0_to_3_operand")
7103 (match_operand 5 "const_0_to_3_operand")])))]
7107 mask |= INTVAL (operands[2]) << 0;
7108 mask |= INTVAL (operands[3]) << 2;
7109 mask |= INTVAL (operands[4]) << 4;
7110 mask |= INTVAL (operands[5]) << 6;
7111 operands[2] = GEN_INT (mask);
7113 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7115 [(set_attr "type" "sselog1")
7116 (set_attr "prefix_data16" "1")
7117 (set_attr "prefix" "maybe_vex")
7118 (set_attr "length_immediate" "1")
7119 (set_attr "mode" "TI")])
;; Expander: splits the 8-bit immediate in operand 2 into four 2-bit
;; selectors and emits avx2_pshuflw_1 with them twice over -- first as is
;; (values 0-3), then with 8 added (values 8-11).
7121 (define_expand "avx2_pshuflwv3"
7122 [(match_operand:V16HI 0 "register_operand")
7123 (match_operand:V16HI 1 "nonimmediate_operand")
7124 (match_operand:SI 2 "const_0_to_255_operand")]
7127 int mask = INTVAL (operands[2]);
7128 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7129 GEN_INT ((mask >> 0) & 3),
7130 GEN_INT ((mask >> 2) & 3),
7131 GEN_INT ((mask >> 4) & 3),
7132 GEN_INT ((mask >> 6) & 3),
7133 GEN_INT (((mask >> 0) & 3) + 8),
7134 GEN_INT (((mask >> 2) & 3) + 8),
7135 GEN_INT (((mask >> 4) & 3) + 8),
7136 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI shuffle emitted as vpshuflw.  Operands 2-5 are selectors in 0-3 and
;; operands 6-9 selectors in 8-11; the condition requires each of 6-9 to
;; equal the corresponding one of 2-5 plus 8.  The output code packs
;; operands 2-5, two bits each, into one immediate stored back in operand 2.
;; NOTE(review): the selector entries between and after the two operand
;; groups are not part of this view.
7140 (define_insn "avx2_pshuflw_1"
7141 [(set (match_operand:V16HI 0 "register_operand" "=x")
7143 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7144 (parallel [(match_operand 2 "const_0_to_3_operand")
7145 (match_operand 3 "const_0_to_3_operand")
7146 (match_operand 4 "const_0_to_3_operand")
7147 (match_operand 5 "const_0_to_3_operand")
7152 (match_operand 6 "const_8_to_11_operand")
7153 (match_operand 7 "const_8_to_11_operand")
7154 (match_operand 8 "const_8_to_11_operand")
7155 (match_operand 9 "const_8_to_11_operand")
7161 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7162 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7163 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7164 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7167 mask |= INTVAL (operands[2]) << 0;
7168 mask |= INTVAL (operands[3]) << 2;
7169 mask |= INTVAL (operands[4]) << 4;
7170 mask |= INTVAL (operands[5]) << 6;
7171 operands[2] = GEN_INT (mask);
7173 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7175 [(set_attr "type" "sselog")
7176 (set_attr "prefix" "vex")
7177 (set_attr "length_immediate" "1")
7178 (set_attr "mode" "OI")])
;; Expander: splits the immediate in operand 2 into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5, 6-7) and emits sse2_pshuflw_1 with them.
7180 (define_expand "sse2_pshuflw"
7181 [(match_operand:V8HI 0 "register_operand")
7182 (match_operand:V8HI 1 "nonimmediate_operand")
7183 (match_operand:SI 2 "const_int_operand")]
7186 int mask = INTVAL (operands[2]);
7187 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7188 GEN_INT ((mask >> 0) & 3),
7189 GEN_INT ((mask >> 2) & 3),
7190 GEN_INT ((mask >> 4) & 3),
7191 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle emitted as pshuflw/vpshuflw (%v prefix).  Operands 2-5 are
;; selectors in 0-3; the output code packs them, two bits each, into one
;; immediate stored back in operand 2.  The attributes mark a rep prefix
;; and no data16 prefix.
;; NOTE(review): the selector entries after operand 5 are not part of this
;; view.
7195 (define_insn "sse2_pshuflw_1"
7196 [(set (match_operand:V8HI 0 "register_operand" "=x")
7198 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7199 (parallel [(match_operand 2 "const_0_to_3_operand")
7200 (match_operand 3 "const_0_to_3_operand")
7201 (match_operand 4 "const_0_to_3_operand")
7202 (match_operand 5 "const_0_to_3_operand")
7210 mask |= INTVAL (operands[2]) << 0;
7211 mask |= INTVAL (operands[3]) << 2;
7212 mask |= INTVAL (operands[4]) << 4;
7213 mask |= INTVAL (operands[5]) << 6;
7214 operands[2] = GEN_INT (mask);
7216 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7218 [(set_attr "type" "sselog")
7219 (set_attr "prefix_data16" "0")
7220 (set_attr "prefix_rep" "1")
7221 (set_attr "prefix" "maybe_vex")
7222 (set_attr "length_immediate" "1")
7223 (set_attr "mode" "TI")])
;; Expander: splits the 8-bit immediate in operand 2 into four 2-bit
;; selectors and emits avx2_pshufhw_1 with them twice over -- first with 4
;; added (values 4-7), then with 12 added (values 12-15).
7225 (define_expand "avx2_pshufhwv3"
7226 [(match_operand:V16HI 0 "register_operand")
7227 (match_operand:V16HI 1 "nonimmediate_operand")
7228 (match_operand:SI 2 "const_0_to_255_operand")]
7231 int mask = INTVAL (operands[2]);
7232 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7233 GEN_INT (((mask >> 0) & 3) + 4),
7234 GEN_INT (((mask >> 2) & 3) + 4),
7235 GEN_INT (((mask >> 4) & 3) + 4),
7236 GEN_INT (((mask >> 6) & 3) + 4),
7237 GEN_INT (((mask >> 0) & 3) + 12),
7238 GEN_INT (((mask >> 2) & 3) + 12),
7239 GEN_INT (((mask >> 4) & 3) + 12),
7240 GEN_INT (((mask >> 6) & 3) + 12)));
;; V16HI shuffle emitted as vpshufhw.  Operands 2-5 are selectors in 4-7 and
;; operands 6-9 selectors in 12-15; the condition requires each of 6-9 to
;; equal the corresponding one of 2-5 plus 8.  The output code subtracts 4
;; from operands 2-5 and packs the results, two bits each, into one
;; immediate stored back in operand 2.
;; NOTE(review): the fixed selector entries around the operand groups are
;; only partly present in this view.
7244 (define_insn "avx2_pshufhw_1"
7245 [(set (match_operand:V16HI 0 "register_operand" "=x")
7247 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7248 (parallel [(const_int 0)
7252 (match_operand 2 "const_4_to_7_operand")
7253 (match_operand 3 "const_4_to_7_operand")
7254 (match_operand 4 "const_4_to_7_operand")
7255 (match_operand 5 "const_4_to_7_operand")
7260 (match_operand 6 "const_12_to_15_operand")
7261 (match_operand 7 "const_12_to_15_operand")
7262 (match_operand 8 "const_12_to_15_operand")
7263 (match_operand 9 "const_12_to_15_operand")])))]
7265 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7266 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7267 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7268 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7271 mask |= (INTVAL (operands[2]) - 4) << 0;
7272 mask |= (INTVAL (operands[3]) - 4) << 2;
7273 mask |= (INTVAL (operands[4]) - 4) << 4;
7274 mask |= (INTVAL (operands[5]) - 4) << 6;
7275 operands[2] = GEN_INT (mask);
7277 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7279 [(set_attr "type" "sselog")
7280 (set_attr "prefix" "vex")
7281 (set_attr "length_immediate" "1")
7282 (set_attr "mode" "OI")])
;; Expander: splits the immediate in operand 2 into four 2-bit selectors,
;; adds 4 to each (giving values 4-7), and emits sse2_pshufhw_1 with them.
7284 (define_expand "sse2_pshufhw"
7285 [(match_operand:V8HI 0 "register_operand")
7286 (match_operand:V8HI 1 "nonimmediate_operand")
7287 (match_operand:SI 2 "const_int_operand")]
7290 int mask = INTVAL (operands[2]);
7291 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7292 GEN_INT (((mask >> 0) & 3) + 4),
7293 GEN_INT (((mask >> 2) & 3) + 4),
7294 GEN_INT (((mask >> 4) & 3) + 4),
7295 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle emitted as pshufhw/vpshufhw (%v prefix).  Operands 2-5 are
;; selectors in 4-7; the output code subtracts 4 from each and packs the
;; results, two bits each, into one immediate stored back in operand 2.
;; The attributes mark a rep prefix and no data16 prefix.
;; NOTE(review): only the first fixed selector entry, (const_int 0), is
;; part of this view.
7299 (define_insn "sse2_pshufhw_1"
7300 [(set (match_operand:V8HI 0 "register_operand" "=x")
7302 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7303 (parallel [(const_int 0)
7307 (match_operand 2 "const_4_to_7_operand")
7308 (match_operand 3 "const_4_to_7_operand")
7309 (match_operand 4 "const_4_to_7_operand")
7310 (match_operand 5 "const_4_to_7_operand")])))]
7314 mask |= (INTVAL (operands[2]) - 4) << 0;
7315 mask |= (INTVAL (operands[3]) - 4) << 2;
7316 mask |= (INTVAL (operands[4]) - 4) << 4;
7317 mask |= (INTVAL (operands[5]) - 4) << 6;
7318 operands[2] = GEN_INT (mask);
7320 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7322 [(set_attr "type" "sselog")
7323 (set_attr "prefix_rep" "1")
7324 (set_attr "prefix_data16" "0")
7325 (set_attr "prefix" "maybe_vex")
7326 (set_attr "length_immediate" "1")
7327 (set_attr "mode" "TI")])
;; Expander taking an SImode source (operand 1) and a V4SI register
;; destination (operand 0).  The preparation statement supplies operand 2
;; as the V4SImode zero constant.
7329 (define_expand "sse2_loadd"
7330 [(set (match_operand:V4SI 0 "register_operand")
7333 (match_operand:SI 1 "nonimmediate_operand"))
7337 "operands[2] = CONST0_RTX (V4SImode);")
;; Places SImode operand 2 into a V4SI register, with V4SI operand 1 as the
;; other input.  Five alternatives: movd/vmovd from memory or from a
;; general register, movss from memory or from an xmm register (isa
;; "noavx"), and three-operand vmovss (isa "avx").  Operand 1 uses the "C"
;; constraint in the first three alternatives and is a register (tied to
;; the output in alternative 3) in the last two.
7339 (define_insn "sse2_loadld"
7340 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7343 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7344 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7348 %vmovd\t{%2, %0|%0, %2}
7349 %vmovd\t{%2, %0|%0, %2}
7350 movss\t{%2, %0|%0, %2}
7351 movss\t{%2, %0|%0, %2}
7352 vmovss\t{%2, %1, %0|%0, %1, %2}"
7353 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7354 (set_attr "type" "ssemov")
7355 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7356 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extraction with selector index 0 from V4SI operand 1 into an SImode
;; destination.  After reload, and when TARGET_INTER_UNIT_MOVES holds or the
;; destination is memory or not a general register, it is split into a
;; plain move whose source is operand 1's register re-created in SImode.
7358 (define_insn_and_split "sse2_stored"
7359 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7361 (match_operand:V4SI 1 "register_operand" "x,Yi")
7362 (parallel [(const_int 0)])))]
7365 "&& reload_completed
7366 && (TARGET_INTER_UNIT_MOVES
7367 || MEM_P (operands [0])
7368 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7369 [(set (match_dup 0) (match_dup 1))]
7370 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element extraction from a V4SI memory operand (offsettable, "o") into an
;; SImode general register.  The split code emits a move from the memory
;; operand re-addressed as SImode at byte offset i*4, where i is the
;; selector in operand 2 (0-3).
7372 (define_insn_and_split "*vec_ext_v4si_mem"
7373 [(set (match_operand:SI 0 "register_operand" "=r")
7375 (match_operand:V4SI 1 "memory_operand" "o")
7376 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
7382 int i = INTVAL (operands[2]);
7384 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for an extraction with selector index 0 from V2DI register
;; operand 1 into DImode operand 0.
7388 (define_expand "sse_storeq"
7389 [(set (match_operand:DI 0 "nonimmediate_operand")
7391 (match_operand:V2DI 1 "register_operand")
7392 (parallel [(const_int 0)])))]
;; 64-bit-only form of the index-0 V2DI extraction; memory-to-memory is
;; rejected by the condition.  Three alternatives: the third (memory source
;; to general register) is a plain mov{q} of type "imov"; the first two
;; carry "*" type and mode, and their templates are not part of this view.
7395 (define_insn "*sse2_storeq_rex64"
7396 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7398 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7399 (parallel [(const_int 0)])))]
7400 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7404 mov{q}\t{%1, %0|%0, %1}"
7405 [(set_attr "type" "*,*,imov")
7406 (set_attr "mode" "*,*,DI")])
;; Index-0 V2DI extraction from an xmm register into an xmm register or
;; memory ("=xm"); its condition and template are not part of this view.
;; The second pattern below (inner lines 7417-7427) rewrites the same
;; extraction as a plain move whose source is operand 1's register
;; re-created in DImode, when TARGET_INTER_UNIT_MOVES holds or the
;; destination is memory or not a general register.
;; NOTE(review): the header line and the first condition line of that second
;; pattern are missing from this view -- confirm its kind and full condition.
7408 (define_insn "*sse2_storeq"
7409 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7411 (match_operand:V2DI 1 "register_operand" "x")
7412 (parallel [(const_int 0)])))]
7417 [(set (match_operand:DI 0 "nonimmediate_operand")
7419 (match_operand:V2DI 1 "register_operand")
7420 (parallel [(const_int 0)])))]
7423 && (TARGET_INTER_UNIT_MOVES
7424 || MEM_P (operands [0])
7425 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7426 [(set (match_dup 0) (match_dup 1))]
7427 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit form of the index-1 V2DI extraction; memory-to-memory is rejected.
;; Alternatives, in order: movhps store to memory, psrldq by 8 in place
;; (isa "noavx"), vpsrldq by 8 (isa "avx"), movq load of %H1 from memory
;; into an xmm register, and mov{q} load of %H1 into a general register.
7429 (define_insn "*vec_extractv2di_1_rex64"
7430 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7432 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7433 (parallel [(const_int 1)])))]
7434 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7436 %vmovhps\t{%1, %0|%0, %1}
7437 psrldq\t{$8, %0|%0, 8}
7438 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7439 %vmovq\t{%H1, %0|%0, %H1}
7440 mov{q}\t{%H1, %0|%0, %H1}"
7441 [(set_attr "isa" "*,noavx,avx,*,*")
7442 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7443 (set_attr "length_immediate" "*,1,1,*,*")
7444 (set_attr "memory" "*,none,none,*,*")
7445 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7446 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit (!TARGET_64BIT) form of the index-1 V2DI extraction, enabled by
;; TARGET_SSE; memory-to-memory is rejected.  Alternatives, in order:
;; movhps store, psrldq by 8 in place (isa "sse2_noavx"), vpsrldq by 8
;; (isa "avx"), movq load of %H1 (isa "sse2"), movhlps from a register and
;; movlps load of %H1 (both isa "noavx").
7448 (define_insn "*vec_extractv2di_1"
7449 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7451 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7452 (parallel [(const_int 1)])))]
7453 "!TARGET_64BIT && TARGET_SSE
7454 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7456 %vmovhps\t{%1, %0|%0, %1}
7457 psrldq\t{$8, %0|%0, 8}
7458 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7459 %vmovq\t{%H1, %0|%0, %H1}
7460 movhlps\t{%1, %0|%0, %1}
7461 movlps\t{%H1, %0|%0, %H1}"
7462 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7463 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7464 (set_attr "length_immediate" "*,1,1,*,*,*")
7465 (set_attr "memory" "*,none,none,*,*,*")
7466 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7467 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; V4SI result built from SImode operand 1 (a broadcast, going by the
;; pattern name and the templates).  Alternatives: pshufd/vpshufd with
;; immediate 0 from an xmm register (isa "sse2"), vbroadcastss from memory
;; (isa "avx"), and shufps with immediate 0 on the tied register (isa
;; "noavx").
7469 (define_insn "*vec_dupv4si"
7470 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7472 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7475 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7476 vbroadcastss\t{%1, %0|%0, %1}
7477 shufps\t{$0, %0, %0|%0, %0, 0}"
7478 [(set_attr "isa" "sse2,avx,noavx")
7479 (set_attr "type" "sselog1,ssemov,sselog1")
7480 (set_attr "length_immediate" "1,0,1")
7481 (set_attr "prefix_extra" "0,1,*")
7482 (set_attr "prefix" "maybe_vex,vex,orig")
7483 (set_attr "mode" "TI,V4SF,V4SF")])
;; V2DI result built from DImode operand 1.  The attributes describe four
;; alternatives (isa "sse2_noavx,avx,sse3,noavx"); only the vpunpcklqdq and
;; movddup templates are part of this view.
;; NOTE(review): the templates for the first and last alternatives are
;; missing here -- check them in the full file.
7485 (define_insn "*vec_dupv2di"
7486 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7488 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7492 vpunpcklqdq\t{%d1, %0|%0, %d1}
7493 %vmovddup\t{%1, %0|%0, %1}
7495 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7496 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7497 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7498 (set_attr "mode" "TI,TI,DF,V4SF")])
;; V2SI built from SImode operands 1 and 2 (a concatenation, going by the
;; pattern name).  Alternatives, in order: pinsrd/vpinsrd inserting operand
;; 2 at index 1, punpckldq/vpunpckldq with an xmm operand 2, movd/vmovd of
;; operand 1 when operand 2 matches "C", and the MMX ("*y") punpckldq and
;; movd forms.
7500 (define_insn "*vec_concatv2si_sse4_1"
7501 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7503 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7504 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7507 pinsrd\t{$1, %2, %0|%0, %2, 1}
7508 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7509 punpckldq\t{%2, %0|%0, %2}
7510 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7511 %vmovd\t{%1, %0|%0, %1}
7512 punpckldq\t{%2, %0|%0, %2}
7513 movd\t{%1, %0|%0, %1}"
7514 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7515 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7516 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7517 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7518 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7519 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7521 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7522 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7523 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four alternatives: punpckldq with both inputs in xmm registers, movd of
;; operand 1 when operand 2 matches "C", and the same two forms on MMX
;; ("*y") registers.
7524 (define_insn "*vec_concatv2si_sse2"
7525 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7527 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7528 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7531 punpckldq\t{%2, %0|%0, %2}
7532 movd\t{%1, %0|%0, %1}
7533 punpckldq\t{%2, %0|%0, %2}
7534 movd\t{%1, %0|%0, %1}"
7535 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7536 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI built from SImode operands 1 and 2 using single-precision SSE
;; instructions for the xmm alternatives: unpcklps with both inputs in xmm
;; registers, and movss from memory when operand 2 matches "C".  The last
;; two alternatives are the MMX ("*y") punpckldq and movd forms.
7538 (define_insn "*vec_concatv2si_sse"
7539 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7541 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7542 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7545 unpcklps\t{%2, %0|%0, %2}
7546 movss\t{%1, %0|%0, %1}
7547 punpckldq\t{%2, %0|%0, %2}
7548 movd\t{%1, %0|%0, %1}"
7549 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7550 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; V4SI built from two V2SI operands.  Alternatives, in order: punpcklqdq
;; (isa "sse2_noavx"), vpunpcklqdq (isa "avx"), movlhps with a register
;; operand 2, movhps with a memory operand 2 (both isa "noavx"), and
;; vmovhps with a memory operand 2 (isa "avx").
7552 (define_insn "*vec_concatv4si"
7553 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7555 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7556 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7559 punpcklqdq\t{%2, %0|%0, %2}
7560 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7561 movlhps\t{%2, %0|%0, %2}
7562 movhps\t{%2, %0|%0, %2}
7563 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7564 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7565 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7566 (set_attr "prefix" "orig,vex,orig,orig,vex")
7567 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7569 ;; movd instead of movq is required to handle broken assemblers.
;; V2DI built from DImode operands 1 and 2, nine alternatives in order:
;; pinsrq/vpinsrq inserting operand 2 at index 1; movq/vmovq and movd/vmovd
;; of operand 1 when operand 2 matches "C"; movq2dq from an MMX register;
;; punpcklqdq/vpunpcklqdq with a register operand 2; movhps/vmovhps with a
;; memory operand 2.  Alternatives 0, 1, 5 and 6 get type "sselog", the
;; rest "ssemov".
;; NOTE(review): the pattern's condition line is not part of this view.
7570 (define_insn "*vec_concatv2di_rex64"
7571 [(set (match_operand:V2DI 0 "register_operand"
7572 "=x,x ,x ,Yi,!x,x,x,x,x")
7574 (match_operand:DI 1 "nonimmediate_operand"
7575 " 0,x ,xm,r ,*y,0,x,0,x")
7576 (match_operand:DI 2 "vector_move_operand"
7577 "rm,rm,C ,C ,C ,x,x,m,m")))]
7580 pinsrq\t{$1, %2, %0|%0, %2, 1}
7581 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7582 %vmovq\t{%1, %0|%0, %1}
7583 %vmovd\t{%1, %0|%0, %1}
7584 movq2dq\t{%1, %0|%0, %1}
7585 punpcklqdq\t{%2, %0|%0, %2}
7586 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7587 movhps\t{%2, %0|%0, %2}
7588 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7589 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7592 (eq_attr "alternative" "0,1,5,6")
7593 (const_string "sselog")
7594 (const_string "ssemov")))
7595 (set (attr "prefix_rex")
7597 (and (eq_attr "alternative" "0,3")
7598 (not (match_test "TARGET_AVX")))
7600 (const_string "*")))
7601 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7602 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7603 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7604 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit (!TARGET_64BIT) V2DI build from DImode operands 1 and 2, enabled
;; by TARGET_SSE.  Alternatives, in order: movq/vmovq and movq2dq of
;; operand 1 when operand 2 matches "C"; punpcklqdq/vpunpcklqdq and movlhps
;; with a register operand 2; movhps/vmovhps with a memory operand 2.
7606 (define_insn "vec_concatv2di"
7607 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7609 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7610 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7611 "!TARGET_64BIT && TARGET_SSE"
7613 %vmovq\t{%1, %0|%0, %1}
7614 movq2dq\t{%1, %0|%0, %1}
7615 punpcklqdq\t{%2, %0|%0, %2}
7616 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7617 movlhps\t{%2, %0|%0, %2}
7618 movhps\t{%2, %0|%0, %2}
7619 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7620 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7621 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7622 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7623 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for the VI124_AVX2 modes; the result has mode <sseunpackmode>.
;; All work is delegated to ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): across the four vec_unpack{s,u}_{lo,hi} expanders the third
;; argument is true only for the "u" names and the fourth only for the "hi"
;; names -- presumably "unsigned" and "high half"; confirm in the helper.
7625 (define_expand "vec_unpacks_lo_<mode>"
7626 [(match_operand:<sseunpackmode> 0 "register_operand")
7627 (match_operand:VI124_AVX2 1 "register_operand")]
7629 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander for the VI124_AVX2 modes; the result has mode <sseunpackmode>.
;; All work is delegated to ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): across the four vec_unpack{s,u}_{lo,hi} expanders the third
;; argument is true only for the "u" names and the fourth only for the "hi"
;; names -- presumably "unsigned" and "high half"; confirm in the helper.
7631 (define_expand "vec_unpacks_hi_<mode>"
7632 [(match_operand:<sseunpackmode> 0 "register_operand")
7633 (match_operand:VI124_AVX2 1 "register_operand")]
7635 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander for the VI124_AVX2 modes; the result has mode <sseunpackmode>.
;; All work is delegated to ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): across the four vec_unpack{s,u}_{lo,hi} expanders the third
;; argument is true only for the "u" names and the fourth only for the "hi"
;; names -- presumably "unsigned" and "high half"; confirm in the helper.
7637 (define_expand "vec_unpacku_lo_<mode>"
7638 [(match_operand:<sseunpackmode> 0 "register_operand")
7639 (match_operand:VI124_AVX2 1 "register_operand")]
7641 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander for the VI124_AVX2 modes; the result has mode <sseunpackmode>.
;; All work is delegated to ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): across the four vec_unpack{s,u}_{lo,hi} expanders the third
;; argument is true only for the "u" names and the fourth only for the "hi"
;; names -- presumably "unsigned" and "high half"; confirm in the helper.
7643 (define_expand "vec_unpacku_hi_<mode>"
7644 [(match_operand:<sseunpackmode> 0 "register_operand")
7645 (match_operand:VI124_AVX2 1 "register_operand")]
7647 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
7649 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7653 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the VI12_AVX2 modes.  The template is a logical right shift
;; of a sum of the two zero-extended inputs, computed in <ssedoublemode>.
;; The preparation code sets operand 3 to the vector constant 1 and runs
;; ix86_fixup_binary_operands_no_copy on the operands as for a PLUS.
;; NOTE(review): the tail of the template (the added constant, the shift
;; count and the outer wrapper) is not part of this view.
7655 (define_expand "<sse2_avx2>_uavg<mode>3"
7656 [(set (match_operand:VI12_AVX2 0 "register_operand")
7658 (lshiftrt:<ssedoublemode>
7659 (plus:<ssedoublemode>
7660 (plus:<ssedoublemode>
7661 (zero_extend:<ssedoublemode>
7662 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
7663 (zero_extend:<ssedoublemode>
7664 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
7669 operands[3] = CONST1_RTX(<MODE>mode);
7670 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn, emitted as pavg/vpavg: a logical right shift of
;; (zero_extend op1 + zero_extend op2) + op3, where operand 3 must be the
;; constant 1.  Operands 1 and 2 are commutative ("%"); the condition needs
;; TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
;; NOTE(review): the shift count and the outer wrapper are not part of this
;; view.
7673 (define_insn "*<sse2_avx2>_uavg<mode>3"
7674 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
7676 (lshiftrt:<ssedoublemode>
7677 (plus:<ssedoublemode>
7678 (plus:<ssedoublemode>
7679 (zero_extend:<ssedoublemode>
7680 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
7681 (zero_extend:<ssedoublemode>
7682 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
7683 (match_operand:VI12_AVX2 3 "const1_operand"))
7685 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7687 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
7688 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7689 [(set_attr "isa" "noavx,avx")
7690 (set_attr "type" "sseiadd")
7691 (set_attr "prefix_data16" "1,*")
7692 (set_attr "prefix" "orig,vex")
7693 (set_attr "mode" "<sseinsnmode>")])
7695 ;; The correct representation for this is absolutely enormous, and
7696 ;; surely not generally useful.
;; Takes two <ssebytemode> inputs and produces a VI8_AVX2 result, emitted
;; as psadbw (operand 1 tied to the output, isa "noavx") or vpsadbw (isa
;; "avx").
;; NOTE(review): the operation wrapper and condition are not part of this
;; view.
7697 (define_insn "<sse2_avx2>_psadbw"
7698 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7700 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7701 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7705 psadbw\t{%2, %0|%0, %2}
7706 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7707 [(set_attr "isa" "noavx,avx")
7708 (set_attr "type" "sseiadd")
7709 (set_attr "atom_unit" "simul")
7710 (set_attr "prefix_data16" "1,*")
7711 (set_attr "prefix" "orig,vex")
7712 (set_attr "mode" "<sseinsnmode>")])
;; Produces an SImode general-register result from a VF-mode vector
;; register, emitted as movmsk<ssemodesuffix> (with the optional %v prefix).
7714 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7715 [(set (match_operand:SI 0 "register_operand" "=r")
7717 [(match_operand:VF 1 "register_operand" "x")]
7720 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7721 [(set_attr "type" "ssemov")
7722 (set_attr "prefix" "maybe_vex")
7723 (set_attr "mode" "<MODE>")])
;; Produces an SImode general-register result from a V32QI register via an
;; unspec, emitted as vpmovmskb.
;; NOTE(review): "mode" is "DI" here although the destination is SImode and
;; sse2_pmovmskb uses "SI" -- confirm this is intended.
7725 (define_insn "avx2_pmovmskb"
7726 [(set (match_operand:SI 0 "register_operand" "=r")
7727 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7730 "vpmovmskb\t{%1, %0|%0, %1}"
7731 [(set_attr "type" "ssemov")
7732 (set_attr "prefix" "vex")
7733 (set_attr "mode" "DI")])
;; Produces an SImode general-register result from a V16QI register via an
;; unspec, emitted as pmovmskb/vpmovmskb (%v prefix).
7735 (define_insn "sse2_pmovmskb"
7736 [(set (match_operand:SI 0 "register_operand" "=r")
7737 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7740 "%vpmovmskb\t{%1, %0|%0, %1}"
7741 [(set_attr "type" "ssemov")
7742 (set_attr "prefix_data16" "1")
7743 (set_attr "prefix" "maybe_vex")
7744 (set_attr "mode" "SI")])
;; Expander for the masked byte store.  Operand 0 is the V16QI memory
;; destination; operands 1 and 2 are V16QI registers.
7746 (define_expand "sse2_maskmovdqu"
7747 [(set (match_operand:V16QI 0 "memory_operand")
7748 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
7749 (match_operand:V16QI 2 "register_operand")
;; Insn form of the masked byte store.  Operand 0 is the store address,
;; a pointer-mode (P) register restricted by constraint "D".  The previous
;; contents of that location are an input as well, via
;; (mem (match_dup 0)).  The output template mentions only operands 1 and
;; 2; the address register is implicit in the instruction.
7754 (define_insn "*sse2_maskmovdqu"
7755 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7756 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7757 (match_operand:V16QI 2 "register_operand" "x")
7758 (mem:V16QI (match_dup 0))]
7761 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7762 [(set_attr "type" "ssemov")
7763 (set_attr "prefix_data16" "1")
7764 ;; The implicit %rdi operand confuses default length_vex computation.
7765 (set (attr "length_vex")
7766 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7767 (set_attr "prefix" "maybe_vex")
7768 (set_attr "mode" "TI")])
;; MXCSR load.  The only operand is an SI memory location wrapped in an
;; unspec_volatile; the insn is tagged as a memory load and as touching
;; MXCSR for the Atom scheduler ("atom_sse_attr").
7770 (define_insn "sse_ldmxcsr"
7771 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7775 [(set_attr "type" "sse")
7776 (set_attr "atom_sse_attr" "mxcsr")
7777 (set_attr "prefix" "maybe_vex")
7778 (set_attr "memory" "load")])
;; MXCSR store.  SI memory operand 0 is set from the volatile unspec
;; UNSPECV_STMXCSR (which takes only a dummy const_int 0 argument); the
;; insn is tagged as a memory store.
7780 (define_insn "sse_stmxcsr"
7781 [(set (match_operand:SI 0 "memory_operand" "=m")
7782 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7785 [(set_attr "type" "sse")
7786 (set_attr "atom_sse_attr" "mxcsr")
7787 (set_attr "prefix" "maybe_vex")
7788 (set_attr "memory" "store")])
;; clflush.  Operand 0 is a bare address (address_operand, constraint "p")
;; rather than a MEM, and the memory effect is declared "unknown".
7790 (define_insn "sse2_clflush"
7791 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7795 [(set_attr "type" "sse")
7796 (set_attr "atom_sse_attr" "fence")
7797 (set_attr "memory" "unknown")])
;; mwait.  Both inputs are SI registers pinned by constraints "a" and "c";
;; the instruction length is fixed at 3 bytes.
7800 (define_insn "sse3_mwait"
7801 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7802 (match_operand:SI 1 "register_operand" "c")]
7805 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7806 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7807 ;; we only need to set up 32bit registers.
7809 [(set_attr "length" "3")])
;; monitor for 32-bit targets only (!TARGET_64BIT).  The three SI inputs
;; are pinned to fixed registers by constraints "a", "c" and "d".
7811 (define_insn "sse3_monitor"
7812 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7813 (match_operand:SI 1 "register_operand" "c")
7814 (match_operand:SI 2 "register_operand" "d")]
7816 "TARGET_SSE3 && !TARGET_64BIT"
7817 "monitor\t%0, %1, %2"
7818 [(set_attr "length" "3")])
;; monitor for 64-bit targets.  Operand 0 (constraint "a") is in pointer
;; mode P, so one pattern is generated per pointer mode; operands 1 and 2
;; stay SI for the reason given in the comment inside the pattern.
7820 (define_insn "sse3_monitor64_<mode>"
7821 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
7822 (match_operand:SI 1 "register_operand" "c")
7823 (match_operand:SI 2 "register_operand" "d")]
7825 "TARGET_SSE3 && TARGET_64BIT"
7826 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7827 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7828 ;; zero extended to 64bit, we only need to set up 32bit registers.
7830 [(set_attr "length" "3")])
7832 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7834 ;; SSSE3 instructions
7836 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over plus, ss_plus, minus and ss_minus.
;; NOTE(review): presumably drives the ph<plusminus_mnemonic>* horizontal
;; add/subtract patterns that follow -- confirm.
7838 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/subtract on HI elements (vph...w), VEX only.
;; The RTL selects adjacent element pairs (0,1), (2,3), ... (14,15) from
;; operand 1 and then the same pairs from operand 2.
;; NOTE(review): the 256-bit hardware instruction operates within each
;; 128-bit lane, whereas this RTL lists all sixteen elements of operand 1
;; before any element of operand 2 -- confirm the element order against
;; the instruction reference.
7840 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
7841 [(set (match_operand:V16HI 0 "register_operand" "=x")
7848 (match_operand:V16HI 1 "register_operand" "x")
7849 (parallel [(const_int 0)]))
7850 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7852 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7853 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7856 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7857 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7859 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7860 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7864 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
7865 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
7867 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
7868 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
7871 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
7872 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
7874 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
7875 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
7881 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
7882 (parallel [(const_int 0)]))
7883 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7885 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7886 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7889 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7890 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7892 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7893 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
7897 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
7898 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
7900 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
7901 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
7904 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
7905 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
7907 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
7908 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
7910 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
7911 [(set_attr "type" "sseiadd")
7912 (set_attr "prefix_extra" "1")
7913 (set_attr "prefix" "vex")
7914 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract on HI elements.  Adjacent pairs
;; (0,1) ... (6,7) are taken from operand 1, then from operand 2.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
7916 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
7917 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7923 (match_operand:V8HI 1 "register_operand" "0,x")
7924 (parallel [(const_int 0)]))
7925 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7927 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7928 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7931 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7932 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7934 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7935 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7940 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7941 (parallel [(const_int 0)]))
7942 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7944 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7945 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7948 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7949 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7951 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7952 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7955 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
7956 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
7957 [(set_attr "isa" "noavx,avx")
7958 (set_attr "type" "sseiadd")
7959 (set_attr "atom_unit" "complex")
7960 (set_attr "prefix_data16" "1,*")
7961 (set_attr "prefix_extra" "1")
7962 (set_attr "prefix" "orig,vex")
7963 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal add/subtract on HI
;; elements: pairs (0,1), (2,3) of operand 1, then of operand 2.
;; Two-operand only; operand 1 is tied to the destination.
7965 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
7966 [(set (match_operand:V4HI 0 "register_operand" "=y")
7971 (match_operand:V4HI 1 "register_operand" "0")
7972 (parallel [(const_int 0)]))
7973 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7975 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7976 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7980 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7981 (parallel [(const_int 0)]))
7982 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7984 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7985 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7987 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
7988 [(set_attr "type" "sseiadd")
7989 (set_attr "atom_unit" "complex")
7990 (set_attr "prefix_extra" "1")
7991 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7992 (set_attr "mode" "DI")])
;; 256-bit horizontal add/subtract on SI elements (vph...d), VEX only.
;; Adjacent pairs (0,1) ... (6,7) are taken from operand 1, then from
;; operand 2.
;; NOTE(review): the 256-bit hardware instruction operates within each
;; 128-bit lane, whereas this RTL lists all eight elements of operand 1
;; before any element of operand 2 -- confirm the element order against
;; the instruction reference.
7994 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
7995 [(set (match_operand:V8SI 0 "register_operand" "=x")
8001 (match_operand:V8SI 1 "register_operand" "x")
8002 (parallel [(const_int 0)]))
8003 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8005 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8006 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8009 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8010 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8012 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8013 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8018 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8019 (parallel [(const_int 0)]))
8020 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8022 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8023 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8026 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8027 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8029 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8030 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8032 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8033 [(set_attr "type" "sseiadd")
8034 (set_attr "prefix_extra" "1")
8035 (set_attr "prefix" "vex")
8036 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract on SI elements: pairs (0,1), (2,3) of
;; operand 1, then of operand 2.  Alternative 0 is the two-operand legacy
;; form; alternative 1 is the three-operand VEX form.
8038 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
8039 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8044 (match_operand:V4SI 1 "register_operand" "0,x")
8045 (parallel [(const_int 0)]))
8046 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8048 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8049 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8053 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8054 (parallel [(const_int 0)]))
8055 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8057 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8058 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8061 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
8062 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8063 [(set_attr "isa" "noavx,avx")
8064 (set_attr "type" "sseiadd")
8065 (set_attr "atom_unit" "complex")
8066 (set_attr "prefix_data16" "1,*")
8067 (set_attr "prefix_extra" "1")
8068 (set_attr "prefix" "orig,vex")
8069 (set_attr "mode" "TI")])
;; 64-bit (MMX register) horizontal add/subtract on SI elements: the pair
;; (0,1) of operand 1, then of operand 2.  Two-operand only; operand 1 is
;; tied to the destination.
8071 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
8072 [(set (match_operand:V2SI 0 "register_operand" "=y")
8076 (match_operand:V2SI 1 "register_operand" "0")
8077 (parallel [(const_int 0)]))
8078 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8081 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8082 (parallel [(const_int 0)]))
8083 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8085 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
8086 [(set_attr "type" "sseiadd")
8087 (set_attr "atom_unit" "complex")
8088 (set_attr "prefix_extra" "1")
8089 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8090 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw, VEX only: V32QI inputs, V16HI result.  The RTL
;; splits each input into its even-indexed bytes (0,2,...,30) and its
;; odd-indexed bytes (1,3,...,31) with vec_select; the even selections of
;; operands 1 and 2 come first, followed by the odd selections.
8092 (define_insn "avx2_pmaddubsw256"
8093 [(set (match_operand:V16HI 0 "register_operand" "=x")
8098 (match_operand:V32QI 1 "register_operand" "x")
8099 (parallel [(const_int 0) (const_int 2)
8100 (const_int 4) (const_int 6)
8101 (const_int 8) (const_int 10)
8102 (const_int 12) (const_int 14)
8103 (const_int 16) (const_int 18)
8104 (const_int 20) (const_int 22)
8105 (const_int 24) (const_int 26)
8106 (const_int 28) (const_int 30)])))
8109 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8110 (parallel [(const_int 0) (const_int 2)
8111 (const_int 4) (const_int 6)
8112 (const_int 8) (const_int 10)
8113 (const_int 12) (const_int 14)
8114 (const_int 16) (const_int 18)
8115 (const_int 20) (const_int 22)
8116 (const_int 24) (const_int 26)
8117 (const_int 28) (const_int 30)]))))
8120 (vec_select:V16QI (match_dup 1)
8121 (parallel [(const_int 1) (const_int 3)
8122 (const_int 5) (const_int 7)
8123 (const_int 9) (const_int 11)
8124 (const_int 13) (const_int 15)
8125 (const_int 17) (const_int 19)
8126 (const_int 21) (const_int 23)
8127 (const_int 25) (const_int 27)
8128 (const_int 29) (const_int 31)])))
8130 (vec_select:V16QI (match_dup 2)
8131 (parallel [(const_int 1) (const_int 3)
8132 (const_int 5) (const_int 7)
8133 (const_int 9) (const_int 11)
8134 (const_int 13) (const_int 15)
8135 (const_int 17) (const_int 19)
8136 (const_int 21) (const_int 23)
8137 (const_int 25) (const_int 27)
8138 (const_int 29) (const_int 31)]))))))]
8140 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8141 [(set_attr "type" "sseiadd")
8142 (set_attr "prefix_extra" "1")
8143 (set_attr "prefix" "vex")
8144 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: V16QI inputs, V8HI result.  Even-indexed bytes
;; (0,2,...,14) and odd-indexed bytes (1,3,...,15) of each input are
;; selected separately.  Alternative 0 is the two-operand legacy form;
;; alternative 1 is the three-operand VEX form.
8146 (define_insn "ssse3_pmaddubsw128"
8147 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8152 (match_operand:V16QI 1 "register_operand" "0,x")
8153 (parallel [(const_int 0) (const_int 2)
8154 (const_int 4) (const_int 6)
8155 (const_int 8) (const_int 10)
8156 (const_int 12) (const_int 14)])))
8159 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8160 (parallel [(const_int 0) (const_int 2)
8161 (const_int 4) (const_int 6)
8162 (const_int 8) (const_int 10)
8163 (const_int 12) (const_int 14)]))))
8166 (vec_select:V8QI (match_dup 1)
8167 (parallel [(const_int 1) (const_int 3)
8168 (const_int 5) (const_int 7)
8169 (const_int 9) (const_int 11)
8170 (const_int 13) (const_int 15)])))
8172 (vec_select:V8QI (match_dup 2)
8173 (parallel [(const_int 1) (const_int 3)
8174 (const_int 5) (const_int 7)
8175 (const_int 9) (const_int 11)
8176 (const_int 13) (const_int 15)]))))))]
8179 pmaddubsw\t{%2, %0|%0, %2}
8180 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8181 [(set_attr "isa" "noavx,avx")
8182 (set_attr "type" "sseiadd")
8183 (set_attr "atom_unit" "simul")
8184 (set_attr "prefix_data16" "1,*")
8185 (set_attr "prefix_extra" "1")
8186 (set_attr "prefix" "orig,vex")
8187 (set_attr "mode" "TI")])
;; 64-bit (MMX register) pmaddubsw: V8QI inputs, V4HI result.  Even bytes
;; (0,2,4,6) and odd bytes (1,3,5,7) of each input are selected
;; separately.  Two-operand only; operand 1 is tied to the destination.
8189 (define_insn "ssse3_pmaddubsw"
8190 [(set (match_operand:V4HI 0 "register_operand" "=y")
8195 (match_operand:V8QI 1 "register_operand" "0")
8196 (parallel [(const_int 0) (const_int 2)
8197 (const_int 4) (const_int 6)])))
8200 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8201 (parallel [(const_int 0) (const_int 2)
8202 (const_int 4) (const_int 6)]))))
8205 (vec_select:V4QI (match_dup 1)
8206 (parallel [(const_int 1) (const_int 3)
8207 (const_int 5) (const_int 7)])))
8209 (vec_select:V4QI (match_dup 2)
8210 (parallel [(const_int 1) (const_int 3)
8211 (const_int 5) (const_int 7)]))))))]
8213 "pmaddubsw\t{%2, %0|%0, %2}"
8214 [(set_attr "type" "sseiadd")
8215 (set_attr "atom_unit" "simul")
8216 (set_attr "prefix_extra" "1")
8217 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8218 (set_attr "mode" "DI")])
;; Modes handled by the pmulhrsw expander: V4HI and V8HI unconditionally,
;; V16HI only when TARGET_AVX2 holds.
8220 (define_mode_iterator PMULHRSW
8221 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  Both inputs are
;; sign-extended to <ssedoublemode> and multiplied there, with shifts and
;; an addition applied to the product.  The preparation code creates
;; operand 3 as CONST1_RTX of the vector mode and then calls
;; ix86_fixup_binary_operands_no_copy on the MULT operands.
8223 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
8224 [(set (match_operand:PMULHRSW 0 "register_operand")
8226 (lshiftrt:<ssedoublemode>
8227 (plus:<ssedoublemode>
8228 (lshiftrt:<ssedoublemode>
8229 (mult:<ssedoublemode>
8230 (sign_extend:<ssedoublemode>
8231 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
8232 (sign_extend:<ssedoublemode>
8233 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
8239 operands[3] = CONST1_RTX(<MODE>mode);
8240 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Insn for pmulhrsw on the VI2_AVX2 modes.  Operand 3 must be the
;; constant-1 vector (const1_operand).  Operand 1 is marked commutative
;; ("%"), and ix86_binary_operator_ok in the condition validates the
;; operand combination.  Alternative 0 is the two-operand legacy form;
;; alternative 1 is the three-operand VEX form.
8243 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
8244 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8246 (lshiftrt:<ssedoublemode>
8247 (plus:<ssedoublemode>
8248 (lshiftrt:<ssedoublemode>
8249 (mult:<ssedoublemode>
8250 (sign_extend:<ssedoublemode>
8251 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
8252 (sign_extend:<ssedoublemode>
8253 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
8255 (match_operand:VI2_AVX2 3 "const1_operand"))
8257 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
8259 pmulhrsw\t{%2, %0|%0, %2}
8260 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8261 [(set_attr "isa" "noavx,avx")
8262 (set_attr "type" "sseimul")
8263 (set_attr "prefix_data16" "1,*")
8264 (set_attr "prefix_extra" "1")
8265 (set_attr "prefix" "orig,vex")
8266 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") pmulhrsw on V4HI.  Operand 3 must be the
;; constant-1 vector; operand 1 is commutative ("%") and tied to the
;; destination.
8268 (define_insn "*ssse3_pmulhrswv4hi3"
8269 [(set (match_operand:V4HI 0 "register_operand" "=y")
8276 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8278 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8280 (match_operand:V4HI 3 "const1_operand"))
8282 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8283 "pmulhrsw\t{%2, %0|%0, %2}"
8284 [(set_attr "type" "sseimul")
8285 (set_attr "prefix_extra" "1")
8286 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8287 (set_attr "mode" "DI")])
;; pshufb / vpshufb on the VI1_AVX2 byte-vector modes.  Alternative 0 is
;; the two-operand legacy form (operand 1 tied to the destination);
;; alternative 1 is the three-operand VEX form.  Both alternatives are
;; marked as vector-decoded for btver2.
8289 (define_insn "<ssse3_avx2>_pshufb<mode>3"
8290 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8292 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8293 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
8297 pshufb\t{%2, %0|%0, %2}
8298 vpshufb\t{%2, %1, %0|%0, %1, %2}"
8299 [(set_attr "isa" "noavx,avx")
8300 (set_attr "type" "sselog1")
8301 (set_attr "prefix_data16" "1,*")
8302 (set_attr "prefix_extra" "1")
8303 (set_attr "prefix" "orig,vex")
8304 (set_attr "btver2_decode" "vector,vector")
8305 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") pshufb on V8QI.  Two-operand only;
;; operand 1 is tied to the destination.
8307 (define_insn "ssse3_pshufbv8qi3"
8308 [(set (match_operand:V8QI 0 "register_operand" "=y")
8309 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8310 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8313 "pshufb\t{%2, %0|%0, %2}"
8314 [(set_attr "type" "sselog1")
8315 (set_attr "prefix_extra" "1")
8316 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8317 (set_attr "mode" "DI")])
;; psign / vpsign on the VI124_AVX2 integer vector modes; the element
;; size suffix comes from <ssemodesuffix>.  Alternative 0 is the
;; two-operand legacy form; alternative 1 is the three-operand VEX form.
8319 (define_insn "<ssse3_avx2>_psign<mode>3"
8320 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
8322 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
8323 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
8327 psign<ssemodesuffix>\t{%2, %0|%0, %2}
8328 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8329 [(set_attr "isa" "noavx,avx")
8330 (set_attr "type" "sselog1")
8331 (set_attr "prefix_data16" "1,*")
8332 (set_attr "prefix_extra" "1")
8333 (set_attr "prefix" "orig,vex")
8334 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") psign over the MMXMODEI modes; the
;; element size suffix comes from <mmxvecsize>.  Two-operand only;
;; operand 1 is tied to the destination.
8336 (define_insn "ssse3_psign<mode>3"
8337 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8339 [(match_operand:MMXMODEI 1 "register_operand" "0")
8340 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8343 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
8344 [(set_attr "type" "sselog1")
8345 (set_attr "prefix_extra" "1")
8346 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8347 (set_attr "mode" "DI")])
;; palignr / vpalignr on the SSESCALARMODE modes.  Operand 3 is matched as
;; a multiple of 8 in the range 0..255 (const_0_to_255_mul_8_operand); the
;; output code divides it by 8 before printing, so the emitted immediate
;; is that value / 8.  The template is chosen by which_alternative:
;; two-operand legacy form or three-operand VEX form.
8349 (define_insn "<ssse3_avx2>_palignr<mode>"
8350 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
8351 (unspec:SSESCALARMODE
8352 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
8353 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
8354 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
8358 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8360 switch (which_alternative)
8363 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8365 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8370 [(set_attr "isa" "noavx,avx")
8371 (set_attr "type" "sseishft")
8372 (set_attr "atom_unit" "sishuf")
8373 (set_attr "prefix_data16" "1,*")
8374 (set_attr "prefix_extra" "1")
8375 (set_attr "length_immediate" "1")
8376 (set_attr "prefix" "orig,vex")
8377 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") palignr on DI.  As in the vector variant,
;; operand 3 is matched as a multiple of 8 and divided by 8 before it is
;; printed as the immediate.
8379 (define_insn "ssse3_palignrdi"
8380 [(set (match_operand:DI 0 "register_operand" "=y")
8381 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8382 (match_operand:DI 2 "nonimmediate_operand" "ym")
8383 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8387 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8388 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8390 [(set_attr "type" "sseishft")
8391 (set_attr "atom_unit" "sishuf")
8392 (set_attr "prefix_extra" "1")
8393 (set_attr "length_immediate" "1")
8394 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8395 (set_attr "mode" "DI")])
;; Vector absolute value (pabs) for the VI124_AVX2 modes.  The source may
;; be a register or memory; one template covers the legacy and VEX
;; encodings (%v prefix, "maybe_vex").
8397 (define_insn "abs<mode>2"
8398 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
8400 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
8402 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
8403 [(set_attr "type" "sselog1")
8404 (set_attr "prefix_data16" "1")
8405 (set_attr "prefix_extra" "1")
8406 (set_attr "prefix" "maybe_vex")
8407 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") absolute value for the MMXMODEI modes;
;; the element size suffix comes from <mmxvecsize>.
8409 (define_insn "abs<mode>2"
8410 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8412 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8414 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
8415 [(set_attr "type" "sselog1")
8416 (set_attr "prefix_rep" "0")
8417 (set_attr "prefix_extra" "1")
8418 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8419 (set_attr "mode" "DI")])
8421 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8423 ;; AMD SSE4A instructions
8425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar movnt for the MODEF modes: stores SSE register operand 1
;; to memory operand 0.
8427 (define_insn "sse4a_movnt<mode>"
8428 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8430 [(match_operand:MODEF 1 "register_operand" "x")]
8433 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
8434 [(set_attr "type" "ssemov")
8435 (set_attr "mode" "<MODE>")])
;; Vector-operand variant of the SSE4A scalar movnt: element 0 of the
;; VF_128 register operand 1 is selected and stored to the scalar-mode
;; memory operand 0.
8437 (define_insn "sse4a_vmmovnt<mode>"
8438 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8439 (unspec:<ssescalarmode>
8440 [(vec_select:<ssescalarmode>
8441 (match_operand:VF_128 1 "register_operand" "x")
8442 (parallel [(const_int 0)]))]
8445 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
8446 [(set_attr "type" "ssemov")
8447 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form.  Operand 1 is tied to the destination; operands
;; 2 and 3 are constants in 0..255, giving two immediate bytes
;; (length_immediate 2).
8449 (define_insn "sse4a_extrqi"
8450 [(set (match_operand:V2DI 0 "register_operand" "=x")
8451 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8452 (match_operand 2 "const_0_to_255_operand")
8453 (match_operand 3 "const_0_to_255_operand")]
8456 "extrq\t{%3, %2, %0|%0, %2, %3}"
8457 [(set_attr "type" "sse")
8458 (set_attr "prefix_data16" "1")
8459 (set_attr "length_immediate" "2")
8460 (set_attr "mode" "TI")])
;; extrq, register form.  Operand 1 is tied to the destination; the
;; control operand 2 is a V16QI register.
8462 (define_insn "sse4a_extrq"
8463 [(set (match_operand:V2DI 0 "register_operand" "=x")
8464 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8465 (match_operand:V16QI 2 "register_operand" "x")]
8468 "extrq\t{%2, %0|%0, %2}"
8469 [(set_attr "type" "sse")
8470 (set_attr "prefix_data16" "1")
8471 (set_attr "mode" "TI")])
;; insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is the V2DI source register, and operands 3 and 4 are
;; constants in 0..255, giving two immediate bytes (length_immediate 2).
8473 (define_insn "sse4a_insertqi"
8474 [(set (match_operand:V2DI 0 "register_operand" "=x")
8475 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8476 (match_operand:V2DI 2 "register_operand" "x")
8477 (match_operand 3 "const_0_to_255_operand")
8478 (match_operand 4 "const_0_to_255_operand")]
8481 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8482 [(set_attr "type" "sseins")
8483 (set_attr "prefix_data16" "0")
8484 (set_attr "prefix_rep" "1")
8485 (set_attr "length_immediate" "2")
8486 (set_attr "mode" "TI")])
;; insertq, register form.  Operand 1 is tied to the destination and
;; operand 2 is the V2DI source register; there is no immediate.
8488 (define_insn "sse4a_insertq"
8489 [(set (match_operand:V2DI 0 "register_operand" "=x")
8490 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8491 (match_operand:V2DI 2 "register_operand" "x")]
8494 "insertq\t{%2, %0|%0, %2}"
8495 [(set_attr "type" "sseins")
8496 (set_attr "prefix_data16" "0")
8497 (set_attr "prefix_rep" "1")
8498 (set_attr "mode" "TI")])
8500 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8502 ;; Intel SSE4.1 instructions
8504 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend for the VF float vector modes.  In the RTL
;; operand 2 is listed before operand 1; operand 3 is the immediate
;; selector, limited by const_0_to_<blendbits>_operand.  Alternative 0 is
;; the two-operand legacy form (operand 1 tied to the destination);
;; alternative 1 is the three-operand VEX form.
8506 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
8507 [(set (match_operand:VF 0 "register_operand" "=x,x")
8509 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8510 (match_operand:VF 1 "register_operand" "0,x")
8511 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
8514 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8515 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8516 [(set_attr "isa" "noavx,avx")
8517 (set_attr "type" "ssemov")
8518 (set_attr "length_immediate" "1")
8519 (set_attr "prefix_data16" "1,*")
8520 (set_attr "prefix_extra" "1")
8521 (set_attr "prefix" "orig,vex")
8522 (set_attr "mode" "<MODE>")])
;; Variable blend for the VF float vector modes; operand 3 is the selector
;; register.  In alternative 0 (legacy SSE4.1 form) the selector is pinned
;; by constraint "Yz" and operand 1 is tied to the destination; in
;; alternative 1 (VEX form) the selector may be any SSE register.
;; Only the VEX form carries an immediate byte (it encodes the selector
;; register); the legacy form takes its selector implicitly.  Hence
;; length_immediate is "*,1", matching the pblendvb pattern in this file.
8524 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
8525 [(set (match_operand:VF 0 "register_operand" "=x,x")
8527 [(match_operand:VF 1 "register_operand" "0,x")
8528 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8529 (match_operand:VF 3 "register_operand" "Yz,x")]
8533 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8534 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8535 [(set_attr "isa" "noavx,avx")
8536 (set_attr "type" "ssemov")
8537 (set_attr "length_immediate" "*,1")
8538 (set_attr "prefix_data16" "1,*")
8539 (set_attr "prefix_extra" "1")
8540 (set_attr "prefix" "orig,vex")
8541 (set_attr "btver2_decode" "vector,vector")
8542 (set_attr "mode" "<MODE>")])
;; dp / vdp for the VF float vector modes, with an 8-bit immediate control
;; in operand 3.  Operand 1 carries the commutative marker "%", so
;; operands 1 and 2 may be exchanged when matching constraints.
;; Alternative 0 is the two-operand legacy form; alternative 1 is the
;; three-operand VEX form.
8544 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
8545 [(set (match_operand:VF 0 "register_operand" "=x,x")
8547 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
8548 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8549 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8553 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8554 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8555 [(set_attr "isa" "noavx,avx")
8556 (set_attr "type" "ssemul")
8557 (set_attr "length_immediate" "1")
8558 (set_attr "prefix_data16" "1,*")
8559 (set_attr "prefix_extra" "1")
8560 (set_attr "prefix" "orig,vex")
8561 (set_attr "btver2_decode" "vector,vector")
8562 (set_attr "mode" "<MODE>")])
;; movntdqa load for the VI8_AVX2 modes: the source must be memory ("m")
;; and the destination an SSE register.  One template covers the legacy
;; and VEX encodings.
8564 (define_insn "<sse4_1_avx2>_movntdqa"
8565 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
8566 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
8569 "%vmovntdqa\t{%1, %0|%0, %1}"
8570 [(set_attr "type" "ssemov")
8571 (set_attr "prefix_extra" "1")
8572 (set_attr "prefix" "maybe_vex")
8573 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw on the VI1_AVX2 byte-vector modes, with an 8-bit
;; immediate control in operand 3.  Alternative 0 is the two-operand
;; legacy form; alternative 1 is the three-operand VEX form.
8575 (define_insn "<sse4_1_avx2>_mpsadbw"
8576 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8578 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8579 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8580 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8584 mpsadbw\t{%3, %2, %0|%0, %2, %3}
8585 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8586 [(set_attr "isa" "noavx,avx")
8587 (set_attr "type" "sselog1")
8588 (set_attr "length_immediate" "1")
8589 (set_attr "prefix_extra" "1")
8590 (set_attr "prefix" "orig,vex")
8591 (set_attr "btver2_decode" "vector,vector")
8592 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw, VEX only: two V8SI inputs are packed into one V16HI
;; result.
8594 (define_insn "avx2_packusdw"
8595 [(set (match_operand:V16HI 0 "register_operand" "=x")
8598 (match_operand:V8SI 1 "register_operand" "x"))
8600 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
8602 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8603 [(set_attr "type" "sselog")
8604 (set_attr "prefix_extra" "1")
8605 (set_attr "prefix" "vex")
8606 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI inputs are packed into one V8HI result.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
8608 (define_insn "sse4_1_packusdw"
8609 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8612 (match_operand:V4SI 1 "register_operand" "0,x"))
8614 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
8617 packusdw\t{%2, %0|%0, %2}
8618 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8619 [(set_attr "isa" "noavx,avx")
8620 (set_attr "type" "sselog")
8621 (set_attr "prefix_extra" "1")
8622 (set_attr "prefix" "orig,vex")
8623 (set_attr "mode" "TI")])
;; Variable byte blend on the VI1_AVX2 modes; operand 3 is the selector
;; register.  In alternative 0 (legacy form) the selector is pinned by
;; constraint "Yz"; in alternative 1 (VEX form) it may be any SSE
;; register.  Only the VEX alternative declares an immediate byte
;; (length_immediate "*,1").
8625 (define_insn "<sse4_1_avx2>_pblendvb"
8626 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8628 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8629 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8630 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
8634 pblendvb\t{%3, %2, %0|%0, %2, %3}
8635 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8636 [(set_attr "isa" "noavx,avx")
8637 (set_attr "type" "ssemov")
8638 (set_attr "prefix_extra" "1")
8639 (set_attr "length_immediate" "*,1")
8640 (set_attr "prefix" "orig,vex")
8641 (set_attr "btver2_decode" "vector,vector")
8642 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw on V8HI with an 8-bit immediate selector in operand 3.
;; In the RTL operand 2 is listed before operand 1.  Alternative 0 is the
;; two-operand legacy form; alternative 1 is the three-operand VEX form.
8644 (define_insn "sse4_1_pblendw"
8645 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8647 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8648 (match_operand:V8HI 1 "register_operand" "0,x")
8649 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
8652 pblendw\t{%3, %2, %0|%0, %2, %3}
8653 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8654 [(set_attr "isa" "noavx,avx")
8655 (set_attr "type" "ssemov")
8656 (set_attr "prefix_extra" "1")
8657 (set_attr "length_immediate" "1")
8658 (set_attr "prefix" "orig,vex")
8659 (set_attr "mode" "TI")])
8661 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code keeps the low 8 bits of operand 3 and replicates
;; them into bits 8..15 (val << 8 | val), giving one selector bit for
;; each of the sixteen HI elements.
8662 (define_expand "avx2_pblendw"
8663 [(set (match_operand:V16HI 0 "register_operand")
8665 (match_operand:V16HI 2 "nonimmediate_operand")
8666 (match_operand:V16HI 1 "register_operand")
8667 (match_operand:SI 3 "const_0_to_255_operand")))]
8670 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
8671 operands[3] = GEN_INT (val << 8 | val);
;; Insn for the 256-bit vpblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; before printing, the output code masks it back to
;; its low 8 bits, because the instruction takes a one-byte immediate
;; (length_immediate 1).
8674 (define_insn "*avx2_pblendw"
8675 [(set (match_operand:V16HI 0 "register_operand" "=x")
8677 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8678 (match_operand:V16HI 1 "register_operand" "x")
8679 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
8682 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
8683 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8685 [(set_attr "type" "ssemov")
8686 (set_attr "prefix_extra" "1")
8687 (set_attr "length_immediate" "1")
8688 (set_attr "prefix" "vex")
8689 (set_attr "mode" "OI")])
;; vpblendd on the VI4_AVX2 modes, VEX only, with an 8-bit immediate
;; selector in operand 3.  In the RTL operand 2 is listed before
;; operand 1.
8691 (define_insn "avx2_pblendd<mode>"
8692 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
8694 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
8695 (match_operand:VI4_AVX2 1 "register_operand" "x")
8696 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
8698 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8699 [(set_attr "type" "ssemov")
8700 (set_attr "prefix_extra" "1")
8701 (set_attr "length_immediate" "1")
8702 (set_attr "prefix" "vex")
8703 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, expressed as the opaque unspec UNSPEC_PHMINPOSUW.
;; The single source may be a register or memory; one template covers the
;; legacy and VEX encodings.
8705 (define_insn "sse4_1_phminposuw"
8706 [(set (match_operand:V8HI 0 "register_operand" "=x")
8707 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
8708 UNSPEC_PHMINPOSUW))]
8710 "%vphminposuw\t{%1, %0|%0, %1}"
8711 [(set_attr "type" "sselog1")
8712 (set_attr "prefix_extra" "1")
8713 (set_attr "prefix" "maybe_vex")
8714 (set_attr "mode" "TI")])
;; 256-bit byte-to-word extension (vpmov<extsuffix>bw), VEX only: the
;; whole V16QI source is widened to V16HI.  <code> and <extsuffix> come
;; from the extension code being iterated.
8716 (define_insn "avx2_<code>v16qiv16hi2"
8717 [(set (match_operand:V16HI 0 "register_operand" "=x")
8719 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
8721 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8722 [(set_attr "type" "ssemov")
8723 (set_attr "prefix_extra" "1")
8724 (set_attr "prefix" "vex")
8725 (set_attr "mode" "OI")])
;; 128-bit byte-to-word extension: only bytes 0..7 of the V16QI source are
;; selected and widened to V8HI.  The Intel-syntax side prints the source
;; with the %q modifier, i.e. as a 64-bit operand, matching the eight
;; bytes actually read.
8727 (define_insn "sse4_1_<code>v8qiv8hi2"
8728 [(set (match_operand:V8HI 0 "register_operand" "=x")
8731 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8732 (parallel [(const_int 0) (const_int 1)
8733 (const_int 2) (const_int 3)
8734 (const_int 4) (const_int 5)
8735 (const_int 6) (const_int 7)]))))]
8737 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
8738 [(set_attr "type" "ssemov")
8739 (set_attr "prefix_extra" "1")
8740 (set_attr "prefix" "maybe_vex")
8741 (set_attr "mode" "TI")])
;; 256-bit byte-to-dword extension, VEX only: bytes 0..7 of the V16QI
;; source are selected and widened to V8SI.  The Intel-syntax side prints
;; the source with the %q modifier (64-bit operand).
8743 (define_insn "avx2_<code>v8qiv8si2"
8744 [(set (match_operand:V8SI 0 "register_operand" "=x")
8747 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8748 (parallel [(const_int 0) (const_int 1)
8749 (const_int 2) (const_int 3)
8750 (const_int 4) (const_int 5)
8751 (const_int 6) (const_int 7)]))))]
8753 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
8754 [(set_attr "type" "ssemov")
8755 (set_attr "prefix_extra" "1")
8756 (set_attr "prefix" "vex")
8757 (set_attr "mode" "OI")])
;; 128-bit byte-to-dword extension: bytes 0..3 of the V16QI source are
;; selected and widened to V4SI.  The Intel-syntax side prints the source
;; with the %k modifier (32-bit operand), matching the four bytes read.
8759 (define_insn "sse4_1_<code>v4qiv4si2"
8760 [(set (match_operand:V4SI 0 "register_operand" "=x")
8763 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8764 (parallel [(const_int 0) (const_int 1)
8765 (const_int 2) (const_int 3)]))))]
8767 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
8768 [(set_attr "type" "ssemov")
8769 (set_attr "prefix_extra" "1")
8770 (set_attr "prefix" "maybe_vex")
8771 (set_attr "mode" "TI")])
;; Widen a whole V8HI operand 1 (register or memory) into V8SI operand 0 with
;; vpmov<extsuffix>wd.  No element selection is involved, so operand 1 is
;; printed without a size modifier.
8773 (define_insn "avx2_<code>v8hiv8si2"
8774 [(set (match_operand:V8SI 0 "register_operand" "=x")
8776 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
8778 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
8779 [(set_attr "type" "ssemov")
8780 (set_attr "prefix_extra" "1")
8781 (set_attr "prefix" "vex")
8782 (set_attr "mode" "OI")])
;; Widen elements 0..3 of V8HI operand 1 into V4SI operand 0 with
;; pmov<extsuffix>wd.  The second template alternative prints operand 1 with
;; the %q modifier.
8784 (define_insn "sse4_1_<code>v4hiv4si2"
8785 [(set (match_operand:V4SI 0 "register_operand" "=x")
8788 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8789 (parallel [(const_int 0) (const_int 1)
8790 (const_int 2) (const_int 3)]))))]
8792 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
8793 [(set_attr "type" "ssemov")
8794 (set_attr "prefix_extra" "1")
8795 (set_attr "prefix" "maybe_vex")
8796 (set_attr "mode" "TI")])
;; Widen elements 0..3 of V16QI operand 1 into V4DI operand 0 with
;; vpmov<extsuffix>bq.  The second template alternative prints operand 1 with
;; the %k modifier.
8798 (define_insn "avx2_<code>v4qiv4di2"
8799 [(set (match_operand:V4DI 0 "register_operand" "=x")
8802 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8803 (parallel [(const_int 0) (const_int 1)
8804 (const_int 2) (const_int 3)]))))]
8806 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
8807 [(set_attr "type" "ssemov")
8808 (set_attr "prefix_extra" "1")
8809 (set_attr "prefix" "vex")
8810 (set_attr "mode" "OI")])
;; Widen elements 0..1 of V16QI operand 1 into V2DI operand 0 with
;; pmov<extsuffix>bq.  The second template alternative prints operand 1 with
;; the %w modifier.
8812 (define_insn "sse4_1_<code>v2qiv2di2"
8813 [(set (match_operand:V2DI 0 "register_operand" "=x")
8816 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8817 (parallel [(const_int 0) (const_int 1)]))))]
8819 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
8820 [(set_attr "type" "ssemov")
8821 (set_attr "prefix_extra" "1")
8822 (set_attr "prefix" "maybe_vex")
8823 (set_attr "mode" "TI")])
;; Widen elements 0..3 of V8HI operand 1 into V4DI operand 0 with
;; vpmov<extsuffix>wq.  The second template alternative prints operand 1 with
;; the %q modifier.
8825 (define_insn "avx2_<code>v4hiv4di2"
8826 [(set (match_operand:V4DI 0 "register_operand" "=x")
8829 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8830 (parallel [(const_int 0) (const_int 1)
8831 (const_int 2) (const_int 3)]))))]
8833 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
8834 [(set_attr "type" "ssemov")
8835 (set_attr "prefix_extra" "1")
8836 (set_attr "prefix" "vex")
8837 (set_attr "mode" "OI")])
;; Widen elements 0..1 of V8HI operand 1 into V2DI operand 0 with
;; pmov<extsuffix>wq.  The second template alternative prints operand 1 with
;; the %k modifier.
8839 (define_insn "sse4_1_<code>v2hiv2di2"
8840 [(set (match_operand:V2DI 0 "register_operand" "=x")
8843 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8844 (parallel [(const_int 0) (const_int 1)]))))]
8846 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
8847 [(set_attr "type" "ssemov")
8848 (set_attr "prefix_extra" "1")
8849 (set_attr "prefix" "maybe_vex")
8850 (set_attr "mode" "TI")])
;; Widen a whole V4SI operand 1 (register or memory) into V4DI operand 0 with
;; vpmov<extsuffix>dq.  The "prefix" attribute is stated explicitly as "vex",
;; matching every other avx2_<code>* extension pattern in this group, instead
;; of relying on whatever default the attribute definition supplies.
8852 (define_insn "avx2_<code>v4siv4di2"
8853 [(set (match_operand:V4DI 0 "register_operand" "=x")
8855 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
8857 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8858 [(set_attr "type" "ssemov")
8859 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
8860 (set_attr "mode" "OI")])
;; Widen elements 0..1 of V4SI operand 1 into V2DI operand 0 with
;; pmov<extsuffix>dq.  The second template alternative prints operand 1 with
;; the %q modifier.
8862 (define_insn "sse4_1_<code>v2siv2di2"
8863 [(set (match_operand:V2DI 0 "register_operand" "=x")
8866 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8867 (parallel [(const_int 0) (const_int 1)]))))]
8869 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
8870 [(set_attr "type" "ssemov")
8871 (set_attr "prefix_extra" "1")
8872 (set_attr "prefix" "maybe_vex")
8873 (set_attr "mode" "TI")])
8875 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
8876 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Sets FLAGS_REG (CC mode) from an unspec over two VF-mode operands using
;; vtest<ssemodesuffix>.  Operand 0 must be a register; operand 1 may be a
;; register or memory.  Both operands are inputs; only the flags are written.
8877 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
8878 [(set (reg:CC FLAGS_REG)
8879 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
8880 (match_operand:VF 1 "nonimmediate_operand" "xm")]
8883 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8884 [(set_attr "type" "ssecomi")
8885 (set_attr "prefix_extra" "1")
8886 (set_attr "prefix" "vex")
8887 (set_attr "mode" "<MODE>")])
8889 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
8890 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CC mode) from an unspec over two V4DI
;; operands.  Operand 0 must be a register; operand 1 may be a register or
;; memory.  Both operands are inputs; only the flags are written.
8891 (define_insn "avx_ptest256"
8892 [(set (reg:CC FLAGS_REG)
8893 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8894 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8897 "vptest\t{%1, %0|%0, %1}"
8898 [(set_attr "type" "ssecomi")
8899 (set_attr "prefix_extra" "1")
8900 (set_attr "prefix" "vex")
8901 (set_attr "btver2_decode" "vector")
8902 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (CC mode) from an unspec over two V2DI
;; operands.  Operand 0 must be a register; operand 1 may be a register or
;; memory.  Uses the "%v" template prefix together with prefix "maybe_vex".
8904 (define_insn "sse4_1_ptest"
8905 [(set (reg:CC FLAGS_REG)
8906 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8907 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8910 "%vptest\t{%1, %0|%0, %1}"
8911 [(set_attr "type" "ssecomi")
8912 (set_attr "prefix_extra" "1")
8913 (set_attr "prefix" "maybe_vex")
8914 (set_attr "mode" "TI")])
;; Packed round: VF-mode operand 0 is produced from VF-mode operand 1
;; (register or memory) and immediate operand 2, which must lie in 0..15.
;; The "prefix_data16" attribute is computed from a TARGET_AVX test; the
;; branch chosen when that test holds is not visible in this excerpt.
8916 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
8917 [(set (match_operand:VF 0 "register_operand" "=x")
8919 [(match_operand:VF 1 "nonimmediate_operand" "xm")
8920 (match_operand:SI 2 "const_0_to_15_operand" "n")]
8923 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8924 [(set_attr "type" "ssecvt")
8925 (set (attr "prefix_data16")
8927 (match_test "TARGET_AVX")
8929 (const_string "1")))
8930 (set_attr "prefix_extra" "1")
8931 (set_attr "length_immediate" "1")
8932 (set_attr "prefix" "maybe_vex")
8933 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander for single-precision vectors (VF1).
;; Operand 1 is rounded into a fresh temporary with the packed round pattern
;; (immediate operand 2, 0..15), then the temporary is converted into the
;; integer-vector operand 0 via fix_trunc<mode><sseintvecmodelower>2.
8935 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
8936 [(match_operand:<sseintvecmode> 0 "register_operand")
8937 (match_operand:VF1 1 "nonimmediate_operand")
8938 (match_operand:SI 2 "const_0_to_15_operand")]
8941 rtx tmp = gen_reg_rtx (<MODE>mode);
8944 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
8947 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two double-precision vectors (VF2 operands 1 and 2) with immediate
;; operand 3 and pack the converted integers into operand 0.
;; For V2DF with AVX enabled and 128-bit AVX not preferred, the two inputs are
;; concatenated into one V4DF, rounded once with avx_roundpd256 and converted
;; with fix_truncv4dfv4si2.  The remaining statements round each input
;; separately and combine them with vec_pack_sfix_trunc_<mode>.
8951 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
8952 [(match_operand:<ssepackfltmode> 0 "register_operand")
8953 (match_operand:VF2 1 "nonimmediate_operand")
8954 (match_operand:VF2 2 "nonimmediate_operand")
8955 (match_operand:SI 3 "const_0_to_15_operand")]
8960 if (<MODE>mode == V2DFmode
8961 && TARGET_AVX && !TARGET_PREFER_AVX128)
8963 rtx tmp2 = gen_reg_rtx (V4DFmode);
8965 tmp0 = gen_reg_rtx (V4DFmode);
8966 tmp1 = force_reg (V2DFmode, operands[1]);
8968 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
8969 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
8970 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
8974 tmp0 = gen_reg_rtx (<MODE>mode);
8975 tmp1 = gen_reg_rtx (<MODE>mode);
8978 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
8981 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
8984 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round on VF_128 operands with immediate operand 3 (0..15).
;; Two alternatives: the "noavx" one ties operand 1 to the destination ("0")
;; and emits the two-source round form; the "avx" one emits vround with
;; operand 1 as a separate register source.
8989 (define_insn "sse4_1_round<ssescalarmodesuffix>"
8990 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
8993 [(match_operand:VF_128 2 "register_operand" "x,x")
8994 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
8996 (match_operand:VF_128 1 "register_operand" "0,x")
9000 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9001 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9002 [(set_attr "isa" "noavx,avx")
9003 (set_attr "type" "ssecvt")
9004 (set_attr "length_immediate" "1")
9005 (set_attr "prefix_data16" "1,*")
9006 (set_attr "prefix_extra" "1")
9007 (set_attr "prefix" "orig,vex")
9008 (set_attr "mode" "<MODE>")])
;; Vector round expander, enabled only when TARGET_ROUND holds and
;; flag_trapping_math is off.
;; The preparation code builds a constant vector whose elements are
;; 0.5 - 2**(-p-1) (p = precision of the scalar format, i.e. the value just
;; below 0.5) and gives it the sign of operand 1 via copysign<mode>3 into
;; operands[3].  operands[4] is a fresh temporary and operands[5] is the
;; ROUND_TRUNC immediate consumed by the (match_dup 4)/(match_dup 5) unspec
;; in the pattern.
9010 (define_expand "round<mode>2"
9013 (match_operand:VF 1 "register_operand")
9015 (set (match_operand:VF 0 "register_operand")
9017 [(match_dup 4) (match_dup 5)]
9019 "TARGET_ROUND && !flag_trapping_math"
9021 enum machine_mode scalar_mode;
9022 const struct real_format *fmt;
9023 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9026 scalar_mode = GET_MODE_INNER (<MODE>mode);
9028 /* load nextafter (0.5, 0.0) */
9029 fmt = REAL_MODE_FORMAT (scalar_mode);
9030 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9031 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9032 half = const_double_from_real_value (pred_half, scalar_mode);
9034 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9035 vec_half = force_reg (<MODE>mode, vec_half);
9037 operands[3] = gen_reg_rtx (<MODE>mode);
9038 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9040 operands[4] = gen_reg_rtx (<MODE>mode);
9041 operands[5] = GEN_INT (ROUND_TRUNC);
;; Round a single-precision vector (VF1 operand 1) with round<mode>2 into a
;; temporary, then convert it to the integer-vector operand 0 via
;; fix_trunc<mode><sseintvecmodelower>2.  Same enabling condition as
;; round<mode>2.
9044 (define_expand "round<mode>2_sfix"
9045 [(match_operand:<sseintvecmode> 0 "register_operand")
9046 (match_operand:VF1 1 "register_operand")]
9047 "TARGET_ROUND && !flag_trapping_math"
9049 rtx tmp = gen_reg_rtx (<MODE>mode);
9051 emit_insn (gen_round<mode>2 (tmp, operands[1]));
9054 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two double-precision vectors (VF2 operands 1 and 2) and pack the
;; converted integers into operand 0.
;; For V2DF with AVX enabled and 128-bit AVX not preferred, the inputs are
;; concatenated into one V4DF, rounded once with roundv4df2 and converted
;; with fix_truncv4dfv4si2.  The remaining statements round each input with
;; round<mode>2 and combine them with vec_pack_sfix_trunc_<mode>.
9058 (define_expand "round<mode>2_vec_pack_sfix"
9059 [(match_operand:<ssepackfltmode> 0 "register_operand")
9060 (match_operand:VF2 1 "register_operand")
9061 (match_operand:VF2 2 "register_operand")]
9062 "TARGET_ROUND && !flag_trapping_math"
9066 if (<MODE>mode == V2DFmode
9067 && TARGET_AVX && !TARGET_PREFER_AVX128)
9069 rtx tmp2 = gen_reg_rtx (V4DFmode);
9071 tmp0 = gen_reg_rtx (V4DFmode);
9072 tmp1 = force_reg (V2DFmode, operands[1]);
9074 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9075 emit_insn (gen_roundv4df2 (tmp2, tmp0));
9076 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9080 tmp0 = gen_reg_rtx (<MODE>mode);
9081 tmp1 = gen_reg_rtx (<MODE>mode);
9083 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
9084 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
9087 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
9092 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9094 ;; Intel SSE4.2 string/text processing instructions
9096 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI index (operand 0,
;; constraint "c"), a V16QI mask (operand 1, constraint "Yz") and FLAGS_REG.
;; Inputs are two V16QI strings (operands 2 and 4), their SI lengths
;; (operands 3 "a" and 5 "d") and an 8-bit immediate (operand 6).
;; The split code checks REG_UNUSED notes on curr_insn to see which results
;; are live and emits pcmpestri, pcmpestrm and/or the flags-only variant;
;; if nothing is live it emits a NOTE_INSN_DELETED.
;; NOTE(review): the guards on the pcmpestri/pcmpestrm emits are not visible
;; in this excerpt -- presumably `ecx' and `xmm0' respectively.
9098 (define_insn_and_split "sse4_2_pcmpestr"
9099 [(set (match_operand:SI 0 "register_operand" "=c,c")
9101 [(match_operand:V16QI 2 "register_operand" "x,x")
9102 (match_operand:SI 3 "register_operand" "a,a")
9103 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
9104 (match_operand:SI 5 "register_operand" "d,d")
9105 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9107 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9115 (set (reg:CC FLAGS_REG)
9124 && can_create_pseudo_p ()"
9129 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9130 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9131 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9134 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9135 operands[3], operands[4],
9136 operands[5], operands[6]));
9138 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9139 operands[3], operands[4],
9140 operands[5], operands[6]));
9141 if (flags && !(ecx || xmm0))
9142 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9143 operands[2], operands[3],
9144 operands[4], operands[5],
9146 if (!(flags || ecx || xmm0))
9147 emit_note (NOTE_INSN_DELETED);
9151 [(set_attr "type" "sselog")
9152 (set_attr "prefix_data16" "1")
9153 (set_attr "prefix_extra" "1")
9154 (set_attr "length_immediate" "1")
9155 (set_attr "memory" "none,load")
9156 (set_attr "mode" "TI")])
;; Variant of sse4_2_pcmpestr whose second string is a V16QI memory operand
;; (operand 4) wrapped in UNSPEC_LOADU, so the pattern has a single
;; alternative with "memory" = "load".
;; The split code mirrors sse4_2_pcmpestr: it inspects REG_UNUSED notes and
;; emits pcmpestri, pcmpestrm and/or the flags-only variant, passing the
;; memory operand directly, or a NOTE_INSN_DELETED when no result is live.
9158 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
9159 [(set (match_operand:SI 0 "register_operand" "=c")
9161 [(match_operand:V16QI 2 "register_operand" "x")
9162 (match_operand:SI 3 "register_operand" "a")
9164 [(match_operand:V16QI 4 "memory_operand" "m")]
9166 (match_operand:SI 5 "register_operand" "d")
9167 (match_operand:SI 6 "const_0_to_255_operand" "n")]
9169 (set (match_operand:V16QI 1 "register_operand" "=Yz")
9173 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9177 (set (reg:CC FLAGS_REG)
9181 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9186 && can_create_pseudo_p ()"
9191 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9192 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9193 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9196 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9197 operands[3], operands[4],
9198 operands[5], operands[6]));
9200 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9201 operands[3], operands[4],
9202 operands[5], operands[6]));
9203 if (flags && !(ecx || xmm0))
9204 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9205 operands[2], operands[3],
9206 operands[4], operands[5],
9208 if (!(flags || ecx || xmm0))
9209 emit_note (NOTE_INSN_DELETED);
9213 [(set_attr "type" "sselog")
9214 (set_attr "prefix_data16" "1")
9215 (set_attr "prefix_extra" "1")
9216 (set_attr "length_immediate" "1")
9217 (set_attr "memory" "load")
9218 (set_attr "mode" "TI")])
;; pcmpestri: produces the SI index result in operand 0 (constraint "c") and
;; also sets FLAGS_REG.  Operands 1 and 3 are the V16QI strings (3 may be in
;; memory), operands 2 ("a") and 4 ("d") their SI lengths, and operand 5 the
;; 8-bit immediate.  Only operands 5, 3 and 1 appear in the output template.
9220 (define_insn "sse4_2_pcmpestri"
9221 [(set (match_operand:SI 0 "register_operand" "=c,c")
9223 [(match_operand:V16QI 1 "register_operand" "x,x")
9224 (match_operand:SI 2 "register_operand" "a,a")
9225 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9226 (match_operand:SI 4 "register_operand" "d,d")
9227 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9229 (set (reg:CC FLAGS_REG)
9238 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9239 [(set_attr "type" "sselog")
9240 (set_attr "prefix_data16" "1")
9241 (set_attr "prefix_extra" "1")
9242 (set_attr "prefix" "maybe_vex")
9243 (set_attr "length_immediate" "1")
9244 (set_attr "btver2_decode" "vector")
9245 (set_attr "memory" "none,load")
9246 (set_attr "mode" "TI")])
;; pcmpestrm: produces the V16QI mask result in operand 0 (constraint "Yz")
;; and also sets FLAGS_REG.  Input operands are laid out exactly as in
;; sse4_2_pcmpestri; only operands 5, 3 and 1 appear in the output template.
9248 (define_insn "sse4_2_pcmpestrm"
9249 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9251 [(match_operand:V16QI 1 "register_operand" "x,x")
9252 (match_operand:SI 2 "register_operand" "a,a")
9253 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9254 (match_operand:SI 4 "register_operand" "d,d")
9255 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9257 (set (reg:CC FLAGS_REG)
9266 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9267 [(set_attr "type" "sselog")
9268 (set_attr "prefix_data16" "1")
9269 (set_attr "prefix_extra" "1")
9270 (set_attr "length_immediate" "1")
9271 (set_attr "prefix" "maybe_vex")
9272 (set_attr "btver2_decode" "vector")
9273 (set_attr "memory" "none,load")
9274 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: sets FLAGS_REG and merely clobbers the result
;; registers.  Four alternatives: the first two emit pcmpestrm and clobber a
;; "Yz" V16QI scratch (operand 0), the last two emit pcmpestri and clobber a
;; "c" SI scratch (operand 1); within each pair, operand 4 is a register or
;; memory respectively (see the "memory" attribute).
9276 (define_insn "sse4_2_pcmpestr_cconly"
9277 [(set (reg:CC FLAGS_REG)
9279 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9280 (match_operand:SI 3 "register_operand" "a,a,a,a")
9281 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9282 (match_operand:SI 5 "register_operand" "d,d,d,d")
9283 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9285 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9286 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9289 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9290 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9291 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9292 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9293 [(set_attr "type" "sselog")
9294 (set_attr "prefix_data16" "1")
9295 (set_attr "prefix_extra" "1")
9296 (set_attr "length_immediate" "1")
9297 (set_attr "memory" "none,load,none,load")
9298 (set_attr "btver2_decode" "vector,vector,vector,vector")
9299 (set_attr "prefix" "maybe_vex")
9300 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI index (operand 0,
;; constraint "c"), a V16QI mask (operand 1, constraint "Yz") and FLAGS_REG.
;; Inputs are two V16QI strings (operands 2 and 3) and an 8-bit immediate
;; (operand 4); unlike pcmpestr there are no explicit length operands.
;; The split code checks REG_UNUSED notes on curr_insn to see which results
;; are live and emits pcmpistri, pcmpistrm and/or the flags-only variant;
;; if nothing is live it emits a NOTE_INSN_DELETED.
;; NOTE(review): the guards on the pcmpistri/pcmpistrm emits are not visible
;; in this excerpt -- presumably `ecx' and `xmm0' respectively.
9302 (define_insn_and_split "sse4_2_pcmpistr"
9303 [(set (match_operand:SI 0 "register_operand" "=c,c")
9305 [(match_operand:V16QI 2 "register_operand" "x,x")
9306 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9307 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9309 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9315 (set (reg:CC FLAGS_REG)
9322 && can_create_pseudo_p ()"
9327 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9328 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9329 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9332 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9333 operands[3], operands[4]));
9335 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9336 operands[3], operands[4]));
9337 if (flags && !(ecx || xmm0))
9338 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9339 operands[2], operands[3],
9341 if (!(flags || ecx || xmm0))
9342 emit_note (NOTE_INSN_DELETED);
9346 [(set_attr "type" "sselog")
9347 (set_attr "prefix_data16" "1")
9348 (set_attr "prefix_extra" "1")
9349 (set_attr "length_immediate" "1")
9350 (set_attr "memory" "none,load")
9351 (set_attr "mode" "TI")])
;; Variant of sse4_2_pcmpistr whose second string is a V16QI memory operand
;; (operand 3) wrapped in UNSPEC_LOADU, so the pattern has a single
;; alternative with "memory" = "load".
;; The split code mirrors sse4_2_pcmpistr: it inspects REG_UNUSED notes and
;; emits pcmpistri, pcmpistrm and/or the flags-only variant, passing the
;; memory operand directly, or a NOTE_INSN_DELETED when no result is live.
9353 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
9354 [(set (match_operand:SI 0 "register_operand" "=c")
9356 [(match_operand:V16QI 2 "register_operand" "x")
9358 [(match_operand:V16QI 3 "memory_operand" "m")]
9360 (match_operand:SI 4 "const_0_to_255_operand" "n")]
9362 (set (match_operand:V16QI 1 "register_operand" "=Yz")
9365 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9368 (set (reg:CC FLAGS_REG)
9371 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9375 && can_create_pseudo_p ()"
9380 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9381 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9382 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9385 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9386 operands[3], operands[4]));
9388 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9389 operands[3], operands[4]));
9390 if (flags && !(ecx || xmm0))
9391 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9392 operands[2], operands[3],
9394 if (!(flags || ecx || xmm0))
9395 emit_note (NOTE_INSN_DELETED);
9399 [(set_attr "type" "sselog")
9400 (set_attr "prefix_data16" "1")
9401 (set_attr "prefix_extra" "1")
9402 (set_attr "length_immediate" "1")
9403 (set_attr "memory" "load")
9404 (set_attr "mode" "TI")])
;; pcmpistri: produces the SI index result in operand 0 (constraint "c") and
;; also sets FLAGS_REG.  Operands 1 and 2 are the V16QI strings (2 may be in
;; memory) and operand 3 is the 8-bit immediate.
9406 (define_insn "sse4_2_pcmpistri"
9407 [(set (match_operand:SI 0 "register_operand" "=c,c")
9409 [(match_operand:V16QI 1 "register_operand" "x,x")
9410 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9411 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9413 (set (reg:CC FLAGS_REG)
9420 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9421 [(set_attr "type" "sselog")
9422 (set_attr "prefix_data16" "1")
9423 (set_attr "prefix_extra" "1")
9424 (set_attr "length_immediate" "1")
9425 (set_attr "prefix" "maybe_vex")
9426 (set_attr "memory" "none,load")
9427 (set_attr "btver2_decode" "vector")
9428 (set_attr "mode" "TI")])
;; pcmpistrm: produces the V16QI mask result in operand 0 (constraint "Yz")
;; and also sets FLAGS_REG.  Input operands are laid out exactly as in
;; sse4_2_pcmpistri.
9430 (define_insn "sse4_2_pcmpistrm"
9431 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9433 [(match_operand:V16QI 1 "register_operand" "x,x")
9434 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9435 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9437 (set (reg:CC FLAGS_REG)
9444 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9445 [(set_attr "type" "sselog")
9446 (set_attr "prefix_data16" "1")
9447 (set_attr "prefix_extra" "1")
9448 (set_attr "length_immediate" "1")
9449 (set_attr "prefix" "maybe_vex")
9450 (set_attr "memory" "none,load")
9451 (set_attr "btver2_decode" "vector")
9452 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: sets FLAGS_REG and merely clobbers the result
;; registers.  Four alternatives: the first two emit pcmpistrm and clobber a
;; "Yz" V16QI scratch (operand 0), the last two emit pcmpistri and clobber a
;; "c" SI scratch (operand 1); within each pair, operand 3 is a register or
;; memory respectively (see the "memory" attribute).
9454 (define_insn "sse4_2_pcmpistr_cconly"
9455 [(set (reg:CC FLAGS_REG)
9457 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9458 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9459 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9461 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9462 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9465 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9466 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9467 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9468 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9469 [(set_attr "type" "sselog")
9470 (set_attr "prefix_data16" "1")
9471 (set_attr "prefix_extra" "1")
9472 (set_attr "length_immediate" "1")
9473 (set_attr "memory" "none,load,none,load")
9474 (set_attr "prefix" "maybe_vex")
9475 (set_attr "btver2_decode" "vector,vector,vector,vector")
9476 (set_attr "mode" "TI")])
9478 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9482 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the two addition codes used by the XOP multiply/add
;; patterns: wrapping `plus' and signed-saturating `ss_plus'.
9484 (define_code_iterator xop_plus [plus ss_plus])
;; Mnemonic fragments selected by the addition code: the "s"-suffixed form
;; ("macss"/"madcss") goes with ss_plus, the plain form with plus.
9486 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
9487 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
9489 ;; XOP parallel integer multiply/add instructions.
;; Four-operand XOP multiply/accumulate on VI24_128 vectors: operands 1 and 2
;; are the inputs (2 may be in memory), operand 3 is a register input and
;; operand 0 the result.  The mnemonic is vp<macs> followed by the element
;; suffix twice, so source and result elements have the same width.
9491 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
9492 [(set (match_operand:VI24_128 0 "register_operand" "=x")
9495 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
9496 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
9497 (match_operand:VI24_128 3 "register_operand" "x")))]
9499 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9500 [(set_attr "type" "ssemuladd")
9501 (set_attr "mode" "TI")])
;; vp<macs>dql: uses elements 0 and 2 of V4SI operands 1 and 2 (selected by
;; the parallels) together with V2DI operand 3, producing V2DI operand 0.
9503 (define_insn "xop_p<macs>dql"
9504 [(set (match_operand:V2DI 0 "register_operand" "=x")
9509 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9510 (parallel [(const_int 0) (const_int 2)])))
9513 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9514 (parallel [(const_int 0) (const_int 2)]))))
9515 (match_operand:V2DI 3 "register_operand" "x")))]
9517 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9518 [(set_attr "type" "ssemuladd")
9519 (set_attr "mode" "TI")])
;; vp<macs>dqh: uses elements 1 and 3 of V4SI operands 1 and 2 (selected by
;; the parallels) together with V2DI operand 3, producing V2DI operand 0.
9521 (define_insn "xop_p<macs>dqh"
9522 [(set (match_operand:V2DI 0 "register_operand" "=x")
9527 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9528 (parallel [(const_int 1) (const_int 3)])))
9531 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9532 (parallel [(const_int 1) (const_int 3)]))))
9533 (match_operand:V2DI 3 "register_operand" "x")))]
9535 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9536 [(set_attr "type" "ssemuladd")
9537 (set_attr "mode" "TI")])
9539 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: uses the odd elements (1, 3, 5, 7) of V8HI operands 1 and 2
;; (selected by the parallels) together with V4SI operand 3, producing V4SI
;; operand 0.
9540 (define_insn "xop_p<macs>wd"
9541 [(set (match_operand:V4SI 0 "register_operand" "=x")
9546 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9547 (parallel [(const_int 1) (const_int 3)
9548 (const_int 5) (const_int 7)])))
9551 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9552 (parallel [(const_int 1) (const_int 3)
9553 (const_int 5) (const_int 7)]))))
9554 (match_operand:V4SI 3 "register_operand" "x")))]
9556 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9557 [(set_attr "type" "ssemuladd")
9558 (set_attr "mode" "TI")])
;; vp<madcs>wd: uses both the even (0, 2, 4, 6) and the odd (1, 3, 5, 7)
;; elements of V8HI operands 1 and 2 -- two selections each, per the four
;; parallels -- together with V4SI operand 3, producing V4SI operand 0.
9560 (define_insn "xop_p<madcs>wd"
9561 [(set (match_operand:V4SI 0 "register_operand" "=x")
9567 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9568 (parallel [(const_int 0) (const_int 2)
9569 (const_int 4) (const_int 6)])))
9572 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9573 (parallel [(const_int 0) (const_int 2)
9574 (const_int 4) (const_int 6)]))))
9579 (parallel [(const_int 1) (const_int 3)
9580 (const_int 5) (const_int 7)])))
9584 (parallel [(const_int 1) (const_int 3)
9585 (const_int 5) (const_int 7)])))))
9586 (match_operand:V4SI 3 "register_operand" "x")))]
9588 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9589 [(set_attr "type" "ssemuladd")
9590 (set_attr "mode" "TI")])
9592 ;; XOP parallel XMM conditional moves
;; vpcmov on any V-iterator mode: operand 3 is listed first in the RTL and
;; operands 1 and 2 follow it.  The two alternatives allow either operand 3
;; or operand 2 -- but not both -- to be a memory operand.
9593 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
9594 [(set (match_operand:V 0 "register_operand" "=x,x")
9596 (match_operand:V 3 "nonimmediate_operand" "x,m")
9597 (match_operand:V 1 "register_operand" "x,x")
9598 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
9600 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9601 [(set_attr "type" "sse4arg")])
9603 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: combines the even-indexed bytes (0, 2, ..., 14) with the
;; odd-indexed bytes (1, 3, ..., 15) of V16QI operand 1, giving eight HI
;; results in operand 0.
9604 (define_insn "xop_phadd<u>bw"
9605 [(set (match_operand:V8HI 0 "register_operand" "=x")
9609 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9610 (parallel [(const_int 0) (const_int 2)
9611 (const_int 4) (const_int 6)
9612 (const_int 8) (const_int 10)
9613 (const_int 12) (const_int 14)])))
9617 (parallel [(const_int 1) (const_int 3)
9618 (const_int 5) (const_int 7)
9619 (const_int 9) (const_int 11)
9620 (const_int 13) (const_int 15)])))))]
9622 "vphadd<u>bw\t{%1, %0|%0, %1}"
9623 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: combines four byte selections of V16QI operand 1 -- elements
;; {0,4,8,12}, {1,5,9,13}, {2,6,10,14} and {3,7,11,15} -- so each SI result
;; in operand 0 draws on four adjacent source bytes.
9625 (define_insn "xop_phadd<u>bd"
9626 [(set (match_operand:V4SI 0 "register_operand" "=x")
9631 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9632 (parallel [(const_int 0) (const_int 4)
9633 (const_int 8) (const_int 12)])))
9637 (parallel [(const_int 1) (const_int 5)
9638 (const_int 9) (const_int 13)]))))
9643 (parallel [(const_int 2) (const_int 6)
9644 (const_int 10) (const_int 14)])))
9648 (parallel [(const_int 3) (const_int 7)
9649 (const_int 11) (const_int 15)]))))))]
9651 "vphadd<u>bd\t{%1, %0|%0, %1}"
9652 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: combines eight byte selections of V16QI operand 1 -- elements
;; {k, k+8} for k = 0..7 -- so each DI result in operand 0 draws on eight
;; adjacent source bytes.
9654 (define_insn "xop_phadd<u>bq"
9655 [(set (match_operand:V2DI 0 "register_operand" "=x")
9661 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9662 (parallel [(const_int 0) (const_int 8)])))
9666 (parallel [(const_int 1) (const_int 9)]))))
9671 (parallel [(const_int 2) (const_int 10)])))
9675 (parallel [(const_int 3) (const_int 11)])))))
9681 (parallel [(const_int 4) (const_int 12)])))
9685 (parallel [(const_int 5) (const_int 13)]))))
9690 (parallel [(const_int 6) (const_int 14)])))
9694 (parallel [(const_int 7) (const_int 15)])))))))]
9696 "vphadd<u>bq\t{%1, %0|%0, %1}"
9697 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: combines the even-indexed words (0, 2, 4, 6) with the
;; odd-indexed words (1, 3, 5, 7) of V8HI operand 1, giving four SI results
;; in operand 0.
9699 (define_insn "xop_phadd<u>wd"
9700 [(set (match_operand:V4SI 0 "register_operand" "=x")
9704 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9705 (parallel [(const_int 0) (const_int 2)
9706 (const_int 4) (const_int 6)])))
9710 (parallel [(const_int 1) (const_int 3)
9711 (const_int 5) (const_int 7)])))))]
9713 "vphadd<u>wd\t{%1, %0|%0, %1}"
9714 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: combines four word selections of V8HI operand 1 -- elements
;; {0,4}, {1,5}, {2,6} and {3,7} -- so each DI result in operand 0 draws on
;; four adjacent source words.
9716 (define_insn "xop_phadd<u>wq"
9717 [(set (match_operand:V2DI 0 "register_operand" "=x")
9722 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9723 (parallel [(const_int 0) (const_int 4)])))
9727 (parallel [(const_int 1) (const_int 5)]))))
9732 (parallel [(const_int 2) (const_int 6)])))
9736 (parallel [(const_int 3) (const_int 7)]))))))]
9738 "vphadd<u>wq\t{%1, %0|%0, %1}"
9739 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: combines elements {0,2} with elements {1,3} of V4SI operand
;; 1, giving two DI results in operand 0.
9741 (define_insn "xop_phadd<u>dq"
9742 [(set (match_operand:V2DI 0 "register_operand" "=x")
9746 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9747 (parallel [(const_int 0) (const_int 2)])))
9751 (parallel [(const_int 1) (const_int 3)])))))]
9753 "vphadd<u>dq\t{%1, %0|%0, %1}"
9754 [(set_attr "type" "sseiadd1")])
;; vphsubbw: combines the even-indexed bytes (0, 2, ..., 14) with the
;; odd-indexed bytes (1, 3, ..., 15) of V16QI operand 1, giving eight HI
;; results in operand 0.  The even selection appears first in the RTL.
9756 (define_insn "xop_phsubbw"
9757 [(set (match_operand:V8HI 0 "register_operand" "=x")
9761 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9762 (parallel [(const_int 0) (const_int 2)
9763 (const_int 4) (const_int 6)
9764 (const_int 8) (const_int 10)
9765 (const_int 12) (const_int 14)])))
9769 (parallel [(const_int 1) (const_int 3)
9770 (const_int 5) (const_int 7)
9771 (const_int 9) (const_int 11)
9772 (const_int 13) (const_int 15)])))))]
9774 "vphsubbw\t{%1, %0|%0, %1}"
9775 [(set_attr "type" "sseiadd1")])
;; vphsubwd: combines the even-indexed words (0, 2, 4, 6) with the
;; odd-indexed words (1, 3, 5, 7) of V8HI operand 1, giving four SI results
;; in operand 0.  The even selection appears first in the RTL.
9777 (define_insn "xop_phsubwd"
9778 [(set (match_operand:V4SI 0 "register_operand" "=x")
9782 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9783 (parallel [(const_int 0) (const_int 2)
9784 (const_int 4) (const_int 6)])))
9788 (parallel [(const_int 1) (const_int 3)
9789 (const_int 5) (const_int 7)])))))]
9791 "vphsubwd\t{%1, %0|%0, %1}"
9792 [(set_attr "type" "sseiadd1")])
;; vphsubdq: combines elements {0,2} with elements {1,3} of V4SI operand 1,
;; giving two DI results in operand 0.  The even selection appears first in
;; the RTL.
9794 (define_insn "xop_phsubdq"
9795 [(set (match_operand:V2DI 0 "register_operand" "=x")
9799 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9800 (parallel [(const_int 0) (const_int 2)])))
9804 (parallel [(const_int 1) (const_int 3)])))))]
9806 "vphsubdq\t{%1, %0|%0, %1}"
9807 [(set_attr "type" "sseiadd1")])
9809 ;; XOP permute instructions
;; vpperm on V16QI, modelled as UNSPEC_XOP_PERMUTE over operands 1, 2 and 3.
;; Either operand 2 or operand 3 may be in memory; the insn condition
;; rejects the case where both are MEMs.
9810 (define_insn "xop_pperm"
9811 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9813 [(match_operand:V16QI 1 "register_operand" "x,x")
9814 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9815 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
9816 UNSPEC_XOP_PERMUTE))]
9817 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9818 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9819 [(set_attr "type" "sse4arg")
9820 (set_attr "mode" "TI")])
9822 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result using vpperm.
;; V16QI operand 3 is attached with `use' -- it is the third vpperm source
;; in the template.  Operands 2 and 3 may not both be MEMs.
9823 (define_insn "xop_pperm_pack_v2di_v4si"
9824 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9827 (match_operand:V2DI 1 "register_operand" "x,x"))
9829 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
9830 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9831 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9832 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9833 [(set_attr "type" "sse4arg")
9834 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result using vpperm.
;; V16QI operand 3 is attached with `use' -- it is the third vpperm source
;; in the template.  Operands 2 and 3 may not both be MEMs.
9836 (define_insn "xop_pperm_pack_v4si_v8hi"
9837 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9840 (match_operand:V4SI 1 "register_operand" "x,x"))
9842 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
9843 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9844 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9845 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9846 [(set_attr "type" "sse4arg")
9847 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  V16QI operand 3 is attached with `use' -- it is the third vpperm
;; source in the template.  Operands 2 and 3 may not both be MEMs.
9849 (define_insn "xop_pperm_pack_v8hi_v16qi"
9850 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9853 (match_operand:V8HI 1 "register_operand" "x,x"))
9855 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
9856 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9857 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9858 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9859 [(set_attr "type" "sse4arg")
9860 (set_attr "mode" "TI")])
9862 ;; XOP packed rotate instructions
;; Rotate-left expander for 128-bit integer vectors (VI_128) with an SImode
;; count in operand 2.  When the count is not an in-range constant
;; (const_0_to_<sserotatemax>_operand), it is converted to the element mode
;; if necessary, broadcast to every element via vec_init<mode>, and the
;; per-element variable rotate xop_vrotl<mode>3 is emitted instead.
9863 (define_expand "rotl<mode>3"
9864 [(set (match_operand:VI_128 0 "register_operand")
9866 (match_operand:VI_128 1 "nonimmediate_operand")
9867 (match_operand:SI 2 "general_operand")))]
9870 /* If we were given a scalar, convert it to parallel */
9871 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9873 rtvec vs = rtvec_alloc (<ssescalarnum>);
9874 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9875 rtx reg = gen_reg_rtx (<MODE>mode);
9876 rtx op2 = operands[2];
9879 if (GET_MODE (op2) != <ssescalarmode>mode)
9881 op2 = gen_reg_rtx (<ssescalarmode>mode);
9882 convert_move (op2, operands[2], false);
9885 for (i = 0; i < <ssescalarnum>; i++)
9886 RTVEC_ELT (vs, i) = op2;
9888 emit_insn (gen_vec_init<mode> (reg, par));
9889 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for 128-bit integer vectors (VI_128) with an SImode
;; count in operand 2.  When the count is not an in-range constant, it is
;; converted to the element mode if necessary and broadcast via
;; vec_init<mode>; the broadcast vector is then negated and fed to
;; xop_vrotl<mode>3, so the right rotate is done as a left rotate by the
;; negated count.
9894 (define_expand "rotr<mode>3"
9895 [(set (match_operand:VI_128 0 "register_operand")
9897 (match_operand:VI_128 1 "nonimmediate_operand")
9898 (match_operand:SI 2 "general_operand")))]
9901 /* If we were given a scalar, convert it to parallel */
9902 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9904 rtvec vs = rtvec_alloc (<ssescalarnum>);
9905 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9906 rtx neg = gen_reg_rtx (<MODE>mode);
9907 rtx reg = gen_reg_rtx (<MODE>mode);
9908 rtx op2 = operands[2];
9911 if (GET_MODE (op2) != <ssescalarmode>mode)
9913 op2 = gen_reg_rtx (<ssescalarmode>mode);
9914 convert_move (op2, operands[2], false);
9917 for (i = 0; i < <ssescalarnum>; i++)
9918 RTVEC_ELT (vs, i) = op2;
9920 emit_insn (gen_vec_init<mode> (reg, par));
9921 emit_insn (gen_neg<mode>2 (neg, reg));
9922 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate: VI_128 operand 1 (register or memory) is
;; rotated by constant operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand, using vprot<ssemodesuffix>.
9927 (define_insn "xop_rotl<mode>3"
9928 [(set (match_operand:VI_128 0 "register_operand" "=x")
9930 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9931 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9933 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9934 [(set_attr "type" "sseishft")
9935 (set_attr "length_immediate" "1")
9936 (set_attr "mode" "TI")])
;; Rotate-right by an immediate, emitted with the same vprot<ssemodesuffix>
;; mnemonic as the left rotate.  The output code prints %3, a count computed
;; as (element bit size - operand 2).
;; NOTE(review): the left-hand side of that assignment is not visible in
;; this excerpt -- presumably operands[3].
9938 (define_insn "xop_rotr<mode>3"
9939 [(set (match_operand:VI_128 0 "register_operand" "=x")
9941 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9942 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9946 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
9947 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
9949 [(set_attr "type" "sseishft")
9950 (set_attr "length_immediate" "1")
9951 (set_attr "mode" "TI")])
;; Per-element variable rotate-right: negates the count vector (operand 2)
;; into a temporary and emits xop_vrotl<mode>3 with the negated counts.
9953 (define_expand "vrotr<mode>3"
9954 [(match_operand:VI_128 0 "register_operand")
9955 (match_operand:VI_128 1 "register_operand")
9956 (match_operand:VI_128 2 "register_operand")]
9959 rtx reg = gen_reg_rtx (<MODE>mode);
9960 emit_insn (gen_neg<mode>2 (reg, operands[2]));
9961 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate-left with a per-element count vector (operand 2).
;; All three operands are forwarded unchanged to xop_vrotl<mode>3.
9965 (define_expand "vrotl<mode>3"
9966 [(match_operand:VI_128 0 "register_operand")
9967 (match_operand:VI_128 1 "register_operand")
9968 (match_operand:VI_128 2 "register_operand")]
9971 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; XOP variable rotate: the counts come from vector operand 2 and the
;; pattern is an if_then_else that also refers to the negation of
;; operand 2.  The insn condition requires TARGET_XOP and rejects the case
;; where operands 1 and 2 are both memory; the two alternatives are
;; (operand 1 = reg-or-mem, operand 2 = reg) and (operand 1 = reg,
;; operand 2 = mem).
9975 (define_insn "xop_vrotl<mode>3"
9976 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9977 (if_then_else:VI_128
9979 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9982 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9986 (neg:VI_128 (match_dup 2)))))]
9987 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9988 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9989 [(set_attr "type" "sseishft")
9990 (set_attr "prefix_data16" "0")
9991 (set_attr "prefix_extra" "2")
9992 (set_attr "mode" "TI")])
9994 ;; XOP packed shift instructions.
;; Per-element logical right shift for the VI12_128 modes.  The expander
;; negates the count vector (operand 2) into a fresh pseudo and emits
;; xop_shl<mode>3 with the negated counts.
9995 (define_expand "vlshr<mode>3"
9996 [(set (match_operand:VI12_128 0 "register_operand")
9998 (match_operand:VI12_128 1 "register_operand")
9999 (match_operand:VI12_128 2 "nonimmediate_operand")))]
10002 rtx neg = gen_reg_rtx (<MODE>mode);
10003 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10004 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element logical right shift for the VI48_128 modes, enabled for
;; either AVX2 or XOP.  The preparation code negates the count vector and
;; emits xop_shl<mode>3.
;; NOTE(review): presumably this lowering is only taken on the XOP-only
;; path -- confirm against the guard around these statements.
10008 (define_expand "vlshr<mode>3"
10009 [(set (match_operand:VI48_128 0 "register_operand")
10011 (match_operand:VI48_128 1 "register_operand")
10012 (match_operand:VI48_128 2 "nonimmediate_operand")))]
10013 "TARGET_AVX2 || TARGET_XOP"
10017 rtx neg = gen_reg_rtx (<MODE>mode);
10018 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10019 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element logical right shift for the 256-bit VI48_256 modes.
;; Operands 0 and 1 must be registers; the count vector (operand 2) may be
;; a register or memory.
10024 (define_expand "vlshr<mode>3"
10025 [(set (match_operand:VI48_256 0 "register_operand")
10027 (match_operand:VI48_256 1 "register_operand")
10028 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Per-element arithmetic right shift (ashiftrt) for the VI128_128 modes.
;; The count vector is negated into a fresh pseudo and the shift is
;; emitted through xop_sha<mode>3.
10031 (define_expand "vashr<mode>3"
10032 [(set (match_operand:VI128_128 0 "register_operand")
10033 (ashiftrt:VI128_128
10034 (match_operand:VI128_128 1 "register_operand")
10035 (match_operand:VI128_128 2 "nonimmediate_operand")))]
10038 rtx neg = gen_reg_rtx (<MODE>mode);
10039 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10040 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic right shift for V4SI, enabled for either AVX2 or
;; XOP.  The preparation code negates the count vector and emits
;; xop_shav4si3.
;; NOTE(review): presumably this lowering is only taken on the XOP-only
;; path -- confirm against the guard around these statements.
10044 (define_expand "vashrv4si3"
10045 [(set (match_operand:V4SI 0 "register_operand")
10046 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
10047 (match_operand:V4SI 2 "nonimmediate_operand")))]
10048 "TARGET_AVX2 || TARGET_XOP"
10052 rtx neg = gen_reg_rtx (V4SImode);
10053 emit_insn (gen_negv4si2 (neg, operands[2]));
10054 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Per-element arithmetic right shift for V8SI.  Operands 0 and 1 must be
;; registers; the count vector (operand 2) may be a register or memory.
10059 (define_expand "vashrv8si3"
10060 [(set (match_operand:V8SI 0 "register_operand")
10061 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
10062 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Per-element left shift for the VI12_128 modes.  The operands are
;; forwarded unchanged (no negation of the counts) to xop_sha<mode>3.
10065 (define_expand "vashl<mode>3"
10066 [(set (match_operand:VI12_128 0 "register_operand")
10068 (match_operand:VI12_128 1 "register_operand")
10069 (match_operand:VI12_128 2 "nonimmediate_operand")))]
10072 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element left shift for the VI48_128 modes, enabled for either AVX2
;; or XOP.  The preparation code forces the count vector into a register
;; and emits xop_sha<mode>3 with the counts unchanged.
;; NOTE(review): presumably this lowering is only taken on the XOP-only
;; path -- confirm against the guard around these statements.
10076 (define_expand "vashl<mode>3"
10077 [(set (match_operand:VI48_128 0 "register_operand")
10079 (match_operand:VI48_128 1 "register_operand")
10080 (match_operand:VI48_128 2 "nonimmediate_operand")))]
10081 "TARGET_AVX2 || TARGET_XOP"
10085 operands[2] = force_reg (<MODE>mode, operands[2]);
10086 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element left shift for the 256-bit VI48_256 modes.  Operands 0 and
;; 1 must be registers; the count vector (operand 2) may be a register or
;; memory.
10091 (define_expand "vashl<mode>3"
10092 [(set (match_operand:VI48_256 0 "register_operand")
10094 (match_operand:VI48_256 1 "register_operand")
10095 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; XOP vpsha<ssemodesuffix>: shift of operand 1 by the per-element counts
;; in operand 2.  The pattern is an if_then_else that also refers to the
;; negation of operand 2.  Requires TARGET_XOP and forbids operands 1 and
;; 2 both being memory; the alternatives are (1 = reg-or-mem, 2 = reg) and
;; (1 = reg, 2 = mem).
10098 (define_insn "xop_sha<mode>3"
10099 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10100 (if_then_else:VI_128
10102 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10105 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10109 (neg:VI_128 (match_dup 2)))))]
10110 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10111 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10112 [(set_attr "type" "sseishft")
10113 (set_attr "prefix_data16" "0")
10114 (set_attr "prefix_extra" "2")
10115 (set_attr "mode" "TI")])
;; XOP vpshl<ssemodesuffix>: shift of operand 1 by the per-element counts
;; in operand 2.  Same operand layout, condition and attributes as
;; xop_sha<mode>3; only the mnemonic differs.
10117 (define_insn "xop_shl<mode>3"
10118 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10119 (if_then_else:VI_128
10121 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10124 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10128 (neg:VI_128 (match_dup 2)))))]
10129 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10130 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10131 [(set_attr "type" "sseishft")
10132 (set_attr "prefix_data16" "0")
10133 (set_attr "prefix_extra" "2")
10134 (set_attr "mode" "TI")])
;; Byte-element vector shift by a scalar SImode count, for every code in
;; any_shift.  For V16QI on XOP the scalar count is replicated into a
;; V16QI vector and the shift is emitted through xop_shlv16qi3 or
;; xop_shav16qi3; codes other than ASHIFT need the count negated.  The
;; call to ix86_expand_vecop_qihi is presumably the fallback for all
;; remaining cases -- confirm against the surrounding control flow.
10136 (define_expand "<shift_insn><mode>3"
10137 [(set (match_operand:VI1_AVX2 0 "register_operand")
10138 (any_shift:VI1_AVX2
10139 (match_operand:VI1_AVX2 1 "register_operand")
10140 (match_operand:SI 2 "nonmemory_operand")))]
10143 if (TARGET_XOP && <MODE>mode == V16QImode)
10145 bool negate = false;
10146 rtx (*gen) (rtx, rtx, rtx);
10150 if (<CODE> != ASHIFT)
/* A constant count is negated at expand time.  */
10152 if (CONST_INT_P (operands[2]))
10153 operands[2] = GEN_INT (-INTVAL (operands[2]));
/* Replicate the scalar count into all 16 byte elements.  */
10157 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
10158 for (i = 0; i < 16; i++)
10159 XVECEXP (par, 0, i) = operands[2];
10161 tmp = gen_reg_rtx (V16QImode);
10162 emit_insn (gen_vec_initv16qi (tmp, par));
10165 emit_insn (gen_negv16qi2 (tmp, tmp));
/* LSHIFTRT goes through xop_shl; every other code uses xop_sha.  */
10167 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
10168 emit_insn (gen (operands[0], operands[1], tmp));
10171 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; V2DI shift by a scalar DImode count (named ashr, i.e. arithmetic shift
;; right), emitted through xop_shav2di3.  The scalar count is replicated
;; into both elements of a V2DI register; a constant count is negated at
;; expand time, and gen_negv2di2 negates the vector in place (presumably
;; only for non-constant counts, tracked by `negate' -- confirm).
10175 (define_expand "ashrv2di3"
10176 [(set (match_operand:V2DI 0 "register_operand")
10178 (match_operand:V2DI 1 "register_operand")
10179 (match_operand:DI 2 "nonmemory_operand")))]
10182 rtx reg = gen_reg_rtx (V2DImode);
10184 bool negate = false;
10187 if (CONST_INT_P (operands[2]))
10188 operands[2] = GEN_INT (-INTVAL (operands[2]));
10192 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
10193 for (i = 0; i < 2; i++)
10194 XVECEXP (par, 0, i) = operands[2];
10196 emit_insn (gen_vec_initv2di (reg, par));
10199 emit_insn (gen_negv2di2 (reg, reg));
10201 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
10205 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> for every mode in FMAMODE: one register-or-memory
;; source (operand 1), register destination (operand 0).
10206 (define_insn "xop_frcz<mode>2"
10207 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
10209 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
10212 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
10213 [(set_attr "type" "ssecvt1")
10214 (set_attr "mode" "<MODE>")])
;; Expander for the vfrcz form on 128-bit float vectors (VF_128).  The
;; preparation code supplies the all-zero vector constant of the mode as
;; operands[3].
10217 (define_expand "xop_vmfrcz<mode>2"
10218 [(set (match_operand:VF_128 0 "register_operand")
10221 [(match_operand:VF_128 1 "nonimmediate_operand")]
10227 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn behind xop_vmfrcz<mode>2.  Operand 2 must be a zero constant
;; (const0_operand); the emitted mnemonic uses the scalar suffix
;; (vfrcz<ssescalarmodesuffix>).
10230 (define_insn "*xop_vmfrcz_<mode>"
10231 [(set (match_operand:VF_128 0 "register_operand" "=x")
10234 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
10236 (match_operand:VF_128 2 "const0_operand")
10239 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
10240 [(set_attr "type" "ssecvt1")
10241 (set_attr "mode" "<MODE>")])
;; XOP vpcom integer compare.  Operand 1 is the comparison operator itself
;; (any code accepted by ix86_comparison_int_operator) applied to operands
;; 2 and 3; the template prints it with %Y1 (presumably the condition
;; suffix of the mnemonic).
;; NOTE(review): "type" is "sse4arg" here while the unsigned variants of
;; this pattern use "ssecmp" -- confirm the difference is intentional.
10243 (define_insn "xop_maskcmp<mode>3"
10244 [(set (match_operand:VI_128 0 "register_operand" "=x")
10245 (match_operator:VI_128 1 "ix86_comparison_int_operator"
10246 [(match_operand:VI_128 2 "register_operand" "x")
10247 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10249 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10250 [(set_attr "type" "sse4arg")
10251 (set_attr "prefix_data16" "0")
10252 (set_attr "prefix_rep" "0")
10253 (set_attr "prefix_extra" "2")
10254 (set_attr "length_immediate" "1")
10255 (set_attr "mode" "TI")])
;; Unsigned XOP vpcom compare.  Operand 1 is a comparison operator
;; accepted by ix86_comparison_uns_operator applied to operands 2 and 3;
;; the mnemonic carries a "u" before the mode suffix.
10257 (define_insn "xop_maskcmp_uns<mode>3"
10258 [(set (match_operand:VI_128 0 "register_operand" "=x")
10259 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
10260 [(match_operand:VI_128 2 "register_operand" "x")
10261 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10263 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10264 [(set_attr "type" "ssecmp")
10265 (set_attr "prefix_data16" "0")
10266 (set_attr "prefix_rep" "0")
10267 (set_attr "prefix_extra" "2")
10268 (set_attr "length_immediate" "1")
10269 (set_attr "mode" "TI")])
10271 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
10272 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
10273 ;; the exact instruction generated for the intrinsic.
;; Here the unsigned comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP; the
;; output template is the same vpcom...u form used by
;; xop_maskcmp_uns<mode>3.
10274 (define_insn "xop_maskcmp_uns2<mode>3"
10275 [(set (match_operand:VI_128 0 "register_operand" "=x")
10277 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
10278 [(match_operand:VI_128 2 "register_operand" "x")
10279 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
10280 UNSPEC_XOP_UNSIGNED_CMP))]
10282 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10283 [(set_attr "type" "ssecmp")
10284 (set_attr "prefix_data16" "0")
10285 (set_attr "prefix_extra" "2")
10286 (set_attr "length_immediate" "1")
10287 (set_attr "mode" "TI")])
10289 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
10290 ;; being added here to be complete.
;; Operand 3 is a constant selector: a non-zero value emits
;; vpcomtrue<ssemodesuffix>, zero emits vpcomfalse<ssemodesuffix>.
;; Operands 1 and 2 are passed through as the instruction's sources.
10291 (define_insn "xop_pcom_tf<mode>3"
10292 [(set (match_operand:VI_128 0 "register_operand" "=x")
10294 [(match_operand:VI_128 1 "register_operand" "x")
10295 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
10296 (match_operand:SI 3 "const_int_operand" "n")]
10297 UNSPEC_XOP_TRUEFALSE))]
10300 return ((INTVAL (operands[3]) != 0)
10301 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10302 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
10304 [(set_attr "type" "ssecmp")
10305 (set_attr "prefix_data16" "0")
10306 (set_attr "prefix_extra" "2")
10307 (set_attr "length_immediate" "1")
10308 (set_attr "mode" "TI")])
;; XOP vpermil2<ssemodesuffix>: two float-vector sources (operands 1 and
;; 2), an integer-vector selector of the matching <sseintvecmode>
;; (operand 3, register or memory) and an immediate in the range 0..3
;; (operand 4).
10310 (define_insn "xop_vpermil2<mode>3"
10311 [(set (match_operand:VF 0 "register_operand" "=x")
10313 [(match_operand:VF 1 "register_operand" "x")
10314 (match_operand:VF 2 "nonimmediate_operand" "%x")
10315 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
10316 (match_operand:SI 4 "const_0_to_3_operand" "n")]
10319 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
10320 [(set_attr "type" "sse4arg")
10321 (set_attr "length_immediate" "1")
10322 (set_attr "mode" "<MODE>")])
10324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI.  Alternative 0 (isa noavx) is the
;; two-operand form with operand 1 tied to the destination ("0");
;; alternative 1 (isa avx) is the VEX three-operand form.
10326 (define_insn "aesenc"
10327 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10328 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10329 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10333 aesenc\t{%2, %0|%0, %2}
10334 vaesenc\t{%2, %1, %0|%0, %1, %2}"
10335 [(set_attr "isa" "noavx,avx")
10336 (set_attr "type" "sselog1")
10337 (set_attr "prefix_extra" "1")
10338 (set_attr "prefix" "orig,vex")
10339 (set_attr "btver2_decode" "double,double")
10340 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI (UNSPEC_AESENCLAST).  Alternative 0
;; (isa noavx) ties operand 1 to the destination; alternative 1 (isa avx)
;; is the VEX three-operand form.
10342 (define_insn "aesenclast"
10343 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10344 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10345 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10346 UNSPEC_AESENCLAST))]
10349 aesenclast\t{%2, %0|%0, %2}
10350 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
10351 [(set_attr "isa" "noavx,avx")
10352 (set_attr "type" "sselog1")
10353 (set_attr "prefix_extra" "1")
10354 (set_attr "prefix" "orig,vex")
10355 (set_attr "btver2_decode" "double,double")
10356 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI.  Alternative 0 (isa noavx) ties operand 1 to
;; the destination; alternative 1 (isa avx) is the VEX three-operand form.
10358 (define_insn "aesdec"
10359 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10360 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10361 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10365 aesdec\t{%2, %0|%0, %2}
10366 vaesdec\t{%2, %1, %0|%0, %1, %2}"
10367 [(set_attr "isa" "noavx,avx")
10368 (set_attr "type" "sselog1")
10369 (set_attr "prefix_extra" "1")
10370 (set_attr "prefix" "orig,vex")
10371 (set_attr "btver2_decode" "double,double")
10372 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI (UNSPEC_AESDECLAST).  Alternative 0
;; (isa noavx) ties operand 1 to the destination; alternative 1 (isa avx)
;; is the VEX three-operand form.
10374 (define_insn "aesdeclast"
10375 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10376 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10377 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10378 UNSPEC_AESDECLAST))]
10381 aesdeclast\t{%2, %0|%0, %2}
10382 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
10383 [(set_attr "isa" "noavx,avx")
10384 (set_attr "type" "sselog1")
10385 (set_attr "prefix_extra" "1")
10386 (set_attr "prefix" "orig,vex")
10387 (set_attr "btver2_decode" "double,double")
10388 (set_attr "mode" "TI")])
;; aesimc on V2DI with a single register-or-memory source.  One
;; alternative serves both encodings: the template starts with %v and the
;; "prefix" attribute is "maybe_vex".
10390 (define_insn "aesimc"
10391 [(set (match_operand:V2DI 0 "register_operand" "=x")
10392 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10395 "%vaesimc\t{%1, %0|%0, %1}"
10396 [(set_attr "type" "sselog1")
10397 (set_attr "prefix_extra" "1")
10398 (set_attr "prefix" "maybe_vex")
10399 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI (UNSPEC_AESKEYGENASSIST): register-or-memory
;; source (operand 1) plus an 8-bit immediate (operand 2, 0..255).  One
;; alternative serves both encodings via %v / "maybe_vex".
10401 (define_insn "aeskeygenassist"
10402 [(set (match_operand:V2DI 0 "register_operand" "=x")
10403 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
10404 (match_operand:SI 2 "const_0_to_255_operand" "n")]
10405 UNSPEC_AESKEYGENASSIST))]
10407 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
10408 [(set_attr "type" "sselog1")
10409 (set_attr "prefix_extra" "1")
10410 (set_attr "length_immediate" "1")
10411 (set_attr "prefix" "maybe_vex")
10412 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI with an 8-bit immediate (operand 3,
;; 0..255).  Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the VEX form with a separate destination.
10414 (define_insn "pclmulqdq"
10415 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10416 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10417 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
10418 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10422 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
10423 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10424 [(set_attr "isa" "noavx,avx")
10425 (set_attr "type" "sselog1")
10426 (set_attr "prefix_extra" "1")
10427 (set_attr "length_immediate" "1")
10428 (set_attr "prefix" "orig,vex")
10429 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.  Element 0 is
;; an unspec_volatile marker; it is followed by one SET per SSE register
;; (16 registers when TARGET_64BIT, otherwise 8), each assigning the
;; V8SImode zero constant to that register.
10431 (define_expand "avx_vzeroall"
10432 [(match_par_dup 0 [(const_int 0)])]
10435 int nregs = TARGET_64BIT ? 16 : 8;
10438 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
10440 XVECEXP (operands[0], 0, 0)
10441 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
10444 for (regno = 0; regno < nregs; regno++)
10445 XVECEXP (operands[0], 0, regno + 1)
10446 = gen_rtx_SET (VOIDmode,
10447 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
10448 CONST0_RTX (V8SImode));
;; Matcher for the PARALLEL built by the avx_vzeroall expander: the whole
;; parallel is checked by the vzeroall_operation predicate and its first
;; element must be the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; describe an instruction with no modrm byte and no memory access.
10451 (define_insn "*avx_vzeroall"
10452 [(match_parallel 0 "vzeroall_operation"
10453 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
10456 [(set_attr "type" "sse")
10457 (set_attr "modrm" "0")
10458 (set_attr "memory" "none")
10459 (set_attr "prefix" "vex")
10460 (set_attr "btver2_decode" "vector")
10461 (set_attr "mode" "OI")])
10463 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
10464 ;; if the upper 128bits are unused.
;; Modeled as a bare unspec_volatile (UNSPECV_VZEROUPPER) with no
;; operands; the attributes describe an instruction with no modrm byte and
;; no memory access.
10465 (define_insn "avx_vzeroupper"
10466 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
10469 [(set_attr "type" "sse")
10470 (set_attr "modrm" "0")
10471 (set_attr "memory" "none")
10472 (set_attr "prefix" "vex")
10473 (set_attr "btver2_decode" "vector")
10474 (set_attr "mode" "OI")])
;; Maps an integer vector mode to the 128-bit vector mode with the same
;; element type: each 256-bit mode maps to its 128-bit counterpart and
;; each 128-bit mode maps to itself.
10476 (define_mode_attr AVXTOSSEMODE
10477 [(V4DI "V2DI") (V2DI "V2DI")
10478 (V8SI "V4SI") (V4SI "V4SI")
10479 (V16HI "V8HI") (V8HI "V8HI")
10480 (V32QI "V16QI") (V16QI "V16QI")])
;; vpbroadcast<ssemodesuffix>: selects element 0 of operand 1 and
;; broadcasts it into operand 0.  The source always has the 128-bit mode
;; given by <AVXTOSSEMODE>, whatever the width of the destination.
10482 (define_insn "avx2_pbroadcast<mode>"
10483 [(set (match_operand:VI 0 "register_operand" "=x")
10485 (vec_select:<ssescalarmode>
10486 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
10487 (parallel [(const_int 0)]))))]
10489 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
10490 [(set_attr "type" "ssemov")
10491 (set_attr "prefix_extra" "1")
10492 (set_attr "prefix" "vex")
10493 (set_attr "mode" "<sseinsnmode>")])
;; Variant of avx2_pbroadcast<mode> whose source (operand 1) is itself a
;; 256-bit vector: element 0 is selected and duplicated across the
;; destination.  The template prints the source with %x1 (presumably its
;; 128-bit register name -- confirm against the operand-print code).
10495 (define_insn "avx2_pbroadcast<mode>_1"
10496 [(set (match_operand:VI_256 0 "register_operand" "=x")
10497 (vec_duplicate:VI_256
10498 (vec_select:<ssescalarmode>
10499 (match_operand:VI_256 1 "nonimmediate_operand" "xm")
10500 (parallel [(const_int 0)]))))]
10502 "vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
10503 [(set_attr "type" "ssemov")
10504 (set_attr "prefix_extra" "1")
10505 (set_attr "prefix" "vex")
10506 (set_attr "mode" "<sseinsnmode>")])
;; vperm<ssemodesuffix> with a variable selector: operand 1 is the data
;; (register or memory) and operand 2 is a V8SI register.  Note the
;; template order: in Intel syntax operand 2 is printed before operand 1.
10508 (define_insn "avx2_permvar<mode>"
10509 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10511 [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm")
10512 (match_operand:V8SI 2 "register_operand" "x")]
10515 "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
10516 [(set_attr "type" "sselog")
10517 (set_attr "prefix" "vex")
10518 (set_attr "mode" "OI")])
;; Expander taking the permute control as one 8-bit immediate
;; (operand 2).  The immediate is split into four 2-bit element selectors
;; (bits 1:0, 3:2, 5:4, 7:6) that are passed as separate operands to
;; avx2_perm<mode>_1.
10520 (define_expand "avx2_perm<mode>"
10521 [(match_operand:VI8F_256 0 "register_operand")
10522 (match_operand:VI8F_256 1 "nonimmediate_operand")
10523 (match_operand:SI 2 "const_0_to_255_operand")]
10526 int mask = INTVAL (operands[2]);
10527 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
10528 GEN_INT ((mask >> 0) & 3),
10529 GEN_INT ((mask >> 2) & 3),
10530 GEN_INT ((mask >> 4) & 3),
10531 GEN_INT ((mask >> 6) & 3)));
;; Four-element permute written as a vec_select whose selector operands
;; 2..5 are each a constant in 0..3.  The output code packs the four
;; selectors back into a single immediate (two bits each, operand 2 in the
;; low bits), overwrites operands[2] with it and prints
;; vperm<ssemodesuffix>.
10535 (define_insn "avx2_perm<mode>_1"
10536 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10537 (vec_select:VI8F_256
10538 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm")
10539 (parallel [(match_operand 2 "const_0_to_3_operand")
10540 (match_operand 3 "const_0_to_3_operand")
10541 (match_operand 4 "const_0_to_3_operand")
10542 (match_operand 5 "const_0_to_3_operand")])))]
10546 mask |= INTVAL (operands[2]) << 0;
10547 mask |= INTVAL (operands[3]) << 2;
10548 mask |= INTVAL (operands[4]) << 4;
10549 mask |= INTVAL (operands[5]) << 6;
10550 operands[2] = GEN_INT (mask);
10551 return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10553 [(set_attr "type" "sselog")
10554 (set_attr "prefix" "vex")
10555 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 on V4DI: two sources (operand 1 a register, operand 2
;; register or memory) and an 8-bit immediate control (operand 3).
10557 (define_insn "avx2_permv2ti"
10558 [(set (match_operand:V4DI 0 "register_operand" "=x")
10560 [(match_operand:V4DI 1 "register_operand" "x")
10561 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
10562 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10565 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10566 [(set_attr "type" "sselog")
10567 (set_attr "prefix" "vex")
10568 (set_attr "mode" "OI")])
;; vbroadcastsd from a register: element 0 of the V2DF register in
;; operand 1 is duplicated into all elements of the V4DF destination.
10570 (define_insn "avx2_vec_dupv4df"
10571 [(set (match_operand:V4DF 0 "register_operand" "=x")
10572 (vec_duplicate:V4DF
10574 (match_operand:V2DF 1 "register_operand" "x")
10575 (parallel [(const_int 0)]))))]
10577 "vbroadcastsd\t{%1, %0|%0, %1}"
10578 [(set_attr "type" "sselog1")
10579 (set_attr "prefix" "vex")
10580 (set_attr "mode" "V4DF")])
10582 ;; Modes handled by AVX vec_dup patterns.
;; Only 256-bit vector modes with 32-bit or 64-bit elements are listed.
10583 (define_mode_iterator AVX_VEC_DUP_MODE
10584 [V8SI V8SF V4DI V4DF])
;; Broadcast of a scalar (operand 1) into every element of a 256-bit
;; vector.  Three alternatives, as given by the constraints and the "isa"
;; attribute:
;;   0: memory source, no ISA restriction;
;;   1: register source, avx2 only (source printed with %x1);
;;   2: register source, noavx2, discouraged by '?'.
10586 (define_insn "vec_dup<mode>"
10587 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
10588 (vec_duplicate:AVX_VEC_DUP_MODE
10589 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
10592 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
10593 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
10595 [(set_attr "type" "ssemov")
10596 (set_attr "prefix_extra" "1")
10597 (set_attr "prefix" "vex")
10598 (set_attr "isa" "*,avx2,noavx2")
10599 (set_attr "mode" "V8SF")])
;; vbroadcasti128 into a 256-bit integer vector.  The source (operand 1)
;; has the half-width mode and must be in memory ("m" is the only
;; alternative).
10601 (define_insn "avx2_vbroadcasti128_<mode>"
10602 [(set (match_operand:VI_256 0 "register_operand" "=x")
10604 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
10607 "vbroadcasti128\t{%1, %0|%0, %1}"
10608 [(set_attr "type" "ssemov")
10609 (set_attr "prefix_extra" "1")
10610 (set_attr "prefix" "vex")
10611 (set_attr "mode" "OI")])
;; Rewrite (presumably a define_split) of a register-source vec_duplicate,
;; applied after reload on AVX targets without AVX2.  The scalar is first
;; duplicated into the half-width view of the destination register
;; (operands[2] is the same hard register in <ssehalfvecmode>), and that
;; half is then concatenated with itself to fill the full vector.
10614 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
10615 (vec_duplicate:AVX_VEC_DUP_MODE
10616 (match_operand:<ssescalarmode> 1 "register_operand")))]
10617 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
10618 [(set (match_dup 2)
10619 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
10621 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
10622 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width vector (operand 1) into a 256-bit vector.
;; Three alternatives, each with its own instruction:
;;   0: memory source             -> vbroadcast<i128>;
;;   1: source tied to operand 0  -> vinsert<i128> with immediate 1;
;;   2: other register source     -> vperm2<i128> with immediate 0.
10624 (define_insn "avx_vbroadcastf128_<mode>"
10625 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
10627 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
10631 vbroadcast<i128>\t{%1, %0|%0, %1}
10632 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
10633 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
10634 [(set_attr "type" "ssemov,sselog1,sselog1")
10635 (set_attr "prefix_extra" "1")
10636 (set_attr "length_immediate" "0,1,1")
10637 (set_attr "prefix" "vex")
10638 (set_attr "mode" "<sseinsnmode>")])
10640 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
10641 ;; If it so happens that the input is in memory, use vbroadcast.
10642 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  operands[3] is the index of the element being broadcast.
;; The output code switches on the alternative: the memory form re-points
;; the address at the selected SFmode element and uses vbroadcastss, the
;; register form uses vpermilps with a replicated selector.
10643 (define_insn "*avx_vperm_broadcast_v4sf"
10644 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
10646 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
10647 (match_parallel 2 "avx_vbroadcast_operand"
10648 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10651 int elt = INTVAL (operands[3]);
10652 switch (which_alternative)
/* Elements are 4 bytes wide, so element ELT lives at byte offset ELT * 4.  */
10656 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
10657 return "vbroadcastss\t{%1, %0|%0, %1}";
/* 0x55 is 01010101b: the multiply copies ELT into all four 2-bit fields.  */
10659 operands[2] = GEN_INT (elt * 0x55);
10660 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
10662 gcc_unreachable ();
10665 [(set_attr "type" "ssemov,ssemov,sselog1")
10666 (set_attr "prefix_extra" "1")
10667 (set_attr "length_immediate" "0,0,1")
10668 (set_attr "prefix" "vex")
10669 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float (VF_256) broadcast of one selected element, split after
;; reload unless the mode is V4DF on an AVX2 target.  The preparation code
;; shows three lowerings: on AVX2 with element 0, a vec_dup<mode> of the
;; low part; otherwise an in-lane shuffle (avx_vpermil<mode>) followed by
;; a lane copy (avx_vperm2f128<mode>3); and, for a memory source, an
;; address adjusted to the selected element so that the vec_duplicate in
;; the split pattern can be used.  The conditions choosing between these
;; paths should be confirmed against the surrounding control flow.
10671 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
10672 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
10674 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
10675 (match_parallel 2 "avx_vbroadcast_operand"
10676 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10679 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
10680 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
10682 rtx op0 = operands[0], op1 = operands[1];
10683 int elt = INTVAL (operands[3]);
10689 if (TARGET_AVX2 && elt == 0)
10691 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
10696 /* Shuffle element we care about into all elements of the 128-bit lane.
10697 The other lane gets shuffled too, but we don't care. */
10698 if (<MODE>mode == V4DFmode)
10699 mask = (elt & 1 ? 15 : 0);
10701 mask = (elt & 3) * 0x55;
10702 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
10704 /* Shuffle the lane we care about into both lanes of the dest. */
10705 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
10706 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
10710 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
10711 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Immediate-controlled vpermil for double-precision vectors (VF2).  The
;; 8-bit immediate is decoded into an explicit element-index PARALLEL:
;; one mask bit per element, bits 0 and 1 choosing between indices 0/1.
;; For V4DF, bits 2 and 3 choose between indices 2/3, so each selection
;; stays within its own pair of elements.
10714 (define_expand "avx_vpermil<mode>"
10715 [(set (match_operand:VF2 0 "register_operand")
10717 (match_operand:VF2 1 "nonimmediate_operand")
10718 (match_operand:SI 2 "const_0_to_255_operand")))]
10721 int mask = INTVAL (operands[2]);
10722 rtx perm[<ssescalarnum>];
10724 perm[0] = GEN_INT (mask & 1);
10725 perm[1] = GEN_INT ((mask >> 1) & 1);
10726 if (<MODE>mode == V4DFmode)
10728 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
10729 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
10733 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-controlled vpermil for single-precision vectors (VF1).  The
;; 8-bit immediate holds four 2-bit selectors, decoded into an explicit
;; element-index PARALLEL.  For V8SF the same four selectors are reused
;; for elements 4..7 with 4 added, so each selection stays within its own
;; group of four elements.
10736 (define_expand "avx_vpermil<mode>"
10737 [(set (match_operand:VF1 0 "register_operand")
10739 (match_operand:VF1 1 "nonimmediate_operand")
10740 (match_operand:SI 2 "const_0_to_255_operand")))]
10743 int mask = INTVAL (operands[2]);
10744 rtx perm[<ssescalarnum>];
10746 perm[0] = GEN_INT (mask & 3);
10747 perm[1] = GEN_INT ((mask >> 2) & 3);
10748 perm[2] = GEN_INT ((mask >> 4) & 3);
10749 perm[3] = GEN_INT ((mask >> 6) & 3);
10750 if (<MODE>mode == V8SFmode)
10752 perm[4] = GEN_INT ((mask & 3) + 4);
10753 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
10754 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
10755 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
10759 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a vec_select whose index PARALLEL (operand 2) is accepted by
;; avx_vpermilp_parallel.  That helper's result is used twice: as a truth
;; value in the insn condition, and minus one as the immediate that is
;; stored back into operands[2] and printed by vpermil<ssemodesuffix>.
10762 (define_insn "*avx_vpermilp<mode>"
10763 [(set (match_operand:VF 0 "register_operand" "=x")
10765 (match_operand:VF 1 "nonimmediate_operand" "xm")
10766 (match_parallel 2 ""
10767 [(match_operand 3 "const_int_operand")])))]
10769 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
10771 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
10772 operands[2] = GEN_INT (mask);
10773 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10775 [(set_attr "type" "sselog")
10776 (set_attr "prefix_extra" "1")
10777 (set_attr "length_immediate" "1")
10778 (set_attr "prefix" "vex")
10779 (set_attr "mode" "<MODE>")])
;; vpermil<ssemodesuffix> with a variable selector: operand 1 is the float
;; data register and operand 2 is an integer vector of the matching
;; <sseintvecmode>, in a register or in memory.
10781 (define_insn "avx_vpermilvar<mode>3"
10782 [(set (match_operand:VF 0 "register_operand" "=x")
10784 [(match_operand:VF 1 "register_operand" "x")
10785 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
10788 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10789 [(set_attr "type" "sselog")
10790 (set_attr "prefix_extra" "1")
10791 (set_attr "prefix" "vex")
10792 (set_attr "btver2_decode" "vector")
10793 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).  When
;; neither bit 3 nor bit 7 of the immediate is set ((mask & 0x88) == 0)
;; the operation is rewritten as an explicit vec_select from the
;; vec_concat of operands 1 and 2: immediate bits 1:0 pick the half that
;; supplies the low half of the result and bits 5:4 pick the half that
;; supplies the high half.  The pattern itself is the
;; UNSPEC_VPERMIL2F128 form.
10795 (define_expand "avx_vperm2f128<mode>3"
10796 [(set (match_operand:AVX256MODE2P 0 "register_operand")
10797 (unspec:AVX256MODE2P
10798 [(match_operand:AVX256MODE2P 1 "register_operand")
10799 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
10800 (match_operand:SI 3 "const_0_to_255_operand")]
10801 UNSPEC_VPERMIL2F128))]
10804 int mask = INTVAL (operands[3]);
10805 if ((mask & 0x88) == 0)
10807 rtx perm[<ssescalarnum>], t1, t2;
10808 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
10810 base = (mask & 3) * nelt2;
10811 for (i = 0; i < nelt2; ++i)
10812 perm[i] = GEN_INT (base + i);
10814 base = ((mask >> 4) & 3) * nelt2;
10815 for (i = 0; i < nelt2; ++i)
10816 perm[i + nelt2] = GEN_INT (base + i);
10818 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
10819 operands[1], operands[2]);
10820 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
10821 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
10822 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
10828 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10829 ;; means that in order to represent this properly in rtl we'd have to
10830 ;; nest *another* vec_concat with a zero operand and do the select from
10831 ;; a 4x wide vector. That doesn't seem very nice.
;; Opaque (unspec) form: the 8-bit immediate in operand 3 is printed
;; unchanged as the vperm2<i128> control.
10832 (define_insn "*avx_vperm2f128<mode>_full"
10833 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10834 (unspec:AVX256MODE2P
10835 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10836 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10837 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10838 UNSPEC_VPERMIL2F128))]
10840 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10841 [(set_attr "type" "sselog")
10842 (set_attr "prefix_extra" "1")
10843 (set_attr "length_immediate" "1")
10844 (set_attr "prefix" "vex")
10845 (set_attr "mode" "<sseinsnmode>")])
;; Explicit vec_select-of-vec_concat form of vperm2f128.  The index
;; PARALLEL (operand 3) must be accepted by avx_vperm2f128_parallel; that
;; helper's result minus one is the immediate.  The output code returns a
;; vinsert<i128> with immediate 0 or 1 for two special cases (presumably
;; selected by specific mask values -- confirm) and otherwise prints the
;; mask as the vperm2<i128> control.
10847 (define_insn "*avx_vperm2f128<mode>_nozero"
10848 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10849 (vec_select:AVX256MODE2P
10850 (vec_concat:<ssedoublevecmode>
10851 (match_operand:AVX256MODE2P 1 "register_operand" "x")
10852 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
10853 (match_parallel 3 ""
10854 [(match_operand 4 "const_int_operand")])))]
10856 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
10858 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
10860 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
10862 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
10863 operands[3] = GEN_INT (mask);
10864 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10866 [(set_attr "type" "sselog")
10867 (set_attr "prefix_extra" "1")
10868 (set_attr "length_immediate" "1")
10869 (set_attr "prefix" "vex")
10870 (set_attr "mode" "<sseinsnmode>")])
;; Expander for inserting a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  Operand 3 is a constant 0 or 1 and selects, via
;; the switch, between the vec_set_lo_<mode> and vec_set_hi_<mode>
;; generators; any other value is unreachable.
10872 (define_expand "avx_vinsertf128<mode>"
10873 [(match_operand:V_256 0 "register_operand")
10874 (match_operand:V_256 1 "register_operand")
10875 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
10876 (match_operand:SI 3 "const_0_to_1_operand")]
10879 rtx (*insn)(rtx, rtx, rtx);
10881 switch (INTVAL (operands[3]))
10884 insn = gen_vec_set_lo_<mode>;
10887 insn = gen_vec_set_hi_<mode>;
10890 gcc_unreachable ();
10893 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vinserti128 with immediate 0 on V4DI: the V2DI operand 2 becomes the
;; low half of the result, and elements 2 and 3 of operand 1 are kept as
;; the high half.
10897 (define_insn "avx2_vec_set_lo_v4di"
10898 [(set (match_operand:V4DI 0 "register_operand" "=x")
10900 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
10902 (match_operand:V4DI 1 "register_operand" "x")
10903 (parallel [(const_int 2) (const_int 3)]))))]
10905 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10906 [(set_attr "type" "sselog")
10907 (set_attr "prefix_extra" "1")
10908 (set_attr "length_immediate" "1")
10909 (set_attr "prefix" "vex")
10910 (set_attr "mode" "OI")])
;; vinserti128 with immediate 1 on V4DI: elements 0 and 1 of operand 1 are
;; kept as the low half of the result, and the V2DI operand 2 becomes the
;; high half.
10912 (define_insn "avx2_vec_set_hi_v4di"
10913 [(set (match_operand:V4DI 0 "register_operand" "=x")
10916 (match_operand:V4DI 1 "register_operand" "x")
10917 (parallel [(const_int 0) (const_int 1)]))
10918 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
10920 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10921 [(set_attr "type" "sselog")
10922 (set_attr "prefix_extra" "1")
10923 (set_attr "length_immediate" "1")
10924 (set_attr "prefix" "vex")
10925 (set_attr "mode" "OI")])
;; Replace the low half of a four-element 256-bit vector (VI8F_256):
;; the result is operand 2 concatenated with elements 2 and 3 of
;; operand 1.  Emitted as vinsert<i128> with immediate 0.
10927 (define_insn "vec_set_lo_<mode>"
10928 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10929 (vec_concat:VI8F_256
10930 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10931 (vec_select:<ssehalfvecmode>
10932 (match_operand:VI8F_256 1 "register_operand" "x")
10933 (parallel [(const_int 2) (const_int 3)]))))]
10935 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10936 [(set_attr "type" "sselog")
10937 (set_attr "prefix_extra" "1")
10938 (set_attr "length_immediate" "1")
10939 (set_attr "prefix" "vex")
10940 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a four-element 256-bit vector (VI8F_256):
;; the result is elements 0 and 1 of operand 1 concatenated with
;; operand 2.  Emitted as vinsert<i128> with immediate 1.
10942 (define_insn "vec_set_hi_<mode>"
10943 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10944 (vec_concat:VI8F_256
10945 (vec_select:<ssehalfvecmode>
10946 (match_operand:VI8F_256 1 "register_operand" "x")
10947 (parallel [(const_int 0) (const_int 1)]))
10948 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10950 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10951 [(set_attr "type" "sselog")
10952 (set_attr "prefix_extra" "1")
10953 (set_attr "length_immediate" "1")
10954 (set_attr "prefix" "vex")
10955 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of an eight-element 256-bit vector (VI4F_256):
;; the result is operand 2 concatenated with elements 4..7 of operand 1.
;; Emitted as vinsert<i128> with immediate 0.
10957 (define_insn "vec_set_lo_<mode>"
10958 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10959 (vec_concat:VI4F_256
10960 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10961 (vec_select:<ssehalfvecmode>
10962 (match_operand:VI4F_256 1 "register_operand" "x")
10963 (parallel [(const_int 4) (const_int 5)
10964 (const_int 6) (const_int 7)]))))]
10966 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10967 [(set_attr "type" "sselog")
10968 (set_attr "prefix_extra" "1")
10969 (set_attr "length_immediate" "1")
10970 (set_attr "prefix" "vex")
10971 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of an eight-element 256-bit vector (VI4F_256):
;; the result is elements 0..3 of operand 1 concatenated with operand 2.
;; Emitted as vinsert<i128> with immediate 1.
10973 (define_insn "vec_set_hi_<mode>"
10974 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10975 (vec_concat:VI4F_256
10976 (vec_select:<ssehalfvecmode>
10977 (match_operand:VI4F_256 1 "register_operand" "x")
10978 (parallel [(const_int 0) (const_int 1)
10979 (const_int 2) (const_int 3)]))
10980 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10982 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10983 [(set_attr "type" "sselog")
10984 (set_attr "prefix_extra" "1")
10985 (set_attr "length_immediate" "1")
10986 (set_attr "prefix" "vex")
10987 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a V16HI vector: the V8HI operand 2 is combined
;; with elements 8..15 of operand 1, using a 128-bit insert with
;; immediate 0.  The mnemonic is spelled with %~ (presumably expanding to
;; the integer or float form depending on the target -- confirm against
;; the operand-print code).
10989 (define_insn "vec_set_lo_v16hi"
10990 [(set (match_operand:V16HI 0 "register_operand" "=x")
10992 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10994 (match_operand:V16HI 1 "register_operand" "x")
10995 (parallel [(const_int 8) (const_int 9)
10996 (const_int 10) (const_int 11)
10997 (const_int 12) (const_int 13)
10998 (const_int 14) (const_int 15)]))))]
11000 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11001 [(set_attr "type" "sselog")
11002 (set_attr "prefix_extra" "1")
11003 (set_attr "length_immediate" "1")
11004 (set_attr "prefix" "vex")
11005 (set_attr "mode" "OI")])
;; V16HI counterpart of vec_set_hi: combines elements 0-7 of operand 1
;; with the V8HI operand 2.  Emitted as vinsert%~128 with immediate 0x1;
;; the insn "mode" attribute is OI.
11007 (define_insn "vec_set_hi_v16hi"
11008 [(set (match_operand:V16HI 0 "register_operand" "=x")
11011 (match_operand:V16HI 1 "register_operand" "x")
11012 (parallel [(const_int 0) (const_int 1)
11013 (const_int 2) (const_int 3)
11014 (const_int 4) (const_int 5)
11015 (const_int 6) (const_int 7)]))
11016 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11018 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11019 [(set_attr "type" "sselog")
11020 (set_attr "prefix_extra" "1")
11021 (set_attr "length_immediate" "1")
11022 (set_attr "prefix" "vex")
11023 (set_attr "mode" "OI")])
;; V32QI counterpart of vec_set_lo: combines the V16QI operand 2 with
;; elements 16-31 of operand 1.  Emitted as vinsert%~128 with immediate
;; 0x0; the insn "mode" attribute is OI.
11025 (define_insn "vec_set_lo_v32qi"
11026 [(set (match_operand:V32QI 0 "register_operand" "=x")
11028 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11030 (match_operand:V32QI 1 "register_operand" "x")
11031 (parallel [(const_int 16) (const_int 17)
11032 (const_int 18) (const_int 19)
11033 (const_int 20) (const_int 21)
11034 (const_int 22) (const_int 23)
11035 (const_int 24) (const_int 25)
11036 (const_int 26) (const_int 27)
11037 (const_int 28) (const_int 29)
11038 (const_int 30) (const_int 31)]))))]
11040 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11041 [(set_attr "type" "sselog")
11042 (set_attr "prefix_extra" "1")
11043 (set_attr "length_immediate" "1")
11044 (set_attr "prefix" "vex")
11045 (set_attr "mode" "OI")])
;; V32QI counterpart of vec_set_hi: combines elements 0-15 of operand 1
;; with the V16QI operand 2.  Emitted as vinsert%~128 with immediate 0x1;
;; the insn "mode" attribute is OI.
11047 (define_insn "vec_set_hi_v32qi"
11048 [(set (match_operand:V32QI 0 "register_operand" "=x")
11051 (match_operand:V32QI 1 "register_operand" "x")
11052 (parallel [(const_int 0) (const_int 1)
11053 (const_int 2) (const_int 3)
11054 (const_int 4) (const_int 5)
11055 (const_int 6) (const_int 7)
11056 (const_int 8) (const_int 9)
11057 (const_int 10) (const_int 11)
11058 (const_int 12) (const_int 13)
11059 (const_int 14) (const_int 15)]))
11060 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11062 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11063 [(set_attr "type" "sselog")
11064 (set_attr "prefix_extra" "1")
11065 (set_attr "length_immediate" "1")
11066 (set_attr "prefix" "vex")
11067 (set_attr "mode" "OI")])
;; Load form of v<sseintprefix>maskmov<ssemodesuffix>.  Operand 0 is the
;; destination register, operand 1 the memory source and operand 2 a
;; register in the matching integer vector mode (presumably the mask,
;; going by the pattern name).
11069 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
11070 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
11072 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
11073 (match_operand:V48_AVX2 1 "memory_operand" "m")]
11076 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11077 [(set_attr "type" "sselog1")
11078 (set_attr "prefix_extra" "1")
11079 (set_attr "prefix" "vex")
11080 (set_attr "btver2_decode" "vector")
11081 (set_attr "mode" "<sseinsnmode>")])
;; Store form of v<sseintprefix>maskmov<ssemodesuffix>.  Operand 0 is the
;; memory destination; its "+m" constraint marks it as both read and
;; written.  Operand 1 is a register in the matching integer vector mode
;; (presumably the mask, going by the pattern name) and operand 2 is the
;; data register.
11083 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
11084 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
11086 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
11087 (match_operand:V48_AVX2 2 "register_operand" "x")
11091 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11092 [(set_attr "type" "sselog1")
11093 (set_attr "prefix_extra" "1")
11094 (set_attr "prefix" "vex")
11095 (set_attr "btver2_decode" "vector")
11096 (set_attr "mode" "<sseinsnmode>")])
;; Cast from a half-width vector (operand 1) to an AVX256MODE2P value
;; (operand 0), with register<-reg/mem and memory<-register alternatives.
;; The split runs only once reload has completed.  It re-expresses a
;; register operand in the other operand's mode, using the same hard
;; register number, so that both sides agree, and then emits an ordinary
;; move.
11098 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
11099 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11100 (unspec:AVX256MODE2P
11101 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11105 "&& reload_completed"
11108 rtx op0 = operands[0];
11109 rtx op1 = operands[1];
11111 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
11113 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11114 emit_move_insn (op0, op1);
;; Vector initialisation expander for the V_256 modes; all work is
;; delegated to ix86_expand_vector_init, called with `false' as its
;; first argument, then the destination (operand 0) and operand 1.
11118 (define_expand "vec_init<mode>"
11119 [(match_operand:V_256 0 "register_operand")
11123 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Extract a V2DI half (operand 0) from the V4DI operand 1.  Operand 2 is
;; a constant in the range 0..1 that selects which generator is used,
;; gen_vec_extract_lo_v4di or gen_vec_extract_hi_v4di; any other value is
;; treated as unreachable.
11127 (define_expand "avx2_extracti128"
11128 [(match_operand:V2DI 0 "nonimmediate_operand")
11129 (match_operand:V4DI 1 "register_operand")
11130 (match_operand:SI 2 "const_0_to_1_operand")]
11133 rtx (*insn)(rtx, rtx);
11135 switch (INTVAL (operands[2]))
11138 insn = gen_vec_extract_lo_v4di;
11141 insn = gen_vec_extract_hi_v4di;
11144 gcc_unreachable ();
11147 emit_insn (insn (operands[0], operands[1]));
;; Insert the V2DI operand 2 into the V4DI operand 1, producing operand 0.
;; Operand 3 is a constant in the range 0..1 that selects which generator
;; is used, gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di; any
;; other value is treated as unreachable.
11151 (define_expand "avx2_inserti128"
11152 [(match_operand:V4DI 0 "register_operand")
11153 (match_operand:V4DI 1 "register_operand")
11154 (match_operand:V2DI 2 "nonimmediate_operand")
11155 (match_operand:SI 3 "const_0_to_1_operand")]
11158 rtx (*insn)(rtx, rtx, rtx);
11160 switch (INTVAL (operands[3]))
11163 insn = gen_avx2_vec_set_lo_v4di;
11166 insn = gen_avx2_vec_set_hi_v4di;
11169 gcc_unreachable ();
11172 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Shift of a VI4_AVX2 vector (operand 1) by a vector of counts (operand
;; 2, register or memory), emitted as vpsravd.
11176 (define_insn "avx2_ashrv<mode>"
11177 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11179 (match_operand:VI4_AVX2 1 "register_operand" "x")
11180 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
11182 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
11183 [(set_attr "type" "sseishft")
11184 (set_attr "prefix" "vex")
11185 (set_attr "mode" "<sseinsnmode>")])
;; Shifts covered by the any_lshift code iterator on VI48_AVX2 vectors:
;; operand 1 is shifted by the vector of counts in operand 2 (register or
;; memory).  Emitted as vp<vshift>v<ssemodesuffix>.
11187 (define_insn "avx2_<shift_insn>v<mode>"
11188 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
11189 (any_lshift:VI48_AVX2
11190 (match_operand:VI48_AVX2 1 "register_operand" "x")
11191 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
11193 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11194 [(set_attr "type" "sselog")
11195 (set_attr "prefix" "vex")
11196 (set_attr "mode" "<sseinsnmode>")])
;; Build a V_256 vector from two half-width operands.  There are two
;; alternatives.  When operand 2 is a register or memory ("xm"), the
;; sselog alternative emits vinsert<i128> with immediate 0x1, printing
;; operand 1 through the %t modifier.  When operand 2 satisfies the "C"
;; constraint, the ssemov alternative instead emits a vmovaps, vmovapd or
;; vmovdqa, picked from the insn's mode attribute, that copies operand 1
;; into %x0.
11198 (define_insn "avx_vec_concat<mode>"
11199 [(set (match_operand:V_256 0 "register_operand" "=x,x")
11201 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
11202 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
11205 switch (which_alternative)
11208 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11210 switch (get_attr_mode (insn))
11213 return "vmovaps\t{%1, %x0|%x0, %1}";
11215 return "vmovapd\t{%1, %x0|%x0, %1}";
11217 return "vmovdqa\t{%1, %x0|%x0, %1}";
11220 gcc_unreachable ();
11223 [(set_attr "type" "sselog,ssemov")
11224 (set_attr "prefix_extra" "1,*")
11225 (set_attr "length_immediate" "1,*")
11226 (set_attr "prefix" "vex")
11227 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps producing a V4SF result.  The conversion is
;; modelled as a V8SF unspec of the V8HI operand 1, of which elements 0-3
;; are kept.
11229 (define_insn "vcvtph2ps"
11230 [(set (match_operand:V4SF 0 "register_operand" "=x")
11232 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
11234 (parallel [(const_int 0) (const_int 1)
11235 (const_int 2) (const_int 3)])))]
11237 "vcvtph2ps\t{%1, %0|%0, %1}"
11238 [(set_attr "type" "ssecvt")
11239 (set_attr "prefix" "vex")
11240 (set_attr "mode" "V4SF")])
;; Memory-source form of vcvtph2ps: converts a V4HI memory operand
;; directly to a V4SF register.
;; NOTE(review): the "mode" attribute below is V8SF although the result
;; is V4SF and the register form of this insn uses V4SF -- confirm
;; whether this is intentional.
11242 (define_insn "*vcvtph2ps_load"
11243 [(set (match_operand:V4SF 0 "register_operand" "=x")
11244 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
11245 UNSPEC_VCVTPH2PS))]
11247 "vcvtph2ps\t{%1, %0|%0, %1}"
11248 [(set_attr "type" "ssecvt")
11249 (set_attr "prefix" "vex")
11250 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: converts a V8HI register or memory operand to a
;; V8SF register.
11252 (define_insn "vcvtph2ps256"
11253 [(set (match_operand:V8SF 0 "register_operand" "=x")
11254 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11255 UNSPEC_VCVTPH2PS))]
11257 "vcvtph2ps\t{%1, %0|%0, %1}"
11258 [(set_attr "type" "ssecvt")
11259 (set_attr "prefix" "vex")
11260 (set_attr "btver2_decode" "double")
11261 (set_attr "mode" "V8SF")])
;; Expander for the register form of vcvtps2ph.  The V4HI conversion
;; result of the V4SF operand 1 (with the 0..255 immediate operand 2) is
;; placed in a V8HI destination; operand 3 is set up here as the V4HI
;; zero constant.
11263 (define_expand "vcvtps2ph"
11264 [(set (match_operand:V8HI 0 "register_operand")
11266 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
11267 (match_operand:SI 2 "const_0_to_255_operand")]
11271 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of vcvtps2ph.  Operand 1 is the V4SF source, operand 2
;; the 0..255 immediate, and operand 3 must be a V4HI zero constant
;; (const0_operand).
11273 (define_insn "*vcvtps2ph"
11274 [(set (match_operand:V8HI 0 "register_operand" "=x")
11276 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11277 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11279 (match_operand:V4HI 3 "const0_operand")))]
11281 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11282 [(set_attr "type" "ssecvt")
11283 (set_attr "prefix" "vex")
11284 (set_attr "mode" "V4SF")])
;; Memory-destination form of vcvtps2ph: converts the V4SF register
;; operand 1, using the 0..255 immediate operand 2, and stores the V4HI
;; result to memory.
11286 (define_insn "*vcvtps2ph_store"
11287 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11288 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11289 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11290 UNSPEC_VCVTPS2PH))]
11292 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11293 [(set_attr "type" "ssecvt")
11294 (set_attr "prefix" "vex")
11295 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: converts the V8SF register operand 1, using the
;; 0..255 immediate operand 2, into a V8HI register or memory
;; destination.
11297 (define_insn "vcvtps2ph256"
11298 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
11299 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
11300 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11301 UNSPEC_VCVTPS2PH))]
11303 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11304 [(set_attr "type" "ssecvt")
11305 (set_attr "prefix" "vex")
11306 (set_attr "btver2_decode" "vector")
11307 (set_attr "mode" "V8SF")])
11309 ;; For gather* insn patterns
;; VEC_GATHER_MODE lists the result modes of the gather patterns: 2- and
;; 4-element DI/DF vectors and 4- and 8-element SI/SF vectors.
11310 (define_mode_iterator VEC_GATHER_MODE
11311 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector operand in the avx2_gathersi patterns, for
;; each gather result mode.
11312 (define_mode_attr VEC_GATHER_IDXSI
11313 [(V2DI "V4SI") (V2DF "V4SI")
11314 (V4DI "V4SI") (V4DF "V4SI")
11315 (V4SI "V4SI") (V4SF "V4SI")
11316 (V8SI "V8SI") (V8SF "V8SI")])
;; Mode of the index vector operand in the avx2_gatherdi patterns, for
;; each gather result mode.
11317 (define_mode_attr VEC_GATHER_IDXDI
11318 [(V2DI "V2DI") (V2DF "V2DI")
11319 (V4DI "V4DI") (V4DF "V4DI")
11320 (V4SI "V2DI") (V4SF "V2DI")
11321 (V8SI "V4DI") (V8SF "V4DI")])
;; Mode of the register source operands in the avx2_gatherdi patterns.
;; It equals the gather result mode except for V8SI and V8SF, which map
;; to the 4-element V4SI and V4SF.
11322 (define_mode_attr VEC_GATHER_SRCDI
11323 [(V2DI "V2DI") (V2DF "V2DF")
11324 (V4DI "V4DI") (V4DF "V4DF")
11325 (V4SI "V4SI") (V4SF "V4SF")
11326 (V8SI "V4SI") (V8SF "V4SF")])
;; Expander for gathers whose index vector has mode <VEC_GATHER_IDXSI>.
;; Operand 0 is the destination, operands 1 and 4 are VEC_GATHER_MODE
;; register inputs, operand 2 is the address, operand 3 the index vector
;; and operand 5 a constant accepted by const1248_operand.  Operand 6 is
;; a scratch in the destination mode.  The preparation code wraps
;; operands 2, 3 and 5 in a Pmode UNSPEC_VSIBADDR.
11328 (define_expand "avx2_gathersi<mode>"
11329 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11330 (unspec:VEC_GATHER_MODE
11331 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
11332 (mem:<ssescalarmode>
11334 [(match_operand 2 "vsib_address_operand")
11335 (match_operand:<VEC_GATHER_IDXSI>
11336 3 "register_operand")
11337 (match_operand:SI 5 "const1248_operand")]))
11338 (mem:BLK (scratch))
11339 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
11341 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11345 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11346 operands[5]), UNSPEC_VSIBADDR);
;; Gather insn with a <VEC_GATHER_IDXSI> index vector.  Operand 2 is tied
;; to the destination (constraint "0") and operand 5 is tied to the
;; scratch operand 1 (constraint "1").  Both the destination and the
;; scratch are earlyclobber ("=&x").  Operand 7 is the memory reference
;; matched by vsib_mem_operator, built from the address (3), the index
;; vector (4) and the const1248_operand constant (6).
11349 (define_insn "*avx2_gathersi<mode>"
11350 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11351 (unspec:VEC_GATHER_MODE
11352 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
11353 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11355 [(match_operand:P 3 "vsib_address_operand" "p")
11356 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
11357 (match_operand:SI 6 "const1248_operand" "n")]
11359 (mem:BLK (scratch))
11360 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
11362 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11364 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
11365 [(set_attr "type" "ssemov")
11366 (set_attr "prefix" "vex")
11367 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the <VEC_GATHER_IDXSI>-index gather insn with a different
;; operand numbering: the memory reference is operand 6, built from
;; operands 2, 3 and 5, and operand 4 is tied to the scratch operand 1
;; (constraint "1").
11369 (define_insn "*avx2_gathersi<mode>_2"
11370 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11371 (unspec:VEC_GATHER_MODE
11373 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11375 [(match_operand:P 2 "vsib_address_operand" "p")
11376 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
11377 (match_operand:SI 5 "const1248_operand" "n")]
11379 (mem:BLK (scratch))
11380 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
11382 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11384 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
11385 [(set_attr "type" "ssemov")
11386 (set_attr "prefix" "vex")
11387 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers whose index vector has mode <VEC_GATHER_IDXDI>.
;; Operand 0 is the destination, operands 1 and 4 are register inputs in
;; mode <VEC_GATHER_SRCDI>, operand 2 is the address, operand 3 the index
;; vector and operand 5 a constant accepted by const1248_operand.
;; Operand 6 is a scratch in the destination mode.  The preparation code
;; wraps operands 2, 3 and 5 in a Pmode UNSPEC_VSIBADDR.
11389 (define_expand "avx2_gatherdi<mode>"
11390 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11391 (unspec:VEC_GATHER_MODE
11392 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
11393 (mem:<ssescalarmode>
11395 [(match_operand 2 "vsib_address_operand")
11396 (match_operand:<VEC_GATHER_IDXDI>
11397 3 "register_operand")
11398 (match_operand:SI 5 "const1248_operand")]))
11399 (mem:BLK (scratch))
11400 (match_operand:<VEC_GATHER_SRCDI>
11401 4 "register_operand")]
11403 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11407 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11408 operands[5]), UNSPEC_VSIBADDR);
;; Gather insn with a <VEC_GATHER_IDXDI> index vector.  Operand 2 is tied
;; to the destination (constraint "0") and operand 5 to the scratch
;; operand 1 (constraint "1").  The output template prints operands 2 and
;; 5, which have mode <VEC_GATHER_SRCDI>, rather than operands 0 and 1.
11411 (define_insn "*avx2_gatherdi<mode>"
11412 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11413 (unspec:VEC_GATHER_MODE
11414 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11415 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11417 [(match_operand:P 3 "vsib_address_operand" "p")
11418 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11419 (match_operand:SI 6 "const1248_operand" "n")]
11421 (mem:BLK (scratch))
11422 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11424 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11426 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
11427 [(set_attr "type" "ssemov")
11428 (set_attr "prefix" "vex")
11429 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the <VEC_GATHER_IDXDI>-index gather insn with a different
;; operand numbering: the memory reference is operand 6 and operand 4 is
;; tied to the scratch operand 1.  When the result mode differs from
;; <VEC_GATHER_SRCDI>, the destination is printed with the %x modifier;
;; otherwise it is printed as plain %0.
11431 (define_insn "*avx2_gatherdi<mode>_2"
11432 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11433 (unspec:VEC_GATHER_MODE
11435 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11437 [(match_operand:P 2 "vsib_address_operand" "p")
11438 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11439 (match_operand:SI 5 "const1248_operand" "n")]
11441 (mem:BLK (scratch))
11442 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11444 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11447 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
11448 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
11449 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
11451 [(set_attr "type" "ssemov")
11452 (set_attr "prefix" "vex")
11453 (set_attr "mode" "<sseinsnmode>")])
;; <VEC_GATHER_IDXDI>-index gather whose result is taken in mode
;; <VEC_GATHER_SRCDI>: elements 0-3 are selected from the gathered value.
;; Operand 2 is tied to the destination, operand 5 to the scratch operand
;; 1, and the scratch uses the VI4F_256 mode.
11455 (define_insn "*avx2_gatherdi<mode>_3"
11456 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11457 (vec_select:<VEC_GATHER_SRCDI>
11459 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11460 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11462 [(match_operand:P 3 "vsib_address_operand" "p")
11463 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11464 (match_operand:SI 6 "const1248_operand" "n")]
11466 (mem:BLK (scratch))
11467 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11469 (parallel [(const_int 0) (const_int 1)
11470 (const_int 2) (const_int 3)])))
11471 (clobber (match_scratch:VI4F_256 1 "=&x"))]
11473 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
11474 [(set_attr "type" "ssemov")
11475 (set_attr "prefix" "vex")
11476 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the <VEC_GATHER_SRCDI>-result gather with a different
;; operand numbering: elements 0-3 are selected from the gathered value,
;; the memory reference is operand 6, and operand 4 is tied to the
;; VI4F_256 scratch operand 1.
11478 (define_insn "*avx2_gatherdi<mode>_4"
11479 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11480 (vec_select:<VEC_GATHER_SRCDI>
11483 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11485 [(match_operand:P 2 "vsib_address_operand" "p")
11486 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11487 (match_operand:SI 5 "const1248_operand" "n")]
11489 (mem:BLK (scratch))
11490 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11492 (parallel [(const_int 0) (const_int 1)
11493 (const_int 2) (const_int 3)])))
11494 (clobber (match_scratch:VI4F_256 1 "=&x"))]
11496 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
11497 [(set_attr "type" "ssemov")
11498 (set_attr "prefix" "vex")
11499 (set_attr "mode" "<sseinsnmode>")])