1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE
23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 ;; All 16-byte vector modes handled by SSE
26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; SSEMODE16 is the SSEMODE list with V1TI added.
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
;; AVXMODE is the union of the SSEMODE and AVX256MODE lists.
42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; AVXMODE16 is the AVXMODE list with V1TI added.
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Mix-n-match iterators.
;; In the SSEMODE<digits> names, each digit matches the element width in
;; bytes of one listed 16-byte integer vector mode
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar float modes together with the 16-byte float vector modes.
54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; 32-byte float vector modes (same list as AVX256MODEF2P below).
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
;; 16-byte float vector modes.
56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; 32-byte groupings.  AVX256MODEF2P holds the float modes only;
;; AVX256MODE4P lists the 4-element modes and AVX256MODE8P the 8-element
;; modes, each with the integer and the float variant.
58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; Float vector modes of both sizes: all four (F2P), the 4-element ones
;; (F4P), the double-precision ones (FDP) and the single-precision ones
;; (FSP).
63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; NOTE(review): judging by the names, these two presumably drive the
;; cvtdq2ps / cvtps2dq conversion patterns, which are not visible in this
;; part of the file -- confirm against their users.
67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
70 ;; Int-float size matches
;; SSEMODE4S: the float and integer modes with four 4-byte elements.
;; SSEMODE2D: the float and integer modes with two 8-byte elements.
71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
74 ;; Modes handled by integer vcond pattern
;; V2DI carries the condition "TARGET_SSE4_2"; the other three modes are
;; listed unconditionally.
75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
76 (V2DI "TARGET_SSE4_2")])
78 ;; Modes handled by vec_extract_even/odd pattern.
79 (define_mode_iterator SSEMODE_EO
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
86 ;; Mapping from float mode to required SSE level
;; Single-precision modes map to "sse", double-precision modes to "sse2".
;; The value is also substituted into pattern names in this file, e.g.
;; "<sse>_movu<ssemodesuffix>" and "<sse>_movnt<mode>".
87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
89 ;; Mapping from integer vector mode to mnemonic suffix
90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
92 ;; Mapping of the insn mnemonic suffix
;; Scalar modes give "ss"/"sd", vector modes "ps"/"pd".  Note that the
;; 32-byte integer modes V8SI and V4DI also map to the float suffixes
;; "ps" and "pd".
93 (define_mode_attr ssemodesuffix
94 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
95 (V8SI "ps") (V4DI "pd")])
96 (define_mode_attr ssescalarmodesuffix
97 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V4DF "sd")
100 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one
;; (QI: 7, HI: 15, SI: 31, DI: 63).
101 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
103 ;; Mapping of vector modes back to the scalar modes
;; Only the 16-byte vector modes are listed here.
104 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
105 (V16QI "QI") (V8HI "HI")
106 (V4SI "SI") (V2DI "DI")])
108 ;; Mapping of vector modes to a vector mode of double size
;; Covers 16-byte inputs (giving 32-byte mode names) as well as 32-byte
;; inputs (giving 64-byte mode names); the element type is kept and the
;; element count is doubled.
109 (define_mode_attr ssedoublesizemode
110 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
111 (V8HI "V16HI") (V16QI "V32QI")
112 (V4DF "V8DF") (V8SF "V16SF")
113 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
115 ;; Number of scalar elements in each vector type
;; Listed for both the 16-byte and the 32-byte vector modes.
116 (define_mode_attr ssescalarnum
117 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
118 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mode attributes used by the AVX patterns.
;;
;; avxvecmode: value placed in the insn "mode" attribute by patterns in
;; this file -- TI for the 16-byte integer modes, OI for the 32-byte
;; integer modes, and the mode itself for the float vector modes.
121 (define_mode_attr avxvecmode
122 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
123 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
124 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; avxvecpsmode: integer vector mode -> single-precision float vector mode
;; of the same total size (V4SF for 16 bytes, V8SF for 32 bytes).
125 (define_mode_attr avxvecpsmode
126 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
127 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: vector mode with the same element type and half as
;; many elements.  V2DI and V2DF have no entry.
128 (define_mode_attr avxhalfvecmode
129 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
130 (V8SF "V4SF") (V4DF "V2DF")
131 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; avxscalarmode: element (scalar) mode of each 16-byte and 32-byte
;; vector mode.
132 (define_mode_attr avxscalarmode
133 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
134 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: swaps between the SF and SI vector modes that have the
;; same element count.
135 (define_mode_attr avxcvtvecmode
136 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: float vector mode -> integer vector mode with the same
;; element count and element size.
137 (define_mode_attr avxpermvecmode
138 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
139 (define_mode_attr avxmodesuffixp
140 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix: empty for the 16-byte modes, "256" for the 32-byte
;; modes.  Used in names such as "avx_movdqu<avxmodesuffix>".
142 (define_mode_attr avxmodesuffix
143 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
144 (V8SI "256") (V8SF "256") (V4DF "256")])
146 ;; Mapping of immediate bits for blend instructions
;; Each value is (1 << N) - 1, where N is the mode's element count.
147 (define_mode_attr blendbits
148 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
150 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 1 << (N - 1), where N is the mode's element count.
151 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
153 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
155 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
159 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
161 (define_expand "mov<mode>"
162 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
163 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
166 ix86_expand_vector_move (<MODE>mode, operands);
170 (define_insn "*avx_mov<mode>_internal"
171 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
172 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 return standard_sse_constant_opcode (insn, operands[1]);
183 switch (get_attr_mode (insn))
187 return "vmovaps\t{%1, %0|%0, %1}";
190 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
191 return "vmovaps\t{%1, %0|%0, %1}";
193 return "vmovapd\t{%1, %0|%0, %1}";
195 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
196 return "vmovaps\t{%1, %0|%0, %1}";
198 return "vmovdqa\t{%1, %0|%0, %1}";
204 [(set_attr "type" "sselog1,ssemov,ssemov")
205 (set_attr "prefix" "vex")
206 (set_attr "mode" "<avxvecmode>")])
208 ;; All of these patterns are enabled for SSE1 as well as SSE2.
209 ;; This is essential for maintaining stable calling conventions.
211 (define_expand "mov<mode>"
212 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
213 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
216 ix86_expand_vector_move (<MODE>mode, operands);
220 (define_insn "*mov<mode>_internal"
221 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
222 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
224 && (register_operand (operands[0], <MODE>mode)
225 || register_operand (operands[1], <MODE>mode))"
227 switch (which_alternative)
230 return standard_sse_constant_opcode (insn, operands[1]);
233 switch (get_attr_mode (insn))
236 return "movaps\t{%1, %0|%0, %1}";
238 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
239 return "movaps\t{%1, %0|%0, %1}";
241 return "movapd\t{%1, %0|%0, %1}";
243 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
244 return "movaps\t{%1, %0|%0, %1}";
246 return "movdqa\t{%1, %0|%0, %1}";
252 [(set_attr "type" "sselog1,ssemov,ssemov")
254 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
255 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
256 (and (eq_attr "alternative" "2")
257 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
259 (const_string "V4SF")
260 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
261 (const_string "V4SF")
262 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
263 (const_string "V2DF")
265 (const_string "TI")))])
267 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
268 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
269 ;; from memory, we'd prefer to load the memory directly into the %xmm
270 ;; register. To facilitate this happy circumstance, this pattern won't
271 ;; split until after register allocation. If the 64-bit value didn't
272 ;; come from memory, this is the best we can do. This is much better
273 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from it.
276 (define_insn_and_split "movdi_to_sse"
278 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
279 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
280 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
281 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
283 "&& reload_completed"
286 if (register_operand (operands[1], DImode))
288 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
289 Assemble the 64-bit DImode value in an xmm register. */
290 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
291 gen_rtx_SUBREG (SImode, operands[1], 0)));
292 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
293 gen_rtx_SUBREG (SImode, operands[1], 4)));
294 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
297 else if (memory_operand (operands[1], DImode))
298 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
299 operands[1], const0_rtx));
305 [(set (match_operand:V4SF 0 "register_operand" "")
306 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
307 "TARGET_SSE && reload_completed"
310 (vec_duplicate:V4SF (match_dup 1))
314 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
315 operands[2] = CONST0_RTX (V4SFmode);
319 [(set (match_operand:V2DF 0 "register_operand" "")
320 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
321 "TARGET_SSE2 && reload_completed"
322 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
324 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
325 operands[2] = CONST0_RTX (DFmode);
328 (define_expand "push<mode>1"
329 [(match_operand:AVX256MODE 0 "register_operand" "")]
332 ix86_expand_push (<MODE>mode, operands[0]);
336 (define_expand "push<mode>1"
337 [(match_operand:SSEMODE16 0 "register_operand" "")]
340 ix86_expand_push (<MODE>mode, operands[0]);
344 (define_expand "movmisalign<mode>"
345 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
346 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
349 ix86_expand_vector_move_misalign (<MODE>mode, operands);
353 (define_expand "movmisalign<mode>"
354 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
355 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
358 ix86_expand_vector_move_misalign (<MODE>mode, operands);
362 (define_insn "avx_movu<ssemodesuffix><avxmodesuffix>"
363 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
365 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
367 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
368 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
369 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
370 [(set_attr "type" "ssemov")
371 (set_attr "movu" "1")
372 (set_attr "prefix" "vex")
373 (set_attr "mode" "<MODE>")])
375 (define_insn "sse2_movq128"
376 [(set (match_operand:V2DI 0 "register_operand" "=x")
379 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
380 (parallel [(const_int 0)]))
383 "%vmovq\t{%1, %0|%0, %1}"
384 [(set_attr "type" "ssemov")
385 (set_attr "prefix" "maybe_vex")
386 (set_attr "mode" "TI")])
388 (define_insn "<sse>_movu<ssemodesuffix>"
389 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
391 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
393 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
394 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
395 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
396 [(set_attr "type" "ssemov")
397 (set_attr "movu" "1")
398 (set_attr "mode" "<MODE>")])
400 (define_insn "avx_movdqu<avxmodesuffix>"
401 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
403 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
405 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
406 "vmovdqu\t{%1, %0|%0, %1}"
407 [(set_attr "type" "ssemov")
408 (set_attr "movu" "1")
409 (set_attr "prefix" "vex")
410 (set_attr "mode" "<avxvecmode>")])
412 (define_insn "sse2_movdqu"
413 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
414 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
416 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
417 "movdqu\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "movu" "1")
420 (set_attr "prefix_data16" "1")
421 (set_attr "mode" "TI")])
423 (define_insn "avx_movnt<mode>"
424 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
426 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
428 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
429 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
430 [(set_attr "type" "ssemov")
431 (set_attr "prefix" "vex")
432 (set_attr "mode" "<MODE>")])
434 (define_insn "<sse>_movnt<mode>"
435 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
437 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
439 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
440 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssemov")
442 (set_attr "mode" "<MODE>")])
444 (define_insn "avx_movnt<mode>"
445 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
447 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
450 "vmovntdq\t{%1, %0|%0, %1}"
451 [(set_attr "type" "ssecvt")
452 (set_attr "prefix" "vex")
453 (set_attr "mode" "<avxvecmode>")])
455 (define_insn "sse2_movntv2di"
456 [(set (match_operand:V2DI 0 "memory_operand" "=m")
457 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
460 "movntdq\t{%1, %0|%0, %1}"
461 [(set_attr "type" "ssemov")
462 (set_attr "prefix_data16" "1")
463 (set_attr "mode" "TI")])
465 (define_insn "sse2_movntsi"
466 [(set (match_operand:SI 0 "memory_operand" "=m")
467 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
470 "movnti\t{%1, %0|%0, %1}"
471 [(set_attr "type" "ssemov")
472 (set_attr "prefix_data16" "0")
473 (set_attr "mode" "V2DF")])
475 (define_insn "avx_lddqu<avxmodesuffix>"
476 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
478 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
481 "vlddqu\t{%1, %0|%0, %1}"
482 [(set_attr "type" "ssecvt")
483 (set_attr "movu" "1")
484 (set_attr "prefix" "vex")
485 (set_attr "mode" "<avxvecmode>")])
487 (define_insn "sse3_lddqu"
488 [(set (match_operand:V16QI 0 "register_operand" "=x")
489 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
492 "lddqu\t{%1, %0|%0, %1}"
493 [(set_attr "type" "ssemov")
494 (set_attr "movu" "1")
495 (set_attr "prefix_data16" "0")
496 (set_attr "prefix_rep" "1")
497 (set_attr "mode" "TI")])
499 ; Expand patterns for non-temporal stores. At the moment, only those
500 ; that directly map to insns are defined; it would be possible to
501 ; define patterns for other modes that would expand to several insns.
503 (define_expand "storent<mode>"
504 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
506 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
508 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
511 (define_expand "storent<mode>"
512 [(set (match_operand:MODEF 0 "memory_operand" "")
514 [(match_operand:MODEF 1 "register_operand" "")]
519 (define_expand "storentv2di"
520 [(set (match_operand:V2DI 0 "memory_operand" "")
521 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
526 (define_expand "storentsi"
527 [(set (match_operand:SI 0 "memory_operand" "")
528 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
533 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
535 ;; Parallel floating point arithmetic
537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
539 (define_expand "<code><mode>2"
540 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
542 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
543 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
544 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for add/subtract on the 32-byte float vector modes
;; (AVX256MODEF2P = V8SF, V4DF).  "plusminus", <plusminus_insn> and <CODE>
;; come from an iterator defined outside this part of the file.
;; Operand 0 must be a register; operands 1 and 2 may be registers or
;; memory.  Enabled when AVX256_VEC_FLOAT_MODE_P accepts the mode.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy; it contains no DONE, so the set
;; pattern is what gets emitted afterwards.
546 (define_expand "<plusminus_insn><mode>3"
547 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
548 (plusminus:AVX256MODEF2P
549 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
550 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
551 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
552 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX add/subtract insn for every AVXMODEF2P mode (V4SF, V2DF, V8SF,
;; V4DF).  This is the three-operand form: the template prints operands
;; 2, 1 and 0, and operand 1 uses the "x" constraint instead of being tied
;; to operand 0.  Operand 2 may be a register or memory ("xm").
;; <comm> is defined outside this part of the file -- presumably it
;; supplies the commutativity marker for the plus case; confirm there.
;; The insn is only recognized when ix86_binary_operator_ok accepts the
;; operands.  The {a|b} alternatives in the template give the operand
;; order for the two assembler dialects.
554 (define_insn "*avx_<plusminus_insn><mode>3"
555 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
556 (plusminus:AVXMODEF2P
557 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
558 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
559 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
560 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
561 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
562 [(set_attr "type" "sseadd")
563 (set_attr "prefix" "vex")
564 (set_attr "mode" "<avxvecmode>")])
;; Expander for add/subtract on the 16-byte float vector modes
;; (SSEMODEF2P = V4SF, V2DF).  Same shape as the AVX256MODEF2P expander
;; of the same name: operand 0 is a register, operands 1 and 2 may be
;; registers or memory, and the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy without DONE, so the set
;; pattern is emitted afterwards.  Enabled when SSE_VEC_FLOAT_MODE_P
;; accepts the mode.
566 (define_expand "<plusminus_insn><mode>3"
567 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
568 (plusminus:SSEMODEF2P
569 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
570 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
571 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
572 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-AVX add/subtract insn for the 16-byte float vector modes.  This is
;; the two-operand form: operand 1 carries the matching constraint "0",
;; which ties it to the destination, so the template prints only operands
;; 2 and 0.  Operand 2 may be a register or memory ("xm").
;; <comm> is defined outside this part of the file -- presumably it
;; supplies the commutativity marker for the plus case; confirm there.
;; The insn is only recognized when ix86_binary_operator_ok accepts the
;; operands.  No "prefix" attribute is set here, unlike the AVX variant.
574 (define_insn "*<plusminus_insn><mode>3"
575 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
576 (plusminus:SSEMODEF2P
577 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
578 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
579 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
580 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
581 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
582 [(set_attr "type" "sseadd")
583 (set_attr "mode" "<MODE>")])
585 (define_insn "*avx_vm<plusminus_insn><mode>3"
586 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
587 (vec_merge:SSEMODEF2P
588 (plusminus:SSEMODEF2P
589 (match_operand:SSEMODEF2P 1 "register_operand" "x")
590 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
593 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
594 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
595 [(set_attr "type" "sseadd")
596 (set_attr "prefix" "vex")
597 (set_attr "mode" "<ssescalarmode>")])
599 (define_insn "<sse>_vm<plusminus_insn><mode>3"
600 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
601 (vec_merge:SSEMODEF2P
602 (plusminus:SSEMODEF2P
603 (match_operand:SSEMODEF2P 1 "register_operand" "0")
604 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
607 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
608 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
609 [(set_attr "type" "sseadd")
610 (set_attr "mode" "<ssescalarmode>")])
612 (define_expand "mul<mode>3"
613 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
615 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
616 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
617 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
618 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
620 (define_insn "*avx_mul<mode>3"
621 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
623 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
624 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
625 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
626 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
627 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
628 [(set_attr "type" "ssemul")
629 (set_attr "prefix" "vex")
630 (set_attr "mode" "<avxvecmode>")])
632 (define_expand "mul<mode>3"
633 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
635 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
636 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
637 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
638 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
640 (define_insn "*mul<mode>3"
641 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
643 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
644 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
645 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
646 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
647 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
648 [(set_attr "type" "ssemul")
649 (set_attr "mode" "<MODE>")])
651 (define_insn "*avx_vmmul<mode>3"
652 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
653 (vec_merge:SSEMODEF2P
655 (match_operand:SSEMODEF2P 1 "register_operand" "x")
656 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
659 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
660 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
661 [(set_attr "type" "ssemul")
662 (set_attr "prefix" "vex")
663 (set_attr "mode" "<ssescalarmode>")])
665 (define_insn "<sse>_vmmul<mode>3"
666 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
667 (vec_merge:SSEMODEF2P
669 (match_operand:SSEMODEF2P 1 "register_operand" "0")
670 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
673 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
674 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
675 [(set_attr "type" "ssemul")
676 (set_attr "mode" "<ssescalarmode>")])
678 (define_expand "divv8sf3"
679 [(set (match_operand:V8SF 0 "register_operand" "")
680 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
681 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
684 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
686 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
687 && flag_finite_math_only && !flag_trapping_math
688 && flag_unsafe_math_optimizations)
690 ix86_emit_swdivsf (operands[0], operands[1],
691 operands[2], V8SFmode);
696 (define_expand "divv4df3"
697 [(set (match_operand:V4DF 0 "register_operand" "")
698 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
699 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
701 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
703 (define_insn "avx_div<mode>3"
704 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
706 (match_operand:AVXMODEF2P 1 "register_operand" "x")
707 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
708 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
709 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
710 [(set_attr "type" "ssediv")
711 (set_attr "prefix" "vex")
712 (set_attr "mode" "<MODE>")])
714 (define_expand "divv4sf3"
715 [(set (match_operand:V4SF 0 "register_operand" "")
716 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
717 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
720 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
721 && flag_finite_math_only && !flag_trapping_math
722 && flag_unsafe_math_optimizations)
724 ix86_emit_swdivsf (operands[0], operands[1],
725 operands[2], V4SFmode);
730 (define_expand "divv2df3"
731 [(set (match_operand:V2DF 0 "register_operand" "")
732 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
733 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
737 (define_insn "*avx_div<mode>3"
738 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
740 (match_operand:SSEMODEF2P 1 "register_operand" "x")
741 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
742 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
743 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
744 [(set_attr "type" "ssediv")
745 (set_attr "prefix" "vex")
746 (set_attr "mode" "<MODE>")])
748 (define_insn "<sse>_div<mode>3"
749 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
751 (match_operand:SSEMODEF2P 1 "register_operand" "0")
752 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
753 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
754 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
755 [(set_attr "type" "ssediv")
756 (set_attr "mode" "<MODE>")])
758 (define_insn "*avx_vmdiv<mode>3"
759 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
760 (vec_merge:SSEMODEF2P
762 (match_operand:SSEMODEF2P 1 "register_operand" "x")
763 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
766 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
767 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
768 [(set_attr "type" "ssediv")
769 (set_attr "prefix" "vex")
770 (set_attr "mode" "<ssescalarmode>")])
772 (define_insn "<sse>_vmdiv<mode>3"
773 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
774 (vec_merge:SSEMODEF2P
776 (match_operand:SSEMODEF2P 1 "register_operand" "0")
777 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
780 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
781 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
782 [(set_attr "type" "ssediv")
783 (set_attr "mode" "<ssescalarmode>")])
785 (define_insn "avx_rcpv8sf2"
786 [(set (match_operand:V8SF 0 "register_operand" "=x")
788 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
790 "vrcpps\t{%1, %0|%0, %1}"
791 [(set_attr "type" "sse")
792 (set_attr "prefix" "vex")
793 (set_attr "mode" "V8SF")])
795 (define_insn "sse_rcpv4sf2"
796 [(set (match_operand:V4SF 0 "register_operand" "=x")
798 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
800 "%vrcpps\t{%1, %0|%0, %1}"
801 [(set_attr "type" "sse")
802 (set_attr "atom_sse_attr" "rcp")
803 (set_attr "prefix" "maybe_vex")
804 (set_attr "mode" "V4SF")])
806 (define_insn "*avx_vmrcpv4sf2"
807 [(set (match_operand:V4SF 0 "register_operand" "=x")
809 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
811 (match_operand:V4SF 2 "register_operand" "x")
814 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
815 [(set_attr "type" "sse")
816 (set_attr "prefix" "vex")
817 (set_attr "mode" "SF")])
819 (define_insn "sse_vmrcpv4sf2"
820 [(set (match_operand:V4SF 0 "register_operand" "=x")
822 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
824 (match_operand:V4SF 2 "register_operand" "0")
827 "rcpss\t{%1, %0|%0, %1}"
828 [(set_attr "type" "sse")
829 (set_attr "atom_sse_attr" "rcp")
830 (set_attr "mode" "SF")])
832 (define_expand "sqrtv8sf2"
833 [(set (match_operand:V8SF 0 "register_operand" "")
834 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
837 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
838 && flag_finite_math_only && !flag_trapping_math
839 && flag_unsafe_math_optimizations)
841 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
846 (define_insn "avx_sqrtv8sf2"
847 [(set (match_operand:V8SF 0 "register_operand" "=x")
848 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
850 "vsqrtps\t{%1, %0|%0, %1}"
851 [(set_attr "type" "sse")
852 (set_attr "prefix" "vex")
853 (set_attr "mode" "V8SF")])
855 (define_expand "sqrtv4sf2"
856 [(set (match_operand:V4SF 0 "register_operand" "")
857 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
860 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
861 && flag_finite_math_only && !flag_trapping_math
862 && flag_unsafe_math_optimizations)
864 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
869 (define_insn "sse_sqrtv4sf2"
870 [(set (match_operand:V4SF 0 "register_operand" "=x")
871 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
873 "%vsqrtps\t{%1, %0|%0, %1}"
874 [(set_attr "type" "sse")
875 (set_attr "atom_sse_attr" "sqrt")
876 (set_attr "prefix" "maybe_vex")
877 (set_attr "mode" "V4SF")])
879 (define_insn "sqrtv4df2"
880 [(set (match_operand:V4DF 0 "register_operand" "=x")
881 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
883 "vsqrtpd\t{%1, %0|%0, %1}"
884 [(set_attr "type" "sse")
885 (set_attr "prefix" "vex")
886 (set_attr "mode" "V4DF")])
888 (define_insn "sqrtv2df2"
889 [(set (match_operand:V2DF 0 "register_operand" "=x")
890 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
892 "%vsqrtpd\t{%1, %0|%0, %1}"
893 [(set_attr "type" "sse")
894 (set_attr "prefix" "maybe_vex")
895 (set_attr "mode" "V2DF")])
897 (define_insn "*avx_vmsqrt<mode>2"
898 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
899 (vec_merge:SSEMODEF2P
901 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
902 (match_operand:SSEMODEF2P 2 "register_operand" "x")
904 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
905 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
906 [(set_attr "type" "sse")
907 (set_attr "prefix" "vex")
908 (set_attr "mode" "<ssescalarmode>")])
910 (define_insn "<sse>_vmsqrt<mode>2"
911 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
912 (vec_merge:SSEMODEF2P
914 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
915 (match_operand:SSEMODEF2P 2 "register_operand" "0")
917 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
918 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
919 [(set_attr "type" "sse")
920 (set_attr "atom_sse_attr" "sqrt")
921 (set_attr "mode" "<ssescalarmode>")])
923 (define_expand "rsqrtv8sf2"
924 [(set (match_operand:V8SF 0 "register_operand" "")
926 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
927 "TARGET_AVX && TARGET_SSE_MATH"
929 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
933 (define_insn "avx_rsqrtv8sf2"
934 [(set (match_operand:V8SF 0 "register_operand" "=x")
936 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
938 "vrsqrtps\t{%1, %0|%0, %1}"
939 [(set_attr "type" "sse")
940 (set_attr "prefix" "vex")
941 (set_attr "mode" "V8SF")])
943 (define_expand "rsqrtv4sf2"
944 [(set (match_operand:V4SF 0 "register_operand" "")
946 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
949 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
953 (define_insn "sse_rsqrtv4sf2"
954 [(set (match_operand:V4SF 0 "register_operand" "=x")
956 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
958 "%vrsqrtps\t{%1, %0|%0, %1}"
959 [(set_attr "type" "sse")
960 (set_attr "prefix" "maybe_vex")
961 (set_attr "mode" "V4SF")])
963 (define_insn "*avx_vmrsqrtv4sf2"
964 [(set (match_operand:V4SF 0 "register_operand" "=x")
966 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
968 (match_operand:V4SF 2 "register_operand" "x")
971 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
972 [(set_attr "type" "sse")
973 (set_attr "prefix" "vex")
974 (set_attr "mode" "SF")])
976 (define_insn "sse_vmrsqrtv4sf2"
977 [(set (match_operand:V4SF 0 "register_operand" "=x")
979 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
981 (match_operand:V4SF 2 "register_operand" "0")
984 "rsqrtss\t{%1, %0|%0, %1}"
985 [(set_attr "type" "sse")
986 (set_attr "mode" "SF")])
988 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
989 ;; isn't really correct, as those rtl operators aren't defined when
990 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for smax/smin (the smaxmin code iterator) on 256-bit AVX float
;; vector modes.  When flag_finite_math_only is not set, operand 1 is forced
;; into a register before ix86_fixup_binary_operands_no_copy is applied.
992 (define_expand "<code><mode>3"
993 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
994 (smaxmin:AVX256MODEF2P
995 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
996 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
997 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
999 if (!flag_finite_math_only)
1000 operands[1] = force_reg (<MODE>mode, operands[1]);
1001 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander for min/max on the SSEMODEF2P vector modes, enabled by
;; SSE_VEC_FLOAT_MODE_P.  When flag_finite_math_only is not set, operand 1 is
;; forced into a register before ix86_fixup_binary_operands_no_copy is applied.
1004 (define_expand "<code><mode>3"
1005 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1007 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1008 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1009 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1011 if (!flag_finite_math_only)
1012 operands[1] = force_reg (<MODE>mode, operands[1]);
1013 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX min/max insn that only matches under flag_finite_math_only and when
;; ix86_binary_operator_ok accepts the operands.  Operand 1 carries the "%"
;; modifier, so operands 1 and 2 may be exchanged.
1016 (define_insn "*avx_<code><mode>3_finite"
1017 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1019 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1020 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1021 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1022 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1023 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1024 [(set_attr "type" "sseadd")
1025 (set_attr "prefix" "vex")
1026 (set_attr "mode" "<MODE>")])
;; Non-VEX min/max insn for flag_finite_math_only.  Operand 1 is tied to the
;; destination ("0") and marked commutative ("%"); the two-operand template
;; therefore names only operand 2 and operand 0.
1028 (define_insn "*<code><mode>3_finite"
1029 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1031 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1032 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1033 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1034 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1035 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1036 [(set_attr "type" "sseadd")
1037 (set_attr "mode" "<MODE>")])
;; AVX min/max insn without the flag_finite_math_only restriction.  The
;; result of the hardware min/max depends on operand order when NaNs or
;; signed zeros are involved, so operand 1 must be a register and must NOT
;; carry the "%" commutative modifier.  This matches the non-VEX
;; "*<code><mode>3" pattern, which uses register_operand "0" for operand 1.
1039 (define_insn "*avx_<code><mode>3"
1040 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1042 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1043 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1044 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1045 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1046 [(set_attr "type" "sseadd")
1047 (set_attr "prefix" "vex")
1048 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX min/max insn without the flag_finite_math_only restriction.
;; Operand 1 is a register tied to the destination and has no "%" modifier,
;; so the operand order given in the RTL is preserved.
1050 (define_insn "*<code><mode>3"
1051 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1053 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1054 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1055 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1056 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1057 [(set_attr "type" "sseadd")
1058 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max merged into a vector (vec_merge), emitted with the
;; scalar mode suffix and three operands.  The "type" attribute is "sseadd",
;; the same classification used by the non-VEX "<sse>_vm<code><mode>3"
;; pattern and by the packed min/max patterns.
1060 (define_insn "*avx_vm<code><mode>3"
1061 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1062 (vec_merge:SSEMODEF2P
1064 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1065 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1068 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1069 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1070 [(set_attr "type" "sseadd")
1071 (set_attr "prefix" "vex")
1072 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX scalar min/max merged into a vector (vec_merge).  Operand 1 is
;; tied to the destination; the template uses the scalar mode suffix and the
;; "mode" attribute is the scalar mode.
1074 (define_insn "<sse>_vm<code><mode>3"
1075 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1076 (vec_merge:SSEMODEF2P
1078 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1079 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1082 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1083 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1084 [(set_attr "type" "sseadd")
1085 (set_attr "mode" "<ssescalarmode>")])
1087 ;; These versions of the min/max patterns implement exactly the operations
1088 ;; min = (op1 < op2 ? op1 : op2)
1089 ;; max = (!(op1 < op2) ? op1 : op2)
1090 ;; Their operands are not commutative, and thus they may be used in the
1091 ;; presence of -0.0 and NaN.
;; AVX order-preserving minimum: operand 1 must be a register, operand 2 may
;; be memory, and neither carries a "%" modifier.  Emitted as three-operand
;; vmin with the packed mode suffix.
1093 (define_insn "*avx_ieee_smin<mode>3"
1094 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1096 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1097 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1099 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1100 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1101 [(set_attr "type" "sseadd")
1102 (set_attr "prefix" "vex")
1103 (set_attr "mode" "<avxvecmode>")])
;; AVX order-preserving maximum: same operand layout as the AVX ieee smin
;; pattern (register operand 1, register-or-memory operand 2, no "%"), but
;; emitted as vmax.
1105 (define_insn "*avx_ieee_smax<mode>3"
1106 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1108 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1109 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1111 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1112 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1113 [(set_attr "type" "sseadd")
1114 (set_attr "prefix" "vex")
1115 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX order-preserving minimum.  Operand 1 is tied to the destination
;; ("0") with no "%" modifier; emitted as two-operand min with the packed
;; mode suffix.
1117 (define_insn "*ieee_smin<mode>3"
1118 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1120 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1121 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1123 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1124 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1125 [(set_attr "type" "sseadd")
1126 (set_attr "mode" "<MODE>")])
;; Non-VEX order-preserving maximum.  Operand 1 is tied to the destination
;; ("0") with no "%" modifier; emitted as two-operand max with the packed
;; mode suffix.
1128 (define_insn "*ieee_smax<mode>3"
1129 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1131 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1132 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1134 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1135 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1136 [(set_attr "type" "sseadd")
1137 (set_attr "mode" "<MODE>")])
;; AVX V8SF add/subtract insn.  The visible RTL combines operands 1 and 2
;; and also contains (minus op1 op2) on the same operands via match_dup.
;; Emitted as three-operand vaddsubps.
1139 (define_insn "avx_addsubv8sf3"
1140 [(set (match_operand:V8SF 0 "register_operand" "=x")
1143 (match_operand:V8SF 1 "register_operand" "x")
1144 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1145 (minus:V8SF (match_dup 1) (match_dup 2))
1148 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1149 [(set_attr "type" "sseadd")
1150 (set_attr "prefix" "vex")
1151 (set_attr "mode" "V8SF")])
;; AVX V4DF add/subtract insn; same shape as the V8SF variant, including the
;; (minus op1 op2) arm on the duplicated operands.  Emitted as vaddsubpd.
1153 (define_insn "avx_addsubv4df3"
1154 [(set (match_operand:V4DF 0 "register_operand" "=x")
1157 (match_operand:V4DF 1 "register_operand" "x")
1158 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1159 (minus:V4DF (match_dup 1) (match_dup 2))
1162 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1163 [(set_attr "type" "sseadd")
1164 (set_attr "prefix" "vex")
1165 (set_attr "mode" "V4DF")])
;; AVX (VEX-encoded) V4SF add/subtract insn with a separate destination
;; register.  Emitted as three-operand vaddsubps.
1167 (define_insn "*avx_addsubv4sf3"
1168 [(set (match_operand:V4SF 0 "register_operand" "=x")
1171 (match_operand:V4SF 1 "register_operand" "x")
1172 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1173 (minus:V4SF (match_dup 1) (match_dup 2))
1176 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1177 [(set_attr "type" "sseadd")
1178 (set_attr "prefix" "vex")
1179 (set_attr "mode" "V4SF")])
;; Non-VEX V4SF add/subtract insn.  Operand 1 is tied to the destination, so
;; the two-operand addsubps template names only operand 2 and operand 0.
;; The prefix_rep attribute is set to 1 for this insn.
1181 (define_insn "sse3_addsubv4sf3"
1182 [(set (match_operand:V4SF 0 "register_operand" "=x")
1185 (match_operand:V4SF 1 "register_operand" "0")
1186 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1187 (minus:V4SF (match_dup 1) (match_dup 2))
1190 "addsubps\t{%2, %0|%0, %2}"
1191 [(set_attr "type" "sseadd")
1192 (set_attr "prefix_rep" "1")
1193 (set_attr "mode" "V4SF")])
;; AVX (VEX-encoded) V2DF add/subtract insn with a separate destination
;; register.  Emitted as three-operand vaddsubpd.
1195 (define_insn "*avx_addsubv2df3"
1196 [(set (match_operand:V2DF 0 "register_operand" "=x")
1199 (match_operand:V2DF 1 "register_operand" "x")
1200 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1201 (minus:V2DF (match_dup 1) (match_dup 2))
1204 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1205 [(set_attr "type" "sseadd")
1206 (set_attr "prefix" "vex")
1207 (set_attr "mode" "V2DF")])
;; Non-VEX V2DF add/subtract insn.  Operand 1 is tied to the destination;
;; emitted as two-operand addsubpd.  The atom_unit attribute is "complex".
1209 (define_insn "sse3_addsubv2df3"
1210 [(set (match_operand:V2DF 0 "register_operand" "=x")
1213 (match_operand:V2DF 1 "register_operand" "0")
1214 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1215 (minus:V2DF (match_dup 1) (match_dup 2))
1218 "addsubpd\t{%2, %0|%0, %2}"
1219 [(set_attr "type" "sseadd")
1220 (set_attr "atom_unit" "complex")
1221 (set_attr "mode" "V2DF")])
;; AVX V4DF horizontal add/subtract (iterated over plusminus).  The visible
;; vec_selects pick elements 0,1 and 2,3 of operand 1, and the same element
;; pairs of operand 2.  Emitted as vh<plusminus_mnemonic>pd.
1223 (define_insn "avx_h<plusminus_insn>v4df3"
1224 [(set (match_operand:V4DF 0 "register_operand" "=x")
1229 (match_operand:V4DF 1 "register_operand" "x")
1230 (parallel [(const_int 0)]))
1231 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1233 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1234 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1238 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1239 (parallel [(const_int 0)]))
1240 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1242 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1243 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1245 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1246 [(set_attr "type" "sseadd")
1247 (set_attr "prefix" "vex")
1248 (set_attr "mode" "V4DF")])
;; AVX V8SF horizontal add/subtract (iterated over plusminus).  The pattern
;; first selects elements 0..3 of operand 1 and of operand 2 in adjacent
;; pairs, then elements 4..7 of each operand in adjacent pairs.  Emitted as
;; vh<plusminus_mnemonic>ps.
1250 (define_insn "avx_h<plusminus_insn>v8sf3"
1251 [(set (match_operand:V8SF 0 "register_operand" "=x")
1257 (match_operand:V8SF 1 "register_operand" "x")
1258 (parallel [(const_int 0)]))
1259 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1261 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1262 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1266 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1267 (parallel [(const_int 0)]))
1268 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1270 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1271 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1275 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1276 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1278 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1279 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1282 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1283 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1285 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1286 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1288 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1289 [(set_attr "type" "sseadd")
1290 (set_attr "prefix" "vex")
1291 (set_attr "mode" "V8SF")])
;; AVX (VEX-encoded) V4SF horizontal add/subtract.  Adjacent element pairs
;; (0,1) and (2,3) are selected from operand 1 and then from operand 2.
;; Emitted as three-operand vh<plusminus_mnemonic>ps.
1293 (define_insn "*avx_h<plusminus_insn>v4sf3"
1294 [(set (match_operand:V4SF 0 "register_operand" "=x")
1299 (match_operand:V4SF 1 "register_operand" "x")
1300 (parallel [(const_int 0)]))
1301 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1303 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1304 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1308 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1309 (parallel [(const_int 0)]))
1310 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1312 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1313 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1315 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1316 [(set_attr "type" "sseadd")
1317 (set_attr "prefix" "vex")
1318 (set_attr "mode" "V4SF")])
;; Non-VEX V4SF horizontal add/subtract.  Operand 1 is tied to the
;; destination; adjacent element pairs (0,1) and (2,3) are selected from
;; operand 1 and then operand 2.  The reduc_splus_v4sf expander calls the
;; generated gen_sse3_haddv4sf3 for the plus variant of this pattern.
1320 (define_insn "sse3_h<plusminus_insn>v4sf3"
1321 [(set (match_operand:V4SF 0 "register_operand" "=x")
1326 (match_operand:V4SF 1 "register_operand" "0")
1327 (parallel [(const_int 0)]))
1328 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1330 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1331 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1335 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1336 (parallel [(const_int 0)]))
1337 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1339 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1340 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1342 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1343 [(set_attr "type" "sseadd")
1344 (set_attr "atom_unit" "complex")
1345 (set_attr "prefix_rep" "1")
1346 (set_attr "mode" "V4SF")])
;; AVX (VEX-encoded) V2DF horizontal add/subtract.  Elements 0 and 1 of
;; operand 1 are selected, followed by elements 0 and 1 of operand 2.
;; Emitted as three-operand vh<plusminus_mnemonic>pd.
1348 (define_insn "*avx_h<plusminus_insn>v2df3"
1349 [(set (match_operand:V2DF 0 "register_operand" "=x")
1353 (match_operand:V2DF 1 "register_operand" "x")
1354 (parallel [(const_int 0)]))
1355 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1358 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1359 (parallel [(const_int 0)]))
1360 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1362 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1363 [(set_attr "type" "sseadd")
1364 (set_attr "prefix" "vex")
1365 (set_attr "mode" "V2DF")])
;; Non-VEX V2DF horizontal add/subtract.  Operand 1 is tied to the
;; destination; elements 0 and 1 of operand 1 and of operand 2 are selected.
;; The reduc_splus_v2df expander calls the generated gen_sse3_haddv2df3 for
;; the plus variant of this pattern.
1367 (define_insn "sse3_h<plusminus_insn>v2df3"
1368 [(set (match_operand:V2DF 0 "register_operand" "=x")
1372 (match_operand:V2DF 1 "register_operand" "0")
1373 (parallel [(const_int 0)]))
1374 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1377 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1378 (parallel [(const_int 0)]))
1379 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1381 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1382 [(set_attr "type" "sseadd")
1383 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector (operand 1) into operand 0.  Two code
;; paths are visible: two chained gen_sse3_haddv4sf3 insns through a fresh
;; V4SF temporary, or ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the choice between them is presumably made on TARGET_SSE3
;; -- confirm against the full expander body.
1385 (define_expand "reduc_splus_v4sf"
1386 [(match_operand:V4SF 0 "register_operand" "")
1387 (match_operand:V4SF 1 "register_operand" "")]
1392 rtx tmp = gen_reg_rtx (V4SFmode);
1393 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1394 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1397 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: a single gen_sse3_haddv2df3 with operand 1
;; supplied as both sources, writing operand 0.
1401 (define_expand "reduc_splus_v2df"
1402 [(match_operand:V2DF 0 "register_operand" "")
1403 (match_operand:V2DF 1 "register_operand" "")]
1406 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining generator.
1410 (define_expand "reduc_smax_v4sf"
1411 [(match_operand:V4SF 0 "register_operand" "")
1412 (match_operand:V4SF 1 "register_operand" "")]
1415 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector: delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining generator.
1419 (define_expand "reduc_smin_v4sf"
1420 [(match_operand:V4SF 0 "register_operand" "")
1421 (match_operand:V4SF 1 "register_operand" "")]
1424 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1428 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1430 ;; Parallel floating point comparisons
1432 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate: operand 3 is an SImode
;; constant accepted by const_0_to_31_operand and is printed as the first
;; AT&T operand of vcmp.  length_immediate is 1 for that immediate byte.
1434 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1435 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1437 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1438 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1439 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1442 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1443 [(set_attr "type" "ssecmp")
1444 (set_attr "length_immediate" "1")
1445 (set_attr "prefix" "vex")
1446 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit predicate, merged into a vector
;; (vec_merge).  Operand 3 is an SImode constant accepted by
;; const_0_to_31_operand.  Operand 0 is an output and carries the "=x"
;; constraint like every other define_insn destination in this file, so that
;; register allocation places it in an SSE register.
1448 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1449 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1450 (vec_merge:SSEMODEF2P
1452 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1453 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1454 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1459 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1460 [(set_attr "type" "ssecmp")
1461 (set_attr "length_immediate" "1")
1462 (set_attr "prefix" "vex")
1463 (set_attr "mode" "<ssescalarmode>")])
1465 ;; We don't promote 128bit vector compare intrinsics.  But the vectorizer
1466 ;; may generate 256bit vector compare instructions.
;; The comparison itself is operand 3, a match_operator checked by
;; avx_comparison_float_operator; "%D3" prints it as part of the mnemonic.
1467 (define_insn "*avx_maskcmp<mode>3"
1468 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1469 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1470 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1471 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1472 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1473 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1474 [(set_attr "type" "ssecmp")
1475 (set_attr "prefix" "vex")
1476 (set_attr "length_immediate" "1")
1477 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX mask compare over the SSEMODEF4 modes.  The comparison is
;; operand 3 (sse_comparison_operator), printed via "%D3"; operand 1 is tied
;; to the destination.  The visible part of the condition accepts either
;; SSE_FLOAT_MODE_P or SSE_VEC_FLOAT_MODE_P modes.
1479 (define_insn "<sse>_maskcmp<mode>3"
1480 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1481 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1482 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1483 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1485 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1486 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1487 [(set_attr "type" "ssecmp")
1488 (set_attr "length_immediate" "1")
1489 (set_attr "mode" "<MODE>")])
;; AVX scalar mask compare merged into a vector (vec_merge).  The comparison
;; is operand 3 (sse_comparison_operator), printed via "%D3".  The encoded
;; compare carries a one-byte predicate immediate, so length_immediate is 1,
;; as in "*avx_maskcmp<mode>3" and "<sse>_vmmaskcmp<mode>3".
1491 (define_insn "*avx_vmmaskcmp<mode>3"
1492 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1493 (vec_merge:SSEMODEF2P
1494 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1495 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1496 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1499 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1500 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1501 [(set_attr "type" "ssecmp")
1502 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1503 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX scalar mask compare merged into a vector (vec_merge).  Operand 1
;; is tied to the destination; the comparison operator (operand 3) is
;; printed via "%D3" with the scalar mode suffix.
1505 (define_insn "<sse>_vmmaskcmp<mode>3"
1506 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1507 (vec_merge:SSEMODEF2P
1508 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1509 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1510 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1513 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1514 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1515 [(set_attr "type" "ssecmp")
1516 (set_attr "length_immediate" "1")
1517 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode.  Element 0 of each
;; vector operand is selected (parallel [(const_int 0)]).  Emitted as
;; comis<suffix> with an optional VEX prefix ("%v" / maybe_vex).  The
;; prefix_data16 attribute is chosen by an if_then_else on mode DF;
;; prefix_rep is 0.
1519 (define_insn "<sse>_comi"
1520 [(set (reg:CCFP FLAGS_REG)
1523 (match_operand:<ssevecmode> 0 "register_operand" "x")
1524 (parallel [(const_int 0)]))
1526 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1527 (parallel [(const_int 0)]))))]
1528 "SSE_FLOAT_MODE_P (<MODE>mode)"
1529 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1530 [(set_attr "type" "ssecomi")
1531 (set_attr "prefix" "maybe_vex")
1532 (set_attr "prefix_rep" "0")
1533 (set (attr "prefix_data16")
1534 (if_then_else (eq_attr "mode" "DF")
1536 (const_string "0")))
1537 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets FLAGS_REG in CCFPU mode; otherwise laid out like
;; the "<sse>_comi" pattern (element 0 of each vector operand, optional VEX
;; prefix, prefix_data16 selected on mode DF).  Emitted as ucomis<suffix>.
1539 (define_insn "<sse>_ucomi"
1540 [(set (reg:CCFPU FLAGS_REG)
1543 (match_operand:<ssevecmode> 0 "register_operand" "x")
1544 (parallel [(const_int 0)]))
1546 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1547 (parallel [(const_int 0)]))))]
1548 "SSE_FLOAT_MODE_P (<MODE>mode)"
1549 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1550 [(set_attr "type" "ssecomi")
1551 (set_attr "prefix" "maybe_vex")
1552 (set_attr "prefix_rep" "0")
1553 (set (attr "prefix_data16")
1554 (if_then_else (eq_attr "mode" "DF")
1556 (const_string "0")))
1557 (set_attr "mode" "<MODE>")])
;; Vector conditional select expander: operand 0 = (operand 3 applied to
;; operands 4 and 5) ? operand 1 : operand 2.  Enabled for SSE or AVX vector
;; float modes.  The work is done by ix86_expand_fp_vcond, whose boolean
;; result is captured in "ok".
1559 (define_expand "vcond<mode>"
1560 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1561 (if_then_else:AVXMODEF2P
1562 (match_operator 3 ""
1563 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1564 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1565 (match_operand:AVXMODEF2P 1 "general_operand" "")
1566 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1567 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1568 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1570 bool ok = ix86_expand_fp_vcond (operands);
1575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1577 ;; Parallel floating point logical operations
1579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vector and-not on float vector modes: operand 1 must be a register,
;; operand 2 may be memory.  Emitted as three-operand vandn<suffix>.
1581 (define_insn "avx_andnot<mode>3"
1582 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1585 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1586 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1587 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1588 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1589 [(set_attr "type" "sselog")
1590 (set_attr "prefix" "vex")
1591 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX vector and-not on float vector modes.  Operand 1 is tied to the
;; destination; emitted as two-operand andn<suffix>.
1593 (define_insn "<sse>_andnot<mode>3"
1594 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1597 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1598 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1599 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1600 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1601 [(set_attr "type" "sselog")
1602 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic operations on 256-bit AVX float vector modes.
;; The only preparation step is ix86_fixup_binary_operands_no_copy.
1604 (define_expand "<code><mode>3"
1605 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1606 (any_logic:AVX256MODEF2P
1607 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1608 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1609 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1610 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX vector logic insn (any_logic) on float vector modes; operands 1 and 2
;; are commutative ("%").  When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set
;; the "ps" form is emitted regardless of mode; otherwise the mode's own
;; suffix is used.
1612 (define_insn "*avx_<code><mode>3"
1613 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1614 (any_logic:AVXMODEF2P
1615 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1616 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1617 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1618 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1620 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1621 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1623 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1625 [(set_attr "type" "sselog")
1626 (set_attr "prefix" "vex")
1627 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic operations on the SSEMODEF2P float vector
;; modes.  The only preparation step is ix86_fixup_binary_operands_no_copy.
1629 (define_expand "<code><mode>3"
1630 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1631 (any_logic:SSEMODEF2P
1632 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1633 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1634 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1635 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-VEX vector logic insn (any_logic); operand 1 is commutative and tied
;; to the destination ("%0").  When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set the "ps" form is emitted regardless of mode.
1637 (define_insn "*<code><mode>3"
1638 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1639 (any_logic:SSEMODEF2P
1640 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1641 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1642 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1643 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1645 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1646 return "<logic>ps\t{%2, %0|%0, %2}";
1648 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1650 [(set_attr "type" "sselog")
1651 (set_attr "mode" "<MODE>")])
;; Vector copysign built from logical operations.  Operand 3 is a mask
;; produced by ix86_build_signbit_mask for the scalar element mode.
;; Operand 1 is combined with NOT(mask), operand 2 is ANDed with the mask,
;; and the two fresh temporaries (operands 4 and 5) are IORed into operand 0.
1653 (define_expand "copysign<mode>3"
1656 (not:SSEMODEF2P (match_dup 3))
1657 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1659 (and:SSEMODEF2P (match_dup 3)
1660 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1661 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1662 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1663 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1665 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1667 operands[4] = gen_reg_rtx (<MODE>mode);
1668 operands[5] = gen_reg_rtx (<MODE>mode);
1671 ;; Also define scalar versions. These are used for abs, neg, and
1672 ;; conditional move. Using subregs into vector modes causes register
1673 ;; allocation lossage. These patterns do not allow memory operands
1674 ;; because the native instructions read the full 128-bits.
;; AVX and-not on scalar float modes (MODEF).  All operands are registers;
;; the packed vandnp<suffix> instruction is emitted and the "mode" attribute
;; is the corresponding vector mode.
1676 (define_insn "*avx_andnot<mode>3"
1677 [(set (match_operand:MODEF 0 "register_operand" "=x")
1680 (match_operand:MODEF 1 "register_operand" "x"))
1681 (match_operand:MODEF 2 "register_operand" "x")))]
1682 "AVX_FLOAT_MODE_P (<MODE>mode)"
1683 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1684 [(set_attr "type" "sselog")
1685 (set_attr "prefix" "vex")
1686 (set_attr "mode" "<ssevecmode>")])
;; Non-VEX and-not on scalar float modes (MODEF).  All operands are
;; registers, with operand 1 tied to the destination; emitted as the packed
;; andnp<suffix> instruction.
1688 (define_insn "*andnot<mode>3"
1689 [(set (match_operand:MODEF 0 "register_operand" "=x")
1692 (match_operand:MODEF 1 "register_operand" "0"))
1693 (match_operand:MODEF 2 "register_operand" "x")))]
1694 "SSE_FLOAT_MODE_P (<MODE>mode)"
1695 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1696 [(set_attr "type" "sselog")
1697 (set_attr "mode" "<ssevecmode>")])
;; AVX logic insn on scalar float modes (MODEF), register operands only.
;; Emits the packed instruction: the "ps" form when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise "p" followed by
;; the mode's suffix.
1699 (define_insn "*avx_<code><mode>3"
1700 [(set (match_operand:MODEF 0 "register_operand" "=x")
1702 (match_operand:MODEF 1 "register_operand" "x")
1703 (match_operand:MODEF 2 "register_operand" "x")))]
1704 "AVX_FLOAT_MODE_P (<MODE>mode)"
1706 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1707 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1709 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1711 [(set_attr "type" "sselog")
1712 (set_attr "prefix" "vex")
1713 (set_attr "mode" "<ssevecmode>")])
;; Non-VEX logic insn on scalar float modes (MODEF), register operands only,
;; with operand 1 tied to the destination.  Emits the packed instruction:
;; the "ps" form when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise
;; "p" followed by the mode's suffix.
1715 (define_insn "*<code><mode>3"
1716 [(set (match_operand:MODEF 0 "register_operand" "=x")
1718 (match_operand:MODEF 1 "register_operand" "0")
1719 (match_operand:MODEF 2 "register_operand" "x")))]
1720 "SSE_FLOAT_MODE_P (<MODE>mode)"
1722 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1723 return "<logic>ps\t{%2, %0|%0, %2}";
1725 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1727 [(set_attr "type" "sselog")
1728 (set_attr "mode" "<ssevecmode>")])
1730 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1732 ;; FMA4 floating point multiply/accumulate instructions. This
1733 ;; includes the scalar version of the instructions as well as the vector versions.
1736 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1738 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1739 ;; combine to generate a multiply/add with two memory references. We then
1740 ;; split this insn, into loading up the destination register with one of the
1741 ;; memory operations. If we don't manage to split the insn, reload will
1742 ;; generate the appropriate moves. The reason this is needed, is that combine
1743 ;; has already folded one of the memory references into both the multiply and
1744 ;; add insns, and it can't generate a new pseudo. I.e.:
1745 ;; (set (reg1) (mem (addr1)))
1746 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1747 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add over the FMA4MODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Two constraint alternatives: operand 2
;; in a register with operand 3 in register or memory, or operand 2 in memory
;; with operand 3 in a register.  Operand 1 is marked commutative ("%").
1749 (define_insn "fma4_fmadd<mode>4256"
1750 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1753 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1754 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1755 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1756 "TARGET_FMA4 && TARGET_FUSED_MADD"
1757 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1758 [(set_attr "type" "ssemuladd")
1759 (set_attr "mode" "<MODE>")])
1761 ;; Floating multiply and subtract.
;; FMA4MODEF4 variant; same operand constraints and enabling condition as
;; fma4_fmadd<mode>4256, emitted as vfmsub<suffix>.
1762 (define_insn "fma4_fmsub<mode>4256"
1763 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1766 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1767 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1768 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1769 "TARGET_FMA4 && TARGET_FUSED_MADD"
1770 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1771 [(set_attr "type" "ssemuladd")
1772 (set_attr "mode" "<MODE>")])
1774 ;; Floating point negative multiply and add.
1775 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; In the RTL, operand 3 (c) therefore appears first and the product of
;; operands 1 and 2 second; the assembler template still lists the operands
;; in numeric order.
1776 (define_insn "fma4_fnmadd<mode>4256"
1777 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1779 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1781 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1782 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1783 "TARGET_FMA4 && TARGET_FUSED_MADD"
1784 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1785 [(set_attr "type" "ssemuladd")
1786 (set_attr "mode" "<MODE>")])
1788 ;; Floating point negative multiply and subtract.
;; FMA4MODEF4 variant, enabled by TARGET_FMA4 && TARGET_FUSED_MADD and
;; emitted as vfnmsub<suffix>; operand constraints match the other
;; 256-bit FMA4 patterns.
1789 (define_insn "fma4_fnmsub<mode>4256"
1790 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1794 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1795 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1796 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1797 "TARGET_FMA4 && TARGET_FUSED_MADD"
1798 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1799 [(set_attr "type" "ssemuladd")
1800 (set_attr "mode" "<MODE>")])
;; FMA4 multiply-add over the SSEMODEF4 modes, enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.  Two constraint alternatives: operand 2
;; in a register with operand 3 in register or memory, or operand 2 in memory
;; with operand 3 in a register.
1802 (define_insn "fma4_fmadd<mode>4"
1803 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1806 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1807 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1808 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1809 "TARGET_FMA4 && TARGET_FUSED_MADD"
1810 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1811 [(set_attr "type" "ssemuladd")
1812 (set_attr "mode" "<MODE>")])
1814 ;; For the scalar operations, use operand1 for the upper words that aren't
1815 ;; modified, so restrict the forms that are generated.
1816 ;; Scalar version of fmadd.
;; The multiply-add is wrapped in a vec_merge and emitted with the scalar
;; mode suffix; the "mode" attribute is nevertheless the vector mode.
1817 (define_insn "fma4_vmfmadd<mode>4"
1818 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1819 (vec_merge:SSEMODEF2P
1822 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1823 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1824 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1827 "TARGET_FMA4 && TARGET_FUSED_MADD"
1828 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1829 [(set_attr "type" "ssemuladd")
1830 (set_attr "mode" "<MODE>")])
1832 ;; Floating multiply and subtract.
1833 ;; Allow two memory operands the same as fmadd.
;; All three inputs use the nonimmediate_operand predicate; the constraint
;; alternatives still permit at most one of operands 2 and 3 in memory.
1834 (define_insn "fma4_fmsub<mode>4"
1835 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1838 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1839 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1840 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1841 "TARGET_FMA4 && TARGET_FUSED_MADD"
1842 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1843 [(set_attr "type" "ssemuladd")
1844 (set_attr "mode" "<MODE>")])
1846 ;; For the scalar operations, use operand1 for the upper words that aren't
1847 ;; modified, so restrict the forms that are generated.
1848 ;; Scalar version of fmsub.
;; The multiply-subtract is wrapped in a vec_merge and emitted with the
;; scalar mode suffix.
1849 (define_insn "fma4_vmfmsub<mode>4"
1850 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1851 (vec_merge:SSEMODEF2P
1854 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1855 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1856 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1859 "TARGET_FMA4 && TARGET_FUSED_MADD"
1860 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1861 [(set_attr "type" "ssemuladd")
1862 (set_attr "mode" "<MODE>")])
1864 ;; Floating point negative multiply and add.
1865 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; In the RTL, operand 3 (c) appears first and the product of operands 1
;; and 2 second; the assembler template lists operands in numeric order.
1866 (define_insn "fma4_fnmadd<mode>4"
1867 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1869 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1871 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1872 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1873 "TARGET_FMA4 && TARGET_FUSED_MADD"
1874 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1875 [(set_attr "type" "ssemuladd")
1876 (set_attr "mode" "<MODE>")])
1878 ;; For the scalar operations, use operand1 for the upper words that aren't
1879 ;; modified, so restrict the forms that are generated.
1880 ;; Scalar version of fnmadd.
;; Operand 3 precedes the product of operands 1 and 2 in the RTL; the whole
;; expression is wrapped in a vec_merge and emitted with the scalar suffix.
1881 (define_insn "fma4_vmfnmadd<mode>4"
1882 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1883 (vec_merge:SSEMODEF2P
1885 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1887 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1888 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1891 "TARGET_FMA4 && TARGET_FUSED_MADD"
1892 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1893 [(set_attr "type" "ssemuladd")
1894 (set_attr "mode" "<MODE>")])
1896 ;; Floating point negative multiply and subtract.
1897 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; SSEMODEF4 variant, enabled by TARGET_FMA4 && TARGET_FUSED_MADD and
;; emitted as vfnmsub<suffix>.
1898 (define_insn "fma4_fnmsub<mode>4"
1899 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1903 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1904 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1905 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1906 "TARGET_FMA4 && TARGET_FUSED_MADD"
1907 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1908 [(set_attr "type" "ssemuladd")
1909 (set_attr "mode" "<MODE>")])
1911 ;; For the scalar operations, use operand1 for the upper words that aren't
1912 ;; modified, so restrict the forms that are generated.
1913 ;; Scalar version of fnmsub.
;; The expression is wrapped in a vec_merge and emitted with the scalar
;; mode suffix.
1914 (define_insn "fma4_vmfnmsub<mode>4"
1915 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1916 (vec_merge:SSEMODEF2P
1920 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1921 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1922 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1925 "TARGET_FMA4 && TARGET_FUSED_MADD"
1926 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1927 [(set_attr "type" "ssemuladd")
1928 (set_attr "mode" "<MODE>")])
;; fma4i_fmadd<mode>4256: FMA4MODEF4 pattern tagged UNSPEC_FMA4_INTRINSIC.
;; Emits vfmadd<ssemodesuffix>; alternative 0 allows operand 3 in memory,
;; alternative 1 allows operand 2 in memory.
1930 (define_insn "fma4i_fmadd<mode>4256"
1931 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1935 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1936 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1937 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1938 UNSPEC_FMA4_INTRINSIC))]
1940 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1941 [(set_attr "type" "ssemuladd")
1942 (set_attr "mode" "<MODE>")])
;; fma4i_fmsub<mode>4256: FMA4MODEF4 pattern tagged UNSPEC_FMA4_INTRINSIC.
;; Emits vfmsub<ssemodesuffix>; the memory operand, if any, is operand 3
;; (alternative 0) or operand 2 (alternative 1).
1944 (define_insn "fma4i_fmsub<mode>4256"
1945 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1949 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1950 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1951 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1952 UNSPEC_FMA4_INTRINSIC))]
1954 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1955 [(set_attr "type" "ssemuladd")
1956 (set_attr "mode" "<MODE>")])
;; fma4i_fnmadd<mode>4256: FMA4MODEF4 pattern tagged UNSPEC_FMA4_INTRINSIC.
;; Operand 3 is listed first in the RTL, ahead of operands 1 and 2.
;; Emits vfnmadd<ssemodesuffix>.
1958 (define_insn "fma4i_fnmadd<mode>4256"
1959 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1962 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1964 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1965 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1966 UNSPEC_FMA4_INTRINSIC))]
1968 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1969 [(set_attr "type" "ssemuladd")
1970 (set_attr "mode" "<MODE>")])
;; fma4i_fnmsub<mode>4256: FMA4MODEF4 pattern tagged UNSPEC_FMA4_INTRINSIC.
;; Emits vfnmsub<ssemodesuffix>; the memory operand, if any, is operand 3
;; (alternative 0) or operand 2 (alternative 1).
1972 (define_insn "fma4i_fnmsub<mode>4256"
1973 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1978 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1979 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1980 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1981 UNSPEC_FMA4_INTRINSIC))]
1983 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1984 [(set_attr "type" "ssemuladd")
1985 (set_attr "mode" "<MODE>")])
;; fma4i_fmadd<mode>4: SSEMODEF2P pattern tagged UNSPEC_FMA4_INTRINSIC.
;; Emits vfmadd<ssemodesuffix>; alternative 0 allows operand 3 in memory,
;; alternative 1 allows operand 2 in memory.
1987 (define_insn "fma4i_fmadd<mode>4"
1988 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1992 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1993 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1994 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1995 UNSPEC_FMA4_INTRINSIC))]
1997 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1998 [(set_attr "type" "ssemuladd")
1999 (set_attr "mode" "<MODE>")])
;; fma4i_fmsub<mode>4: SSEMODEF2P pattern tagged UNSPEC_FMA4_INTRINSIC.
;; Emits vfmsub<ssemodesuffix>; the memory operand, if any, is operand 3
;; (alternative 0) or operand 2 (alternative 1).
2001 (define_insn "fma4i_fmsub<mode>4"
2002 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2006 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2007 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2008 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2009 UNSPEC_FMA4_INTRINSIC))]
2011 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2012 [(set_attr "type" "ssemuladd")
2013 (set_attr "mode" "<MODE>")])
;; fma4i_fnmadd<mode>4: SSEMODEF2P pattern tagged UNSPEC_FMA4_INTRINSIC.
;; Operand 3 is listed first in the RTL, ahead of operands 1 and 2.
;; Emits vfnmadd<ssemodesuffix>.
2015 (define_insn "fma4i_fnmadd<mode>4"
2016 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2019 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2021 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2022 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
2023 UNSPEC_FMA4_INTRINSIC))]
2025 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2026 [(set_attr "type" "ssemuladd")
2027 (set_attr "mode" "<MODE>")])
;; fma4i_fnmsub<mode>4: SSEMODEF2P pattern tagged UNSPEC_FMA4_INTRINSIC.
;; Emits vfnmsub<ssemodesuffix>; the memory operand, if any, is operand 3
;; (alternative 0) or operand 2 (alternative 1).
2029 (define_insn "fma4i_fnmsub<mode>4"
2030 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2035 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2036 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2037 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2038 UNSPEC_FMA4_INTRINSIC))]
2040 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2041 [(set_attr "type" "ssemuladd")
2042 (set_attr "mode" "<MODE>")])
2044 ;; For the scalar operations, use operand1 for the upper words that aren't
2045 ;; modified, so restrict the forms that are accepted.
;; fma4i_vmfmadd<mode>4: SSEMODEF2P pattern; a vec_merge is wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Emits vfmadd<ssescalarmodesuffix>, and the "mode"
;; attribute is the scalar mode (<ssescalarmode>) rather than <MODE>.
2046 (define_insn "fma4i_vmfmadd<mode>4"
2047 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2049 [(vec_merge:SSEMODEF2P
2052 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2053 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2054 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2057 UNSPEC_FMA4_INTRINSIC))]
2059 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2060 [(set_attr "type" "ssemuladd")
2061 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfmsub<mode>4: SSEMODEF2P pattern; a vec_merge is wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Emits vfmsub<ssescalarmodesuffix>; the "mode"
;; attribute is <ssescalarmode>.
2063 (define_insn "fma4i_vmfmsub<mode>4"
2064 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2066 [(vec_merge:SSEMODEF2P
2069 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2070 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2071 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2074 UNSPEC_FMA4_INTRINSIC))]
2076 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2077 [(set_attr "type" "ssemuladd")
2078 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfnmadd<mode>4: SSEMODEF2P pattern; a vec_merge is wrapped in
;; UNSPEC_FMA4_INTRINSIC, with operand 3 listed ahead of operands 1 and 2.
;; Emits vfnmadd<ssescalarmodesuffix>; the "mode" attribute is <ssescalarmode>.
2080 (define_insn "fma4i_vmfnmadd<mode>4"
2081 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2083 [(vec_merge:SSEMODEF2P
2085 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2087 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2088 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2091 UNSPEC_FMA4_INTRINSIC))]
2093 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2094 [(set_attr "type" "ssemuladd")
2095 (set_attr "mode" "<ssescalarmode>")])
;; fma4i_vmfnmsub<mode>4: SSEMODEF2P pattern; a vec_merge is wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Emits vfnmsub<ssescalarmodesuffix>; the "mode"
;; attribute is <ssescalarmode>.
2097 (define_insn "fma4i_vmfnmsub<mode>4"
2098 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2100 [(vec_merge:SSEMODEF2P
2104 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2105 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2106 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2109 UNSPEC_FMA4_INTRINSIC))]
2111 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2112 [(set_attr "type" "ssemuladd")
2113 (set_attr "mode" "<ssescalarmode>")])
2115 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2117 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2119 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fma4_fmaddsubv8sf4: V8SF pattern that emits vfmaddsubps.  Alternative 0
;; allows operand 3 in memory, alternative 1 allows operand 2 in memory.
;; Enabled under TARGET_FMA4 && TARGET_FUSED_MADD.
2121 (define_insn "fma4_fmaddsubv8sf4"
2122 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2126 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2127 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2128 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2135 "TARGET_FMA4 && TARGET_FUSED_MADD"
2136 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2137 [(set_attr "type" "ssemuladd")
2138 (set_attr "mode" "V8SF")])
;; fma4_fmaddsubv4df4: V4DF pattern that emits vfmaddsubpd.  Alternative 0
;; allows operand 3 in memory, alternative 1 allows operand 2 in memory.
;; Enabled under TARGET_FMA4 && TARGET_FUSED_MADD.
2140 (define_insn "fma4_fmaddsubv4df4"
2141 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2145 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2146 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2147 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2154 "TARGET_FMA4 && TARGET_FUSED_MADD"
2155 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2156 [(set_attr "type" "ssemuladd")
2157 (set_attr "mode" "V4DF")])
;; fma4_fmaddsubv4sf4: V4SF pattern that emits vfmaddsubps.  Alternative 0
;; allows operand 3 in memory, alternative 1 allows operand 2 in memory.
;; Enabled under TARGET_FMA4 && TARGET_FUSED_MADD.
2159 (define_insn "fma4_fmaddsubv4sf4"
2160 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2164 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2165 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2166 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2173 "TARGET_FMA4 && TARGET_FUSED_MADD"
2174 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2175 [(set_attr "type" "ssemuladd")
2176 (set_attr "mode" "V4SF")])
;; fma4_fmaddsubv2df4: V2DF pattern that emits vfmaddsubpd.  Alternative 0
;; allows operand 3 in memory, alternative 1 allows operand 2 in memory.
;; Enabled under TARGET_FMA4 && TARGET_FUSED_MADD.
2178 (define_insn "fma4_fmaddsubv2df4"
2179 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2183 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2184 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2185 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2192 "TARGET_FMA4 && TARGET_FUSED_MADD"
2193 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2194 [(set_attr "type" "ssemuladd")
2195 (set_attr "mode" "V2DF")])
;; fma4_fmsubaddv8sf4: V8SF pattern that emits vfmsubaddps.  Alternative 0
;; allows operand 3 in memory, alternative 1 allows operand 2 in memory.
;; Enabled under TARGET_FMA4 && TARGET_FUSED_MADD.
2197 (define_insn "fma4_fmsubaddv8sf4"
2198 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2202 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2203 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2204 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2211 "TARGET_FMA4 && TARGET_FUSED_MADD"
2212 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2213 [(set_attr "type" "ssemuladd")
2214 (set_attr "mode" "V8SF")])
;; fma4_fmsubaddv4df4: V4DF pattern that emits vfmsubaddpd.  Alternative 0
;; allows operand 3 in memory, alternative 1 allows operand 2 in memory.
;; Enabled under TARGET_FMA4 && TARGET_FUSED_MADD.
2216 (define_insn "fma4_fmsubaddv4df4"
2217 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2221 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2222 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2223 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2230 "TARGET_FMA4 && TARGET_FUSED_MADD"
2231 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2232 [(set_attr "type" "ssemuladd")
2233 (set_attr "mode" "V4DF")])
;; fma4_fmsubaddv4sf4: V4SF pattern that emits vfmsubaddps.  Alternative 0
;; allows operand 3 in memory, alternative 1 allows operand 2 in memory.
;; Enabled under TARGET_FMA4 && TARGET_FUSED_MADD.
2235 (define_insn "fma4_fmsubaddv4sf4"
2236 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2240 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2241 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2242 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2249 "TARGET_FMA4 && TARGET_FUSED_MADD"
2250 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2251 [(set_attr "type" "ssemuladd")
2252 (set_attr "mode" "V4SF")])
;; fma4_fmsubaddv2df4: V2DF pattern that emits vfmsubaddpd.  Alternative 0
;; allows operand 3 in memory, alternative 1 allows operand 2 in memory.
;; Enabled under TARGET_FMA4 && TARGET_FUSED_MADD.
2254 (define_insn "fma4_fmsubaddv2df4"
2255 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2259 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2260 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2261 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2268 "TARGET_FMA4 && TARGET_FUSED_MADD"
2269 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2270 [(set_attr "type" "ssemuladd")
2271 (set_attr "mode" "V2DF")])
;; fma4i_fmaddsubv8sf4: V8SF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmaddsubps.  The memory operand, if any, is operand 3 (alternative 0) or
;; operand 2 (alternative 1).
2273 (define_insn "fma4i_fmaddsubv8sf4"
2274 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2279 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2280 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2281 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2288 UNSPEC_FMA4_INTRINSIC))]
2290 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2291 [(set_attr "type" "ssemuladd")
2292 (set_attr "mode" "V8SF")])
;; fma4i_fmaddsubv4df4: V4DF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmaddsubpd.  The memory operand, if any, is operand 3 (alternative 0) or
;; operand 2 (alternative 1).
2294 (define_insn "fma4i_fmaddsubv4df4"
2295 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2300 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2301 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2302 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2309 UNSPEC_FMA4_INTRINSIC))]
2311 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2312 [(set_attr "type" "ssemuladd")
2313 (set_attr "mode" "V4DF")])
;; fma4i_fmaddsubv4sf4: V4SF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmaddsubps.  The memory operand, if any, is operand 3 (alternative 0) or
;; operand 2 (alternative 1).
2315 (define_insn "fma4i_fmaddsubv4sf4"
2316 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2321 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2322 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2323 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2330 UNSPEC_FMA4_INTRINSIC))]
2332 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2333 [(set_attr "type" "ssemuladd")
2334 (set_attr "mode" "V4SF")])
;; fma4i_fmaddsubv2df4: V2DF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmaddsubpd.  The memory operand, if any, is operand 3 (alternative 0) or
;; operand 2 (alternative 1).
2336 (define_insn "fma4i_fmaddsubv2df4"
2337 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2342 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2343 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2344 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2351 UNSPEC_FMA4_INTRINSIC))]
2353 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2354 [(set_attr "type" "ssemuladd")
2355 (set_attr "mode" "V2DF")])
;; fma4i_fmsubaddv8sf4: V8SF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmsubaddps.  The memory operand, if any, is operand 3 (alternative 0) or
;; operand 2 (alternative 1).
2357 (define_insn "fma4i_fmsubaddv8sf4"
2358 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2363 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2364 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2365 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2372 UNSPEC_FMA4_INTRINSIC))]
2374 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2375 [(set_attr "type" "ssemuladd")
2376 (set_attr "mode" "V8SF")])
;; fma4i_fmsubaddv4df4: V4DF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmsubaddpd.  The memory operand, if any, is operand 3 (alternative 0) or
;; operand 2 (alternative 1).
2378 (define_insn "fma4i_fmsubaddv4df4"
2379 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2384 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2385 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2386 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2393 UNSPEC_FMA4_INTRINSIC))]
2395 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2396 [(set_attr "type" "ssemuladd")
2397 (set_attr "mode" "V4DF")])
;; fma4i_fmsubaddv4sf4: V4SF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmsubaddps.  The memory operand, if any, is operand 3 (alternative 0) or
;; operand 2 (alternative 1).
2399 (define_insn "fma4i_fmsubaddv4sf4"
2400 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2405 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2406 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2407 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2414 UNSPEC_FMA4_INTRINSIC))]
2416 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2417 [(set_attr "type" "ssemuladd")
2418 (set_attr "mode" "V4SF")])
;; fma4i_fmsubaddv2df4: V2DF pattern tagged UNSPEC_FMA4_INTRINSIC that emits
;; vfmsubaddpd.  The memory operand, if any, is operand 3 (alternative 0) or
;; operand 2 (alternative 1).
2420 (define_insn "fma4i_fmsubaddv2df4"
2421 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2426 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2427 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2428 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2435 UNSPEC_FMA4_INTRINSIC))]
2437 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2438 [(set_attr "type" "ssemuladd")
2439 (set_attr "mode" "V2DF")])
2441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2443 ;; Parallel single-precision floating point conversion operations
2445 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse_cvtpi2ps: converts V2SI operand 2 ("ym") with float:V2SF into a V4SF
;; destination.  Operand 1 is tied to the destination ("0"), so the
;; instruction is emitted in two-operand form: cvtpi2ps %2, %0.
2447 (define_insn "sse_cvtpi2ps"
2448 [(set (match_operand:V4SF 0 "register_operand" "=x")
2451 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2452 (match_operand:V4SF 1 "register_operand" "0")
2455 "cvtpi2ps\t{%2, %0|%0, %2}"
2456 [(set_attr "type" "ssecvt")
2457 (set_attr "mode" "V4SF")])
;; sse_cvtps2pi: V4SF source ("xm") goes through a V4SI unspec, and elements
;; 0 and 1 are selected into a V2SI destination with constraint "y".
;; Emits cvtps2pi; the "unit" attribute is mmx.
2459 (define_insn "sse_cvtps2pi"
2460 [(set (match_operand:V2SI 0 "register_operand" "=y")
2462 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2464 (parallel [(const_int 0) (const_int 1)])))]
2466 "cvtps2pi\t{%1, %0|%0, %1}"
2467 [(set_attr "type" "ssecvt")
2468 (set_attr "unit" "mmx")
2469 (set_attr "mode" "DI")])
;; sse_cvttps2pi: applies fix:V4SI to the V4SF source ("xm") and selects
;; elements 0 and 1 into a V2SI destination with constraint "y".
;; Emits cvttps2pi; "unit" is mmx and "prefix_rep" is explicitly 0.
2471 (define_insn "sse_cvttps2pi"
2472 [(set (match_operand:V2SI 0 "register_operand" "=y")
2474 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2475 (parallel [(const_int 0) (const_int 1)])))]
2477 "cvttps2pi\t{%1, %0|%0, %1}"
2478 [(set_attr "type" "ssecvt")
2479 (set_attr "unit" "mmx")
2480 (set_attr "prefix_rep" "0")
2481 (set_attr "mode" "SF")])
;; *avx_cvtsi2ss: three-operand VEX form.  SI operand 2 ("rm") is converted
;; with float:SF; V4SF operand 1 is a separate register input ("x"), not
;; tied to the destination.  Emits vcvtsi2ss %2, %1, %0.
2483 (define_insn "*avx_cvtsi2ss"
2484 [(set (match_operand:V4SF 0 "register_operand" "=x")
2487 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2488 (match_operand:V4SF 1 "register_operand" "x")
2491 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2492 [(set_attr "type" "sseicvt")
2493 (set_attr "prefix" "vex")
2494 (set_attr "mode" "SF")])
;; sse_cvtsi2ss: SI operand 2 is converted with float:SF; V4SF operand 1 is
;; tied to the destination ("0,0").  Alternative 0 takes the source from a
;; register ("r"), alternative 1 from memory ("m"); the decode attributes are
;; given per alternative.  Emits cvtsi2ss %2, %0.
2496 (define_insn "sse_cvtsi2ss"
2497 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2500 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2501 (match_operand:V4SF 1 "register_operand" "0,0")
2504 "cvtsi2ss\t{%2, %0|%0, %2}"
2505 [(set_attr "type" "sseicvt")
2506 (set_attr "athlon_decode" "vector,double")
2507 (set_attr "amdfam10_decode" "vector,double")
2508 (set_attr "mode" "SF")])
;; *avx_cvtsi2ssq: three-operand VEX form with a DI source (operand 2, "rm")
;; converted with float:SF.  Only enabled under TARGET_AVX && TARGET_64BIT.
;; Emits vcvtsi2ssq %2, %1, %0; "length_vex" is set to 4.
2510 (define_insn "*avx_cvtsi2ssq"
2511 [(set (match_operand:V4SF 0 "register_operand" "=x")
2514 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2515 (match_operand:V4SF 1 "register_operand" "x")
2517 "TARGET_AVX && TARGET_64BIT"
2518 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2519 [(set_attr "type" "sseicvt")
2520 (set_attr "length_vex" "4")
2521 (set_attr "prefix" "vex")
2522 (set_attr "mode" "SF")])
;; sse_cvtsi2ssq: DI operand 2 is converted with float:SF; V4SF operand 1 is
;; tied to the destination ("0,0").  Only enabled under
;; TARGET_SSE && TARGET_64BIT.  Emits cvtsi2ssq %2, %0 with "prefix_rex" 1.
;; NOTE(review): operand 2 uses "r,rm" here while sse_cvtsi2ss uses "r,m" --
;; confirm whether the difference is intentional.
2524 (define_insn "sse_cvtsi2ssq"
2525 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2528 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2529 (match_operand:V4SF 1 "register_operand" "0,0")
2531 "TARGET_SSE && TARGET_64BIT"
2532 "cvtsi2ssq\t{%2, %0|%0, %2}"
2533 [(set_attr "type" "sseicvt")
2534 (set_attr "prefix_rex" "1")
2535 (set_attr "athlon_decode" "vector,double")
2536 (set_attr "amdfam10_decode" "vector,double")
2537 (set_attr "mode" "SF")])
;; sse_cvtss2si: takes element 0 of V4SF operand 1 (register or memory) and
;; produces an SI register via UNSPEC_FIX_NOTRUNC.  Emits %vcvtss2si; the
;; "prefix" attribute is maybe_vex.
2539 (define_insn "sse_cvtss2si"
2540 [(set (match_operand:SI 0 "register_operand" "=r,r")
2543 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2544 (parallel [(const_int 0)]))]
2545 UNSPEC_FIX_NOTRUNC))]
2547 "%vcvtss2si\t{%1, %0|%0, %1}"
2548 [(set_attr "type" "sseicvt")
2549 (set_attr "athlon_decode" "double,vector")
2550 (set_attr "prefix_rep" "1")
2551 (set_attr "prefix" "maybe_vex")
2552 (set_attr "mode" "SI")])
;; sse_cvtss2si_2: same instruction as sse_cvtss2si (%vcvtss2si) but the
;; source is a scalar SF operand fed directly to unspec:SI
;; UNSPEC_FIX_NOTRUNC.  Also sets amdfam10_decode.
2554 (define_insn "sse_cvtss2si_2"
2555 [(set (match_operand:SI 0 "register_operand" "=r,r")
2556 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2557 UNSPEC_FIX_NOTRUNC))]
2559 "%vcvtss2si\t{%1, %0|%0, %1}"
2560 [(set_attr "type" "sseicvt")
2561 (set_attr "athlon_decode" "double,vector")
2562 (set_attr "amdfam10_decode" "double,double")
2563 (set_attr "prefix_rep" "1")
2564 (set_attr "prefix" "maybe_vex")
2565 (set_attr "mode" "SI")])
;; sse_cvtss2siq: DI-destination variant; takes element 0 of V4SF operand 1
;; via UNSPEC_FIX_NOTRUNC.  Only enabled under TARGET_SSE && TARGET_64BIT.
;; Emits %vcvtss2si{q}.
2567 (define_insn "sse_cvtss2siq"
2568 [(set (match_operand:DI 0 "register_operand" "=r,r")
2571 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2572 (parallel [(const_int 0)]))]
2573 UNSPEC_FIX_NOTRUNC))]
2574 "TARGET_SSE && TARGET_64BIT"
2575 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2576 [(set_attr "type" "sseicvt")
2577 (set_attr "athlon_decode" "double,vector")
2578 (set_attr "prefix_rep" "1")
2579 (set_attr "prefix" "maybe_vex")
2580 (set_attr "mode" "DI")])
;; sse_cvtss2siq_2: DI-destination variant taking a scalar SF operand
;; directly in unspec:DI UNSPEC_FIX_NOTRUNC.  Only enabled under
;; TARGET_SSE && TARGET_64BIT.  Emits %vcvtss2si{q}.
2582 (define_insn "sse_cvtss2siq_2"
2583 [(set (match_operand:DI 0 "register_operand" "=r,r")
2584 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2585 UNSPEC_FIX_NOTRUNC))]
2586 "TARGET_SSE && TARGET_64BIT"
2587 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2588 [(set_attr "type" "sseicvt")
2589 (set_attr "athlon_decode" "double,vector")
2590 (set_attr "amdfam10_decode" "double,double")
2591 (set_attr "prefix_rep" "1")
2592 (set_attr "prefix" "maybe_vex")
2593 (set_attr "mode" "DI")])
;; sse_cvttss2si: SI destination computed from element 0 of V4SF operand 1
;; (register or memory).  Emits %vcvttss2si; "prefix" is maybe_vex.
2595 (define_insn "sse_cvttss2si"
2596 [(set (match_operand:SI 0 "register_operand" "=r,r")
2599 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2600 (parallel [(const_int 0)]))))]
2602 "%vcvttss2si\t{%1, %0|%0, %1}"
2603 [(set_attr "type" "sseicvt")
2604 (set_attr "athlon_decode" "double,vector")
2605 (set_attr "amdfam10_decode" "double,double")
2606 (set_attr "prefix_rep" "1")
2607 (set_attr "prefix" "maybe_vex")
2608 (set_attr "mode" "SI")])
;; sse_cvttss2siq: DI destination computed from element 0 of V4SF operand 1.
;; Only enabled under TARGET_SSE && TARGET_64BIT.  Emits %vcvttss2si{q}.
2610 (define_insn "sse_cvttss2siq"
2611 [(set (match_operand:DI 0 "register_operand" "=r,r")
2614 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2615 (parallel [(const_int 0)]))))]
2616 "TARGET_SSE && TARGET_64BIT"
2617 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2618 [(set_attr "type" "sseicvt")
2619 (set_attr "athlon_decode" "double,vector")
2620 (set_attr "amdfam10_decode" "double,double")
2621 (set_attr "prefix_rep" "1")
2622 (set_attr "prefix" "maybe_vex")
2623 (set_attr "mode" "DI")])
;; avx_cvtdq2ps<avxmodesuffix>: float conversion over the AVXMODEDCVTDQ2PS
;; iterator; the source has mode <avxcvtvecmode> and may be in memory.
;; Emits vcvtdq2ps with a VEX prefix.
2625 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2626 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2627 (float:AVXMODEDCVTDQ2PS
2628 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2630 "vcvtdq2ps\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "ssecvt")
2632 (set_attr "prefix" "vex")
2633 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvtdq2ps: float:V4SF of a V4SI source (register or memory).
;; Emits cvtdq2ps.
2635 (define_insn "sse2_cvtdq2ps"
2636 [(set (match_operand:V4SF 0 "register_operand" "=x")
2637 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2639 "cvtdq2ps\t{%1, %0|%0, %1}"
2640 [(set_attr "type" "ssecvt")
2641 (set_attr "mode" "V4SF")])
;; sse2_cvtudq2ps: expander built from several sets.  The visible pieces are
;; a float:V4SF of V4SI operand 1, a lt comparison of operand 5 against
;; operand 3, an and of operand 6 with operand 4, and a final
;; operand 0 = operand 5 + operand 7.
;; The preparation code sets operand 3 to a zero V4SF register, operand 4 to
;; a register holding a V4SF constant built from 2^32 (real_ldexp of 1.0 by
;; 32), and operands 5..7 to fresh V4SF pseudos.
;; NOTE(review): presumably this corrects the signed conversion by adding
;; 2^32 where the signed result is negative -- confirm against the full
;; pattern, since the destinations of the intermediate sets are not shown.
2643 (define_expand "sse2_cvtudq2ps"
2645 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2647 (lt:V4SF (match_dup 5) (match_dup 3)))
2649 (and:V4SF (match_dup 6) (match_dup 4)))
2650 (set (match_operand:V4SF 0 "register_operand" "")
2651 (plus:V4SF (match_dup 5) (match_dup 7)))]
2654 REAL_VALUE_TYPE TWO32r;
2658 real_ldexp (&TWO32r, &dconst1, 32);
2659 x = const_double_from_real_value (TWO32r, SFmode);
2661 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2662 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2664 for (i = 5; i < 8; i++)
2665 operands[i] = gen_reg_rtx (V4SFmode);
;; avx_cvtps2dq<avxmodesuffix>: UNSPEC_FIX_NOTRUNC conversion over the
;; AVXMODEDCVTPS2DQ iterator; the source has mode <avxcvtvecmode> and may be
;; in memory.  Emits vcvtps2dq with a VEX prefix.
2668 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2669 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2670 (unspec:AVXMODEDCVTPS2DQ
2671 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2672 UNSPEC_FIX_NOTRUNC))]
2674 "vcvtps2dq\t{%1, %0|%0, %1}"
2675 [(set_attr "type" "ssecvt")
2676 (set_attr "prefix" "vex")
2677 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvtps2dq: V4SF source to V4SI destination via unspec:V4SI
;; UNSPEC_FIX_NOTRUNC.  Emits cvtps2dq; "prefix_data16" is 1 and the "mode"
;; attribute is TI.
2679 (define_insn "sse2_cvtps2dq"
2680 [(set (match_operand:V4SI 0 "register_operand" "=x")
2681 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2682 UNSPEC_FIX_NOTRUNC))]
2684 "cvtps2dq\t{%1, %0|%0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix_data16" "1")
2687 (set_attr "mode" "TI")])
;; avx_cvttps2dq<avxmodesuffix>: fix conversion over the AVXMODEDCVTPS2DQ
;; iterator; the source has mode <avxcvtvecmode> and may be in memory.
;; Emits vcvttps2dq with a VEX prefix.
2689 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2690 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2691 (fix:AVXMODEDCVTPS2DQ
2692 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2694 "vcvttps2dq\t{%1, %0|%0, %1}"
2695 [(set_attr "type" "ssecvt")
2696 (set_attr "prefix" "vex")
2697 (set_attr "mode" "<avxvecmode>")])
;; sse2_cvttps2dq: fix:V4SI of a V4SF source (register or memory).
;; Emits cvttps2dq; "prefix_rep" is 1 and "prefix_data16" is explicitly 0.
2699 (define_insn "sse2_cvttps2dq"
2700 [(set (match_operand:V4SI 0 "register_operand" "=x")
2701 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2703 "cvttps2dq\t{%1, %0|%0, %1}"
2704 [(set_attr "type" "ssecvt")
2705 (set_attr "prefix_rep" "1")
2706 (set_attr "prefix_data16" "0")
2707 (set_attr "mode" "TI")])
2709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2711 ;; Parallel double-precision floating point conversion operations
2713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: float:V2DF of a V2SI source.  Alternative 0 takes the
;; source with constraint "y", alternative 1 from memory ("m"); "unit" and
;; "prefix_data16" are set only for alternative 0 ("mmx,*" / "1,*").
;; Emits cvtpi2pd.
2715 (define_insn "sse2_cvtpi2pd"
2716 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2717 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2719 "cvtpi2pd\t{%1, %0|%0, %1}"
2720 [(set_attr "type" "ssecvt")
2721 (set_attr "unit" "mmx,*")
2722 (set_attr "prefix_data16" "1,*")
2723 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: V2DF source ("xm") to a V2SI destination with constraint
;; "y", via unspec:V2SI UNSPEC_FIX_NOTRUNC.  Emits cvtpd2pi; "unit" is mmx.
2725 (define_insn "sse2_cvtpd2pi"
2726 [(set (match_operand:V2SI 0 "register_operand" "=y")
2727 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2728 UNSPEC_FIX_NOTRUNC))]
2730 "cvtpd2pi\t{%1, %0|%0, %1}"
2731 [(set_attr "type" "ssecvt")
2732 (set_attr "unit" "mmx")
2733 (set_attr "prefix_data16" "1")
2734 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: fix:V2SI of a V2DF source ("xm") into a V2SI destination
;; with constraint "y".  Emits cvttpd2pi; "unit" is mmx.
2736 (define_insn "sse2_cvttpd2pi"
2737 [(set (match_operand:V2SI 0 "register_operand" "=y")
2738 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2740 "cvttpd2pi\t{%1, %0|%0, %1}"
2741 [(set_attr "type" "ssecvt")
2742 (set_attr "unit" "mmx")
2743 (set_attr "prefix_data16" "1")
2744 (set_attr "mode" "TI")])
;; *avx_cvtsi2sd: three-operand VEX form.  SI operand 2 ("rm") is converted
;; with float:DF; V2DF operand 1 is a separate register input ("x").
;; Emits vcvtsi2sd %2, %1, %0.
2746 (define_insn "*avx_cvtsi2sd"
2747 [(set (match_operand:V2DF 0 "register_operand" "=x")
2750 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2751 (match_operand:V2DF 1 "register_operand" "x")
2754 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2755 [(set_attr "type" "sseicvt")
2756 (set_attr "prefix" "vex")
2757 (set_attr "mode" "DF")])
;; sse2_cvtsi2sd: SI operand 2 is converted with float:DF; V2DF operand 1 is
;; tied to the destination ("0,0").  Alternative 0 takes the source from a
;; register, alternative 1 from memory; decode attributes are per
;; alternative.  Emits cvtsi2sd %2, %0.
2759 (define_insn "sse2_cvtsi2sd"
2760 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2763 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2764 (match_operand:V2DF 1 "register_operand" "0,0")
2767 "cvtsi2sd\t{%2, %0|%0, %2}"
2768 [(set_attr "type" "sseicvt")
2769 (set_attr "mode" "DF")
2770 (set_attr "athlon_decode" "double,direct")
2771 (set_attr "amdfam10_decode" "vector,double")])
;; *avx_cvtsi2sdq: three-operand VEX form with a DI source (operand 2, "rm")
;; converted with float:DF.  Only enabled under TARGET_AVX && TARGET_64BIT.
;; Emits vcvtsi2sdq %2, %1, %0; "length_vex" is set to 4.
2773 (define_insn "*avx_cvtsi2sdq"
2774 [(set (match_operand:V2DF 0 "register_operand" "=x")
2777 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2778 (match_operand:V2DF 1 "register_operand" "x")
2780 "TARGET_AVX && TARGET_64BIT"
2781 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2782 [(set_attr "type" "sseicvt")
2783 (set_attr "length_vex" "4")
2784 (set_attr "prefix" "vex")
2785 (set_attr "mode" "DF")])
;; sse2_cvtsi2sdq: DI operand 2 ("r,m") is converted with float:DF; V2DF
;; operand 1 is tied to the destination ("0,0").  Only enabled under
;; TARGET_SSE2 && TARGET_64BIT.  Emits cvtsi2sdq %2, %0 with "prefix_rex" 1.
2787 (define_insn "sse2_cvtsi2sdq"
2788 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2791 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2792 (match_operand:V2DF 1 "register_operand" "0,0")
2794 "TARGET_SSE2 && TARGET_64BIT"
2795 "cvtsi2sdq\t{%2, %0|%0, %2}"
2796 [(set_attr "type" "sseicvt")
2797 (set_attr "prefix_rex" "1")
2798 (set_attr "mode" "DF")
2799 (set_attr "athlon_decode" "double,direct")
2800 (set_attr "amdfam10_decode" "vector,double")])
;; sse2_cvtsd2si: takes element 0 of V2DF operand 1 (register or memory) and
;; produces an SI register via UNSPEC_FIX_NOTRUNC.  Emits %vcvtsd2si;
;; "prefix" is maybe_vex.
2802 (define_insn "sse2_cvtsd2si"
2803 [(set (match_operand:SI 0 "register_operand" "=r,r")
2806 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2807 (parallel [(const_int 0)]))]
2808 UNSPEC_FIX_NOTRUNC))]
2810 "%vcvtsd2si\t{%1, %0|%0, %1}"
2811 [(set_attr "type" "sseicvt")
2812 (set_attr "athlon_decode" "double,vector")
2813 (set_attr "prefix_rep" "1")
2814 (set_attr "prefix" "maybe_vex")
2815 (set_attr "mode" "SI")])
;; sse2_cvtsd2si_2: same instruction as sse2_cvtsd2si (%vcvtsd2si) but the
;; source is a scalar DF operand fed directly to unspec:SI
;; UNSPEC_FIX_NOTRUNC.  Also sets amdfam10_decode.
2817 (define_insn "sse2_cvtsd2si_2"
2818 [(set (match_operand:SI 0 "register_operand" "=r,r")
2819 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2820 UNSPEC_FIX_NOTRUNC))]
2822 "%vcvtsd2si\t{%1, %0|%0, %1}"
2823 [(set_attr "type" "sseicvt")
2824 (set_attr "athlon_decode" "double,vector")
2825 (set_attr "amdfam10_decode" "double,double")
2826 (set_attr "prefix_rep" "1")
2827 (set_attr "prefix" "maybe_vex")
2828 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq: DI-destination variant; takes element 0 of V2DF operand 1
;; via UNSPEC_FIX_NOTRUNC.  Only enabled under TARGET_SSE2 && TARGET_64BIT.
;; Emits %vcvtsd2siq.
2830 (define_insn "sse2_cvtsd2siq"
2831 [(set (match_operand:DI 0 "register_operand" "=r,r")
2834 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2835 (parallel [(const_int 0)]))]
2836 UNSPEC_FIX_NOTRUNC))]
2837 "TARGET_SSE2 && TARGET_64BIT"
2838 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2839 [(set_attr "type" "sseicvt")
2840 (set_attr "athlon_decode" "double,vector")
2841 (set_attr "prefix_rep" "1")
2842 (set_attr "prefix" "maybe_vex")
2843 (set_attr "mode" "DI")])
;; sse2_cvtsd2siq_2: DI-destination variant taking a scalar DF operand
;; directly in unspec:DI UNSPEC_FIX_NOTRUNC.  Only enabled under
;; TARGET_SSE2 && TARGET_64BIT.  Emits %vcvtsd2siq.
2845 (define_insn "sse2_cvtsd2siq_2"
2846 [(set (match_operand:DI 0 "register_operand" "=r,r")
2847 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2848 UNSPEC_FIX_NOTRUNC))]
2849 "TARGET_SSE2 && TARGET_64BIT"
2850 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2851 [(set_attr "type" "sseicvt")
2852 (set_attr "athlon_decode" "double,vector")
2853 (set_attr "amdfam10_decode" "double,double")
2854 (set_attr "prefix_rep" "1")
2855 (set_attr "prefix" "maybe_vex")
2856 (set_attr "mode" "DI")])
;; sse2_cvttsd2si: SI destination computed from element 0 of V2DF operand 1
;; (register or memory).  Emits %vcvttsd2si; "prefix" is maybe_vex.
2858 (define_insn "sse2_cvttsd2si"
2859 [(set (match_operand:SI 0 "register_operand" "=r,r")
2862 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2863 (parallel [(const_int 0)]))))]
2865 "%vcvttsd2si\t{%1, %0|%0, %1}"
2866 [(set_attr "type" "sseicvt")
2867 (set_attr "prefix_rep" "1")
2868 (set_attr "prefix" "maybe_vex")
2869 (set_attr "mode" "SI")
2870 (set_attr "athlon_decode" "double,vector")
2871 (set_attr "amdfam10_decode" "double,double")])
;; sse2_cvttsd2siq: DI destination computed from element 0 of V2DF operand 1.
;; Only enabled under TARGET_SSE2 && TARGET_64BIT.  Emits %vcvttsd2siq.
2873 (define_insn "sse2_cvttsd2siq"
2874 [(set (match_operand:DI 0 "register_operand" "=r,r")
2877 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2878 (parallel [(const_int 0)]))))]
2879 "TARGET_SSE2 && TARGET_64BIT"
2880 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2881 [(set_attr "type" "sseicvt")
2882 (set_attr "prefix_rep" "1")
2883 (set_attr "prefix" "maybe_vex")
2884 (set_attr "mode" "DI")
2885 (set_attr "athlon_decode" "double,vector")
2886 (set_attr "amdfam10_decode" "double,double")])
;; avx_cvtdq2pd256: float:V4DF of a V4SI source (register or memory).
;; Emits vcvtdq2pd with a VEX prefix.
2888 (define_insn "avx_cvtdq2pd256"
2889 [(set (match_operand:V4DF 0 "register_operand" "=x")
2890 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2892 "vcvtdq2pd\t{%1, %0|%0, %1}"
2893 [(set_attr "type" "ssecvt")
2894 (set_attr "prefix" "vex")
2895 (set_attr "mode" "V4DF")])
;; sse2_cvtdq2pd: V2DF destination computed from elements 0 and 1 of V4SI
;; operand 1 (register or memory).  Emits %vcvtdq2pd; "prefix" is maybe_vex.
2897 (define_insn "sse2_cvtdq2pd"
2898 [(set (match_operand:V2DF 0 "register_operand" "=x")
2901 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2902 (parallel [(const_int 0) (const_int 1)]))))]
2904 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2905 [(set_attr "type" "ssecvt")
2906 (set_attr "prefix" "maybe_vex")
2907 (set_attr "mode" "V2DF")])
;; avx_cvtpd2dq256: V4DF source to V4SI destination via unspec:V4SI
;; UNSPEC_FIX_NOTRUNC.  Emits vcvtpd2dq{y} with a VEX prefix; the "mode"
;; attribute is OI.
2909 (define_insn "avx_cvtpd2dq256"
2910 [(set (match_operand:V4SI 0 "register_operand" "=x")
2911 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2912 UNSPEC_FIX_NOTRUNC))]
2914 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2915 [(set_attr "type" "ssecvt")
2916 (set_attr "prefix" "vex")
2917 (set_attr "mode" "OI")])
;; sse2_cvtpd2dq: expander with a V4SI destination and a V2SI unspec of V2DF
;; operand 1.  The preparation statement sets operands[2] to the V2SI zero
;; constant, which matches the const0_operand expected by *sse2_cvtpd2dq.
2919 (define_expand "sse2_cvtpd2dq"
2920 [(set (match_operand:V4SI 0 "register_operand" "")
2922 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2926 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvtpd2dq: V4SI destination formed from a V2SI unspec of V2DF
;; operand 1 together with operand 2, which must be a V2SI zero
;; (const0_operand).  The output is chosen in C: vcvtpd2dq{x} when
;; TARGET_AVX, otherwise cvtpd2dq.
2928 (define_insn "*sse2_cvtpd2dq"
2929 [(set (match_operand:V4SI 0 "register_operand" "=x")
2931 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2933 (match_operand:V2SI 2 "const0_operand" "")))]
2935 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2936 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2937 [(set_attr "type" "ssecvt")
2938 (set_attr "prefix_rep" "1")
2939 (set_attr "prefix_data16" "0")
2940 (set_attr "prefix" "maybe_vex")
2941 (set_attr "mode" "TI")
2942 (set_attr "amdfam10_decode" "double")])
;; avx_cvttpd2dq256: fix:V4SI of a V4DF source (register or memory).
;; Emits vcvttpd2dq{y} with a VEX prefix; the "mode" attribute is OI.
2944 (define_insn "avx_cvttpd2dq256"
2945 [(set (match_operand:V4SI 0 "register_operand" "=x")
2946 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2948 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2949 [(set_attr "type" "ssecvt")
2950 (set_attr "prefix" "vex")
2951 (set_attr "mode" "OI")])
;; sse2_cvttpd2dq: expander with a V4SI destination and fix:V2SI of V2DF
;; operand 1.  The preparation statement sets operands[2] to the V2SI zero
;; constant, which matches the const0_operand expected by *sse2_cvttpd2dq.
2953 (define_expand "sse2_cvttpd2dq"
2954 [(set (match_operand:V4SI 0 "register_operand" "")
2956 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2959 "operands[2] = CONST0_RTX (V2SImode);")
;; *sse2_cvttpd2dq: V4SI destination formed from fix:V2SI of V2DF operand 1
;; together with operand 2, which must be a V2SI zero (const0_operand).
;; The output is chosen in C: vcvttpd2dq{x} when TARGET_AVX, otherwise
;; cvttpd2dq.
2961 (define_insn "*sse2_cvttpd2dq"
2962 [(set (match_operand:V4SI 0 "register_operand" "=x")
2964 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2965 (match_operand:V2SI 2 "const0_operand" "")))]
2967 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2968 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2969 [(set_attr "type" "ssecvt")
2970 (set_attr "prefix" "maybe_vex")
2971 (set_attr "mode" "TI")
2972 (set_attr "amdfam10_decode" "double")])
;; *avx_cvtsd2ss: three-operand VEX form.  V2DF operand 2 ("xm") goes
;; through float_truncate:V2SF; V4SF operand 1 is a separate register input.
;; Emits vcvtsd2ss %2, %1, %0.
2974 (define_insn "*avx_cvtsd2ss"
2975 [(set (match_operand:V4SF 0 "register_operand" "=x")
2978 (float_truncate:V2SF
2979 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2980 (match_operand:V4SF 1 "register_operand" "x")
2983 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2984 [(set_attr "type" "ssecvt")
2985 (set_attr "prefix" "vex")
2986 (set_attr "mode" "SF")])
;; sse2_cvtsd2ss: V2DF operand 2 goes through float_truncate:V2SF; V4SF
;; operand 1 is tied to the destination ("0,0").  Alternative 0 takes the
;; source from a register, alternative 1 from memory.
;; Emits cvtsd2ss %2, %0.
2988 (define_insn "sse2_cvtsd2ss"
2989 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2992 (float_truncate:V2SF
2993 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2994 (match_operand:V4SF 1 "register_operand" "0,0")
2997 "cvtsd2ss\t{%2, %0|%0, %2}"
2998 [(set_attr "type" "ssecvt")
2999 (set_attr "athlon_decode" "vector,double")
3000 (set_attr "amdfam10_decode" "vector,double")
3001 (set_attr "mode" "SF")])
;; *avx_cvtss2sd: three-operand VEX form.  Elements 0 and 1 of V4SF
;; operand 2 ("xm") are selected; V2DF operand 1 is a separate register
;; input.  Emits vcvtss2sd %2, %1, %0.
3003 (define_insn "*avx_cvtss2sd"
3004 [(set (match_operand:V2DF 0 "register_operand" "=x")
3008 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3009 (parallel [(const_int 0) (const_int 1)])))
3010 (match_operand:V2DF 1 "register_operand" "x")
3013 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3014 [(set_attr "type" "ssecvt")
3015 (set_attr "prefix" "vex")
3016 (set_attr "mode" "DF")])
;; sse2_cvtss2sd: elements 0 and 1 of V4SF operand 2 are selected; V2DF
;; operand 1 is tied to the destination ("0,0").  Alternative 0 takes the
;; source from a register, alternative 1 from memory.
;; Emits cvtss2sd %2, %0.
3018 (define_insn "sse2_cvtss2sd"
3019 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3023 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3024 (parallel [(const_int 0) (const_int 1)])))
3025 (match_operand:V2DF 1 "register_operand" "0,0")
3028 "cvtss2sd\t{%2, %0|%0, %2}"
3029 [(set_attr "type" "ssecvt")
3030 (set_attr "amdfam10_decode" "vector,double")
3031 (set_attr "mode" "DF")])
;; avx_cvtpd2ps256: float_truncate:V4SF of a V4DF source (register or
;; memory).  Emits vcvtpd2ps{y} with a VEX prefix.
3033 (define_insn "avx_cvtpd2ps256"
3034 [(set (match_operand:V4SF 0 "register_operand" "=x")
3035 (float_truncate:V4SF
3036 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3038 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3039 [(set_attr "type" "ssecvt")
3040 (set_attr "prefix" "vex")
3041 (set_attr "mode" "V4SF")])
;; sse2_cvtpd2ps: expander with a V4SF destination and float_truncate:V2SF
;; of V2DF operand 1.  The preparation statement sets operands[2] to the
;; V2SF zero constant, which matches the const0_operand expected by
;; *sse2_cvtpd2ps.
3043 (define_expand "sse2_cvtpd2ps"
3044 [(set (match_operand:V4SF 0 "register_operand" "")
3046 (float_truncate:V2SF
3047 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3050 "operands[2] = CONST0_RTX (V2SFmode);")
;; *sse2_cvtpd2ps: V4SF destination formed from float_truncate:V2SF of V2DF
;; operand 1 together with operand 2, which must be a V2SF zero
;; (const0_operand).  The output is chosen in C: vcvtpd2ps{x} when
;; TARGET_AVX, otherwise cvtpd2ps.
3052 (define_insn "*sse2_cvtpd2ps"
3053 [(set (match_operand:V4SF 0 "register_operand" "=x")
3055 (float_truncate:V2SF
3056 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3057 (match_operand:V2SF 2 "const0_operand" "")))]
3059 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3060 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3061 [(set_attr "type" "ssecvt")
3062 (set_attr "prefix_data16" "1")
3063 (set_attr "prefix" "maybe_vex")
3064 (set_attr "mode" "V4SF")
3065 (set_attr "amdfam10_decode" "double")])
;; 256-bit vcvtps2pd: converts V4SF operand 1 (register or memory) into
;; V4DF register operand 0.
3067 (define_insn "avx_cvtps2pd256"
3068 [(set (match_operand:V4DF 0 "register_operand" "=x")
3070 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3072 "vcvtps2pd\t{%1, %0|%0, %1}"
3073 [(set_attr "type" "ssecvt")
3074 (set_attr "prefix" "vex")
3075 (set_attr "mode" "V4DF")])
;; cvtps2pd on elements 0 and 1 of V4SF operand 1, producing V2DF
;; operand 0.  One template serves both encodings: the mnemonic carries
;; the %v prefix and the "prefix" attribute is maybe_vex.
3077 (define_insn "sse2_cvtps2pd"
3078 [(set (match_operand:V2DF 0 "register_operand" "=x")
3081 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3082 (parallel [(const_int 0) (const_int 1)]))))]
3084 "%vcvtps2pd\t{%1, %0|%0, %1}"
3085 [(set_attr "type" "ssecvt")
3086 (set_attr "prefix" "maybe_vex")
3087 (set_attr "mode" "V2DF")
3088 (set_attr "prefix_data16" "0")
3089 (set_attr "amdfam10_decode" "direct")])
;; Widen the high half of V4SF operand 1 to V2DF operand 0 in two steps:
;; a shuffle of operand 1 (selector list starting at index 6), then a
;; conversion of elements 0 and 1.  operands[2] is a fresh V4SF pseudo,
;; presumably the shuffle's destination -- TODO confirm against the
;; full pattern.
3091 (define_expand "vec_unpacks_hi_v4sf"
3096 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3097 (parallel [(const_int 6)
3101 (set (match_operand:V2DF 0 "register_operand" "")
3105 (parallel [(const_int 0) (const_int 1)]))))]
3108 operands[2] = gen_reg_rtx (V4SFmode);
;; Widen the low half of V4SF operand 1 to V2DF operand 0: elements 0
;; and 1 of operand 1 are selected directly, so no temporary is needed.
3111 (define_expand "vec_unpacks_lo_v4sf"
3112 [(set (match_operand:V2DF 0 "register_operand" "")
3115 (match_operand:V4SF 1 "nonimmediate_operand" "")
3116 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF via a V4SI temporary: signed high-half unpack
;; (gen_vec_unpacks_hi_v8hi) followed by gen_sse2_cvtdq2ps.
3119 (define_expand "vec_unpacks_float_hi_v8hi"
3120 [(match_operand:V4SF 0 "register_operand" "")
3121 (match_operand:V8HI 1 "register_operand" "")]
3124 rtx tmp = gen_reg_rtx (V4SImode);
3126 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3127 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF via a V4SI temporary: signed low-half unpack
;; (gen_vec_unpacks_lo_v8hi) followed by gen_sse2_cvtdq2ps.
3131 (define_expand "vec_unpacks_float_lo_v8hi"
3132 [(match_operand:V4SF 0 "register_operand" "")
3133 (match_operand:V8HI 1 "register_operand" "")]
3136 rtx tmp = gen_reg_rtx (V4SImode);
3138 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3139 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF via a V4SI temporary: unsigned high-half unpack
;; (gen_vec_unpacku_hi_v8hi) followed by gen_sse2_cvtdq2ps.
3143 (define_expand "vec_unpacku_float_hi_v8hi"
3144 [(match_operand:V4SF 0 "register_operand" "")
3145 (match_operand:V8HI 1 "register_operand" "")]
3148 rtx tmp = gen_reg_rtx (V4SImode);
3150 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3151 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF via a V4SI temporary: unsigned low-half unpack
;; (gen_vec_unpacku_lo_v8hi) followed by gen_sse2_cvtdq2ps.
3155 (define_expand "vec_unpacku_float_lo_v8hi"
3156 [(match_operand:V4SF 0 "register_operand" "")
3157 (match_operand:V8HI 1 "register_operand" "")]
3160 rtx tmp = gen_reg_rtx (V4SImode);
3162 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3163 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Convert the high half of V4SI operand 1 to V2DF operand 0 in two
;; steps: a shuffle of operand 1 (selector list starting at index 2),
;; then a conversion of elements 0 and 1.  operands[2] is a fresh V4SI
;; pseudo, presumably the shuffle's destination -- TODO confirm.
3167 (define_expand "vec_unpacks_float_hi_v4si"
3170 (match_operand:V4SI 1 "nonimmediate_operand" "")
3171 (parallel [(const_int 2)
3175 (set (match_operand:V2DF 0 "register_operand" "")
3179 (parallel [(const_int 0) (const_int 1)]))))]
3181 "operands[2] = gen_reg_rtx (V4SImode);")
;; Convert the low half of V4SI operand 1 to V2DF operand 0: elements 0
;; and 1 of operand 1 are selected directly.
3183 (define_expand "vec_unpacks_float_lo_v4si"
3184 [(set (match_operand:V2DF 0 "register_operand" "")
3187 (match_operand:V4SI 1 "nonimmediate_operand" "")
3188 (parallel [(const_int 0) (const_int 1)]))))]
;; Unsigned conversion of the high half of V4SI operand 1 to V2DF.
;; Visible steps: shuffle operand 1 (selector list starting at index 2),
;; convert elements 0 and 1, compare the result (operand 6) "less than"
;; operand 3, AND operand 7 with operand 4, and finally add operand 8 to
;; operand 6 to form operand 0.
;; Preparation code: operand 3 is a V2DF zero vector, operand 4 a V2DF
;; vector built from the constant 2**32 (real_ldexp of dconst1 by 32),
;; operand 5 a V4SI pseudo and operands 6..8 V2DF pseudos.
;; NOTE(review): presumably operands 7 and 8 receive the compare mask
;; and the masked 2**32 correction respectively -- confirm against the
;; full pattern.
3191 (define_expand "vec_unpacku_float_hi_v4si"
3194 (match_operand:V4SI 1 "nonimmediate_operand" "")
3195 (parallel [(const_int 2)
3203 (parallel [(const_int 0) (const_int 1)]))))
3205 (lt:V2DF (match_dup 6) (match_dup 3)))
3207 (and:V2DF (match_dup 7) (match_dup 4)))
3208 (set (match_operand:V2DF 0 "register_operand" "")
3209 (plus:V2DF (match_dup 6) (match_dup 8)))]
3212 REAL_VALUE_TYPE TWO32r;
3216 real_ldexp (&TWO32r, &dconst1, 32);
3217 x = const_double_from_real_value (TWO32r, DFmode);
3219 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3220 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3222 operands[5] = gen_reg_rtx (V4SImode);
3224 for (i = 6; i < 9; i++)
3225 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the low half of V4SI operand 1 to V2DF.
;; Visible steps: convert elements 0 and 1 of operand 1, compare the
;; result (operand 5) "less than" operand 3, AND operand 6 with
;; operand 4, then add operand 7 to operand 5 to form operand 0.
;; Preparation code: operand 3 is a V2DF zero vector, operand 4 a V2DF
;; vector built from the constant 2**32 (real_ldexp of dconst1 by 32),
;; and operands 5..7 are V2DF pseudos.
;; NOTE(review): presumably operands 6 and 7 receive the compare mask
;; and the masked 2**32 correction respectively -- confirm against the
;; full pattern.
3228 (define_expand "vec_unpacku_float_lo_v4si"
3232 (match_operand:V4SI 1 "nonimmediate_operand" "")
3233 (parallel [(const_int 0) (const_int 1)]))))
3235 (lt:V2DF (match_dup 5) (match_dup 3)))
3237 (and:V2DF (match_dup 6) (match_dup 4)))
3238 (set (match_operand:V2DF 0 "register_operand" "")
3239 (plus:V2DF (match_dup 5) (match_dup 7)))]
3242 REAL_VALUE_TYPE TWO32r;
3246 real_ldexp (&TWO32r, &dconst1, 32);
3247 x = const_double_from_real_value (TWO32r, DFmode);
3249 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3250 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3252 for (i = 5; i < 8; i++)
3253 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V2DF inputs into one V4SF result: each input goes through
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and
;; gen_sse_movlhps then combines r1 and r2 into operand 0.
3256 (define_expand "vec_pack_trunc_v2df"
3257 [(match_operand:V4SF 0 "register_operand" "")
3258 (match_operand:V2DF 1 "nonimmediate_operand" "")
3259 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3264 r1 = gen_reg_rtx (V4SFmode);
3265 r2 = gen_reg_rtx (V4SFmode);
3267 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3268 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3269 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI result using the truncating
;; conversion: each input goes through gen_sse2_cvttpd2dq into a V4SI
;; temporary, then gen_vec_interleave_lowv2di combines the two
;; temporaries, with all three V4SI values viewed as V2DI via
;; gen_lowpart.
3273 (define_expand "vec_pack_sfix_trunc_v2df"
3274 [(match_operand:V4SI 0 "register_operand" "")
3275 (match_operand:V2DF 1 "nonimmediate_operand" "")
3276 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3281 r1 = gen_reg_rtx (V4SImode);
3282 r2 = gen_reg_rtx (V4SImode);
3284 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3285 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3286 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3287 gen_lowpart (V2DImode, r1),
3288 gen_lowpart (V2DImode, r2)));
;; Same shape as "vec_pack_sfix_trunc_v2df" but using gen_sse2_cvtpd2dq
;; (the non-truncating conversion) for each V2DF input before the two
;; V4SI temporaries are combined with gen_vec_interleave_lowv2di on
;; their V2DI views.
3292 (define_expand "vec_pack_sfix_v2df"
3293 [(match_operand:V4SI 0 "register_operand" "")
3294 (match_operand:V2DF 1 "nonimmediate_operand" "")
3295 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3300 r1 = gen_reg_rtx (V4SImode);
3301 r2 = gen_reg_rtx (V4SImode);
3303 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3304 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3305 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3306 gen_lowpart (V2DImode, r1),
3307 gen_lowpart (V2DImode, r2)));
3311 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3313 ;; Parallel single-precision floating point element swizzling
3315 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns.  All three operands may
;; be registers or memory here; the preparation statement passes them
;; through ix86_fixup_binary_operands before the insn is generated.
3317 (define_expand "sse_movhlps_exp"
3318 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3321 (match_operand:V4SF 1 "nonimmediate_operand" "")
3322 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3323 (parallel [(const_int 6)
3328 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps with three alternatives:
;;   0: all registers          -> vmovhlps
;;   1: operand 2 in offsettable memory ("o") -> vmovlps using %H2
;;   2: memory destination with operand 1 tied to it -> vmovhps store
;; The insn condition rejects operands 1 and 2 both being memory.
3330 (define_insn "*avx_movhlps"
3331 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3334 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3335 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3336 (parallel [(const_int 6)
3340 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3342 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3343 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3344 vmovhps\t{%2, %0|%0, %2}"
3345 [(set_attr "type" "ssemov")
3346 (set_attr "prefix" "vex")
3347 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE movhlps; operand 1 is tied to the destination in every
;; alternative:
;;   0: register operand 2     -> movhlps
;;   1: operand 2 in offsettable memory ("o") -> movlps using %H2
;;   2: memory destination     -> movhps store
;; The insn condition rejects operands 1 and 2 both being memory.
3349 (define_insn "sse_movhlps"
3350 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3353 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3354 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3355 (parallel [(const_int 6)
3359 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3361 movhlps\t{%2, %0|%0, %2}
3362 movlps\t{%H2, %0|%0, %H2}
3363 movhps\t{%2, %0|%0, %2}"
3364 [(set_attr "type" "ssemov")
3365 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns.  All three operands may
;; be registers or memory here; the preparation statement passes them
;; through ix86_fixup_binary_operands before the insn is generated.
3367 (define_expand "sse_movlhps_exp"
3368 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3371 (match_operand:V4SF 1 "nonimmediate_operand" "")
3372 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3373 (parallel [(const_int 0)
3378 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps with three alternatives:
;;   0: all registers          -> vmovlhps
;;   1: operand 2 in memory    -> vmovhps load
;;   2: offsettable memory destination ("o") with operand 1 tied to it
;;      -> vmovlps store addressed through %H0
;; Operand combinations are validated by ix86_binary_operator_ok.
3380 (define_insn "*avx_movlhps"
3381 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3384 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3385 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3386 (parallel [(const_int 0)
3390 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3392 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3393 vmovhps\t{%2, %1, %0|%0, %1, %2}
3394 vmovlps\t{%2, %H0|%H0, %2}"
3395 [(set_attr "type" "ssemov")
3396 (set_attr "prefix" "vex")
3397 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE movlhps; operand 1 is tied to the destination in every
;; alternative:
;;   0: register operand 2     -> movlhps
;;   1: operand 2 in memory    -> movhps load
;;   2: offsettable memory destination ("o") -> movlps store via %H0
;; Operand combinations are validated by ix86_binary_operator_ok.
3399 (define_insn "sse_movlhps"
3400 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3403 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3404 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3405 (parallel [(const_int 0)
3409 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3411 movlhps\t{%2, %0|%0, %2}
3412 movhps\t{%2, %0|%0, %2}
3413 movlps\t{%2, %H0|%H0, %2}"
3414 [(set_attr "type" "ssemov")
3415 (set_attr "mode" "V4SF,V2SF,V2SF")])
3417 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  The selector indexes the pair (operand 1,
;; operand 2): indices 0-7 pick from operand 1 and 8-15 from operand 2,
;; so the list interleaves elements 2,3 and 6,7 of each input.
3418 (define_insn "avx_unpckhps256"
3419 [(set (match_operand:V8SF 0 "register_operand" "=x")
3422 (match_operand:V8SF 1 "register_operand" "x")
3423 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3424 (parallel [(const_int 2) (const_int 10)
3425 (const_int 3) (const_int 11)
3426 (const_int 6) (const_int 14)
3427 (const_int 7) (const_int 15)])))]
3429 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3430 [(set_attr "type" "sselog")
3431 (set_attr "prefix" "vex")
3432 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpckhps: interleaves elements 2 and 3 of operand 1 with
;; elements 2 and 3 of operand 2 (selector indices 6 and 7).  Operand 1
;; need not be the destination register.
3434 (define_insn "*avx_interleave_highv4sf"
3435 [(set (match_operand:V4SF 0 "register_operand" "=x")
3438 (match_operand:V4SF 1 "register_operand" "x")
3439 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3440 (parallel [(const_int 2) (const_int 6)
3441 (const_int 3) (const_int 7)])))]
3443 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3444 [(set_attr "type" "sselog")
3445 (set_attr "prefix" "vex")
3446 (set_attr "mode" "V4SF")])
;; Legacy-SSE unpckhps: same element selection as the AVX form
;; (2, 6, 3, 7), but operand 1 is tied to the destination register.
3448 (define_insn "vec_interleave_highv4sf"
3449 [(set (match_operand:V4SF 0 "register_operand" "=x")
3452 (match_operand:V4SF 1 "register_operand" "0")
3453 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3454 (parallel [(const_int 2) (const_int 6)
3455 (const_int 3) (const_int 7)])))]
3457 "unpckhps\t{%2, %0|%0, %2}"
3458 [(set_attr "type" "sselog")
3459 (set_attr "mode" "V4SF")])
3461 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  The selector indexes the pair (operand 1,
;; operand 2): indices 0-7 pick from operand 1 and 8-15 from operand 2,
;; so the list interleaves elements 0,1 and 4,5 of each input.
3462 (define_insn "avx_unpcklps256"
3463 [(set (match_operand:V8SF 0 "register_operand" "=x")
3466 (match_operand:V8SF 1 "register_operand" "x")
3467 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3468 (parallel [(const_int 0) (const_int 8)
3469 (const_int 1) (const_int 9)
3470 (const_int 4) (const_int 12)
3471 (const_int 5) (const_int 13)])))]
3473 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3474 [(set_attr "type" "sselog")
3475 (set_attr "prefix" "vex")
3476 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpcklps: interleaves elements 0 and 1 of operand 1 with
;; elements 0 and 1 of operand 2 (selector indices 4 and 5).  Operand 1
;; need not be the destination register.
3478 (define_insn "*avx_interleave_lowv4sf"
3479 [(set (match_operand:V4SF 0 "register_operand" "=x")
3482 (match_operand:V4SF 1 "register_operand" "x")
3483 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3484 (parallel [(const_int 0) (const_int 4)
3485 (const_int 1) (const_int 5)])))]
3487 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3488 [(set_attr "type" "sselog")
3489 (set_attr "prefix" "vex")
3490 (set_attr "mode" "V4SF")])
;; Legacy-SSE unpcklps: same element selection as the AVX form
;; (0, 4, 1, 5), but operand 1 is tied to the destination register.
3492 (define_insn "vec_interleave_lowv4sf"
3493 [(set (match_operand:V4SF 0 "register_operand" "=x")
3496 (match_operand:V4SF 1 "register_operand" "0")
3497 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3498 (parallel [(const_int 0) (const_int 4)
3499 (const_int 1) (const_int 5)])))]
3501 "unpcklps\t{%2, %0|%0, %2}"
3502 [(set_attr "type" "sselog")
3503 (set_attr "mode" "V4SF")])
3505 ;; These are modeled with the same vec_concat as the others so that we
3506 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selector duplicates each odd-indexed element
;; (1,1,3,3,5,5,7,7) of V8SF operand 1 into operand 0.
3507 (define_insn "avx_movshdup256"
3508 [(set (match_operand:V8SF 0 "register_operand" "=x")
3511 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3513 (parallel [(const_int 1) (const_int 1)
3514 (const_int 3) (const_int 3)
3515 (const_int 5) (const_int 5)
3516 (const_int 7) (const_int 7)])))]
3518 "vmovshdup\t{%1, %0|%0, %1}"
3519 [(set_attr "type" "sse")
3520 (set_attr "prefix" "vex")
3521 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF operand 1 (register or memory); the selector
;; list begins with element 1.  One template serves both encodings via
;; the %v mnemonic prefix and the maybe_vex "prefix" attribute.
3523 (define_insn "sse3_movshdup"
3524 [(set (match_operand:V4SF 0 "register_operand" "=x")
3527 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3529 (parallel [(const_int 1)
3534 "%vmovshdup\t{%1, %0|%0, %1}"
3535 [(set_attr "type" "sse")
3536 (set_attr "prefix_rep" "1")
3537 (set_attr "prefix" "maybe_vex")
3538 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: the selector duplicates each even-indexed element
;; (0,0,2,2,4,4,6,6) of V8SF operand 1 into operand 0.
3540 (define_insn "avx_movsldup256"
3541 [(set (match_operand:V8SF 0 "register_operand" "=x")
3544 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3546 (parallel [(const_int 0) (const_int 0)
3547 (const_int 2) (const_int 2)
3548 (const_int 4) (const_int 4)
3549 (const_int 6) (const_int 6)])))]
3551 "vmovsldup\t{%1, %0|%0, %1}"
3552 [(set_attr "type" "sse")
3553 (set_attr "prefix" "vex")
3554 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF operand 1 (register or memory); the selector
;; list begins with element 0.  One template serves both encodings via
;; the %v mnemonic prefix and the maybe_vex "prefix" attribute.
3556 (define_insn "sse3_movsldup"
3557 [(set (match_operand:V4SF 0 "register_operand" "=x")
3560 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3562 (parallel [(const_int 0)
3567 "%vmovsldup\t{%1, %0|%0, %1}"
3568 [(set_attr "type" "sse")
3569 (set_attr "prefix_rep" "1")
3570 (set_attr "prefix" "maybe_vex")
3571 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle control as a single integer (operand 3).
;; It splits the value into four 2-bit fields and passes eight explicit
;; selector indices to gen_avx_shufps256_1: fields 0 and 1 unchanged,
;; fields 2 and 3 offset by 8, and then the same four fields again
;; offset by 4 and by 12 respectively.
3573 (define_expand "avx_shufps256"
3574 [(match_operand:V8SF 0 "register_operand" "")
3575 (match_operand:V8SF 1 "register_operand" "")
3576 (match_operand:V8SF 2 "nonimmediate_operand" "")
3577 (match_operand:SI 3 "const_int_operand" "")]
3580 int mask = INTVAL (operands[3]);
3581 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3582 GEN_INT ((mask >> 0) & 3),
3583 GEN_INT ((mask >> 2) & 3),
3584 GEN_INT (((mask >> 4) & 3) + 8),
3585 GEN_INT (((mask >> 6) & 3) + 8),
3586 GEN_INT (((mask >> 0) & 3) + 4),
3587 GEN_INT (((mask >> 2) & 3) + 4),
3588 GEN_INT (((mask >> 4) & 3) + 12),
3589 GEN_INT (((mask >> 6) & 3) + 12)));
3593 ;; One bit in mask selects 2 elements.
;; vshufps on 256-bit operands with the selection written out as eight
;; index operands (3..10).  The insn condition requires operands 7..10
;; to equal operands 3..6 plus 4, i.e. both halves use the same pattern.
;; The output code rebuilds the 8-bit immediate from operands 3..6
;; (removing the +8 bias from operands 5 and 6) and stores it back in
;; operands[3] for the template.
3594 (define_insn "avx_shufps256_1"
3595 [(set (match_operand:V8SF 0 "register_operand" "=x")
3598 (match_operand:V8SF 1 "register_operand" "x")
3599 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3600 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3601 (match_operand 4 "const_0_to_3_operand" "")
3602 (match_operand 5 "const_8_to_11_operand" "")
3603 (match_operand 6 "const_8_to_11_operand" "")
3604 (match_operand 7 "const_4_to_7_operand" "")
3605 (match_operand 8 "const_4_to_7_operand" "")
3606 (match_operand 9 "const_12_to_15_operand" "")
3607 (match_operand 10 "const_12_to_15_operand" "")])))]
3609 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3610 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3611 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3612 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3615 mask = INTVAL (operands[3]);
3616 mask |= INTVAL (operands[4]) << 2;
3617 mask |= (INTVAL (operands[5]) - 8) << 4;
3618 mask |= (INTVAL (operands[6]) - 8) << 6;
3619 operands[3] = GEN_INT (mask);
3621 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3623 [(set_attr "type" "sselog")
3624 (set_attr "length_immediate" "1")
3625 (set_attr "prefix" "vex")
3626 (set_attr "mode" "V8SF")])
;; Expander taking the shuffle control as a single integer (operand 3).
;; It splits the value into four 2-bit fields and passes them to
;; gen_sse_shufps_v4sf as explicit selector indices: fields 0 and 1
;; unchanged, fields 2 and 3 offset by 4.
3628 (define_expand "sse_shufps"
3629 [(match_operand:V4SF 0 "register_operand" "")
3630 (match_operand:V4SF 1 "register_operand" "")
3631 (match_operand:V4SF 2 "nonimmediate_operand" "")
3632 (match_operand:SI 3 "const_int_operand" "")]
3635 int mask = INTVAL (operands[3]);
3636 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3637 GEN_INT ((mask >> 0) & 3),
3638 GEN_INT ((mask >> 2) & 3),
3639 GEN_INT (((mask >> 4) & 3) + 4),
3640 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand vshufps over the SSEMODE4S modes.  The selection is
;; a vec_select from the concatenation of operands 1 and 2: operands 3
;; and 4 index operand 1 (0..3), operands 5 and 6 index operand 2
;; (4..7).  The output code folds the four indices back into one 8-bit
;; immediate (removing the +4 bias) and stores it in operands[3].
3644 (define_insn "*avx_shufps_<mode>"
3645 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3646 (vec_select:SSEMODE4S
3647 (vec_concat:<ssedoublesizemode>
3648 (match_operand:SSEMODE4S 1 "register_operand" "x")
3649 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3650 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3651 (match_operand 4 "const_0_to_3_operand" "")
3652 (match_operand 5 "const_4_to_7_operand" "")
3653 (match_operand 6 "const_4_to_7_operand" "")])))]
3657 mask |= INTVAL (operands[3]) << 0;
3658 mask |= INTVAL (operands[4]) << 2;
3659 mask |= (INTVAL (operands[5]) - 4) << 4;
3660 mask |= (INTVAL (operands[6]) - 4) << 6;
3661 operands[3] = GEN_INT (mask);
3663 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3665 [(set_attr "type" "sselog")
3666 (set_attr "length_immediate" "1")
3667 (set_attr "prefix" "vex")
3668 (set_attr "mode" "V4SF")])
;; Legacy-SSE two-operand shufps over the SSEMODE4S modes; operand 1 is
;; tied to the destination register.  Operands 3 and 4 index operand 1
;; (0..3), operands 5 and 6 index operand 2 (4..7).  The output code
;; folds the four indices back into one 8-bit immediate (removing the
;; +4 bias) and stores it in operands[3].
3670 (define_insn "sse_shufps_<mode>"
3671 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3672 (vec_select:SSEMODE4S
3673 (vec_concat:<ssedoublesizemode>
3674 (match_operand:SSEMODE4S 1 "register_operand" "0")
3675 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3676 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3677 (match_operand 4 "const_0_to_3_operand" "")
3678 (match_operand 5 "const_4_to_7_operand" "")
3679 (match_operand 6 "const_4_to_7_operand" "")])))]
3683 mask |= INTVAL (operands[3]) << 0;
3684 mask |= INTVAL (operands[4]) << 2;
3685 mask |= (INTVAL (operands[5]) - 4) << 4;
3686 mask |= (INTVAL (operands[6]) - 4) << 6;
3687 operands[3] = GEN_INT (mask);
3689 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3691 [(set_attr "type" "sselog")
3692 (set_attr "length_immediate" "1")
3693 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: memory destination     -> movhps store
;;   1: register to register   -> movhlps
;;   2: offsettable memory source ("o") -> movlps load through %H1
;; All templates use the %v prefix together with maybe_vex.
3695 (define_insn "sse_storehps"
3696 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3698 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3699 (parallel [(const_int 2) (const_int 3)])))]
3702 %vmovhps\t{%1, %0|%0, %1}
3703 %vmovhlps\t{%1, %d0|%d0, %1}
3704 %vmovlps\t{%H1, %d0|%d0, %H1}"
3705 [(set_attr "type" "ssemov")
3706 (set_attr "prefix" "maybe_vex")
3707 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: combines elements 0 and
;; 1 of V4SF operand 1 with V2SF operand 2.  Operands are passed through
;; ix86_fixup_binary_operands before the insn is generated.
3709 (define_expand "sse_loadhps_exp"
3710 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3713 (match_operand:V4SF 1 "nonimmediate_operand" "")
3714 (parallel [(const_int 0) (const_int 1)]))
3715 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3717 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: elements 0 and 1 of operand 1 combined with V2SF
;; operand 2.  Alternatives:
;;   0: operand 2 in memory    -> vmovhps load
;;   1: operand 2 in register  -> vmovlhps
;;   2: offsettable memory destination ("o") with operand 1 tied to it
;;      -> vmovlps store addressed through %H0
3719 (define_insn "*avx_loadhps"
3720 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3723 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3724 (parallel [(const_int 0) (const_int 1)]))
3725 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3728 vmovhps\t{%2, %1, %0|%0, %1, %2}
3729 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3730 vmovlps\t{%2, %H0|%H0, %2}"
3731 [(set_attr "type" "ssemov")
3732 (set_attr "prefix" "vex")
3733 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy-SSE loadhps; operand 1 is tied to the destination in every
;; alternative:
;;   0: operand 2 in memory    -> movhps load
;;   1: operand 2 in register  -> movlhps
;;   2: offsettable memory destination ("o") -> movlps store via %H0
3735 (define_insn "sse_loadhps"
3736 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3739 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3740 (parallel [(const_int 0) (const_int 1)]))
3741 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3744 movhps\t{%2, %0|%0, %2}
3745 movlhps\t{%2, %0|%0, %2}
3746 movlps\t{%2, %H0|%H0, %2}"
3747 [(set_attr "type" "ssemov")
3748 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX storelps: extract elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.  Alternatives:
;;   0: memory destination     -> vmovlps store
;;   1: register to register   -> vmovaps (whole-register copy)
;;   2: memory source          -> vmovlps load merged into operand 0
;; The "mode" attribute for alternative 1 is V4SF, matching the vmovaps
;; it emits and the equivalent movaps alternative of "sse_storelps".
3750 (define_insn "*avx_storelps"
3751 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3753 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3754 (parallel [(const_int 0) (const_int 1)])))]
3757 vmovlps\t{%1, %0|%0, %1}
3758 vmovaps\t{%1, %0|%0, %1}
3759 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3760 [(set_attr "type" "ssemov")
3761 (set_attr "prefix" "vex")
3762 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy-SSE storelps: extract elements 0 and 1 of V4SF operand 1 into
;; V2SF operand 0.  Alternatives:
;;   0: memory destination     -> movlps store
;;   1: register to register   -> movaps
;;   2: memory source          -> movlps load
3764 (define_insn "sse_storelps"
3765 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3767 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3768 (parallel [(const_int 0) (const_int 1)])))]
3771 movlps\t{%1, %0|%0, %1}
3772 movaps\t{%1, %0|%0, %1}
3773 movlps\t{%1, %0|%0, %1}"
3774 [(set_attr "type" "ssemov")
3775 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: combines V2SF operand 2
;; with elements 2 and 3 of V4SF operand 1.  Operands are passed through
;; ix86_fixup_binary_operands before the insn is generated.
3777 (define_expand "sse_loadlps_exp"
3778 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3780 (match_operand:V2SF 2 "nonimmediate_operand" "")
3782 (match_operand:V4SF 1 "nonimmediate_operand" "")
3783 (parallel [(const_int 2) (const_int 3)]))))]
3785 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: V2SF operand 2 combined with elements 2 and 3 of V4SF
;; operand 1.  Alternatives:
;;   0: all registers          -> vshufps with immediate 0xe4
;;   1: operand 2 in memory    -> vmovlps load
;;   2: memory destination with operand 1 tied to it -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: its template names three
;; register operands plus an immediate, a form the legacy "shufps"
;; mnemonic does not have, and the "prefix" attribute is vex for every
;; alternative.
3787 (define_insn "*avx_loadlps"
3788 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3790 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3792 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3793 (parallel [(const_int 2) (const_int 3)]))))]
3796 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3797 vmovlps\t{%2, %1, %0|%0, %1, %2}
3798 vmovlps\t{%2, %0|%0, %2}"
3799 [(set_attr "type" "sselog,ssemov,ssemov")
3800 (set_attr "length_immediate" "1,*,*")
3801 (set_attr "prefix" "vex")
3802 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE loadlps: V2SF operand 2 combined with elements 2 and 3 of
;; V4SF operand 1.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 1 tied, operand 2 in memory -> movlps load
;;   2: memory destination with operand 1 tied -> movlps store
3804 (define_insn "sse_loadlps"
3805 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3807 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3809 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3810 (parallel [(const_int 2) (const_int 3)]))))]
3813 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3814 movlps\t{%2, %0|%0, %2}
3815 movlps\t{%2, %0|%0, %2}"
3816 [(set_attr "type" "sselog,ssemov,ssemov")
3817 (set_attr "length_immediate" "1,*,*")
3818 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand register-only vmovss: V4SF operands 2 and 1 are
;; both sources and neither is tied to the destination.
3820 (define_insn "*avx_movss"
3821 [(set (match_operand:V4SF 0 "register_operand" "=x")
3823 (match_operand:V4SF 2 "register_operand" "x")
3824 (match_operand:V4SF 1 "register_operand" "x")
3827 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3828 [(set_attr "type" "ssemov")
3829 (set_attr "prefix" "vex")
3830 (set_attr "mode" "SF")])
;; Legacy-SSE register-only movss: operand 1 is tied to the destination
;; register and operand 2 is the other register source.
3832 (define_insn "sse_movss"
3833 [(set (match_operand:V4SF 0 "register_operand" "=x")
3835 (match_operand:V4SF 2 "register_operand" "x")
3836 (match_operand:V4SF 1 "register_operand" "0")
3839 "movss\t{%2, %0|%0, %2}"
3840 [(set_attr "type" "ssemov")
3841 (set_attr "mode" "SF")])
;; Expander that broadcasts scalar SF operand 1 into V4SF operand 0.
;; The preparation code may move operand 1 into a register; since that
;; operand is a scalar (match_operand:SF), the register must be created
;; in SFmode -- the non-AVX "*vec_dupv4sf" insn accepts only an SF
;; register_operand.  Requesting V4SFmode here would pair a V4SF pseudo
;; with an SF value whenever operand 1 arrives in memory.
3843 (define_expand "vec_dupv4sf"
3844 [(set (match_operand:V4SF 0 "register_operand" "")
3846 (match_operand:SF 1 "nonimmediate_operand" "")))]
3850 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of SF operand 1 into V4SF operand 0.  Alternatives:
;;   0: register source -> vshufps with immediate 0, using operand 1 as
;;      both sources
;;   1: memory source   -> vbroadcastss
3853 (define_insn "*vec_dupv4sf_avx"
3854 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3856 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3859 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3860 vbroadcastss\t{%1, %0|%0, %1}"
3861 [(set_attr "type" "sselog1,ssemov")
3862 (set_attr "length_immediate" "1,0")
3863 (set_attr "prefix_extra" "0,1")
3864 (set_attr "prefix" "vex")
3865 (set_attr "mode" "V4SF")])
;; Legacy-SSE broadcast of SF operand 1 into V4SF operand 0.  Operand 1
;; must be a register tied to the destination; the insn is shufps with
;; immediate 0 applied to that one register.
3867 (define_insn "*vec_dupv4sf"
3868 [(set (match_operand:V4SF 0 "register_operand" "=x")
3870 (match_operand:SF 1 "register_operand" "0")))]
3872 "shufps\t{$0, %0, %0|%0, %0, 0}"
3873 [(set_attr "type" "sselog1")
3874 (set_attr "length_immediate" "1")
3875 (set_attr "mode" "V4SF")])
;; Build a V2SF value from two SF operands when AVX is available.
;; Alternatives:
;;   0: both in SSE registers            -> vunpcklps
;;   1: operand 2 in memory              -> vinsertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 zero ("C") -> vmovss load
;;   3: MMX destination, operand 2 in MMX register or memory -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 zero -> movd
;; Alternative 3 is a two-operand MMX instruction whose template prints
;; only %2 and %0, so operand 1 has to be tied to the destination ("0"),
;; exactly as in the sse4_1 and sse variants of this pattern.  An "x"
;; constraint there would let operand 1 sit in an unrelated SSE register
;; that the emitted instruction never reads.
;; Alternatives 3 and 4 are MMX encodings, hence prefix "orig" for them
;; and "vex" for the rest.
3877 (define_insn "*vec_concatv2sf_avx"
3878 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3880 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, 0 , m")
3881 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3884 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3885 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3886 vmovss\t{%1, %0|%0, %1}
3887 punpckldq\t{%2, %0|%0, %2}
3888 movd\t{%1, %0|%0, %1}"
3889 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3890 (set_attr "length_immediate" "*,1,*,*,*")
3891 (set_attr "prefix_extra" "*,1,*,*,*")
3892 (set (attr "prefix")
3893 (if_then_else (eq_attr "alternative" "3,4")
3894 (const_string "orig")
3895 (const_string "vex")))
3896 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3898 ;; Although insertps takes register source, we prefer
3899 ;; unpcklps with register source since it is shorter.
;; Build a V2SF value from two SF operands (SSE4.1 variant).  Operand 1
;; is tied to the destination wherever it is a register.  Alternatives:
;;   0: operand 2 in SSE register        -> unpcklps
;;   1: operand 2 in memory              -> insertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 zero ("C") -> movss load
;;   3: MMX destination, operand 2 in MMX register or memory -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 zero -> movd
3900 (define_insn "*vec_concatv2sf_sse4_1"
3901 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3903 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3904 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3907 unpcklps\t{%2, %0|%0, %2}
3908 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3909 movss\t{%1, %0|%0, %1}
3910 punpckldq\t{%2, %0|%0, %2}
3911 movd\t{%1, %0|%0, %1}"
3912 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3913 (set_attr "prefix_data16" "*,1,*,*,*")
3914 (set_attr "prefix_extra" "*,1,*,*,*")
3915 (set_attr "length_immediate" "*,1,*,*,*")
3916 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3918 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3919 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3920 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF value from two SF operands (baseline SSE variant).
;; Operand 2 must be a register or zero (reg_or_0_operand).
;; Alternatives:
;;   0: operand 1 tied, operand 2 in SSE register -> unpcklps
;;   1: operand 1 in memory, operand 2 zero       -> movss load
;;   2: MMX destination, operand 2 in MMX register -> punpckldq
;;   3: MMX destination, operand 1 in memory, operand 2 zero -> movd
3921 (define_insn "*vec_concatv2sf_sse"
3922 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3924 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3925 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3928 unpcklps\t{%2, %0|%0, %2}
3929 movss\t{%1, %0|%0, %1}
3930 punpckldq\t{%2, %0|%0, %2}
3931 movd\t{%1, %0|%0, %1}"
3932 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3933 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF value from two V2SF halves with AVX: vmovlhps when
;; operand 2 is a register, vmovhps when operand 2 is in memory.
3935 (define_insn "*vec_concatv4sf_avx"
3936 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3938 (match_operand:V2SF 1 "register_operand" " x,x")
3939 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3942 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3943 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3944 [(set_attr "type" "ssemov")
3945 (set_attr "prefix" "vex")
3946 (set_attr "mode" "V4SF,V2SF")])
;; Build a V4SF value from two V2SF halves with legacy SSE; operand 1 is
;; tied to the destination.  movlhps when operand 2 is a register,
;; movhps when operand 2 is in memory.
3948 (define_insn "*vec_concatv4sf_sse"
3949 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3951 (match_operand:V2SF 1 "register_operand" " 0,0")
3952 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3955 movlhps\t{%2, %0|%0, %2}
3956 movhps\t{%2, %0|%0, %2}"
3957 [(set_attr "type" "ssemov")
3958 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialisation for every SSEMODE mode: delegates entirely to
;; ix86_expand_vector_init, passing false as its first argument.
3960 (define_expand "vec_init<mode>"
3961 [(match_operand:SSEMODE 0 "register_operand" "")
3962 (match_operand 1 "" "")]
3965 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX variant of the "vec_set<mode>_0" family for the SSEMODE4S modes:
;; scalar operand 2 is merged (vec_merge of a vec_duplicate) with vector
;; operand 1.  Alternatives, by operand 1 / operand 2 constraints:
;;   0: operand 1 zero ("C"), operand 2 in SSE register -> vinsertps 0xe
;;   1: operand 1 zero, operand 2 in memory       -> vmov<suffix> load
;;   2: operand 1 zero, operand 2 in integer reg  -> vmovd
;;   3: both in SSE registers                     -> vmovss
;;   4: operand 2 in integer register or memory   -> vpinsrd, imm 0
;;   5: memory destination tied to operand 1; its template line is not
;;      visible here and its type/mode attributes are "*".
3969 (define_insn "*vec_set<mode>_0_avx"
3970 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3971 (vec_merge:SSEMODE4S
3972 (vec_duplicate:SSEMODE4S
3973 (match_operand:<ssescalarmode> 2
3974 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3975 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3979 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3980 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3981 vmovd\t{%2, %0|%0, %2}
3982 vmovss\t{%2, %1, %0|%0, %1, %2}
3983 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3985 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3986 (set_attr "prefix_extra" "*,*,*,*,1,*")
3987 (set_attr "length_immediate" "*,*,*,*,1,*")
3988 (set_attr "prefix" "vex")
3989 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE4.1 variant of the "vec_set<mode>_0" family for the SSEMODE4S
;; modes.  Same alternative layout as the AVX variant, but the
;; instructions are two-operand, so operand 1 is tied to the destination
;; whenever it is not the zero constant:
;;   0: insertps 0xe   1: mov<suffix> load   2: movd from integer reg
;;   3: movss          4: pinsrd, imm 0
;;   5: memory destination; its template line is not visible here and
;;      its type/mode attributes are "*".
3991 (define_insn "*vec_set<mode>_0_sse4_1"
3992 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3993 (vec_merge:SSEMODE4S
3994 (vec_duplicate:SSEMODE4S
3995 (match_operand:<ssescalarmode> 2
3996 "general_operand" " x,m,*r,x,*rm,*rfF"))
3997 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
4001 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
4002 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4003 movd\t{%2, %0|%0, %2}
4004 movss\t{%2, %0|%0, %2}
4005 pinsrd\t{$0, %2, %0|%0, %2, 0}
4007 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4008 (set_attr "prefix_extra" "*,*,*,*,1,*")
4009 (set_attr "length_immediate" "*,*,*,*,1,*")
4010 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE2 variant of the "vec_set<mode>_0" family for the SSEMODE4S modes.
;; Alternatives:
;;   0: operand 1 zero ("C"), operand 2 in memory      -> mov<suffix>
;;   1: operand 1 zero, operand 2 in integer register  -> movd
;;   2: operand 1 tied, operand 2 in SSE register      -> movss
;;   3: memory destination; its template line is not visible here and
;;      its mode attribute is "*".
4012 (define_insn "*vec_set<mode>_0_sse2"
4013 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4014 (vec_merge:SSEMODE4S
4015 (vec_duplicate:SSEMODE4S
4016 (match_operand:<ssescalarmode> 2
4017 "general_operand" " m,*r,x,x*rfF"))
4018 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4022 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4023 movd\t{%2, %0|%0, %2}
4024 movss\t{%2, %0|%0, %2}
4026 [(set_attr "type" "ssemov")
4027 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Baseline variant of the "vec_set<mode>_0" family for the SSEMODE4S
;; modes.  Alternatives:
;;   0: operand 1 zero ("C"), operand 2 in memory -> movss load
;;   1: operand 1 tied, operand 2 in SSE register -> movss
;;   2: memory destination; its template line is not visible here and
;;      its mode attribute is "*".
4029 (define_insn "vec_set<mode>_0"
4030 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4031 (vec_merge:SSEMODE4S
4032 (vec_duplicate:SSEMODE4S
4033 (match_operand:<ssescalarmode> 2
4034 "general_operand" " m,x,x*rfF"))
4035 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4039 movss\t{%2, %0|%0, %2}
4040 movss\t{%2, %0|%0, %2}
4042 [(set_attr "type" "ssemov")
4043 (set_attr "mode" "SF,SF,*")])
4045 ;; A subset is vec_setv4sf.
;; AVX insertion of SF operand 2 into one element of V4SF operand 1.
;; Operand 3 arrives as a power of two in 1..8; the output code turns it
;; into exact_log2 (operand 3) << 4 and stores that as the vinsertps
;; immediate.
4046 (define_insn "*vec_setv4sf_avx"
4047 [(set (match_operand:V4SF 0 "register_operand" "=x")
4050 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4051 (match_operand:V4SF 1 "register_operand" "x")
4052 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4055 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4056 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4058 [(set_attr "type" "sselog")
4059 (set_attr "prefix_extra" "1")
4060 (set_attr "length_immediate" "1")
4061 (set_attr "prefix" "vex")
4062 (set_attr "mode" "V4SF")])
;; SSE4.1 insertion of SF operand 2 into one element of V4SF operand 1,
;; which is tied to the destination.  Operand 3 arrives as a power of
;; two in 1..8; the output code turns it into
;; exact_log2 (operand 3) << 4 and stores that as the insertps immediate.
4064 (define_insn "*vec_setv4sf_sse4_1"
4065 [(set (match_operand:V4SF 0 "register_operand" "=x")
4068 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4069 (match_operand:V4SF 1 "register_operand" "0")
4070 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4073 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4074 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4076 [(set_attr "type" "sselog")
4077 (set_attr "prefix_data16" "1")
4078 (set_attr "prefix_extra" "1")
4079 (set_attr "length_immediate" "1")
4080 (set_attr "mode" "V4SF")])
;; AVX vinsertps modelled as an unspec: V4SF operand 2 (register or
;; memory), V4SF register operand 1 and an immediate in 0..255
;; (operand 3) passed straight through to the instruction.
4082 (define_insn "*avx_insertps"
4083 [(set (match_operand:V4SF 0 "register_operand" "=x")
4084 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4085 (match_operand:V4SF 1 "register_operand" "x")
4086 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4089 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4090 [(set_attr "type" "sselog")
4091 (set_attr "prefix" "vex")
4092 (set_attr "prefix_extra" "1")
4093 (set_attr "length_immediate" "1")
4094 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps modelled as an unspec: V4SF register operand 2,
;; V4SF operand 1 tied to the destination, and an immediate in 0..255
;; (operand 3) passed straight through to the instruction.  Unlike the
;; AVX form, operand 2 is restricted to a register here.
4096 (define_insn "sse4_1_insertps"
4097 [(set (match_operand:V4SF 0 "register_operand" "=x")
4098 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4099 (match_operand:V4SF 1 "register_operand" "0")
4100 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4103 "insertps\t{%3, %2, %0|%0, %2, %3}";
4104 [(set_attr "type" "sselog")
4105 (set_attr "prefix_data16" "1")
4106 (set_attr "prefix_extra" "1")
4107 (set_attr "length_immediate" "1")
4108 (set_attr "mode" "V4SF")])
;; Post-reload split (condition: TARGET_SSE && reload_completed) for a
;; vec_merge of a duplicated non-memory scalar (operand 1) into a memory
;; destination (operand 0).  The replacement is a plain move whose
;; destination is operand 0 re-addressed as the scalar mode at offset 0.
;; NOTE(review): the source of that move is on a line not visible here
;; -- presumably operands[1]; confirm.
4111 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4112 (vec_merge:SSEMODE4S
4113 (vec_duplicate:SSEMODE4S
4114 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4117 "TARGET_SSE && reload_completed"
4120 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Element store for every SSEMODE mode: delegates to
;; ix86_expand_vector_set with the vector (operand 0), the scalar
;; (operand 1) and the constant element index taken from operand 2.
4125 (define_expand "vec_set<mode>"
4126 [(match_operand:SSEMODE 0 "register_operand" "")
4127 (match_operand:<ssescalarmode> 1 "register_operand" "")
4128 (match_operand 2 "const_int_operand" "")]
4131 ix86_expand_vector_set (false, operands[0], operands[1],
4132 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 as an SF value.  The destination
;; may be an SSE register, memory, an x87 register ("f") or an integer
;; register ("r"); memory-to-memory is rejected by the insn condition.
;; After reload the insn is split into a plain SFmode move: operand 1 is
;; re-expressed in SFmode either as the same hard register number
;; (gen_rtx_REG) or via gen_lowpart.
;; NOTE(review): the test choosing between those two forms is on lines
;; not visible here -- presumably REG_P (op1); confirm.
4136 (define_insn_and_split "*vec_extractv4sf_0"
4137 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4139 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4140 (parallel [(const_int 0)])))]
4141 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4143 "&& reload_completed"
4146 rtx op1 = operands[1];
4148 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4150 op1 = gen_lowpart (SFmode, op1);
4151 emit_move_insn (operands[0], op1);
;; Expander for extracting one 128-bit half of a 256-bit vector.
;; Operand 2 is a constant 0 or 1; the switch dispatches to
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>.
4155 (define_expand "avx_vextractf128<mode>"
4156 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4157 (match_operand:AVX256MODE 1 "register_operand" "")
4158 (match_operand:SI 2 "const_0_to_1_operand" "")]
4161 switch (INTVAL (operands[2]))
4164 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4167 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low-half extraction for the AVX256MODE4P modes: selects elements 0
;; and 1 of operand 1.  After reload it is split into a plain move in
;; the half-vector mode, with operand 1 re-expressed either as the same
;; hard register number (gen_rtx_REG) or via gen_lowpart.
;; NOTE(review): the test choosing between those two forms is on lines
;; not visible here -- presumably REG_P (op1); confirm.
4175 (define_insn_and_split "vec_extract_lo_<mode>"
4176 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4177 (vec_select:<avxhalfvecmode>
4178 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4179 (parallel [(const_int 0) (const_int 1)])))]
4182 "&& reload_completed"
4185 rtx op1 = operands[1];
4187 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4189 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4190 emit_move_insn (operands[0], op1);
;; High half of a vector in an AVX256MODE4P mode: selects elements 2 and 3 of
;; register operand 1.  Emitted as vextractf128 with immediate 1; the second
;; alternative writes straight to memory (see the "memory" attribute).
4194 (define_insn "vec_extract_hi_<mode>"
4195 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4196 (vec_select:<avxhalfvecmode>
4197 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4198 (parallel [(const_int 2) (const_int 3)])))]
4200 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4201 [(set_attr "type" "sselog")
4202 (set_attr "prefix_extra" "1")
4203 (set_attr "length_immediate" "1")
4204 (set_attr "memory" "none,store")
4205 (set_attr "prefix" "vex")
4206 (set_attr "mode" "V8SF")])
;; Low half of a vector in an AVX256MODE8P mode: selects elements 0..3 of
;; operand 1 into a half-width destination (register or memory).  After
;; reload the insn is split into a plain move of operand 1 re-expressed in
;; the half-vector mode (gen_rtx_REG on its register number, or gen_lowpart).
;; NOTE(review): the insn condition, the pre-split template and the test
;; selecting gen_rtx_REG vs. gen_lowpart are not visible in this excerpt.
4208 (define_insn_and_split "vec_extract_lo_<mode>"
4209 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4210 (vec_select:<avxhalfvecmode>
4211 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4212 (parallel [(const_int 0) (const_int 1)
4213 (const_int 2) (const_int 3)])))]
4216 "&& reload_completed"
4219 rtx op1 = operands[1];
4221 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4223 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4224 emit_move_insn (operands[0], op1);
;; High half of a vector in an AVX256MODE8P mode: selects elements 4..7 of
;; register operand 1.  Emitted as vextractf128 with immediate 1; the second
;; alternative writes straight to memory (see the "memory" attribute).
4228 (define_insn "vec_extract_hi_<mode>"
4229 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4230 (vec_select:<avxhalfvecmode>
4231 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4232 (parallel [(const_int 4) (const_int 5)
4233 (const_int 6) (const_int 7)])))]
4235 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4236 [(set_attr "type" "sselog")
4237 (set_attr "prefix_extra" "1")
4238 (set_attr "length_immediate" "1")
4239 (set_attr "memory" "none,store")
4240 (set_attr "prefix" "vex")
4241 (set_attr "mode" "V8SF")])
;; Low half of a V16HI vector: selects elements 0..7 of operand 1 into a V8HI
;; destination (register or memory).  After reload the insn is split into a
;; plain move of operand 1 re-expressed in V8HImode (gen_rtx_REG on its
;; register number, or gen_lowpart).
;; NOTE(review): the insn condition, the pre-split template and the test
;; selecting gen_rtx_REG vs. gen_lowpart are not visible in this excerpt.
4243 (define_insn_and_split "vec_extract_lo_v16hi"
4244 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4246 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4247 (parallel [(const_int 0) (const_int 1)
4248 (const_int 2) (const_int 3)
4249 (const_int 4) (const_int 5)
4250 (const_int 6) (const_int 7)])))]
4253 "&& reload_completed"
4256 rtx op1 = operands[1];
4258 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4260 op1 = gen_lowpart (V8HImode, op1);
4261 emit_move_insn (operands[0], op1);
;; High half of a V16HI vector: selects elements 8..15 of register operand 1
;; into a V8HI destination.  Emitted as vextractf128 with immediate 1; the
;; second alternative writes straight to memory (see the "memory" attribute).
4265 (define_insn "vec_extract_hi_v16hi"
4266 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4268 (match_operand:V16HI 1 "register_operand" "x,x")
4269 (parallel [(const_int 8) (const_int 9)
4270 (const_int 10) (const_int 11)
4271 (const_int 12) (const_int 13)
4272 (const_int 14) (const_int 15)])))]
4274 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4275 [(set_attr "type" "sselog")
4276 (set_attr "prefix_extra" "1")
4277 (set_attr "length_immediate" "1")
4278 (set_attr "memory" "none,store")
4279 (set_attr "prefix" "vex")
4280 (set_attr "mode" "V8SF")])
;; Low half of a V32QI vector: selects elements 0..15 of operand 1 into a
;; V16QI destination (register or memory).  After reload the insn is split
;; into a plain move of operand 1 re-expressed in V16QImode (gen_rtx_REG on
;; its register number, or gen_lowpart).
;; NOTE(review): the insn condition, the pre-split template and the test
;; selecting gen_rtx_REG vs. gen_lowpart are not visible in this excerpt.
4282 (define_insn_and_split "vec_extract_lo_v32qi"
4283 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4285 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4286 (parallel [(const_int 0) (const_int 1)
4287 (const_int 2) (const_int 3)
4288 (const_int 4) (const_int 5)
4289 (const_int 6) (const_int 7)
4290 (const_int 8) (const_int 9)
4291 (const_int 10) (const_int 11)
4292 (const_int 12) (const_int 13)
4293 (const_int 14) (const_int 15)])))]
4296 "&& reload_completed"
4299 rtx op1 = operands[1];
4301 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4303 op1 = gen_lowpart (V16QImode, op1);
4304 emit_move_insn (operands[0], op1);
;; High half of a V32QI vector: selects elements 16..31 of register operand 1
;; into a V16QI destination.  Emitted as vextractf128 with immediate 1; the
;; second alternative writes straight to memory (see the "memory" attribute).
4308 (define_insn "vec_extract_hi_v32qi"
4309 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4311 (match_operand:V32QI 1 "register_operand" "x,x")
4312 (parallel [(const_int 16) (const_int 17)
4313 (const_int 18) (const_int 19)
4314 (const_int 20) (const_int 21)
4315 (const_int 22) (const_int 23)
4316 (const_int 24) (const_int 25)
4317 (const_int 26) (const_int 27)
4318 (const_int 28) (const_int 29)
4319 (const_int 30) (const_int 31)])))]
4321 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4322 [(set_attr "type" "sselog")
4323 (set_attr "prefix_extra" "1")
4324 (set_attr "length_immediate" "1")
4325 (set_attr "memory" "none,store")
4326 (set_attr "prefix" "vex")
4327 (set_attr "mode" "V8SF")])
;; Extracts the element of V4SF register operand 1 chosen by constant
;; operand 2 (0..3) into an SF destination that is a general register or
;; memory ("=rm").  The template uses the %v prefix and the insn is marked
;; "maybe_vex", so the same pattern covers the VEX-encoded spelling.
;; NOTE(review): the insn condition is not visible in this excerpt.
4329 (define_insn "*sse4_1_extractps"
4330 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4332 (match_operand:V4SF 1 "register_operand" "x")
4333 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4335 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4336 [(set_attr "type" "sselog")
4337 (set_attr "prefix_data16" "1")
4338 (set_attr "prefix_extra" "1")
4339 (set_attr "length_immediate" "1")
4340 (set_attr "prefix" "maybe_vex")
4341 (set_attr "mode" "V4SF")])
;; Extraction of element operand 2 (0..3) from a V4SF held in offsettable
;; memory ("o") into a register.  The split replaces it with a scalar SFmode
;; move from operand 1 re-addressed at byte offset i*4, where i is the
;; element index.
;; NOTE(review): the insn and split conditions are not visible in this
;; excerpt.
4343 (define_insn_and_split "*vec_extract_v4sf_mem"
4344 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4346 (match_operand:V4SF 1 "memory_operand" "o")
4347 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4353 int i = INTVAL (operands[2]);
4355 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for every mode in SSEMODE.  Operand 0 is the scalar register
;; result, operand 1 the vector register and operand 2 a const_int element
;; index.  All work is delegated to ix86_expand_vector_extract, called with
;; `false' as its first argument and INTVAL (operands[2]) as the index.
4359 (define_expand "vec_extract<mode>"
4360 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4361 (match_operand:SSEMODE 1 "register_operand" "")
4362 (match_operand 2 "const_int_operand" "")]
4365 ix86_expand_vector_extract (false, operands[0], operands[1],
4366 INTVAL (operands[2]));
4370 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4372 ;; Parallel double-precision floating point element swizzling
4374 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4376 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd.  The selector picks indices 1, 5, 3 and 7; indices of 4
;; and above can only refer to a combination of operands 1 and 2, so the
;; result takes the odd-numbered element of each operand, pairwise.
;; NOTE(review): the operator combining operands 1 and 2 and the insn
;; condition are not visible in this excerpt.
4377 (define_insn "avx_unpckhpd256"
4378 [(set (match_operand:V4DF 0 "register_operand" "=x")
4381 (match_operand:V4DF 1 "register_operand" "x")
4382 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4383 (parallel [(const_int 1) (const_int 5)
4384 (const_int 3) (const_int 7)])))]
4386 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4387 [(set_attr "type" "sselog")
4388 (set_attr "prefix" "vex")
4389 (set_attr "mode" "V4DF")])
;; Expander for the high-element interleave of two V2DF operands.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register so that one of the insn
;; patterns can match.
;; NOTE(review): the second selector index and the expander condition are
;; not visible in this excerpt.
4391 (define_expand "vec_interleave_highv2df"
4392 [(set (match_operand:V2DF 0 "register_operand" "")
4395 (match_operand:V2DF 1 "nonimmediate_operand" "")
4396 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4397 (parallel [(const_int 1)
4401 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4402 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX form of the V2DF high interleave.  Four alternatives, in order:
;;   0: register/register           -> vunpckhpd
;;   1: both inputs the same memory -> vmovddup from %H1
;;   2: operand 1 in memory         -> vmovlpd from %H1 combined with operand 2
;;   3: memory destination tied to operand 2 -> vmovhpd store of operand 1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
4405 (define_insn "*avx_interleave_highv2df"
4406 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4409 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4410 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4411 (parallel [(const_int 1)
4413 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4415 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4416 vmovddup\t{%H1, %0|%0, %H1}
4417 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4418 vmovhpd\t{%1, %0|%0, %1}"
4419 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4420 (set_attr "prefix" "vex")
4421 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 (two-operand) form of the V2DF high interleave.  Four alternatives:
;;   0: operand 1 tied to the destination -> unpckhpd
;;   1: both inputs the same memory       -> movddup from %H1
;;   2: operand 2 tied to the destination -> movlpd from %H1
;;   3: memory destination tied to operand 2 -> movhpd store of operand 1
;; The two movlpd/movhpd alternatives carry a data16 prefix.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
4423 (define_insn "*sse3_interleave_highv2df"
4424 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4427 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4428 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4429 (parallel [(const_int 1)
4431 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4433 unpckhpd\t{%2, %0|%0, %2}
4434 movddup\t{%H1, %0|%0, %H1}
4435 movlpd\t{%H1, %0|%0, %H1}
4436 movhpd\t{%1, %0|%0, %1}"
4437 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4438 (set_attr "prefix_data16" "*,*,1,1")
4439 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF high interleave; same as the SSE3 pattern but
;; without the movddup alternative.  Three alternatives:
;;   0: operand 1 tied to the destination -> unpckhpd
;;   1: operand 2 tied to the destination -> movlpd from %H1
;;   2: memory destination tied to operand 2 -> movhpd store of operand 1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
4441 (define_insn "*sse2_interleave_highv2df"
4442 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4445 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4446 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4447 (parallel [(const_int 1)
4449 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4451 unpckhpd\t{%2, %0|%0, %2}
4452 movlpd\t{%H1, %0|%0, %H1}
4453 movhpd\t{%1, %0|%0, %1}"
4454 [(set_attr "type" "sselog,ssemov,ssemov")
4455 (set_attr "prefix_data16" "*,1,1")
4456 (set_attr "mode" "V2DF,V1DF,V1DF")])
4458 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Named expander whose template uses the selector 0, 4, 2, 6 -- the same
;; selector as the unpcklpd256 patterns -- with a single visible V4DF input
;; (operand 1).
;; NOTE(review): the operator lines and the expander condition are not
;; visible in this excerpt; presumably operand 1 is used for both inputs.
4459 (define_expand "avx_movddup256"
4460 [(set (match_operand:V4DF 0 "register_operand" "")
4463 (match_operand:V4DF 1 "nonimmediate_operand" "")
4465 (parallel [(const_int 0) (const_int 4)
4466 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit low unpack: selector 0, 4, 2, 6 over
;; register operand 1 and register-or-memory operand 2.
;; NOTE(review): the operator combining the operands and the expander
;; condition are not visible in this excerpt.
4470 (define_expand "avx_unpcklpd256"
4471 [(set (match_operand:V4DF 0 "register_operand" "")
4474 (match_operand:V4DF 1 "register_operand" "")
4475 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4476 (parallel [(const_int 0) (const_int 4)
4477 (const_int 2) (const_int 6)])))]
;; Insn behind both avx_movddup256 and avx_unpcklpd256 (selector 0, 4, 2, 6).
;; Alternative 0 requires operand 2 to be identical to operand 1 (constraint
;; "1") and emits vmovddup; alternative 1 emits vunpcklpd.  The visible part
;; of the condition only allows operand 1 to be memory when it is rtx-equal
;; to operand 2.
;; NOTE(review): the first line of the insn condition is not visible in this
;; excerpt.
4481 (define_insn "*avx_unpcklpd256"
4482 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4485 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4486 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4487 (parallel [(const_int 0) (const_int 4)
4488 (const_int 2) (const_int 6)])))]
4490 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4492 vmovddup\t{%1, %0|%0, %1}
4493 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4494 [(set_attr "type" "sselog")
4495 (set_attr "prefix" "vex")
4496 (set_attr "mode" "V4DF")])
;; Expander for the low-element interleave of two V2DF operands.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register so that one of the insn
;; patterns can match.
;; NOTE(review): the second selector index and the expander condition are
;; not visible in this excerpt.
4498 (define_expand "vec_interleave_lowv2df"
4499 [(set (match_operand:V2DF 0 "register_operand" "")
4502 (match_operand:V2DF 1 "nonimmediate_operand" "")
4503 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4504 (parallel [(const_int 0)
4508 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4509 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX form of the V2DF low interleave.  Four alternatives, in order:
;;   0: register/register           -> vunpcklpd
;;   1: both inputs the same memory -> vmovddup
;;   2: operand 2 in memory         -> vmovhpd (loads it next to operand 1)
;;   3: offsettable memory destination tied to operand 1 -> vmovlpd store of
;;      operand 2 to %H0
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the memory
;; destination -- confirm against the operand-printing code.
4512 (define_insn "*avx_interleave_lowv2df"
4513 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4516 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4517 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4518 (parallel [(const_int 0)
4520 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4522 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4523 vmovddup\t{%1, %0|%0, %1}
4524 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4525 vmovlpd\t{%2, %H0|%H0, %2}"
4526 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4527 (set_attr "prefix" "vex")
4528 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 (two-operand) form of the V2DF low interleave.  Four alternatives:
;;   0: operand 1 tied to the destination -> unpcklpd
;;   1: both inputs the same memory       -> movddup
;;   2: operand 2 in memory               -> movhpd load
;;   3: offsettable memory destination tied to operand 1 -> movlpd store of
;;      operand 2 to %H0
;; The movhpd/movlpd alternatives carry a data16 prefix.
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the memory
;; destination -- confirm against the operand-printing code.
4530 (define_insn "*sse3_interleave_lowv2df"
4531 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4534 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4535 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4536 (parallel [(const_int 0)
4538 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4540 unpcklpd\t{%2, %0|%0, %2}
4541 movddup\t{%1, %0|%0, %1}
4542 movhpd\t{%2, %0|%0, %2}
4543 movlpd\t{%2, %H0|%H0, %2}"
4544 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4545 (set_attr "prefix_data16" "*,*,1,1")
4546 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF low interleave; same as the SSE3 pattern but without
;; the movddup alternative.  Three alternatives:
;;   0: operand 1 tied to the destination -> unpcklpd
;;   1: operand 2 in memory               -> movhpd load
;;   2: offsettable memory destination tied to operand 1 -> movlpd store of
;;      operand 2 to %H0
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the memory
;; destination -- confirm against the operand-printing code.
4548 (define_insn "*sse2_interleave_lowv2df"
4549 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4552 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4553 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4554 (parallel [(const_int 0)
4556 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4558 unpcklpd\t{%2, %0|%0, %2}
4559 movhpd\t{%2, %0|%0, %2}
4560 movlpd\t{%2, %H0|%H0, %2}"
4561 [(set_attr "type" "sselog,ssemov,ssemov")
4562 (set_attr "prefix_data16" "*,1,1")
4563 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination fed from
;; register operand 1: the DFmode low part of that register is stored twice,
;; once at byte offset 0 and once at byte offset 8 of operand 0.
;; NOTE(review): the define header and the operator lines of the pattern are
;; not visible in this excerpt.
4566 [(set (match_operand:V2DF 0 "memory_operand" "")
4569 (match_operand:V2DF 1 "register_operand" "")
4571 (parallel [(const_int 0)
4573 "TARGET_SSE3 && reload_completed"
4576 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4577 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4578 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a two-element selection from V2DF memory operand 1
;; where the second index equals the first index plus 2.  It is rewritten as
;; a vec_duplicate of a single DF loaded from byte offset index*8 of
;; operand 1, with the first index (operand 2) being 0 or 1.
;; NOTE(review): the define header and the operator combining the inputs are
;; not visible in this excerpt.
4583 [(set (match_operand:V2DF 0 "register_operand" "")
4586 (match_operand:V2DF 1 "memory_operand" "")
4588 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4589 (match_operand:SI 3 "const_int_operand" "")])))]
4590 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4591 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4593 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shuffle immediate as operand 3.  The mask is decoded
;; into explicit element selectors for avx_shufpd256_1: bit 1 picks 5 or 4,
;; bit 2 picks 3 or 2, and bit 3 picks 7 or 6.
;; NOTE(review): the selector argument derived from bit 0 and the expander
;; condition are not visible in this excerpt.
4596 (define_expand "avx_shufpd256"
4597 [(match_operand:V4DF 0 "register_operand" "")
4598 (match_operand:V4DF 1 "register_operand" "")
4599 (match_operand:V4DF 2 "nonimmediate_operand" "")
4600 (match_operand:SI 3 "const_int_operand" "")]
4603 int mask = INTVAL (operands[3]);
4604 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4606 GEN_INT (mask & 2 ? 5 : 4),
4607 GEN_INT (mask & 4 ? 3 : 2),
4608 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the shuffle spelled out as four selector operands
;; (ranges 0..1, 4..5, 2..3 and 6..7).  The output code rebuilds the
;; immediate: operand 3 supplies bit 0, (operand 4 - 4) bit 1,
;; (operand 5 - 2) bit 2 and (operand 6 - 6) bit 3.  The result overwrites
;; operands[3] before the template is returned.
;; NOTE(review): the operator combining operands 1 and 2, the insn condition
;; and the declaration of `mask' are not visible in this excerpt.
4612 (define_insn "avx_shufpd256_1"
4613 [(set (match_operand:V4DF 0 "register_operand" "=x")
4616 (match_operand:V4DF 1 "register_operand" "x")
4617 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4618 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4619 (match_operand 4 "const_4_to_5_operand" "")
4620 (match_operand 5 "const_2_to_3_operand" "")
4621 (match_operand 6 "const_6_to_7_operand" "")])))]
4625 mask = INTVAL (operands[3]);
4626 mask |= (INTVAL (operands[4]) - 4) << 1;
4627 mask |= (INTVAL (operands[5]) - 2) << 2;
4628 mask |= (INTVAL (operands[6]) - 6) << 3;
4629 operands[3] = GEN_INT (mask);
4631 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4633 [(set_attr "type" "sselog")
4634 (set_attr "length_immediate" "1")
4635 (set_attr "prefix" "vex")
4636 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle immediate as operand 3 and forwarding to
;; sse2_shufpd_v2df with explicit element selectors; bit 1 of the mask picks
;; selector 3 or 2.
;; NOTE(review): the selector argument derived from bit 0 and the expander
;; condition are not visible in this excerpt.
4638 (define_expand "sse2_shufpd"
4639 [(match_operand:V2DF 0 "register_operand" "")
4640 (match_operand:V2DF 1 "register_operand" "")
4641 (match_operand:V2DF 2 "nonimmediate_operand" "")
4642 (match_operand:SI 3 "const_int_operand" "")]
4645 int mask = INTVAL (operands[3]);
4646 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4648 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander for every mode in SSEMODE_EO; all three operands are registers of
;; that mode.  Delegates to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
;; NOTE(review): the expander condition is not visible in this excerpt.
4652 (define_expand "vec_extract_even<mode>"
4653 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4654 (match_operand:SSEMODE_EO 1 "register_operand" "")
4655 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4658 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander for every mode in SSEMODE_EO; all three operands are registers of
;; that mode.  Delegates to ix86_expand_vec_extract_even_odd with a final
;; argument of 1.
;; NOTE(review): the expander condition is not visible in this excerpt.
4662 (define_expand "vec_extract_odd<mode>"
4663 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4664 (match_operand:SSEMODE_EO 1 "register_operand" "")
4665 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4668 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4672 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand form of the V2DI high interleave, emitted as
;; vpunpckhqdq.  Operand 1 must be a register; operand 2 may be memory.
;; NOTE(review): the operator lines, the second selector index and the insn
;; condition are not visible in this excerpt.
4673 (define_insn "*avx_interleave_highv2di"
4674 [(set (match_operand:V2DI 0 "register_operand" "=x")
4677 (match_operand:V2DI 1 "register_operand" "x")
4678 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4679 (parallel [(const_int 1)
4682 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4683 [(set_attr "type" "sselog")
4684 (set_attr "prefix" "vex")
4685 (set_attr "mode" "TI")])
;; Two-operand form of the V2DI high interleave, emitted as punpckhqdq with
;; operand 1 tied to the destination (constraint "0").
;; NOTE(review): the operator lines, the second selector index and the insn
;; condition are not visible in this excerpt.
4687 (define_insn "vec_interleave_highv2di"
4688 [(set (match_operand:V2DI 0 "register_operand" "=x")
4691 (match_operand:V2DI 1 "register_operand" "0")
4692 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4693 (parallel [(const_int 1)
4696 "punpckhqdq\t{%2, %0|%0, %2}"
4697 [(set_attr "type" "sselog")
4698 (set_attr "prefix_data16" "1")
4699 (set_attr "mode" "TI")])
;; AVX three-operand form of the V2DI low interleave, emitted as
;; vpunpcklqdq.  Operand 1 must be a register; operand 2 may be memory.
;; NOTE(review): the operator lines, the second selector index and the insn
;; condition are not visible in this excerpt.
4701 (define_insn "*avx_interleave_lowv2di"
4702 [(set (match_operand:V2DI 0 "register_operand" "=x")
4705 (match_operand:V2DI 1 "register_operand" "x")
4706 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4707 (parallel [(const_int 0)
4710 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4711 [(set_attr "type" "sselog")
4712 (set_attr "prefix" "vex")
4713 (set_attr "mode" "TI")])
;; Two-operand form of the V2DI low interleave, emitted as punpcklqdq with
;; operand 1 tied to the destination (constraint "0").
;; NOTE(review): the operator lines, the second selector index and the insn
;; condition are not visible in this excerpt.
4715 (define_insn "vec_interleave_lowv2di"
4716 [(set (match_operand:V2DI 0 "register_operand" "=x")
4719 (match_operand:V2DI 1 "register_operand" "0")
4720 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4721 (parallel [(const_int 0)
4724 "punpcklqdq\t{%2, %0|%0, %2}"
4725 [(set_attr "type" "sselog")
4726 (set_attr "prefix_data16" "1")
4727 (set_attr "mode" "TI")])
;; AVX vshufpd for the SSEMODE2D modes.  The shuffle is a vec_select over the
;; concatenation of operands 1 and 2 with two selector operands (ranges 0..1
;; and 2..3).  The output code rebuilds the immediate -- operand 3 supplies
;; bit 0 and (operand 4 - 2) bit 1 -- and stores it back into operands[3].
;; NOTE(review): the insn condition and the declaration of `mask' are not
;; visible in this excerpt.
4729 (define_insn "*avx_shufpd_<mode>"
4730 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4731 (vec_select:SSEMODE2D
4732 (vec_concat:<ssedoublesizemode>
4733 (match_operand:SSEMODE2D 1 "register_operand" "x")
4734 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4735 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4736 (match_operand 4 "const_2_to_3_operand" "")])))]
4740 mask = INTVAL (operands[3]);
4741 mask |= (INTVAL (operands[4]) - 2) << 1;
4742 operands[3] = GEN_INT (mask);
4744 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4746 [(set_attr "type" "sselog")
4747 (set_attr "length_immediate" "1")
4748 (set_attr "prefix" "vex")
4749 (set_attr "mode" "V2DF")])
;; Two-operand shufpd for the SSEMODE2D modes; operand 1 is tied to the
;; destination (constraint "0").  The shuffle is a vec_select over the
;; concatenation of operands 1 and 2 with two selector operands (ranges 0..1
;; and 2..3).  The output code rebuilds the immediate -- operand 3 supplies
;; bit 0 and (operand 4 - 2) bit 1 -- and stores it back into operands[3].
;; NOTE(review): the insn condition and the declaration of `mask' are not
;; visible in this excerpt.
4751 (define_insn "sse2_shufpd_<mode>"
4752 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4753 (vec_select:SSEMODE2D
4754 (vec_concat:<ssedoublesizemode>
4755 (match_operand:SSEMODE2D 1 "register_operand" "0")
4756 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4757 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4758 (match_operand 4 "const_2_to_3_operand" "")])))]
4762 mask = INTVAL (operands[3]);
4763 mask |= (INTVAL (operands[4]) - 2) << 1;
4764 operands[3] = GEN_INT (mask);
4766 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4768 [(set_attr "type" "sselog")
4769 (set_attr "length_immediate" "1")
4770 (set_attr "mode" "V2DF")])
4772 ;; Avoid combining registers from different units in a single alternative,
4773 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: element 1 (selector `(parallel [(const_int 1)])') of V2DF operand 1
;; into a DF destination.  Alternative 0 stores to memory with vmovhpd and
;; alternative 1 is register-to-register via vunpckhpd; the remaining three
;; alternatives read offsettable memory into an SSE, x87 or general register.
;; Memory-to-memory is excluded by the insn condition.
;; NOTE(review): the templates of alternatives 2-4 are not visible in this
;; excerpt.
4774 (define_insn "*avx_storehpd"
4775 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4777 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4778 (parallel [(const_int 1)])))]
4779 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4781 vmovhpd\t{%1, %0|%0, %1}
4782 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4786 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4787 (set_attr "prefix" "vex")
4788 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: element 1 (selector `(parallel [(const_int 1)])') of V2DF operand 1
;; into a DF destination.  Alternative 0 stores to memory with movhpd; in
;; alternative 1 the source is tied to the destination register; the
;; remaining three alternatives read offsettable memory into an SSE, x87 or
;; general register.  Memory-to-memory is excluded by the insn condition.
;; NOTE(review): the templates of alternatives 1-4 are not visible in this
;; excerpt.
4790 (define_insn "sse2_storehpd"
4791 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4793 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4794 (parallel [(const_int 1)])))]
4795 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4797 movhpd\t{%1, %0|%0, %1}
4802 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4803 (set_attr "prefix_data16" "1,*,*,*,*")
4804 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): taking element 1 of a V2DF in memory into
;; a DF register becomes a plain DF load from byte offset 8 of operand 1.
;; NOTE(review): the define header and the selecting operator line are not
;; visible in this excerpt.
4807 [(set (match_operand:DF 0 "register_operand" "")
4809 (match_operand:V2DF 1 "memory_operand" "")
4810 (parallel [(const_int 1)])))]
4811 "TARGET_SSE2 && reload_completed"
4812 [(set (match_dup 0) (match_dup 1))]
4814 operands[1] = adjust_address (operands[1], DFmode, 8);
4817 ;; Avoid combining registers from different units in a single alternative,
4818 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Element 0 (selector `(parallel [(const_int 0)])') of V2DF operand 1 into a
;; DF destination.  Alternative 0 stores to memory with [v]movlpd; the other
;; alternatives target an SSE, x87 or general register.  Memory-to-memory is
;; excluded by the insn condition.  The %v prefix together with "maybe_vex"
;; lets the pattern serve the VEX-encoded spelling as well.
;; NOTE(review): the templates of alternatives 1-4 are not visible in this
;; excerpt.
4819 (define_insn "sse2_storelpd"
4820 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4822 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4823 (parallel [(const_int 0)])))]
4824 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4826 %vmovlpd\t{%1, %0|%0, %1}
4831 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4832 (set_attr "prefix_data16" "1,*,*,*,*")
4833 (set_attr "prefix" "maybe_vex")
4834 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): taking element 0 of V2DF operand 1 into a
;; DF register becomes a plain move of operand 1 re-expressed in DFmode
;; (gen_rtx_REG on its register number, or gen_lowpart).
;; NOTE(review): the define header, the selecting operator line and the test
;; choosing gen_rtx_REG vs. gen_lowpart are not visible in this excerpt.
4837 [(set (match_operand:DF 0 "register_operand" "")
4839 (match_operand:V2DF 1 "nonimmediate_operand" "")
4840 (parallel [(const_int 0)])))]
4841 "TARGET_SSE2 && reload_completed"
4844 rtx op1 = operands[1];
4846 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4848 op1 = gen_lowpart (DFmode, op1);
4849 emit_move_insn (operands[0], op1);
;; Expander that builds a V2DF from element 0 of operand 1 followed by DF
;; operand 2.  The preparation statement calls ix86_fixup_binary_operands
;; with UNKNOWN and V2DFmode on the operand array.
;; NOTE(review): the combining operator lines and the expander condition are
;; not visible in this excerpt.
4853 (define_expand "sse2_loadhpd_exp"
4854 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4857 (match_operand:V2DF 1 "nonimmediate_operand" "")
4858 (parallel [(const_int 0)]))
4859 (match_operand:DF 2 "nonimmediate_operand" "")))]
4861 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4863 ;; Avoid combining registers from different units in a single alternative,
4864 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: V2DF made of element 0 of operand 1 and DF operand 2.  Alternative 0
;; takes operand 2 from memory (vmovhpd) and alternative 1 from a register
;; (vunpcklpd).  Alternatives 2-4 have an offsettable memory destination
;; tied to operand 1, with operand 2 in an SSE, x87 or general register.
;; NOTE(review): the templates of alternatives 2-4 are not visible in this
;; excerpt.
4865 (define_insn "*avx_loadhpd"
4866 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4869 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4870 (parallel [(const_int 0)]))
4871 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4872 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4874 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4875 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4879 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4880 (set_attr "prefix" "vex")
4881 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: V2DF made of element 0 of operand 1 and DF operand 2.  Register
;; alternatives: movhpd (operand 2 in memory), unpcklpd (operand 2 in a
;; register), and shufpd with immediate 1 (operand 2 tied to the
;; destination).  Alternatives 3-5 have an offsettable memory destination
;; tied to operand 1, with operand 2 in an SSE, x87 or general register.
;; NOTE(review): the templates of alternatives 3-5 are not visible in this
;; excerpt.
4883 (define_insn "sse2_loadhpd"
4884 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4887 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4888 (parallel [(const_int 0)]))
4889 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4890 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4892 movhpd\t{%2, %0|%0, %2}
4893 unpcklpd\t{%2, %0|%0, %2}
4894 shufpd\t{$1, %1, %0|%0, %1, 1}
4898 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4899 (set_attr "prefix_data16" "1,*,*,*,*,*")
4900 (set_attr "length_immediate" "*,*,1,*,*,*")
4901 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2) for the in-memory form of loadhpd: the
;; destination keeps its own element 0 and takes DF register operand 1 as the
;; other element, so the whole operation reduces to a DF store at byte
;; offset 8 of operand 0.
;; NOTE(review): the define header and the combining operator line are not
;; visible in this excerpt.
4904 [(set (match_operand:V2DF 0 "memory_operand" "")
4906 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4907 (match_operand:DF 1 "register_operand" "")))]
4908 "TARGET_SSE2 && reload_completed"
4909 [(set (match_dup 0) (match_dup 1))]
4911 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander that builds a V2DF from DF operand 2 followed by element 1 of
;; operand 1.  The preparation statement calls ix86_fixup_binary_operands
;; with UNKNOWN and V2DFmode on the operand array.
;; NOTE(review): the combining operator lines and the expander condition are
;; not visible in this excerpt.
4914 (define_expand "sse2_loadlpd_exp"
4915 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4917 (match_operand:DF 2 "nonimmediate_operand" "")
4919 (match_operand:V2DF 1 "nonimmediate_operand" "")
4920 (parallel [(const_int 1)]))))]
4922 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4924 ;; Avoid combining registers from different units in a single alternative,
4925 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: V2DF made of DF operand 2 and element 1 of operand 1.  Register
;; alternatives, in order: vmovsd load (operand 1 matches constraint "C"),
;; vmovlpd from memory, vmovsd register merge, and vmovhpd reading %H1 when
;; operand 1 is in offsettable memory.  Alternatives 4-6 have a memory
;; destination tied to operand 1, with operand 2 in an SSE, x87 or general
;; register.
;; NOTE(review): the templates of alternatives 4-6 are not visible in this
;; excerpt; "C" is presumably the constant-zero vector constraint.
4926 (define_insn "*avx_loadlpd"
4927 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4929 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4931 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4932 (parallel [(const_int 1)]))))]
4933 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4935 vmovsd\t{%2, %0|%0, %2}
4936 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4937 vmovsd\t{%2, %1, %0|%0, %1, %2}
4938 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4942 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4943 (set_attr "prefix" "vex")
4944 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2: V2DF made of DF operand 2 and element 1 of operand 1.  Register
;; alternatives, in order: movsd load (operand 1 matches constraint "C"),
;; movlpd from memory, movsd register merge, shufpd with immediate 2
;; (operand 2 tied to the destination), and movhpd reading %H1 when operand 1
;; is in offsettable memory.  Alternatives 5-7 have a memory destination
;; tied to operand 1, with operand 2 in an SSE, x87 or general register.
;; NOTE(review): the templates of alternatives 5-7 are not visible in this
;; excerpt; "C" is presumably the constant-zero vector constraint.
4946 (define_insn "sse2_loadlpd"
4947 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4949 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4951 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4952 (parallel [(const_int 1)]))))]
4953 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4955 movsd\t{%2, %0|%0, %2}
4956 movlpd\t{%2, %0|%0, %2}
4957 movsd\t{%2, %0|%0, %2}
4958 shufpd\t{$2, %2, %0|%0, %2, 2}
4959 movhpd\t{%H1, %0|%0, %H1}
4963 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4964 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4965 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4966 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2) for the in-memory form of loadlpd: the
;; destination takes DF register operand 1 as its first element and keeps its
;; own element 1 (`vec_select ... (const_int 1)' of the same memory).  Only
;; the LOW element changes, so the operation reduces to a DF store at byte
;; offset 0 of operand 0.  Storing at offset 8, as this split used to do,
;; would overwrite the element the pattern says is preserved and leave the
;; low element stale; offset 8 is correct only for the loadhpd split.
4969 [(set (match_operand:V2DF 0 "memory_operand" "")
4971 (match_operand:DF 1 "register_operand" "")
4972 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4973 "TARGET_SSE2 && reload_completed"
4974 [(set (match_dup 0) (match_dup 1))]
4976 operands[0] = adjust_address (operands[0], DFmode, 0);
4979 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 fallback: element 1 of V2DF operand 1 into a DF
;; destination, using single-precision moves.  Alternatives: movhps store to
;; memory, movhlps register-to-register, and movlps load from %H1 of an
;; offsettable memory source.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
4981 (define_insn "*vec_extractv2df_1_sse"
4982 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4984 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4985 (parallel [(const_int 1)])))]
4986 "!TARGET_SSE2 && TARGET_SSE
4987 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4989 movhps\t{%1, %0|%0, %1}
4990 movhlps\t{%1, %0|%0, %1}
4991 movlps\t{%H1, %0|%0, %H1}"
4992 [(set_attr "type" "ssemov")
4993 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 fallback: element 0 of V2DF operand 1 into a DF
;; destination, using single-precision moves.  Alternatives: movlps store to
;; memory, movaps register-to-register, and movlps load from memory.
4995 (define_insn "*vec_extractv2df_0_sse"
4996 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4998 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4999 (parallel [(const_int 0)])))]
5000 "!TARGET_SSE2 && TARGET_SSE
5001 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5003 movlps\t{%1, %0|%0, %1}
5004 movaps\t{%1, %0|%0, %1}
5005 movlps\t{%1, %0|%0, %1}"
5006 [(set_attr "type" "ssemov")
5007 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style merge of V2DF operands 2 and 1.  Five alternatives:
;;   0: both in registers        -> vmovsd
;;   1: operand 2 in memory      -> vmovlpd load
;;   2: memory destination tied to operand 1 -> vmovlpd store of operand 2
;;   3: operand 1 in offsettable memory      -> vmovhps from %H1
;;   4: offsettable memory destination tied to operand 2 -> vmovhps store of
;;      operand 1 to %H0
;; NOTE(review): the merge operator, its selector and the insn condition are
;; not visible in this excerpt.
5009 (define_insn "*avx_movsd"
5010 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5012 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5013 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5017 vmovsd\t{%2, %1, %0|%0, %1, %2}
5018 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5019 vmovlpd\t{%2, %0|%0, %2}
5020 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5021 vmovhps\t{%1, %H0|%H0, %1}"
5022 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5023 (set_attr "prefix" "vex")
5024 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style merge of V2DF operands 2 and 1.  Six alternatives:
;;   0: operand 1 tied to the destination, operand 2 in a register -> movsd
;;   1: operand 2 in memory                                   -> movlpd load
;;   2: memory destination tied to operand 1 -> movlpd store of operand 2
;;   3: operand 2 tied to the destination    -> shufpd with immediate 2
;;   4: operand 1 in offsettable memory      -> movhps from %H1
;;   5: offsettable memory destination tied to operand 2 -> movhps store of
;;      operand 1 to %H0
;; NOTE(review): the merge operator, its selector and the insn condition are
;; not visible in this excerpt.
5026 (define_insn "sse2_movsd"
5027 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5029 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5030 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5034 movsd\t{%2, %0|%0, %2}
5035 movlpd\t{%2, %0|%0, %2}
5036 movlpd\t{%2, %0|%0, %2}
5037 shufpd\t{$2, %2, %0|%0, %2, 2}
5038 movhps\t{%H1, %0|%0, %H1}
5039 movhps\t{%1, %H0|%H0, %1}"
5040 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5041 (set_attr "prefix_data16" "*,1,1,*,*,*")
5042 (set_attr "length_immediate" "*,*,*,1,*,*")
5043 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from a single DF operand (register or memory) with
;; [v]movddup; the %v prefix plus "maybe_vex" covers the VEX spelling.
;; NOTE(review): the duplicating operator line and the insn condition are
;; not visible in this excerpt.
5045 (define_insn "*vec_dupv2df_sse3"
5046 [(set (match_operand:V2DF 0 "register_operand" "=x")
5048 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5050 "%vmovddup\t{%1, %0|%0, %1}"
5051 [(set_attr "type" "sselog1")
5052 (set_attr "prefix" "maybe_vex")
5053 (set_attr "mode" "DF")])
;; Builds a V2DF from DF register operand 1, which is tied to the destination
;; register (constraint "0").
;; NOTE(review): the duplicating operator line, the insn condition and the
;; output template are not visible in this excerpt.
5055 (define_insn "vec_dupv2df"
5056 [(set (match_operand:V2DF 0 "register_operand" "=x")
5058 (match_operand:DF 1 "register_operand" "0")))]
5061 [(set_attr "type" "sselog1")
5062 (set_attr "mode" "V2DF")])
;; V2DF built from DF operand 1 (register or memory), emitted as [v]movddup.
;; NOTE(review): the operator line, the second input and the insn condition
;; are not visible in this excerpt -- judging by the name and template this
;; is presumably the concatenation of operand 1 with itself.
5064 (define_insn "*vec_concatv2df_sse3"
5065 [(set (match_operand:V2DF 0 "register_operand" "=x")
5067 (match_operand:DF 1 "nonimmediate_operand" "xm")
5070 "%vmovddup\t{%1, %0|%0, %1}"
5071 [(set_attr "type" "sselog1")
5072 (set_attr "prefix" "maybe_vex")
5073 (set_attr "mode" "DF")])
;; AVX: V2DF built from DF operands 1 and 2.  Alternatives: both in registers
;; -> vunpcklpd; operand 2 in memory -> vmovhpd; operand 1 in memory with
;; operand 2 matching constraint "C" -> vmovsd load.
;; NOTE(review): the operator line and the insn condition are not visible in
;; this excerpt; "C" is presumably the constant-zero constraint.
5075 (define_insn "*vec_concatv2df_avx"
5076 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5078 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5079 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5082 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5083 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5084 vmovsd\t{%1, %0|%0, %1}"
5085 [(set_attr "type" "ssemov")
5086 (set_attr "prefix" "vex")
5087 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX: V2DF built from DF operands 1 and 2.  The first three
;; alternatives use the Y2 register class (unpcklpd, movhpd, movsd load with
;; operand 2 matching "C"); the last two use plain "x" registers with the
;; single-precision moves movlhps and movhps.
;; NOTE(review): the operator line and the insn condition are not visible in
;; this excerpt; Y2 is presumably the SSE2-only register class and "C" the
;; constant-zero constraint.
5089 (define_insn "*vec_concatv2df"
5090 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5092 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5093 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5096 unpcklpd\t{%2, %0|%0, %2}
5097 movhpd\t{%2, %0|%0, %2}
5098 movsd\t{%1, %0|%0, %1}
5099 movlhps\t{%2, %0|%0, %2}
5100 movhps\t{%2, %0|%0, %2}"
5101 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5102 (set_attr "prefix_data16" "*,1,*,*,*")
5103 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5105 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5107 ;; Parallel integral arithmetic
5109 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate expander for SSEMODEI.  The preparation statement
;; sets operands[2] to a register holding the all-zero vector of the mode.
;; NOTE(review): the operator lines and the expander condition are not
;; visible in this excerpt -- presumably the template subtracts operand 1
;; from operand 2.
5111 (define_expand "neg<mode>2"
5112 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5115 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5117 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named add/subtract expander for SSEMODEI (the name is built from the
;; <plusminus_insn> iterator attribute).  The operands are legitimized by
;; ix86_fixup_binary_operands_no_copy for the rtx code being expanded.
;; NOTE(review): the operator line and the expander condition are not
;; visible in this excerpt.
5119 (define_expand "<plusminus_insn><mode>3"
5120 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5122 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5123 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5125 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer add/subtract for SSEMODEI.  The mnemonic is
;; assembled as "vp" + <plusminus_mnemonic> + <ssevecsize>.  Operand 1 carries
;; the <comm> constraint modifier, presumably marking it commutative for the
;; add case only.
;; NOTE(review): the operator line is not visible in this excerpt.
5127 (define_insn "*avx_<plusminus_insn><mode>3"
5128 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5130 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5131 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5132 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5133 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5134 [(set_attr "type" "sseiadd")
5135 (set_attr "prefix" "vex")
5136 (set_attr "mode" "TI")])
;; SSE2 two-operand integer add/subtract for SSEMODEI; operand 1 is tied to
;; the destination (constraint "0").  The mnemonic is assembled as
;; "p" + <plusminus_mnemonic> + <ssevecsize>.
;; NOTE(review): the operator line is not visible in this excerpt.
5138 (define_insn "*<plusminus_insn><mode>3"
5139 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5141 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5142 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5143 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5144 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5145 [(set_attr "type" "sseiadd")
5146 (set_attr "prefix_data16" "1")
5147 (set_attr "mode" "TI")])
;; Named expander for the sat_plusminus code iterator on the SSEMODE12 modes
;; (V16QI and V8HI).  The operands are legitimized by
;; ix86_fixup_binary_operands_no_copy for the rtx code being expanded.
;; NOTE(review): the expander condition is not visible in this excerpt.
5149 (define_expand "sse2_<plusminus_insn><mode>3"
5150 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5151 (sat_plusminus:SSEMODE12
5152 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5153 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5155 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus operations on SSEMODE12
;; (V16QI and V8HI).  The mnemonic is assembled as
;; "vp" + <plusminus_mnemonic> + <ssevecsize>.
5157 (define_insn "*avx_<plusminus_insn><mode>3"
5158 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5159 (sat_plusminus:SSEMODE12
5160 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5161 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5162 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5163 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5164 [(set_attr "type" "sseiadd")
5165 (set_attr "prefix" "vex")
5166 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus operations on SSEMODE12 (V16QI
;; and V8HI); operand 1 is tied to the destination (constraint "0").  The
;; mnemonic is assembled as "p" + <plusminus_mnemonic> + <ssevecsize>.
5168 (define_insn "*sse2_<plusminus_insn><mode>3"
5169 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5170 (sat_plusminus:SSEMODE12
5171 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5172 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5173 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5174 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5175 [(set_attr "type" "sseiadd")
5176 (set_attr "prefix_data16" "1")
5177 (set_attr "mode" "TI")])
;; V16QI multiply, split while pseudos can still be created into a sequence
;; built from word multiplies:
;;   1. allocate six V16QI temporaries t[0..5];
;;   2. interleave each input with itself (high and low halves) so that every
;;      source byte sits in the low byte of a 16-bit word;
;;   3. multiply the high halves into t[4] and the low halves into t[5] with
;;      mulv8hi3, viewing the temporaries as V8HI;
;;   4. gather the even bytes of t[5] and t[4] into operand 0 with
;;      ix86_expand_vec_extract_even_odd (last argument 0).
;; NOTE(review): the first line of the insn condition and the declarations of
;; `t' and `i' are not visible in this excerpt.
5179 (define_insn_and_split "mulv16qi3"
5180 [(set (match_operand:V16QI 0 "register_operand" "")
5181 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5182 (match_operand:V16QI 2 "register_operand" "")))]
5184 && can_create_pseudo_p ()"
5192 for (i = 0; i < 6; ++i)
5193 t[i] = gen_reg_rtx (V16QImode);
5195 /* Unpack data such that we've got a source byte in each low byte of
5196 each word. We don't care what goes into the high byte of each word.
5197 Rather than trying to get zero in there, most convenient is to let
5198 it be a copy of the low byte. */
5199 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5200 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5201 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5202 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5204 /* Multiply words. The end-of-line annotations here give a picture of what
5205 the output of that instruction looks like. Dot means don't care; the
5206 letters are the bytes of the result with A being the most significant. */
5207 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5208 gen_lowpart (V8HImode, t[0]),
5209 gen_lowpart (V8HImode, t[1])));
5210 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5211 gen_lowpart (V8HImode, t[2]),
5212 gen_lowpart (V8HImode, t[3])));
5214 /* Extract the even bytes and merge them back together. */
5215 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Named expander for V8HI multiply.  Both inputs may be memory at expand
;; time; ix86_fixup_binary_operands_no_copy adjusts them so the result
;; satisfies the *avx_mulv8hi3 / *mulv8hi3 insn patterns.
5219 (define_expand "mulv8hi3"
5220 [(set (match_operand:V8HI 0 "register_operand" "")
5221 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5222 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5224 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI multiply: three-operand vpmullw.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative for register allocation.
5226 (define_insn "*avx_mulv8hi3"
5227 [(set (match_operand:V8HI 0 "register_operand" "=x")
5228 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5229 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5230 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5231 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5232 [(set_attr "type" "sseimul")
5233 (set_attr "prefix" "vex")
5234 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI multiply: two-operand pmullw with operand 1 tied
;; to the destination ("%0": commutative, must match operand 0).
5236 (define_insn "*mulv8hi3"
5237 [(set (match_operand:V8HI 0 "register_operand" "=x")
5238 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5239 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5240 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5241 "pmullw\t{%2, %0|%0, %2}"
5242 [(set_attr "type" "sseimul")
5243 (set_attr "prefix_data16" "1")
5244 (set_attr "mode" "TI")])
;; Named expander for the V8HI high-part multiply matched by the pmulhw /
;; vpmulhw insns that follow.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL wrapping the two operands and the enabling
;; condition are not visible in this extract.
5246 (define_expand "smulv8hi3_highpart"
5247 [(set (match_operand:V8HI 0 "register_operand" "")
5252 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5254 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5257 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX counterpart of "*smulv8hi3_highpart": three-operand vpmulhw on V8HI.
;; The pattern was previously named "*avxv8hi3_highpart", which dropped the
;; operation from the name; it is now named like its SSE2 form and like the
;; unsigned "*avx_umulv8hi3_highpart".  Names starting with "*" are only
;; used in dumps and diagnostics, so nothing refers to the old name.
;; NOTE(review): the RTL wrapping the two operands is not visible in this
;; extract.
5259 (define_insn "*avx_smulv8hi3_highpart"
5260 [(set (match_operand:V8HI 0 "register_operand" "=x")
5265 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5267 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5269 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5270 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5271 [(set_attr "type" "sseimul")
5272 (set_attr "prefix" "vex")
5273 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI high-part multiply: two-operand pmulhw with
;; operand 1 tied to the destination.
5275 (define_insn "*smulv8hi3_highpart"
5276 [(set (match_operand:V8HI 0 "register_operand" "=x")
5281 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5283 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5285 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5286 "pmulhw\t{%2, %0|%0, %2}"
5287 [(set_attr "type" "sseimul")
5288 (set_attr "prefix_data16" "1")
5289 (set_attr "mode" "TI")])
;; Named expander for the V8HI high-part multiply matched by the pmulhuw /
;; vpmulhuw insns that follow.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL wrapping the two operands and the enabling
;; condition are not visible in this extract.
5291 (define_expand "umulv8hi3_highpart"
5292 [(set (match_operand:V8HI 0 "register_operand" "")
5297 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5299 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5302 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI high-part multiply emitted as three-operand
;; vpmulhuw.
5304 (define_insn "*avx_umulv8hi3_highpart"
5305 [(set (match_operand:V8HI 0 "register_operand" "=x")
5310 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5312 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5314 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5315 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5316 [(set_attr "type" "sseimul")
5317 (set_attr "prefix" "vex")
5318 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI high-part multiply emitted as two-operand pmulhuw,
;; with operand 1 tied to the destination.
5320 (define_insn "*umulv8hi3_highpart"
5321 [(set (match_operand:V8HI 0 "register_operand" "=x")
5326 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5328 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5330 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5331 "pmulhuw\t{%2, %0|%0, %2}"
5332 [(set_attr "type" "sseimul")
5333 (set_attr "prefix_data16" "1")
5334 (set_attr "mode" "TI")])
;; Named expander for the V4SI -> V2DI multiply that uses elements 0 and 2
;; of each V4SI input (the "(parallel [(const_int 0) (const_int 2)])"
;; selections); matched by the pmuludq / vpmuludq insns that follow.
;; NOTE(review): the surrounding mult/extend RTL and the enabling condition
;; are not visible in this extract.
5336 (define_expand "sse2_umulv2siv2di3"
5337 [(set (match_operand:V2DI 0 "register_operand" "")
5341 (match_operand:V4SI 1 "nonimmediate_operand" "")
5342 (parallel [(const_int 0) (const_int 2)])))
5345 (match_operand:V4SI 2 "nonimmediate_operand" "")
5346 (parallel [(const_int 0) (const_int 2)])))))]
5348 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the elements-0-and-2 V4SI -> V2DI multiply: three-operand
;; vpmuludq.
5350 (define_insn "*avx_umulv2siv2di3"
5351 [(set (match_operand:V2DI 0 "register_operand" "=x")
5355 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5356 (parallel [(const_int 0) (const_int 2)])))
5359 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5360 (parallel [(const_int 0) (const_int 2)])))))]
5361 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5362 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5363 [(set_attr "type" "sseimul")
5364 (set_attr "prefix" "vex")
5365 (set_attr "mode" "TI")])
;; SSE2 form of the elements-0-and-2 V4SI -> V2DI multiply: two-operand
;; pmuludq with operand 1 tied to the destination.
5367 (define_insn "*sse2_umulv2siv2di3"
5368 [(set (match_operand:V2DI 0 "register_operand" "=x")
5372 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5373 (parallel [(const_int 0) (const_int 2)])))
5376 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5377 (parallel [(const_int 0) (const_int 2)])))))]
5378 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5379 "pmuludq\t{%2, %0|%0, %2}"
5380 [(set_attr "type" "sseimul")
5381 (set_attr "prefix_data16" "1")
5382 (set_attr "mode" "TI")])
;; Named expander for the V4SI -> V2DI multiply on elements 0 and 2 that is
;; matched by the pmuldq / vpmuldq insns that follow.
;; NOTE(review): the surrounding mult/extend RTL and the enabling condition
;; are not visible in this extract.
5384 (define_expand "sse4_1_mulv2siv2di3"
5385 [(set (match_operand:V2DI 0 "register_operand" "")
5389 (match_operand:V4SI 1 "nonimmediate_operand" "")
5390 (parallel [(const_int 0) (const_int 2)])))
5393 (match_operand:V4SI 2 "nonimmediate_operand" "")
5394 (parallel [(const_int 0) (const_int 2)])))))]
5396 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the elements-0-and-2 V4SI -> V2DI multiply emitted as
;; three-operand vpmuldq.  prefix_extra is set to 1 for this encoding.
5398 (define_insn "*avx_mulv2siv2di3"
5399 [(set (match_operand:V2DI 0 "register_operand" "=x")
5403 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5404 (parallel [(const_int 0) (const_int 2)])))
5407 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5408 (parallel [(const_int 0) (const_int 2)])))))]
5409 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5410 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5411 [(set_attr "type" "sseimul")
5412 (set_attr "prefix_extra" "1")
5413 (set_attr "prefix" "vex")
5414 (set_attr "mode" "TI")])
;; SSE4.1 form of the elements-0-and-2 V4SI -> V2DI multiply emitted as
;; two-operand pmuldq, with operand 1 tied to the destination.
5416 (define_insn "*sse4_1_mulv2siv2di3"
5417 [(set (match_operand:V2DI 0 "register_operand" "=x")
5421 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5422 (parallel [(const_int 0) (const_int 2)])))
5425 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5426 (parallel [(const_int 0) (const_int 2)])))))]
5427 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5428 "pmuldq\t{%2, %0|%0, %2}"
5429 [(set_attr "type" "sseimul")
5430 (set_attr "prefix_extra" "1")
5431 (set_attr "mode" "TI")])
;; Named expander for pmaddwd: V8HI x V8HI -> V4SI.  The template takes two
;; V4HI selections from each input, one whose index list starts at 0 and
;; one whose index list starts at 1 and ends at 7; the second pair reuses
;; operands 1 and 2 through match_dup.
;; NOTE(review): most of the index lists and the arithmetic RTL around the
;; selections are not visible in this extract.
5433 (define_expand "sse2_pmaddwd"
5434 [(set (match_operand:V4SI 0 "register_operand" "")
5439 (match_operand:V8HI 1 "nonimmediate_operand" "")
5440 (parallel [(const_int 0)
5446 (match_operand:V8HI 2 "nonimmediate_operand" "")
5447 (parallel [(const_int 0)
5453 (vec_select:V4HI (match_dup 1)
5454 (parallel [(const_int 1)
5459 (vec_select:V4HI (match_dup 2)
5460 (parallel [(const_int 1)
5463 (const_int 7)]))))))]
5465 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of pmaddwd: three-operand vpmaddwd, V8HI inputs, V4SI result.
;; Classified as type "sseiadd".
5467 (define_insn "*avx_pmaddwd"
5468 [(set (match_operand:V4SI 0 "register_operand" "=x")
5473 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5474 (parallel [(const_int 0)
5480 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5481 (parallel [(const_int 0)
5487 (vec_select:V4HI (match_dup 1)
5488 (parallel [(const_int 1)
5493 (vec_select:V4HI (match_dup 2)
5494 (parallel [(const_int 1)
5497 (const_int 7)]))))))]
5498 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5499 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5500 [(set_attr "type" "sseiadd")
5501 (set_attr "prefix" "vex")
5502 (set_attr "mode" "TI")])
;; SSE2 form of pmaddwd: two-operand, operand 1 tied to the destination.
;; Unlike the AVX form it also sets atom_unit to "simul".
5504 (define_insn "*sse2_pmaddwd"
5505 [(set (match_operand:V4SI 0 "register_operand" "=x")
5510 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5511 (parallel [(const_int 0)
5517 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5518 (parallel [(const_int 0)
5524 (vec_select:V4HI (match_dup 1)
5525 (parallel [(const_int 1)
5530 (vec_select:V4HI (match_dup 2)
5531 (parallel [(const_int 1)
5534 (const_int 7)]))))))]
5535 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5536 "pmaddwd\t{%2, %0|%0, %2}"
5537 [(set_attr "type" "sseiadd")
5538 (set_attr "atom_unit" "simul")
5539 (set_attr "prefix_data16" "1")
5540 (set_attr "mode" "TI")])
;; Named expander for V4SI multiply.  Operand fix-up is only performed when
;; SSE4.1 or AVX is available, i.e. when one of the pmulld / vpmulld insns
;; can match; otherwise the register-only "*sse2_mulv4si3" split pattern
;; (conditioned on !TARGET_SSE4_1 && !TARGET_AVX) handles the operation.
5542 (define_expand "mulv4si3"
5543 [(set (match_operand:V4SI 0 "register_operand" "")
5544 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5545 (match_operand:V4SI 2 "register_operand" "")))]
5548 if (TARGET_SSE4_1 || TARGET_AVX)
5549 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI multiply: three-operand vpmulld.
5552 (define_insn "*avx_mulv4si3"
5553 [(set (match_operand:V4SI 0 "register_operand" "=x")
5554 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5555 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5556 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5557 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5558 [(set_attr "type" "sseimul")
5559 (set_attr "prefix_extra" "1")
5560 (set_attr "prefix" "vex")
5561 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI multiply: two-operand pmulld with operand 1 tied
;; to the destination.
5563 (define_insn "*sse4_1_mulv4si3"
5564 [(set (match_operand:V4SI 0 "register_operand" "=x")
5565 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5566 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5567 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5568 "pmulld\t{%2, %0|%0, %2}"
5569 [(set_attr "type" "sseimul")
5570 (set_attr "prefix_extra" "1")
5571 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX.  It is
;; split, while pseudos can still be created, into:
;;   - a multiply of elements 0 and 2 (gen_sse2_umulv2siv2di3),
;;   - whole-register right shifts of both inputs (gen_sse2_lshrv1ti3) so
;;     elements 1 and 3 land in the slots for elements 0 and 2,
;;   - a second multiply of the shifted inputs,
;;   - two pshufd shuffles and a low interleave that merge the results
;;     into operand 0.
;; NOTE(review): the arguments completing several emit_insn calls are not
;; visible in this extract.
5573 (define_insn_and_split "*sse2_mulv4si3"
5574 [(set (match_operand:V4SI 0 "register_operand" "")
5575 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5576 (match_operand:V4SI 2 "register_operand" "")))]
5577 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5578 && can_create_pseudo_p ()"
5583 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5589 t1 = gen_reg_rtx (V4SImode);
5590 t2 = gen_reg_rtx (V4SImode);
5591 t3 = gen_reg_rtx (V4SImode);
5592 t4 = gen_reg_rtx (V4SImode);
5593 t5 = gen_reg_rtx (V4SImode);
5594 t6 = gen_reg_rtx (V4SImode);
5595 thirtytwo = GEN_INT (32);
5597 /* Multiply elements 2 and 0. */
5598 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5601 /* Shift both input vectors down one element, so that elements 3
5602 and 1 are now in the slots for elements 2 and 0. For K8, at
5603 least, this is faster than using a shuffle. */
5604 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5605 gen_lowpart (V1TImode, op1),
5607 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5608 gen_lowpart (V1TImode, op2),
5610 /* Multiply elements 3 and 1. */
5611 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5614 /* Move the results in element 2 down to element 1; we don't care
5615 what goes in elements 2 and 3. */
5616 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5617 const0_rtx, const0_rtx));
5618 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5619 const0_rtx, const0_rtx));
5621 /* Merge the parts back together. */
5622 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, split into 32-bit pieces while pseudos can be created.
;; Two sequences are visible:
;;   - one built from XOP generators (gen_xop_phadddq, gen_xop_pmacsdql);
;;   - a generic one using three pmuludq-style multiplies
;;     (low*low, low*high, high*low), shifts by 32 and two V2DI adds.
;; NOTE(review): the test that selects between the two sequences and the
;; start of the insn condition are not visible in this extract.
5626 (define_insn_and_split "mulv2di3"
5627 [(set (match_operand:V2DI 0 "register_operand" "")
5628 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5629 (match_operand:V2DI 2 "register_operand" "")))]
5631 && can_create_pseudo_p ()"
5636 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5645 /* op1: A,B,C,D, op2: E,F,G,H */
5646 op1 = gen_lowpart (V4SImode, op1);
5647 op2 = gen_lowpart (V4SImode, op2);
5649 t1 = gen_reg_rtx (V4SImode);
5650 t2 = gen_reg_rtx (V4SImode);
5651 t3 = gen_reg_rtx (V2DImode);
5652 t4 = gen_reg_rtx (V2DImode);
5655 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5661 /* t2: (B*E),(A*F),(D*G),(C*H) */
5662 emit_insn (gen_mulv4si3 (t2, t1, op2));
5664 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5665 emit_insn (gen_xop_phadddq (t3, t2));
5667 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5668 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5670 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5671 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5675 t1 = gen_reg_rtx (V2DImode);
5676 t2 = gen_reg_rtx (V2DImode);
5677 t3 = gen_reg_rtx (V2DImode);
5678 t4 = gen_reg_rtx (V2DImode);
5679 t5 = gen_reg_rtx (V2DImode);
5680 t6 = gen_reg_rtx (V2DImode);
5681 thirtytwo = GEN_INT (32);
5683 /* Multiply low parts. */
5684 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5685 gen_lowpart (V4SImode, op2)));
5687 /* Shift input vectors right 32 bits so we can multiply high parts. */
5688 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5689 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5691 /* Multiply high parts by low parts. */
5692 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5693 gen_lowpart (V4SImode, t3)));
5694 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5695 gen_lowpart (V4SImode, t2)));
5697 /* Shift them back. */
5698 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5699 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5701 /* Add the three parts together. */
5702 emit_insn (gen_addv2di3 (t6, t1, t4));
5703 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening multiply, V8HI x V8HI -> V4SI, "hi" variant.  Computes the low
;; 16 bits of every product (gen_mulv8hi3) and the high 16 bits
;; (gen_smulv8hi3_highpart), then interleaves the high halves of those two
;; vectors into operand 0 viewed as V8HI.
;; NOTE(review): the assignments to op1/op2 are not visible in this extract.
5708 (define_expand "vec_widen_smult_hi_v8hi"
5709 [(match_operand:V4SI 0 "register_operand" "")
5710 (match_operand:V8HI 1 "register_operand" "")
5711 (match_operand:V8HI 2 "register_operand" "")]
5714 rtx op1, op2, t1, t2, dest;
5718 t1 = gen_reg_rtx (V8HImode);
5719 t2 = gen_reg_rtx (V8HImode);
5720 dest = gen_lowpart (V8HImode, operands[0]);
5722 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5723 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5724 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply, V8HI x V8HI -> V4SI, "lo" variant.  Same scheme as
;; the "hi" variant (gen_mulv8hi3 plus gen_smulv8hi3_highpart) but the two
;; partial results are merged with the low interleave.
;; NOTE(review): the assignments to op1/op2 are not visible in this extract.
5728 (define_expand "vec_widen_smult_lo_v8hi"
5729 [(match_operand:V4SI 0 "register_operand" "")
5730 (match_operand:V8HI 1 "register_operand" "")
5731 (match_operand:V8HI 2 "register_operand" "")]
5734 rtx op1, op2, t1, t2, dest;
5738 t1 = gen_reg_rtx (V8HImode);
5739 t2 = gen_reg_rtx (V8HImode);
5740 dest = gen_lowpart (V8HImode, operands[0]);
5742 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5743 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5744 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, V8HI x V8HI -> V4SI, "hi" variant built on
;; gen_umulv8hi3_highpart: low product halves from gen_mulv8hi3, high
;; halves from gen_umulv8hi3_highpart, merged with the high interleave.
;; NOTE(review): the assignments to op1/op2 are not visible in this extract.
5748 (define_expand "vec_widen_umult_hi_v8hi"
5749 [(match_operand:V4SI 0 "register_operand" "")
5750 (match_operand:V8HI 1 "register_operand" "")
5751 (match_operand:V8HI 2 "register_operand" "")]
5754 rtx op1, op2, t1, t2, dest;
5758 t1 = gen_reg_rtx (V8HImode);
5759 t2 = gen_reg_rtx (V8HImode);
5760 dest = gen_lowpart (V8HImode, operands[0]);
5762 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5763 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5764 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply, V8HI x V8HI -> V4SI, "lo" variant built on
;; gen_umulv8hi3_highpart: low product halves from gen_mulv8hi3, high
;; halves from gen_umulv8hi3_highpart, merged with the low interleave.
;; NOTE(review): the assignments to op1/op2 are not visible in this extract.
5768 (define_expand "vec_widen_umult_lo_v8hi"
5769 [(match_operand:V4SI 0 "register_operand" "")
5770 (match_operand:V8HI 1 "register_operand" "")
5771 (match_operand:V8HI 2 "register_operand" "")]
5774 rtx op1, op2, t1, t2, dest;
5778 t1 = gen_reg_rtx (V8HImode);
5779 t2 = gen_reg_rtx (V8HImode);
5780 dest = gen_lowpart (V8HImode, operands[0]);
5782 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5783 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5784 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, "hi" variant.  Both inputs are
;; first shuffled with pshufd into fresh V4SI temporaries, then multiplied
;; with gen_xop_mulv2div2di3_high.
;; NOTE(review): the pshufd selectors and the enabling condition are not
;; visible in this extract.
5788 (define_expand "vec_widen_smult_hi_v4si"
5789 [(match_operand:V2DI 0 "register_operand" "")
5790 (match_operand:V4SI 1 "register_operand" "")
5791 (match_operand:V4SI 2 "register_operand" "")]
5796 t1 = gen_reg_rtx (V4SImode);
5797 t2 = gen_reg_rtx (V4SImode);
5799 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5804 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5809 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, "lo" variant.  Both inputs are
;; first shuffled with pshufd into fresh V4SI temporaries, then multiplied
;; with gen_xop_mulv2div2di3_low.
;; NOTE(review): the pshufd selectors and the enabling condition are not
;; visible in this extract.
5813 (define_expand "vec_widen_smult_lo_v4si"
5814 [(match_operand:V2DI 0 "register_operand" "")
5815 (match_operand:V4SI 1 "register_operand" "")
5816 (match_operand:V4SI 2 "register_operand" "")]
5821 t1 = gen_reg_rtx (V4SImode);
5822 t2 = gen_reg_rtx (V4SImode);
5824 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5829 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5834 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, "hi" variant.  Each input is
;; high-interleaved with itself, which places its upper two elements in
;; positions 0 and 2; gen_sse2_umulv2siv2di3 then multiplies exactly those
;; positions.
;; NOTE(review): the assignments to op1/op2 are not visible in this extract.
5838 (define_expand "vec_widen_umult_hi_v4si"
5839 [(match_operand:V2DI 0 "register_operand" "")
5840 (match_operand:V4SI 1 "register_operand" "")
5841 (match_operand:V4SI 2 "register_operand" "")]
5844 rtx op1, op2, t1, t2;
5848 t1 = gen_reg_rtx (V4SImode);
5849 t2 = gen_reg_rtx (V4SImode);
5851 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5852 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5853 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, "lo" variant.  Each input is
;; low-interleaved with itself, which places its lower two elements in
;; positions 0 and 2; gen_sse2_umulv2siv2di3 then multiplies exactly those
;; positions.
;; NOTE(review): the assignments to op1/op2 are not visible in this extract.
5857 (define_expand "vec_widen_umult_lo_v4si"
5858 [(match_operand:V2DI 0 "register_operand" "")
5859 (match_operand:V4SI 1 "register_operand" "")
5860 (match_operand:V4SI 2 "register_operand" "")]
5863 rtx op1, op2, t1, t2;
5867 t1 = gen_reg_rtx (V4SImode);
5868 t2 = gen_reg_rtx (V4SImode);
5870 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5871 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5872 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product step on V8HI inputs with a V4SI accumulator:
;;   operand 0 = operand 3 + pmaddwd (operand 1, operand 2).
;; The pmaddwd result goes through a fresh V4SI temporary.
5876 (define_expand "sdot_prodv8hi"
5877 [(match_operand:V4SI 0 "register_operand" "")
5878 (match_operand:V8HI 1 "register_operand" "")
5879 (match_operand:V8HI 2 "register_operand" "")
5880 (match_operand:V4SI 3 "register_operand" "")]
5883 rtx t = gen_reg_rtx (V4SImode);
5884 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5885 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product step on V4SI inputs with a V2DI accumulator (operand 3):
;;   t1 = product of elements 0 and 2, plus the accumulator;
;;   t2/t3 = the inputs shifted right as whole V1TI values so the remaining
;;           elements move into positions 0 and 2;
;;   t4 = product of the shifted inputs;
;;   operand 0 = t1 + t4.
;; NOTE(review): the shift amounts passed to gen_sse2_lshrv1ti3 are not
;; visible in this extract.
5889 (define_expand "udot_prodv4si"
5890 [(match_operand:V2DI 0 "register_operand" "")
5891 (match_operand:V4SI 1 "register_operand" "")
5892 (match_operand:V4SI 2 "register_operand" "")
5893 (match_operand:V2DI 3 "register_operand" "")]
5898 t1 = gen_reg_rtx (V2DImode);
5899 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5900 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5902 t2 = gen_reg_rtx (V4SImode);
5903 t3 = gen_reg_rtx (V4SImode);
5904 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5905 gen_lowpart (V1TImode, operands[1]),
5907 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5908 gen_lowpart (V1TImode, operands[2]),
5911 t4 = gen_reg_rtx (V2DImode);
5912 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5914 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX form of the vpsra shift on the SSEMODE24 modes (V8HI, V4SI).  The
;; count (operand 2, SImode) is either an SSE register or an immediate
;; ("xN"); length_immediate is chosen according to whether operand 2 is a
;; const_int, with "0" as the non-constant value.
5918 (define_insn "*avx_ashr<mode>3"
5919 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5921 (match_operand:SSEMODE24 1 "register_operand" "x")
5922 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5924 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5925 [(set_attr "type" "sseishft")
5926 (set_attr "prefix" "vex")
5927 (set (attr "length_immediate")
5928 (if_then_else (match_operand 2 "const_int_operand" "")
5930 (const_string "0")))
5931 (set_attr "mode" "TI")])
;; Two-operand psra shift on the SSEMODE24 modes (V8HI, V4SI); operand 1 is
;; tied to the destination.  The count is an SSE register or an immediate,
;; and length_immediate depends on whether it is a const_int.
5933 (define_insn "ashr<mode>3"
5934 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5936 (match_operand:SSEMODE24 1 "register_operand" "0")
5937 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5939 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5940 [(set_attr "type" "sseishft")
5941 (set_attr "prefix_data16" "1")
5942 (set (attr "length_immediate")
5943 (if_then_else (match_operand 2 "const_int_operand" "")
5945 (const_string "0")))
5946 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) shift emitted as vpsrldq.  Operand 2 is
;; restricted by its predicate to a multiple of 8 in 0..255; the output
;; code divides it by 8 before printing the instruction.
5948 (define_insn "*avx_lshrv1ti3"
5949 [(set (match_operand:V1TI 0 "register_operand" "=x")
5951 (match_operand:V1TI 1 "register_operand" "x")
5952 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5955 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5956 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5958 [(set_attr "type" "sseishft")
5959 (set_attr "prefix" "vex")
5960 (set_attr "length_immediate" "1")
5961 (set_attr "mode" "TI")])
;; AVX form of the per-element logical right shift (vpsrl) on the
;; SSEMODE248 modes (V8HI, V4SI, V2DI).  The count is an SSE register or
;; an immediate; length_immediate depends on whether it is a const_int.
5963 (define_insn "*avx_lshr<mode>3"
5964 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5965 (lshiftrt:SSEMODE248
5966 (match_operand:SSEMODE248 1 "register_operand" "x")
5967 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5969 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5970 [(set_attr "type" "sseishft")
5971 (set_attr "prefix" "vex")
5972 (set (attr "length_immediate")
5973 (if_then_else (match_operand 2 "const_int_operand" "")
5975 (const_string "0")))
5976 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) shift emitted as two-operand psrldq, with
;; operand 1 tied to the destination.  Operand 2 is restricted by its
;; predicate to a multiple of 8 in 0..255; the output code divides it by 8
;; before printing the instruction.
5978 (define_insn "sse2_lshrv1ti3"
5979 [(set (match_operand:V1TI 0 "register_operand" "=x")
5981 (match_operand:V1TI 1 "register_operand" "0")
5982 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5985 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5986 return "psrldq\t{%2, %0|%0, %2}";
5988 [(set_attr "type" "sseishft")
5989 (set_attr "prefix_data16" "1")
5990 (set_attr "length_immediate" "1")
5991 (set_attr "atom_unit" "sishuf")
5992 (set_attr "mode" "TI")])
;; Two-operand per-element logical right shift (psrl) on the SSEMODE248
;; modes (V8HI, V4SI, V2DI); operand 1 is tied to the destination.  The
;; count is an SSE register or an immediate.
5994 (define_insn "lshr<mode>3"
5995 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5996 (lshiftrt:SSEMODE248
5997 (match_operand:SSEMODE248 1 "register_operand" "0")
5998 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6000 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6001 [(set_attr "type" "sseishft")
6002 (set_attr "prefix_data16" "1")
6003 (set (attr "length_immediate")
6004 (if_then_else (match_operand 2 "const_int_operand" "")
6006 (const_string "0")))
6007 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) left shift emitted as vpslldq.  Operand 2 is
;; restricted by its predicate to a multiple of 8 in 0..255; the output
;; code divides it by 8 before printing the instruction.
6009 (define_insn "*avx_ashlv1ti3"
6010 [(set (match_operand:V1TI 0 "register_operand" "=x")
6011 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
6012 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6015 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6016 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6018 [(set_attr "type" "sseishft")
6019 (set_attr "prefix" "vex")
6020 (set_attr "length_immediate" "1")
6021 (set_attr "mode" "TI")])
;; AVX form of the per-element vpsll shift on the SSEMODE248 modes (V8HI,
;; V4SI, V2DI).  The count is an SSE register or an immediate;
;; length_immediate depends on whether it is a const_int.
6023 (define_insn "*avx_ashl<mode>3"
6024 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6026 (match_operand:SSEMODE248 1 "register_operand" "x")
6027 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6029 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6030 [(set_attr "type" "sseishft")
6031 (set_attr "prefix" "vex")
6032 (set (attr "length_immediate")
6033 (if_then_else (match_operand 2 "const_int_operand" "")
6035 (const_string "0")))
6036 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) left shift emitted as two-operand pslldq,
;; with operand 1 tied to the destination.  Operand 2 is restricted by its
;; predicate to a multiple of 8 in 0..255; the output code divides it by 8
;; before printing the instruction.
6038 (define_insn "sse2_ashlv1ti3"
6039 [(set (match_operand:V1TI 0 "register_operand" "=x")
6040 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
6041 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6044 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6045 return "pslldq\t{%2, %0|%0, %2}";
6047 [(set_attr "type" "sseishft")
6048 (set_attr "prefix_data16" "1")
6049 (set_attr "length_immediate" "1")
6050 (set_attr "mode" "TI")])
;; Two-operand per-element psll shift on the SSEMODE248 modes (V8HI, V4SI,
;; V2DI); operand 1 is tied to the destination.  The count is an SSE
;; register or an immediate.
6052 (define_insn "ashl<mode>3"
6053 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6055 (match_operand:SSEMODE248 1 "register_operand" "0")
6056 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6058 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6059 [(set_attr "type" "sseishft")
6060 (set_attr "prefix_data16" "1")
6061 (set (attr "length_immediate")
6062 (if_then_else (match_operand 2 "const_int_operand" "")
6064 (const_string "0")))
6065 (set_attr "mode" "TI")])
;; Whole-vector shift expander for every SSEMODEI mode (V16QI, V8HI, V4SI,
;; V2DI).  Operands 0 and 1 are re-viewed as V1TI so that the shift is done
;; on the full 128-bit value; the count must be a multiple of 8 in 0..255.
;; NOTE(review): the shift RTL code and the condition are not visible in
;; this extract.
6067 (define_expand "vec_shl_<mode>"
6068 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6070 (match_operand:SSEMODEI 1 "register_operand" "")
6071 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6074 operands[0] = gen_lowpart (V1TImode, operands[0]);
6075 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift expander for every SSEMODEI mode (V16QI, V8HI, V4SI,
;; V2DI), counterpart of vec_shl_<mode>.  Operands 0 and 1 are re-viewed as
;; V1TI; the count must be a multiple of 8 in 0..255.
;; NOTE(review): the shift RTL code and the condition are not visible in
;; this extract.
6078 (define_expand "vec_shr_<mode>"
6079 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6081 (match_operand:SSEMODEI 1 "register_operand" "")
6082 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6085 operands[0] = gen_lowpart (V1TImode, operands[0]);
6086 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX min/max insns (vp<maxmin_int>b/w/d) on the SSEMODE124 modes (V16QI,
;; V8HI, V4SI).  The prefix_extra attribute is computed per instance: the
;; symbol_ref test is true when the mode differs from V8HImode for
;; SMAX/SMIN, or from V16QImode for the other codes.
;; NOTE(review): the code iterator line and the "true" arm of the
;; if_then_else are not visible in this extract.
6089 (define_insn "*avx_<code><mode>3"
6090 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6092 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6093 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6094 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6095 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6096 [(set_attr "type" "sseiadd")
6097 (set (attr "prefix_extra")
6099 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6102 (const_string "0")))
6103 (set_attr "prefix" "vex")
6104 (set_attr "mode" "TI")])
;; Named expander for the V16QI min/max operations matched by
;; "*<code>v16qi3"; operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator line and the condition are not visible
;; in this extract.
6106 (define_expand "<code>v16qi3"
6107 [(set (match_operand:V16QI 0 "register_operand" "")
6109 (match_operand:V16QI 1 "nonimmediate_operand" "")
6110 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6112 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 two-operand V16QI min/max insn (p<maxmin_int>b); operand 1 is tied
;; to the destination.
6114 (define_insn "*<code>v16qi3"
6115 [(set (match_operand:V16QI 0 "register_operand" "=x")
6117 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6118 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6119 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6120 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6121 [(set_attr "type" "sseiadd")
6122 (set_attr "prefix_data16" "1")
6123 (set_attr "mode" "TI")])
;; Named expander for the V8HI min/max operations matched by
;; "*<code>v8hi3"; operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator line and the condition are not visible
;; in this extract.
6125 (define_expand "<code>v8hi3"
6126 [(set (match_operand:V8HI 0 "register_operand" "")
6128 (match_operand:V8HI 1 "nonimmediate_operand" "")
6129 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6131 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 two-operand V8HI min/max insn (p<maxmin_int>w); operand 1 is tied
;; to the destination.
6133 (define_insn "*<code>v8hi3"
6134 [(set (match_operand:V8HI 0 "register_operand" "=x")
6136 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6137 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6138 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6139 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6140 [(set_attr "type" "sseiadd")
6141 (set_attr "prefix_data16" "1")
6142 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two strategies are visible:
;;   - fix up the operands so a umax insn can match directly;
;;   - otherwise compute it as (op1 -us op2) + op2, using the unsigned
;;     saturating subtract gen_sse2_ussubv8hi3 followed by gen_addv8hi3.
;;     The intermediate goes into operand 0 unless operand 0 is the same
;;     rtx as operand 2, in which case a fresh pseudo is used so operand 2
;;     is still intact for the add.
;; NOTE(review): the test choosing between the two strategies is not
;; visible in this extract.
6144 (define_expand "umaxv8hi3"
6145 [(set (match_operand:V8HI 0 "register_operand" "")
6146 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6147 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6151 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6154 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6155 if (rtx_equal_p (op3, op2))
6156 op3 = gen_reg_rtx (V8HImode);
6157 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6158 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum on the SSEMODE14 modes (V16QI, V4SI).  Either the
;; operands are fixed up for a direct smax insn, or the operation is
;; expanded through ix86_expand_int_vcond with
;;   xops = { dest, op1, op2, (gt op1 op2), op1, op2 },
;; presumably "op1 > op2 ? op1 : op2" -- confirm against
;; ix86_expand_int_vcond's argument convention.
;; NOTE(review): the test choosing between the two paths is not visible in
;; this extract.
6163 (define_expand "smax<mode>3"
6164 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6165 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6166 (match_operand:SSEMODE14 2 "register_operand" "")))]
6170 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6176 xops[0] = operands[0];
6177 xops[1] = operands[1];
6178 xops[2] = operands[2];
6179 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6180 xops[4] = operands[1];
6181 xops[5] = operands[2];
6182 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand min/max insn on the SSEMODE14 modes (V16QI, V4SI),
;; emitted as p<maxmin_int><ssevecsize> with operand 1 tied to the
;; destination.
6188 (define_insn "*sse4_1_<code><mode>3"
6189 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6191 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6192 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6193 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6194 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6195 [(set_attr "type" "sseiadd")
6196 (set_attr "prefix_extra" "1")
6197 (set_attr "mode" "TI")])
;; Signed V2DI maximum, expanded through ix86_expand_int_vcond with
;;   xops = { dest, op1, op2, (gt op1 op2), op1, op2 },
;; presumably "op1 > op2 ? op1 : op2" -- confirm against
;; ix86_expand_int_vcond's argument convention.
6199 (define_expand "smaxv2di3"
6200 [(set (match_operand:V2DI 0 "register_operand" "")
6201 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6202 (match_operand:V2DI 2 "register_operand" "")))]
6208 xops[0] = operands[0];
6209 xops[1] = operands[1];
6210 xops[2] = operands[2];
6211 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6212 xops[4] = operands[1];
6213 xops[5] = operands[2];
6214 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum.  Either the operands are fixed up for a direct
;; umax insn, or the operation is expanded through ix86_expand_int_vcond
;; with the unsigned comparison (gtu op1 op2) and op1/op2 as the values to
;; choose from.
;; NOTE(review): the test choosing between the two paths is not visible in
;; this extract.
6219 (define_expand "umaxv4si3"
6220 [(set (match_operand:V4SI 0 "register_operand" "")
6221 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6222 (match_operand:V4SI 2 "register_operand" "")))]
6226 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6232 xops[0] = operands[0];
6233 xops[1] = operands[1];
6234 xops[2] = operands[2];
6235 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6236 xops[4] = operands[1];
6237 xops[5] = operands[2];
6238 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand min/max insn on the SSEMODE24 modes (V8HI, V4SI),
;; emitted as p<maxmin_int><ssevecsize> with operand 1 tied to the
;; destination.  It shares its "*" name with the SSEMODE14 pattern earlier
;; in the file.
6244 (define_insn "*sse4_1_<code><mode>3"
6245 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6247 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6248 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6249 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6250 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6251 [(set_attr "type" "sseiadd")
6252 (set_attr "prefix_extra" "1")
6253 (set_attr "mode" "TI")])
;; Unsigned V2DI maximum, expanded through ix86_expand_int_vcond with the
;; unsigned comparison (gtu op1 op2) and op1/op2, in that order, as the
;; values to choose from.
6255 (define_expand "umaxv2di3"
6256 [(set (match_operand:V2DI 0 "register_operand" "")
6257 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6258 (match_operand:V2DI 2 "register_operand" "")))]
6264 xops[0] = operands[0];
6265 xops[1] = operands[1];
6266 xops[2] = operands[2];
6267 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6268 xops[4] = operands[1];
6269 xops[5] = operands[2];
6270 ok = ix86_expand_int_vcond (xops);
;; Signed minimum on the SSEMODE14 modes (V16QI, V4SI).  Either the
;; operands are fixed up for a direct smin insn, or the operation is
;; expanded through ix86_expand_int_vcond.  The comparison is the same
;; (gt op1 op2) used for smax; the two value operands are swapped
;; (xops[1] = op2, xops[2] = op1) to obtain the minimum.
;; NOTE(review): the test choosing between the two paths is not visible in
;; this extract.
6275 (define_expand "smin<mode>3"
6276 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6277 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6278 (match_operand:SSEMODE14 2 "register_operand" "")))]
6282 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6288 xops[0] = operands[0];
6289 xops[1] = operands[2];
6290 xops[2] = operands[1];
6291 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6292 xops[4] = operands[1];
6293 xops[5] = operands[2];
6294 ok = ix86_expand_int_vcond (xops);
;; Signed V2DI minimum, expanded through ix86_expand_int_vcond with the
;; comparison (gt op1 op2) and the value operands swapped relative to
;; smaxv2di3 (xops[1] = op2, xops[2] = op1).
6300 (define_expand "sminv2di3"
6301 [(set (match_operand:V2DI 0 "register_operand" "")
6302 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6303 (match_operand:V2DI 2 "register_operand" "")))]
6309 xops[0] = operands[0];
6310 xops[1] = operands[2];
6311 xops[2] = operands[1];
6312 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6313 xops[4] = operands[1];
6314 xops[5] = operands[2];
6315 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).  Either the
;; operands are fixed up for a direct umin insn, or the operation is
;; expanded through ix86_expand_int_vcond with the unsigned comparison
;; (gtu op1 op2) and swapped value operands (xops[1] = op2, xops[2] = op1).
;; NOTE(review): the test choosing between the two paths is not visible in
;; this extract.
6320 (define_expand "umin<mode>3"
6321 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6322 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6323 (match_operand:SSEMODE24 2 "register_operand" "")))]
6327 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6333 xops[0] = operands[0];
6334 xops[1] = operands[2];
6335 xops[2] = operands[1];
6336 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6337 xops[4] = operands[1];
6338 xops[5] = operands[2];
6339 ok = ix86_expand_int_vcond (xops);
;; Unsigned V2DI minimum, expanded through ix86_expand_int_vcond with the
;; unsigned comparison (gtu op1 op2) and the value operands swapped
;; relative to umaxv2di3 (xops[1] = op2, xops[2] = op1).
6345 (define_expand "uminv2di3"
6346 [(set (match_operand:V2DI 0 "register_operand" "")
6347 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6348 (match_operand:V2DI 2 "register_operand" "")))]
6354 xops[0] = operands[0];
6355 xops[1] = operands[2];
6356 xops[2] = operands[1];
6357 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6358 xops[4] = operands[1];
6359 xops[5] = operands[2];
6360 ok = ix86_expand_int_vcond (xops);
6365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6367 ;; Parallel integral comparisons
6369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for element-wise equality compare on the SSEMODE124
;; modes (V16QI, V8HI, V4SI).  Disabled when XOP is enabled
;; (!TARGET_XOP).  Operands are legitimized for the EQ insn patterns with
;; ix86_fixup_binary_operands_no_copy.
6371 (define_expand "sse2_eq<mode>3"
6372 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6374 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6375 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6376 "TARGET_SSE2 && !TARGET_XOP "
6377 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality compare (vpcmpeq<ssevecsize>) over the SSEMODE1248 modes.
;; prefix_extra is selected by testing whether operand 0 is V2DI; "*"
;; (the default) is used otherwise.
;; NOTE(review): the "true" arm of the if_then_else and the definition of
;; SSEMODE1248 are not visible in this extract.
6379 (define_insn "*avx_eq<mode>3"
6380 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6382 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6383 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6384 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6385 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6386 [(set_attr "type" "ssecmp")
6387 (set (attr "prefix_extra")
6388 (if_then_else (match_operand:V2DI 0 "" "")
6390 (const_string "*")))
6391 (set_attr "prefix" "vex")
6392 (set_attr "mode" "TI")])
;; SSE2 equality compare over SSEMODE124 (V16QI, V8HI, V4SI), used when
;; XOP is not enabled.  Emits the two-operand pcmpeq<ssevecsize>: operand 1
;; is commutative and tied to the destination ("%0"), operand 2 is a
;; register or memory source.
6394 (define_insn "*sse2_eq<mode>3"
6395 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6397 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6398 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6399 "TARGET_SSE2 && !TARGET_XOP
6400 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6401 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6402 [(set_attr "type" "ssecmp")
6403 (set_attr "prefix_data16" "1")
6404 (set_attr "mode" "TI")])
;; Expander for the V2DI equality compare.  The preparation statement
;; runs ix86_fixup_binary_operands_no_copy for EQ in V2DImode.
6406 (define_expand "sse4_1_eqv2di3"
6407 [(set (match_operand:V2DI 0 "register_operand" "")
6409 (match_operand:V2DI 1 "nonimmediate_operand" "")
6410 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6412 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI equality compare.  Emits the two-operand pcmpeqq with
;; operand 1 commutative and tied to the destination ("%0"); operand 2 is
;; a register or memory source.
6414 (define_insn "*sse4_1_eqv2di3"
6415 [(set (match_operand:V2DI 0 "register_operand" "=x")
6417 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6418 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6419 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6420 "pcmpeqq\t{%2, %0|%0, %2}"
6421 [(set_attr "type" "ssecmp")
6422 (set_attr "prefix_extra" "1")
6423 (set_attr "mode" "TI")])
;; AVX greater-than compare over SSEMODE1248.  Emits the three-operand
;; vpcmpgt<ssevecsize>.  Unlike the equality patterns, operand 1 must be a
;; register and carries no commutative marker.  The prefix_extra attribute
;; is computed from a test on whether operand 0 is V2DI.
6425 (define_insn "*avx_gt<mode>3"
6426 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6428 (match_operand:SSEMODE1248 1 "register_operand" "x")
6429 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6431 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6432 [(set_attr "type" "ssecmp")
6433 (set (attr "prefix_extra")
6434 (if_then_else (match_operand:V2DI 0 "" "")
6436 (const_string "*")))
6437 (set_attr "prefix" "vex")
6438 (set_attr "mode" "TI")])
;; SSE2 greater-than compare over SSEMODE124 (V16QI, V8HI, V4SI), used
;; when XOP is not enabled.  Emits the two-operand pcmpgt<ssevecsize> with
;; operand 1 tied to the destination ("0").
6440 (define_insn "sse2_gt<mode>3"
6441 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6443 (match_operand:SSEMODE124 1 "register_operand" "0")
6444 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6445 "TARGET_SSE2 && !TARGET_XOP"
6446 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6447 [(set_attr "type" "ssecmp")
6448 (set_attr "prefix_data16" "1")
6449 (set_attr "mode" "TI")])
;; V2DI greater-than compare.  Emits the two-operand pcmpgtq with
;; operand 1 tied to the destination ("0") and operand 2 taken from a
;; register or memory.
6451 (define_insn "sse4_2_gtv2di3"
6452 [(set (match_operand:V2DI 0 "register_operand" "=x")
6454 (match_operand:V2DI 1 "register_operand" "0")
6455 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6457 "pcmpgtq\t{%2, %0|%0, %2}"
6458 [(set_attr "type" "ssecmp")
6459 (set_attr "prefix_extra" "1")
6460 (set_attr "mode" "TI")])
;; Vector conditional select over the SSEMODE124C8 modes.
;; Operand 3 is a comparison operator applied to operands 4 and 5; the
;; result takes operand 1 where the comparison holds and operand 2
;; elsewhere.  All of the work is delegated to ix86_expand_int_vcond,
;; which reports success through its boolean return value.
6462 (define_expand "vcond<mode>"
6463 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6464 (if_then_else:SSEMODE124C8
6465 (match_operator 3 ""
6466 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6467 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6468 (match_operand:SSEMODE124C8 1 "general_operand" "")
6469 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6472 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select over the SSEMODE124C8 modes, with the same
;; operand layout as vcond<mode>: operator 3 compares operands 4 and 5 and
;; chooses between operands 1 and 2.  Presumably the unsigned-comparison
;; counterpart (per the standard "vcondu" name); the expansion itself is
;; again handed to ix86_expand_int_vcond.
6477 (define_expand "vcondu<mode>"
6478 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6479 (if_then_else:SSEMODE124C8
6480 (match_operator 3 ""
6481 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6482 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6483 (match_operand:SSEMODE124C8 1 "general_operand" "")
6484 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6487 bool ok = ix86_expand_int_vcond (operands);
6492 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6494 ;; Parallel bitwise logical operations
6496 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer vector modes (SSEMODEI),
;; expressed as an XOR of operand 1 with an all-ones vector.
;; The preparation code builds that constant: it allocates an rtvec with
;; GET_MODE_NUNITS elements, fills every slot with constm1_rtx, wraps it in
;; a CONST_VECTOR and forces it into a register as operand 2.
6498 (define_expand "one_cmpl<mode>2"
6499 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6500 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6504 int i, n = GET_MODE_NUNITS (<MODE>mode);
6505 rtvec v = rtvec_alloc (n);
6507 for (i = 0; i < n; ++i)
6508 RTVEC_ELT (v, i) = constm1_rtx;
6510 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for the 32-byte integer vector modes (AVX256MODEI).
;; Operand 1 is complemented (not:...) before being combined with
;; operand 2.  Emitted as the three-operand vandnps, with the insn mode
;; attribute taken from <avxvecpsmode>.
6513 (define_insn "*avx_andnot<mode>3"
6514 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6516 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6517 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6519 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6520 [(set_attr "type" "sselog")
6521 (set_attr "prefix" "vex")
6522 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the 16-byte integer vector modes on targets that have SSE
;; but not SSE2.  Operand 1 is complemented and tied to the destination
;; ("0"); the insn is emitted as the two-operand andnps and is accounted
;; as V4SF mode.
6524 (define_insn "*sse_andnot<mode>3"
6525 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6527 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6528 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6529 "(TARGET_SSE && !TARGET_SSE2)"
6530 "andnps\t{%2, %0|%0, %2}"
6531 [(set_attr "type" "sselog")
6532 (set_attr "mode" "V4SF")])
;; And-not for the 16-byte integer vector modes (SSEMODEI), VEX-encoded.
;; Operand 1 is complemented before being combined with operand 2;
;; emitted as the three-operand vpandn.
6534 (define_insn "*avx_andnot<mode>3"
6535 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6537 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6538 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6540 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6541 [(set_attr "type" "sselog")
6542 (set_attr "prefix" "vex")
6543 (set_attr "mode" "TI")])
;; And-not for the 16-byte integer vector modes (SSEMODEI).
;; Operand 1 is complemented and tied to the destination ("0"); emitted
;; as the two-operand pandn.
6545 (define_insn "sse2_andnot<mode>3"
6546 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6548 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6549 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6551 "pandn\t{%2, %0|%0, %2}"
6552 [(set_attr "type" "sselog")
6553 (set_attr "prefix_data16" "1")
6554 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers.  Operand 1 is
;; complemented and tied to the destination ("0"); emitted as pandn and
;; accounted as TI mode.
6556 (define_insn "*andnottf3"
6557 [(set (match_operand:TF 0 "register_operand" "=x")
6559 (not:TF (match_operand:TF 1 "register_operand" "0"))
6560 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6562 "pandn\t{%2, %0|%0, %2}"
6563 [(set_attr "type" "sselog")
6564 (set_attr "prefix_data16" "1")
6565 (set_attr "mode" "TI")])
;; Expander for the <code> binary operations on the 16-byte integer
;; vector modes (SSEMODEI).  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy for <CODE> on the two inputs.
6567 (define_expand "<code><mode>3"
6568 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6570 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6571 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6573 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; any_logic operations on the 32-byte integer vector modes
;; (AVX256MODEI).  Emitted as the three-operand v<logic>ps; operand 1 is
;; commutative ("%x") and the insn mode attribute comes from
;; <avxvecpsmode>.
6575 (define_insn "*avx_<code><mode>3"
6576 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6577 (any_logic:AVX256MODEI
6578 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6579 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6581 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6582 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6583 [(set_attr "type" "sselog")
6584 (set_attr "prefix" "vex")
6585 (set_attr "mode" "<avxvecpsmode>")])
;; <code> operations on the 16-byte integer vector modes for targets with
;; SSE but not SSE2.  Emitted as the two-operand <logic>ps with operand 1
;; commutative and tied to the destination ("%0"); accounted as V4SF mode.
6587 (define_insn "*sse_<code><mode>3"
6588 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6590 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6591 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6592 "(TARGET_SSE && !TARGET_SSE2)
6593 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6594 "<logic>ps\t{%2, %0|%0, %2}"
6595 [(set_attr "type" "sselog")
6596 (set_attr "mode" "V4SF")])
;; <code> operations on the 16-byte integer vector modes (SSEMODEI),
;; VEX-encoded.  Emitted as the three-operand vp<logic>; operand 1 is
;; commutative ("%x").
6598 (define_insn "*avx_<code><mode>3"
6599 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6601 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6602 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6604 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6605 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6606 [(set_attr "type" "sselog")
6607 (set_attr "prefix" "vex")
6608 (set_attr "mode" "TI")])
;; <code> operations on the 16-byte integer vector modes (SSEMODEI) for
;; SSE2.  Emitted as the two-operand p<logic> with operand 1 commutative
;; and tied to the destination ("%0").
6610 (define_insn "*sse2_<code><mode>3"
6611 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6613 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6614 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6615 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6616 "p<logic>\t{%2, %0|%0, %2}"
6617 [(set_attr "type" "sselog")
6618 (set_attr "prefix_data16" "1")
6619 (set_attr "mode" "TI")])
;; Expander for the <code> binary operations on TFmode values.  The
;; preparation statement runs ix86_fixup_binary_operands_no_copy for
;; <CODE> in TFmode.
6621 (define_expand "<code>tf3"
6622 [(set (match_operand:TF 0 "register_operand" "")
6624 (match_operand:TF 1 "nonimmediate_operand" "")
6625 (match_operand:TF 2 "nonimmediate_operand" "")))]
6627 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; <code> operations on TFmode values for SSE2.  Emitted as the
;; two-operand p<logic> with operand 1 commutative and tied to the
;; destination ("%0"); accounted as TI mode.
6629 (define_insn "*<code>tf3"
6630 [(set (match_operand:TF 0 "register_operand" "=x")
6632 (match_operand:TF 1 "nonimmediate_operand" "%0")
6633 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6634 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6635 "p<logic>\t{%2, %0|%0, %2}"
6636 [(set_attr "type" "sselog")
6637 (set_attr "prefix_data16" "1")
6638 (set_attr "mode" "TI")])
6640 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6642 ;; Parallel integral element swizzling
6644 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI vectors into one V16QI vector.
;; Both inputs are re-viewed as V16QI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even-numbered elements, i.e. the low byte of
;; each halfword -- confirm against the helper's definition).
6646 (define_expand "vec_pack_trunc_v8hi"
6647 [(match_operand:V16QI 0 "register_operand" "")
6648 (match_operand:V8HI 1 "register_operand" "")
6649 (match_operand:V8HI 2 "register_operand" "")]
6652 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6653 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6654 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V4SI vectors into one V8HI vector.
;; Both inputs are re-viewed as V8HI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even-numbered elements -- confirm against the
;; helper's definition).
6658 (define_expand "vec_pack_trunc_v4si"
6659 [(match_operand:V8HI 0 "register_operand" "")
6660 (match_operand:V4SI 1 "register_operand" "")
6661 (match_operand:V4SI 2 "register_operand" "")]
6664 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6665 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6666 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V2DI vectors into one V4SI vector.
;; Both inputs are re-viewed as V4SI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even-numbered elements -- confirm against the
;; helper's definition).
6670 (define_expand "vec_pack_trunc_v2di"
6671 [(match_operand:V4SI 0 "register_operand" "")
6672 (match_operand:V2DI 1 "register_operand" "")
6673 (match_operand:V2DI 2 "register_operand" "")]
6676 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6677 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6678 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Packs two V8HI sources into a V16QI result using the three-operand,
;; VEX-encoded vpacksswb.  Operand 1 must be a register; operand 2 may be
;; a register or memory.
6682 (define_insn "*avx_packsswb"
6683 [(set (match_operand:V16QI 0 "register_operand" "=x")
6686 (match_operand:V8HI 1 "register_operand" "x"))
6688 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6690 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6691 [(set_attr "type" "sselog")
6692 (set_attr "prefix" "vex")
6693 (set_attr "mode" "TI")])
;; Packs two V8HI sources into a V16QI result using the two-operand
;; packsswb.  Operand 1 is tied to the destination ("0"); operand 2 may be
;; a register or memory.
6695 (define_insn "sse2_packsswb"
6696 [(set (match_operand:V16QI 0 "register_operand" "=x")
6699 (match_operand:V8HI 1 "register_operand" "0"))
6701 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6703 "packsswb\t{%2, %0|%0, %2}"
6704 [(set_attr "type" "sselog")
6705 (set_attr "prefix_data16" "1")
6706 (set_attr "mode" "TI")])
;; Packs two V4SI sources into a V8HI result using the three-operand,
;; VEX-encoded vpackssdw.  Operand 1 must be a register; operand 2 may be
;; a register or memory.
6708 (define_insn "*avx_packssdw"
6709 [(set (match_operand:V8HI 0 "register_operand" "=x")
6712 (match_operand:V4SI 1 "register_operand" "x"))
6714 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6716 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6717 [(set_attr "type" "sselog")
6718 (set_attr "prefix" "vex")
6719 (set_attr "mode" "TI")])
;; Packs two V4SI sources into a V8HI result using the two-operand
;; packssdw.  Operand 1 is tied to the destination ("0"); operand 2 may be
;; a register or memory.
6721 (define_insn "sse2_packssdw"
6722 [(set (match_operand:V8HI 0 "register_operand" "=x")
6725 (match_operand:V4SI 1 "register_operand" "0"))
6727 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6729 "packssdw\t{%2, %0|%0, %2}"
6730 [(set_attr "type" "sselog")
6731 (set_attr "prefix_data16" "1")
6732 (set_attr "mode" "TI")])
;; Packs two V8HI sources into a V16QI result using the three-operand,
;; VEX-encoded vpackuswb.  Operand 1 must be a register; operand 2 may be
;; a register or memory.
6734 (define_insn "*avx_packuswb"
6735 [(set (match_operand:V16QI 0 "register_operand" "=x")
6738 (match_operand:V8HI 1 "register_operand" "x"))
6740 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6742 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6743 [(set_attr "type" "sselog")
6744 (set_attr "prefix" "vex")
6745 (set_attr "mode" "TI")])
;; Packs two V8HI sources into a V16QI result using the two-operand
;; packuswb.  Operand 1 is tied to the destination ("0"); operand 2 may be
;; a register or memory.
6747 (define_insn "sse2_packuswb"
6748 [(set (match_operand:V16QI 0 "register_operand" "=x")
6751 (match_operand:V8HI 1 "register_operand" "0"))
6753 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6755 "packuswb\t{%2, %0|%0, %2}"
6756 [(set_attr "type" "sselog")
6757 (set_attr "prefix_data16" "1")
6758 (set_attr "mode" "TI")])
;; Interleaves the high halves of two V16QI operands (VEX form).
;; The selection list pairs element i with element i+16 for i = 8..15, so
;; bytes 8-15 of operand 1 alternate with bytes 8-15 of operand 2.
;; Emitted as the three-operand vpunpckhbw.
6760 (define_insn "*avx_interleave_highv16qi"
6761 [(set (match_operand:V16QI 0 "register_operand" "=x")
6764 (match_operand:V16QI 1 "register_operand" "x")
6765 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6766 (parallel [(const_int 8) (const_int 24)
6767 (const_int 9) (const_int 25)
6768 (const_int 10) (const_int 26)
6769 (const_int 11) (const_int 27)
6770 (const_int 12) (const_int 28)
6771 (const_int 13) (const_int 29)
6772 (const_int 14) (const_int 30)
6773 (const_int 15) (const_int 31)])))]
6775 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6776 [(set_attr "type" "sselog")
6777 (set_attr "prefix" "vex")
6778 (set_attr "mode" "TI")])
;; Interleaves the high halves of two V16QI operands.
;; The selection list pairs element i with element i+16 for i = 8..15.
;; Emitted as the two-operand punpckhbw with operand 1 tied to the
;; destination ("0").
6780 (define_insn "vec_interleave_highv16qi"
6781 [(set (match_operand:V16QI 0 "register_operand" "=x")
6784 (match_operand:V16QI 1 "register_operand" "0")
6785 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6786 (parallel [(const_int 8) (const_int 24)
6787 (const_int 9) (const_int 25)
6788 (const_int 10) (const_int 26)
6789 (const_int 11) (const_int 27)
6790 (const_int 12) (const_int 28)
6791 (const_int 13) (const_int 29)
6792 (const_int 14) (const_int 30)
6793 (const_int 15) (const_int 31)])))]
6795 "punpckhbw\t{%2, %0|%0, %2}"
6796 [(set_attr "type" "sselog")
6797 (set_attr "prefix_data16" "1")
6798 (set_attr "mode" "TI")])
;; Interleaves the low halves of two V16QI operands (VEX form).
;; The selection list pairs element i with element i+16 for i = 0..7, so
;; bytes 0-7 of operand 1 alternate with bytes 0-7 of operand 2.
;; Emitted as the three-operand vpunpcklbw.
6800 (define_insn "*avx_interleave_lowv16qi"
6801 [(set (match_operand:V16QI 0 "register_operand" "=x")
6804 (match_operand:V16QI 1 "register_operand" "x")
6805 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6806 (parallel [(const_int 0) (const_int 16)
6807 (const_int 1) (const_int 17)
6808 (const_int 2) (const_int 18)
6809 (const_int 3) (const_int 19)
6810 (const_int 4) (const_int 20)
6811 (const_int 5) (const_int 21)
6812 (const_int 6) (const_int 22)
6813 (const_int 7) (const_int 23)])))]
6815 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6816 [(set_attr "type" "sselog")
6817 (set_attr "prefix" "vex")
6818 (set_attr "mode" "TI")])
;; Interleaves the low halves of two V16QI operands.
;; The selection list pairs element i with element i+16 for i = 0..7.
;; Emitted as the two-operand punpcklbw with operand 1 tied to the
;; destination ("0").
6820 (define_insn "vec_interleave_lowv16qi"
6821 [(set (match_operand:V16QI 0 "register_operand" "=x")
6824 (match_operand:V16QI 1 "register_operand" "0")
6825 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6826 (parallel [(const_int 0) (const_int 16)
6827 (const_int 1) (const_int 17)
6828 (const_int 2) (const_int 18)
6829 (const_int 3) (const_int 19)
6830 (const_int 4) (const_int 20)
6831 (const_int 5) (const_int 21)
6832 (const_int 6) (const_int 22)
6833 (const_int 7) (const_int 23)])))]
6835 "punpcklbw\t{%2, %0|%0, %2}"
6836 [(set_attr "type" "sselog")
6837 (set_attr "prefix_data16" "1")
6838 (set_attr "mode" "TI")])
;; Interleaves the high halves of two V8HI operands (VEX form).
;; The selection list pairs element i with element i+8 for i = 4..7.
;; Emitted as the three-operand vpunpckhwd.
6840 (define_insn "*avx_interleave_highv8hi"
6841 [(set (match_operand:V8HI 0 "register_operand" "=x")
6844 (match_operand:V8HI 1 "register_operand" "x")
6845 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6846 (parallel [(const_int 4) (const_int 12)
6847 (const_int 5) (const_int 13)
6848 (const_int 6) (const_int 14)
6849 (const_int 7) (const_int 15)])))]
6851 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6852 [(set_attr "type" "sselog")
6853 (set_attr "prefix" "vex")
6854 (set_attr "mode" "TI")])
;; Interleaves the high halves of two V8HI operands.
;; The selection list pairs element i with element i+8 for i = 4..7.
;; Emitted as the two-operand punpckhwd with operand 1 tied to the
;; destination ("0").
6856 (define_insn "vec_interleave_highv8hi"
6857 [(set (match_operand:V8HI 0 "register_operand" "=x")
6860 (match_operand:V8HI 1 "register_operand" "0")
6861 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6862 (parallel [(const_int 4) (const_int 12)
6863 (const_int 5) (const_int 13)
6864 (const_int 6) (const_int 14)
6865 (const_int 7) (const_int 15)])))]
6867 "punpckhwd\t{%2, %0|%0, %2}"
6868 [(set_attr "type" "sselog")
6869 (set_attr "prefix_data16" "1")
6870 (set_attr "mode" "TI")])
;; Interleaves the low halves of two V8HI operands (VEX form).
;; The selection list pairs element i with element i+8 for i = 0..3.
;; Emitted as the three-operand vpunpcklwd.
6872 (define_insn "*avx_interleave_lowv8hi"
6873 [(set (match_operand:V8HI 0 "register_operand" "=x")
6876 (match_operand:V8HI 1 "register_operand" "x")
6877 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6878 (parallel [(const_int 0) (const_int 8)
6879 (const_int 1) (const_int 9)
6880 (const_int 2) (const_int 10)
6881 (const_int 3) (const_int 11)])))]
6883 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6884 [(set_attr "type" "sselog")
6885 (set_attr "prefix" "vex")
6886 (set_attr "mode" "TI")])
;; Interleaves the low halves of two V8HI operands.
;; The selection list pairs element i with element i+8 for i = 0..3.
;; Emitted as the two-operand punpcklwd with operand 1 tied to the
;; destination ("0").
6888 (define_insn "vec_interleave_lowv8hi"
6889 [(set (match_operand:V8HI 0 "register_operand" "=x")
6892 (match_operand:V8HI 1 "register_operand" "0")
6893 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6894 (parallel [(const_int 0) (const_int 8)
6895 (const_int 1) (const_int 9)
6896 (const_int 2) (const_int 10)
6897 (const_int 3) (const_int 11)])))]
6899 "punpcklwd\t{%2, %0|%0, %2}"
6900 [(set_attr "type" "sselog")
6901 (set_attr "prefix_data16" "1")
6902 (set_attr "mode" "TI")])
;; Interleaves the high halves of two V4SI operands (VEX form).
;; The selection list is 2, 6, 3, 7: element i paired with element i+4
;; for i = 2..3.  Emitted as the three-operand vpunpckhdq.
6904 (define_insn "*avx_interleave_highv4si"
6905 [(set (match_operand:V4SI 0 "register_operand" "=x")
6908 (match_operand:V4SI 1 "register_operand" "x")
6909 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6910 (parallel [(const_int 2) (const_int 6)
6911 (const_int 3) (const_int 7)])))]
6913 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6914 [(set_attr "type" "sselog")
6915 (set_attr "prefix" "vex")
6916 (set_attr "mode" "TI")])
;; Interleaves the high halves of two V4SI operands.
;; The selection list is 2, 6, 3, 7: element i paired with element i+4
;; for i = 2..3.  Emitted as the two-operand punpckhdq with operand 1 tied
;; to the destination ("0").
6918 (define_insn "vec_interleave_highv4si"
6919 [(set (match_operand:V4SI 0 "register_operand" "=x")
6922 (match_operand:V4SI 1 "register_operand" "0")
6923 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6924 (parallel [(const_int 2) (const_int 6)
6925 (const_int 3) (const_int 7)])))]
6927 "punpckhdq\t{%2, %0|%0, %2}"
6928 [(set_attr "type" "sselog")
6929 (set_attr "prefix_data16" "1")
6930 (set_attr "mode" "TI")])
;; Interleaves the low halves of two V4SI operands (VEX form).
;; The selection list is 0, 4, 1, 5: element i paired with element i+4
;; for i = 0..1.  Emitted as the three-operand vpunpckldq.
6932 (define_insn "*avx_interleave_lowv4si"
6933 [(set (match_operand:V4SI 0 "register_operand" "=x")
6936 (match_operand:V4SI 1 "register_operand" "x")
6937 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6938 (parallel [(const_int 0) (const_int 4)
6939 (const_int 1) (const_int 5)])))]
6941 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6942 [(set_attr "type" "sselog")
6943 (set_attr "prefix" "vex")
6944 (set_attr "mode" "TI")])
;; Interleaves the low halves of two V4SI operands.
;; The selection list is 0, 4, 1, 5: element i paired with element i+4
;; for i = 0..1.  Emitted as the two-operand punpckldq with operand 1 tied
;; to the destination ("0").
6946 (define_insn "vec_interleave_lowv4si"
6947 [(set (match_operand:V4SI 0 "register_operand" "=x")
6950 (match_operand:V4SI 1 "register_operand" "0")
6951 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6952 (parallel [(const_int 0) (const_int 4)
6953 (const_int 1) (const_int 5)])))]
6955 "punpckldq\t{%2, %0|%0, %2}"
6956 [(set_attr "type" "sselog")
6957 (set_attr "prefix_data16" "1")
6958 (set_attr "mode" "TI")])
;; VEX element insert for the SSEMODE124 modes (V16QI, V8HI, V4SI).
;; The pattern is a vec_merge of the duplicated scalar operand 2 into
;; vector operand 1, with operand 3 a single-bit (power of two) merge mask.
;; The output code turns that mask into the element index with exact_log2
;; and then emits vpinsr<ssevecsize>.  A memory source is printed as %2,
;; while a register source is printed with the %k2 operand modifier.
;; prefix_extra is selected by a test on whether operand 0 is V8HI.
6960 (define_insn "*avx_pinsr<ssevecsize>"
6961 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6962 (vec_merge:SSEMODE124
6963 (vec_duplicate:SSEMODE124
6964 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6965 (match_operand:SSEMODE124 1 "register_operand" "x")
6966 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6969 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6970 if (MEM_P (operands[2]))
6971 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6973 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6975 [(set_attr "type" "sselog")
6976 (set (attr "prefix_extra")
6977 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6979 (const_string "1")))
6980 (set_attr "length_immediate" "1")
6981 (set_attr "prefix" "vex")
6982 (set_attr "mode" "TI")])
;; Byte insert into a V16QI vector.  Operand 2 (QImode, register or
;; memory) is duplicated and merged into operand 1, which is tied to the
;; destination; operand 3 is a one-hot mask in the range 1..32768.
;; The output code converts the mask to an element index with exact_log2
;; and emits pinsrb, using the %k2 modifier when the source is a register.
6984 (define_insn "*sse4_1_pinsrb"
6985 [(set (match_operand:V16QI 0 "register_operand" "=x")
6987 (vec_duplicate:V16QI
6988 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6989 (match_operand:V16QI 1 "register_operand" "0")
6990 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6993 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6994 if (MEM_P (operands[2]))
6995 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6997 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6999 [(set_attr "type" "sselog")
7000 (set_attr "prefix_extra" "1")
7001 (set_attr "length_immediate" "1")
7002 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI vector.  Operand 2 (HImode, register or
;; memory) is merged into operand 1, which is tied to the destination;
;; operand 3 is a one-hot mask in the range 1..128.
;; The output code converts the mask to an element index with exact_log2
;; and emits pinsrw, using the %k2 modifier when the source is a register.
7004 (define_insn "*sse2_pinsrw"
7005 [(set (match_operand:V8HI 0 "register_operand" "=x")
7008 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7009 (match_operand:V8HI 1 "register_operand" "0")
7010 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7013 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7014 if (MEM_P (operands[2]))
7015 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
7017 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7019 [(set_attr "type" "sselog")
7020 (set_attr "prefix_data16" "1")
7021 (set_attr "length_immediate" "1")
7022 (set_attr "mode" "TI")])
7024 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into a V4SI vector.  Operand 2 (SImode, register or
;; memory) is merged into operand 1, which is tied to the destination;
;; operand 3 is a one-hot mask in the range 1..8 that the output code
;; converts to an element index with exact_log2 before emitting pinsrd.
7025 (define_insn "*sse4_1_pinsrd"
7026 [(set (match_operand:V4SI 0 "register_operand" "=x")
7029 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7030 (match_operand:V4SI 1 "register_operand" "0")
7031 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7034 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7035 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7037 [(set_attr "type" "sselog")
7038 (set_attr "prefix_extra" "1")
7039 (set_attr "length_immediate" "1")
7040 (set_attr "mode" "TI")])
;; VEX quadword insert into a V2DI vector; requires AVX on a 64-bit
;; target.  Operand 2 (DImode, register or memory) is merged into
;; operand 1; operand 3 is a one-hot mask (1 or 2) that the output code
;; converts to an element index with exact_log2 before emitting the
;; three-operand vpinsrq.
7042 (define_insn "*avx_pinsrq"
7043 [(set (match_operand:V2DI 0 "register_operand" "=x")
7046 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7047 (match_operand:V2DI 1 "register_operand" "x")
7048 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7049 "TARGET_AVX && TARGET_64BIT"
7051 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7052 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7054 [(set_attr "type" "sselog")
7055 (set_attr "prefix_extra" "1")
7056 (set_attr "length_immediate" "1")
7057 (set_attr "prefix" "vex")
7058 (set_attr "mode" "TI")])
;; Quadword insert into a V2DI vector; requires SSE4.1 on a 64-bit
;; target.  Operand 2 (DImode, register or memory) is merged into
;; operand 1, which is tied to the destination; operand 3 is a one-hot
;; mask (1 or 2) that the output code converts to an element index with
;; exact_log2 before emitting pinsrq.  The insn is marked as needing a
;; REX prefix.
7060 (define_insn "*sse4_1_pinsrq"
7061 [(set (match_operand:V2DI 0 "register_operand" "=x")
7064 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7065 (match_operand:V2DI 1 "register_operand" "0")
7066 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7067 "TARGET_SSE4_1 && TARGET_64BIT"
7069 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7070 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7072 [(set_attr "type" "sselog")
7073 (set_attr "prefix_rex" "1")
7074 (set_attr "prefix_extra" "1")
7075 (set_attr "length_immediate" "1")
7076 (set_attr "mode" "TI")])
;; Extracts one byte of a V16QI register into an SImode general
;; register.  Operand 2 is the element index (0..15).  Emitted as
;; %vpextrb, with the prefix attribute set to maybe_vex.
7078 (define_insn "*sse4_1_pextrb"
7079 [(set (match_operand:SI 0 "register_operand" "=r")
7082 (match_operand:V16QI 1 "register_operand" "x")
7083 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7085 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7086 [(set_attr "type" "sselog")
7087 (set_attr "prefix_extra" "1")
7088 (set_attr "length_immediate" "1")
7089 (set_attr "prefix" "maybe_vex")
7090 (set_attr "mode" "TI")])
;; Extracts one byte of a V16QI register directly to a QImode memory
;; destination.  Operand 2 is the element index (0..15).  Emitted as
;; %vpextrb.
7092 (define_insn "*sse4_1_pextrb_memory"
7093 [(set (match_operand:QI 0 "memory_operand" "=m")
7095 (match_operand:V16QI 1 "register_operand" "x")
7096 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7098 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7099 [(set_attr "type" "sselog")
7100 (set_attr "prefix_extra" "1")
7101 (set_attr "length_immediate" "1")
7102 (set_attr "prefix" "maybe_vex")
7103 (set_attr "mode" "TI")])
;; Extracts one halfword of a V8HI register into an SImode general
;; register.  Operand 2 is the element index (0..7).  Emitted as
;; %vpextrw.
7105 (define_insn "*sse2_pextrw"
7106 [(set (match_operand:SI 0 "register_operand" "=r")
7109 (match_operand:V8HI 1 "register_operand" "x")
7110 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7112 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7113 [(set_attr "type" "sselog")
7114 (set_attr "prefix_data16" "1")
7115 (set_attr "length_immediate" "1")
7116 (set_attr "prefix" "maybe_vex")
7117 (set_attr "mode" "TI")])
;; Extracts one halfword of a V8HI register directly to an HImode memory
;; destination.  Operand 2 is the element index (0..7).  Emitted as
;; %vpextrw; unlike *sse2_pextrw this form sets prefix_extra rather than
;; prefix_data16.
7119 (define_insn "*sse4_1_pextrw_memory"
7120 [(set (match_operand:HI 0 "memory_operand" "=m")
7122 (match_operand:V8HI 1 "register_operand" "x")
7123 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7125 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7126 [(set_attr "type" "sselog")
7127 (set_attr "prefix_extra" "1")
7128 (set_attr "length_immediate" "1")
7129 (set_attr "prefix" "maybe_vex")
7130 (set_attr "mode" "TI")])
;; Extracts one doubleword of a V4SI register to an SImode destination
;; that may be a general register or memory ("=rm").  Operand 2 is the
;; element index (0..3).  Emitted as %vpextrd.
7132 (define_insn "*sse4_1_pextrd"
7133 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7135 (match_operand:V4SI 1 "register_operand" "x")
7136 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7138 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7139 [(set_attr "type" "sselog")
7140 (set_attr "prefix_extra" "1")
7141 (set_attr "length_immediate" "1")
7142 (set_attr "prefix" "maybe_vex")
7143 (set_attr "mode" "TI")])
7145 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extracts one quadword of a V2DI register to a DImode destination that
;; may be a general register or memory ("=rm"); requires SSE4.1 on a
;; 64-bit target.  Operand 2 is the element index (0..1).  Emitted as
;; %vpextrq and marked as needing a REX prefix.
7146 (define_insn "*sse4_1_pextrq"
7147 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7149 (match_operand:V2DI 1 "register_operand" "x")
7150 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7151 "TARGET_SSE4_1 && TARGET_64BIT"
7152 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7153 [(set_attr "type" "sselog")
7154 (set_attr "prefix_rex" "1")
7155 (set_attr "prefix_extra" "1")
7156 (set_attr "length_immediate" "1")
7157 (set_attr "prefix" "maybe_vex")
7158 (set_attr "mode" "TI")])
;; Expander taking the pshufd shuffle control as a single integer
;; constant (operand 2).  It splits the value into four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7) and passes them as separate selector operands
;; to gen_sse2_pshufd_1.
7160 (define_expand "sse2_pshufd"
7161 [(match_operand:V4SI 0 "register_operand" "")
7162 (match_operand:V4SI 1 "nonimmediate_operand" "")
7163 (match_operand:SI 2 "const_int_operand" "")]
7166 int mask = INTVAL (operands[2]);
7167 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7168 GEN_INT ((mask >> 0) & 3),
7169 GEN_INT ((mask >> 2) & 3),
7170 GEN_INT ((mask >> 4) & 3),
7171 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with the four element selectors (each 0..3) as separate
;; operands 2-5.  The output code packs them back into one immediate,
;; two bits per selector starting at bit 0, stores it in operands[2] and
;; emits %vpshufd.
7175 (define_insn "sse2_pshufd_1"
7176 [(set (match_operand:V4SI 0 "register_operand" "=x")
7178 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7179 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7180 (match_operand 3 "const_0_to_3_operand" "")
7181 (match_operand 4 "const_0_to_3_operand" "")
7182 (match_operand 5 "const_0_to_3_operand" "")])))]
7186 mask |= INTVAL (operands[2]) << 0;
7187 mask |= INTVAL (operands[3]) << 2;
7188 mask |= INTVAL (operands[4]) << 4;
7189 mask |= INTVAL (operands[5]) << 6;
7190 operands[2] = GEN_INT (mask);
7192 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7194 [(set_attr "type" "sselog1")
7195 (set_attr "prefix_data16" "1")
7196 (set_attr "prefix" "maybe_vex")
7197 (set_attr "length_immediate" "1")
7198 (set_attr "mode" "TI")])
;; Expander taking the pshuflw shuffle control as a single integer
;; constant (operand 2).  It splits the value into four 2-bit fields and
;; passes them as separate selector operands to gen_sse2_pshuflw_1.
7200 (define_expand "sse2_pshuflw"
7201 [(match_operand:V8HI 0 "register_operand" "")
7202 (match_operand:V8HI 1 "nonimmediate_operand" "")
7203 (match_operand:SI 2 "const_int_operand" "")]
7206 int mask = INTVAL (operands[2]);
7207 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7208 GEN_INT ((mask >> 0) & 3),
7209 GEN_INT ((mask >> 2) & 3),
7210 GEN_INT ((mask >> 4) & 3),
7211 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four selectors (each 0..3) are operands 2-5.
;; The output code packs them into one immediate, two bits per selector
;; starting at bit 0, stores it in operands[2] and emits %vpshuflw.
;; The attributes clear prefix_data16 and set prefix_rep.
7215 (define_insn "sse2_pshuflw_1"
7216 [(set (match_operand:V8HI 0 "register_operand" "=x")
7218 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7219 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7220 (match_operand 3 "const_0_to_3_operand" "")
7221 (match_operand 4 "const_0_to_3_operand" "")
7222 (match_operand 5 "const_0_to_3_operand" "")
7230 mask |= INTVAL (operands[2]) << 0;
7231 mask |= INTVAL (operands[3]) << 2;
7232 mask |= INTVAL (operands[4]) << 4;
7233 mask |= INTVAL (operands[5]) << 6;
7234 operands[2] = GEN_INT (mask);
7236 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7238 [(set_attr "type" "sselog")
7239 (set_attr "prefix_data16" "0")
7240 (set_attr "prefix_rep" "1")
7241 (set_attr "prefix" "maybe_vex")
7242 (set_attr "length_immediate" "1")
7243 (set_attr "mode" "TI")])
;; Expander taking the pshufhw shuffle control as a single integer
;; constant (operand 2).  It splits the value into four 2-bit fields and
;; adds 4 to each, so the selectors passed to gen_sse2_pshufhw_1 are
;; element indices in the range 4..7.
7245 (define_expand "sse2_pshufhw"
7246 [(match_operand:V8HI 0 "register_operand" "")
7247 (match_operand:V8HI 1 "nonimmediate_operand" "")
7248 (match_operand:SI 2 "const_int_operand" "")]
7251 int mask = INTVAL (operands[2]);
7252 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7253 GEN_INT (((mask >> 0) & 3) + 4),
7254 GEN_INT (((mask >> 2) & 3) + 4),
7255 GEN_INT (((mask >> 4) & 3) + 4),
7256 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose selection list starts with fixed constants and
;; ends with four variable selectors (operands 2-5, each 4..7).
;; The output code subtracts 4 from every selector, packs the results
;; into one immediate (two bits each, starting at bit 0), stores it in
;; operands[2] and emits %vpshufhw.
7260 (define_insn "sse2_pshufhw_1"
7261 [(set (match_operand:V8HI 0 "register_operand" "=x")
7263 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7264 (parallel [(const_int 0)
7268 (match_operand 2 "const_4_to_7_operand" "")
7269 (match_operand 3 "const_4_to_7_operand" "")
7270 (match_operand 4 "const_4_to_7_operand" "")
7271 (match_operand 5 "const_4_to_7_operand" "")])))]
7275 mask |= (INTVAL (operands[2]) - 4) << 0;
7276 mask |= (INTVAL (operands[3]) - 4) << 2;
7277 mask |= (INTVAL (operands[4]) - 4) << 4;
7278 mask |= (INTVAL (operands[5]) - 4) << 6;
7279 operands[2] = GEN_INT (mask);
7281 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7283 [(set_attr "type" "sselog")
7284 (set_attr "prefix_rep" "1")
7285 (set_attr "prefix_data16" "0")
7286 (set_attr "prefix" "maybe_vex")
7287 (set_attr "length_immediate" "1")
7288 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1, register or memory)
;; into a V4SI register.  The preparation statement supplies operand 2 as
;; the V4SImode zero constant.
7290 (define_expand "sse2_loadd"
7291 [(set (match_operand:V4SI 0 "register_operand" "")
7294 (match_operand:SI 1 "nonimmediate_operand" ""))
7298 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX forms for placing an SImode value (operand 2) into a V4SI vector
;; whose remaining contents come from operand 1 (zero or a register).
;; Three alternatives:
;;   0: memory source, operand 1 constant "C"  -> vmovd
;;   1: general-register source, "Yi" dest     -> vmovd
;;   2: SSE-register source merged with op 1   -> vmovss (V4SF mode)
7300 (define_insn "*avx_loadld"
7301 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7304 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7305 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7309 vmovd\t{%2, %0|%0, %2}
7310 vmovd\t{%2, %0|%0, %2}
7311 vmovss\t{%2, %1, %0|%0, %1, %2}"
7312 [(set_attr "type" "ssemov")
7313 (set_attr "prefix" "vex")
7314 (set_attr "mode" "TI,TI,V4SF")])
;; Non-VEX forms for placing an SImode value (operand 2) into a V4SI
;; vector whose remaining contents come from operand 1 (zero or a
;; register).  Four alternatives:
;;   0: memory source, "Y2" dest, op 1 "C"        -> movd
;;   1: general-register source, "Yi" dest        -> movd
;;   2: memory source, op 1 "C"                   -> movss (V4SF mode)
;;   3: SSE-register source, op 1 tied to dest    -> movss (SF mode)
7316 (define_insn "sse2_loadld"
7317 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7320 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7321 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7325 movd\t{%2, %0|%0, %2}
7326 movd\t{%2, %0|%0, %2}
7327 movss\t{%2, %0|%0, %2}
7328 movss\t{%2, %0|%0, %2}"
7329 [(set_attr "type" "ssemov")
7330 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extracts element 0 of a V4SI register into an SImode destination
;; (memory or SSE register, or a general register fed from "Yi").
;; After reload the insn is split into a plain SImode move whenever
;; inter-unit moves are allowed, the destination is memory, or the
;; destination is not a general register.  For the split, operand 1 is
;; rewritten as an SImode REG with the same register number.
7332 (define_insn_and_split "sse2_stored"
7333 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7335 (match_operand:V4SI 1 "register_operand" "x,Yi")
7336 (parallel [(const_int 0)])))]
7339 "&& reload_completed
7340 && (TARGET_INTER_UNIT_MOVES
7341 || MEM_P (operands [0])
7342 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7343 [(set (match_dup 0) (match_dup 1))]
7345 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extracts element N (operand 2, 0..3) of a V4SI value that lives in
;; offsettable memory ("o") into a general register.
;; The split code emits an ordinary SImode load from the source address
;; adjusted by N * 4 bytes (adjust_address).
7348 (define_insn_and_split "*vec_ext_v4si_mem"
7349 [(set (match_operand:SI 0 "register_operand" "=r")
7351 (match_operand:V4SI 1 "memory_operand" "o")
7352 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7358 int i = INTVAL (operands[2]);
7360 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of a V2DI register (operand 1) to a
;; DImode register-or-memory destination (operand 0).
7364 (define_expand "sse_storeq"
7365 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7367 (match_operand:V2DI 1 "register_operand" "")
7368 (parallel [(const_int 0)])))]
;; 64-bit-target form of the element-0 extract from V2DI to DImode.
;; Rejected when both operands are memory.  Three alternatives:
;;   0: SSE register -> memory or SSE register
;;   1: "Yi" register -> general register
;;   2: offsettable memory -> general register, emitted as %vmov{q}
;; Only the last alternative sets explicit type/prefix/mode attributes
;; (imov, maybe_vex, DI); the first two leave them at "*".
7372 (define_insn "*sse2_storeq_rex64"
7373 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7375 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7376 (parallel [(const_int 0)])))]
7377 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7381 %vmov{q}\t{%1, %0|%0, %1}"
7382 [(set_attr "type" "*,*,imov")
7383 (set_attr "prefix" "*,*,maybe_vex")
7384 (set_attr "mode" "*,*,DI")])
;; Element-0 extract from a V2DI SSE register to a DImode destination
;; (memory or SSE register).
;; The constraint-free pattern of the same shape that follows rewrites the
;; extract as a plain DImode move when inter-unit moves are allowed, the
;; destination is memory, or the destination is not a general register;
;; operand 1 is replaced by a DImode REG with the same register number.
7386 (define_insn "*sse2_storeq"
7387 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7389 (match_operand:V2DI 1 "register_operand" "x")
7390 (parallel [(const_int 0)])))]
7395 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7397 (match_operand:V2DI 1 "register_operand" "")
7398 (parallel [(const_int 0)])))]
7401 && (TARGET_INTER_UNIT_MOVES
7402 || MEM_P (operands [0])
7403 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7404 [(set (match_dup 0) (match_dup 1))]
7406 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; VEX forms for extracting element 1 of a V2DI value into DImode.
;; Rejected when both operands are memory.  Four alternatives:
;;   0: SSE register -> memory           vmovhps
;;   1: SSE register -> SSE register     vpsrldq by 8 bytes
;;   2: offsettable memory -> SSE reg    vmovq from %H1
;;   3: offsettable memory -> GPR        vmov{q} from %H1
;; (%H1 presumably addresses the upper 8 bytes of the memory operand --
;; confirm against the i386 operand-printing code.)
7409 (define_insn "*vec_extractv2di_1_rex64_avx"
7410 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7412 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7413 (parallel [(const_int 1)])))]
7416 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7418 vmovhps\t{%1, %0|%0, %1}
7419 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7420 vmovq\t{%H1, %0|%0, %H1}
7421 vmov{q}\t{%H1, %0|%0, %H1}"
7422 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7423 (set_attr "length_immediate" "*,1,*,*")
7424 (set_attr "memory" "*,none,*,*")
7425 (set_attr "prefix" "vex")
7426 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit-target forms for extracting element 1 of a V2DI value into
;; DImode.  Rejected when both operands are memory.  Four alternatives:
;;   0: SSE register -> memory           movhps
;;   1: in-place (operand 1 tied to 0)   psrldq by 8 bytes
;;   2: offsettable memory -> SSE reg    movq from %H1
;;   3: offsettable memory -> GPR        mov{q} from %H1
;; (%H1 presumably addresses the upper 8 bytes of the memory operand --
;; confirm against the i386 operand-printing code.)
7428 (define_insn "*vec_extractv2di_1_rex64"
7429 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7431 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7432 (parallel [(const_int 1)])))]
7433 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7435 movhps\t{%1, %0|%0, %1}
7436 psrldq\t{$8, %0|%0, 8}
7437 movq\t{%H1, %0|%0, %H1}
7438 mov{q}\t{%H1, %0|%0, %H1}"
7439 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7440 (set_attr "length_immediate" "*,1,*,*")
7441 (set_attr "memory" "*,none,*,*")
7442 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI operand into a DImode destination using
;; VEX-encoded instructions, with no general-register alternative.
;; The first part of the condition is not visible in this listing; the
;; visible part forbids memory-to-memory.
7444 (define_insn "*vec_extractv2di_1_avx"
7445 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7447 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7448 (parallel [(const_int 1)])))]
7451 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7453 vmovhps\t{%1, %0|%0, %1}
7454 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7455 vmovq\t{%H1, %0|%0, %H1}"
7456 [(set_attr "type" "ssemov,sseishft1,ssemov")
7457 (set_attr "length_immediate" "*,1,*")
7458 (set_attr "memory" "*,none,*")
7459 (set_attr "prefix" "vex")
7460 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI operand into a DImode destination with SSE2
;; instructions (movhps / in-place psrldq / movq from the high half).
;; The first part of the condition is not visible in this listing.
7462 (define_insn "*vec_extractv2di_1_sse2"
7463 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7465 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7466 (parallel [(const_int 1)])))]
7468 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7470 movhps\t{%1, %0|%0, %1}
7471 psrldq\t{$8, %0|%0, 8}
7472 movq\t{%H1, %0|%0, %H1}"
7473 [(set_attr "type" "ssemov,sseishft1,ssemov")
7474 (set_attr "length_immediate" "*,1,*")
7475 (set_attr "memory" "*,none,*")
7476 (set_attr "mode" "V2SF,TI,TI")])
;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only variant (enabled when SSE2 is not available): extracts
;; element 1 of a V2DI operand using the float-domain moves
;; movhps / movhlps / movlps.
7479 (define_insn "*vec_extractv2di_1_sse"
7480 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7482 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7483 (parallel [(const_int 1)])))]
7484 "!TARGET_SSE2 && TARGET_SSE
7485 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7487 movhps\t{%1, %0|%0, %1}
7488 movhlps\t{%1, %0|%0, %1}
7489 movlps\t{%H1, %0|%0, %H1}"
7490 [(set_attr "type" "ssemov")
7491 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX broadcast of an SImode value into a V4SI register: vpshufd with
;; selector 0 from a register, or vbroadcastss from memory.
;; The duplicating operator line and the condition are not visible here.
7493 (define_insn "*vec_dupv4si_avx"
7494 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7496 (match_operand:SI 1 "register_operand" "x,m")))]
7499 vpshufd\t{$0, %1, %0|%0, %1, 0}
7500 vbroadcastss\t{%1, %0|%0, %1}"
7501 [(set_attr "type" "sselog1,ssemov")
7502 (set_attr "length_immediate" "1,0")
7503 (set_attr "prefix_extra" "0,1")
7504 (set_attr "prefix" "vex")
7505 (set_attr "mode" "TI,V4SF")])
;; Broadcast of an SImode register into V4SI: pshufd (optionally
;; VEX-prefixed via %v) with selector 0, or an in-place shufps when the
;; source is tied to the destination.  The condition is not visible here.
7507 (define_insn "*vec_dupv4si"
7508 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7510 (match_operand:SI 1 "register_operand" " Y2,0")))]
7513 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7514 shufps\t{$0, %0, %0|%0, %0, 0}"
7515 [(set_attr "type" "sselog1")
7516 (set_attr "length_immediate" "1")
7517 (set_attr "mode" "TI,V4SF")])
;; AVX broadcast of a DImode value into V2DI: vpunpcklqdq of the register
;; with itself, or vmovddup from memory.  The condition is not visible here.
7519 (define_insn "*vec_dupv2di_avx"
7520 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7522 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7525 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7526 vmovddup\t{%1, %0|%0, %1}"
7527 [(set_attr "type" "sselog1")
7528 (set_attr "prefix" "vex")
7529 (set_attr "mode" "TI,DF")])
;; SSE3 broadcast of a DImode value into V2DI.  The memory alternative
;; uses movddup; the template for the tied-register alternative and the
;; condition are not visible in this listing.
7531 (define_insn "*vec_dupv2di_sse3"
7532 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7534 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7538 movddup\t{%1, %0|%0, %1}"
7539 [(set_attr "type" "sselog1")
7540 (set_attr "mode" "TI,DF")])
;; Generic broadcast of a DImode register into V2DI; in both alternatives
;; the source is tied to the destination.  The condition and the output
;; templates are not visible in this listing.
7542 (define_insn "*vec_dupv2di"
7543 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7545 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7550 [(set_attr "type" "sselog1,ssemov")
7551 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from two SImode operands when AVX is in use.
;; Alternatives 0-2 target SSE registers with VEX-encoded instructions
;; (vpinsrd, vpunpckldq, vmovd when operand 2 is the "C" constant).
;; Alternatives 3-4 target MMX registers and keep the legacy encoding,
;; hence the per-alternative "prefix" attribute below.
;; The condition is not visible in this listing.
7553 (define_insn "*vec_concatv2si_avx"
7554 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7556 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7557 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7560 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7561 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7562 vmovd\t{%1, %0|%0, %1}
7563 punpckldq\t{%2, %0|%0, %2}
7564 movd\t{%1, %0|%0, %1}"
7565 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7566 (set_attr "prefix_extra" "1,*,*,*,*")
7567 (set_attr "length_immediate" "1,*,*,*,*")
7568 (set (attr "prefix")
7569 (if_then_else (eq_attr "alternative" "3,4")
7570 (const_string "orig")
7571 (const_string "vex")))
7572 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Build a V2SI value from two SImode operands, SSE4.1 variant.
;; Two-operand forms: operand 1 is tied to the destination for pinsrd and
;; punpckldq.  Alternatives 3-4 are the MMX-register forms.
;; The condition is not visible in this listing.
7574 (define_insn "*vec_concatv2si_sse4_1"
7575 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7577 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7578 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7581 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7582 punpckldq\t{%2, %0|%0, %2}
7583 movd\t{%1, %0|%0, %1}
7584 punpckldq\t{%2, %0|%0, %2}
7585 movd\t{%1, %0|%0, %1}"
7586 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7587 (set_attr "prefix_extra" "1,*,*,*,*")
7588 (set_attr "length_immediate" "1,*,*,*,*")
7589 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from two SImode operands: punpckldq when operand 2
;; is a register, movd when operand 2 is the "C" constant; the same pair
;; is provided for SSE ("x") and MMX ("*y") destinations.
;; The condition is not visible in this listing.
7594 (define_insn "*vec_concatv2si_sse2"
7595 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7597 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7598 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7601 punpckldq\t{%2, %0|%0, %2}
7602 movd\t{%1, %0|%0, %1}
7603 punpckldq\t{%2, %0|%0, %2}
7604 movd\t{%1, %0|%0, %1}"
7605 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7606 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from two SImode operands using float-domain
;; instructions for the SSE-register alternatives (unpcklps, movss) and
;; punpckldq / movd for the MMX-register alternatives.
;; The condition is not visible in this listing.
7608 (define_insn "*vec_concatv2si_sse"
7609 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7611 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7612 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7615 unpcklps\t{%2, %0|%0, %2}
7616 movss\t{%1, %0|%0, %1}
7617 punpckldq\t{%2, %0|%0, %2}
7618 movd\t{%1, %0|%0, %1}"
7619 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7620 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from two V2SI halves with AVX three-operand forms:
;; vpunpcklqdq when operand 2 is a register, vmovhps when it is in memory.
;; The condition is not visible in this listing.
7622 (define_insn "*vec_concatv4si_1_avx"
7623 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7625 (match_operand:V2SI 1 "register_operand" " x,x")
7626 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7629 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7630 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7631 [(set_attr "type" "sselog,ssemov")
7632 (set_attr "prefix" "vex")
7633 (set_attr "mode" "TI,V2SF")])
;; Build a V4SI value from two V2SI halves with two-operand forms;
;; operand 1 is tied to the destination in every alternative.
;; Emits punpcklqdq, movlhps, or movhps (memory source).
;; The condition is not visible in this listing.
7635 (define_insn "*vec_concatv4si_1"
7636 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7638 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7639 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7642 punpcklqdq\t{%2, %0|%0, %2}
7643 movlhps\t{%2, %0|%0, %2}
7644 movhps\t{%2, %0|%0, %2}"
7645 [(set_attr "type" "sselog,ssemov,ssemov")
7646 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI value from two DImode operands on 32-bit targets with AVX.
;; Alternative 1 moves from an MMX register with movq2dq, which keeps the
;; legacy encoding; all other alternatives are VEX-encoded, hence the
;; per-alternative "prefix" attribute.
7648 (define_insn "*vec_concatv2di_avx"
7649 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7651 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7652 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7653 "!TARGET_64BIT && TARGET_AVX"
7655 vmovq\t{%1, %0|%0, %1}
7656 movq2dq\t{%1, %0|%0, %1}
7657 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7658 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7659 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7660 (set (attr "prefix")
7661 (if_then_else (eq_attr "alternative" "1")
7662 (const_string "orig")
7663 (const_string "vex")))
7664 (set_attr "mode" "TI,TI,TI,V2SF")])
;; Build a V2DI value from two DImode operands on 32-bit targets with SSE.
;; When operand 2 is the "C" constant the low quadword is simply moved in
;; (movq / movq2dq); otherwise operand 1 is tied to the destination and the
;; high half is merged with punpcklqdq, movlhps or movhps.
7666 (define_insn "vec_concatv2di"
7667 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7669 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7670 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7671 "!TARGET_64BIT && TARGET_SSE"
7673 movq\t{%1, %0|%0, %1}
7674 movq2dq\t{%1, %0|%0, %1}
7675 punpcklqdq\t{%2, %0|%0, %2}
7676 movlhps\t{%2, %0|%0, %2}
7677 movhps\t{%2, %0|%0, %2}"
7678 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7679 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; Build a V2DI value from two DImode operands on 64-bit targets with AVX.
;; Alternative 0 inserts a general-register or memory quadword with
;; vpinsrq; alternative 3 (MMX source, movq2dq) keeps the legacy encoding,
;; all others are VEX-encoded.
7681 (define_insn "*vec_concatv2di_rex64_avx"
7682 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7684 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7685 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7686 "TARGET_64BIT && TARGET_AVX"
7688 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7689 vmovq\t{%1, %0|%0, %1}
7690 vmovq\t{%1, %0|%0, %1}
7691 movq2dq\t{%1, %0|%0, %1}
7692 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7693 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7694 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7695 (set_attr "prefix_extra" "1,*,*,*,*,*")
7696 (set_attr "length_immediate" "1,*,*,*,*,*")
7697 (set (attr "prefix")
7698 (if_then_else (eq_attr "alternative" "3")
7699 (const_string "orig")
7700 (const_string "vex")))
7701 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; Build a V2DI value from two DImode operands on 64-bit targets with
;; SSE4.1 (two-operand forms; pinsrq for the general-register/memory case).
;; NOTE(review): alternatives 4 and 5 have identical constraints
;; (x <- 0, x) and differ only in the instruction chosen
;; (punpcklqdq vs. movlhps).
7703 (define_insn "*vec_concatv2di_rex64_sse4_1"
7704 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7706 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7707 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7708 "TARGET_64BIT && TARGET_SSE4_1"
7710 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7711 movq\t{%1, %0|%0, %1}
7712 movq\t{%1, %0|%0, %1}
7713 movq2dq\t{%1, %0|%0, %1}
7714 punpcklqdq\t{%2, %0|%0, %2}
7715 movlhps\t{%2, %0|%0, %2}
7716 movhps\t{%2, %0|%0, %2}"
7717 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7718 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7719 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7720 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7721 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; Build a V2DI value from two DImode operands on 64-bit targets with
;; plain SSE/SSE2.  Alternative 1 (general register source, movq) is the
;; only one marked as needing a REX prefix.
7723 (define_insn "*vec_concatv2di_rex64_sse"
7724 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7726 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7727 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7728 "TARGET_64BIT && TARGET_SSE"
7730 movq\t{%1, %0|%0, %1}
7731 movq\t{%1, %0|%0, %1}
7732 movq2dq\t{%1, %0|%0, %1}
7733 punpcklqdq\t{%2, %0|%0, %2}
7734 movlhps\t{%2, %0|%0, %2}
7735 movhps\t{%2, %0|%0, %2}"
7736 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7737 (set_attr "prefix_rex" "*,1,*,*,*,*")
7738 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander: widen a V16QI register to V8HI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true); by the pattern name these presumably mean unsigned /
;; high half.  The test choosing between the helpers is not visible here.
7740 (define_expand "vec_unpacku_hi_v16qi"
7741 [(match_operand:V8HI 0 "register_operand" "")
7742 (match_operand:V16QI 1 "register_operand" "")]
7746 ix86_expand_sse4_unpack (operands, true, true);
7748 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen a V16QI register to V8HI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true); by the pattern name these presumably mean signed /
;; high half.  The test choosing between the helpers is not visible here.
7752 (define_expand "vec_unpacks_hi_v16qi"
7753 [(match_operand:V8HI 0 "register_operand" "")
7754 (match_operand:V16QI 1 "register_operand" "")]
7758 ix86_expand_sse4_unpack (operands, false, true);
7760 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen a V16QI register to V8HI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false); by the pattern name these presumably mean unsigned /
;; low half.  The test choosing between the helpers is not visible here.
7764 (define_expand "vec_unpacku_lo_v16qi"
7765 [(match_operand:V8HI 0 "register_operand" "")
7766 (match_operand:V16QI 1 "register_operand" "")]
7770 ix86_expand_sse4_unpack (operands, true, false);
7772 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen a V16QI register to V8HI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false); by the pattern name these presumably mean signed /
;; low half.  The test choosing between the helpers is not visible here.
7776 (define_expand "vec_unpacks_lo_v16qi"
7777 [(match_operand:V8HI 0 "register_operand" "")
7778 (match_operand:V16QI 1 "register_operand" "")]
7782 ix86_expand_sse4_unpack (operands, false, false);
7784 ix86_expand_sse_unpack (operands, false, false);
;; Expander: widen a V8HI register to V4SI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true); by the pattern name these presumably mean unsigned /
;; high half.  The test choosing between the helpers is not visible here.
7788 (define_expand "vec_unpacku_hi_v8hi"
7789 [(match_operand:V4SI 0 "register_operand" "")
7790 (match_operand:V8HI 1 "register_operand" "")]
7794 ix86_expand_sse4_unpack (operands, true, true);
7796 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen a V8HI register to V4SI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true); by the pattern name these presumably mean signed /
;; high half.  The test choosing between the helpers is not visible here.
7800 (define_expand "vec_unpacks_hi_v8hi"
7801 [(match_operand:V4SI 0 "register_operand" "")
7802 (match_operand:V8HI 1 "register_operand" "")]
7806 ix86_expand_sse4_unpack (operands, false, true);
7808 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen a V8HI register to V4SI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false); by the pattern name these presumably mean unsigned /
;; low half.  The test choosing between the helpers is not visible here.
7812 (define_expand "vec_unpacku_lo_v8hi"
7813 [(match_operand:V4SI 0 "register_operand" "")
7814 (match_operand:V8HI 1 "register_operand" "")]
7818 ix86_expand_sse4_unpack (operands, true, false);
7820 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen a V8HI register to V4SI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false); by the pattern name these presumably mean signed /
;; low half.  The test choosing between the helpers is not visible here.
7824 (define_expand "vec_unpacks_lo_v8hi"
7825 [(match_operand:V4SI 0 "register_operand" "")
7826 (match_operand:V8HI 1 "register_operand" "")]
7830 ix86_expand_sse4_unpack (operands, false, false);
7832 ix86_expand_sse_unpack (operands, false, false);
;; Expander: widen a V4SI register to V2DI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, true); by the pattern name these presumably mean unsigned /
;; high half.  The test choosing between the helpers is not visible here.
7836 (define_expand "vec_unpacku_hi_v4si"
7837 [(match_operand:V2DI 0 "register_operand" "")
7838 (match_operand:V4SI 1 "register_operand" "")]
7842 ix86_expand_sse4_unpack (operands, true, true);
7844 ix86_expand_sse_unpack (operands, true, true);
;; Expander: widen a V4SI register to V2DI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true); by the pattern name these presumably mean signed /
;; high half.  The test choosing between the helpers is not visible here.
7848 (define_expand "vec_unpacks_hi_v4si"
7849 [(match_operand:V2DI 0 "register_operand" "")
7850 (match_operand:V4SI 1 "register_operand" "")]
7854 ix86_expand_sse4_unpack (operands, false, true);
7856 ix86_expand_sse_unpack (operands, false, true);
;; Expander: widen a V4SI register to V2DI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false); by the pattern name these presumably mean unsigned /
;; low half.  The test choosing between the helpers is not visible here.
7860 (define_expand "vec_unpacku_lo_v4si"
7861 [(match_operand:V2DI 0 "register_operand" "")
7862 (match_operand:V4SI 1 "register_operand" "")]
7866 ix86_expand_sse4_unpack (operands, true, false);
7868 ix86_expand_sse_unpack (operands, true, false);
;; Expander: widen a V4SI register to V2DI.  Calls either
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false); by the pattern name these presumably mean signed /
;; low half.  The test choosing between the helpers is not visible here.
7872 (define_expand "vec_unpacks_lo_v4si"
7873 [(match_operand:V2DI 0 "register_operand" "")
7874 (match_operand:V4SI 1 "register_operand" "")]
7878 ix86_expand_sse4_unpack (operands, false, false);
7880 ix86_expand_sse_unpack (operands, false, false);
7884 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7888 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI average pattern: combines operands 1 and 2 with
;; a vector of sixteen 1s (the rounding term), after canonicalizing the
;; operands via ix86_fixup_binary_operands_no_copy for PLUS.
;; The arithmetic operator lines and the condition are not visible here.
7890 (define_expand "sse2_uavgv16qi3"
7891 [(set (match_operand:V16QI 0 "register_operand" "")
7897 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7899 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7900 (const_vector:V16QI [(const_int 1) (const_int 1)
7901 (const_int 1) (const_int 1)
7902 (const_int 1) (const_int 1)
7903 (const_int 1) (const_int 1)
7904 (const_int 1) (const_int 1)
7905 (const_int 1) (const_int 1)
7906 (const_int 1) (const_int 1)
7907 (const_int 1) (const_int 1)]))
7910 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI average: emits the three-operand vpavgb.
;; Operand 1 is marked commutative ("%") with operand 2.
7912 (define_insn "*avx_uavgv16qi3"
7913 [(set (match_operand:V16QI 0 "register_operand" "=x")
7919 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7921 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7922 (const_vector:V16QI [(const_int 1) (const_int 1)
7923 (const_int 1) (const_int 1)
7924 (const_int 1) (const_int 1)
7925 (const_int 1) (const_int 1)
7926 (const_int 1) (const_int 1)
7927 (const_int 1) (const_int 1)
7928 (const_int 1) (const_int 1)
7929 (const_int 1) (const_int 1)]))
7931 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7932 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7933 [(set_attr "type" "sseiadd")
7934 (set_attr "prefix" "vex")
7935 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI average: emits the two-operand pavgb, so
;; operand 1 is tied to the destination (and commutative with operand 2).
7937 (define_insn "*sse2_uavgv16qi3"
7938 [(set (match_operand:V16QI 0 "register_operand" "=x")
7944 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7946 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7947 (const_vector:V16QI [(const_int 1) (const_int 1)
7948 (const_int 1) (const_int 1)
7949 (const_int 1) (const_int 1)
7950 (const_int 1) (const_int 1)
7951 (const_int 1) (const_int 1)
7952 (const_int 1) (const_int 1)
7953 (const_int 1) (const_int 1)
7954 (const_int 1) (const_int 1)]))
7956 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7957 "pavgb\t{%2, %0|%0, %2}"
7958 [(set_attr "type" "sseiadd")
7959 (set_attr "prefix_data16" "1")
7960 (set_attr "mode" "TI")])
;; Expander for the V8HI average pattern: combines operands 1 and 2 with
;; a vector of eight 1s (the rounding term), after canonicalizing the
;; operands via ix86_fixup_binary_operands_no_copy for PLUS.
;; The arithmetic operator lines and the condition are not visible here.
7962 (define_expand "sse2_uavgv8hi3"
7963 [(set (match_operand:V8HI 0 "register_operand" "")
7969 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7971 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7972 (const_vector:V8HI [(const_int 1) (const_int 1)
7973 (const_int 1) (const_int 1)
7974 (const_int 1) (const_int 1)
7975 (const_int 1) (const_int 1)]))
7978 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI average: emits the three-operand vpavgw.
;; Operand 1 is marked commutative ("%") with operand 2.
7980 (define_insn "*avx_uavgv8hi3"
7981 [(set (match_operand:V8HI 0 "register_operand" "=x")
7987 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7989 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7990 (const_vector:V8HI [(const_int 1) (const_int 1)
7991 (const_int 1) (const_int 1)
7992 (const_int 1) (const_int 1)
7993 (const_int 1) (const_int 1)]))
7995 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7996 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7997 [(set_attr "type" "sseiadd")
7998 (set_attr "prefix" "vex")
7999 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI average: emits the two-operand pavgw, so
;; operand 1 is tied to the destination (and commutative with operand 2).
8001 (define_insn "*sse2_uavgv8hi3"
8002 [(set (match_operand:V8HI 0 "register_operand" "=x")
8008 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8010 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8011 (const_vector:V8HI [(const_int 1) (const_int 1)
8012 (const_int 1) (const_int 1)
8013 (const_int 1) (const_int 1)
8014 (const_int 1) (const_int 1)]))
8016 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8017 "pavgw\t{%2, %0|%0, %2}"
8018 [(set_attr "type" "sseiadd")
8019 (set_attr "prefix_data16" "1")
8020 (set_attr "mode" "TI")])
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; Hence the operation is modelled as an opaque unspec of two V16QI
;; inputs producing V2DI; this AVX form emits the three-operand vpsadbw.
;; The unspec code and the condition are not visible in this listing.
8024 (define_insn "*avx_psadbw"
8025 [(set (match_operand:V2DI 0 "register_operand" "=x")
8026 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8027 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8030 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8031 [(set_attr "type" "sseiadd")
8032 (set_attr "prefix" "vex")
8033 (set_attr "mode" "TI")])
;; SSE2 psadbw, modelled as an unspec of two V16QI inputs producing V2DI.
;; Two-operand form: operand 1 is tied to the destination.
;; The unspec code and the condition are not visible in this listing.
8035 (define_insn "sse2_psadbw"
8036 [(set (match_operand:V2DI 0 "register_operand" "=x")
8037 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8038 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8041 "psadbw\t{%2, %0|%0, %2}"
8042 [(set_attr "type" "sseiadd")
8043 (set_attr "atom_unit" "simul")
8044 (set_attr "prefix_data16" "1")
8045 (set_attr "mode" "TI")])
;; 256-bit vmovmskps/vmovmskpd: produces an SImode result in a general
;; register from an AVX256MODEF2P vector register.
;; NOTE(review): type is "ssecvt" here but "ssemov" in the 128-bit movmsk
;; pattern that follows -- confirm whether the difference is intended.
8047 (define_insn "avx_movmsk<ssemodesuffix>256"
8048 [(set (match_operand:SI 0 "register_operand" "=r")
8050 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8052 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8053 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8054 [(set_attr "type" "ssecvt")
8055 (set_attr "prefix" "vex")
8056 (set_attr "mode" "<MODE>")])
;; 128-bit movmskps/movmskpd: produces an SImode result in a general
;; register from an SSEMODEF2P vector register.  The %v in the template
;; lets the same pattern emit the VEX form (prefix "maybe_vex").
8058 (define_insn "<sse>_movmsk<ssemodesuffix>"
8059 [(set (match_operand:SI 0 "register_operand" "=r")
8061 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8063 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8064 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8065 [(set_attr "type" "ssemov")
8066 (set_attr "prefix" "maybe_vex")
8067 (set_attr "mode" "<MODE>")])
;; pmovmskb: produces an SImode result in a general register from a V16QI
;; vector register, modelled as an unspec.  The unspec code and the
;; condition are not visible in this listing.
8069 (define_insn "sse2_pmovmskb"
8070 [(set (match_operand:SI 0 "register_operand" "=r")
8071 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8074 "%vpmovmskb\t{%1, %0|%0, %1}"
8075 [(set_attr "type" "ssemov")
8076 (set_attr "prefix_data16" "1")
8077 (set_attr "prefix" "maybe_vex")
8078 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination set from an unspec
;; of two V16QI register operands.  The remainder of the unspec and the
;; condition are not visible in this listing.
8080 (define_expand "sse2_maskmovdqu"
8081 [(set (match_operand:V16QI 0 "memory_operand" "")
8082 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8083 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination address is an SImode register
;; constrained to "D"; it does not appear in the output template because
;; the instruction uses it implicitly.  The old memory contents are an
;; input of the unspec as well.
8089 (define_insn "*sse2_maskmovdqu"
8090 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8091 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8092 (match_operand:V16QI 2 "register_operand" "x")
8093 (mem:V16QI (match_dup 0))]
8095 "TARGET_SSE2 && !TARGET_64BIT"
8096 ;; @@@ check ordering of operands in intel/nonintel syntax
8097 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8098 [(set_attr "type" "ssemov")
8099 (set_attr "prefix_data16" "1")
8100 ;; The implicit %edi operand confuses default length_vex computation.
8101 (set_attr "length_vex" "3")
8102 (set_attr "prefix" "maybe_vex")
8103 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern but with a DImode
;; address register.  length_vex is computed by hand: one byte longer when
;; operand 2 is a REX-only SSE register (REGNO >= FIRST_REX_SSE_REG).
8105 (define_insn "*sse2_maskmovdqu_rex64"
8106 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8107 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8108 (match_operand:V16QI 2 "register_operand" "x")
8109 (mem:V16QI (match_dup 0))]
8111 "TARGET_SSE2 && TARGET_64BIT"
8112 ;; @@@ check ordering of operands in intel/nonintel syntax
8113 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8114 [(set_attr "type" "ssemov")
8115 (set_attr "prefix_data16" "1")
8116 ;; The implicit %rdi operand confuses default length_vex computation.
8117 (set (attr "length_vex")
8118 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8119 (set_attr "prefix" "maybe_vex")
8120 (set_attr "mode" "TI")])
;; ldmxcsr: volatile unspec taking an SImode memory operand; the "memory"
;; attribute marks it as a load.  The unspec code, condition and output
;; template are not visible in this listing.
8122 (define_insn "sse_ldmxcsr"
8123 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8127 [(set_attr "type" "sse")
8128 (set_attr "atom_sse_attr" "mxcsr")
8129 (set_attr "prefix" "maybe_vex")
8130 (set_attr "memory" "load")])
;; stmxcsr: sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR; the "memory" attribute marks it as a store.
;; The condition and output template are not visible in this listing.
8132 (define_insn "sse_stmxcsr"
8133 [(set (match_operand:SI 0 "memory_operand" "=m")
8134 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8137 [(set_attr "type" "sse")
8138 (set_attr "atom_sse_attr" "mxcsr")
8139 (set_attr "prefix" "maybe_vex")
8140 (set_attr "memory" "store")])
;; Expander for sfence.  Operand 0 is created here as a volatile BLKmode
;; MEM whose address is a SCRATCH, and the fence is expressed as an
;; UNSPEC_SFENCE of that memory.
8142 (define_expand "sse_sfence"
8144 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8145 "TARGET_SSE || TARGET_3DNOW_A"
8147 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8148 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence RTL (a BLKmode set from UNSPEC_SFENCE of the
;; same operand).  length_address is 0 and memory is "unknown".
;; The output template is not visible in this listing.
8151 (define_insn "*sse_sfence"
8152 [(set (match_operand:BLK 0 "" "")
8153 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8154 "TARGET_SSE || TARGET_3DNOW_A"
8156 [(set_attr "type" "sse")
8157 (set_attr "length_address" "0")
8158 (set_attr "atom_sse_attr" "fence")
8159 (set_attr "memory" "unknown")])
;; clflush: volatile unspec taking an address operand (constraint "p").
;; The unspec code, condition and output template are not visible here.
8161 (define_insn "sse2_clflush"
8162 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8166 [(set_attr "type" "sse")
8167 (set_attr "atom_sse_attr" "fence")
8168 (set_attr "memory" "unknown")])
;; Expander for mfence.  Operand 0 is created here as a volatile BLKmode
;; MEM whose address is a SCRATCH, and the fence is expressed as an
;; UNSPEC_MFENCE of that memory.  The condition is not visible here.
8170 (define_expand "sse2_mfence"
8172 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8175 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8176 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence RTL, enabled for 64-bit targets or SSE2.
;; The output template is not visible in this listing.
8179 (define_insn "*sse2_mfence"
8180 [(set (match_operand:BLK 0 "" "")
8181 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8182 "TARGET_64BIT || TARGET_SSE2"
8184 [(set_attr "type" "sse")
8185 (set_attr "length_address" "0")
8186 (set_attr "atom_sse_attr" "fence")
8187 (set_attr "memory" "unknown")])
;; Expander for lfence.  Operand 0 is created here as a volatile BLKmode
;; MEM whose address is a SCRATCH, and the fence is expressed as an
;; UNSPEC_LFENCE of that memory.  The condition is not visible here.
8189 (define_expand "sse2_lfence"
8191 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8194 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8195 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence RTL.  Unlike the sfence/mfence patterns its
;; atom_sse_attr is "lfence" rather than "fence".  The condition and
;; output template are not visible in this listing.
8198 (define_insn "*sse2_lfence"
8199 [(set (match_operand:BLK 0 "" "")
8200 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8203 [(set_attr "type" "sse")
8204 (set_attr "length_address" "0")
8205 (set_attr "atom_sse_attr" "lfence")
8206 (set_attr "memory" "unknown")])
;; mwait: volatile unspec of two SImode registers pinned by the "a" and
;; "c" constraints; instruction length is fixed at 3 bytes.  The unspec
;; code, condition and output template are not visible in this listing.
8208 (define_insn "sse3_mwait"
8209 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8210 (match_operand:SI 1 "register_operand" "c")]
8213 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8214 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8215 ;; we only need to set up 32bit registers.
8217 [(set_attr "length" "3")])
;; 32-bit monitor: volatile unspec of three SImode registers pinned by the
;; "a", "c" and "d" constraints; instruction length is fixed at 3 bytes.
8219 (define_insn "sse3_monitor"
8220 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8221 (match_operand:SI 1 "register_operand" "c")
8222 (match_operand:SI 2 "register_operand" "d")]
8224 "TARGET_SSE3 && !TARGET_64BIT"
8225 "monitor\t%0, %1, %2"
8226 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 is a DImode register ("a"), while operands 1
;; and 2 stay SImode for the reason given in the comment below.
;; The output template is not visible in this listing.
8228 (define_insn "sse3_monitor64"
8229 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8230 (match_operand:SI 1 "register_operand" "c")
8231 (match_operand:SI 2 "register_operand" "d")]
8233 "TARGET_SSE3 && TARGET_64BIT"
8234 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8235 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8236 ;; zero extended to 64bit, we only need to set up 32bit registers.
8238 [(set_attr "length" "3")])
8240 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8242 ;; SSSE3 instructions
8244 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vphaddw on V8HI.  The visible RTL pairs adjacent HImode elements
;; (0,1), (2,3), (4,5), (6,7) of operand 1 and then of operand 2.
;; The operator lines combining each pair and the condition are not
;; visible in this listing.
8246 (define_insn "*avx_phaddwv8hi3"
8247 [(set (match_operand:V8HI 0 "register_operand" "=x")
8253 (match_operand:V8HI 1 "register_operand" "x")
8254 (parallel [(const_int 0)]))
8255 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8257 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8258 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8261 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8262 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8264 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8265 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8270 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8271 (parallel [(const_int 0)]))
8272 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8274 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8275 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8278 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8279 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8281 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8282 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8284 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8285 [(set_attr "type" "sseiadd")
8286 (set_attr "prefix_extra" "1")
8287 (set_attr "prefix" "vex")
8288 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V8HI (two-operand form, operand 1 tied to the
;; destination).  The visible RTL pairs adjacent HImode elements of
;; operand 1 and then of operand 2.  The operator lines combining each
;; pair and the condition are not visible in this listing.
8290 (define_insn "ssse3_phaddwv8hi3"
8291 [(set (match_operand:V8HI 0 "register_operand" "=x")
8297 (match_operand:V8HI 1 "register_operand" "0")
8298 (parallel [(const_int 0)]))
8299 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8301 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8302 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8305 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8306 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8308 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8309 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8314 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8315 (parallel [(const_int 0)]))
8316 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8318 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8319 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8322 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8323 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8325 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8326 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8328 "phaddw\t{%2, %0|%0, %2}"
8329 [(set_attr "type" "sseiadd")
8330 (set_attr "atom_unit" "complex")
8331 (set_attr "prefix_data16" "1")
8332 (set_attr "prefix_extra" "1")
8333 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V4HI in MMX registers ("y"); operand 1 is tied to the
;; destination.  prefix_rex is computed from whether the insn mentions an
;; extended register.  The operator lines combining each element pair and
;; the condition are not visible in this listing.
8335 (define_insn "ssse3_phaddwv4hi3"
8336 [(set (match_operand:V4HI 0 "register_operand" "=y")
8341 (match_operand:V4HI 1 "register_operand" "0")
8342 (parallel [(const_int 0)]))
8343 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8345 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8346 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8350 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8351 (parallel [(const_int 0)]))
8352 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8354 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8355 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8357 "phaddw\t{%2, %0|%0, %2}"
8358 [(set_attr "type" "sseiadd")
8359 (set_attr "atom_unit" "complex")
8360 (set_attr "prefix_extra" "1")
8361 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8362 (set_attr "mode" "DI")])
;; AVX vphaddd on V4SI.  The visible RTL pairs adjacent SImode elements
;; (0,1), (2,3) of operand 1 and then of operand 2.  The operator lines
;; combining each pair and the condition are not visible in this listing.
8364 (define_insn "*avx_phadddv4si3"
8365 [(set (match_operand:V4SI 0 "register_operand" "=x")
8370 (match_operand:V4SI 1 "register_operand" "x")
8371 (parallel [(const_int 0)]))
8372 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8374 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8375 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8379 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8380 (parallel [(const_int 0)]))
8381 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8383 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8384 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8386 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8387 [(set_attr "type" "sseiadd")
8388 (set_attr "prefix_extra" "1")
8389 (set_attr "prefix" "vex")
8390 (set_attr "mode" "TI")])
;; SSSE3 phaddd on V4SI (two-operand form, operand 1 tied to the
;; destination).  The visible RTL pairs adjacent SImode elements of
;; operand 1 and then of operand 2.  The operator lines combining each
;; pair and the condition are not visible in this listing.
8392 (define_insn "ssse3_phadddv4si3"
8393 [(set (match_operand:V4SI 0 "register_operand" "=x")
8398 (match_operand:V4SI 1 "register_operand" "0")
8399 (parallel [(const_int 0)]))
8400 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8402 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8403 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8407 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8408 (parallel [(const_int 0)]))
8409 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8411 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8412 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8414 "phaddd\t{%2, %0|%0, %2}"
8415 [(set_attr "type" "sseiadd")
8416 (set_attr "atom_unit" "complex")
8417 (set_attr "prefix_data16" "1")
8418 (set_attr "prefix_extra" "1")
8419 (set_attr "mode" "TI")])
;; SSSE3 phaddd on V2SI in MMX registers ("y"); operand 1 is tied to the
;; destination.  The visible RTL pairs elements 0 and 1 of operand 1 and
;; of operand 2.  The operator lines combining each pair and the
;; condition are not visible in this listing.
8421 (define_insn "ssse3_phadddv2si3"
8422 [(set (match_operand:V2SI 0 "register_operand" "=y")
8426 (match_operand:V2SI 1 "register_operand" "0")
8427 (parallel [(const_int 0)]))
8428 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8431 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8432 (parallel [(const_int 0)]))
8433 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8435 "phaddd\t{%2, %0|%0, %2}"
8436 [(set_attr "type" "sseiadd")
8437 (set_attr "atom_unit" "complex")
8438 (set_attr "prefix_extra" "1")
8439 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8440 (set_attr "mode" "DI")])
;; AVX vphaddsw on V8HI.  The visible RTL pairs adjacent HImode elements
;; (0,1), (2,3), (4,5), (6,7) of operand 1 and then of operand 2.
;; The operator lines combining each pair and the condition are not
;; visible in this listing.
8442 (define_insn "*avx_phaddswv8hi3"
8443 [(set (match_operand:V8HI 0 "register_operand" "=x")
8449 (match_operand:V8HI 1 "register_operand" "x")
8450 (parallel [(const_int 0)]))
8451 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8453 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8454 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8457 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8458 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8460 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8461 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8466 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8467 (parallel [(const_int 0)]))
8468 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8470 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8471 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8474 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8475 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8477 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8478 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8480 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8481 [(set_attr "type" "sseiadd")
8482 (set_attr "prefix_extra" "1")
8483 (set_attr "prefix" "vex")
8484 (set_attr "mode" "TI")])
;; SSSE3 phaddsw on V8HI (two-operand form, operand 1 tied to the
;; destination).  The visible RTL pairs adjacent HImode elements of
;; operand 1 and then of operand 2.  The operator lines combining each
;; pair and the condition are not visible in this listing.
8486 (define_insn "ssse3_phaddswv8hi3"
8487 [(set (match_operand:V8HI 0 "register_operand" "=x")
8493 (match_operand:V8HI 1 "register_operand" "0")
8494 (parallel [(const_int 0)]))
8495 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8497 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8498 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8501 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8502 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8504 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8505 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8510 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8511 (parallel [(const_int 0)]))
8512 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8514 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8515 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8518 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8519 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8521 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8522 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8524 "phaddsw\t{%2, %0|%0, %2}"
8525 [(set_attr "type" "sseiadd")
8526 (set_attr "atom_unit" "complex")
8527 (set_attr "prefix_data16" "1")
8528 (set_attr "prefix_extra" "1")
8529 (set_attr "mode" "TI")])
;; ssse3_phaddswv4hi3: V4HI counterpart of the "phaddsw" pattern using
;; the "y" register constraint (MMX registers per GCC's i386 constraint
;; list) and insn mode DI.  Operand 1 is tied to operand 0; operand 2 is
;; register-or-memory ("ym").  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8531 (define_insn "ssse3_phaddswv4hi3"
8532 [(set (match_operand:V4HI 0 "register_operand" "=y")
8537 (match_operand:V4HI 1 "register_operand" "0")
8538 (parallel [(const_int 0)]))
8539 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8541 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8542 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8546 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8547 (parallel [(const_int 0)]))
8548 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8550 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8551 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8553 "phaddsw\t{%2, %0|%0, %2}"
8554 [(set_attr "type" "sseiadd")
8555 (set_attr "atom_unit" "complex")
8556 (set_attr "prefix_extra" "1")
8557 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8558 (set_attr "mode" "DI")])
;; *avx_phsubwv8hi3: unnamed (leading "*") three-operand V8HI pattern
;; that prints "vphsubw" and is marked prefix "vex".  Unlike the
;; two-operand form, operand 1 is an independent source register ("x")
;; rather than being tied to the destination.
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8560 (define_insn "*avx_phsubwv8hi3"
8561 [(set (match_operand:V8HI 0 "register_operand" "=x")
8567 (match_operand:V8HI 1 "register_operand" "x")
8568 (parallel [(const_int 0)]))
8569 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8571 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8572 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8575 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8576 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8578 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8579 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8584 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8585 (parallel [(const_int 0)]))
8586 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8588 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8589 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8592 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8593 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8595 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8596 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8598 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8599 [(set_attr "type" "sseiadd")
8600 (set_attr "prefix_extra" "1")
8601 (set_attr "prefix" "vex")
8602 (set_attr "mode" "TI")])
;; ssse3_phsubwv8hi3: two-operand V8HI pattern that prints "phsubw".
;; Operand 1 is tied to the destination (constraint "0"); operand 2 is
;; register-or-memory ("xm").  The selects list elements 0-7 of each
;; input in order.
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8604 (define_insn "ssse3_phsubwv8hi3"
8605 [(set (match_operand:V8HI 0 "register_operand" "=x")
8611 (match_operand:V8HI 1 "register_operand" "0")
8612 (parallel [(const_int 0)]))
8613 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8615 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8616 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8619 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8620 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8622 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8623 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8628 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8629 (parallel [(const_int 0)]))
8630 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8632 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8633 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8636 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8637 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8639 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8640 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8642 "phsubw\t{%2, %0|%0, %2}"
8643 [(set_attr "type" "sseiadd")
8644 (set_attr "atom_unit" "complex")
8645 (set_attr "prefix_data16" "1")
8646 (set_attr "prefix_extra" "1")
8647 (set_attr "mode" "TI")])
;; ssse3_phsubwv4hi3: V4HI counterpart of the "phsubw" pattern using the
;; "y" register constraint (MMX registers per GCC's i386 constraint
;; list) and insn mode DI.  Operand 1 is tied to operand 0; prefix_rex
;; is computed per insn via x86_extended_reg_mentioned_p.
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8649 (define_insn "ssse3_phsubwv4hi3"
8650 [(set (match_operand:V4HI 0 "register_operand" "=y")
8655 (match_operand:V4HI 1 "register_operand" "0")
8656 (parallel [(const_int 0)]))
8657 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8660 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8664 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8665 (parallel [(const_int 0)]))
8666 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8668 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8669 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8671 "phsubw\t{%2, %0|%0, %2}"
8672 [(set_attr "type" "sseiadd")
8673 (set_attr "atom_unit" "complex")
8674 (set_attr "prefix_extra" "1")
8675 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8676 (set_attr "mode" "DI")])
;; *avx_phsubdv4si3: unnamed three-operand V4SI pattern that prints
;; "vphsubd" (prefix "vex").  Operand 1 is an independent source
;; register; operand 2 is register-or-memory.  The selects list SI
;; elements 0-3 of each input.
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8678 (define_insn "*avx_phsubdv4si3"
8679 [(set (match_operand:V4SI 0 "register_operand" "=x")
8684 (match_operand:V4SI 1 "register_operand" "x")
8685 (parallel [(const_int 0)]))
8686 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8688 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8689 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8693 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8694 (parallel [(const_int 0)]))
8695 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8697 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8698 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8700 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8701 [(set_attr "type" "sseiadd")
8702 (set_attr "prefix_extra" "1")
8703 (set_attr "prefix" "vex")
8704 (set_attr "mode" "TI")])
;; ssse3_phsubdv4si3: two-operand V4SI pattern that prints "phsubd".
;; Operand 1 is tied to the destination (constraint "0"); operand 2 is
;; register-or-memory ("xm").
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8706 (define_insn "ssse3_phsubdv4si3"
8707 [(set (match_operand:V4SI 0 "register_operand" "=x")
8712 (match_operand:V4SI 1 "register_operand" "0")
8713 (parallel [(const_int 0)]))
8714 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8716 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8717 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8721 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8722 (parallel [(const_int 0)]))
8723 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8725 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8726 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8728 "phsubd\t{%2, %0|%0, %2}"
8729 [(set_attr "type" "sseiadd")
8730 (set_attr "atom_unit" "complex")
8731 (set_attr "prefix_data16" "1")
8732 (set_attr "prefix_extra" "1")
8733 (set_attr "mode" "TI")])
;; ssse3_phsubdv2si3: V2SI counterpart of the "phsubd" pattern using the
;; "y" register constraint and insn mode DI.  Only elements 0 and 1 of
;; each input are selected.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8735 (define_insn "ssse3_phsubdv2si3"
8736 [(set (match_operand:V2SI 0 "register_operand" "=y")
8740 (match_operand:V2SI 1 "register_operand" "0")
8741 (parallel [(const_int 0)]))
8742 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8745 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8746 (parallel [(const_int 0)]))
8747 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8749 "phsubd\t{%2, %0|%0, %2}"
8750 [(set_attr "type" "sseiadd")
8751 (set_attr "atom_unit" "complex")
8752 (set_attr "prefix_extra" "1")
8753 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8754 (set_attr "mode" "DI")])
;; *avx_phsubswv8hi3: unnamed three-operand V8HI pattern that prints
;; "vphsubsw" (prefix "vex").  Operand 1 is an independent source
;; register; operand 2 is register-or-memory.
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8756 (define_insn "*avx_phsubswv8hi3"
8757 [(set (match_operand:V8HI 0 "register_operand" "=x")
8763 (match_operand:V8HI 1 "register_operand" "x")
8764 (parallel [(const_int 0)]))
8765 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8767 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8768 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8771 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8772 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8774 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8775 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8780 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8781 (parallel [(const_int 0)]))
8782 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8784 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8785 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8788 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8789 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8791 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8792 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8794 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8795 [(set_attr "type" "sseiadd")
8796 (set_attr "prefix_extra" "1")
8797 (set_attr "prefix" "vex")
8798 (set_attr "mode" "TI")])
;; ssse3_phsubswv8hi3: two-operand V8HI pattern that prints "phsubsw".
;; Operand 1 is tied to the destination (constraint "0"); operand 2 is
;; register-or-memory ("xm").
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8800 (define_insn "ssse3_phsubswv8hi3"
8801 [(set (match_operand:V8HI 0 "register_operand" "=x")
8807 (match_operand:V8HI 1 "register_operand" "0")
8808 (parallel [(const_int 0)]))
8809 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8811 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8812 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8815 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8816 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8818 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8819 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8824 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8825 (parallel [(const_int 0)]))
8826 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8828 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8829 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8832 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8833 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8835 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8836 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8838 "phsubsw\t{%2, %0|%0, %2}"
8839 [(set_attr "type" "sseiadd")
8840 (set_attr "atom_unit" "complex")
8841 (set_attr "prefix_data16" "1")
8842 (set_attr "prefix_extra" "1")
8843 (set_attr "mode" "TI")])
;; ssse3_phsubswv4hi3: V4HI counterpart of the "phsubsw" pattern using
;; the "y" register constraint and insn mode DI.  Operand 1 is tied to
;; operand 0; prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): combining rtx codes and the insn condition are not
;; visible in this excerpt.
8845 (define_insn "ssse3_phsubswv4hi3"
8846 [(set (match_operand:V4HI 0 "register_operand" "=y")
8851 (match_operand:V4HI 1 "register_operand" "0")
8852 (parallel [(const_int 0)]))
8853 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8855 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8856 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8860 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8861 (parallel [(const_int 0)]))
8862 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8864 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8865 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8867 "phsubsw\t{%2, %0|%0, %2}"
8868 [(set_attr "type" "sseiadd")
8869 (set_attr "atom_unit" "complex")
8870 (set_attr "prefix_extra" "1")
8871 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8872 (set_attr "mode" "DI")])
;; *avx_pmaddubsw128: unnamed three-operand pattern that prints
;; "vpmaddubsw" (prefix "vex").  Inputs are V16QI, the result is V8HI.
;; Each input is selected twice: once by a parallel starting at index 0
;; and once by a parallel starting at index 1 and ending at 15.
;; NOTE(review): the remaining parallel indices, the extend/multiply/add
;; rtx codes and the insn condition are not visible in this excerpt.
8874 (define_insn "*avx_pmaddubsw128"
8875 [(set (match_operand:V8HI 0 "register_operand" "=x")
8880 (match_operand:V16QI 1 "register_operand" "x")
8881 (parallel [(const_int 0)
8891 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8892 (parallel [(const_int 0)
8902 (vec_select:V16QI (match_dup 1)
8903 (parallel [(const_int 1)
8912 (vec_select:V16QI (match_dup 2)
8913 (parallel [(const_int 1)
8920 (const_int 15)]))))))]
8922 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8923 [(set_attr "type" "sseiadd")
8924 (set_attr "prefix_extra" "1")
8925 (set_attr "prefix" "vex")
8926 (set_attr "mode" "TI")])
;; ssse3_pmaddubsw128: two-operand pattern that prints "pmaddubsw".
;; Inputs are V16QI, the result is V8HI; operand 1 is tied to the
;; destination (constraint "0").  Each input is selected by one parallel
;; starting at index 0 and another starting at index 1.
;; NOTE(review): the remaining parallel indices, the arithmetic rtx
;; codes and the insn condition are not visible in this excerpt.
8928 (define_insn "ssse3_pmaddubsw128"
8929 [(set (match_operand:V8HI 0 "register_operand" "=x")
8934 (match_operand:V16QI 1 "register_operand" "0")
8935 (parallel [(const_int 0)
8945 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8946 (parallel [(const_int 0)
8956 (vec_select:V16QI (match_dup 1)
8957 (parallel [(const_int 1)
8966 (vec_select:V16QI (match_dup 2)
8967 (parallel [(const_int 1)
8974 (const_int 15)]))))))]
8976 "pmaddubsw\t{%2, %0|%0, %2}"
8977 [(set_attr "type" "sseiadd")
8978 (set_attr "atom_unit" "simul")
8979 (set_attr "prefix_data16" "1")
8980 (set_attr "prefix_extra" "1")
8981 (set_attr "mode" "TI")])
;; ssse3_pmaddubsw: V8QI -> V4HI counterpart of the "pmaddubsw" pattern
;; using the "y" register constraint and insn mode DI.  Operand 1 is
;; tied to operand 0; the index-1 parallel ends at 7.  prefix_rex is
;; computed per insn via x86_extended_reg_mentioned_p.
;; NOTE(review): the remaining parallel indices, the arithmetic rtx
;; codes and the insn condition are not visible in this excerpt.
8983 (define_insn "ssse3_pmaddubsw"
8984 [(set (match_operand:V4HI 0 "register_operand" "=y")
8989 (match_operand:V8QI 1 "register_operand" "0")
8990 (parallel [(const_int 0)
8996 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8997 (parallel [(const_int 0)
9003 (vec_select:V8QI (match_dup 1)
9004 (parallel [(const_int 1)
9009 (vec_select:V8QI (match_dup 2)
9010 (parallel [(const_int 1)
9013 (const_int 7)]))))))]
9015 "pmaddubsw\t{%2, %0|%0, %2}"
9016 [(set_attr "type" "sseiadd")
9017 (set_attr "atom_unit" "simul")
9018 (set_attr "prefix_extra" "1")
9019 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9020 (set_attr "mode" "DI")])
;; ssse3_pmulhrswv8hi3: named expander for the V8HI form.  Both inputs
;; are accepted as nonimmediate_operand; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands) before
;; the pattern is emitted (presumably so the operand pair satisfies the
;; matching insns' ix86_binary_operator_ok test -- helper not shown).
;; The pattern includes a V8HI const_vector of eight 1s.
;; NOTE(review): the surrounding arithmetic rtx codes and the expander
;; condition are not visible in this excerpt.
9022 (define_expand "ssse3_pmulhrswv8hi3"
9023 [(set (match_operand:V8HI 0 "register_operand" "")
9030 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9032 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9034 (const_vector:V8HI [(const_int 1) (const_int 1)
9035 (const_int 1) (const_int 1)
9036 (const_int 1) (const_int 1)
9037 (const_int 1) (const_int 1)]))
9040 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *avx_pmulhrswv8hi3: unnamed three-operand V8HI pattern that prints
;; "vpmulhrsw" (prefix "vex").  Enabled when TARGET_AVX holds and
;; ix86_binary_operator_ok (MULT, V8HImode, operands) accepts the
;; operands.  The "%" in operand 1's constraint marks operands 1 and 2
;; as commutative.
;; NOTE(review): the arithmetic rtx codes around the operands and the
;; const_vector of 1s are not visible in this excerpt.
9042 (define_insn "*avx_pmulhrswv8hi3"
9043 [(set (match_operand:V8HI 0 "register_operand" "=x")
9050 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9052 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9054 (const_vector:V8HI [(const_int 1) (const_int 1)
9055 (const_int 1) (const_int 1)
9056 (const_int 1) (const_int 1)
9057 (const_int 1) (const_int 1)]))
9059 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9060 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9061 [(set_attr "type" "sseimul")
9062 (set_attr "prefix_extra" "1")
9063 (set_attr "prefix" "vex")
9064 (set_attr "mode" "TI")])
;; *ssse3_pmulhrswv8hi3: unnamed two-operand V8HI pattern that prints
;; "pmulhrsw".  Enabled when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok (MULT, V8HImode, operands) accepts the
;; operands.  Operand 1 is tied to the destination ("0") and marked
;; commutative with operand 2 ("%").
;; NOTE(review): the arithmetic rtx codes around the operands and the
;; const_vector of 1s are not visible in this excerpt.
9066 (define_insn "*ssse3_pmulhrswv8hi3"
9067 [(set (match_operand:V8HI 0 "register_operand" "=x")
9074 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9076 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9078 (const_vector:V8HI [(const_int 1) (const_int 1)
9079 (const_int 1) (const_int 1)
9080 (const_int 1) (const_int 1)
9081 (const_int 1) (const_int 1)]))
9083 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9084 "pmulhrsw\t{%2, %0|%0, %2}"
9085 [(set_attr "type" "sseimul")
9086 (set_attr "prefix_data16" "1")
9087 (set_attr "prefix_extra" "1")
9088 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3: named expander for the V4HI form.  Both inputs
;; are accepted as nonimmediate_operand; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands).  The
;; pattern includes a V4HI const_vector of four 1s.
;; NOTE(review): the surrounding arithmetic rtx codes and the expander
;; condition are not visible in this excerpt.
9090 (define_expand "ssse3_pmulhrswv4hi3"
9091 [(set (match_operand:V4HI 0 "register_operand" "")
9098 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9100 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9102 (const_vector:V4HI [(const_int 1) (const_int 1)
9103 (const_int 1) (const_int 1)]))
9106 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; *ssse3_pmulhrswv4hi3: unnamed V4HI pattern that prints "pmulhrsw"
;; using the "y" register constraint and insn mode DI.  Enabled when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok (MULT, V4HImode,
;; operands) accepts the operands.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the arithmetic rtx codes around the operands are not
;; visible in this excerpt.
9108 (define_insn "*ssse3_pmulhrswv4hi3"
9109 [(set (match_operand:V4HI 0 "register_operand" "=y")
9116 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9118 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9120 (const_vector:V4HI [(const_int 1) (const_int 1)
9121 (const_int 1) (const_int 1)]))
9123 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9124 "pmulhrsw\t{%2, %0|%0, %2}"
9125 [(set_attr "type" "sseimul")
9126 (set_attr "prefix_extra" "1")
9127 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9128 (set_attr "mode" "DI")])
;; *avx_pshufbv16qi3: unnamed three-operand V16QI pattern, modelled as
;; an unspec of operands 1 and 2, that prints "vpshufb" (prefix "vex").
;; The ";" after the template string begins an empty md comment and has
;; no effect on the pattern.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9130 (define_insn "*avx_pshufbv16qi3"
9131 [(set (match_operand:V16QI 0 "register_operand" "=x")
9132 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9133 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9136 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9137 [(set_attr "type" "sselog1")
9138 (set_attr "prefix_extra" "1")
9139 (set_attr "prefix" "vex")
9140 (set_attr "mode" "TI")])
;; ssse3_pshufbv16qi3: two-operand V16QI pattern, modelled as an unspec
;; of operands 1 and 2, that prints "pshufb".  Operand 1 is tied to the
;; destination (constraint "0").  The ";" after the template string
;; begins an empty md comment.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9142 (define_insn "ssse3_pshufbv16qi3"
9143 [(set (match_operand:V16QI 0 "register_operand" "=x")
9144 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9145 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9148 "pshufb\t{%2, %0|%0, %2}";
9149 [(set_attr "type" "sselog1")
9150 (set_attr "prefix_data16" "1")
9151 (set_attr "prefix_extra" "1")
9152 (set_attr "mode" "TI")])
;; ssse3_pshufbv8qi3: V8QI counterpart of the "pshufb" pattern using the
;; "y" register constraint and insn mode DI.  Operand 1 is tied to the
;; destination; prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9154 (define_insn "ssse3_pshufbv8qi3"
9155 [(set (match_operand:V8QI 0 "register_operand" "=y")
9156 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9157 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9160 "pshufb\t{%2, %0|%0, %2}";
9161 [(set_attr "type" "sselog1")
9162 (set_attr "prefix_extra" "1")
9163 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9164 (set_attr "mode" "DI")])
;; *avx_psign<mode>3: unnamed three-operand pattern iterated over
;; SSEMODE124 (V16QI, V8HI, V4SI) that prints "vpsign" followed by the
;; <ssevecsize> mode attribute (prefix "vex").
;; NOTE(review): the unspec wrapper/code, the insn condition and the
;; definition of <ssevecsize> are not visible in this excerpt.
9166 (define_insn "*avx_psign<mode>3"
9167 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9169 [(match_operand:SSEMODE124 1 "register_operand" "x")
9170 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9173 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9174 [(set_attr "type" "sselog1")
9175 (set_attr "prefix_extra" "1")
9176 (set_attr "prefix" "vex")
9177 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3 (SSEMODE124 instance): two-operand pattern over
;; V16QI, V8HI and V4SI that prints "psign" followed by <ssevecsize>.
;; Operand 1 is tied to the destination (constraint "0").
;; NOTE(review): the unspec wrapper/code, the insn condition and the
;; definition of <ssevecsize> are not visible in this excerpt.
9179 (define_insn "ssse3_psign<mode>3"
9180 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9182 [(match_operand:SSEMODE124 1 "register_operand" "0")
9183 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9186 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9187 [(set_attr "type" "sselog1")
9188 (set_attr "prefix_data16" "1")
9189 (set_attr "prefix_extra" "1")
9190 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3 (MMXMODEI instance): same name template as the
;; SSEMODE124 pattern but iterated over MMXMODEI, so the <mode>
;; substitution is what keeps the generated names distinct.  Uses the
;; "y" register constraint, prints "psign" followed by <mmxvecsize>, and
;; computes prefix_rex via x86_extended_reg_mentioned_p.
;; NOTE(review): MMXMODEI, <mmxvecsize>, the unspec wrapper/code and the
;; insn condition are not visible in this excerpt.
9192 (define_insn "ssse3_psign<mode>3"
9193 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9195 [(match_operand:MMXMODEI 1 "register_operand" "0")
9196 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9199 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9200 [(set_attr "type" "sselog1")
9201 (set_attr "prefix_extra" "1")
9202 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9203 (set_attr "mode" "DI")])
;; *avx_palignrti: unnamed three-operand TI-mode pattern that prints
;; "vpalignr" (prefix "vex").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the RTL carries eight times the value that is emitted
;; (presumably a bit count converted to bytes -- TODO confirm).
;; NOTE(review): the unspec code, the insn condition and the braces of
;; the C output block are not visible in this excerpt.
9205 (define_insn "*avx_palignrti"
9206 [(set (match_operand:TI 0 "register_operand" "=x")
9207 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9208 (match_operand:TI 2 "nonimmediate_operand" "xm")
9209 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9213 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9214 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9216 [(set_attr "type" "sseishft")
9217 (set_attr "prefix_extra" "1")
9218 (set_attr "length_immediate" "1")
9219 (set_attr "prefix" "vex")
9220 (set_attr "mode" "TI")])
;; ssse3_palignrti: two-operand TI-mode pattern that prints "palignr".
;; Operand 1 is tied to the destination (constraint "0").  Operand 3
;; must satisfy const_0_to_255_mul_8_operand and is divided by 8 in the
;; output code before being printed as the immediate.
;; NOTE(review): the unspec code, the insn condition and the braces of
;; the C output block are not visible in this excerpt.
9222 (define_insn "ssse3_palignrti"
9223 [(set (match_operand:TI 0 "register_operand" "=x")
9224 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9225 (match_operand:TI 2 "nonimmediate_operand" "xm")
9226 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9230 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9231 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9233 [(set_attr "type" "sseishft")
9234 (set_attr "atom_unit" "sishuf")
9235 (set_attr "prefix_data16" "1")
9236 (set_attr "prefix_extra" "1")
9237 (set_attr "length_immediate" "1")
9238 (set_attr "mode" "TI")])
;; ssse3_palignrdi: DI-mode counterpart of the "palignr" pattern using
;; the "y" register constraint.  Operand 3 is divided by 8 in the output
;; code before being printed; prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the predicate still admits values up to 255 for this
;; 64-bit form -- whether larger shifts are meaningful is not visible
;; here.  The unspec code and insn condition are also not shown.
9240 (define_insn "ssse3_palignrdi"
9241 [(set (match_operand:DI 0 "register_operand" "=y")
9242 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9243 (match_operand:DI 2 "nonimmediate_operand" "ym")
9244 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9248 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9249 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9251 [(set_attr "type" "sseishft")
9252 (set_attr "atom_unit" "sishuf")
9253 (set_attr "prefix_extra" "1")
9254 (set_attr "length_immediate" "1")
9255 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9256 (set_attr "mode" "DI")])
;; abs<mode>2 (SSEMODE124 instance): element-wise (abs ...) of a V16QI,
;; V8HI or V4SI register-or-memory source into a register.  Prints
;; "%vpabs" followed by <ssevecsize>; with prefix "maybe_vex" the "%v"
;; presumably selects the VEX spelling when AVX is enabled (TODO confirm
;; against the i386 operand printer).
;; NOTE(review): the insn condition is not visible in this excerpt.
9258 (define_insn "abs<mode>2"
9259 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9260 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9262 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9263 [(set_attr "type" "sselog1")
9264 (set_attr "prefix_data16" "1")
9265 (set_attr "prefix_extra" "1")
9266 (set_attr "prefix" "maybe_vex")
9267 (set_attr "mode" "TI")])
;; abs<mode>2 (MMXMODEI instance): element-wise (abs ...) over the
;; MMXMODEI modes using the "y" register constraint.  Prints "pabs"
;; followed by <mmxvecsize>; prefix_rep is forced to "0" and prefix_rex
;; is computed per insn via x86_extended_reg_mentioned_p.  The ";" after
;; the template string begins an empty md comment.
;; NOTE(review): MMXMODEI, <mmxvecsize> and the insn condition are not
;; visible in this excerpt.
9269 (define_insn "abs<mode>2"
9270 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9271 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9273 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9274 [(set_attr "type" "sselog1")
9275 (set_attr "prefix_rep" "0")
9276 (set_attr "prefix_extra" "1")
9277 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9278 (set_attr "mode" "DI")])
9280 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9282 ;; AMD SSE4A instructions
9284 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_movnt<mode>: stores a MODEF register operand to memory (the
;; destination predicate is memory_operand, constraint "=m") and prints
;; "movnts" followed by <ssemodefsuffix>.
;; NOTE(review): the unspec wrapper/code, the insn condition, MODEF and
;; <ssemodefsuffix> are not visible in this excerpt.
9286 (define_insn "sse4a_movnt<mode>"
9287 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9289 [(match_operand:MODEF 1 "register_operand" "x")]
9292 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9293 [(set_attr "type" "ssemov")
9294 (set_attr "mode" "<MODE>")])
;; sse4a_vmmovnt<mode>: stores element 0 of an SSEMODEF2P vector
;; register (selected with vec_select ... (parallel [(const_int 0)]))
;; to a scalar-mode memory destination.  Prints "movnt" followed by
;; <ssescalarmodesuffix>.
;; NOTE(review): the sibling sse4a_movnt<mode> template spells the stem
;; "movnts"; whether <ssescalarmodesuffix> supplies the extra "s" cannot
;; be checked from here -- confirm the emitted mnemonic.  The unspec
;; code and insn condition are also not visible.
9296 (define_insn "sse4a_vmmovnt<mode>"
9297 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9298 (unspec:<ssescalarmode>
9299 [(vec_select:<ssescalarmode>
9300 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9301 (parallel [(const_int 0)]))]
9304 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9305 [(set_attr "type" "ssemov")
9306 (set_attr "mode" "<ssescalarmode>")])
;; sse4a_extrqi: immediate form that prints "extrq" with two constants.
;; Operand 1 is tied to the V2DI destination (constraint "0"); operands
;; 2 and 3 are mode-less const_int_operand values with no constraint,
;; and length_immediate is "2" to account for both.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9308 (define_insn "sse4a_extrqi"
9309 [(set (match_operand:V2DI 0 "register_operand" "=x")
9310 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9311 (match_operand 2 "const_int_operand" "")
9312 (match_operand 3 "const_int_operand" "")]
9315 "extrq\t{%3, %2, %0|%0, %2, %3}"
9316 [(set_attr "type" "sse")
9317 (set_attr "prefix_data16" "1")
9318 (set_attr "length_immediate" "2")
9319 (set_attr "mode" "TI")])
;; sse4a_extrq: register form that prints "extrq" with a single source.
;; Operand 1 is tied to the V2DI destination (constraint "0"); operand 2
;; is a V16QI register.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9321 (define_insn "sse4a_extrq"
9322 [(set (match_operand:V2DI 0 "register_operand" "=x")
9323 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9324 (match_operand:V16QI 2 "register_operand" "x")]
9327 "extrq\t{%2, %0|%0, %2}"
9328 [(set_attr "type" "sse")
9329 (set_attr "prefix_data16" "1")
9330 (set_attr "mode" "TI")])
;; sse4a_insertqi: immediate form that prints "insertq" with a source
;; register and two constants.  Operand 1 is tied to the V2DI
;; destination; operands 3 and 4 are mode-less const_int_operand values
;; (length_immediate "2").  prefix_data16 is forced to "0" and
;; prefix_rep to "1".
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9332 (define_insn "sse4a_insertqi"
9333 [(set (match_operand:V2DI 0 "register_operand" "=x")
9334 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9335 (match_operand:V2DI 2 "register_operand" "x")
9336 (match_operand 3 "const_int_operand" "")
9337 (match_operand 4 "const_int_operand" "")]
9340 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9341 [(set_attr "type" "sseins")
9342 (set_attr "prefix_data16" "0")
9343 (set_attr "prefix_rep" "1")
9344 (set_attr "length_immediate" "2")
9345 (set_attr "mode" "TI")])
;; sse4a_insertq: register form that prints "insertq" with one source
;; register.  Operand 1 is tied to the V2DI destination (constraint
;; "0"); prefix_data16 is forced to "0" and prefix_rep to "1".
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9347 (define_insn "sse4a_insertq"
9348 [(set (match_operand:V2DI 0 "register_operand" "=x")
9349 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9350 (match_operand:V2DI 2 "register_operand" "x")]
9353 "insertq\t{%2, %0|%0, %2}"
9354 [(set_attr "type" "sseins")
9355 (set_attr "prefix_data16" "0")
9356 (set_attr "prefix_rep" "1")
9357 (set_attr "mode" "TI")])
9359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9361 ;; Intel SSE4.1 instructions
9363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; avx_blend<ssemodesuffix><avxmodesuffix>: three-operand blend over the
;; AVXMODEF2P modes, expressed as vec_merge.  Note the operand order in
;; the vec_merge: operand 2 comes first and operand 1 second, with the
;; constant mask in operand 3 limited by const_0_to_<blendbits>_operand.
;; Prints "vblend" followed by <ssemodesuffix> (prefix "vex").
;; NOTE(review): the insn condition and the AVXMODEF2P / <blendbits>
;; definitions are not visible in this excerpt.
9365 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9366 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9367 (vec_merge:AVXMODEF2P
9368 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9369 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9370 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9372 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9373 [(set_attr "type" "ssemov")
9374 (set_attr "prefix_extra" "1")
9375 (set_attr "length_immediate" "1")
9376 (set_attr "prefix" "vex")
9377 (set_attr "mode" "<avxvecmode>")])
;; avx_blendv<ssemodesuffix><avxmodesuffix>: variable blend over the
;; AVXMODEF2P modes with a register selector in operand 3.  Prints
;; "vblendv" followed by <ssemodesuffix> with four operands (prefix
;; "vex"); length_immediate is "1" even though operand 3 is a register
;; (presumably because the encoding carries it in an immediate byte --
;; TODO confirm).
;; NOTE(review): the unspec wrapper/code and the insn condition are not
;; visible in this excerpt.
9379 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9380 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9382 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9383 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9384 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9387 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9388 [(set_attr "type" "ssemov")
9389 (set_attr "prefix_extra" "1")
9390 (set_attr "length_immediate" "1")
9391 (set_attr "prefix" "vex")
9392 (set_attr "mode" "<avxvecmode>")])
;; sse4_1_blend<ssemodesuffix>: two-operand blend over the SSEMODEF2P
;; modes, expressed as vec_merge with operand 2 first and operand 1
;; second.  Operand 1 is tied to the destination (constraint "0") and
;; the constant mask in operand 3 is limited by
;; const_0_to_<blendbits>_operand.  Prints "blend" + <ssemodesuffix>.
;; NOTE(review): the insn condition is not visible in this excerpt.
9394 (define_insn "sse4_1_blend<ssemodesuffix>"
9395 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9396 (vec_merge:SSEMODEF2P
9397 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9398 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9399 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9401 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9402 [(set_attr "type" "ssemov")
9403 (set_attr "prefix_data16" "1")
9404 (set_attr "prefix_extra" "1")
9405 (set_attr "length_immediate" "1")
9406 (set_attr "mode" "<MODE>")])
;; sse4_1_blendv<ssemodesuffix>: variable blend over the SSEMODEF2P
;; modes.  The destination and operands 1/2 use the *_not_xmm0_operand
;; predicates while the selector (operand 3) uses constraint "Yz"
;; (presumably the xmm0-only class, which would explain excluding xmm0
;; elsewhere -- confirm in constraints.md).  Operand 1 is tied to the
;; destination; the template prints operand 3 explicitly.
;; NOTE(review): the unspec wrapper/code and the insn condition are not
;; visible in this excerpt.
9408 (define_insn "sse4_1_blendv<ssemodesuffix>"
9409 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9411 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9412 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9413 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9416 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9417 [(set_attr "type" "ssemov")
9418 (set_attr "prefix_data16" "1")
9419 (set_attr "prefix_extra" "1")
9420 (set_attr "mode" "<MODE>")])
;; avx_dp<ssemodesuffix><avxmodesuffix>: three-operand pattern over the
;; AVXMODEF2P modes that prints "vdp" followed by <ssemodesuffix>
;; (prefix "vex").  Operands 1 and 2 are marked commutative ("%");
;; operand 3 is a constant accepted by const_0_to_255_operand.
;; NOTE(review): the unspec wrapper/code and the insn condition are not
;; visible in this excerpt.
9422 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9423 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9425 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9426 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9427 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9430 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9431 [(set_attr "type" "ssemul")
9432 (set_attr "prefix" "vex")
9433 (set_attr "prefix_extra" "1")
9434 (set_attr "length_immediate" "1")
9435 (set_attr "mode" "<avxvecmode>")])
;; sse4_1_dp<ssemodesuffix>: two-operand pattern over the SSEMODEF2P
;; modes that prints "dp" followed by <ssemodesuffix>.  Operand 1 is
;; tied to the destination and marked commutative with operand 2
;; ("%0"); operand 3 is a constant accepted by const_0_to_255_operand.
;; NOTE(review): the unspec wrapper/code and the insn condition are not
;; visible in this excerpt.
9437 (define_insn "sse4_1_dp<ssemodesuffix>"
9438 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9440 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9441 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9442 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9445 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9446 [(set_attr "type" "ssemul")
9447 (set_attr "prefix_data16" "1")
9448 (set_attr "prefix_extra" "1")
9449 (set_attr "length_immediate" "1")
9450 (set_attr "mode" "<MODE>")])
;; sse4_1_movntdqa: loads a V2DI value from memory (the source predicate
;; is memory_operand, constraint "m") into a register, modelled as an
;; unspec.  Prints "%vmovntdqa" with prefix "maybe_vex", so one pattern
;; presumably covers both legacy and VEX spellings (TODO confirm).
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9452 (define_insn "sse4_1_movntdqa"
9453 [(set (match_operand:V2DI 0 "register_operand" "=x")
9454 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9457 "%vmovntdqa\t{%1, %0|%0, %1}"
9458 [(set_attr "type" "ssemov")
9459 (set_attr "prefix_extra" "1")
9460 (set_attr "prefix" "maybe_vex")
9461 (set_attr "mode" "TI")])
;; *avx_mpsadbw: unnamed three-operand V16QI pattern, modelled as an
;; unspec, that prints "vmpsadbw" (prefix "vex").  Operand 3 is a
;; constant accepted by const_0_to_255_operand.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9463 (define_insn "*avx_mpsadbw"
9464 [(set (match_operand:V16QI 0 "register_operand" "=x")
9465 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9466 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9467 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9470 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9471 [(set_attr "type" "sselog1")
9472 (set_attr "prefix" "vex")
9473 (set_attr "prefix_extra" "1")
9474 (set_attr "length_immediate" "1")
9475 (set_attr "mode" "TI")])
;; sse4_1_mpsadbw: two-operand V16QI pattern, modelled as an unspec,
;; that prints "mpsadbw".  Operand 1 is tied to the destination
;; (constraint "0"); operand 3 is a constant accepted by
;; const_0_to_255_operand.
;; NOTE(review): unlike several neighbouring SSE4.1 patterns this one
;; sets no prefix_data16 attribute -- verify the computed length.  The
;; unspec code and insn condition are not visible in this excerpt.
9477 (define_insn "sse4_1_mpsadbw"
9478 [(set (match_operand:V16QI 0 "register_operand" "=x")
9479 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9480 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9481 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9484 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9485 [(set_attr "type" "sselog1")
9486 (set_attr "prefix_extra" "1")
9487 (set_attr "length_immediate" "1")
9488 (set_attr "mode" "TI")])
;; *avx_packusdw: unnamed three-operand pattern that takes two V4SI
;; inputs and produces a V8HI result, printing "vpackusdw" (prefix
;; "vex").
;; NOTE(review): the rtx codes wrapping the two inputs and the insn
;; condition are not visible in this excerpt.
9490 (define_insn "*avx_packusdw"
9491 [(set (match_operand:V8HI 0 "register_operand" "=x")
9494 (match_operand:V4SI 1 "register_operand" "x"))
9496 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9498 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9499 [(set_attr "type" "sselog")
9500 (set_attr "prefix_extra" "1")
9501 (set_attr "prefix" "vex")
9502 (set_attr "mode" "TI")])
;; sse4_1_packusdw: two-operand pattern that takes two V4SI inputs and
;; produces a V8HI result, printing "packusdw".  Operand 1 is tied to
;; the destination (constraint "0").
;; NOTE(review): the rtx codes wrapping the two inputs and the insn
;; condition are not visible in this excerpt.
9504 (define_insn "sse4_1_packusdw"
9505 [(set (match_operand:V8HI 0 "register_operand" "=x")
9508 (match_operand:V4SI 1 "register_operand" "0"))
9510 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9512 "packusdw\t{%2, %0|%0, %2}"
9513 [(set_attr "type" "sselog")
9514 (set_attr "prefix_extra" "1")
9515 (set_attr "mode" "TI")])
;; *avx_pblendvb: unnamed four-operand V16QI pattern, modelled as an
;; unspec, that prints "vpblendvb" (prefix "vex").  Operand 3 is a
;; register selector; length_immediate is nevertheless "1" (presumably
;; because the encoding carries that register in an immediate byte --
;; TODO confirm).
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9517 (define_insn "*avx_pblendvb"
9518 [(set (match_operand:V16QI 0 "register_operand" "=x")
9519 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9520 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9521 (match_operand:V16QI 3 "register_operand" "x")]
9524 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9525 [(set_attr "type" "ssemov")
9526 (set_attr "prefix_extra" "1")
9527 (set_attr "length_immediate" "1")
9528 (set_attr "prefix" "vex")
9529 (set_attr "mode" "TI")])
;; sse4_1_pblendvb: V16QI variable blend, modelled as an unspec, that
;; prints "pblendvb".  The destination and operands 1/2 use the
;; *_not_xmm0_operand predicates while the selector (operand 3) uses
;; constraint "Yz" (presumably the xmm0-only class -- confirm in
;; constraints.md).  Operand 1 is tied to the destination.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
9531 (define_insn "sse4_1_pblendvb"
9532 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9533 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9534 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9535 (match_operand:V16QI 3 "register_operand" "Yz")]
9538 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9539 [(set_attr "type" "ssemov")
9540 (set_attr "prefix_extra" "1")
9541 (set_attr "mode" "TI")])
;; *avx_pblendw: unnamed three-operand V8HI pattern that prints
;; "vpblendw" (prefix "vex").  Operand 2 is listed before operand 1 in
;; the pattern, followed by the constant mask in operand 3
;; (const_0_to_255_operand).
;; NOTE(review): the rtx code wrapping the three operands (the sibling
;; float blends use vec_merge) and the insn condition are not visible in
;; this excerpt.
9543 (define_insn "*avx_pblendw"
9544 [(set (match_operand:V8HI 0 "register_operand" "=x")
9546 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9547 (match_operand:V8HI 1 "register_operand" "x")
9548 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9550 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9551 [(set_attr "type" "ssemov")
9552 (set_attr "prefix" "vex")
9553 (set_attr "prefix_extra" "1")
9554 (set_attr "length_immediate" "1")
9555 (set_attr "mode" "TI")])
;; sse4_1_pblendw: two-operand V8HI pattern that prints "pblendw".
;; Operand 2 is listed before operand 1 in the pattern; operand 1 is
;; tied to the destination (constraint "0") and operand 3 is the
;; constant mask (const_0_to_255_operand).
;; NOTE(review): the rtx code wrapping the three operands and the insn
;; condition are not visible in this excerpt.
9557 (define_insn "sse4_1_pblendw"
9558 [(set (match_operand:V8HI 0 "register_operand" "=x")
9560 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9561 (match_operand:V8HI 1 "register_operand" "0")
9562 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9564 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9565 [(set_attr "type" "ssemov")
9566 (set_attr "prefix_extra" "1")
9567 (set_attr "length_immediate" "1")
9568 (set_attr "mode" "TI")])
;; sse4_1_phminposuw: single-source V8HI pattern modelled as
;; UNSPEC_PHMINPOSUW of a register-or-memory operand.  Prints
;; "%vphminposuw" with prefix "maybe_vex", so one pattern presumably
;; covers both legacy and VEX spellings (TODO confirm).
;; NOTE(review): the insn condition is not visible in this excerpt.
9570 (define_insn "sse4_1_phminposuw"
9571 [(set (match_operand:V8HI 0 "register_operand" "=x")
9572 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9573 UNSPEC_PHMINPOSUW))]
9575 "%vphminposuw\t{%1, %0|%0, %1}"
9576 [(set_attr "type" "sselog1")
9577 (set_attr "prefix_extra" "1")
9578 (set_attr "prefix" "maybe_vex")
9579 (set_attr "mode" "TI")])
;; sse4_1_extendv8qiv8hi2: named register-source form producing V8HI
;; from a V16QI register; a parallel starting at element 0 selects the
;; source elements.  Prints "%vpmovsxbw" with prefix "maybe_vex".
;; NOTE(review): the extension/vec_select rtx codes, the rest of the
;; parallel and the insn condition are not visible in this excerpt.
9581 (define_insn "sse4_1_extendv8qiv8hi2"
9582 [(set (match_operand:V8HI 0 "register_operand" "=x")
9585 (match_operand:V16QI 1 "register_operand" "x")
9586 (parallel [(const_int 0)
9595 "%vpmovsxbw\t{%1, %0|%0, %1}"
9596 [(set_attr "type" "ssemov")
9597 (set_attr "prefix_extra" "1")
9598 (set_attr "prefix" "maybe_vex")
9599 (set_attr "mode" "TI")])
;; *sse4_1_extendv8qiv8hi2: unnamed variant whose source is a V8QI
;; register-or-memory operand wrapped in vec_duplicate:V16QI, allowing
;; the narrower source to be matched directly.  Prints "%vpmovsxbw".
;; NOTE(review): the extension/vec_select rtx codes, the rest of the
;; parallel and the insn condition are not visible in this excerpt.
9601 (define_insn "*sse4_1_extendv8qiv8hi2"
9602 [(set (match_operand:V8HI 0 "register_operand" "=x")
9605 (vec_duplicate:V16QI
9606 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9607 (parallel [(const_int 0)
9616 "%vpmovsxbw\t{%1, %0|%0, %1}"
9617 [(set_attr "type" "ssemov")
9618 (set_attr "prefix_extra" "1")
9619 (set_attr "prefix" "maybe_vex")
9620 (set_attr "mode" "TI")])
;; sse4_1_extendv4qiv4si2: named register-source form producing V4SI
;; from a V16QI register; a parallel starting at element 0 selects the
;; source elements.  Prints "%vpmovsxbd" with prefix "maybe_vex".
;; NOTE(review): the extension/vec_select rtx codes, the rest of the
;; parallel and the insn condition are not visible in this excerpt.
9622 (define_insn "sse4_1_extendv4qiv4si2"
9623 [(set (match_operand:V4SI 0 "register_operand" "=x")
9626 (match_operand:V16QI 1 "register_operand" "x")
9627 (parallel [(const_int 0)
9632 "%vpmovsxbd\t{%1, %0|%0, %1}"
9633 [(set_attr "type" "ssemov")
9634 (set_attr "prefix_extra" "1")
9635 (set_attr "prefix" "maybe_vex")
9636 (set_attr "mode" "TI")])
;; *sse4_1_extendv4qiv4si2: unnamed variant whose source is a V4QI
;; register-or-memory operand wrapped in vec_duplicate:V16QI.  Prints
;; "%vpmovsxbd" with prefix "maybe_vex".
;; NOTE(review): the extension/vec_select rtx codes, the rest of the
;; parallel and the insn condition are not visible in this excerpt.
9638 (define_insn "*sse4_1_extendv4qiv4si2"
9639 [(set (match_operand:V4SI 0 "register_operand" "=x")
9642 (vec_duplicate:V16QI
9643 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9644 (parallel [(const_int 0)
9649 "%vpmovsxbd\t{%1, %0|%0, %1}"
9650 [(set_attr "type" "ssemov")
9651 (set_attr "prefix_extra" "1")
9652 (set_attr "prefix" "maybe_vex")
9653 (set_attr "mode" "TI")])
;; pmovsxbq, named register-source form: operand 1 is a full V16QI
;; register and operand 0 the V2DI result.
9655 (define_insn "sse4_1_extendv2qiv2di2"
9656 [(set (match_operand:V2DI 0 "register_operand" "=x")
9659 (match_operand:V16QI 1 "register_operand" "x")
9660 (parallel [(const_int 0)
9663 "%vpmovsxbq\t{%1, %0|%0, %1}"
9664 [(set_attr "type" "ssemov")
9665 (set_attr "prefix_extra" "1")
9666 (set_attr "prefix" "maybe_vex")
9667 (set_attr "mode" "TI")])
;; pmovsxbq, unnamed narrow-source form: operand 1 is a V2QI register or
;; memory operand, wrapped in a vec_duplicate:V16QI.
9669 (define_insn "*sse4_1_extendv2qiv2di2"
9670 [(set (match_operand:V2DI 0 "register_operand" "=x")
9673 (vec_duplicate:V16QI
9674 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9675 (parallel [(const_int 0)
9678 "%vpmovsxbq\t{%1, %0|%0, %1}"
9679 [(set_attr "type" "ssemov")
9680 (set_attr "prefix_extra" "1")
9681 (set_attr "prefix" "maybe_vex")
9682 (set_attr "mode" "TI")])
;; pmovsxwd, named register-source form: operand 1 is a full V8HI
;; register and operand 0 the V4SI result.
9684 (define_insn "sse4_1_extendv4hiv4si2"
9685 [(set (match_operand:V4SI 0 "register_operand" "=x")
9688 (match_operand:V8HI 1 "register_operand" "x")
9689 (parallel [(const_int 0)
9694 "%vpmovsxwd\t{%1, %0|%0, %1}"
9695 [(set_attr "type" "ssemov")
9696 (set_attr "prefix_extra" "1")
9697 (set_attr "prefix" "maybe_vex")
9698 (set_attr "mode" "TI")])
;; pmovsxwd, unnamed narrow-source form: operand 1 is a V4HI register or
;; memory operand.  The source must be V4HI (four halfwords), as the
;; v4hi in the pattern name says and as *sse4_1_zero_extendv4hiv4si2
;; already does; a V2HI operand would describe only half of the data
;; the instruction consumes.
9700 (define_insn "*sse4_1_extendv4hiv4si2"
9701 [(set (match_operand:V4SI 0 "register_operand" "=x")
9705 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9706 (parallel [(const_int 0)
9711 "%vpmovsxwd\t{%1, %0|%0, %1}"
9712 [(set_attr "type" "ssemov")
9713 (set_attr "prefix_extra" "1")
9714 (set_attr "prefix" "maybe_vex")
9715 (set_attr "mode" "TI")])
;; pmovsxwq, named register-source form: operand 1 is a full V8HI
;; register and operand 0 the V2DI result.
9717 (define_insn "sse4_1_extendv2hiv2di2"
9718 [(set (match_operand:V2DI 0 "register_operand" "=x")
9721 (match_operand:V8HI 1 "register_operand" "x")
9722 (parallel [(const_int 0)
9725 "%vpmovsxwq\t{%1, %0|%0, %1}"
9726 [(set_attr "type" "ssemov")
9727 (set_attr "prefix_extra" "1")
9728 (set_attr "prefix" "maybe_vex")
9729 (set_attr "mode" "TI")])
;; pmovsxwq, unnamed narrow-source form: operand 1 is a V2HI register or
;; memory operand.  The source must be V2HI (two halfwords), as the v2hi
;; in the pattern name says and as *sse4_1_zero_extendv2hiv2di2 already
;; does; a full V8HI operand here would make this pattern disagree with
;; its siblings and describe a 128-bit memory source.
9731 (define_insn "*sse4_1_extendv2hiv2di2"
9732 [(set (match_operand:V2DI 0 "register_operand" "=x")
9736 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9737 (parallel [(const_int 0)
9740 "%vpmovsxwq\t{%1, %0|%0, %1}"
9741 [(set_attr "type" "ssemov")
9742 (set_attr "prefix_extra" "1")
9743 (set_attr "prefix" "maybe_vex")
9744 (set_attr "mode" "TI")])
;; pmovsxdq, named register-source form: operand 1 is a full V4SI
;; register and operand 0 the V2DI result.
9746 (define_insn "sse4_1_extendv2siv2di2"
9747 [(set (match_operand:V2DI 0 "register_operand" "=x")
9750 (match_operand:V4SI 1 "register_operand" "x")
9751 (parallel [(const_int 0)
9754 "%vpmovsxdq\t{%1, %0|%0, %1}"
9755 [(set_attr "type" "ssemov")
9756 (set_attr "prefix_extra" "1")
9757 (set_attr "prefix" "maybe_vex")
9758 (set_attr "mode" "TI")])
;; pmovsxdq, unnamed narrow-source form: operand 1 is a V2SI register or
;; memory operand.
9760 (define_insn "*sse4_1_extendv2siv2di2"
9761 [(set (match_operand:V2DI 0 "register_operand" "=x")
9765 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9766 (parallel [(const_int 0)
9769 "%vpmovsxdq\t{%1, %0|%0, %1}"
9770 [(set_attr "type" "ssemov")
9771 (set_attr "prefix_extra" "1")
9772 (set_attr "prefix" "maybe_vex")
9773 (set_attr "mode" "TI")])
;; pmovzxbw, named register-source form: operand 1 is a full V16QI
;; register and operand 0 the V8HI result.
9775 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9776 [(set (match_operand:V8HI 0 "register_operand" "=x")
9779 (match_operand:V16QI 1 "register_operand" "x")
9780 (parallel [(const_int 0)
9789 "%vpmovzxbw\t{%1, %0|%0, %1}"
9790 [(set_attr "type" "ssemov")
9791 (set_attr "prefix_extra" "1")
9792 (set_attr "prefix" "maybe_vex")
9793 (set_attr "mode" "TI")])
;; pmovzxbw, unnamed narrow-source form: operand 1 is a V8QI register or
;; memory operand, wrapped in a vec_duplicate:V16QI.
9795 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9796 [(set (match_operand:V8HI 0 "register_operand" "=x")
9799 (vec_duplicate:V16QI
9800 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9801 (parallel [(const_int 0)
9810 "%vpmovzxbw\t{%1, %0|%0, %1}"
9811 [(set_attr "type" "ssemov")
9812 (set_attr "prefix_extra" "1")
9813 (set_attr "prefix" "maybe_vex")
9814 (set_attr "mode" "TI")])
;; pmovzxbd, named register-source form: operand 1 is a full V16QI
;; register and operand 0 the V4SI result.
9816 (define_insn "sse4_1_zero_extendv4qiv4si2"
9817 [(set (match_operand:V4SI 0 "register_operand" "=x")
9820 (match_operand:V16QI 1 "register_operand" "x")
9821 (parallel [(const_int 0)
9826 "%vpmovzxbd\t{%1, %0|%0, %1}"
9827 [(set_attr "type" "ssemov")
9828 (set_attr "prefix_extra" "1")
9829 (set_attr "prefix" "maybe_vex")
9830 (set_attr "mode" "TI")])
;; pmovzxbd, unnamed narrow-source form: operand 1 is a V4QI register or
;; memory operand, wrapped in a vec_duplicate:V16QI.
9832 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9833 [(set (match_operand:V4SI 0 "register_operand" "=x")
9836 (vec_duplicate:V16QI
9837 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9838 (parallel [(const_int 0)
9843 "%vpmovzxbd\t{%1, %0|%0, %1}"
9844 [(set_attr "type" "ssemov")
9845 (set_attr "prefix_extra" "1")
9846 (set_attr "prefix" "maybe_vex")
9847 (set_attr "mode" "TI")])
;; pmovzxbq, named register-source form: operand 1 is a full V16QI
;; register and operand 0 the V2DI result.
9849 (define_insn "sse4_1_zero_extendv2qiv2di2"
9850 [(set (match_operand:V2DI 0 "register_operand" "=x")
9853 (match_operand:V16QI 1 "register_operand" "x")
9854 (parallel [(const_int 0)
9857 "%vpmovzxbq\t{%1, %0|%0, %1}"
9858 [(set_attr "type" "ssemov")
9859 (set_attr "prefix_extra" "1")
9860 (set_attr "prefix" "maybe_vex")
9861 (set_attr "mode" "TI")])
;; pmovzxbq, unnamed narrow-source form: operand 1 is a V2QI register or
;; memory operand, wrapped in a vec_duplicate:V16QI.
9863 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9864 [(set (match_operand:V2DI 0 "register_operand" "=x")
9867 (vec_duplicate:V16QI
9868 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9869 (parallel [(const_int 0)
9872 "%vpmovzxbq\t{%1, %0|%0, %1}"
9873 [(set_attr "type" "ssemov")
9874 (set_attr "prefix_extra" "1")
9875 (set_attr "prefix" "maybe_vex")
9876 (set_attr "mode" "TI")])
;; pmovzxwd, named register-source form: operand 1 is a full V8HI
;; register and operand 0 the V4SI result.
9878 (define_insn "sse4_1_zero_extendv4hiv4si2"
9879 [(set (match_operand:V4SI 0 "register_operand" "=x")
9882 (match_operand:V8HI 1 "register_operand" "x")
9883 (parallel [(const_int 0)
9888 "%vpmovzxwd\t{%1, %0|%0, %1}"
9889 [(set_attr "type" "ssemov")
9890 (set_attr "prefix_extra" "1")
9891 (set_attr "prefix" "maybe_vex")
9892 (set_attr "mode" "TI")])
;; pmovzxwd, unnamed narrow-source form: operand 1 is a V4HI register or
;; memory operand.
9894 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9895 [(set (match_operand:V4SI 0 "register_operand" "=x")
9899 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9900 (parallel [(const_int 0)
9905 "%vpmovzxwd\t{%1, %0|%0, %1}"
9906 [(set_attr "type" "ssemov")
9907 (set_attr "prefix_extra" "1")
9908 (set_attr "prefix" "maybe_vex")
9909 (set_attr "mode" "TI")])
;; pmovzxwq, named register-source form: operand 1 is a full V8HI
;; register and operand 0 the V2DI result.
9911 (define_insn "sse4_1_zero_extendv2hiv2di2"
9912 [(set (match_operand:V2DI 0 "register_operand" "=x")
9915 (match_operand:V8HI 1 "register_operand" "x")
9916 (parallel [(const_int 0)
9919 "%vpmovzxwq\t{%1, %0|%0, %1}"
9920 [(set_attr "type" "ssemov")
9921 (set_attr "prefix_extra" "1")
9922 (set_attr "prefix" "maybe_vex")
9923 (set_attr "mode" "TI")])
;; pmovzxwq, unnamed narrow-source form: operand 1 is a V2HI register or
;; memory operand.
9925 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9926 [(set (match_operand:V2DI 0 "register_operand" "=x")
9930 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9931 (parallel [(const_int 0)
9934 "%vpmovzxwq\t{%1, %0|%0, %1}"
9935 [(set_attr "type" "ssemov")
9936 (set_attr "prefix_extra" "1")
9937 (set_attr "prefix" "maybe_vex")
9938 (set_attr "mode" "TI")])
;; pmovzxdq, named register-source form: operand 1 is a full V4SI
;; register and operand 0 the V2DI result.
9940 (define_insn "sse4_1_zero_extendv2siv2di2"
9941 [(set (match_operand:V2DI 0 "register_operand" "=x")
9944 (match_operand:V4SI 1 "register_operand" "x")
9945 (parallel [(const_int 0)
9948 "%vpmovzxdq\t{%1, %0|%0, %1}"
9949 [(set_attr "type" "ssemov")
9950 (set_attr "prefix_extra" "1")
9951 (set_attr "prefix" "maybe_vex")
9952 (set_attr "mode" "TI")])
;; pmovzxdq, unnamed narrow-source form: operand 1 is a V2SI register or
;; memory operand.
9954 (define_insn "*sse4_1_zero_extendv2siv2di2"
9955 [(set (match_operand:V2DI 0 "register_operand" "=x")
9959 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9960 (parallel [(const_int 0)
9963 "%vpmovzxdq\t{%1, %0|%0, %1}"
9964 [(set_attr "type" "ssemov")
9965 (set_attr "prefix_extra" "1")
9966 (set_attr "prefix" "maybe_vex")
9967 (set_attr "mode" "TI")])
9969 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9970 ;; setting FLAGS_REG, but they are not really compare instructions.
;; vtest<ssemodesuffix> over the AVXMODEF2P modes.  The only result is
;; FLAGS_REG in CC mode, set from an unspec of operand 0 (register) and
;; operand 1 (register or memory); no vector register is written.
9971 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9972 [(set (reg:CC FLAGS_REG)
9973 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9974 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9977 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9978 [(set_attr "type" "ssecomi")
9979 (set_attr "prefix_extra" "1")
9980 (set_attr "prefix" "vex")
9981 (set_attr "mode" "<MODE>")])
9983 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9984 ;; but it is not really a compare instruction.
;; vptest on V4DI operands (always VEX-encoded, mode attribute OI).  Sets
;; only FLAGS_REG, from an unspec of operand 0 (register) and operand 1
;; (register or memory).
9985 (define_insn "avx_ptest256"
9986 [(set (reg:CC FLAGS_REG)
9987 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9988 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9991 "vptest\t{%1, %0|%0, %1}"
9992 [(set_attr "type" "ssecomi")
9993 (set_attr "prefix_extra" "1")
9994 (set_attr "prefix" "vex")
9995 (set_attr "mode" "OI")])
;; ptest on V2DI operands (prefix attribute maybe_vex, mode attribute
;; TI).  Sets only FLAGS_REG, from an unspec of operand 0 (register) and
;; operand 1 (register or memory).
9997 (define_insn "sse4_1_ptest"
9998 [(set (reg:CC FLAGS_REG)
9999 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
10000 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10003 "%vptest\t{%1, %0|%0, %1}"
10004 [(set_attr "type" "ssecomi")
10005 (set_attr "prefix_extra" "1")
10006 (set_attr "prefix" "maybe_vex")
10007 (set_attr "mode" "TI")])
;; vround<ssemodesuffix> over the AVX256MODEF2P modes.  Operand 1 is the
;; source (register or memory); operand 2 is an immediate accepted by
;; const_0_to_15_operand and is emitted as the instruction's immediate.
10009 (define_insn "avx_round<ssemodesuffix>256"
10010 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
10011 (unspec:AVX256MODEF2P
10012 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
10013 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10016 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10017 [(set_attr "type" "ssecvt")
10018 (set_attr "prefix_extra" "1")
10019 (set_attr "length_immediate" "1")
10020 (set_attr "prefix" "vex")
10021 (set_attr "mode" "<MODE>")])
;; round<ssemodesuffix> over the SSEMODEF2P modes (prefix attribute
;; maybe_vex).  Operand 1 is the source (register or memory); operand 2
;; is an immediate accepted by const_0_to_15_operand.
10023 (define_insn "sse4_1_round<ssemodesuffix>"
10024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10026 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
10027 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10030 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10031 [(set_attr "type" "ssecvt")
10032 (set_attr "prefix_data16" "1")
10033 (set_attr "prefix_extra" "1")
10034 (set_attr "length_immediate" "1")
10035 (set_attr "prefix" "maybe_vex")
10036 (set_attr "mode" "<MODE>")])
;; Scalar vround<ssescalarmodesuffix>, VEX three-operand form.  The
;; result is a vec_merge of the rounded operand 2 (immediate in operand
;; 3) with operand 1; operand 1 is a separate register and is not tied
;; to the destination.
10038 (define_insn "*avx_round<ssescalarmodesuffix>"
10039 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10040 (vec_merge:SSEMODEF2P
10042 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10043 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10045 (match_operand:SSEMODEF2P 1 "register_operand" "x")
10048 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10049 [(set_attr "type" "ssecvt")
10050 (set_attr "prefix_extra" "1")
10051 (set_attr "length_immediate" "1")
10052 (set_attr "prefix" "vex")
10053 (set_attr "mode" "<MODE>")])
;; Scalar round<ssescalarmodesuffix>, two-operand form.  Same vec_merge
;; shape as the VEX variant, but operand 1 carries the "0" constraint,
;; so it must live in the destination register and is not printed in
;; the template.
10055 (define_insn "sse4_1_round<ssescalarmodesuffix>"
10056 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10057 (vec_merge:SSEMODEF2P
10059 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10060 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10062 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10065 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10066 [(set_attr "type" "ssecvt")
10067 (set_attr "prefix_data16" "1")
10068 (set_attr "prefix_extra" "1")
10069 (set_attr "length_immediate" "1")
10070 (set_attr "mode" "<MODE>")])
10072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10074 ;; Intel SSE4.2 string/text processing instructions
10076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestri/pcmpestrm placeholder.  The pattern sets three
;; results at once: operand 0 (SI, constraint "c"), operand 1 (V16QI,
;; constraint "Yz") and FLAGS_REG.  Inputs are operands 2 and 4 (V16QI),
;; operands 3 and 5 (SI, constraints "a" and "d") and the immediate in
;; operand 6.
;; The split code consults the REG_UNUSED notes on curr_insn to work out
;; which of the three results are live (ecx, xmm0, flags), and emits
;; pcmpestri and/or pcmpestrm accordingly.  The _cconly form is emitted
;; only when the flags are live and neither register result is.
10078 (define_insn_and_split "sse4_2_pcmpestr"
10079 [(set (match_operand:SI 0 "register_operand" "=c,c")
10081 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10082 (match_operand:SI 3 "register_operand" "a,a")
10083 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10084 (match_operand:SI 5 "register_operand" "d,d")
10085 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10087 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10095 (set (reg:CC FLAGS_REG)
10104 && can_create_pseudo_p ()"
10109 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10110 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10111 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10114 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10115 operands[3], operands[4],
10116 operands[5], operands[6]));
10118 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10119 operands[3], operands[4],
10120 operands[5], operands[6]));
10121 if (flags && !(ecx || xmm0))
10122 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10123 operands[2], operands[3],
10124 operands[4], operands[5],
10128 [(set_attr "type" "sselog")
10129 (set_attr "prefix_data16" "1")
10130 (set_attr "prefix_extra" "1")
10131 (set_attr "length_immediate" "1")
10132 (set_attr "memory" "none,load")
10133 (set_attr "mode" "TI")])
;; pcmpestri: writes the SI result in operand 0 (constraint "c") and
;; FLAGS_REG.  Operands 2 and 4 (constraints "a" and "d") are inputs
;; that do not appear in the assembler template.  The two alternatives
;; differ only in whether operand 3 is a register or memory, which the
;; "memory" attribute reflects.
10135 (define_insn "sse4_2_pcmpestri"
10136 [(set (match_operand:SI 0 "register_operand" "=c,c")
10138 [(match_operand:V16QI 1 "register_operand" "x,x")
10139 (match_operand:SI 2 "register_operand" "a,a")
10140 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10141 (match_operand:SI 4 "register_operand" "d,d")
10142 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10144 (set (reg:CC FLAGS_REG)
10153 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10154 [(set_attr "type" "sselog")
10155 (set_attr "prefix_data16" "1")
10156 (set_attr "prefix_extra" "1")
10157 (set_attr "prefix" "maybe_vex")
10158 (set_attr "length_immediate" "1")
10159 (set_attr "memory" "none,load")
10160 (set_attr "mode" "TI")])
;; pcmpestrm: writes the V16QI result in operand 0 (constraint "Yz") and
;; FLAGS_REG.  Operands 2 and 4 (constraints "a" and "d") are inputs
;; that do not appear in the assembler template.  The two alternatives
;; differ only in whether operand 3 is a register or memory.
10162 (define_insn "sse4_2_pcmpestrm"
10163 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10165 [(match_operand:V16QI 1 "register_operand" "x,x")
10166 (match_operand:SI 2 "register_operand" "a,a")
10167 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10168 (match_operand:SI 4 "register_operand" "d,d")
10169 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10171 (set (reg:CC FLAGS_REG)
10180 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10181 [(set_attr "type" "sselog")
10182 (set_attr "prefix_data16" "1")
10183 (set_attr "prefix_extra" "1")
10184 (set_attr "length_immediate" "1")
10185 (set_attr "prefix" "maybe_vex")
10186 (set_attr "memory" "none,load")
10187 (set_attr "mode" "TI")])
;; Flags-only explicit-length string compare.  Only FLAGS_REG is set;
;; the register result is modelled as a clobbered scratch.  Alternatives
;; 0 and 1 clobber a V16QI scratch with constraint "Yz" and emit
;; pcmpestrm.  Alternatives 2 and 3 clobber an SI scratch with
;; constraint "c" and emit pcmpestri.  Within each pair, the second
;; alternative takes operand 4 from memory.
10189 (define_insn "sse4_2_pcmpestr_cconly"
10190 [(set (reg:CC FLAGS_REG)
10192 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10193 (match_operand:SI 3 "register_operand" "a,a,a,a")
10194 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10195 (match_operand:SI 5 "register_operand" "d,d,d,d")
10196 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10198 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10199 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10202 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10203 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10204 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10205 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10206 [(set_attr "type" "sselog")
10207 (set_attr "prefix_data16" "1")
10208 (set_attr "prefix_extra" "1")
10209 (set_attr "length_immediate" "1")
10210 (set_attr "memory" "none,load,none,load")
10211 (set_attr "prefix" "maybe_vex")
10212 (set_attr "mode" "TI")])
;; Combined pcmpistri/pcmpistrm placeholder.  The pattern sets three
;; results at once: operand 0 (SI, constraint "c"), operand 1 (V16QI,
;; constraint "Yz") and FLAGS_REG.  Inputs are the V16QI operands 2 and
;; 3 and the immediate in operand 4.
;; The split code consults the REG_UNUSED notes on curr_insn to work out
;; which of the three results are live (ecx, xmm0, flags), and emits
;; pcmpistri and/or pcmpistrm accordingly.  The _cconly form is emitted
;; only when the flags are live and neither register result is.
10214 (define_insn_and_split "sse4_2_pcmpistr"
10215 [(set (match_operand:SI 0 "register_operand" "=c,c")
10217 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10218 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10219 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10221 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10227 (set (reg:CC FLAGS_REG)
10234 && can_create_pseudo_p ()"
10239 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10240 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10241 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10244 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10245 operands[3], operands[4]));
10247 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10248 operands[3], operands[4]));
10249 if (flags && !(ecx || xmm0))
10250 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10251 operands[2], operands[3],
10255 [(set_attr "type" "sselog")
10256 (set_attr "prefix_data16" "1")
10257 (set_attr "prefix_extra" "1")
10258 (set_attr "length_immediate" "1")
10259 (set_attr "memory" "none,load")
10260 (set_attr "mode" "TI")])
;; pcmpistri: writes the SI result in operand 0 (constraint "c") and
;; FLAGS_REG.  The two alternatives differ only in whether operand 2 is
;; a register or memory, which the "memory" attribute reflects.
10262 (define_insn "sse4_2_pcmpistri"
10263 [(set (match_operand:SI 0 "register_operand" "=c,c")
10265 [(match_operand:V16QI 1 "register_operand" "x,x")
10266 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10267 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10269 (set (reg:CC FLAGS_REG)
10276 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10277 [(set_attr "type" "sselog")
10278 (set_attr "prefix_data16" "1")
10279 (set_attr "prefix_extra" "1")
10280 (set_attr "length_immediate" "1")
10281 (set_attr "prefix" "maybe_vex")
10282 (set_attr "memory" "none,load")
10283 (set_attr "mode" "TI")])
;; pcmpistrm: writes the V16QI result in operand 0 (constraint "Yz") and
;; FLAGS_REG.  The two alternatives differ only in whether operand 2 is
;; a register or memory.
10285 (define_insn "sse4_2_pcmpistrm"
10286 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10288 [(match_operand:V16QI 1 "register_operand" "x,x")
10289 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10290 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10292 (set (reg:CC FLAGS_REG)
10299 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10300 [(set_attr "type" "sselog")
10301 (set_attr "prefix_data16" "1")
10302 (set_attr "prefix_extra" "1")
10303 (set_attr "length_immediate" "1")
10304 (set_attr "prefix" "maybe_vex")
10305 (set_attr "memory" "none,load")
10306 (set_attr "mode" "TI")])
;; Flags-only implicit-length string compare.  Only FLAGS_REG is set;
;; the register result is modelled as a clobbered scratch.  Alternatives
;; 0 and 1 clobber a V16QI scratch with constraint "Yz" and emit
;; pcmpistrm.  Alternatives 2 and 3 clobber an SI scratch with
;; constraint "c" and emit pcmpistri.  Within each pair, the second
;; alternative takes operand 3 from memory.
10308 (define_insn "sse4_2_pcmpistr_cconly"
10309 [(set (reg:CC FLAGS_REG)
10311 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10312 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10313 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10315 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10316 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10319 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10320 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10321 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10322 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10323 [(set_attr "type" "sselog")
10324 (set_attr "prefix_data16" "1")
10325 (set_attr "prefix_extra" "1")
10326 (set_attr "length_immediate" "1")
10327 (set_attr "memory" "none,load,none,load")
10328 (set_attr "prefix" "maybe_vex")
10329 (set_attr "mode" "TI")])
10331 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10333 ;; XOP instructions
10335 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10337 ;; XOP parallel integer multiply/add instructions.
10338 ;; Note the XOP multiply/add instructions
10339 ;; a[i] = b[i] * c[i] + d[i];
10340 ;; do not allow the value being added to be a memory operation.
;; vpmacsww on V8HI.  Operands 1 and 2 form a commutative pair (the "%"
;; in operand 1's constraint), and only operand 2 may be memory.
;; Operand 3 is constrained to a register ("x").
10341 (define_insn "xop_pmacsww"
10342 [(set (match_operand:V8HI 0 "register_operand" "=x")
10345 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10346 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10347 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10349 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10350 [(set_attr "type" "ssemuladd")
10351 (set_attr "mode" "TI")])
;; vpmacssww on V8HI.  The visible mult:V8HI multiplies the commutative
;; pair of operands 1 and 2 (only operand 2 may be memory).  Operand 3
;; is constrained to a register ("x").
10353 (define_insn "xop_pmacssww"
10354 [(set (match_operand:V8HI 0 "register_operand" "=x")
10356 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10357 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10358 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10360 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10361 [(set_attr "type" "ssemuladd")
10362 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI.  Operands 1 and 2 form a commutative pair (the "%"
;; in operand 1's constraint), and only operand 2 may be memory.
;; Operand 3 is constrained to a register ("x").
10364 (define_insn "xop_pmacsdd"
10365 [(set (match_operand:V4SI 0 "register_operand" "=x")
10368 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10369 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10370 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10372 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10373 [(set_attr "type" "ssemuladd")
10374 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI.  The visible mult:V4SI multiplies the commutative
;; pair of operands 1 and 2 (only operand 2 may be memory).  Operand 3
;; is constrained to a register ("x").
10376 (define_insn "xop_pmacssdd"
10377 [(set (match_operand:V4SI 0 "register_operand" "=x")
10379 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10380 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10381 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10383 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10384 [(set_attr "type" "ssemuladd")
10385 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI operands 1 and 2 with a V2DI result.  The element
;; selections on both inputs start at index 1.  Operand 3 is a V2DI
;; value constrained to a register.
10387 (define_insn "xop_pmacssdql"
10388 [(set (match_operand:V2DI 0 "register_operand" "=x")
10393 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10394 (parallel [(const_int 1)
10397 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10398 (parallel [(const_int 1)
10400 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10402 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10403 [(set_attr "type" "ssemuladd")
10404 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI operands 1 and 2 with a V2DI result.  The element
;; selections on both inputs start at index 0.  Operand 3 is a V2DI
;; value constrained to a register.
10406 (define_insn "xop_pmacssdqh"
10407 [(set (match_operand:V2DI 0 "register_operand" "=x")
10412 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10413 (parallel [(const_int 0)
10417 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10418 (parallel [(const_int 0)
10420 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10422 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10423 [(set_attr "type" "ssemuladd")
10424 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI operands 1 and 2 with a V2DI result.  The element
;; selections on both inputs start at index 1.  Operand 3 is a V2DI
;; value constrained to a register.
10426 (define_insn "xop_pmacsdql"
10427 [(set (match_operand:V2DI 0 "register_operand" "=x")
10432 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10433 (parallel [(const_int 1)
10437 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10438 (parallel [(const_int 1)
10440 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10442 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10443 [(set_attr "type" "ssemuladd")
10444 (set_attr "mode" "TI")])
10446 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10447 ;; fake it with a multiply/add. In general, we expect the define_split to
10448 ;; occur before register allocation, so we have to handle the corner case where
10449 ;; the target is the same as operands 1/2
;; Widening multiply of elements 1 and 3 of the V4SI operands 1 and 2
;; into a V2DI result.  The destination is earlyclobber ("=&x"), so it
;; is never allocated to the same register as an input.  The split runs
;; only after reload and sets operands[3] to the V2DI zero constant for
;; use by the replacement pattern.
10450 (define_insn_and_split "xop_mulv2div2di3_low"
10451 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10455 (match_operand:V4SI 1 "register_operand" "%x")
10456 (parallel [(const_int 1)
10460 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10461 (parallel [(const_int 1)
10462 (const_int 3)])))))]
10465 "&& reload_completed"
10466 [(set (match_dup 0)
10474 (parallel [(const_int 1)
10479 (parallel [(const_int 1)
10483 operands[3] = CONST0_RTX (V2DImode);
10485 [(set_attr "type" "ssemul")
10486 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI operands 1 and 2 with a V2DI result.  The element
;; selections on both inputs start at index 0.  Operand 3 is a V2DI
;; value constrained to a register.
10488 (define_insn "xop_pmacsdqh"
10489 [(set (match_operand:V2DI 0 "register_operand" "=x")
10494 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10495 (parallel [(const_int 0)
10499 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10500 (parallel [(const_int 0)
10502 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10504 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10505 [(set_attr "type" "ssemuladd")
10506 (set_attr "mode" "TI")])
10508 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10509 ;; fake it with a multiply/add. In general, we expect the define_split to
10510 ;; occur before register allocation, so we have to handle the corner case where
10511 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of the V4SI operands 1 and 2
;; into a V2DI result.  The destination is earlyclobber ("=&x"), so it
;; is never allocated to the same register as an input.  The split runs
;; only after reload and sets operands[3] to the V2DI zero constant for
;; use by the replacement pattern.
10512 (define_insn_and_split "xop_mulv2div2di3_high"
10513 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10517 (match_operand:V4SI 1 "register_operand" "%x")
10518 (parallel [(const_int 0)
10522 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10523 (parallel [(const_int 0)
10524 (const_int 2)])))))]
10527 "&& reload_completed"
10528 [(set (match_dup 0)
10536 (parallel [(const_int 0)
10541 (parallel [(const_int 0)
10545 operands[3] = CONST0_RTX (V2DImode);
10547 [(set_attr "type" "ssemul")
10548 (set_attr "mode" "TI")])
10550 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI operands 1 and 2 with a V4SI result.  The element
;; selections on both inputs start at index 1.  Operand 3 is a V4SI
;; value constrained to a register.
10551 (define_insn "xop_pmacsswd"
10552 [(set (match_operand:V4SI 0 "register_operand" "=x")
10557 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10558 (parallel [(const_int 1)
10564 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10565 (parallel [(const_int 1)
10569 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10571 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10572 [(set_attr "type" "ssemuladd")
10573 (set_attr "mode" "TI")])
;; vpmacswd: V8HI operands 1 and 2 with a V4SI result.  The element
;; selections on both inputs start at index 1.  Operand 3 is a V4SI
;; value constrained to a register.
10575 (define_insn "xop_pmacswd"
10576 [(set (match_operand:V4SI 0 "register_operand" "=x")
10581 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10582 (parallel [(const_int 1)
10588 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10589 (parallel [(const_int 1)
10593 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10595 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10596 [(set_attr "type" "ssemuladd")
10597 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI operands 1 and 2 with a V4SI result.  The pattern
;; uses two groups of element selections, one starting at index 0 and
;; one starting at index 1.  Operand 3 is a V4SI value constrained to a
;; register.
10599 (define_insn "xop_pmadcsswd"
10600 [(set (match_operand:V4SI 0 "register_operand" "=x")
10606 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10607 (parallel [(const_int 0)
10613 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10614 (parallel [(const_int 0)
10622 (parallel [(const_int 1)
10629 (parallel [(const_int 1)
10632 (const_int 7)])))))
10633 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10635 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10636 [(set_attr "type" "ssemuladd")
10637 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI operands 1 and 2 with a V4SI result.  The pattern
;; uses two groups of element selections, one starting at index 0 and
;; one starting at index 1.  Operand 3 is a V4SI value constrained to a
;; register.
10639 (define_insn "xop_pmadcswd"
10640 [(set (match_operand:V4SI 0 "register_operand" "=x")
10646 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10647 (parallel [(const_int 0)
10653 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10654 (parallel [(const_int 0)
10662 (parallel [(const_int 1)
10669 (parallel [(const_int 1)
10672 (const_int 7)])))))
10673 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10675 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10676 [(set_attr "type" "ssemuladd")
10677 (set_attr "mode" "TI")])
10679 ;; XOP parallel XMM conditional moves
;; vpcmov over the SSEMODE modes, written as an if_then_else with
;; operand 3 as the selector and operands 1 and 2 as the two arms.  The
;; two alternatives allow a memory operand in operand 2 or in operand 3,
;; never both.
10680 (define_insn "xop_pcmov_<mode>"
10681 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10682 (if_then_else:SSEMODE
10683 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10684 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10685 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10687 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10688 [(set_attr "type" "sse4arg")])
;; vpcmov over the AVX256MODE modes, written as an if_then_else with
;; operand 3 as the selector and operands 1 and 2 as the two arms.  The
;; two alternatives allow a memory operand in operand 2 or in operand 3,
;; never both.
10690 (define_insn "xop_pcmov_<mode>256"
10691 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10692 (if_then_else:AVX256MODE
10693 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10694 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10695 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10697 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10698 [(set_attr "type" "sse4arg")])
10700 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V16QI source (register or memory), V8HI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
10701 (define_insn "xop_phaddbw"
10702 [(set (match_operand:V8HI 0 "register_operand" "=x")
10706 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10707 (parallel [(const_int 0)
10718 (parallel [(const_int 1)
10725 (const_int 15)])))))]
10727 "vphaddbw\t{%1, %0|%0, %1}"
10728 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V16QI source (register or memory), V4SI register result.
;; The pattern uses four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3.
10730 (define_insn "xop_phaddbd"
10731 [(set (match_operand:V4SI 0 "register_operand" "=x")
10736 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10737 (parallel [(const_int 0)
10744 (parallel [(const_int 1)
10747 (const_int 13)]))))
10752 (parallel [(const_int 2)
10759 (parallel [(const_int 3)
10762 (const_int 15)]))))))]
10764 "vphaddbd\t{%1, %0|%0, %1}"
10765 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V16QI source (register or memory), V2DI register result.
;; The pattern uses eight element selections of operand 1, starting at
;; indices 0-3 and 8-11.
10767 (define_insn "xop_phaddbq"
10768 [(set (match_operand:V2DI 0 "register_operand" "=x")
10774 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10775 (parallel [(const_int 0)
10780 (parallel [(const_int 1)
10786 (parallel [(const_int 2)
10791 (parallel [(const_int 3)
10792 (const_int 7)])))))
10798 (parallel [(const_int 8)
10803 (parallel [(const_int 9)
10804 (const_int 13)]))))
10809 (parallel [(const_int 10)
10814 (parallel [(const_int 11)
10815 (const_int 15)])))))))]
10817 "vphaddbq\t{%1, %0|%0, %1}"
10818 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V8HI source (register or memory), V4SI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
10820 (define_insn "xop_phaddwd"
10821 [(set (match_operand:V4SI 0 "register_operand" "=x")
10825 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10826 (parallel [(const_int 0)
10833 (parallel [(const_int 1)
10836 (const_int 7)])))))]
10838 "vphaddwd\t{%1, %0|%0, %1}"
10839 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V8HI source (register or memory), V2DI register result.
;; The pattern uses four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3.
10841 (define_insn "xop_phaddwq"
10842 [(set (match_operand:V2DI 0 "register_operand" "=x")
10847 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10848 (parallel [(const_int 0)
10853 (parallel [(const_int 1)
10859 (parallel [(const_int 2)
10864 (parallel [(const_int 3)
10865 (const_int 7)]))))))]
10867 "vphaddwq\t{%1, %0|%0, %1}"
10868 [(set_attr "type" "sseiadd1")])
;; vphadddq: V4SI source (register or memory), V2DI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
10870 (define_insn "xop_phadddq"
10871 [(set (match_operand:V2DI 0 "register_operand" "=x")
10875 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10876 (parallel [(const_int 0)
10881 (parallel [(const_int 1)
10882 (const_int 3)])))))]
10884 "vphadddq\t{%1, %0|%0, %1}"
10885 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V16QI source (register or memory), V8HI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
10887 (define_insn "xop_phaddubw"
10888 [(set (match_operand:V8HI 0 "register_operand" "=x")
10892 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10893 (parallel [(const_int 0)
10904 (parallel [(const_int 1)
10911 (const_int 15)])))))]
10913 "vphaddubw\t{%1, %0|%0, %1}"
10914 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V16QI source (register or memory), V4SI register result.
;; The pattern uses four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3.
10916 (define_insn "xop_phaddubd"
10917 [(set (match_operand:V4SI 0 "register_operand" "=x")
10922 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10923 (parallel [(const_int 0)
10930 (parallel [(const_int 1)
10933 (const_int 13)]))))
10938 (parallel [(const_int 2)
10945 (parallel [(const_int 3)
10948 (const_int 15)]))))))]
10950 "vphaddubd\t{%1, %0|%0, %1}"
10951 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V16QI source (register or memory), V2DI register result.
;; The pattern uses eight element selections of operand 1, starting at
;; indices 0-3 and 8-11.
10953 (define_insn "xop_phaddubq"
10954 [(set (match_operand:V2DI 0 "register_operand" "=x")
10960 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10961 (parallel [(const_int 0)
10966 (parallel [(const_int 1)
10972 (parallel [(const_int 2)
10977 (parallel [(const_int 3)
10978 (const_int 7)])))))
10984 (parallel [(const_int 8)
10989 (parallel [(const_int 9)
10990 (const_int 13)]))))
10995 (parallel [(const_int 10)
11000 (parallel [(const_int 11)
11001 (const_int 15)])))))))]
11003 "vphaddubq\t{%1, %0|%0, %1}"
11004 [(set_attr "type" "sseiadd1")])
;; vphadduwd: V8HI source (register or memory), V4SI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
11006 (define_insn "xop_phadduwd"
11007 [(set (match_operand:V4SI 0 "register_operand" "=x")
11011 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11012 (parallel [(const_int 0)
11019 (parallel [(const_int 1)
11022 (const_int 7)])))))]
11024 "vphadduwd\t{%1, %0|%0, %1}"
11025 [(set_attr "type" "sseiadd1")])
;; vphadduwq: V8HI source (register or memory), V2DI register result.
;; The pattern uses four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3.
11027 (define_insn "xop_phadduwq"
11028 [(set (match_operand:V2DI 0 "register_operand" "=x")
11033 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11034 (parallel [(const_int 0)
11039 (parallel [(const_int 1)
11045 (parallel [(const_int 2)
11050 (parallel [(const_int 3)
11051 (const_int 7)]))))))]
11053 "vphadduwq\t{%1, %0|%0, %1}"
11054 [(set_attr "type" "sseiadd1")])
;; vphaddudq: V4SI source (register or memory), V2DI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
11056 (define_insn "xop_phaddudq"
11057 [(set (match_operand:V2DI 0 "register_operand" "=x")
11061 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11062 (parallel [(const_int 0)
11067 (parallel [(const_int 1)
11068 (const_int 3)])))))]
11070 "vphaddudq\t{%1, %0|%0, %1}"
11071 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI source (register or memory), V8HI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
11073 (define_insn "xop_phsubbw"
11074 [(set (match_operand:V8HI 0 "register_operand" "=x")
11078 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11079 (parallel [(const_int 0)
11090 (parallel [(const_int 1)
11097 (const_int 15)])))))]
11099 "vphsubbw\t{%1, %0|%0, %1}"
11100 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI source (register or memory), V4SI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
11102 (define_insn "xop_phsubwd"
11103 [(set (match_operand:V4SI 0 "register_operand" "=x")
11107 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11108 (parallel [(const_int 0)
11115 (parallel [(const_int 1)
11118 (const_int 7)])))))]
11120 "vphsubwd\t{%1, %0|%0, %1}"
11121 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI source (register or memory), V2DI register result.
;; The pattern uses two element selections of operand 1, starting at
;; indices 0 and 1.
11123 (define_insn "xop_phsubdq"
11124 [(set (match_operand:V2DI 0 "register_operand" "=x")
11128 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11129 (parallel [(const_int 0)
11134 (parallel [(const_int 1)
11135 (const_int 3)])))))]
11137 "vphsubdq\t{%1, %0|%0, %1}"
11138 [(set_attr "type" "sseiadd1")])
11140 ;; XOP permute instructions
;; vpperm on V16QI, modelled as the opaque UNSPEC_XOP_PERMUTE of
;; operands 1, 2 and 3.  The insn condition requires TARGET_XOP and
;; rejects the case where operands 2 and 3 are both memory; the two
;; alternatives place the single allowed memory operand in operand 3 or
;; in operand 2.
11141 (define_insn "xop_pperm"
11142 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11144 [(match_operand:V16QI 1 "register_operand" "x,x")
11145 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11146 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11147 UNSPEC_XOP_PERMUTE))]
11148 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11149 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11150 [(set_attr "type" "sse4arg")
11151 (set_attr "mode" "TI")])
11153 ;; XOP pack instructions that combine two vectors into a smaller vector
;; xop_pperm_pack_v2di_v4si: vpperm producing a V4SI result from two V2DI
;; inputs (operands 1 and 2).  V16QI operand 3 is attached with (use ...)
;; and is printed as the third vpperm source.  Operands 2 and 3 may not
;; both be memory.
11154 (define_insn "xop_pperm_pack_v2di_v4si"
11155 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11158 (match_operand:V2DI 1 "register_operand" "x,x"))
11160 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11161 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11162 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11163 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11164 [(set_attr "type" "sse4arg")
11165 (set_attr "mode" "TI")])
;; xop_pperm_pack_v4si_v8hi: vpperm producing a V8HI result from two V4SI
;; inputs (operands 1 and 2).  V16QI operand 3 is attached with (use ...)
;; and is printed as the third vpperm source.  Operands 2 and 3 may not
;; both be memory.
11167 (define_insn "xop_pperm_pack_v4si_v8hi"
11168 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11171 (match_operand:V4SI 1 "register_operand" "x,x"))
11173 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11174 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11175 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11176 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11177 [(set_attr "type" "sse4arg")
11178 (set_attr "mode" "TI")])
;; xop_pperm_pack_v8hi_v16qi: vpperm producing a V16QI result from two V8HI
;; inputs (operands 1 and 2).  V16QI operand 3 is attached with (use ...)
;; and is printed as the third vpperm source.  Operands 2 and 3 may not
;; both be memory.
11180 (define_insn "xop_pperm_pack_v8hi_v16qi"
11181 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11184 (match_operand:V8HI 1 "register_operand" "x,x"))
11186 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11187 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11188 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11189 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11190 [(set_attr "type" "sse4arg")
11191 (set_attr "mode" "TI")])
11193 ;; XOP packed rotate instructions
;; rotl<mode>3: expander for rotating an SSEMODE1248 vector left by SImode
;; operand 2.  When operand 2 does not satisfy
;; const_0_to_<sserotatemax>_operand, the count is converted to
;; <ssescalarmode> if its mode differs, placed in every element of a
;; <ssescalarnum>-element PARALLEL, loaded with vec_init<mode>, and the
;; vector-count insn xop_vrotl<mode>3 is emitted.
11194 (define_expand "rotl<mode>3"
11195 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11196 (rotate:SSEMODE1248
11197 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11198 (match_operand:SI 2 "general_operand")))]
11201 /* If we were given a scalar, convert it to parallel */
11202 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11204 rtvec vs = rtvec_alloc (<ssescalarnum>);
11205 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11206 rtx reg = gen_reg_rtx (<MODE>mode);
11207 rtx op2 = operands[2];
11210 if (GET_MODE (op2) != <ssescalarmode>mode)
11212 op2 = gen_reg_rtx (<ssescalarmode>mode);
11213 convert_move (op2, operands[2], false);
11216 for (i = 0; i < <ssescalarnum>; i++)
11217 RTVEC_ELT (vs, i) = op2;
11219 emit_insn (gen_vec_init<mode> (reg, par));
11220 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; rotr<mode>3: expander for rotating an SSEMODE1248 vector right by SImode
;; operand 2.  When operand 2 does not satisfy
;; const_0_to_<sserotatemax>_operand, the count is broadcast into a vector
;; as in rotl<mode>3, that vector is negated with neg<mode>2, and
;; xop_vrotl<mode>3 is emitted with the negated counts.
11225 (define_expand "rotr<mode>3"
11226 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11227 (rotatert:SSEMODE1248
11228 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11229 (match_operand:SI 2 "general_operand")))]
11232 /* If we were given a scalar, convert it to parallel */
11233 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11235 rtvec vs = rtvec_alloc (<ssescalarnum>);
11236 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11237 rtx neg = gen_reg_rtx (<MODE>mode);
11238 rtx reg = gen_reg_rtx (<MODE>mode);
11239 rtx op2 = operands[2];
11242 if (GET_MODE (op2) != <ssescalarmode>mode)
11244 op2 = gen_reg_rtx (<ssescalarmode>mode);
11245 convert_move (op2, operands[2], false);
11248 for (i = 0; i < <ssescalarnum>; i++)
11249 RTVEC_ELT (vs, i) = op2;
11251 emit_insn (gen_vec_init<mode> (reg, par));
11252 emit_insn (gen_neg<mode>2 (neg, reg));
11253 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; xop_rotl<mode>3: rotate left by an immediate.  Operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the
;; vprot<ssevecsize> count.
11258 (define_insn "xop_rotl<mode>3"
11259 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11260 (rotate:SSEMODE1248
11261 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11262 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11264 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11265 [(set_attr "type" "sseishft")
11266 (set_attr "length_immediate" "1")
11267 (set_attr "mode" "TI")])
;; xop_rotr<mode>3: rotate each element right by an immediate count.
;; The insn is printed as vprot<ssevecsize> with a substitute count in
;; operands[3]: a right rotate by N is emitted as a left rotate by
;; (element width in bits - N).
;; The element width is GET_MODE_BITSIZE (<ssescalarmode>mode).  The previous
;; expression (<ssescalarnum> * 8) multiplied the number of elements by 8,
;; which equals the element width only for V4SI.
11269 (define_insn "xop_rotr<mode>3"
11270 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11271 (rotatert:SSEMODE1248
11272 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11273 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11276 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11277 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11279 [(set_attr "type" "sseishft")
11280 (set_attr "length_immediate" "1")
11281 (set_attr "mode" "TI")])
;; vrotr<mode>3: expander taking a vector of counts in operand 2.  It
;; negates operand 2 into a fresh register with neg<mode>2 and emits
;; xop_vrotl<mode>3 with the negated counts.
11283 (define_expand "vrotr<mode>3"
11284 [(match_operand:SSEMODE1248 0 "register_operand" "")
11285 (match_operand:SSEMODE1248 1 "register_operand" "")
11286 (match_operand:SSEMODE1248 2 "register_operand" "")]
11289 rtx reg = gen_reg_rtx (<MODE>mode);
11290 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11291 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; vrotl<mode>3: expander taking a vector of counts in operand 2; it passes
;; all three operands unchanged to xop_vrotl<mode>3.
11295 (define_expand "vrotl<mode>3"
11296 [(match_operand:SSEMODE1248 0 "register_operand" "")
11297 (match_operand:SSEMODE1248 1 "register_operand" "")
11298 (match_operand:SSEMODE1248 2 "register_operand" "")]
11301 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; xop_vrotl<mode>3: vprot<ssevecsize> with a vector of counts in operand 2.
;; The RTL is an if_then_else keyed on operand 2, with a rotate arm and a
;; rotatert arm whose count is (neg operand 2).  Either operand 1 or
;; operand 2 may be memory, but not both.
11305 (define_insn "xop_vrotl<mode>3"
11306 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11307 (if_then_else:SSEMODE1248
11309 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11311 (rotate:SSEMODE1248
11312 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11314 (rotatert:SSEMODE1248
11316 (neg:SSEMODE1248 (match_dup 2)))))]
11317 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11318 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11319 [(set_attr "type" "sseishft")
11320 (set_attr "prefix_data16" "0")
11321 (set_attr "prefix_extra" "2")
11322 (set_attr "mode" "TI")])
11324 ;; XOP packed shift instructions.
11325 ;; FIXME: add V2DI back in
;; vlshr<mode>3: expander for SSEMODE124 with a vector of counts in
;; operand 2.  It negates the counts with neg<mode>2 and emits
;; xop_lshl<mode>3 with the negated vector.
11326 (define_expand "vlshr<mode>3"
11327 [(match_operand:SSEMODE124 0 "register_operand" "")
11328 (match_operand:SSEMODE124 1 "register_operand" "")
11329 (match_operand:SSEMODE124 2 "register_operand" "")]
11332 rtx neg = gen_reg_rtx (<MODE>mode);
11333 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11334 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; vashr<mode>3: expander for SSEMODE124 with a vector of counts in
;; operand 2.  It negates the counts with neg<mode>2 and emits
;; xop_ashl<mode>3 with the negated vector.
11338 (define_expand "vashr<mode>3"
11339 [(match_operand:SSEMODE124 0 "register_operand" "")
11340 (match_operand:SSEMODE124 1 "register_operand" "")
11341 (match_operand:SSEMODE124 2 "register_operand" "")]
11344 rtx neg = gen_reg_rtx (<MODE>mode);
11345 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11346 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; vashl<mode>3: expander for SSEMODE124 with a vector of counts in
;; operand 2; it passes all three operands unchanged to xop_ashl<mode>3.
11350 (define_expand "vashl<mode>3"
11351 [(match_operand:SSEMODE124 0 "register_operand" "")
11352 (match_operand:SSEMODE124 1 "register_operand" "")
11353 (match_operand:SSEMODE124 2 "register_operand" "")]
11356 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; xop_ashl<mode>3: vpsha<ssevecsize> with a vector of counts in operand 2.
;; The RTL is an if_then_else keyed on operand 2, with an ashift arm and an
;; ashiftrt arm whose count is (neg operand 2).  Either operand 1 or
;; operand 2 may be memory, but not both.
11360 (define_insn "xop_ashl<mode>3"
11361 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11362 (if_then_else:SSEMODE1248
11364 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11366 (ashift:SSEMODE1248
11367 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11369 (ashiftrt:SSEMODE1248
11371 (neg:SSEMODE1248 (match_dup 2)))))]
11372 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11373 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11374 [(set_attr "type" "sseishft")
11375 (set_attr "prefix_data16" "0")
11376 (set_attr "prefix_extra" "2")
11377 (set_attr "mode" "TI")])
;; xop_lshl<mode>3: vpshl<ssevecsize> with a vector of counts in operand 2.
;; The RTL is an if_then_else keyed on operand 2, with an ashift arm and an
;; lshiftrt arm whose count is (neg operand 2).  Either operand 1 or
;; operand 2 may be memory, but not both.
11379 (define_insn "xop_lshl<mode>3"
11380 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11381 (if_then_else:SSEMODE1248
11383 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11385 (ashift:SSEMODE1248
11386 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11388 (lshiftrt:SSEMODE1248
11390 (neg:SSEMODE1248 (match_dup 2)))))]
11391 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11392 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11393 [(set_attr "type" "sseishft")
11394 (set_attr "prefix_data16" "0")
11395 (set_attr "prefix_extra" "2")
11396 (set_attr "mode" "TI")])
11398 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; ashlv16qi3: expander for a V16QI shift by scalar SImode operand 2.  It
;; puts operand 2 in all 16 elements of a PARALLEL, loads that with
;; vec_initv16qi, and emits xop_ashlv16qi3 with the resulting vector.
11399 (define_expand "ashlv16qi3"
11400 [(match_operand:V16QI 0 "register_operand" "")
11401 (match_operand:V16QI 1 "register_operand" "")
11402 (match_operand:SI 2 "nonmemory_operand" "")]
11405 rtvec vs = rtvec_alloc (16);
11406 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11407 rtx reg = gen_reg_rtx (V16QImode);
11409 for (i = 0; i < 16; i++)
11410 RTVEC_ELT (vs, i) = operands[2];
11412 emit_insn (gen_vec_initv16qi (reg, par));
11413 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; lshlv16qi3: expander for a V16QI shift by scalar SImode operand 2.  It
;; puts operand 2 in all 16 elements of a PARALLEL, loads that with
;; vec_initv16qi, and emits xop_lshlv16qi3 with the resulting vector.
11417 (define_expand "lshlv16qi3"
11418 [(match_operand:V16QI 0 "register_operand" "")
11419 (match_operand:V16QI 1 "register_operand" "")
11420 (match_operand:SI 2 "nonmemory_operand" "")]
11423 rtvec vs = rtvec_alloc (16);
11424 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11425 rtx reg = gen_reg_rtx (V16QImode);
11427 for (i = 0; i < 16; i++)
11428 RTVEC_ELT (vs, i) = operands[2];
11430 emit_insn (gen_vec_initv16qi (reg, par));
11431 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; ashrv16qi3: expander for a V16QI arithmetic right shift by scalar SImode
;; operand 2, implemented with xop_ashlv16qi3 and a negated count.
;; A CONST_INT count is negated at expand time before being broadcast.
;; When operand 2 is not a CONST_INT, the broadcast vector is negated with
;; negv16qi2 instead.
11435 (define_expand "ashrv16qi3"
11436 [(match_operand:V16QI 0 "register_operand" "")
11437 (match_operand:V16QI 1 "register_operand" "")
11438 (match_operand:SI 2 "nonmemory_operand" "")]
11441 rtvec vs = rtvec_alloc (16);
11442 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11443 rtx reg = gen_reg_rtx (V16QImode);
11445 rtx ele = ((CONST_INT_P (operands[2]))
11446 ? GEN_INT (- INTVAL (operands[2]))
11449 for (i = 0; i < 16; i++)
11450 RTVEC_ELT (vs, i) = ele;
11452 emit_insn (gen_vec_initv16qi (reg, par));
11454 if (!CONST_INT_P (operands[2]))
11456 rtx neg = gen_reg_rtx (V16QImode);
11457 emit_insn (gen_negv16qi2 (neg, reg));
11458 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11461 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; ashrv2di3: expander for a V2DI arithmetic right shift by scalar DImode
;; operand 2, implemented with xop_ashlv2di3 and a negated count.
;; A CONST_INT count is negated at expand time.  Otherwise the count is
;; negated with negdi2, after a convert_move to DImode when its mode is not
;; already DImode.  The negated count fills both elements of the vector
;; loaded with vec_initv2di.
11466 (define_expand "ashrv2di3"
11467 [(match_operand:V2DI 0 "register_operand" "")
11468 (match_operand:V2DI 1 "register_operand" "")
11469 (match_operand:DI 2 "nonmemory_operand" "")]
11472 rtvec vs = rtvec_alloc (2);
11473 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11474 rtx reg = gen_reg_rtx (V2DImode);
11477 if (CONST_INT_P (operands[2]))
11478 ele = GEN_INT (- INTVAL (operands[2]));
11479 else if (GET_MODE (operands[2]) != DImode)
11481 rtx move = gen_reg_rtx (DImode);
11482 ele = gen_reg_rtx (DImode);
11483 convert_move (move, operands[2], false);
11484 emit_insn (gen_negdi2 (ele, move));
11488 ele = gen_reg_rtx (DImode);
11489 emit_insn (gen_negdi2 (ele, operands[2]));
11492 RTVEC_ELT (vs, 0) = ele;
11493 RTVEC_ELT (vs, 1) = ele;
11494 emit_insn (gen_vec_initv2di (reg, par));
11495 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11499 ;; XOP FRCZ support
;; xop_frcz<mode>2: vfrcz<ssemodesuffix> over the SSEMODEF2P modes, reading
;; operand 1 (register or memory) and writing register operand 0.
11501 (define_insn "xop_frcz<mode>2"
11502 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11504 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11507 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11508 [(set_attr "type" "ssecvt1")
11509 (set_attr "mode" "<MODE>")])
;; xop_vmfrcz<mode>2: vfrcz<ssescalarmodesuffix> expressed as a vec_merge.
;; Operand 2 (register or memory) is the printed source; operand 1 is tied
;; to the destination register by the "0" constraint.
11512 (define_insn "xop_vmfrcz<mode>2"
11513 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11514 (vec_merge:SSEMODEF2P
11516 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11518 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11521 "vfrcz<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
11522 [(set_attr "type" "ssecvt1")
11523 (set_attr "mode" "<MODE>")])
;; xop_frcz<mode>2256: vfrcz<ssemodesuffix> over the FMA4MODEF4 modes,
;; reading operand 1 (register or memory) and writing register operand 0.
11525 (define_insn "xop_frcz<mode>2256"
11526 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11528 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11531 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11532 [(set_attr "type" "ssecvt1")
11533 (set_attr "mode" "<MODE>")])
;; xop_maskcmp<mode>3: vpcom comparison of operands 2 and 3.  The comparison
;; code is operator 1, accepted by ix86_comparison_int_operator, and is
;; printed into the mnemonic through the %Y1 modifier.
11535 (define_insn "xop_maskcmp<mode>3"
11536 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11537 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11538 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11539 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11541 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11542 [(set_attr "type" "sse4arg")
11543 (set_attr "prefix_data16" "0")
11544 (set_attr "prefix_rep" "0")
11545 (set_attr "prefix_extra" "2")
11546 (set_attr "length_immediate" "1")
11547 (set_attr "mode" "TI")])
;; xop_maskcmp_uns<mode>3: vpcom...u comparison of operands 2 and 3.  The
;; comparison code is operator 1, accepted by ix86_comparison_uns_operator,
;; and is printed into the mnemonic through %Y1 followed by a "u" suffix.
11549 (define_insn "xop_maskcmp_uns<mode>3"
11550 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11551 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11552 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11553 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11555 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11556 [(set_attr "type" "ssecmp")
11557 (set_attr "prefix_data16" "0")
11558 (set_attr "prefix_rep" "0")
11559 (set_attr "prefix_extra" "2")
11560 (set_attr "length_immediate" "1")
11561 (set_attr "mode" "TI")])
11563 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11564 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11565 ;; the exact instruction generated for the intrinsic.
;; xop_maskcmp_uns2<mode>3: same assembler output as the plain unsigned
;; compare pattern, but the comparison operator is wrapped in
;; UNSPEC_XOP_UNSIGNED_CMP.
11566 (define_insn "xop_maskcmp_uns2<mode>3"
11567 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11568 (unspec:SSEMODE1248
11569 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11570 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11571 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11572 UNSPEC_XOP_UNSIGNED_CMP))]
11574 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11575 [(set_attr "type" "ssecmp")
11576 (set_attr "prefix_data16" "0")
11577 (set_attr "prefix_extra" "2")
11578 (set_attr "length_immediate" "1")
11579 (set_attr "mode" "TI")])
11581 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11582 ;; being added here to be complete.
;; xop_pcom_tf<mode>3: UNSPEC_XOP_TRUEFALSE over operands 1 and 2.  Constant
;; operand 3 selects the mnemonic: vpcomtrue<ssevecsize> when it is
;; non-zero, vpcomfalse<ssevecsize> when it is zero.
11583 (define_insn "xop_pcom_tf<mode>3"
11584 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11585 (unspec:SSEMODE1248
11586 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11587 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11588 (match_operand:SI 3 "const_int_operand" "n")]
11589 UNSPEC_XOP_TRUEFALSE))]
11592 return ((INTVAL (operands[3]) != 0)
11593 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11594 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11596 [(set_attr "type" "ssecmp")
11597 (set_attr "prefix_data16" "0")
11598 (set_attr "prefix_extra" "2")
11599 (set_attr "length_immediate" "1")
11600 (set_attr "mode" "TI")])
;; xop_vpermil2<mode>3: vpermil2<ssemodesuffix> with two AVXMODEF2P sources
;; (operands 1 and 2), a <avxpermvecmode> operand 3 that may be memory, and
;; an immediate operand 4 restricted to 0..3.
11602 (define_insn "xop_vpermil2<mode>3"
11603 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11605 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11606 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11607 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11608 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11611 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11612 [(set_attr "type" "sse4arg")
11613 (set_attr "length_immediate" "1")
11614 (set_attr "mode" "<MODE>")])
11616 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; *avx_aesenc: three-operand VEX form, vaesenc, enabled when both
;; TARGET_AES and TARGET_AVX hold.  Operand 2 may be memory.
11617 (define_insn "*avx_aesenc"
11618 [(set (match_operand:V2DI 0 "register_operand" "=x")
11619 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11620 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11622 "TARGET_AES && TARGET_AVX"
11623 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11624 [(set_attr "type" "sselog1")
11625 (set_attr "prefix_extra" "1")
11626 (set_attr "prefix" "vex")
11627 (set_attr "mode" "TI")])
;; aesenc: two-operand form.  Operand 1 is tied to the destination by the
;; "0" constraint; operand 2 may be memory.
11629 (define_insn "aesenc"
11630 [(set (match_operand:V2DI 0 "register_operand" "=x")
11631 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11632 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11635 "aesenc\t{%2, %0|%0, %2}"
11636 [(set_attr "type" "sselog1")
11637 (set_attr "prefix_extra" "1")
11638 (set_attr "mode" "TI")])
;; *avx_aesenclast: three-operand VEX form, vaesenclast
;; (UNSPEC_AESENCLAST), enabled when both TARGET_AES and TARGET_AVX hold.
11640 (define_insn "*avx_aesenclast"
11641 [(set (match_operand:V2DI 0 "register_operand" "=x")
11642 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11643 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11644 UNSPEC_AESENCLAST))]
11645 "TARGET_AES && TARGET_AVX"
11646 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11647 [(set_attr "type" "sselog1")
11648 (set_attr "prefix_extra" "1")
11649 (set_attr "prefix" "vex")
11650 (set_attr "mode" "TI")])
;; aesenclast: two-operand form (UNSPEC_AESENCLAST).  Operand 1 is tied to
;; the destination by the "0" constraint; operand 2 may be memory.
11652 (define_insn "aesenclast"
11653 [(set (match_operand:V2DI 0 "register_operand" "=x")
11654 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11655 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11656 UNSPEC_AESENCLAST))]
11658 "aesenclast\t{%2, %0|%0, %2}"
11659 [(set_attr "type" "sselog1")
11660 (set_attr "prefix_extra" "1")
11661 (set_attr "mode" "TI")])
;; *avx_aesdec: three-operand VEX form, vaesdec, enabled when both
;; TARGET_AES and TARGET_AVX hold.  Operand 2 may be memory.
11663 (define_insn "*avx_aesdec"
11664 [(set (match_operand:V2DI 0 "register_operand" "=x")
11665 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11666 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11668 "TARGET_AES && TARGET_AVX"
11669 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11670 [(set_attr "type" "sselog1")
11671 (set_attr "prefix_extra" "1")
11672 (set_attr "prefix" "vex")
11673 (set_attr "mode" "TI")])
;; aesdec: two-operand form.  Operand 1 is tied to the destination by the
;; "0" constraint; operand 2 may be memory.
11675 (define_insn "aesdec"
11676 [(set (match_operand:V2DI 0 "register_operand" "=x")
11677 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11678 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11681 "aesdec\t{%2, %0|%0, %2}"
11682 [(set_attr "type" "sselog1")
11683 (set_attr "prefix_extra" "1")
11684 (set_attr "mode" "TI")])
;; *avx_aesdeclast: three-operand VEX form, vaesdeclast
;; (UNSPEC_AESDECLAST), enabled when both TARGET_AES and TARGET_AVX hold.
11686 (define_insn "*avx_aesdeclast"
11687 [(set (match_operand:V2DI 0 "register_operand" "=x")
11688 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11689 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11690 UNSPEC_AESDECLAST))]
11691 "TARGET_AES && TARGET_AVX"
11692 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11693 [(set_attr "type" "sselog1")
11694 (set_attr "prefix_extra" "1")
11695 (set_attr "prefix" "vex")
11696 (set_attr "mode" "TI")])
;; aesdeclast: two-operand form (UNSPEC_AESDECLAST).  Operand 1 is tied to
;; the destination by the "0" constraint; operand 2 may be memory.
11698 (define_insn "aesdeclast"
11699 [(set (match_operand:V2DI 0 "register_operand" "=x")
11700 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11701 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11702 UNSPEC_AESDECLAST))]
11704 "aesdeclast\t{%2, %0|%0, %2}"
11705 [(set_attr "type" "sselog1")
11706 (set_attr "prefix_extra" "1")
11707 (set_attr "mode" "TI")])
;; aesimc: single-source pattern (operand 1 may be memory).  The template
;; starts with %v and the "prefix" attribute is maybe_vex, so one pattern
;; serves both encodings.
11709 (define_insn "aesimc"
11710 [(set (match_operand:V2DI 0 "register_operand" "=x")
11711 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11714 "%vaesimc\t{%1, %0|%0, %1}"
11715 [(set_attr "type" "sselog1")
11716 (set_attr "prefix_extra" "1")
11717 (set_attr "prefix" "maybe_vex")
11718 (set_attr "mode" "TI")])
;; aeskeygenassist: UNSPEC_AESKEYGENASSIST of operand 1 (may be memory) and
;; an immediate operand 2 restricted to 0..255.  The template starts with %v
;; and the "prefix" attribute is maybe_vex.
11720 (define_insn "aeskeygenassist"
11721 [(set (match_operand:V2DI 0 "register_operand" "=x")
11722 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11723 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11724 UNSPEC_AESKEYGENASSIST))]
11726 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11727 [(set_attr "type" "sselog1")
11728 (set_attr "prefix_extra" "1")
11729 (set_attr "length_immediate" "1")
11730 (set_attr "prefix" "maybe_vex")
11731 (set_attr "mode" "TI")])
;; *vpclmulqdq: three-operand VEX form with an immediate operand 3
;; restricted to 0..255, enabled when both TARGET_PCLMUL and TARGET_AVX hold.
11733 (define_insn "*vpclmulqdq"
11734 [(set (match_operand:V2DI 0 "register_operand" "=x")
11735 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11736 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11737 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11739 "TARGET_PCLMUL && TARGET_AVX"
11740 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11741 [(set_attr "type" "sselog1")
11742 (set_attr "prefix_extra" "1")
11743 (set_attr "length_immediate" "1")
11744 (set_attr "prefix" "vex")
11745 (set_attr "mode" "TI")])
;; pclmulqdq: two-operand form.  Operand 1 is tied to the destination by the
;; "0" constraint, operand 2 may be memory, and operand 3 is an immediate
;; restricted to 0..255.
11747 (define_insn "pclmulqdq"
11748 [(set (match_operand:V2DI 0 "register_operand" "=x")
11749 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11750 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11751 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11754 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11755 [(set_attr "type" "sselog1")
11756 (set_attr "prefix_extra" "1")
11757 (set_attr "length_immediate" "1")
11758 (set_attr "mode" "TI")])
;; avx_vzeroall: expander that builds the PARALLEL by hand.  Element 0 is an
;; UNSPEC_VOLATILE; elements 1..nregs each SET one SSE register, viewed as
;; V8SI, to zero.  nregs is 16 when TARGET_64BIT holds and 8 otherwise.
11760 (define_expand "avx_vzeroall"
11761 [(match_par_dup 0 [(const_int 0)])]
11764 int nregs = TARGET_64BIT ? 16 : 8;
11767 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11769 XVECEXP (operands[0], 0, 0)
11770 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11773 for (regno = 0; regno < nregs; regno++)
11774 XVECEXP (operands[0], 0, regno + 1)
11775 = gen_rtx_SET (VOIDmode,
11776 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11777 CONST0_RTX (V8SImode));
;; *avx_vzeroall: matches a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL unspec_volatile.
;; The attributes declare no ModRM byte and no memory access.
11780 (define_insn "*avx_vzeroall"
11781 [(match_parallel 0 "vzeroall_operation"
11782 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11785 [(set_attr "type" "sse")
11786 (set_attr "modrm" "0")
11787 (set_attr "memory" "none")
11788 (set_attr "prefix" "vex")
11789 (set_attr "mode" "OI")])
11791 ;; vzeroupper clobbers the upper 128 bits of AVX registers.
;; avx_vzeroupper: expander that builds the PARALLEL by hand.  Element 0 is
;; the UNSPECV_VZEROUPPER unspec_volatile; elements 1..nregs each CLOBBER
;; one SSE register viewed as V8SI.  nregs is 16 when TARGET_64BIT holds
;; and 8 otherwise.
11792 (define_expand "avx_vzeroupper"
11793 [(match_par_dup 0 [(const_int 0)])]
11796 int nregs = TARGET_64BIT ? 16 : 8;
11799 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11801 XVECEXP (operands[0], 0, 0)
11802 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11803 UNSPECV_VZEROUPPER);
11805 for (regno = 0; regno < nregs; regno++)
11806 XVECEXP (operands[0], 0, regno + 1)
11807 = gen_rtx_CLOBBER (VOIDmode,
11808 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; *avx_vzeroupper: matches a PARALLEL accepted by the vzeroupper_operation
;; predicate whose first element is the UNSPECV_VZEROUPPER unspec_volatile.
;; The attributes declare no ModRM byte and no memory access.
11811 (define_insn "*avx_vzeroupper"
11812 [(match_parallel 0 "vzeroupper_operation"
11813 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11816 [(set_attr "type" "sse")
11817 (set_attr "modrm" "0")
11818 (set_attr "memory" "none")
11819 (set_attr "prefix" "vex")
11820 (set_attr "mode" "OI")])
;; vec_dup<mode>: duplicate scalar operand 1 into every element of an
;; AVX256MODE24P vector.  The memory alternative prints
;; vbroadcast<ssescalarmodesuffix>.
;; When operand 1 is a register, the insn is split after reload into two
;; sets: a vec_duplicate into the <avxhalfvecmode> view of the destination
;; hard register, then a vec_concat of that half with itself.
11822 (define_insn_and_split "vec_dup<mode>"
11823 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11824 (vec_duplicate:AVX256MODE24P
11825 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11828 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11830 "&& reload_completed && REG_P (operands[1])"
11831 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11832 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11834 operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
11836 [(set_attr "type" "ssemov")
11837 (set_attr "prefix_extra" "1")
11838 (set_attr "prefix" "vex")
11839 (set_attr "mode" "V8SF")])
;; avx_vbroadcastf128_<mode>: vec_concat built from half-width operand 1,
;; with three alternatives:
;;   memory source              -> vbroadcastf128
;;   source tied to destination -> vinsertf128 with immediate 1
;;   other register source      -> vperm2f128 with immediate 0
11841 (define_insn "avx_vbroadcastf128_<mode>"
11842 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11843 (vec_concat:AVX256MODE
11844 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11848 vbroadcastf128\t{%1, %0|%0, %1}
11849 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11850 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11851 [(set_attr "type" "ssemov,sselog1,sselog1")
11852 (set_attr "prefix_extra" "1")
11853 (set_attr "length_immediate" "0,1,1")
11854 (set_attr "prefix" "vex")
11855 (set_attr "mode" "V4SF,V8SF,V8SF")])
11857 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11858 ;; If it so happens that the input is in memory, use vbroadcast.
11859 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; *avx_vperm_broadcast_v4sf: V4SF broadcast of the element whose index is
;; constant operand 3.  For a memory source, the address is advanced by
;; elt * 4 bytes and vbroadcastss is used.  For a register source,
;; vpermilps is used with immediate elt * 0x55, which places the same 2-bit
;; index in all four selector fields.
11860 (define_insn "*avx_vperm_broadcast_v4sf"
11861 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11863 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11864 (match_parallel 2 "avx_vbroadcast_operand"
11865 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11868 int elt = INTVAL (operands[3]);
11869 switch (which_alternative)
11873 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11874 return "vbroadcastss\t{%1, %0|%0, %1}";
11876 operands[2] = GEN_INT (elt * 0x55);
11877 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11879 gcc_unreachable ();
11882 [(set_attr "type" "ssemov,ssemov,sselog1")
11883 (set_attr "prefix_extra" "1")
11884 (set_attr "length_immediate" "0,0,1")
11885 (set_attr "prefix" "vex")
11886 (set_attr "mode" "SF,SF,V4SF")])
;; *avx_vperm_broadcast_<mode>: 256-bit broadcast of the element whose index
;; is constant operand 3, split after reload.
;; The visible split code has two parts.  One emits avx_vpermil<mode>
;; followed by avx_vperm2f128<mode>3 on the destination.  The other rewrites
;; operand 1 to address the selected scalar in memory, so that the
;; replacement vec_duplicate pattern applies.
;; NOTE(review): the test choosing between the two parts is not visible in
;; this listing -- presumably register versus memory source; confirm.
11888 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11889 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11890 (vec_select:AVX256MODEF2P
11891 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11892 (match_parallel 2 "avx_vbroadcast_operand"
11893 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11896 "&& reload_completed"
11897 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11899 rtx op0 = operands[0], op1 = operands[1];
11900 int elt = INTVAL (operands[3]);
11906 /* Shuffle element we care about into all elements of the 128-bit lane.
11907 The other lane gets shuffled too, but we don't care. */
11908 if (<MODE>mode == V4DFmode)
11909 mask = (elt & 1 ? 15 : 0);
11911 mask = (elt & 3) * 0x55;
11912 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11914 /* Shuffle the lane we care about into both lanes of the dest. */
11915 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11916 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11920 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11921 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; avx_vpermil<mode> (AVXMODEFDP modes): converts the immediate in operand 2
;; into a PARALLEL of element indices for vec_select.  Mask bits 0 and 1
;; give the indices for elements 0 and 1.  For V4DF, mask bits 2 and 3 give
;; the indices for elements 2 and 3, offset by 2.
11924 (define_expand "avx_vpermil<mode>"
11925 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11926 (vec_select:AVXMODEFDP
11927 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11928 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11931 int mask = INTVAL (operands[2]);
11932 rtx perm[<ssescalarnum>];
11934 perm[0] = GEN_INT (mask & 1);
11935 perm[1] = GEN_INT ((mask >> 1) & 1);
11936 if (<MODE>mode == V4DFmode)
11938 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11939 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11943 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; avx_vpermil<mode> (AVXMODEFSP modes): converts the immediate in operand 2
;; into a PARALLEL of element indices for vec_select.  Each 2-bit field of
;; the mask gives the index for one of elements 0..3.  For V8SF, the same
;; four fields are reused for elements 4..7 with 4 added.
11946 (define_expand "avx_vpermil<mode>"
11947 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11948 (vec_select:AVXMODEFSP
11949 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11950 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11953 int mask = INTVAL (operands[2]);
11954 rtx perm[<ssescalarnum>];
11956 perm[0] = GEN_INT (mask & 3);
11957 perm[1] = GEN_INT ((mask >> 2) & 3);
11958 perm[2] = GEN_INT ((mask >> 4) & 3);
11959 perm[3] = GEN_INT ((mask >> 6) & 3);
11960 if (<MODE>mode == V8SFmode)
11962 perm[4] = GEN_INT ((mask & 3) + 4);
11963 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11964 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11965 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11969 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; *avx_vpermilp<mode>: matches a vec_select whose PARALLEL is accepted by
;; avx_vpermilp_<mode>_operand.  At output time the immediate is taken from
;; avx_vpermilp_parallel (which returns the mask plus one, hence the "- 1")
;; and printed as the vpermil<ssemodesuffix> count.
11972 (define_insn "*avx_vpermilp<mode>"
11973 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11974 (vec_select:AVXMODEF2P
11975 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11976 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11977 [(match_operand 3 "const_int_operand" "")])))]
11980 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11981 operands[2] = GEN_INT (mask);
11982 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11984 [(set_attr "type" "sselog")
11985 (set_attr "prefix_extra" "1")
11986 (set_attr "length_immediate" "1")
11987 (set_attr "prefix" "vex")
11988 (set_attr "mode" "<MODE>")])
;; avx_vpermilvar<mode>3: vpermil<ssemodesuffix> with a vector control in
;; <avxpermvecmode> operand 2 (register or memory) rather than an immediate.
11990 (define_insn "avx_vpermilvar<mode>3"
11991 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11993 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11994 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11997 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11998 [(set_attr "type" "sselog")
11999 (set_attr "prefix_extra" "1")
12000 (set_attr "prefix" "vex")
12001 (set_attr "mode" "<MODE>")])
;; avx_vperm2f128<mode>3: expander whose default RTL is the
;; UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor bit 7 of the immediate
;; is set ((mask & 0x88) == 0), it instead builds a vec_select from the
;; vec_concat of operands 1 and 2.  Mask bits 0-1 choose the half-vector
;; copied to the low half of the result and bits 4-5 the half copied to the
;; high half.
;; NOTE(review): the statements that emit the SET built here are not
;; visible in this listing.
12003 (define_expand "avx_vperm2f128<mode>3"
12004 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12005 (unspec:AVX256MODE2P
12006 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12007 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12008 (match_operand:SI 3 "const_0_to_255_operand" "")]
12009 UNSPEC_VPERMIL2F128))]
12012 int mask = INTVAL (operands[3]);
12013 if ((mask & 0x88) == 0)
12015 rtx perm[<ssescalarnum>], t1, t2;
12016 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12018 base = (mask & 3) * nelt2;
12019 for (i = 0; i < nelt2; ++i)
12020 perm[i] = GEN_INT (base + i);
12022 base = ((mask >> 4) & 3) * nelt2;
12023 for (i = 0; i < nelt2; ++i)
12024 perm[i + nelt2] = GEN_INT (base + i);
12026 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
12027 operands[1], operands[2]);
12028 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12029 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12030 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12036 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12037 ;; means that in order to represent this properly in rtl we'd have to
12038 ;; nest *another* vec_concat with a zero operand and do the select from
12039 ;; a 4x wide vector. That doesn't seem very nice.
;; *avx_vperm2f128<mode>_full: the UNSPEC_VPERMIL2F128 form of vperm2f128.
;; It accepts any immediate in 0..255 and prints it unchanged.
12040 (define_insn "*avx_vperm2f128<mode>_full"
12041 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12042 (unspec:AVX256MODE2P
12043 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12044 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12045 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12046 UNSPEC_VPERMIL2F128))]
12048 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12049 [(set_attr "type" "sselog")
12050 (set_attr "prefix_extra" "1")
12051 (set_attr "length_immediate" "1")
12052 (set_attr "prefix" "vex")
12053 (set_attr "mode" "V8SF")])
;; *avx_vperm2f128<mode>_nozero: the vec_select-of-vec_concat form of
;; vperm2f128.  The PARALLEL must be accepted by
;; avx_vperm2f128_<mode>_operand.  At output time the immediate is taken
;; from avx_vperm2f128_parallel (which returns the mask plus one, hence the
;; "- 1").
12055 (define_insn "*avx_vperm2f128<mode>_nozero"
12056 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12057 (vec_select:AVX256MODE2P
12058 (vec_concat:<ssedoublesizemode>
12059 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12060 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12061 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
12062 [(match_operand 4 "const_int_operand" "")])))]
12065 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12066 operands[3] = GEN_INT (mask);
12067 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12069 [(set_attr "type" "sselog")
12070 (set_attr "prefix_extra" "1")
12071 (set_attr "length_immediate" "1")
12072 (set_attr "prefix" "vex")
12073 (set_attr "mode" "V8SF")])
;; avx_vinsertf128<mode>: expander that inserts half-width operand 2 into
;; 256-bit operand 1.  It dispatches on immediate operand 3 (restricted to
;; 0..1) to vec_set_lo_<mode> or vec_set_hi_<mode>; any other value is
;; unreachable.
;; NOTE(review): the case labels are not visible in this listing --
;; presumably 0 selects the low half and 1 the high half; confirm.
12075 (define_expand "avx_vinsertf128<mode>"
12076 [(match_operand:AVX256MODE 0 "register_operand" "")
12077 (match_operand:AVX256MODE 1 "register_operand" "")
12078 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12079 (match_operand:SI 3 "const_0_to_1_operand" "")]
12082 switch (INTVAL (operands[3]))
12085 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12089 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12093 gcc_unreachable ();
;; vec_set_lo_<mode> (AVX256MODE4P modes): the result is operand 2
;; concatenated with elements 2 and 3 of operand 1, i.e. operand 2 replaces
;; the low half.  Printed as vinsertf128 with immediate 0x0.
12098 (define_insn "vec_set_lo_<mode>"
12099 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12100 (vec_concat:AVX256MODE4P
12101 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12102 (vec_select:<avxhalfvecmode>
12103 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12104 (parallel [(const_int 2) (const_int 3)]))))]
12106 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12107 [(set_attr "type" "sselog")
12108 (set_attr "prefix_extra" "1")
12109 (set_attr "length_immediate" "1")
12110 (set_attr "prefix" "vex")
12111 (set_attr "mode" "V8SF")])
;; vec_set_hi_<mode> (AVX256MODE4P modes): the result is elements 0 and 1 of
;; operand 1 concatenated with operand 2, i.e. operand 2 replaces the high
;; half.  Printed as vinsertf128 with immediate 0x1.
12113 (define_insn "vec_set_hi_<mode>"
12114 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12115 (vec_concat:AVX256MODE4P
12116 (vec_select:<avxhalfvecmode>
12117 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12118 (parallel [(const_int 0) (const_int 1)]))
12119 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12121 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12122 [(set_attr "type" "sselog")
12123 (set_attr "prefix_extra" "1")
12124 (set_attr "length_immediate" "1")
12125 (set_attr "prefix" "vex")
12126 (set_attr "mode" "V8SF")])
;; Low-half replacement for the modes iterated by AVX256MODE8P.
;; The result is operand 2 (half-width vector, register or memory)
;; concatenated with elements 4..7 selected from operand 1.
;; Emitted as vinsertf128 with immediate 0.
12128 (define_insn "vec_set_lo_<mode>"
12129 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12130 (vec_concat:AVX256MODE8P
12131 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12132 (vec_select:<avxhalfvecmode>
12133 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12134 (parallel [(const_int 4) (const_int 5)
12135 (const_int 6) (const_int 7)]))))]
12137 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12138 [(set_attr "type" "sselog")
12139 (set_attr "prefix_extra" "1")
12140 (set_attr "length_immediate" "1")
12141 (set_attr "prefix" "vex")
12142 (set_attr "mode" "V8SF")])
;; High-half replacement for the modes iterated by AVX256MODE8P.
;; Elements 0..3 of operand 1 are kept and operand 2 (half-width vector,
;; register or memory) is concatenated after them.
;; Emitted as vinsertf128 with immediate 1.
12144 (define_insn "vec_set_hi_<mode>"
12145 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12146 (vec_concat:AVX256MODE8P
12147 (vec_select:<avxhalfvecmode>
12148 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12149 (parallel [(const_int 0) (const_int 1)
12150 (const_int 2) (const_int 3)]))
12151 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12153 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12154 [(set_attr "type" "sselog")
12155 (set_attr "prefix_extra" "1")
12156 (set_attr "length_immediate" "1")
12157 (set_attr "prefix" "vex")
12158 (set_attr "mode" "V8SF")])
;; V16HI form of the low-half insert.  Operand 2 is a V8HI (register or
;; memory) and the selection list names elements 8..15 of operand 1,
;; the part of operand 1 that is carried over into the result.
;; Emitted as vinsertf128 with immediate 0.
;; NOTE(review): the lines holding the operators that wrap operand 2 and
;; the selection are not visible in this excerpt.
12160 (define_insn "vec_set_lo_v16hi"
12161 [(set (match_operand:V16HI 0 "register_operand" "=x")
12163 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12165 (match_operand:V16HI 1 "register_operand" "x")
12166 (parallel [(const_int 8) (const_int 9)
12167 (const_int 10) (const_int 11)
12168 (const_int 12) (const_int 13)
12169 (const_int 14) (const_int 15)]))))]
12171 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12172 [(set_attr "type" "sselog")
12173 (set_attr "prefix_extra" "1")
12174 (set_attr "length_immediate" "1")
12175 (set_attr "prefix" "vex")
12176 (set_attr "mode" "V8SF")])
;; V16HI form of the high-half insert.  The selection list names
;; elements 0..7 of operand 1 (the part carried over), followed by
;; operand 2, a V8HI in a register or in memory.
;; Emitted as vinsertf128 with immediate 1.
;; NOTE(review): the lines holding the wrapping operators are not
;; visible in this excerpt.
12178 (define_insn "vec_set_hi_v16hi"
12179 [(set (match_operand:V16HI 0 "register_operand" "=x")
12182 (match_operand:V16HI 1 "register_operand" "x")
12183 (parallel [(const_int 0) (const_int 1)
12184 (const_int 2) (const_int 3)
12185 (const_int 4) (const_int 5)
12186 (const_int 6) (const_int 7)]))
12187 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12189 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12190 [(set_attr "type" "sselog")
12191 (set_attr "prefix_extra" "1")
12192 (set_attr "length_immediate" "1")
12193 (set_attr "prefix" "vex")
12194 (set_attr "mode" "V8SF")])
;; V32QI form of the low-half insert.  Operand 2 is a V16QI (register or
;; memory) and the selection list names elements 16..31 of operand 1,
;; the part of operand 1 that is carried over into the result.
;; Emitted as vinsertf128 with immediate 0.
;; NOTE(review): the lines holding the wrapping operators are not
;; visible in this excerpt.
12196 (define_insn "vec_set_lo_v32qi"
12197 [(set (match_operand:V32QI 0 "register_operand" "=x")
12199 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12201 (match_operand:V32QI 1 "register_operand" "x")
12202 (parallel [(const_int 16) (const_int 17)
12203 (const_int 18) (const_int 19)
12204 (const_int 20) (const_int 21)
12205 (const_int 22) (const_int 23)
12206 (const_int 24) (const_int 25)
12207 (const_int 26) (const_int 27)
12208 (const_int 28) (const_int 29)
12209 (const_int 30) (const_int 31)]))))]
12211 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12212 [(set_attr "type" "sselog")
12213 (set_attr "prefix_extra" "1")
12214 (set_attr "length_immediate" "1")
12215 (set_attr "prefix" "vex")
12216 (set_attr "mode" "V8SF")])
;; V32QI form of the high-half insert.  The selection list names
;; elements 0..15 of operand 1 (the part carried over), followed by
;; operand 2, a V16QI in a register or in memory.
;; Emitted as vinsertf128 with immediate 1.
;; NOTE(review): the lines holding the wrapping operators are not
;; visible in this excerpt.
12218 (define_insn "vec_set_hi_v32qi"
12219 [(set (match_operand:V32QI 0 "register_operand" "=x")
12222 (match_operand:V32QI 1 "register_operand" "x")
12223 (parallel [(const_int 0) (const_int 1)
12224 (const_int 2) (const_int 3)
12225 (const_int 4) (const_int 5)
12226 (const_int 6) (const_int 7)
12227 (const_int 8) (const_int 9)
12228 (const_int 10) (const_int 11)
12229 (const_int 12) (const_int 13)
12230 (const_int 14) (const_int 15)]))
12231 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12233 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12234 [(set_attr "type" "sselog")
12235 (set_attr "prefix_extra" "1")
12236 (set_attr "length_immediate" "1")
12237 (set_attr "prefix" "vex")
12238 (set_attr "mode" "V8SF")])
;; Load pattern emitted as vmaskmov<ssemodesuffix>.  Operand 0 is the
;; destination register, operand 1 a memory operand and operand 2 a
;; register, all in the same AVXMODEF2P mode.
;; NOTE(review): the RTL operator wrapping operands 1 and 2 is on lines
;; not visible in this excerpt; operand 2 is presumably the mask.
12240 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
12241 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12243 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12244 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12248 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12249 [(set_attr "type" "sselog1")
12250 (set_attr "prefix_extra" "1")
12251 (set_attr "prefix" "vex")
12252 (set_attr "mode" "<MODE>")])
;; Store pattern emitted as vmaskmov<ssemodesuffix>.  Operand 0 is the
;; memory destination; operands 1 and 2 are registers of the same
;; AVXMODEF2P mode, combined under UNSPEC_MASKSTORE.
;; NOTE(review): which of operands 1 and 2 is the mask and which the
;; data is not stated in the visible lines; confirm against the
;; instruction's operand order in the output template.
12254 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
12255 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12257 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12258 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12260 UNSPEC_MASKSTORE))]
12262 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12263 [(set_attr "type" "sselog1")
12264 (set_attr "prefix_extra" "1")
12265 (set_attr "prefix" "vex")
12266 (set_attr "mode" "<MODE>")])
;; Insn-and-split that sets a 256-bit operand 0 (register or memory)
;; from an unspec of the half-width operand 1.  The split condition
;; requires reload_completed; the split code then re-expresses operand 1
;; in <MODE>mode, either as a fresh REG rtx with the same register
;; number or via gen_lowpart, and emits a plain move into operand 0.
;; NOTE(review): the test choosing between the two rewrites of op1, the
;; unspec name and the insn condition are on lines not visible here.
12268 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12269 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12270 (unspec:AVX256MODE2P
12271 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12275 "&& reload_completed"
12278 rtx op1 = operands[1];
12280 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12282 op1 = gen_lowpart (<MODE>mode, op1);
12283 emit_move_insn (operands[0], op1);
;; Vector-initialisation expander for the 256-bit modes in AVX256MODE.
;; All work is delegated to ix86_expand_vector_init, called with false
;; as its first argument, the destination register (operand 0) and the
;; unconstrained operand 1.
12287 (define_expand "vec_init<mode>"
12288 [(match_operand:AVX256MODE 0 "register_operand" "")
12289 (match_operand 1 "" "")]
12292 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Build a 256-bit vector by concatenating two half-width operands.
;; Operand 1 (register) supplies the first half.  There are two
;; alternatives for operand 2:
;;   - "xm": register or memory; output is vinsertf128 with immediate 1,
;;     using the %t modifier on operand 1.
;;   - "C":  output is a vmovaps/vmovapd/vmovdqa of operand 1 into the
;;     %x form of operand 0, chosen by the insn's mode attribute.
;; NOTE(review): the case labels are not visible in this excerpt; the
;; pairing above follows the per-alternative attributes ("sselog" with a
;; one-byte immediate first, "ssemov" second).  "C" is presumably the
;; all-zero constant constraint.
12296 (define_insn "*vec_concat<mode>_avx"
12297 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12298 (vec_concat:AVX256MODE
12299 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12300 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12303 switch (which_alternative)
12306 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12308 switch (get_attr_mode (insn))
12311 return "vmovaps\t{%1, %x0|%x0, %1}";
12313 return "vmovapd\t{%1, %x0|%x0, %1}";
12315 return "vmovdqa\t{%1, %x0|%x0, %1}";
12318 gcc_unreachable ();
12321 [(set_attr "type" "sselog,ssemov")
12322 (set_attr "prefix_extra" "1,*")
12323 (set_attr "length_immediate" "1,*")
12324 (set_attr "prefix" "vex")
12325 (set_attr "mode" "<avxvecmode>")])
;; Register form of the 128-bit half-precision to single-precision
;; conversion.  Operand 1 is a V8HI register; the conversion is
;; expressed as a V8SF unspec from which four elements are taken to form
;; the V4SF result in operand 0.
;; The selection list must name elements 0, 1, 2 and 3 in order: the
;; instruction converts the four low half-precision values one-to-one.
;; (It previously read 0, 1, 1, 2, which duplicated element 1 and
;; dropped element 3 in the RTL description.)
12327 (define_insn "vcvtph2ps"
12328 [(set (match_operand:V4SF 0 "register_operand" "=x")
12330 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12332 (parallel [(const_int 0) (const_int 1)
12333 (const_int 2) (const_int 3)])))]
12335 "vcvtph2ps\t{%1, %0|%0, %1}"
12336 [(set_attr "type" "ssecvt")
12337 (set_attr "prefix" "vex")
12338 (set_attr "mode" "V4SF")])
;; Memory-source form of the 128-bit half-to-single conversion: a V4HI
;; memory operand is converted to a V4SF register via UNSPEC_VCVTPH2PS.
;; NOTE(review): the "mode" attribute here is V8SF although the result
;; is V4SF and the register form "vcvtph2ps" uses V4SF; confirm which
;; value is intended.
12340 (define_insn "*vcvtph2ps_load"
12341 [(set (match_operand:V4SF 0 "register_operand" "=x")
12342 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12343 UNSPEC_VCVTPH2PS))]
12345 "vcvtph2ps\t{%1, %0|%0, %1}"
12346 [(set_attr "type" "ssecvt")
12347 (set_attr "prefix" "vex")
12348 (set_attr "mode" "V8SF")])
;; 256-bit half-to-single conversion: a V8HI operand (register or
;; memory) is converted to a V8SF register via UNSPEC_VCVTPH2PS.
12350 (define_insn "vcvtph2ps256"
12351 [(set (match_operand:V8SF 0 "register_operand" "=x")
12352 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12353 UNSPEC_VCVTPH2PS))]
12355 "vcvtph2ps\t{%1, %0|%0, %1}"
12356 [(set_attr "type" "ssecvt")
12357 (set_attr "prefix" "vex")
12358 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit single-to-half conversion.  The V8HI
;; destination (operand 0) is built from a V4HI unspec over the V4SF
;; register operand 1 and the SI immediate operand 2.  The preparation
;; statement sets operand 3 to the V4HImode zero constant.
;; NOTE(review): the operator combining the unspec with operand 3 is on
;; a line not visible in this excerpt; operand 3 presumably supplies the
;; zeroed other half of the result.
12360 (define_expand "vcvtps2ph"
12361 [(set (match_operand:V8HI 0 "register_operand" "")
12363 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12364 (match_operand:SI 2 "immediate_operand" "")]
12368 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination form of the 128-bit single-to-half conversion.
;; Matches a V8HI register destination formed from a V4HI unspec over
;; the V4SF register operand 1 and the immediate operand 2 (constraint
;; "N"), together with operand 3, which must satisfy const0_operand in
;; V4HI mode.  Output is vcvtps2ph with operand 2 as the immediate.
12370 (define_insn "*vcvtps2ph"
12371 [(set (match_operand:V8HI 0 "register_operand" "=x")
12373 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12374 (match_operand:SI 2 "immediate_operand" "N")]
12376 (match_operand:V4HI 3 "const0_operand" "")))]
12378 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12379 [(set_attr "type" "ssecvt")
12380 (set_attr "prefix" "vex")
12381 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit single-to-half conversion:
;; the V4SF register operand 1 is converted under UNSPEC_VCVTPS2PH, with
;; the immediate operand 2, and stored to the V4HI memory operand 0.
12383 (define_insn "*vcvtps2ph_store"
12384 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12385 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12386 (match_operand:SI 2 "immediate_operand" "N")]
12387 UNSPEC_VCVTPS2PH))]
12389 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12390 [(set_attr "type" "ssecvt")
12391 (set_attr "prefix" "vex")
12392 (set_attr "mode" "V4SF")])
;; 256-bit single-to-half conversion: the V8SF register operand 1 is
;; converted under UNSPEC_VCVTPS2PH, with the immediate operand 2, into
;; the V8HI operand 0, which may be a register or memory.
12394 (define_insn "vcvtps2ph256"
12395 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12396 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12397 (match_operand:SI 2 "immediate_operand" "N")]
12398 UNSPEC_VCVTPS2PH))]
12400 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12401 [(set_attr "type" "ssecvt")
12402 (set_attr "prefix" "vex")
12403 (set_attr "mode" "V8SF")])