1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
;; Mode macros: a pattern written in terms of one of these names stands for
;; one copy of the pattern per mode in the bracketed list.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name are the
;; element sizes in bytes that the list covers (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every integer vector mode in SSEMODEI.  Both operands
;; accept registers or memory; the expansion itself is delegated to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Matching insn for SSEMODEI moves.  Alternatives, in order: constant
;; (constraint "C") into xmm, xmm-or-memory into xmm, xmm into memory.
;; The condition requires at least one operand to be a register.
;; The output code switches on which_alternative: one branch takes its
;; opcode from standard_sse_constant_opcode, the other emits movaps when
;; the insn's "mode" attribute is V4SF and movdqa otherwise.
;; The "mode" attribute is computed from optimize_size, TARGET_SSE2 and,
;; for alternative 2 (the store), TARGET_SSE_TYPELESS_STORES.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
64 && (register_operand (operands[0], <MODE>mode)
65 || register_operand (operands[1], <MODE>mode))"
67 switch (which_alternative)
70 return standard_sse_constant_opcode (insn, operands[1]);
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
84 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
85 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
86 (and (eq_attr "alternative" "2")
87 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
90 (const_string "TI")))])
92 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
93 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
94 ;; from memory, we'd prefer to load the memory directly into the %xmm
95 ;; register. To facilitate this happy circumstance, this pattern won't
96 ;; split until after register allocation. If the 64-bit value didn't
97 ;; come from memory, this is the best we can do. This is much better
98 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from there.
;; Insn-and-split that places a DImode value (operand 1, either a 32-bit
;; integer register pair or memory) into the low part of a V4SI xmm
;; register (operand 0).  Only enabled for 32-bit SSE2 targets with
;; TARGET_INTER_UNIT_MOVES, and only split once reload has completed.
;;   * register pair: each SImode half (byte offsets 0 and 4 of operand 1)
;;     is loaded with sse2_loadld, the low half into operand 0 and the high
;;     half into the scratch operand 2; sse2_punpckldq then interleaves
;;     the two.
;;   * memory: the DImode value is concatenated with zero via
;;     vec_concatv2di.
;; NOTE(review): the split code dispatches on which_alternative; confirm
;; that this variable is reliable at split time, or test operands[1]
;; directly (register_operand / memory_operand) instead.
101 (define_insn_and_split "movdi_to_sse"
103 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
104 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
105 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
106 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
108 "&& reload_completed"
111 switch (which_alternative)
114 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
115 Assemble the 64-bit DImode value in an xmm register. */
116 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 0)));
118 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
119 gen_rtx_SUBREG (SImode, operands[1], 4)));
120 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
/* operands[0] is V4SImode, but vec_concatv2di produces a V2DImode result,
   so hand it the V2DImode view of the same register.  */
124 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Move expander for V4SF; the work is done by ix86_expand_vector_move.
133 (define_expand "movv4sf"
134 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
135 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
138 ix86_expand_vector_move (V4SFmode, operands);
;; Matching insn for V4SF moves.  Alternatives, in order: constant
;; (constraint "C") into xmm, xmm-or-memory into xmm, xmm into memory.
;; At least one operand must be a register.  The output code switches on
;; which_alternative between a constant-load opcode obtained from
;; standard_sse_constant_opcode and a plain movaps.
142 (define_insn "*movv4sf_internal"
143 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
144 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
146 && (register_operand (operands[0], V4SFmode)
147 || register_operand (operands[1], V4SFmode))"
149 switch (which_alternative)
152 return standard_sse_constant_opcode (insn, operands[1]);
155 return "movaps\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (set_attr "mode" "V4SF")])
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode part at byte offset 0 and sets operand 2 to the
;; V4SF zero vector; the replacement RTL uses a vec_duplicate of the
;; narrowed operand 1.
;; NOTE(review): presumably the body of a define_split -- confirm against
;; the full file.
164 [(set (match_operand:V4SF 0 "register_operand" "")
165 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
166 "TARGET_SSE && reload_completed"
169 (vec_duplicate:V4SF (match_dup 1))
173 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
174 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; the work is done by ix86_expand_vector_move.
177 (define_expand "movv2df"
178 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
179 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
182 ix86_expand_vector_move (V2DFmode, operands);
;; Matching insn for V2DF moves.  Alternatives, in order: constant
;; (constraint "C") into xmm, xmm-or-memory into xmm, xmm into memory.
;; At least one operand must be a register.
;; The output code emits movaps when the insn's "mode" attribute is V4SF
;; and movapd otherwise; the constant case takes its opcode from
;; standard_sse_constant_opcode.
;; The "mode" attribute is V4SF when optimizing for size, when SSE2 is
;; unavailable, or for the store alternative when
;; TARGET_SSE_TYPELESS_STORES is set; otherwise it is V2DF.
186 (define_insn "*movv2df_internal"
187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
188 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
190 && (register_operand (operands[0], V2DFmode)
191 || register_operand (operands[1], V2DFmode))"
193 switch (which_alternative)
196 return standard_sse_constant_opcode (insn, operands[1]);
199 if (get_attr_mode (insn) == MODE_V4SF)
200 return "movaps\t{%1, %0|%0, %1}";
202 return "movapd\t{%1, %0|%0, %1}";
207 [(set_attr "type" "sselog1,ssemov,ssemov")
210 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
211 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
212 (and (eq_attr "alternative" "2")
213 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
215 (const_string "V4SF")
216 (const_string "V2DF")))])
;; Post-reload rewrite of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The load becomes a vec_concat of
;; the DFmode part of operand 1 (byte offset 0) with a DFmode zero
;; (operand 2).
;; NOTE(review): presumably the body of a define_split -- confirm against
;; the full file.
219 [(set (match_operand:V2DF 0 "register_operand" "")
220 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
221 "TARGET_SSE2 && reload_completed"
222 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
224 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
225 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; operand 0 must be a register
;; and the expansion is delegated to ix86_expand_push.
228 (define_expand "push<mode>1"
229 [(match_operand:SSEMODE 0 "register_operand" "")]
232 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
236 (define_expand "movmisalign<mode>"
237 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
238 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
241 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF move (movups), represented as an unspec so it stays
;; distinct from the ordinary aligned move.  Alternatives: xmm-or-memory
;; into xmm, xmm into memory; a memory-to-memory move is rejected by the
;; condition.
;; The "mode" attribute is V4SF to match the operands and the packed-single
;; instruction.  It was previously V2DF, which mislabels the insn for any
;; attribute derived from "mode".
245 (define_insn "sse_movups"
246 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
247 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
249 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
250 "movups\t{%1, %0|%0, %1}"
251 [(set_attr "type" "ssemov")
252 (set_attr "mode" "V4SF")])
;; Unaligned V2DF move (movupd), represented as an unspec.  Alternatives:
;; xmm-or-memory into xmm, xmm into memory; memory-to-memory is rejected
;; by the condition.  Requires SSE2.
254 (define_insn "sse2_movupd"
255 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
256 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
258 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
259 "movupd\t{%1, %0|%0, %1}"
260 [(set_attr "type" "ssemov")
261 (set_attr "mode" "V2DF")])
;; Unaligned V16QI move (movdqu), represented as an unspec.  Alternatives:
;; xmm-or-memory into xmm, xmm into memory; memory-to-memory is rejected
;; by the condition.  Requires SSE2.
263 (define_insn "sse2_movdqu"
264 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
265 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
267 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
268 "movdqu\t{%1, %0|%0, %1}"
269 [(set_attr "type" "ssemov")
270 (set_attr "mode" "TI")])
;; movntps: stores the V4SF xmm register operand 1 to the memory operand 0.
;; The store is wrapped in an unspec.
272 (define_insn "sse_movntv4sf"
273 [(set (match_operand:V4SF 0 "memory_operand" "=m")
274 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
277 "movntps\t{%1, %0|%0, %1}"
278 [(set_attr "type" "ssemov")
279 (set_attr "mode" "V4SF")])
;; movntpd: stores the V2DF xmm register operand 1 to the memory operand 0.
;; The store is wrapped in an unspec.
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf;
;; confirm whether the difference is intended.
281 (define_insn "sse2_movntv2df"
282 [(set (match_operand:V2DF 0 "memory_operand" "=m")
283 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
286 "movntpd\t{%1, %0|%0, %1}"
287 [(set_attr "type" "ssecvt")
288 (set_attr "mode" "V2DF")])
;; movntdq: stores the V2DI xmm register operand 1 to the memory operand 0.
;; The store is wrapped in an unspec.
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf;
;; confirm whether the difference is intended.
290 (define_insn "sse2_movntv2di"
291 [(set (match_operand:V2DI 0 "memory_operand" "=m")
292 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
295 "movntdq\t{%1, %0|%0, %1}"
296 [(set_attr "type" "ssecvt")
297 (set_attr "mode" "TI")])
;; movnti: stores the SImode general register operand 1 (constraint "r")
;; to the memory operand 0.  The store is wrapped in an unspec.
;; Both operands are SImode and no vector register is involved, so the
;; "mode" attribute is SI.  It was previously V2DF, which does not describe
;; this instruction.
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf;
;; confirm whether the difference is intended.
299 (define_insn "sse2_movntsi"
300 [(set (match_operand:SI 0 "memory_operand" "=m")
301 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
304 "movnti\t{%1, %0|%0, %1}"
305 [(set_attr "type" "ssecvt")
306 (set_attr "mode" "SI")])
;; lddqu: loads the V16QI memory operand 1 into the xmm register
;; operand 0.  The load is wrapped in an unspec.
308 (define_insn "sse3_lddqu"
309 [(set (match_operand:V16QI 0 "register_operand" "=x")
310 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
313 "lddqu\t{%1, %0|%0, %1}"
314 [(set_attr "type" "ssecvt")
315 (set_attr "mode" "TI")])
317 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
319 ;; Parallel single-precision floating point arithmetic
321 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  ix86_expand_fp_absneg_operator emits the whole
;; sequence; DONE suppresses the RTL template.
323 (define_expand "negv4sf2"
324 [(set (match_operand:V4SF 0 "register_operand" "")
325 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
327 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  ix86_expand_fp_absneg_operator emits the
;; whole sequence; DONE suppresses the RTL template.
329 (define_expand "absv4sf2"
330 [(set (match_operand:V4SF 0 "register_operand" "")
331 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
333 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the plus template is then
;; emitted (there is no DONE).
335 (define_expand "addv4sf3"
336 [(set (match_operand:V4SF 0 "register_operand" "")
337 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
338 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
340 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; addps.  Operand 1 is tied to the destination (constraint "0") and
;; marked commutative with operand 2 ("%"); operand 2 may be an xmm
;; register or memory.  Operand combinations are validated by
;; ix86_binary_operator_ok.
342 (define_insn "*addv4sf3"
343 [(set (match_operand:V4SF 0 "register_operand" "=x")
344 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
345 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
346 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
347 "addps\t{%2, %0|%0, %2}"
348 [(set_attr "type" "sseadd")
349 (set_attr "mode" "V4SF")])
;; Scalar single-precision add on V4SF operands: emits addss, and the
;; "mode" attribute is SF.  Operand 1 is tied to the destination and
;; marked commutative with operand 2.
351 (define_insn "sse_vmaddv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
354 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
355 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
358 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
359 "addss\t{%2, %0|%0, %2}"
360 [(set_attr "type" "sseadd")
361 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 (the minuend) must already be a
;; register; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the minus template is
;; emitted.
363 (define_expand "subv4sf3"
364 [(set (match_operand:V4SF 0 "register_operand" "")
365 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
366 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
368 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; subps.  Not commutative: operand 1 is a register tied to the
;; destination, and operand 2 may be an xmm register or memory.
370 (define_insn "*subv4sf3"
371 [(set (match_operand:V4SF 0 "register_operand" "=x")
372 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
373 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
375 "subps\t{%2, %0|%0, %2}"
376 [(set_attr "type" "sseadd")
377 (set_attr "mode" "V4SF")])
;; Scalar single-precision subtract on V4SF operands: emits subss, and
;; the "mode" attribute is SF.  Operand 1 is a register tied to the
;; destination.
379 (define_insn "sse_vmsubv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
382 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
383 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
387 "subss\t{%2, %0|%0, %2}"
388 [(set_attr "type" "sseadd")
389 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the mult template is emitted.
391 (define_expand "mulv4sf3"
392 [(set (match_operand:V4SF 0 "register_operand" "")
393 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
394 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
396 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; mulps.  Operand 1 is tied to the destination and marked commutative
;; with operand 2; operand combinations are validated by
;; ix86_binary_operator_ok.
398 (define_insn "*mulv4sf3"
399 [(set (match_operand:V4SF 0 "register_operand" "=x")
400 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
401 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
402 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
403 "mulps\t{%2, %0|%0, %2}"
404 [(set_attr "type" "ssemul")
405 (set_attr "mode" "V4SF")])
;; Scalar single-precision multiply on V4SF operands: emits mulss, and
;; the "mode" attribute is SF.  Operand 1 is tied to the destination and
;; marked commutative with operand 2.
407 (define_insn "sse_vmmulv4sf3"
408 [(set (match_operand:V4SF 0 "register_operand" "=x")
410 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
411 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
414 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
415 "mulss\t{%2, %0|%0, %2}"
416 [(set_attr "type" "ssemul")
417 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 (the dividend) must already be a
;; register; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the div template is emitted.
419 (define_expand "divv4sf3"
420 [(set (match_operand:V4SF 0 "register_operand" "")
421 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
422 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
424 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; divps.  Not commutative: operand 1 is a register tied to the
;; destination, and operand 2 may be an xmm register or memory.
426 (define_insn "*divv4sf3"
427 [(set (match_operand:V4SF 0 "register_operand" "=x")
428 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
429 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
431 "divps\t{%2, %0|%0, %2}"
432 [(set_attr "type" "ssediv")
433 (set_attr "mode" "V4SF")])
;; Scalar single-precision divide on V4SF operands: emits divss, and the
;; "mode" attribute is SF.  Operand 1 is a register tied to the
;; destination.
435 (define_insn "sse_vmdivv4sf3"
436 [(set (match_operand:V4SF 0 "register_operand" "=x")
438 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
439 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
443 "divss\t{%2, %0|%0, %2}"
444 [(set_attr "type" "ssediv")
445 (set_attr "mode" "SF")])
;; rcpps: packed reciprocal approximation, modelled as UNSPEC_RCP of
;; operand 1 (xmm register or memory).  The destination is not tied to
;; the source.
447 (define_insn "sse_rcpv4sf2"
448 [(set (match_operand:V4SF 0 "register_operand" "=x")
450 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
452 "rcpps\t{%1, %0|%0, %1}"
453 [(set_attr "type" "sse")
454 (set_attr "mode" "V4SF")])
;; rcpss: scalar reciprocal approximation.  Operand 1 is the source of
;; the unspec; operand 2 is a register tied to the destination
;; (constraint "0") and is not referenced in the output template.
456 (define_insn "sse_vmrcpv4sf2"
457 [(set (match_operand:V4SF 0 "register_operand" "=x")
459 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
461 (match_operand:V4SF 2 "register_operand" "0")
464 "rcpss\t{%1, %0|%0, %1}"
465 [(set_attr "type" "sse")
466 (set_attr "mode" "SF")])
;; rsqrtps: packed reciprocal square-root approximation, modelled as
;; UNSPEC_RSQRT of operand 1 (xmm register or memory).
468 (define_insn "sse_rsqrtv4sf2"
469 [(set (match_operand:V4SF 0 "register_operand" "=x")
471 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
473 "rsqrtps\t{%1, %0|%0, %1}"
474 [(set_attr "type" "sse")
475 (set_attr "mode" "V4SF")])
;; rsqrtss: scalar reciprocal square-root approximation.  Operand 1 is
;; the source of the unspec; operand 2 is a register tied to the
;; destination and is not referenced in the output template.
477 (define_insn "sse_vmrsqrtv4sf2"
478 [(set (match_operand:V4SF 0 "register_operand" "=x")
480 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
482 (match_operand:V4SF 2 "register_operand" "0")
485 "rsqrtss\t{%1, %0|%0, %1}"
486 [(set_attr "type" "sse")
487 (set_attr "mode" "SF")])
;; sqrtps: packed square root of operand 1 (xmm register or memory) into
;; the xmm register operand 0.
489 (define_insn "sqrtv4sf2"
490 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
493 "sqrtps\t{%1, %0|%0, %1}"
494 [(set_attr "type" "sse")
495 (set_attr "mode" "V4SF")])
;; sqrtss: scalar square root.  Operand 1 is the source; operand 2 is a
;; register tied to the destination and is not referenced in the output
;; template.
497 (define_insn "sse_vmsqrtv4sf2"
498 [(set (match_operand:V4SF 0 "register_operand" "=x")
500 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
501 (match_operand:V4SF 2 "register_operand" "0")
504 "sqrtss\t{%1, %0|%0, %1}"
505 [(set_attr "type" "sse")
506 (set_attr "mode" "SF")])
508 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
509 ;; isn't really correct, as those rtl operators aren't defined when
510 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; ix86_fixup_binary_operands_no_copy
;; then adjusts the operands for the smax template.
512 (define_expand "smaxv4sf3"
513 [(set (match_operand:V4SF 0 "register_operand" "")
514 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
515 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
518 if (!flag_finite_math_only)
519 operands[1] = force_reg (V4SFmode, operands[1]);
520 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps, commutative form.  Only enabled with flag_finite_math_only;
;; operand 1 is tied to the destination and marked commutative with
;; operand 2.
523 (define_insn "*smaxv4sf3_finite"
524 [(set (match_operand:V4SF 0 "register_operand" "=x")
525 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
526 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
527 "TARGET_SSE && flag_finite_math_only
528 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
529 "maxps\t{%2, %0|%0, %2}"
530 [(set_attr "type" "sse")
531 (set_attr "mode" "V4SF")])
;; maxps, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so the operand order is kept.
533 (define_insn "*smaxv4sf3"
534 [(set (match_operand:V4SF 0 "register_operand" "=x")
535 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
536 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
538 "maxps\t{%2, %0|%0, %2}"
539 [(set_attr "type" "sse")
540 (set_attr "mode" "V4SF")])
;; maxss, commutative form.  Only enabled with flag_finite_math_only;
;; operand 1 is tied to the destination and marked commutative with
;; operand 2.
542 (define_insn "*sse_vmsmaxv4sf3_finite"
543 [(set (match_operand:V4SF 0 "register_operand" "=x")
545 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
546 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
549 "TARGET_SSE && flag_finite_math_only
550 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
551 "maxss\t{%2, %0|%0, %2}"
552 [(set_attr "type" "sse")
553 (set_attr "mode" "SF")])
;; maxss, non-commutative form: operand 1 must be a register tied to the
;; destination, so the operand order is kept.
555 (define_insn "sse_vmsmaxv4sf3"
556 [(set (match_operand:V4SF 0 "register_operand" "=x")
558 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
559 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
563 "maxss\t{%2, %0|%0, %2}"
564 [(set_attr "type" "sse")
565 (set_attr "mode" "SF")])
;; V4SF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; ix86_fixup_binary_operands_no_copy
;; then adjusts the operands for the smin template.
567 (define_expand "sminv4sf3"
568 [(set (match_operand:V4SF 0 "register_operand" "")
569 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
570 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
573 if (!flag_finite_math_only)
574 operands[1] = force_reg (V4SFmode, operands[1]);
575 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps, commutative form.  Only enabled with flag_finite_math_only;
;; operand 1 is tied to the destination and marked commutative with
;; operand 2.
578 (define_insn "*sminv4sf3_finite"
579 [(set (match_operand:V4SF 0 "register_operand" "=x")
580 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
581 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
582 "TARGET_SSE && flag_finite_math_only
583 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
584 "minps\t{%2, %0|%0, %2}"
585 [(set_attr "type" "sse")
586 (set_attr "mode" "V4SF")])
;; minps, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so the operand order is kept.
588 (define_insn "*sminv4sf3"
589 [(set (match_operand:V4SF 0 "register_operand" "=x")
590 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
591 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
593 "minps\t{%2, %0|%0, %2}"
594 [(set_attr "type" "sse")
595 (set_attr "mode" "V4SF")])
;; minss, commutative form.  Only enabled with flag_finite_math_only;
;; operand 1 is tied to the destination and marked commutative with
;; operand 2.
597 (define_insn "*sse_vmsminv4sf3_finite"
598 [(set (match_operand:V4SF 0 "register_operand" "=x")
600 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
601 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
604 "TARGET_SSE && flag_finite_math_only
605 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
606 "minss\t{%2, %0|%0, %2}"
607 [(set_attr "type" "sse")
608 (set_attr "mode" "SF")])
;; minss, non-commutative form: operand 1 must be a register tied to the
;; destination, so the operand order is kept.
610 (define_insn "sse_vmsminv4sf3"
611 [(set (match_operand:V4SF 0 "register_operand" "=x")
613 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
614 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
618 "minss\t{%2, %0|%0, %2}"
619 [(set_attr "type" "sse")
620 (set_attr "mode" "SF")])
622 ;; These versions of the min/max patterns implement exactly the operations
623 ;; min = (op1 < op2 ? op1 : op2)
624 ;; max = (!(op1 < op2) ? op1 : op2)
625 ;; Their operands are not commutative, and thus they may be used in the
626 ;; presence of -0.0 and NaN.
;; Order-preserving V4SF minimum: minps expressed as a two-operand unspec
;; rather than smin.  Operand 1 is a register tied to the destination;
;; operand 2 may be an xmm register or memory.
628 (define_insn "*ieee_sminv4sf3"
629 [(set (match_operand:V4SF 0 "register_operand" "=x")
630 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
631 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
634 "minps\t{%2, %0|%0, %2}"
635 [(set_attr "type" "sseadd")
636 (set_attr "mode" "V4SF")])
;; Order-preserving V4SF maximum: maxps expressed as a two-operand unspec
;; rather than smax.  Operand 1 is a register tied to the destination;
;; operand 2 may be an xmm register or memory.
638 (define_insn "*ieee_smaxv4sf3"
639 [(set (match_operand:V4SF 0 "register_operand" "=x")
640 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
641 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
644 "maxps\t{%2, %0|%0, %2}"
645 [(set_attr "type" "sseadd")
646 (set_attr "mode" "V4SF")])
;; Order-preserving V2DF minimum: minpd expressed as a two-operand unspec
;; rather than smin.  Operand 1 is a register tied to the destination;
;; operand 2 may be an xmm register or memory.
648 (define_insn "*ieee_sminv2df3"
649 [(set (match_operand:V2DF 0 "register_operand" "=x")
650 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
651 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
654 "minpd\t{%2, %0|%0, %2}"
655 [(set_attr "type" "sseadd")
656 (set_attr "mode" "V2DF")])
;; Order-preserving V2DF maximum: maxpd expressed as a two-operand unspec
;; rather than smax.  Operand 1 is a register tied to the destination;
;; operand 2 may be an xmm register or memory.
658 (define_insn "*ieee_smaxv2df3"
659 [(set (match_operand:V2DF 0 "register_operand" "=x")
660 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
661 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
664 "maxpd\t{%2, %0|%0, %2}"
665 [(set_attr "type" "sseadd")
666 (set_attr "mode" "V2DF")])
;; addsubps.  The RTL combines an operation on operands 1 and 2 with the
;; difference (minus) of the same two operands.  Operand 1 is a register
;; tied to the destination; operand 2 may be an xmm register or memory.
668 (define_insn "sse3_addsubv4sf3"
669 [(set (match_operand:V4SF 0 "register_operand" "=x")
672 (match_operand:V4SF 1 "register_operand" "0")
673 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
674 (minus:V4SF (match_dup 1) (match_dup 2))
677 "addsubps\t{%2, %0|%0, %2}"
678 [(set_attr "type" "sseadd")
679 (set_attr "mode" "V4SF")])
;; haddps.  The RTL template selects elements 0, 1, 2 and 3 of operand 1,
;; followed by elements 0, 1, 2 and 3 of operand 2, as SFmode scalars.
;; Operand 1 is a register tied to the destination; operand 2 may be an
;; xmm register or memory.
681 (define_insn "sse3_haddv4sf3"
682 [(set (match_operand:V4SF 0 "register_operand" "=x")
687 (match_operand:V4SF 1 "register_operand" "0")
688 (parallel [(const_int 0)]))
689 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
691 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
692 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
696 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
697 (parallel [(const_int 0)]))
698 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
700 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
701 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
703 "haddps\t{%2, %0|%0, %2}"
704 [(set_attr "type" "sseadd")
705 (set_attr "mode" "V4SF")])
;; hsubps.  Same element selection as sse3_haddv4sf3: elements 0-3 of
;; operand 1, then elements 0-3 of operand 2.  Operand 1 is a register
;; tied to the destination; operand 2 may be an xmm register or memory.
707 (define_insn "sse3_hsubv4sf3"
708 [(set (match_operand:V4SF 0 "register_operand" "=x")
713 (match_operand:V4SF 1 "register_operand" "0")
714 (parallel [(const_int 0)]))
715 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
717 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
718 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
722 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
723 (parallel [(const_int 0)]))
724 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
726 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
727 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
729 "hsubps\t{%2, %0|%0, %2}"
730 [(set_attr "type" "sseadd")
731 (set_attr "mode" "V4SF")])
;; Sum reduction of the V4SF operand 1 into operand 0.  Two strategies
;; appear in the body:
;;   * apply sse3_haddv4sf3 twice -- operand 1 with itself into a fresh
;;     temporary, then the temporary with itself into operand 0;
;;   * call ix86_expand_reduc_v4sf with gen_addv4sf3 as the combiner.
;; NOTE(review): presumably the first is used when SSE3 is available and
;; the second otherwise -- confirm against the full file.
733 (define_expand "reduc_splus_v4sf"
734 [(match_operand:V4SF 0 "register_operand" "")
735 (match_operand:V4SF 1 "register_operand" "")]
740 rtx tmp = gen_reg_rtx (V4SFmode);
741 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
742 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
745 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of the V4SF operand 1 into operand 0, done by
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combiner.
749 (define_expand "reduc_smax_v4sf"
750 [(match_operand:V4SF 0 "register_operand" "")
751 (match_operand:V4SF 1 "register_operand" "")]
754 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of the V4SF operand 1 into operand 0, done by
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combiner.
758 (define_expand "reduc_smin_v4sf"
759 [(match_operand:V4SF 0 "register_operand" "")
760 (match_operand:V4SF 1 "register_operand" "")]
763 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
767 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
769 ;; Parallel single-precision floating point comparisons
771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V4SF compare (cmp<cond>ps).  Operand 3 is the comparison
;; operator, accepted by sse_comparison_operator; "%D3" prints its
;; condition into the mnemonic.  Operand 1 is a register tied to the
;; destination; operand 2 may be an xmm register or memory.
773 (define_insn "sse_maskcmpv4sf3"
774 [(set (match_operand:V4SF 0 "register_operand" "=x")
775 (match_operator:V4SF 3 "sse_comparison_operator"
776 [(match_operand:V4SF 1 "register_operand" "0")
777 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
779 "cmp%D3ps\t{%2, %0|%0, %2}"
780 [(set_attr "type" "ssecmp")
781 (set_attr "mode" "V4SF")])
;; Scalar SF compare (cmp<cond>ss) on SFmode operands.  Operand 3 is the
;; comparison operator; operand 1 is a register tied to the destination,
;; and operand 2 may be an xmm register or memory.
783 (define_insn "sse_maskcmpsf3"
784 [(set (match_operand:SF 0 "register_operand" "=x")
785 (match_operator:SF 3 "sse_comparison_operator"
786 [(match_operand:SF 1 "register_operand" "0")
787 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
789 "cmp%D3ss\t{%2, %0|%0, %2}"
790 [(set_attr "type" "ssecmp")
791 (set_attr "mode" "SF")])
;; Scalar compare (cmp<cond>ss) on V4SF operands.  Unlike the two
;; patterns above, operand 2 must be an xmm register here; memory is not
;; accepted.
793 (define_insn "sse_vmmaskcmpv4sf3"
794 [(set (match_operand:V4SF 0 "register_operand" "=x")
796 (match_operator:V4SF 3 "sse_comparison_operator"
797 [(match_operand:V4SF 1 "register_operand" "0")
798 (match_operand:V4SF 2 "register_operand" "x")])
802 "cmp%D3ss\t{%2, %0|%0, %2}"
803 [(set_attr "type" "ssecmp")
804 (set_attr "mode" "SF")])
;; comiss: compares element 0 of operand 0 (xmm register) with element 0
;; of operand 1 (xmm register or memory) and sets the flags register in
;; CCFP mode.
806 (define_insn "sse_comi"
807 [(set (reg:CCFP FLAGS_REG)
810 (match_operand:V4SF 0 "register_operand" "x")
811 (parallel [(const_int 0)]))
813 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
814 (parallel [(const_int 0)]))))]
816 "comiss\t{%1, %0|%0, %1}"
817 [(set_attr "type" "ssecomi")
818 (set_attr "mode" "SF")])
;; ucomiss: same operand shape as sse_comi, but the flags register is set
;; in CCFPU mode.
820 (define_insn "sse_ucomi"
821 [(set (reg:CCFPU FLAGS_REG)
824 (match_operand:V4SF 0 "register_operand" "x")
825 (parallel [(const_int 0)]))
827 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
828 (parallel [(const_int 0)]))))]
830 "ucomiss\t{%1, %0|%0, %1}"
831 [(set_attr "type" "ssecomi")
832 (set_attr "mode" "SF")])
;; V4SF vector conditional expander.  Operands 4 and 5 are the values
;; being compared; operands 1 and 2 are the two general operands of the
;; surrounding expression; operand 0 receives the result.  The expansion
;; is attempted by ix86_expand_fp_vcond, whose return value is tested.
834 (define_expand "vcondv4sf"
835 [(set (match_operand:V4SF 0 "register_operand" "")
838 [(match_operand:V4SF 4 "nonimmediate_operand" "")
839 (match_operand:V4SF 5 "nonimmediate_operand" "")])
840 (match_operand:V4SF 1 "general_operand" "")
841 (match_operand:V4SF 2 "general_operand" "")))]
844 if (ix86_expand_fp_vcond (operands))
850 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
852 ;; Parallel single-precision floating point logical operations
854 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the and template is emitted.
856 (define_expand "andv4sf3"
857 [(set (match_operand:V4SF 0 "register_operand" "")
858 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
859 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
861 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; andps.  Operand 1 is tied to the destination and marked commutative
;; with operand 2; operand combinations are validated by
;; ix86_binary_operator_ok.
863 (define_insn "*andv4sf3"
864 [(set (match_operand:V4SF 0 "register_operand" "=x")
865 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
866 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
867 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
868 "andps\t{%2, %0|%0, %2}"
869 [(set_attr "type" "sselog")
870 (set_attr "mode" "V4SF")])
;; andnps: computes (NOT operand 1) AND operand 2.  Operand 1 is a
;; register tied to the destination; operand 2 may be an xmm register or
;; memory.
872 (define_insn "sse_nandv4sf3"
873 [(set (match_operand:V4SF 0 "register_operand" "=x")
874 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
875 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
877 "andnps\t{%2, %0|%0, %2}"
878 [(set_attr "type" "sselog")
879 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the ior template is emitted.
881 (define_expand "iorv4sf3"
882 [(set (match_operand:V4SF 0 "register_operand" "")
883 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
884 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
886 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; orps.  Operand 1 is tied to the destination and marked commutative
;; with operand 2; operand combinations are validated by
;; ix86_binary_operator_ok.
888 (define_insn "*iorv4sf3"
889 [(set (match_operand:V4SF 0 "register_operand" "=x")
890 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
891 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
892 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
893 "orps\t{%2, %0|%0, %2}"
894 [(set_attr "type" "sselog")
895 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the xor template is emitted.
897 (define_expand "xorv4sf3"
898 [(set (match_operand:V4SF 0 "register_operand" "")
899 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
900 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
902 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; xorps.  Operand 1 is tied to the destination and marked commutative
;; with operand 2; operand combinations are validated by
;; ix86_binary_operator_ok.
904 (define_insn "*xorv4sf3"
905 [(set (match_operand:V4SF 0 "register_operand" "=x")
906 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
907 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
908 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
909 "xorps\t{%2, %0|%0, %2}"
910 [(set_attr "type" "sselog")
911 (set_attr "mode" "V4SF")])
913 ;; Also define scalar versions. These are used for abs, neg, and
914 ;; conditional move. Using subregs into vector modes causes register
915 ;; allocation lossage. These patterns do not allow memory operands
916 ;; because the native instructions read the full 128-bits.
;; Scalar SF bitwise AND.  Both inputs must be xmm registers, with
;; operand 1 tied to the destination.  The packed andps instruction is
;; emitted, so the "mode" attribute is V4SF.
918 (define_insn "*andsf3"
919 [(set (match_operand:SF 0 "register_operand" "=x")
920 (and:SF (match_operand:SF 1 "register_operand" "0")
921 (match_operand:SF 2 "register_operand" "x")))]
923 "andps\t{%2, %0|%0, %2}"
924 [(set_attr "type" "sselog")
925 (set_attr "mode" "V4SF")])
;; Scalar SF and-not: (NOT operand 1) AND operand 2.  Both inputs must be
;; xmm registers, with operand 1 tied to the destination.  The packed
;; andnps instruction is emitted.
927 (define_insn "*nandsf3"
928 [(set (match_operand:SF 0 "register_operand" "=x")
929 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
930 (match_operand:SF 2 "register_operand" "x")))]
932 "andnps\t{%2, %0|%0, %2}"
933 [(set_attr "type" "sselog")
934 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise OR.  Both inputs must be xmm registers, with
;; operand 1 tied to the destination.  The packed orps instruction is
;; emitted.
936 (define_insn "*iorsf3"
937 [(set (match_operand:SF 0 "register_operand" "=x")
938 (ior:SF (match_operand:SF 1 "register_operand" "0")
939 (match_operand:SF 2 "register_operand" "x")))]
941 "orps\t{%2, %0|%0, %2}"
942 [(set_attr "type" "sselog")
943 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise XOR.  Both inputs must be xmm registers, with
;; operand 1 tied to the destination.  The packed xorps instruction is
;; emitted.
945 (define_insn "*xorsf3"
946 [(set (match_operand:SF 0 "register_operand" "=x")
947 (xor:SF (match_operand:SF 1 "register_operand" "0")
948 (match_operand:SF 2 "register_operand" "x")))]
950 "xorps\t{%2, %0|%0, %2}"
951 [(set_attr "type" "sselog")
952 (set_attr "mode" "V4SF")])
954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
956 ;; Parallel single-precision floating point conversion operations
958 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register "y" or memory) to
;; V2SF.  Operand 1 is an xmm register tied to the destination.
960 (define_insn "sse_cvtpi2ps"
961 [(set (match_operand:V4SF 0 "register_operand" "=x")
964 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
965 (match_operand:V4SF 1 "register_operand" "0")
968 "cvtpi2ps\t{%2, %0|%0, %2}"
969 [(set_attr "type" "ssecvt")
970 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts the V4SF operand 1 through a V4SI unspec and keeps
;; elements 0 and 1 as a V2SI result in an MMX register ("y").  The
;; "unit" attribute is mmx.
972 (define_insn "sse_cvtps2pi"
973 [(set (match_operand:V2SI 0 "register_operand" "=y")
975 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
977 (parallel [(const_int 0) (const_int 1)])))]
979 "cvtps2pi\t{%1, %0|%0, %1}"
980 [(set_attr "type" "ssecvt")
981 (set_attr "unit" "mmx")
982 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi, but the conversion is expressed with the
;; RTL fix operator instead of an unspec.  Elements 0 and 1 of the V4SI
;; result go to an MMX register.
;; NOTE(review): "mode" is SF here but DI on sse_cvtps2pi; confirm
;; whether the difference is intended.
984 (define_insn "sse_cvttps2pi"
985 [(set (match_operand:V2SI 0 "register_operand" "=y")
987 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
988 (parallel [(const_int 0) (const_int 1)])))]
990 "cvttps2pi\t{%1, %0|%0, %1}"
991 [(set_attr "type" "ssecvt")
992 (set_attr "unit" "mmx")
993 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF.  Alternative 0 takes a
;; general register, alternative 1 takes memory.  Operand 1 is an xmm
;; register tied to the destination.  The decode attributes differ per
;; alternative.
995 (define_insn "sse_cvtsi2ss"
996 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
999 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1000 (match_operand:V4SF 1 "register_operand" "0,0")
1003 "cvtsi2ss\t{%2, %0|%0, %2}"
1004 [(set_attr "type" "sseicvt")
1005 (set_attr "athlon_decode" "vector,double")
1006 (set_attr "amdfam10_decode" "vector,double")
1007 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit-only variant of sse_cvtsi2ss, taking a DImode
;; operand 2.
;; NOTE(review): alternative 1 uses constraint "rm", whereas the SImode
;; pattern uses "m".  The per-alternative decode attributes suggest
;; alternative 1 is meant to be the memory form -- confirm.
1009 (define_insn "sse_cvtsi2ssq"
1010 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1013 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1014 (match_operand:V4SF 1 "register_operand" "0,0")
1016 "TARGET_SSE && TARGET_64BIT"
1017 "cvtsi2ssq\t{%2, %0|%0, %2}"
1018 [(set_attr "type" "sseicvt")
1019 (set_attr "athlon_decode" "vector,double")
1020 (set_attr "amdfam10_decode" "vector,double")
1021 (set_attr "mode" "SF")])
;; cvtss2si: converts element 0 of the V4SF operand 1 (xmm register or
;; memory) to SImode in a general register, modelled as
;; UNSPEC_FIX_NOTRUNC.
;; NOTE(review): there is no amdfam10_decode attribute here, although
;; sse_cvtss2si_2 has one -- confirm whether that is intended.
1023 (define_insn "sse_cvtss2si"
1024 [(set (match_operand:SI 0 "register_operand" "=r,r")
1027 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1028 (parallel [(const_int 0)]))]
1029 UNSPEC_FIX_NOTRUNC))]
1031 "cvtss2si\t{%1, %0|%0, %1}"
1032 [(set_attr "type" "sseicvt")
1033 (set_attr "athlon_decode" "double,vector")
1034 (set_attr "mode" "SI")])
;; cvtss2si taking a plain SFmode operand 1 (xmm register or memory)
;; rather than element 0 of a V4SF; modelled as UNSPEC_FIX_NOTRUNC.
1036 (define_insn "sse_cvtss2si_2"
1037 [(set (match_operand:SI 0 "register_operand" "=r,r")
1038 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1039 UNSPEC_FIX_NOTRUNC))]
1041 "cvtss2si\t{%1, %0|%0, %1}"
1042 [(set_attr "type" "sseicvt")
1043 (set_attr "athlon_decode" "double,vector")
1044 (set_attr "amdfam10_decode" "double,double")
1045 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit-only variant of sse_cvtss2si, producing a DImode
;; result from element 0 of the V4SF operand 1.
;; NOTE(review): there is no amdfam10_decode attribute here, although
;; sse_cvtss2siq_2 has one -- confirm whether that is intended.
1047 (define_insn "sse_cvtss2siq"
1048 [(set (match_operand:DI 0 "register_operand" "=r,r")
1051 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1052 (parallel [(const_int 0)]))]
1053 UNSPEC_FIX_NOTRUNC))]
1054 "TARGET_SSE && TARGET_64BIT"
1055 "cvtss2siq\t{%1, %0|%0, %1}"
1056 [(set_attr "type" "sseicvt")
1057 (set_attr "athlon_decode" "double,vector")
1058 (set_attr "mode" "DI")])
;; cvtss2siq on a scalar SFmode operand (SSE register or memory), producing
;; a DImode general register.  Requires TARGET_SSE && TARGET_64BIT.
1060 (define_insn "sse_cvtss2siq_2"
1061 [(set (match_operand:DI 0 "register_operand" "=r,r")
1062 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1063 UNSPEC_FIX_NOTRUNC))]
1064 "TARGET_SSE && TARGET_64BIT"
1065 "cvtss2siq\t{%1, %0|%0, %1}"
1066 [(set_attr "type" "sseicvt")
1067 (set_attr "athlon_decode" "double,vector")
1068 (set_attr "amdfam10_decode" "double,double")
1069 (set_attr "mode" "DI")])
;; cvttss2si: convert element 0 of the V4SF operand 1 (SSE register or
;; memory) to an SImode general register.  Unlike sse_cvtss2si, no
;; UNSPEC_FIX_NOTRUNC wrapper appears in this pattern.
1071 (define_insn "sse_cvttss2si"
1072 [(set (match_operand:SI 0 "register_operand" "=r,r")
1075 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1076 (parallel [(const_int 0)]))))]
1078 "cvttss2si\t{%1, %0|%0, %1}"
1079 [(set_attr "type" "sseicvt")
1080 (set_attr "athlon_decode" "double,vector")
1081 (set_attr "amdfam10_decode" "double,double")
1082 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result variant of sse_cvttss2si.  Requires
;; TARGET_SSE && TARGET_64BIT.
1084 (define_insn "sse_cvttss2siq"
1085 [(set (match_operand:DI 0 "register_operand" "=r,r")
1088 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1089 (parallel [(const_int 0)]))))]
1090 "TARGET_SSE && TARGET_64BIT"
1091 "cvttss2siq\t{%1, %0|%0, %1}"
1092 [(set_attr "type" "sseicvt")
1093 (set_attr "athlon_decode" "double,vector")
1094 (set_attr "amdfam10_decode" "double,double")
1095 (set_attr "mode" "DI")])
;; cvtdq2ps: convert the V4SI operand 1 (SSE register or memory) to a V4SF
;; result in an SSE register.
;; The result is packed single precision, so the "mode" attribute is V4SF;
;; V2DF would describe a packed-double instruction instead.
1097 (define_insn "sse2_cvtdq2ps"
1098 [(set (match_operand:V4SF 0 "register_operand" "=x")
1099 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1101 "cvtdq2ps\t{%1, %0|%0, %1}"
1102 [(set_attr "type" "ssecvt")
1103 (set_attr "mode" "V4SF")])
;; cvtps2dq: convert the V4SF operand 1 (SSE register or memory) to V4SI.
;; The conversion is represented with UNSPEC_FIX_NOTRUNC.
1105 (define_insn "sse2_cvtps2dq"
1106 [(set (match_operand:V4SI 0 "register_operand" "=x")
1107 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1108 UNSPEC_FIX_NOTRUNC))]
1110 "cvtps2dq\t{%1, %0|%0, %1}"
1111 [(set_attr "type" "ssecvt")
1112 (set_attr "mode" "TI")])
;; cvttps2dq: convert the V4SF operand 1 (SSE register or memory) to V4SI,
;; expressed with the `fix' RTL code.
1114 (define_insn "sse2_cvttps2dq"
1115 [(set (match_operand:V4SI 0 "register_operand" "=x")
1116 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1118 "cvttps2dq\t{%1, %0|%0, %1}"
1119 [(set_attr "type" "ssecvt")
1120 (set_attr "mode" "TI")])
1122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1124 ;; Parallel single-precision floating point element swizzling
1126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps and its memory forms.  Operand 1 is tied to the destination in
;; every alternative.  The three alternatives emit, in order:
;;   movhlps  - register operand 2 into a register destination;
;;   movlps   - from %H2 of an offsettable memory operand 2;
;;   movhps   - register operand 2 into a memory destination.
;; The insn condition rejects operands 1 and 2 both being memory.
1128 (define_insn "sse_movhlps"
1129 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1132 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1133 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1134 (parallel [(const_int 6)
1138 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1140 movhlps\t{%2, %0|%0, %2}
1141 movlps\t{%H2, %0|%0, %H2}
1142 movhps\t{%2, %0|%0, %2}"
1143 [(set_attr "type" "ssemov")
1144 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps and its memory forms.  Operand 1 is tied to the destination in
;; every alternative.  The three alternatives emit, in order:
;;   movlhps  - register operand 2 into a register destination;
;;   movhps   - memory operand 2 into a register destination;
;;   movlps   - register operand 2 to %H0 of an offsettable memory
;;              destination.
;; The insn condition defers to ix86_binary_operator_ok.
1146 (define_insn "sse_movlhps"
1147 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1150 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1151 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1152 (parallel [(const_int 0)
1156 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1158 movlhps\t{%2, %0|%0, %2}
1159 movhps\t{%2, %0|%0, %2}
1160 movlps\t{%2, %H0|%H0, %2}"
1161 [(set_attr "type" "ssemov")
1162 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: operand 1 is tied to the destination; operand 2 is an SSE
;; register or memory.  The selector picks indices 2, 6, 3, 7 from the
;; combination of operands 1 and 2.
1164 (define_insn "sse_unpckhps"
1165 [(set (match_operand:V4SF 0 "register_operand" "=x")
1168 (match_operand:V4SF 1 "register_operand" "0")
1169 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1170 (parallel [(const_int 2) (const_int 6)
1171 (const_int 3) (const_int 7)])))]
1173 "unpckhps\t{%2, %0|%0, %2}"
1174 [(set_attr "type" "sselog")
1175 (set_attr "mode" "V4SF")])
;; unpcklps: operand 1 is tied to the destination; operand 2 is an SSE
;; register or memory.  The selector picks indices 0, 4, 1, 5 from the
;; combination of operands 1 and 2.
1177 (define_insn "sse_unpcklps"
1178 [(set (match_operand:V4SF 0 "register_operand" "=x")
1181 (match_operand:V4SF 1 "register_operand" "0")
1182 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1183 (parallel [(const_int 0) (const_int 4)
1184 (const_int 1) (const_int 5)])))]
1186 "unpcklps\t{%2, %0|%0, %2}"
1187 [(set_attr "type" "sselog")
1188 (set_attr "mode" "V4SF")])
1190 ;; These are modeled with the same vec_concat as the others so that we
1191 ;; capture users of shufps that can use the new instructions
;; movshdup: single-source shuffle of operand 1 (SSE register or memory)
;; into an SSE register.  The selector list begins with index 1; its
;; remaining entries are not visible in this listing.
1192 (define_insn "sse3_movshdup"
1193 [(set (match_operand:V4SF 0 "register_operand" "=x")
1196 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1198 (parallel [(const_int 1)
1203 "movshdup\t{%1, %0|%0, %1}"
1204 [(set_attr "type" "sse")
1205 (set_attr "mode" "V4SF")])
;; movsldup: single-source shuffle of operand 1 (SSE register or memory)
;; into an SSE register.  The selector list begins with index 0; its
;; remaining entries are not visible in this listing.
1207 (define_insn "sse3_movsldup"
1208 [(set (match_operand:V4SF 0 "register_operand" "=x")
1211 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1213 (parallel [(const_int 0)
1218 "movsldup\t{%1, %0|%0, %1}"
1219 [(set_attr "type" "sse")
1220 (set_attr "mode" "V4SF")])
;; Expander for shufps taking the immediate as one const_int (operand 3).
;; The mask is split into four 2-bit selectors that are handed to
;; sse_shufps_1.  The two upper selectors are biased by 4 so that they land
;; in the 4..7 range that sse_shufps_1 requires for its operands 5 and 6.
1222 (define_expand "sse_shufps"
1223 [(match_operand:V4SF 0 "register_operand" "")
1224 (match_operand:V4SF 1 "register_operand" "")
1225 (match_operand:V4SF 2 "nonimmediate_operand" "")
1226 (match_operand:SI 3 "const_int_operand" "")]
1229 int mask = INTVAL (operands[3]);
1230 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1231 GEN_INT ((mask >> 0) & 3),
1232 GEN_INT ((mask >> 2) & 3),
1233 GEN_INT (((mask >> 4) & 3) + 4),
1234 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of shufps.  Operands 3 and 4 are selectors in 0..3, operands 5
;; and 6 are selectors in 4..7.  The output code folds the four selectors
;; back into a single immediate (subtracting the bias of 4 from operands 5
;; and 6), stores it in operands[3], and prints shufps with it.
1238 (define_insn "sse_shufps_1"
1239 [(set (match_operand:V4SF 0 "register_operand" "=x")
1242 (match_operand:V4SF 1 "register_operand" "0")
1243 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1244 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1245 (match_operand 4 "const_0_to_3_operand" "")
1246 (match_operand 5 "const_4_to_7_operand" "")
1247 (match_operand 6 "const_4_to_7_operand" "")])))]
1251 mask |= INTVAL (operands[3]) << 0;
1252 mask |= INTVAL (operands[4]) << 2;
1253 mask |= (INTVAL (operands[5]) - 4) << 4;
1254 mask |= (INTVAL (operands[6]) - 4) << 6;
1255 operands[3] = GEN_INT (mask);
1257 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1259 [(set_attr "type" "sselog")
1260 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of the V4SF operand 1 into a V2SF destination.
;; The three alternatives emit, in order:
;;   movhps   - register source to memory destination;
;;   movhlps  - register source to register destination;
;;   movlps   - from %H1 of an offsettable memory source to a register.
1262 (define_insn "sse_storehps"
1263 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1265 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1266 (parallel [(const_int 2) (const_int 3)])))]
1269 movhps\t{%1, %0|%0, %1}
1270 movhlps\t{%1, %0|%0, %1}
1271 movlps\t{%H1, %0|%0, %H1}"
1272 [(set_attr "type" "ssemov")
1273 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of operand 1 (tied to the destination) with the
;; V2SF operand 2.  The three alternatives emit, in order:
;;   movhps   - memory operand 2 into a register destination;
;;   movlhps  - register operand 2 into a register destination;
;;   movlps   - register operand 2 to %H0 of an offsettable memory
;;              destination.
1275 (define_insn "sse_loadhps"
1276 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1279 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1280 (parallel [(const_int 0) (const_int 1)]))
1281 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1284 movhps\t{%2, %0|%0, %2}
1285 movlhps\t{%2, %0|%0, %2}
1286 movlps\t{%2, %H0|%H0, %2}"
1287 [(set_attr "type" "ssemov")
1288 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of the V4SF operand 1 into a V2SF destination.
;; The three alternatives emit, in order:
;;   movlps  - register source to memory destination;
;;   movaps  - register source to register destination;
;;   movlps  - memory source to register destination.
1290 (define_insn "sse_storelps"
1291 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1293 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1294 (parallel [(const_int 0) (const_int 1)])))]
1297 movlps\t{%1, %0|%0, %1}
1298 movaps\t{%1, %0|%0, %1}
1299 movlps\t{%1, %0|%0, %1}"
1300 [(set_attr "type" "ssemov")
1301 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine the V2SF operand 2 with elements 2 and 3 of the V4SF operand 1.
;; The three alternatives emit, in order:
;;   shufps $0xe4 - operand 2 is tied to the destination and operand 1 is a
;;                  register;
;;   movlps       - memory operand 2 into a register destination tied to
;;                  operand 1;
;;   movlps       - register operand 2 into a memory destination tied to
;;                  operand 1.
1303 (define_insn "sse_loadlps"
1304 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1306 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1308 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1309 (parallel [(const_int 2) (const_int 3)]))))]
1312 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1313 movlps\t{%2, %0|%0, %2}
1314 movlps\t{%2, %0|%0, %2}"
1315 [(set_attr "type" "sselog,ssemov,ssemov")
1316 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers: operand 2 is the register source, and
;; operand 1 is tied to the destination.
1318 (define_insn "sse_movss"
1319 [(set (match_operand:V4SF 0 "register_operand" "=x")
1321 (match_operand:V4SF 2 "register_operand" "x")
1322 (match_operand:V4SF 1 "register_operand" "0")
1325 "movss\t{%2, %0|%0, %2}"
1326 [(set_attr "type" "ssemov")
1327 (set_attr "mode" "SF")])
;; Build a V4SF from the SFmode operand 1, which is tied to the destination
;; register.  Emitted as shufps with immediate 0, using the destination as
;; both source operands.
1329 (define_insn "*vec_dupv4sf"
1330 [(set (match_operand:V4SF 0 "register_operand" "=x")
1332 (match_operand:SF 1 "register_operand" "0")))]
1334 "shufps\t{$0, %0, %0|%0, %0, 0}"
1335 [(set_attr "type" "sselog1")
1336 (set_attr "mode" "V4SF")])
1338 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1339 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1340 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SF from two SFmode operands; operand 2 must be a register or
;; zero (reg_or_0_operand).  The four alternatives emit, in order:
;;   unpcklps   - SSE registers, operand 1 tied to the destination;
;;   movss      - operand 1 from memory, operand 2 constraint "C";
;;   punpckldq  - MMX registers ("y"), operand 1 tied to the destination;
;;   movd       - operand 1 from memory into an MMX register, operand 2 "C".
1341 (define_insn "*sse_concatv2sf"
1342 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1344 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1345 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1348 unpcklps\t{%2, %0|%0, %2}
1349 movss\t{%1, %0|%0, %1}
1350 punpckldq\t{%2, %0|%0, %2}
1351 movd\t{%1, %0|%0, %1}"
1352 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1353 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Form a V4SF from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 is added with movlhps (register) or movhps
;; (memory).
1355 (define_insn "*sse_concatv4sf"
1356 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1358 (match_operand:V2SF 1 "register_operand" " 0,0")
1359 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1362 movlhps\t{%2, %0|%0, %2}
1363 movhps\t{%2, %0|%0, %2}"
1364 [(set_attr "type" "ssemov")
1365 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector-initialisation expander.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument, the
;; destination (operand 0) and the initialiser (operand 1).
1367 (define_expand "vec_initv4sf"
1368 [(match_operand:V4SF 0 "register_operand" "")
1369 (match_operand 1 "" "")]
1372 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert the SFmode operand 2 into a V4SF destination.  Operand 1 supplies
;; the remaining vector contents and is either tied to the destination
;; ("0") or matched by constraint "C".  Four alternatives are declared; the
;; visible output templates are movss, movss and movd.
;; NOTE(review): the template for the fourth (memory-destination)
;; alternative is not visible in this listing.
1376 (define_insn "vec_setv4sf_0"
1377 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
1380 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1381 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1385 movss\t{%2, %0|%0, %2}
1386 movss\t{%2, %0|%0, %2}
1387 movd\t{%2, %0|%0, %2}
1389 [(set_attr "type" "ssemov")
1390 (set_attr "mode" "SF")])
;; Post-reload rewrite for a V4SF memory destination (operand 0) receiving
;; a non-memory SFmode value (operand 1): the preparation code stores
;; operand 1 at byte offset 0 of operand 0, re-addressed in SFmode, using an
;; ordinary move.
;; NOTE(review): the line introducing this definition is not visible in this
;; listing -- presumably a define_split; confirm.
1393 [(set (match_operand:V4SF 0 "memory_operand" "")
1396 (match_operand:SF 1 "nonmemory_operand" ""))
1399 "TARGET_SSE && reload_completed"
1402 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element-set expander.  Operand 0 is the vector, operand 1 the SFmode
;; value and operand 2 a const_int index.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument.
1406 (define_expand "vec_setv4sf"
1407 [(match_operand:V4SF 0 "register_operand" "")
1408 (match_operand:SF 1 "register_operand" "")
1409 (match_operand 2 "const_int_operand" "")]
1412 ix86_expand_vector_set (false, operands[0], operands[1],
1413 INTVAL (operands[2]));
;; Extract element 0 of the V4SF operand 1 as an SFmode value.  The insn
;; condition rejects a memory-to-memory form.  After reload the pattern is
;; split into a plain SFmode move: the source is re-expressed in SFmode,
;; either as a register with the same register number or through
;; gen_lowpart.
;; NOTE(review): the condition choosing between those two forms is not
;; visible in this listing.
1417 (define_insn_and_split "*vec_extractv4sf_0"
1418 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1420 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1421 (parallel [(const_int 0)])))]
1422 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1424 "&& reload_completed"
1427 rtx op1 = operands[1];
1429 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1431 op1 = gen_lowpart (SFmode, op1);
1432 emit_move_insn (operands[0], op1);
;; V4SF element-extract expander.  Operand 0 is the SFmode result, operand 1
;; the vector and operand 2 a const_int index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
1436 (define_expand "vec_extractv4sf"
1437 [(match_operand:SF 0 "register_operand" "")
1438 (match_operand:V4SF 1 "register_operand" "")
1439 (match_operand 2 "const_int_operand" "")]
1442 ix86_expand_vector_extract (false, operands[0], operands[1],
1443 INTVAL (operands[2]));
1447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1449 ;; Parallel double-precision floating point arithmetic
1451 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation expander.  The RTL template is never emitted as-is: the
;; preparation code calls ix86_expand_fp_absneg_operator and ends in DONE.
1453 (define_expand "negv2df2"
1454 [(set (match_operand:V2DF 0 "register_operand" "")
1455 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1457 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute-value expander.  The RTL template is never emitted as-is:
;; the preparation code calls ix86_expand_fp_absneg_operator and ends in
;; DONE.
1459 (define_expand "absv2df2"
1460 [(set (match_operand:V2DF 0 "register_operand" "")
1461 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1463 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the plus template is then emitted.
1465 (define_expand "addv2df3"
1466 [(set (match_operand:V2DF 0 "register_operand" "")
1467 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1468 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1470 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination; operand 2 may be an SSE register or memory.
1472 (define_insn "*addv2df3"
1473 [(set (match_operand:V2DF 0 "register_operand" "=x")
1474 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1475 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1476 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1477 "addpd\t{%2, %0|%0, %2}"
1478 [(set_attr "type" "sseadd")
1479 (set_attr "mode" "V2DF")])
;; addsd: scalar-form addition on V2DF operands.  Operand 1 is commutative
;; with operand 2 ("%") and tied to the destination; operand 2 may be an
;; SSE register or memory.
;; The operator check is passed V2DFmode, the mode of this pattern's
;; operands, in line with the other V2DF patterns in this section.
1481 (define_insn "sse2_vmaddv2df3"
1482 [(set (match_operand:V2DF 0 "register_operand" "=x")
1484 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1485 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1488 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1489 "addsd\t{%2, %0|%0, %2}"
1490 [(set_attr "type" "sseadd")
1491 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the minus template is then emitted.
1493 (define_expand "subv2df3"
1494 [(set (match_operand:V2DF 0 "register_operand" "")
1495 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1496 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1498 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no "%"
;; commutativity marker); operand 2 may be an SSE register or memory.
1500 (define_insn "*subv2df3"
1501 [(set (match_operand:V2DF 0 "register_operand" "=x")
1502 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1503 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1505 "subpd\t{%2, %0|%0, %2}"
1506 [(set_attr "type" "sseadd")
1507 (set_attr "mode" "V2DF")])
;; subsd: scalar-form subtraction on V2DF operands.  Operand 1 is a
;; register tied to the destination; operand 2 may be an SSE register or
;; memory.
1509 (define_insn "sse2_vmsubv2df3"
1510 [(set (match_operand:V2DF 0 "register_operand" "=x")
1512 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1513 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1517 "subsd\t{%2, %0|%0, %2}"
1518 [(set_attr "type" "sseadd")
1519 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the mult template is then emitted.
1521 (define_expand "mulv2df3"
1522 [(set (match_operand:V2DF 0 "register_operand" "")
1523 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1524 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1526 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination; operand 2 may be an SSE register or memory.
1528 (define_insn "*mulv2df3"
1529 [(set (match_operand:V2DF 0 "register_operand" "=x")
1530 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1531 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1532 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1533 "mulpd\t{%2, %0|%0, %2}"
1534 [(set_attr "type" "ssemul")
1535 (set_attr "mode" "V2DF")])
;; mulsd: scalar-form multiplication on V2DF operands.  Operand 1 is
;; commutative with operand 2 ("%") and tied to the destination; operand 2
;; may be an SSE register or memory.
1537 (define_insn "sse2_vmmulv2df3"
1538 [(set (match_operand:V2DF 0 "register_operand" "=x")
1540 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1541 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1544 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1545 "mulsd\t{%2, %0|%0, %2}"
1546 [(set_attr "type" "ssemul")
1547 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 must already be a register; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; div template is emitted.
1549 (define_expand "divv2df3"
1550 [(set (match_operand:V2DF 0 "register_operand" "")
1551 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1552 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1554 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is a register tied to the destination; operand 2 may
;; be an SSE register or memory.
1556 (define_insn "*divv2df3"
1557 [(set (match_operand:V2DF 0 "register_operand" "=x")
1558 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1559 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1561 "divpd\t{%2, %0|%0, %2}"
1562 [(set_attr "type" "ssediv")
1563 (set_attr "mode" "V2DF")])
;; divsd: scalar-form division on V2DF operands.  Operand 1 is a register
;; tied to the destination; operand 2 may be an SSE register or memory.
1565 (define_insn "sse2_vmdivv2df3"
1566 [(set (match_operand:V2DF 0 "register_operand" "=x")
1568 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1569 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1573 "divsd\t{%2, %0|%0, %2}"
1574 [(set_attr "type" "ssediv")
1575 (set_attr "mode" "DF")])
;; sqrtpd: square root of the V2DF operand 1 (SSE register or memory) into
;; an SSE register.
1577 (define_insn "sqrtv2df2"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x")
1579 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1581 "sqrtpd\t{%1, %0|%0, %1}"
1582 [(set_attr "type" "sse")
1583 (set_attr "mode" "V2DF")])
;; sqrtsd: scalar-form square root on V2DF operands.  Operand 1 is the
;; source (SSE register or memory); operand 2 is tied to the destination.
;; Operand 1 uses nonimmediate_operand so that its predicate agrees with
;; its "xm" constraint, as the other "xm" operands in this section do.
1585 (define_insn "sse2_vmsqrtv2df2"
1586 [(set (match_operand:V2DF 0 "register_operand" "=x")
1588 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1589 (match_operand:V2DF 2 "register_operand" "0")
1592 "sqrtsd\t{%1, %0|%0, %1}"
1593 [(set_attr "type" "sse")
1594 (set_attr "mode" "DF")])
1596 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1597 ;; isn't really correct, as those rtl operators aren't defined when
1598 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
1600 (define_expand "smaxv2df3"
1601 [(set (match_operand:V2DF 0 "register_operand" "")
1602 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1603 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1606 if (!flag_finite_math_only)
1607 operands[1] = force_reg (V2DFmode, operands[1]);
1608 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form: only enabled with flag_finite_math_only, where
;; operand 1 carries the "%" commutativity marker and may be memory.
1611 (define_insn "*smaxv2df3_finite"
1612 [(set (match_operand:V2DF 0 "register_operand" "=x")
1613 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1614 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1615 "TARGET_SSE2 && flag_finite_math_only
1616 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1617 "maxpd\t{%2, %0|%0, %2}"
1618 [(set_attr "type" "sseadd")
1619 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination and has no "%" marker, so the operand order is preserved.
1621 (define_insn "*smaxv2df3"
1622 [(set (match_operand:V2DF 0 "register_operand" "=x")
1623 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1624 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1626 "maxpd\t{%2, %0|%0, %2}"
1627 [(set_attr "type" "sseadd")
1628 (set_attr "mode" "V2DF")])
;; maxsd, commutative scalar form on V2DF operands: only enabled with
;; flag_finite_math_only; operand 1 carries the "%" commutativity marker.
1630 (define_insn "*sse2_vmsmaxv2df3_finite"
1631 [(set (match_operand:V2DF 0 "register_operand" "=x")
1633 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1634 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1637 "TARGET_SSE2 && flag_finite_math_only
1638 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1639 "maxsd\t{%2, %0|%0, %2}"
1640 [(set_attr "type" "sseadd")
1641 (set_attr "mode" "DF")])
;; maxsd, non-commutative scalar form on V2DF operands: operand 1 is a
;; register tied to the destination; operand 2 may be register or memory.
1643 (define_insn "sse2_vmsmaxv2df3"
1644 [(set (match_operand:V2DF 0 "register_operand" "=x")
1646 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1647 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1651 "maxsd\t{%2, %0|%0, %2}"
1652 [(set_attr "type" "sseadd")
1653 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
1655 (define_expand "sminv2df3"
1656 [(set (match_operand:V2DF 0 "register_operand" "")
1657 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1658 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1661 if (!flag_finite_math_only)
1662 operands[1] = force_reg (V2DFmode, operands[1]);
1663 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form: only enabled with flag_finite_math_only, where
;; operand 1 carries the "%" commutativity marker and may be memory.
1666 (define_insn "*sminv2df3_finite"
1667 [(set (match_operand:V2DF 0 "register_operand" "=x")
1668 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1669 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1670 "TARGET_SSE2 && flag_finite_math_only
1671 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1672 "minpd\t{%2, %0|%0, %2}"
1673 [(set_attr "type" "sseadd")
1674 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination and has no "%" marker, so the operand order is preserved.
1676 (define_insn "*sminv2df3"
1677 [(set (match_operand:V2DF 0 "register_operand" "=x")
1678 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1679 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1681 "minpd\t{%2, %0|%0, %2}"
1682 [(set_attr "type" "sseadd")
1683 (set_attr "mode" "V2DF")])
;; minsd, commutative scalar form on V2DF operands: only enabled with
;; flag_finite_math_only; operand 1 carries the "%" commutativity marker.
1685 (define_insn "*sse2_vmsminv2df3_finite"
1686 [(set (match_operand:V2DF 0 "register_operand" "=x")
1688 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1689 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1692 "TARGET_SSE2 && flag_finite_math_only
1693 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1694 "minsd\t{%2, %0|%0, %2}"
1695 [(set_attr "type" "sseadd")
1696 (set_attr "mode" "DF")])
;; minsd, non-commutative scalar form on V2DF operands: operand 1 is a
;; register tied to the destination; operand 2 may be register or memory.
1698 (define_insn "sse2_vmsminv2df3"
1699 [(set (match_operand:V2DF 0 "register_operand" "=x")
1701 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1702 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1706 "minsd\t{%2, %0|%0, %2}"
1707 [(set_attr "type" "sseadd")
1708 (set_attr "mode" "DF")])
;; addsubpd.  Operand 1 is a register tied to the destination; operand 2
;; may be an SSE register or memory.  The pattern includes a minus:V2DF of
;; the same two operands (via match_dup) alongside the first operation on
;; them.
;; NOTE(review): the RTL line naming that first operation is not visible in
;; this listing.
1710 (define_insn "sse3_addsubv2df3"
1711 [(set (match_operand:V2DF 0 "register_operand" "=x")
1714 (match_operand:V2DF 1 "register_operand" "0")
1715 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1716 (minus:V2DF (match_dup 1) (match_dup 2))
1719 "addsubpd\t{%2, %0|%0, %2}"
1720 [(set_attr "type" "sseadd")
1721 (set_attr "mode" "V2DF")])
;; haddpd.  The pattern selects elements 0 and 1 of operand 1 (tied to the
;; destination) and elements 0 and 1 of operand 2 (SSE register or
;; memory), pairing the two elements of each operand.
1723 (define_insn "sse3_haddv2df3"
1724 [(set (match_operand:V2DF 0 "register_operand" "=x")
1728 (match_operand:V2DF 1 "register_operand" "0")
1729 (parallel [(const_int 0)]))
1730 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1733 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1734 (parallel [(const_int 0)]))
1735 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1737 "haddpd\t{%2, %0|%0, %2}"
1738 [(set_attr "type" "sseadd")
1739 (set_attr "mode" "V2DF")])
;; hsubpd.  The pattern selects elements 0 and 1 of operand 1 (tied to the
;; destination) and elements 0 and 1 of operand 2 (SSE register or
;; memory), pairing the two elements of each operand.
1741 (define_insn "sse3_hsubv2df3"
1742 [(set (match_operand:V2DF 0 "register_operand" "=x")
1746 (match_operand:V2DF 1 "register_operand" "0")
1747 (parallel [(const_int 0)]))
1748 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1751 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1752 (parallel [(const_int 0)]))
1753 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1755 "hsubpd\t{%2, %0|%0, %2}"
1756 [(set_attr "type" "sseadd")
1757 (set_attr "mode" "V2DF")])
;; V2DF sum-reduction expander: emits sse3_haddv2df3 with operand 1 passed
;; as both source operands.
1759 (define_expand "reduc_splus_v2df"
1760 [(match_operand:V2DF 0 "register_operand" "")
1761 (match_operand:V2DF 1 "register_operand" "")]
1764 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1770 ;; Parallel double-precision floating point comparisons
1772 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V2DF comparison.  Operand 3 is the comparison operator (accepted
;; by sse_comparison_operator); "%D3" in the template prints its condition
;; into the cmp..pd mnemonic.  Operand 1 is tied to the destination.
1774 (define_insn "sse2_maskcmpv2df3"
1775 [(set (match_operand:V2DF 0 "register_operand" "=x")
1776 (match_operator:V2DF 3 "sse_comparison_operator"
1777 [(match_operand:V2DF 1 "register_operand" "0")
1778 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1780 "cmp%D3pd\t{%2, %0|%0, %2}"
1781 [(set_attr "type" "ssecmp")
1782 (set_attr "mode" "V2DF")])
;; Scalar DFmode comparison.  Operand 3 is the comparison operator; "%D3"
;; prints its condition into the cmp..sd mnemonic.  Operand 1 is tied to
;; the destination.
1784 (define_insn "sse2_maskcmpdf3"
1785 [(set (match_operand:DF 0 "register_operand" "=x")
1786 (match_operator:DF 3 "sse_comparison_operator"
1787 [(match_operand:DF 1 "register_operand" "0")
1788 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1790 "cmp%D3sd\t{%2, %0|%0, %2}"
1791 [(set_attr "type" "ssecmp")
1792 (set_attr "mode" "DF")])
;; Scalar-form comparison on V2DF operands: same operator handling as
;; sse2_maskcmpv2df3, but the template emits cmp..sd.
1794 (define_insn "sse2_vmmaskcmpv2df3"
1795 [(set (match_operand:V2DF 0 "register_operand" "=x")
1797 (match_operator:V2DF 3 "sse_comparison_operator"
1798 [(match_operand:V2DF 1 "register_operand" "0")
1799 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1803 "cmp%D3sd\t{%2, %0|%0, %2}"
1804 [(set_attr "type" "ssecmp")
1805 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (SSE register or memory).
1807 (define_insn "sse2_comi"
1808 [(set (reg:CCFP FLAGS_REG)
1811 (match_operand:V2DF 0 "register_operand" "x")
1812 (parallel [(const_int 0)]))
1814 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1815 (parallel [(const_int 0)]))))]
1817 "comisd\t{%1, %0|%0, %1}"
1818 [(set_attr "type" "ssecomi")
1819 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG in CCFPU mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (SSE register or memory).
1821 (define_insn "sse2_ucomi"
1822 [(set (reg:CCFPU FLAGS_REG)
1825 (match_operand:V2DF 0 "register_operand" "x")
1826 (parallel [(const_int 0)]))
1828 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1829 (parallel [(const_int 0)]))))]
1831 "ucomisd\t{%1, %0|%0, %1}"
1832 [(set_attr "type" "ssecomi")
1833 (set_attr "mode" "DF")])
;; V2DF vector-conditional expander.  Operand 3 is an operator applied to
;; operands 4 and 5; operands 1 and 2 are the two V2DF value operands.
;; Expansion is delegated to ix86_expand_fp_vcond.
;; NOTE(review): the statements controlled by its return value are not
;; visible in this listing.
1835 (define_expand "vcondv2df"
1836 [(set (match_operand:V2DF 0 "register_operand" "")
1838 (match_operator 3 ""
1839 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1840 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1841 (match_operand:V2DF 1 "general_operand" "")
1842 (match_operand:V2DF 2 "general_operand" "")))]
1845 if (ix86_expand_fp_vcond (operands))
1851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1853 ;; Parallel double-precision floating point logical operations
1855 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the and template is then emitted.
1857 (define_expand "andv2df3"
1858 [(set (match_operand:V2DF 0 "register_operand" "")
1859 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1860 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1862 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination; operand 2 may be an SSE register or memory.
1864 (define_insn "*andv2df3"
1865 [(set (match_operand:V2DF 0 "register_operand" "=x")
1866 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1867 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1868 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1869 "andpd\t{%2, %0|%0, %2}"
1870 [(set_attr "type" "sselog")
1871 (set_attr "mode" "V2DF")])
;; andnpd: AND of the complement of operand 1 (register tied to the
;; destination) with operand 2 (SSE register or memory).
1873 (define_insn "sse2_nandv2df3"
1874 [(set (match_operand:V2DF 0 "register_operand" "=x")
1875 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1876 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1878 "andnpd\t{%2, %0|%0, %2}"
1879 [(set_attr "type" "sselog")
1880 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the ior template is then emitted.
1882 (define_expand "iorv2df3"
1883 [(set (match_operand:V2DF 0 "register_operand" "")
1884 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1885 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1887 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination; operand 2 may be an SSE register or memory.
1889 (define_insn "*iorv2df3"
1890 [(set (match_operand:V2DF 0 "register_operand" "=x")
1891 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1892 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1893 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1894 "orpd\t{%2, %0|%0, %2}"
1895 [(set_attr "type" "sselog")
1896 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the xor template is then emitted.
1898 (define_expand "xorv2df3"
1899 [(set (match_operand:V2DF 0 "register_operand" "")
1900 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1901 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1903 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination; operand 2 may be an SSE register or memory.
1905 (define_insn "*xorv2df3"
1906 [(set (match_operand:V2DF 0 "register_operand" "=x")
1907 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1908 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1909 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1910 "xorpd\t{%2, %0|%0, %2}"
1911 [(set_attr "type" "sselog")
1912 (set_attr "mode" "V2DF")])
1914 ;; Also define scalar versions. These are used for abs, neg, and
1915 ;; conditional move. Using subregs into vector modes causes register
1916 ;; allocation lossage. These patterns do not allow memory operands
1917 ;; because the native instructions read the full 128-bits.
;; Scalar DFmode AND, emitted as the packed andpd instruction (hence the
;; V2DF "mode" attribute).  Both sources are registers only; operand 1 is
;; tied to the destination.
1919 (define_insn "*anddf3"
1920 [(set (match_operand:DF 0 "register_operand" "=x")
1921 (and:DF (match_operand:DF 1 "register_operand" "0")
1922 (match_operand:DF 2 "register_operand" "x")))]
1924 "andpd\t{%2, %0|%0, %2}"
1925 [(set_attr "type" "sselog")
1926 (set_attr "mode" "V2DF")])
;; Scalar DFmode AND of the complement of operand 1 with operand 2, emitted
;; as the packed andnpd instruction.  Both sources are registers only;
;; operand 1 is tied to the destination.
1928 (define_insn "*nanddf3"
1929 [(set (match_operand:DF 0 "register_operand" "=x")
1930 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1931 (match_operand:DF 2 "register_operand" "x")))]
1933 "andnpd\t{%2, %0|%0, %2}"
1934 [(set_attr "type" "sselog")
1935 (set_attr "mode" "V2DF")])
;; Scalar DFmode OR, emitted as the packed orpd instruction.  Both sources
;; are registers only; operand 1 is tied to the destination.
1937 (define_insn "*iordf3"
1938 [(set (match_operand:DF 0 "register_operand" "=x")
1939 (ior:DF (match_operand:DF 1 "register_operand" "0")
1940 (match_operand:DF 2 "register_operand" "x")))]
1942 "orpd\t{%2, %0|%0, %2}"
1943 [(set_attr "type" "sselog")
1944 (set_attr "mode" "V2DF")])
;; Scalar DFmode XOR, emitted as the packed xorpd instruction.  Both sources
;; are registers only; operand 1 is tied to the destination.
1946 (define_insn "*xordf3"
1947 [(set (match_operand:DF 0 "register_operand" "=x")
1948 (xor:DF (match_operand:DF 1 "register_operand" "0")
1949 (match_operand:DF 2 "register_operand" "x")))]
1951 "xorpd\t{%2, %0|%0, %2}"
1952 [(set_attr "type" "sselog")
1953 (set_attr "mode" "V2DF")])
1955 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1957 ;; Parallel double-precision floating point conversion operations
1959 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert the V2SI operand 1, held in an MMX register ("y") or
;; in memory, to V2DF.  The "unit" attribute is mmx only for the
;; MMX-register alternative.
1961 (define_insn "sse2_cvtpi2pd"
1962 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1963 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1965 "cvtpi2pd\t{%1, %0|%0, %1}"
1966 [(set_attr "type" "ssecvt")
1967 (set_attr "unit" "mmx,*")
1968 (set_attr "mode" "V2DF")])
;; cvtpd2pi: convert the V2DF operand 1 (SSE register or memory) to V2SI in
;; an MMX register.  The conversion is represented with
;; UNSPEC_FIX_NOTRUNC.
1970 (define_insn "sse2_cvtpd2pi"
1971 [(set (match_operand:V2SI 0 "register_operand" "=y")
1972 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1973 UNSPEC_FIX_NOTRUNC))]
1975 "cvtpd2pi\t{%1, %0|%0, %1}"
1976 [(set_attr "type" "ssecvt")
1977 (set_attr "unit" "mmx")
1978 (set_attr "mode" "DI")])
;; cvttpd2pi: convert the V2DF operand 1 (SSE register or memory) to V2SI in
;; an MMX register, expressed with the `fix' RTL code.
;; NOTE(review): "mode" is TI here but DI in sse2_cvtpd2pi -- confirm the
;; difference is intended.
1980 (define_insn "sse2_cvttpd2pi"
1981 [(set (match_operand:V2SI 0 "register_operand" "=y")
1982 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1984 "cvttpd2pi\t{%1, %0|%0, %1}"
1985 [(set_attr "type" "ssecvt")
1986 (set_attr "unit" "mmx")
1987 (set_attr "mode" "TI")])
;; cvtsi2sd: convert the SImode integer in operand 2 (general register or
;; memory) to DFmode.  Operand 1 is tied to the destination ("0"), so the
;; instruction updates that V2DF register in place.
1989 (define_insn "sse2_cvtsi2sd"
1990 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1993 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1994 (match_operand:V2DF 1 "register_operand" "0,0")
1997 "cvtsi2sd\t{%2, %0|%0, %2}"
1998 [(set_attr "type" "sseicvt")
1999 (set_attr "mode" "DF")
2000 (set_attr "athlon_decode" "double,direct")
2001 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source variant of sse2_cvtsi2sd; only enabled when both
;; TARGET_SSE2 and TARGET_64BIT hold.  Operand 1 is tied to the
;; destination.
2003 (define_insn "sse2_cvtsi2sdq"
2004 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2007 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2008 (match_operand:V2DF 1 "register_operand" "0,0")
2010 "TARGET_SSE2 && TARGET_64BIT"
2011 "cvtsi2sdq\t{%2, %0|%0, %2}"
2012 [(set_attr "type" "sseicvt")
2013 (set_attr "mode" "DF")
2014 (set_attr "athlon_decode" "double,direct")
2015 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: convert element 0 of the V2DF operand 1 (SSE register or
;; memory) to an SImode general register.  The conversion is wrapped in
;; UNSPEC_FIX_NOTRUNC rather than expressed with the `fix' RTL code.
2017 (define_insn "sse2_cvtsd2si"
2018 [(set (match_operand:SI 0 "register_operand" "=r,r")
2021 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2022 (parallel [(const_int 0)]))]
2023 UNSPEC_FIX_NOTRUNC))]
2025 "cvtsd2si\t{%1, %0|%0, %1}"
2026 [(set_attr "type" "sseicvt")
2027 (set_attr "athlon_decode" "double,vector")
2028 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar: same instruction as sse2_cvtsd2si, but operand 1 is
;; a plain DFmode value (SSE register or memory) instead of an element
;; selected from a V2DF vector.
2030 (define_insn "sse2_cvtsd2si_2"
2031 [(set (match_operand:SI 0 "register_operand" "=r,r")
2032 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2033 UNSPEC_FIX_NOTRUNC))]
2035 "cvtsd2si\t{%1, %0|%0, %1}"
2036 [(set_attr "type" "sseicvt")
2037 (set_attr "athlon_decode" "double,vector")
2038 (set_attr "amdfam10_decode" "double,double")
2039 (set_attr "mode" "SI")])
;; cvtsd2siq: convert element 0 of the V2DF operand 1 to a DImode general
;; register via UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT.
2041 (define_insn "sse2_cvtsd2siq"
2042 [(set (match_operand:DI 0 "register_operand" "=r,r")
2045 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2046 (parallel [(const_int 0)]))]
2047 UNSPEC_FIX_NOTRUNC))]
2048 "TARGET_SSE2 && TARGET_64BIT"
2049 "cvtsd2siq\t{%1, %0|%0, %1}"
2050 [(set_attr "type" "sseicvt")
2051 (set_attr "athlon_decode" "double,vector")
2052 (set_attr "mode" "DI")])
;; cvtsd2siq on a scalar DFmode operand (SSE register or memory), producing
;; a DImode general register.  Requires TARGET_SSE2 && TARGET_64BIT.
2054 (define_insn "sse2_cvtsd2siq_2"
2055 [(set (match_operand:DI 0 "register_operand" "=r,r")
2056 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2057 UNSPEC_FIX_NOTRUNC))]
2058 "TARGET_SSE2 && TARGET_64BIT"
2059 "cvtsd2siq\t{%1, %0|%0, %1}"
2060 [(set_attr "type" "sseicvt")
2061 (set_attr "athlon_decode" "double,vector")
2062 (set_attr "amdfam10_decode" "double,double")
2063 (set_attr "mode" "DI")])
;; cvttsd2si: convert element 0 of the V2DF operand 1 (SSE register or
;; memory) to an SImode general register.  Unlike sse2_cvtsd2si, no
;; UNSPEC_FIX_NOTRUNC wrapper appears in this pattern.
2065 (define_insn "sse2_cvttsd2si"
2066 [(set (match_operand:SI 0 "register_operand" "=r,r")
2069 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2070 (parallel [(const_int 0)]))))]
2072 "cvttsd2si\t{%1, %0|%0, %1}"
2073 [(set_attr "type" "sseicvt")
2074 (set_attr "mode" "SI")
2075 (set_attr "athlon_decode" "double,vector")
2076 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-result variant of sse2_cvttsd2si.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2078 (define_insn "sse2_cvttsd2siq"
2079 [(set (match_operand:DI 0 "register_operand" "=r,r")
2082 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2083 (parallel [(const_int 0)]))))]
2084 "TARGET_SSE2 && TARGET_64BIT"
2085 "cvttsd2siq\t{%1, %0|%0, %1}"
2086 [(set_attr "type" "sseicvt")
2087 (set_attr "mode" "DI")
2088 (set_attr "athlon_decode" "double,vector")
2089 (set_attr "amdfam10_decode" "double,double")])
;; Emit cvtdq2pd: elements 0 and 1 of a V4SI operand produce a V2DF
;; result in an SSE register.
2091 (define_insn "sse2_cvtdq2pd"
2092 [(set (match_operand:V2DF 0 "register_operand" "=x")
2095 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2096 (parallel [(const_int 0) (const_int 1)]))))]
2098 "cvtdq2pd\t{%1, %0|%0, %1}"
2099 [(set_attr "type" "ssecvt")
2100 (set_attr "mode" "V2DF")])
;; Expander for the cvtpd2dq intrinsic.  The V2DF source is converted to
;; V2SI through an unspec; the preparation statement supplies operand 2
;; as the V2SImode zero constant that the matching insn expects.
2102 (define_expand "sse2_cvtpd2dq"
2103 [(set (match_operand:V4SI 0 "register_operand" "")
2105 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2109 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvtpd2dq expander: emits cvtpd2dq into a V4SI
;; SSE register.  Operand 2 must satisfy const0_operand.
2111 (define_insn "*sse2_cvtpd2dq"
2112 [(set (match_operand:V4SI 0 "register_operand" "=x")
2114 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2116 (match_operand:V2SI 2 "const0_operand" "")))]
2118 "cvtpd2dq\t{%1, %0|%0, %1}"
2119 [(set_attr "type" "ssecvt")
2120 (set_attr "mode" "TI")
2121 (set_attr "amdfam10_decode" "double")])
;; Expander for the cvttpd2dq intrinsic.  The V2DF source goes through
;; fix:V2SI; the preparation statement supplies operand 2 as the
;; V2SImode zero constant that the matching insn expects.
2123 (define_expand "sse2_cvttpd2dq"
2124 [(set (match_operand:V4SI 0 "register_operand" "")
2126 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2129 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvttpd2dq expander: emits cvttpd2dq into a
;; V4SI SSE register.  Operand 2 must satisfy const0_operand.
2131 (define_insn "*sse2_cvttpd2dq"
2132 [(set (match_operand:V4SI 0 "register_operand" "=x")
2134 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2135 (match_operand:V2SI 2 "const0_operand" "")))]
2137 "cvttpd2dq\t{%1, %0|%0, %1}"
2138 [(set_attr "type" "ssecvt")
2139 (set_attr "mode" "TI")
2140 (set_attr "amdfam10_decode" "double")])
;; Emit cvtsd2ss.  Operand 2 (V2DF, register or memory) is narrowed via
;; float_truncate:V2SF; operand 1 is a V4SF register tied to the
;; destination (constraint "0"), so the instruction updates it in place.
2142 (define_insn "sse2_cvtsd2ss"
2143 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2146 (float_truncate:V2SF
2147 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2148 (match_operand:V4SF 1 "register_operand" "0,0")
2151 "cvtsd2ss\t{%2, %0|%0, %2}"
2152 [(set_attr "type" "ssecvt")
2153 (set_attr "athlon_decode" "vector,double")
2154 (set_attr "amdfam10_decode" "vector,double")
2155 (set_attr "mode" "SF")])
;; Emit cvtss2sd.  The source is taken from elements 0 and 1 of V4SF
;; operand 2; operand 1 is a V2DF register tied to the destination
;; (constraint "0").
2157 (define_insn "sse2_cvtss2sd"
2158 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2162 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2163 (parallel [(const_int 0) (const_int 1)])))
2164 (match_operand:V2DF 1 "register_operand" "0,0")
2167 "cvtss2sd\t{%2, %0|%0, %2}"
2168 [(set_attr "type" "ssecvt")
2169 (set_attr "amdfam10_decode" "vector,double")
2170 (set_attr "mode" "DF")])
;; Expander for the cvtpd2ps intrinsic.  The V2DF source is narrowed
;; with float_truncate:V2SF; the preparation statement supplies operand 2
;; as the V2SFmode zero constant that the matching insn expects.
2172 (define_expand "sse2_cvtpd2ps"
2173 [(set (match_operand:V4SF 0 "register_operand" "")
2175 (float_truncate:V2SF
2176 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2179 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the sse2_cvtpd2ps expander: emits cvtpd2ps into a V4SF
;; SSE register.  Operand 2 must satisfy const0_operand.
2181 (define_insn "*sse2_cvtpd2ps"
2182 [(set (match_operand:V4SF 0 "register_operand" "=x")
2184 (float_truncate:V2SF
2185 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2186 (match_operand:V2SF 2 "const0_operand" "")))]
2188 "cvtpd2ps\t{%1, %0|%0, %1}"
2189 [(set_attr "type" "ssecvt")
2190 (set_attr "mode" "V4SF")
2191 (set_attr "amdfam10_decode" "double")])
;; Emit cvtps2pd: elements 0 and 1 of a V4SF operand produce a V2DF
;; result in an SSE register.
2193 (define_insn "sse2_cvtps2pd"
2194 [(set (match_operand:V2DF 0 "register_operand" "=x")
2197 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2198 (parallel [(const_int 0) (const_int 1)]))))]
2200 "cvtps2pd\t{%1, %0|%0, %1}"
2201 [(set_attr "type" "ssecvt")
2202 (set_attr "mode" "V2DF")
2203 (set_attr "amdfam10_decode" "direct")])
2205 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2207 ;; Parallel double-precision floating point element swizzling
2209 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unpckhpd on two V2DF operands, with three alternatives:
;;   register form          -> unpckhpd
;;   operand 1 in memory    -> movlpd using the %H1 form of operand 1
;;   destination in memory  -> movhpd store of operand 1
;; The condition rejects the case where operands 1 and 2 are both memory.
2211 (define_insn "sse2_unpckhpd"
2212 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2215 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2216 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2217 (parallel [(const_int 1)
2219 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2221 unpckhpd\t{%2, %0|%0, %2}
2222 movlpd\t{%H1, %0|%0, %H1}
2223 movhpd\t{%1, %0|%0, %1}"
2224 [(set_attr "type" "sselog,ssemov,ssemov")
2225 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup on a V2DF operand.  The first alternative (register
;; destination) emits movddup; the second alternative has an offsettable
;; memory destination.  The condition rejects memory-to-memory.
2227 (define_insn "*sse3_movddup"
2228 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2231 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2233 (parallel [(const_int 0)
2235 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2237 movddup\t{%1, %0|%0, %1}
2239 [(set_attr "type" "sselog1,ssemov")
2240 (set_attr "mode" "V2DF")])
;; Split active when TARGET_SSE3 && reload_completed (its header line is
;; not visible in this excerpt).  A V2DF memory destination written from
;; a register is replaced by two DFmode stores of the register's DFmode
;; view, at byte offsets 0 and 8 of the destination.
2243 [(set (match_operand:V2DF 0 "memory_operand" "")
2246 (match_operand:V2DF 1 "register_operand" "")
2248 (parallel [(const_int 0)
2250 "TARGET_SSE3 && reload_completed"
2253 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2254 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2255 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; unpcklpd on two V2DF operands, with three alternatives:
;;   register operand 2     -> unpcklpd
;;   memory operand 2       -> movhpd load
;;   destination in memory  -> movlpd store through the %H0 form
;; Operand 1 is tied to the destination in every alternative.  The
;; condition rejects the case where operands 1 and 2 are both memory.
2259 (define_insn "sse2_unpcklpd"
2260 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2263 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2264 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2265 (parallel [(const_int 0)
2267 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2269 unpcklpd\t{%2, %0|%0, %2}
2270 movhpd\t{%2, %0|%0, %2}
2271 movlpd\t{%2, %H0|%H0, %2}"
2272 [(set_attr "type" "sselog,ssemov,ssemov")
2273 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for the shufpd intrinsic.  Operand 3 is the constant mask;
;; it is decoded into the explicit element selectors taken by
;; sse2_shufpd_1.  Bit 1 of the mask picks selector 3 or 2 for the
;; second selected element.
2275 (define_expand "sse2_shufpd"
2276 [(match_operand:V2DF 0 "register_operand" "")
2277 (match_operand:V2DF 1 "register_operand" "")
2278 (match_operand:V2DF 2 "nonimmediate_operand" "")
2279 (match_operand:SI 3 "const_int_operand" "")]
2282 int mask = INTVAL (operands[3]);
2283 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2285 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit selectors.  Operand 3 is a selector in 0..1 and
;; operand 4 a selector in 2..3.  The output code folds them back into
;; the instruction immediate: bit 0 is operand 3 and bit 1 is
;; (operand 4 - 2).  The result replaces operand 3 before printing.
2289 (define_insn "sse2_shufpd_1"
2290 [(set (match_operand:V2DF 0 "register_operand" "=x")
2293 (match_operand:V2DF 1 "register_operand" "0")
2294 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2295 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2296 (match_operand 4 "const_2_to_3_operand" "")])))]
2300 mask = INTVAL (operands[3]);
2301 mask |= (INTVAL (operands[4]) - 2) << 1;
2302 operands[3] = GEN_INT (mask);
2304 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2306 [(set_attr "type" "sselog")
2307 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF operand into a DFmode destination.  The
;; memory-destination alternative emits movhpd; only that template is
;; visible here.  The condition rejects memory-to-memory.
2309 (define_insn "sse2_storehpd"
2310 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2312 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2313 (parallel [(const_int 1)])))]
2314 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2316 movhpd\t{%1, %0|%0, %1}
2319 [(set_attr "type" "ssemov,sselog1,ssemov")
2320 (set_attr "mode" "V1DF,V2DF,DF")])
;; Split active when TARGET_SSE2 && reload_completed (its header line is
;; not visible in this excerpt).  Reading element 1 of a V2DF memory
;; operand into a DF register becomes a plain DFmode move from byte
;; offset 8 of that memory.
2323 [(set (match_operand:DF 0 "register_operand" "")
2325 (match_operand:V2DF 1 "memory_operand" "")
2326 (parallel [(const_int 1)])))]
2327 "TARGET_SSE2 && reload_completed"
2328 [(set (match_dup 0) (match_dup 1))]
2330 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of a V2DF operand into a DFmode destination.  The
;; memory-destination alternative emits movlpd; only that template is
;; visible here.  The condition rejects memory-to-memory.
2333 (define_insn "sse2_storelpd"
2334 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2336 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2337 (parallel [(const_int 0)])))]
2338 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2340 movlpd\t{%1, %0|%0, %1}
2343 [(set_attr "type" "ssemov")
2344 (set_attr "mode" "V1DF,DF,DF")])
;; Split active when TARGET_SSE2 && reload_completed (its header line is
;; not visible in this excerpt).  Reading element 0 of a V2DF operand
;; into a DF register becomes a plain DFmode move.  The source is
;; rewritten either as a DFmode REG with the same register number or via
;; gen_lowpart; the test choosing between them is not visible here.
2347 [(set (match_operand:DF 0 "register_operand" "")
2349 (match_operand:V2DF 1 "nonimmediate_operand" "")
2350 (parallel [(const_int 0)])))]
2351 "TARGET_SSE2 && reload_completed"
2354 rtx op1 = operands[1];
2356 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2358 op1 = gen_lowpart (DFmode, op1);
2359 emit_move_insn (operands[0], op1);
;; Build a V2DF value from element 0 of V2DF operand 1 and DFmode
;; operand 2.  Visible templates: movhpd when operand 2 is in memory,
;; unpcklpd when it is in a register, and shufpd $1 when operand 2 is
;; tied to the destination.  The fourth alternative has a memory
;; destination.  The condition rejects the case where operands 1 and 2
;; are both memory.
2363 (define_insn "sse2_loadhpd"
2364 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2367 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2368 (parallel [(const_int 0)]))
2369 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2370 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2372 movhpd\t{%2, %0|%0, %2}
2373 unpcklpd\t{%2, %0|%0, %2}
2374 shufpd\t{$1, %1, %0|%0, %1, 1}
2376 [(set_attr "type" "ssemov,sselog,sselog,other")
2377 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Split active when TARGET_SSE2 && reload_completed (its header line is
;; not visible in this excerpt).  The V2DF memory destination keeps its
;; own element 0 and takes DF register operand 1 as the other element,
;; so only the second DFmode slot (byte offset 8) has to be stored.
2380 [(set (match_operand:V2DF 0 "memory_operand" "")
2382 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2383 (match_operand:DF 1 "register_operand" "")))]
2384 "TARGET_SSE2 && reload_completed"
2385 [(set (match_dup 0) (match_dup 1))]
2387 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Build a V2DF value from DFmode operand 2 and element 1 of operand 1
;; (a V2DF value or the zero vector, constraint "C").  Six alternatives;
;; the visible templates use movsd, movlpd, shufpd $2 and movhpd with
;; the %H1 form.  The last alternative has a memory destination.  The
;; condition rejects the case where operands 1 and 2 are both memory.
2390 (define_insn "sse2_loadlpd"
2391 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2393 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2395 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2396 (parallel [(const_int 1)]))))]
2397 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2399 movsd\t{%2, %0|%0, %2}
2400 movlpd\t{%2, %0|%0, %2}
2401 movsd\t{%2, %0|%0, %2}
2402 shufpd\t{$2, %2, %0|%0, %2, 2}
2403 movhpd\t{%H1, %0|%0, %H1}
2405 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2406 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Split active when TARGET_SSE2 && reload_completed (its header line is
;; not visible in this excerpt).  The V2DF memory destination takes DF
;; register operand 1 as its first element and keeps its own element 1,
;; so only the first DFmode slot has to be stored.  That slot lives at
;; byte offset 0; offset 8 would overwrite the element being preserved.
2409 [(set (match_operand:V2DF 0 "memory_operand" "")
2411 (match_operand:DF 1 "register_operand" "")
2412 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2413 "TARGET_SSE2 && reload_completed"
2414 [(set (match_dup 0) (match_dup 1))]
2416 operands[0] = adjust_address (operands[0], DFmode, 0);
2419 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (!TARGET_SSE2 && TARGET_SSE) extraction of element 1 of a
;; V2DF operand into a DFmode destination, using single-precision moves:
;; movhps for a memory destination, movhlps register to register, and
;; movlps from the %H1 form of a memory source.
2421 (define_insn "*vec_extractv2df_1_sse"
2422 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2424 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2425 (parallel [(const_int 1)])))]
2426 "!TARGET_SSE2 && TARGET_SSE
2427 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2429 movhps\t{%1, %0|%0, %1}
2430 movhlps\t{%1, %0|%0, %1}
2431 movlps\t{%H1, %0|%0, %H1}"
2432 [(set_attr "type" "ssemov")
2433 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (!TARGET_SSE2 && TARGET_SSE) extraction of element 0 of a
;; V2DF operand into a DFmode destination: movlps to or from memory,
;; movaps register to register.
2435 (define_insn "*vec_extractv2df_0_sse"
2436 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2438 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2439 (parallel [(const_int 0)])))]
2440 "!TARGET_SSE2 && TARGET_SSE
2441 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2443 movlps\t{%1, %0|%0, %1}
2444 movaps\t{%1, %0|%0, %1}
2445 movlps\t{%1, %0|%0, %1}"
2446 [(set_attr "type" "ssemov")
2447 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd intrinsic pattern on V2DF operands 1 and 2, with six
;; alternatives.  Depending on which operand is tied to the destination
;; and which is in memory, the output is movsd, movlpd, shufpd $2, or
;; movhps through the %H1 / %H0 forms.
2449 (define_insn "sse2_movsd"
2450 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2452 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2453 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2457 movsd\t{%2, %0|%0, %2}
2458 movlpd\t{%2, %0|%0, %2}
2459 movlpd\t{%2, %0|%0, %2}
2460 shufpd\t{$2, %2, %0|%0, %2, 2}
2461 movhps\t{%H1, %0|%0, %H1}
2462 movhps\t{%1, %H0|%H0, %1}"
2463 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2464 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Produce a V2DF register from a single DFmode operand (register or
;; memory) with movddup.
2466 (define_insn "*vec_dupv2df_sse3"
2467 [(set (match_operand:V2DF 0 "register_operand" "=x")
2469 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2471 "movddup\t{%1, %0|%0, %1}"
2472 [(set_attr "type" "sselog1")
2473 (set_attr "mode" "DF")])
;; Variant of the V2DF duplicate pattern whose DFmode source must be a
;; register tied to the destination (constraint "0").  The output
;; template is not visible in this excerpt.
2475 (define_insn "*vec_dupv2df"
2476 [(set (match_operand:V2DF 0 "register_operand" "=x")
2478 (match_operand:DF 1 "register_operand" "0")))]
2481 [(set_attr "type" "sselog1")
2482 (set_attr "mode" "V4SF")])
;; V2DF concatenation form that is emitted as a single movddup of
;; DFmode operand 1 (register or memory).
2484 (define_insn "*vec_concatv2df_sse3"
2485 [(set (match_operand:V2DF 0 "register_operand" "=x")
2487 (match_operand:DF 1 "nonimmediate_operand" "xm")
2490 "movddup\t{%1, %0|%0, %1}"
2491 [(set_attr "type" "sselog1")
2492 (set_attr "mode" "DF")])
;; Build a V2DF register from DFmode operands 1 and 2.  The first three
;; alternatives use the Y2 register constraint and emit unpcklpd, movhpd
;; or movsd (operand 2 being the zero constant "C" in the movsd case).
;; The last two use the plain "x" constraint and emit movlhps or movhps.
2494 (define_insn "*vec_concatv2df"
2495 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2497 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2498 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2501 unpcklpd\t{%2, %0|%0, %2}
2502 movhpd\t{%2, %0|%0, %2}
2503 movsd\t{%1, %0|%0, %1}
2504 movlhps\t{%2, %0|%0, %2}
2505 movhps\t{%2, %0|%0, %2}"
2506 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2507 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Set one element of V2DF register operand 0 to DFmode operand 1.
;; Operand 2 is the constant element index.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument.
2509 (define_expand "vec_setv2df"
2510 [(match_operand:V2DF 0 "register_operand" "")
2511 (match_operand:DF 1 "register_operand" "")
2512 (match_operand 2 "const_int_operand" "")]
2515 ix86_expand_vector_set (false, operands[0], operands[1],
2516 INTVAL (operands[2]));
;; Extract one element of V2DF operand 1 into DFmode register operand 0.
;; Operand 2 is the constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
2520 (define_expand "vec_extractv2df"
2521 [(match_operand:DF 0 "register_operand" "")
2522 (match_operand:V2DF 1 "register_operand" "")
2523 (match_operand 2 "const_int_operand" "")]
2526 ix86_expand_vector_extract (false, operands[0], operands[1],
2527 INTVAL (operands[2]));
;; Initialize V2DF register operand 0 from operand 1 (no predicate or
;; mode is imposed on it here).  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
2531 (define_expand "vec_initv2df"
2532 [(match_operand:V2DF 0 "register_operand" "")
2533 (match_operand 1 "" "")]
2536 ix86_expand_vector_init (false, operands[0], operands[1]);
2540 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2542 ;; Parallel integral arithmetic
2544 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation for every SSEMODEI mode.  The preparation
;; statement forces the zero vector of the same mode into a register and
;; makes it available as operand 2.
2546 (define_expand "neg<mode>2"
2547 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2550 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2552 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition for every SSEMODEI mode.  Operands are first
;; normalized by ix86_fixup_binary_operands_no_copy for PLUS.
2554 (define_expand "add<mode>3"
2555 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2556 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2557 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2559 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; padd{b,w,d,q}: the suffix comes from the ssevecsize mode attribute.
;; Operand 1 is commutative ("%") and tied to the destination.  Valid
;; only when ix86_binary_operator_ok accepts the operands for PLUS.
2561 (define_insn "*add<mode>3"
2562 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2564 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2565 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2566 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2567 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2568 [(set_attr "type" "sseiadd")
2569 (set_attr "mode" "TI")])
;; padds{b,w} for the V16QI and V8HI modes of SSEMODE12.  Operand 1 is
;; commutative and tied to the destination; the condition checks the
;; operands against SS_PLUS.
2571 (define_insn "sse2_ssadd<mode>3"
2572 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2574 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2575 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2576 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2577 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2578 [(set_attr "type" "sseiadd")
2579 (set_attr "mode" "TI")])
;; paddus{b,w} for the V16QI and V8HI modes of SSEMODE12.  Operand 1 is
;; commutative and tied to the destination; the condition checks the
;; operands against US_PLUS.
2581 (define_insn "sse2_usadd<mode>3"
2582 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2584 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2585 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2586 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2587 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2588 [(set_attr "type" "sseiadd")
2589 (set_attr "mode" "TI")])
;; Integer vector subtraction for every SSEMODEI mode.  Operand 1 must
;; be a register (subtraction is not commutative).  Operands are first
;; normalized by ix86_fixup_binary_operands_no_copy for MINUS.
2591 (define_expand "sub<mode>3"
2592 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2593 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2594 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2596 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; psub{b,w,d,q}: the suffix comes from the ssevecsize mode attribute.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
2598 (define_insn "*sub<mode>3"
2599 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2601 (match_operand:SSEMODEI 1 "register_operand" "0")
2602 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2604 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2605 [(set_attr "type" "sseiadd")
2606 (set_attr "mode" "TI")])
;; psubs{b,w} for the V16QI and V8HI modes of SSEMODE12.  Operand 1 is a
;; register tied to the destination.
2608 (define_insn "sse2_sssub<mode>3"
2609 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2611 (match_operand:SSEMODE12 1 "register_operand" "0")
2612 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2614 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2615 [(set_attr "type" "sseiadd")
2616 (set_attr "mode" "TI")])
;; psubus{b,w} for the V16QI and V8HI modes of SSEMODE12.  Operand 1 is
;; a register tied to the destination.
2618 (define_insn "sse2_ussub<mode>3"
2619 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2621 (match_operand:SSEMODE12 1 "register_operand" "0")
2622 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2624 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2625 [(set_attr "type" "sseiadd")
2626 (set_attr "mode" "TI")])
;; V16QI multiply synthesized from word multiplies.  Each input is
;; unpacked against itself so that every 16-bit lane carries one source
;; byte.  The two halves are multiplied with mulv8hi3, and a cascade of
;; punpck{l,h}bw steps gathers the product bytes back into one vector.
;; Twelve V16QI temporaries (t[0]..t[11]) are used.
2628 (define_expand "mulv16qi3"
2629 [(set (match_operand:V16QI 0 "register_operand" "")
2630 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2631 (match_operand:V16QI 2 "register_operand" "")))]
2637 for (i = 0; i < 12; ++i)
2638 t[i] = gen_reg_rtx (V16QImode);
2640 /* Unpack data such that we've got a source byte in each low byte of
2641 each word. We don't care what goes into the high byte of each word.
2642 Rather than trying to get zero in there, most convenient is to let
2643 it be a copy of the low byte. */
2644 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2645 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2646 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2647 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2649 /* Multiply words. The end-of-line annotations here give a picture of what
2650 the output of that instruction looks like. Dot means don't care; the
2651 letters are the bytes of the result with A being the most significant. */
2652 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2653 gen_lowpart (V8HImode, t[0]),
2654 gen_lowpart (V8HImode, t[1])));
2655 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2656 gen_lowpart (V8HImode, t[2]),
2657 gen_lowpart (V8HImode, t[3])));
2659 /* Extract the relevant bytes and merge them back together. */
2660 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2661 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2662 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2663 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2664 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2665 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2668 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are first normalized by
;; ix86_fixup_binary_operands_no_copy for MULT.
2672 (define_expand "mulv8hi3"
2673 [(set (match_operand:V8HI 0 "register_operand" "")
2674 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2675 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2677 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is commutative and tied to the
;; destination; valid only when ix86_binary_operator_ok accepts the
;; operands for MULT.
2679 (define_insn "*mulv8hi3"
2680 [(set (match_operand:V8HI 0 "register_operand" "=x")
2681 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2682 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2683 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2684 "pmullw\t{%2, %0|%0, %2}"
2685 [(set_attr "type" "sseimul")
2686 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply (matched by the
;; pmulhw insn *smulv8hi3_highpart).  Operands are first normalized by
;; ix86_fixup_binary_operands_no_copy for MULT.
2688 (define_expand "smulv8hi3_highpart"
2689 [(set (match_operand:V8HI 0 "register_operand" "")
2694 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2696 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2699 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw on V8HI operands.  Operand 1 is commutative and tied to the
;; destination; valid only when ix86_binary_operator_ok accepts the
;; operands for MULT.
2701 (define_insn "*smulv8hi3_highpart"
2702 [(set (match_operand:V8HI 0 "register_operand" "=x")
2707 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2709 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2711 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2712 "pmulhw\t{%2, %0|%0, %2}"
2713 [(set_attr "type" "sseimul")
2714 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply (matched by the
;; pmulhuw insn *umulv8hi3_highpart).  Operands are first normalized by
;; ix86_fixup_binary_operands_no_copy for MULT.
2716 (define_expand "umulv8hi3_highpart"
2717 [(set (match_operand:V8HI 0 "register_operand" "")
2722 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2724 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2727 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw on V8HI operands.  Operand 1 is commutative and tied to the
;; destination; valid only when ix86_binary_operator_ok accepts the
;; operands for MULT.
2729 (define_insn "*umulv8hi3_highpart"
2730 [(set (match_operand:V8HI 0 "register_operand" "=x")
2735 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2737 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2739 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2740 "pmulhuw\t{%2, %0|%0, %2}"
2741 [(set_attr "type" "sseimul")
2742 (set_attr "mode" "TI")])
;; pmuludq: elements 0 and 2 of each V4SI input are multiplied to give a
;; V2DI result.  Operand 1 is commutative and tied to the destination.
;; The operand check is made in V4SImode, the mode of both inputs.
2744 (define_insn "sse2_umulv2siv2di3"
2745 [(set (match_operand:V2DI 0 "register_operand" "=x")
2749 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2750 (parallel [(const_int 0) (const_int 2)])))
2753 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2754 (parallel [(const_int 0) (const_int 2)])))))]
2755 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2756 "pmuludq\t{%2, %0|%0, %2}"
2757 [(set_attr "type" "sseimul")
2758 (set_attr "mode" "TI")])
;; pmaddwd: V8HI operands 1 and 2 produce a V4SI result.  The RTL uses
;; two groups of V4HI element selections from each input, one starting
;; at element 0 and one starting at element 1 and ending at element 7.
;; Operand 1 is commutative and tied to the destination.
2760 (define_insn "sse2_pmaddwd"
2761 [(set (match_operand:V4SI 0 "register_operand" "=x")
2766 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2767 (parallel [(const_int 0)
2773 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2774 (parallel [(const_int 0)
2780 (vec_select:V4HI (match_dup 1)
2781 (parallel [(const_int 1)
2786 (vec_select:V4HI (match_dup 2)
2787 (parallel [(const_int 1)
2790 (const_int 7)]))))))]
2792 "pmaddwd\t{%2, %0|%0, %2}"
2793 [(set_attr "type" "sseiadd")
2794 (set_attr "mode" "TI")])
;; V4SI multiply synthesized from two pmuludq (sse2_umulv2siv2di3)
;; multiplies.  One multiply handles elements 0 and 2 directly.  The
;; other handles elements 1 and 3 after both inputs have been shifted
;; down by 32 bits as whole TImode values.  The low halves of the four
;; products are then gathered with pshufd and merged with punpckldq.
2796 (define_expand "mulv4si3"
2797 [(set (match_operand:V4SI 0 "register_operand" "")
2798 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2799 (match_operand:V4SI 2 "register_operand" "")))]
2802 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2808 t1 = gen_reg_rtx (V4SImode);
2809 t2 = gen_reg_rtx (V4SImode);
2810 t3 = gen_reg_rtx (V4SImode);
2811 t4 = gen_reg_rtx (V4SImode);
2812 t5 = gen_reg_rtx (V4SImode);
2813 t6 = gen_reg_rtx (V4SImode);
2814 thirtytwo = GEN_INT (32);
2816 /* Multiply elements 2 and 0. */
2817 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2819 /* Shift both input vectors down one element, so that elements 3 and 1
2820 are now in the slots for elements 2 and 0. For K8, at least, this is
2821 faster than using a shuffle. */
2822 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2823 gen_lowpart (TImode, op1), thirtytwo));
2824 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2825 gen_lowpart (TImode, op2), thirtytwo));
2827 /* Multiply elements 3 and 1. */
2828 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2830 /* Move the results in element 2 down to element 1; we don't care what
2831 goes in elements 2 and 3. */
2832 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2833 const0_rtx, const0_rtx));
2834 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2835 const0_rtx, const0_rtx));
2837 /* Merge the parts back together. */
2838 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesized from three pmuludq (sse2_umulv2siv2di3)
;; multiplies: low*low, plus the two cross products of each input's low
;; half with the other input's high half.  The cross products are
;; shifted left by 32 before all three parts are summed.
2842 (define_expand "mulv2di3"
2843 [(set (match_operand:V2DI 0 "register_operand" "")
2844 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2845 (match_operand:V2DI 2 "register_operand" "")))]
2848 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2854 t1 = gen_reg_rtx (V2DImode);
2855 t2 = gen_reg_rtx (V2DImode);
2856 t3 = gen_reg_rtx (V2DImode);
2857 t4 = gen_reg_rtx (V2DImode);
2858 t5 = gen_reg_rtx (V2DImode);
2859 t6 = gen_reg_rtx (V2DImode);
2860 thirtytwo = GEN_INT (32);
2862 /* Multiply low parts. */
2863 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2864 gen_lowpart (V4SImode, op2)));
2866 /* Shift input vectors right 32 bits so we can multiply high parts. */
2867 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2868 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2870 /* Multiply high parts by low parts. */
2871 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2872 gen_lowpart (V4SImode, t3)));
2873 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2874 gen_lowpart (V4SImode, t2)));
2876 /* Shift them back. */
2877 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2878 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2880 /* Add the three parts together. */
2881 emit_insn (gen_addv2di3 (t6, t1, t4));
2882 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, V8HI x V8HI -> V4SI, "hi" variant.  The
;; products are computed twice, with mulv8hi3 (t1) and with
;; smulv8hi3_highpart (t2).  The two results are then interleaved with
;; vec_interleave_highv8hi into operand 0 viewed as V8HI.
2886 (define_expand "vec_widen_smult_hi_v8hi"
2887 [(match_operand:V4SI 0 "register_operand" "")
2888 (match_operand:V8HI 1 "register_operand" "")
2889 (match_operand:V8HI 2 "register_operand" "")]
2892 rtx op1, op2, t1, t2, dest;
2896 t1 = gen_reg_rtx (V8HImode);
2897 t2 = gen_reg_rtx (V8HImode);
2898 dest = gen_lowpart (V8HImode, operands[0]);
2900 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2901 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2902 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, V8HI x V8HI -> V4SI, "lo" variant.  Same
;; scheme as the "hi" variant (mulv8hi3 plus smulv8hi3_highpart), but
;; the two results are combined with vec_interleave_lowv8hi.
2906 (define_expand "vec_widen_smult_lo_v8hi"
2907 [(match_operand:V4SI 0 "register_operand" "")
2908 (match_operand:V8HI 1 "register_operand" "")
2909 (match_operand:V8HI 2 "register_operand" "")]
2912 rtx op1, op2, t1, t2, dest;
2916 t1 = gen_reg_rtx (V8HImode);
2917 t2 = gen_reg_rtx (V8HImode);
2918 dest = gen_lowpart (V8HImode, operands[0]);
2920 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2921 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2922 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, V8HI x V8HI -> V4SI, "hi" variant.  The
;; products are computed twice, with mulv8hi3 (t1) and with
;; umulv8hi3_highpart (t2).  The two results are then interleaved with
;; vec_interleave_highv8hi into operand 0 viewed as V8HI.
2926 (define_expand "vec_widen_umult_hi_v8hi"
2927 [(match_operand:V4SI 0 "register_operand" "")
2928 (match_operand:V8HI 1 "register_operand" "")
2929 (match_operand:V8HI 2 "register_operand" "")]
2932 rtx op1, op2, t1, t2, dest;
2936 t1 = gen_reg_rtx (V8HImode);
2937 t2 = gen_reg_rtx (V8HImode);
2938 dest = gen_lowpart (V8HImode, operands[0]);
2940 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2941 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2942 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, V8HI x V8HI -> V4SI, "lo" variant.  Same
;; scheme as the "hi" variant (mulv8hi3 plus umulv8hi3_highpart), but
;; the two results are combined with vec_interleave_lowv8hi.
2946 (define_expand "vec_widen_umult_lo_v8hi"
2947 [(match_operand:V4SI 0 "register_operand" "")
2948 (match_operand:V8HI 1 "register_operand" "")
2949 (match_operand:V8HI 2 "register_operand" "")]
2952 rtx op1, op2, t1, t2, dest;
2956 t1 = gen_reg_rtx (V8HImode);
2957 t2 = gen_reg_rtx (V8HImode);
2958 dest = gen_lowpart (V8HImode, operands[0]);
2960 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2961 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2962 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, "hi" variant.  Each input is
;; interleaved with itself (vec_interleave_highv4si), then the pair is
;; multiplied with sse2_umulv2siv2di3.
;; NOTE(review): despite the "smult" (signed) name, the final multiply
;; is the unsigned pmuludq pattern, exactly as in
;; vec_widen_umult_hi_v4si -- confirm results for negative inputs.
2966 (define_expand "vec_widen_smult_hi_v4si"
2967 [(match_operand:V2DI 0 "register_operand" "")
2968 (match_operand:V4SI 1 "register_operand" "")
2969 (match_operand:V4SI 2 "register_operand" "")]
2972 rtx op1, op2, t1, t2;
2976 t1 = gen_reg_rtx (V4SImode);
2977 t2 = gen_reg_rtx (V4SImode);
2979 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2980 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2981 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, "lo" variant.  Each input is
;; interleaved with itself (vec_interleave_lowv4si), then the pair is
;; multiplied with sse2_umulv2siv2di3.
;; NOTE(review): despite the "smult" (signed) name, the final multiply
;; is the unsigned pmuludq pattern, exactly as in
;; vec_widen_umult_lo_v4si -- confirm results for negative inputs.
2985 (define_expand "vec_widen_smult_lo_v4si"
2986 [(match_operand:V2DI 0 "register_operand" "")
2987 (match_operand:V4SI 1 "register_operand" "")
2988 (match_operand:V4SI 2 "register_operand" "")]
2991 rtx op1, op2, t1, t2;
2995 t1 = gen_reg_rtx (V4SImode);
2996 t2 = gen_reg_rtx (V4SImode);
2998 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2999 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3000 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, V4SI x V4SI -> V2DI, "hi" variant.  Each
;; input is interleaved with itself (vec_interleave_highv4si) so the
;; wanted elements land where sse2_umulv2siv2di3 reads them.
3004 (define_expand "vec_widen_umult_hi_v4si"
3005 [(match_operand:V2DI 0 "register_operand" "")
3006 (match_operand:V4SI 1 "register_operand" "")
3007 (match_operand:V4SI 2 "register_operand" "")]
3010 rtx op1, op2, t1, t2;
3014 t1 = gen_reg_rtx (V4SImode);
3015 t2 = gen_reg_rtx (V4SImode);
3017 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3018 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3019 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, V4SI x V4SI -> V2DI, "lo" variant.  Each
;; input is interleaved with itself (vec_interleave_lowv4si) so the
;; wanted elements land where sse2_umulv2siv2di3 reads them.
3023 (define_expand "vec_widen_umult_lo_v4si"
3024 [(match_operand:V2DI 0 "register_operand" "")
3025 (match_operand:V4SI 1 "register_operand" "")
3026 (match_operand:V4SI 2 "register_operand" "")]
3029 rtx op1, op2, t1, t2;
3033 t1 = gen_reg_rtx (V4SImode);
3034 t2 = gen_reg_rtx (V4SImode);
3036 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3037 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3038 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes into a fresh V4SI temporary that
;; is then added to the V4SI accumulator with addv4si3.
3042 (define_expand "sdot_prodv8hi"
3043 [(match_operand:V4SI 0 "register_operand" "")
3044 (match_operand:V8HI 1 "nonimmediate_operand" "")
3045 (match_operand:V8HI 2 "nonimmediate_operand" "")
3046 (match_operand:V4SI 3 "register_operand" "")]
3049 rtx t = gen_reg_rtx (V4SImode);
3050 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3051 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product step, V4SI x V4SI accumulated into V2DI.
;;   t1 = pmuludq (op1, op2) + op3        (products of elements 0 and 2)
;;   t2, t3 = op1, op2 shifted right as TImode values
;;   t4 = pmuludq (t2, t3)                (products of the shifted-in
;;                                         elements)
;;   op0 = t1 + t4
;; The shift-count argument lines are not visible in this excerpt.
3055 (define_expand "udot_prodv4si"
3056 [(match_operand:V2DI 0 "register_operand" "")
3057 (match_operand:V4SI 1 "register_operand" "")
3058 (match_operand:V4SI 2 "register_operand" "")
3059 (match_operand:V2DI 3 "register_operand" "")]
3064 t1 = gen_reg_rtx (V2DImode);
3065 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3066 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3068 t2 = gen_reg_rtx (V4SImode);
3069 t3 = gen_reg_rtx (V4SImode);
3070 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3071 gen_lowpart (TImode, operands[1]),
3073 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3074 gen_lowpart (TImode, operands[2]),
3077 t4 = gen_reg_rtx (V2DImode);
3078 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3080 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; psra{w,d} for the V8HI and V4SI modes of SSEMODE24.  Operand 1 is
;; tied to the destination; the shift count (operand 2, TImode) is an
;; SSE register or an immediate.
3084 (define_insn "ashr<mode>3"
3085 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3087 (match_operand:SSEMODE24 1 "register_operand" "0")
3088 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3090 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3091 [(set_attr "type" "sseishft")
3092 (set_attr "mode" "TI")])
;; psrl{w,d,q}: logical right shift for the V8HI, V4SI and V2DI modes of
;; SSEMODE248.  Operand 1 is tied to the destination; the shift count
;; (operand 2, TImode) is an SSE register or an immediate.
3094 (define_insn "lshr<mode>3"
3095 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3096 (lshiftrt:SSEMODE248
3097 (match_operand:SSEMODE248 1 "register_operand" "0")
3098 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3100 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3101 [(set_attr "type" "sseishft")
3102 (set_attr "mode" "TI")])
;; psll{w,d,q} for the V8HI, V4SI and V2DI modes of SSEMODE248.  Operand
;; 1 is tied to the destination; the shift count (operand 2, TImode) is
;; an SSE register or an immediate.
3104 (define_insn "ashl<mode>3"
3105 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3107 (match_operand:SSEMODE248 1 "register_operand" "0")
3108 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3110 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3111 [(set_attr "type" "sseishft")
3112 (set_attr "mode" "TI")])
;; pslldq: whole-register (TImode) left shift.  The RTL shift count is
;; given in bits and must be a multiple of 8 in 0..255
;; (const_0_to_255_mul_8_operand).  The output code divides it by 8
;; before printing the instruction.
3114 (define_insn "sse2_ashlti3"
3115 [(set (match_operand:TI 0 "register_operand" "=x")
3116 (ashift:TI (match_operand:TI 1 "register_operand" "0")
3117 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3120 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3121 return "pslldq\t{%2, %0|%0, %2}";
3123 [(set_attr "type" "sseishft")
3124 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as a
;; TImode ashift.  The count is tested with const_0_to_255_mul_8_operand
;; (the action taken when the test fails is not visible in this
;; excerpt).  Operands 0 and 1 are then re-viewed as TImode.
3126 (define_expand "vec_shl_<mode>"
3127 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3128 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3129 (match_operand:SI 2 "general_operand" "")))]
3132 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3134 operands[0] = gen_lowpart (TImode, operands[0]);
3135 operands[1] = gen_lowpart (TImode, operands[1]);
;; psrldq: whole-register (TImode) logical right shift.  The RTL shift
;; count is given in bits and must be a multiple of 8 in 0..255
;; (const_0_to_255_mul_8_operand).  The output code divides it by 8
;; before printing the instruction.
3138 (define_insn "sse2_lshrti3"
3139 [(set (match_operand:TI 0 "register_operand" "=x")
3140 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3141 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3144 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3145 return "psrldq\t{%2, %0|%0, %2}";
3147 [(set_attr "type" "sseishft")
3148 (set_attr "mode" "TI")])
;; Whole-vector right shift for every SSEMODEI mode, expressed as a
;; TImode lshiftrt.  The count is tested with
;; const_0_to_255_mul_8_operand (the action taken when the test fails is
;; not visible in this excerpt).  Operands 0 and 1 are then re-viewed as
;; TImode.
3150 (define_expand "vec_shr_<mode>"
3151 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3152 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3153 (match_operand:SI 2 "general_operand" "")))]
3156 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3158 operands[0] = gen_lowpart (TImode, operands[0]);
3159 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned V16QI maximum expander.  Operands are first normalized by
;; ix86_fixup_binary_operands_no_copy for UMAX.
3162 (define_expand "umaxv16qi3"
3163 [(set (match_operand:V16QI 0 "register_operand" "")
3164 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3165 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3167 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; pmaxub: unsigned V16QI maximum.  Operand 1 is commutative and tied to
;; the destination; valid only when ix86_binary_operator_ok accepts the
;; operands for UMAX.
3169 (define_insn "*umaxv16qi3"
3170 [(set (match_operand:V16QI 0 "register_operand" "=x")
3171 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3172 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3173 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3174 "pmaxub\t{%2, %0|%0, %2}"
3175 [(set_attr "type" "sseiadd")
3176 (set_attr "mode" "TI")])
;; Signed V8HI maximum expander.  Operands are first normalized by
;; ix86_fixup_binary_operands_no_copy for SMAX.
3178 (define_expand "smaxv8hi3"
3179 [(set (match_operand:V8HI 0 "register_operand" "")
3180 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3181 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3183 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed word maximum, emitted as pmaxsw.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.  Requires SSE2 and ix86_binary_operator_ok.
3185 (define_insn "*smaxv8hi3"
3186 [(set (match_operand:V8HI 0 "register_operand" "=x")
3187 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3188 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3189 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3190 "pmaxsw\t{%2, %0|%0, %2}"
3191 [(set_attr "type" "sseiadd")
3192 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum built from two steps that are visible in the
;; pattern: an unsigned saturating subtract (us_minus) of operand 2 from
;; operand 1, then a plus of that intermediate result and operand 2.
;; The preparation code remembers the caller's destination in operands[3]
;; and, when the destination is the same rtx as operand 2, redirects
;; operand 0 to a fresh V8HI pseudo -- presumably so the first step does not
;; overwrite operand 2 before the addition reads it.
;; NOTE(review): the line that introduces operand 3 into the pattern is not
;; visible in this excerpt.
3194 (define_expand "umaxv8hi3"
3195 [(set (match_operand:V8HI 0 "register_operand" "=x")
3196 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
3197 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3199 (plus:V8HI (match_dup 0) (match_dup 2)))]
3202 operands[3] = operands[0];
3203 if (rtx_equal_p (operands[0], operands[2]))
3204 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI), expanded through
;; ix86_expand_int_vcond.  The xops array follows the operand layout of the
;; vcond<mode> expander: [0] destination, [1] value chosen when the
;; comparison holds, [2] value chosen otherwise, [3] the comparison,
;; [4]/[5] the compared operands.  Here: op1 > op2 ? op1 : op2.
3207 (define_expand "smax<mode>3"
3208 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3209 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3210 (match_operand:SSEMODE14 2 "register_operand" "")))]
3216 xops[0] = operands[0];
3217 xops[1] = operands[1];
3218 xops[2] = operands[2];
3219 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3220 xops[4] = operands[1];
3221 xops[5] = operands[2];
3222 ok = ix86_expand_int_vcond (xops);
;; Unsigned maximum for V4SI, expanded through ix86_expand_int_vcond.
;; The xops array follows the operand layout of the vcond<mode> expander;
;; the comparison is unsigned (GTU), giving: op1 >u op2 ? op1 : op2.
3227 (define_expand "umaxv4si3"
3228 [(set (match_operand:V4SI 0 "register_operand" "")
3229 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3230 (match_operand:V4SI 2 "register_operand" "")))]
3236 xops[0] = operands[0];
3237 xops[1] = operands[1];
3238 xops[2] = operands[2];
3239 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3240 xops[4] = operands[1];
3241 xops[5] = operands[2];
3242 ok = ix86_expand_int_vcond (xops);
;; Named expander for unsigned minimum on V16QI.  The preparation statement
;; only calls ix86_fixup_binary_operands_no_copy on the operands for UMIN;
;; the resulting RTL is expected to be matched by the "*uminv16qi3" insn.
3247 (define_expand "uminv16qi3"
3248 [(set (match_operand:V16QI 0 "register_operand" "")
3249 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3250 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3252 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Unsigned byte minimum, emitted as pminub.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.  Requires SSE2 and ix86_binary_operator_ok.
3254 (define_insn "*uminv16qi3"
3255 [(set (match_operand:V16QI 0 "register_operand" "=x")
3256 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3257 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3258 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3259 "pminub\t{%2, %0|%0, %2}"
3260 [(set_attr "type" "sseiadd")
3261 (set_attr "mode" "TI")])
;; Named expander for signed minimum on V8HI.  The preparation statement
;; only calls ix86_fixup_binary_operands_no_copy on the operands for SMIN;
;; the resulting RTL is expected to be matched by the "*sminv8hi3" insn.
3263 (define_expand "sminv8hi3"
3264 [(set (match_operand:V8HI 0 "register_operand" "")
3265 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3266 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3268 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Signed word minimum, emitted as pminsw.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.  Requires SSE2 and ix86_binary_operator_ok.
3270 (define_insn "*sminv8hi3"
3271 [(set (match_operand:V8HI 0 "register_operand" "=x")
3272 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3273 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3274 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3275 "pminsw\t{%2, %0|%0, %2}"
3276 [(set_attr "type" "sseiadd")
3277 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI), expanded through
;; ix86_expand_int_vcond.  Same comparison as the signed maximum expander
;; (op1 > op2) but with the two selected values swapped in xops[1]/xops[2],
;; giving: op1 > op2 ? op2 : op1.
3279 (define_expand "smin<mode>3"
3280 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3281 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3282 (match_operand:SSEMODE14 2 "register_operand" "")))]
3288 xops[0] = operands[0];
3289 xops[1] = operands[2];
3290 xops[2] = operands[1];
3291 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3292 xops[4] = operands[1];
3293 xops[5] = operands[2];
3294 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI), expanded through
;; ix86_expand_int_vcond.  The comparison is unsigned (GTU) and the selected
;; values are swapped relative to a maximum: op1 >u op2 ? op2 : op1.
3299 (define_expand "umin<mode>3"
3300 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3301 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3302 (match_operand:SSEMODE24 2 "register_operand" "")))]
3308 xops[0] = operands[0];
3309 xops[1] = operands[2];
3310 xops[2] = operands[1];
3311 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3312 xops[4] = operands[1];
3313 xops[5] = operands[2];
3314 ok = ix86_expand_int_vcond (xops);
3319 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3321 ;; Parallel integral comparisons
3323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for V16QI/V8HI/V4SI, emitted as
;; pcmpeq{b,w,d} (suffix taken from the ssevecsize attribute).  Operand 1 is
;; tied to the destination and marked commutative; operand 2 may be a
;; register or memory.  Requires SSE2 and ix86_binary_operator_ok.
3325 (define_insn "sse2_eq<mode>3"
3326 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3328 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3329 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3330 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3331 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3332 [(set_attr "type" "ssecmp")
3333 (set_attr "mode" "TI")])
;; Element-wise compare for V16QI/V8HI/V4SI, emitted as pcmpgt{b,w,d}
;; (suffix taken from the ssevecsize attribute).  Unlike the equality
;; pattern, operand 1 is not marked commutative: it must be the register
;; tied to the destination, and operand 2 may be a register or memory.
3335 (define_insn "sse2_gt<mode>3"
3336 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3338 (match_operand:SSEMODE124 1 "register_operand" "0")
3339 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3341 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3342 [(set_attr "type" "ssecmp")
3343 (set_attr "mode" "TI")])
;; Vector conditional select for V16QI/V8HI/V4SI:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; The whole operand array is handed to ix86_expand_int_vcond; the code
;; that acts on its return value is not visible in this excerpt.
3345 (define_expand "vcond<mode>"
3346 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3347 (if_then_else:SSEMODE124
3348 (match_operator 3 ""
3349 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3350 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3351 (match_operand:SSEMODE124 1 "general_operand" "")
3352 (match_operand:SSEMODE124 2 "general_operand" "")))]
3355 if (ix86_expand_int_vcond (operands))
;; Vector conditional select for V16QI/V8HI/V4SI under the "vcondu" name,
;; with the same RTL shape as vcond<mode>:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; The whole operand array is handed to ix86_expand_int_vcond; the code
;; that acts on its return value is not visible in this excerpt.
3361 (define_expand "vcondu<mode>"
3362 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3363 (if_then_else:SSEMODE124
3364 (match_operator 3 ""
3365 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3366 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3367 (match_operand:SSEMODE124 1 "general_operand" "")
3368 (match_operand:SSEMODE124 2 "general_operand" "")))]
3371 if (ix86_expand_int_vcond (operands))
3377 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3379 ;; Parallel integral logical operations
3381 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of an integer vector, expressed as an xor with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx, forces it into a register
;; and stores it in operands[2].
3383 (define_expand "one_cmpl<mode>2"
3384 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3385 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3389 int i, n = GET_MODE_NUNITS (<MODE>mode);
3390 rtvec v = rtvec_alloc (n);
3392 for (i = 0; i < n; ++i)
3393 RTVEC_ELT (v, i) = constm1_rtx;
3395 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for bitwise AND on the SSEMODEI integer vector modes.
;; The preparation statement only calls ix86_fixup_binary_operands_no_copy;
;; the resulting RTL is expected to be matched by the "*and<mode>3" insn.
3398 (define_expand "and<mode>3"
3399 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3400 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3401 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3403 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND of integer vectors, emitted as pand regardless of element
;; size.  Operand 1 is tied to the destination and marked commutative;
;; operand 2 may be a register or memory.  Requires SSE2 and
;; ix86_binary_operator_ok.
3405 (define_insn "*and<mode>3"
3406 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3408 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3409 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3410 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3411 "pand\t{%2, %0|%0, %2}"
3412 [(set_attr "type" "sselog")
3413 (set_attr "mode" "TI")])
;; AND-NOT of integer vectors, emitted as pandn.  The complemented input is
;; operand 1 (wrapped in "not" and tied to the destination); operand 2 is
;; used as-is and may be a register or memory.  Because only operand 1 is
;; inverted the pattern is not commutative.
3415 (define_insn "sse2_nand<mode>3"
3416 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3418 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3419 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3421 "pandn\t{%2, %0|%0, %2}"
3422 [(set_attr "type" "sselog")
3423 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR on the SSEMODEI integer vector
;; modes.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy; the resulting RTL is expected to be
;; matched by the "*ior<mode>3" insn.
3425 (define_expand "ior<mode>3"
3426 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3427 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3428 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3430 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise inclusive OR of integer vectors, emitted as por regardless of
;; element size.  Operand 1 is tied to the destination and marked
;; commutative; operand 2 may be a register or memory.  Requires SSE2 and
;; ix86_binary_operator_ok.
3432 (define_insn "*ior<mode>3"
3433 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3435 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3436 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3437 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3438 "por\t{%2, %0|%0, %2}"
3439 [(set_attr "type" "sselog")
3440 (set_attr "mode" "TI")])
;; Named expander for bitwise exclusive OR on the SSEMODEI integer vector
;; modes.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy; the resulting RTL is expected to be
;; matched by the "*xor<mode>3" insn.
3442 (define_expand "xor<mode>3"
3443 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3444 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3445 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3447 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise exclusive OR of integer vectors, emitted as pxor regardless of
;; element size.  Operand 1 is tied to the destination and marked
;; commutative; operand 2 may be a register or memory.  Requires SSE2 and
;; ix86_binary_operator_ok.
3449 (define_insn "*xor<mode>3"
3450 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3452 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3453 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3454 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3455 "pxor\t{%2, %0|%0, %2}"
3456 [(set_attr "type" "sselog")
3457 (set_attr "mode" "TI")])
3459 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3461 ;; Parallel integral element swizzling
3463 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3466 ;; op1 = abcdefghijklmnop
3467 ;; op2 = qrstuvwxyz012345
3468 ;; h1 = aqbrcsdteufvgwhx
3469 ;; l1 = iyjzk0l1m2n3o4p5
3470 ;; h2 = aiqybjrzcks0dlt1
3471 ;; l2 = emu2fnv3gow4hpx5
3472 ;; h3 = aeimquy2bfjnrvz3
3473 ;; l3 = cgkosw04dhlptx15
3474 ;; result = bdfhjlnprtvxz135
;; Pack two V8HI inputs into one V16QI result using only byte interleaves.
;; Both inputs are re-viewed as V16QI and run through three rounds of
;; high/low interleaving (h1/l1, h2/l2, h3/l3); a final low interleave of
;; l3 and h3 writes operand 0.  The letter diagram in the comment preceding
;; this expander traces each intermediate value.
3475 (define_expand "vec_pack_mod_v8hi"
3476 [(match_operand:V16QI 0 "register_operand" "")
3477 (match_operand:V8HI 1 "register_operand" "")
3478 (match_operand:V8HI 2 "register_operand" "")]
3481 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3483 op1 = gen_lowpart (V16QImode, operands[1]);
3484 op2 = gen_lowpart (V16QImode, operands[2]);
3485 h1 = gen_reg_rtx (V16QImode);
3486 l1 = gen_reg_rtx (V16QImode);
3487 h2 = gen_reg_rtx (V16QImode);
3488 l2 = gen_reg_rtx (V16QImode);
3489 h3 = gen_reg_rtx (V16QImode);
3490 l3 = gen_reg_rtx (V16QImode);
3492 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3493 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3494 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3495 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3496 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3497 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3498 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3509 ;; result = bdfhjlnp
;; Pack two V4SI inputs into one V8HI result using only word interleaves.
;; Both inputs are re-viewed as V8HI and run through two rounds of high/low
;; interleaving (h1/l1, h2/l2); a final low interleave of l2 and h2 writes
;; operand 0.
3510 (define_expand "vec_pack_mod_v4si"
3511 [(match_operand:V8HI 0 "register_operand" "")
3512 (match_operand:V4SI 1 "register_operand" "")
3513 (match_operand:V4SI 2 "register_operand" "")]
3516 rtx op1, op2, h1, l1, h2, l2;
3518 op1 = gen_lowpart (V8HImode, operands[1]);
3519 op2 = gen_lowpart (V8HImode, operands[2]);
3520 h1 = gen_reg_rtx (V8HImode);
3521 l1 = gen_reg_rtx (V8HImode);
3522 h2 = gen_reg_rtx (V8HImode);
3523 l2 = gen_reg_rtx (V8HImode);
3525 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3526 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3527 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3528 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3529 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Pack two V2DI inputs into one V4SI result using only dword interleaves.
;; Both inputs are re-viewed as V4SI, interleaved once into h1 (high) and
;; l1 (low), and a final low interleave of l1 and h1 writes operand 0.
3539 (define_expand "vec_pack_mod_v2di"
3540 [(match_operand:V4SI 0 "register_operand" "")
3541 (match_operand:V2DI 1 "register_operand" "")
3542 (match_operand:V2DI 2 "register_operand" "")]
3545 rtx op1, op2, h1, l1;
3547 op1 = gen_lowpart (V4SImode, operands[1]);
3548 op2 = gen_lowpart (V4SImode, operands[2]);
3549 h1 = gen_reg_rtx (V4SImode);
3550 l1 = gen_reg_rtx (V4SImode);
3552 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3553 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3554 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Standard-named wrapper: interleave the high bytes of two V16QI operands
;; (selection indices 8/24 .. 15/31).  Expansion simply emits the
;; sse2_punpckhbw insn on the same three operands.
3558 (define_expand "vec_interleave_highv16qi"
3559 [(set (match_operand:V16QI 0 "register_operand" "=x")
3562 (match_operand:V16QI 1 "register_operand" "0")
3563 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3564 (parallel [(const_int 8) (const_int 24)
3565 (const_int 9) (const_int 25)
3566 (const_int 10) (const_int 26)
3567 (const_int 11) (const_int 27)
3568 (const_int 12) (const_int 28)
3569 (const_int 13) (const_int 29)
3570 (const_int 14) (const_int 30)
3571 (const_int 15) (const_int 31)])))]
3574 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Standard-named wrapper: interleave the low bytes of two V16QI operands
;; (selection indices 0/16 .. 7/23).  Expansion simply emits the
;; sse2_punpcklbw insn on the same three operands.
3578 (define_expand "vec_interleave_lowv16qi"
3579 [(set (match_operand:V16QI 0 "register_operand" "=x")
3582 (match_operand:V16QI 1 "register_operand" "0")
3583 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3584 (parallel [(const_int 0) (const_int 16)
3585 (const_int 1) (const_int 17)
3586 (const_int 2) (const_int 18)
3587 (const_int 3) (const_int 19)
3588 (const_int 4) (const_int 20)
3589 (const_int 5) (const_int 21)
3590 (const_int 6) (const_int 22)
3591 (const_int 7) (const_int 23)])))]
3594 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Standard-named wrapper: interleave the high words of two V8HI operands
;; (selection indices 4/12 .. 7/15).  Expansion simply emits the
;; sse2_punpckhwd insn on the same three operands.
3598 (define_expand "vec_interleave_highv8hi"
3599 [(set (match_operand:V8HI 0 "register_operand" "=x")
3602 (match_operand:V8HI 1 "register_operand" "0")
3603 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3604 (parallel [(const_int 4) (const_int 12)
3605 (const_int 5) (const_int 13)
3606 (const_int 6) (const_int 14)
3607 (const_int 7) (const_int 15)])))]
3610 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Standard-named wrapper: interleave the low words of two V8HI operands
;; (selection indices 0/8 .. 3/11).  Expansion simply emits the
;; sse2_punpcklwd insn on the same three operands.
3614 (define_expand "vec_interleave_lowv8hi"
3615 [(set (match_operand:V8HI 0 "register_operand" "=x")
3618 (match_operand:V8HI 1 "register_operand" "0")
3619 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3620 (parallel [(const_int 0) (const_int 8)
3621 (const_int 1) (const_int 9)
3622 (const_int 2) (const_int 10)
3623 (const_int 3) (const_int 11)])))]
3626 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Standard-named wrapper: interleave the high dwords of two V4SI operands
;; (selection indices 2/6, 3/7).  Expansion simply emits the sse2_punpckhdq
;; insn on the same three operands.
3630 (define_expand "vec_interleave_highv4si"
3631 [(set (match_operand:V4SI 0 "register_operand" "=x")
3634 (match_operand:V4SI 1 "register_operand" "0")
3635 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3636 (parallel [(const_int 2) (const_int 6)
3637 (const_int 3) (const_int 7)])))]
3640 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Standard-named wrapper: interleave the low dwords of two V4SI operands
;; (selection indices 0/4, 1/5).  Expansion simply emits the sse2_punpckldq
;; insn on the same three operands.
3644 (define_expand "vec_interleave_lowv4si"
3645 [(set (match_operand:V4SI 0 "register_operand" "=x")
3648 (match_operand:V4SI 1 "register_operand" "0")
3649 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3650 (parallel [(const_int 0) (const_int 4)
3651 (const_int 1) (const_int 5)])))]
3654 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Standard-named wrapper for the high-quadword interleave of two V2DI
;; operands.  Expansion simply emits the sse2_punpckhqdq insn on the same
;; three operands.  (The tail of the selection list is not visible in this
;; excerpt.)
3658 (define_expand "vec_interleave_highv2di"
3659 [(set (match_operand:V2DI 0 "register_operand" "=x")
3662 (match_operand:V2DI 1 "register_operand" "0")
3663 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3664 (parallel [(const_int 1)
3668 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Standard-named wrapper for the low-quadword interleave of two V2DI
;; operands.  Expansion simply emits the sse2_punpcklqdq insn on the same
;; three operands.  (The tail of the selection list is not visible in this
;; excerpt.)
3672 (define_expand "vec_interleave_lowv2di"
3673 [(set (match_operand:V2DI 0 "register_operand" "=x")
3676 (match_operand:V2DI 1 "register_operand" "0")
3677 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3678 (parallel [(const_int 0)
3682 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Narrow two V8HI operands into one V16QI result, emitted as packsswb.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  (The RTL codes wrapping the operands are not visible in this
;; excerpt.)
3686 (define_insn "sse2_packsswb"
3687 [(set (match_operand:V16QI 0 "register_operand" "=x")
3690 (match_operand:V8HI 1 "register_operand" "0"))
3692 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3694 "packsswb\t{%2, %0|%0, %2}"
3695 [(set_attr "type" "sselog")
3696 (set_attr "mode" "TI")])
;; Narrow two V4SI operands into one V8HI result, emitted as packssdw.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  (The RTL codes wrapping the operands are not visible in this
;; excerpt.)
3698 (define_insn "sse2_packssdw"
3699 [(set (match_operand:V8HI 0 "register_operand" "=x")
3702 (match_operand:V4SI 1 "register_operand" "0"))
3704 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3706 "packssdw\t{%2, %0|%0, %2}"
3707 [(set_attr "type" "sselog")
3708 (set_attr "mode" "TI")])
;; Narrow two V8HI operands into one V16QI result, emitted as packuswb.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  (The RTL codes wrapping the operands are not visible in this
;; excerpt.)
3710 (define_insn "sse2_packuswb"
3711 [(set (match_operand:V16QI 0 "register_operand" "=x")
3714 (match_operand:V8HI 1 "register_operand" "0"))
3716 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3718 "packuswb\t{%2, %0|%0, %2}"
3719 [(set_attr "type" "sselog")
3720 (set_attr "mode" "TI")])
;; Interleave the high bytes of two V16QI operands (selection indices
;; 8/24 .. 15/31), emitted as punpckhbw.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3722 (define_insn "sse2_punpckhbw"
3723 [(set (match_operand:V16QI 0 "register_operand" "=x")
3726 (match_operand:V16QI 1 "register_operand" "0")
3727 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3728 (parallel [(const_int 8) (const_int 24)
3729 (const_int 9) (const_int 25)
3730 (const_int 10) (const_int 26)
3731 (const_int 11) (const_int 27)
3732 (const_int 12) (const_int 28)
3733 (const_int 13) (const_int 29)
3734 (const_int 14) (const_int 30)
3735 (const_int 15) (const_int 31)])))]
3737 "punpckhbw\t{%2, %0|%0, %2}"
3738 [(set_attr "type" "sselog")
3739 (set_attr "mode" "TI")])
;; Interleave the low bytes of two V16QI operands (selection indices
;; 0/16 .. 7/23), emitted as punpcklbw.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3741 (define_insn "sse2_punpcklbw"
3742 [(set (match_operand:V16QI 0 "register_operand" "=x")
3745 (match_operand:V16QI 1 "register_operand" "0")
3746 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3747 (parallel [(const_int 0) (const_int 16)
3748 (const_int 1) (const_int 17)
3749 (const_int 2) (const_int 18)
3750 (const_int 3) (const_int 19)
3751 (const_int 4) (const_int 20)
3752 (const_int 5) (const_int 21)
3753 (const_int 6) (const_int 22)
3754 (const_int 7) (const_int 23)])))]
3756 "punpcklbw\t{%2, %0|%0, %2}"
3757 [(set_attr "type" "sselog")
3758 (set_attr "mode" "TI")])
;; Interleave the high words of two V8HI operands (selection indices
;; 4/12 .. 7/15), emitted as punpckhwd.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3760 (define_insn "sse2_punpckhwd"
3761 [(set (match_operand:V8HI 0 "register_operand" "=x")
3764 (match_operand:V8HI 1 "register_operand" "0")
3765 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3766 (parallel [(const_int 4) (const_int 12)
3767 (const_int 5) (const_int 13)
3768 (const_int 6) (const_int 14)
3769 (const_int 7) (const_int 15)])))]
3771 "punpckhwd\t{%2, %0|%0, %2}"
3772 [(set_attr "type" "sselog")
3773 (set_attr "mode" "TI")])
;; Interleave the low words of two V8HI operands (selection indices
;; 0/8 .. 3/11), emitted as punpcklwd.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3775 (define_insn "sse2_punpcklwd"
3776 [(set (match_operand:V8HI 0 "register_operand" "=x")
3779 (match_operand:V8HI 1 "register_operand" "0")
3780 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3781 (parallel [(const_int 0) (const_int 8)
3782 (const_int 1) (const_int 9)
3783 (const_int 2) (const_int 10)
3784 (const_int 3) (const_int 11)])))]
3786 "punpcklwd\t{%2, %0|%0, %2}"
3787 [(set_attr "type" "sselog")
3788 (set_attr "mode" "TI")])
;; Interleave the high dwords of two V4SI operands (selection indices
;; 2/6, 3/7), emitted as punpckhdq.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
3790 (define_insn "sse2_punpckhdq"
3791 [(set (match_operand:V4SI 0 "register_operand" "=x")
3794 (match_operand:V4SI 1 "register_operand" "0")
3795 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3796 (parallel [(const_int 2) (const_int 6)
3797 (const_int 3) (const_int 7)])))]
3799 "punpckhdq\t{%2, %0|%0, %2}"
3800 [(set_attr "type" "sselog")
3801 (set_attr "mode" "TI")])
;; Interleave the low dwords of two V4SI operands (selection indices
;; 0/4, 1/5), emitted as punpckldq.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
3803 (define_insn "sse2_punpckldq"
3804 [(set (match_operand:V4SI 0 "register_operand" "=x")
3807 (match_operand:V4SI 1 "register_operand" "0")
3808 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3809 (parallel [(const_int 0) (const_int 4)
3810 (const_int 1) (const_int 5)])))]
3812 "punpckldq\t{%2, %0|%0, %2}"
3813 [(set_attr "type" "sselog")
3814 (set_attr "mode" "TI")])
;; High-quadword interleave of two V2DI operands, emitted as punpckhqdq.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  (The tail of the selection list is not visible in this excerpt.)
3816 (define_insn "sse2_punpckhqdq"
3817 [(set (match_operand:V2DI 0 "register_operand" "=x")
3820 (match_operand:V2DI 1 "register_operand" "0")
3821 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3822 (parallel [(const_int 1)
3825 "punpckhqdq\t{%2, %0|%0, %2}"
3826 [(set_attr "type" "sselog")
3827 (set_attr "mode" "TI")])
;; Low-quadword interleave of two V2DI operands, emitted as punpcklqdq.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  (The tail of the selection list is not visible in this excerpt.)
3829 (define_insn "sse2_punpcklqdq"
3830 [(set (match_operand:V2DI 0 "register_operand" "=x")
3833 (match_operand:V2DI 1 "register_operand" "0")
3834 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3835 (parallel [(const_int 0)
3838 "punpcklqdq\t{%2, %0|%0, %2}"
3839 [(set_attr "type" "sselog")
3840 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for inserting a 16-bit value into a V8HI
;; vector.  The SImode source (operand 2) is narrowed to HImode with
;; gen_lowpart, and the element index 0..7 in operand 3 is converted to the
;; one-hot mask (1 << index) that the "*sse2_pinsrw" insn expects.
3842 (define_expand "sse2_pinsrw"
3843 [(set (match_operand:V8HI 0 "register_operand" "")
3846 (match_operand:SI 2 "nonimmediate_operand" ""))
3847 (match_operand:V8HI 1 "register_operand" "")
3848 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3851 operands[2] = gen_lowpart (HImode, operands[2]);
3852 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Insert an HImode value (register or memory) into one element of a V8HI
;; vector.  Operand 3 arrives as a power-of-two mask (1..128); exact_log2
;; turns it back into the element index that pinsrw takes as its immediate.
;; %k2 prints operand 2 with its 32-bit register name.
3855 (define_insn "*sse2_pinsrw"
3856 [(set (match_operand:V8HI 0 "register_operand" "=x")
3859 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3860 (match_operand:V8HI 1 "register_operand" "0")
3861 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3864 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3865 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3867 [(set_attr "type" "sselog")
3868 (set_attr "mode" "TI")])
;; Extract one element of a V8HI vector into an SImode general register,
;; emitted as pextrw.  Operand 2 is the constant element index (0..7).
3870 (define_insn "sse2_pextrw"
3871 [(set (match_operand:SI 0 "register_operand" "=r")
3874 (match_operand:V8HI 1 "register_operand" "x")
3875 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3877 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3878 [(set_attr "type" "sselog")
3879 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshufd.  The 8-bit immediate in operand 2
;; is split into four 2-bit element selectors (bits 0-1, 2-3, 4-5, 6-7)
;; that are passed as separate operands to sse2_pshufd_1.
3881 (define_expand "sse2_pshufd"
3882 [(match_operand:V4SI 0 "register_operand" "")
3883 (match_operand:V4SI 1 "nonimmediate_operand" "")
3884 (match_operand:SI 2 "const_int_operand" "")]
3887 int mask = INTVAL (operands[2]);
3888 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3889 GEN_INT ((mask >> 0) & 3),
3890 GEN_INT ((mask >> 2) & 3),
3891 GEN_INT ((mask >> 4) & 3),
3892 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with one selector operand (each 0..3) per result element.
;; At output time the four selectors are packed back into a single
;; immediate, two bits each with operand 2 in the lowest bits, stored in
;; operands[2] and printed with pshufd.
3896 (define_insn "sse2_pshufd_1"
3897 [(set (match_operand:V4SI 0 "register_operand" "=x")
3899 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3900 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3901 (match_operand 3 "const_0_to_3_operand" "")
3902 (match_operand 4 "const_0_to_3_operand" "")
3903 (match_operand 5 "const_0_to_3_operand" "")])))]
3907 mask |= INTVAL (operands[2]) << 0;
3908 mask |= INTVAL (operands[3]) << 2;
3909 mask |= INTVAL (operands[4]) << 4;
3910 mask |= INTVAL (operands[5]) << 6;
3911 operands[2] = GEN_INT (mask);
3913 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3915 [(set_attr "type" "sselog1")
3916 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshuflw.  The 8-bit immediate in operand 2
;; is split into four 2-bit element selectors (bits 0-1, 2-3, 4-5, 6-7)
;; that are passed as separate operands to sse2_pshuflw_1.
3918 (define_expand "sse2_pshuflw"
3919 [(match_operand:V8HI 0 "register_operand" "")
3920 (match_operand:V8HI 1 "nonimmediate_operand" "")
3921 (match_operand:SI 2 "const_int_operand" "")]
3924 int mask = INTVAL (operands[2]);
3925 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3926 GEN_INT ((mask >> 0) & 3),
3927 GEN_INT ((mask >> 2) & 3),
3928 GEN_INT ((mask >> 4) & 3),
3929 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four result elements are chosen by selector
;; operands 2..5 (each 0..3); the remainder of the selection list is not
;; visible in this excerpt.  At output time the four selectors are packed
;; back into a single immediate, two bits each with operand 2 in the lowest
;; bits, stored in operands[2] and printed with pshuflw.
3933 (define_insn "sse2_pshuflw_1"
3934 [(set (match_operand:V8HI 0 "register_operand" "=x")
3936 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3937 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3938 (match_operand 3 "const_0_to_3_operand" "")
3939 (match_operand 4 "const_0_to_3_operand" "")
3940 (match_operand 5 "const_0_to_3_operand" "")
3948 mask |= INTVAL (operands[2]) << 0;
3949 mask |= INTVAL (operands[3]) << 2;
3950 mask |= INTVAL (operands[4]) << 4;
3951 mask |= INTVAL (operands[5]) << 6;
3952 operands[2] = GEN_INT (mask);
3954 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3956 [(set_attr "type" "sselog")
3957 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshufhw.  The 8-bit immediate in operand 2
;; is split into four 2-bit selectors, and 4 is added to each so that they
;; become element numbers in the range 4..7, as sse2_pshufhw_1 requires.
3959 (define_expand "sse2_pshufhw"
3960 [(match_operand:V8HI 0 "register_operand" "")
3961 (match_operand:V8HI 1 "nonimmediate_operand" "")
3962 (match_operand:SI 2 "const_int_operand" "")]
3965 int mask = INTVAL (operands[2]);
3966 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3967 GEN_INT (((mask >> 0) & 3) + 4),
3968 GEN_INT (((mask >> 2) & 3) + 4),
3969 GEN_INT (((mask >> 4) & 3) + 4),
3970 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose selection list starts with fixed indices (only the
;; leading (const_int 0) is visible in this excerpt) and ends with four
;; selector operands 2..5, each in the range 4..7.  At output time 4 is
;; subtracted from each selector and the results are packed into a single
;; immediate, two bits each with operand 2 in the lowest bits, stored in
;; operands[2] and printed with pshufhw.
3974 (define_insn "sse2_pshufhw_1"
3975 [(set (match_operand:V8HI 0 "register_operand" "=x")
3977 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3978 (parallel [(const_int 0)
3982 (match_operand 2 "const_4_to_7_operand" "")
3983 (match_operand 3 "const_4_to_7_operand" "")
3984 (match_operand 4 "const_4_to_7_operand" "")
3985 (match_operand 5 "const_4_to_7_operand" "")])))]
3989 mask |= (INTVAL (operands[2]) - 4) << 0;
3990 mask |= (INTVAL (operands[3]) - 4) << 2;
3991 mask |= (INTVAL (operands[4]) - 4) << 4;
3992 mask |= (INTVAL (operands[5]) - 4) << 6;
3993 operands[2] = GEN_INT (mask);
3995 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3997 [(set_attr "type" "sselog")
3998 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operands[2] to the V4SI zero constant;
;; the part of the pattern that uses operand 2 is not visible in this
;; excerpt.
4000 (define_expand "sse2_loadd"
4001 [(set (match_operand:V4SI 0 "register_operand" "")
4004 (match_operand:SI 1 "nonimmediate_operand" ""))
4008 "operands[2] = CONST0_RTX (V4SImode);")
;; Place an SImode value (operand 2) into a V4SI register, combined with
;; operand 1, which is either constant zero or the destination itself.
;; Four alternatives, in order: movd from memory, movd from a general
;; register, movss from memory (all three with operand 1 zero), and movss
;; from an SSE register with operand 1 tied to the destination.
4010 (define_insn "sse2_loadld"
4011 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4014 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4015 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4019 movd\t{%2, %0|%0, %2}
4020 movd\t{%2, %0|%0, %2}
4021 movss\t{%2, %0|%0, %2}
4022 movss\t{%2, %0|%0, %2}"
4023 [(set_attr "type" "ssemov")
4024 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register to an SImode destination.  After
;; reload the insn is split into a plain SImode move: operand 1 is replaced
;; by an SImode REG with the same register number as the vector register.
4026 (define_insn_and_split "sse2_stored"
4027 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4029 (match_operand:V4SI 1 "register_operand" "x,Yi")
4030 (parallel [(const_int 0)])))]
4033 "&& reload_completed"
4034 [(set (match_dup 0) (match_dup 1))]
4036 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander for storing element 0 of a V2DI register to a DImode
;; destination.  No preparation code is visible here; the generated RTL is
;; presumably matched by one of the "*sse2_storeq" insns.
4039 (define_expand "sse_storeq"
4040 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4042 (match_operand:V2DI 1 "register_operand" "")
4043 (parallel [(const_int 0)])))]
;; 64-bit-target variant of the element-0 V2DI store: besides memory or an
;; SSE register, the second alternative allows a general register
;; destination.  Enabled for TARGET_64BIT && TARGET_SSE.  (The output
;; template is not visible in this excerpt.)
4047 (define_insn "*sse2_storeq_rex64"
4048 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4050 (match_operand:V2DI 1 "register_operand" "x,Yi")
4051 (parallel [(const_int 0)])))]
4052 "TARGET_64BIT && TARGET_SSE"
;; Element-0 V2DI store to memory or an SSE register, followed by what
;; appears to be its post-reload split (the header line of the second
;; pattern is not visible in this excerpt).  Under
;; TARGET_SSE && reload_completed the element-0 select is replaced by a
;; plain DImode move, with operand 1 rewritten as a DImode REG that has the
;; same register number as the vector register.
4055 (define_insn "*sse2_storeq"
4056 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4058 (match_operand:V2DI 1 "register_operand" "x")
4059 (parallel [(const_int 0)])))]
4064 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4066 (match_operand:V2DI 1 "register_operand" "")
4067 (parallel [(const_int 0)])))]
4068 "TARGET_SSE && reload_completed"
4069 [(set (match_dup 0) (match_dup 1))]
4071 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 (the upper quadword) of a V2DI value into a DImode
;; destination.  Alternatives, in order:
;;   - register source, memory destination: movhps stores the high half;
;;   - source tied to the destination register: psrldq shifts the register
;;     right so the upper quadword lands in the low 64 bits.  psrldq counts
;;     in bytes, so the shift must be 8 (one quadword), not 4;
;;   - offsettable memory source: movq loads from the high half (%H1).
;; Memory-to-memory is rejected by the insn condition.
4074 (define_insn "*vec_extractv2di_1_sse2"
4075 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4077 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4078 (parallel [(const_int 1)])))]
4079 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4081 movhps\t{%1, %0|%0, %1}
4082 psrldq\t{$8, %0|%0, 8}
4083 movq\t{%H1, %0|%0, %H1}"
4084 [(set_attr "type" "ssemov,sseishft,ssemov")
4085 (set_attr "mode" "V2SF,TI,TI")])
4087 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only (no SSE2) way of extracting element 1 of a V2DI value into a
;; DImode destination.  Alternatives, in order: movhps to memory, movhlps
;; between SSE registers, and movlps from the high half (%H1) of an
;; offsettable memory source.  Memory-to-memory is rejected by the insn
;; condition.
4088 (define_insn "*vec_extractv2di_1_sse"
4089 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4091 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4092 (parallel [(const_int 1)])))]
4093 "!TARGET_SSE2 && TARGET_SSE
4094 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4096 movhps\t{%1, %0|%0, %1}
4097 movhlps\t{%1, %0|%0, %1}
4098 movlps\t{%H1, %0|%0, %H1}"
4099 [(set_attr "type" "ssemov")
4100 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcast an SImode register value to all four elements of a V4SI
;; register.  First alternative: pshufd with immediate 0 from a separate
;; source register.  Second alternative: shufps with immediate 0 operating
;; in place, with the source tied to the destination.
4102 (define_insn "*vec_dupv4si"
4103 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4105 (match_operand:SI 1 "register_operand" " Y2,0")))]
4108 pshufd\t{$0, %1, %0|%0, %1, 0}
4109 shufps\t{$0, %0, %0|%0, %0, 0}"
4110 [(set_attr "type" "sselog1")
4111 (set_attr "mode" "TI,V4SF")])
;; Broadcast a DImode register value to both elements of a V2DI register.
;; In both alternatives the source is tied to the destination.  The output
;; templates are not visible in this excerpt; the attributes classify the
;; first alternative as sselog1/TI and the second as ssemov/V4SF.
4113 (define_insn "*vec_dupv2di"
4114 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4116 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4121 [(set_attr "type" "sselog1,ssemov")
4122 (set_attr "mode" "TI,V4SF")])
4124 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4125 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4126 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from two SImode operands.  Alternatives, in order:
;; SSE punpckldq with operand 1 tied to the destination; SSE movd of
;; operand 1 when operand 2 is constant zero; then the same two forms using
;; MMX registers (the "*y" alternatives).
4127 (define_insn "*sse2_concatv2si"
4128 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
4130 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4131 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
4134 punpckldq\t{%2, %0|%0, %2}
4135 movd\t{%1, %0|%0, %1}
4136 punpckldq\t{%2, %0|%0, %2}
4137 movd\t{%1, %0|%0, %1}"
4138 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4139 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from two SImode operands using SSE1-level
;; instructions.  Alternatives, in order: unpcklps with operand 1 tied to
;; the destination; movss from memory when operand 2 is constant zero; then
;; MMX punpckldq and MMX movd (the "*y" alternatives).
4141 (define_insn "*sse1_concatv2si"
4142 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4144 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4145 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4148 unpcklps\t{%2, %0|%0, %2}
4149 movss\t{%1, %0|%0, %1}
4150 punpckldq\t{%2, %0|%0, %2}
4151 movd\t{%1, %0|%0, %1}"
4152 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4153 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from two V2SI halves, with operand 1 always tied to
;; the destination.  Alternatives, in order: punpcklqdq from a register,
;; movlhps from a register, and movhps from memory.
4155 (define_insn "*vec_concatv4si_1"
4156 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4158 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4159 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4162 punpcklqdq\t{%2, %0|%0, %2}
4163 movlhps\t{%2, %0|%0, %2}
4164 movhps\t{%2, %0|%0, %2}"
4165 [(set_attr "type" "sselog,ssemov,ssemov")
4166 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI value from two DImode operands.  Six alternatives, in order:
;;   movq       - operand 1 from memory, operand 2 constant zero
;;   movq2dq    - operand 1 from an MMX register, operand 2 constant zero
;;   punpcklqdq - operand 1 tied to the destination, operand 2 in a register
;;   movlhps    - operand 1 tied to the destination, operand 2 in a register
;;   movhps     - operand 1 tied to the destination, operand 2 in memory
;;   movlps     - operand 2 tied to the destination, operand 1 in memory
4168 (define_insn "vec_concatv2di"
4169 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4171 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4172 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4175 movq\t{%1, %0|%0, %1}
4176 movq2dq\t{%1, %0|%0, %1}
4177 punpcklqdq\t{%2, %0|%0, %2}
4178 movlhps\t{%2, %0|%0, %2}
4179 movhps\t{%2, %0|%0, %2}
4180 movlps\t{%1, %0|%0, %1}"
4181 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4182 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Standard-named expander: set one element of a V2DI vector.  Delegates to
;; ix86_expand_vector_set with the vector (operand 0), the DImode value
;; (operand 1) and the constant element index taken from operand 2.
4184 (define_expand "vec_setv2di"
4185 [(match_operand:V2DI 0 "register_operand" "")
4186 (match_operand:DI 1 "register_operand" "")
4187 (match_operand 2 "const_int_operand" "")]
4190 ix86_expand_vector_set (false, operands[0], operands[1],
4191 INTVAL (operands[2]));
;; Standard-named expander: extract one element of a V2DI vector into a
;; DImode register.  Delegates to ix86_expand_vector_extract with the
;; destination, the vector and the constant element index from operand 2.
4195 (define_expand "vec_extractv2di"
4196 [(match_operand:DI 0 "register_operand" "")
4197 (match_operand:V2DI 1 "register_operand" "")
4198 (match_operand 2 "const_int_operand" "")]
4201 ix86_expand_vector_extract (false, operands[0], operands[1],
4202 INTVAL (operands[2]));
;; Standard-named expander: initialise a V2DI register from the element
;; list in operand 1.  Delegates to ix86_expand_vector_init.
4206 (define_expand "vec_initv2di"
4207 [(match_operand:V2DI 0 "register_operand" "")
4208 (match_operand 1 "" "")]
4211 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-named expander: set one element of a V4SI vector.  Delegates to
;; ix86_expand_vector_set with the vector (operand 0), the SImode value
;; (operand 1) and the constant element index taken from operand 2.
4215 (define_expand "vec_setv4si"
4216 [(match_operand:V4SI 0 "register_operand" "")
4217 (match_operand:SI 1 "register_operand" "")
4218 (match_operand 2 "const_int_operand" "")]
4221 ix86_expand_vector_set (false, operands[0], operands[1],
4222 INTVAL (operands[2]));
;; Standard-named expander: extract one element of a V4SI vector into an
;; SImode register.  Delegates to ix86_expand_vector_extract with the
;; destination, the vector and the constant element index from operand 2.
4226 (define_expand "vec_extractv4si"
4227 [(match_operand:SI 0 "register_operand" "")
4228 (match_operand:V4SI 1 "register_operand" "")
4229 (match_operand 2 "const_int_operand" "")]
4232 ix86_expand_vector_extract (false, operands[0], operands[1],
4233 INTVAL (operands[2]));
;; Standard-named expander: initialise a V4SI register from the element
;; list in operand 1.  Delegates to ix86_expand_vector_init.
4237 (define_expand "vec_initv4si"
4238 [(match_operand:V4SI 0 "register_operand" "")
4239 (match_operand 1 "" "")]
4242 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-named expander: set one element of a V8HI vector.  Delegates to
;; ix86_expand_vector_set with the vector (operand 0), the HImode value
;; (operand 1) and the constant element index taken from operand 2.
4246 (define_expand "vec_setv8hi"
4247 [(match_operand:V8HI 0 "register_operand" "")
4248 (match_operand:HI 1 "register_operand" "")
4249 (match_operand 2 "const_int_operand" "")]
4252 ix86_expand_vector_set (false, operands[0], operands[1],
4253 INTVAL (operands[2]));
;; Standard-named expander: extract one element of a V8HI vector into an
;; HImode register.  Delegates to ix86_expand_vector_extract with the
;; destination, the vector and the constant element index from operand 2.
4257 (define_expand "vec_extractv8hi"
4258 [(match_operand:HI 0 "register_operand" "")
4259 (match_operand:V8HI 1 "register_operand" "")
4260 (match_operand 2 "const_int_operand" "")]
4263 ix86_expand_vector_extract (false, operands[0], operands[1],
4264 INTVAL (operands[2]));
;; Standard-named expander: initialise a V8HI register from the element
;; list in operand 1.  Delegates to ix86_expand_vector_init.
4268 (define_expand "vec_initv8hi"
4269 [(match_operand:V8HI 0 "register_operand" "")
4270 (match_operand 1 "" "")]
4273 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Standard-named expander: set one element of a V16QI vector.  Delegates
;; to ix86_expand_vector_set with the vector (operand 0), the QImode value
;; (operand 1) and the constant element index taken from operand 2.
4277 (define_expand "vec_setv16qi"
4278 [(match_operand:V16QI 0 "register_operand" "")
4279 (match_operand:QI 1 "register_operand" "")
4280 (match_operand 2 "const_int_operand" "")]
4283 ix86_expand_vector_set (false, operands[0], operands[1],
4284 INTVAL (operands[2]));
;; Standard-named expander: extract one element of a V16QI vector into a
;; QImode register.  Delegates to ix86_expand_vector_extract with the
;; destination, the vector and the constant element index from operand 2.
4288 (define_expand "vec_extractv16qi"
4289 [(match_operand:QI 0 "register_operand" "")
4290 (match_operand:V16QI 1 "register_operand" "")
4291 (match_operand 2 "const_int_operand" "")]
4294 ix86_expand_vector_extract (false, operands[0], operands[1],
4295 INTVAL (operands[2]));
;; Standard-name expander vec_initv16qi.  Operand 0 is the V16QI
;; register to fill; operand 1 has no predicate or mode and is handed
;; unchanged to ix86_expand_vector_init.
4299 (define_expand "vec_initv16qi"
4300 [(match_operand:V16QI 0 "register_operand" "")
4301 (match_operand 1 "" "")]
4304 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander vec_unpacku_hi_v16qi: V16QI source (operand 1) widened into
;; a V8HI destination (operand 0).  Calls ix86_expand_sse_unpack with
;; flags (true, true); across this family the first flag is true for the
;; "u" names and the second is true for the "hi" names.
4308 (define_expand "vec_unpacku_hi_v16qi"
4309 [(match_operand:V8HI 0 "register_operand" "")
4310 (match_operand:V16QI 1 "register_operand" "")]
4313 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v16qi: V16QI source (operand 1), V8HI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (false, true), i.e. the "s" / "hi" combination of the family.
4317 (define_expand "vec_unpacks_hi_v16qi"
4318 [(match_operand:V8HI 0 "register_operand" "")
4319 (match_operand:V16QI 1 "register_operand" "")]
4322 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v16qi: V16QI source (operand 1), V8HI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (true, false), i.e. the "u" / "lo" combination of the family.
4326 (define_expand "vec_unpacku_lo_v16qi"
4327 [(match_operand:V8HI 0 "register_operand" "")
4328 (match_operand:V16QI 1 "register_operand" "")]
4331 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v16qi: V16QI source (operand 1), V8HI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (false, false), i.e. the "s" / "lo" combination of the family.
4335 (define_expand "vec_unpacks_lo_v16qi"
4336 [(match_operand:V8HI 0 "register_operand" "")
4337 (match_operand:V16QI 1 "register_operand" "")]
4340 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v8hi: V8HI source (operand 1), V4SI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (true, true), i.e. the "u" / "hi" combination of the family.
4344 (define_expand "vec_unpacku_hi_v8hi"
4345 [(match_operand:V4SI 0 "register_operand" "")
4346 (match_operand:V8HI 1 "register_operand" "")]
4349 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v8hi: V8HI source (operand 1), V4SI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (false, true), i.e. the "s" / "hi" combination of the family.
4353 (define_expand "vec_unpacks_hi_v8hi"
4354 [(match_operand:V4SI 0 "register_operand" "")
4355 (match_operand:V8HI 1 "register_operand" "")]
4358 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v8hi: V8HI source (operand 1), V4SI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (true, false), i.e. the "u" / "lo" combination of the family.
4362 (define_expand "vec_unpacku_lo_v8hi"
4363 [(match_operand:V4SI 0 "register_operand" "")
4364 (match_operand:V8HI 1 "register_operand" "")]
4367 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v8hi: V8HI source (operand 1), V4SI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (false, false), i.e. the "s" / "lo" combination of the family.
4371 (define_expand "vec_unpacks_lo_v8hi"
4372 [(match_operand:V4SI 0 "register_operand" "")
4373 (match_operand:V8HI 1 "register_operand" "")]
4376 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v4si: V4SI source (operand 1), V2DI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (true, true), i.e. the "u" / "hi" combination of the family.
4380 (define_expand "vec_unpacku_hi_v4si"
4381 [(match_operand:V2DI 0 "register_operand" "")
4382 (match_operand:V4SI 1 "register_operand" "")]
4385 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v4si: V4SI source (operand 1), V2DI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (false, true), i.e. the "s" / "hi" combination of the family.
4389 (define_expand "vec_unpacks_hi_v4si"
4390 [(match_operand:V2DI 0 "register_operand" "")
4391 (match_operand:V4SI 1 "register_operand" "")]
4394 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v4si: V4SI source (operand 1), V2DI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (true, false), i.e. the "u" / "lo" combination of the family.
4398 (define_expand "vec_unpacku_lo_v4si"
4399 [(match_operand:V2DI 0 "register_operand" "")
4400 (match_operand:V4SI 1 "register_operand" "")]
4403 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v4si: V4SI source (operand 1), V2DI
;; destination (operand 0).  Calls ix86_expand_sse_unpack with flags
;; (false, false), i.e. the "s" / "lo" combination of the family.
4407 (define_expand "vec_unpacks_lo_v4si"
4408 [(match_operand:V2DI 0 "register_operand" "")
4409 (match_operand:V4SI 1 "register_operand" "")]
4412 ix86_expand_sse_unpack (operands, false, false);
4416 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Byte average, emitted as pavgb.  Operand 1 is tied to the destination
;; and carries the "%" commutativity marker; operand 2 may be a register
;; or memory.  The all-ones V16QI const_vector is part of the matched RTL.
;; The insn is only valid when ix86_binary_operator_ok accepts the operands.
4422 (define_insn "sse2_uavgv16qi3"
4423 [(set (match_operand:V16QI 0 "register_operand" "=x")
4429 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4431 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4432 (const_vector:V16QI [(const_int 1) (const_int 1)
4433 (const_int 1) (const_int 1)
4434 (const_int 1) (const_int 1)
4435 (const_int 1) (const_int 1)
4436 (const_int 1) (const_int 1)
4437 (const_int 1) (const_int 1)
4438 (const_int 1) (const_int 1)
4439 (const_int 1) (const_int 1)]))
4441 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4442 "pavgb\t{%2, %0|%0, %2}"
4443 [(set_attr "type" "sseiadd")
4444 (set_attr "mode" "TI")])
;; Word average, emitted as pavgw.  Operand 1 is tied to the destination
;; and carries the "%" commutativity marker; operand 2 may be a register
;; or memory.  The all-ones V8HI const_vector is part of the matched RTL.
;; The insn is only valid when ix86_binary_operator_ok accepts the operands.
4446 (define_insn "sse2_uavgv8hi3"
4447 [(set (match_operand:V8HI 0 "register_operand" "=x")
4453 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4455 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4456 (const_vector:V8HI [(const_int 1) (const_int 1)
4457 (const_int 1) (const_int 1)
4458 (const_int 1) (const_int 1)
4459 (const_int 1) (const_int 1)]))
4461 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
4462 "pavgw\t{%2, %0|%0, %2}"
4463 [(set_attr "type" "sseiadd")
4464 (set_attr "mode" "TI")])
4466 ;; The correct representation for this is absolutely enormous, and
4467 ;; surely not generally useful.
;; psadbw modelled as an opaque unspec: two V16QI inputs, V2DI result.
;; Operand 1 must be a register tied to the destination ("0"); operand 2
;; may be a register or memory.
4468 (define_insn "sse2_psadbw"
4469 [(set (match_operand:V2DI 0 "register_operand" "=x")
4470 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
4471 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4474 "psadbw\t{%2, %0|%0, %2}"
4475 [(set_attr "type" "sseiadd")
4476 (set_attr "mode" "TI")])
;; movmskps as an unspec: V4SF register input (operand 1, "x"), SI
;; result in a general register (operand 0, "=r").
4478 (define_insn "sse_movmskps"
4479 [(set (match_operand:SI 0 "register_operand" "=r")
4480 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
4483 "movmskps\t{%1, %0|%0, %1}"
4484 [(set_attr "type" "ssecvt")
4485 (set_attr "mode" "V4SF")])
;; movmskpd as an unspec: V2DF register input (operand 1, "x"), SI
;; result in a general register (operand 0, "=r").
4487 (define_insn "sse2_movmskpd"
4488 [(set (match_operand:SI 0 "register_operand" "=r")
4489 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
4492 "movmskpd\t{%1, %0|%0, %1}"
4493 [(set_attr "type" "ssecvt")
4494 (set_attr "mode" "V2DF")])
;; pmovmskb as an unspec: V16QI register input (operand 1, "x"), SI
;; result in a general register (operand 0, "=r").
;; NOTE(review): the "mode" attribute is "V2DF" although the operand is
;; an integer vector; this matches sse2_movmskpd above and may be a
;; copy-paste leftover or a deliberate choice for attribute computation
;; elsewhere -- confirm before changing.
4496 (define_insn "sse2_pmovmskb"
4497 [(set (match_operand:SI 0 "register_operand" "=r")
4498 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
4501 "pmovmskb\t{%1, %0|%0, %1}"
4502 [(set_attr "type" "ssecvt")
4503 (set_attr "mode" "V2DF")])
;; Named expander for maskmovdqu: operand 0 is a V16QI memory
;; destination, operands 1 and 2 are V16QI registers wrapped in an
;; unspec.  The two "*sse2_maskmovdqu*" insns below match the result.
4505 (define_expand "sse2_maskmovdqu"
4506 [(set (match_operand:V16QI 0 "memory_operand" "")
4507 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4508 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit maskmovdqu.  The store address is an SI register constrained
;; to "D"; the same memory location also appears as an input of the
;; unspec via (match_dup 0).  Only operands 1 and 2 are printed in the
;; template; the address register is implicit.
4514 (define_insn "*sse2_maskmovdqu"
4515 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
4516 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4517 (match_operand:V16QI 2 "register_operand" "x")
4518 (mem:V16QI (match_dup 0))]
4520 "TARGET_SSE2 && !TARGET_64BIT"
4521 ;; @@@ check ordering of operands in intel/nonintel syntax
4522 "maskmovdqu\t{%2, %1|%1, %2}"
4523 [(set_attr "type" "ssecvt")
4524 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Identical to the 32-bit pattern except that the
;; address operand is a DI register (still constrained to "D") and the
;; condition requires TARGET_64BIT.
4526 (define_insn "*sse2_maskmovdqu_rex64"
4527 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
4528 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4529 (match_operand:V16QI 2 "register_operand" "x")
4530 (mem:V16QI (match_dup 0))]
4532 "TARGET_SSE2 && TARGET_64BIT"
4533 ;; @@@ check ordering of operands in intel/nonintel syntax
4534 "maskmovdqu\t{%2, %1|%1, %2}"
4535 [(set_attr "type" "ssecvt")
4536 (set_attr "mode" "TI")])
;; sse_ldmxcsr: an unspec_volatile whose only input is an SI memory
;; operand ("m"); the "memory" attribute is "load".
4538 (define_insn "sse_ldmxcsr"
4539 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
4543 [(set_attr "type" "sse")
4544 (set_attr "memory" "load")])
;; sse_stmxcsr: stores an SI value produced by the UNSPECV_STMXCSR
;; unspec_volatile into memory operand 0 ("=m"); the "memory" attribute
;; is "store".
4546 (define_insn "sse_stmxcsr"
4547 [(set (match_operand:SI 0 "memory_operand" "=m")
4548 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
4551 [(set_attr "type" "sse")
4552 (set_attr "memory" "store")])
;; Expander for the store fence.  The preparation code builds operand 0
;; itself: a BLKmode MEM whose address is a Pmode SCRATCH, marked
;; volatile.  Available with SSE or the 3DNow!-A extensions.
4554 (define_expand "sse_sfence"
4556 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4557 "TARGET_SSE || TARGET_3DNOW_A"
4559 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4560 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the RTL produced by the sse_sfence expander: a BLK
;; operand set to the UNSPEC_SFENCE unspec of itself.  The "memory"
;; attribute is "unknown".
4563 (define_insn "*sse_sfence"
4564 [(set (match_operand:BLK 0 "" "")
4565 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4566 "TARGET_SSE || TARGET_3DNOW_A"
4568 [(set_attr "type" "sse")
4569 (set_attr "memory" "unknown")])
;; sse2_clflush: an unspec_volatile taking a single address operand
;; ("p" constraint, address_operand predicate); the "memory" attribute
;; is "unknown".
4571 (define_insn "sse2_clflush"
4572 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
4576 [(set_attr "type" "sse")
4577 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_MFENCE barrier.  As with sse_sfence, the
;; preparation code creates operand 0 as a volatile BLKmode MEM whose
;; address is a Pmode SCRATCH.
4579 (define_expand "sse2_mfence"
4581 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4584 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4585 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the RTL produced by the sse2_mfence expander: a BLK
;; operand set to the UNSPEC_MFENCE unspec of itself.
4588 (define_insn "*sse2_mfence"
4589 [(set (match_operand:BLK 0 "" "")
4590 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4593 [(set_attr "type" "sse")
4594 (set_attr "memory" "unknown")])
;; Expander for the UNSPEC_LFENCE barrier.  The preparation code
;; creates operand 0 as a volatile BLKmode MEM whose address is a Pmode
;; SCRATCH.
4596 (define_expand "sse2_lfence"
4598 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4601 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4602 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the RTL produced by the sse2_lfence expander: a BLK
;; operand set to the UNSPEC_LFENCE unspec of itself.
4605 (define_insn "*sse2_lfence"
4606 [(set (match_operand:BLK 0 "" "")
4607 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4610 [(set_attr "type" "sse")
4611 (set_attr "memory" "unknown")])
;; sse3_mwait: unspec_volatile with two SI register inputs pinned by
;; the "a" and "c" constraints.  The same SImode pattern serves 64-bit
;; targets, for the reason given in the comment inside the pattern.
;; The insn length is fixed at 3 bytes.
4613 (define_insn "sse3_mwait"
4614 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4615 (match_operand:SI 1 "register_operand" "c")]
4618 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
4619 ;; Since 32bit register operands are implicitly zero extended to 64bit,
4620 ;; we only need to set up 32bit registers.
4622 [(set_attr "length" "3")])
;; 32-bit monitor: three SI register inputs pinned by the "a", "c" and
;; "d" constraints, printed explicitly in the template.  Enabled only
;; for SSE3 on non-64-bit targets; length fixed at 3 bytes.
4624 (define_insn "sse3_monitor"
4625 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4626 (match_operand:SI 1 "register_operand" "c")
4627 (match_operand:SI 2 "register_operand" "d")]
4629 "TARGET_SSE3 && !TARGET_64BIT"
4630 "monitor\t%0, %1, %2"
4631 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 is a DI register ("a"), while operands 1
;; and 2 stay SImode ("c", "d") for the reason given in the comment
;; inside the pattern.  Enabled only for SSE3 on 64-bit targets.
4633 (define_insn "sse3_monitor64"
4634 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
4635 (match_operand:SI 1 "register_operand" "c")
4636 (match_operand:SI 2 "register_operand" "d")]
4638 "TARGET_SSE3 && TARGET_64BIT"
4639 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
4640 ;; RCX and RDX are used. Since 32bit register operands are implicitly
4641 ;; zero extended to 64bit, we only need to set up 32bit registers.
4643 [(set_attr "length" "3")])
;; phaddw, 128-bit form.  The RTL pairs adjacent HI elements (0,1),
;; (2,3), (4,5), (6,7) of operand 1 and then the same pairs of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
4646 (define_insn "ssse3_phaddwv8hi3"
4647 [(set (match_operand:V8HI 0 "register_operand" "=x")
4653 (match_operand:V8HI 1 "register_operand" "0")
4654 (parallel [(const_int 0)]))
4655 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4657 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4658 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4661 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4662 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4664 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4665 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4670 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4671 (parallel [(const_int 0)]))
4672 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4674 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4675 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4678 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4679 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4681 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4682 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4684 "phaddw\t{%2, %0|%0, %2}"
4685 [(set_attr "type" "sseiadd")
4686 (set_attr "mode" "TI")])
;; phaddw, 64-bit form on "y"-constrained registers.  Pairs adjacent HI
;; elements (0,1), (2,3) of operand 1 and then of operand 2.  Operand 1
;; is tied to the destination; operand 2 may be in memory.
4688 (define_insn "ssse3_phaddwv4hi3"
4689 [(set (match_operand:V4HI 0 "register_operand" "=y")
4694 (match_operand:V4HI 1 "register_operand" "0")
4695 (parallel [(const_int 0)]))
4696 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4698 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4699 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4703 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4704 (parallel [(const_int 0)]))
4705 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4707 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4708 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4710 "phaddw\t{%2, %0|%0, %2}"
4711 [(set_attr "type" "sseiadd")
4712 (set_attr "mode" "DI")])
;; phaddd, 128-bit form.  Pairs adjacent SI elements (0,1), (2,3) of
;; operand 1 and then of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be in memory.
4714 (define_insn "ssse3_phadddv4si3"
4715 [(set (match_operand:V4SI 0 "register_operand" "=x")
4720 (match_operand:V4SI 1 "register_operand" "0")
4721 (parallel [(const_int 0)]))
4722 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4724 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4725 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4729 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4730 (parallel [(const_int 0)]))
4731 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4733 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4734 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4736 "phaddd\t{%2, %0|%0, %2}"
4737 [(set_attr "type" "sseiadd")
4738 (set_attr "mode" "TI")])
;; phaddd, 64-bit form on "y"-constrained registers.  Pairs SI elements
;; (0,1) of operand 1 and of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be in memory.
4740 (define_insn "ssse3_phadddv2si3"
4741 [(set (match_operand:V2SI 0 "register_operand" "=y")
4745 (match_operand:V2SI 1 "register_operand" "0")
4746 (parallel [(const_int 0)]))
4747 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4750 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4751 (parallel [(const_int 0)]))
4752 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4754 "phaddd\t{%2, %0|%0, %2}"
4755 [(set_attr "type" "sseiadd")
4756 (set_attr "mode" "DI")])
;; phaddsw, 128-bit form.  Same element pairing as ssse3_phaddwv8hi3:
;; adjacent HI pairs of operand 1 followed by those of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
4758 (define_insn "ssse3_phaddswv8hi3"
4759 [(set (match_operand:V8HI 0 "register_operand" "=x")
4765 (match_operand:V8HI 1 "register_operand" "0")
4766 (parallel [(const_int 0)]))
4767 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4769 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4770 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4773 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4774 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4776 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4777 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4782 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4783 (parallel [(const_int 0)]))
4784 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4786 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4787 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4790 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4791 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4793 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4794 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4796 "phaddsw\t{%2, %0|%0, %2}"
4797 [(set_attr "type" "sseiadd")
4798 (set_attr "mode" "TI")])
;; phaddsw, 64-bit form on "y"-constrained registers.  Pairs adjacent HI
;; elements (0,1), (2,3) of operand 1 and then of operand 2.  Operand 1
;; is tied to the destination; operand 2 may be in memory.
4800 (define_insn "ssse3_phaddswv4hi3"
4801 [(set (match_operand:V4HI 0 "register_operand" "=y")
4806 (match_operand:V4HI 1 "register_operand" "0")
4807 (parallel [(const_int 0)]))
4808 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4810 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4811 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4815 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4816 (parallel [(const_int 0)]))
4817 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4819 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4820 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4822 "phaddsw\t{%2, %0|%0, %2}"
4823 [(set_attr "type" "sseiadd")
4824 (set_attr "mode" "DI")])
;; phsubw, 128-bit form.  The RTL pairs adjacent HI elements (0,1),
;; (2,3), (4,5), (6,7) of operand 1 and then the same pairs of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
4826 (define_insn "ssse3_phsubwv8hi3"
4827 [(set (match_operand:V8HI 0 "register_operand" "=x")
4833 (match_operand:V8HI 1 "register_operand" "0")
4834 (parallel [(const_int 0)]))
4835 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4837 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4838 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4841 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4842 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4844 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4845 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4850 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4851 (parallel [(const_int 0)]))
4852 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4854 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4855 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4858 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4859 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4861 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4862 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4864 "phsubw\t{%2, %0|%0, %2}"
4865 [(set_attr "type" "sseiadd")
4866 (set_attr "mode" "TI")])
;; phsubw, 64-bit form on "y"-constrained registers.  Pairs adjacent HI
;; elements (0,1), (2,3) of operand 1 and then of operand 2.  Operand 1
;; is tied to the destination; operand 2 may be in memory.
4868 (define_insn "ssse3_phsubwv4hi3"
4869 [(set (match_operand:V4HI 0 "register_operand" "=y")
4874 (match_operand:V4HI 1 "register_operand" "0")
4875 (parallel [(const_int 0)]))
4876 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4878 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4879 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4883 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4884 (parallel [(const_int 0)]))
4885 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4887 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4888 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4890 "phsubw\t{%2, %0|%0, %2}"
4891 [(set_attr "type" "sseiadd")
4892 (set_attr "mode" "DI")])
;; phsubd, 128-bit form.  Pairs adjacent SI elements (0,1), (2,3) of
;; operand 1 and then of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be in memory.
4894 (define_insn "ssse3_phsubdv4si3"
4895 [(set (match_operand:V4SI 0 "register_operand" "=x")
4900 (match_operand:V4SI 1 "register_operand" "0")
4901 (parallel [(const_int 0)]))
4902 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4904 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4905 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4909 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4910 (parallel [(const_int 0)]))
4911 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4913 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4914 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4916 "phsubd\t{%2, %0|%0, %2}"
4917 [(set_attr "type" "sseiadd")
4918 (set_attr "mode" "TI")])
;; phsubd, 64-bit form on "y"-constrained registers.  Pairs SI elements
;; (0,1) of operand 1 and of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be in memory.
4920 (define_insn "ssse3_phsubdv2si3"
4921 [(set (match_operand:V2SI 0 "register_operand" "=y")
4925 (match_operand:V2SI 1 "register_operand" "0")
4926 (parallel [(const_int 0)]))
4927 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4930 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4931 (parallel [(const_int 0)]))
4932 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4934 "phsubd\t{%2, %0|%0, %2}"
4935 [(set_attr "type" "sseiadd")
4936 (set_attr "mode" "DI")])
;; phsubsw, 128-bit form.  Same element pairing as ssse3_phsubwv8hi3:
;; adjacent HI pairs of operand 1 followed by those of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
4938 (define_insn "ssse3_phsubswv8hi3"
4939 [(set (match_operand:V8HI 0 "register_operand" "=x")
4945 (match_operand:V8HI 1 "register_operand" "0")
4946 (parallel [(const_int 0)]))
4947 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4949 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4950 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4953 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4954 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4956 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4957 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4962 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4963 (parallel [(const_int 0)]))
4964 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4966 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4967 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4970 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4971 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4973 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4974 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4976 "phsubsw\t{%2, %0|%0, %2}"
4977 [(set_attr "type" "sseiadd")
4978 (set_attr "mode" "TI")])
;; phsubsw, 64-bit form on "y"-constrained registers.  Pairs adjacent HI
;; elements (0,1), (2,3) of operand 1 and then of operand 2.  Operand 1
;; is tied to the destination; operand 2 may be in memory.
4980 (define_insn "ssse3_phsubswv4hi3"
4981 [(set (match_operand:V4HI 0 "register_operand" "=y")
4986 (match_operand:V4HI 1 "register_operand" "0")
4987 (parallel [(const_int 0)]))
4988 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4990 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4991 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4995 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4996 (parallel [(const_int 0)]))
4997 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4999 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5000 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5002 "phsubsw\t{%2, %0|%0, %2}"
5003 [(set_attr "type" "sseiadd")
5004 (set_attr "mode" "DI")])
;; pmaddubsw, 128-bit form: two V16QI inputs, V8HI result.  The RTL
;; selects element lists starting at index 0 and at index 1 from each
;; input.
;; NOTE(review): operand 1 carries the "%" commutativity marker, but
;; pmaddubsw treats its two sources differently (unsigned vs. signed
;; bytes) -- confirm against the ISA manual that swapping is valid.
;; NOTE(review): the index-1 selects are written vec_select:V16QI; a
;; partial selection from a V16QI operand would have a narrower mode --
;; confirm the intended result mode.
5006 (define_insn "ssse3_pmaddubswv8hi3"
5007 [(set (match_operand:V8HI 0 "register_operand" "=x")
5012 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5013 (parallel [(const_int 0)
5023 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5024 (parallel [(const_int 0)
5034 (vec_select:V16QI (match_dup 1)
5035 (parallel [(const_int 1)
5044 (vec_select:V16QI (match_dup 2)
5045 (parallel [(const_int 1)
5052 (const_int 15)]))))))]
5054 "pmaddubsw\t{%2, %0|%0, %2}"
5055 [(set_attr "type" "sseiadd")
5056 (set_attr "mode" "TI")])
;; pmaddubsw, 64-bit form on "y"-constrained registers: two V8QI
;; inputs, V4HI result.  The RTL selects element lists starting at
;; index 0 and at index 1 from each input.
;; NOTE(review): operand 1 carries the "%" commutativity marker, but
;; pmaddubsw treats its two sources differently (unsigned vs. signed
;; bytes) -- confirm against the ISA manual that swapping is valid.
;; NOTE(review): the index-1 selects are written vec_select:V8QI; a
;; partial selection from a V8QI operand would have a narrower mode --
;; confirm the intended result mode.
5058 (define_insn "ssse3_pmaddubswv4hi3"
5059 [(set (match_operand:V4HI 0 "register_operand" "=y")
5064 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5065 (parallel [(const_int 0)
5071 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5072 (parallel [(const_int 0)
5078 (vec_select:V8QI (match_dup 1)
5079 (parallel [(const_int 1)
5084 (vec_select:V8QI (match_dup 2)
5085 (parallel [(const_int 1)
5088 (const_int 7)]))))))]
5090 "pmaddubsw\t{%2, %0|%0, %2}"
5091 [(set_attr "type" "sseiadd")
5092 (set_attr "mode" "DI")])
;; pmulhrsw, 128-bit form.  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be a register or memory.
;; The all-ones V8HI const_vector is part of the matched RTL.  Valid
;; only when ix86_binary_operator_ok accepts the operands for MULT.
5094 (define_insn "ssse3_pmulhrswv8hi3"
5095 [(set (match_operand:V8HI 0 "register_operand" "=x")
5102 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5104 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5106 (const_vector:V8HI [(const_int 1) (const_int 1)
5107 (const_int 1) (const_int 1)
5108 (const_int 1) (const_int 1)
5109 (const_int 1) (const_int 1)]))
5111 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5112 "pmulhrsw\t{%2, %0|%0, %2}"
5113 [(set_attr "type" "sseimul")
5114 (set_attr "mode" "TI")])
;; pmulhrsw, 64-bit form on "y"-constrained registers.  Operand 1 is
;; tied to the destination and marked commutative ("%0"); operand 2 may
;; be a register or memory.  The all-ones V4HI const_vector is part of
;; the matched RTL.
5116 (define_insn "ssse3_pmulhrswv4hi3"
5117 [(set (match_operand:V4HI 0 "register_operand" "=y")
5124 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5126 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5128 (const_vector:V4HI [(const_int 1) (const_int 1)
5129 (const_int 1) (const_int 1)]))
5131 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5132 "pmulhrsw\t{%2, %0|%0, %2}"
5133 [(set_attr "type" "sseimul")
5134 (set_attr "mode" "DI")])
;; pshufb, 128-bit form, modelled as an unspec of two V16QI inputs.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.  (The stray ";" that followed the output
;; template has been dropped; it only opened an empty comment.)
5136 (define_insn "ssse3_pshufbv16qi3"
5137 [(set (match_operand:V16QI 0 "register_operand" "=x")
5138 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5139 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5142 "pshufb\t{%2, %0|%0, %2}"
5143 [(set_attr "type" "sselog1")
5144 (set_attr "mode" "TI")])
;; pshufb, 64-bit form on "y"-constrained registers, modelled as an
;; unspec of two V8QI inputs.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.  (The stray ";" that followed
;; the output template has been dropped; it only opened an empty comment.)
5146 (define_insn "ssse3_pshufbv8qi3"
5147 [(set (match_operand:V8QI 0 "register_operand" "=y")
5148 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5149 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5152 "pshufb\t{%2, %0|%0, %2}"
5153 [(set_attr "type" "sselog1")
5154 (set_attr "mode" "DI")])
;; psign{b,w,d}, 128-bit forms, one insn per mode in SSEMODE124
;; (V16QI, V8HI, V4SI); <ssevecsize> supplies the mnemonic suffix.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
;; (The stray ";" after the output template has been dropped.)
5156 (define_insn "ssse3_psign<mode>3"
5157 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5158 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5159 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5162 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5163 [(set_attr "type" "sselog1")
5164 (set_attr "mode" "TI")])
;; psign, 64-bit forms on "y"-constrained registers, one insn per mode
;; in MMXMODEI; <mmxvecsize> supplies the mnemonic suffix.  Operand 1 is
;; tied to the destination; operand 2 may be in memory.
;; (The stray ";" after the output template has been dropped.)
5166 (define_insn "ssse3_psign<mode>3"
5167 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5168 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5169 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5172 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5173 [(set_attr "type" "sselog1")
5174 (set_attr "mode" "DI")])
;; palignr, 128-bit form, on TImode operands.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the immediate that reaches the assembler is operand 3 / 8.
5176 (define_insn "ssse3_palignrti"
5177 [(set (match_operand:TI 0 "register_operand" "=x")
5178 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5179 (match_operand:TI 2 "nonimmediate_operand" "xm")
5180 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5184 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5185 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5187 [(set_attr "type" "sseishft")
5188 (set_attr "mode" "TI")])
;; palignr, 64-bit form, on DImode operands in "y"-constrained
;; registers.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing.
5190 (define_insn "ssse3_palignrdi"
5191 [(set (match_operand:DI 0 "register_operand" "=y")
5192 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5193 (match_operand:DI 2 "nonimmediate_operand" "ym")
5194 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5198 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5199 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5201 [(set_attr "type" "sseishft")
5202 (set_attr "mode" "DI")])
;; Standard-name abs<mode>2 for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), emitted as pabs{b,w,d}.  The source may be a register or
;; memory and is not tied to the destination.
;; (The stray ";" after the output template has been dropped.)
5204 (define_insn "abs<mode>2"
5205 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5206 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5208 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5209 [(set_attr "type" "sselog1")
5210 (set_attr "mode" "TI")])
;; Standard-name abs<mode>2 for the MMXMODEI modes on "y"-constrained
;; registers, emitted as pabs with the <mmxvecsize> suffix.  The source
;; may be a register or memory and is not tied to the destination.
;; (The stray ";" after the output template has been dropped.)
5212 (define_insn "abs<mode>2"
5213 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5214 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5216 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5217 [(set_attr "type" "sselog1")
5218 (set_attr "mode" "DI")])
5220 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5222 ;; AMD SSE4A instructions
5224 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntsd taking a V2DF register: element 0 of operand 1 is selected
;; and written to the DF memory destination (operand 0, "=m") through
;; an unspec.
5226 (define_insn "sse4a_vmmovntv2df"
5227 [(set (match_operand:DF 0 "memory_operand" "=m")
5228 (unspec:DF [(vec_select:DF
5229 (match_operand:V2DF 1 "register_operand" "x")
5230 (parallel [(const_int 0)]))]
5233 "movntsd\t{%1, %0|%0, %1}"
5234 [(set_attr "type" "ssemov")
5235 (set_attr "mode" "DF")])
;; movntsd taking a scalar DF register (operand 1, "x") and writing it
;; to the DF memory destination (operand 0, "=m") through an unspec.
5237 (define_insn "sse4a_movntdf"
5238 [(set (match_operand:DF 0 "memory_operand" "=m")
5239 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5242 "movntsd\t{%1, %0|%0, %1}"
5243 [(set_attr "type" "ssemov")
5244 (set_attr "mode" "DF")])
;; movntss taking a V4SF register: element 0 of operand 1 is selected
;; and written to the SF memory destination (operand 0, "=m") through
;; an unspec.
5246 (define_insn "sse4a_vmmovntv4sf"
5247 [(set (match_operand:SF 0 "memory_operand" "=m")
5248 (unspec:SF [(vec_select:SF
5249 (match_operand:V4SF 1 "register_operand" "x")
5250 (parallel [(const_int 0)]))]
5253 "movntss\t{%1, %0|%0, %1}"
5254 [(set_attr "type" "ssemov")
5255 (set_attr "mode" "SF")])
;; movntss taking a scalar SF register (operand 1, "x") and writing it
;; to the SF memory destination (operand 0, "=m") through an unspec.
5257 (define_insn "sse4a_movntsf"
5258 [(set (match_operand:SF 0 "memory_operand" "=m")
5259 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5262 "movntss\t{%1, %0|%0, %1}"
5263 [(set_attr "type" "ssemov")
5264 (set_attr "mode" "SF")])
;; extrq, immediate form: V2DI operand 1 is tied to the destination and
;; operands 2 and 3 are const_int immediates printed in the template.
5266 (define_insn "sse4a_extrqi"
5267 [(set (match_operand:V2DI 0 "register_operand" "=x")
5268 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5269 (match_operand 2 "const_int_operand" "")
5270 (match_operand 3 "const_int_operand" "")]
5273 "extrq\t{%3, %2, %0|%0, %2, %3}"
5274 [(set_attr "type" "sse")
5275 (set_attr "mode" "TI")])
;; extrq, register form: V2DI operand 1 is tied to the destination and
;; operand 2 is a V16QI register ("x").
5277 (define_insn "sse4a_extrq"
5278 [(set (match_operand:V2DI 0 "register_operand" "=x")
5279 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5280 (match_operand:V16QI 2 "register_operand" "x")]
5283 "extrq\t{%2, %0|%0, %2}"
5284 [(set_attr "type" "sse")
5285 (set_attr "mode" "TI")])
;; insertq, immediate form: V2DI operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are const_int
;; immediates printed in the template.
5287 (define_insn "sse4a_insertqi"
5288 [(set (match_operand:V2DI 0 "register_operand" "=x")
5289 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5290 (match_operand:V2DI 2 "register_operand" "x")
5291 (match_operand 3 "const_int_operand" "")
5292 (match_operand 4 "const_int_operand" "")]
5295 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5296 [(set_attr "type" "sseins")
5297 (set_attr "mode" "TI")])
;; insertq, register form: V2DI operand 1 is tied to the destination
;; and operand 2 is a V2DI register ("x").
5299 (define_insn "sse4a_insertq"
5300 [(set (match_operand:V2DI 0 "register_operand" "=x")
5301 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5302 (match_operand:V2DI 2 "register_operand" "x")]
5305 "insertq\t{%2, %0|%0, %2}"
5306 [(set_attr "type" "sseins")
5307 (set_attr "mode" "TI")])