1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option) any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 59 Temple Place - Suite 330,
20 ;; Boston, MA 02111-1307, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
;; Mode macros: a pattern written with one of these names (for example
;; "mov<mode>" over SSEMODEI below) stands for one pattern per listed mode.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name match the
;; element sizes in bytes of the modes listed (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36 ;; Mapping from integer vector mode to mnemonic suffix
;; (b, w, d, q for V16QI, V8HI, V4SI, V2DI respectively).
37 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
;; Move expander for each integer vector mode in SSEMODEI.  Both operands
;; are nonimmediate_operand; the work is delegated to
;; ix86_expand_vector_move, which receives the mode and the operand array.
50 (define_expand "mov<mode>"
51 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
52 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
55 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI integer vector modes.  Three alternatives:
;;   0: constraint C source into an SSE register, emitted as a self-xor
;;   1: SSE register or memory into an SSE register
;;   2: SSE register into memory
;; The condition rejects memory-to-memory moves.  The output code emits the
;; single-precision mnemonic (xorps / movaps) when the insn's "mode"
;; attribute is V4SF and the integer mnemonic (pxor / movdqa) otherwise.
;; The "mode" attribute is computed per alternative from TARGET_SSE2,
;; optimize_size and TARGET_SSE_TYPELESS_STORES, with TI as the fallback.
59 (define_insn "*mov<mode>_internal"
60 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
61 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
62 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
64 switch (which_alternative)
67 if (get_attr_mode (insn) == MODE_V4SF)
68 return "xorps\t%0, %0";
70 return "pxor\t%0, %0";
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
83 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
86 (eq_attr "alternative" "0,1")
88 (ne (symbol_ref "optimize_size")
92 (eq_attr "alternative" "2")
94 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
96 (ne (symbol_ref "optimize_size")
100 (const_string "TI")))])
;; Move expander for V4SF.  Both operands are nonimmediate_operand; the
;; work is delegated to ix86_expand_vector_move with V4SFmode.
102 (define_expand "movv4sf"
103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
104 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
107 ix86_expand_vector_move (V4SFmode, operands);
;; Move insn for V4SF with three alternatives: constraint C source into an
;; SSE register, SSE register or memory into an SSE register, and SSE
;; register into memory.  The load and store alternatives both emit movaps.
;; Alternative 0 is typed sselog1, the other two ssemov; all are mode V4SF.
111 (define_insn "*movv4sf_internal"
112 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
113 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
117 movaps\t{%1, %0|%0, %1}
118 movaps\t{%1, %0|%0, %1}"
119 [(set_attr "type" "sselog1,ssemov,ssemov")
120 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (the condition requires reload_completed) of a V4SF
;; register load whose source is a zero_extended_scalar_load_operand.  The
;; preparation code replaces operand 1 by its SFmode subreg at byte 0 and
;; sets operand 2 to the V4SF zero vector; the replacement uses a
;; vec_duplicate of the new operand 1.
123 [(set (match_operand:V4SF 0 "register_operand" "")
124 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
125 "TARGET_SSE && reload_completed"
128 (vec_duplicate:V4SF (match_dup 1))
132 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
133 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF.  Both operands are nonimmediate_operand; the
;; work is delegated to ix86_expand_vector_move with V2DFmode.
136 (define_expand "movv2df"
137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
138 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
141 ix86_expand_vector_move (V2DFmode, operands);
;; Move insn for V2DF.  Three alternatives:
;;   0: constraint C source into an SSE register, emitted as a self-xor
;;   1: SSE register or memory into an SSE register
;;   2: SSE register into memory
;; The condition rejects memory-to-memory moves.  The output code emits
;; xorps / movaps when the insn's "mode" attribute is V4SF and
;; xorpd / movapd otherwise.  The "mode" attribute is V4SF when TARGET_SSE2
;; is zero; otherwise it is chosen per alternative between V4SF and V2DF
;; from optimize_size and TARGET_SSE_TYPELESS_STORES, defaulting to V2DF.
145 (define_insn "*movv2df_internal"
146 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
147 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
148 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
150 switch (which_alternative)
153 if (get_attr_mode (insn) == MODE_V4SF)
154 return "xorps\t%0, %0";
156 return "xorpd\t%0, %0";
159 if (get_attr_mode (insn) == MODE_V4SF)
160 return "movaps\t{%1, %0|%0, %1}";
162 return "movapd\t{%1, %0|%0, %1}";
167 [(set_attr "type" "sselog1,ssemov,ssemov")
169 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
170 (const_string "V4SF")
171 (eq_attr "alternative" "0,1")
173 (ne (symbol_ref "optimize_size")
175 (const_string "V4SF")
176 (const_string "V2DF"))
177 (eq_attr "alternative" "2")
179 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
181 (ne (symbol_ref "optimize_size")
183 (const_string "V4SF")
184 (const_string "V2DF"))]
185 (const_string "V2DF")))])
;; Post-reload rewrite (TARGET_SSE2 && reload_completed) of a V2DF register
;; load whose source is a zero_extended_scalar_load_operand.  The
;; replacement is a vec_concat of operand 1, rewritten as its DFmode subreg
;; at byte 0, with operand 2, which is set to the DFmode zero constant.
188 [(set (match_operand:V2DF 0 "register_operand" "")
189 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
190 "TARGET_SSE2 && reload_completed"
191 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
193 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
194 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register of any SSEMODE mode; delegates to
;; ix86_expand_push with the mode and operand 0.
197 (define_expand "push<mode>1"
198 [(match_operand:SSEMODE 0 "register_operand" "")]
201 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE mode; delegates to
;; ix86_expand_vector_move_misalign with the mode and the operand array.
205 (define_expand "movmisalign<mode>"
206 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
207 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
210 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF move, emitted as movups.  The source is wrapped in an
;; unspec so this pattern stays distinct from the ordinary V4SF move.
;; Alternatives: SSE register or memory into an SSE register, and SSE
;; register into memory; the condition rejects memory-to-memory moves.
;; The "mode" attribute is V4SF, matching the single-precision operands and
;; the other V4SF move patterns in this file.
214 (define_insn "sse_movups"
215 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
216 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
218 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
219 "movups\t{%1, %0|%0, %1}"
220 [(set_attr "type" "ssemov")
221 (set_attr "mode" "V4SF")])
;; Unaligned V2DF move for TARGET_SSE2, emitted as movupd.  The source is
;; wrapped in an unspec.  Alternatives: SSE register or memory into an SSE
;; register, and SSE register into memory; memory-to-memory is rejected.
223 (define_insn "sse2_movupd"
224 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
225 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
227 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
228 "movupd\t{%1, %0|%0, %1}"
229 [(set_attr "type" "ssemov")
230 (set_attr "mode" "V2DF")])
;; Unaligned V16QI move for TARGET_SSE2, emitted as movdqu.  The source is
;; wrapped in an unspec.  Alternatives: SSE register or memory into an SSE
;; register, and SSE register into memory; memory-to-memory is rejected.
232 (define_insn "sse2_movdqu"
233 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
234 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
236 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
237 "movdqu\t{%1, %0|%0, %1}"
238 [(set_attr "type" "ssemov")
239 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory using movntps.  The source
;; register is wrapped in an unspec; the destination must be memory.
241 (define_insn "sse_movntv4sf"
242 [(set (match_operand:V4SF 0 "memory_operand" "=m")
243 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
246 "movntps\t{%1, %0|%0, %1}"
247 [(set_attr "type" "ssemov")
248 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory using movntpd.  The source
;; register is wrapped in an unspec; the destination must be memory.
;; NOTE(review): typed "ssecvt" whereas sse_movntv4sf uses "ssemov" --
;; confirm whether the difference is intended.
250 (define_insn "sse2_movntv2df"
251 [(set (match_operand:V2DF 0 "memory_operand" "=m")
252 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
255 "movntpd\t{%1, %0|%0, %1}"
256 [(set_attr "type" "ssecvt")
257 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory using movntdq.  The source
;; register is wrapped in an unspec; the destination must be memory.
259 (define_insn "sse2_movntv2di"
260 [(set (match_operand:V2DI 0 "memory_operand" "=m")
261 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
264 "movntdq\t{%1, %0|%0, %1}"
265 [(set_attr "type" "ssecvt")
266 (set_attr "mode" "TI")])
;; Store of an SImode general register (constraint r) to memory using
;; movnti.  The source register is wrapped in an unspec; the destination
;; must be memory.  The "mode" attribute is SI, matching the SImode
;; operands; this insn does not touch any vector register.
268 (define_insn "sse2_movntsi"
269 [(set (match_operand:SI 0 "memory_operand" "=m")
270 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
273 "movnti\t{%1, %0|%0, %1}"
274 [(set_attr "type" "ssecvt")
275 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an SSE register using lddqu.
;; The memory source is wrapped in an unspec.
277 (define_insn "sse3_lddqu"
278 [(set (match_operand:V16QI 0 "register_operand" "=x")
279 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
282 "lddqu\t{%1, %0|%0, %1}"
283 [(set_attr "type" "ssecvt")
284 (set_attr "mode" "TI")])
286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
288 ;; Parallel single-precision floating point arithmetic
290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  The preparation code hands the whole expansion
;; to ix86_expand_fp_absneg_operator (NEG, ...) and finishes with DONE, so
;; the RTL template here is not itself emitted.
292 (define_expand "negv4sf2"
293 [(set (match_operand:V4SF 0 "register_operand" "")
294 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
296 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  The preparation code hands the whole
;; expansion to ix86_expand_fp_absneg_operator (ABS, ...) and finishes with
;; DONE, so the RTL template here is not itself emitted.
298 (define_expand "absv4sf2"
299 [(set (match_operand:V4SF 0 "register_operand" "")
300 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
302 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy (PLUS, ...) on the operands; there is
;; no DONE, so the plus template is what gets emitted afterwards.
304 (define_expand "addv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "")
306 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
307 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
309 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed single-precision add, emitted as addps.  Operand 1 is tied to the
;; destination and marked commutative with operand 2 ("%0"); operand 2 may
;; be an SSE register or memory.  The condition additionally requires
;; ix86_binary_operator_ok for PLUS.
311 (define_insn "*addv4sf3"
312 [(set (match_operand:V4SF 0 "register_operand" "=x")
313 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
314 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
315 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
316 "addps\t{%2, %0|%0, %2}"
317 [(set_attr "type" "sseadd")
318 (set_attr "mode" "V4SF")])
;; Intrinsic-facing pattern that emits addss on V4SF operands.  Operand 1
;; is tied to the destination and commutative with operand 2, which may be
;; an SSE register or memory.  The "mode" attribute is SF even though the
;; operands are V4SF.
320 (define_insn "sse_vmaddv4sf3"
321 [(set (match_operand:V4SF 0 "register_operand" "=x")
323 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
324 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
327 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
328 "addss\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register.  The
;; preparation code calls ix86_fixup_binary_operands_no_copy (MINUS, ...);
;; there is no DONE, so the minus template is emitted afterwards.
332 (define_expand "subv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "")
334 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
335 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
337 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed single-precision subtract, emitted as subps.  Operand 1 is a
;; register tied to the destination (not commutative); operand 2 may be an
;; SSE register or memory.
339 (define_insn "*subv4sf3"
340 [(set (match_operand:V4SF 0 "register_operand" "=x")
341 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
342 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
344 "subps\t{%2, %0|%0, %2}"
345 [(set_attr "type" "sseadd")
346 (set_attr "mode" "V4SF")])
;; Intrinsic-facing pattern that emits subss on V4SF operands.  Operand 1
;; is a register tied to the destination; operand 2 may be an SSE register
;; or memory.  The "mode" attribute is SF.
348 (define_insn "sse_vmsubv4sf3"
349 [(set (match_operand:V4SF 0 "register_operand" "=x")
351 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
352 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
356 "subss\t{%2, %0|%0, %2}"
357 [(set_attr "type" "sseadd")
358 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy (MULT, ...); there is no DONE, so the
;; mult template is emitted afterwards.
360 (define_expand "mulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "")
362 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
363 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
365 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed single-precision multiply, emitted as mulps.  Operand 1 is tied
;; to the destination and commutative with operand 2 ("%0"); operand 2 may
;; be an SSE register or memory.  Requires ix86_binary_operator_ok for MULT.
367 (define_insn "*mulv4sf3"
368 [(set (match_operand:V4SF 0 "register_operand" "=x")
369 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
370 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
371 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
372 "mulps\t{%2, %0|%0, %2}"
373 [(set_attr "type" "ssemul")
374 (set_attr "mode" "V4SF")])
;; Intrinsic-facing pattern that emits mulss on V4SF operands.  Operand 1
;; is tied to the destination and commutative with operand 2, which may be
;; an SSE register or memory.  The "mode" attribute is SF.
376 (define_insn "sse_vmmulv4sf3"
377 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
383 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
384 "mulss\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssemul")
386 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must already be a register.  The
;; preparation code calls ix86_fixup_binary_operands_no_copy (DIV, ...);
;; there is no DONE, so the div template is emitted afterwards.
388 (define_expand "divv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "")
390 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
391 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
393 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed single-precision divide, emitted as divps.  Operand 1 is a
;; register tied to the destination (not commutative); operand 2 may be an
;; SSE register or memory.
395 (define_insn "*divv4sf3"
396 [(set (match_operand:V4SF 0 "register_operand" "=x")
397 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
398 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
400 "divps\t{%2, %0|%0, %2}"
401 [(set_attr "type" "ssediv")
402 (set_attr "mode" "V4SF")])
;; Intrinsic-facing pattern that emits divss on V4SF operands.  Operand 1
;; is a register tied to the destination; operand 2 may be an SSE register
;; or memory.  The "mode" attribute is SF.
404 (define_insn "sse_vmdivv4sf3"
405 [(set (match_operand:V4SF 0 "register_operand" "=x")
407 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
408 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
412 "divss\t{%2, %0|%0, %2}"
413 [(set_attr "type" "ssediv")
414 (set_attr "mode" "SF")])
;; Emits rcpps.  The operation is represented as UNSPEC_RCP applied to
;; operand 1, which may be an SSE register or memory.
416 (define_insn "sse_rcpv4sf2"
417 [(set (match_operand:V4SF 0 "register_operand" "=x")
419 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
421 "rcpps\t{%1, %0|%0, %1}"
422 [(set_attr "type" "sse")
423 (set_attr "mode" "V4SF")])
;; Emits rcpss.  Operand 1 (SSE register or memory) is the unspec input;
;; operand 2 is a register tied to the destination (constraint "0").  The
;; "mode" attribute is SF.
425 (define_insn "sse_vmrcpv4sf2"
426 [(set (match_operand:V4SF 0 "register_operand" "=x")
428 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
430 (match_operand:V4SF 2 "register_operand" "0")
433 "rcpss\t{%1, %0|%0, %1}"
434 [(set_attr "type" "sse")
435 (set_attr "mode" "SF")])
;; Emits rsqrtps.  The operation is represented as UNSPEC_RSQRT applied to
;; operand 1, which may be an SSE register or memory.
437 (define_insn "sse_rsqrtv4sf2"
438 [(set (match_operand:V4SF 0 "register_operand" "=x")
440 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
442 "rsqrtps\t{%1, %0|%0, %1}"
443 [(set_attr "type" "sse")
444 (set_attr "mode" "V4SF")])
;; Emits rsqrtss.  Operand 1 (SSE register or memory) is the unspec input;
;; operand 2 is a register tied to the destination (constraint "0").  The
;; "mode" attribute is SF.
446 (define_insn "sse_vmrsqrtv4sf2"
447 [(set (match_operand:V4SF 0 "register_operand" "=x")
449 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
451 (match_operand:V4SF 2 "register_operand" "0")
454 "rsqrtss\t{%1, %0|%0, %1}"
455 [(set_attr "type" "sse")
456 (set_attr "mode" "SF")])
;; Packed single-precision square root, emitted as sqrtps.  Unlike the rcp
;; and rsqrt patterns this uses the generic sqrt RTL code rather than an
;; unspec.  Operand 1 may be an SSE register or memory.
458 (define_insn "sqrtv4sf2"
459 [(set (match_operand:V4SF 0 "register_operand" "=x")
460 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
462 "sqrtps\t{%1, %0|%0, %1}"
463 [(set_attr "type" "sse")
464 (set_attr "mode" "V4SF")])
;; Emits sqrtss.  Operand 1 (SSE register or memory) is the sqrt input;
;; operand 2 is a register tied to the destination (constraint "0").  The
;; "mode" attribute is SF.
466 (define_insn "sse_vmsqrtv4sf2"
467 [(set (match_operand:V4SF 0 "register_operand" "=x")
469 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
470 (match_operand:V4SF 2 "register_operand" "0")
473 "sqrtss\t{%1, %0|%0, %1}"
474 [(set_attr "type" "sse")
475 (set_attr "mode" "SF")])
477 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
478 ;; isn't really correct, as those rtl operators aren't defined when
479 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  When flag_finite_math_only is clear, operand 1
;; is first forced into a register, which is what the non-finite insn
;; pattern *smaxv4sf3 requires of its first input.  The operands are then
;; passed through ix86_fixup_binary_operands_no_copy (SMAX, ...).
481 (define_expand "smaxv4sf3"
482 [(set (match_operand:V4SF 0 "register_operand" "")
483 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
484 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
487 if (!flag_finite_math_only)
488 operands[1] = force_reg (V4SFmode, operands[1]);
489 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps variant used only when flag_finite_math_only is set.  In that case
;; the two inputs are treated as commutative ("%0"), so operand 1 need not
;; be a register before matching.  Requires ix86_binary_operator_ok.
492 (define_insn "*smaxv4sf3_finite"
493 [(set (match_operand:V4SF 0 "register_operand" "=x")
494 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
495 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
496 "TARGET_SSE && flag_finite_math_only
497 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
498 "maxps\t{%2, %0|%0, %2}"
499 [(set_attr "type" "sse")
500 (set_attr "mode" "V4SF")])
;; maxps variant with a fixed operand order: operand 1 is a register tied
;; to the destination and is not marked commutative; operand 2 may be an
;; SSE register or memory.
502 (define_insn "*smaxv4sf3"
503 [(set (match_operand:V4SF 0 "register_operand" "=x")
504 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
505 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
507 "maxps\t{%2, %0|%0, %2}"
508 [(set_attr "type" "sse")
509 (set_attr "mode" "V4SF")])
;; maxss variant used only when flag_finite_math_only is set; the inputs
;; are commutative ("%0").  Requires ix86_binary_operator_ok.  The "mode"
;; attribute is SF.
511 (define_insn "*sse_vmsmaxv4sf3_finite"
512 [(set (match_operand:V4SF 0 "register_operand" "=x")
514 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
515 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
518 "TARGET_SSE && flag_finite_math_only
519 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
520 "maxss\t{%2, %0|%0, %2}"
521 [(set_attr "type" "sse")
522 (set_attr "mode" "SF")])
;; Intrinsic-facing maxss pattern with a fixed operand order: operand 1 is
;; a register tied to the destination; operand 2 may be an SSE register or
;; memory.  The "mode" attribute is SF.
524 (define_insn "sse_vmsmaxv4sf3"
525 [(set (match_operand:V4SF 0 "register_operand" "=x")
527 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
528 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
532 "maxss\t{%2, %0|%0, %2}"
533 [(set_attr "type" "sse")
534 (set_attr "mode" "SF")])
;; V4SF minimum expander.  When flag_finite_math_only is clear, operand 1
;; is first forced into a register, which is what the non-finite insn
;; pattern *sminv4sf3 requires of its first input.  The operands are then
;; passed through ix86_fixup_binary_operands_no_copy (SMIN, ...).
536 (define_expand "sminv4sf3"
537 [(set (match_operand:V4SF 0 "register_operand" "")
538 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
539 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
542 if (!flag_finite_math_only)
543 operands[1] = force_reg (V4SFmode, operands[1]);
544 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps variant used only when flag_finite_math_only is set.  In that case
;; the two inputs are treated as commutative ("%0").  Requires
;; ix86_binary_operator_ok.
547 (define_insn "*sminv4sf3_finite"
548 [(set (match_operand:V4SF 0 "register_operand" "=x")
549 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
550 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
551 "TARGET_SSE && flag_finite_math_only
552 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
553 "minps\t{%2, %0|%0, %2}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "V4SF")])
;; minps variant with a fixed operand order: operand 1 is a register tied
;; to the destination and is not marked commutative; operand 2 may be an
;; SSE register or memory.
557 (define_insn "*sminv4sf3"
558 [(set (match_operand:V4SF 0 "register_operand" "=x")
559 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
560 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
562 "minps\t{%2, %0|%0, %2}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "V4SF")])
;; minss variant used only when flag_finite_math_only is set; the inputs
;; are commutative ("%0").  Requires ix86_binary_operator_ok.  The "mode"
;; attribute is SF.
566 (define_insn "*sse_vmsminv4sf3_finite"
567 [(set (match_operand:V4SF 0 "register_operand" "=x")
569 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
570 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
573 "TARGET_SSE && flag_finite_math_only
574 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
575 "minss\t{%2, %0|%0, %2}"
576 [(set_attr "type" "sse")
577 (set_attr "mode" "SF")])
;; Intrinsic-facing minss pattern with a fixed operand order: operand 1 is
;; a register tied to the destination; operand 2 may be an SSE register or
;; memory.  The "mode" attribute is SF.
579 (define_insn "sse_vmsminv4sf3"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
583 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
587 "minss\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "SF")])
;; Emits addsubps.  Operand 1 is a register tied to the destination and
;; operand 2 may be an SSE register or memory; the same two operands are
;; reused via match_dup in a minus expression within the pattern.
591 (define_insn "sse3_addsubv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
595 (match_operand:V4SF 1 "register_operand" "0")
596 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
597 (minus:V4SF (match_dup 1) (match_dup 2))
600 "addsubps\t{%2, %0|%0, %2}"
601 [(set_attr "type" "sseadd")
602 (set_attr "mode" "V4SF")])
;; Emits haddps.  The pattern selects SF elements 0..3 from operand 1
;; (a register tied to the destination) and then elements 0..3 from
;; operand 2 (SSE register or memory), taken in adjacent pairs (0,1) and
;; (2,3) of each operand.
604 (define_insn "sse3_haddv4sf3"
605 [(set (match_operand:V4SF 0 "register_operand" "=x")
610 (match_operand:V4SF 1 "register_operand" "0")
611 (parallel [(const_int 0)]))
612 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
614 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
615 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
619 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
620 (parallel [(const_int 0)]))
621 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
623 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
624 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
626 "haddps\t{%2, %0|%0, %2}"
627 [(set_attr "type" "sseadd")
628 (set_attr "mode" "V4SF")])
;; Emits hsubps.  The pattern selects SF elements 0..3 from operand 1
;; (a register tied to the destination) and then elements 0..3 from
;; operand 2 (SSE register or memory), taken in adjacent pairs (0,1) and
;; (2,3) of each operand.
630 (define_insn "sse3_hsubv4sf3"
631 [(set (match_operand:V4SF 0 "register_operand" "=x")
636 (match_operand:V4SF 1 "register_operand" "0")
637 (parallel [(const_int 0)]))
638 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
640 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
641 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
645 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
646 (parallel [(const_int 0)]))
647 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
649 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
650 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
652 "hsubps\t{%2, %0|%0, %2}"
653 [(set_attr "type" "sseadd")
654 (set_attr "mode" "V4SF")])
656 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
658 ;; Parallel single-precision floating point comparisons
660 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed single-precision compare producing a V4SF result.  Operand 3 is
;; any operator accepted by sse_comparison_operator; the "%D3" in the
;; template presumably prints the matching cmpXXps predicate suffix --
;; confirm against the i386 operand printer.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
662 (define_insn "sse_maskcmpv4sf3"
663 [(set (match_operand:V4SF 0 "register_operand" "=x")
664 (match_operator:V4SF 3 "sse_comparison_operator"
665 [(match_operand:V4SF 1 "register_operand" "0")
666 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
668 "cmp%D3ps\t{%2, %0|%0, %2}"
669 [(set_attr "type" "ssecmp")
670 (set_attr "mode" "V4SF")])
;; cmpXXss form of the mask compare.  Operand 3 is any operator accepted
;; by sse_comparison_operator.  Operand 1 is tied to the destination;
;; operand 2 must be an SSE register here (no memory alternative).  The
;; "mode" attribute is SF.
672 (define_insn "sse_vmmaskcmpv4sf3"
673 [(set (match_operand:V4SF 0 "register_operand" "=x")
675 (match_operator:V4SF 3 "sse_comparison_operator"
676 [(match_operand:V4SF 1 "register_operand" "0")
677 (match_operand:V4SF 2 "register_operand" "x")])
681 "cmp%D3ss\t{%2, %0|%0, %2}"
682 [(set_attr "type" "ssecmp")
683 (set_attr "mode" "SF")])
;; Emits comiss and sets the flags register in CCFP mode.  Element 0 is
;; selected from each V4SF operand; operand 0 must be an SSE register and
;; operand 1 may be an SSE register or memory.
685 (define_insn "sse_comi"
686 [(set (reg:CCFP FLAGS_REG)
689 (match_operand:V4SF 0 "register_operand" "x")
690 (parallel [(const_int 0)]))
692 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
693 (parallel [(const_int 0)]))))]
695 "comiss\t{%1, %0|%0, %1}"
696 [(set_attr "type" "ssecomi")
697 (set_attr "mode" "SF")])
;; Emits ucomiss and sets the flags register in CCFPU mode.  Element 0 is
;; selected from each V4SF operand; operand 0 must be an SSE register and
;; operand 1 may be an SSE register or memory.
699 (define_insn "sse_ucomi"
700 [(set (reg:CCFPU FLAGS_REG)
703 (match_operand:V4SF 0 "register_operand" "x")
704 (parallel [(const_int 0)]))
706 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
707 (parallel [(const_int 0)]))))]
709 "ucomiss\t{%1, %0|%0, %1}"
710 [(set_attr "type" "ssecomi")
711 (set_attr "mode" "SF")])
;; V4SF vector conditional expander.  Operands 4 and 5 are the values being
;; compared (nonimmediate_operand); operands 1 and 2 are general_operand
;; inputs.  The expansion is attempted by ix86_expand_fp_vcond, whose
;; return value is tested by the preparation code.
713 (define_expand "vcondv4sf"
714 [(set (match_operand:V4SF 0 "register_operand" "")
717 [(match_operand:V4SF 4 "nonimmediate_operand" "")
718 (match_operand:V4SF 5 "nonimmediate_operand" "")])
719 (match_operand:V4SF 1 "general_operand" "")
720 (match_operand:V4SF 2 "general_operand" "")))]
723 if (ix86_expand_fp_vcond (operands))
729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
731 ;; Parallel single-precision floating point logical operations
733 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-and expander.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy (AND, ...); there is no DONE, so the
;; and template is emitted afterwards.
735 (define_expand "andv4sf3"
736 [(set (match_operand:V4SF 0 "register_operand" "")
737 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
738 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
740 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Bitwise and on V4SF, emitted as andps.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0"); operand 2 may be an
;; SSE register or memory.  Requires ix86_binary_operator_ok for AND.
742 (define_insn "*andv4sf3"
743 [(set (match_operand:V4SF 0 "register_operand" "=x")
744 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
745 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
746 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
747 "andps\t{%2, %0|%0, %2}"
748 [(set_attr "type" "sselog")
749 (set_attr "mode" "V4SF")])
;; Emits andnps: the complement of operand 1 (a register tied to the
;; destination) is and-ed with operand 2 (SSE register or memory).  Only
;; operand 1 is inverted, so the operands are not commutative.
751 (define_insn "sse_nandv4sf3"
752 [(set (match_operand:V4SF 0 "register_operand" "=x")
753 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
754 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
756 "andnps\t{%2, %0|%0, %2}"
757 [(set_attr "type" "sselog")
758 (set_attr "mode" "V4SF")])
;; V4SF bitwise-or expander.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy (IOR, ...); there is no DONE, so the
;; ior template is emitted afterwards.
760 (define_expand "iorv4sf3"
761 [(set (match_operand:V4SF 0 "register_operand" "")
762 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
763 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
765 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Bitwise or on V4SF, emitted as orps.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0"); operand 2 may be an
;; SSE register or memory.  Requires ix86_binary_operator_ok for IOR.
767 (define_insn "*iorv4sf3"
768 [(set (match_operand:V4SF 0 "register_operand" "=x")
769 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
770 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
771 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
772 "orps\t{%2, %0|%0, %2}"
773 [(set_attr "type" "sselog")
774 (set_attr "mode" "V4SF")])
;; V4SF bitwise-xor expander.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy (XOR, ...); there is no DONE, so the
;; xor template is emitted afterwards.
776 (define_expand "xorv4sf3"
777 [(set (match_operand:V4SF 0 "register_operand" "")
778 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
779 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
781 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Bitwise xor on V4SF, emitted as xorps.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0"); operand 2 may be an
;; SSE register or memory.  Requires ix86_binary_operator_ok for XOR.
783 (define_insn "*xorv4sf3"
784 [(set (match_operand:V4SF 0 "register_operand" "=x")
785 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
786 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
787 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
788 "xorps\t{%2, %0|%0, %2}"
789 [(set_attr "type" "sselog")
790 (set_attr "mode" "V4SF")])
792 ;; Also define scalar versions. These are used for abs, neg, and
793 ;; conditional move. Using subregs into vector modes causes register
794 ;; allocation lossage. These patterns do not allow memory operands
795 ;; because the native instructions read the full 128-bits.
;; Scalar SF bitwise and, implemented with the full-width andps.  Both
;; inputs must be SSE registers (operand 1 tied to the destination); the
;; "mode" attribute is V4SF because the instruction emitted is the packed
;; form.
797 (define_insn "*andsf3"
798 [(set (match_operand:SF 0 "register_operand" "=x")
799 (and:SF (match_operand:SF 1 "register_operand" "0")
800 (match_operand:SF 2 "register_operand" "x")))]
802 "andps\t{%2, %0|%0, %2}"
803 [(set_attr "type" "sselog")
804 (set_attr "mode" "V4SF")])
;; Scalar SF and-not, implemented with the full-width andnps: the
;; complement of operand 1 (tied to the destination) is and-ed with
;; operand 2.  Both inputs must be SSE registers; "mode" is V4SF.
806 (define_insn "*nandsf3"
807 [(set (match_operand:SF 0 "register_operand" "=x")
808 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
809 (match_operand:SF 2 "register_operand" "x")))]
811 "andnps\t{%2, %0|%0, %2}"
812 [(set_attr "type" "sselog")
813 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise or, implemented with the full-width orps.  Both inputs
;; must be SSE registers (operand 1 tied to the destination); "mode" is
;; V4SF.
815 (define_insn "*iorsf3"
816 [(set (match_operand:SF 0 "register_operand" "=x")
817 (ior:SF (match_operand:SF 1 "register_operand" "0")
818 (match_operand:SF 2 "register_operand" "x")))]
820 "orps\t{%2, %0|%0, %2}"
821 [(set_attr "type" "sselog")
822 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise xor, implemented with the full-width xorps.  Both
;; inputs must be SSE registers (operand 1 tied to the destination); "mode"
;; is V4SF.
824 (define_insn "*xorsf3"
825 [(set (match_operand:SF 0 "register_operand" "=x")
826 (xor:SF (match_operand:SF 1 "register_operand" "0")
827 (match_operand:SF 2 "register_operand" "x")))]
829 "xorps\t{%2, %0|%0, %2}"
830 [(set_attr "type" "sselog")
831 (set_attr "mode" "V4SF")])
833 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
835 ;; Parallel single-precision floating point conversion operations
837 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits cvtpi2ps.  Operand 2 is a V2SI value in an MMX register
;; (constraint y) or memory, converted with float to V2SF; operand 1 is a
;; V4SF register tied to the destination.
839 (define_insn "sse_cvtpi2ps"
840 [(set (match_operand:V4SF 0 "register_operand" "=x")
843 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
844 (match_operand:V4SF 1 "register_operand" "0")
847 "cvtpi2ps\t{%2, %0|%0, %2}"
848 [(set_attr "type" "ssecvt")
849 (set_attr "mode" "V4SF")])
;; Emits cvtps2pi.  The V4SF source (SSE register or memory) is converted
;; through a V4SI unspec and elements 0 and 1 of that result are written to
;; a V2SI MMX register (constraint y).
851 (define_insn "sse_cvtps2pi"
852 [(set (match_operand:V2SI 0 "register_operand" "=y")
854 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
856 (parallel [(const_int 0) (const_int 1)])))]
858 "cvtps2pi\t{%1, %0|%0, %1}"
859 [(set_attr "type" "ssecvt")
860 (set_attr "mode" "DI")])
;; Emits cvttps2pi.  The V4SF source (SSE register or memory) is converted
;; with the fix RTL code to V4SI and elements 0 and 1 of that result are
;; written to a V2SI MMX register (constraint y).
;; NOTE(review): "mode" is SF here but DI on sse_cvtps2pi, which has the
;; same operand shapes -- confirm which is intended.
862 (define_insn "sse_cvttps2pi"
863 [(set (match_operand:V2SI 0 "register_operand" "=y")
865 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
866 (parallel [(const_int 0) (const_int 1)])))]
868 "cvttps2pi\t{%1, %0|%0, %1}"
869 [(set_attr "type" "ssecvt")
870 (set_attr "mode" "SF")])
;; Emits cvtsi2ss.  Operand 2 is an SImode value, converted with float to
;; SF; operand 1 is a V4SF register tied to the destination.  The two
;; alternatives take the integer from a general register or from memory and
;; carry different athlon_decode values (vector, double).
872 (define_insn "sse_cvtsi2ss"
873 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
876 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
877 (match_operand:V4SF 1 "register_operand" "0,0")
880 "cvtsi2ss\t{%2, %0|%0, %2}"
881 [(set_attr "type" "sseicvt")
882 (set_attr "athlon_decode" "vector,double")
883 (set_attr "mode" "SF")])
;; 64-bit form of the integer-to-single conversion, emitted as cvtsi2ssq
;; and enabled only for TARGET_SSE && TARGET_64BIT.  Operand 2 is a DImode
;; value; operand 1 is a V4SF register tied to the destination.
;; NOTE(review): the second alternative's constraint is "rm" where
;; sse_cvtsi2ss uses "m" -- confirm that accepting a register in both
;; alternatives is intended.
885 (define_insn "sse_cvtsi2ssq"
886 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
889 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
890 (match_operand:V4SF 1 "register_operand" "0,0")
892 "TARGET_SSE && TARGET_64BIT"
893 "cvtsi2ssq\t{%2, %0|%0, %2}"
894 [(set_attr "type" "sseicvt")
895 (set_attr "athlon_decode" "vector,double")
896 (set_attr "mode" "SF")])
;; Emits cvtss2si.  Element 0 of the V4SF source (SSE register or memory)
;; is converted to an SImode general register; the conversion is expressed
;; as UNSPEC_FIX_NOTRUNC rather than the fix RTL code.  The register and
;; memory alternatives carry athlon_decode double and vector respectively.
898 (define_insn "sse_cvtss2si"
899 [(set (match_operand:SI 0 "register_operand" "=r,r")
902 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
903 (parallel [(const_int 0)]))]
904 UNSPEC_FIX_NOTRUNC))]
906 "cvtss2si\t{%1, %0|%0, %1}"
907 [(set_attr "type" "sseicvt")
908 (set_attr "athlon_decode" "double,vector")
909 (set_attr "mode" "SI")])
;; 64-bit form, emitted as cvtss2siq and enabled only for
;; TARGET_SSE && TARGET_64BIT.  Element 0 of the V4SF source is converted
;; to a DImode general register via UNSPEC_FIX_NOTRUNC.
911 (define_insn "sse_cvtss2siq"
912 [(set (match_operand:DI 0 "register_operand" "=r,r")
915 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
916 (parallel [(const_int 0)]))]
917 UNSPEC_FIX_NOTRUNC))]
918 "TARGET_SSE && TARGET_64BIT"
919 "cvtss2siq\t{%1, %0|%0, %1}"
920 [(set_attr "type" "sseicvt")
921 (set_attr "athlon_decode" "double,vector")
922 (set_attr "mode" "DI")])
;; Emits cvttss2si.  Element 0 of the V4SF source (SSE register or memory)
;; is converted to an SImode general register.  The register and memory
;; alternatives carry athlon_decode double and vector respectively.
924 (define_insn "sse_cvttss2si"
925 [(set (match_operand:SI 0 "register_operand" "=r,r")
928 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
929 (parallel [(const_int 0)]))))]
931 "cvttss2si\t{%1, %0|%0, %1}"
932 [(set_attr "type" "sseicvt")
933 (set_attr "athlon_decode" "double,vector")
934 (set_attr "mode" "SI")])
;; 64-bit form, emitted as cvttss2siq and enabled only for
;; TARGET_SSE && TARGET_64BIT.  Element 0 of the V4SF source is converted
;; to a DImode general register.
936 (define_insn "sse_cvttss2siq"
937 [(set (match_operand:DI 0 "register_operand" "=r,r")
940 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
941 (parallel [(const_int 0)]))))]
942 "TARGET_SSE && TARGET_64BIT"
943 "cvttss2siq\t{%1, %0|%0, %1}"
944 [(set_attr "type" "sseicvt")
945 (set_attr "athlon_decode" "double,vector")
946 (set_attr "mode" "DI")])
;; Emits cvtdq2ps: a V4SI source (SSE register or memory) is converted with
;; float to a V4SF result in an SSE register.  The "mode" attribute is
;; V4SF, matching the single-precision result; no double-precision data is
;; involved in this conversion.
948 (define_insn "sse2_cvtdq2ps"
949 [(set (match_operand:V4SF 0 "register_operand" "=x")
950 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
952 "cvtdq2ps\t{%1, %0|%0, %1}"
953 [(set_attr "type" "ssecvt")
954 (set_attr "mode" "V4SF")])
;; Emits cvtps2dq: a V4SF source (SSE register or memory) is converted to
;; V4SI.  The conversion is expressed as UNSPEC_FIX_NOTRUNC rather than the
;; fix RTL code.
956 (define_insn "sse2_cvtps2dq"
957 [(set (match_operand:V4SI 0 "register_operand" "=x")
958 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
959 UNSPEC_FIX_NOTRUNC))]
961 "cvtps2dq\t{%1, %0|%0, %1}"
962 [(set_attr "type" "ssecvt")
963 (set_attr "mode" "TI")])
;; Emits cvttps2dq: a V4SF source (SSE register or memory) is converted to
;; V4SI using the fix RTL code.
965 (define_insn "sse2_cvttps2dq"
966 [(set (match_operand:V4SI 0 "register_operand" "=x")
967 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
969 "cvttps2dq\t{%1, %0|%0, %1}"
970 [(set_attr "type" "ssecvt")
971 (set_attr "mode" "TI")])
973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
975 ;; Parallel single-precision floating point element swizzling
977 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element selection from the pair (operand 1, operand 2), with three
;; alternatives that each use a different instruction:
;;   0: both inputs in SSE registers, operand 1 tied to the destination
;;      -> movhlps
;;   1: operand 1 in offsettable memory (constraint o), operand 2 tied to
;;      the destination -> movlps reading %H1
;;   2: destination in memory, operand 1 in an SSE register -> movhps
;; "%H1" presumably addresses the upper 8 bytes of operand 1 -- confirm
;; against the i386 operand printer.  The condition rejects the case where
;; operands 1 and 2 are both memory.
979 (define_insn "sse_movhlps"
980 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
983 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
984 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
985 (parallel [(const_int 4)
989 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
991 movhlps\t{%2, %0|%0, %2}
992 movlps\t{%H1, %0|%0, %H1}
993 movhps\t{%1, %0|%0, %1}"
994 [(set_attr "type" "ssemov")
995 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Element selection from the pair (operand 1, operand 2), with operand 1
;; tied to the destination in every alternative:
;;   0: operand 2 in an SSE register -> movlhps
;;   1: operand 2 in memory          -> movhps
;;   2: destination in offsettable memory, operand 2 in an SSE register
;;      -> movlps writing %H0
;; "%H0" presumably addresses the upper 8 bytes of operand 0 -- confirm
;; against the i386 operand printer.  The condition defers to
;; ix86_binary_operator_ok with code UNKNOWN.
997 (define_insn "sse_movlhps"
998 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1001 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1002 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1003 (parallel [(const_int 0)
1007 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1009 movlhps\t{%2, %0|%0, %2}
1010 movhps\t{%2, %0|%0, %2}
1011 movlps\t{%2, %H0|%H0, %2}"
1012 [(set_attr "type" "ssemov")
1013 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits unpckhps.  Elements 2, 6, 3, 7 are selected, in that order, from
;; the pair (operand 1, operand 2); indices 4..7 refer to operand 2.
;; Operand 1 is a register tied to the destination; operand 2 may be an SSE
;; register or memory.
1015 (define_insn "sse_unpckhps"
1016 [(set (match_operand:V4SF 0 "register_operand" "=x")
1019 (match_operand:V4SF 1 "register_operand" "0")
1020 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1021 (parallel [(const_int 2) (const_int 6)
1022 (const_int 3) (const_int 7)])))]
1024 "unpckhps\t{%2, %0|%0, %2}"
1025 [(set_attr "type" "sselog")
1026 (set_attr "mode" "V4SF")])
;; Emits unpcklps.  Elements 0, 4, 1, 5 are selected, in that order, from
;; the pair (operand 1, operand 2); indices 4..7 refer to operand 2.
;; Operand 1 is a register tied to the destination; operand 2 may be an SSE
;; register or memory.
1028 (define_insn "sse_unpcklps"
1029 [(set (match_operand:V4SF 0 "register_operand" "=x")
1032 (match_operand:V4SF 1 "register_operand" "0")
1033 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1034 (parallel [(const_int 0) (const_int 4)
1035 (const_int 1) (const_int 5)])))]
1037 "unpcklps\t{%2, %0|%0, %2}"
1038 [(set_attr "type" "sselog")
1039 (set_attr "mode" "V4SF")])
1041 ;; These are modeled with the same vec_concat as the others so that we
1042 ;; capture users of shufps that can use the new instructions
;; Emits movshdup.  Single-input pattern: operand 1 (SSE register or
;; memory) is the only source, and the element selection list begins with
;; index 1.
1043 (define_insn "sse3_movshdup"
1044 [(set (match_operand:V4SF 0 "register_operand" "=x")
1047 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1049 (parallel [(const_int 1)
1054 "movshdup\t{%1, %0|%0, %1}"
1055 [(set_attr "type" "sse")
1056 (set_attr "mode" "V4SF")])
;; Emits movsldup.  Single-input pattern: operand 1 (SSE register or
;; memory) is the only source, and the element selection list begins with
;; index 0.
1058 (define_insn "sse3_movsldup"
1059 [(set (match_operand:V4SF 0 "register_operand" "=x")
1062 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1064 (parallel [(const_int 0)
1069 "movsldup\t{%1, %0|%0, %1}"
1070 [(set_attr "type" "sse")
1071 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as a single const_int (operand 3).
;; The mask is split into four 2-bit fields.  Fields 0 and 1 are passed
;; through as 0..3; fields 2 and 3 have 4 added so that they fall in the
;; 4..7 range that sse_shufps_1 requires for its last two selectors.  The
;; four selectors are then handed to gen_sse_shufps_1.
1073 (define_expand "sse_shufps"
1074 [(match_operand:V4SF 0 "register_operand" "")
1075 (match_operand:V4SF 1 "register_operand" "")
1076 (match_operand:V4SF 2 "nonimmediate_operand" "")
1077 (match_operand:SI 3 "const_int_operand" "")]
1080 int mask = INTVAL (operands[3]);
1081 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1082 GEN_INT ((mask >> 0) & 3),
1083 GEN_INT ((mask >> 2) & 3),
1084 GEN_INT (((mask >> 4) & 3) + 4),
1085 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selection spelled out as four constant operands
;; (3..6).  The output code packs them back into the 8-bit immediate,
;; removing the +4 bias from operands 5 and 6, and overwrites
;; operands[3] with that immediate so the template can print it as %3.
1089 (define_insn "sse_shufps_1"
1090 [(set (match_operand:V4SF 0 "register_operand" "=x")
1093 (match_operand:V4SF 1 "register_operand" "0")
1094 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1095 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1096 (match_operand 4 "const_0_to_3_operand" "")
1097 (match_operand 5 "const_4_to_7_operand" "")
1098 (match_operand 6 "const_4_to_7_operand" "")])))]
1102 mask |= INTVAL (operands[3]) << 0;
1103 mask |= INTVAL (operands[4]) << 2;
1104 mask |= (INTVAL (operands[5]) - 4) << 4;
1105 mask |= (INTVAL (operands[6]) - 4) << 6;
1106 operands[3] = GEN_INT (mask);
1108 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1110 [(set_attr "type" "sselog")
1111 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF into a V2SF destination.
;; Alternatives, in order: movhps from a register to memory, movhlps
;; register to register, and movlps loading from the high half (%H1) of
;; an offsettable memory source.
1113 (define_insn "sse_storehps"
1114 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1116 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1117 (parallel [(const_int 2) (const_int 3)])))]
1120 movhps\t{%1, %0|%0, %1}
1121 movhlps\t{%1, %0|%0, %1}
1122 movlps\t{%H1, %0|%0, %H1}"
1123 [(set_attr "type" "ssemov")
1124 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of operand 1 (always tied to the
;; destination) with the V2SF in operand 2.  Alternatives, in order:
;; movhps from memory, movlhps from a register, and movlps storing the
;; register into the high half (%H0) of an offsettable memory destination.
1126 (define_insn "sse_loadhps"
1127 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1130 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1131 (parallel [(const_int 0) (const_int 1)]))
1132 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1135 movhps\t{%2, %0|%0, %2}
1136 movlhps\t{%2, %0|%0, %2}
1137 movlps\t{%2, %H0|%H0, %2}"
1138 [(set_attr "type" "ssemov")
1139 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF into a V2SF destination.
;; Alternatives, in order: movlps register to memory, movaps register to
;; register (a full-register copy), and movlps memory to register.
1141 (define_insn "sse_storelps"
1142 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1144 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1145 (parallel [(const_int 0) (const_int 1)])))]
1148 movlps\t{%1, %0|%0, %1}
1149 movaps\t{%1, %0|%0, %1}
1150 movlps\t{%1, %0|%0, %1}"
1151 [(set_attr "type" "ssemov")
1152 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine the V2SF in operand 2 with elements 2 and 3 of operand 1.
;; Alternatives, in order: shufps with immediate 0xe4 when operand 2 is
;; already in the destination register, movlps from memory when
;; operand 1 is tied to the destination, and movlps storing a register
;; into a memory destination.
1154 (define_insn "sse_loadlps"
1155 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1157 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1159 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1160 (parallel [(const_int 2) (const_int 3)]))))]
1163 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1164 movlps\t{%2, %0|%0, %2}
1165 movlps\t{%2, %0|%0, %2}"
1166 [(set_attr "type" "sselog,ssemov,ssemov")
1167 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers.  Operand 1 is tied to the destination;
;; operand 2 is the register printed as the instruction's source.
1169 (define_insn "sse_movss"
1170 [(set (match_operand:V4SF 0 "register_operand" "=x")
1172 (match_operand:V4SF 2 "register_operand" "x")
1173 (match_operand:V4SF 1 "register_operand" "0")
1176 "movss\t{%2, %0|%0, %2}"
1177 [(set_attr "type" "ssemov")
1178 (set_attr "mode" "SF")])
;; The SF input is tied to the destination register, and the insn
;; shuffles that register with itself using shufps with immediate 0.
1180 (define_insn "*vec_dupv4sf"
1181 [(set (match_operand:V4SF 0 "register_operand" "=x")
1183 (match_operand:SF 1 "register_operand" "0")))]
1185 "shufps\t{$0, %0, %0|%0, %0, 0}"
1186 [(set_attr "type" "sselog1")
1187 (set_attr "mode" "V4SF")])
1189 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1190 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1191 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands.  The first two alternatives use
;; SSE registers (unpcklps, or movss when operand 2 satisfies "C"); the
;; last two use MMX registers (punpckldq, or movd when operand 2
;; satisfies "C").
1192 (define_insn "*sse_concatv2sf"
1193 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1195 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1196 (match_operand:SF 2 "vector_move_operand" " x,C,*y, C")))]
1199 unpcklps\t{%2, %0|%0, %2}
1200 movss\t{%1, %0|%0, %1}
1201 punpckldq\t{%2, %0|%0, %2}
1202 movd\t{%1, %0|%0, %1}"
1203 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1204 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 is added with movlhps when it is a register
;; and with movhps when it is in memory.
1206 (define_insn "*sse_concatv4sf"
1207 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1209 (match_operand:V2SF 1 "register_operand" " 0,0")
1210 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1213 movlhps\t{%2, %0|%0, %2}
1214 movhps\t{%2, %0|%0, %2}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation: hands the destination (operand 0) and
;; the initialiser (operand 1) to ix86_expand_vector_init, passing
;; false as the first argument.
1218 (define_expand "vec_initv4sf"
1219 [(match_operand:V4SF 0 "register_operand" "")
1220 (match_operand 1 "" "")]
1223 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert the SF in operand 2 into a V4SF whose other input is
;; operand 1.  The constraints list four alternatives: movss from an
;; SSE register, movss from memory, movd from a general register, and a
;; memory destination whose operand 1 is tied to operand 0.
1227 (define_insn "*vec_setv4sf_0"
1228 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1231 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1232 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1236 movss\t{%2, %0|%0, %2}
1237 movss\t{%2, %0|%0, %2}
1238 movd\t{%2, %0|%0, %2}
1240 [(set_attr "type" "ssemov")
1241 (set_attr "mode" "SF")])
;; After reload, with a V4SF memory destination and a non-memory SF
;; source: emit a plain SFmode store of operand 1 at byte offset 0 of
;; the destination.
1244 [(set (match_operand:V4SF 0 "memory_operand" "")
1247 (match_operand:SF 1 "nonmemory_operand" ""))
1250 "TARGET_SSE && reload_completed"
1253 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element insertion: forwards the vector (operand 0), the SF
;; value (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, passing false as the first argument.
1257 (define_expand "vec_setv4sf"
1258 [(match_operand:V4SF 0 "register_operand" "")
1259 (match_operand:SF 1 "register_operand" "")
1260 (match_operand 2 "const_int_operand" "")]
1263 ix86_expand_vector_set (false, operands[0], operands[1],
1264 INTVAL (operands[2]));
;; Extract element 0 of a V4SF into an SF destination.  The insn is
;; rejected when both operands are memory.  After reload it is split
;; into a plain SF move whose source is operand 1 re-expressed in
;; SFmode, either as the same hard register number or via gen_lowpart.
1268 (define_insn_and_split "*vec_extractv4sf_0"
1269 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1271 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1272 (parallel [(const_int 0)])))]
1273 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1275 "&& reload_completed"
1278 rtx op1 = operands[1];
1280 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1282 op1 = gen_lowpart (SFmode, op1);
1283 emit_move_insn (operands[0], op1);
;; V4SF element extraction: forwards the SF destination (operand 0),
;; the vector (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_extract, passing false as the first argument.
1287 (define_expand "vec_extractv4sf"
1288 [(match_operand:SF 0 "register_operand" "")
1289 (match_operand:V4SF 1 "register_operand" "")
1290 (match_operand 2 "const_int_operand" "")]
1293 ix86_expand_vector_extract (false, operands[0], operands[1],
1294 INTVAL (operands[2]));
1298 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1300 ;; Parallel double-precision floating point arithmetic
1302 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  All expansion work is delegated to
;; ix86_expand_fp_absneg_operator with code NEG; the expander then
;; finishes with DONE.
1304 (define_expand "negv2df2"
1305 [(set (match_operand:V2DF 0 "register_operand" "")
1306 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1308 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  All expansion work is delegated to
;; ix86_expand_fp_absneg_operator with code ABS; the expander then
;; finishes with DONE.
1310 (define_expand "absv2df2"
1311 [(set (match_operand:V2DF 0 "register_operand" "")
1312 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1314 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Both inputs may be registers or memory; the
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the PLUS pattern is emitted.
1316 (define_expand "addv2df3"
1317 [(set (match_operand:V2DF 0 "register_operand" "")
1318 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1319 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1321 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 is marked commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
1323 (define_insn "*addv2df3"
1324 [(set (match_operand:V2DF 0 "register_operand" "=x")
1325 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1326 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1327 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1328 "addpd\t{%2, %0|%0, %2}"
1329 [(set_attr "type" "sseadd")
1330 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands.  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
;; The operand check is made in V2DFmode, the mode of every operand in
;; this pattern, in line with the other V2DF arithmetic patterns.
1332 (define_insn "sse2_vmaddv2df3"
1333 [(set (match_operand:V2DF 0 "register_operand" "=x")
1335 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1336 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1339 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1340 "addsd\t{%2, %0|%0, %2}"
1341 [(set_attr "type" "sseadd")
1342 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the MINUS
;; pattern is emitted.
1344 (define_expand "subv2df3"
1345 [(set (match_operand:V2DF 0 "register_operand" "")
1346 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1347 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1349 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Not commutative: operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
1351 (define_insn "*subv2df3"
1352 [(set (match_operand:V2DF 0 "register_operand" "=x")
1353 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1354 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1356 "subpd\t{%2, %0|%0, %2}"
1357 [(set_attr "type" "sseadd")
1358 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands.  Operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
1360 (define_insn "sse2_vmsubv2df3"
1361 [(set (match_operand:V2DF 0 "register_operand" "=x")
1363 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1364 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1368 "subsd\t{%2, %0|%0, %2}"
1369 [(set_attr "type" "sseadd")
1370 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the MULT
;; pattern is emitted.
1372 (define_expand "mulv2df3"
1373 [(set (match_operand:V2DF 0 "register_operand" "")
1374 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1375 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1377 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 is marked commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
1379 (define_insn "*mulv2df3"
1380 [(set (match_operand:V2DF 0 "register_operand" "=x")
1381 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1382 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1383 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1384 "mulpd\t{%2, %0|%0, %2}"
1385 [(set_attr "type" "ssemul")
1386 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands.  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
;; Requires TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
1388 (define_insn "sse2_vmmulv2df3"
1389 [(set (match_operand:V2DF 0 "register_operand" "=x")
1391 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1392 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1395 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1396 "mulsd\t{%2, %0|%0, %2}"
1397 [(set_attr "type" "ssemul")
1398 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 (the dividend) must already be a
;; register; the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the DIV pattern is emitted.
1400 (define_expand "divv2df3"
1401 [(set (match_operand:V2DF 0 "register_operand" "")
1402 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1403 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1405 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Not commutative: operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
1407 (define_insn "*divv2df3"
1408 [(set (match_operand:V2DF 0 "register_operand" "=x")
1409 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1410 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1412 "divpd\t{%2, %0|%0, %2}"
1413 [(set_attr "type" "ssediv")
1414 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands.  Operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
1416 (define_insn "sse2_vmdivv2df3"
1417 [(set (match_operand:V2DF 0 "register_operand" "=x")
1419 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1420 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1424 "divsd\t{%2, %0|%0, %2}"
1425 [(set_attr "type" "ssediv")
1426 (set_attr "mode" "DF")])
;; sqrtpd.  The source may be a register or memory and is not tied to
;; the destination register.
1428 (define_insn "sqrtv2df2"
1429 [(set (match_operand:V2DF 0 "register_operand" "=x")
1430 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1432 "sqrtpd\t{%1, %0|%0, %1}"
1433 [(set_attr "type" "sse")
1434 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands.  Operand 1 is the value whose square root
;; is taken; operand 2 is tied to the destination register.
;; The "mode" attribute is DF because sqrtsd is a double-precision
;; scalar instruction, like the other *sd patterns in this file.
;; NOTE(review): operand 1 uses predicate "register_operand" with
;; constraint "xm", so the "m" part can only be reached through reload;
;; confirm whether "nonimmediate_operand" was intended.
1436 (define_insn "sse2_vmsqrtv2df2"
1437 [(set (match_operand:V2DF 0 "register_operand" "=x")
1439 (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
1440 (match_operand:V2DF 2 "register_operand" "0")
1443 "sqrtsd\t{%1, %0|%0, %1}"
1444 [(set_attr "type" "sse")
1445 (set_attr "mode" "DF")])
1447 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1448 ;; isn't really correct, as those rtl operators aren't defined when
1449 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; passed through ix86_fixup_binary_operands_no_copy.
1451 (define_expand "smaxv2df3"
1452 [(set (match_operand:V2DF 0 "register_operand" "")
1453 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1454 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1457 if (!flag_finite_math_only)
1458 operands[1] = force_reg (V2DFmode, operands[1]);
1459 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form.  Only valid when flag_finite_math_only is
;; set; operand 1 is marked commutative ("%") and tied to the
;; destination.
1462 (define_insn "*smaxv2df3_finite"
1463 [(set (match_operand:V2DF 0 "register_operand" "=x")
1464 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1465 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1466 "TARGET_SSE2 && flag_finite_math_only
1467 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1468 "maxpd\t{%2, %0|%0, %2}"
1469 [(set_attr "type" "sseadd")
1470 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to
;; the destination and carries no "%" marker, so operand order is kept.
1472 (define_insn "*smaxv2df3"
1473 [(set (match_operand:V2DF 0 "register_operand" "=x")
1474 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1475 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1477 "maxpd\t{%2, %0|%0, %2}"
1478 [(set_attr "type" "sseadd")
1479 (set_attr "mode" "V2DF")])
;; maxsd, commutative form.  Only valid when flag_finite_math_only is
;; set; operand 1 is marked commutative ("%") and tied to the
;; destination.
1481 (define_insn "*sse2_vmsmaxv2df3_finite"
1482 [(set (match_operand:V2DF 0 "register_operand" "=x")
1484 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1485 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1488 "TARGET_SSE2 && flag_finite_math_only
1489 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1490 "maxsd\t{%2, %0|%0, %2}"
1491 [(set_attr "type" "sseadd")
1492 (set_attr "mode" "DF")])
;; maxsd, non-commutative form: operand 1 must be a register tied to
;; the destination; operand 2 may be a register or memory.
1494 (define_insn "sse2_vmsmaxv2df3"
1495 [(set (match_operand:V2DF 0 "register_operand" "=x")
1497 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1498 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1502 "maxsd\t{%2, %0|%0, %2}"
1503 [(set_attr "type" "sseadd")
1504 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; passed through ix86_fixup_binary_operands_no_copy.
1506 (define_expand "sminv2df3"
1507 [(set (match_operand:V2DF 0 "register_operand" "")
1508 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1509 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1512 if (!flag_finite_math_only)
1513 operands[1] = force_reg (V2DFmode, operands[1]);
1514 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form.  Only valid when flag_finite_math_only is
;; set; operand 1 is marked commutative ("%") and tied to the
;; destination.
1517 (define_insn "*sminv2df3_finite"
1518 [(set (match_operand:V2DF 0 "register_operand" "=x")
1519 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1520 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1521 "TARGET_SSE2 && flag_finite_math_only
1522 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1523 "minpd\t{%2, %0|%0, %2}"
1524 [(set_attr "type" "sseadd")
1525 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to
;; the destination and carries no "%" marker, so operand order is kept.
1527 (define_insn "*sminv2df3"
1528 [(set (match_operand:V2DF 0 "register_operand" "=x")
1529 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1530 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1532 "minpd\t{%2, %0|%0, %2}"
1533 [(set_attr "type" "sseadd")
1534 (set_attr "mode" "V2DF")])
;; minsd, commutative form.  Only valid when flag_finite_math_only is
;; set; operand 1 is marked commutative ("%") and tied to the
;; destination.
1536 (define_insn "*sse2_vmsminv2df3_finite"
1537 [(set (match_operand:V2DF 0 "register_operand" "=x")
1539 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1540 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1543 "TARGET_SSE2 && flag_finite_math_only
1544 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1545 "minsd\t{%2, %0|%0, %2}"
1546 [(set_attr "type" "sseadd")
1547 (set_attr "mode" "DF")])
;; minsd, non-commutative form: operand 1 must be a register tied to
;; the destination; operand 2 may be a register or memory.
1549 (define_insn "sse2_vmsminv2df3"
1550 [(set (match_operand:V2DF 0 "register_operand" "=x")
1552 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1553 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1557 "minsd\t{%2, %0|%0, %2}"
1558 [(set_attr "type" "sseadd")
1559 (set_attr "mode" "DF")])
;; addsubpd.  Operand 1 is tied to the destination and operand 2 may be
;; a register or memory; the pattern reuses both operands (match_dup)
;; for its MINUS sub-expression.
1561 (define_insn "sse3_addsubv2df3"
1562 [(set (match_operand:V2DF 0 "register_operand" "=x")
1565 (match_operand:V2DF 1 "register_operand" "0")
1566 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1567 (minus:V2DF (match_dup 1) (match_dup 2))
1570 "addsubpd\t{%2, %0|%0, %2}"
1571 [(set_attr "type" "sseadd")
1572 (set_attr "mode" "V2DF")])
;; haddpd.  The pattern pairs element 0 with element 1 of operand 1
;; (tied to the destination), and element 0 with element 1 of operand 2
;; (register or memory).
1574 (define_insn "sse3_haddv2df3"
1575 [(set (match_operand:V2DF 0 "register_operand" "=x")
1579 (match_operand:V2DF 1 "register_operand" "0")
1580 (parallel [(const_int 0)]))
1581 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1584 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1585 (parallel [(const_int 0)]))
1586 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1588 "haddpd\t{%2, %0|%0, %2}"
1589 [(set_attr "type" "sseadd")
1590 (set_attr "mode" "V2DF")])
;; hsubpd.  The pattern pairs element 0 with element 1 of operand 1
;; (tied to the destination), and element 0 with element 1 of operand 2
;; (register or memory).
1592 (define_insn "sse3_hsubv2df3"
1593 [(set (match_operand:V2DF 0 "register_operand" "=x")
1597 (match_operand:V2DF 1 "register_operand" "0")
1598 (parallel [(const_int 0)]))
1599 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1602 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1603 (parallel [(const_int 0)]))
1604 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1606 "hsubpd\t{%2, %0|%0, %2}"
1607 [(set_attr "type" "sseadd")
1608 (set_attr "mode" "V2DF")])
1610 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1612 ;; Parallel double-precision floating point comparisons
1614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed double compare.  Operator 3 must satisfy
;; sse_comparison_operator and is printed through %D3 to form the
;; cmp<cond>pd mnemonic.  Operand 1 is tied to the destination.
1616 (define_insn "sse2_maskcmpv2df3"
1617 [(set (match_operand:V2DF 0 "register_operand" "=x")
1618 (match_operator:V2DF 3 "sse_comparison_operator"
1619 [(match_operand:V2DF 1 "register_operand" "0")
1620 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1622 "cmp%D3pd\t{%2, %0|%0, %2}"
1623 [(set_attr "type" "ssecmp")
1624 (set_attr "mode" "V2DF")])
;; Scalar double compare on V2DF operands.  Operator 3 must satisfy
;; sse_comparison_operator and is printed through %D3 to form the
;; cmp<cond>sd mnemonic.  Operand 1 is tied to the destination.
1626 (define_insn "sse2_vmmaskcmpv2df3"
1627 [(set (match_operand:V2DF 0 "register_operand" "=x")
1629 (match_operator:V2DF 3 "sse_comparison_operator"
1630 [(match_operand:V2DF 1 "register_operand" "0")
1631 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1635 "cmp%D3sd\t{%2, %0|%0, %2}"
1636 [(set_attr "type" "ssecmp")
1637 (set_attr "mode" "DF")])
;; comisd.  Sets FLAGS_REG in CCFP mode from element 0 of operand 0 (a
;; register) and element 0 of operand 1 (register or memory).
1639 (define_insn "sse2_comi"
1640 [(set (reg:CCFP FLAGS_REG)
1643 (match_operand:V2DF 0 "register_operand" "x")
1644 (parallel [(const_int 0)]))
1646 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1647 (parallel [(const_int 0)]))))]
1649 "comisd\t{%1, %0|%0, %1}"
1650 [(set_attr "type" "ssecomi")
1651 (set_attr "mode" "DF")])
;; ucomisd.  Sets FLAGS_REG in CCFPU mode from element 0 of operand 0
;; (a register) and element 0 of operand 1 (register or memory).
1653 (define_insn "sse2_ucomi"
1654 [(set (reg:CCFPU FLAGS_REG)
1657 (match_operand:V2DF 0 "register_operand" "x")
1658 (parallel [(const_int 0)]))
1660 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1661 (parallel [(const_int 0)]))))]
1663 "ucomisd\t{%1, %0|%0, %1}"
1664 [(set_attr "type" "ssecomi")
1665 (set_attr "mode" "DF")])
;; V2DF conditional select expander.  Operator 3 compares operands 4
;; and 5; operands 1 and 2 are the two V2DF values.  The expansion is
;; attempted by ix86_expand_fp_vcond, whose result is tested.
1667 (define_expand "vcondv2df"
1668 [(set (match_operand:V2DF 0 "register_operand" "")
1670 (match_operator 3 ""
1671 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1672 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1673 (match_operand:V2DF 1 "general_operand" "")
1674 (match_operand:V2DF 2 "general_operand" "")))]
1677 if (ix86_expand_fp_vcond (operands))
1683 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1685 ;; Parallel double-precision floating point logical operations
1687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the AND
;; pattern is emitted.
1689 (define_expand "andv2df3"
1690 [(set (match_operand:V2DF 0 "register_operand" "")
1691 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1692 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1694 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 is marked commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
1696 (define_insn "*andv2df3"
1697 [(set (match_operand:V2DF 0 "register_operand" "=x")
1698 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1699 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1700 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1701 "andpd\t{%2, %0|%0, %2}"
1702 [(set_attr "type" "sselog")
1703 (set_attr "mode" "V2DF")])
;; andnpd: AND of the complement of operand 1 with operand 2.
;; Operand 1 must be a register tied to the destination.
1705 (define_insn "sse2_nandv2df3"
1706 [(set (match_operand:V2DF 0 "register_operand" "=x")
1707 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1708 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1710 "andnpd\t{%2, %0|%0, %2}"
1711 [(set_attr "type" "sselog")
1712 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the IOR
;; pattern is emitted.
1714 (define_expand "iorv2df3"
1715 [(set (match_operand:V2DF 0 "register_operand" "")
1716 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1717 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1719 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 is marked commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
1721 (define_insn "*iorv2df3"
1722 [(set (match_operand:V2DF 0 "register_operand" "=x")
1723 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1724 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1725 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1726 "orpd\t{%2, %0|%0, %2}"
1727 [(set_attr "type" "sselog")
1728 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the XOR
;; pattern is emitted.
1730 (define_expand "xorv2df3"
1731 [(set (match_operand:V2DF 0 "register_operand" "")
1732 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1733 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1735 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 is marked commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
1737 (define_insn "*xorv2df3"
1738 [(set (match_operand:V2DF 0 "register_operand" "=x")
1739 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1740 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1741 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1742 "xorpd\t{%2, %0|%0, %2}"
1743 [(set_attr "type" "sselog")
1744 (set_attr "mode" "V2DF")])
1746 ;; Also define scalar versions. These are used for abs, neg, and
1747 ;; conditional move. Using subregs into vector modes causes register
1748 ;; allocation lossage. These patterns do not allow memory operands
1749 ;; because the native instructions read the full 128-bits.
;; Scalar DF bitwise AND implemented with the packed andpd instruction
;; (the "mode" attribute is V2DF).  Both inputs must be registers;
;; operand 1 is tied to the destination.
1751 (define_insn "*anddf3"
1752 [(set (match_operand:DF 0 "register_operand" "=x")
1753 (and:DF (match_operand:DF 1 "register_operand" "0")
1754 (match_operand:DF 2 "register_operand" "x")))]
1756 "andpd\t{%2, %0|%0, %2}"
1757 [(set_attr "type" "sselog")
1758 (set_attr "mode" "V2DF")])
;; Scalar DF and-not (complement of operand 1 ANDed with operand 2)
;; implemented with the packed andnpd instruction.  Both inputs must be
;; registers; operand 1 is tied to the destination.
1760 (define_insn "*nanddf3"
1761 [(set (match_operand:DF 0 "register_operand" "=x")
1762 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1763 (match_operand:DF 2 "register_operand" "x")))]
1765 "andnpd\t{%2, %0|%0, %2}"
1766 [(set_attr "type" "sselog")
1767 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR implemented with the packed orpd instruction
;; (the "mode" attribute is V2DF).  Both inputs must be registers;
;; operand 1 is tied to the destination.
1769 (define_insn "*iordf3"
1770 [(set (match_operand:DF 0 "register_operand" "=x")
1771 (ior:DF (match_operand:DF 1 "register_operand" "0")
1772 (match_operand:DF 2 "register_operand" "x")))]
1774 "orpd\t{%2, %0|%0, %2}"
1775 [(set_attr "type" "sselog")
1776 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR implemented with the packed xorpd instruction
;; (the "mode" attribute is V2DF).  Both inputs must be registers;
;; operand 1 is tied to the destination.
1778 (define_insn "*xordf3"
1779 [(set (match_operand:DF 0 "register_operand" "=x")
1780 (xor:DF (match_operand:DF 1 "register_operand" "0")
1781 (match_operand:DF 2 "register_operand" "x")))]
1783 "xorpd\t{%2, %0|%0, %2}"
1784 [(set_attr "type" "sselog")
1785 (set_attr "mode" "V2DF")])
1787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1789 ;; Parallel double-precision floating point conversion operations
1791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: integer-to-float conversion (FLOAT) of a V2SI held in an
;; MMX register ("y") or memory into a V2DF SSE register.
1793 (define_insn "sse2_cvtpi2pd"
1794 [(set (match_operand:V2DF 0 "register_operand" "=x")
1795 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
1797 "cvtpi2pd\t{%1, %0|%0, %1}"
1798 [(set_attr "type" "ssecvt")
1799 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF (register or memory) to V2SI in an MMX register.
;; The conversion is modelled as UNSPEC_FIX_NOTRUNC rather than FIX.
1801 (define_insn "sse2_cvtpd2pi"
1802 [(set (match_operand:V2SI 0 "register_operand" "=y")
1803 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1804 UNSPEC_FIX_NOTRUNC))]
1806 "cvtpd2pi\t{%1, %0|%0, %1}"
1807 [(set_attr "type" "ssecvt")
1808 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF (register or memory) to V2SI in an MMX register,
;; modelled with the FIX rtx code.
;; NOTE(review): the "mode" attribute here is TI while sse2_cvtpd2pi
;; uses DI for the same destination type -- confirm which is intended.
1810 (define_insn "sse2_cvttpd2pi"
1811 [(set (match_operand:V2SI 0 "register_operand" "=y")
1812 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1814 "cvttpd2pi\t{%1, %0|%0, %1}"
1815 [(set_attr "type" "ssecvt")
1816 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SI in operand 2 (general register or memory)
;; to DF.  Operand 1 is tied to the destination in both alternatives.
;; The athlon_decode attribute differs per alternative.
1818 (define_insn "sse2_cvtsi2sd"
1819 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1822 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1823 (match_operand:V2DF 1 "register_operand" "0,0")
1826 "cvtsi2sd\t{%2, %0|%0, %2}"
1827 [(set_attr "type" "sseicvt")
1828 (set_attr "mode" "DF")
1829 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: converts the DI in operand 2 (general register or memory)
;; to DF.  Operand 1 is tied to the destination.  Only available with
;; TARGET_SSE2 on TARGET_64BIT.
1831 (define_insn "sse2_cvtsi2sdq"
1832 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1835 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1836 (match_operand:V2DF 1 "register_operand" "0,0")
1838 "TARGET_SSE2 && TARGET_64BIT"
1839 "cvtsi2sdq\t{%2, %0|%0, %2}"
1840 [(set_attr "type" "sseicvt")
1841 (set_attr "mode" "DF")
1842 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: converts element 0 of the V2DF in operand 1 (SSE register
;; or memory) to an SI in a general register.  The conversion is
;; modelled as UNSPEC_FIX_NOTRUNC.
1844 (define_insn "sse2_cvtsd2si"
1845 [(set (match_operand:SI 0 "register_operand" "=r,r")
1848 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1849 (parallel [(const_int 0)]))]
1850 UNSPEC_FIX_NOTRUNC))]
1852 "cvtsd2si\t{%1, %0|%0, %1}"
1853 [(set_attr "type" "sseicvt")
1854 (set_attr "athlon_decode" "double,vector")
1855 (set_attr "mode" "SI")])
;; cvtsd2siq: converts element 0 of the V2DF in operand 1 (SSE register
;; or memory) to a DI in a general register, modelled as
;; UNSPEC_FIX_NOTRUNC.  Only available with TARGET_SSE2 on TARGET_64BIT.
1857 (define_insn "sse2_cvtsd2siq"
1858 [(set (match_operand:DI 0 "register_operand" "=r,r")
1861 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1862 (parallel [(const_int 0)]))]
1863 UNSPEC_FIX_NOTRUNC))]
1864 "TARGET_SSE2 && TARGET_64BIT"
1865 "cvtsd2siq\t{%1, %0|%0, %1}"
1866 [(set_attr "type" "sseicvt")
1867 (set_attr "athlon_decode" "double,vector")
1868 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF in operand 1 (SSE register
;; or memory) to an SI in a general register.
1870 (define_insn "sse2_cvttsd2si"
1871 [(set (match_operand:SI 0 "register_operand" "=r,r")
1874 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1875 (parallel [(const_int 0)]))))]
1877 "cvttsd2si\t{%1, %0|%0, %1}"
1878 [(set_attr "type" "sseicvt")
1879 (set_attr "mode" "SI")
1880 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: converts element 0 of the V2DF in operand 1 (SSE
;; register or memory) to a DI in a general register.  Only available
;; with TARGET_SSE2 on TARGET_64BIT.
1882 (define_insn "sse2_cvttsd2siq"
1883 [(set (match_operand:DI 0 "register_operand" "=r,r")
1886 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1887 (parallel [(const_int 0)]))))]
1888 "TARGET_SSE2 && TARGET_64BIT"
1889 "cvttsd2siq\t{%1, %0|%0, %1}"
1890 [(set_attr "type" "sseicvt")
1891 (set_attr "mode" "DI")
1892 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: produces a V2DF from elements 0 and 1 of the V4SI in
;; operand 1 (register or memory).
1894 (define_insn "sse2_cvtdq2pd"
1895 [(set (match_operand:V2DF 0 "register_operand" "=x")
1898 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1899 (parallel [(const_int 0) (const_int 1)]))))]
1901 "cvtdq2pd\t{%1, %0|%0, %1}"
1902 [(set_attr "type" "ssecvt")
1903 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation statement supplies
;; operands[2] as the V2SImode zero constant.
1905 (define_expand "sse2_cvtpd2dq"
1906 [(set (match_operand:V4SI 0 "register_operand" "")
1908 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1912 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq.  The V2DF source (register or memory) is converted through
;; a V2SI unspec; operand 2 must be a zero constant (const0_operand).
1914 (define_insn "*sse2_cvtpd2dq"
1915 [(set (match_operand:V4SI 0 "register_operand" "=x")
1917 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1919 (match_operand:V2SI 2 "const0_operand" "")))]
1921 "cvtpd2dq\t{%1, %0|%0, %1}"
1922 [(set_attr "type" "ssecvt")
1923 (set_attr "mode" "TI")])
;; Expander for cvttpd2dq.  The preparation statement supplies
;; operands[2] as the V2SImode zero constant.
1925 (define_expand "sse2_cvttpd2dq"
1926 [(set (match_operand:V4SI 0 "register_operand" "")
1928 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1931 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq.  The V2DF source (register or memory) is converted with
;; FIX to V2SI; operand 2 must be a zero constant (const0_operand).
1933 (define_insn "*sse2_cvttpd2dq"
1934 [(set (match_operand:V4SI 0 "register_operand" "=x")
1936 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1937 (match_operand:V2SI 2 "const0_operand" "")))]
1939 "cvttpd2dq\t{%1, %0|%0, %1}"
1940 [(set_attr "type" "ssecvt")
1941 (set_attr "mode" "TI")])
;; cvtsd2ss.  The V2DF in operand 2 (register or memory) goes through
;; FLOAT_TRUNCATE; operand 1 (V4SF) is tied to the destination.  The
;; athlon_decode attribute differs per alternative.
1943 (define_insn "sse2_cvtsd2ss"
1944 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1947 (float_truncate:V2SF
1948 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1949 (match_operand:V4SF 1 "register_operand" "0,0")
1952 "cvtsd2ss\t{%2, %0|%0, %2}"
1953 [(set_attr "type" "ssecvt")
1954 (set_attr "athlon_decode" "vector,double")
1955 (set_attr "mode" "SF")])
;; cvtss2sd.  The pattern reads elements 0 and 1 of the V4SF in
;; operand 2 (register or memory); operand 1 (V2DF) is tied to the
;; destination.
1957 (define_insn "sse2_cvtss2sd"
1958 [(set (match_operand:V2DF 0 "register_operand" "=x")
1962 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1963 (parallel [(const_int 0) (const_int 1)])))
1964 (match_operand:V2DF 1 "register_operand" "0")
1967 "cvtss2sd\t{%2, %0|%0, %2}"
1968 [(set_attr "type" "ssecvt")
1969 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The preparation statement supplies
;; operands[2] as the V2SFmode zero constant.
1971 (define_expand "sse2_cvtpd2ps"
1972 [(set (match_operand:V4SF 0 "register_operand" "")
1974 (float_truncate:V2SF
1975 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1978 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps.  The V2DF source (register or memory) is FLOAT_TRUNCATEd
;; to V2SF; operand 2 must be a zero constant (const0_operand).
1980 (define_insn "*sse2_cvtpd2ps"
1981 [(set (match_operand:V4SF 0 "register_operand" "=x")
1983 (float_truncate:V2SF
1984 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1985 (match_operand:V2SF 2 "const0_operand" "")))]
1987 "cvtpd2ps\t{%1, %0|%0, %1}"
1988 [(set_attr "type" "ssecvt")
1989 (set_attr "mode" "V4SF")])
;; cvtps2pd: produces a V2DF from elements 0 and 1 of the V4SF in
;; operand 1 (register or memory).
1991 (define_insn "sse2_cvtps2pd"
1992 [(set (match_operand:V2DF 0 "register_operand" "=x")
1995 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1996 (parallel [(const_int 0) (const_int 1)]))))]
1998 "cvtps2pd\t{%1, %0|%0, %1}"
1999 [(set_attr "type" "ssecvt")
2000 (set_attr "mode" "V2DF")])
2002 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2004 ;; Parallel double-precision floating point element swizzling
2006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rejected when operands 1 and 2 are both memory.  Alternatives, in
;; order: unpckhpd register/register, movlpd loading from the high half
;; (%H1) of an offsettable memory operand 1, and movhpd storing
;; operand 1 to a memory destination.
2008 (define_insn "sse2_unpckhpd"
2009 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2012 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2013 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2014 (parallel [(const_int 1)
2016 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2018 unpckhpd\t{%2, %0|%0, %2}
2019 movlpd\t{%H1, %0|%0, %H1}
2020 movhpd\t{%1, %0|%0, %1}"
2021 [(set_attr "type" "sselog,ssemov,ssemov")
2022 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup.  First alternative: SSE register destination with a
;; register-or-memory source.  Second alternative: offsettable memory
;; destination with a register source.
;; The insn condition rejects the memory-to-memory combination.  This
;; pattern has only operands 0 and 1, so those are the two tested.
2024 (define_insn "*sse3_movddup"
2025 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2028 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2030 (parallel [(const_int 0)
2032 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2034 movddup\t{%1, %0|%0, %1}
2036 [(set_attr "type" "sselog,ssemov")
2037 (set_attr "mode" "V2DF")])
;; After reload, with a memory destination and a register source: take
;; the source hard register in DFmode and store it twice, at byte
;; offsets 0 and 8 of the destination.
2040 [(set (match_operand:V2DF 0 "memory_operand" "")
2043 (match_operand:V2DF 1 "register_operand" "")
2045 (parallel [(const_int 0)
2047 "TARGET_SSE3 && reload_completed"
2050 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2051 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2052 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rejected when operands 1 and 2 are both memory.  Operand 1 is tied
;; to the destination in every alternative.  Alternatives, in order:
;; unpcklpd with a register operand 2, movhpd loading operand 2 from
;; memory, and movlpd storing register operand 2 into the high half
;; (%H0) of an offsettable memory destination.
2056 (define_insn "sse2_unpcklpd"
2057 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2060 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2061 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2062 (parallel [(const_int 0)
2064 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2066 unpcklpd\t{%2, %0|%0, %2}
2067 movhpd\t{%2, %0|%0, %2}
2068 movlpd\t{%2, %H0|%H0, %2}"
2069 [(set_attr "type" "sselog,ssemov,ssemov")
2070 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd with an immediate mask in operand 3.  The mask
;; is decoded into explicit element selectors for sse2_shufpd_1; bit 1
;; of the mask picks 3 or 2 as the last selector.
2072 (define_expand "sse2_shufpd"
2073 [(match_operand:V2DF 0 "register_operand" "")
2074 (match_operand:V2DF 1 "register_operand" "")
2075 (match_operand:V2DF 2 "nonimmediate_operand" "")
2076 (match_operand:SI 3 "const_int_operand" "")]
2079 int mask = INTVAL (operands[3]);
2080 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2082 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selection spelled out as two constant operands:
;; operand 3 in 0..1 and operand 4 in 2..3.  The output code rebuilds
;; the immediate as operand 3 | ((operand 4 - 2) << 1) and overwrites
;; operands[3] with it so the template can print it as %3.
2086 (define_insn "sse2_shufpd_1"
2087 [(set (match_operand:V2DF 0 "register_operand" "=x")
2090 (match_operand:V2DF 1 "register_operand" "0")
2091 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2092 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2093 (match_operand 4 "const_2_to_3_operand" "")])))]
2097 mask = INTVAL (operands[3]);
2098 mask |= (INTVAL (operands[4]) - 2) << 1;
2099 operands[3] = GEN_INT (mask);
2101 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2103 [(set_attr "type" "sselog")
2104 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF into a DF destination.  Rejected when
;; both operands are memory.  The first alternative is movhpd from a
;; register to memory; the constraints list two further alternatives
;; (source tied to the destination, and an offsettable memory source).
2106 (define_insn "sse2_storehpd"
2107 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2109 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2110 (parallel [(const_int 1)])))]
2111 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2113 movhpd\t{%1, %0|%0, %1}
2116 [(set_attr "type" "ssemov,sselog1,ssemov")
2117 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload, extracting element 1 from a V2DF in memory into a
;; register: rewrite the source as a DFmode reference at byte offset 8
;; and emit a plain move.
2120 [(set (match_operand:DF 0 "register_operand" "")
2122 (match_operand:V2DF 1 "memory_operand" "")
2123 (parallel [(const_int 1)])))]
2124 "TARGET_SSE2 && reload_completed"
2125 [(set (match_dup 0) (match_dup 1))]
2127 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of a V2DF into a DF destination.  Rejected when
;; both operands are memory.  The first alternative is movlpd from a
;; register to memory; the constraints list two further alternatives
;; (register source, and memory source).
2130 (define_insn "sse2_storelpd"
2131 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2133 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2134 (parallel [(const_int 0)])))]
2135 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2137 movlpd\t{%1, %0|%0, %1}
2140 [(set_attr "type" "ssemov")
2141 (set_attr "mode" "V1DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF into a DF register:
;; emit a plain DF move whose source is operand 1 re-expressed in
;; DFmode, either as the same hard register number or via gen_lowpart.
2144 [(set (match_operand:DF 0 "register_operand" "")
2146 (match_operand:V2DF 1 "nonimmediate_operand" "")
2147 (parallel [(const_int 0)])))]
2148 "TARGET_SSE2 && reload_completed"
2151 rtx op1 = operands[1];
2153 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2155 op1 = gen_lowpart (DFmode, op1);
2156 emit_move_insn (operands[0], op1);
;; Build a V2DF from element 0 of operand 1 and the DF value in operand 2.
;; The register alternatives print movhpd, unpcklpd and shufpd; the last
;; alternative has a memory destination and attribute type "other".
;; Not matched when operands 1 and 2 are both memory.
2160 (define_insn "sse2_loadhpd"
2161 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2164 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2165 (parallel [(const_int 0)]))
2166 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2167 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2169 movhpd\t{%2, %0|%0, %2}
2170 unpcklpd\t{%2, %0|%0, %2}
2171 shufpd\t{$1, %1, %0|%0, %1, 1}
2173 [(set_attr "type" "ssemov,sselog,sselog,other")
2174 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; After reload, when the vector lives in memory, element 0 is re-read from
;; that same memory and only the second element is replaced by the DF
;; register: store the register directly at byte offset 8.
2177 [(set (match_operand:V2DF 0 "memory_operand" "")
2179 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2180 (match_operand:DF 1 "register_operand" "")))]
2181 "TARGET_SSE2 && reload_completed"
2182 [(set (match_dup 0) (match_dup 1))]
2184 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Build a V2DF from the DF value in operand 2 and element 1 of operand 1.
;; Operand 1 is a vector_move_operand, so the first alternative (constraint
;; "C") takes a constant there.  The last alternative has a memory
;; destination and attribute type "other".
;; Not matched when operands 1 and 2 are both memory.
2187 (define_insn "sse2_loadlpd"
2188 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2190 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2192 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2193 (parallel [(const_int 1)]))))]
2194 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2196 movsd\t{%2, %0|%0, %2}
2197 movlpd\t{%2, %0|%0, %2}
2198 movsd\t{%2, %0|%0, %2}
2199 shufpd\t{$2, %2, %0|%0, %2, 2}
2200 movhpd\t{%H1, %0|%0, %H1}
2202 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2203 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload, when the vector lives in memory, the DF register supplies
;; the first (low) element and element 1 is re-read from that same memory,
;; so only the low half changes: store the register at byte offset 0.
;; Offset 8 would overwrite element 1, which this pattern must preserve.
2206 [(set (match_operand:V2DF 0 "memory_operand" "")
2208 (match_operand:DF 1 "register_operand" "")
2209 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2210 "TARGET_SSE2 && reload_completed"
2211 [(set (match_dup 0) (match_dup 1))]
2213 operands[0] = adjust_address (operands[0], DFmode, 0);
;; movsd-style merge of two V2DF operands, with six alternatives covering
;; register, memory-source and memory-destination forms.  Every alternative
;; of the output template must close its {att|intel} dialect group with a
;; brace, including the two movhps alternatives at the end.
2216 (define_insn "sse2_movsd"
2217 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2219 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2220 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2224 movsd\t{%2, %0|%0, %2}
2225 movlpd\t{%2, %0|%0, %2}
2226 movlpd\t{%2, %0|%0, %2}
2227 shufpd\t{$2, %2, %0|%0, %2, 2}
2228 movhps\t{%H1, %0|%0, %H1}
2229 movhps\t{%1, %H0|%H0, %1}"
2230 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2231 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Duplicate a DF operand (register or memory) into a V2DF register with
;; movddup.
2233 (define_insn "*vec_dupv2df_sse3"
2234 [(set (match_operand:V2DF 0 "register_operand" "=x")
2236 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2238 "movddup\t{%1, %0|%0, %1}"
2239 [(set_attr "type" "sselog1")
2240 (set_attr "mode" "DF")])
;; Register-only V2DF duplicate: operand 1 must already be in the
;; destination register (constraint "0").
2242 (define_insn "*vec_dupv2df"
2243 [(set (match_operand:V2DF 0 "register_operand" "=x")
2245 (match_operand:DF 1 "register_operand" "0")))]
2248 [(set_attr "type" "sselog1")
2249 (set_attr "mode" "V4SF")])
;; movddup form of a V2DF concatenation; the output template references
;; only operand 1 (register or memory).
2251 (define_insn "*vec_concatv2df_sse3"
2252 [(set (match_operand:V2DF 0 "register_operand" "=x")
2254 (match_operand:DF 1 "nonimmediate_operand" "xm")
2257 "movddup\t{%1, %0|%0, %1}"
2258 [(set_attr "type" "sselog1")
2259 (set_attr "mode" "DF")])
;; Concatenate two DF values into a V2DF register.  Five alternatives:
;; unpcklpd and movhpd for the "Y" register class, movsd when operand 2
;; matches constraint "C", and movlhps / movhps for the "x" register class.
2261 (define_insn "*vec_concatv2df"
2262 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2264 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2265 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2268 unpcklpd\t{%2, %0|%0, %2}
2269 movhpd\t{%2, %0|%0, %2}
2270 movsd\t{%1, %0|%0, %1}
2271 movlhps\t{%2, %0|%0, %2}
2272 movhps\t{%2, %0|%0, %2}"
2273 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2274 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Element store into a V2DF register: hands the vector (operand 0), the DF
;; value (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set.
2276 (define_expand "vec_setv2df"
2277 [(match_operand:V2DF 0 "register_operand" "")
2278 (match_operand:DF 1 "register_operand" "")
2279 (match_operand 2 "const_int_operand" "")]
2282 ix86_expand_vector_set (false, operands[0], operands[1],
2283 INTVAL (operands[2]));
;; Element extract from a V2DF register: hands the DF destination
;; (operand 0), the vector (operand 1) and the constant element index
;; (operand 2) to ix86_expand_vector_extract.
2287 (define_expand "vec_extractv2df"
2288 [(match_operand:DF 0 "register_operand" "")
2289 (match_operand:V2DF 1 "register_operand" "")
2290 (match_operand 2 "const_int_operand" "")]
2293 ix86_expand_vector_extract (false, operands[0], operands[1],
2294 INTVAL (operands[2]));
;; V2DF vector initialization: operand 1 (unconstrained) describes the
;; initial value and is passed with the destination to
;; ix86_expand_vector_init.
2298 (define_expand "vec_initv2df"
2299 [(match_operand:V2DF 0 "register_operand" "")
2300 (match_operand 1 "" "")]
2303 ix86_expand_vector_init (false, operands[0], operands[1]);
2307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2309 ;; Parallel integral arithmetic
2311 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation for every SSEMODEI mode.  The preparation
;; statement materializes a zero vector in a register as operand 2 for the
;; pattern to use.
2313 (define_expand "neg<mode>2"
2314 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2317 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2319 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition for every SSEMODEI mode.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the insn is generated.
2321 (define_expand "add<mode>3"
2322 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2323 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2324 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2326 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Matching insn for integer vector addition: padd{b,w,d,q}.  Operand 1 is
;; tied to the destination; "%" marks operands 1 and 2 as commutative.
2328 (define_insn "*add<mode>3"
2329 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2331 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2332 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2333 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2334 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2335 [(set_attr "type" "sseiadd")
2336 (set_attr "mode" "TI")])
;; SS_PLUS addition on byte and word vectors (SSEMODE12), printed as
;; padds{b,w}.  Operands 1 and 2 are commutative.
2338 (define_insn "sse2_ssadd<mode>3"
2339 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2341 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2342 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2343 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2344 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2345 [(set_attr "type" "sseiadd")
2346 (set_attr "mode" "TI")])
;; US_PLUS addition on byte and word vectors (SSEMODE12), printed as
;; paddus{b,w}.  Operands 1 and 2 are commutative.
2348 (define_insn "sse2_usadd<mode>3"
2349 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2351 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2352 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2353 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2354 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2355 [(set_attr "type" "sseiadd")
2356 (set_attr "mode" "TI")])
;; Integer vector subtraction for every SSEMODEI mode.  Operand 1 must be a
;; register; operand 2 may be a register or memory.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the insn is generated.
2358 (define_expand "sub<mode>3"
2359 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2360 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2361 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2363 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Matching insn for integer vector subtraction: psub{b,w,d,q}.  Operand 1
;; is tied to the destination and carries no "%" commutativity marker.
2365 (define_insn "*sub<mode>3"
2366 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2368 (match_operand:SSEMODEI 1 "register_operand" "0")
2369 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2371 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2372 [(set_attr "type" "sseiadd")
2373 (set_attr "mode" "TI")])
;; psubs{b,w} on byte and word vectors (SSEMODE12); operand 1 is tied to the
;; destination.
2375 (define_insn "sse2_sssub<mode>3"
2376 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2378 (match_operand:SSEMODE12 1 "register_operand" "0")
2379 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2381 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2382 [(set_attr "type" "sseiadd")
2383 (set_attr "mode" "TI")])
;; psubus{b,w} on byte and word vectors (SSEMODE12); operand 1 is tied to
;; the destination.
2385 (define_insn "sse2_ussub<mode>3"
2386 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2388 (match_operand:SSEMODE12 1 "register_operand" "0")
2389 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2391 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2392 [(set_attr "type" "sseiadd")
2393 (set_attr "mode" "TI")])
;; V16QI multiply synthesized from word multiplies.  Twelve V16QI
;; temporaries are allocated; both inputs are unpacked with
;; punpckhbw/punpcklbw (each input unpacked with itself), the halves are
;; multiplied with mulv8hi3, and a chain of byte unpacks gathers the wanted
;; bytes of the products back into one vector.
2395 (define_expand "mulv16qi3"
2396 [(set (match_operand:V16QI 0 "register_operand" "")
2397 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2398 (match_operand:V16QI 2 "register_operand" "")))]
2404 for (i = 0; i < 12; ++i)
2405 t[i] = gen_reg_rtx (V16QImode);
2407 /* Unpack data such that we've got a source byte in each low byte of
2408 each word. We don't care what goes into the high byte of each word.
2409 Rather than trying to get zero in there, most convenient is to let
2410 it be a copy of the low byte. */
2411 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2412 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2413 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2414 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2416 /* Multiply words. The end-of-line annotations here give a picture of what
2417 the output of that instruction looks like. Dot means don't care; the
2418 letters are the bytes of the result with A being the most significant. */
2419 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2420 gen_lowpart (V8HImode, t[0]),
2421 gen_lowpart (V8HImode, t[1])));
2422 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2423 gen_lowpart (V8HImode, t[2]),
2424 gen_lowpart (V8HImode, t[3])));
2426 /* Extract the relevant bytes and merge them back together. */
2427 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2428 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2429 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2430 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2431 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2432 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2435 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
2439 (define_expand "mulv8hi3"
2440 [(set (match_operand:V8HI 0 "register_operand" "")
2441 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2442 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2444 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the V8HI multiply: pmullw, commutative in operands 1
;; and 2, with operand 1 tied to the destination.
2446 (define_insn "*mulv8hi3"
2447 [(set (match_operand:V8HI 0 "register_operand" "=x")
2448 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2449 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2450 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2451 "pmullw\t{%2, %0|%0, %2}"
2452 [(set_attr "type" "sseimul")
2453 (set_attr "mode" "TI")])
;; pmulhw: the "smul highpart" form of the V8HI multiply; operands 1 and 2
;; are commutative and operand 1 is tied to the destination.
2455 (define_insn "sse2_smulv8hi3_highpart"
2456 [(set (match_operand:V8HI 0 "register_operand" "=x")
2461 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2463 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2465 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2466 "pmulhw\t{%2, %0|%0, %2}"
2467 [(set_attr "type" "sseimul")
2468 (set_attr "mode" "TI")])
;; pmulhuw: the "umul highpart" form of the V8HI multiply; operands 1 and 2
;; are commutative and operand 1 is tied to the destination.
2470 (define_insn "sse2_umulv8hi3_highpart"
2471 [(set (match_operand:V8HI 0 "register_operand" "=x")
2476 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2478 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2480 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2481 "pmulhuw\t{%2, %0|%0, %2}"
2482 [(set_attr "type" "sseimul")
2483 (set_attr "mode" "TI")])
;; pmuludq: multiplies elements 0 and 2 of the two V4SI inputs, giving a
;; V2DI result.  Operands 1 and 2 are commutative.  The operands being
;; validated are V4SI, so that is the mode handed to
;; ix86_binary_operator_ok.
2485 (define_insn "sse2_umulv2siv2di3"
2486 [(set (match_operand:V2DI 0 "register_operand" "=x")
2490 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2491 (parallel [(const_int 0) (const_int 2)])))
2494 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2495 (parallel [(const_int 0) (const_int 2)])))))]
2496 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2497 "pmuludq\t{%2, %0|%0, %2}"
2498 [(set_attr "type" "sseimul")
2499 (set_attr "mode" "TI")])
;; pmaddwd: V8HI inputs, V4SI result.  The RTL takes two V4HI selections
;; from each input, one whose index list starts at element 0 and one whose
;; index list starts at element 1 and ends at element 7.
2501 (define_insn "sse2_pmaddwd"
2502 [(set (match_operand:V4SI 0 "register_operand" "=x")
2507 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2508 (parallel [(const_int 0)
2514 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2515 (parallel [(const_int 0)
2521 (vec_select:V4HI (match_dup 1)
2522 (parallel [(const_int 1)
2527 (vec_select:V4HI (match_dup 2)
2528 (parallel [(const_int 1)
2531 (const_int 7)]))))))]
2533 "pmaddwd\t{%2, %0|%0, %2}"
2534 [(set_attr "type" "sseiadd")
2535 (set_attr "mode" "TI")])
;; V4SI multiply synthesized from sse2_umulv2siv2di3.  Elements 0 and 2 are
;; multiplied directly; elements 1 and 3 are multiplied after both inputs
;; are shifted right by 32 bits as TImode values.  The two product vectors
;; are then rearranged with sse2_pshufd_1 and merged with punpckldq.
2537 (define_expand "mulv4si3"
2538 [(set (match_operand:V4SI 0 "register_operand" "")
2539 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2540 (match_operand:V4SI 2 "register_operand" "")))]
2543 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2549 t1 = gen_reg_rtx (V4SImode);
2550 t2 = gen_reg_rtx (V4SImode);
2551 t3 = gen_reg_rtx (V4SImode);
2552 t4 = gen_reg_rtx (V4SImode);
2553 t5 = gen_reg_rtx (V4SImode);
2554 t6 = gen_reg_rtx (V4SImode);
2555 thirtytwo = GEN_INT (32);
2557 /* Multiply elements 2 and 0. */
2558 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2560 /* Shift both input vectors down one element, so that elements 3 and 1
2561 are now in the slots for elements 2 and 0. For K8, at least, this is
2562 faster than using a shuffle. */
2563 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2564 gen_lowpart (TImode, op1), thirtytwo));
2565 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2566 gen_lowpart (TImode, op2), thirtytwo));
2568 /* Multiply elements 3 and 1. */
2569 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2571 /* Move the results in element 2 down to element 1; we don't care what
2572 goes in elements 2 and 3. */
2573 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2574 const0_rtx, const0_rtx));
2575 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2576 const0_rtx, const0_rtx));
2578 /* Merge the parts back together. */
2579 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply built from sse2_umulv2siv2di3 products: the low-by-low
;; product plus the two cross products (high half of one input times low
;; half of the other), each shifted left by 32 bits, summed with addv2di3.
2583 (define_expand "mulv2di3"
2584 [(set (match_operand:V2DI 0 "register_operand" "")
2585 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2586 (match_operand:V2DI 2 "register_operand" "")))]
2589 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2595 t1 = gen_reg_rtx (V2DImode);
2596 t2 = gen_reg_rtx (V2DImode);
2597 t3 = gen_reg_rtx (V2DImode);
2598 t4 = gen_reg_rtx (V2DImode);
2599 t5 = gen_reg_rtx (V2DImode);
2600 t6 = gen_reg_rtx (V2DImode);
2601 thirtytwo = GEN_INT (32);
2603 /* Multiply low parts. */
2604 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2605 gen_lowpart (V4SImode, op2)));
2607 /* Shift input vectors right 32 bits so we can multiply high parts. */
2608 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2609 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2611 /* Multiply high parts by low parts. */
2612 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2613 gen_lowpart (V4SImode, t3)));
2614 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2615 gen_lowpart (V4SImode, t2)));
2617 /* Shift them back. */
2618 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2619 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2621 /* Add the three parts together. */
2622 emit_insn (gen_addv2di3 (t6, t1, t4));
2623 emit_insn (gen_addv2di3 (op0, t6, t5));
;; psraw / psrad on V8HI and V4SI (SSEMODE24).  The SI count in operand 2 is
;; either an SSE register or an immediate (constraint "xi").
2627 (define_insn "ashr<mode>3"
2628 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2630 (match_operand:SSEMODE24 1 "register_operand" "0")
2631 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2633 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2634 [(set_attr "type" "sseishft")
2635 (set_attr "mode" "TI")])
;; Logical right shift psrl{w,d,q} on V8HI, V4SI and V2DI (SSEMODE248).  The
;; SI count in operand 2 is either an SSE register or an immediate.
2637 (define_insn "lshr<mode>3"
2638 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2639 (lshiftrt:SSEMODE248
2640 (match_operand:SSEMODE248 1 "register_operand" "0")
2641 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2643 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2644 [(set_attr "type" "sseishft")
2645 (set_attr "mode" "TI")])
;; psll{w,d,q} on V8HI, V4SI and V2DI (SSEMODE248).  The SI count in
;; operand 2 is either an SSE register or an immediate.
2647 (define_insn "ashl<mode>3"
2648 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2650 (match_operand:SSEMODE248 1 "register_operand" "0")
2651 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2653 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2654 [(set_attr "type" "sseishft")
2655 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift, printed as pslldq.  Operand 2 is a
;; bit count accepted by const_0_to_255_mul_8_operand; it is divided by 8
;; before printing, which turns it into a byte count for the instruction.
2657 (define_insn "sse2_ashlti3"
2658 [(set (match_operand:TI 0 "register_operand" "=x")
2659 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2660 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2663 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2664 return "pslldq\t{%2, %0|%0, %2}";
2666 [(set_attr "type" "sseishft")
2667 (set_attr "mode" "TI")])
;; Whole-register (TImode) logical right shift, printed as psrldq.
;; Operand 2 is a bit count accepted by const_0_to_255_mul_8_operand; it is
;; divided by 8 before printing, which turns it into a byte count.
2669 (define_insn "sse2_lshrti3"
2670 [(set (match_operand:TI 0 "register_operand" "=x")
2671 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2672 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2675 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2676 return "psrldq\t{%2, %0|%0, %2}";
2678 [(set_attr "type" "sseishft")
2679 (set_attr "mode" "TI")])
;; Unsigned byte maximum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
2681 (define_expand "umaxv16qi3"
2682 [(set (match_operand:V16QI 0 "register_operand" "")
2683 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2684 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2686 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matching insn for the unsigned byte maximum: pmaxub, commutative in
;; operands 1 and 2.
2688 (define_insn "*umaxv16qi3"
2689 [(set (match_operand:V16QI 0 "register_operand" "=x")
2690 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2691 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2692 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2693 "pmaxub\t{%2, %0|%0, %2}"
2694 [(set_attr "type" "sseiadd")
2695 (set_attr "mode" "TI")])
;; Signed word maximum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
2697 (define_expand "smaxv8hi3"
2698 [(set (match_operand:V8HI 0 "register_operand" "")
2699 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2700 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2702 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matching insn for the signed word maximum: pmaxsw, commutative in
;; operands 1 and 2.
2704 (define_insn "*smaxv8hi3"
2705 [(set (match_operand:V8HI 0 "register_operand" "=x")
2706 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2707 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2708 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2709 "pmaxsw\t{%2, %0|%0, %2}"
2710 [(set_attr "type" "sseiadd")
2711 (set_attr "mode" "TI")])
;; Unsigned byte minimum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy using the UMIN code, the same rtx code
;; the pattern itself and the matching *uminv16qi3 insn use.
2713 (define_expand "uminv16qi3"
2714 [(set (match_operand:V16QI 0 "register_operand" "")
2715 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2716 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2718 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matching insn for the unsigned byte minimum: pminub, commutative in
;; operands 1 and 2.
2720 (define_insn "*uminv16qi3"
2721 [(set (match_operand:V16QI 0 "register_operand" "=x")
2722 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2723 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2724 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2725 "pminub\t{%2, %0|%0, %2}"
2726 [(set_attr "type" "sseiadd")
2727 (set_attr "mode" "TI")])
;; Signed word minimum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
2729 (define_expand "sminv8hi3"
2730 [(set (match_operand:V8HI 0 "register_operand" "")
2731 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2732 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2734 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matching insn for the signed word minimum: pminsw, commutative in
;; operands 1 and 2.
2736 (define_insn "*sminv8hi3"
2737 [(set (match_operand:V8HI 0 "register_operand" "=x")
2738 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2739 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2740 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2741 "pminsw\t{%2, %0|%0, %2}"
2742 [(set_attr "type" "sseiadd")
2743 (set_attr "mode" "TI")])
2745 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2747 ;; Parallel integral comparisons
2749 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare on byte, word and dword vectors
;; (SSEMODE124), printed as pcmpeq{b,w,d}.  Operands 1 and 2 are
;; commutative.
2751 (define_insn "sse2_eq<mode>3"
2752 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2754 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2755 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2756 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2757 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2758 [(set_attr "type" "ssecmp")
2759 (set_attr "mode" "TI")])
;; pcmpgt{b,w,d} on byte, word and dword vectors (SSEMODE124).  Operand 1
;; must be a register tied to the destination and carries no "%"
;; commutativity marker.
2761 (define_insn "sse2_gt<mode>3"
2762 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2764 (match_operand:SSEMODE124 1 "register_operand" "0")
2765 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2767 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2768 [(set_attr "type" "ssecmp")
2769 (set_attr "mode" "TI")])
;; Vector conditional select for SSEMODE124: operand 3 is the comparison
;; applied to operands 4 and 5, operands 1 and 2 are the two values selected
;; between.  Expansion is delegated to ix86_expand_int_vcond with its second
;; argument false.
2771 (define_expand "vcond<mode>"
2772 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2773 (if_then_else:SSEMODE124
2774 (match_operator 3 ""
2775 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2776 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2777 (match_operand:SSEMODE124 1 "general_operand" "")
2778 (match_operand:SSEMODE124 2 "general_operand" "")))]
2781 if (ix86_expand_int_vcond (operands, false))
;; Vector conditional select for SSEMODE12, laid out like vcond<mode>.
;; Expansion is delegated to ix86_expand_int_vcond with its second argument
;; true.
2787 (define_expand "vcondu<mode>"
2788 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2789 (if_then_else:SSEMODE12
2790 (match_operator 3 ""
2791 [(match_operand:SSEMODE12 4 "nonimmediate_operand" "")
2792 (match_operand:SSEMODE12 5 "nonimmediate_operand" "")])
2793 (match_operand:SSEMODE12 1 "general_operand" "")
2794 (match_operand:SSEMODE12 2 "general_operand" "")))]
2797 if (ix86_expand_int_vcond (operands, true))
2803 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2805 ;; Parallel integral logical operations
2807 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise complement expressed as an XOR.  The preparation code builds a
;; CONST_VECTOR with every element set to constm1_rtx (one per vector unit)
;; and forces it into a register as operand 2.
2809 (define_expand "one_cmpl<mode>2"
2810 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2811 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2815 int i, n = GET_MODE_NUNITS (<MODE>mode);
2816 rtvec v = rtvec_alloc (n);
2818 for (i = 0; i < n; ++i)
2819 RTVEC_ELT (v, i) = constm1_rtx;
2821 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for every SSEMODEI mode.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the insn is generated.
2824 (define_expand "and<mode>3"
2825 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2826 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2827 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2829 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Matching insn for the bitwise AND: pand, commutative in operands 1 and 2.
2831 (define_insn "*and<mode>3"
2832 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2834 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2835 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2836 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
2837 "pand\t{%2, %0|%0, %2}"
2838 [(set_attr "type" "sselog")
2839 (set_attr "mode" "TI")])
;; pandn: operand 1 is the complemented input (wrapped in not:) and is tied
;; to the destination; operand 2 may be a register or memory.
2841 (define_insn "sse2_nand<mode>3"
2842 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2844 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
2845 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2847 "pandn\t{%2, %0|%0, %2}"
2848 [(set_attr "type" "sselog")
2849 (set_attr "mode" "TI")])
;; Bitwise OR expander for every SSEMODEI mode.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the insn is generated.
2851 (define_expand "ior<mode>3"
2852 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2853 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2854 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2856 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Matching insn for the bitwise OR: por, commutative in operands 1 and 2.
2858 (define_insn "*ior<mode>3"
2859 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2861 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2862 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2863 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
2864 "por\t{%2, %0|%0, %2}"
2865 [(set_attr "type" "sselog")
2866 (set_attr "mode" "TI")])
;; Bitwise XOR expander for every SSEMODEI mode.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the insn is generated.
2868 (define_expand "xor<mode>3"
2869 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2870 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2871 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2873 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Matching insn for the bitwise XOR: pxor, commutative in operands 1 and 2.
2875 (define_insn "*xor<mode>3"
2876 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2878 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2879 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2880 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
2881 "pxor\t{%2, %0|%0, %2}"
2882 [(set_attr "type" "sselog")
2883 (set_attr "mode" "TI")])
2885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2887 ;; Parallel integral element swizzling
2889 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; packsswb: narrows the two V8HI inputs (operands 1 and 2) into a single
;; V16QI result; operand 1 is tied to the destination.
2891 (define_insn "sse2_packsswb"
2892 [(set (match_operand:V16QI 0 "register_operand" "=x")
2895 (match_operand:V8HI 1 "register_operand" "0"))
2897 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
2899 "packsswb\t{%2, %0|%0, %2}"
2900 [(set_attr "type" "sselog")
2901 (set_attr "mode" "TI")])
;; packssdw: narrows the two V4SI inputs (operands 1 and 2) into a single
;; V8HI result; operand 1 is tied to the destination.
2903 (define_insn "sse2_packssdw"
2904 [(set (match_operand:V8HI 0 "register_operand" "=x")
2907 (match_operand:V4SI 1 "register_operand" "0"))
2909 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
2911 "packssdw\t{%2, %0|%0, %2}"
2912 [(set_attr "type" "sselog")
2913 (set_attr "mode" "TI")])
;; packuswb: narrows the two V8HI inputs (operands 1 and 2) into a single
;; V16QI result; operand 1 is tied to the destination.
2915 (define_insn "sse2_packuswb"
2916 [(set (match_operand:V16QI 0 "register_operand" "=x")
2919 (match_operand:V8HI 1 "register_operand" "0"))
2921 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
2923 "packuswb\t{%2, %0|%0, %2}"
2924 [(set_attr "type" "sselog")
2925 (set_attr "mode" "TI")])
;; punpckhbw: interleaves bytes 8..15 of operand 1 with bytes 8..15 of
;; operand 2 (selector indices 24..31 refer to operand 2).
2927 (define_insn "sse2_punpckhbw"
2928 [(set (match_operand:V16QI 0 "register_operand" "=x")
2931 (match_operand:V16QI 1 "register_operand" "0")
2932 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
2933 (parallel [(const_int 8) (const_int 24)
2934 (const_int 9) (const_int 25)
2935 (const_int 10) (const_int 26)
2936 (const_int 11) (const_int 27)
2937 (const_int 12) (const_int 28)
2938 (const_int 13) (const_int 29)
2939 (const_int 14) (const_int 30)
2940 (const_int 15) (const_int 31)])))]
2942 "punpckhbw\t{%2, %0|%0, %2}"
2943 [(set_attr "type" "sselog")
2944 (set_attr "mode" "TI")])
;; punpcklbw: interleaves bytes 0..7 of operand 1 with bytes 0..7 of
;; operand 2 (selector indices 16..23 refer to operand 2).
2946 (define_insn "sse2_punpcklbw"
2947 [(set (match_operand:V16QI 0 "register_operand" "=x")
2950 (match_operand:V16QI 1 "register_operand" "0")
2951 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
2952 (parallel [(const_int 0) (const_int 16)
2953 (const_int 1) (const_int 17)
2954 (const_int 2) (const_int 18)
2955 (const_int 3) (const_int 19)
2956 (const_int 4) (const_int 20)
2957 (const_int 5) (const_int 21)
2958 (const_int 6) (const_int 22)
2959 (const_int 7) (const_int 23)])))]
2961 "punpcklbw\t{%2, %0|%0, %2}"
2962 [(set_attr "type" "sselog")
2963 (set_attr "mode" "TI")])
;; punpckhwd: interleaves words 4..7 of operand 1 with words 4..7 of
;; operand 2 (selector indices 12..15 refer to operand 2).
2965 (define_insn "sse2_punpckhwd"
2966 [(set (match_operand:V8HI 0 "register_operand" "=x")
2969 (match_operand:V8HI 1 "register_operand" "0")
2970 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
2971 (parallel [(const_int 4) (const_int 12)
2972 (const_int 5) (const_int 13)
2973 (const_int 6) (const_int 14)
2974 (const_int 7) (const_int 15)])))]
2976 "punpckhwd\t{%2, %0|%0, %2}"
2977 [(set_attr "type" "sselog")
2978 (set_attr "mode" "TI")])
;; punpcklwd: interleaves words 0..3 of operand 1 with words 0..3 of
;; operand 2 (selector indices 8..11 refer to operand 2).
2980 (define_insn "sse2_punpcklwd"
2981 [(set (match_operand:V8HI 0 "register_operand" "=x")
2984 (match_operand:V8HI 1 "register_operand" "0")
2985 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
2986 (parallel [(const_int 0) (const_int 8)
2987 (const_int 1) (const_int 9)
2988 (const_int 2) (const_int 10)
2989 (const_int 3) (const_int 11)])))]
2991 "punpcklwd\t{%2, %0|%0, %2}"
2992 [(set_attr "type" "sselog")
2993 (set_attr "mode" "TI")])
;; punpckhdq: interleaves dwords 2..3 of operand 1 with dwords 2..3 of
;; operand 2 (selector indices 6..7 refer to operand 2).
2995 (define_insn "sse2_punpckhdq"
2996 [(set (match_operand:V4SI 0 "register_operand" "=x")
2999 (match_operand:V4SI 1 "register_operand" "0")
3000 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3001 (parallel [(const_int 2) (const_int 6)
3002 (const_int 3) (const_int 7)])))]
3004 "punpckhdq\t{%2, %0|%0, %2}"
3005 [(set_attr "type" "sselog")
3006 (set_attr "mode" "TI")])
;; punpckldq: interleaves dwords 0..1 of operand 1 with dwords 0..1 of
;; operand 2 (selector indices 4..5 refer to operand 2).
3008 (define_insn "sse2_punpckldq"
3009 [(set (match_operand:V4SI 0 "register_operand" "=x")
3012 (match_operand:V4SI 1 "register_operand" "0")
3013 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3014 (parallel [(const_int 0) (const_int 4)
3015 (const_int 1) (const_int 5)])))]
3017 "punpckldq\t{%2, %0|%0, %2}"
3018 [(set_attr "type" "sselog")
3019 (set_attr "mode" "TI")])
;; punpckhqdq on V2DI operands; the element selection starts with index 1
;; of operand 1, which is tied to the destination.
3021 (define_insn "sse2_punpckhqdq"
3022 [(set (match_operand:V2DI 0 "register_operand" "=x")
3025 (match_operand:V2DI 1 "register_operand" "0")
3026 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3027 (parallel [(const_int 1)
3030 "punpckhqdq\t{%2, %0|%0, %2}"
3031 [(set_attr "type" "sselog")
3032 (set_attr "mode" "TI")])
;; punpcklqdq on V2DI operands; the element selection starts with index 0
;; of operand 1, which is tied to the destination.
3034 (define_insn "sse2_punpcklqdq"
3035 [(set (match_operand:V2DI 0 "register_operand" "=x")
3038 (match_operand:V2DI 1 "register_operand" "0")
3039 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3040 (parallel [(const_int 0)
3043 "punpcklqdq\t{%2, %0|%0, %2}"
3044 [(set_attr "type" "sselog")
3045 (set_attr "mode" "TI")])
;; Expander for pinsrw.  The SI value in operand 2 is narrowed to HImode
;; with gen_lowpart, and the element index n in operand 3 (0..7) is replaced
;; by the one-hot mask 1 << n that *sse2_pinsrw matches.
3047 (define_expand "sse2_pinsrw"
3048 [(set (match_operand:V8HI 0 "register_operand" "")
3051 (match_operand:SI 2 "nonimmediate_operand" ""))
3052 (match_operand:V8HI 1 "register_operand" "")
3053 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3056 operands[2] = gen_lowpart (HImode, operands[2]);
3057 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; pinsrw insn.  Operand 3 is a power of two accepted by
;; const_pow2_1_to_128_operand; exact_log2 converts it back to an element
;; index for the printed immediate.  The HI source is printed with the %k
;; modifier.
3060 (define_insn "*sse2_pinsrw"
3061 [(set (match_operand:V8HI 0 "register_operand" "=x")
3064 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3065 (match_operand:V8HI 1 "register_operand" "0")
3066 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3069 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3070 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3072 [(set_attr "type" "sselog")
3073 (set_attr "mode" "TI")])
;; pextrw: extracts the V8HI element whose index (0..7) is operand 2 into a
;; general-purpose SI register.
3075 (define_insn "sse2_pextrw"
3076 [(set (match_operand:SI 0 "register_operand" "=r")
3079 (match_operand:V8HI 1 "register_operand" "x")
3080 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3082 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3083 [(set_attr "type" "sselog")
3084 (set_attr "mode" "TI")])
;; Expander for pshufd: splits the immediate in operand 2 into four 2-bit
;; element selectors (bits 0-1, 2-3, 4-5, 6-7) and emits sse2_pshufd_1.
3086 (define_expand "sse2_pshufd"
3087 [(match_operand:V4SI 0 "register_operand" "")
3088 (match_operand:V4SI 1 "nonimmediate_operand" "")
3089 (match_operand:SI 2 "const_int_operand" "")]
3092 int mask = INTVAL (operands[2]);
3093 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3094 GEN_INT ((mask >> 0) & 3),
3095 GEN_INT ((mask >> 2) & 3),
3096 GEN_INT ((mask >> 4) & 3),
3097 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the four element selectors (each 0..3) as operands 2..5.  The
;; output code packs them back into one immediate, two bits per selector,
;; and stores it in operand 2 before printing.
3101 (define_insn "sse2_pshufd_1"
3102 [(set (match_operand:V4SI 0 "register_operand" "=x")
3104 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3105 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3106 (match_operand 3 "const_0_to_3_operand" "")
3107 (match_operand 4 "const_0_to_3_operand" "")
3108 (match_operand 5 "const_0_to_3_operand" "")])))]
3112 mask |= INTVAL (operands[2]) << 0;
3113 mask |= INTVAL (operands[3]) << 2;
3114 mask |= INTVAL (operands[4]) << 4;
3115 mask |= INTVAL (operands[5]) << 6;
3116 operands[2] = GEN_INT (mask);
3118 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3120 [(set_attr "type" "sselog1")
3121 (set_attr "mode" "TI")])
;; Expander for pshuflw: splits the immediate in operand 2 into four 2-bit
;; element selectors (values 0..3) and emits sse2_pshuflw_1.
3123 (define_expand "sse2_pshuflw"
3124 [(match_operand:V8HI 0 "register_operand" "")
3125 (match_operand:V8HI 1 "nonimmediate_operand" "")
3126 (match_operand:SI 2 "const_int_operand" "")]
3129 int mask = INTVAL (operands[2]);
3130 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3131 GEN_INT ((mask >> 0) & 3),
3132 GEN_INT ((mask >> 2) & 3),
3133 GEN_INT ((mask >> 4) & 3),
3134 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with the first four element selectors (each 0..3) as operands
;; 2..5.  The output code packs them back into one immediate, two bits per
;; selector, and stores it in operand 2 before printing.
3138 (define_insn "sse2_pshuflw_1"
3139 [(set (match_operand:V8HI 0 "register_operand" "=x")
3141 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3142 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3143 (match_operand 3 "const_0_to_3_operand" "")
3144 (match_operand 4 "const_0_to_3_operand" "")
3145 (match_operand 5 "const_0_to_3_operand" "")
3153 mask |= INTVAL (operands[2]) << 0;
3154 mask |= INTVAL (operands[3]) << 2;
3155 mask |= INTVAL (operands[4]) << 4;
3156 mask |= INTVAL (operands[5]) << 6;
3157 operands[2] = GEN_INT (mask);
3159 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3161 [(set_attr "type" "sselog")
3162 (set_attr "mode" "TI")])
;; Expander for pshufhw: splits the immediate in operand 2 into four 2-bit
;; fields and adds 4 to each, so sse2_pshufhw_1 receives element indices in
;; the range 4..7.
3164 (define_expand "sse2_pshufhw"
3165 [(match_operand:V8HI 0 "register_operand" "")
3166 (match_operand:V8HI 1 "nonimmediate_operand" "")
3167 (match_operand:SI 2 "const_int_operand" "")]
3170 int mask = INTVAL (operands[2]);
3171 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3172 GEN_INT (((mask >> 0) & 3) + 4),
3173 GEN_INT (((mask >> 2) & 3) + 4),
3174 GEN_INT (((mask >> 4) & 3) + 4),
3175 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn that emits pshufhw for V8HI operands.  The selector list begins with
;; a fixed (const_int 0); operands 2-5 are selectors restricted to 4..7.
;; The output code removes the +4 bias from each selector and packs the
;; results into one immediate (two bits each, operand 2 lowest), which
;; replaces operands[2] for the template.
3179 (define_insn "sse2_pshufhw_1"
3180 [(set (match_operand:V8HI 0 "register_operand" "=x")
3182 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3183 (parallel [(const_int 0)
3187 (match_operand 2 "const_4_to_7_operand" "")
3188 (match_operand 3 "const_4_to_7_operand" "")
3189 (match_operand 4 "const_4_to_7_operand" "")
3190 (match_operand 5 "const_4_to_7_operand" "")])))]
3194 mask |= (INTVAL (operands[2]) - 4) << 0;
3195 mask |= (INTVAL (operands[3]) - 4) << 2;
3196 mask |= (INTVAL (operands[4]) - 4) << 4;
3197 mask |= (INTVAL (operands[5]) - 4) << 6;
3198 operands[2] = GEN_INT (mask);
3200 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3202 [(set_attr "type" "sselog")
3203 (set_attr "mode" "TI")])
;; Expander producing a V4SI register (operand 0) from an SI register or
;; memory value (operand 1).  The preparation statement supplies operand 2
;; as the all-zero V4SI constant.
3205 (define_expand "sse2_loadd"
3206 [(set (match_operand:V4SI 0 "register_operand" "")
3209 (match_operand:SI 1 "nonimmediate_operand" ""))
3213 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn with three alternatives that move SI operand 2 into V4SI register
;; operand 0: movd from memory or a general register, movss from memory, and
;; movss from an SSE register.  Operand 1 uses constraint C in the first two
;; alternatives (presumably the zero vector -- confirm against the constraint
;; definitions) and is tied to the destination in the third.
3215 (define_insn "sse2_loadld"
3216 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3219 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3220 (match_operand:V4SI 1 "vector_move_operand" " C,C,0")
3224 movd\t{%2, %0|%0, %2}
3225 movss\t{%2, %0|%0, %2}
3226 movss\t{%2, %0|%0, %2}"
3227 [(set_attr "type" "ssemov")
3228 (set_attr "mode" "TI,V4SF,SF")])
;; Insn-and-split that stores element 0 of a V4SI register (operand 1) to an
;; SI memory or SSE register destination (operand 0).  Once reload has
;; completed it is split into a plain SImode move, with operand 1 rewritten
;; as the SImode view of the same hard register number.
3230 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3231 ;; be taken into account, and movdi isn't fully populated even without.
3232 (define_insn_and_split "sse2_stored"
3233 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3235 (match_operand:V4SI 1 "register_operand" "x")
3236 (parallel [(const_int 0)])))]
3239 "&& reload_completed"
3240 [(set (match_dup 0) (match_dup 1))]
3242 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander for storing element 0 of a V2DI register (operand 1) into a DI
;; register or memory destination (operand 0).
3245 (define_expand "sse_storeq"
3246 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3248 (match_operand:V2DI 1 "register_operand" "")
3249 (parallel [(const_int 0)])))]
;; Insn matching a store of element 0 of a V2DI SSE register (operand 1) to
;; a DI destination that is either memory or an SSE register (operand 0).
3253 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3254 ;; be taken into account, and movdi isn't fully populated even without.
3255 (define_insn "*sse2_storeq"
3256 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3258 (match_operand:V2DI 1 "register_operand" "x")
3259 (parallel [(const_int 0)])))]
;; NOTE(review): the opening form of this definition is not visible here;
;; it looks like a split for the DI element-0 store.  When TARGET_SSE holds
;; and reload has completed, the matched store is replaced by a plain DImode
;; move, with operand 1 rewritten as the DImode view of the same hard
;; register number.
3264 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3266 (match_operand:V2DI 1 "register_operand" "")
3267 (parallel [(const_int 0)])))]
3268 "TARGET_SSE && reload_completed"
3269 [(set (match_dup 0) (match_dup 1))]
3271 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Insn building a V4SI register (operand 0) from an SI register (operand 1);
;; by its name, a broadcast of that value.  Alternative 0 emits pshufd with
;; immediate 0 from a separate source register; alternative 1 requires the
;; source to be tied to the destination and emits shufps with immediate 0 on
;; the destination itself.
3274 (define_insn "*vec_dupv4si"
3275 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3277 (match_operand:SI 1 "register_operand" " Y,0")))]
3280 pshufd\t{$0, %1, %0|%0, %1, 0}
3281 shufps\t{$0, %0, %0|%0, %0, 0}"
3282 [(set_attr "type" "sselog1")
3283 (set_attr "mode" "TI,V4SF")])
;; Insn building a V2DI register (operand 0) from a DI register (operand 1);
;; by its name, a broadcast of that value.  Both alternatives require the
;; source to be tied to the destination register.
;; NOTE(review): the output templates are not visible in this view.
3285 (define_insn "*vec_dupv2di"
3286 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3288 (match_operand:DI 1 "register_operand" " 0,0")))]
3293 [(set_attr "type" "sselog1,ssemov")
3294 (set_attr "mode" "TI,V4SF")])
;; Insn forming a V2SI register (operand 0) from two SI values (operands 1
;; and 2), with four alternatives: two using SSE registers (Y) and two using
;; MMX registers (*y).  When operand 1 is tied to the destination and operand
;; 2 is a register, punpckldq is emitted; when operand 2 uses constraint C,
;; a movd of operand 1 is emitted instead.
3296 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3297 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3298 ;; alternatives pretty much forces the MMX alternative to be chosen.
3299 (define_insn "*sse2_concatv2si"
3300 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3302 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3303 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3306 punpckldq\t{%2, %0|%0, %2}
3307 movd\t{%1, %0|%0, %1}
3308 punpckldq\t{%2, %0|%0, %2}
3309 movd\t{%1, %0|%0, %1}"
3310 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3311 (set_attr "mode" "TI,TI,DI,DI")])
;; Insn forming a V2SI register (operand 0) from two SI values (operands 1
;; and 2).  The two SSE-register alternatives emit unpcklps (operand 1 tied
;; to the destination) or movss from memory (operand 2 under constraint C);
;; the two MMX-register alternatives emit punpckldq or movd respectively.
3313 (define_insn "*sse1_concatv2si"
3314 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3316 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3317 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3320 unpcklps\t{%2, %0|%0, %2}
3321 movss\t{%1, %0|%0, %1}
3322 punpckldq\t{%2, %0|%0, %2}
3323 movd\t{%1, %0|%0, %1}"
3324 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3325 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Insn forming a V4SI register (operand 0) from two V2SI values.  Operand 1
;; is always tied to the destination; operand 2 is merged in with
;; punpcklqdq or movlhps when it is a register, or movhps when it is memory.
3327 (define_insn "*vec_concatv4si_1"
3328 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3330 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3331 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3334 punpcklqdq\t{%2, %0|%0, %2}
3335 movlhps\t{%2, %0|%0, %2}
3336 movhps\t{%2, %0|%0, %2}"
3337 [(set_attr "type" "sselog,ssemov,ssemov")
3338 (set_attr "mode" "TI,V4SF,V2SF")])
;; Insn forming a V2DI register (operand 0) from two DI values, with six
;; alternatives.  The first two have operand 2 under constraint C and load
;; operand 1 with movq (from memory) or movq2dq (from an MMX register).
;; The next three have operand 1 tied to the destination and merge operand 2
;; with punpcklqdq, movlhps or movhps.  The last has operand 2 tied to the
;; destination and loads operand 1 from memory with movlps.
3340 (define_insn "*vec_concatv2di"
3341 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3343 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3344 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3347 movq\t{%1, %0|%0, %1}
3348 movq2dq\t{%1, %0|%0, %1}
3349 punpcklqdq\t{%2, %0|%0, %2}
3350 movlhps\t{%2, %0|%0, %2}
3351 movhps\t{%2, %0|%0, %2}
3352 movlps\t{%1, %0|%0, %1}"
3353 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3354 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander that hands the work to ix86_expand_vector_set: operand 0 is the
;; V2DI register, operand 1 a DI register value, and operand 2 an integer
;; constant whose value is passed on (presumably the element index -- confirm
;; against ix86_expand_vector_set).  The leading argument is always false.
3356 (define_expand "vec_setv2di"
3357 [(match_operand:V2DI 0 "register_operand" "")
3358 (match_operand:DI 1 "register_operand" "")
3359 (match_operand 2 "const_int_operand" "")]
3362 ix86_expand_vector_set (false, operands[0], operands[1],
3363 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_extract: operand 0 is
;; the DI register result, operand 1 the V2DI register, and operand 2 an
;; integer constant whose value is passed on (presumably the element index --
;; confirm against ix86_expand_vector_extract).  The leading argument is
;; always false.
3367 (define_expand "vec_extractv2di"
3368 [(match_operand:DI 0 "register_operand" "")
3369 (match_operand:V2DI 1 "register_operand" "")
3370 (match_operand 2 "const_int_operand" "")]
3373 ix86_expand_vector_extract (false, operands[0], operands[1],
3374 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_init: operand 0 is the
;; V2DI register to build and operand 1 (no predicate) is passed through
;; unchanged.  The leading argument is always false.
3378 (define_expand "vec_initv2di"
3379 [(match_operand:V2DI 0 "register_operand" "")
3380 (match_operand 1 "" "")]
3383 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that hands the work to ix86_expand_vector_set: operand 0 is the
;; V4SI register, operand 1 an SI register value, and operand 2 an integer
;; constant whose value is passed on (presumably the element index -- confirm
;; against ix86_expand_vector_set).  The leading argument is always false.
3387 (define_expand "vec_setv4si"
3388 [(match_operand:V4SI 0 "register_operand" "")
3389 (match_operand:SI 1 "register_operand" "")
3390 (match_operand 2 "const_int_operand" "")]
3393 ix86_expand_vector_set (false, operands[0], operands[1],
3394 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_extract: operand 0 is
;; the SI register result, operand 1 the V4SI register, and operand 2 an
;; integer constant whose value is passed on (presumably the element index --
;; confirm against ix86_expand_vector_extract).  The leading argument is
;; always false.
3398 (define_expand "vec_extractv4si"
3399 [(match_operand:SI 0 "register_operand" "")
3400 (match_operand:V4SI 1 "register_operand" "")
3401 (match_operand 2 "const_int_operand" "")]
3404 ix86_expand_vector_extract (false, operands[0], operands[1],
3405 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_init: operand 0 is the
;; V4SI register to build and operand 1 (no predicate) is passed through
;; unchanged.  The leading argument is always false.
3409 (define_expand "vec_initv4si"
3410 [(match_operand:V4SI 0 "register_operand" "")
3411 (match_operand 1 "" "")]
3414 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that hands the work to ix86_expand_vector_set: operand 0 is the
;; V8HI register, operand 1 an HI register value, and operand 2 an integer
;; constant whose value is passed on (presumably the element index -- confirm
;; against ix86_expand_vector_set).  The leading argument is always false.
3418 (define_expand "vec_setv8hi"
3419 [(match_operand:V8HI 0 "register_operand" "")
3420 (match_operand:HI 1 "register_operand" "")
3421 (match_operand 2 "const_int_operand" "")]
3424 ix86_expand_vector_set (false, operands[0], operands[1],
3425 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_extract: operand 0 is
;; the HI register result, operand 1 the V8HI register, and operand 2 an
;; integer constant whose value is passed on (presumably the element index --
;; confirm against ix86_expand_vector_extract).  The leading argument is
;; always false.
3429 (define_expand "vec_extractv8hi"
3430 [(match_operand:HI 0 "register_operand" "")
3431 (match_operand:V8HI 1 "register_operand" "")
3432 (match_operand 2 "const_int_operand" "")]
3435 ix86_expand_vector_extract (false, operands[0], operands[1],
3436 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_init: operand 0 is the
;; V8HI register to build and operand 1 (no predicate) is passed through
;; unchanged.  The leading argument is always false.
3440 (define_expand "vec_initv8hi"
3441 [(match_operand:V8HI 0 "register_operand" "")
3442 (match_operand 1 "" "")]
3445 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that hands the work to ix86_expand_vector_set: operand 0 is the
;; V16QI register, operand 1 a QI register value, and operand 2 an integer
;; constant whose value is passed on (presumably the element index -- confirm
;; against ix86_expand_vector_set).  The leading argument is always false.
3449 (define_expand "vec_setv16qi"
3450 [(match_operand:V16QI 0 "register_operand" "")
3451 (match_operand:QI 1 "register_operand" "")
3452 (match_operand 2 "const_int_operand" "")]
3455 ix86_expand_vector_set (false, operands[0], operands[1],
3456 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_extract: operand 0 is
;; the QI register result, operand 1 the V16QI register, and operand 2 an
;; integer constant whose value is passed on (presumably the element index --
;; confirm against ix86_expand_vector_extract).  The leading argument is
;; always false.
3460 (define_expand "vec_extractv16qi"
3461 [(match_operand:QI 0 "register_operand" "")
3462 (match_operand:V16QI 1 "register_operand" "")
3463 (match_operand 2 "const_int_operand" "")]
3466 ix86_expand_vector_extract (false, operands[0], operands[1],
3467 INTVAL (operands[2]));
;; Expander that hands the work to ix86_expand_vector_init: operand 0 is the
;; V16QI register to build and operand 1 (no predicate) is passed through
;; unchanged.  The leading argument is always false.
3471 (define_expand "vec_initv16qi"
3472 [(match_operand:V16QI 0 "register_operand" "")
3473 (match_operand 1 "" "")]
3476 ix86_expand_vector_init (false, operands[0], operands[1]);
3480 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3484 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn emitting pavgb on V16QI operands.  Operand 1 is tied to the
;; destination and marked commutative with operand 2 (the % modifier);
;; operand 2 may be a register or memory.  The pattern contains an all-ones
;; V16QI constant vector (presumably the +1 rounding term of the average --
;; the enclosing arithmetic is not visible in this view).  Enabled when
;; TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands.
3486 (define_insn "sse2_uavgv16qi3"
3487 [(set (match_operand:V16QI 0 "register_operand" "=x")
3493 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3495 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3496 (const_vector:V16QI [(const_int 1) (const_int 1)
3497 (const_int 1) (const_int 1)
3498 (const_int 1) (const_int 1)
3499 (const_int 1) (const_int 1)
3500 (const_int 1) (const_int 1)
3501 (const_int 1) (const_int 1)
3502 (const_int 1) (const_int 1)
3503 (const_int 1) (const_int 1)]))
3505 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3506 "pavgb\t{%2, %0|%0, %2}"
3507 [(set_attr "type" "sseiadd")
3508 (set_attr "mode" "TI")])
;; Insn emitting pavgw on V8HI operands.  Operand 1 is tied to the
;; destination and marked commutative with operand 2 (the % modifier);
;; operand 2 may be a register or memory.  The pattern contains an all-ones
;; V8HI constant vector (presumably the +1 rounding term of the average --
;; the enclosing arithmetic is not visible in this view).  Enabled when
;; TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands.
3510 (define_insn "sse2_uavgv8hi3"
3511 [(set (match_operand:V8HI 0 "register_operand" "=x")
3517 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3519 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3520 (const_vector:V8HI [(const_int 1) (const_int 1)
3521 (const_int 1) (const_int 1)
3522 (const_int 1) (const_int 1)
3523 (const_int 1) (const_int 1)]))
3525 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3526 "pavgw\t{%2, %0|%0, %2}"
3527 [(set_attr "type" "sseiadd")
3528 (set_attr "mode" "TI")])
;; Insn emitting psadbw.  The operation is modelled as an opaque V2DI unspec
;; of two V16QI inputs rather than spelled out in RTL; operand 1 is tied to
;; the destination and operand 2 may be a register or memory.
3530 ;; The correct representation for this is absolutely enormous, and
3531 ;; surely not generally useful.
3532 (define_insn "sse2_psadbw"
3533 [(set (match_operand:V2DI 0 "register_operand" "=x")
3534 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3535 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3538 "psadbw\t{%2, %0|%0, %2}"
3539 [(set_attr "type" "sseiadd")
3540 (set_attr "mode" "TI")])
;; Insn emitting movmskps: an SI general register (operand 0) is set from an
;; unspec of a V4SF SSE register (operand 1).
3542 (define_insn "sse_movmskps"
3543 [(set (match_operand:SI 0 "register_operand" "=r")
3544 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3547 "movmskps\t{%1, %0|%0, %1}"
3548 [(set_attr "type" "ssecvt")
3549 (set_attr "mode" "V4SF")])
;; Insn emitting movmskpd: an SI general register (operand 0) is set from an
;; unspec of a V2DF SSE register (operand 1).
3551 (define_insn "sse2_movmskpd"
3552 [(set (match_operand:SI 0 "register_operand" "=r")
3553 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3556 "movmskpd\t{%1, %0|%0, %1}"
3557 [(set_attr "type" "ssecvt")
3558 (set_attr "mode" "V2DF")])
;; Insn emitting pmovmskb: an SI general register (operand 0) is set from an
;; unspec of a V16QI SSE register (operand 1).
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector; sse2_psadbw, another integer insn, uses TI.  Confirm
;; whether V2DF is intentional (it may feed length or scheduling attributes)
;; before changing it.
3560 (define_insn "sse2_pmovmskb"
3561 [(set (match_operand:SI 0 "register_operand" "=r")
3562 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3565 "pmovmskb\t{%1, %0|%0, %1}"
3566 [(set_attr "type" "ssecvt")
3567 (set_attr "mode" "V2DF")])
;; Expander for the masked byte store: a V16QI memory destination
;; (operand 0) is set from an unspec whose visible inputs are two V16QI
;; registers (operands 1 and 2).
;; NOTE(review): the rest of the unspec and the condition are not visible in
;; this view.
3569 (define_expand "sse2_maskmovdqu"
3570 [(set (match_operand:V16QI 0 "memory_operand" "")
3571 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3572 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit form of the maskmovdqu insn (TARGET_SSE2 && !TARGET_64BIT).  The
;; destination address is an SI register under constraint D and never appears
;; in the template, which prints only operands 1 and 2.  The old contents of
;; the destination memory are also an input of the unspec (match_dup 0), so
;; the store is modelled as depending on what was there before.
3578 (define_insn "*sse2_maskmovdqu"
3579 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3580 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3581 (match_operand:V16QI 2 "register_operand" "x")
3582 (mem:V16QI (match_dup 0))]
3584 "TARGET_SSE2 && !TARGET_64BIT"
3585 ;; @@@ check ordering of operands in intel/nonintel syntax
3586 "maskmovdqu\t{%2, %1|%1, %2}"
3587 [(set_attr "type" "ssecvt")
3588 (set_attr "mode" "TI")])
;; 64-bit form of the maskmovdqu insn (TARGET_SSE2 && TARGET_64BIT).  Same
;; shape as the 32-bit form, except that the destination address is a DI
;; register under constraint D.  The template prints only operands 1 and 2,
;; and the old contents of the destination memory are an input of the unspec.
3590 (define_insn "*sse2_maskmovdqu_rex64"
3591 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3592 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3593 (match_operand:V16QI 2 "register_operand" "x")
3594 (mem:V16QI (match_dup 0))]
3596 "TARGET_SSE2 && TARGET_64BIT"
3597 ;; @@@ check ordering of operands in intel/nonintel syntax
3598 "maskmovdqu\t{%2, %1|%1, %2}"
3599 [(set_attr "type" "ssecvt")
3600 (set_attr "mode" "TI")])
;; Insn modelled as a volatile unspec reading an SI memory operand; its
;; "memory" attribute is "load".
;; NOTE(review): the unspec code, condition and template are not visible in
;; this view.
3602 (define_insn "sse_ldmxcsr"
3603 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3607 [(set_attr "type" "sse")
3608 (set_attr "memory" "load")])
;; Insn that stores the result of the volatile unspec UNSPECV_STMXCSR into an
;; SI memory operand; its "memory" attribute is "store".
;; NOTE(review): the condition and template are not visible in this view.
3610 (define_insn "sse_stmxcsr"
3611 [(set (match_operand:SI 0 "memory_operand" "=m")
3612 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3615 [(set_attr "type" "sse")
3616 (set_attr "memory" "store")])
;; Expander for the sfence barrier, available with TARGET_SSE or
;; TARGET_3DNOW_A.  The preparation code creates operand 0 as a BLKmode MEM
;; with a scratch address and marks it volatile; the barrier is expressed as
;; that memory being set to an UNSPEC_SFENCE of itself.
3618 (define_expand "sse_sfence"
3620 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3621 "TARGET_SSE || TARGET_3DNOW_A"
3623 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3624 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set to an UNSPEC_SFENCE of itself, under
;; TARGET_SSE or TARGET_3DNOW_A.  Its "memory" attribute is "unknown".
;; NOTE(review): the output template is not visible in this view.
3627 (define_insn "*sse_sfence"
3628 [(set (match_operand:BLK 0 "" "")
3629 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3630 "TARGET_SSE || TARGET_3DNOW_A"
3632 [(set_attr "type" "sse")
3633 (set_attr "memory" "unknown")])
;; Insn modelled as a volatile unspec over an address operand (predicate
;; address_operand, constraint p); its "memory" attribute is "unknown".
;; NOTE(review): the unspec code, condition and template are not visible in
;; this view.
3635 (define_insn "sse2_clflush"
3636 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3640 [(set_attr "type" "sse")
3641 (set_attr "memory" "unknown")])
;; Expander for the mfence barrier.  The preparation code creates operand 0
;; as a BLKmode MEM with a scratch address and marks it volatile; the
;; barrier is expressed through an UNSPEC_MFENCE of that memory.
;; NOTE(review): the condition is not visible in this view.
3643 (define_expand "sse2_mfence"
3645 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3648 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3649 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set to an UNSPEC_MFENCE of itself.  Its
;; "memory" attribute is "unknown".
;; NOTE(review): the condition and output template are not visible in this
;; view.
3652 (define_insn "*sse2_mfence"
3653 [(set (match_operand:BLK 0 "" "")
3654 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3657 [(set_attr "type" "sse")
3658 (set_attr "memory" "unknown")])
;; Expander for the lfence barrier.  The preparation code creates operand 0
;; as a BLKmode MEM with a scratch address and marks it volatile; the
;; barrier is expressed through an UNSPEC_LFENCE of that memory.
;; NOTE(review): the condition is not visible in this view.
3660 (define_expand "sse2_lfence"
3662 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3665 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3666 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set to an UNSPEC_LFENCE of itself.  Its
;; "memory" attribute is "unknown".
;; NOTE(review): the condition and output template are not visible in this
;; view.
3669 (define_insn "*sse2_lfence"
3670 [(set (match_operand:BLK 0 "" "")
3671 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3674 [(set_attr "type" "sse")
3675 (set_attr "memory" "unknown")])
;; Insn modelled as a volatile unspec of two SI register operands, pinned by
;; constraints "a" and "c".  The encoded length is fixed at 3 bytes.
;; NOTE(review): the unspec code, condition and template are not visible in
;; this view.
3677 (define_insn "sse3_mwait"
3678 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3679 (match_operand:SI 1 "register_operand" "c")]
3683 [(set_attr "length" "3")])
;; Insn emitting monitor, modelled as a volatile unspec of three SI register
;; operands pinned by constraints "a", "c" and "d".  The template prints all
;; three operands explicitly, and the encoded length is fixed at 3 bytes.
;; NOTE(review): the unspec code and condition are not visible in this view.
3685 (define_insn "sse3_monitor"
3686 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3687 (match_operand:SI 1 "register_operand" "c")
3688 (match_operand:SI 2 "register_operand" "d")]
3691 "monitor\t%0, %1, %2"
3692 [(set_attr "length" "3")])