1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
;; Iterated over by the mov<mode> expander and *mov<mode>_internal below.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
;; (the four integer modes of SSEMODEI plus V4SF and V2DF).  Iterated over
;; by the push<mode>1 and movmisalign<mode> expanders below.
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The numeric suffix appears to list
;; the element sizes in bytes of the included modes (QI=1, HI=2, SI=4,
;; DI=8) -- e.g. SSEMODE248 = V8HI, V4SI, V2DI.
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
36 ;; Mapping from integer vector mode to mnemonic suffix
;; (<ssevecsize> yields "b", "w", "d" or "q" for V16QI, V8HI, V4SI, V2DI).
37 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
;; Move expander for each SSEMODEI integer vector mode.  Both operands are
;; nonimmediate_operand; the expansion is delegated to
;; ix86_expand_vector_move.
50 (define_expand "mov<mode>"
51 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
52 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
55 ix86_expand_vector_move (<MODE>mode, operands);
;; Integer vector move insn.  Three alternatives: SSE constant (C) into a
;; register, register/memory into a register, and register into memory.
;; The condition requires at least one operand to be a register.
;; Output is standard_sse_constant_opcode for the constant form
;; (presumably alternative 0), otherwise movaps when the insn's "mode"
;; attribute is V4SF and movdqa when it is not.
;; The "mode" attribute is computed from optimize_size, TARGET_SSE2 and,
;; for alternative 2 (the store), TARGET_SSE_TYPELESS_STORES.
59 (define_insn "*mov<mode>_internal"
60 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
61 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 && (register_operand (operands[0], <MODE>mode)
64 || register_operand (operands[1], <MODE>mode))"
66 switch (which_alternative)
69 return standard_sse_constant_opcode (insn, operands[1]);
72 if (get_attr_mode (insn) == MODE_V4SF)
73 return "movaps\t{%1, %0|%0, %1}";
75 return "movdqa\t{%1, %0|%0, %1}";
80 [(set_attr "type" "sselog1,ssemov,ssemov")
83 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
84 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
85 (and (eq_attr "alternative" "2")
86 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
89 (const_string "TI")))])
91 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
92 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
93 ;; from memory, we'd prefer to load the memory directly into the %xmm
94 ;; register. To facilitate this happy circumstance, this pattern won't
95 ;; split until after register allocation. If the 64-bit value didn't
96 ;; come from memory, this is the best we can do. This is much better
97 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; Operand 0: V4SI destination register.  Operand 1: DImode source, either
;; an integer register pair (alternative 0) or memory (alternative 1).
;; Operand 2: V4SI scratch, an early-clobber SSE register in the register
;; alternative only.  Enabled for 32-bit SSE2 targets with inter-unit moves.
;; After reload the register case is split into two sse2_loadld insns (low
;; SImode half into operand 0, high half at byte offset 4 into the scratch)
;; followed by sse2_punpckldq; the memory case becomes a vec_concatv2di of
;; the DImode value with zero.
100 (define_insn_and_split "movdi_to_sse"
102 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
103 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
104 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
105 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
107 "&& reload_completed"
110 if (register_operand (operands[1], DImode))
112 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
113 Assemble the 64-bit DImode value in an xmm register. */
114 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
115 gen_rtx_SUBREG (SImode, operands[1], 0)));
116 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
117 gen_rtx_SUBREG (SImode, operands[1], 4)));
118 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
120 else if (memory_operand (operands[1], DImode))
121 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; V4SF move expander; delegates to ix86_expand_vector_move.
126 (define_expand "movv4sf"
127 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
128 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
131 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn with the same three alternatives as the integer version
;; (SSE constant, register/memory load, store).  At least one operand must
;; be a register.  Emits standard_sse_constant_opcode for the constant form
;; (presumably alternative 0) and movaps otherwise; "mode" is always V4SF.
135 (define_insn "*movv4sf_internal"
136 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
137 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
139 && (register_operand (operands[0], V4SFmode)
140 || register_operand (operands[1], V4SFmode))"
142 switch (which_alternative)
145 return standard_sse_constant_opcode (insn, operands[1]);
148 return "movaps\t{%1, %0|%0, %1}";
153 [(set_attr "type" "sselog1,ssemov,ssemov")
154 (set_attr "mode" "V4SF")])
;; After reload, a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand is rewritten: operand 1 is narrowed to
;; its SFmode subreg at offset 0 and used in a vec_duplicate, and operand 2
;; is set to the V4SF zero vector.
;; NOTE(review): presumably the replacement merges the scalar with the
;; zero vector -- confirm against the full replacement template.
157 [(set (match_operand:V4SF 0 "register_operand" "")
158 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
159 "TARGET_SSE && reload_completed"
162 (vec_duplicate:V4SF (match_dup 1))
166 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
167 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; delegates to ix86_expand_vector_move.
170 (define_expand "movv2df"
171 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
172 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
175 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn: SSE constant, register/memory load, or store; at least
;; one operand must be a register.  Emits standard_sse_constant_opcode for
;; the constant form (presumably alternative 0), otherwise movaps when the
;; "mode" attribute is V4SF and movapd when it is not.
;; "mode" is V4SF when optimizing for size, when SSE2 is unavailable, or
;; for the store alternative with TARGET_SSE_TYPELESS_STORES; else V2DF.
179 (define_insn "*movv2df_internal"
180 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
181 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
183 && (register_operand (operands[0], V2DFmode)
184 || register_operand (operands[1], V2DFmode))"
186 switch (which_alternative)
189 return standard_sse_constant_opcode (insn, operands[1]);
192 if (get_attr_mode (insn) == MODE_V4SF)
193 return "movaps\t{%1, %0|%0, %1}";
195 return "movapd\t{%1, %0|%0, %1}";
200 [(set_attr "type" "sselog1,ssemov,ssemov")
203 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
204 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
205 (and (eq_attr "alternative" "2")
206 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
208 (const_string "V4SF")
209 (const_string "V2DF")))])
;; After reload, a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand becomes a vec_concat of the DFmode
;; low part of the source (subreg at offset 0) with a DFmode zero.
212 [(set (match_operand:V2DF 0 "register_operand" "")
213 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
214 "TARGET_SSE2 && reload_completed"
215 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
217 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
218 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE vector mode; operand 0 must be a
;; register.  The work is done by ix86_expand_push.
221 (define_expand "push<mode>1"
222 [(match_operand:SSEMODE 0 "register_operand" "")]
225 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for every SSEMODE vector mode; delegates to
;; ix86_expand_vector_move_misalign.
229 (define_expand "movmisalign<mode>"
230 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
231 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
234 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF load/store: emits movups.  The insn condition rejects the
;; memory-to-memory form, so at most one operand is in memory.
;; The "mode" attribute is V4SF, matching the operand mode and the
;; single-precision mnemonic; V2DF belongs to the double-precision twin
;; sse2_movupd and was evidently copied from it.
;; NOTE(review): attributes derived from "mode" elsewhere in the port
;; (e.g. operand-size prefix accounting) now see the single-precision
;; form -- confirm against the attribute definitions.
238 (define_insn "sse_movups"
239 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
240 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
242 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
243 "movups\t{%1, %0|%0, %1}"
244 [(set_attr "type" "ssemov")
245 (set_attr "mode" "V4SF")])
;; Unaligned V2DF load/store: emits movupd.  Requires SSE2 and rejects the
;; memory-to-memory form.
247 (define_insn "sse2_movupd"
248 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
249 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
251 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
252 "movupd\t{%1, %0|%0, %1}"
253 [(set_attr "type" "ssemov")
254 (set_attr "mode" "V2DF")])
;; Unaligned V16QI load/store: emits movdqu.  Requires SSE2 and rejects the
;; memory-to-memory form.  prefix_data16 is set explicitly.
256 (define_insn "sse2_movdqu"
257 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
258 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
260 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
261 "movdqu\t{%1, %0|%0, %1}"
262 [(set_attr "type" "ssemov")
263 (set_attr "prefix_data16" "1")
264 (set_attr "mode" "TI")])
;; Non-temporal V4SF store from an SSE register to memory: emits movntps.
266 (define_insn "sse_movntv4sf"
267 [(set (match_operand:V4SF 0 "memory_operand" "=m")
268 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
271 "movntps\t{%1, %0|%0, %1}"
272 [(set_attr "type" "ssemov")
273 (set_attr "mode" "V4SF")])
;; Non-temporal V2DF store from an SSE register to memory: emits movntpd.
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf --
;; confirm whether the difference is intentional.
275 (define_insn "sse2_movntv2df"
276 [(set (match_operand:V2DF 0 "memory_operand" "=m")
277 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
280 "movntpd\t{%1, %0|%0, %1}"
281 [(set_attr "type" "ssecvt")
282 (set_attr "mode" "V2DF")])
;; Non-temporal V2DI store from an SSE register to memory: emits movntdq.
;; prefix_data16 is set explicitly.
284 (define_insn "sse2_movntv2di"
285 [(set (match_operand:V2DI 0 "memory_operand" "=m")
286 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
289 "movntdq\t{%1, %0|%0, %1}"
290 [(set_attr "type" "ssecvt")
291 (set_attr "prefix_data16" "1")
292 (set_attr "mode" "TI")])
;; Non-temporal SImode store from a general register ("r") to memory:
;; emits movnti.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is what the attribute consumers expect.
294 (define_insn "sse2_movntsi"
295 [(set (match_operand:SI 0 "memory_operand" "=m")
296 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
299 "movnti\t{%1, %0|%0, %1}"
300 [(set_attr "type" "ssecvt")
301 (set_attr "mode" "V2DF")])
;; V16QI load from memory into an SSE register: emits lddqu.  The source
;; must be a memory operand; prefix_rep is set explicitly.
303 (define_insn "sse3_lddqu"
304 [(set (match_operand:V16QI 0 "register_operand" "=x")
305 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
308 "lddqu\t{%1, %0|%0, %1}"
309 [(set_attr "type" "ssecvt")
310 (set_attr "prefix_rep" "1")
311 (set_attr "mode" "TI")])
313 ; Expand patterns for non-temporal stores. At the moment, only those
314 ; that directly map to insns are defined; it would be possible to
315 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander for V4SF (memory destination, register
;; source).  Constraint strings are left empty: an expander only uses the
;; operand predicates, and this matches the storentdf/storentsf expanders.
317 (define_expand "storentv4sf"
318 [(set (match_operand:V4SF 0 "memory_operand" "")
319 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")]
;; Non-temporal store expander for V2DF (memory destination, register
;; source).  Constraint strings are left empty: an expander only uses the
;; operand predicates, and this matches the storentdf/storentsf expanders.
324 (define_expand "storentv2df"
325 [(set (match_operand:V2DF 0 "memory_operand" "")
326 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "")]
;; Non-temporal store expander for V2DI (memory destination, register
;; source).  Constraint strings are left empty: an expander only uses the
;; operand predicates, and this matches the storentdf/storentsf expanders.
331 (define_expand "storentv2di"
332 [(set (match_operand:V2DI 0 "memory_operand" "")
333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for SImode (memory destination, register
;; source).  Constraint strings are left empty: an expander only uses the
;; operand predicates, and this matches the storentdf/storentsf expanders.
338 (define_expand "storentsi"
339 [(set (match_operand:SI 0 "memory_operand" "")
340 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
;; Non-temporal store expander for DFmode: memory destination, register
;; source wrapped in an unspec.
345 (define_expand "storentdf"
346 [(set (match_operand:DF 0 "memory_operand" "")
347 (unspec:DF [(match_operand:DF 1 "register_operand" "")]
;; Non-temporal store expander for SFmode: memory destination, register
;; source wrapped in an unspec.
352 (define_expand "storentsf"
353 [(set (match_operand:SF 0 "memory_operand" "")
354 (unspec:SF [(match_operand:SF 1 "register_operand" "")]
359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
361 ;; Parallel single-precision floating point arithmetic
363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; the expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG, ...).
365 (define_expand "negv4sf2"
366 [(set (match_operand:V4SF 0 "register_operand" "")
367 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
369 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; the expansion is done entirely by
;; ix86_expand_fp_absneg_operator (ABS, ...).
371 (define_expand "absv4sf2"
372 [(set (match_operand:V4SF 0 "register_operand" "")
373 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
375 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
377 (define_expand "addv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "")
379 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
380 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
382 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; V4SF addition insn: emits addps.  Operand 1 is tied to the output and
;; marked commutative ("%0"); operand 2 may be a register or memory.
384 (define_insn "*addv4sf3"
385 [(set (match_operand:V4SF 0 "register_operand" "=x")
386 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
387 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
388 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
389 "addps\t{%2, %0|%0, %2}"
390 [(set_attr "type" "sseadd")
391 (set_attr "mode" "V4SF")])
;; Scalar-form addition on V4SF operands: emits addss, with "mode" SF.
;; Operand 1 is a register tied to the output; operand 2 may be memory.
393 (define_insn "sse_vmaddv4sf3"
394 [(set (match_operand:V4SF 0 "register_operand" "=x")
396 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
397 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
400 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
401 "addss\t{%2, %0|%0, %2}"
402 [(set_attr "type" "sseadd")
403 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register;
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
405 (define_expand "subv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "")
407 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
408 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
410 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; V4SF subtraction insn: emits subps.  Operand 1 is a register tied to
;; the output (not commutative); operand 2 may be a register or memory.
412 (define_insn "*subv4sf3"
413 [(set (match_operand:V4SF 0 "register_operand" "=x")
414 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
415 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
417 "subps\t{%2, %0|%0, %2}"
418 [(set_attr "type" "sseadd")
419 (set_attr "mode" "V4SF")])
;; Scalar-form subtraction on V4SF operands: emits subss, with "mode" SF.
;; Operand 1 is a register tied to the output; operand 2 may be memory.
421 (define_insn "sse_vmsubv4sf3"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
425 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
429 "subss\t{%2, %0|%0, %2}"
430 [(set_attr "type" "sseadd")
431 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
433 (define_expand "mulv4sf3"
434 [(set (match_operand:V4SF 0 "register_operand" "")
435 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
436 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
438 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; V4SF multiplication insn: emits mulps.  Operand 1 is tied to the output
;; and marked commutative ("%0"); operand 2 may be a register or memory.
440 (define_insn "*mulv4sf3"
441 [(set (match_operand:V4SF 0 "register_operand" "=x")
442 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
443 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
444 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
445 "mulps\t{%2, %0|%0, %2}"
446 [(set_attr "type" "ssemul")
447 (set_attr "mode" "V4SF")])
;; Scalar-form multiplication on V4SF operands: emits mulss, "mode" SF.
;; Operand 1 is a register tied to the output; operand 2 may be memory.
449 (define_insn "sse_vmmulv4sf3"
450 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
453 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
456 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
457 "mulss\t{%2, %0|%0, %2}"
458 [(set_attr "type" "ssemul")
459 (set_attr "mode" "SF")])
;; V4SF division expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and
;; TARGET_RECIP are set, not optimizing for size, and the finite-only,
;; non-trapping and unsafe-math flags all hold, ix86_emit_swdivsf is
;; called to emit the division sequence.
461 (define_expand "divv4sf3"
462 [(set (match_operand:V4SF 0 "register_operand" "")
463 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
464 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
467 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
469 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
470 && flag_finite_math_only && !flag_trapping_math
471 && flag_unsafe_math_optimizations)
473 ix86_emit_swdivsf (operands[0], operands[1],
474 operands[2], V4SFmode);
;; V4SF division insn: emits divps.  Operand 1 is a register tied to the
;; output; operand 2 may be a register or memory.
479 (define_insn "*divv4sf3"
480 [(set (match_operand:V4SF 0 "register_operand" "=x")
481 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
482 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
484 "divps\t{%2, %0|%0, %2}"
485 [(set_attr "type" "ssediv")
486 (set_attr "mode" "V4SF")])
;; Scalar-form division on V4SF operands: emits divss, with "mode" SF.
;; Operand 1 is a register tied to the output; operand 2 may be memory.
488 (define_insn "sse_vmdivv4sf3"
489 [(set (match_operand:V4SF 0 "register_operand" "=x")
491 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
492 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
496 "divss\t{%2, %0|%0, %2}"
497 [(set_attr "type" "ssediv")
498 (set_attr "mode" "SF")])
;; V4SF reciprocal, represented as UNSPEC_RCP of operand 1: emits rcpps.
500 (define_insn "sse_rcpv4sf2"
501 [(set (match_operand:V4SF 0 "register_operand" "=x")
503 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
505 "rcpps\t{%1, %0|%0, %1}"
506 [(set_attr "type" "sse")
507 (set_attr "mode" "V4SF")])
;; Scalar-form reciprocal: emits rcpss on operand 1, with "mode" SF.
;; Operand 2 is a register tied to the output ("0").
509 (define_insn "sse_vmrcpv4sf2"
510 [(set (match_operand:V4SF 0 "register_operand" "=x")
512 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
514 (match_operand:V4SF 2 "register_operand" "0")
517 "rcpss\t{%1, %0|%0, %1}"
518 [(set_attr "type" "sse")
519 (set_attr "mode" "SF")])
;; V4SF reciprocal square root, represented as UNSPEC_RSQRT of operand 1:
;; emits rsqrtps.  Matches the RTL generated by the sse_rsqrtv4sf2 expander.
521 (define_insn "*sse_rsqrtv4sf2"
522 [(set (match_operand:V4SF 0 "register_operand" "=x")
524 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
526 "rsqrtps\t{%1, %0|%0, %1}"
527 [(set_attr "type" "sse")
528 (set_attr "mode" "V4SF")])
;; Expander for V4SF reciprocal square root (UNSPEC_RSQRT).  When
;; TARGET_SSE_MATH and TARGET_RECIP are set, not optimizing for size, and
;; the finite-only, non-trapping and unsafe-math flags all hold, it calls
;; ix86_emit_swsqrtsf with a final argument of 1.
530 (define_expand "sse_rsqrtv4sf2"
531 [(set (match_operand:V4SF 0 "register_operand" "")
533 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
536 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
537 && flag_finite_math_only && !flag_trapping_math
538 && flag_unsafe_math_optimizations)
540 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Scalar-form reciprocal square root: emits rsqrtss on operand 1, with
;; "mode" SF.  Operand 2 is a register tied to the output ("0").
545 (define_insn "sse_vmrsqrtv4sf2"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
550 (match_operand:V4SF 2 "register_operand" "0")
553 "rsqrtss\t{%1, %0|%0, %1}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "SF")])
;; V4SF square root insn: emits sqrtps.  The source may be a register or
;; memory.
557 (define_insn "*sqrtv4sf2"
558 [(set (match_operand:V4SF 0 "register_operand" "=x")
559 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
561 "sqrtps\t{%1, %0|%0, %1}"
562 [(set_attr "type" "sse")
563 (set_attr "mode" "V4SF")])
;; V4SF square root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, not optimizing for size, and the finite-only, non-trapping and
;; unsafe-math flags all hold, it calls ix86_emit_swsqrtsf with a final
;; argument of 0.
;; Operand 0 carries an empty constraint string like every other expander
;; in this file: constraints play no role in a define_expand, and a bare
;; "=" names no register class.
565 (define_expand "sqrtv4sf2"
566 [(set (match_operand:V4SF 0 "register_operand" "")
567 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
570 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
571 && flag_finite_math_only && !flag_trapping_math
572 && flag_unsafe_math_optimizations)
574 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Scalar-form square root: emits sqrtss on operand 1, with "mode" SF.
;; Operand 2 is a register tied to the output ("0").
579 (define_insn "sse_vmsqrtv4sf2"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
583 (match_operand:V4SF 2 "register_operand" "0")
586 "sqrtss\t{%1, %0|%0, %1}"
587 [(set_attr "type" "sse")
588 (set_attr "mode" "SF")])
590 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
591 ;; isn't really correct, as those rtl operators aren't defined when
592 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  Without -ffinite-math-only operand 1 is forced
;; into a register first, so the non-commutative *smaxv4sf3 insn (which
;; requires a register operand 1) can match.  Operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy.
594 (define_expand "smaxv4sf3"
595 [(set (match_operand:V4SF 0 "register_operand" "")
596 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
597 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
600 if (!flag_finite_math_only)
601 operands[1] = force_reg (V4SFmode, operands[1]);
602 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; V4SF maximum insn for -ffinite-math-only: emits maxps with operand 1
;; treated as commutative ("%0").
605 (define_insn "*smaxv4sf3_finite"
606 [(set (match_operand:V4SF 0 "register_operand" "=x")
607 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
608 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
609 "TARGET_SSE && flag_finite_math_only
610 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
611 "maxps\t{%2, %0|%0, %2}"
612 [(set_attr "type" "sse")
613 (set_attr "mode" "V4SF")])
;; V4SF maximum insn, non-commutative form: emits maxps.  Operand 1 must
;; be a register tied to the output.
615 (define_insn "*smaxv4sf3"
616 [(set (match_operand:V4SF 0 "register_operand" "=x")
617 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
618 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
620 "maxps\t{%2, %0|%0, %2}"
621 [(set_attr "type" "sse")
622 (set_attr "mode" "V4SF")])
;; Scalar-form maximum on V4SF operands: emits maxss, with "mode" SF.
;; Operand 1 is a register tied to the output; operand 2 may be memory.
624 (define_insn "sse_vmsmaxv4sf3"
625 [(set (match_operand:V4SF 0 "register_operand" "=x")
627 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
628 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
632 "maxss\t{%2, %0|%0, %2}"
633 [(set_attr "type" "sse")
634 (set_attr "mode" "SF")])
;; V4SF minimum expander.  Without -ffinite-math-only operand 1 is forced
;; into a register first, so the non-commutative *sminv4sf3 insn (which
;; requires a register operand 1) can match.  Operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy.
636 (define_expand "sminv4sf3"
637 [(set (match_operand:V4SF 0 "register_operand" "")
638 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
639 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
642 if (!flag_finite_math_only)
643 operands[1] = force_reg (V4SFmode, operands[1]);
644 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; V4SF minimum insn for -ffinite-math-only: emits minps with operand 1
;; treated as commutative ("%0").
647 (define_insn "*sminv4sf3_finite"
648 [(set (match_operand:V4SF 0 "register_operand" "=x")
649 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
650 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
651 "TARGET_SSE && flag_finite_math_only
652 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
653 "minps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sse")
655 (set_attr "mode" "V4SF")])
;; V4SF minimum insn, non-commutative form: emits minps.  Operand 1 must
;; be a register tied to the output.
657 (define_insn "*sminv4sf3"
658 [(set (match_operand:V4SF 0 "register_operand" "=x")
659 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
660 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
662 "minps\t{%2, %0|%0, %2}"
663 [(set_attr "type" "sse")
664 (set_attr "mode" "V4SF")])
;; Scalar-form minimum on V4SF operands: emits minss, with "mode" SF.
;; Operand 1 is a register tied to the output; operand 2 may be memory.
666 (define_insn "sse_vmsminv4sf3"
667 [(set (match_operand:V4SF 0 "register_operand" "=x")
669 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
670 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
674 "minss\t{%2, %0|%0, %2}"
675 [(set_attr "type" "sse")
676 (set_attr "mode" "SF")])
678 ;; These versions of the min/max patterns implement exactly the operations
679 ;; min = (op1 < op2 ? op1 : op2)
680 ;; max = (!(op1 < op2) ? op1 : op2)
681 ;; Their operands are not commutative, and thus they may be used in the
682 ;; presence of -0.0 and NaN.
;; Order-sensitive V4SF minimum, kept as an unspec so the operands are
;; never swapped: emits minps with operand 1 tied to the output.
684 (define_insn "*ieee_sminv4sf3"
685 [(set (match_operand:V4SF 0 "register_operand" "=x")
686 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
687 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
690 "minps\t{%2, %0|%0, %2}"
691 [(set_attr "type" "sseadd")
692 (set_attr "mode" "V4SF")])
;; Order-sensitive V4SF maximum, kept as an unspec so the operands are
;; never swapped: emits maxps with operand 1 tied to the output.
694 (define_insn "*ieee_smaxv4sf3"
695 [(set (match_operand:V4SF 0 "register_operand" "=x")
696 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
697 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
700 "maxps\t{%2, %0|%0, %2}"
701 [(set_attr "type" "sseadd")
702 (set_attr "mode" "V4SF")])
;; Order-sensitive V2DF minimum, kept as an unspec so the operands are
;; never swapped: emits minpd with operand 1 tied to the output.
704 (define_insn "*ieee_sminv2df3"
705 [(set (match_operand:V2DF 0 "register_operand" "=x")
706 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
707 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
710 "minpd\t{%2, %0|%0, %2}"
711 [(set_attr "type" "sseadd")
712 (set_attr "mode" "V2DF")])
;; Order-sensitive V2DF maximum, kept as an unspec so the operands are
;; never swapped: emits maxpd with operand 1 tied to the output.
714 (define_insn "*ieee_smaxv2df3"
715 [(set (match_operand:V2DF 0 "register_operand" "=x")
716 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
717 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
720 "maxpd\t{%2, %0|%0, %2}"
721 [(set_attr "type" "sseadd")
722 (set_attr "mode" "V2DF")])
;; V4SF add/subtract: emits addsubps.  The pattern combines an arithmetic
;; result on operands 1 and 2 with (minus op1 op2) of the same operands.
;; Operand 1 is a register tied to the output.
724 (define_insn "sse3_addsubv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (match_operand:V4SF 1 "register_operand" "0")
729 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
730 (minus:V4SF (match_dup 1) (match_dup 2))
733 "addsubps\t{%2, %0|%0, %2}"
734 [(set_attr "type" "sseadd")
735 (set_attr "prefix_rep" "1")
736 (set_attr "mode" "V4SF")])
;; V4SF horizontal operation: emits haddps.  The pattern pairs up elements
;; (0,1) and (2,3) of operand 1, then elements (0,1) and (2,3) of
;; operand 2.  Operand 1 is a register tied to the output.
738 (define_insn "sse3_haddv4sf3"
739 [(set (match_operand:V4SF 0 "register_operand" "=x")
744 (match_operand:V4SF 1 "register_operand" "0")
745 (parallel [(const_int 0)]))
746 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
748 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
749 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
753 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
754 (parallel [(const_int 0)]))
755 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
757 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
758 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
760 "haddps\t{%2, %0|%0, %2}"
761 [(set_attr "type" "sseadd")
762 (set_attr "prefix_rep" "1")
763 (set_attr "mode" "V4SF")])
;; V4SF horizontal operation: emits hsubps.  The pattern pairs up elements
;; (0,1) and (2,3) of operand 1, then elements (0,1) and (2,3) of
;; operand 2.  Operand 1 is a register tied to the output.
765 (define_insn "sse3_hsubv4sf3"
766 [(set (match_operand:V4SF 0 "register_operand" "=x")
771 (match_operand:V4SF 1 "register_operand" "0")
772 (parallel [(const_int 0)]))
773 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
775 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
776 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
780 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
781 (parallel [(const_int 0)]))
782 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
784 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
785 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
787 "hsubps\t{%2, %0|%0, %2}"
788 [(set_attr "type" "sseadd")
789 (set_attr "prefix_rep" "1")
790 (set_attr "mode" "V4SF")])
;; V4SF sum reduction expander.  Two strategies: two chained
;; sse3_haddv4sf3 insns through a fresh temporary, or the generic
;; ix86_expand_reduc_v4sf helper driven by gen_addv4sf3.
;; NOTE(review): presumably the hadd path is selected when SSE3 is
;; available -- confirm against the guarding condition.
792 (define_expand "reduc_splus_v4sf"
793 [(match_operand:V4SF 0 "register_operand" "")
794 (match_operand:V4SF 1 "register_operand" "")]
799 rtx tmp = gen_reg_rtx (V4SFmode);
800 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
801 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
804 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V4SF maximum reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_smaxv4sf3.
808 (define_expand "reduc_smax_v4sf"
809 [(match_operand:V4SF 0 "register_operand" "")
810 (match_operand:V4SF 1 "register_operand" "")]
813 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum reduction expander: ix86_expand_reduc_v4sf driven by
;; gen_sminv4sf3.
817 (define_expand "reduc_smin_v4sf"
818 [(match_operand:V4SF 0 "register_operand" "")
819 (match_operand:V4SF 1 "register_operand" "")]
822 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
826 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
828 ;; Parallel single-precision floating point comparisons
830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF mask comparison: operator 3 (an sse_comparison_operator) selects
;; the predicate printed via %D3; emits cmp<cond>ps.  Operand 1 is a
;; register tied to the output.
832 (define_insn "sse_maskcmpv4sf3"
833 [(set (match_operand:V4SF 0 "register_operand" "=x")
834 (match_operator:V4SF 3 "sse_comparison_operator"
835 [(match_operand:V4SF 1 "register_operand" "0")
836 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
838 "cmp%D3ps\t{%2, %0|%0, %2}"
839 [(set_attr "type" "ssecmp")
840 (set_attr "mode" "V4SF")])
;; SFmode mask comparison: operator 3 (an sse_comparison_operator) selects
;; the predicate printed via %D3; emits cmp<cond>ss.  Operand 1 is a
;; register tied to the output.
842 (define_insn "sse_maskcmpsf3"
843 [(set (match_operand:SF 0 "register_operand" "=x")
844 (match_operator:SF 3 "sse_comparison_operator"
845 [(match_operand:SF 1 "register_operand" "0")
846 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
848 "cmp%D3ss\t{%2, %0|%0, %2}"
849 [(set_attr "type" "ssecmp")
850 (set_attr "mode" "SF")])
;; Scalar-form mask comparison on V4SF operands: emits cmp<cond>ss, with
;; "mode" SF.  Both inputs must be registers here (operand 2 is
;; register_operand "x", not memory); operand 1 is tied to the output.
852 (define_insn "sse_vmmaskcmpv4sf3"
853 [(set (match_operand:V4SF 0 "register_operand" "=x")
855 (match_operator:V4SF 3 "sse_comparison_operator"
856 [(match_operand:V4SF 1 "register_operand" "0")
857 (match_operand:V4SF 2 "register_operand" "x")])
861 "cmp%D3ss\t{%2, %0|%0, %2}"
862 [(set_attr "type" "ssecmp")
863 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from element 0 of V4SF operands 0 and 1:
;; emits comiss.  Operand 0 must be a register; operand 1 may be memory.
865 (define_insn "sse_comi"
866 [(set (reg:CCFP FLAGS_REG)
869 (match_operand:V4SF 0 "register_operand" "x")
870 (parallel [(const_int 0)]))
872 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
873 (parallel [(const_int 0)]))))]
875 "comiss\t{%1, %0|%0, %1}"
876 [(set_attr "type" "ssecomi")
877 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of V4SF operands 0 and 1:
;; emits ucomiss.  Operand 0 must be a register; operand 1 may be memory.
879 (define_insn "sse_ucomi"
880 [(set (reg:CCFPU FLAGS_REG)
883 (match_operand:V4SF 0 "register_operand" "x")
884 (parallel [(const_int 0)]))
886 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
887 (parallel [(const_int 0)]))))]
889 "ucomiss\t{%1, %0|%0, %1}"
890 [(set_attr "type" "ssecomi")
891 (set_attr "mode" "SF")])
;; V4SF vector conditional expander.  Operands 4 and 5 are the values
;; being compared; operands 1 and 2 are the two selectable values.
;; Expansion is attempted through ix86_expand_fp_vcond, whose return value
;; is tested.
893 (define_expand "vcondv4sf"
894 [(set (match_operand:V4SF 0 "register_operand" "")
897 [(match_operand:V4SF 4 "nonimmediate_operand" "")
898 (match_operand:V4SF 5 "nonimmediate_operand" "")])
899 (match_operand:V4SF 1 "general_operand" "")
900 (match_operand:V4SF 2 "general_operand" "")))]
903 if (ix86_expand_fp_vcond (operands))
909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
911 ;; Parallel single-precision floating point logical operations
913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise AND expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
915 (define_expand "andv4sf3"
916 [(set (match_operand:V4SF 0 "register_operand" "")
917 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
918 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
920 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; V4SF bitwise AND insn: emits andps.  Operand 1 is tied to the output
;; and marked commutative ("%0").
922 (define_insn "*andv4sf3"
923 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
925 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
926 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
927 "andps\t{%2, %0|%0, %2}"
928 [(set_attr "type" "sselog")
929 (set_attr "mode" "V4SF")])
;; V4SF and-not: computes (~operand 1) & operand 2 and emits andnps.
;; Operand 1 (the complemented input) is a register tied to the output.
931 (define_insn "sse_nandv4sf3"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
933 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
934 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
936 "andnps\t{%2, %0|%0, %2}"
937 [(set_attr "type" "sselog")
938 (set_attr "mode" "V4SF")])
;; V4SF bitwise OR expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
940 (define_expand "iorv4sf3"
941 [(set (match_operand:V4SF 0 "register_operand" "")
942 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
943 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
945 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; V4SF bitwise OR insn: emits orps.  Operand 1 is tied to the output and
;; marked commutative ("%0").
947 (define_insn "*iorv4sf3"
948 [(set (match_operand:V4SF 0 "register_operand" "=x")
949 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
950 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
951 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
952 "orps\t{%2, %0|%0, %2}"
953 [(set_attr "type" "sselog")
954 (set_attr "mode" "V4SF")])
;; V4SF bitwise XOR expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
956 (define_expand "xorv4sf3"
957 [(set (match_operand:V4SF 0 "register_operand" "")
958 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
959 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
961 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; V4SF bitwise XOR insn: emits xorps.  Operand 1 is tied to the output
;; and marked commutative ("%0").
963 (define_insn "*xorv4sf3"
964 [(set (match_operand:V4SF 0 "register_operand" "=x")
965 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
966 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
967 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
968 "xorps\t{%2, %0|%0, %2}"
969 [(set_attr "type" "sselog")
970 (set_attr "mode" "V4SF")])
972 ;; Also define scalar versions. These are used for abs, neg, and
973 ;; conditional move. Using subregs into vector modes causes register
974 ;; allocation lossage. These patterns do not allow memory operands
975 ;; because the native instructions read the full 128 bits.
;; SFmode bitwise AND on SSE registers only: emits the packed andps, so
;; the "mode" attribute is V4SF.
977 (define_insn "*andsf3"
978 [(set (match_operand:SF 0 "register_operand" "=x")
979 (and:SF (match_operand:SF 1 "register_operand" "0")
980 (match_operand:SF 2 "register_operand" "x")))]
982 "andps\t{%2, %0|%0, %2}"
983 [(set_attr "type" "sselog")
984 (set_attr "mode" "V4SF")])
;; SFmode and-not, (~operand 1) & operand 2, on SSE registers only: emits
;; the packed andnps, so the "mode" attribute is V4SF.
986 (define_insn "*nandsf3"
987 [(set (match_operand:SF 0 "register_operand" "=x")
988 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
989 (match_operand:SF 2 "register_operand" "x")))]
991 "andnps\t{%2, %0|%0, %2}"
992 [(set_attr "type" "sselog")
993 (set_attr "mode" "V4SF")])
;; SFmode bitwise OR on SSE registers only: emits the packed orps, so the
;; "mode" attribute is V4SF.
995 (define_insn "*iorsf3"
996 [(set (match_operand:SF 0 "register_operand" "=x")
997 (ior:SF (match_operand:SF 1 "register_operand" "0")
998 (match_operand:SF 2 "register_operand" "x")))]
1000 "orps\t{%2, %0|%0, %2}"
1001 [(set_attr "type" "sselog")
1002 (set_attr "mode" "V4SF")])
;; SFmode bitwise XOR on SSE registers only: emits the packed xorps, so
;; the "mode" attribute is V4SF.
1004 (define_insn "*xorsf3"
1005 [(set (match_operand:SF 0 "register_operand" "=x")
1006 (xor:SF (match_operand:SF 1 "register_operand" "0")
1007 (match_operand:SF 2 "register_operand" "x")))]
1009 "xorps\t{%2, %0|%0, %2}"
1010 [(set_attr "type" "sselog")
1011 (set_attr "mode" "V4SF")])
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1015 ;; Parallel single-precision floating point conversion operations
1017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts V2SI operand 2 (MMX register "y" or memory) to floating point
;; and combines it with V4SF operand 1, which is tied to the output:
;; emits cvtpi2ps.
1019 (define_insn "sse_cvtpi2ps"
1020 [(set (match_operand:V4SF 0 "register_operand" "=x")
1023 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1024 (match_operand:V4SF 1 "register_operand" "0")
1027 "cvtpi2ps\t{%2, %0|%0, %2}"
1028 [(set_attr "type" "ssecvt")
1029 (set_attr "mode" "V4SF")])
;; Converts V4SF operand 1 to integers (as an unspec) and selects elements
;; 0 and 1 into a V2SI MMX register: emits cvtps2pi.  "unit" is mmx.
;; NOTE(review): "mode" is DI here but SF on the truncating twin
;; sse_cvttps2pi -- confirm whether the difference is intentional.
1031 (define_insn "sse_cvtps2pi"
1032 [(set (match_operand:V2SI 0 "register_operand" "=y")
1034 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1036 (parallel [(const_int 0) (const_int 1)])))]
1038 "cvtps2pi\t{%1, %0|%0, %1}"
1039 [(set_attr "type" "ssecvt")
1040 (set_attr "unit" "mmx")
1041 (set_attr "mode" "DI")])
;; Converts V4SF operand 1 to integers with (fix ...) and selects elements
;; 0 and 1 into a V2SI MMX register: emits cvttps2pi.  "unit" is mmx.
;; NOTE(review): "mode" is SF here but DI on sse_cvtps2pi -- confirm
;; whether the difference is intentional.
1043 (define_insn "sse_cvttps2pi"
1044 [(set (match_operand:V2SI 0 "register_operand" "=y")
1046 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1047 (parallel [(const_int 0) (const_int 1)])))]
1049 "cvttps2pi\t{%1, %0|%0, %1}"
1050 [(set_attr "type" "ssecvt")
1051 (set_attr "unit" "mmx")
1052 (set_attr "mode" "SF")])
;; Converts SImode operand 2 (general register or memory) to SF and
;; combines it with V4SF operand 1, which is tied to the output: emits
;; cvtsi2ss.  The two alternatives differ only in decode attributes.
1054 (define_insn "sse_cvtsi2ss"
1055 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1058 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1059 (match_operand:V4SF 1 "register_operand" "0,0")
1062 "cvtsi2ss\t{%2, %0|%0, %2}"
1063 [(set_attr "type" "sseicvt")
1064 (set_attr "athlon_decode" "vector,double")
1065 (set_attr "amdfam10_decode" "vector,double")
1066 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode-source variant of cvtsi2ss, enabled only when both
;; TARGET_SSE and TARGET_64BIT hold.
;; NOTE(review): the second alternative of operand 2 is "rm", so it also
;; accepts a register, whereas sse_cvtsi2ss uses plain "m" -- confirm.
1068 (define_insn "sse_cvtsi2ssq"
1069 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1072 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1073 (match_operand:V4SF 1 "register_operand" "0,0")
1075 "TARGET_SSE && TARGET_64BIT"
1076 "cvtsi2ssq\t{%2, %0|%0, %2}"
1077 [(set_attr "type" "sseicvt")
1078 (set_attr "athlon_decode" "vector,double")
1079 (set_attr "amdfam10_decode" "vector,double")
1080 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of V4SF operand 1 (SSE register or memory
;; alternative) is converted under UNSPEC_FIX_NOTRUNC into an SImode
;; general register.
1082 (define_insn "sse_cvtss2si"
1083 [(set (match_operand:SI 0 "register_operand" "=r,r")
1086 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1087 (parallel [(const_int 0)]))]
1088 UNSPEC_FIX_NOTRUNC))]
1090 "cvtss2si\t{%1, %0|%0, %1}"
1091 [(set_attr "type" "sseicvt")
1092 (set_attr "athlon_decode" "double,vector")
1093 (set_attr "prefix_rep" "1")
1094 (set_attr "mode" "SI")])
;; cvtss2si with a scalar SFmode source: operand 1 is wrapped directly in
;; the UNSPEC_FIX_NOTRUNC unspec, with no vector element selection.
1096 (define_insn "sse_cvtss2si_2"
1097 [(set (match_operand:SI 0 "register_operand" "=r,r")
1098 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1099 UNSPEC_FIX_NOTRUNC))]
1101 "cvtss2si\t{%1, %0|%0, %1}"
1102 [(set_attr "type" "sseicvt")
1103 (set_attr "athlon_decode" "double,vector")
1104 (set_attr "amdfam10_decode" "double,double")
1105 (set_attr "prefix_rep" "1")
1106 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-result form of cvtss2si on element 0 of V4SF
;; operand 1; requires TARGET_SSE && TARGET_64BIT.
1108 (define_insn "sse_cvtss2siq"
1109 [(set (match_operand:DI 0 "register_operand" "=r,r")
1112 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1113 (parallel [(const_int 0)]))]
1114 UNSPEC_FIX_NOTRUNC))]
1115 "TARGET_SSE && TARGET_64BIT"
1116 "cvtss2siq\t{%1, %0|%0, %1}"
1117 [(set_attr "type" "sseicvt")
1118 (set_attr "athlon_decode" "double,vector")
1119 (set_attr "prefix_rep" "1")
1120 (set_attr "mode" "DI")])
;; cvtss2siq with a scalar SFmode source wrapped in UNSPEC_FIX_NOTRUNC;
;; requires TARGET_SSE && TARGET_64BIT.
1122 (define_insn "sse_cvtss2siq_2"
1123 [(set (match_operand:DI 0 "register_operand" "=r,r")
1124 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1125 UNSPEC_FIX_NOTRUNC))]
1126 "TARGET_SSE && TARGET_64BIT"
1127 "cvtss2siq\t{%1, %0|%0, %1}"
1128 [(set_attr "type" "sseicvt")
1129 (set_attr "athlon_decode" "double,vector")
1130 (set_attr "amdfam10_decode" "double,double")
1131 (set_attr "prefix_rep" "1")
1132 (set_attr "mode" "DI")])
;; cvttss2si: operates on element 0 of V4SF operand 1 (register or memory
;; alternative) and writes an SImode general register.
1134 (define_insn "sse_cvttss2si"
1135 [(set (match_operand:SI 0 "register_operand" "=r,r")
1138 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1139 (parallel [(const_int 0)]))))]
1141 "cvttss2si\t{%1, %0|%0, %1}"
1142 [(set_attr "type" "sseicvt")
1143 (set_attr "athlon_decode" "double,vector")
1144 (set_attr "amdfam10_decode" "double,double")
1145 (set_attr "prefix_rep" "1")
1146 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result form of cvttss2si on element 0 of V4SF
;; operand 1; requires TARGET_SSE && TARGET_64BIT.
1148 (define_insn "sse_cvttss2siq"
1149 [(set (match_operand:DI 0 "register_operand" "=r,r")
1152 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1153 (parallel [(const_int 0)]))))]
1154 "TARGET_SSE && TARGET_64BIT"
1155 "cvttss2siq\t{%1, %0|%0, %1}"
1156 [(set_attr "type" "sseicvt")
1157 (set_attr "athlon_decode" "double,vector")
1158 (set_attr "amdfam10_decode" "double,double")
1159 (set_attr "prefix_rep" "1")
1160 (set_attr "mode" "DI")])
;; cvtdq2ps: float:V4SF of the V4SI operand 1 (SSE register or memory).
1162 (define_insn "sse2_cvtdq2ps"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1164 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1166 "cvtdq2ps\t{%1, %0|%0, %1}"
1167 [(set_attr "type" "ssecvt")
1168 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF -> V4SI conversion expressed as UNSPEC_FIX_NOTRUNC
;; (contrast sse2_cvttps2dq, which uses the "fix" RTL code).
1170 (define_insn "sse2_cvtps2dq"
1171 [(set (match_operand:V4SI 0 "register_operand" "=x")
1172 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1173 UNSPEC_FIX_NOTRUNC))]
1175 "cvtps2dq\t{%1, %0|%0, %1}"
1176 [(set_attr "type" "ssecvt")
1177 (set_attr "prefix_data16" "1")
1178 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF -> V4SI conversion expressed with the "fix" RTL code.
1180 (define_insn "sse2_cvttps2dq"
1181 [(set (match_operand:V4SI 0 "register_operand" "=x")
1182 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1184 "cvttps2dq\t{%1, %0|%0, %1}"
1185 [(set_attr "type" "ssecvt")
1186 (set_attr "prefix_rep" "1")
1187 (set_attr "mode" "TI")])
1189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1191 ;; Parallel single-precision floating point element swizzling
1193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps family.  Three alternatives, one output template each:
;;   0: register/register        -> movhlps
;;   1: operand 2 in offsettable memory ("o") -> movlps from %H2
;;   2: memory destination       -> movhps
;; The condition rejects the case where operands 1 and 2 are both in memory.
1195 (define_insn "sse_movhlps"
1196 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1199 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1200 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1201 (parallel [(const_int 6)
1205 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1207 movhlps\t{%2, %0|%0, %2}
1208 movlps\t{%H2, %0|%0, %H2}
1209 movhps\t{%2, %0|%0, %2}"
1210 [(set_attr "type" "ssemov")
1211 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps family.  Three alternatives, one output template each:
;;   0: register/register        -> movlhps
;;   1: operand 2 in memory      -> movhps
;;   2: offsettable memory destination ("o") -> movlps to %H0
;; Operand validity is additionally checked by ix86_binary_operator_ok.
1213 (define_insn "sse_movlhps"
1214 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1217 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1218 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1219 (parallel [(const_int 0)
1223 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1225 movlhps\t{%2, %0|%0, %2}
1226 movhps\t{%2, %0|%0, %2}
1227 movlps\t{%2, %H0|%H0, %2}"
1228 [(set_attr "type" "ssemov")
1229 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: the result is described by the element selection (2 6 3 7)
;; applied to operands 1 and 2; operand 1 is tied to the destination.
1231 (define_insn "sse_unpckhps"
1232 [(set (match_operand:V4SF 0 "register_operand" "=x")
1235 (match_operand:V4SF 1 "register_operand" "0")
1236 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1237 (parallel [(const_int 2) (const_int 6)
1238 (const_int 3) (const_int 7)])))]
1240 "unpckhps\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "sselog")
1242 (set_attr "mode" "V4SF")])
;; unpcklps: the result is described by the element selection (0 4 1 5)
;; applied to operands 1 and 2; operand 1 is tied to the destination.
1244 (define_insn "sse_unpcklps"
1245 [(set (match_operand:V4SF 0 "register_operand" "=x")
1248 (match_operand:V4SF 1 "register_operand" "0")
1249 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1250 (parallel [(const_int 0) (const_int 4)
1251 (const_int 1) (const_int 5)])))]
1253 "unpcklps\t{%2, %0|%0, %2}"
1254 [(set_attr "type" "sselog")
1255 (set_attr "mode" "V4SF")])
1257 ;; These are modeled with the same vec_concat as the others so that we
1258 ;; capture users of shufps that can use the new instructions
;; movshdup: single V4SF source (operand 1, SSE register or memory); the
;; visible part of the element selection starts with index 1.
1259 (define_insn "sse3_movshdup"
1260 [(set (match_operand:V4SF 0 "register_operand" "=x")
1263 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1265 (parallel [(const_int 1)
1270 "movshdup\t{%1, %0|%0, %1}"
1271 [(set_attr "type" "sse")
1272 (set_attr "prefix_rep" "1")
1273 (set_attr "mode" "V4SF")])
;; movsldup: single V4SF source (operand 1, SSE register or memory); the
;; visible part of the element selection starts with index 0.
1275 (define_insn "sse3_movsldup"
1276 [(set (match_operand:V4SF 0 "register_operand" "=x")
1279 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1281 (parallel [(const_int 0)
1286 "movsldup\t{%1, %0|%0, %1}"
1287 [(set_attr "type" "sse")
1288 (set_attr "prefix_rep" "1")
1289 (set_attr "mode" "V4SF")])
;; Expander for shufps taking the immediate as one const_int (operand 3).
;; The immediate is split into four 2-bit selectors; the two upper
;; selectors get +4 added before everything is handed to sse_shufps_1,
;; whose predicates expect operands in 0..3, 0..3, 4..7 and 4..7.
1291 (define_expand "sse_shufps"
1292 [(match_operand:V4SF 0 "register_operand" "")
1293 (match_operand:V4SF 1 "register_operand" "")
1294 (match_operand:V4SF 2 "nonimmediate_operand" "")
1295 (match_operand:SI 3 "const_int_operand" "")]
1298 int mask = INTVAL (operands[3]);
1299 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1300 GEN_INT ((mask >> 0) & 3),
1301 GEN_INT ((mask >> 2) & 3),
1302 GEN_INT (((mask >> 4) & 3) + 4),
1303 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the four element selectors as separate operands 3..6.
;; Operands 3 and 4 must be in 0..3, operands 5 and 6 in 4..7.  The output
;; code folds them back into one immediate (undoing the +4 bias on
;; operands 5 and 6), stores it in operands[3] and prints it via %3.
1307 (define_insn "sse_shufps_1"
1308 [(set (match_operand:V4SF 0 "register_operand" "=x")
1311 (match_operand:V4SF 1 "register_operand" "0")
1312 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1313 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1314 (match_operand 4 "const_0_to_3_operand" "")
1315 (match_operand 5 "const_4_to_7_operand" "")
1316 (match_operand 6 "const_4_to_7_operand" "")])))]
1320 mask |= INTVAL (operands[3]) << 0;
1321 mask |= INTVAL (operands[4]) << 2;
1322 mask |= (INTVAL (operands[5]) - 4) << 4;
1323 mask |= (INTVAL (operands[6]) - 4) << 6;
1324 operands[3] = GEN_INT (mask);
1326 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1328 [(set_attr "type" "sselog")
1329 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 as a V2SF value.
;;   0: memory destination            -> movhps
;;   1: register/register             -> movhlps
;;   2: source in offsettable memory  -> movlps from %H1
1331 (define_insn "sse_storehps"
1332 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1334 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1335 (parallel [(const_int 2) (const_int 3)])))]
1338 movhps\t{%1, %0|%0, %1}
1339 movhlps\t{%1, %0|%0, %1}
1340 movlps\t{%H1, %0|%0, %H1}"
1341 [(set_attr "type" "ssemov")
1342 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of V4SF operand 1 (tied to the destination)
;; with the V2SF operand 2.
;;   0: operand 2 in memory           -> movhps
;;   1: operand 2 in a register       -> movlhps
;;   2: offsettable memory destination -> movlps to %H0
1344 (define_insn "sse_loadhps"
1345 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1348 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1349 (parallel [(const_int 0) (const_int 1)]))
1350 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1353 movhps\t{%2, %0|%0, %2}
1354 movlhps\t{%2, %0|%0, %2}
1355 movlps\t{%2, %H0|%H0, %2}"
1356 [(set_attr "type" "ssemov")
1357 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 as a V2SF value.
;;   0: memory destination   -> movlps
;;   1: register/register    -> movaps (whole-register copy)
;;   2: source in memory     -> movlps
1359 (define_insn "sse_storelps"
1360 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1362 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1363 (parallel [(const_int 0) (const_int 1)])))]
1366 movlps\t{%1, %0|%0, %1}
1367 movaps\t{%1, %0|%0, %1}
1368 movlps\t{%1, %0|%0, %1}"
1369 [(set_attr "type" "ssemov")
1370 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines the V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied to the destination -> movlps
;;   2: memory destination, operand 2 in a register            -> movlps
1372 (define_insn "sse_loadlps"
1373 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1375 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1377 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1378 (parallel [(const_int 2) (const_int 3)]))))]
1381 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1382 movlps\t{%2, %0|%0, %2}
1383 movlps\t{%2, %0|%0, %2}"
1384 [(set_attr "type" "sselog,ssemov,ssemov")
1385 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers: V4SF operand 2 is the source printed in the
;; template, V4SF operand 1 is tied to the destination.
1387 (define_insn "sse_movss"
1388 [(set (match_operand:V4SF 0 "register_operand" "=x")
1390 (match_operand:V4SF 2 "register_operand" "x")
1391 (match_operand:V4SF 1 "register_operand" "0")
1394 "movss\t{%2, %0|%0, %2}"
1395 [(set_attr "type" "ssemov")
1396 (set_attr "mode" "SF")])
;; Builds a V4SF from the SFmode operand 1, which is tied to the
;; destination; emitted as shufps with immediate 0 using the destination
;; register as both sources.
1398 (define_insn "*vec_dupv4sf"
1399 [(set (match_operand:V4SF 0 "register_operand" "=x")
1401 (match_operand:SF 1 "register_operand" "0")))]
1403 "shufps\t{$0, %0, %0|%0, %0, 0}"
1404 [(set_attr "type" "sselog1")
1405 (set_attr "mode" "V4SF")])
1407 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1408 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1409 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SF from two SFmode operands.  Alternatives:
;;   0: SSE, operand 1 tied, operand 2 in a register -> unpcklps
;;   1: SSE, operand 1 in memory, operand 2 "C"      -> movss
;;   2: MMX, operand 1 tied, operand 2 in a register -> punpckldq
;;   3: MMX, operand 1 in memory, operand 2 "C"      -> movd
1410 (define_insn "*sse_concatv2sf"
1411 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1413 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1414 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1417 unpcklps\t{%2, %0|%0, %2}
1418 movss\t{%1, %0|%0, %1}
1419 punpckldq\t{%2, %0|%0, %2}
1420 movd\t{%1, %0|%0, %1}"
1421 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1422 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Forms a V4SF from two V2SF operands; operand 1 is tied to the
;; destination.  Operand 2 in a register uses movlhps, in memory movhps.
1424 (define_insn "*sse_concatv4sf"
1425 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1427 (match_operand:V2SF 1 "register_operand" " 0,0")
1428 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1431 movlhps\t{%2, %0|%0, %2}
1432 movhps\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "ssemov")
1434 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialisation: delegates entirely to
;; ix86_expand_vector_init, passing "false" as its first argument.
1436 (define_expand "vec_initv4sf"
1437 [(match_operand:V4SF 0 "register_operand" "")
1438 (match_operand 1 "" "")]
1441 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Places the SFmode operand 2 into a V4SF whose remaining contents come
;; from operand 1 (either tied to the destination, "0", or the "C"
;; constant).  The constraints list four alternatives.
;; NOTE(review): only three output templates (movss, movss, movd) are
;; visible in this excerpt; the one for the memory-destination alternative
;; is not shown here.
1445 (define_insn "vec_setv4sf_0"
1446 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1449 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1450 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1454 movss\t{%2, %0|%0, %2}
1455 movss\t{%2, %0|%0, %2}
1456 movd\t{%2, %0|%0, %2}
1458 [(set_attr "type" "ssemov")
1459 (set_attr "mode" "SF")])
1461 ;; A subset is vec_setv4sf.
;; insertps-based element set.  Operand 3 must satisfy
;; const_pow2_1_to_8_operand; the output code replaces it with
;; exact_log2 (operand 3) << 4, which is then printed as the insertps
;; immediate.
1462 (define_insn "*vec_setv4sf_sse4_1"
1463 [(set (match_operand:V4SF 0 "register_operand" "=x")
1466 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1467 (match_operand:V4SF 1 "register_operand" "0")
1468 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1471 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1472 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1474 [(set_attr "type" "sselog")
1475 (set_attr "prefix_extra" "1")
1476 (set_attr "mode" "V4SF")])
;; insertps with the raw immediate: an unspec over V4SF operand 2 (source
;; register), V4SF operand 1 (tied to the destination) and an immediate
;; operand 3 in the range accepted by const_0_to_255_operand.
;; The output template is a plain string, so it takes no trailing ';'
;; (that is only needed after "return" in a brace-enclosed C body).
1478 (define_insn "sse4_1_insertps"
1479 [(set (match_operand:V4SF 0 "register_operand" "=x")
1480 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1481 (match_operand:V4SF 1 "register_operand" "0")
1482 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1485 "insertps\t{%3, %2, %0|%0, %2, %3}"
1486 [(set_attr "type" "sselog")
1487 (set_attr "prefix_extra" "1")
1488 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (the condition requires reload_completed) for a
;; V4SF memory destination and a non-memory SFmode operand 1: it emits a
;; plain move of operand 1 into the SFmode view of operand 0 at offset 0.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt; presumably it is a define_split -- confirm.
1491 [(set (match_operand:V4SF 0 "memory_operand" "")
1494 (match_operand:SF 1 "nonmemory_operand" ""))
1497 "TARGET_SSE && reload_completed"
1500 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Sets one element of a V4SF: delegates to ix86_expand_vector_set with
;; the destination, the SFmode value and the constant element index
;; (operand 2).
1504 (define_expand "vec_setv4sf"
1505 [(match_operand:V4SF 0 "register_operand" "")
1506 (match_operand:SF 1 "register_operand" "")
1507 (match_operand 2 "const_int_operand" "")]
1510 ix86_expand_vector_set (false, operands[0], operands[1],
1511 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF.  The insn condition forbids a
;; memory-to-memory form.  After reload it is split into an ordinary SFmode
;; move: operand 1 is re-expressed in SFmode either with gen_rtx_REG on its
;; register number or with gen_lowpart (the test choosing between the two
;; is not visible in this excerpt).
1515 (define_insn_and_split "*vec_extractv4sf_0"
1516 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1518 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1519 (parallel [(const_int 0)])))]
1520 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1522 "&& reload_completed"
1525 rtx op1 = operands[1];
1527 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1529 op1 = gen_lowpart (SFmode, op1);
1530 emit_move_insn (operands[0], op1);
;; extractps: selects the element of V4SF operand 1 given by the constant
;; operand 2 (0..3) and writes it to a general register or memory ("rm").
1534 (define_insn "*sse4_1_extractps"
1535 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1537 (match_operand:V4SF 1 "register_operand" "x")
1538 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1540 "extractps\t{%2, %1, %0|%0, %1, %2}"
1541 [(set_attr "type" "sselog")
1542 (set_attr "prefix_extra" "1")
1543 (set_attr "mode" "V4SF")])
;; Extracts one element of a V4SF: delegates to ix86_expand_vector_extract
;; with the SFmode destination, the vector and the constant element index
;; (operand 2).
1545 (define_expand "vec_extractv4sf"
1546 [(match_operand:SF 0 "register_operand" "")
1547 (match_operand:V4SF 1 "register_operand" "")
1548 (match_operand 2 "const_int_operand" "")]
1551 ix86_expand_vector_extract (false, operands[0], operands[1],
1552 INTVAL (operands[2]));
1556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1558 ;; Parallel double-precision floating point arithmetic
1560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation: expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG, ...); DONE suppresses the pattern.
1562 (define_expand "negv2df2"
1563 [(set (match_operand:V2DF 0 "register_operand" "")
1564 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1566 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value: expansion is done entirely by
;; ix86_expand_fp_absneg_operator (ABS, ...); DONE suppresses the pattern.
1568 (define_expand "absv2df2"
1569 [(set (match_operand:V2DF 0 "register_operand" "")
1570 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1572 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander: operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the pattern itself is emitted.
1574 (define_expand "addv2df3"
1575 [(set (match_operand:V2DF 0 "register_operand" "")
1576 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1577 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1579 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd: packed V2DF add.  Operand 1 is marked commutative ("%") and tied
;; to the destination; operand 2 may be a register or memory.
1581 (define_insn "*addv2df3"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x")
1583 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1584 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1585 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1586 "addpd\t{%2, %0|%0, %2}"
1587 [(set_attr "type" "sseadd")
1588 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands: the plus:V2DF of operands 1 and 2 feeds the
;; scalar instruction (the "mode" attribute is DF).  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
;; The operand check is made in V2DFmode, the mode of this pattern's
;; operands, matching *addv2df3 and sse2_vmmulv2df3.
1590 (define_insn "sse2_vmaddv2df3"
1591 [(set (match_operand:V2DF 0 "register_operand" "=x")
1593 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1594 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1597 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1598 "addsd\t{%2, %0|%0, %2}"
1599 [(set_attr "type" "sseadd")
1600 (set_attr "mode" "DF")])
;; V2DF subtraction expander: operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the pattern itself is emitted.
1602 (define_expand "subv2df3"
1603 [(set (match_operand:V2DF 0 "register_operand" "")
1604 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1605 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1607 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd: packed V2DF subtract.  Not commutative, so operand 1 must be a
;; register tied to the destination; operand 2 may be register or memory.
1609 (define_insn "*subv2df3"
1610 [(set (match_operand:V2DF 0 "register_operand" "=x")
1611 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1612 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1614 "subpd\t{%2, %0|%0, %2}"
1615 [(set_attr "type" "sseadd")
1616 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands: the minus:V2DF of operands 1 and 2 feeds the
;; scalar instruction (the "mode" attribute is DF); operand 1 is tied to
;; the destination.
1618 (define_insn "sse2_vmsubv2df3"
1619 [(set (match_operand:V2DF 0 "register_operand" "=x")
1621 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1622 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1626 "subsd\t{%2, %0|%0, %2}"
1627 [(set_attr "type" "sseadd")
1628 (set_attr "mode" "DF")])
;; V2DF multiplication expander: operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the pattern itself is emitted.
1630 (define_expand "mulv2df3"
1631 [(set (match_operand:V2DF 0 "register_operand" "")
1632 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1633 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1635 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd: packed V2DF multiply.  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
1637 (define_insn "*mulv2df3"
1638 [(set (match_operand:V2DF 0 "register_operand" "=x")
1639 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1640 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1641 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1642 "mulpd\t{%2, %0|%0, %2}"
1643 [(set_attr "type" "ssemul")
1644 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands: the mult:V2DF of operands 1 and 2 feeds the
;; scalar instruction (the "mode" attribute is DF); operand 1 is tied to
;; the destination.
1646 (define_insn "sse2_vmmulv2df3"
1647 [(set (match_operand:V2DF 0 "register_operand" "=x")
1649 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1650 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1653 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1654 "mulsd\t{%2, %0|%0, %2}"
1655 [(set_attr "type" "ssemul")
1656 (set_attr "mode" "DF")])
;; V2DF division expander.  Unlike the add/sub/mul expanders, operand 1 is
;; already required to be a register here; operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy.
1658 (define_expand "divv2df3"
1659 [(set (match_operand:V2DF 0 "register_operand" "")
1660 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1661 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1663 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd: packed V2DF divide; operand 1 is a register tied to the
;; destination, operand 2 may be a register or memory.
1665 (define_insn "*divv2df3"
1666 [(set (match_operand:V2DF 0 "register_operand" "=x")
1667 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1668 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1670 "divpd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "ssediv")
1672 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands: the div:V2DF of operands 1 and 2 feeds the
;; scalar instruction (the "mode" attribute is DF); operand 1 is tied to
;; the destination.
1674 (define_insn "sse2_vmdivv2df3"
1675 [(set (match_operand:V2DF 0 "register_operand" "=x")
1677 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1678 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1682 "divsd\t{%2, %0|%0, %2}"
1683 [(set_attr "type" "ssediv")
1684 (set_attr "mode" "DF")])
;; sqrtpd: packed V2DF square root of operand 1 (register or memory); the
;; destination is not tied to the source.
1686 (define_insn "sqrtv2df2"
1687 [(set (match_operand:V2DF 0 "register_operand" "=x")
1688 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1690 "sqrtpd\t{%1, %0|%0, %1}"
1691 [(set_attr "type" "sse")
1692 (set_attr "mode" "V2DF")])
;; sqrtsd: sqrt:V2DF of operand 1 (register or memory) combined with
;; V2DF operand 2, which is tied to the destination; the "mode" attribute
;; is DF.
1694 (define_insn "sse2_vmsqrtv2df2"
1695 [(set (match_operand:V2DF 0 "register_operand" "=x")
1697 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1698 (match_operand:V2DF 2 "register_operand" "0")
1701 "sqrtsd\t{%1, %0|%0, %1}"
1702 [(set_attr "type" "sse")
1703 (set_attr "mode" "DF")])
1705 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1706 ;; isn't really correct, as those rtl operators aren't defined when
1707 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  When flag_finite_math_only is off, operand 1 is
;; forced into a register before the usual operand fixup, matching the
;; non-commutative *smaxv2df3 insn, which requires a register operand 1.
1709 (define_expand "smaxv2df3"
1710 [(set (match_operand:V2DF 0 "register_operand" "")
1711 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1712 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1715 if (!flag_finite_math_only)
1716 operands[1] = force_reg (V2DFmode, operands[1]);
1717 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form: only enabled with flag_finite_math_only, where
;; operand 1 carries the "%" commutativity marker.
1720 (define_insn "*smaxv2df3_finite"
1721 [(set (match_operand:V2DF 0 "register_operand" "=x")
1722 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1723 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1724 "TARGET_SSE2 && flag_finite_math_only
1725 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1726 "maxpd\t{%2, %0|%0, %2}"
1727 [(set_attr "type" "sseadd")
1728 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so operand order is preserved.
1730 (define_insn "*smaxv2df3"
1731 [(set (match_operand:V2DF 0 "register_operand" "=x")
1732 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1733 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1735 "maxpd\t{%2, %0|%0, %2}"
1736 [(set_attr "type" "sseadd")
1737 (set_attr "mode" "V2DF")])
;; maxsd on V2DF operands: the smax:V2DF of operands 1 and 2 feeds the
;; scalar instruction (the "mode" attribute is DF); operand 1 is tied to
;; the destination.
1739 (define_insn "sse2_vmsmaxv2df3"
1740 [(set (match_operand:V2DF 0 "register_operand" "=x")
1742 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1743 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1747 "maxsd\t{%2, %0|%0, %2}"
1748 [(set_attr "type" "sseadd")
1749 (set_attr "mode" "DF")])
;; V2DF minimum expander.  When flag_finite_math_only is off, operand 1 is
;; forced into a register before the usual operand fixup, matching the
;; non-commutative *sminv2df3 insn, which requires a register operand 1.
1751 (define_expand "sminv2df3"
1752 [(set (match_operand:V2DF 0 "register_operand" "")
1753 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1754 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1757 if (!flag_finite_math_only)
1758 operands[1] = force_reg (V2DFmode, operands[1]);
1759 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form: only enabled with flag_finite_math_only, where
;; operand 1 carries the "%" commutativity marker.
1762 (define_insn "*sminv2df3_finite"
1763 [(set (match_operand:V2DF 0 "register_operand" "=x")
1764 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1765 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1766 "TARGET_SSE2 && flag_finite_math_only
1767 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1768 "minpd\t{%2, %0|%0, %2}"
1769 [(set_attr "type" "sseadd")
1770 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" marker, so operand order is preserved.
1772 (define_insn "*sminv2df3"
1773 [(set (match_operand:V2DF 0 "register_operand" "=x")
1774 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1775 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1777 "minpd\t{%2, %0|%0, %2}"
1778 [(set_attr "type" "sseadd")
1779 (set_attr "mode" "V2DF")])
;; minsd on V2DF operands: the smin:V2DF of operands 1 and 2 feeds the
;; scalar instruction (the "mode" attribute is DF); operand 1 is tied to
;; the destination.
1781 (define_insn "sse2_vmsminv2df3"
1782 [(set (match_operand:V2DF 0 "register_operand" "=x")
1784 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1785 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1789 "minsd\t{%2, %0|%0, %2}"
1790 [(set_attr "type" "sseadd")
1791 (set_attr "mode" "DF")])
;; addsubpd: the pattern uses operands 1 and 2 twice -- once directly and
;; once, via match_dup, in a minus:V2DF.  Operand 1 is tied to the
;; destination.
1793 (define_insn "sse3_addsubv2df3"
1794 [(set (match_operand:V2DF 0 "register_operand" "=x")
1797 (match_operand:V2DF 1 "register_operand" "0")
1798 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1799 (minus:V2DF (match_dup 1) (match_dup 2))
1802 "addsubpd\t{%2, %0|%0, %2}"
1803 [(set_attr "type" "sseadd")
1804 (set_attr "mode" "V2DF")])
;; haddpd: the pattern pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is tied to the
;; destination.
1806 (define_insn "sse3_haddv2df3"
1807 [(set (match_operand:V2DF 0 "register_operand" "=x")
1811 (match_operand:V2DF 1 "register_operand" "0")
1812 (parallel [(const_int 0)]))
1813 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1816 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1817 (parallel [(const_int 0)]))
1818 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1820 "haddpd\t{%2, %0|%0, %2}"
1821 [(set_attr "type" "sseadd")
1822 (set_attr "mode" "V2DF")])
;; hsubpd: same operand shape as sse3_haddv2df3 -- element 0 paired with
;; element 1 of operand 1, and likewise for operand 2.  Operand 1 is tied
;; to the destination.
1824 (define_insn "sse3_hsubv2df3"
1825 [(set (match_operand:V2DF 0 "register_operand" "=x")
1829 (match_operand:V2DF 1 "register_operand" "0")
1830 (parallel [(const_int 0)]))
1831 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1834 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1835 (parallel [(const_int 0)]))
1836 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1838 "hsubpd\t{%2, %0|%0, %2}"
1839 [(set_attr "type" "sseadd")
1840 (set_attr "mode" "V2DF")])
;; Sum reduction of a V2DF: emits sse3_haddv2df3 with operand 1 supplied as
;; both source operands.
1842 (define_expand "reduc_splus_v2df"
1843 [(match_operand:V2DF 0 "register_operand" "")
1844 (match_operand:V2DF 1 "register_operand" "")]
1847 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1853 ;; Parallel double-precision floating point comparisons
1855 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V2DF comparison.  Operator 3 must satisfy
;; sse_comparison_operator; it is printed with the %D3 modifier to form
;; the cmp<cond>pd mnemonic.  Operand 1 is tied to the destination.
1857 (define_insn "sse2_maskcmpv2df3"
1858 [(set (match_operand:V2DF 0 "register_operand" "=x")
1859 (match_operator:V2DF 3 "sse_comparison_operator"
1860 [(match_operand:V2DF 1 "register_operand" "0")
1861 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1863 "cmp%D3pd\t{%2, %0|%0, %2}"
1864 [(set_attr "type" "ssecmp")
1865 (set_attr "mode" "V2DF")])
;; Scalar DFmode comparison: same scheme as sse2_maskcmpv2df3 but with DF
;; operands and the cmp<cond>sd mnemonic.
1867 (define_insn "sse2_maskcmpdf3"
1868 [(set (match_operand:DF 0 "register_operand" "=x")
1869 (match_operator:DF 3 "sse_comparison_operator"
1870 [(match_operand:DF 1 "register_operand" "0")
1871 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1873 "cmp%D3sd\t{%2, %0|%0, %2}"
1874 [(set_attr "type" "ssecmp")
1875 (set_attr "mode" "DF")])
;; cmp<cond>sd on V2DF operands: the V2DF comparison (operator 3) of
;; operands 1 and 2 feeds the scalar instruction (the "mode" attribute is
;; DF); operand 1 is tied to the destination.
1877 (define_insn "sse2_vmmaskcmpv2df3"
1878 [(set (match_operand:V2DF 0 "register_operand" "=x")
1880 (match_operator:V2DF 3 "sse_comparison_operator"
1881 [(match_operand:V2DF 1 "register_operand" "0")
1882 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1886 "cmp%D3sd\t{%2, %0|%0, %2}"
1887 [(set_attr "type" "ssecmp")
1888 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFPmode from element 0 of V2DF operand 0 and
;; element 0 of V2DF operand 1.  There is no register output, so operand 0
;; is an input here.
1890 (define_insn "sse2_comi"
1891 [(set (reg:CCFP FLAGS_REG)
1894 (match_operand:V2DF 0 "register_operand" "x")
1895 (parallel [(const_int 0)]))
1897 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1898 (parallel [(const_int 0)]))))]
1900 "comisd\t{%1, %0|%0, %1}"
1901 [(set_attr "type" "ssecomi")
1902 (set_attr "mode" "DF")])
;; ucomisd: same shape as sse2_comi, but FLAGS_REG is set in CCFPUmode.
1904 (define_insn "sse2_ucomi"
1905 [(set (reg:CCFPU FLAGS_REG)
1908 (match_operand:V2DF 0 "register_operand" "x")
1909 (parallel [(const_int 0)]))
1911 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1912 (parallel [(const_int 0)]))))]
1914 "ucomisd\t{%1, %0|%0, %1}"
1915 [(set_attr "type" "ssecomi")
1916 (set_attr "mode" "DF")])
;; V2DF vector conditional: operator 3 compares operands 4 and 5, and
;; operands 1 and 2 are the two V2DF values chosen between.  Expansion is
;; delegated to ix86_expand_fp_vcond; what follows its test is not visible
;; in this excerpt.
1918 (define_expand "vcondv2df"
1919 [(set (match_operand:V2DF 0 "register_operand" "")
1921 (match_operator 3 ""
1922 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1923 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1924 (match_operand:V2DF 1 "general_operand" "")
1925 (match_operand:V2DF 2 "general_operand" "")))]
1928 if (ix86_expand_fp_vcond (operands))
1934 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1936 ;; Parallel double-precision floating point logical operations
1938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander: operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the pattern itself is emitted.
1940 (define_expand "andv2df3"
1941 [(set (match_operand:V2DF 0 "register_operand" "")
1942 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1943 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1945 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd: packed V2DF bitwise AND.  Operand 1 is marked commutative ("%")
;; and tied to the destination; operand 2 may be a register or memory.
1947 (define_insn "*andv2df3"
1948 [(set (match_operand:V2DF 0 "register_operand" "=x")
1949 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1950 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1951 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1952 "andpd\t{%2, %0|%0, %2}"
1953 [(set_attr "type" "sselog")
1954 (set_attr "mode" "V2DF")])
;; andnpd: (NOT operand 1) AND operand 2.  The complemented operand 1 is
;; the one tied to the destination; the pattern is not commutative.
1956 (define_insn "sse2_nandv2df3"
1957 [(set (match_operand:V2DF 0 "register_operand" "=x")
1958 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1959 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1961 "andnpd\t{%2, %0|%0, %2}"
1962 [(set_attr "type" "sselog")
1963 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander: operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the pattern itself is emitted.
1965 (define_expand "iorv2df3"
1966 [(set (match_operand:V2DF 0 "register_operand" "")
1967 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1968 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1970 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd: packed V2DF bitwise OR.  Operand 1 is marked commutative ("%")
;; and tied to the destination; operand 2 may be a register or memory.
1972 (define_insn "*iorv2df3"
1973 [(set (match_operand:V2DF 0 "register_operand" "=x")
1974 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1975 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1976 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1977 "orpd\t{%2, %0|%0, %2}"
1978 [(set_attr "type" "sselog")
1979 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander: operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the pattern itself is emitted.
1981 (define_expand "xorv2df3"
1982 [(set (match_operand:V2DF 0 "register_operand" "")
1983 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1984 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1986 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd: packed V2DF bitwise XOR.  Operand 1 is marked commutative ("%")
;; and tied to the destination; operand 2 may be a register or memory.
1988 (define_insn "*xorv2df3"
1989 [(set (match_operand:V2DF 0 "register_operand" "=x")
1990 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1991 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1992 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1993 "xorpd\t{%2, %0|%0, %2}"
1994 [(set_attr "type" "sselog")
1995 (set_attr "mode" "V2DF")])
1997 ;; Also define scalar versions. These are used for abs, neg, and
1998 ;; conditional move. Using subregs into vector modes causes register
1999 ;; allocation lossage. These patterns do not allow memory operands
2000 ;; because the native instructions read the full 128-bits.
;; Scalar DFmode AND emitted as the packed "andpd" (the "mode" attribute is
;; therefore V2DF).  Both inputs are registers; operand 1 is tied to the
;; destination.
2002 (define_insn "*anddf3"
2003 [(set (match_operand:DF 0 "register_operand" "=x")
2004 (and:DF (match_operand:DF 1 "register_operand" "0")
2005 (match_operand:DF 2 "register_operand" "x")))]
2007 "andpd\t{%2, %0|%0, %2}"
2008 [(set_attr "type" "sselog")
2009 (set_attr "mode" "V2DF")])
;; Scalar DFmode (NOT operand 1) AND operand 2, emitted as the packed
;; "andnpd".  Register operands only; the complemented operand 1 is tied
;; to the destination.
2011 (define_insn "*nanddf3"
2012 [(set (match_operand:DF 0 "register_operand" "=x")
2013 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2014 (match_operand:DF 2 "register_operand" "x")))]
2016 "andnpd\t{%2, %0|%0, %2}"
2017 [(set_attr "type" "sselog")
2018 (set_attr "mode" "V2DF")])
;; Scalar DFmode OR emitted as the packed "orpd".  Register operands only;
;; operand 1 is tied to the destination.
2020 (define_insn "*iordf3"
2021 [(set (match_operand:DF 0 "register_operand" "=x")
2022 (ior:DF (match_operand:DF 1 "register_operand" "0")
2023 (match_operand:DF 2 "register_operand" "x")))]
2025 "orpd\t{%2, %0|%0, %2}"
2026 [(set_attr "type" "sselog")
2027 (set_attr "mode" "V2DF")])
;; Scalar DFmode XOR emitted as the packed "xorpd".  Register operands
;; only; operand 1 is tied to the destination.
2029 (define_insn "*xordf3"
2030 [(set (match_operand:DF 0 "register_operand" "=x")
2031 (xor:DF (match_operand:DF 1 "register_operand" "0")
2032 (match_operand:DF 2 "register_operand" "x")))]
2034 "xorpd\t{%2, %0|%0, %2}"
2035 [(set_attr "type" "sselog")
2036 (set_attr "mode" "V2DF")])
2038 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2040 ;; Parallel double-precision floating point conversion operations
2042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of the V2SI operand 1.  Alternative 0 takes an MMX
;; register ("y") and is tagged unit "mmx"; alternative 1 takes memory and
;; leaves the unit at its default ("*").
2044 (define_insn "sse2_cvtpi2pd"
2045 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2046 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2048 "cvtpi2pd\t{%1, %0|%0, %1}"
2049 [(set_attr "type" "ssecvt")
2050 (set_attr "unit" "mmx,*")
2051 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF -> V2SI conversion expressed as UNSPEC_FIX_NOTRUNC; the
;; result lives in an MMX ("y") register.
2053 (define_insn "sse2_cvtpd2pi"
2054 [(set (match_operand:V2SI 0 "register_operand" "=y")
2055 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2056 UNSPEC_FIX_NOTRUNC))]
2058 "cvtpd2pi\t{%1, %0|%0, %1}"
2059 [(set_attr "type" "ssecvt")
2060 (set_attr "unit" "mmx")
2061 (set_attr "prefix_data16" "1")
2062 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF -> V2SI conversion expressed with the "fix" RTL code;
;; the result lives in an MMX ("y") register.
;; NOTE(review): the "mode" attribute is "TI" here but "DI" in
;; sse2_cvtpd2pi -- confirm the difference is intended.
2064 (define_insn "sse2_cvttpd2pi"
2065 [(set (match_operand:V2SI 0 "register_operand" "=y")
2066 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2068 "cvttpd2pi\t{%1, %0|%0, %1}"
2069 [(set_attr "type" "ssecvt")
2070 (set_attr "unit" "mmx")
2071 (set_attr "prefix_data16" "1")
2072 (set_attr "mode" "TI")])
;; cvtsi2sd: float:DF of the SImode operand 2, with one alternative for a
;; general register ("r") and one for memory ("m").  V2DF operand 1 is tied
;; to the destination.  The decode attributes are listed per alternative.
2074 (define_insn "sse2_cvtsi2sd"
2075 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2078 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2079 (match_operand:V2DF 1 "register_operand" "0,0")
2082 "cvtsi2sd\t{%2, %0|%0, %2}"
2083 [(set_attr "type" "sseicvt")
2084 (set_attr "mode" "DF")
2085 (set_attr "athlon_decode" "double,direct")
2086 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source variant of cvtsi2sd, enabled only when both
;; TARGET_SSE2 and TARGET_64BIT hold.
2088 (define_insn "sse2_cvtsi2sdq"
2089 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2092 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2093 (match_operand:V2DF 1 "register_operand" "0,0")
2095 "TARGET_SSE2 && TARGET_64BIT"
2096 "cvtsi2sdq\t{%2, %0|%0, %2}"
2097 [(set_attr "type" "sseicvt")
2098 (set_attr "mode" "DF")
2099 (set_attr "athlon_decode" "double,direct")
2100 (set_attr "amdfam10_decode" "vector,double")])
;; Convert index 0 of V2DF operand 1 (register or memory) to SImode
;; through UNSPEC_FIX_NOTRUNC; emits cvtsd2si into an "r" register.
2102 (define_insn "sse2_cvtsd2si"
2103 [(set (match_operand:SI 0 "register_operand" "=r,r")
2106 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2107 (parallel [(const_int 0)]))]
2108 UNSPEC_FIX_NOTRUNC))]
2110 "cvtsd2si\t{%1, %0|%0, %1}"
2111 [(set_attr "type" "sseicvt")
2112 (set_attr "athlon_decode" "double,vector")
2113 (set_attr "prefix_rep" "1")
2114 (set_attr "mode" "SI")])
;; Scalar-source form: convert DF operand 1 (register or memory) to
;; SImode through UNSPEC_FIX_NOTRUNC; emits cvtsd2si.
2116 (define_insn "sse2_cvtsd2si_2"
2117 [(set (match_operand:SI 0 "register_operand" "=r,r")
2118 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2119 UNSPEC_FIX_NOTRUNC))]
2121 "cvtsd2si\t{%1, %0|%0, %1}"
2122 [(set_attr "type" "sseicvt")
2123 (set_attr "athlon_decode" "double,vector")
2124 (set_attr "amdfam10_decode" "double,double")
2125 (set_attr "prefix_rep" "1")
2126 (set_attr "mode" "SI")])
;; Convert index 0 of V2DF operand 1 to DImode through
;; UNSPEC_FIX_NOTRUNC; emits cvtsd2siq.  Enabled only when
;; TARGET_SSE2 && TARGET_64BIT.
2128 (define_insn "sse2_cvtsd2siq"
2129 [(set (match_operand:DI 0 "register_operand" "=r,r")
2132 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2133 (parallel [(const_int 0)]))]
2134 UNSPEC_FIX_NOTRUNC))]
2135 "TARGET_SSE2 && TARGET_64BIT"
2136 "cvtsd2siq\t{%1, %0|%0, %1}"
2137 [(set_attr "type" "sseicvt")
2138 (set_attr "athlon_decode" "double,vector")
2139 (set_attr "prefix_rep" "1")
2140 (set_attr "mode" "DI")])
;; Scalar-source form: convert DF operand 1 to DImode through
;; UNSPEC_FIX_NOTRUNC; emits cvtsd2siq.  Enabled only when
;; TARGET_SSE2 && TARGET_64BIT.
2142 (define_insn "sse2_cvtsd2siq_2"
2143 [(set (match_operand:DI 0 "register_operand" "=r,r")
2144 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2145 UNSPEC_FIX_NOTRUNC))]
2146 "TARGET_SSE2 && TARGET_64BIT"
2147 "cvtsd2siq\t{%1, %0|%0, %1}"
2148 [(set_attr "type" "sseicvt")
2149 (set_attr "athlon_decode" "double,vector")
2150 (set_attr "amdfam10_decode" "double,double")
2151 (set_attr "prefix_rep" "1")
2152 (set_attr "mode" "DI")])
;; Convert index 0 of V2DF operand 1 (register or memory) to SImode;
;; emits cvttsd2si into an "r" register.
2154 (define_insn "sse2_cvttsd2si"
2155 [(set (match_operand:SI 0 "register_operand" "=r,r")
2158 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2159 (parallel [(const_int 0)]))))]
2161 "cvttsd2si\t{%1, %0|%0, %1}"
2162 [(set_attr "type" "sseicvt")
2163 (set_attr "prefix_rep" "1")
2164 (set_attr "mode" "SI")
2165 (set_attr "athlon_decode" "double,vector")
2166 (set_attr "amdfam10_decode" "double,double")])
;; Convert index 0 of V2DF operand 1 to DImode; emits cvttsd2siq.
;; Enabled only when TARGET_SSE2 && TARGET_64BIT.
2168 (define_insn "sse2_cvttsd2siq"
2169 [(set (match_operand:DI 0 "register_operand" "=r,r")
2172 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2173 (parallel [(const_int 0)]))))]
2174 "TARGET_SSE2 && TARGET_64BIT"
2175 "cvttsd2siq\t{%1, %0|%0, %1}"
2176 [(set_attr "type" "sseicvt")
2177 (set_attr "prefix_rep" "1")
2178 (set_attr "mode" "DI")
2179 (set_attr "athlon_decode" "double,vector")
2180 (set_attr "amdfam10_decode" "double,double")])
;; Produce V2DF operand 0 from indices 0 and 1 of V4SI operand 1
;; (register or memory); emits cvtdq2pd.
2182 (define_insn "sse2_cvtdq2pd"
2183 [(set (match_operand:V2DF 0 "register_operand" "=x")
2186 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2187 (parallel [(const_int 0) (const_int 1)]))))]
2189 "cvtdq2pd\t{%1, %0|%0, %1}"
2190 [(set_attr "type" "ssecvt")
2191 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: V4SI operand 0 is built from a V2SI unspec
;; conversion of V2DF operand 1.  The preparation statement supplies
;; operands[2] as the V2SImode zero constant.
2193 (define_expand "sse2_cvtpd2dq"
2194 [(set (match_operand:V4SI 0 "register_operand" "")
2196 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2200 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn emitting cvtpd2dq: V2SI unspec conversion of V2DF operand 1
;; (register or memory) into V4SI operand 0.  Operand 2 is a V2SI
;; value that must satisfy const0_operand.
2202 (define_insn "*sse2_cvtpd2dq"
2203 [(set (match_operand:V4SI 0 "register_operand" "=x")
2205 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2207 (match_operand:V2SI 2 "const0_operand" "")))]
2209 "cvtpd2dq\t{%1, %0|%0, %1}"
2210 [(set_attr "type" "ssecvt")
2211 (set_attr "prefix_rep" "1")
2212 (set_attr "mode" "TI")
2213 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq: V4SI operand 0 is built from `fix:V2SI' of
;; V2DF operand 1.  The preparation statement supplies operands[2] as
;; the V2SImode zero constant.
2215 (define_expand "sse2_cvttpd2dq"
2216 [(set (match_operand:V4SI 0 "register_operand" "")
2218 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2221 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn emitting cvttpd2dq: `fix:V2SI' of V2DF operand 1 (register or
;; memory) into V4SI operand 0.  Operand 2 is a V2SI value that must
;; satisfy const0_operand.
2223 (define_insn "*sse2_cvttpd2dq"
2224 [(set (match_operand:V4SI 0 "register_operand" "=x")
2226 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2227 (match_operand:V2SI 2 "const0_operand" "")))]
2229 "cvttpd2dq\t{%1, %0|%0, %1}"
2230 [(set_attr "type" "ssecvt")
2231 (set_attr "prefix_rep" "1")
2232 (set_attr "mode" "TI")
2233 (set_attr "amdfam10_decode" "double")])
;; Applies `float_truncate:V2SF' to V2DF operand 2 (register or
;; memory); V4SF operand 1 is tied to the destination register.
;; Emits cvtsd2ss.
2235 (define_insn "sse2_cvtsd2ss"
2236 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2239 (float_truncate:V2SF
2240 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2241 (match_operand:V4SF 1 "register_operand" "0,0")
2244 "cvtsd2ss\t{%2, %0|%0, %2}"
2245 [(set_attr "type" "ssecvt")
2246 (set_attr "athlon_decode" "vector,double")
2247 (set_attr "amdfam10_decode" "vector,double")
2248 (set_attr "mode" "SF")])
;; Uses indices 0 and 1 of V4SF operand 2 (register or memory); V2DF
;; operand 1 is tied to the destination register.  Emits cvtss2sd.
2250 (define_insn "sse2_cvtss2sd"
2251 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2255 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2256 (parallel [(const_int 0) (const_int 1)])))
2257 (match_operand:V2DF 1 "register_operand" "0,0")
2260 "cvtss2sd\t{%2, %0|%0, %2}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "amdfam10_decode" "vector,double")
2263 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: V4SF operand 0 is built from
;; `float_truncate:V2SF' of V2DF operand 1.  The preparation statement
;; supplies operands[2] as the V2SFmode zero constant.
2265 (define_expand "sse2_cvtpd2ps"
2266 [(set (match_operand:V4SF 0 "register_operand" "")
2268 (float_truncate:V2SF
2269 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2272 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn emitting cvtpd2ps: `float_truncate:V2SF' of V2DF operand 1
;; (register or memory) into V4SF operand 0.  Operand 2 is a V2SF
;; value that must satisfy const0_operand.
2274 (define_insn "*sse2_cvtpd2ps"
2275 [(set (match_operand:V4SF 0 "register_operand" "=x")
2277 (float_truncate:V2SF
2278 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2279 (match_operand:V2SF 2 "const0_operand" "")))]
2281 "cvtpd2ps\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "prefix_data16" "1")
2284 (set_attr "mode" "V4SF")
2285 (set_attr "amdfam10_decode" "double")])
;; Produce V2DF operand 0 from indices 0 and 1 of V4SF operand 1
;; (register or memory); emits cvtps2pd.
2287 (define_insn "sse2_cvtps2pd"
2288 [(set (match_operand:V2DF 0 "register_operand" "=x")
2291 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2292 (parallel [(const_int 0) (const_int 1)]))))]
2294 "cvtps2pd\t{%1, %0|%0, %1}"
2295 [(set_attr "type" "ssecvt")
2296 (set_attr "mode" "V2DF")
2297 (set_attr "amdfam10_decode" "direct")])
;; Expander yielding V2DF operand 0 from V4SF operand 1.  operands[2]
;; is a fresh V4SF pseudo created by the preparation statement; the
;; final set selects indices 0 and 1.
;; NOTE(review): presumably operands[2] first receives the upper part
;; of operand 1 (the first selector starts at index 6) -- confirm.
2299 (define_expand "vec_unpacks_hi_v4sf"
2304 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2305 (parallel [(const_int 6)
2309 (set (match_operand:V2DF 0 "register_operand" "")
2313 (parallel [(const_int 0) (const_int 1)]))))]
2316 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander yielding V2DF operand 0 from indices 0 and 1 of V4SF
;; operand 1.
2319 (define_expand "vec_unpacks_lo_v4sf"
2320 [(set (match_operand:V2DF 0 "register_operand" "")
2323 (match_operand:V4SF 1 "nonimmediate_operand" "")
2324 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: V8HI operand 1 -> V4SF operand 0 in two steps.  A V4SI
;; temporary is filled by gen_vec_unpacks_hi_v8hi and then converted
;; by gen_sse2_cvtdq2ps.
2327 (define_expand "vec_unpacks_float_hi_v8hi"
2328 [(match_operand:V4SF 0 "register_operand" "")
2329 (match_operand:V8HI 1 "register_operand" "")]
2332 rtx tmp = gen_reg_rtx (V4SImode);
2334 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2335 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two steps.  A V4SI
;; temporary is filled by gen_vec_unpacks_lo_v8hi and then converted
;; by gen_sse2_cvtdq2ps.
2339 (define_expand "vec_unpacks_float_lo_v8hi"
2340 [(match_operand:V4SF 0 "register_operand" "")
2341 (match_operand:V8HI 1 "register_operand" "")]
2344 rtx tmp = gen_reg_rtx (V4SImode);
2346 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2347 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two steps.  A V4SI
;; temporary is filled by gen_vec_unpacku_hi_v8hi and then converted
;; by gen_sse2_cvtdq2ps.
2351 (define_expand "vec_unpacku_float_hi_v8hi"
2352 [(match_operand:V4SF 0 "register_operand" "")
2353 (match_operand:V8HI 1 "register_operand" "")]
2356 rtx tmp = gen_reg_rtx (V4SImode);
2358 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2359 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0 in two steps.  A V4SI
;; temporary is filled by gen_vec_unpacku_lo_v8hi and then converted
;; by gen_sse2_cvtdq2ps.
2363 (define_expand "vec_unpacku_float_lo_v8hi"
2364 [(match_operand:V4SF 0 "register_operand" "")
2365 (match_operand:V8HI 1 "register_operand" "")]
2368 rtx tmp = gen_reg_rtx (V4SImode);
2370 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2371 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander yielding V2DF operand 0 from V4SI operand 1.  operands[2]
;; is a fresh V4SI pseudo created by the preparation statement; the
;; final set selects indices 0 and 1.
;; NOTE(review): presumably operands[2] first receives the upper part
;; of operand 1 (the first selector starts at index 2) -- confirm.
2375 (define_expand "vec_unpacks_float_hi_v4si"
2378 (match_operand:V4SI 1 "nonimmediate_operand" "")
2379 (parallel [(const_int 2)
2383 (set (match_operand:V2DF 0 "register_operand" "")
2387 (parallel [(const_int 0) (const_int 1)]))))]
2390 operands[2] = gen_reg_rtx (V4SImode);
;; Expander yielding V2DF operand 0 from indices 0 and 1 of V4SI
;; operand 1.
2393 (define_expand "vec_unpacks_float_lo_v4si"
2394 [(set (match_operand:V2DF 0 "register_operand" "")
2397 (match_operand:V4SI 1 "nonimmediate_operand" "")
2398 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: pack V2DF operands 1 and 2 into V4SF operand 0.  Each
;; input goes through gen_sse2_cvtpd2ps into its own V4SF temporary
;; (r1, r2); gen_sse_movlhps then combines r1 and r2 into operand 0.
2401 (define_expand "vec_pack_trunc_v2df"
2402 [(match_operand:V4SF 0 "register_operand" "")
2403 (match_operand:V2DF 1 "nonimmediate_operand" "")
2404 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2409 r1 = gen_reg_rtx (V4SFmode);
2410 r2 = gen_reg_rtx (V4SFmode);
2412 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2413 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2414 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander: pack V2DF operands 1 and 2 into V4SI operand 0.  Each
;; input goes through gen_sse2_cvttpd2dq into its own V4SI temporary
;; (r1, r2); gen_sse2_punpcklqdq, applied to the V2DImode lowparts of
;; operand 0, r1 and r2, combines the two results.
2418 (define_expand "vec_pack_sfix_trunc_v2df"
2419 [(match_operand:V4SI 0 "register_operand" "")
2420 (match_operand:V2DF 1 "nonimmediate_operand" "")
2421 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2426 r1 = gen_reg_rtx (V4SImode);
2427 r2 = gen_reg_rtx (V4SImode);
2429 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2430 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2431 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2432 gen_lowpart (V2DImode, r1),
2433 gen_lowpart (V2DImode, r2)));
;; Expander: pack V2DF operands 1 and 2 into V4SI operand 0.  Each
;; input goes through gen_sse2_cvtpd2dq into its own V4SI temporary
;; (r1, r2); gen_sse2_punpcklqdq, applied to the V2DImode lowparts of
;; operand 0, r1 and r2, combines the two results.
2437 (define_expand "vec_pack_sfix_v2df"
2438 [(match_operand:V4SI 0 "register_operand" "")
2439 (match_operand:V2DF 1 "nonimmediate_operand" "")
2440 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2445 r1 = gen_reg_rtx (V4SImode);
2446 r2 = gen_reg_rtx (V4SImode);
2448 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2449 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2450 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2451 gen_lowpart (V2DImode, r1),
2452 gen_lowpart (V2DImode, r2)));
2457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2459 ;; Parallel double-precision floating point element swizzling
2461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, chosen by the constraints:
;;   0: unpckhpd -- register destination, operand 1 tied to it.
;;   1: movlpd from %H1 -- operand 1 in offsettable memory, operand 2
;;      tied to the destination.
;;   2: movhpd to a memory destination, operand 2 tied to it.
;; The condition rejects operands 1 and 2 both being memory.
2463 (define_insn "sse2_unpckhpd"
2464 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2467 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2468 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2469 (parallel [(const_int 1)
2471 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2473 unpckhpd\t{%2, %0|%0, %2}
2474 movlpd\t{%H1, %0|%0, %H1}
2475 movhpd\t{%1, %0|%0, %1}"
2476 [(set_attr "type" "sselog,ssemov,ssemov")
2477 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 only.  Alternative 0 (register destination, operand 1 in a
;; register or memory) emits movddup; alternative 1 has an
;; offsettable-memory destination and is typed "ssemov".  The
;; condition rejects operands 0 and 1 both being memory.
2479 (define_insn "*sse3_movddup"
2480 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2483 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2485 (parallel [(const_int 0)
2487 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2489 movddup\t{%1, %0|%0, %1}
2491 [(set_attr "type" "sselog1,ssemov")
2492 (set_attr "mode" "V2DF")])
;; After reload on SSE3 targets: take the DFmode view of register
;; operand 1 and store it twice into memory operand 0, at byte
;; offsets 0 and 8.
2495 [(set (match_operand:V2DF 0 "memory_operand" "")
2498 (match_operand:V2DF 1 "register_operand" "")
2500 (parallel [(const_int 0)
2502 "TARGET_SSE3 && reload_completed"
2505 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2506 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2507 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Operand 1 is tied to the destination in every alternative:
;;   0: unpcklpd -- operand 2 in a register.
;;   1: movhpd   -- operand 2 in memory.
;;   2: movlpd of register operand 2 into %H0 of an offsettable-memory
;;      destination.
;; The condition rejects operands 1 and 2 both being memory.
2511 (define_insn "sse2_unpcklpd"
2512 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2515 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2516 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2517 (parallel [(const_int 0)
2519 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2521 unpcklpd\t{%2, %0|%0, %2}
2522 movhpd\t{%2, %0|%0, %2}
2523 movlpd\t{%2, %H0|%H0, %2}"
2524 [(set_attr "type" "sselog,ssemov,ssemov")
2525 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shuffle mask as SImode constant operand 3 and
;; forwarding to gen_sse2_shufpd_1.  Bit 1 of the mask is turned into
;; the value 3 (bit set) or 2 (bit clear) for the final argument.
2527 (define_expand "sse2_shufpd"
2528 [(match_operand:V2DF 0 "register_operand" "")
2529 (match_operand:V2DF 1 "register_operand" "")
2530 (match_operand:V2DF 2 "nonimmediate_operand" "")
2531 (match_operand:SI 3 "const_int_operand" "")]
2534 int mask = INTVAL (operands[3]);
2535 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2537 GEN_INT (mask & 2 ? 3 : 2)));
;; Operand 1 is tied to the destination; operand 2 is a register or
;; memory.  Operand 3 must satisfy const_0_to_1_operand and operand 4
;; const_2_to_3_operand.  The output code folds them back into a single
;; immediate (bit 0 = operand 3, bit 1 = operand 4 - 2), stores it in
;; operands[3] and emits shufpd.
2541 (define_insn "sse2_shufpd_1"
2542 [(set (match_operand:V2DF 0 "register_operand" "=x")
2545 (match_operand:V2DF 1 "register_operand" "0")
2546 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2547 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2548 (match_operand 4 "const_2_to_3_operand" "")])))]
2552 mask = INTVAL (operands[3]);
2553 mask |= (INTVAL (operands[4]) - 2) << 1;
2554 operands[3] = GEN_INT (mask);
2556 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2558 [(set_attr "type" "sselog")
2559 (set_attr "mode" "V2DF")])
;; Extract index 1 of V2DF operand 1 into DF operand 0.  Alternative 0
;; (memory destination, register source) emits movhpd.  The condition
;; rejects operands 0 and 1 both being memory.
2561 (define_insn "sse2_storehpd"
2562 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2564 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2565 (parallel [(const_int 1)])))]
2566 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2568 movhpd\t{%1, %0|%0, %1}
2571 [(set_attr "type" "ssemov,sselog1,ssemov")
2572 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload: extracting index 1 from V2DF memory operand 1 into a
;; DF register becomes a plain DF move from byte offset 8 of that
;; memory operand.
2575 [(set (match_operand:DF 0 "register_operand" "")
2577 (match_operand:V2DF 1 "memory_operand" "")
2578 (parallel [(const_int 1)])))]
2579 "TARGET_SSE2 && reload_completed"
2580 [(set (match_dup 0) (match_dup 1))]
2582 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract index 0 of V2DF operand 1 into DF operand 0.  Alternative 0
;; (memory destination, register source) emits movlpd.  The condition
;; rejects operands 0 and 1 both being memory.
2585 (define_insn "sse2_storelpd"
2586 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2588 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2589 (parallel [(const_int 0)])))]
2590 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2592 movlpd\t{%1, %0|%0, %1}
2595 [(set_attr "type" "ssemov")
2596 (set_attr "mode" "V1DF,DF,DF")])
;; After reload: extracting index 0 of V2DF operand 1 into a DF
;; register becomes a DFmode move.  op1 is re-expressed in DFmode,
;; using gen_rtx_REG on its register number or gen_lowpart.
2599 [(set (match_operand:DF 0 "register_operand" "")
2601 (match_operand:V2DF 1 "nonimmediate_operand" "")
2602 (parallel [(const_int 0)])))]
2603 "TARGET_SSE2 && reload_completed"
2606 rtx op1 = operands[1];
2608 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2610 op1 = gen_lowpart (DFmode, op1);
2611 emit_move_insn (operands[0], op1);
;; Replace the high element: the result keeps index 0 of V2DF operand 1
;; and takes DF operand 2 as the other element.
;;   alt 0: movhpd   -- operand 2 in memory, operand 1 tied to dest.
;;   alt 1: unpcklpd -- operand 2 in a register, operand 1 tied to dest.
;;   alt 2: offsettable-memory destination, typed "other".
;; The condition rejects operands 1 and 2 both being memory.
;;
;; There is deliberately no alternative with operand 2 tied to the
;; destination and operand 1 in another register.  "shufpd $1, %1, %0"
;; takes its low result element from the destination register and its
;; high one from the source, so it would yield {dest[1], op1[0]}
;; instead of {op1[0], op2}; no single shufpd/unpcklpd can build the
;; required value with that tying.
2615 (define_insn "sse2_loadhpd"
2616 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2619 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2620 (parallel [(const_int 0)]))
2621 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2622 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2624 movhpd\t{%2, %0|%0, %2}
2625 unpcklpd\t{%2, %0|%0, %2}
2628 [(set_attr "type" "ssemov,sselog,other")
2629 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload: memory operand 0 keeps its own index 0 and only the
;; other element comes from DF register operand 1, so the whole set
;; reduces to a DF store at byte offset 8 of operand 0.
2632 [(set (match_operand:V2DF 0 "memory_operand" "")
2634 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2635 (match_operand:DF 1 "register_operand" "")))]
2636 "TARGET_SSE2 && reload_completed"
2637 [(set (match_dup 0) (match_dup 1))]
2639 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Replace the low element: the result takes DF operand 2 first and
;; index 1 of V2DF operand 1 second.  Six alternatives:
;;   0: movsd   -- operand 2 in memory, operand 1 matches "C".
;;   1: movlpd  -- operand 2 in memory, operand 1 tied to dest.
;;   2: movsd   -- operand 2 in a register, operand 1 tied to dest.
;;   3: shufpd  -- operand 2 tied to dest, operand 1 in a register.
;;   4: movhpd  -- operand 2 tied to dest, operand 1 in offsettable
;;                 memory (accessed as %H1).
;;   5: memory destination, typed "other".
;; The condition rejects operands 1 and 2 both being memory.
;;
;; Alternative 3 must name operand 1 as the shufpd source.  Operand 2
;; is the destination register itself there, so using %2 would shuffle
;; the register with itself and never read operand 1.
2642 (define_insn "sse2_loadlpd"
2643 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2645 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2647 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2648 (parallel [(const_int 1)]))))]
2649 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2651 movsd\t{%2, %0|%0, %2}
2652 movlpd\t{%2, %0|%0, %2}
2653 movsd\t{%2, %0|%0, %2}
2654 shufpd\t{$2, %1, %0|%0, %1, 2}
2655 movhpd\t{%H1, %0|%0, %H1}
2657 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2658 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload: memory operand 0 keeps its own index 1 and only the
;; first element comes from DF register operand 1, so the whole set
;; reduces to a DF store.  The element being replaced is the first
;; one, hence byte offset 0; storing at offset 8 would overwrite the
;; element that the pattern says is preserved.
2661 [(set (match_operand:V2DF 0 "memory_operand" "")
2663 (match_operand:DF 1 "register_operand" "")
2664 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2665 "TARGET_SSE2 && reload_completed"
2666 [(set (match_dup 0) (match_dup 1))]
2668 operands[0] = adjust_address (operands[0], DFmode, 0);
2671 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extraction of index 1 of V2DF operand 1 into DF
;; operand 0:
;;   0: movhps  -- register source to memory destination.
;;   1: movhlps -- register to register.
;;   2: movlps from %H1 -- offsettable-memory source.
;; The condition rejects operands 0 and 1 both being memory.
2673 (define_insn "*vec_extractv2df_1_sse"
2674 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2676 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2677 (parallel [(const_int 1)])))]
2678 "!TARGET_SSE2 && TARGET_SSE
2679 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2681 movhps\t{%1, %0|%0, %1}
2682 movhlps\t{%1, %0|%0, %1}
2683 movlps\t{%H1, %0|%0, %H1}"
2684 [(set_attr "type" "ssemov")
2685 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of index 0 of V2DF operand 1 into DF
;; operand 0:
;;   0: movlps -- register source to memory destination.
;;   1: movaps -- register to register.
;;   2: movlps -- memory source to register.
;; The condition rejects operands 0 and 1 both being memory.
2687 (define_insn "*vec_extractv2df_0_sse"
2688 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2690 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2691 (parallel [(const_int 0)])))]
2692 "!TARGET_SSE2 && TARGET_SSE
2693 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2695 movlps\t{%1, %0|%0, %1}
2696 movaps\t{%1, %0|%0, %1}
2697 movlps\t{%1, %0|%0, %1}"
2698 [(set_attr "type" "ssemov")
2699 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine V2DF operands 2 and 1 into operand 0.  Six alternatives:
;;   0: movsd  -- operand 2 in a register, operand 1 tied to dest.
;;   1: movlpd -- operand 2 in memory, operand 1 tied to dest.
;;   2: movlpd -- register operand 2 stored to a memory destination,
;;                operand 1 tied to dest.
;;   3: shufpd -- operand 2 tied to dest, operand 1 in a register.
;;   4: movhps from %H1 -- operand 2 tied to dest, operand 1 in
;;                offsettable memory.
;;   5: movhps of register operand 1 into %H0 of an offsettable-memory
;;                destination, operand 2 tied to dest.
;;
;; Alternative 3 must name operand 1 as the shufpd source.  Operand 2
;; is the destination register itself there, so using %2 would shuffle
;; the register with itself and never read operand 1.
2701 (define_insn "sse2_movsd"
2702 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2704 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2705 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2709 movsd\t{%2, %0|%0, %2}
2710 movlpd\t{%2, %0|%0, %2}
2711 movlpd\t{%2, %0|%0, %2}
2712 shufpd\t{$2, %1, %0|%0, %1, 2}
2713 movhps\t{%H1, %0|%0, %H1}
2714 movhps\t{%1, %H0|%H0, %1}"
2715 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2716 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build V2DF operand 0 from DF operand 1 (register or memory);
;; emits movddup.
2718 (define_insn "*vec_dupv2df_sse3"
2719 [(set (match_operand:V2DF 0 "register_operand" "=x")
2721 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2723 "movddup\t{%1, %0|%0, %1}"
2724 [(set_attr "type" "sselog1")
2725 (set_attr "mode" "DF")])
;; Build V2DF operand 0 from DF register operand 1, which is tied to
;; the destination register.
2727 (define_insn "*vec_dupv2df"
2728 [(set (match_operand:V2DF 0 "register_operand" "=x")
2730 (match_operand:DF 1 "register_operand" "0")))]
2733 [(set_attr "type" "sselog1")
2734 (set_attr "mode" "V2DF")])
;; Build V2DF operand 0 from DF operand 1 (register or memory);
;; emits movddup, whose template reads only operand 1.
2736 (define_insn "*vec_concatv2df_sse3"
2737 [(set (match_operand:V2DF 0 "register_operand" "=x")
2739 (match_operand:DF 1 "nonimmediate_operand" "xm")
2742 "movddup\t{%1, %0|%0, %1}"
2743 [(set_attr "type" "sselog1")
2744 (set_attr "mode" "DF")])
;; Build V2DF operand 0 from DF operands 1 and 2.  Five alternatives:
;;   0: unpcklpd -- operand 1 tied to dest, operand 2 in a "Yt" register.
;;   1: movhpd   -- operand 1 tied to dest, operand 2 in memory.
;;   2: movsd    -- operand 1 in memory, operand 2 matches "C".
;;   3: movlhps  -- operand 1 tied to dest, operand 2 in an "x" register.
;;   4: movhps   -- operand 1 tied to dest, operand 2 in memory.
2746 (define_insn "*vec_concatv2df"
2747 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2749 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2750 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2753 unpcklpd\t{%2, %0|%0, %2}
2754 movhpd\t{%2, %0|%0, %2}
2755 movsd\t{%1, %0|%0, %1}
2756 movlhps\t{%2, %0|%0, %2}
2757 movhps\t{%2, %0|%0, %2}"
2758 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2759 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander delegating to ix86_expand_vector_set with V2DF operand 0,
;; DF operand 1 and the integer value of constant operand 2; the first
;; argument passed is `false'.
2761 (define_expand "vec_setv2df"
2762 [(match_operand:V2DF 0 "register_operand" "")
2763 (match_operand:DF 1 "register_operand" "")
2764 (match_operand 2 "const_int_operand" "")]
2767 ix86_expand_vector_set (false, operands[0], operands[1],
2768 INTVAL (operands[2]));
;; Expander delegating to ix86_expand_vector_extract with DF operand 0,
;; V2DF operand 1 and the integer value of constant operand 2; the
;; first argument passed is `false'.
2772 (define_expand "vec_extractv2df"
2773 [(match_operand:DF 0 "register_operand" "")
2774 (match_operand:V2DF 1 "register_operand" "")
2775 (match_operand 2 "const_int_operand" "")]
2778 ix86_expand_vector_extract (false, operands[0], operands[1],
2779 INTVAL (operands[2]));
;; Expander delegating to ix86_expand_vector_init with V2DF operand 0
;; and operand 1 (no predicate); the first argument passed is `false'.
2783 (define_expand "vec_initv2df"
2784 [(match_operand:V2DF 0 "register_operand" "")
2785 (match_operand 1 "" "")]
2788 ix86_expand_vector_init (false, operands[0], operands[1]);
2792 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2794 ;; Parallel integral arithmetic
2796 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for every mode in SSEMODEI.  The preparation
;; statement sets operands[2] to the mode's zero constant, forced into
;; a register.
2798 (define_expand "neg<mode>2"
2799 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2802 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2804 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Addition expander for every mode in SSEMODEI.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy (PLUS) before the `plus'
;; RTL is generated.
2806 (define_expand "add<mode>3"
2807 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2808 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2809 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2811 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Integer vector add for every mode in SSEMODEI; emits padd with the
;; <ssevecsize> suffix (b/w/d/q).  Operand 1 is commutative ("%") and
;; tied to the destination; ix86_binary_operator_ok (PLUS) must hold.
2813 (define_insn "*add<mode>3"
2814 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2816 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2817 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2818 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2819 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2820 [(set_attr "type" "sseiadd")
2821 (set_attr "prefix_data16" "1")
2822 (set_attr "mode" "TI")])
;; Add for the SSEMODE12 modes (V16QI, V8HI), checked against SS_PLUS
;; in the condition; emits padds with the <ssevecsize> suffix.
;; Operand 1 is commutative ("%") and tied to the destination.
2824 (define_insn "sse2_ssadd<mode>3"
2825 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2827 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2828 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2829 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2830 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2831 [(set_attr "type" "sseiadd")
2832 (set_attr "prefix_data16" "1")
2833 (set_attr "mode" "TI")])
;; Add for the SSEMODE12 modes (V16QI, V8HI), checked against US_PLUS
;; in the condition; emits paddus with the <ssevecsize> suffix.
;; Operand 1 is commutative ("%") and tied to the destination.
2835 (define_insn "sse2_usadd<mode>3"
2836 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2838 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2839 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2840 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2841 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2842 [(set_attr "type" "sseiadd")
2843 (set_attr "prefix_data16" "1")
2844 (set_attr "mode" "TI")])
;; Subtraction expander for every mode in SSEMODEI.  Operand 1 must be
;; a register; operands are passed through
;; ix86_fixup_binary_operands_no_copy (MINUS).
2846 (define_expand "sub<mode>3"
2847 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2848 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2849 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2851 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Integer vector subtract for every mode in SSEMODEI; emits psub with
;; the <ssevecsize> suffix.  Operand 1 is a register tied to the
;; destination (no commutative marker); operand 2 is register or memory.
2853 (define_insn "*sub<mode>3"
2854 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2856 (match_operand:SSEMODEI 1 "register_operand" "0")
2857 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2859 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2860 [(set_attr "type" "sseiadd")
2861 (set_attr "prefix_data16" "1")
2862 (set_attr "mode" "TI")])
;; Subtract for the SSEMODE12 modes (V16QI, V8HI); emits psubs with the
;; <ssevecsize> suffix.  Operand 1 is a register tied to the
;; destination; operand 2 is register or memory.
2864 (define_insn "sse2_sssub<mode>3"
2865 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2867 (match_operand:SSEMODE12 1 "register_operand" "0")
2868 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2870 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2871 [(set_attr "type" "sseiadd")
2872 (set_attr "prefix_data16" "1")
2873 (set_attr "mode" "TI")])
;; Subtract for the SSEMODE12 modes (V16QI, V8HI); emits psubus with
;; the <ssevecsize> suffix.  Operand 1 is a register tied to the
;; destination; operand 2 is register or memory.
2875 (define_insn "sse2_ussub<mode>3"
2876 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2878 (match_operand:SSEMODE12 1 "register_operand" "0")
2879 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2881 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2882 [(set_attr "type" "sseiadd")
2883 (set_attr "prefix_data16" "1")
2884 (set_attr "mode" "TI")])
;; V16QI multiply, synthesised from word multiplies.  Twelve V16QI
;; temporaries t[0..11] are allocated.  Both inputs are widened by
;; interleaving each with itself (punpckhbw/punpcklbw), the halves are
;; multiplied as V8HI with gen_mulv8hi3, and six further
;; punpckhbw/punpcklbw steps plus a final punpcklbw into op0 gather the
;; wanted bytes back into one vector.
2886 (define_expand "mulv16qi3"
2887 [(set (match_operand:V16QI 0 "register_operand" "")
2888 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2889 (match_operand:V16QI 2 "register_operand" "")))]
2895 for (i = 0; i < 12; ++i)
2896 t[i] = gen_reg_rtx (V16QImode);
2898 /* Unpack data such that we've got a source byte in each low byte of
2899 each word. We don't care what goes into the high byte of each word.
2900 Rather than trying to get zero in there, most convenient is to let
2901 it be a copy of the low byte. */
2902 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2903 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2904 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2905 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2907 /* Multiply words. The end-of-line annotations here give a picture of what
2908 the output of that instruction looks like. Dot means don't care; the
2909 letters are the bytes of the result with A being the most significant. */
2910 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2911 gen_lowpart (V8HImode, t[0]),
2912 gen_lowpart (V8HImode, t[1])));
2913 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2914 gen_lowpart (V8HImode, t[2]),
2915 gen_lowpart (V8HImode, t[3])));
2917 /* Extract the relevant bytes and merge them back together. */
2918 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2919 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2920 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2921 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2922 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2923 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2926 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy (MULT) before the `mult' RTL is
;; generated.
2930 (define_expand "mulv8hi3"
2931 [(set (match_operand:V8HI 0 "register_operand" "")
2932 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2933 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2935 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI `mult'; emits pmullw.  Operand 1 is commutative ("%") and tied
;; to the destination; ix86_binary_operator_ok (MULT) must hold.
2937 (define_insn "*mulv8hi3"
2938 [(set (match_operand:V8HI 0 "register_operand" "=x")
2939 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2940 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2941 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2942 "pmullw\t{%2, %0|%0, %2}"
2943 [(set_attr "type" "sseimul")
2944 (set_attr "prefix_data16" "1")
2945 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply named smulv8hi3_highpart.
;; Operands are passed through ix86_fixup_binary_operands_no_copy
;; (MULT) before the RTL is generated.
2947 (define_expand "smulv8hi3_highpart"
2948 [(set (match_operand:V8HI 0 "register_operand" "")
2953 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2955 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2958 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for the V8HI high-part multiply; emits pmulhw.  Operand 1 is
;; commutative ("%") and tied to the destination;
;; ix86_binary_operator_ok (MULT) must hold.
2960 (define_insn "*smulv8hi3_highpart"
2961 [(set (match_operand:V8HI 0 "register_operand" "=x")
2966 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2968 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2970 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2971 "pmulhw\t{%2, %0|%0, %2}"
2972 [(set_attr "type" "sseimul")
2973 (set_attr "prefix_data16" "1")
2974 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply named umulv8hi3_highpart.
;; Operands are passed through ix86_fixup_binary_operands_no_copy
;; (MULT) before the RTL is generated.
2976 (define_expand "umulv8hi3_highpart"
2977 [(set (match_operand:V8HI 0 "register_operand" "")
2982 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2984 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2987 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for the V8HI high-part multiply; emits pmulhuw.  Operand 1 is
;; commutative ("%") and tied to the destination;
;; ix86_binary_operator_ok (MULT) must hold.
2989 (define_insn "*umulv8hi3_highpart"
2990 [(set (match_operand:V8HI 0 "register_operand" "=x")
2995 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2997 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2999 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3000 "pmulhuw\t{%2, %0|%0, %2}"
3001 [(set_attr "type" "sseimul")
3002 (set_attr "prefix_data16" "1")
3003 (set_attr "mode" "TI")])
;; V2DI result computed from indices 0 and 2 of V4SI operands 1 and 2;
;; emits pmuludq.  Operand 1 is commutative ("%") and tied to the
;; destination; ix86_binary_operator_ok (MULT) must hold.
3005 (define_insn "sse2_umulv2siv2di3"
3006 [(set (match_operand:V2DI 0 "register_operand" "=x")
3010 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3011 (parallel [(const_int 0) (const_int 2)])))
3014 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3015 (parallel [(const_int 0) (const_int 2)])))))]
3016 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3017 "pmuludq\t{%2, %0|%0, %2}"
3018 [(set_attr "type" "sseimul")
3019 (set_attr "prefix_data16" "1")
3020 (set_attr "mode" "TI")])
;; SSE4.1 only.  V2DI result computed from indices 0 and 2 of V4SI
;; operands 1 and 2; emits pmuldq.  Operand 1 is commutative ("%") and
;; tied to the destination; ix86_binary_operator_ok (MULT) must hold.
3022 (define_insn "sse4_1_mulv2siv2di3"
3023 [(set (match_operand:V2DI 0 "register_operand" "=x")
3027 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3028 (parallel [(const_int 0) (const_int 2)])))
3031 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3032 (parallel [(const_int 0) (const_int 2)])))))]
3033 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3034 "pmuldq\t{%2, %0|%0, %2}"
3035 [(set_attr "type" "sseimul")
3036 (set_attr "prefix_extra" "1")
3037 (set_attr "mode" "TI")])
;; V4SI result built from V4HI selections of V8HI operands 1 and 2:
;; one pair of selections whose index list starts at 0 and a second
;; pair (via match_dup) whose index list starts at 1 and ends at 7.
;; Emits pmaddwd.  Operand 1 is commutative ("%") and tied to the
;; destination; ix86_binary_operator_ok (MULT) must hold.
3039 (define_insn "sse2_pmaddwd"
3040 [(set (match_operand:V4SI 0 "register_operand" "=x")
3045 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3046 (parallel [(const_int 0)
3052 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3053 (parallel [(const_int 0)
3059 (vec_select:V4HI (match_dup 1)
3060 (parallel [(const_int 1)
3065 (vec_select:V4HI (match_dup 2)
3066 (parallel [(const_int 1)
3069 (const_int 7)]))))))]
3070 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3071 "pmaddwd\t{%2, %0|%0, %2}"
3072 [(set_attr "type" "sseiadd")
3073 (set_attr "prefix_data16" "1")
3074 (set_attr "mode" "TI")])
;; V4SI multiply expander.  The body contains a call to
;; ix86_fixup_binary_operands_no_copy (MULT) and an explicit sequence
;; over six V4SI temporaries (t1..t6):
;;   - gen_sse2_umulv2siv2di3 for elements 2 and 0 (into t1);
;;   - gen_sse2_lshrti3 on the TImode views of both inputs (into t2,
;;     t3) to bring elements 3 and 1 into the slots of 2 and 0;
;;   - a second gen_sse2_umulv2siv2di3 (into t4);
;;   - gen_sse2_pshufd_1 with selectors 0,2,0,0 on t1 and t4 (into t5,
;;     t6), then gen_sse2_punpckldq to merge t5 and t6 into op0.
3076 (define_expand "mulv4si3"
3077 [(set (match_operand:V4SI 0 "register_operand" "")
3078 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3079 (match_operand:V4SI 2 "register_operand" "")))]
3083 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
3086 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3092 t1 = gen_reg_rtx (V4SImode);
3093 t2 = gen_reg_rtx (V4SImode);
3094 t3 = gen_reg_rtx (V4SImode);
3095 t4 = gen_reg_rtx (V4SImode);
3096 t5 = gen_reg_rtx (V4SImode);
3097 t6 = gen_reg_rtx (V4SImode);
3098 thirtytwo = GEN_INT (32);
3100 /* Multiply elements 2 and 0. */
3101 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3104 /* Shift both input vectors down one element, so that elements 3
3105 and 1 are now in the slots for elements 2 and 0. For K8, at
3106 least, this is faster than using a shuffle. */
3107 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3108 gen_lowpart (TImode, op1),
3110 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3111 gen_lowpart (TImode, op2),
3113 /* Multiply elements 3 and 1. */
3114 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3117 /* Move the results in element 2 down to element 1; we don't care
3118 what goes in elements 2 and 3. */
3119 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3120 const0_rtx, const0_rtx));
3121 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3122 const0_rtx, const0_rtx));
3124 /* Merge the parts back together. */
3125 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; SSE4.1 only.  V4SI `mult'; emits pmulld.  Operand 1 is commutative
;; ("%") and tied to the destination; ix86_binary_operator_ok (MULT)
;; must hold.
3130 (define_insn "*sse4_1_mulv4si3"
3131 [(set (match_operand:V4SI 0 "register_operand" "=x")
3132 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3133 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3134 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3135 "pmulld\t{%2, %0|%0, %2}"
3136 [(set_attr "type" "sseimul")
3137 (set_attr "prefix_extra" "1")
3138 (set_attr "mode" "TI")])
;; V2DI multiply, synthesised from three gen_sse2_umulv2siv2di3
;; multiplies over six V2DI temporaries (t1..t6):
;;   t1 = low(op1) * low(op2)
;;   t2 = op1 >> 32, t3 = op2 >> 32          (gen_lshrv2di3)
;;   t4 = low(op1) * low(t3), t5 = low(op2) * low(t2)
;;   t4 <<= 32, t5 <<= 32                    (gen_ashlv2di3)
;;   op0 = (t1 + t4) + t5                    (gen_addv2di3, via t6)
;; "low" here is what umulv2siv2di3 reads from the V4SImode lowpart of
;; its argument.
3140 (define_expand "mulv2di3"
3141 [(set (match_operand:V2DI 0 "register_operand" "")
3142 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3143 (match_operand:V2DI 2 "register_operand" "")))]
3146 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3152 t1 = gen_reg_rtx (V2DImode);
3153 t2 = gen_reg_rtx (V2DImode);
3154 t3 = gen_reg_rtx (V2DImode);
3155 t4 = gen_reg_rtx (V2DImode);
3156 t5 = gen_reg_rtx (V2DImode);
3157 t6 = gen_reg_rtx (V2DImode);
3158 thirtytwo = GEN_INT (32);
3160 /* Multiply low parts. */
3161 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3162 gen_lowpart (V4SImode, op2)));
3164 /* Shift input vectors right 32 bits so we can multiply high parts. */
3165 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3166 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3168 /* Multiply high parts by low parts. */
3169 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3170 gen_lowpart (V4SImode, t3)));
3171 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3172 gen_lowpart (V4SImode, t2)));
3174 /* Shift them back. */
3175 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3176 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3178 /* Add the three parts together. */
3179 emit_insn (gen_addv2di3 (t6, t1, t4));
3180 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening V8HI multiply into V4SI operand 0.  t1 receives
;; gen_mulv8hi3 (op1, op2), t2 receives gen_smulv8hi3_highpart
;; (op1, op2), and gen_vec_interleave_highv8hi combines t1 and t2 into
;; the V8HImode lowpart of operand 0.
3184 (define_expand "vec_widen_smult_hi_v8hi"
3185 [(match_operand:V4SI 0 "register_operand" "")
3186 (match_operand:V8HI 1 "register_operand" "")
3187 (match_operand:V8HI 2 "register_operand" "")]
3190 rtx op1, op2, t1, t2, dest;
3194 t1 = gen_reg_rtx (V8HImode);
3195 t2 = gen_reg_rtx (V8HImode);
3196 dest = gen_lowpart (V8HImode, operands[0]);
3198 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3199 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3200 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI multiply into V4SI operand 0.  t1 receives
;; gen_mulv8hi3 (op1, op2), t2 receives gen_smulv8hi3_highpart
;; (op1, op2), and gen_vec_interleave_lowv8hi combines t1 and t2 into
;; the V8HImode lowpart of operand 0.
3204 (define_expand "vec_widen_smult_lo_v8hi"
3205 [(match_operand:V4SI 0 "register_operand" "")
3206 (match_operand:V8HI 1 "register_operand" "")
3207 (match_operand:V8HI 2 "register_operand" "")]
3210 rtx op1, op2, t1, t2, dest;
3214 t1 = gen_reg_rtx (V8HImode);
3215 t2 = gen_reg_rtx (V8HImode);
3216 dest = gen_lowpart (V8HImode, operands[0]);
3218 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3219 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3220 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, high half: the V8HI product (gen_mulv8hi3)
;; and the unsigned high part (gen_umulv8hi3_highpart) are combined with
;; gen_vec_interleave_highv8hi into operand 0, which is written through
;; a V8HI lowpart view of the V4SI destination.
3224 (define_expand "vec_widen_umult_hi_v8hi"
3225 [(match_operand:V4SI 0 "register_operand" "")
3226 (match_operand:V8HI 1 "register_operand" "")
3227 (match_operand:V8HI 2 "register_operand" "")]
3230 rtx op1, op2, t1, t2, dest;
3234 t1 = gen_reg_rtx (V8HImode);
3235 t2 = gen_reg_rtx (V8HImode);
3236 dest = gen_lowpart (V8HImode, operands[0]);
3238 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3239 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3240 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, low half: the V8HI product (gen_mulv8hi3)
;; and the unsigned high part (gen_umulv8hi3_highpart) are combined with
;; gen_vec_interleave_lowv8hi into operand 0, which is written through
;; a V8HI lowpart view of the V4SI destination.
3244 (define_expand "vec_widen_umult_lo_v8hi"
3245 [(match_operand:V4SI 0 "register_operand" "")
3246 (match_operand:V8HI 1 "register_operand" "")
3247 (match_operand:V8HI 2 "register_operand" "")]
3250 rtx op1, op2, t1, t2, dest;
3254 t1 = gen_reg_rtx (V8HImode);
3255 t2 = gen_reg_rtx (V8HImode);
3256 dest = gen_lowpart (V8HImode, operands[0]);
3258 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3259 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3260 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V4SI multiply, high half: each input is interleaved with
;; itself by gen_vec_interleave_highv4si, and the two results are fed to
;; gen_sse2_umulv2siv2di3 to produce the V2DI result in operand 0.
;; NOTE(review): this is the signed (smult) expander, yet the multiply it
;; emits is gen_sse2_umulv2siv2di3, whose name suggests an unsigned
;; multiply -- confirm that negative inputs give the correct product.
3264 (define_expand "vec_widen_smult_hi_v4si"
3265 [(match_operand:V2DI 0 "register_operand" "")
3266 (match_operand:V4SI 1 "register_operand" "")
3267 (match_operand:V4SI 2 "register_operand" "")]
3270 rtx op1, op2, t1, t2;
3274 t1 = gen_reg_rtx (V4SImode);
3275 t2 = gen_reg_rtx (V4SImode);
3277 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3278 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3279 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI multiply, low half: each input is interleaved with
;; itself by gen_vec_interleave_lowv4si, and the two results are fed to
;; gen_sse2_umulv2siv2di3 to produce the V2DI result in operand 0.
;; NOTE(review): this is the signed (smult) expander, yet the multiply it
;; emits is gen_sse2_umulv2siv2di3, whose name suggests an unsigned
;; multiply -- confirm that negative inputs give the correct product.
3283 (define_expand "vec_widen_smult_lo_v4si"
3284 [(match_operand:V2DI 0 "register_operand" "")
3285 (match_operand:V4SI 1 "register_operand" "")
3286 (match_operand:V4SI 2 "register_operand" "")]
3289 rtx op1, op2, t1, t2;
3293 t1 = gen_reg_rtx (V4SImode);
3294 t2 = gen_reg_rtx (V4SImode);
3296 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3297 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3298 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, high half: each input is interleaved
;; with itself by gen_vec_interleave_highv4si, and the two results are
;; fed to gen_sse2_umulv2siv2di3 to produce the V2DI result in operand 0.
3302 (define_expand "vec_widen_umult_hi_v4si"
3303 [(match_operand:V2DI 0 "register_operand" "")
3304 (match_operand:V4SI 1 "register_operand" "")
3305 (match_operand:V4SI 2 "register_operand" "")]
3308 rtx op1, op2, t1, t2;
3312 t1 = gen_reg_rtx (V4SImode);
3313 t2 = gen_reg_rtx (V4SImode);
3315 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3316 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3317 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, low half: each input is interleaved
;; with itself by gen_vec_interleave_lowv4si, and the two results are
;; fed to gen_sse2_umulv2siv2di3 to produce the V2DI result in operand 0.
3321 (define_expand "vec_widen_umult_lo_v4si"
3322 [(match_operand:V2DI 0 "register_operand" "")
3323 (match_operand:V4SI 1 "register_operand" "")
3324 (match_operand:V4SI 2 "register_operand" "")]
3327 rtx op1, op2, t1, t2;
3331 t1 = gen_reg_rtx (V4SImode);
3332 t2 = gen_reg_rtx (V4SImode);
3334 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3335 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3336 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product accumulate: a fresh V4SI temporary receives
;; gen_sse2_pmaddwd of operands 1 and 2, and operand 0 is set to
;; operand 3 plus that temporary (gen_addv4si3).
3340 (define_expand "sdot_prodv8hi"
3341 [(match_operand:V4SI 0 "register_operand" "")
3342 (match_operand:V8HI 1 "register_operand" "")
3343 (match_operand:V8HI 2 "register_operand" "")
3344 (match_operand:V4SI 3 "register_operand" "")]
3347 rtx t = gen_reg_rtx (V4SImode);
3348 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3349 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product accumulate: operand 0 = operand 3 plus two
;; gen_sse2_umulv2siv2di3 products -- one of operands 1 and 2 as given,
;; and one of copies of them shifted right as whole TImode values by
;; gen_sse2_lshrti3.  The shift count and the declarations of t1..t4
;; are on lines not shown here.
3353 (define_expand "udot_prodv4si"
3354 [(match_operand:V2DI 0 "register_operand" "")
3355 (match_operand:V4SI 1 "register_operand" "")
3356 (match_operand:V4SI 2 "register_operand" "")
3357 (match_operand:V2DI 3 "register_operand" "")]
3362 t1 = gen_reg_rtx (V2DImode);
3363 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3364 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3366 t2 = gen_reg_rtx (V4SImode);
3367 t3 = gen_reg_rtx (V4SImode);
3368 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3369 gen_lowpart (TImode, operands[1]),
3371 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3372 gen_lowpart (TImode, operands[2]),
3375 t4 = gen_reg_rtx (V2DImode);
3376 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3378 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Right shift for the SSEMODE24 modes (V8HI, V4SI), emitted as
;; psra<ssevecsize>.  Operand 1 is tied to the destination ("0"); the
;; TImode count in operand 2 uses the "xn" constraint.
3382 (define_insn "ashr<mode>3"
3383 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3385 (match_operand:SSEMODE24 1 "register_operand" "0")
3386 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3388 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3389 [(set_attr "type" "sseishft")
3390 (set_attr "prefix_data16" "1")
3391 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt) for the SSEMODE248 modes, emitted as
;; psrl<ssevecsize>.  Operand 1 is tied to the destination ("0"); the
;; TImode count in operand 2 uses the "xn" constraint.
3393 (define_insn "lshr<mode>3"
3394 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3395 (lshiftrt:SSEMODE248
3396 (match_operand:SSEMODE248 1 "register_operand" "0")
3397 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3399 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3400 [(set_attr "type" "sseishft")
3401 (set_attr "prefix_data16" "1")
3402 (set_attr "mode" "TI")])
;; Left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI), emitted as
;; psll<ssevecsize>.  Operand 1 is tied to the destination ("0"); the
;; TImode count in operand 2 uses the "xn" constraint.
3404 (define_insn "ashl<mode>3"
3405 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3407 (match_operand:SSEMODE248 1 "register_operand" "0")
3408 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3410 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3411 [(set_attr "type" "sseishft")
3412 (set_attr "prefix_data16" "1")
3413 (set_attr "mode" "TI")])
;; Whole-vector left shift: operands 0 and 1 are replaced by their
;; TImode lowparts so the ashift:TI acts on the full 16-byte value.
;; Operand 2 is tested with const_0_to_255_mul_8_operand; the action
;; taken when that test fails is on a line not shown here.
3415 (define_expand "vec_shl_<mode>"
3416 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3417 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3418 (match_operand:SI 2 "general_operand" "")))]
3421 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3423 operands[0] = gen_lowpart (TImode, operands[0]);
3424 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift: operands 0 and 1 are replaced by
;; their TImode lowparts so the lshiftrt:TI acts on the full 16-byte
;; value.  Operand 2 is tested with const_0_to_255_mul_8_operand; the
;; action taken when that test fails is on a line not shown here.
3427 (define_expand "vec_shr_<mode>"
3428 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3429 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3430 (match_operand:SI 2 "general_operand" "")))]
3433 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3435 operands[0] = gen_lowpart (TImode, operands[0]);
3436 operands[1] = gen_lowpart (TImode, operands[1]);
;; Named expander for unsigned V16QI max.  Its preparation statement
;; hands the operands to ix86_fixup_binary_operands_no_copy.
3439 (define_expand "umaxv16qi3"
3440 [(set (match_operand:V16QI 0 "register_operand" "")
3441 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3442 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3444 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Insn for unsigned V16QI max: requires TARGET_SSE2 and
;; ix86_binary_operator_ok, and emits pmaxub.  Operand 1 is marked
;; commutative ("%") and tied to the destination; operand 2 may be a
;; register or memory ("xm").
3446 (define_insn "*umaxv16qi3"
3447 [(set (match_operand:V16QI 0 "register_operand" "=x")
3448 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3449 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3450 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3451 "pmaxub\t{%2, %0|%0, %2}"
3452 [(set_attr "type" "sseiadd")
3453 (set_attr "prefix_data16" "1")
3454 (set_attr "mode" "TI")])
;; Named expander for signed V8HI max.  Its preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy.
3456 (define_expand "smaxv8hi3"
3457 [(set (match_operand:V8HI 0 "register_operand" "")
3458 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3459 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3461 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Insn for signed V8HI max: requires TARGET_SSE2 and
;; ix86_binary_operator_ok, and emits pmaxsw.  Operand 1 is marked
;; commutative ("%") and tied to the destination; operand 2 may be a
;; register or memory ("xm").
3463 (define_insn "*smaxv8hi3"
3464 [(set (match_operand:V8HI 0 "register_operand" "=x")
3465 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3466 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3467 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3468 "pmaxsw\t{%2, %0|%0, %2}"
3469 [(set_attr "type" "sseiadd")
3470 (set_attr "prefix_data16" "1")
3471 (set_attr "mode" "TI")])
;; Unsigned V8HI max.  Two code paths are visible below: a call to
;; ix86_fixup_binary_operands_no_copy, and an open-coded sequence that
;; computes (operand 1 -us operand 2) + operand 2 with
;; gen_sse2_ussubv8hi3 followed by gen_addv8hi3.  In that sequence the
;; intermediate goes to a fresh register when the destination is
;; rtx_equal_p to operand 2, because operand 2 is read again by the add.
;; The test selecting between the two paths is on lines not shown here.
3473 (define_expand "umaxv8hi3"
3474 [(set (match_operand:V8HI 0 "register_operand" "")
3475 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3476 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3480 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3483 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3484 if (rtx_equal_p (op3, op2))
3485 op3 = gen_reg_rtx (V8HImode);
3486 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3487 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed max for the SSEMODE14 modes (V16QI, V4SI).  Visible below are
;; a call to ix86_fixup_binary_operands_no_copy and a sequence that
;; builds vector-conditional operands -- destination, value operand 1,
;; value operand 2, comparison GT (operand 1, operand 2) -- and passes
;; them to ix86_expand_int_vcond.  The test choosing between the two
;; paths, and the handling of "ok", are on lines not shown here.
3492 (define_expand "smax<mode>3"
3493 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3494 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3495 (match_operand:SSEMODE14 2 "register_operand" "")))]
3499 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3505 xops[0] = operands[0];
3506 xops[1] = operands[1];
3507 xops[2] = operands[2];
3508 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3509 xops[4] = operands[1];
3510 xops[5] = operands[2];
3511 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for signed max on the SSEMODE14 modes: requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok, and emits
;; pmaxs<ssevecsize>.  Operand 1 is commutative ("%") and tied to the
;; destination.
3517 (define_insn "*sse4_1_smax<mode>3"
3518 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3520 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3521 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3522 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3523 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3524 [(set_attr "type" "sseiadd")
3525 (set_attr "prefix_extra" "1")
3526 (set_attr "mode" "TI")])
;; Unsigned V4SI max.  Visible below are a call to
;; ix86_fixup_binary_operands_no_copy and a sequence that builds
;; vector-conditional operands -- destination, value operand 1, value
;; operand 2, comparison GTU (operand 1, operand 2) -- and passes them
;; to ix86_expand_int_vcond.  The test choosing between the two paths,
;; and the handling of "ok", are on lines not shown here.
3528 (define_expand "umaxv4si3"
3529 [(set (match_operand:V4SI 0 "register_operand" "")
3530 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3531 (match_operand:V4SI 2 "register_operand" "")))]
3535 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3541 xops[0] = operands[0];
3542 xops[1] = operands[1];
3543 xops[2] = operands[2];
3544 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3545 xops[4] = operands[1];
3546 xops[5] = operands[2];
3547 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for unsigned max on the SSEMODE24 modes: requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok, and emits
;; pmaxu<ssevecsize>.  Operand 1 is commutative ("%") and tied to the
;; destination.
3553 (define_insn "*sse4_1_umax<mode>3"
3554 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3556 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3557 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3558 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3559 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3560 [(set_attr "type" "sseiadd")
3561 (set_attr "prefix_extra" "1")
3562 (set_attr "mode" "TI")])
;; Named expander for unsigned V16QI min.  Its preparation statement
;; hands the operands to ix86_fixup_binary_operands_no_copy.
3564 (define_expand "uminv16qi3"
3565 [(set (match_operand:V16QI 0 "register_operand" "")
3566 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3567 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3569 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Insn for unsigned V16QI min: requires TARGET_SSE2 and
;; ix86_binary_operator_ok, and emits pminub.  Operand 1 is marked
;; commutative ("%") and tied to the destination; operand 2 may be a
;; register or memory ("xm").
3571 (define_insn "*uminv16qi3"
3572 [(set (match_operand:V16QI 0 "register_operand" "=x")
3573 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3574 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3575 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3576 "pminub\t{%2, %0|%0, %2}"
3577 [(set_attr "type" "sseiadd")
3578 (set_attr "prefix_data16" "1")
3579 (set_attr "mode" "TI")])
;; Named expander for signed V8HI min.  Its preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy.
3581 (define_expand "sminv8hi3"
3582 [(set (match_operand:V8HI 0 "register_operand" "")
3583 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3584 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3586 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Insn for signed V8HI min: requires TARGET_SSE2 and
;; ix86_binary_operator_ok, and emits pminsw.  Operand 1 is marked
;; commutative ("%") and tied to the destination; operand 2 may be a
;; register or memory ("xm").
3588 (define_insn "*sminv8hi3"
3589 [(set (match_operand:V8HI 0 "register_operand" "=x")
3590 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3591 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3592 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3593 "pminsw\t{%2, %0|%0, %2}"
3594 [(set_attr "type" "sseiadd")
3595 (set_attr "prefix_data16" "1")
3596 (set_attr "mode" "TI")])
;; Signed min for the SSEMODE14 modes (V16QI, V4SI).  Visible below are
;; a call to ix86_fixup_binary_operands_no_copy and a sequence that
;; builds vector-conditional operands for ix86_expand_int_vcond.  The
;; comparison is still GT (operand 1, operand 2); the two value operands
;; are swapped (xops[1] = operand 2, xops[2] = operand 1) to select the
;; smaller element.  The test choosing between the two paths, and the
;; handling of "ok", are on lines not shown here.
3598 (define_expand "smin<mode>3"
3599 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3600 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3601 (match_operand:SSEMODE14 2 "register_operand" "")))]
3605 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3611 xops[0] = operands[0];
3612 xops[1] = operands[2];
3613 xops[2] = operands[1];
3614 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3615 xops[4] = operands[1];
3616 xops[5] = operands[2];
3617 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for signed min on the SSEMODE14 modes: requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok, and emits
;; pmins<ssevecsize>.  Operand 1 is commutative ("%") and tied to the
;; destination.
3623 (define_insn "*sse4_1_smin<mode>3"
3624 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3626 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3627 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3628 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3629 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3630 [(set_attr "type" "sseiadd")
3631 (set_attr "prefix_extra" "1")
3632 (set_attr "mode" "TI")])
;; Unsigned min for the SSEMODE24 modes (V8HI, V4SI).  Visible below are
;; a call to ix86_fixup_binary_operands_no_copy and a sequence that
;; builds vector-conditional operands for ix86_expand_int_vcond.  The
;; comparison is GTU (operand 1, operand 2); the two value operands are
;; swapped (xops[1] = operand 2, xops[2] = operand 1) to select the
;; smaller element.  The test choosing between the two paths, and the
;; handling of "ok", are on lines not shown here.
3634 (define_expand "umin<mode>3"
3635 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3636 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3637 (match_operand:SSEMODE24 2 "register_operand" "")))]
3641 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3647 xops[0] = operands[0];
3648 xops[1] = operands[2];
3649 xops[2] = operands[1];
3650 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3651 xops[4] = operands[1];
3652 xops[5] = operands[2];
3653 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for unsigned min on the SSEMODE24 modes: requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok, and emits
;; pminu<ssevecsize>.  Operand 1 is commutative ("%") and tied to the
;; destination.
3659 (define_insn "*sse4_1_umin<mode>3"
3660 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3662 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3663 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3664 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3665 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3666 [(set_attr "type" "sseiadd")
3667 (set_attr "prefix_extra" "1")
3668 (set_attr "mode" "TI")])
3670 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3672 ;; Parallel integral comparisons
3674 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes: requires
;; TARGET_SSE2 and ix86_binary_operator_ok (EQ, ...), and emits
;; pcmpeq<ssevecsize>.  Operand 1 is commutative ("%") and tied to the
;; destination.
3676 (define_insn "sse2_eq<mode>3"
3677 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3679 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3680 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3681 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3682 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3683 [(set_attr "type" "ssecmp")
3684 (set_attr "prefix_data16" "1")
3685 (set_attr "mode" "TI")])
;; V2DI equality compare: requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok (EQ, ...), and emits pcmpeqq.  Operand 1 is
;; commutative ("%") and tied to the destination.
3687 (define_insn "sse4_1_eqv2di3"
3688 [(set (match_operand:V2DI 0 "register_operand" "=x")
3690 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3691 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3692 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3693 "pcmpeqq\t{%2, %0|%0, %2}"
3694 [(set_attr "type" "ssecmp")
3695 (set_attr "prefix_extra" "1")
3696 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes, emitted
;; as pcmpgt<ssevecsize>.  Operand 1 must be a register tied to the
;; destination and carries no "%" commutativity marker; operand 2 may be
;; a register or memory ("xm").
3698 (define_insn "sse2_gt<mode>3"
3699 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3701 (match_operand:SSEMODE124 1 "register_operand" "0")
3702 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3704 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3705 [(set_attr "type" "ssecmp")
3706 (set_attr "prefix_data16" "1")
3707 (set_attr "mode" "TI")])
;; V2DI greater-than compare, emitted as pcmpgtq.  Operand 1 is tied to
;; the destination and carries no "%" commutativity marker.
;; NOTE(review): operand 1 uses the nonimmediate_operand predicate even
;; though its "0" constraint ties it to the register destination; the
;; sse2_gt<mode>3 pattern uses register_operand in the same position --
;; confirm whether the looser predicate here is intended.
3709 (define_insn "sse4_2_gtv2di3"
3710 [(set (match_operand:V2DI 0 "register_operand" "=x")
3712 (match_operand:V2DI 1 "nonimmediate_operand" "0")
3713 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3715 "pcmpgtq\t{%2, %0|%0, %2}"
3716 [(set_attr "type" "ssecmp")
3717 (set_attr "mode" "TI")])
;; Vector conditional select for SSEMODEI:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The expansion is delegated to ix86_expand_int_vcond; what follows its
;; success or failure is on lines not shown here.
3719 (define_expand "vcond<mode>"
3720 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3721 (if_then_else:SSEMODEI
3722 (match_operator 3 ""
3723 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3724 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3725 (match_operand:SSEMODEI 1 "general_operand" "")
3726 (match_operand:SSEMODEI 2 "general_operand" "")))]
3729 if (ix86_expand_int_vcond (operands))
;; Vector conditional select for SSEMODEI under the vcondu name; the
;; visible template and the ix86_expand_int_vcond call are the same as
;; in vcond<mode>:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; What follows the call's success or failure is on lines not shown here.
3735 (define_expand "vcondu<mode>"
3736 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3737 (if_then_else:SSEMODEI
3738 (match_operator 3 ""
3739 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3740 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3741 (match_operand:SSEMODEI 1 "general_operand" "")
3742 (match_operand:SSEMODEI 2 "general_operand" "")))]
3745 if (ix86_expand_int_vcond (operands))
3751 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3753 ;; Parallel bitwise logical operations
3755 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expressed as an XOR.  The preparation code builds a
;; CONST_VECTOR of GET_MODE_NUNITS (<MODE>mode) elements, each
;; constm1_rtx, forces it into a register and stores it in operands[2]
;; (presumably the second XOR operand, whose template line is not shown
;; here).
3757 (define_expand "one_cmpl<mode>2"
3758 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3759 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3763 int i, n = GET_MODE_NUNITS (<MODE>mode);
3764 rtvec v = rtvec_alloc (n);
3766 for (i = 0; i < n; ++i)
3767 RTVEC_ELT (v, i) = constm1_rtx;
3769 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for bitwise AND on the SSEMODEI modes.  Its
;; preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
3772 (define_expand "and<mode>3"
3773 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3774 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3775 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3777 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Insn for bitwise AND on the SSEMODEI modes: requires TARGET_SSE2 and
;; ix86_binary_operator_ok, and emits pand for every mode.  Operand 1
;; is commutative ("%") and tied to the destination.
3779 (define_insn "*and<mode>3"
3780 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3782 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3783 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3784 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3785 "pand\t{%2, %0|%0, %2}"
3786 [(set_attr "type" "sselog")
3787 (set_attr "prefix_data16" "1")
3788 (set_attr "mode" "TI")])
;; AND-NOT on the SSEMODEI modes, emitted as pandn.  Operand 1 is the
;; one wrapped in (not ...); it must be a register tied to the
;; destination.  Operand 2 may be a register or memory ("xm").
3790 (define_insn "sse2_nand<mode>3"
3791 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3793 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3794 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3796 "pandn\t{%2, %0|%0, %2}"
3797 [(set_attr "type" "sselog")
3798 (set_attr "prefix_data16" "1")
3799 (set_attr "mode" "TI")])
;; Named expander for bitwise AND on TFmode values.  Its preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
3801 (define_expand "andtf3"
3802 [(set (match_operand:TF 0 "register_operand" "")
3803 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3804 (match_operand:TF 2 "nonimmediate_operand" "")))]
3806 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; Insn for bitwise AND on TFmode values: requires TARGET_64BIT and
;; ix86_binary_operator_ok, and emits pand.  Operand 1 is commutative
;; ("%") and tied to the destination.
3808 (define_insn "*andtf3"
3809 [(set (match_operand:TF 0 "register_operand" "=x")
3811 (match_operand:TF 1 "nonimmediate_operand" "%0")
3812 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3813 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3814 "pand\t{%2, %0|%0, %2}"
3815 [(set_attr "type" "sselog")
3816 (set_attr "prefix_data16" "1")
3817 (set_attr "mode" "TI")])
;; AND-NOT on TFmode values, emitted as pandn.  Operand 1 is the one
;; wrapped in (not ...); it must be a register tied to the destination.
;; Operand 2 may be a register or memory ("xm").
3819 (define_insn "*nandtf3"
3820 [(set (match_operand:TF 0 "register_operand" "=x")
3822 (not:TF (match_operand:TF 1 "register_operand" "0"))
3823 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3825 "pandn\t{%2, %0|%0, %2}"
3826 [(set_attr "type" "sselog")
3827 (set_attr "prefix_data16" "1")
3828 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR on the SSEMODEI modes.  Its
;; preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
3830 (define_expand "ior<mode>3"
3831 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3832 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3833 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3835 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Insn for bitwise inclusive OR on the SSEMODEI modes: requires
;; TARGET_SSE2 and ix86_binary_operator_ok, and emits por for every
;; mode.  Operand 1 is commutative ("%") and tied to the destination.
3837 (define_insn "*ior<mode>3"
3838 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3840 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3841 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3842 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3843 "por\t{%2, %0|%0, %2}"
3844 [(set_attr "type" "sselog")
3845 (set_attr "prefix_data16" "1")
3846 (set_attr "mode" "TI")])
;; Named expander for bitwise inclusive OR on TFmode values.  Its
;; preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
3848 (define_expand "iortf3"
3849 [(set (match_operand:TF 0 "register_operand" "")
3850 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3851 (match_operand:TF 2 "nonimmediate_operand" "")))]
3853 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; Insn for bitwise inclusive OR on TFmode values: requires
;; TARGET_64BIT and ix86_binary_operator_ok, and emits por.  Operand 1
;; is commutative ("%") and tied to the destination.
3855 (define_insn "*iortf3"
3856 [(set (match_operand:TF 0 "register_operand" "=x")
3858 (match_operand:TF 1 "nonimmediate_operand" "%0")
3859 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3860 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3861 "por\t{%2, %0|%0, %2}"
3862 [(set_attr "type" "sselog")
3863 (set_attr "prefix_data16" "1")
3864 (set_attr "mode" "TI")])
;; Named expander for bitwise XOR on the SSEMODEI modes.  Its
;; preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
3866 (define_expand "xor<mode>3"
3867 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3868 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3869 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3871 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Insn for bitwise XOR on the SSEMODEI modes: requires TARGET_SSE2 and
;; ix86_binary_operator_ok, and emits pxor for every mode.  Operand 1
;; is commutative ("%") and tied to the destination.
3873 (define_insn "*xor<mode>3"
3874 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3876 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3877 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3878 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3879 "pxor\t{%2, %0|%0, %2}"
3880 [(set_attr "type" "sselog")
3881 (set_attr "prefix_data16" "1")
3882 (set_attr "mode" "TI")])
;; Named expander for bitwise XOR on TFmode values.  Its preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
3884 (define_expand "xortf3"
3885 [(set (match_operand:TF 0 "register_operand" "")
3886 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3887 (match_operand:TF 2 "nonimmediate_operand" "")))]
3889 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; Insn for bitwise XOR on TFmode values: requires TARGET_64BIT and
;; ix86_binary_operator_ok, and emits pxor.  Operand 1 is commutative
;; ("%") and tied to the destination.
3891 (define_insn "*xortf3"
3892 [(set (match_operand:TF 0 "register_operand" "=x")
3894 (match_operand:TF 1 "nonimmediate_operand" "%0")
3895 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3896 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3897 "pxor\t{%2, %0|%0, %2}"
3898 [(set_attr "type" "sselog")
3899 (set_attr "prefix_data16" "1")
3900 (set_attr "mode" "TI")])
3902 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3904 ;; Parallel integral element swizzling
3906 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI operands into one V16QI result using only byte
;; interleaves on V16QI views of the inputs: three rounds of high/low
;; interleave followed by a final low interleave.  The letter diagram
;; below traces each byte through h1/l1, h2/l2 and h3/l3.
3909 ;; op1 = abcdefghijklmnop
3910 ;; op2 = qrstuvwxyz012345
3911 ;; h1 = aqbrcsdteufvgwhx
3912 ;; l1 = iyjzk0l1m2n3o4p5
3913 ;; h2 = aiqybjrzcks0dlt1
3914 ;; l2 = emu2fnv3gow4hpx5
3915 ;; h3 = aeimquy2bfjnrvz3
3916 ;; l3 = cgkosw04dhlptx15
3917 ;; result = bdfhjlnprtvxz135
3918 (define_expand "vec_pack_trunc_v8hi"
3919 [(match_operand:V16QI 0 "register_operand" "")
3920 (match_operand:V8HI 1 "register_operand" "")
3921 (match_operand:V8HI 2 "register_operand" "")]
3924 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3926 op1 = gen_lowpart (V16QImode, operands[1]);
3927 op2 = gen_lowpart (V16QImode, operands[2]);
3928 h1 = gen_reg_rtx (V16QImode);
3929 l1 = gen_reg_rtx (V16QImode);
3930 h2 = gen_reg_rtx (V16QImode);
3931 l2 = gen_reg_rtx (V16QImode);
3932 h3 = gen_reg_rtx (V16QImode);
3933 l3 = gen_reg_rtx (V16QImode);
3935 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3936 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3937 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3938 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3939 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3940 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3941 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
;; Pack two V4SI operands into one V8HI result using only halfword
;; interleaves on V8HI views of the inputs: two rounds of high/low
;; interleave followed by a final low interleave.
3952 ;; result = bdfhjlnp
3953 (define_expand "vec_pack_trunc_v4si"
3954 [(match_operand:V8HI 0 "register_operand" "")
3955 (match_operand:V4SI 1 "register_operand" "")
3956 (match_operand:V4SI 2 "register_operand" "")]
3959 rtx op1, op2, h1, l1, h2, l2;
3961 op1 = gen_lowpart (V8HImode, operands[1]);
3962 op2 = gen_lowpart (V8HImode, operands[2]);
3963 h1 = gen_reg_rtx (V8HImode);
3964 l1 = gen_reg_rtx (V8HImode);
3965 h2 = gen_reg_rtx (V8HImode);
3966 l2 = gen_reg_rtx (V8HImode);
3968 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3969 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3970 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3971 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3972 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Pack two V2DI operands into one V4SI result using only doubleword
;; interleaves on V4SI views of the inputs: one round of high/low
;; interleave followed by a final low interleave.
3982 (define_expand "vec_pack_trunc_v2di"
3983 [(match_operand:V4SI 0 "register_operand" "")
3984 (match_operand:V2DI 1 "register_operand" "")
3985 (match_operand:V2DI 2 "register_operand" "")]
3988 rtx op1, op2, h1, l1;
3990 op1 = gen_lowpart (V4SImode, operands[1]);
3991 op2 = gen_lowpart (V4SImode, operands[2]);
3992 h1 = gen_reg_rtx (V4SImode);
3993 l1 = gen_reg_rtx (V4SImode);
3995 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3996 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3997 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for the V16QI high interleave.  The selection list
;; alternates indices 8..15 with 24..31 (the latter presumably index
;; operand 2 within a concatenation whose rtx lines are not shown here).
;; The preparation code simply emits gen_sse2_punpckhbw on the same
;; three operands.
4001 (define_expand "vec_interleave_highv16qi"
4002 [(set (match_operand:V16QI 0 "register_operand" "=x")
4005 (match_operand:V16QI 1 "register_operand" "0")
4006 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4007 (parallel [(const_int 8) (const_int 24)
4008 (const_int 9) (const_int 25)
4009 (const_int 10) (const_int 26)
4010 (const_int 11) (const_int 27)
4011 (const_int 12) (const_int 28)
4012 (const_int 13) (const_int 29)
4013 (const_int 14) (const_int 30)
4014 (const_int 15) (const_int 31)])))]
4017 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for the V16QI low interleave.  The selection list
;; alternates indices 0..7 with 16..23 (the latter presumably index
;; operand 2 within a concatenation whose rtx lines are not shown here).
;; The preparation code simply emits gen_sse2_punpcklbw on the same
;; three operands.
4021 (define_expand "vec_interleave_lowv16qi"
4022 [(set (match_operand:V16QI 0 "register_operand" "=x")
4025 (match_operand:V16QI 1 "register_operand" "0")
4026 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4027 (parallel [(const_int 0) (const_int 16)
4028 (const_int 1) (const_int 17)
4029 (const_int 2) (const_int 18)
4030 (const_int 3) (const_int 19)
4031 (const_int 4) (const_int 20)
4032 (const_int 5) (const_int 21)
4033 (const_int 6) (const_int 22)
4034 (const_int 7) (const_int 23)])))]
4037 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for the V8HI high interleave.  The selection list
;; alternates indices 4..7 with 12..15; the preparation code simply
;; emits gen_sse2_punpckhwd on the same three operands.
4041 (define_expand "vec_interleave_highv8hi"
4042 [(set (match_operand:V8HI 0 "register_operand" "=x")
4045 (match_operand:V8HI 1 "register_operand" "0")
4046 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4047 (parallel [(const_int 4) (const_int 12)
4048 (const_int 5) (const_int 13)
4049 (const_int 6) (const_int 14)
4050 (const_int 7) (const_int 15)])))]
4053 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for the V8HI low interleave.  The selection list
;; alternates indices 0..3 with 8..11; the preparation code simply
;; emits gen_sse2_punpcklwd on the same three operands.
4057 (define_expand "vec_interleave_lowv8hi"
4058 [(set (match_operand:V8HI 0 "register_operand" "=x")
4061 (match_operand:V8HI 1 "register_operand" "0")
4062 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4063 (parallel [(const_int 0) (const_int 8)
4064 (const_int 1) (const_int 9)
4065 (const_int 2) (const_int 10)
4066 (const_int 3) (const_int 11)])))]
4069 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for the V4SI high interleave.  The selection list is
;; 2, 6, 3, 7; the preparation code simply emits gen_sse2_punpckhdq on
;; the same three operands.
4073 (define_expand "vec_interleave_highv4si"
4074 [(set (match_operand:V4SI 0 "register_operand" "=x")
4077 (match_operand:V4SI 1 "register_operand" "0")
4078 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4079 (parallel [(const_int 2) (const_int 6)
4080 (const_int 3) (const_int 7)])))]
4083 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for the V4SI low interleave.  The selection list is
;; 0, 4, 1, 5; the preparation code simply emits gen_sse2_punpckldq on
;; the same three operands.
4087 (define_expand "vec_interleave_lowv4si"
4088 [(set (match_operand:V4SI 0 "register_operand" "=x")
4091 (match_operand:V4SI 1 "register_operand" "0")
4092 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4093 (parallel [(const_int 0) (const_int 4)
4094 (const_int 1) (const_int 5)])))]
4097 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the V2DI high interleave.  The selection list
;; starts with index 1 (its remainder is on a line not shown here); the
;; preparation code simply emits gen_sse2_punpckhqdq on the same three
;; operands.
4101 (define_expand "vec_interleave_highv2di"
4102 [(set (match_operand:V2DI 0 "register_operand" "=x")
4105 (match_operand:V2DI 1 "register_operand" "0")
4106 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4107 (parallel [(const_int 1)
4111 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the V2DI low interleave.  The selection list
;; starts with index 0 (its remainder is on a line not shown here); the
;; preparation code simply emits gen_sse2_punpcklqdq on the same three
;; operands.
4115 (define_expand "vec_interleave_lowv2di"
4116 [(set (match_operand:V2DI 0 "register_operand" "=x")
4119 (match_operand:V2DI 1 "register_operand" "0")
4120 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4121 (parallel [(const_int 0)
4125 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Pack two V8HI operands into a V16QI destination, emitted as
;; packsswb.  Operand 1 is a register tied to the destination; operand
;; 2 may be a register or memory.  The rtx codes wrapping the two
;; operands are on lines not shown here.
4129 (define_insn "sse2_packsswb"
4130 [(set (match_operand:V16QI 0 "register_operand" "=x")
4133 (match_operand:V8HI 1 "register_operand" "0"))
4135 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4137 "packsswb\t{%2, %0|%0, %2}"
4138 [(set_attr "type" "sselog")
4139 (set_attr "prefix_data16" "1")
4140 (set_attr "mode" "TI")])
;; Pack two V4SI operands into a V8HI destination, emitted as
;; packssdw.  Operand 1 is a register tied to the destination; operand
;; 2 may be a register or memory.  The rtx codes wrapping the two
;; operands are on lines not shown here.
4142 (define_insn "sse2_packssdw"
4143 [(set (match_operand:V8HI 0 "register_operand" "=x")
4146 (match_operand:V4SI 1 "register_operand" "0"))
4148 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4150 "packssdw\t{%2, %0|%0, %2}"
4151 [(set_attr "type" "sselog")
4152 (set_attr "prefix_data16" "1")
4153 (set_attr "mode" "TI")])
;; Pack two V8HI operands into a V16QI destination, emitted as
;; packuswb.  Operand 1 is a register tied to the destination; operand
;; 2 may be a register or memory.  The rtx codes wrapping the two
;; operands are on lines not shown here.
4155 (define_insn "sse2_packuswb"
4156 [(set (match_operand:V16QI 0 "register_operand" "=x")
4159 (match_operand:V8HI 1 "register_operand" "0"))
4161 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4163 "packuswb\t{%2, %0|%0, %2}"
4164 [(set_attr "type" "sselog")
4165 (set_attr "prefix_data16" "1")
4166 (set_attr "mode" "TI")])
;; Insn behind the V16QI high interleave, emitted as punpckhbw.  The
;; selection list alternates indices 8..15 with 24..31.  Operand 1 is
;; a register tied to the destination; operand 2 may be a register or
;; memory.
4168 (define_insn "sse2_punpckhbw"
4169 [(set (match_operand:V16QI 0 "register_operand" "=x")
4172 (match_operand:V16QI 1 "register_operand" "0")
4173 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4174 (parallel [(const_int 8) (const_int 24)
4175 (const_int 9) (const_int 25)
4176 (const_int 10) (const_int 26)
4177 (const_int 11) (const_int 27)
4178 (const_int 12) (const_int 28)
4179 (const_int 13) (const_int 29)
4180 (const_int 14) (const_int 30)
4181 (const_int 15) (const_int 31)])))]
4183 "punpckhbw\t{%2, %0|%0, %2}"
4184 [(set_attr "type" "sselog")
4185 (set_attr "prefix_data16" "1")
4186 (set_attr "mode" "TI")])
;; Insn behind the V16QI low interleave, emitted as punpcklbw.  The
;; selection list alternates indices 0..7 with 16..23.  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or
;; memory.
4188 (define_insn "sse2_punpcklbw"
4189 [(set (match_operand:V16QI 0 "register_operand" "=x")
4192 (match_operand:V16QI 1 "register_operand" "0")
4193 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4194 (parallel [(const_int 0) (const_int 16)
4195 (const_int 1) (const_int 17)
4196 (const_int 2) (const_int 18)
4197 (const_int 3) (const_int 19)
4198 (const_int 4) (const_int 20)
4199 (const_int 5) (const_int 21)
4200 (const_int 6) (const_int 22)
4201 (const_int 7) (const_int 23)])))]
4203 "punpcklbw\t{%2, %0|%0, %2}"
4204 [(set_attr "type" "sselog")
4205 (set_attr "prefix_data16" "1")
4206 (set_attr "mode" "TI")])
;; Insn behind the V8HI high interleave, emitted as punpckhwd.  The
;; selection list alternates indices 4..7 with 12..15.  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or
;; memory.
4208 (define_insn "sse2_punpckhwd"
4209 [(set (match_operand:V8HI 0 "register_operand" "=x")
4212 (match_operand:V8HI 1 "register_operand" "0")
4213 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4214 (parallel [(const_int 4) (const_int 12)
4215 (const_int 5) (const_int 13)
4216 (const_int 6) (const_int 14)
4217 (const_int 7) (const_int 15)])))]
4219 "punpckhwd\t{%2, %0|%0, %2}"
4220 [(set_attr "type" "sselog")
4221 (set_attr "prefix_data16" "1")
4222 (set_attr "mode" "TI")])
;; Insn behind the V8HI low interleave, emitted as punpcklwd.  The
;; selection list alternates indices 0..3 with 8..11.  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or
;; memory.
4224 (define_insn "sse2_punpcklwd"
4225 [(set (match_operand:V8HI 0 "register_operand" "=x")
4228 (match_operand:V8HI 1 "register_operand" "0")
4229 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4230 (parallel [(const_int 0) (const_int 8)
4231 (const_int 1) (const_int 9)
4232 (const_int 2) (const_int 10)
4233 (const_int 3) (const_int 11)])))]
4235 "punpcklwd\t{%2, %0|%0, %2}"
4236 [(set_attr "type" "sselog")
4237 (set_attr "prefix_data16" "1")
4238 (set_attr "mode" "TI")])
;; Insn behind the V4SI high interleave, emitted as punpckhdq.  The
;; selection list is 2, 6, 3, 7.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
4240 (define_insn "sse2_punpckhdq"
4241 [(set (match_operand:V4SI 0 "register_operand" "=x")
4244 (match_operand:V4SI 1 "register_operand" "0")
4245 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4246 (parallel [(const_int 2) (const_int 6)
4247 (const_int 3) (const_int 7)])))]
4249 "punpckhdq\t{%2, %0|%0, %2}"
4250 [(set_attr "type" "sselog")
4251 (set_attr "prefix_data16" "1")
4252 (set_attr "mode" "TI")])
;; Insn behind the V4SI low interleave, emitted as punpckldq.  The
;; selection list is 0, 4, 1, 5.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
4254 (define_insn "sse2_punpckldq"
4255 [(set (match_operand:V4SI 0 "register_operand" "=x")
4258 (match_operand:V4SI 1 "register_operand" "0")
4259 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4260 (parallel [(const_int 0) (const_int 4)
4261 (const_int 1) (const_int 5)])))]
4263 "punpckldq\t{%2, %0|%0, %2}"
4264 [(set_attr "type" "sselog")
4265 (set_attr "prefix_data16" "1")
4266 (set_attr "mode" "TI")])
;; Insn behind the V2DI high interleave, emitted as punpckhqdq.  The
;; selection list starts with index 1; its remainder is on a line not
;; shown here.  Operand 1 is a register tied to the destination;
;; operand 2 may be a register or memory.
4268 (define_insn "sse2_punpckhqdq"
4269 [(set (match_operand:V2DI 0 "register_operand" "=x")
4272 (match_operand:V2DI 1 "register_operand" "0")
4273 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4274 (parallel [(const_int 1)
4277 "punpckhqdq\t{%2, %0|%0, %2}"
4278 [(set_attr "type" "sselog")
4279 (set_attr "prefix_data16" "1")
4280 (set_attr "mode" "TI")])
;; Emits punpcklqdq on V2DI operands.  Operand 1 is tied to the destination;
;; operand 2 may be an SSE register or memory.  The visible part of the
;; selector starts with index 0.
;; NOTE(review): the remainder of the selector and the insn condition are not
;; visible here -- confirm against the full pattern.
4282 (define_insn "sse2_punpcklqdq"
4283 [(set (match_operand:V2DI 0 "register_operand" "=x")
4286 (match_operand:V2DI 1 "register_operand" "0")
4287 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4288 (parallel [(const_int 0)
4291 "punpcklqdq\t{%2, %0|%0, %2}"
4292 [(set_attr "type" "sselog")
4293 (set_attr "prefix_data16" "1")
4294 (set_attr "mode" "TI")])
;; Emits pinsrb.  Operand 2 (QI, register or memory) is wrapped in a
;; vec_duplicate:V16QI, operand 1 (V16QI) is tied to the destination, and
;; operand 3 must satisfy const_pow2_1_to_32768_operand.
;; The output code replaces operand 3 by exact_log2 of its value, so %3 prints
;; the bit position rather than the one-bit mask.
;; NOTE(review): %k2 presumably prints the 32-bit name of a register operand
;; -- confirm in the i386 print_operand documentation.
4296 (define_insn "*sse4_1_pinsrb"
4297 [(set (match_operand:V16QI 0 "register_operand" "=x")
4299 (vec_duplicate:V16QI
4300 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4301 (match_operand:V16QI 1 "register_operand" "0")
4302 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4305 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4306 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4308 [(set_attr "type" "sselog")
4309 (set_attr "prefix_extra" "1")
4310 (set_attr "mode" "TI")])
;; Emits pinsrw.  Operand 2 is the HI value to insert (register or memory),
;; operand 1 (V8HI) is tied to the destination, and operand 3 must satisfy
;; const_pow2_1_to_128_operand.
;; The output code replaces operand 3 by exact_log2 of its value, so %3 prints
;; the bit position rather than the one-bit mask.
;; NOTE(review): %k2 presumably prints the 32-bit name of a register operand
;; -- confirm in the i386 print_operand documentation.
4312 (define_insn "*sse2_pinsrw"
4313 [(set (match_operand:V8HI 0 "register_operand" "=x")
4316 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4317 (match_operand:V8HI 1 "register_operand" "0")
4318 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4321 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4322 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4324 [(set_attr "type" "sselog")
4325 (set_attr "prefix_data16" "1")
4326 (set_attr "mode" "TI")])
4328 ;; It must come before sse2_loadld since it is preferred.
;; Emits pinsrd.  Operand 2 is the SI value to insert (register or memory),
;; operand 1 (V4SI) is tied to the destination, and operand 3 must satisfy
;; const_pow2_1_to_8_operand.  The output code replaces operand 3 by
;; exact_log2 of its value, so %3 prints the bit position rather than the
;; one-bit mask.
4329 (define_insn "*sse4_1_pinsrd"
4330 [(set (match_operand:V4SI 0 "register_operand" "=x")
4333 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4334 (match_operand:V4SI 1 "register_operand" "0")
4335 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4338 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4339 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4341 [(set_attr "type" "sselog")
4342 (set_attr "prefix_extra" "1")
4343 (set_attr "mode" "TI")])
;; Emits pinsrq.  Operand 2 is the DI value to insert (register or memory),
;; operand 1 (V2DI) is tied to the destination, and operand 3 must satisfy
;; const_pow2_1_to_2_operand.  The output code replaces operand 3 by
;; exact_log2 of its value, so %3 prints the bit position rather than the
;; one-bit mask.
;; NOTE(review): the insn condition is not visible here; a 64-bit-only
;; restriction would be expected for a DI general-register source -- confirm.
4345 (define_insn "*sse4_1_pinsrq"
4346 [(set (match_operand:V2DI 0 "register_operand" "=x")
4349 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4350 (match_operand:V2DI 1 "register_operand" "0")
4351 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4354 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4355 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4357 [(set_attr "type" "sselog")
4358 (set_attr "prefix_extra" "1")
4359 (set_attr "mode" "TI")])
;; Emits pextrb into an SI general register ("=r").  The source is a V16QI
;; SSE register and operand 2 is the element index, restricted to 0..15.
;; NOTE(review): the nesting depth of the closing parentheses suggests an
;; extra operator around the element selection (presumably an extension to
;; SImode); that line is not visible here -- confirm.
4361 (define_insn "*sse4_1_pextrb"
4362 [(set (match_operand:SI 0 "register_operand" "=r")
4365 (match_operand:V16QI 1 "register_operand" "x")
4366 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4368 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4369 [(set_attr "type" "sselog")
4370 (set_attr "prefix_extra" "1")
4371 (set_attr "mode" "TI")])
;; Emits pextrb with a QI memory destination ("=m").  The source is a V16QI
;; SSE register and operand 2 is the element index, restricted to 0..15.
4373 (define_insn "*sse4_1_pextrb_memory"
4374 [(set (match_operand:QI 0 "memory_operand" "=m")
4376 (match_operand:V16QI 1 "register_operand" "x")
4377 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4379 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4380 [(set_attr "type" "sselog")
4381 (set_attr "prefix_extra" "1")
4382 (set_attr "mode" "TI")])
;; Emits pextrw into an SI general register ("=r").  The source is a V8HI
;; SSE register and operand 2 is the element index, restricted to 0..7.
;; NOTE(review): the nesting depth of the closing parentheses suggests an
;; extra operator around the element selection (presumably an extension to
;; SImode); that line is not visible here -- confirm.
4384 (define_insn "*sse2_pextrw"
4385 [(set (match_operand:SI 0 "register_operand" "=r")
4388 (match_operand:V8HI 1 "register_operand" "x")
4389 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4391 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4392 [(set_attr "type" "sselog")
4393 (set_attr "prefix_data16" "1")
4394 (set_attr "mode" "TI")])
;; Emits pextrw with an HI memory destination ("=m").  The source is a V8HI
;; SSE register and operand 2 is the element index, restricted to 0..7.
;; Unlike the register-destination pextrw pattern this one carries the
;; "prefix_extra" attribute.
4396 (define_insn "*sse4_1_pextrw_memory"
4397 [(set (match_operand:HI 0 "memory_operand" "=m")
4399 (match_operand:V8HI 1 "register_operand" "x")
4400 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4402 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4403 [(set_attr "type" "sselog")
4404 (set_attr "prefix_extra" "1")
4405 (set_attr "mode" "TI")])
;; Emits pextrd.  The SI destination may be a general register or memory
;; ("=rm"); the source is a V4SI SSE register and operand 2 is the element
;; index, restricted to 0..3.
4407 (define_insn "*sse4_1_pextrd"
4408 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4410 (match_operand:V4SI 1 "register_operand" "x")
4411 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4413 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4414 [(set_attr "type" "sselog")
4415 (set_attr "prefix_extra" "1")
4416 (set_attr "mode" "TI")])
4418 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Emits pextrq; enabled only when both TARGET_SSE4_1 and TARGET_64BIT hold.
;; The DI destination may be a general register or memory ("=rm"); the source
;; is a V2DI SSE register and operand 2 is the element index (0 or 1).
4419 (define_insn "*sse4_1_pextrq"
4420 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4422 (match_operand:V2DI 1 "register_operand" "x")
4423 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4424 "TARGET_SSE4_1 && TARGET_64BIT"
4425 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4426 [(set_attr "type" "sselog")
4427 (set_attr "prefix_extra" "1")
4428 (set_attr "mode" "TI")])
;; Expander taking a V4SI destination, a V4SI source and an integer shuffle
;; mask (operand 2).  The mask is split into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7), each passed as its own constant to gen_sse2_pshufd_1.
4430 (define_expand "sse2_pshufd"
4431 [(match_operand:V4SI 0 "register_operand" "")
4432 (match_operand:V4SI 1 "nonimmediate_operand" "")
4433 (match_operand:SI 2 "const_int_operand" "")]
4436 int mask = INTVAL (operands[2]);
4437 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4438 GEN_INT ((mask >> 0) & 3),
4439 GEN_INT ((mask >> 2) & 3),
4440 GEN_INT ((mask >> 4) & 3),
4441 GEN_INT ((mask >> 6) & 3)));
;; Emits pshufd.  Operands 2..5 are the four element selectors, each
;; restricted to 0..3.  The output code packs them back into a single
;; immediate (operand 2 in bits 0-1, operand 3 in bits 2-3, operand 4 in
;; bits 4-5, operand 5 in bits 6-7) and stores it in operands[2] so that %2
;; prints the combined mask.
4445 (define_insn "sse2_pshufd_1"
4446 [(set (match_operand:V4SI 0 "register_operand" "=x")
4448 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4449 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4450 (match_operand 3 "const_0_to_3_operand" "")
4451 (match_operand 4 "const_0_to_3_operand" "")
4452 (match_operand 5 "const_0_to_3_operand" "")])))]
4456 mask |= INTVAL (operands[2]) << 0;
4457 mask |= INTVAL (operands[3]) << 2;
4458 mask |= INTVAL (operands[4]) << 4;
4459 mask |= INTVAL (operands[5]) << 6;
4460 operands[2] = GEN_INT (mask);
4462 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4464 [(set_attr "type" "sselog1")
4465 (set_attr "prefix_data16" "1")
4466 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer shuffle
;; mask (operand 2).  The mask is split into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7), each passed as its own constant to gen_sse2_pshuflw_1.
4468 (define_expand "sse2_pshuflw"
4469 [(match_operand:V8HI 0 "register_operand" "")
4470 (match_operand:V8HI 1 "nonimmediate_operand" "")
4471 (match_operand:SI 2 "const_int_operand" "")]
4474 int mask = INTVAL (operands[2]);
4475 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4476 GEN_INT ((mask >> 0) & 3),
4477 GEN_INT ((mask >> 2) & 3),
4478 GEN_INT ((mask >> 4) & 3),
4479 GEN_INT ((mask >> 6) & 3)));
;; Emits pshuflw.  Operands 2..5 are element selectors, each restricted to
;; 0..3.  The output code packs them into a single immediate (2 bits each,
;; operand 2 lowest) and stores it in operands[2] so that %2 prints the
;; combined mask.
;; NOTE(review): the remaining entries of the selector after operand 5 are
;; not visible here; presumably they keep the upper four elements in place
;; -- confirm against the full pattern.
4483 (define_insn "sse2_pshuflw_1"
4484 [(set (match_operand:V8HI 0 "register_operand" "=x")
4486 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4487 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4488 (match_operand 3 "const_0_to_3_operand" "")
4489 (match_operand 4 "const_0_to_3_operand" "")
4490 (match_operand 5 "const_0_to_3_operand" "")
4498 mask |= INTVAL (operands[2]) << 0;
4499 mask |= INTVAL (operands[3]) << 2;
4500 mask |= INTVAL (operands[4]) << 4;
4501 mask |= INTVAL (operands[5]) << 6;
4502 operands[2] = GEN_INT (mask);
4504 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4506 [(set_attr "type" "sselog")
4507 (set_attr "prefix_rep" "1")
4508 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer shuffle
;; mask (operand 2).  The mask is split into four 2-bit fields and 4 is added
;; to each, so the constants passed to gen_sse2_pshufhw_1 lie in 4..7.
4510 (define_expand "sse2_pshufhw"
4511 [(match_operand:V8HI 0 "register_operand" "")
4512 (match_operand:V8HI 1 "nonimmediate_operand" "")
4513 (match_operand:SI 2 "const_int_operand" "")]
4516 int mask = INTVAL (operands[2]);
4517 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4518 GEN_INT (((mask >> 0) & 3) + 4),
4519 GEN_INT (((mask >> 2) & 3) + 4),
4520 GEN_INT (((mask >> 4) & 3) + 4),
4521 GEN_INT (((mask >> 6) & 3) + 4)));
;; Emits pshufhw.  The selector starts with a fixed index 0 and ends with
;; operands 2..5, each restricted to 4..7.  The output code subtracts 4 from
;; each of those operands, packs the results into a single immediate (2 bits
;; each, operand 2 lowest) and stores it in operands[2] for printing as %2.
;; NOTE(review): the fixed selector entries between index 0 and operand 2 are
;; not visible here -- confirm against the full pattern.
4525 (define_insn "sse2_pshufhw_1"
4526 [(set (match_operand:V8HI 0 "register_operand" "=x")
4528 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4529 (parallel [(const_int 0)
4533 (match_operand 2 "const_4_to_7_operand" "")
4534 (match_operand 3 "const_4_to_7_operand" "")
4535 (match_operand 4 "const_4_to_7_operand" "")
4536 (match_operand 5 "const_4_to_7_operand" "")])))]
4540 mask |= (INTVAL (operands[2]) - 4) << 0;
4541 mask |= (INTVAL (operands[3]) - 4) << 2;
4542 mask |= (INTVAL (operands[4]) - 4) << 4;
4543 mask |= (INTVAL (operands[5]) - 4) << 6;
4544 operands[2] = GEN_INT (mask);
4546 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4548 [(set_attr "type" "sselog")
4549 (set_attr "prefix_rep" "1")
4550 (set_attr "mode" "TI")])
;; Expander with a V4SI register destination and an SI source (register or
;; memory).  The preparation statement sets operands[2] to the V4SImode zero
;; constant.
;; NOTE(review): how operands[2] is used in the RTL template is not visible
;; here -- confirm against the full pattern.
4552 (define_expand "sse2_loadd"
4553 [(set (match_operand:V4SI 0 "register_operand" "")
4556 (match_operand:SI 1 "nonimmediate_operand" ""))
4560 "operands[2] = CONST0_RTX (V4SImode);")
;; Four alternatives that place the SI operand 2 into a V4SI register:
;;   alt 0: movd from memory          (mode TI)
;;   alt 1: movd from a general reg   (mode TI)
;;   alt 2: movss from memory         (mode V4SF)
;;   alt 3: movss from an SSE reg, with operand 1 tied to the destination
;;          (mode SF)
;; In alternatives 0-2 operand 1 uses constraint "C".
;; NOTE(review): the insn condition and the operator combining operands 1 and
;; 2 are not visible here -- confirm against the full pattern.
4562 (define_insn "sse2_loadld"
4563 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4566 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4567 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4571 movd\t{%2, %0|%0, %2}
4572 movd\t{%2, %0|%0, %2}
4573 movss\t{%2, %0|%0, %2}
4574 movss\t{%2, %0|%0, %2}"
4575 [(set_attr "type" "ssemov")
4576 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Insn-and-split that stores element 0 of a V4SI register into an SI
;; destination (memory or SSE register in alt 0, general register in alt 1).
;; The split applies after reload when inter-unit moves are enabled, or the
;; destination is memory, or the destination is not a general register.  It
;; becomes a plain move whose source is the same hard register as operand 1,
;; re-expressed in SImode.
4578 (define_insn_and_split "sse2_stored"
4579 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4581 (match_operand:V4SI 1 "register_operand" "x,Yi")
4582 (parallel [(const_int 0)])))]
4585 "&& reload_completed
4586 && (TARGET_INTER_UNIT_MOVES
4587 || MEM_P (operands [0])
4588 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4589 [(set (match_dup 0) (match_dup 1))]
4591 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that stores element 0 of a V2DI register into a DI destination
;; (register or memory).
4594 (define_expand "sse_storeq"
4595 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4597 (match_operand:V2DI 1 "register_operand" "")
4598 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DI store from V2DI; rejected when both
;; operands are memory.  Three alternatives: SSE reg to memory/SSE reg,
;; "Yi" reg to general reg, and offsettable memory to general reg.  Only the
;; third alternative's template (a plain mov{q}) is visible; its type is
;; "imov" and its mode "DI", while the first two leave both attributes as "*".
;; NOTE(review): the first two templates are not visible here; presumably
;; they are "#" (split later) -- confirm against the full pattern.
4602 (define_insn "*sse2_storeq_rex64"
4603 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4605 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4606 (parallel [(const_int 0)])))]
4607 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4611 mov{q}\t{%1, %0|%0, %1}"
4612 [(set_attr "type" "*,*,imov")
4613 (set_attr "mode" "*,*,DI")])
;; Element-0 DI store from a V2DI SSE register into memory or an SSE register
;; ("=mx").
;; NOTE(review): the second operand list below (with empty constraints)
;; appears to belong to a separate splitter whose header line is not visible
;; in this excerpt -- confirm against the full file.  That part applies when
;; inter-unit moves are enabled, or the destination is memory, or the
;; destination is not a general register, and rewrites the extraction as a
;; plain move from the same hard register re-expressed in DImode.
4615 (define_insn "*sse2_storeq"
4616 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4618 (match_operand:V2DI 1 "register_operand" "x")
4619 (parallel [(const_int 0)])))]
4624 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4626 (match_operand:V2DI 1 "register_operand" "")
4627 (parallel [(const_int 0)])))]
4630 && (TARGET_INTER_UNIT_MOVES
4631 || MEM_P (operands [0])
4632 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4633 [(set (match_dup 0) (match_dup 1))]
4635 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; 64-bit extraction of element 1 of a V2DI value into a DI destination;
;; rejected when both operands are memory.  Alternatives:
;;   alt 0: movhps, SSE reg -> memory
;;   alt 1: psrldq by 8 in place (operand 1 tied to the destination)
;;   alt 2: movq from %H1 of an offsettable memory source into an SSE reg
;;   alt 3: mov{q} from %H1 of an offsettable memory source into a general reg
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm in the i386 print_operand documentation.
4638 (define_insn "*vec_extractv2di_1_rex64"
4639 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4641 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4642 (parallel [(const_int 1)])))]
4643 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4645 movhps\t{%1, %0|%0, %1}
4646 psrldq\t{$8, %0|%0, 8}
4647 movq\t{%H1, %0|%0, %H1}
4648 mov{q}\t{%H1, %0|%0, %H1}"
4649 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4650 (set_attr "memory" "*,none,*,*")
4651 (set_attr "mode" "V2SF,TI,TI,DI")])
;; SSE2 extraction of element 1 of a V2DI value into a DI destination;
;; requires TARGET_SSE2 and rejects memory-to-memory.  Alternatives:
;;   alt 0: movhps, SSE reg -> memory
;;   alt 1: psrldq by 8 in place (operand 1 tied to the destination)
;;   alt 2: movq from %H1 of an offsettable memory source into an SSE reg
;; NOTE(review): the first line of the condition is not visible here
;; (presumably a !TARGET_64BIT test) -- confirm against the full pattern.
4653 (define_insn "*vec_extractv2di_1_sse2"
4654 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4656 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4657 (parallel [(const_int 1)])))]
4659 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4661 movhps\t{%1, %0|%0, %1}
4662 psrldq\t{$8, %0|%0, 8}
4663 movq\t{%H1, %0|%0, %H1}"
4664 [(set_attr "type" "ssemov,sseishft,ssemov")
4665 (set_attr "memory" "*,none,*")
4666 (set_attr "mode" "V2SF,TI,TI")])
4668 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only extraction of element 1 of a V2DI value (enabled when
;; TARGET_SSE holds but TARGET_SSE2 does not; rejects memory-to-memory).
;; Alternatives:
;;   alt 0: movhps, SSE reg -> memory
;;   alt 1: movhlps, SSE reg -> SSE reg
;;   alt 2: movlps from %H1 of an offsettable memory source into an SSE reg
4669 (define_insn "*vec_extractv2di_1_sse"
4670 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4672 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4673 (parallel [(const_int 1)])))]
4674 "!TARGET_SSE2 && TARGET_SSE
4675 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4677 movhps\t{%1, %0|%0, %1}
4678 movhlps\t{%1, %0|%0, %1}
4679 movlps\t{%H1, %0|%0, %H1}"
4680 [(set_attr "type" "ssemov")
4681 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SI value from a single SI register operand.  Two alternatives:
;;   alt 0: pshufd with immediate 0 from operand 1 ("Yt" registers, mode TI)
;;   alt 1: shufps with immediate 0 on the destination itself, with operand 1
;;          tied to the destination (mode V4SF)
;; NOTE(review): the operator applied to operand 1 (presumably vec_duplicate)
;; and the insn condition are not visible here -- confirm.
4683 (define_insn "*vec_dupv4si"
4684 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4686 (match_operand:SI 1 "register_operand" " Yt,0")))]
4689 pshufd\t{$0, %1, %0|%0, %1, 0}
4690 shufps\t{$0, %0, %0|%0, %0, 0}"
4691 [(set_attr "type" "sselog1")
4692 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI value from a single DI register operand.  Both alternatives
;; tie operand 1 to the destination; alt 0 has type sselog1 / mode TI and
;; alt 1 has type ssemov / mode V4SF.
;; NOTE(review): the output templates, the operator applied to operand 1 and
;; the insn condition are not visible here -- confirm against the full
;; pattern.
4694 (define_insn "*vec_dupv2di"
4695 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4697 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4702 [(set_attr "type" "sselog1,ssemov")
4703 (set_attr "mode" "TI,V4SF")])
4705 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4706 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4707 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SI value from two SI operands.  Alternatives:
;;   alt 0: punpckldq %2 into the destination (operand 1 tied), "Yt" regs
;;   alt 1: movd of operand 1 (general reg or memory) when operand 2 is "C"
;;   alt 2: punpckldq on "y" registers (operand 1 tied), type mmxcvt
;;   alt 3: movd of operand 1 into a "y" register when operand 2 is "C"
;; NOTE(review): the combining operator (presumably vec_concat) and the insn
;; condition are not visible here -- confirm against the full pattern.
4708 (define_insn "*sse2_concatv2si"
4709 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4711 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4712 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4715 punpckldq\t{%2, %0|%0, %2}
4716 movd\t{%1, %0|%0, %1}
4717 punpckldq\t{%2, %0|%0, %2}
4718 movd\t{%1, %0|%0, %1}"
4719 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4720 (set_attr "mode" "TI,TI,DI,DI")])
;; Forms a V2SI value from two SI operands.  Alternatives:
;;   alt 0: unpcklps %2 into the destination (operand 1 tied), SSE regs
;;   alt 1: movss of a memory operand 1 when operand 2 is "C"
;;   alt 2: punpckldq on "y" registers (operand 1 tied), type mmxcvt
;;   alt 3: movd of operand 1 into a "y" register when operand 2 is "C"
;; NOTE(review): the combining operator (presumably vec_concat) and the insn
;; condition are not visible here -- confirm against the full pattern.
4722 (define_insn "*sse1_concatv2si"
4723 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4725 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4726 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4729 unpcklps\t{%2, %0|%0, %2}
4730 movss\t{%1, %0|%0, %1}
4731 punpckldq\t{%2, %0|%0, %2}
4732 movd\t{%1, %0|%0, %1}"
4733 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4734 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Forms a V4SI value from two V2SI operands; operand 1 is tied to the
;; destination in every alternative.  Alternatives:
;;   alt 0: punpcklqdq with a "Yt" register operand 2 (mode TI)
;;   alt 1: movlhps with an SSE register operand 2   (mode V4SF)
;;   alt 2: movhps with a memory operand 2           (mode V2SF)
;; NOTE(review): the combining operator (presumably vec_concat) and the insn
;; condition are not visible here -- confirm against the full pattern.
4736 (define_insn "*vec_concatv4si_1"
4737 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4739 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4740 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4743 punpcklqdq\t{%2, %0|%0, %2}
4744 movlhps\t{%2, %0|%0, %2}
4745 movhps\t{%2, %0|%0, %2}"
4746 [(set_attr "type" "sselog,ssemov,ssemov")
4747 (set_attr "mode" "TI,V4SF,V2SF")])
;; Forms a V2DI value from two DI operands on 32-bit targets with SSE
;; (condition: !TARGET_64BIT && TARGET_SSE).  Alternatives:
;;   alt 0: movq from memory operand 1, operand 2 is "C"
;;   alt 1: movq2dq from a "y" register operand 1, operand 2 is "C"
;;   alt 2: punpcklqdq %2 (operand 1 tied), "Yt" registers
;;   alt 3: movlhps %2 (operand 1 tied), SSE register operand 2
;;   alt 4: movhps %2 (operand 1 tied), memory operand 2
;;   alt 5: movlps %1 from memory, with operand 2 tied to the destination
;; NOTE(review): the combining operator (presumably vec_concat) is not
;; visible here -- confirm against the full pattern.
4749 (define_insn "vec_concatv2di"
4750 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4752 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4753 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4754 "!TARGET_64BIT && TARGET_SSE"
4756 movq\t{%1, %0|%0, %1}
4757 movq2dq\t{%1, %0|%0, %1}
4758 punpcklqdq\t{%2, %0|%0, %2}
4759 movlhps\t{%2, %0|%0, %2}
4760 movhps\t{%2, %0|%0, %2}
4761 movlps\t{%1, %0|%0, %1}"
4762 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4763 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Forms a V2DI value from two DI operands.  Alternatives:
;;   alt 0: movq from memory operand 1, operand 2 is "C"
;;   alt 1: movq from a general register operand 1 into a "Yi" register
;;   alt 2: movq2dq from a "y" register operand 1, operand 2 is "C"
;;   alt 3: punpcklqdq %2 (operand 1 tied), "Yt" registers
;;   alt 4: movlhps %2 (operand 1 tied), SSE register operand 2
;;   alt 5: movhps %2 (operand 1 tied), memory operand 2
;;   alt 6: movlps %1 from memory, with operand 2 tied to the destination
;; NOTE(review): the insn condition (presumably TARGET_64BIT, given the
;; "_rex" name) and the combining operator are not visible here -- confirm.
4765 (define_insn "*vec_concatv2di_rex"
4766 [(set (match_operand:V2DI 0 "register_operand" "=Yt,Yi,!Yt,Yt,x,x,x")
4768 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4769 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Yt,x,m,0")))]
4772 movq\t{%1, %0|%0, %1}
4773 movq\t{%1, %0|%0, %1}
4774 movq2dq\t{%1, %0|%0, %1}
4775 punpcklqdq\t{%2, %0|%0, %2}
4776 movlhps\t{%2, %0|%0, %2}
4777 movhps\t{%2, %0|%0, %2}
4778 movlps\t{%1, %0|%0, %1}"
4779 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4780 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for setting one element of a V2DI register: operand 0 is the
;; vector, operand 1 the DI value and operand 2 a constant integer.  All work
;; is delegated to ix86_expand_vector_set, called with false as its first
;; argument and INTVAL of operand 2 as its last.
4782 (define_expand "vec_setv2di"
4783 [(match_operand:V2DI 0 "register_operand" "")
4784 (match_operand:DI 1 "register_operand" "")
4785 (match_operand 2 "const_int_operand" "")]
4788 ix86_expand_vector_set (false, operands[0], operands[1],
4789 INTVAL (operands[2]));
;; Expander for extracting one element of a V2DI register: operand 0 is the
;; DI result, operand 1 the vector and operand 2 a constant integer.  All
;; work is delegated to ix86_expand_vector_extract, called with false as its
;; first argument and INTVAL of operand 2 as its last.
4793 (define_expand "vec_extractv2di"
4794 [(match_operand:DI 0 "register_operand" "")
4795 (match_operand:V2DI 1 "register_operand" "")
4796 (match_operand 2 "const_int_operand" "")]
4799 ix86_expand_vector_extract (false, operands[0], operands[1],
4800 INTVAL (operands[2]));
;; Expander for initialising a V2DI register (operand 0) from operand 1,
;; which has no mode or predicate here.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4804 (define_expand "vec_initv2di"
4805 [(match_operand:V2DI 0 "register_operand" "")
4806 (match_operand 1 "" "")]
4809 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V4SI register: operand 0 is the
;; vector, operand 1 the SI value and operand 2 a constant integer.  All work
;; is delegated to ix86_expand_vector_set, called with false as its first
;; argument and INTVAL of operand 2 as its last.
4813 (define_expand "vec_setv4si"
4814 [(match_operand:V4SI 0 "register_operand" "")
4815 (match_operand:SI 1 "register_operand" "")
4816 (match_operand 2 "const_int_operand" "")]
4819 ix86_expand_vector_set (false, operands[0], operands[1],
4820 INTVAL (operands[2]));
;; Expander for extracting one element of a V4SI register: operand 0 is the
;; SI result, operand 1 the vector and operand 2 a constant integer.  All
;; work is delegated to ix86_expand_vector_extract, called with false as its
;; first argument and INTVAL of operand 2 as its last.
4824 (define_expand "vec_extractv4si"
4825 [(match_operand:SI 0 "register_operand" "")
4826 (match_operand:V4SI 1 "register_operand" "")
4827 (match_operand 2 "const_int_operand" "")]
4830 ix86_expand_vector_extract (false, operands[0], operands[1],
4831 INTVAL (operands[2]));
;; Expander for initialising a V4SI register (operand 0) from operand 1,
;; which has no mode or predicate here.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4835 (define_expand "vec_initv4si"
4836 [(match_operand:V4SI 0 "register_operand" "")
4837 (match_operand 1 "" "")]
4840 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V8HI register: operand 0 is the
;; vector, operand 1 the HI value and operand 2 a constant integer.  All work
;; is delegated to ix86_expand_vector_set, called with false as its first
;; argument and INTVAL of operand 2 as its last.
4844 (define_expand "vec_setv8hi"
4845 [(match_operand:V8HI 0 "register_operand" "")
4846 (match_operand:HI 1 "register_operand" "")
4847 (match_operand 2 "const_int_operand" "")]
4850 ix86_expand_vector_set (false, operands[0], operands[1],
4851 INTVAL (operands[2]));
;; Expander for extracting one element of a V8HI register: operand 0 is the
;; HI result, operand 1 the vector and operand 2 a constant integer.  All
;; work is delegated to ix86_expand_vector_extract, called with false as its
;; first argument and INTVAL of operand 2 as its last.
4855 (define_expand "vec_extractv8hi"
4856 [(match_operand:HI 0 "register_operand" "")
4857 (match_operand:V8HI 1 "register_operand" "")
4858 (match_operand 2 "const_int_operand" "")]
4861 ix86_expand_vector_extract (false, operands[0], operands[1],
4862 INTVAL (operands[2]));
;; Expander for initialising a V8HI register (operand 0) from operand 1,
;; which has no mode or predicate here.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4866 (define_expand "vec_initv8hi"
4867 [(match_operand:V8HI 0 "register_operand" "")
4868 (match_operand 1 "" "")]
4871 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V16QI register: operand 0 is the
;; vector, operand 1 the QI value and operand 2 a constant integer.  All work
;; is delegated to ix86_expand_vector_set, called with false as its first
;; argument and INTVAL of operand 2 as its last.
4875 (define_expand "vec_setv16qi"
4876 [(match_operand:V16QI 0 "register_operand" "")
4877 (match_operand:QI 1 "register_operand" "")
4878 (match_operand 2 "const_int_operand" "")]
4881 ix86_expand_vector_set (false, operands[0], operands[1],
4882 INTVAL (operands[2]));
;; Expander for extracting one element of a V16QI register: operand 0 is the
;; QI result, operand 1 the vector and operand 2 a constant integer.  All
;; work is delegated to ix86_expand_vector_extract, called with false as its
;; first argument and INTVAL of operand 2 as its last.
4886 (define_expand "vec_extractv16qi"
4887 [(match_operand:QI 0 "register_operand" "")
4888 (match_operand:V16QI 1 "register_operand" "")
4889 (match_operand 2 "const_int_operand" "")]
4892 ix86_expand_vector_extract (false, operands[0], operands[1],
4893 INTVAL (operands[2]));
;; Expander for initialising a V16QI register (operand 0) from operand 1,
;; which has no mode or predicate here.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
4897 (define_expand "vec_initv16qi"
4898 [(match_operand:V16QI 0 "register_operand" "")
4899 (match_operand 1 "" "")]
4902 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander from a V16QI register to a V8HI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (true, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "u"/"hi" in the pattern name -- confirm against the helper definitions.
4906 (define_expand "vec_unpacku_hi_v16qi"
4907 [(match_operand:V8HI 0 "register_operand" "")
4908 (match_operand:V16QI 1 "register_operand" "")]
4912 ix86_expand_sse4_unpack (operands, true, true);
4914 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V16QI register to a V8HI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (false, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "s"/"hi" in the pattern name -- confirm against the helper definitions.
4918 (define_expand "vec_unpacks_hi_v16qi"
4919 [(match_operand:V8HI 0 "register_operand" "")
4920 (match_operand:V16QI 1 "register_operand" "")]
4924 ix86_expand_sse4_unpack (operands, false, true);
4926 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V16QI register to a V8HI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (true, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "u"/"lo" in the pattern name -- confirm against the helper definitions.
4930 (define_expand "vec_unpacku_lo_v16qi"
4931 [(match_operand:V8HI 0 "register_operand" "")
4932 (match_operand:V16QI 1 "register_operand" "")]
4936 ix86_expand_sse4_unpack (operands, true, false);
4938 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V16QI register to a V8HI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (false, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "s"/"lo" in the pattern name -- confirm against the helper definitions.
4942 (define_expand "vec_unpacks_lo_v16qi"
4943 [(match_operand:V8HI 0 "register_operand" "")
4944 (match_operand:V16QI 1 "register_operand" "")]
4948 ix86_expand_sse4_unpack (operands, false, false);
4950 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V8HI register to a V4SI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (true, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "u"/"hi" in the pattern name -- confirm against the helper definitions.
4954 (define_expand "vec_unpacku_hi_v8hi"
4955 [(match_operand:V4SI 0 "register_operand" "")
4956 (match_operand:V8HI 1 "register_operand" "")]
4960 ix86_expand_sse4_unpack (operands, true, true);
4962 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V8HI register to a V4SI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (false, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "s"/"hi" in the pattern name -- confirm against the helper definitions.
4966 (define_expand "vec_unpacks_hi_v8hi"
4967 [(match_operand:V4SI 0 "register_operand" "")
4968 (match_operand:V8HI 1 "register_operand" "")]
4972 ix86_expand_sse4_unpack (operands, false, true);
4974 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V8HI register to a V4SI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (true, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "u"/"lo" in the pattern name -- confirm against the helper definitions.
4978 (define_expand "vec_unpacku_lo_v8hi"
4979 [(match_operand:V4SI 0 "register_operand" "")
4980 (match_operand:V8HI 1 "register_operand" "")]
4984 ix86_expand_sse4_unpack (operands, true, false);
4986 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V8HI register to a V4SI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (false, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "s"/"lo" in the pattern name -- confirm against the helper definitions.
4990 (define_expand "vec_unpacks_lo_v8hi"
4991 [(match_operand:V4SI 0 "register_operand" "")
4992 (match_operand:V8HI 1 "register_operand" "")]
4996 ix86_expand_sse4_unpack (operands, false, false);
4998 ix86_expand_sse_unpack (operands, false, false);
;; Expander from a V4SI register to a V2DI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (true, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "u"/"hi" in the pattern name -- confirm against the helper definitions.
5002 (define_expand "vec_unpacku_hi_v4si"
5003 [(match_operand:V2DI 0 "register_operand" "")
5004 (match_operand:V4SI 1 "register_operand" "")]
5008 ix86_expand_sse4_unpack (operands, true, true);
5010 ix86_expand_sse_unpack (operands, true, true);
;; Expander from a V4SI register to a V2DI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (false, true).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "s"/"hi" in the pattern name -- confirm against the helper definitions.
5014 (define_expand "vec_unpacks_hi_v4si"
5015 [(match_operand:V2DI 0 "register_operand" "")
5016 (match_operand:V4SI 1 "register_operand" "")]
5020 ix86_expand_sse4_unpack (operands, false, true);
5022 ix86_expand_sse_unpack (operands, false, true);
;; Expander from a V4SI register to a V2DI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (true, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "u"/"lo" in the pattern name -- confirm against the helper definitions.
5026 (define_expand "vec_unpacku_lo_v4si"
5027 [(match_operand:V2DI 0 "register_operand" "")
5028 (match_operand:V4SI 1 "register_operand" "")]
5032 ix86_expand_sse4_unpack (operands, true, false);
5034 ix86_expand_sse_unpack (operands, true, false);
;; Expander from a V4SI register to a V2DI register.  Two helper calls are
;; visible, ix86_expand_sse4_unpack and ix86_expand_sse_unpack, both with
;; flags (false, false).
;; NOTE(review): the test choosing between the two helpers is not visible
;; here, and the flags presumably mean (unsigned, high half) to match the
;; "s"/"lo" in the pattern name -- confirm against the helper definitions.
5038 (define_expand "vec_unpacks_lo_v4si"
5039 [(match_operand:V2DI 0 "register_operand" "")
5040 (match_operand:V4SI 1 "register_operand" "")]
5044 ix86_expand_sse4_unpack (operands, false, false);
5046 ix86_expand_sse_unpack (operands, false, false);
5050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits pavgb on V16QI operands.  Operand 1 is marked commutative and tied
;; to the destination ("%0"); operand 2 may be an SSE register or memory.
;; The RTL involves a constant vector of sixteen 1s.  The insn requires
;; TARGET_SSE2 and ix86_binary_operator_ok (PLUS, V16QImode, operands).
;; NOTE(review): the arithmetic operators wrapping the operands are not
;; visible here; presumably a widened (a + b + 1) >> 1 -- confirm.
5056 (define_insn "sse2_uavgv16qi3"
5057 [(set (match_operand:V16QI 0 "register_operand" "=x")
5063 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5065 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5066 (const_vector:V16QI [(const_int 1) (const_int 1)
5067 (const_int 1) (const_int 1)
5068 (const_int 1) (const_int 1)
5069 (const_int 1) (const_int 1)
5070 (const_int 1) (const_int 1)
5071 (const_int 1) (const_int 1)
5072 (const_int 1) (const_int 1)
5073 (const_int 1) (const_int 1)]))
5075 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5076 "pavgb\t{%2, %0|%0, %2}"
5077 [(set_attr "type" "sseiadd")
5078 (set_attr "prefix_data16" "1")
5079 (set_attr "mode" "TI")])
;; Emits pavgw on V8HI operands.  Operand 1 is marked commutative and tied
;; to the destination ("%0"); operand 2 may be an SSE register or memory.
;; The RTL involves a constant vector of eight 1s.  The insn requires
;; TARGET_SSE2 and ix86_binary_operator_ok (PLUS, V8HImode, operands).
;; NOTE(review): the arithmetic operators wrapping the operands are not
;; visible here; presumably a widened (a + b + 1) >> 1 -- confirm.
5081 (define_insn "sse2_uavgv8hi3"
5082 [(set (match_operand:V8HI 0 "register_operand" "=x")
5088 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5090 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5091 (const_vector:V8HI [(const_int 1) (const_int 1)
5092 (const_int 1) (const_int 1)
5093 (const_int 1) (const_int 1)
5094 (const_int 1) (const_int 1)]))
5096 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5097 "pavgw\t{%2, %0|%0, %2}"
5098 [(set_attr "type" "sseiadd")
5099 (set_attr "prefix_data16" "1")
5100 (set_attr "mode" "TI")])
5102 ;; The correct representation for this is absolutely enormous, and
5103 ;; surely not generally useful.
;; Emits psadbw.  The operation is modelled as an opaque V2DI unspec over two
;; V16QI operands: operand 1 is tied to the destination and operand 2 may be
;; an SSE register or memory.
5104 (define_insn "sse2_psadbw"
5105 [(set (match_operand:V2DI 0 "register_operand" "=x")
5106 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5107 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5110 "psadbw\t{%2, %0|%0, %2}"
5111 [(set_attr "type" "sseiadd")
5112 (set_attr "prefix_data16" "1")
5113 (set_attr "mode" "TI")])
;; Emits movmskps: an SI unspec of a V4SF SSE register, written to an SI
;; general register ("=r").
5115 (define_insn "sse_movmskps"
5116 [(set (match_operand:SI 0 "register_operand" "=r")
5117 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5120 "movmskps\t{%1, %0|%0, %1}"
5121 [(set_attr "type" "ssecvt")
5122 (set_attr "mode" "V4SF")])
;; Emits movmskpd: an SI unspec of a V2DF SSE register, written to an SI
;; general register ("=r").
5124 (define_insn "sse2_movmskpd"
5125 [(set (match_operand:SI 0 "register_operand" "=r")
5126 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5129 "movmskpd\t{%1, %0|%0, %1}"
5130 [(set_attr "type" "ssecvt")
5131 (set_attr "mode" "V2DF")])
;; Emits pmovmskb: an SI unspec of a V16QI SSE register, written to an SI
;; general register ("=r").  Note the "mode" attribute is "SI" here.
5133 (define_insn "sse2_pmovmskb"
5134 [(set (match_operand:SI 0 "register_operand" "=r")
5135 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5138 "pmovmskb\t{%1, %0|%0, %1}"
5139 [(set_attr "type" "ssecvt")
5140 (set_attr "prefix_data16" "1")
5141 (set_attr "mode" "SI")])
;; Expander for a masked store: the destination is a V16QI memory operand and
;; the value is a V16QI unspec over register operands 1 and 2.
;; NOTE(review): the rest of the unspec operand list, the unspec code and the
;; condition are not visible here -- confirm against the full pattern.
5143 (define_expand "sse2_maskmovdqu"
5144 [(set (match_operand:V16QI 0 "memory_operand" "")
5145 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5146 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The store address is
;; an SI register constrained to "D".  The unspec takes operands 1 and 2 plus
;; the previous contents of the same memory location, so the old memory value
;; is an input of the operation.  Only %1 and %2 appear in the template; the
;; address register is not printed.
5152 (define_insn "*sse2_maskmovdqu"
5153 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5154 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5155 (match_operand:V16QI 2 "register_operand" "x")
5156 (mem:V16QI (match_dup 0))]
5158 "TARGET_SSE2 && !TARGET_64BIT"
5159 ;; @@@ check ordering of operands in intel/nonintel syntax
5160 "maskmovdqu\t{%2, %1|%1, %2}"
5161 [(set_attr "type" "ssecvt")
5162 (set_attr "prefix_data16" "1")
5163 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  The store address is a
;; DI register constrained to "D".  The unspec takes operands 1 and 2 plus
;; the previous contents of the same memory location, so the old memory value
;; is an input of the operation.  Only %1 and %2 appear in the template; the
;; address register is not printed.
5165 (define_insn "*sse2_maskmovdqu_rex64"
5166 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5167 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5168 (match_operand:V16QI 2 "register_operand" "x")
5169 (mem:V16QI (match_dup 0))]
5171 "TARGET_SSE2 && TARGET_64BIT"
5172 ;; @@@ check ordering of operands in intel/nonintel syntax
5173 "maskmovdqu\t{%2, %1|%1, %2}"
5174 [(set_attr "type" "ssecvt")
5175 (set_attr "prefix_data16" "1")
5176 (set_attr "mode" "TI")])
;; Volatile unspec taking an SI memory operand; the "memory" attribute is
;; "load".
;; NOTE(review): the unspec code, condition and output template are not
;; visible here (presumably ldmxcsr) -- confirm against the full pattern.
5178 (define_insn "sse_ldmxcsr"
5179 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5183 [(set_attr "type" "sse")
5184 (set_attr "memory" "load")])
;; Stores the result of the volatile unspec UNSPECV_STMXCSR into an SI memory
;; operand; the "memory" attribute is "store".
;; NOTE(review): the condition and output template are not visible here
;; (presumably stmxcsr) -- confirm against the full pattern.
5186 (define_insn "sse_stmxcsr"
5187 [(set (match_operand:SI 0 "memory_operand" "=m")
5188 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5191 [(set_attr "type" "sse")
5192 (set_attr "memory" "store")])
;; Expander for a store fence, available with TARGET_SSE or TARGET_3DNOW_A.
;; The preparation code makes operands[0] a volatile BLKmode MEM whose
;; address is a scratch; the template applies the UNSPEC_SFENCE unspec to it.
5194 (define_expand "sse_sfence"
5196 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5197 "TARGET_SSE || TARGET_3DNOW_A"
5199 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5200 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the store fence: sets a BLKmode operand (no predicate or
;; constraint) to the UNSPEC_SFENCE unspec of itself.  Available with
;; TARGET_SSE or TARGET_3DNOW_A; the "memory" attribute is "unknown".
;; NOTE(review): the output template is not visible here (presumably sfence)
;; -- confirm against the full pattern.
5203 (define_insn "*sse_sfence"
5204 [(set (match_operand:BLK 0 "" "")
5205 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5206 "TARGET_SSE || TARGET_3DNOW_A"
5208 [(set_attr "type" "sse")
5209 (set_attr "memory" "unknown")])
;; Volatile unspec taking an address operand (predicate address_operand,
;; constraint "p"); the "memory" attribute is "unknown".
;; NOTE(review): the unspec code, condition and output template are not
;; visible here (presumably clflush) -- confirm against the full pattern.
5211 (define_insn "sse2_clflush"
5212 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5216 [(set_attr "type" "sse")
5217 (set_attr "memory" "unknown")])
;; Expander for a memory fence.  The preparation code makes operands[0] a
;; volatile BLKmode MEM whose address is a scratch; the template applies the
;; UNSPEC_MFENCE unspec to it.
;; NOTE(review): the enabling condition is not visible here -- confirm.
5219 (define_expand "sse2_mfence"
5221 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5224 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5225 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the memory fence: sets a BLKmode operand (no predicate
;; or constraint) to the UNSPEC_MFENCE unspec of itself; the "memory"
;; attribute is "unknown".
;; NOTE(review): the condition and output template are not visible here
;; (presumably mfence) -- confirm against the full pattern.
5228 (define_insn "*sse2_mfence"
5229 [(set (match_operand:BLK 0 "" "")
5230 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5233 [(set_attr "type" "sse")
5234 (set_attr "memory" "unknown")])
;; Expander for a load fence.  The preparation code makes operands[0] a
;; volatile BLKmode MEM whose address is a scratch; the template applies the
;; UNSPEC_LFENCE unspec to it.
;; NOTE(review): the enabling condition is not visible here -- confirm.
5236 (define_expand "sse2_lfence"
5238 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5241 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5242 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the load fence: sets a BLKmode operand (no predicate or
;; constraint) to the UNSPEC_LFENCE unspec of itself; the "memory" attribute
;; is "unknown".
;; NOTE(review): the condition and output template are not visible here
;; (presumably lfence) -- confirm against the full pattern.
5245 (define_insn "*sse2_lfence"
5246 [(set (match_operand:BLK 0 "" "")
5247 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5250 [(set_attr "type" "sse")
5251 (set_attr "memory" "unknown")])
;; Volatile unspec over two SI register operands constrained to "a" and "c";
;; the insn length is fixed at 3 bytes.
;; NOTE(review): the unspec code, condition and output template are not
;; visible here (presumably mwait) -- confirm against the full pattern.
5253 (define_insn "sse3_mwait"
5254 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5255 (match_operand:SI 1 "register_operand" "c")]
5258 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5259 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5260 ;; we only need to set up 32bit registers.
5262 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): a volatile unspec over
;; three SI register operands constrained to "a", "c" and "d", printed
;; explicitly as "monitor %0, %1, %2".  The insn length is fixed at 3 bytes.
5264 (define_insn "sse3_monitor"
5265 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5266 (match_operand:SI 1 "register_operand" "c")
5267 (match_operand:SI 2 "register_operand" "d")]
5269 "TARGET_SSE3 && !TARGET_64BIT"
5270 "monitor\t%0, %1, %2"
5271 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT): a volatile unspec whose
;; first operand is a DI register in "a" and whose other two operands remain
;; SI registers in "c" and "d", for the reason given in the comment below.
;; The insn length is fixed at 3 bytes.
;; NOTE(review): the output template is not visible here -- confirm against
;; the full pattern.
5273 (define_insn "sse3_monitor64"
5274 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5275 (match_operand:SI 1 "register_operand" "c")
5276 (match_operand:SI 2 "register_operand" "d")]
5278 "TARGET_SSE3 && TARGET_64BIT"
5279 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5280 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5281 ;; zero extended to 64bit, we only need to set up 32bit registers.
5283 [(set_attr "length" "3")])
;; Emits phaddw on V8HI SSE operands.  Operand 1 is tied to the destination;
;; operand 2 may be an SSE register or memory.  The RTL selects adjacent HI
;; element pairs (0,1), (2,3), (4,5), (6,7), first from operand 1 and then
;; from operand 2.
;; NOTE(review): the operators combining each pair and assembling the result
;; are not visible here (presumably plus inside nested vec_concat) -- confirm.
5286 (define_insn "ssse3_phaddwv8hi3"
5287 [(set (match_operand:V8HI 0 "register_operand" "=x")
5293 (match_operand:V8HI 1 "register_operand" "0")
5294 (parallel [(const_int 0)]))
5295 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5297 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5298 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5301 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5302 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5304 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5305 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5310 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5311 (parallel [(const_int 0)]))
5312 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5314 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5315 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5318 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5319 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5321 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5322 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5324 "phaddw\t{%2, %0|%0, %2}"
5325 [(set_attr "type" "sseiadd")
5326 (set_attr "prefix_data16" "1")
5327 (set_attr "prefix_extra" "1")
5328 (set_attr "mode" "TI")])
;; Emits phaddw on V4HI operands in "y" registers (mode attribute "DI").
;; Operand 1 is tied to the destination; operand 2 may be a "y" register or
;; memory.  The RTL selects adjacent HI element pairs (0,1) and (2,3), first
;; from operand 1 and then from operand 2.
;; NOTE(review): the operators combining each pair and assembling the result
;; are not visible here (presumably plus inside nested vec_concat) -- confirm.
5330 (define_insn "ssse3_phaddwv4hi3"
5331 [(set (match_operand:V4HI 0 "register_operand" "=y")
5336 (match_operand:V4HI 1 "register_operand" "0")
5337 (parallel [(const_int 0)]))
5338 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5340 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5341 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5345 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5346 (parallel [(const_int 0)]))
5347 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5349 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5350 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5352 "phaddw\t{%2, %0|%0, %2}"
5353 [(set_attr "type" "sseiadd")
5354 (set_attr "prefix_extra" "1")
5355 (set_attr "mode" "DI")])
;; Emits phaddd on V4SI SSE operands.  Operand 1 is tied to the destination;
;; operand 2 may be an SSE register or memory.  The RTL selects adjacent SI
;; element pairs (0,1) and (2,3), first from operand 1 and then from
;; operand 2.
;; NOTE(review): the operators combining each pair and assembling the result
;; are not visible here (presumably plus inside nested vec_concat) -- confirm.
5357 (define_insn "ssse3_phadddv4si3"
5358 [(set (match_operand:V4SI 0 "register_operand" "=x")
5363 (match_operand:V4SI 1 "register_operand" "0")
5364 (parallel [(const_int 0)]))
5365 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5367 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5368 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5372 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5373 (parallel [(const_int 0)]))
5374 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5376 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5377 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5379 "phaddd\t{%2, %0|%0, %2}"
5380 [(set_attr "type" "sseiadd")
5381 (set_attr "prefix_data16" "1")
5382 (set_attr "prefix_extra" "1")
5383 (set_attr "mode" "TI")])
;; Emits phaddd on V2SI operands in "y" registers (mode attribute "DI").
;; Operand 1 is tied to the destination; operand 2 may be a "y" register or
;; memory.  The RTL selects the SI element pair (0,1) from operand 1 and then
;; from operand 2.
;; NOTE(review): the operators combining each pair and assembling the result
;; are not visible here (presumably plus inside vec_concat) -- confirm.
5385 (define_insn "ssse3_phadddv2si3"
5386 [(set (match_operand:V2SI 0 "register_operand" "=y")
5390 (match_operand:V2SI 1 "register_operand" "0")
5391 (parallel [(const_int 0)]))
5392 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5395 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5396 (parallel [(const_int 0)]))
5397 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5399 "phaddd\t{%2, %0|%0, %2}"
5400 [(set_attr "type" "sseiadd")
5401 (set_attr "prefix_extra" "1")
5402 (set_attr "mode" "DI")])
;; Emits phaddsw on V8HI SSE operands.  Operand 1 is tied to the destination;
;; operand 2 may be an SSE register or memory.  The RTL selects adjacent HI
;; element pairs (0,1), (2,3), (4,5), (6,7), first from operand 1 and then
;; from operand 2.
;; NOTE(review): the operators combining each pair are not visible here
;; (presumably a signed-saturating add, given the mnemonic) -- confirm.
5404 (define_insn "ssse3_phaddswv8hi3"
5405 [(set (match_operand:V8HI 0 "register_operand" "=x")
5411 (match_operand:V8HI 1 "register_operand" "0")
5412 (parallel [(const_int 0)]))
5413 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5415 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5416 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5419 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5420 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5422 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5423 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5428 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5429 (parallel [(const_int 0)]))
5430 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5432 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5433 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5436 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5437 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5439 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5440 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5442 "phaddsw\t{%2, %0|%0, %2}"
5443 [(set_attr "type" "sseiadd")
5444 (set_attr "prefix_data16" "1")
5445 (set_attr "prefix_extra" "1")
5446 (set_attr "mode" "TI")])
;; SSSE3 phaddsw, MMX form.  Operand 1 is tied to the MMX ("y") destination;
;; operand 2 may be an MMX register or memory ("ym").  Elements 0-3 of each
;; V4HI input are selected.
5448 (define_insn "ssse3_phaddswv4hi3"
5449 [(set (match_operand:V4HI 0 "register_operand" "=y")
5454 (match_operand:V4HI 1 "register_operand" "0")
5455 (parallel [(const_int 0)]))
5456 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5458 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5459 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5463 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5464 (parallel [(const_int 0)]))
5465 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5467 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5468 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5470 "phaddsw\t{%2, %0|%0, %2}"
5471 [(set_attr "type" "sseiadd")
5472 (set_attr "prefix_extra" "1")
5473 (set_attr "mode" "DI")])
;; SSSE3 phsubw, SSE form.  Operand 1 is tied to the SSE ("x") destination;
;; operand 2 may be an SSE register or memory ("xm").  Elements 0-7 of each
;; V8HI input are selected one at a time via vec_select.
5475 (define_insn "ssse3_phsubwv8hi3"
5476 [(set (match_operand:V8HI 0 "register_operand" "=x")
5482 (match_operand:V8HI 1 "register_operand" "0")
5483 (parallel [(const_int 0)]))
5484 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5486 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5487 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5490 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5491 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5493 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5494 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5499 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5500 (parallel [(const_int 0)]))
5501 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5503 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5504 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5507 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5508 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5510 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5511 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5513 "phsubw\t{%2, %0|%0, %2}"
5514 [(set_attr "type" "sseiadd")
5515 (set_attr "prefix_data16" "1")
5516 (set_attr "prefix_extra" "1")
5517 (set_attr "mode" "TI")])
;; SSSE3 phsubw, MMX form.  Operand 1 is tied to the MMX ("y") destination;
;; operand 2 may be an MMX register or memory ("ym").  Elements 0-3 of each
;; V4HI input are selected.
5519 (define_insn "ssse3_phsubwv4hi3"
5520 [(set (match_operand:V4HI 0 "register_operand" "=y")
5525 (match_operand:V4HI 1 "register_operand" "0")
5526 (parallel [(const_int 0)]))
5527 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5529 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5530 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5534 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5535 (parallel [(const_int 0)]))
5536 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5538 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5539 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5541 "phsubw\t{%2, %0|%0, %2}"
5542 [(set_attr "type" "sseiadd")
5543 (set_attr "prefix_extra" "1")
5544 (set_attr "mode" "DI")])
;; SSSE3 phsubd, SSE form.  Operand 1 is tied to the SSE ("x") destination;
;; operand 2 may be an SSE register or memory ("xm").  Elements 0-3 of each
;; V4SI input are selected.
5546 (define_insn "ssse3_phsubdv4si3"
5547 [(set (match_operand:V4SI 0 "register_operand" "=x")
5552 (match_operand:V4SI 1 "register_operand" "0")
5553 (parallel [(const_int 0)]))
5554 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5556 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5557 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5561 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5562 (parallel [(const_int 0)]))
5563 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5565 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5566 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5568 "phsubd\t{%2, %0|%0, %2}"
5569 [(set_attr "type" "sseiadd")
5570 (set_attr "prefix_data16" "1")
5571 (set_attr "prefix_extra" "1")
5572 (set_attr "mode" "TI")])
;; SSSE3 phsubd, MMX form.  Operand 1 is tied to the MMX ("y") destination;
;; operand 2 may be an MMX register or memory ("ym").  Elements 0 and 1 of
;; each V2SI input are selected.
5574 (define_insn "ssse3_phsubdv2si3"
5575 [(set (match_operand:V2SI 0 "register_operand" "=y")
5579 (match_operand:V2SI 1 "register_operand" "0")
5580 (parallel [(const_int 0)]))
5581 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5584 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5585 (parallel [(const_int 0)]))
5586 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5588 "phsubd\t{%2, %0|%0, %2}"
5589 [(set_attr "type" "sseiadd")
5590 (set_attr "prefix_extra" "1")
5591 (set_attr "mode" "DI")])
;; SSSE3 phsubsw, SSE form.  Operand 1 is tied to the SSE ("x") destination;
;; operand 2 may be an SSE register or memory ("xm").  Elements 0-7 of each
;; V8HI input are selected one at a time via vec_select.
5593 (define_insn "ssse3_phsubswv8hi3"
5594 [(set (match_operand:V8HI 0 "register_operand" "=x")
5600 (match_operand:V8HI 1 "register_operand" "0")
5601 (parallel [(const_int 0)]))
5602 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5604 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5605 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5608 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5609 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5611 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5612 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5617 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5618 (parallel [(const_int 0)]))
5619 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5621 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5622 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5625 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5626 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5628 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5629 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5631 "phsubsw\t{%2, %0|%0, %2}"
5632 [(set_attr "type" "sseiadd")
5633 (set_attr "prefix_data16" "1")
5634 (set_attr "prefix_extra" "1")
5635 (set_attr "mode" "TI")])
;; SSSE3 phsubsw, MMX form.  Operand 1 is tied to the MMX ("y") destination;
;; operand 2 may be an MMX register or memory ("ym").  Elements 0-3 of each
;; V4HI input are selected.
5637 (define_insn "ssse3_phsubswv4hi3"
5638 [(set (match_operand:V4HI 0 "register_operand" "=y")
5643 (match_operand:V4HI 1 "register_operand" "0")
5644 (parallel [(const_int 0)]))
5645 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5647 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5648 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5652 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5653 (parallel [(const_int 0)]))
5654 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5656 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5657 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5659 "phsubsw\t{%2, %0|%0, %2}"
5660 [(set_attr "type" "sseiadd")
5661 (set_attr "prefix_extra" "1")
5662 (set_attr "mode" "DI")])
;; SSSE3 pmaddubsw, SSE form: V16QI inputs, V8HI result.  The "%0"
;; constraint ties operand 1 to the destination and marks operands 1 and 2
;; as commutative.  Each input is read through two vec_select index lists,
;; one starting at element 0 and one starting at element 1.
5664 (define_insn "ssse3_pmaddubswv8hi3"
5665 [(set (match_operand:V8HI 0 "register_operand" "=x")
5670 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5671 (parallel [(const_int 0)
5681 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5682 (parallel [(const_int 0)
5692 (vec_select:V16QI (match_dup 1)
5693 (parallel [(const_int 1)
5702 (vec_select:V16QI (match_dup 2)
5703 (parallel [(const_int 1)
5710 (const_int 15)]))))))]
5712 "pmaddubsw\t{%2, %0|%0, %2}"
5713 [(set_attr "type" "sseiadd")
5714 (set_attr "prefix_data16" "1")
5715 (set_attr "prefix_extra" "1")
5716 (set_attr "mode" "TI")])
;; SSSE3 pmaddubsw, MMX form: V8QI inputs, V4HI result.  The "%0" constraint
;; ties operand 1 to the destination and marks operands 1 and 2 as
;; commutative.  Each input is read through two vec_select index lists, one
;; starting at element 0 and one starting at element 1.
5718 (define_insn "ssse3_pmaddubswv4hi3"
5719 [(set (match_operand:V4HI 0 "register_operand" "=y")
5724 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
5725 (parallel [(const_int 0)
5731 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5732 (parallel [(const_int 0)
5738 (vec_select:V8QI (match_dup 1)
5739 (parallel [(const_int 1)
5744 (vec_select:V8QI (match_dup 2)
5745 (parallel [(const_int 1)
5748 (const_int 7)]))))))]
5750 "pmaddubsw\t{%2, %0|%0, %2}"
5751 [(set_attr "type" "sseiadd")
5752 (set_attr "prefix_extra" "1")
5753 (set_attr "mode" "DI")])
;; SSSE3 pmulhrsw, SSE form.  Enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands as a V8HImode MULT.
;; Operands 1 and 2 are commutative ("%0"), with operand 1 tied to the
;; destination.  The template includes a V8HI const_vector of ones.
;; NOTE(review): that vector is presumably the rounding term -- confirm
;; against the full template.
5755 (define_insn "ssse3_pmulhrswv8hi3"
5756 [(set (match_operand:V8HI 0 "register_operand" "=x")
5763 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5765 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5767 (const_vector:V8HI [(const_int 1) (const_int 1)
5768 (const_int 1) (const_int 1)
5769 (const_int 1) (const_int 1)
5770 (const_int 1) (const_int 1)]))
5772 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5773 "pmulhrsw\t{%2, %0|%0, %2}"
5774 [(set_attr "type" "sseimul")
5775 (set_attr "prefix_data16" "1")
5776 (set_attr "prefix_extra" "1")
5777 (set_attr "mode" "TI")])
;; SSSE3 pmulhrsw, MMX form.  Enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands as a V4HImode MULT.
;; Operands 1 and 2 are commutative ("%0"), with operand 1 tied to the
;; destination.  The template includes a V4HI const_vector of ones.
;; NOTE(review): that vector is presumably the rounding term -- confirm
;; against the full template.
5779 (define_insn "ssse3_pmulhrswv4hi3"
5780 [(set (match_operand:V4HI 0 "register_operand" "=y")
5787 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5789 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5791 (const_vector:V4HI [(const_int 1) (const_int 1)
5792 (const_int 1) (const_int 1)]))
5794 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5795 "pmulhrsw\t{%2, %0|%0, %2}"
5796 [(set_attr "type" "sseimul")
5797 (set_attr "prefix_extra" "1")
5798 (set_attr "mode" "DI")])
;; SSSE3 pshufb, SSE form, modelled as an unspec over its two V16QI inputs.
;; Operand 1 is tied to the SSE ("x") destination; operand 2 may be an SSE
;; register or memory ("xm").
5800 (define_insn "ssse3_pshufbv16qi3"
5801 [(set (match_operand:V16QI 0 "register_operand" "=x")
5802 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5803 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5806 "pshufb\t{%2, %0|%0, %2}"
5807 [(set_attr "type" "sselog1")
5808 (set_attr "prefix_data16" "1")
5809 (set_attr "prefix_extra" "1")
5810 (set_attr "mode" "TI")])
;; SSSE3 pshufb, MMX form, modelled as an unspec over its two V8QI inputs.
;; Operand 1 is tied to the MMX ("y") destination; operand 2 may be an MMX
;; register or memory ("ym").
5812 (define_insn "ssse3_pshufbv8qi3"
5813 [(set (match_operand:V8QI 0 "register_operand" "=y")
5814 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5815 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5818 "pshufb\t{%2, %0|%0, %2}"
5819 [(set_attr "type" "sselog1")
5820 (set_attr "prefix_extra" "1")
5821 (set_attr "mode" "DI")])
;; SSSE3 psign, SSE form, iterated over SSEMODE124 (V16QI, V8HI, V4SI) and
;; modelled as an unspec.  <ssevecsize> supplies the b/w/d mnemonic suffix.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory ("xm").
5823 (define_insn "ssse3_psign<mode>3"
5824 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5825 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5826 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5829 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5830 [(set_attr "type" "sselog1")
5831 (set_attr "prefix_data16" "1")
5832 (set_attr "prefix_extra" "1")
5833 (set_attr "mode" "TI")])
;; SSSE3 psign, MMX form, iterated over MMXMODEI and modelled as an unspec.
;; MMXMODEI and <mmxvecsize> are defined outside this file section.
;; Operand 1 is tied to the MMX ("y") destination; operand 2 may be an MMX
;; register or memory ("ym").
5835 (define_insn "ssse3_psign<mode>3"
5836 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5837 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5838 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5841 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5842 [(set_attr "type" "sselog1")
5843 (set_attr "prefix_extra" "1")
5844 (set_attr "mode" "DI")])
;; SSSE3 palignr, SSE form on TImode, modelled as an unspec.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing it as the instruction immediate.
;; NOTE(review): presumably this converts a bit count into the byte count
;; palignr expects -- confirm against the predicate definition.
5846 (define_insn "ssse3_palignrti"
5847 [(set (match_operand:TI 0 "register_operand" "=x")
5848 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5849 (match_operand:TI 2 "nonimmediate_operand" "xm")
5850 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5854 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5855 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5857 [(set_attr "type" "sseishft")
5858 (set_attr "prefix_data16" "1")
5859 (set_attr "prefix_extra" "1")
5860 (set_attr "mode" "TI")])
;; SSSE3 palignr, MMX form on DImode, modelled as an unspec.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing it as the instruction immediate.
;; NOTE(review): presumably this converts a bit count into the byte count
;; palignr expects -- confirm against the predicate definition.
5862 (define_insn "ssse3_palignrdi"
5863 [(set (match_operand:DI 0 "register_operand" "=y")
5864 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5865 (match_operand:DI 2 "nonimmediate_operand" "ym")
5866 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5870 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5871 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5873 [(set_attr "type" "sseishft")
5874 (set_attr "prefix_extra" "1")
5875 (set_attr "mode" "DI")])
;; Vector absolute value on SSE registers, iterated over SSEMODE124
;; (V16QI, V8HI, V4SI).  Emits pabs<ssevecsize>, i.e. pabsb/pabsw/pabsd.
;; The source may be an SSE register or memory ("xm"); it is not tied to
;; the destination.
5877 (define_insn "abs<mode>2"
5878 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5879 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5881 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5882 [(set_attr "type" "sselog1")
5883 (set_attr "prefix_data16" "1")
5884 (set_attr "prefix_extra" "1")
5885 (set_attr "mode" "TI")])
;; Vector absolute value on MMX registers, iterated over MMXMODEI.
;; MMXMODEI and <mmxvecsize> are defined outside this file section.
;; The source may be an MMX register or memory ("ym"); it is not tied to
;; the destination.
5887 (define_insn "abs<mode>2"
5888 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5889 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5891 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5892 [(set_attr "type" "sselog1")
5893 (set_attr "prefix_extra" "1")
5894 (set_attr "mode" "DI")])
5896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5898 ;; AMD SSE4A instructions
5900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movntsd, vector-source form.  The destination is a DF memory
;; operand; the source is element 0 of a V2DF SSE register, wrapped in an
;; unspec.
5902 (define_insn "sse4a_vmmovntv2df"
5903 [(set (match_operand:DF 0 "memory_operand" "=m")
5904 (unspec:DF [(vec_select:DF
5905 (match_operand:V2DF 1 "register_operand" "x")
5906 (parallel [(const_int 0)]))]
5909 "movntsd\t{%1, %0|%0, %1}"
5910 [(set_attr "type" "ssemov")
5911 (set_attr "mode" "DF")])
;; SSE4A movntsd, scalar-source form.  The destination is a DF memory
;; operand; the source is a DF value in an SSE register, wrapped in an
;; unspec.
5913 (define_insn "sse4a_movntdf"
5914 [(set (match_operand:DF 0 "memory_operand" "=m")
5915 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5918 "movntsd\t{%1, %0|%0, %1}"
5919 [(set_attr "type" "ssemov")
5920 (set_attr "mode" "DF")])
;; SSE4A movntss, vector-source form.  The destination is an SF memory
;; operand; the source is element 0 of a V4SF SSE register, wrapped in an
;; unspec.
5922 (define_insn "sse4a_vmmovntv4sf"
5923 [(set (match_operand:SF 0 "memory_operand" "=m")
5924 (unspec:SF [(vec_select:SF
5925 (match_operand:V4SF 1 "register_operand" "x")
5926 (parallel [(const_int 0)]))]
5929 "movntss\t{%1, %0|%0, %1}"
5930 [(set_attr "type" "ssemov")
5931 (set_attr "mode" "SF")])
;; SSE4A movntss, scalar-source form.  The destination is an SF memory
;; operand; the source is an SF value in an SSE register, wrapped in an
;; unspec.
5933 (define_insn "sse4a_movntsf"
5934 [(set (match_operand:SF 0 "memory_operand" "=m")
5935 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5938 "movntss\t{%1, %0|%0, %1}"
5939 [(set_attr "type" "ssemov")
5940 (set_attr "mode" "SF")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the V2DI destination;
;; operands 2 and 3 are mode-less integer constants printed as the two
;; immediates of the instruction.
5942 (define_insn "sse4a_extrqi"
5943 [(set (match_operand:V2DI 0 "register_operand" "=x")
5944 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5945 (match_operand 2 "const_int_operand" "")
5946 (match_operand 3 "const_int_operand" "")]
5949 "extrq\t{%3, %2, %0|%0, %2, %3}"
5950 [(set_attr "type" "sse")
5951 (set_attr "prefix_data16" "1")
5952 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 is tied to the V2DI destination;
;; operand 2 is a V16QI value that must be in an SSE register (no memory
;; alternative).
5954 (define_insn "sse4a_extrq"
5955 [(set (match_operand:V2DI 0 "register_operand" "=x")
5956 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5957 (match_operand:V16QI 2 "register_operand" "x")]
5960 "extrq\t{%2, %0|%0, %2}"
5961 [(set_attr "type" "sse")
5962 (set_attr "prefix_data16" "1")
5963 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form.  Operand 1 is tied to the V2DI
;; destination, operand 2 is a V2DI SSE register, and operands 3 and 4 are
;; mode-less integer constants printed as the two immediates.
5965 (define_insn "sse4a_insertqi"
5966 [(set (match_operand:V2DI 0 "register_operand" "=x")
5967 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5968 (match_operand:V2DI 2 "register_operand" "x")
5969 (match_operand 3 "const_int_operand" "")
5970 (match_operand 4 "const_int_operand" "")]
5973 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5974 [(set_attr "type" "sseins")
5975 (set_attr "prefix_rep" "1")
5976 (set_attr "mode" "TI")])
;; SSE4A insertq, register form.  Operand 1 is tied to the V2DI destination;
;; operand 2 is a V2DI value that must be in an SSE register (no memory
;; alternative).
5978 (define_insn "sse4a_insertq"
5979 [(set (match_operand:V2DI 0 "register_operand" "=x")
5980 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5981 (match_operand:V2DI 2 "register_operand" "x")]
5984 "insertq\t{%2, %0|%0, %2}"
5985 [(set_attr "type" "sseins")
5986 (set_attr "prefix_rep" "1")
5987 (set_attr "mode" "TI")])
5989 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5991 ;; Intel SSE4.1 instructions
5993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4.1 blendpd.  In the RTL, operand 2 (SSE register or memory) is
;; listed before operand 1, which is tied to the destination.  Operand 3 is
;; an immediate restricted by const_0_to_3_operand.
5995 (define_insn "sse4_1_blendpd"
5996 [(set (match_operand:V2DF 0 "register_operand" "=x")
5998 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5999 (match_operand:V2DF 1 "register_operand" "0")
6000 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
6002 "blendpd\t{%3, %2, %0|%0, %2, %3}"
6003 [(set_attr "type" "ssemov")
6004 (set_attr "prefix_extra" "1")
6005 (set_attr "mode" "V2DF")])
;; SSE4.1 blendps.  In the RTL, operand 2 (SSE register or memory) is
;; listed before operand 1, which is tied to the destination.  Operand 3 is
;; an immediate restricted by const_0_to_15_operand.
6007 (define_insn "sse4_1_blendps"
6008 [(set (match_operand:V4SF 0 "register_operand" "=x")
6010 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6011 (match_operand:V4SF 1 "register_operand" "0")
6012 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
6014 "blendps\t{%3, %2, %0|%0, %2, %3}"
6015 [(set_attr "type" "ssemov")
6016 (set_attr "prefix_extra" "1")
6017 (set_attr "mode" "V4SF")])
;; SSE4.1 blendvpd, modelled as an unspec.  Operands 0-2 use *_not_xmm0
;; predicates, while operand 3 is constrained to "Y0".
;; NOTE(review): presumably "Y0" is xmm0, which the instruction reads
;; implicitly as its mask -- confirm against the constraint definitions.
6019 (define_insn "sse4_1_blendvpd"
6020 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
6021 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
6022 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
6023 (match_operand:V2DF 3 "register_operand" "Y0")]
6026 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
6027 [(set_attr "type" "ssemov")
6028 (set_attr "prefix_extra" "1")
6029 (set_attr "mode" "V2DF")])
;; SSE4.1 blendvps, modelled as an unspec.  Operands 0-2 use *_not_xmm0
;; predicates, while operand 3 is constrained to "Y0".
;; NOTE(review): presumably "Y0" is xmm0, which the instruction reads
;; implicitly as its mask -- confirm against the constraint definitions.
6031 (define_insn "sse4_1_blendvps"
6032 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
6033 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
6034 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
6035 (match_operand:V4SF 3 "register_operand" "Y0")]
6038 "blendvps\t{%3, %2, %0|%0, %2, %3}"
6039 [(set_attr "type" "ssemov")
6040 (set_attr "prefix_extra" "1")
6041 (set_attr "mode" "V4SF")])
;; SSE4.1 dppd, modelled as an unspec.  Operands 1 and 2 are commutative
;; ("%0"), with operand 1 tied to the destination; operand 3 is an
;; immediate restricted by const_0_to_255_operand.
6043 (define_insn "sse4_1_dppd"
6044 [(set (match_operand:V2DF 0 "register_operand" "=x")
6045 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
6046 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6047 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6050 "dppd\t{%3, %2, %0|%0, %2, %3}"
6051 [(set_attr "type" "ssemul")
6052 (set_attr "prefix_extra" "1")
6053 (set_attr "mode" "V2DF")])
;; SSE4.1 dpps, modelled as an unspec.  Operands 1 and 2 are commutative
;; ("%0"), with operand 1 tied to the destination; operand 3 is an
;; immediate restricted by const_0_to_255_operand.
6055 (define_insn "sse4_1_dpps"
6056 [(set (match_operand:V4SF 0 "register_operand" "=x")
6057 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
6058 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6059 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6062 "dpps\t{%3, %2, %0|%0, %2, %3}"
6063 [(set_attr "type" "ssemul")
6064 (set_attr "prefix_extra" "1")
6065 (set_attr "mode" "V4SF")])
;; SSE4.1 movntdqa, modelled as an unspec.  The source must be a V2DI
;; memory operand and the destination an SSE register.
6067 (define_insn "sse4_1_movntdqa"
6068 [(set (match_operand:V2DI 0 "register_operand" "=x")
6069 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6072 "movntdqa\t{%1, %0|%0, %1}"
6073 [(set_attr "type" "ssecvt")
6074 (set_attr "prefix_extra" "1")
6075 (set_attr "mode" "TI")])
;; SSE4.1 mpsadbw, modelled as an unspec.  Operand 1 is tied to the V16QI
;; destination; operand 2 may be an SSE register or memory; operand 3 is an
;; immediate restricted by const_0_to_255_operand.
6077 (define_insn "sse4_1_mpsadbw"
6078 [(set (match_operand:V16QI 0 "register_operand" "=x")
6079 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6080 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6081 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6084 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6085 [(set_attr "type" "sselog1")
6086 (set_attr "prefix_extra" "1")
6087 (set_attr "mode" "TI")])
;; SSE4.1 packusdw: two V4SI inputs produce one V8HI result.  Operand 1 is
;; tied to the destination register; operand 2 may be an SSE register or
;; memory ("xm").
6089 (define_insn "sse4_1_packusdw"
6090 [(set (match_operand:V8HI 0 "register_operand" "=x")
6093 (match_operand:V4SI 1 "register_operand" "0"))
6095 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6097 "packusdw\t{%2, %0|%0, %2}"
6098 [(set_attr "type" "sselog")
6099 (set_attr "prefix_extra" "1")
6100 (set_attr "mode" "TI")])
;; SSE4.1 pblendvb, modelled as an unspec.  Operands 0-2 use *_not_xmm0
;; predicates, while operand 3 is constrained to "Y0".
;; NOTE(review): presumably "Y0" is xmm0, which the instruction reads
;; implicitly as its mask -- confirm against the constraint definitions.
6102 (define_insn "sse4_1_pblendvb"
6103 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6104 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6105 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6106 (match_operand:V16QI 3 "register_operand" "Y0")]
6109 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6110 [(set_attr "type" "ssemov")
6111 (set_attr "prefix_extra" "1")
6112 (set_attr "mode" "TI")])
;; SSE4.1 pblendw.  In the RTL, operand 2 (SSE register or memory) is
;; listed before operand 1, which is tied to the destination.  Operand 3 is
;; an immediate restricted by const_0_to_255_operand.
6114 (define_insn "sse4_1_pblendw"
6115 [(set (match_operand:V8HI 0 "register_operand" "=x")
6117 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6118 (match_operand:V8HI 1 "register_operand" "0")
6119 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6121 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6122 [(set_attr "type" "ssemov")
6123 (set_attr "prefix_extra" "1")
6124 (set_attr "mode" "TI")])
;; SSE4.1 phminposuw, modelled as UNSPEC_PHMINPOSUW over a single V8HI
;; input that may be an SSE register or memory.  The source is not tied to
;; the destination.
6126 (define_insn "sse4_1_phminposuw"
6127 [(set (match_operand:V8HI 0 "register_operand" "=x")
6128 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6129 UNSPEC_PHMINPOSUW))]
6131 "phminposuw\t{%1, %0|%0, %1}"
6132 [(set_attr "type" "sselog1")
6133 (set_attr "prefix_extra" "1")
6134 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbw, named register-only form: operand 1 is a full V16QI SSE
;; register and the result is V8HI.
6136 (define_insn "sse4_1_extendv8qiv8hi2"
6137 [(set (match_operand:V8HI 0 "register_operand" "=x")
6140 (match_operand:V16QI 1 "register_operand" "x")
6141 (parallel [(const_int 0)
6150 "pmovsxbw\t{%1, %0|%0, %1}"
6151 [(set_attr "type" "ssemov")
6152 (set_attr "prefix_extra" "1")
6153 (set_attr "mode" "TI")])
;; Unnamed pmovsxbw variant (leading '*'): operand 1 is a V8QI register or
;; memory operand wrapped in vec_duplicate:V16QI, so a memory source can be
;; matched.
6155 (define_insn "*sse4_1_extendv8qiv8hi2"
6156 [(set (match_operand:V8HI 0 "register_operand" "=x")
6159 (vec_duplicate:V16QI
6160 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6161 (parallel [(const_int 0)
6170 "pmovsxbw\t{%1, %0|%0, %1}"
6171 [(set_attr "type" "ssemov")
6172 (set_attr "prefix_extra" "1")
6173 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbd, named register-only form: operand 1 is a full V16QI SSE
;; register and the result is V4SI.
6175 (define_insn "sse4_1_extendv4qiv4si2"
6176 [(set (match_operand:V4SI 0 "register_operand" "=x")
6179 (match_operand:V16QI 1 "register_operand" "x")
6180 (parallel [(const_int 0)
6185 "pmovsxbd\t{%1, %0|%0, %1}"
6186 [(set_attr "type" "ssemov")
6187 (set_attr "prefix_extra" "1")
6188 (set_attr "mode" "TI")])
;; Unnamed pmovsxbd variant (leading '*'): operand 1 is a V4QI register or
;; memory operand wrapped in vec_duplicate:V16QI, so a memory source can be
;; matched.
6190 (define_insn "*sse4_1_extendv4qiv4si2"
6191 [(set (match_operand:V4SI 0 "register_operand" "=x")
6194 (vec_duplicate:V16QI
6195 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6196 (parallel [(const_int 0)
6201 "pmovsxbd\t{%1, %0|%0, %1}"
6202 [(set_attr "type" "ssemov")
6203 (set_attr "prefix_extra" "1")
6204 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxbq, named register-only form: operand 1 is a full V16QI SSE
;; register and the result is V2DI.
6206 (define_insn "sse4_1_extendv2qiv2di2"
6207 [(set (match_operand:V2DI 0 "register_operand" "=x")
6210 (match_operand:V16QI 1 "register_operand" "x")
6211 (parallel [(const_int 0)
6214 "pmovsxbq\t{%1, %0|%0, %1}"
6215 [(set_attr "type" "ssemov")
6216 (set_attr "prefix_extra" "1")
6217 (set_attr "mode" "TI")])
;; Unnamed pmovsxbq variant (leading '*'): operand 1 is a V2QI register or
;; memory operand wrapped in vec_duplicate:V16QI, so a memory source can be
;; matched.
6219 (define_insn "*sse4_1_extendv2qiv2di2"
6220 [(set (match_operand:V2DI 0 "register_operand" "=x")
6223 (vec_duplicate:V16QI
6224 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6225 (parallel [(const_int 0)
6228 "pmovsxbq\t{%1, %0|%0, %1}"
6229 [(set_attr "type" "ssemov")
6230 (set_attr "prefix_extra" "1")
6231 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwd, named register-only form: operand 1 is a full V8HI SSE
;; register and the result is V4SI.
6233 (define_insn "sse4_1_extendv4hiv4si2"
6234 [(set (match_operand:V4SI 0 "register_operand" "=x")
6237 (match_operand:V8HI 1 "register_operand" "x")
6238 (parallel [(const_int 0)
6243 "pmovsxwd\t{%1, %0|%0, %1}"
6244 [(set_attr "type" "ssemov")
6245 (set_attr "prefix_extra" "1")
6246 (set_attr "mode" "TI")])
;; Unnamed pmovsxwd variant (leading '*') whose source may be a register or
;; memory.  Operand 1 is V4HI: pmovsxwd consumes four 16-bit elements to
;; fill the V4SI result, matching the zero-extending sibling
;; *sse4_1_zero_extendv4hiv4si2.
6248 (define_insn "*sse4_1_extendv4hiv4si2"
6249 [(set (match_operand:V4SI 0 "register_operand" "=x")
6253 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6254 (parallel [(const_int 0)
6259 "pmovsxwd\t{%1, %0|%0, %1}"
6260 [(set_attr "type" "ssemov")
6261 (set_attr "prefix_extra" "1")
6262 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxwq, named register-only form: operand 1 is a full V8HI SSE
;; register and the result is V2DI.
6264 (define_insn "sse4_1_extendv2hiv2di2"
6265 [(set (match_operand:V2DI 0 "register_operand" "=x")
6268 (match_operand:V8HI 1 "register_operand" "x")
6269 (parallel [(const_int 0)
6272 "pmovsxwq\t{%1, %0|%0, %1}"
6273 [(set_attr "type" "ssemov")
6274 (set_attr "prefix_extra" "1")
6275 (set_attr "mode" "TI")])
;; Unnamed pmovsxwq variant (leading '*') whose source may be a register or
;; memory.  Operand 1 is V2HI: pmovsxwq consumes two 16-bit elements to
;; fill the V2DI result, matching the zero-extending sibling
;; *sse4_1_zero_extendv2hiv2di2.
6277 (define_insn "*sse4_1_extendv2hiv2di2"
6278 [(set (match_operand:V2DI 0 "register_operand" "=x")
6282 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6283 (parallel [(const_int 0)
6286 "pmovsxwq\t{%1, %0|%0, %1}"
6287 [(set_attr "type" "ssemov")
6288 (set_attr "prefix_extra" "1")
6289 (set_attr "mode" "TI")])
;; SSE4.1 pmovsxdq, named register-only form: operand 1 is a full V4SI SSE
;; register and the result is V2DI.
6291 (define_insn "sse4_1_extendv2siv2di2"
6292 [(set (match_operand:V2DI 0 "register_operand" "=x")
6295 (match_operand:V4SI 1 "register_operand" "x")
6296 (parallel [(const_int 0)
6299 "pmovsxdq\t{%1, %0|%0, %1}"
6300 [(set_attr "type" "ssemov")
6301 (set_attr "prefix_extra" "1")
6302 (set_attr "mode" "TI")])
;; Unnamed pmovsxdq variant (leading '*'): operand 1 is a V2SI register or
;; memory operand, so a memory source can be matched.
6304 (define_insn "*sse4_1_extendv2siv2di2"
6305 [(set (match_operand:V2DI 0 "register_operand" "=x")
6309 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6310 (parallel [(const_int 0)
6313 "pmovsxdq\t{%1, %0|%0, %1}"
6314 [(set_attr "type" "ssemov")
6315 (set_attr "prefix_extra" "1")
6316 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbw, named register-only form: operand 1 is a full V16QI SSE
;; register and the result is V8HI.
6318 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6319 [(set (match_operand:V8HI 0 "register_operand" "=x")
6322 (match_operand:V16QI 1 "register_operand" "x")
6323 (parallel [(const_int 0)
6332 "pmovzxbw\t{%1, %0|%0, %1}"
6333 [(set_attr "type" "ssemov")
6334 (set_attr "prefix_extra" "1")
6335 (set_attr "mode" "TI")])
;; Unnamed pmovzxbw variant (leading '*'): operand 1 is a V8QI register or
;; memory operand wrapped in vec_duplicate:V16QI, so a memory source can be
;; matched.
6337 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6338 [(set (match_operand:V8HI 0 "register_operand" "=x")
6341 (vec_duplicate:V16QI
6342 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6343 (parallel [(const_int 0)
6352 "pmovzxbw\t{%1, %0|%0, %1}"
6353 [(set_attr "type" "ssemov")
6354 (set_attr "prefix_extra" "1")
6355 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbd, named register-only form: operand 1 is a full V16QI SSE
;; register and the result is V4SI.
6357 (define_insn "sse4_1_zero_extendv4qiv4si2"
6358 [(set (match_operand:V4SI 0 "register_operand" "=x")
6361 (match_operand:V16QI 1 "register_operand" "x")
6362 (parallel [(const_int 0)
6367 "pmovzxbd\t{%1, %0|%0, %1}"
6368 [(set_attr "type" "ssemov")
6369 (set_attr "prefix_extra" "1")
6370 (set_attr "mode" "TI")])
;; Unnamed pmovzxbd variant (leading '*'): operand 1 is a V4QI register or
;; memory operand wrapped in vec_duplicate:V16QI, so a memory source can be
;; matched.
6372 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6373 [(set (match_operand:V4SI 0 "register_operand" "=x")
6376 (vec_duplicate:V16QI
6377 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6378 (parallel [(const_int 0)
6383 "pmovzxbd\t{%1, %0|%0, %1}"
6384 [(set_attr "type" "ssemov")
6385 (set_attr "prefix_extra" "1")
6386 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxbq, named register-only form: operand 1 is a full V16QI SSE
;; register and the result is V2DI.
6388 (define_insn "sse4_1_zero_extendv2qiv2di2"
6389 [(set (match_operand:V2DI 0 "register_operand" "=x")
6392 (match_operand:V16QI 1 "register_operand" "x")
6393 (parallel [(const_int 0)
6396 "pmovzxbq\t{%1, %0|%0, %1}"
6397 [(set_attr "type" "ssemov")
6398 (set_attr "prefix_extra" "1")
6399 (set_attr "mode" "TI")])
;; Unnamed pmovzxbq variant (leading '*'): operand 1 is a V2QI register or
;; memory operand wrapped in vec_duplicate:V16QI, so a memory source can be
;; matched.
6401 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6402 [(set (match_operand:V2DI 0 "register_operand" "=x")
6405 (vec_duplicate:V16QI
6406 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6407 (parallel [(const_int 0)
6410 "pmovzxbq\t{%1, %0|%0, %1}"
6411 [(set_attr "type" "ssemov")
6412 (set_attr "prefix_extra" "1")
6413 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwd, named register-only form: operand 1 is a full V8HI SSE
;; register and the result is V4SI.
6415 (define_insn "sse4_1_zero_extendv4hiv4si2"
6416 [(set (match_operand:V4SI 0 "register_operand" "=x")
6419 (match_operand:V8HI 1 "register_operand" "x")
6420 (parallel [(const_int 0)
6425 "pmovzxwd\t{%1, %0|%0, %1}"
6426 [(set_attr "type" "ssemov")
6427 (set_attr "prefix_extra" "1")
6428 (set_attr "mode" "TI")])
;; Unnamed pmovzxwd variant (leading '*'): operand 1 is a V4HI register or
;; memory operand, so a memory source can be matched.
6430 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6431 [(set (match_operand:V4SI 0 "register_operand" "=x")
6435 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6436 (parallel [(const_int 0)
6441 "pmovzxwd\t{%1, %0|%0, %1}"
6442 [(set_attr "type" "ssemov")
6443 (set_attr "prefix_extra" "1")
6444 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxwq, named register-only form: operand 1 is a full V8HI SSE
;; register and the result is V2DI.
6446 (define_insn "sse4_1_zero_extendv2hiv2di2"
6447 [(set (match_operand:V2DI 0 "register_operand" "=x")
6450 (match_operand:V8HI 1 "register_operand" "x")
6451 (parallel [(const_int 0)
6454 "pmovzxwq\t{%1, %0|%0, %1}"
6455 [(set_attr "type" "ssemov")
6456 (set_attr "prefix_extra" "1")
6457 (set_attr "mode" "TI")])
;; Unnamed pmovzxwq variant (leading '*'): operand 1 is a V2HI register or
;; memory operand, so a memory source can be matched.
6459 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6460 [(set (match_operand:V2DI 0 "register_operand" "=x")
6464 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6465 (parallel [(const_int 0)
6468 "pmovzxwq\t{%1, %0|%0, %1}"
6469 [(set_attr "type" "ssemov")
6470 (set_attr "prefix_extra" "1")
6471 (set_attr "mode" "TI")])
;; SSE4.1 pmovzxdq, named register-only form: operand 1 is a full V4SI SSE
;; register and the result is V2DI.
6473 (define_insn "sse4_1_zero_extendv2siv2di2"
6474 [(set (match_operand:V2DI 0 "register_operand" "=x")
6477 (match_operand:V4SI 1 "register_operand" "x")
6478 (parallel [(const_int 0)
6481 "pmovzxdq\t{%1, %0|%0, %1}"
6482 [(set_attr "type" "ssemov")
6483 (set_attr "prefix_extra" "1")
6484 (set_attr "mode" "TI")])
;; Unnamed pmovzxdq variant (leading '*'): operand 1 is a V2SI register or
;; memory operand, so a memory source can be matched.
6486 (define_insn "*sse4_1_zero_extendv2siv2di2"
6487 [(set (match_operand:V2DI 0 "register_operand" "=x")
6491 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6492 (parallel [(const_int 0)
6495 "pmovzxdq\t{%1, %0|%0, %1}"
6496 [(set_attr "type" "ssemov")
6497 (set_attr "prefix_extra" "1")
6498 (set_attr "mode" "TI")])
6500 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6501 ;; But it is not really a compare instruction.
;; SSE4.1 ptest.  It has no register result: the only set is FLAGS_REG
;; (CCmode), computed as an unspec of the two V2DI inputs.  Operand 0 must
;; be an SSE register; operand 1 may be an SSE register or memory.
6502 (define_insn "sse4_1_ptest"
6503 [(set (reg:CC FLAGS_REG)
6504 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6505 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6508 "ptest\t{%1, %0|%0, %1}"
6509 [(set_attr "type" "ssecomi")
6510 (set_attr "prefix_extra" "1")
6511 (set_attr "mode" "TI")])
;; SSE4.1 roundpd, modelled as an unspec.  Operand 1 (V2DF) may be an SSE
;; register or memory and is not tied to the destination; operand 2 is an
;; immediate restricted by const_0_to_15_operand.
6513 (define_insn "sse4_1_roundpd"
6514 [(set (match_operand:V2DF 0 "register_operand" "=x")
6515 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6516 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6519 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6520 [(set_attr "type" "ssecvt")
6521 (set_attr "prefix_extra" "1")
6522 (set_attr "mode" "V2DF")])
;; SSE4.1 roundps, modelled as an unspec.  Operand 1 (V4SF) may be an SSE
;; register or memory and is not tied to the destination; operand 2 is an
;; immediate restricted by const_0_to_15_operand.
6524 (define_insn "sse4_1_roundps"
6525 [(set (match_operand:V4SF 0 "register_operand" "=x")
6526 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6527 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6530 "roundps\t{%2, %1, %0|%0, %1, %2}"
6531 [(set_attr "type" "ssecvt")
6532 (set_attr "prefix_extra" "1")
6533 (set_attr "mode" "V4SF")])
;; SSE4.1 roundsd.  The unspec takes operand 2 (a V2DF SSE register) and
;; the immediate operand 3 (const_0_to_15_operand); operand 1 is a separate
;; V2DF input tied to the destination register.
6535 (define_insn "sse4_1_roundsd"
6536 [(set (match_operand:V2DF 0 "register_operand" "=x")
6538 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6539 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6541 (match_operand:V2DF 1 "register_operand" "0")
6544 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6545 [(set_attr "type" "ssecvt")
6546 (set_attr "prefix_extra" "1")
6547 (set_attr "mode" "V2DF")])
;; SSE4.1 roundss.  The unspec takes operand 2 (a V4SF SSE register) and
;; the immediate operand 3 (const_0_to_15_operand); operand 1 is a separate
;; V4SF input tied to the destination register.
6549 (define_insn "sse4_1_roundss"
6550 [(set (match_operand:V4SF 0 "register_operand" "=x")
6552 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6553 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6555 (match_operand:V4SF 1 "register_operand" "0")
6558 "roundss\t{%3, %2, %0|%0, %2, %3}"
6559 [(set_attr "type" "ssecvt")
6560 (set_attr "prefix_extra" "1")
6561 (set_attr "mode" "V4SF")])
6563 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6565 ;; Intel SSE4.2 string/text processing instructions
6567 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined SSE4.2 pcmpestr pattern with three results: an SI value in
;; operand 0 ("c"), a V16QI value in operand 1 ("Y0"), and FLAGS_REG.
;; Inputs are V16QI operand 2, SI operand 3 ("a"), V16QI operand 4
;; (register or memory, one alternative each), SI operand 5 ("d") and the
;; immediate operand 6.
;; The split condition requires that reload has neither started nor
;; completed.  The split code treats a result as live when curr_insn
;; carries no REG_UNUSED note for it, and emits the flags-only insn only
;; when FLAGS_REG is live and neither register result is.
;; NOTE(review): presumably the pcmpestri / pcmpestrm emissions are guarded
;; by the ecx / xmm0 liveness flags respectively -- confirm against the
;; full split body.
6569 (define_insn_and_split "sse4_2_pcmpestr"
6570 [(set (match_operand:SI 0 "register_operand" "=c,c")
6572 [(match_operand:V16QI 2 "register_operand" "x,x")
6573 (match_operand:SI 3 "register_operand" "a,a")
6574 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6575 (match_operand:SI 5 "register_operand" "d,d")
6576 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6578 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6586 (set (reg:CC FLAGS_REG)
6595 && !(reload_completed || reload_in_progress)"
6600 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6601 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6602 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6605 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6606 operands[3], operands[4],
6607 operands[5], operands[6]));
6609 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6610 operands[3], operands[4],
6611 operands[5], operands[6]));
6612 if (flags && !(ecx || xmm0))
6613 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6614 operands[4], operands[5],
6618 [(set_attr "type" "sselog")
6619 (set_attr "prefix_data16" "1")
6620 (set_attr "prefix_extra" "1")
6621 (set_attr "memory" "none,load")
6622 (set_attr "mode" "TI")])
;; SSE4.2 pcmpestri: sets an SI result in operand 0 ("c") and also sets
;; FLAGS_REG.  Inputs are V16QI operand 1, SI operand 2 ("a"), V16QI
;; operand 3 (register in alternative 0, memory in alternative 1), SI
;; operand 4 ("d") and the immediate operand 5.  Only operands 1, 3 and 5
;; appear in the assembler output.
6624 (define_insn "sse4_2_pcmpestri"
6625 [(set (match_operand:SI 0 "register_operand" "=c,c")
6627 [(match_operand:V16QI 1 "register_operand" "x,x")
6628 (match_operand:SI 2 "register_operand" "a,a")
6629 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6630 (match_operand:SI 4 "register_operand" "d,d")
6631 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6633 (set (reg:CC FLAGS_REG)
6642 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6643 [(set_attr "type" "sselog")
6644 (set_attr "prefix_data16" "1")
6645 (set_attr "prefix_extra" "1")
6646 (set_attr "memory" "none,load")
6647 (set_attr "mode" "TI")])
;; SSE4.2 pcmpestrm: sets a V16QI result in operand 0 ("Y0") and also sets
;; FLAGS_REG.  Inputs are V16QI operand 1, SI operand 2 ("a"), V16QI
;; operand 3 (register in alternative 0, memory in alternative 1), SI
;; operand 4 ("d") and the immediate operand 5.  Only operands 1, 3 and 5
;; appear in the assembler output.
6649 (define_insn "sse4_2_pcmpestrm"
6650 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6652 [(match_operand:V16QI 1 "register_operand" "x,x")
6653 (match_operand:SI 2 "register_operand" "a,a")
6654 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6655 (match_operand:SI 4 "register_operand" "d,d")
6656 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6658 (set (reg:CC FLAGS_REG)
6667 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6668 [(set_attr "type" "sselog")
6669 (set_attr "prefix_data16" "1")
6670 (set_attr "prefix_extra" "1")
6671 (set_attr "memory" "none,load")
6672 (set_attr "mode" "TI")])
;; sse4_2_pcmpestr_cconly: flags-only variant of the explicit-length string
;; compare.  The only value set is FLAGS_REG (CCmode); the register results
;; of the underlying instructions are modelled as clobbered scratches.
;;
;; Inputs: operand 0 (V16QI, x register), operand 1 (SImode, constraint a),
;; operand 2 (V16QI, x register or memory), operand 3 (SImode, constraint d),
;; operand 4 (immediate accepted by const_0_to_255_operand).
;;
;; Four alternatives:
;;   0,1 - scratch 5 is pinned to Y0, scratch 6 is left as X; emits pcmpestrm
;;   2,3 - scratch 5 is left as X, scratch 6 is pinned to c; emits pcmpestri
;; Within each pair, operand 2 is a register in the first alternative and
;; memory in the second, which the memory attribute (none,load,none,load)
;; mirrors.
6674 (define_insn "sse4_2_pcmpestr_cconly"
6675 [(set (reg:CC FLAGS_REG)
6677 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6678 (match_operand:SI 1 "register_operand" "a,a,a,a")
6679 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
6680 (match_operand:SI 3 "register_operand" "d,d,d,d")
6681 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6683 (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
6684 (clobber (match_scratch:SI 6 "= X, X,c,c"))]
6687 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6688 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6689 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6690 pcmpestri\t{%4, %2, %0|%0, %2, %4}"
6691 [(set_attr "type" "sselog")
6692 (set_attr "prefix_data16" "1")
6693 (set_attr "prefix_extra" "1")
6694 (set_attr "memory" "none,load,none,load")
6695 (set_attr "mode" "TI")])
;; sse4_2_pcmpistr: combined pattern describing all three results of the
;; implicit-length string compare at once, later split into the concrete
;; instruction patterns.
;;
;; Results: operand 0 (SImode, constraint c), operand 1 (V16QI, constraint
;; Y0) and FLAGS_REG (CCmode).
;; Inputs: operand 2 (V16QI, x register), operand 3 (V16QI, x register or
;; memory), operand 4 (immediate accepted by const_0_to_255_operand).
;;
;; The insn condition requires that reload has neither completed nor is in
;; progress, so this combined form is matched only before reload.
;;
;; Split code: three flags (ecx, xmm0, flags) are computed from curr_insn;
;; each is nonzero when the corresponding result has no REG_UNUSED note,
;; i.e. when that result is actually used.  The code can then emit
;; sse4_2_pcmpistri for operand 0 and sse4_2_pcmpistrm for operand 1.
;; sse4_2_pcmpistr_cconly is emitted only when the flags are used and
;; neither register result is.
;; NOTE(review): the index and mask forms are expected to be emitted only
;; for a used ecx / xmm0 result respectively; verify the guards on those
;; two emit_insn calls.
6697 (define_insn_and_split "sse4_2_pcmpistr"
6698 [(set (match_operand:SI 0 "register_operand" "=c,c")
6700 [(match_operand:V16QI 2 "register_operand" "x,x")
6701 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6702 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6704 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6710 (set (reg:CC FLAGS_REG)
6717 && !(reload_completed || reload_in_progress)"
6722 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6723 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6724 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6727 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6728 operands[3], operands[4]));
6730 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6731 operands[3], operands[4]));
6732 if (flags && !(ecx || xmm0))
6733 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
6737 [(set_attr "type" "sselog")
6738 (set_attr "prefix_data16" "1")
6739 (set_attr "prefix_extra" "1")
6740 (set_attr "memory" "none,load")
6741 (set_attr "mode" "TI")])
;; sse4_2_pcmpistri: single-instruction pattern that emits pcmpistri.
;;
;; Operand 0 is the SImode register result, pinned by constraint c; the
;; pattern additionally sets FLAGS_REG in CCmode.
;; Inputs:
;;   operand 1 - V16QI value in an x register
;;   operand 2 - V16QI value, x register or memory (the two alternatives)
;;   operand 3 - immediate accepted by const_0_to_255_operand
;; Unlike the pcmpestri pattern there are no SImode register inputs: every
;; input operand appears in the output template.
6743 (define_insn "sse4_2_pcmpistri"
6744 [(set (match_operand:SI 0 "register_operand" "=c,c")
6746 [(match_operand:V16QI 1 "register_operand" "x,x")
6747 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6748 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6750 (set (reg:CC FLAGS_REG)
6757 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
;; The memory attribute follows operand 2: none for the register
;; alternative, load for the memory alternative.
6758 [(set_attr "type" "sselog")
6759 (set_attr "prefix_data16" "1")
6760 (set_attr "prefix_extra" "1")
6761 (set_attr "memory" "none,load")
6762 (set_attr "mode" "TI")])
;; sse4_2_pcmpistrm: single-instruction pattern that emits pcmpistrm.
;;
;; Operand 0 is the V16QI register result, pinned by constraint Y0 (the
;; combined pcmpistr splitter in this file names the matching result xmm0);
;; the pattern additionally sets FLAGS_REG in CCmode.
;; Inputs:
;;   operand 1 - V16QI value in an x register
;;   operand 2 - V16QI value, x register or memory (the two alternatives)
;;   operand 3 - immediate accepted by const_0_to_255_operand
6764 (define_insn "sse4_2_pcmpistrm"
6765 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6767 [(match_operand:V16QI 1 "register_operand" "x,x")
6768 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6769 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6771 (set (reg:CC FLAGS_REG)
6778 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
;; The memory attribute follows operand 2: none for the register
;; alternative, load for the memory alternative.
6779 [(set_attr "type" "sselog")
6780 (set_attr "prefix_data16" "1")
6781 (set_attr "prefix_extra" "1")
6782 (set_attr "memory" "none,load")
6783 (set_attr "mode" "TI")])
;; sse4_2_pcmpistr_cconly: flags-only variant of the implicit-length string
;; compare.  The only value set is FLAGS_REG (CCmode); the register results
;; of the underlying instructions are modelled as clobbered scratches.
;;
;; Inputs: operand 0 (V16QI, x register), operand 1 (V16QI, x register or
;; memory), operand 2 (immediate accepted by const_0_to_255_operand).
;;
;; Four alternatives:
;;   0,1 - scratch 3 is pinned to Y0, scratch 4 is left as X; emits pcmpistrm
;;   2,3 - scratch 3 is left as X, scratch 4 is pinned to c; emits pcmpistri
;; Within each pair, operand 1 is a register in the first alternative and
;; memory in the second, which the memory attribute (none,load,none,load)
;; mirrors.
6785 (define_insn "sse4_2_pcmpistr_cconly"
6786 [(set (reg:CC FLAGS_REG)
6788 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6789 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
6790 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
6792 (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
6793 (clobber (match_scratch:SI 4 "= X, X,c,c"))]
6796 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6797 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6798 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6799 pcmpistri\t{%2, %1, %0|%0, %1, %2}"
6800 [(set_attr "type" "sselog")
6801 (set_attr "prefix_data16" "1")
6802 (set_attr "prefix_extra" "1")
6803 (set_attr "memory" "none,load,none,load")
6804 (set_attr "mode" "TI")])