1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; All vector float modes
;; Each entry pairs a mode with the target condition that enables it:
;; V4SF needs SSE, V2DF needs SSE2, and V8SF/V4DF need AVX.
23 (define_mode_iterator VF
24 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2")
25 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
27 ;; All SFmode vector float modes
;; The SF-element subset of VF: V4SF under SSE, V8SF under AVX.
28 (define_mode_iterator VF1
29 [(V4SF "TARGET_SSE") (V8SF "TARGET_AVX")])
31 ;; All DFmode vector float modes
;; The DF-element subset of VF: V2DF under SSE2, V4DF under AVX.
32 (define_mode_iterator VF2
33 [(V2DF "TARGET_SSE2") (V4DF "TARGET_AVX")])
35 ;; All 128bit vector float modes
;; Used by the scalar "vm" patterns in this file; V4SF needs SSE,
;; V2DF needs SSE2.
36 (define_mode_iterator VF_128
37 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2")])
39 ;; All 128bit vector integer modes
;; Unconditional list (no per-mode target condition) of the QI, HI, SI and
;; DI element vector modes.
40 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
42 ;; Random 128bit vector integer mode combinations
;; Subsets of VI_128.  The digits in each name match the element sizes
;; listed: 1 = QI, 2 = HI, 4 = SI, 8 = DI.
43 (define_mode_iterator VI12_128 [V16QI V8HI])
44 (define_mode_iterator VI14_128 [V16QI V4SI])
45 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
46 (define_mode_iterator VI24_128 [V8HI V4SI])
47 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
50 ;; Instruction suffix for sign and zero extensions.
;; Code attribute: "sx" for sign_extend, "zx" for zero_extend.
51 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
53 ;; 16 byte integral modes handled by SSE
;; Same mode list as VI_128 above, under the older SSEMODE* naming.
54 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
56 ;; All 16-byte vector modes handled by SSE
;; SSEMODE is the four integer modes plus V4SF and V2DF; SSEMODE16 is the
;; same list with V1TI added.
57 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
58 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
60 ;; 32 byte integral vector modes handled by AVX
;; Integer-element modes only: QI, HI, SI and DI elements.
61 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
63 ;; All 32-byte vector modes handled by AVX
;; The AVX256MODEI integer modes plus the float modes V8SF and V4DF.
64 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
66 ;; All QI vector modes handled by AVX
;; Used below by the avx_movdqu and avx_lddqu patterns.
67 (define_mode_iterator AVXMODEQI [V32QI V16QI])
69 ;; All DI vector modes handled by AVX
;; Used below by the integer avx_movnt<mode> pattern.
70 (define_mode_iterator AVXMODEDI [V4DI V2DI])
72 ;; All vector modes handled by AVX
;; AVXMODE is the SSEMODE list followed by the AVX256MODE list.  AVXMODE16
;; additionally contains V1TI and is the iterator used by
;; *avx_mov<mode>_internal.
73 (define_mode_iterator AVXMODE
74 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
75 (define_mode_iterator AVXMODE16
76 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Assorted subsets of the 16-byte modes: integer modes with QI/HI/SI
;; elements, all four integer modes, and the two packed float modes.
79 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
80 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
81 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Assorted AVX mode subsets.  The AVX256* iterators list only 32-byte modes;
;; the AVXMODEF* iterators mix 16-byte and 32-byte float modes (F2P: all four
;; float modes, F4P: the four-element ones, FDP: DF elements, FSP: SF
;; elements).
83 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
84 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
85 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
86 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
87 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
88 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
89 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
90 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
91 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; Scalar SF/DF together with every vector float mode; presumably for the
;; FMA patterns defined later in the file (not visible here).
93 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
95 ;; Int-float size matches
;; Each iterator pairs a float vector mode with the integer vector mode of
;; the same element count: four SF/SI elements, two DF/DI elements.
96 (define_mode_iterator SSEMODE4S [V4SF V4SI])
97 (define_mode_iterator SSEMODE2D [V2DF V2DI])
99 ;; Modes handled by vec_extract_even/odd pattern.
;; Of the entries shown, the 16-byte integer modes are conditional on
;; TARGET_SSE2 and the 32-byte float modes on TARGET_AVX.
100 (define_mode_iterator SSEMODE_EO
103 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
104 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
105 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
107 ;; Modes handled by storent patterns.
;; Scalar SF/DF entries require TARGET_SSE4A; SI, V2DI and V2DF require
;; TARGET_SSE2; V4DF and V8SF require TARGET_AVX.
108 (define_mode_iterator STORENT_MODE
109 [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
110 (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
112 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
114 ;; Modes handled by vector extract patterns.
;; Every 16-byte mode here is conditional on plain TARGET_SSE (including the
;; integer modes and V2DF); the 32-byte float modes are conditional on
;; TARGET_AVX.
115 (define_mode_iterator VEC_EXTRACT_MODE
116 [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
117 (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
118 (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
119 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
121 ;; Mapping from float mode to required SSE level
;; Expands to "sse" for SF-element modes up to 16 bytes, "sse2" for the
;; DF-element ones, and "avx" for V8SF/V4DF.  Patterns in this file use it
;; as a name prefix, e.g. "<sse>_movu<ssemodesuffix>".
122 (define_mode_attr sse
123 [(SF "sse") (DF "sse2")
124 (V4SF "sse") (V2DF "sse2")
125 (V8SF "avx") (V4DF "avx")])
127 ;; Mapping from integer vector mode to mnemonic suffix
;; One letter per element size: b (QI), w (HI), d (SI), q (DI).
128 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
130 ;; Mapping of the insn mnemonic suffix
;; ssemodesuffix gives the packed suffix ("ps"/"pd") for vector modes and the
;; scalar suffix ("ss"/"sd") for SF/DF; note that the integer modes V8SI and
;; V4DI also map to "ps" and "pd".
;; ssescalarmodesuffix gives the scalar suffix for every listed mode
;; ("ss"/"sd"), except V4SI which maps to "d".
131 (define_mode_attr ssemodesuffix
132 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
133 (V8SI "ps") (V4DI "pd")])
134 (define_mode_attr ssescalarmodesuffix
135 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss")
136 (V4DF "sd") (V4SI "d") (V4DI "sd")])
138 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (8-1, 16-1, 32-1, 64-1).
139 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
141 ;; Mapping of vector modes back to the scalar modes
;; Covers only the 16-byte vector modes; the scalar "vm" patterns below use
;; it for their "mode" attribute.
142 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
143 (V16QI "QI") (V8HI "HI")
144 (V4SI "SI") (V2DI "DI")])
146 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice the element count.  The 32-byte inputs map to
;; 64-byte mode names (V8DF, V16SF, V8DI, V16SI, V32HI, V64QI).
147 (define_mode_attr ssedoublesizemode
148 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
149 (V8HI "V16HI") (V16QI "V32QI")
150 (V4DF "V8DF") (V8SF "V16SF")
151 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
153 ;; Number of scalar elements in each vector type
;; The value is the N from the VN<elt> mode name, as a decimal string.
154 (define_mode_attr ssescalarnum
155 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
156 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value for the insn "mode" attribute: 16-byte integer modes (and V1TI) map
;; to TI, 32-byte integer modes map to OI, and float vector modes map to
;; themselves.
159 (define_mode_attr avxvecmode
160 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
161 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
162 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Maps each integer vector mode to the SF vector mode of the same total
;; size: V4SF for 16-byte modes, V8SF for 32-byte modes.
163 (define_mode_attr avxvecpsmode
164 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
165 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Maps a vector mode to the mode with the same element type and half as
;; many elements.  V2DI and V2DF have no entry.
166 (define_mode_attr avxhalfvecmode
167 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
168 (V8SF "V4SF") (V4DF "V2DF")
169 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Maps each 16-byte and 32-byte vector mode to its element (scalar) mode.
170 (define_mode_attr avxscalarmode
171 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
172 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Maps each float vector mode to the integer vector mode with the same
;; element count and element size.
173 (define_mode_attr avxpermvecmode
174 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Suffix per mode: "pd" / "ps" for the float modes shown and "si" for the
;; SI-element integer modes.
175 (define_mode_attr avxmodesuffixp
176 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Empty string for the 16-byte modes listed and "256" for the 32-byte ones;
;; used to build pattern names such as "avx_movdqu<avxmodesuffix>".
178 (define_mode_attr avxmodesuffix
179 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
180 (V8SI "256") (V8SF "256") (V4DF "256")])
182 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for an N-element mode, i.e. one bit per element:
;; 255 for 8 elements, 15 for 4, 3 for 2.
183 (define_mode_attr blendbits
184 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
186 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 2^(N-1) for an N-element mode: 32768 for 16 elements,
;; 128 for 8, 8 for 4.
187 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
189 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 32-byte AVX256MODE modes.  Both operands are
;; nonimmediate_operand; the body passes them to ix86_expand_vector_move.
197 (define_expand "mov<mode>"
198 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
199 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
202 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX vector move insn over AVXMODE16.  Alternatives (operand 0 <- operand 1)
;; are x <- C, x <- xm and m <- x, and at least one operand must be a
;; register.  The output code switches on which_alternative and then on the
;; insn's "mode" attribute to choose among vmovups/vmovaps, vmovupd/vmovapd
;; and vmovdqu/vmovdqa: the unaligned form is used when either operand is a
;; misaligned_operand, and vmovaps replaces vmovapd/vmovdqa when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.  standard_sse_constant_opcode
;; supplies the template for a constant source (presumably alternative 0,
;; whose type is sselog1).
206 (define_insn "*avx_mov<mode>_internal"
207 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
208 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
210 && (register_operand (operands[0], <MODE>mode)
211 || register_operand (operands[1], <MODE>mode))"
213 switch (which_alternative)
216 return standard_sse_constant_opcode (insn, operands[1]);
219 switch (get_attr_mode (insn))
223 if (misaligned_operand (operands[0], <MODE>mode)
224 || misaligned_operand (operands[1], <MODE>mode))
225 return "vmovups\t{%1, %0|%0, %1}";
227 return "vmovaps\t{%1, %0|%0, %1}";
230 if (misaligned_operand (operands[0], <MODE>mode)
231 || misaligned_operand (operands[1], <MODE>mode))
232 return "vmovupd\t{%1, %0|%0, %1}";
233 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
234 return "vmovaps\t{%1, %0|%0, %1}";
236 return "vmovapd\t{%1, %0|%0, %1}";
238 if (misaligned_operand (operands[0], <MODE>mode)
239 || misaligned_operand (operands[1], <MODE>mode))
240 return "vmovdqu\t{%1, %0|%0, %1}";
241 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
242 return "vmovaps\t{%1, %0|%0, %1}";
244 return "vmovdqa\t{%1, %0|%0, %1}";
250 [(set_attr "type" "sselog1,ssemov,ssemov")
251 (set_attr "prefix" "vex")
252 (set_attr "mode" "<avxvecmode>")])
254 ;; All of these patterns are enabled for SSE1 as well as SSE2.
255 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte SSEMODE16 modes; the body passes the
;; operands to ix86_expand_vector_move.
257 (define_expand "mov<mode>"
258 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
259 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
262 ix86_expand_vector_move (<MODE>mode, operands);
;; SSE vector move insn over SSEMODE16.  Alternatives (operand 0 <- operand 1)
;; are x <- C, x <- xm and m <- x, and at least one operand must be a
;; register.  The mnemonic is movaps, movapd or movdqa according to the
;; insn's "mode" attribute, with movaps replacing movapd/movdqa when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL; no unaligned forms are emitted.
;; The "mode" attribute is computed: V4SF when optimizing the function for
;; size, when TARGET_SSE2 is off, or for alternative 2 (the store) under
;; TARGET_SSE_TYPELESS_STORES; otherwise V4SF for V4SFmode, V2DF for
;; V2DFmode, and TI for everything else.
266 (define_insn "*mov<mode>_internal"
267 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
268 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
270 && (register_operand (operands[0], <MODE>mode)
271 || register_operand (operands[1], <MODE>mode))"
273 switch (which_alternative)
276 return standard_sse_constant_opcode (insn, operands[1]);
279 switch (get_attr_mode (insn))
282 return "movaps\t{%1, %0|%0, %1}";
284 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
285 return "movaps\t{%1, %0|%0, %1}";
287 return "movapd\t{%1, %0|%0, %1}";
289 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
290 return "movaps\t{%1, %0|%0, %1}";
292 return "movdqa\t{%1, %0|%0, %1}";
298 [(set_attr "type" "sselog1,ssemov,ssemov")
300 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
301 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
302 (and (eq_attr "alternative" "2")
303 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
305 (const_string "V4SF")
306 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
307 (const_string "V4SF")
308 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
309 (const_string "V2DF")
311 (const_string "TI")))])
313 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
314 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
315 ;; from memory, we'd prefer to load the memory directly into the %xmm
316 ;; register. To facilitate this happy circumstance, this pattern won't
317 ;; split until after register allocation. If the 64-bit value didn't
318 ;; come from memory, this is the best we can do. This is much better
319 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
;; Enabled only for !TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES;
;; the split runs once reload_completed.  Operand 1 is a DI in a general
;; register pair ("r") or in memory ("m"); operand 2 is a V4SI scratch that
;; is early-clobbered in the register alternative and unused ("X") in the
;; memory alternative.
;;  - Register source: gen_sse2_loadld loads the SImode subreg at byte
;;    offset 0 into operand 0 and the one at byte offset 4 into the scratch,
;;    then gen_vec_interleave_lowv4si combines them into operand 0.
;;  - Memory source: gen_vec_concatv2di writes operand 1 and const0_rtx into
;;    the V2DI lowpart of operand 0.
322 (define_insn_and_split "movdi_to_sse"
324 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
325 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
326 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
327 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
329 "&& reload_completed"
332 if (register_operand (operands[1], DImode))
334 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
335 Assemble the 64-bit DImode value in an xmm register. */
336 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
337 gen_rtx_SUBREG (SImode, operands[1], 0)));
338 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
339 gen_rtx_SUBREG (SImode, operands[1], 4)));
340 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
343 else if (memory_operand (operands[1], DImode))
344 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
345 operands[1], const0_rtx));
;; After reload (TARGET_SSE), rewrites a V4SF register set whose source is a
;; zero_extended_scalar_load_operand.  Operand 1 is replaced by its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero vector; the
;; replacement RTL uses a vec_duplicate of the scalar.
351 [(set (match_operand:V4SF 0 "register_operand" "")
352 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
353 "TARGET_SSE && reload_completed"
356 (vec_duplicate:V4SF (match_dup 1))
360 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
361 operands[2] = CONST0_RTX (V4SFmode);
;; After reload (TARGET_SSE2), rewrites a V2DF register set whose source is a
;; zero_extended_scalar_load_operand into a vec_concat of the DFmode subreg
;; of operand 1 (offset 0) with the DFmode zero constant in operand 2.
365 [(set (match_operand:V2DF 0 "register_operand" "")
366 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
367 "TARGET_SSE2 && reload_completed"
368 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
370 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
371 operands[2] = CONST0_RTX (DFmode);
;; Push expander for the 32-byte AVX256MODE modes: passes the register
;; operand to ix86_expand_push.
374 (define_expand "push<mode>1"
375 [(match_operand:AVX256MODE 0 "register_operand" "")]
378 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for the 16-byte SSEMODE16 modes: passes the register
;; operand to ix86_expand_push.
382 (define_expand "push<mode>1"
383 [(match_operand:SSEMODE16 0 "register_operand" "")]
386 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 32-byte AVX256MODE modes; defers to
;; ix86_expand_vector_move_misalign.
390 (define_expand "movmisalign<mode>"
391 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
392 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
395 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 16-byte SSEMODE16 modes; defers to
;; ix86_expand_vector_move_misalign.
399 (define_expand "movmisalign<mode>"
400 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
401 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
404 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the AVX unaligned float vector move, enabled when
;; AVX_VEC_FLOAT_MODE_P (<MODE>mode).  If both operands are memory, the
;; source is first forced into a register, since the matching insn rejects
;; memory-to-memory moves.
408 (define_expand "avx_movu<ssemodesuffix><avxmodesuffix>"
409 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "")
411 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "")]
413 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
415 if (MEM_P (operands[0]) && MEM_P (operands[1]))
416 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Emits vmovu<ssemodesuffix> (vmovups / vmovupd) with operand 1 as source
;; and operand 0 as destination.  Alternatives: x <- xm and m <- x; the
;; condition rejects the case where both operands are memory.
419 (define_insn "*avx_movu<ssemodesuffix><avxmodesuffix>"
420 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
422 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
424 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
425 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
426 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
427 [(set_attr "type" "ssemov")
428 (set_attr "movu" "1")
429 (set_attr "prefix" "vex")
430 (set_attr "mode" "<MODE>")])
;; Emits %vmovq from the V2DI register-or-memory operand 1 into register
;; operand 0.  The RTL shown selects element 0 of operand 1; the "prefix"
;; attribute is maybe_vex.
432 (define_insn "sse2_movq128"
433 [(set (match_operand:V2DI 0 "register_operand" "=x")
436 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
437 (parallel [(const_int 0)]))
440 "%vmovq\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssemov")
442 (set_attr "prefix" "maybe_vex")
443 (set_attr "mode" "TI")])
;; Expander for the SSE unaligned float vector move (names sse_movups and
;; sse2_movupd via <sse>), enabled when SSE_VEC_FLOAT_MODE_P (<MODE>mode).
;; If both operands are memory, the source is first forced into a register.
445 (define_expand "<sse>_movu<ssemodesuffix>"
446 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
448 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")]
450 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
452 if (MEM_P (operands[0]) && MEM_P (operands[1]))
453 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Emits movu<ssemodesuffix> (movups / movupd).  Alternatives: x <- xm and
;; m <- x; the condition rejects the case where both operands are memory.
456 (define_insn "*<sse>_movu<ssemodesuffix>"
457 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
459 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
461 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
462 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
463 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
464 [(set_attr "type" "ssemov")
465 (set_attr "movu" "1")
466 (set_attr "mode" "<MODE>")])
;; Expander for the AVX unaligned integer vector move over V32QI/V16QI.
;; If both operands are memory, the source is first forced into a register.
468 (define_expand "avx_movdqu<avxmodesuffix>"
469 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "")
471 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "")]
475 if (MEM_P (operands[0]) && MEM_P (operands[1]))
476 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Emits vmovdqu under TARGET_AVX.  Alternatives: x <- xm and m <- x; the
;; condition rejects the case where both operands are memory.
479 (define_insn "*avx_movdqu<avxmodesuffix>"
480 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
482 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
484 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
485 "vmovdqu\t{%1, %0|%0, %1}"
486 [(set_attr "type" "ssemov")
487 (set_attr "movu" "1")
488 (set_attr "prefix" "vex")
489 (set_attr "mode" "<avxvecmode>")])
;; Expander for the SSE2 unaligned V16QI move, wrapped in an unspec.
;; If both operands are memory, the source is first forced into a register.
491 (define_expand "sse2_movdqu"
492 [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
493 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "")]
497 if (MEM_P (operands[0]) && MEM_P (operands[1]))
498 operands[1] = force_reg (V16QImode, operands[1]);
;; Emits movdqu under TARGET_SSE2.  Alternatives: x <- xm and m <- x; the
;; condition rejects the case where both operands are memory.
501 (define_insn "*sse2_movdqu"
502 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
503 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
505 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
506 "movdqu\t{%1, %0|%0, %1}"
507 [(set_attr "type" "ssemov")
508 (set_attr "movu" "1")
509 (set_attr "prefix_data16" "1")
510 (set_attr "mode" "TI")])
;; AVX float store from register operand 1 ("x") to memory operand 0 ("=m"),
;; emitted as vmovnt<ssemodesuffix> (vmovntps / vmovntpd).
512 (define_insn "avx_movnt<mode>"
513 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
515 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
517 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
518 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
519 [(set_attr "type" "ssemov")
520 (set_attr "prefix" "vex")
521 (set_attr "mode" "<MODE>")])
;; SSE float store from register operand 1 ("x") to memory operand 0 ("=m"),
;; emitted as movnt<ssemodesuffix> (movntps / movntpd).
523 (define_insn "<sse>_movnt<mode>"
524 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
526 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
528 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
529 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
530 [(set_attr "type" "ssemov")
531 (set_attr "mode" "<MODE>")])
;; AVX integer store (V4DI / V2DI) from register operand 1 to memory
;; operand 0, emitted as vmovntdq.
;; NOTE(review): "type" is ssecvt here but ssemov in sse2_movntv2di, which
;; emits the non-VEX form of the same instruction -- confirm this is intended.
533 (define_insn "avx_movnt<mode>"
534 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
536 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
539 "vmovntdq\t{%1, %0|%0, %1}"
540 [(set_attr "type" "ssecvt")
541 (set_attr "prefix" "vex")
542 (set_attr "mode" "<avxvecmode>")])
;; SSE2 V2DI store from register operand 1 to memory operand 0, emitted as
;; movntdq.
544 (define_insn "sse2_movntv2di"
545 [(set (match_operand:V2DI 0 "memory_operand" "=m")
546 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
549 "movntdq\t{%1, %0|%0, %1}"
550 [(set_attr "type" "ssemov")
551 (set_attr "prefix_data16" "1")
552 (set_attr "mode" "TI")])
;; SI store from general register operand 1 ("r") to memory operand 0,
;; emitted as movnti.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is intended.
554 (define_insn "sse2_movntsi"
555 [(set (match_operand:SI 0 "memory_operand" "=m")
556 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
559 "movnti\t{%1, %0|%0, %1}"
560 [(set_attr "type" "ssemov")
561 (set_attr "prefix_data16" "0")
562 (set_attr "mode" "V2DF")])
;; AVX load of a V32QI / V16QI memory operand into a register, emitted as
;; vlddqu.
;; NOTE(review): "type" is ssecvt here but ssemov in sse3_lddqu -- confirm
;; this is intended.
564 (define_insn "avx_lddqu<avxmodesuffix>"
565 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
567 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
570 "vlddqu\t{%1, %0|%0, %1}"
571 [(set_attr "type" "ssecvt")
572 (set_attr "movu" "1")
573 (set_attr "prefix" "vex")
574 (set_attr "mode" "<avxvecmode>")])
;; SSE3 load of a V16QI memory operand into a register, emitted as lddqu.
;; The attributes record no data16 prefix and a rep prefix.
576 (define_insn "sse3_lddqu"
577 [(set (match_operand:V16QI 0 "register_operand" "=x")
578 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
581 "lddqu\t{%1, %0|%0, %1}"
582 [(set_attr "type" "ssemov")
583 (set_attr "movu" "1")
584 (set_attr "prefix_data16" "0")
585 (set_attr "prefix_rep" "1")
586 (set_attr "mode" "TI")])
588 ; Expand patterns for non-temporal stores. At the moment, only those
589 ; that directly map to insns are defined; it would be possible to
590 ; define patterns for other modes that would expand to several insns.
;; One expander per STORENT_MODE entry: memory destination (operand 0),
;; register source (operand 1).
592 (define_expand "storent<mode>"
593 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
595 [(match_operand:STORENT_MODE 1 "register_operand" "")]
598 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
600 ;; Parallel floating point arithmetic
602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander over the VF modes; the matching insn below is
;; *absneg<mode>2.  All work is delegated to ix86_expand_fp_absneg_operator,
;; after which expansion is DONE.
604 (define_expand "<code><mode>2"
605 [(set (match_operand:VF 0 "register_operand" "")
607 (match_operand:VF 1 "register_operand" "")))]
609 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Abs/neg on a float vector, split into a bitwise operation between the
;; input (operand 1) and the extra (use ...) operand 2.  There are four
;; alternatives; the "isa" attribute marks the first two noavx and the last
;; two avx.  The split code builds XOR when operand 3 is NEG and AND
;; otherwise, then sets operand 0 to (absneg_op op1 op2).  In the branch
;; testing MEM_P (operands[1]), a memory operand 1 is moved to the second
;; position (op1 = operand 2, op2 = operand 1); the other branch keys on
;; whether operand 0 equals operand 1.
611 (define_insn_and_split "*absneg<mode>2"
612 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
613 (match_operator:VF 3 "absneg_operator"
614 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
615 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
621 enum rtx_code absneg_op;
627 if (MEM_P (operands[1]))
628 op1 = operands[2], op2 = operands[1];
630 op1 = operands[1], op2 = operands[2];
635 if (rtx_equal_p (operands[0], operands[1]))
641 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
642 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
643 t = gen_rtx_SET (VOIDmode, operands[0], t);
647 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector float add/sub expander over the VF modes; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy on the operands.
649 (define_expand "<plusminus_insn><mode>3"
650 [(set (match_operand:VF 0 "register_operand" "")
652 (match_operand:VF 1 "nonimmediate_operand" "")
653 (match_operand:VF 2 "nonimmediate_operand" "")))]
655 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector float add/sub insn, valid when ix86_binary_operator_ok.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and prints the
;; two-operand form; alternative 1 (isa avx) prints the three-operand
;; v-prefixed form.
657 (define_insn "*<plusminus_insn><mode>3"
658 [(set (match_operand:VF 0 "register_operand" "=x,x")
660 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
661 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
662 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
664 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
665 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
666 [(set_attr "isa" "noavx,avx")
667 (set_attr "type" "sseadd")
668 (set_attr "prefix" "orig,vex")
669 (set_attr "mode" "<MODE>")])
;; Scalar-suffix add/sub on VF_128 operands: prints
;; <plusminus_mnemonic><ssescalarmodesuffix> (the ss / sd forms) and records
;; the scalar mode in the "mode" attribute.  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; is the three-operand v-prefixed form.
671 (define_insn "<sse>_vm<plusminus_insn><mode>3"
672 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
675 (match_operand:VF_128 1 "register_operand" "0,x")
676 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
681 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
682 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
683 [(set_attr "isa" "noavx,avx")
684 (set_attr "type" "sseadd")
685 (set_attr "prefix" "orig,vex")
686 (set_attr "mode" "<ssescalarmode>")])
;; Vector float multiply expander over the VF modes; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy with MULT.
688 (define_expand "mul<mode>3"
689 [(set (match_operand:VF 0 "register_operand" "")
691 (match_operand:VF 1 "nonimmediate_operand" "")
692 (match_operand:VF 2 "nonimmediate_operand" "")))]
694 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector float multiply insn, valid when ix86_binary_operator_ok.  Operand 1
;; is marked commutative ("%").  Alternative 0 prints two-operand
;; mul<ssemodesuffix>; alternative 1 prints three-operand
;; vmul<ssemodesuffix>.
696 (define_insn "*mul<mode>3"
697 [(set (match_operand:VF 0 "register_operand" "=x,x")
699 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
700 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
701 "ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
703 mul<ssemodesuffix>\t{%2, %0|%0, %2}
704 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
705 [(set_attr "isa" "noavx,avx")
706 (set_attr "type" "ssemul")
707 (set_attr "prefix" "orig,vex")
708 (set_attr "mode" "<MODE>")])
;; Scalar-suffix multiply on VF_128 operands: prints
;; mul<ssescalarmodesuffix> (two-operand, alternative 0) or
;; vmul<ssescalarmodesuffix> (three-operand, alternative 1).
710 (define_insn "<sse>_vmmul<mode>3"
711 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
714 (match_operand:VF_128 1 "register_operand" "0,x")
715 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
720 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
721 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
722 [(set_attr "isa" "noavx,avx")
723 (set_attr "type" "ssemul")
724 (set_attr "prefix" "orig,vex")
725 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the DF-element modes (VF2); it only fixes up the
;; operands with ix86_fixup_binary_operands_no_copy.
727 (define_expand "div<mode>3"
728 [(set (match_operand:VF2 0 "register_operand" "")
729 (div:VF2 (match_operand:VF2 1 "register_operand" "")
730 (match_operand:VF2 2 "nonimmediate_operand" "")))]
732 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the SF-element modes (VF1).  After fixing up the
;; operands it calls ix86_emit_swdivsf instead of emitting a plain divide
;; when all of these hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing the
;; insn for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
734 (define_expand "div<mode>3"
735 [(set (match_operand:VF1 0 "register_operand" "")
736 (div:VF1 (match_operand:VF1 1 "register_operand" "")
737 (match_operand:VF1 2 "nonimmediate_operand" "")))]
740 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
742 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
743 && flag_finite_math_only && !flag_trapping_math
744 && flag_unsafe_math_optimizations)
746 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Vector float divide insn.  Operand 1 must be a register (division is not
;; commutative).  Alternative 0 prints two-operand div<ssemodesuffix>;
;; alternative 1 prints three-operand vdiv<ssemodesuffix>.
751 (define_insn "<sse>_div<mode>3"
752 [(set (match_operand:VF 0 "register_operand" "=x,x")
754 (match_operand:VF 1 "register_operand" "0,x")
755 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
758 div<ssemodesuffix>\t{%2, %0|%0, %2}
759 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
760 [(set_attr "isa" "noavx,avx")
761 (set_attr "type" "ssediv")
762 (set_attr "prefix" "orig,vex")
763 (set_attr "mode" "<MODE>")])
;; Scalar-suffix divide on VF_128 operands: prints div<ssescalarmodesuffix>
;; (two-operand, alternative 0) or vdiv<ssescalarmodesuffix> (three-operand,
;; alternative 1).
765 (define_insn "<sse>_vmdiv<mode>3"
766 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
769 (match_operand:VF_128 1 "register_operand" "0,x")
770 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
775 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
776 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
777 [(set_attr "isa" "noavx,avx")
778 (set_attr "type" "ssediv")
779 (set_attr "prefix" "orig,vex")
780 (set_attr "mode" "<ssescalarmode>")])
;; Packed reciprocal over the SF-element modes, represented as UNSPEC_RCP
;; and printed as %vrcpps with operand 1 as source.
782 (define_insn "<sse>_rcp<mode>2"
783 [(set (match_operand:VF1 0 "register_operand" "=x")
785 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
787 "%vrcpps\t{%1, %0|%0, %1}"
788 [(set_attr "type" "sse")
789 (set_attr "atom_sse_attr" "rcp")
790 (set_attr "prefix" "maybe_vex")
791 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF operands.  Operand 1 is the source and operand 2
;; is a register tied to the destination in alternative 0.  Alternative 0
;; prints two-operand rcpss; alternative 1 prints vrcpss with operand 2 as
;; the extra source.
793 (define_insn "sse_vmrcpv4sf2"
794 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
796 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
798 (match_operand:V4SF 2 "register_operand" "0,x")
802 rcpss\t{%1, %0|%0, %1}
803 vrcpss\t{%1, %2, %0|%0, %2, %1}"
804 [(set_attr "isa" "noavx,avx")
805 (set_attr "type" "sse")
806 (set_attr "atom_sse_attr" "rcp")
807 (set_attr "prefix" "orig,vex")
808 (set_attr "mode" "SF")])
;; Square-root expander for the DF-element modes (VF2); it has no
;; preparation code and simply generates the sqrt RTL.
810 (define_expand "sqrt<mode>2"
811 [(set (match_operand:VF2 0 "register_operand" "")
812 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))])
;; Square-root expander for the SF-element modes (VF1).  It calls
;; ix86_emit_swsqrtsf (last argument false) when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
814 (define_expand "sqrt<mode>2"
815 [(set (match_operand:VF1 0 "register_operand" "")
816 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
819 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
820 && flag_finite_math_only && !flag_trapping_math
821 && flag_unsafe_math_optimizations)
823 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed square root over all VF modes, printed as %vsqrt<ssemodesuffix>
;; with a register or memory source.
828 (define_insn "<sse>_sqrt<mode>2"
829 [(set (match_operand:VF 0 "register_operand" "=x")
830 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
832 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
833 [(set_attr "type" "sse")
834 (set_attr "atom_sse_attr" "sqrt")
835 (set_attr "prefix" "maybe_vex")
836 (set_attr "mode" "<MODE>")])
;; Scalar-suffix square root on VF_128 operands.  Operand 1 is the source and
;; operand 2 is a register tied to the destination in alternative 0.
;; Alternative 0 prints two-operand sqrt<ssescalarmodesuffix>; alternative 1
;; prints vsqrt<ssescalarmodesuffix> with operand 2 as the extra source.
838 (define_insn "<sse>_vmsqrt<mode>2"
839 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
842 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
843 (match_operand:VF_128 2 "register_operand" "0,x")
847 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
848 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
849 [(set_attr "isa" "noavx,avx")
850 (set_attr "type" "sse")
851 (set_attr "atom_sse_attr" "sqrt")
852 (set_attr "prefix" "orig,vex")
853 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SF-element modes (VF1); the
;; visible body calls ix86_emit_swsqrtsf with its last argument true.
855 (define_expand "rsqrt<mode>2"
856 [(set (match_operand:VF1 0 "register_operand" "")
858 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
861 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root over the SF-element modes, represented as
;; UNSPEC_RSQRT and printed as %vrsqrtps.
865 (define_insn "<sse>_rsqrt<mode>2"
866 [(set (match_operand:VF1 0 "register_operand" "=x")
868 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
870 "%vrsqrtps\t{%1, %0|%0, %1}"
871 [(set_attr "type" "sse")
872 (set_attr "prefix" "maybe_vex")
873 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF operands.  Alternative 0 prints
;; two-operand rsqrtss with operand 2 tied to the destination; alternative 1
;; prints vrsqrtss with operand 2 as the extra source.
875 (define_insn "sse_vmrsqrtv4sf2"
876 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
878 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
880 (match_operand:V4SF 2 "register_operand" "0,x")
884 rsqrtss\t{%1, %0|%0, %1}
885 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
886 [(set_attr "isa" "noavx,avx")
887 (set_attr "type" "sse")
888 (set_attr "prefix" "orig,vex")
889 (set_attr "mode" "SF")])
891 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
892 ;; isn't really correct, as those rtl operators aren't defined when
893 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Vector float min/max expander over the VF modes.  When
;; !flag_finite_math_only, operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy runs; this matches the non-finite insn
;; below, which requires a register operand 1 and is not commutative.
895 (define_expand "<code><mode>3"
896 [(set (match_operand:VF 0 "register_operand" "")
898 (match_operand:VF 1 "nonimmediate_operand" "")
899 (match_operand:VF 2 "nonimmediate_operand" "")))]
902 if (!flag_finite_math_only)
903 operands[1] = force_reg (<MODE>mode, operands[1]);
904 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used when flag_finite_math_only: operand 1 is commutative
;; ("%") and may be memory.  Alternative 0 prints the two-operand
;; <maxmin_float><ssemodesuffix>; alternative 1 the three-operand v-prefixed
;; form.
907 (define_insn "*<code><mode>3_finite"
908 [(set (match_operand:VF 0 "register_operand" "=x,x")
910 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
911 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
912 "flag_finite_math_only
913 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
915 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
916 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
917 [(set_attr "isa" "noavx,avx")
918 (set_attr "type" "sseadd")
919 (set_attr "prefix" "orig,vex")
920 (set_attr "mode" "<MODE>")])
;; Min/max insn used when !flag_finite_math_only: operand 1 must be a
;; register and carries no commutative marker, so operand order is kept.
;; Alternative 0 prints the two-operand form; alternative 1 the
;; three-operand v-prefixed form.
922 (define_insn "*<code><mode>3"
923 [(set (match_operand:VF 0 "register_operand" "=x,x")
925 (match_operand:VF 1 "register_operand" "0,x")
926 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
927 "!flag_finite_math_only"
929 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
930 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
931 [(set_attr "isa" "noavx,avx")
932 (set_attr "type" "sseadd")
933 (set_attr "prefix" "orig,vex")
934 (set_attr "mode" "<MODE>")])
;; Scalar-suffix min/max on VF_128 operands: prints
;; <maxmin_float><ssescalarmodesuffix> (two-operand, alternative 0) or the
;; v-prefixed three-operand form (alternative 1).
;; NOTE(review): "type" is sse here while the packed min/max insns above use
;; sseadd -- confirm this is intended.
936 (define_insn "<sse>_vm<code><mode>3"
937 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
940 (match_operand:VF_128 1 "register_operand" "0,x")
941 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
946 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
947 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
948 [(set_attr "isa" "noavx,avx")
949 (set_attr "type" "sse")
950 (set_attr "prefix" "orig,vex")
951 (set_attr "mode" "<ssescalarmode>")])
953 ;; These versions of the min/max patterns implement exactly the operations
954 ;; min = (op1 < op2 ? op1 : op2)
955 ;; max = (!(op1 < op2) ? op1 : op2)
956 ;; Their operands are not commutative, and thus they may be used in the
957 ;; presence of -0.0 and NaN.
;; Non-commutative vector float minimum: operand 1 must be a register and
;; operand order is preserved.
;; Alternative 0 is the two-operand SSE form: operand 1 is tied to the
;; destination by the "0" constraint, and its attributes are isa "noavx" and
;; prefix "orig".  Alternative 1 is the three-operand VEX form (isa "avx",
;; prefix "vex").  The output templates must be listed in that same order,
;; so min<ssemodesuffix> comes first and vmin<ssemodesuffix> second.
959 (define_insn "*ieee_smin<mode>3"
960 [(set (match_operand:VF 0 "register_operand" "=x,x")
962 [(match_operand:VF 1 "register_operand" "0,x")
963 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
967 min<ssemodesuffix>\t{%2, %0|%0, %2}
968 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
969 [(set_attr "isa" "noavx,avx")
970 (set_attr "type" "sseadd")
971 (set_attr "prefix" "orig,vex")
972 (set_attr "mode" "<MODE>")])
;; Non-commutative vector float maximum: operand 1 must be a register and
;; operand order is preserved.
;; Alternative 0 is the two-operand SSE form: operand 1 is tied to the
;; destination by the "0" constraint, and its attributes are isa "noavx" and
;; prefix "orig".  Alternative 1 is the three-operand VEX form (isa "avx",
;; prefix "vex").  The output templates must be listed in that same order,
;; so max<ssemodesuffix> comes first and vmax<ssemodesuffix> second.
974 (define_insn "*ieee_smax<mode>3"
975 [(set (match_operand:VF 0 "register_operand" "=x,x")
977 [(match_operand:VF 1 "register_operand" "0,x")
978 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
982 max<ssemodesuffix>\t{%2, %0|%0, %2}
983 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
984 [(set_attr "isa" "noavx,avx")
985 (set_attr "type" "sseadd")
986 (set_attr "prefix" "orig,vex")
987 (set_attr "mode" "<MODE>")])
;; AVX V4DF add/subtract, printed as three-operand vaddsubpd.  The RTL
;; reuses operands 1 and 2 through match_dup in a minus expression.
989 (define_insn "avx_addsubv4df3"
990 [(set (match_operand:V4DF 0 "register_operand" "=x")
993 (match_operand:V4DF 1 "register_operand" "x")
994 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
995 (minus:V4DF (match_dup 1) (match_dup 2))
998 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
999 [(set_attr "type" "sseadd")
1000 (set_attr "prefix" "vex")
1001 (set_attr "mode" "V4DF")])
;; V2DF add/subtract.  Alternative 0 (isa noavx) prints two-operand addsubpd
;; with operand 1 tied to the destination; alternative 1 (isa avx) prints
;; three-operand vaddsubpd.
1003 (define_insn "sse3_addsubv2df3"
1004 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1007 (match_operand:V2DF 1 "register_operand" "0,x")
1008 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1009 (minus:V2DF (match_dup 1) (match_dup 2))
1013 addsubpd\t{%2, %0|%0, %2}
1014 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1015 [(set_attr "isa" "noavx,avx")
1016 (set_attr "type" "sseadd")
1017 (set_attr "atom_unit" "complex")
1018 (set_attr "prefix" "orig,vex")
1019 (set_attr "mode" "V2DF")])
;; AVX V8SF add/subtract, printed as three-operand vaddsubps.  The RTL
;; reuses operands 1 and 2 through match_dup in a minus expression.
1021 (define_insn "avx_addsubv8sf3"
1022 [(set (match_operand:V8SF 0 "register_operand" "=x")
1025 (match_operand:V8SF 1 "register_operand" "x")
1026 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1027 (minus:V8SF (match_dup 1) (match_dup 2))
1030 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1031 [(set_attr "type" "sseadd")
1032 (set_attr "prefix" "vex")
1033 (set_attr "mode" "V8SF")])
;; V4SF add/subtract.  Alternative 0 (isa noavx) prints two-operand addsubps
;; with operand 1 tied to the destination; alternative 1 (isa avx) prints
;; three-operand vaddsubps.  prefix_rep is "1" for alternative 0 only.
1035 (define_insn "sse3_addsubv4sf3"
1036 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1039 (match_operand:V4SF 1 "register_operand" "0,x")
1040 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1041 (minus:V4SF (match_dup 1) (match_dup 2))
1045 addsubps\t{%2, %0|%0, %2}
1046 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1047 [(set_attr "isa" "noavx,avx")
1048 (set_attr "type" "sseadd")
1049 (set_attr "prefix" "orig,vex")
1050 (set_attr "prefix_rep" "1,*")
1051 (set_attr "mode" "V4SF")])
1053 (define_insn "avx_h<plusminus_insn>v4df3"
1054 [(set (match_operand:V4DF 0 "register_operand" "=x")
1059 (match_operand:V4DF 1 "register_operand" "x")
1060 (parallel [(const_int 0)]))
1061 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1063 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1064 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1068 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1069 (parallel [(const_int 0)]))
1070 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1072 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1073 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1075 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1076 [(set_attr "type" "sseadd")
1077 (set_attr "prefix" "vex")
1078 (set_attr "mode" "V4DF")])
1080 (define_insn "sse3_h<plusminus_insn>v2df3"
1081 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1085 (match_operand:V2DF 1 "register_operand" "0,x")
1086 (parallel [(const_int 0)]))
1087 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1090 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1091 (parallel [(const_int 0)]))
1092 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1095 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1096 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1097 [(set_attr "isa" "noavx,avx")
1098 (set_attr "type" "sseadd")
1099 (set_attr "prefix" "orig,vex")
1100 (set_attr "mode" "V2DF")])
1102 (define_insn "avx_h<plusminus_insn>v8sf3"
1103 [(set (match_operand:V8SF 0 "register_operand" "=x")
1109 (match_operand:V8SF 1 "register_operand" "x")
1110 (parallel [(const_int 0)]))
1111 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1113 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1114 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1118 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1119 (parallel [(const_int 0)]))
1120 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1122 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1123 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1127 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1128 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1130 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1131 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1134 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1135 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1137 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1138 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1140 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1141 [(set_attr "type" "sseadd")
1142 (set_attr "prefix" "vex")
1143 (set_attr "mode" "V8SF")])
1145 (define_insn "sse3_h<plusminus_insn>v4sf3"
1146 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1151 (match_operand:V4SF 1 "register_operand" "0,x")
1152 (parallel [(const_int 0)]))
1153 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1155 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1156 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1160 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1161 (parallel [(const_int 0)]))
1162 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1164 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1165 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1168 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1169 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1170 [(set_attr "isa" "noavx,avx")
1171 (set_attr "type" "sseadd")
1172 (set_attr "atom_unit" "complex")
1173 (set_attr "prefix" "orig,vex")
1174 (set_attr "prefix_rep" "1,*")
1175 (set_attr "mode" "V4SF")])
;; Expander for the V4DF plus-reduction.  It allocates two V4DF temporaries,
;; emits a horizontal add of operand 1 with itself into the first, emits
;; vperm2f128 of that partial result (immediate 1) into the second, and
;; finally adds both temporaries into operand 0.
;; NOTE(review): immediate 1 presumably swaps the two 128-bit halves so the
;; final add combines both lanes -- confirm against the vperm2f128 pattern.
1177 (define_expand "reduc_splus_v4df"
1178 [(match_operand:V4DF 0 "register_operand" "")
1179 (match_operand:V4DF 1 "register_operand" "")]
1182 rtx tmp = gen_reg_rtx (V4DFmode);
1183 rtx tmp2 = gen_reg_rtx (V4DFmode);
1184 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1185 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1186 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Expander for the V2DF plus-reduction: a single SSE3 horizontal add of
;; operand 1 with itself, written directly to operand 0.
1190 (define_expand "reduc_splus_v2df"
1191 [(match_operand:V2DF 0 "register_operand" "")
1192 (match_operand:V2DF 1 "register_operand" "")]
1195 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for the V8SF plus-reduction.  Two successive horizontal adds
;; (operand 1 with itself, then that result with itself) produce tmp2; tmp is
;; then reused to hold vperm2f128 of tmp2 (immediate 1), and the sum of tmp
;; and tmp2 is stored in operand 0.
;; NOTE(review): immediate 1 presumably swaps the two 128-bit halves --
;; confirm against the vperm2f128 pattern.
1199 (define_expand "reduc_splus_v8sf"
1200 [(match_operand:V8SF 0 "register_operand" "")
1201 (match_operand:V8SF 1 "register_operand" "")]
1204 rtx tmp = gen_reg_rtx (V8SFmode);
1205 rtx tmp2 = gen_reg_rtx (V8SFmode);
1206 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1207 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1208 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1209 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Expander for the V4SF plus-reduction.  Two strategies are visible: a pair
;; of SSE3 horizontal adds through a temporary, or the generic helper
;; ix86_expand_reduc_v4sf driven by gen_addv4sf3.
;; NOTE(review): the condition that selects between the two is not visible
;; in this view (presumably SSE3 availability) -- confirm.
1213 (define_expand "reduc_splus_v4sf"
1214 [(match_operand:V4SF 0 "register_operand" "")
1215 (match_operand:V4SF 1 "register_operand" "")]
1220 rtx tmp = gen_reg_rtx (V4SFmode);
1221 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1222 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1225 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander for the V4SF max-reduction: delegates to ix86_expand_reduc_v4sf,
;; passing gen_smaxv4sf3 as the combining operation, operand 0 as the
;; destination and operand 1 as the input vector.
1230 (define_expand "reduc_smax_v4sf"
1231 [(match_operand:V4SF 0 "register_operand" "")
1232 (match_operand:V4SF 1 "register_operand" "")]
1235 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander for the V4SF min-reduction: delegates to ix86_expand_reduc_v4sf,
;; passing gen_sminv4sf3 as the combining operation, operand 0 as the
;; destination and operand 1 as the input vector.
1239 (define_expand "reduc_smin_v4sf"
1240 [(match_operand:V4SF 0 "register_operand" "")
1241 (match_operand:V4SF 1 "register_operand" "")]
1244 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1250 ;; Parallel floating point comparisons
1252 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1254 (define_insn "avx_cmp<mode>3"
1255 [(set (match_operand:VF 0 "register_operand" "=x")
1257 [(match_operand:VF 1 "register_operand" "x")
1258 (match_operand:VF 2 "nonimmediate_operand" "xm")
1259 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1262 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1263 [(set_attr "type" "ssecmp")
1264 (set_attr "length_immediate" "1")
1265 (set_attr "prefix" "vex")
1266 (set_attr "mode" "<MODE>")])
1268 (define_insn "avx_vmcmp<mode>3"
1269 [(set (match_operand:VF_128 0 "register_operand" "=x")
1272 [(match_operand:VF_128 1 "register_operand" "x")
1273 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1274 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1279 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1280 [(set_attr "type" "ssecmp")
1281 (set_attr "length_immediate" "1")
1282 (set_attr "prefix" "vex")
1283 (set_attr "mode" "<ssescalarmode>")])
1285 (define_insn "<sse>_maskcmp<mode>3"
1286 [(set (match_operand:VF 0 "register_operand" "=x,x")
1287 (match_operator:VF 3 "sse_comparison_operator"
1288 [(match_operand:VF 1 "register_operand" "0,x")
1289 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1292 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1293 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1294 [(set_attr "isa" "noavx,avx")
1295 (set_attr "type" "ssecmp")
1296 (set_attr "length_immediate" "1")
1297 (set_attr "prefix" "orig,vex")
1298 (set_attr "mode" "<MODE>")])
1300 (define_insn "<sse>_vmmaskcmp<mode>3"
1301 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1303 (match_operator:VF_128 3 "sse_comparison_operator"
1304 [(match_operand:VF_128 1 "register_operand" "0,x")
1305 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1310 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1311 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1312 [(set_attr "isa" "noavx,avx")
1313 (set_attr "type" "ssecmp")
1314 (set_attr "length_immediate" "1,*")
1315 (set_attr "prefix" "orig,vex")
1316 (set_attr "mode" "<ssescalarmode>")])
1318 (define_insn "<sse>_comi"
1319 [(set (reg:CCFP FLAGS_REG)
1322 (match_operand:<ssevecmode> 0 "register_operand" "x")
1323 (parallel [(const_int 0)]))
1325 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1326 (parallel [(const_int 0)]))))]
1327 "SSE_FLOAT_MODE_P (<MODE>mode)"
1328 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1329 [(set_attr "type" "ssecomi")
1330 (set_attr "prefix" "maybe_vex")
1331 (set_attr "prefix_rep" "0")
1332 (set (attr "prefix_data16")
1333 (if_then_else (eq_attr "mode" "DF")
1335 (const_string "0")))
1336 (set_attr "mode" "<MODE>")])
1338 (define_insn "<sse>_ucomi"
1339 [(set (reg:CCFPU FLAGS_REG)
1342 (match_operand:<ssevecmode> 0 "register_operand" "x")
1343 (parallel [(const_int 0)]))
1345 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1346 (parallel [(const_int 0)]))))]
1347 "SSE_FLOAT_MODE_P (<MODE>mode)"
1348 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1349 [(set_attr "type" "ssecomi")
1350 (set_attr "prefix" "maybe_vex")
1351 (set_attr "prefix_rep" "0")
1352 (set (attr "prefix_data16")
1353 (if_then_else (eq_attr "mode" "DF")
1355 (const_string "0")))
1356 (set_attr "mode" "<MODE>")])
;; Vector conditional expander for the VF modes.  Operand 3 is a comparison
;; operator (no predicate) applied to operands 4 and 5; operands 1 and 2 are
;; the two candidate values and operand 0 receives the result.  All of the
;; work is delegated to ix86_expand_fp_vcond, whose success flag is kept in
;; "ok".
;; NOTE(review): what is done with "ok" afterwards is not visible here.
1358 (define_expand "vcond<mode>"
1359 [(set (match_operand:VF 0 "register_operand" "")
1361 (match_operator 3 ""
1362 [(match_operand:VF 4 "nonimmediate_operand" "")
1363 (match_operand:VF 5 "nonimmediate_operand" "")])
1364 (match_operand:VF 1 "general_operand" "")
1365 (match_operand:VF 2 "general_operand" "")))]
1368 bool ok = ix86_expand_fp_vcond (operands);
1373 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1375 ;; Parallel floating point logical operations
1377 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector and-not for the VF modes.  The assembler string is assembled at
;; output time into a static 32-byte buffer: the mnemonic template is picked
;; by which_alternative (two-operand "andn" for the alternative that ties
;; operand 1 to the destination, three-operand "vandn" otherwise) and the
;; suffix is forced to "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set, else it is the mode's own <ssemodesuffix>.  The doubled %% in the
;; templates survive snprintf as single % operand markers.
1379 (define_insn "<sse>_andnot<mode>3"
1380 [(set (match_operand:VF 0 "register_operand" "=x,x")
1383 (match_operand:VF 1 "register_operand" "0,x"))
1384 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1387 static char buf[32];
1390 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1392 switch (which_alternative)
1395 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1398 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1404 snprintf (buf, sizeof (buf), insn, suffix);
1407 [(set_attr "isa" "noavx,avx")
1408 (set_attr "type" "sselog")
1409 (set_attr "prefix" "orig,vex")
1410 (set_attr "mode" "<MODE>")])
1412 (define_expand "<code><mode>3"
1413 [(set (match_operand:VF 0 "register_operand" "")
1415 (match_operand:VF 1 "nonimmediate_operand" "")
1416 (match_operand:VF 2 "nonimmediate_operand" "")))]
1418 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector logical operation (<logic>, selected by the code iterator) for the
;; VF modes; operand 1 is marked commutative ("%") and the insn is only valid
;; when ix86_binary_operator_ok accepts the operands.  As with the and-not
;; pattern, the output string is formatted at output time into a static
;; buffer: which_alternative chooses the legacy two-operand or VEX
;; three-operand template, and the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else <ssemodesuffix>.
1420 (define_insn "*<code><mode>3"
1421 [(set (match_operand:VF 0 "register_operand" "=x,x")
1423 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1424 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1425 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1427 static char buf[32];
1430 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1432 switch (which_alternative)
1435 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1438 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1444 snprintf (buf, sizeof (buf), insn, suffix);
1447 [(set_attr "isa" "noavx,avx")
1448 (set_attr "type" "sselog")
1449 (set_attr "prefix" "orig,vex")
1450 (set_attr "mode" "<MODE>")])
;; Vector copysign for the VF modes, built from three logical operations on
;; a mask held in operand 3 (produced by ix86_build_signbit_mask):
;; operand 1 is combined with the complement of the mask, operand 2 is ANDed
;; with the mask, and the two partial results (fresh pseudos in operands 4
;; and 5) are IORed into operand 0.
;; NOTE(review): the destinations of the first two sets are on lines not
;; visible here; they are presumably operands 4 and 5 in that order.
1452 (define_expand "copysign<mode>3"
1455 (not:VF (match_dup 3))
1456 (match_operand:VF 1 "nonimmediate_operand" "")))
1458 (and:VF (match_dup 3)
1459 (match_operand:VF 2 "nonimmediate_operand" "")))
1460 (set (match_operand:VF 0 "register_operand" "")
1461 (ior:VF (match_dup 4) (match_dup 5)))]
1464 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1466 operands[4] = gen_reg_rtx (<MODE>mode);
1467 operands[5] = gen_reg_rtx (<MODE>mode);
1470 ;; Also define scalar versions. These are used for abs, neg, and
1471 ;; conditional move. Using subregs into vector modes causes register
1472 ;; allocation lossage. These patterns do not allow memory operands
1473 ;; because the native instructions read the full 128-bits.
1475 (define_insn "*andnot<mode>3"
1476 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1479 (match_operand:MODEF 1 "register_operand" "0,x"))
1480 (match_operand:MODEF 2 "register_operand" "x,x")))]
1481 "SSE_FLOAT_MODE_P (<MODE>mode)"
1483 static char buf[32];
1486 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "s" : "<ssemodefsuffix>";
1488 switch (which_alternative)
1491 insn = "andnp%s\t{%%2, %%0|%%0, %%2}";
1494 insn = "vandnp%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1500 snprintf (buf, sizeof (buf), insn, suffix);
1503 [(set_attr "isa" "noavx,avx")
1504 (set_attr "type" "sselog")
1505 (set_attr "prefix" "orig,vex")
1506 (set_attr "mode" "<ssevecmode>")])
1508 (define_insn "*<code><mode>3"
1509 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1511 (match_operand:MODEF 1 "register_operand" "%0,x")
1512 (match_operand:MODEF 2 "register_operand" "x,x")))]
1513 "SSE_FLOAT_MODE_P (<MODE>mode)"
1515 static char buf[32];
1518 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "s" : "<ssemodefsuffix>";
1520 switch (which_alternative)
1523 insn = "<logic>p%s\t{%%2, %%0|%%0, %%2}";
1526 insn = "v<logic>p%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1532 snprintf (buf, sizeof (buf), insn, suffix);
1535 [(set_attr "isa" "noavx,avx")
1536 (set_attr "type" "sselog")
1537 (set_attr "prefix" "orig,vex")
1538 (set_attr "mode" "<ssevecmode>")])
1540 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1542 ;; FMA4 floating point multiply/accumulate instructions. This
1543 ;; includes the scalar version of the instructions as well as the vector ones.
1546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1548 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1549 ;; combine to generate a multiply/add with two memory references. We then
1550 ;; split this insn, into loading up the destination register with one of the
1551 ;; memory operations. If we don't manage to split the insn, reload will
1552 ;; generate the appropriate moves. The reason this is needed, is that combine
1553 ;; has already folded one of the memory references into both the multiply and
1554 ;; add insns, and it can't generate a new pseudo. I.e.:
1555 ;; (set (reg1) (mem (addr1)))
1556 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1557 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1559 ;; ??? This is historic, pre-dating the gimple fma transformation.
1560 ;; We could now properly represent that only one memory operand is
1561 ;; allowed and not be penalized during optimization.
1563 ;; Intrinsic FMA operations.
1565 ;; The standard names for fma are only available with SSE math enabled.
1566 (define_expand "fma<mode>4"
1567 [(set (match_operand:FMAMODE 0 "register_operand")
1569 (match_operand:FMAMODE 1 "nonimmediate_operand")
1570 (match_operand:FMAMODE 2 "nonimmediate_operand")
1571 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1572 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1575 (define_expand "fms<mode>4"
1576 [(set (match_operand:FMAMODE 0 "register_operand")
1578 (match_operand:FMAMODE 1 "nonimmediate_operand")
1579 (match_operand:FMAMODE 2 "nonimmediate_operand")
1580 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1581 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1584 (define_expand "fnma<mode>4"
1585 [(set (match_operand:FMAMODE 0 "register_operand")
1587 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1588 (match_operand:FMAMODE 2 "nonimmediate_operand")
1589 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1590 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1593 (define_expand "fnms<mode>4"
1594 [(set (match_operand:FMAMODE 0 "register_operand")
1596 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1597 (match_operand:FMAMODE 2 "nonimmediate_operand")
1598 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1599 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1602 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
1603 (define_expand "fma4i_fmadd_<mode>"
1604 [(set (match_operand:FMAMODE 0 "register_operand")
1606 (match_operand:FMAMODE 1 "nonimmediate_operand")
1607 (match_operand:FMAMODE 2 "nonimmediate_operand")
1608 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1609 "TARGET_FMA || TARGET_FMA4"
1612 (define_insn "*fma4i_fmadd_<mode>"
1613 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1615 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1616 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1617 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1619 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1620 [(set_attr "type" "ssemuladd")
1621 (set_attr "mode" "<MODE>")])
1623 (define_insn "*fma4i_fmsub_<mode>"
1624 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1626 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1627 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1629 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1631 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1632 [(set_attr "type" "ssemuladd")
1633 (set_attr "mode" "<MODE>")])
1635 (define_insn "*fma4i_fnmadd_<mode>"
1636 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1639 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1640 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1641 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1643 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1644 [(set_attr "type" "ssemuladd")
1645 (set_attr "mode" "<MODE>")])
1647 (define_insn "*fma4i_fnmsub_<mode>"
1648 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1651 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1652 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1654 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1656 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1657 [(set_attr "type" "ssemuladd")
1658 (set_attr "mode" "<MODE>")])
1660 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1661 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar ("vm") fused multiply-add over the SSEMODEF2P
;; modes.  The result is a vec_merge; the preparation statement supplies
;; operand 4 as the all-zero constant of the vector mode, which is what the
;; matching *fma4i_vm* insn patterns require through their const0_operand
;; predicate.
1663 (define_expand "fma4i_vmfmadd_<mode>"
1664 [(set (match_operand:SSEMODEF2P 0 "register_operand")
1665 (vec_merge:SSEMODEF2P
1667 (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
1668 (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
1669 (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
1674 operands[4] = CONST0_RTX (<MODE>mode);
1677 (define_insn "*fma4i_vmfmadd_<mode>"
1678 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1679 (vec_merge:SSEMODEF2P
1681 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1682 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1683 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1684 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1687 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1688 [(set_attr "type" "ssemuladd")
1689 (set_attr "mode" "<MODE>")])
1691 (define_insn "*fma4i_vmfmsub_<mode>"
1692 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1693 (vec_merge:SSEMODEF2P
1695 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1696 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1698 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1699 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1702 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1703 [(set_attr "type" "ssemuladd")
1704 (set_attr "mode" "<MODE>")])
1706 (define_insn "*fma4i_vmfnmadd_<mode>"
1707 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1708 (vec_merge:SSEMODEF2P
1711 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1712 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1713 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1714 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1717 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1718 [(set_attr "type" "ssemuladd")
1719 (set_attr "mode" "<MODE>")])
1721 (define_insn "*fma4i_vmfnmsub_<mode>"
1722 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1723 (vec_merge:SSEMODEF2P
1726 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1727 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1729 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1730 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1733 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1734 [(set_attr "type" "ssemuladd")
1735 (set_attr "mode" "<MODE>")])
1737 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1739 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1741 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1743 ;; It would be possible to represent these without the UNSPEC as
1746 ;; (fma op1 op2 op3)
1747 ;; (fma op1 op2 (neg op3))
1750 ;; But this doesn't seem useful in practice.
1752 (define_expand "fmaddsub_<mode>"
1753 [(set (match_operand:AVXMODEF2P 0 "register_operand")
1755 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
1756 (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
1757 (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
1759 "TARGET_FMA || TARGET_FMA4"
;; FMA4 four-operand multiply with alternating add/subtract, iterated over
;; the AVXMODEF2P modes.  Operand 1 is commutative with operand 2; at most
;; one of operands 2 and 3 is a memory reference in either alternative.
;; The mnemonic suffix is taken from the mode via <ssemodesuffix>, as the
;; FMA3 fmaddsub pattern over the same iterator does; a hard-coded "ps"
;; would be emitted for every mode of the iterator.
;; NOTE(review): AVXMODEF2P is defined outside this view; it presumably
;; contains double-precision modes, for which "ps" would be wrong.
1762 (define_insn "*fma4_fmaddsub_<mode>"
1763 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
1765 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
1766 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
1767 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
1770 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1771 [(set_attr "type" "ssemuladd")
1772 (set_attr "mode" "<MODE>")])
;; FMA4 four-operand multiply with alternating subtract/add, iterated over
;; the AVXMODEF2P modes.  Operand 1 is commutative with operand 2; at most
;; one of operands 2 and 3 is a memory reference in either alternative.
;; The mnemonic suffix is taken from the mode via <ssemodesuffix>, as the
;; FMA3 fmsubadd pattern over the same iterator does; a hard-coded "ps"
;; would be emitted for every mode of the iterator.
;; NOTE(review): AVXMODEF2P is defined outside this view; it presumably
;; contains double-precision modes, for which "ps" would be wrong.
1774 (define_insn "*fma4_fmsubadd_<mode>"
1775 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
1777 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
1778 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
1780 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1783 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1784 [(set_attr "type" "ssemuladd")
1785 (set_attr "mode" "<MODE>")])
1787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1789 ;; FMA3 floating point multiply/accumulate instructions.
1791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 fused multiply-add over the FMAMODE modes.  FMA3 overwrites one of
;; its inputs, so the three alternatives differ in which operand is tied to
;; the destination and which one may live in memory:
;;   alt 0: operand 1 tied, operand 2 reg/mem, operand 3 reg -> 132 form
;;   alt 1: operand 1 tied, operand 2 reg, operand 3 reg/mem -> 213 form
;;   alt 2: operand 3 tied, operand 1 reg, operand 2 reg/mem -> 231 form
;; Only the 132, 213 and 231 forms exist; the second alternative uses 213
;; with the same operand order as the fmaddsub/fmsubadd patterns.
1793 (define_insn "*fma_fmadd_<mode>"
1794 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1796 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1797 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1798 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1801 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1802 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1803 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1804 [(set_attr "type" "ssemuladd")
1805 (set_attr "mode" "<MODE>")])
;; FMA3 fused multiply-subtract over the FMAMODE modes.  The alternatives
;; mirror the fmadd pattern:
;;   alt 0: operand 1 tied, operand 2 reg/mem, operand 3 reg -> 132 form
;;   alt 1: operand 1 tied, operand 2 reg, operand 3 reg/mem -> 213 form
;;   alt 2: operand 3 tied, operand 1 reg, operand 2 reg/mem -> 231 form
;; Only the 132, 213 and 231 forms exist; the second alternative uses 213
;; with the same operand order as the fmaddsub/fmsubadd patterns.
1807 (define_insn "*fma_fmsub_<mode>"
1808 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1810 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1811 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1813 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1816 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1817 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1818 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1819 [(set_attr "type" "ssemuladd")
1820 (set_attr "mode" "<MODE>")])
;; FMA3 negated fused multiply-add (emits vfnmadd*) over the FMAMODE modes.
;; The pattern is named fnmadd to match what it emits; the name is
;; "*"-prefixed, so no gen_* function depends on it.
;;   alt 0: operand 1 tied, operand 2 reg/mem, operand 3 reg -> 132 form
;;   alt 1: operand 1 tied, operand 2 reg, operand 3 reg/mem -> 213 form
;;   alt 2: operand 3 tied, operand 1 reg, operand 2 reg/mem -> 231 form
;; Only the 132, 213 and 231 forms exist; the second alternative uses 213
;; with the same operand order as the fmaddsub/fmsubadd patterns.
1822 (define_insn "*fma_fnmadd_<mode>"
1823 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1826 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1827 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1828 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1831 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1832 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1833 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1834 [(set_attr "type" "ssemuladd")
1835 (set_attr "mode" "<MODE>")])
;; FMA3 negated fused multiply-subtract (emits vfnmsub*) over the FMAMODE
;; modes.  The pattern is named fnmsub to match what it emits; the name is
;; "*"-prefixed, so no gen_* function depends on it.
;;   alt 0: operand 1 tied, operand 2 reg/mem, operand 3 reg -> 132 form
;;   alt 1: operand 1 tied, operand 2 reg, operand 3 reg/mem -> 213 form
;;   alt 2: operand 3 tied, operand 1 reg, operand 2 reg/mem -> 231 form
;; Only the 132, 213 and 231 forms exist; the second alternative uses 213
;; with the same operand order as the fmaddsub/fmsubadd patterns.
1837 (define_insn "*fma_fnmsub_<mode>"
1838 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1841 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1842 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1844 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1847 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1848 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1849 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1850 [(set_attr "type" "ssemuladd")
1851 (set_attr "mode" "<MODE>")])
1853 (define_insn "*fma_fmaddsub_<mode>"
1854 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
1856 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
1857 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
1858 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
1862 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1863 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1864 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1865 [(set_attr "type" "ssemuladd")
1866 (set_attr "mode" "<MODE>")])
1868 (define_insn "*fma_fmsubadd_<mode>"
1869 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
1871 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
1872 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
1874 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
1878 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1879 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1880 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1881 [(set_attr "type" "ssemuladd")
1882 (set_attr "mode" "<MODE>")])
1884 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1886 ;; Parallel single-precision floating point conversion operations
1888 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1890 (define_insn "sse_cvtpi2ps"
1891 [(set (match_operand:V4SF 0 "register_operand" "=x")
1894 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1895 (match_operand:V4SF 1 "register_operand" "0")
1898 "cvtpi2ps\t{%2, %0|%0, %2}"
1899 [(set_attr "type" "ssecvt")
1900 (set_attr "mode" "V4SF")])
1902 (define_insn "sse_cvtps2pi"
1903 [(set (match_operand:V2SI 0 "register_operand" "=y")
1905 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1907 (parallel [(const_int 0) (const_int 1)])))]
1909 "cvtps2pi\t{%1, %0|%0, %1}"
1910 [(set_attr "type" "ssecvt")
1911 (set_attr "unit" "mmx")
1912 (set_attr "mode" "DI")])
1914 (define_insn "sse_cvttps2pi"
1915 [(set (match_operand:V2SI 0 "register_operand" "=y")
1917 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1918 (parallel [(const_int 0) (const_int 1)])))]
1920 "cvttps2pi\t{%1, %0|%0, %1}"
1921 [(set_attr "type" "ssecvt")
1922 (set_attr "unit" "mmx")
1923 (set_attr "prefix_rep" "0")
1924 (set_attr "mode" "SF")])
;; Convert the SImode integer in operand 2 to SFmode and place it in a V4SF
;; result together with operand 1.  Three alternatives:
;;   alt 0: legacy encoding, integer source in a general register
;;   alt 1: legacy encoding, integer source in memory
;;   alt 2: VEX encoding, source in register or memory, operand 1 untied
;; The legacy alternatives are kept separate because the per-CPU decode
;; attributes (athlon/amdfam10/bdver1) differ for register and memory
;; sources; the VEX alternative leaves them at the default ("*").
1926 (define_insn "sse_cvtsi2ss"
1927 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1930 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
1931 (match_operand:V4SF 1 "register_operand" "0,0,x")
1935 cvtsi2ss\t{%2, %0|%0, %2}
1936 cvtsi2ss\t{%2, %0|%0, %2}
1937 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
1938 [(set_attr "isa" "noavx,noavx,avx")
1939 (set_attr "type" "sseicvt")
1940 (set_attr "athlon_decode" "vector,double,*")
1941 (set_attr "amdfam10_decode" "vector,double,*")
1942 (set_attr "bdver1_decode" "double,direct,*")
1943 (set_attr "prefix" "orig,orig,vex")
1944 (set_attr "mode" "SF")])
1946 (define_insn "sse_cvtsi2ssq"
1947 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1950 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
1951 (match_operand:V4SF 1 "register_operand" "0,0,x")
1953 "TARGET_SSE && TARGET_64BIT"
1955 cvtsi2ssq\t{%2, %0|%0, %2}
1956 cvtsi2ssq\t{%2, %0|%0, %2}
1957 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
1958 [(set_attr "isa" "noavx,noavx,avx")
1959 (set_attr "type" "sseicvt")
1960 (set_attr "athlon_decode" "vector,double,*")
1961 (set_attr "amdfam10_decode" "vector,double,*")
1962 (set_attr "bdver1_decode" "double,direct,*")
1963 (set_attr "length_vex" "*,*,4")
1964 (set_attr "prefix_rex" "1,1,*")
1965 (set_attr "prefix" "orig,orig,vex")
1966 (set_attr "mode" "SF")])
1968 (define_insn "sse_cvtss2si"
1969 [(set (match_operand:SI 0 "register_operand" "=r,r")
1972 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1973 (parallel [(const_int 0)]))]
1974 UNSPEC_FIX_NOTRUNC))]
1976 "%vcvtss2si\t{%1, %0|%0, %1}"
1977 [(set_attr "type" "sseicvt")
1978 (set_attr "athlon_decode" "double,vector")
1979 (set_attr "bdver1_decode" "double,double")
1980 (set_attr "prefix_rep" "1")
1981 (set_attr "prefix" "maybe_vex")
1982 (set_attr "mode" "SI")])
1984 (define_insn "sse_cvtss2si_2"
1985 [(set (match_operand:SI 0 "register_operand" "=r,r")
1986 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1987 UNSPEC_FIX_NOTRUNC))]
1989 "%vcvtss2si\t{%1, %0|%0, %1}"
1990 [(set_attr "type" "sseicvt")
1991 (set_attr "athlon_decode" "double,vector")
1992 (set_attr "amdfam10_decode" "double,double")
1993 (set_attr "bdver1_decode" "double,double")
1994 (set_attr "prefix_rep" "1")
1995 (set_attr "prefix" "maybe_vex")
1996 (set_attr "mode" "SI")])
1998 (define_insn "sse_cvtss2siq"
1999 [(set (match_operand:DI 0 "register_operand" "=r,r")
2002 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2003 (parallel [(const_int 0)]))]
2004 UNSPEC_FIX_NOTRUNC))]
2005 "TARGET_SSE && TARGET_64BIT"
2006 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2007 [(set_attr "type" "sseicvt")
2008 (set_attr "athlon_decode" "double,vector")
2009 (set_attr "bdver1_decode" "double,double")
2010 (set_attr "prefix_rep" "1")
2011 (set_attr "prefix" "maybe_vex")
2012 (set_attr "mode" "DI")])
2014 (define_insn "sse_cvtss2siq_2"
2015 [(set (match_operand:DI 0 "register_operand" "=r,r")
2016 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2017 UNSPEC_FIX_NOTRUNC))]
2018 "TARGET_SSE && TARGET_64BIT"
2019 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2020 [(set_attr "type" "sseicvt")
2021 (set_attr "athlon_decode" "double,vector")
2022 (set_attr "amdfam10_decode" "double,double")
2023 (set_attr "bdver1_decode" "double,double")
2024 (set_attr "prefix_rep" "1")
2025 (set_attr "prefix" "maybe_vex")
2026 (set_attr "mode" "DI")])
2028 (define_insn "sse_cvttss2si"
2029 [(set (match_operand:SI 0 "register_operand" "=r,r")
2032 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2033 (parallel [(const_int 0)]))))]
2035 "%vcvttss2si\t{%1, %0|%0, %1}"
2036 [(set_attr "type" "sseicvt")
2037 (set_attr "athlon_decode" "double,vector")
2038 (set_attr "amdfam10_decode" "double,double")
2039 (set_attr "bdver1_decode" "double,double")
2040 (set_attr "prefix_rep" "1")
2041 (set_attr "prefix" "maybe_vex")
2042 (set_attr "mode" "SI")])
2044 (define_insn "sse_cvttss2siq"
2045 [(set (match_operand:DI 0 "register_operand" "=r,r")
2048 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2049 (parallel [(const_int 0)]))))]
2050 "TARGET_SSE && TARGET_64BIT"
2051 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2052 [(set_attr "type" "sseicvt")
2053 (set_attr "athlon_decode" "double,vector")
2054 (set_attr "amdfam10_decode" "double,double")
2055 (set_attr "bdver1_decode" "double,double")
2056 (set_attr "prefix_rep" "1")
2057 (set_attr "prefix" "maybe_vex")
2058 (set_attr "mode" "DI")])
2060 (define_insn "avx_cvtdq2ps256"
2061 [(set (match_operand:V8SF 0 "register_operand" "=x")
2062 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2064 "vcvtdq2ps\t{%1, %0|%0, %1}"
2065 [(set_attr "type" "ssecvt")
2066 (set_attr "prefix" "vex")
2067 (set_attr "mode" "V8SF")])
;; 128-bit integer -> float conversion: V4SF operand 0 is (float:V4SF) of
;; V4SI operand 1 (register or memory).  Emitted as %vcvtdq2ps with the
;; "maybe_vex" prefix attribute.
2069 (define_insn "sse2_cvtdq2ps"
2070 [(set (match_operand:V4SF 0 "register_operand" "=x")
2071 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2073 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2074 [(set_attr "type" "ssecvt")
2075 (set_attr "prefix" "maybe_vex")
2076 (set_attr "mode" "V4SF")])
;; Expander that builds a V4SI -> V4SF conversion out of several steps:
;; a (float:V4SF) conversion of operand 1, an (lt:V4SF) comparison of
;; operand 5 against operand 3, an (and:V4SF) of operand 6 with operand 4,
;; and finally operand 0 = operand 5 + operand 7.
;; The preparation code sets operand 3 to a register holding the V4SF zero
;; vector, operand 4 to a register holding a V4SF constant built from
;; 2**32 (real_ldexp of dconst1 by 32), and operands 5..7 to fresh V4SF
;; pseudos.
;; NOTE(review): presumably operands 5, 6 and 7 receive the float, lt and
;; and results respectively (adding 2**32 where the signed result came out
;; negative) -- the destination lines are not visible here; confirm.
2078 (define_expand "sse2_cvtudq2ps"
2080 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2082 (lt:V4SF (match_dup 5) (match_dup 3)))
2084 (and:V4SF (match_dup 6) (match_dup 4)))
2085 (set (match_operand:V4SF 0 "register_operand" "")
2086 (plus:V4SF (match_dup 5) (match_dup 7)))]
2089 REAL_VALUE_TYPE TWO32r;
2093 real_ldexp (&TWO32r, &dconst1, 32);
2094 x = const_double_from_real_value (TWO32r, SFmode);
2096 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2097 operands[4] = force_reg (V4SFmode,
2098 ix86_build_const_vector (V4SFmode, 1, x));
2100 for (i = 5; i < 8; i++)
2101 operands[i] = gen_reg_rtx (V4SFmode);
;; 256-bit float -> integer conversion modelled as UNSPEC_FIX_NOTRUNC:
;; V8SI operand 0 from V8SF operand 1 (register or memory).  Always
;; VEX-encoded vcvtps2dq; the insn mode attribute is OI.
2104 (define_insn "avx_cvtps2dq256"
2105 [(set (match_operand:V8SI 0 "register_operand" "=x")
2106 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2107 UNSPEC_FIX_NOTRUNC))]
2109 "vcvtps2dq\t{%1, %0|%0, %1}"
2110 [(set_attr "type" "ssecvt")
2111 (set_attr "prefix" "vex")
2112 (set_attr "mode" "OI")])
;; 128-bit float -> integer conversion modelled as UNSPEC_FIX_NOTRUNC:
;; V4SI operand 0 from V4SF operand 1 (register or memory), emitted as
;; %vcvtps2dq.  The prefix_data16 attribute is not a constant here: it is
;; selected by a test on TARGET_AVX.
2114 (define_insn "sse2_cvtps2dq"
2115 [(set (match_operand:V4SI 0 "register_operand" "=x")
2116 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2117 UNSPEC_FIX_NOTRUNC))]
2119 "%vcvtps2dq\t{%1, %0|%0, %1}"
2120 [(set_attr "type" "ssecvt")
2121 (set (attr "prefix_data16")
2123 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2125 (const_string "1")))
2126 (set_attr "prefix" "maybe_vex")
2127 (set_attr "mode" "TI")])
;; 256-bit truncating float -> integer conversion: V8SI operand 0 is
;; (fix:V8SI) of V8SF operand 1 (register or memory).  Always VEX-encoded
;; vcvttps2dq; the insn mode attribute is OI.
2129 (define_insn "avx_cvttps2dq256"
2130 [(set (match_operand:V8SI 0 "register_operand" "=x")
2131 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2133 "vcvttps2dq\t{%1, %0|%0, %1}"
2134 [(set_attr "type" "ssecvt")
2135 (set_attr "prefix" "vex")
2136 (set_attr "mode" "OI")])
;; 128-bit truncating float -> integer conversion: V4SI operand 0 is
;; (fix:V4SI) of V4SF operand 1 (register or memory), emitted as
;; %vcvttps2dq.  prefix_rep and prefix_data16 are both selected by a test
;; on TARGET_AVX.
;; NOTE(review): prefix_data16 is set twice in this attribute list -- once
;; by the TARGET_AVX conditional and once unconditionally to "0".  One of
;; the two looks redundant; confirm which setting is intended to apply.
2138 (define_insn "sse2_cvttps2dq"
2139 [(set (match_operand:V4SI 0 "register_operand" "=x")
2140 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2142 "%vcvttps2dq\t{%1, %0|%0, %1}"
2143 [(set_attr "type" "ssecvt")
2144 (set (attr "prefix_rep")
2146 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2148 (const_string "1")))
2149 (set (attr "prefix_data16")
2151 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2153 (const_string "0")))
2154 (set_attr "prefix_data16" "0")
2155 (set_attr "prefix" "maybe_vex")
2156 (set_attr "mode" "TI")])
2158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2160 ;; Parallel double-precision floating point conversion operations
2162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Two packed 32-bit integers -> two doubles: V2DF operand 0 is
;; (float:V2DF) of V2SI operand 1.  Alternative 0 takes the source from a
;; "y" register and is the only one tagged unit "mmx" / prefix_data16 "1";
;; alternative 1 takes it from memory.  Emitted as cvtpi2pd (no VEX form).
2164 (define_insn "sse2_cvtpi2pd"
2165 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2166 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2168 "cvtpi2pd\t{%1, %0|%0, %1}"
2169 [(set_attr "type" "ssecvt")
2170 (set_attr "unit" "mmx,*")
2171 (set_attr "prefix_data16" "1,*")
2172 (set_attr "mode" "V2DF")])
;; Two doubles -> two packed 32-bit integers, modelled as
;; UNSPEC_FIX_NOTRUNC: V2SI operand 0 (a "y" register, unit "mmx") from
;; V2DF operand 1 (register or memory).  Emitted as cvtpd2pi.
2174 (define_insn "sse2_cvtpd2pi"
2175 [(set (match_operand:V2SI 0 "register_operand" "=y")
2176 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2177 UNSPEC_FIX_NOTRUNC))]
2179 "cvtpd2pi\t{%1, %0|%0, %1}"
2180 [(set_attr "type" "ssecvt")
2181 (set_attr "unit" "mmx")
2182 (set_attr "bdver1_decode" "double")
2183 (set_attr "prefix_data16" "1")
2184 (set_attr "mode" "DI")])
;; Truncating variant of sse2_cvtpd2pi: V2SI operand 0 (a "y" register,
;; unit "mmx") is (fix:V2SI) of V2DF operand 1.  Emitted as cvttpd2pi.
;; Note the mode attribute here is TI whereas sse2_cvtpd2pi uses DI.
2186 (define_insn "sse2_cvttpd2pi"
2187 [(set (match_operand:V2SI 0 "register_operand" "=y")
2188 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2190 "cvttpd2pi\t{%1, %0|%0, %1}"
2191 [(set_attr "type" "ssecvt")
2192 (set_attr "unit" "mmx")
2193 (set_attr "bdver1_decode" "double")
2194 (set_attr "prefix_data16" "1")
2195 (set_attr "mode" "TI")])
;; 32-bit integer -> scalar double: (float:DF) of SI operand 2 is combined
;; with V2DF operand 1 to form V2DF operand 0.
;; Three alternatives: the first two are the non-AVX two-operand cvtsi2sd
;; forms (operand 1 tied to the destination, source in "r" or "m"); the
;; third is the AVX three-operand vcvtsi2sd form with a separate operand 1.
2197 (define_insn "sse2_cvtsi2sd"
2198 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2201 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2202 (match_operand:V2DF 1 "register_operand" "0,0,x")
2206 cvtsi2sd\t{%2, %0|%0, %2}
2207 cvtsi2sd\t{%2, %0|%0, %2}
2208 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2209 [(set_attr "isa" "noavx,noavx,avx")
2210 (set_attr "type" "sseicvt")
2211 (set_attr "athlon_decode" "double,direct,*")
2212 (set_attr "amdfam10_decode" "vector,double,*")
2213 (set_attr "bdver1_decode" "double,direct,*")
2214 (set_attr "prefix" "orig,orig,vex")
2215 (set_attr "mode" "DF")])
;; 64-bit integer -> scalar double; same shape as sse2_cvtsi2sd but with a
;; DI source operand and the cvtsi2sdq / vcvtsi2sdq mnemonics.
;; Requires TARGET_SSE2 && TARGET_64BIT.  The non-AVX alternatives set
;; prefix_rex to "1"; the AVX alternative sets length_vex to "4".
2217 (define_insn "sse2_cvtsi2sdq"
2218 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2221 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2222 (match_operand:V2DF 1 "register_operand" "0,0,x")
2224 "TARGET_SSE2 && TARGET_64BIT"
2226 cvtsi2sdq\t{%2, %0|%0, %2}
2227 cvtsi2sdq\t{%2, %0|%0, %2}
2228 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2229 [(set_attr "isa" "noavx,noavx,avx")
2230 (set_attr "type" "sseicvt")
2231 (set_attr "athlon_decode" "double,direct,*")
2232 (set_attr "amdfam10_decode" "vector,double,*")
2233 (set_attr "bdver1_decode" "double,direct,*")
2234 (set_attr "length_vex" "*,*,4")
2235 (set_attr "prefix_rex" "1,1,*")
2236 (set_attr "prefix" "orig,orig,vex")
2237 (set_attr "mode" "DF")])
;; Scalar double -> 32-bit integer, modelled as UNSPEC_FIX_NOTRUNC applied
;; to element 0 of V2DF operand 1 (register or memory).  The result is
;; written to an SI general register; emitted as %vcvtsd2si.
2239 (define_insn "sse2_cvtsd2si"
2240 [(set (match_operand:SI 0 "register_operand" "=r,r")
2243 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2244 (parallel [(const_int 0)]))]
2245 UNSPEC_FIX_NOTRUNC))]
2247 "%vcvtsd2si\t{%1, %0|%0, %1}"
2248 [(set_attr "type" "sseicvt")
2249 (set_attr "athlon_decode" "double,vector")
2250 (set_attr "bdver1_decode" "double,double")
2251 (set_attr "prefix_rep" "1")
2252 (set_attr "prefix" "maybe_vex")
2253 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a plain DF operand rather than
;; an element selected from a V2DF vector.  Same %vcvtsd2si output and
;; UNSPEC_FIX_NOTRUNC modelling.
2255 (define_insn "sse2_cvtsd2si_2"
2256 [(set (match_operand:SI 0 "register_operand" "=r,r")
2257 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2258 UNSPEC_FIX_NOTRUNC))]
2260 "%vcvtsd2si\t{%1, %0|%0, %1}"
2261 [(set_attr "type" "sseicvt")
2262 (set_attr "athlon_decode" "double,vector")
2263 (set_attr "amdfam10_decode" "double,double")
2264 (set_attr "bdver1_decode" "double,double")
2265 (set_attr "prefix_rep" "1")
2266 (set_attr "prefix" "maybe_vex")
2267 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse2_cvtsd2si: element 0 of V2DF operand 1 is
;; converted (UNSPEC_FIX_NOTRUNC) into a DI general register.
;; Requires TARGET_SSE2 && TARGET_64BIT; the template adds the {q} suffix.
2269 (define_insn "sse2_cvtsd2siq"
2270 [(set (match_operand:DI 0 "register_operand" "=r,r")
2273 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2274 (parallel [(const_int 0)]))]
2275 UNSPEC_FIX_NOTRUNC))]
2276 "TARGET_SSE2 && TARGET_64BIT"
2277 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2278 [(set_attr "type" "sseicvt")
2279 (set_attr "athlon_decode" "double,vector")
2280 (set_attr "bdver1_decode" "double,double")
2281 (set_attr "prefix_rep" "1")
2282 (set_attr "prefix" "maybe_vex")
2283 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source is a plain DF operand rather than
;; an element selected from a V2DF vector.
;; Requires TARGET_SSE2 && TARGET_64BIT.
2285 (define_insn "sse2_cvtsd2siq_2"
2286 [(set (match_operand:DI 0 "register_operand" "=r,r")
2287 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2288 UNSPEC_FIX_NOTRUNC))]
2289 "TARGET_SSE2 && TARGET_64BIT"
2290 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2291 [(set_attr "type" "sseicvt")
2292 (set_attr "athlon_decode" "double,vector")
2293 (set_attr "amdfam10_decode" "double,double")
2294 (set_attr "bdver1_decode" "double,double")
2295 (set_attr "prefix_rep" "1")
2296 (set_attr "prefix" "maybe_vex")
2297 (set_attr "mode" "DI")])
;; Scalar double -> 32-bit integer using the truncating cvttsd2si form.
;; The source is element 0 of V2DF operand 1 (register or memory); the
;; result is written to an SI general register.
2299 (define_insn "sse2_cvttsd2si"
2300 [(set (match_operand:SI 0 "register_operand" "=r,r")
2303 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2304 (parallel [(const_int 0)]))))]
2306 "%vcvttsd2si\t{%1, %0|%0, %1}"
2307 [(set_attr "type" "sseicvt")
2308 (set_attr "athlon_decode" "double,vector")
2309 (set_attr "amdfam10_decode" "double,double")
2310 (set_attr "bdver1_decode" "double,double")
2311 (set_attr "prefix_rep" "1")
2312 (set_attr "prefix" "maybe_vex")
2313 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse2_cvttsd2si: element 0 of V2DF operand 1 is
;; converted into a DI general register.
;; Requires TARGET_SSE2 && TARGET_64BIT; the template adds the {q} suffix.
2315 (define_insn "sse2_cvttsd2siq"
2316 [(set (match_operand:DI 0 "register_operand" "=r,r")
2319 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2320 (parallel [(const_int 0)]))))]
2321 "TARGET_SSE2 && TARGET_64BIT"
2322 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "sseicvt")
2324 (set_attr "athlon_decode" "double,vector")
2325 (set_attr "amdfam10_decode" "double,double")
2326 (set_attr "bdver1_decode" "double,double")
2327 (set_attr "prefix_rep" "1")
2328 (set_attr "prefix" "maybe_vex")
2329 (set_attr "mode" "DI")])
;; Four 32-bit integers -> four doubles: V4DF operand 0 is (float:V4DF) of
;; V4SI operand 1 (register or memory).  Always VEX-encoded vcvtdq2pd.
2331 (define_insn "avx_cvtdq2pd256"
2332 [(set (match_operand:V4DF 0 "register_operand" "=x")
2333 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2335 "vcvtdq2pd\t{%1, %0|%0, %1}"
2336 [(set_attr "type" "ssecvt")
2337 (set_attr "prefix" "vex")
2338 (set_attr "mode" "V4DF")])
;; Unnamed-for-expanders ("*") form of the V4DF conversion whose source is
;; elements 0..3 selected out of a V8SI operand.  The template prints
;; operand 1 with the %x modifier and emits vcvtdq2pd.
2340 (define_insn "*avx_cvtdq2pd256_2"
2341 [(set (match_operand:V4DF 0 "register_operand" "=x")
2344 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2345 (parallel [(const_int 0) (const_int 1)
2346 (const_int 2) (const_int 3)]))))]
2348 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2349 [(set_attr "type" "ssecvt")
2350 (set_attr "prefix" "vex")
2351 (set_attr "mode" "V4DF")])
;; Two 32-bit integers -> two doubles: the source is elements 0 and 1 of
;; V4SI operand 1 (register or memory), the result is V2DF operand 0.
;; Emitted as %vcvtdq2pd.
2353 (define_insn "sse2_cvtdq2pd"
2354 [(set (match_operand:V2DF 0 "register_operand" "=x")
2357 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2358 (parallel [(const_int 0) (const_int 1)]))))]
2360 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2361 [(set_attr "type" "ssecvt")
2362 (set_attr "prefix" "maybe_vex")
2363 (set_attr "mode" "V2DF")])
;; Four doubles -> four 32-bit integers, modelled as UNSPEC_FIX_NOTRUNC:
;; V4SI operand 0 from V4DF operand 1 (register or memory).  Emitted as
;; vcvtpd2dq with the {y} suffix; the insn mode attribute is OI.
2365 (define_insn "avx_cvtpd2dq256"
2366 [(set (match_operand:V4SI 0 "register_operand" "=x")
2367 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2368 UNSPEC_FIX_NOTRUNC))]
2370 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2371 [(set_attr "type" "ssecvt")
2372 (set_attr "prefix" "vex")
2373 (set_attr "mode" "OI")])
;; Expander for the V2DF -> integer conversion with a V4SI destination.
;; The pattern contains a V2SI unspec of operand 1, and the preparation
;; statement supplies operand 2 as the V2SI zero constant.
;; NOTE(review): presumably the zero fills the upper half of the V4SI
;; result, matching *sse2_cvtpd2dq -- the combining rtx is not visible here.
2375 (define_expand "sse2_cvtpd2dq"
2376 [(set (match_operand:V4SI 0 "register_operand" "")
2378 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2382 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the rtl produced by the sse2_cvtpd2dq expander: a V2SI
;; unspec of V2DF operand 1 paired with operand 2, which must satisfy
;; const0_operand.  The output code returns either "vcvtpd2dq{x}" or
;; "cvtpd2dq".
;; NOTE(review): the test choosing between the two templates is not
;; visible here; presumably it is TARGET_AVX, in line with "maybe_vex".
2384 (define_insn "*sse2_cvtpd2dq"
2385 [(set (match_operand:V4SI 0 "register_operand" "=x")
2387 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2389 (match_operand:V2SI 2 "const0_operand" "")))]
2393 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2395 return "cvtpd2dq\t{%1, %0|%0, %1}";
2397 [(set_attr "type" "ssecvt")
2398 (set_attr "prefix_rep" "1")
2399 (set_attr "prefix_data16" "0")
2400 (set_attr "prefix" "maybe_vex")
2401 (set_attr "mode" "TI")
2402 (set_attr "amdfam10_decode" "double")
2403 (set_attr "athlon_decode" "vector")
2404 (set_attr "bdver1_decode" "double")])
;; Four doubles -> four 32-bit integers, truncating: V4SI operand 0 is
;; (fix:V4SI) of V4DF operand 1 (register or memory).  Emitted as
;; vcvttpd2dq with the {y} suffix; the insn mode attribute is OI.
2406 (define_insn "avx_cvttpd2dq256"
2407 [(set (match_operand:V4SI 0 "register_operand" "=x")
2408 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2410 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2411 [(set_attr "type" "ssecvt")
2412 (set_attr "prefix" "vex")
2413 (set_attr "mode" "OI")])
;; Expander for the truncating V2DF -> integer conversion with a V4SI
;; destination.  The pattern contains (fix:V2SI) of operand 1, and the
;; preparation statement supplies operand 2 as the V2SI zero constant.
;; NOTE(review): presumably the zero fills the upper half of the V4SI
;; result, matching *sse2_cvttpd2dq -- the combining rtx is not visible here.
2415 (define_expand "sse2_cvttpd2dq"
2416 [(set (match_operand:V4SI 0 "register_operand" "")
2418 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2421 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the rtl produced by the sse2_cvttpd2dq expander:
;; (fix:V2SI) of V2DF operand 1 paired with operand 2, which must satisfy
;; const0_operand.  The output code returns either "vcvttpd2dq{x}" or
;; "cvttpd2dq".
;; NOTE(review): the test choosing between the two templates is not
;; visible here; presumably it is TARGET_AVX, in line with "maybe_vex".
2423 (define_insn "*sse2_cvttpd2dq"
2424 [(set (match_operand:V4SI 0 "register_operand" "=x")
2426 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2427 (match_operand:V2SI 2 "const0_operand" "")))]
2431 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2433 return "cvttpd2dq\t{%1, %0|%0, %1}";
2435 [(set_attr "type" "ssecvt")
2436 (set_attr "amdfam10_decode" "double")
2437 (set_attr "athlon_decode" "vector")
2438 (set_attr "bdver1_decode" "double")
2439 (set_attr "prefix" "maybe_vex")
2440 (set_attr "mode" "TI")])
;; Scalar double -> scalar single: (float_truncate:V2SF) of V2DF operand 2
;; is combined with V4SF operand 1 to form V4SF operand 0.
;; Three alternatives: two non-AVX cvtsd2ss forms (operand 1 tied to the
;; destination, source in "x" or "m") and one AVX three-operand vcvtsd2ss.
2442 (define_insn "sse2_cvtsd2ss"
2443 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2446 (float_truncate:V2SF
2447 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2448 (match_operand:V4SF 1 "register_operand" "0,0,x")
2452 cvtsd2ss\t{%2, %0|%0, %2}
2453 cvtsd2ss\t{%2, %0|%0, %2}
2454 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2455 [(set_attr "isa" "noavx,noavx,avx")
2456 (set_attr "type" "ssecvt")
2457 (set_attr "athlon_decode" "vector,double,*")
2458 (set_attr "amdfam10_decode" "vector,double,*")
2459 (set_attr "bdver1_decode" "direct,direct,*")
2460 (set_attr "prefix" "orig,orig,vex")
2461 (set_attr "mode" "SF")])
;; Scalar single -> scalar double: a conversion of elements 0 and 1
;; selected from V4SF operand 2 is combined with V2DF operand 1 to form
;; V2DF operand 0.
;; Three alternatives: two non-AVX cvtss2sd forms (operand 1 tied to the
;; destination, source in "x" or "m") and one AVX three-operand vcvtss2sd.
2463 (define_insn "sse2_cvtss2sd"
2464 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2468 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2469 (parallel [(const_int 0) (const_int 1)])))
2470 (match_operand:V2DF 1 "register_operand" "0,0,x")
2474 cvtss2sd\t{%2, %0|%0, %2}
2475 cvtss2sd\t{%2, %0|%0, %2}
2476 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2477 [(set_attr "isa" "noavx,noavx,avx")
2478 (set_attr "type" "ssecvt")
2479 (set_attr "amdfam10_decode" "vector,double,*")
2480 (set_attr "athlon_decode" "direct,direct,*")
2481 (set_attr "bdver1_decode" "direct,direct,*")
2482 (set_attr "prefix" "orig,orig,vex")
2483 (set_attr "mode" "DF")])
;; Four doubles -> four singles: V4SF operand 0 is (float_truncate:V4SF)
;; of V4DF operand 1 (register or memory).  Emitted as vcvtpd2ps with the
;; {y} suffix.
2485 (define_insn "avx_cvtpd2ps256"
2486 [(set (match_operand:V4SF 0 "register_operand" "=x")
2487 (float_truncate:V4SF
2488 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2490 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2491 [(set_attr "type" "ssecvt")
2492 (set_attr "prefix" "vex")
2493 (set_attr "mode" "V4SF")])
;; Expander for the two-double -> single conversion with a V4SF
;; destination.  The pattern contains (float_truncate:V2SF) of V2DF
;; operand 1, and the preparation statement supplies operand 2 as the V2SF
;; zero constant.
;; NOTE(review): presumably the zero fills the upper half of the V4SF
;; result, matching *sse2_cvtpd2ps -- the combining rtx is not visible here.
2495 (define_expand "sse2_cvtpd2ps"
2496 [(set (match_operand:V4SF 0 "register_operand" "")
2498 (float_truncate:V2SF
2499 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2502 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for the rtl produced by the sse2_cvtpd2ps expander:
;; (float_truncate:V2SF) of V2DF operand 1 paired with operand 2, which
;; must satisfy const0_operand.  The output code returns either
;; "vcvtpd2ps{x}" or "cvtpd2ps".
;; NOTE(review): the test choosing between the two templates is not
;; visible here; presumably it is TARGET_AVX, in line with "maybe_vex".
2504 (define_insn "*sse2_cvtpd2ps"
2505 [(set (match_operand:V4SF 0 "register_operand" "=x")
2507 (float_truncate:V2SF
2508 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2509 (match_operand:V2SF 2 "const0_operand" "")))]
2513 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2515 return "cvtpd2ps\t{%1, %0|%0, %1}";
2517 [(set_attr "type" "ssecvt")
2518 (set_attr "amdfam10_decode" "double")
2519 (set_attr "athlon_decode" "vector")
2520 (set_attr "bdver1_decode" "double")
2521 (set_attr "prefix_data16" "1")
2522 (set_attr "prefix" "maybe_vex")
2523 (set_attr "mode" "V4SF")])
;; Four singles -> four doubles: V4DF operand 0 is produced from V4SF
;; operand 1 (register or memory).  Always VEX-encoded vcvtps2pd.
2525 (define_insn "avx_cvtps2pd256"
2526 [(set (match_operand:V4DF 0 "register_operand" "=x")
2528 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2530 "vcvtps2pd\t{%1, %0|%0, %1}"
2531 [(set_attr "type" "ssecvt")
2532 (set_attr "prefix" "vex")
2533 (set_attr "mode" "V4DF")])
;; Form of the V4DF conversion whose source is elements 0..3 selected out
;; of a V8SF operand.  The template prints operand 1 with the %x modifier
;; and emits vcvtps2pd.
2535 (define_insn "*avx_cvtps2pd256_2"
2536 [(set (match_operand:V4DF 0 "register_operand" "=x")
2539 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2540 (parallel [(const_int 0) (const_int 1)
2541 (const_int 2) (const_int 3)]))))]
2543 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2544 [(set_attr "type" "ssecvt")
2545 (set_attr "prefix" "vex")
2546 (set_attr "mode" "V4DF")])
;; Two singles -> two doubles: the source is elements 0 and 1 of V4SF
;; operand 1 (register or memory), the result is V2DF operand 0.
;; Emitted as %vcvtps2pd.
2548 (define_insn "sse2_cvtps2pd"
2549 [(set (match_operand:V2DF 0 "register_operand" "=x")
2552 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2553 (parallel [(const_int 0) (const_int 1)]))))]
2555 "%vcvtps2pd\t{%1, %0|%0, %1}"
2556 [(set_attr "type" "ssecvt")
2557 (set_attr "amdfam10_decode" "direct")
2558 (set_attr "athlon_decode" "double")
2559 (set_attr "bdver1_decode" "double")
2560 (set_attr "prefix_data16" "0")
2561 (set_attr "prefix" "maybe_vex")
2562 (set_attr "mode" "V2DF")])
;; Widen the high half of a V4SF into V2DF in two steps: a shuffle using
;; the selector (6, 7, 2, 3) that involves operand 1, followed by a
;; conversion of elements 0 and 1 into V2DF operand 0.  Operand 2 is a
;; fresh V4SF pseudo created by the preparation statement.
;; NOTE(review): presumably operand 2 is the shuffle's destination and the
;; input of the second step -- those lines are not visible here; confirm.
2564 (define_expand "vec_unpacks_hi_v4sf"
2569 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2570 (parallel [(const_int 6) (const_int 7)
2571 (const_int 2) (const_int 3)])))
2572 (set (match_operand:V2DF 0 "register_operand" "")
2576 (parallel [(const_int 0) (const_int 1)]))))]
2578 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the high half of a V8SF into V4DF: elements 4..7 of operand 1 are
;; selected first, then V4DF operand 0 is set.  Operand 2 is a fresh V4SF
;; pseudo created by the preparation statement.
;; NOTE(review): presumably operand 2 holds the selected half and feeds
;; the second step -- those lines are not visible here; confirm.
2580 (define_expand "vec_unpacks_hi_v8sf"
2583 (match_operand:V8SF 1 "nonimmediate_operand" "")
2584 (parallel [(const_int 4) (const_int 5)
2585 (const_int 6) (const_int 7)])))
2586 (set (match_operand:V4DF 0 "register_operand" "")
2590 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the low half of a V4SF into V2DF: V2DF operand 0 is set from
;; elements 0 and 1 of V4SF operand 1.
2592 (define_expand "vec_unpacks_lo_v4sf"
2593 [(set (match_operand:V2DF 0 "register_operand" "")
2596 (match_operand:V4SF 1 "nonimmediate_operand" "")
2597 (parallel [(const_int 0) (const_int 1)]))))]
;; Widen the low half of a V8SF into V4DF: V4DF operand 0 is set from
;; elements 0..3 of V8SF operand 1.
2600 (define_expand "vec_unpacks_lo_v8sf"
2601 [(set (match_operand:V4DF 0 "register_operand" "")
2604 (match_operand:V8SF 1 "nonimmediate_operand" "")
2605 (parallel [(const_int 0) (const_int 1)
2606 (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF: unpack with gen_vec_unpacks_hi_v8hi into a temporary V4SI
;; pseudo, then convert that temporary to float with gen_sse2_cvtdq2ps.
2609 (define_expand "vec_unpacks_float_hi_v8hi"
2610 [(match_operand:V4SF 0 "register_operand" "")
2611 (match_operand:V8HI 1 "register_operand" "")]
2614 rtx tmp = gen_reg_rtx (V4SImode);
2616 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2617 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacks_lo_v8hi into a temporary V4SI
;; pseudo, then convert that temporary to float with gen_sse2_cvtdq2ps.
2621 (define_expand "vec_unpacks_float_lo_v8hi"
2622 [(match_operand:V4SF 0 "register_operand" "")
2623 (match_operand:V8HI 1 "register_operand" "")]
2626 rtx tmp = gen_reg_rtx (V4SImode);
2628 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2629 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacku_hi_v8hi into a temporary V4SI
;; pseudo, then convert that temporary to float with gen_sse2_cvtdq2ps.
2633 (define_expand "vec_unpacku_float_hi_v8hi"
2634 [(match_operand:V4SF 0 "register_operand" "")
2635 (match_operand:V8HI 1 "register_operand" "")]
2638 rtx tmp = gen_reg_rtx (V4SImode);
2640 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2641 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack with gen_vec_unpacku_lo_v8hi into a temporary V4SI
;; pseudo, then convert that temporary to float with gen_sse2_cvtdq2ps.
2645 (define_expand "vec_unpacku_float_lo_v8hi"
2646 [(match_operand:V4SF 0 "register_operand" "")
2647 (match_operand:V8HI 1 "register_operand" "")]
2650 rtx tmp = gen_reg_rtx (V4SImode);
2652 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2653 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; High half of a V4SI -> V2DF in two steps: a selection of elements
;; (2, 3, 2, 3) from V4SI operand 1, followed by setting V2DF operand 0
;; from elements 0 and 1.  Operand 2 is a fresh V4SI pseudo created by the
;; preparation statement.
;; NOTE(review): presumably operand 2 receives the selection and feeds the
;; second step -- those lines are not visible here; confirm.
2657 (define_expand "vec_unpacks_float_hi_v4si"
2660 (match_operand:V4SI 1 "nonimmediate_operand" "")
2661 (parallel [(const_int 2) (const_int 3)
2662 (const_int 2) (const_int 3)])))
2663 (set (match_operand:V2DF 0 "register_operand" "")
2667 (parallel [(const_int 0) (const_int 1)]))))]
2669 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low half of a V4SI -> V2DF: V2DF operand 0 is set from elements 0 and 1
;; of V4SI operand 1.
2671 (define_expand "vec_unpacks_float_lo_v4si"
2672 [(set (match_operand:V2DF 0 "register_operand" "")
2675 (match_operand:V4SI 1 "nonimmediate_operand" "")
2676 (parallel [(const_int 0) (const_int 1)]))))]
;; High half of a V8SI -> V4DF: elements 4..7 of operand 1 are selected
;; first, then V4DF operand 0 is set.  Operand 2 is a fresh V4SI pseudo
;; created by the preparation statement.
;; NOTE(review): presumably operand 2 holds the selected half and feeds
;; the second step -- those lines are not visible here; confirm.
2679 (define_expand "vec_unpacks_float_hi_v8si"
2682 (match_operand:V8SI 1 "nonimmediate_operand" "")
2683 (parallel [(const_int 4) (const_int 5)
2684 (const_int 6) (const_int 7)])))
2685 (set (match_operand:V4DF 0 "register_operand" "")
2689 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low half of a V8SI -> V4DF: V4DF operand 0 is set from elements 0..3 of
;; V8SI operand 1.
2691 (define_expand "vec_unpacks_float_lo_v8si"
2692 [(set (match_operand:V4DF 0 "register_operand" "")
2695 (match_operand:V8SI 1 "nonimmediate_operand" "")
2696 (parallel [(const_int 0) (const_int 1)
2697 (const_int 2) (const_int 3)]))))]
;; High half of a V4SI -> V2DF with an extra correction sequence.
;; Visible steps: select elements (2, 3, 2, 3) of operand 1; a step using
;; elements 0 and 1; (lt:V2DF) of operand 6 against operand 3;
;; (and:V2DF) of operand 7 with operand 4; operand 0 = operand 6 + operand 8.
;; The preparation code sets operand 3 to a register holding the V2DF zero
;; vector, operand 4 to a register holding a V2DF constant built from
;; 2**32 (real_ldexp of dconst1 by 32), operand 5 to a fresh V4SI pseudo
;; and operands 6..8 to fresh V2DF pseudos.
;; NOTE(review): presumably this adds 2**32 to lanes whose signed
;; conversion came out negative -- the destination lines of the
;; intermediate sets are not visible here; confirm.
2700 (define_expand "vec_unpacku_float_hi_v4si"
2703 (match_operand:V4SI 1 "nonimmediate_operand" "")
2704 (parallel [(const_int 2) (const_int 3)
2705 (const_int 2) (const_int 3)])))
2710 (parallel [(const_int 0) (const_int 1)]))))
2712 (lt:V2DF (match_dup 6) (match_dup 3)))
2714 (and:V2DF (match_dup 7) (match_dup 4)))
2715 (set (match_operand:V2DF 0 "register_operand" "")
2716 (plus:V2DF (match_dup 6) (match_dup 8)))]
2719 REAL_VALUE_TYPE TWO32r;
2723 real_ldexp (&TWO32r, &dconst1, 32);
2724 x = const_double_from_real_value (TWO32r, DFmode);
2726 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2727 operands[4] = force_reg (V2DFmode,
2728 ix86_build_const_vector (V2DFmode, 1, x));
2730 operands[5] = gen_reg_rtx (V4SImode);
2732 for (i = 6; i < 9; i++)
2733 operands[i] = gen_reg_rtx (V2DFmode);
;; Low half of a V4SI -> V2DF with an extra correction sequence.
;; Visible steps: a step using elements 0 and 1 of operand 1; (lt:V2DF) of
;; operand 5 against operand 3; (and:V2DF) of operand 6 with operand 4;
;; operand 0 = operand 5 + operand 7.
;; The preparation code sets operand 3 to a register holding the V2DF zero
;; vector, operand 4 to a register holding a V2DF constant built from
;; 2**32 (real_ldexp of dconst1 by 32), and operands 5..7 to fresh V2DF
;; pseudos.
;; NOTE(review): presumably this adds 2**32 to lanes whose signed
;; conversion came out negative -- the destination lines of the
;; intermediate sets are not visible here; confirm.
2736 (define_expand "vec_unpacku_float_lo_v4si"
2740 (match_operand:V4SI 1 "nonimmediate_operand" "")
2741 (parallel [(const_int 0) (const_int 1)]))))
2743 (lt:V2DF (match_dup 5) (match_dup 3)))
2745 (and:V2DF (match_dup 6) (match_dup 4)))
2746 (set (match_operand:V2DF 0 "register_operand" "")
2747 (plus:V2DF (match_dup 5) (match_dup 7)))]
2750 REAL_VALUE_TYPE TWO32r;
2754 real_ldexp (&TWO32r, &dconst1, 32);
2755 x = const_double_from_real_value (TWO32r, DFmode);
2757 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2758 operands[4] = force_reg (V2DFmode,
2759 ix86_build_const_vector (V2DFmode, 1, x));
2761 for (i = 5; i < 8; i++)
2762 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V4DF inputs into one V8SF: operands 1 and 2 are each narrowed
;; with (float_truncate:V4SF), and V8SF operand 0 is then set.  Operands 3
;; and 4 are fresh V4SF pseudos created by the preparation code.
;; NOTE(review): presumably operands 3 and 4 hold the two truncated halves
;; that are joined into operand 0 -- those lines are not visible here.
2765 (define_expand "vec_pack_trunc_v4df"
2767 (float_truncate:V4SF
2768 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2770 (float_truncate:V4SF
2771 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2772 (set (match_operand:V8SF 0 "register_operand" "")
2778 operands[3] = gen_reg_rtx (V4SFmode);
2779 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF inputs into one V4SF: each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are combined into operand 0 with gen_sse_movlhps.
2782 (define_expand "vec_pack_trunc_v2df"
2783 [(match_operand:V4SF 0 "register_operand" "")
2784 (match_operand:V2DF 1 "nonimmediate_operand" "")
2785 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2790 r1 = gen_reg_rtx (V4SFmode);
2791 r2 = gen_reg_rtx (V4SFmode);
2793 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2794 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2795 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI using the truncating conversion:
;; each input goes through gen_sse2_cvttpd2dq into its own V4SI temporary
;; (r1, r2); the temporaries and operand 0 are then viewed as V2DI via
;; gen_lowpart and combined with gen_vec_interleave_lowv2di.
2799 (define_expand "vec_pack_sfix_trunc_v2df"
2800 [(match_operand:V4SI 0 "register_operand" "")
2801 (match_operand:V2DF 1 "nonimmediate_operand" "")
2802 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2807 r1 = gen_reg_rtx (V4SImode);
2808 r2 = gen_reg_rtx (V4SImode);
2810 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2811 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2812 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2813 gen_lowpart (V2DImode, r1),
2814 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF inputs into one V4SI using the non-truncating conversion:
;; each input goes through gen_sse2_cvtpd2dq into its own V4SI temporary
;; (r1, r2); the temporaries and operand 0 are then viewed as V2DI via
;; gen_lowpart and combined with gen_vec_interleave_lowv2di.
2818 (define_expand "vec_pack_sfix_v2df"
2819 [(match_operand:V4SI 0 "register_operand" "")
2820 (match_operand:V2DF 1 "nonimmediate_operand" "")
2821 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2826 r1 = gen_reg_rtx (V4SImode);
2827 r2 = gen_reg_rtx (V4SImode);
2829 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2830 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2831 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2832 gen_lowpart (V2DImode, r1),
2833 gen_lowpart (V2DImode, r2)));
2837 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2839 ;; Parallel single-precision floating point element swizzling
2841 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts nonimmediate operands.
;; It lets ix86_fixup_binary_operands pick a destination, emits
;; gen_sse_movhlps into it, and copies the result to operand 0 when the
;; chosen destination is a different rtx.
2843 (define_expand "sse_movhlps_exp"
2844 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2847 (match_operand:V4SF 1 "nonimmediate_operand" "")
2848 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2849 (parallel [(const_int 6)
2855 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2857 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
2859 /* Fix up the destination if needed. */
2860 if (dst != operands[0])
2861 emit_move_insn (operands[0], dst);
;; V4SF shuffle of operands 1 and 2 whose selector begins with element 6.
;; Valid when TARGET_SSE and operands 1 and 2 are not both memory.
;; Five alternatives, in order: movhlps (reg, non-AVX), vmovhlps (reg,
;; AVX), movlps from the %H2 part of an offsettable memory operand
;; (non-AVX), vmovlps likewise (AVX), and %vmovhps storing operand 2 to a
;; memory destination.
2866 (define_insn "sse_movhlps"
2867 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2870 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2871 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2872 (parallel [(const_int 6)
2876 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2878 movhlps\t{%2, %0|%0, %2}
2879 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2880 movlps\t{%H2, %0|%0, %H2}
2881 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2882 %vmovhps\t{%2, %0|%0, %2}"
2883 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2884 (set_attr "type" "ssemov")
2885 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2886 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts nonimmediate operands.
;; It lets ix86_fixup_binary_operands pick a destination, emits
;; gen_sse_movlhps into it, and copies the result to operand 0 when the
;; chosen destination is a different rtx.
2888 (define_expand "sse_movlhps_exp"
2889 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2892 (match_operand:V4SF 1 "nonimmediate_operand" "")
2893 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2894 (parallel [(const_int 0)
2900 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2902 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
2904 /* Fix up the destination if needed. */
2905 if (dst != operands[0])
2906 emit_move_insn (operands[0], dst);
;; V4SF shuffle of operands 1 and 2 whose selector begins with element 0.
;; Valid when TARGET_SSE and ix86_binary_operator_ok accepts the operands.
;; Five alternatives, in order: movlhps (reg, non-AVX), vmovlhps (reg,
;; AVX), movhps with operand 2 in memory (non-AVX), vmovhps (AVX), and
;; %vmovlps storing operand 2 into the %H0 part of an offsettable memory
;; destination.
2911 (define_insn "sse_movlhps"
2912 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2915 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2916 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
2917 (parallel [(const_int 0)
2921 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2923 movlhps\t{%2, %0|%0, %2}
2924 vmovlhps\t{%2, %1, %0|%0, %1, %2}
2925 movhps\t{%2, %0|%0, %2}
2926 vmovhps\t{%2, %1, %0|%0, %1, %2}
2927 %vmovlps\t{%2, %H0|%H0, %2}"
2928 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2929 (set_attr "type" "ssemov")
2930 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2931 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
2933 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps: V8SF operand 0 takes elements
;; (2, 10, 3, 11, 6, 14, 7, 15) of the operand 1 / operand 2 pair, i.e.
;; elements 2, 3, 6, 7 of each input, interleaved.  Operand 2 may be in
;; memory.
2934 (define_insn "avx_unpckhps256"
2935 [(set (match_operand:V8SF 0 "register_operand" "=x")
2938 (match_operand:V8SF 1 "register_operand" "x")
2939 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2940 (parallel [(const_int 2) (const_int 10)
2941 (const_int 3) (const_int 11)
2942 (const_int 6) (const_int 14)
2943 (const_int 7) (const_int 15)])))]
2945 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2946 [(set_attr "type" "sselog")
2947 (set_attr "prefix" "vex")
2948 (set_attr "mode" "V8SF")])
;; Three-step expander producing V8SF operand 0 from operands 1 and 2.
;; Step 1 uses the selector (0, 8, 1, 9, 4, 12, 5, 13) on operands 1 and 2;
;; step 2 uses the selector (2, 10, 3, 11, 6, 14, 7, 15);
;; step 3 sets operand 0 using the selector (4, 5, 6, 7, 12, 13, 14, 15).
;; Operands 3 and 4 are fresh V8SF pseudos created by the preparation code.
;; NOTE(review): presumably operands 3 and 4 hold the results of steps 1
;; and 2 and are the inputs of step 3 -- those lines are not visible here.
2950 (define_expand "vec_interleave_highv8sf"
2954 (match_operand:V8SF 1 "register_operand" "x")
2955 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2956 (parallel [(const_int 0) (const_int 8)
2957 (const_int 1) (const_int 9)
2958 (const_int 4) (const_int 12)
2959 (const_int 5) (const_int 13)])))
2965 (parallel [(const_int 2) (const_int 10)
2966 (const_int 3) (const_int 11)
2967 (const_int 6) (const_int 14)
2968 (const_int 7) (const_int 15)])))
2969 (set (match_operand:V8SF 0 "register_operand" "")
2974 (parallel [(const_int 4) (const_int 5)
2975 (const_int 6) (const_int 7)
2976 (const_int 12) (const_int 13)
2977 (const_int 14) (const_int 15)])))]
2980 operands[3] = gen_reg_rtx (V8SFmode);
2981 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave: V4SF operand 0 takes elements (2, 6, 3, 7) of
;; the operand 1 / operand 2 pair.  Alternative 0 is the non-AVX
;; two-operand unpckhps (operand 1 tied to the destination); alternative 1
;; is the AVX three-operand vunpckhps.
2984 (define_insn "vec_interleave_highv4sf"
2985 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2988 (match_operand:V4SF 1 "register_operand" "0,x")
2989 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2990 (parallel [(const_int 2) (const_int 6)
2991 (const_int 3) (const_int 7)])))]
2994 unpckhps\t{%2, %0|%0, %2}
2995 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2996 [(set_attr "isa" "noavx,avx")
2997 (set_attr "type" "sselog")
2998 (set_attr "prefix" "orig,vex")
2999 (set_attr "mode" "V4SF")])
3001 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps: V8SF operand 0 takes elements
;; (0, 8, 1, 9, 4, 12, 5, 13) of the operand 1 / operand 2 pair, i.e.
;; elements 0, 1, 4, 5 of each input, interleaved.  Operand 2 may be in
;; memory.
3002 (define_insn "avx_unpcklps256"
3003 [(set (match_operand:V8SF 0 "register_operand" "=x")
3006 (match_operand:V8SF 1 "register_operand" "x")
3007 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3008 (parallel [(const_int 0) (const_int 8)
3009 (const_int 1) (const_int 9)
3010 (const_int 4) (const_int 12)
3011 (const_int 5) (const_int 13)])))]
3013 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3014 [(set_attr "type" "sselog")
3015 (set_attr "prefix" "vex")
3016 (set_attr "mode" "V8SF")])
;; Three-step expander producing V8SF operand 0 from operands 1 and 2.
;; Step 1 uses the selector (0, 8, 1, 9, 4, 12, 5, 13) on operands 1 and 2;
;; step 2 uses the selector (2, 10, 3, 11, 6, 14, 7, 15);
;; step 3 sets operand 0 using the selector (0, 1, 2, 3, 8, 9, 10, 11).
;; Operands 3 and 4 are fresh V8SF pseudos created by the preparation code.
;; NOTE(review): presumably operands 3 and 4 hold the results of steps 1
;; and 2 and are the inputs of step 3 -- those lines are not visible here.
3018 (define_expand "vec_interleave_lowv8sf"
3022 (match_operand:V8SF 1 "register_operand" "x")
3023 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3024 (parallel [(const_int 0) (const_int 8)
3025 (const_int 1) (const_int 9)
3026 (const_int 4) (const_int 12)
3027 (const_int 5) (const_int 13)])))
3033 (parallel [(const_int 2) (const_int 10)
3034 (const_int 3) (const_int 11)
3035 (const_int 6) (const_int 14)
3036 (const_int 7) (const_int 15)])))
3037 (set (match_operand:V8SF 0 "register_operand" "")
3042 (parallel [(const_int 0) (const_int 1)
3043 (const_int 2) (const_int 3)
3044 (const_int 8) (const_int 9)
3045 (const_int 10) (const_int 11)])))]
3048 operands[3] = gen_reg_rtx (V8SFmode);
3049 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave: V4SF operand 0 takes elements (0, 4, 1, 5) of
;; the operand 1 / operand 2 pair.  Alternative 0 is the non-AVX
;; two-operand unpcklps (operand 1 tied to the destination); alternative 1
;; is the AVX three-operand vunpcklps.
3052 (define_insn "vec_interleave_lowv4sf"
3053 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3056 (match_operand:V4SF 1 "register_operand" "0,x")
3057 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3058 (parallel [(const_int 0) (const_int 4)
3059 (const_int 1) (const_int 5)])))]
3062 unpcklps\t{%2, %0|%0, %2}
3063 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3064 [(set_attr "isa" "noavx,avx")
3065 (set_attr "type" "sselog")
3066 (set_attr "prefix" "orig,vex")
3067 (set_attr "mode" "V4SF")])
3069 ;; These are modeled with the same vec_concat as the others so that we
3070 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: V8SF operand 0 is built with the selector
;; (1, 1, 3, 3, 5, 5, 7, 7), so every odd-indexed element appears twice.
;; Operand 1 may be a register or memory.
3071 (define_insn "avx_movshdup256"
3072 [(set (match_operand:V8SF 0 "register_operand" "=x")
3075 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3077 (parallel [(const_int 1) (const_int 1)
3078 (const_int 3) (const_int 3)
3079 (const_int 5) (const_int 5)
3080 (const_int 7) (const_int 7)])))]
3082 "vmovshdup\t{%1, %0|%0, %1}"
3083 [(set_attr "type" "sse")
3084 (set_attr "prefix" "vex")
3085 (set_attr "mode" "V8SF")])
;; 128-bit %vmovshdup on V4SF operand 1 (register or memory); the selector
;; starts with element 1.
;; NOTE(review): the remaining selector entries are not visible here;
;; presumably they mirror the 256-bit form (odd elements duplicated).
3087 (define_insn "sse3_movshdup"
3088 [(set (match_operand:V4SF 0 "register_operand" "=x")
3091 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3093 (parallel [(const_int 1)
3098 "%vmovshdup\t{%1, %0|%0, %1}"
3099 [(set_attr "type" "sse")
3100 (set_attr "prefix_rep" "1")
3101 (set_attr "prefix" "maybe_vex")
3102 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: V8SF operand 0 is built with the selector
;; (0, 0, 2, 2, 4, 4, 6, 6), so every even-indexed element appears twice.
;; Operand 1 may be a register or memory.
3104 (define_insn "avx_movsldup256"
3105 [(set (match_operand:V8SF 0 "register_operand" "=x")
3108 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3110 (parallel [(const_int 0) (const_int 0)
3111 (const_int 2) (const_int 2)
3112 (const_int 4) (const_int 4)
3113 (const_int 6) (const_int 6)])))]
3115 "vmovsldup\t{%1, %0|%0, %1}"
3116 [(set_attr "type" "sse")
3117 (set_attr "prefix" "vex")
3118 (set_attr "mode" "V8SF")])
;; 128-bit %vmovsldup on V4SF operand 1 (register or memory); the selector
;; starts with element 0.
;; NOTE(review): the remaining selector entries are not visible here;
;; presumably they mirror the 256-bit form (even elements duplicated).
3120 (define_insn "sse3_movsldup"
3121 [(set (match_operand:V4SF 0 "register_operand" "=x")
3124 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3126 (parallel [(const_int 0)
3131 "%vmovsldup\t{%1, %0|%0, %1}"
3132 [(set_attr "type" "sse")
3133 (set_attr "prefix_rep" "1")
3134 (set_attr "prefix" "maybe_vex")
3135 (set_attr "mode" "V4SF")])
;; Expander taking an integer immediate (operand 3) and turning it into
;; the eight explicit element selectors expected by avx_shufps256_1.
;; Each 2-bit field of the immediate yields two selectors: one for the
;; first four result elements and the same value plus 4 for the last four.
;; Fields 0 and 1 index into operand 1 (offsets 0 and 4); fields 2 and 3
;; index into operand 2 (offsets 8 and 12).
3137 (define_expand "avx_shufps256"
3138 [(match_operand:V8SF 0 "register_operand" "")
3139 (match_operand:V8SF 1 "register_operand" "")
3140 (match_operand:V8SF 2 "nonimmediate_operand" "")
3141 (match_operand:SI 3 "const_int_operand" "")]
3144 int mask = INTVAL (operands[3]);
3145 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3146 GEN_INT ((mask >> 0) & 3),
3147 GEN_INT ((mask >> 2) & 3),
3148 GEN_INT (((mask >> 4) & 3) + 8),
3149 GEN_INT (((mask >> 6) & 3) + 8),
3150 GEN_INT (((mask >> 0) & 3) + 4),
3151 GEN_INT (((mask >> 2) & 3) + 4),
3152 GEN_INT (((mask >> 4) & 3) + 12),
3153 GEN_INT (((mask >> 6) & 3) + 12)));
3157 ;; Each 2-bit field of the mask selects 2 elements, one in each 128-bit half.
;; vshufps on V8SF with the shuffle spelled out as eight selector operands
;; (3..10), each constrained to its own index range by a predicate.
;; The insn condition additionally requires selectors 7..10 to equal
;; selectors 3..6 plus 4, so both halves of the result use the same
;; pattern.  The output code rebuilds the 8-bit immediate from operands
;; 3..6 (removing the +8 bias from operands 5 and 6), stores it back into
;; operand 3 and emits vshufps.
3158 (define_insn "avx_shufps256_1"
3159 [(set (match_operand:V8SF 0 "register_operand" "=x")
3162 (match_operand:V8SF 1 "register_operand" "x")
3163 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3164 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3165 (match_operand 4 "const_0_to_3_operand" "")
3166 (match_operand 5 "const_8_to_11_operand" "")
3167 (match_operand 6 "const_8_to_11_operand" "")
3168 (match_operand 7 "const_4_to_7_operand" "")
3169 (match_operand 8 "const_4_to_7_operand" "")
3170 (match_operand 9 "const_12_to_15_operand" "")
3171 (match_operand 10 "const_12_to_15_operand" "")])))]
3173 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3174 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3175 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3176 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3179 mask = INTVAL (operands[3]);
3180 mask |= INTVAL (operands[4]) << 2;
3181 mask |= (INTVAL (operands[5]) - 8) << 4;
3182 mask |= (INTVAL (operands[6]) - 8) << 6;
3183 operands[3] = GEN_INT (mask);
3185 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3187 [(set_attr "type" "sselog")
3188 (set_attr "length_immediate" "1")
3189 (set_attr "prefix" "vex")
3190 (set_attr "mode" "V8SF")])
;; Expander taking an integer immediate (operand 3) and splitting it into
;; the four explicit element selectors passed to gen_sse_shufps_v4sf.
;; Fields 0 and 1 are used as-is (indices 0..3); fields 2 and 3 are biased
;; by 4 (indices 4..7).
3192 (define_expand "sse_shufps"
3193 [(match_operand:V4SF 0 "register_operand" "")
3194 (match_operand:V4SF 1 "register_operand" "")
3195 (match_operand:V4SF 2 "nonimmediate_operand" "")
3196 (match_operand:SI 3 "const_int_operand" "")]
3199 int mask = INTVAL (operands[3]);
3200 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3201 GEN_INT ((mask >> 0) & 3),
3202 GEN_INT ((mask >> 2) & 3),
3203 GEN_INT (((mask >> 4) & 3) + 4),
3204 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps over the SSEMODE4S mode iterator: a vec_select from the
;; vec_concat of operands 1 and 2, with four selector operands (3 and 4 in
;; 0..3, 5 and 6 in 4..7).  The output code packs the selectors back into
;; an immediate (removing the +4 bias from operands 5 and 6), stores it in
;; operand 3, and picks the template by which_alternative: two-operand
;; shufps for the non-AVX alternative, three-operand vshufps for AVX.
3208 (define_insn "sse_shufps_<mode>"
3209 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x,x")
3210 (vec_select:SSEMODE4S
3211 (vec_concat:<ssedoublesizemode>
3212 (match_operand:SSEMODE4S 1 "register_operand" "0,x")
3213 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm,xm"))
3214 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3215 (match_operand 4 "const_0_to_3_operand" "")
3216 (match_operand 5 "const_4_to_7_operand" "")
3217 (match_operand 6 "const_4_to_7_operand" "")])))]
3221 mask |= INTVAL (operands[3]) << 0;
3222 mask |= INTVAL (operands[4]) << 2;
3223 mask |= (INTVAL (operands[5]) - 4) << 4;
3224 mask |= (INTVAL (operands[6]) - 4) << 6;
3225 operands[3] = GEN_INT (mask);
3227 switch (which_alternative)
3230 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3232 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3237 [(set_attr "isa" "noavx,avx")
3238 (set_attr "type" "sselog")
3239 (set_attr "length_immediate" "1")
3240 (set_attr "prefix" "orig,vex")
3241 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of the V4SF operand 1 into the V2SF operand 0.
;; Alternatives, read from the constraint strings:
;;   0: register source, memory destination   -> movhps
;;   1: register source, register destination -> movhlps
;;   2: offsettable memory source, register destination -> movlps from %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand; confirm against print_operand in i386.c.
3243 (define_insn "sse_storehps"
3244 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3246 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3247 (parallel [(const_int 2) (const_int 3)])))]
3250 %vmovhps\t{%1, %0|%0, %1}
3251 %vmovhlps\t{%1, %d0|%d0, %1}
3252 %vmovlps\t{%H1, %d0|%d0, %H1}"
3253 [(set_attr "type" "ssemov")
3254 (set_attr "prefix" "maybe_vex")
3255 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadhps.  It accepts nonimmediate operands,
;; lets ix86_fixup_binary_operands choose the destination actually used
;; (dst), emits sse_loadhps into dst, and copies dst to operands[0]
;; afterwards when the two differ.
3257 (define_expand "sse_loadhps_exp"
3258 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3261 (match_operand:V4SF 1 "nonimmediate_operand" "")
3262 (parallel [(const_int 0) (const_int 1)]))
3263 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3266 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3268 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3270 /* Fix up the destination if needed. */
3271 if (dst != operands[0])
3272 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of the V4SF operand 1 with the V2SF operand 2
;; to form the V4SF operand 0.
;; Alternatives, read from the constraint strings:
;;   0/1: operand 2 in memory    -> movhps / vmovhps
;;   2/3: operand 2 in a register -> movlhps / vmovlhps
;;   4:   destination in offsettable memory, tied to operand 1 -> movlps
;;        of operand 2 to %H0
;; The legacy forms (0, 2) tie operand 1 to operand 0; the VEX forms (1, 3)
;; take operand 1 as a separate source.
3277 (define_insn "sse_loadhps"
3278 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3281 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3282 (parallel [(const_int 0) (const_int 1)]))
3283 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3286 movhps\t{%2, %0|%0, %2}
3287 vmovhps\t{%2, %1, %0|%0, %1, %2}
3288 movlhps\t{%2, %0|%0, %2}
3289 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3290 %vmovlps\t{%2, %H0|%H0, %2}"
3291 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3292 (set_attr "type" "ssemov")
3293 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3294 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of the V4SF operand 1 into the V2SF operand 0.
;; Alternatives, read from the constraint strings:
;;   0: register source, memory destination   -> movlps
;;   1: register source, register destination -> movaps (whole register copy)
;;   2: memory source, register destination   -> movlps
3296 (define_insn "sse_storelps"
3297 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3299 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3300 (parallel [(const_int 0) (const_int 1)])))]
3303 %vmovlps\t{%1, %0|%0, %1}
3304 %vmovaps\t{%1, %0|%0, %1}
3305 %vmovlps\t{%1, %d0|%d0, %1}"
3306 [(set_attr "type" "ssemov")
3307 (set_attr "prefix" "maybe_vex")
3308 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadlps.  It accepts nonimmediate operands,
;; lets ix86_fixup_binary_operands choose the destination actually used
;; (dst), emits sse_loadlps into dst, and copies dst to operands[0]
;; afterwards when the two differ.
3310 (define_expand "sse_loadlps_exp"
3311 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3313 (match_operand:V2SF 2 "nonimmediate_operand" "")
3315 (match_operand:V4SF 1 "nonimmediate_operand" "")
3316 (parallel [(const_int 2) (const_int 3)]))))]
3319 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3321 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3323 /* Fix up the destination if needed. */
3324 if (dst != operands[0])
3325 emit_move_insn (operands[0], dst);
;; Combine the V2SF operand 2 with elements 2 and 3 of the V4SF operand 1
;; to form the V4SF operand 0.
;; Alternatives, read from the constraint strings:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: all operands in registers -> vshufps with immediate 0xe4
;;   2: operand 2 in memory, operand 1 tied to the destination -> movlps
;;   3: three-operand VEX form -> vmovlps
;;   4: destination in memory and tied to operand 1 -> movlps store
;; Only the two shuffle alternatives carry an immediate byte
;; ("length_immediate" is "1,1,*,*,*").
3330 (define_insn "sse_loadlps"
3331 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3333 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3335 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3336 (parallel [(const_int 2) (const_int 3)]))))]
3339 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3340 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3341 movlps\t{%2, %0|%0, %2}
3342 vmovlps\t{%2, %1, %0|%0, %1, %2}
3343 %vmovlps\t{%2, %0|%0, %2}"
3344 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3345 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3346 (set_attr "length_immediate" "1,1,*,*,*")
3347 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3348 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss/vmovss on V4SF: operand 2 is the source written in
;; the template's source position, operand 1 is the other V4SF input.
;; Alternative 0 is the legacy form with operand 1 tied to operand 0;
;; alternative 1 is the three-operand VEX form.  The "mode" attribute is SF.
3350 (define_insn "sse_movss"
3351 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3353 (match_operand:V4SF 2 "register_operand" " x,x")
3354 (match_operand:V4SF 1 "register_operand" " 0,x")
3358 movss\t{%2, %0|%0, %2}
3359 vmovss\t{%2, %1, %0|%0, %1, %2}"
3360 [(set_attr "isa" "noavx,avx")
3361 (set_attr "type" "ssemov")
3362 (set_attr "prefix" "orig,vex")
3363 (set_attr "mode" "SF")])
;; Expander for vec_dupv4sf: V4SF operand 0 is built from the SFmode scalar
;; operand 1, which may arrive as a register or as memory.
;; When the preparation code copies operand 1 into a register, the copy is
;; made in SFmode, the mode the operand is declared with.  Requesting a
;; V4SFmode register here would pair an SF source with a V4SF destination
;; in the move that force_reg emits for a non-register operand.
3365 (define_expand "vec_dupv4sf"
3366 [(set (match_operand:V4SF 0 "register_operand" "")
3368 (match_operand:SF 1 "nonimmediate_operand" "")))]
3372 operands[1] = force_reg (SFmode, operands[1]);
;; VEX-encoded V4SF insn fed by the SF scalar operand 1.
;;   0: operand 1 in a register -> vshufps with immediate 0, using
;;      operand 1 for both sources
;;   1: operand 1 in memory     -> vbroadcastss
;; Only alternative 0 has an immediate byte and only alternative 1 sets
;; "prefix_extra", as the per-alternative attributes record.
3375 (define_insn "*vec_dupv4sf_avx"
3376 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3378 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3381 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3382 vbroadcastss\t{%1, %0|%0, %1}"
3383 [(set_attr "type" "sselog1,ssemov")
3384 (set_attr "length_immediate" "1,0")
3385 (set_attr "prefix_extra" "0,1")
3386 (set_attr "prefix" "vex")
3387 (set_attr "mode" "V4SF")])
;; Legacy-encoded V4SF insn fed by the SF scalar operand 1: operand 1 must
;; already live in the destination register (constraint "0"), and shufps
;; with immediate 0 is applied to that register in place.
3389 (define_insn "*vec_dupv4sf"
3390 [(set (match_operand:V4SF 0 "register_operand" "=x")
3392 (match_operand:SF 1 "register_operand" "0")))]
3394 "shufps\t{$0, %0, %0|%0, %0, 0}"
3395 [(set_attr "type" "sselog1")
3396 (set_attr "length_immediate" "1")
3397 (set_attr "mode" "V4SF")])
3399 ;; Although insertps takes register source, we prefer
3400 ;; unpcklps with register source since it is shorter.
;; Form a V2SF (operand 0) from the two SF operands 1 and 2.
;; Alternatives, read from the constraint strings:
;;   0/1: both scalars in SSE registers -> unpcklps / vunpcklps
;;   2/3: operand 2 in memory -> insertps / vinsertps with immediate 0x10
;;   4:   operand 1 in memory, operand 2 matching constraint C -> movss load
;;   5:   MMX registers (*y), operand 1 tied to the destination -> punpckldq
;;   6:   MMX destination, operand 1 in memory, operand 2 matching C -> movd
;; NOTE(review): constraint C is presumably the zero constant (the sibling
;; pattern *vec_concatv2sf_sse pairs it with reg_or_0_operand); confirm in
;; constraints.md.
3401 (define_insn "*vec_concatv2sf_sse4_1"
3402 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3404 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3405 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3408 unpcklps\t{%2, %0|%0, %2}
3409 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3410 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3411 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3412 %vmovss\t{%1, %0|%0, %1}
3413 punpckldq\t{%2, %0|%0, %2}
3414 movd\t{%1, %0|%0, %1}"
3415 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
3416 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3417 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3418 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3419 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3420 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3421 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3423 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3424 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3425 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SF (operand 0) from the two SF operands 1 and 2; operand 2 is
;; restricted to a register or zero (reg_or_0_operand).
;; Alternatives, read from the constraint strings:
;;   0: SSE registers, operand 1 tied to the destination -> unpcklps
;;   1: operand 1 in memory, operand 2 matching C        -> movss load
;;   2: MMX registers, operand 1 tied to the destination -> punpckldq
;;   3: MMX destination, operand 1 in memory, operand 2 matching C -> movd
3426 (define_insn "*vec_concatv2sf_sse"
3427 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3429 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3430 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3433 unpcklps\t{%2, %0|%0, %2}
3434 movss\t{%1, %0|%0, %1}
3435 punpckldq\t{%2, %0|%0, %2}
3436 movd\t{%1, %0|%0, %1}"
3437 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3438 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Form a V4SF (operand 0) from the two V2SF operands 1 and 2.
;; Alternatives, read from the constraint strings:
;;   0/1: operand 2 in a register -> movlhps / vmovlhps
;;   2/3: operand 2 in memory     -> movhps / vmovhps
;; The legacy forms (0, 2) tie operand 1 to the destination; the VEX forms
;; (1, 3) take it as a separate source.
3440 (define_insn "*vec_concatv4sf_sse"
3441 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3443 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3444 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3447 movlhps\t{%2, %0|%0, %2}
3448 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3449 movhps\t{%2, %0|%0, %2}
3450 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3451 [(set_attr "isa" "noavx,avx,noavx,avx")
3452 (set_attr "type" "ssemov")
3453 (set_attr "prefix" "orig,vex,orig,vex")
3454 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Expander for the SSEMODE vector modes.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument, the
;; destination register (operand 0) and the unconstrained operand 1.
3456 (define_expand "vec_init<mode>"
3457 [(match_operand:SSEMODE 0 "register_operand" "")
3458 (match_operand 1 "" "")]
3461 ix86_expand_vector_init (false, operands[0], operands[1]);
3465 ;; Avoid combining registers from different units in a single alternative,
3466 ;; see comment above inline_secondary_memory_needed function in i386.c
;; SSE4.1 variant of the element-0 vec_set for the SSEMODE4S modes: a
;; vec_merge of the duplicated scalar operand 2 with the vector operand 1.
;; Ten alternatives, read from the constraint strings:
;;   0-2: operand 1 matches constraint C; scalar from an SSE register
;;        (insertps with immediate 0xe), from memory (scalar move), or from
;;        a general register (movd)
;;   3/4: scalar in an SSE register merged into operand 1 -> movss / vmovss
;;   5/6: scalar in a general register or memory -> pinsrd / vpinsrd with
;;        immediate 0
;;   7-9: memory destination tied to operand 1; the scalar comes from an
;;        SSE register, a general register, or "fF".  The type, prefix and
;;        mode attributes are "*" for these three.
3467 (define_insn "*vec_set<mode>_0_sse4_1"
3468 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"
3469 "=x,x,x ,x,x,x ,x ,m,m,m")
3470 (vec_merge:SSEMODE4S
3471 (vec_duplicate:SSEMODE4S
3472 (match_operand:<ssescalarmode> 2 "general_operand"
3473 " x,m,*r,x,x,*rm,*rm,x,*r,fF"))
3474 (match_operand:SSEMODE4S 1 "vector_move_operand"
3475 " C,C,C ,0,x,0 ,x ,0,0 ,0")
3479 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3480 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3481 %vmovd\t{%2, %0|%0, %2}
3482 movss\t{%2, %0|%0, %2}
3483 vmovss\t{%2, %1, %0|%0, %1, %2}
3484 pinsrd\t{$0, %2, %0|%0, %2, 0}
3485 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3489 [(set_attr "isa" "base,base,base,noavx,avx,noavx,avx,base,base,base")
3490 (set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov,sselog,sselog,*,*,*")
3491 (set_attr "prefix_extra" "*,*,*,*,*,1,1,*,*,*")
3492 (set_attr "length_immediate" "*,*,*,*,*,1,1,*,*,*")
3493 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,vex,orig,vex,*,*,*")
3494 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,TI,TI,*,*,*")])
3496 ;; Avoid combining registers from different units in a single alternative,
3497 ;; see comment above inline_secondary_memory_needed function in i386.c
;; SSE2 variant of the element-0 vec_set for the SSEMODE4S modes: a
;; vec_merge of the duplicated scalar operand 2 with the vector operand 1.
;; The "mode" attribute lists six alternatives; the first three use a
;; scalar move in the element mode, movd and movss respectively, and the
;; last three have mode "*".
3498 (define_insn "*vec_set<mode>_0_sse2"
3499 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"
3501 (vec_merge:SSEMODE4S
3502 (vec_duplicate:SSEMODE4S
3503 (match_operand:<ssescalarmode> 2 "general_operand"
3505 (match_operand:SSEMODE4S 1 "vector_move_operand"
3510 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3511 movd\t{%2, %0|%0, %2}
3512 movss\t{%2, %0|%0, %2}
3516 [(set_attr "type" "ssemov")
3517 (set_attr "mode" "<ssescalarmode>,SI,SF,*,*,*")])
3519 ;; Avoid combining registers from different units in a single alternative,
3520 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Baseline variant of the element-0 vec_set for the SSEMODE4S modes: a
;; vec_merge of the duplicated scalar operand 2 with the vector operand 1.
;; The "mode" attribute lists five alternatives; the first two are emitted
;; as movss and the last three have mode "*".
3521 (define_insn "vec_set<mode>_0"
3522 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"
3524 (vec_merge:SSEMODE4S
3525 (vec_duplicate:SSEMODE4S
3526 (match_operand:<ssescalarmode> 2 "general_operand"
3528 (match_operand:SSEMODE4S 1 "vector_move_operand"
3533 movss\t{%2, %0|%0, %2}
3534 movss\t{%2, %0|%0, %2}
3538 [(set_attr "type" "ssemov")
3539 (set_attr "mode" "SF,SF,*,*,*")])
3541 ;; A subset is vec_setv4sf.
;; insertps/vinsertps placing the SF operand 2 into the V4SF operand 1.
;; Operand 3 is matched by const_pow2_1_to_8_operand.  The output code
;; converts it to an instruction immediate by taking exact_log2 of its
;; value and shifting that left by 4, then stores the result back into
;; operands[3] before the template is selected by which_alternative.
;; Alternative 0 is the legacy form (operand 1 tied to operand 0);
;; alternative 1 is the three-operand VEX form.
3542 (define_insn "*vec_setv4sf_sse4_1"
3543 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3546 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3547 (match_operand:V4SF 1 "register_operand" "0,x")
3548 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))]
3551 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3552 switch (which_alternative)
3555 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3557 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3562 [(set_attr "isa" "noavx,avx")
3563 (set_attr "type" "sselog")
3564 (set_attr "prefix_data16" "1,*")
3565 (set_attr "prefix_extra" "1")
3566 (set_attr "length_immediate" "1")
3567 (set_attr "prefix" "orig,vex")
3568 (set_attr "mode" "V4SF")])
;; insertps/vinsertps with a caller-supplied immediate, modelled as an
;; unspec over operand 2 (register or memory), operand 1 (register) and
;; the immediate operand 3, which must be a constant in 0..255.
;; Alternative 0 is the legacy form (operand 1 tied to operand 0);
;; alternative 1 is the three-operand VEX form.
3570 (define_insn "sse4_1_insertps"
3571 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3572 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3573 (match_operand:V4SF 1 "register_operand" "0,x")
3574 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3578 insertps\t{%3, %2, %0|%0, %2, %3}
3579 vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3580 [(set_attr "isa" "noavx,avx")
3581 (set_attr "type" "sselog")
3582 (set_attr "prefix_data16" "1,*")
3583 (set_attr "prefix_extra" "1")
3584 (set_attr "length_immediate" "1")
3585 (set_attr "prefix" "orig,vex")
3586 (set_attr "mode" "V4SF")])
;; Post-reload split (condition: TARGET_SSE && reload_completed) for a
;; vec_merge whose destination is memory and whose scalar operand 1 is not
;; in memory.  The replacement is a single scalar move whose destination is
;; the first element slot of that memory (adjust_address at offset 0 in the
;; element mode).
3589 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
3590 (vec_merge:SSEMODE4S
3591 (vec_duplicate:SSEMODE4S
3592 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3595 "TARGET_SSE && reload_completed"
3598 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander for the SSEMODE vector modes.  Operand 0 is the vector
;; register, operand 1 the scalar register and operand 2 a constant
;; integer.  All work is delegated to ix86_expand_vector_set, called with
;; false as its first argument and the integer value of operand 2 last.
3603 (define_expand "vec_set<mode>"
3604 [(match_operand:SSEMODE 0 "register_operand" "")
3605 (match_operand:<ssescalarmode> 1 "register_operand" "")
3606 (match_operand 2 "const_int_operand" "")]
3609 ix86_expand_vector_set (false, operands[0], operands[1],
3610 INTVAL (operands[2]));
;; Extract element 0 of the V4SF operand 1 into the SF operand 0.
;; The insn condition rejects the memory-to-memory combination.  After
;; reload the insn is split into an ordinary SF move: operand 1 is
;; re-expressed in SFmode (gen_rtx_REG on its register number, or
;; gen_lowpart) and moved to operand 0.
;; Destination alternatives: SSE register, memory, "f" and "r" registers.
3614 (define_insn_and_split "*vec_extractv4sf_0"
3615 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3617 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3618 (parallel [(const_int 0)])))]
3619 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3621 "&& reload_completed"
3624 rtx op1 = operands[1];
3626 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3628 op1 = gen_lowpart (SFmode, op1);
3629 emit_move_insn (operands[0], op1);
;; Expander for the AVX256MODE modes.  Operand 2 is a constant restricted
;; to 0..1; its value selects between the gen_vec_extract_lo_<mode> and
;; gen_vec_extract_hi_<mode> generators, and the chosen generator is then
;; emitted with operands 0 (half-width destination) and 1 (256-bit source).
3633 (define_expand "avx_vextractf128<mode>"
3634 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3635 (match_operand:AVX256MODE 1 "register_operand" "")
3636 (match_operand:SI 2 "const_0_to_1_operand" "")]
3639 rtx (*insn)(rtx, rtx);
3641 switch (INTVAL (operands[2]))
3644 insn = gen_vec_extract_lo_<mode>;
3647 insn = gen_vec_extract_hi_<mode>;
3653 emit_insn (insn (operands[0], operands[1]));
;; Extract elements 0 and 1 of a four-element 256-bit vector
;; (AVX256MODE4P) into a half-width destination.
;; After reload the insn is split into an ordinary move: operand 1 is
;; re-expressed in the half-width mode (gen_rtx_REG on its register number,
;; or gen_lowpart) and moved to operand 0.
3657 (define_insn_and_split "vec_extract_lo_<mode>"
3658 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3659 (vec_select:<avxhalfvecmode>
3660 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
3661 (parallel [(const_int 0) (const_int 1)])))]
3664 "&& reload_completed"
3667 rtx op1 = operands[1];
3669 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
3671 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
3672 emit_move_insn (operands[0], op1);
;; Extract elements 2 and 3 of a four-element 256-bit vector
;; (AVX256MODE4P) into a half-width destination using vextractf128 with
;; immediate 1.  The source must be a register; the destination is a
;; register (alternative 0) or memory (alternative 1), which the "memory"
;; attribute records as "none,store".
3676 (define_insn "vec_extract_hi_<mode>"
3677 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3678 (vec_select:<avxhalfvecmode>
3679 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3680 (parallel [(const_int 2) (const_int 3)])))]
3682 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3683 [(set_attr "type" "sselog")
3684 (set_attr "prefix_extra" "1")
3685 (set_attr "length_immediate" "1")
3686 (set_attr "memory" "none,store")
3687 (set_attr "prefix" "vex")
3688 (set_attr "mode" "V8SF")])
;; Extract elements 0-3 of an eight-element 256-bit vector (AVX256MODE8P)
;; into a half-width destination.
;; After reload the insn is split into an ordinary move: operand 1 is
;; re-expressed in the half-width mode (gen_rtx_REG on its register number,
;; or gen_lowpart) and moved to operand 0.
3690 (define_insn_and_split "vec_extract_lo_<mode>"
3691 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3692 (vec_select:<avxhalfvecmode>
3693 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
3694 (parallel [(const_int 0) (const_int 1)
3695 (const_int 2) (const_int 3)])))]
3698 "&& reload_completed"
3701 rtx op1 = operands[1];
3703 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
3705 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
3706 emit_move_insn (operands[0], op1);
;; Extract elements 4-7 of an eight-element 256-bit vector (AVX256MODE8P)
;; into a half-width destination using vextractf128 with immediate 1.
;; The source must be a register; the destination is a register
;; (alternative 0) or memory (alternative 1), which the "memory" attribute
;; records as "none,store".
3710 (define_insn "vec_extract_hi_<mode>"
3711 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3712 (vec_select:<avxhalfvecmode>
3713 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3714 (parallel [(const_int 4) (const_int 5)
3715 (const_int 6) (const_int 7)])))]
3717 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3718 [(set_attr "type" "sselog")
3719 (set_attr "prefix_extra" "1")
3720 (set_attr "length_immediate" "1")
3721 (set_attr "memory" "none,store")
3722 (set_attr "prefix" "vex")
3723 (set_attr "mode" "V8SF")])
;; Extract elements 0-7 of the V16HI operand 1 into the V8HI operand 0.
;; After reload the insn is split into an ordinary move: operand 1 is
;; re-expressed in V8HImode (gen_rtx_REG on its register number, or
;; gen_lowpart) and moved to operand 0.
3725 (define_insn_and_split "vec_extract_lo_v16hi"
3726 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3728 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3729 (parallel [(const_int 0) (const_int 1)
3730 (const_int 2) (const_int 3)
3731 (const_int 4) (const_int 5)
3732 (const_int 6) (const_int 7)])))]
3735 "&& reload_completed"
3738 rtx op1 = operands[1];
3740 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3742 op1 = gen_lowpart (V8HImode, op1);
3743 emit_move_insn (operands[0], op1);
;; Extract elements 8-15 of the V16HI operand 1 into the V8HI operand 0
;; using vextractf128 with immediate 1.  The source must be a register;
;; the destination is a register (alternative 0) or memory (alternative 1).
3747 (define_insn "vec_extract_hi_v16hi"
3748 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3750 (match_operand:V16HI 1 "register_operand" "x,x")
3751 (parallel [(const_int 8) (const_int 9)
3752 (const_int 10) (const_int 11)
3753 (const_int 12) (const_int 13)
3754 (const_int 14) (const_int 15)])))]
3756 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3757 [(set_attr "type" "sselog")
3758 (set_attr "prefix_extra" "1")
3759 (set_attr "length_immediate" "1")
3760 (set_attr "memory" "none,store")
3761 (set_attr "prefix" "vex")
3762 (set_attr "mode" "V8SF")])
;; Extract elements 0-15 of the V32QI operand 1 into the V16QI operand 0.
;; After reload the insn is split into an ordinary move: operand 1 is
;; re-expressed in V16QImode (gen_rtx_REG on its register number, or
;; gen_lowpart) and moved to operand 0.
3764 (define_insn_and_split "vec_extract_lo_v32qi"
3765 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3767 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3768 (parallel [(const_int 0) (const_int 1)
3769 (const_int 2) (const_int 3)
3770 (const_int 4) (const_int 5)
3771 (const_int 6) (const_int 7)
3772 (const_int 8) (const_int 9)
3773 (const_int 10) (const_int 11)
3774 (const_int 12) (const_int 13)
3775 (const_int 14) (const_int 15)])))]
3778 "&& reload_completed"
3781 rtx op1 = operands[1];
3783 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3785 op1 = gen_lowpart (V16QImode, op1);
3786 emit_move_insn (operands[0], op1);
;; Extract elements 16-31 of the V32QI operand 1 into the V16QI operand 0
;; using vextractf128 with immediate 1.  The source must be a register;
;; the destination is a register (alternative 0) or memory (alternative 1).
3790 (define_insn "vec_extract_hi_v32qi"
3791 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3793 (match_operand:V32QI 1 "register_operand" "x,x")
3794 (parallel [(const_int 16) (const_int 17)
3795 (const_int 18) (const_int 19)
3796 (const_int 20) (const_int 21)
3797 (const_int 22) (const_int 23)
3798 (const_int 24) (const_int 25)
3799 (const_int 26) (const_int 27)
3800 (const_int 28) (const_int 29)
3801 (const_int 30) (const_int 31)])))]
3803 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3804 [(set_attr "type" "sselog")
3805 (set_attr "prefix_extra" "1")
3806 (set_attr "length_immediate" "1")
3807 (set_attr "memory" "none,store")
3808 (set_attr "prefix" "vex")
3809 (set_attr "mode" "V8SF")])
;; extractps/vextractps: extract the element of the V4SF register operand 1
;; chosen by the constant operand 2 (0..3) into operand 0, which may be a
;; general register or memory (constraint "rm").
3811 (define_insn "*sse4_1_extractps"
3812 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3814 (match_operand:V4SF 1 "register_operand" "x")
3815 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3817 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3818 [(set_attr "type" "sselog")
3819 (set_attr "prefix_data16" "1")
3820 (set_attr "prefix_extra" "1")
3821 (set_attr "length_immediate" "1")
3822 (set_attr "prefix" "maybe_vex")
3823 (set_attr "mode" "V4SF")])
;; Extract element operand 2 (0..3) of a V4SF held in offsettable memory
;; into the SF register operand 0.  The split replaces the insn with a
;; plain SF load from the source address advanced by 4 bytes per element
;; index.
3825 (define_insn_and_split "*vec_extract_v4sf_mem"
3826 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3828 (match_operand:V4SF 1 "memory_operand" "o")
3829 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3835 int i = INTVAL (operands[2]);
3837 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for the VEC_EXTRACT_MODE vector modes.  Operand 0 is the scalar
;; destination register, operand 1 the vector register and operand 2 a
;; constant integer.  All work is delegated to ix86_expand_vector_extract,
;; called with false as its first argument and the integer value of
;; operand 2 last.
3841 (define_expand "vec_extract<mode>"
3842 [(match_operand:<avxscalarmode> 0 "register_operand" "")
3843 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3844 (match_operand 2 "const_int_operand" "")]
3847 ix86_expand_vector_extract (false, operands[0], operands[1],
3848 INTVAL (operands[2]));
3852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3854 ;; Parallel double-precision floating point element swizzling
3856 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3858 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd on V4DF: the selection over operands 1 and 2 uses the
;; index list {1, 5, 3, 7}.  Operand 1 must be a register; operand 2 may
;; be a register or memory.
3859 (define_insn "avx_unpckhpd256"
3860 [(set (match_operand:V4DF 0 "register_operand" "=x")
3863 (match_operand:V4DF 1 "register_operand" "x")
3864 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3865 (parallel [(const_int 1) (const_int 5)
3866 (const_int 3) (const_int 7)])))]
3868 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3869 [(set_attr "type" "sselog")
3870 (set_attr "prefix" "vex")
3871 (set_attr "mode" "V4DF")])
;; Multi-step expander for V4DF.  The template holds two selections with
;; the index lists {0, 4, 2, 6} and {1, 5, 3, 7}, followed by the final
;; set of operand 0, which uses the index list {2, 3, 6, 7}.
;; The preparation code allocates two fresh V4DF pseudos as operands 3
;; and 4.
;; NOTE(review): operands 3 and 4 presumably receive the two intermediate
;; selections and feed the final one; confirm against the full template.
3873 (define_expand "vec_interleave_highv4df"
3877 (match_operand:V4DF 1 "register_operand" "x")
3878 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3879 (parallel [(const_int 0) (const_int 4)
3880 (const_int 2) (const_int 6)])))
3886 (parallel [(const_int 1) (const_int 5)
3887 (const_int 3) (const_int 7)])))
3888 (set (match_operand:V4DF 0 "register_operand" "")
3893 (parallel [(const_int 2) (const_int 3)
3894 (const_int 6) (const_int 7)])))]
3897 operands[3] = gen_reg_rtx (V4DFmode);
3898 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for V2DF with nonimmediate operands 1 and 2.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register.
3902 (define_expand "vec_interleave_highv2df"
3903 [(set (match_operand:V2DF 0 "register_operand" "")
3906 (match_operand:V2DF 1 "nonimmediate_operand" "")
3907 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3908 (parallel [(const_int 1)
3912 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3913 operands[2] = force_reg (V2DFmode, operands[2]);
;; SSE3 insn for the V2DF "interleave high" selection, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the operands.
;; Alternatives, read from the constraint strings:
;;   0/1: register operands -> unpckhpd / vunpckhpd
;;   2:   operand 1 in offsettable memory, operand 2 identical to it
;;        -> movddup from %H1
;;   3/4: operand 1 in offsettable memory -> movlpd / vmovlpd from %H1
;;   5:   memory destination tied to operand 2, operand 1 in a register
;;        -> movhpd store
3916 (define_insn "*sse3_interleave_highv2df"
3917 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3920 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
3921 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
3922 (parallel [(const_int 1)
3924 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3926 unpckhpd\t{%2, %0|%0, %2}
3927 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3928 %vmovddup\t{%H1, %0|%0, %H1}
3929 movlpd\t{%H1, %0|%0, %H1}
3930 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3931 %vmovhpd\t{%1, %0|%0, %1}"
3932 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3933 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3934 (set_attr "prefix_data16" "*,*,*,1,*,1")
3935 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3936 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; SSE2 insn for the V2DF "interleave high" selection, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the operands.
;; Alternatives, read from the constraint strings:
;;   0: register operands, operand 1 tied to the destination -> unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied to the destination
;;      -> movlpd from %H1
;;   2: memory destination tied to operand 2, operand 1 in a register
;;      -> movhpd store
3938 (define_insn "*sse2_interleave_highv2df"
3939 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3942 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3943 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3944 (parallel [(const_int 1)
3946 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3948 unpckhpd\t{%2, %0|%0, %2}
3949 movlpd\t{%H1, %0|%0, %H1}
3950 movhpd\t{%1, %0|%0, %1}"
3951 [(set_attr "type" "sselog,ssemov,ssemov")
3952 (set_attr "prefix_data16" "*,1,1")
3953 (set_attr "mode" "V2DF,V1DF,V1DF")])
3955 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Named expander for V4DF whose selection uses the index list
;; {0, 4, 2, 6}, the same list as avx_unpcklpd256.  Only operand 1
;; (register or memory) appears as a source operand.
3956 (define_expand "avx_movddup256"
3957 [(set (match_operand:V4DF 0 "register_operand" "")
3960 (match_operand:V4DF 1 "nonimmediate_operand" "")
3962 (parallel [(const_int 0) (const_int 4)
3963 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit unpcklpd on V4DF: the selection over
;; operands 1 (register) and 2 (register or memory) uses the index list
;; {0, 4, 2, 6}.
3966 (define_expand "avx_unpcklpd256"
3967 [(set (match_operand:V4DF 0 "register_operand" "")
3970 (match_operand:V4DF 1 "register_operand" "")
3971 (match_operand:V4DF 2 "nonimmediate_operand" ""))
3972 (parallel [(const_int 0) (const_int 4)
3973 (const_int 2) (const_int 6)])))]
;; Insn for the V4DF selection with index list {0, 4, 2, 6}.
;; The condition allows operand 1 to be memory only when it is identical
;; to operand 2.
;;   0: operand 2 is the same as operand 1 (constraint "1") -> vmovddup
;;   1: operand 1 in a register, operand 2 register or memory -> vunpcklpd
3976 (define_insn "*avx_unpcklpd256"
3977 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
3980 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
3981 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
3982 (parallel [(const_int 0) (const_int 4)
3983 (const_int 2) (const_int 6)])))]
3985 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
3987 vmovddup\t{%1, %0|%0, %1}
3988 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
3989 [(set_attr "type" "sselog")
3990 (set_attr "prefix" "vex")
3991 (set_attr "mode" "V4DF")])
;; Multi-step expander for V4DF.  The template holds two selections with
;; the index lists {0, 4, 2, 6} and {1, 5, 3, 7}, followed by the final
;; set of operand 0, which uses the index list {0, 1, 4, 5}.
;; The preparation code allocates two fresh V4DF pseudos as operands 3
;; and 4.
;; NOTE(review): operands 3 and 4 presumably receive the two intermediate
;; selections and feed the final one; confirm against the full template.
3993 (define_expand "vec_interleave_lowv4df"
3997 (match_operand:V4DF 1 "register_operand" "x")
3998 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3999 (parallel [(const_int 0) (const_int 4)
4000 (const_int 2) (const_int 6)])))
4006 (parallel [(const_int 1) (const_int 5)
4007 (const_int 3) (const_int 7)])))
4008 (set (match_operand:V4DF 0 "register_operand" "")
4013 (parallel [(const_int 0) (const_int 1)
4014 (const_int 4) (const_int 5)])))]
4017 operands[3] = gen_reg_rtx (V4DFmode);
4018 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for V2DF with nonimmediate operands 1 and 2.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register.
4021 (define_expand "vec_interleave_lowv2df"
4022 [(set (match_operand:V2DF 0 "register_operand" "")
4025 (match_operand:V2DF 1 "nonimmediate_operand" "")
4026 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4027 (parallel [(const_int 0)
4031 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4032 operands[1] = force_reg (V2DFmode, operands[1]);
;; SSE3 insn for the V2DF "interleave low" selection, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) accepts the operands.
;; Alternatives, read from the constraint strings:
;;   0/1: register operands -> unpcklpd / vunpcklpd
;;   2:   operand 1 in memory, operand 2 identical to it -> movddup
;;   3/4: operand 2 in memory -> movhpd / vmovhpd
;;   5:   offsettable memory destination tied to operand 1, operand 2 in a
;;        register -> movlpd store to %H0
4035 (define_insn "*sse3_interleave_lowv2df"
4036 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4039 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4040 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4041 (parallel [(const_int 0)
4043 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4045 unpcklpd\t{%2, %0|%0, %2}
4046 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4047 %vmovddup\t{%1, %0|%0, %1}
4048 movhpd\t{%2, %0|%0, %2}
4049 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4050 %vmovlpd\t{%2, %H0|%H0, %2}"
4051 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
4052 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4053 (set_attr "prefix_data16" "*,*,*,1,*,1")
4054 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4055 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; SSE2 insn for the V2DF "interleave low" selection, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) accepts the operands.
;; Operand 1 is tied to the destination in every alternative.
;;   0: operand 2 in a register -> unpcklpd
;;   1: operand 2 in memory     -> movhpd
;;   2: offsettable memory destination, operand 2 in a register
;;      -> movlpd store to %H0
4057 (define_insn "*sse2_interleave_lowv2df"
4058 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4061 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4062 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4063 (parallel [(const_int 0)
4065 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4067 unpcklpd\t{%2, %0|%0, %2}
4068 movhpd\t{%2, %0|%0, %2}
4069 movlpd\t{%2, %H0|%H0, %2}"
4070 [(set_attr "type" "sselog,ssemov,ssemov")
4071 (set_attr "prefix_data16" "*,1,1")
4072 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload split (condition: TARGET_SSE3 && reload_completed) for a
;; V2DF selection with a memory destination and a register source.
;; The replacement stores the DFmode view of the source register twice,
;; at byte offsets 0 and 8 of the destination.
4075 [(set (match_operand:V2DF 0 "memory_operand" "")
4078 (match_operand:V2DF 1 "register_operand" "")
4080 (parallel [(const_int 0)
4082 "TARGET_SSE3 && reload_completed"
4085 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4086 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4087 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split for a V2DF selection whose source is memory and whose two
;; selectors (operands 2 and 3) satisfy operand 3 == operand 2 + 2, with
;; operand 2 restricted to 0..1.
;; The replacement is a vec_duplicate of a single DF value: operand 1 is
;; narrowed to DFmode at byte offset 8 * operand 2.
4092 [(set (match_operand:V2DF 0 "register_operand" "")
4095 (match_operand:V2DF 1 "memory_operand" "")
4097 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4098 (match_operand:SI 3 "const_int_operand" "")])))]
4099 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4100 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4102 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shuffle control as one integer immediate (operand 3)
;; and turning its bits into separate selector constants for
;; gen_avx_shufpd256_1: bit 1 chooses between 5 and 4, bit 2 between 3
;; and 2, and bit 3 between 7 and 6.
4105 (define_expand "avx_shufpd256"
4106 [(match_operand:V4DF 0 "register_operand" "")
4107 (match_operand:V4DF 1 "register_operand" "")
4108 (match_operand:V4DF 2 "nonimmediate_operand" "")
4109 (match_operand:SI 3 "const_int_operand" "")]
4112 int mask = INTVAL (operands[3]);
4113 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4115 GEN_INT (mask & 2 ? 5 : 4),
4116 GEN_INT (mask & 4 ? 3 : 2),
4117 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd on V4DF with four constant selectors: operand 3 in
;; 0..1, operand 4 in 4..5, operand 5 in 2..3 and operand 6 in 6..7.
;; The output code packs them into the instruction immediate, one bit per
;; selector (each rebased to 0/1 and placed at bits 0-3 in operand order),
;; and stores the result back into operands[3].
4121 (define_insn "avx_shufpd256_1"
4122 [(set (match_operand:V4DF 0 "register_operand" "=x")
4125 (match_operand:V4DF 1 "register_operand" "x")
4126 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4127 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4128 (match_operand 4 "const_4_to_5_operand" "")
4129 (match_operand 5 "const_2_to_3_operand" "")
4130 (match_operand 6 "const_6_to_7_operand" "")])))]
4134 mask = INTVAL (operands[3]);
4135 mask |= (INTVAL (operands[4]) - 4) << 1;
4136 mask |= (INTVAL (operands[5]) - 2) << 2;
4137 mask |= (INTVAL (operands[6]) - 6) << 3;
4138 operands[3] = GEN_INT (mask);
4140 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4142 [(set_attr "type" "sselog")
4143 (set_attr "length_immediate" "1")
4144 (set_attr "prefix" "vex")
4145 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle control as one integer immediate (operand 3)
;; and passing selector constants to gen_sse2_shufpd_v2df; bit 1 of the
;; immediate chooses between 3 and 2 for the last selector.
4147 (define_expand "sse2_shufpd"
4148 [(match_operand:V2DF 0 "register_operand" "")
4149 (match_operand:V2DF 1 "register_operand" "")
4150 (match_operand:V2DF 2 "nonimmediate_operand" "")
4151 (match_operand:SI 3 "const_int_operand" "")]
4154 int mask = INTVAL (operands[3]);
4155 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4157 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander for the SSEMODE_EO modes taking three register operands.
;; All work is delegated to ix86_expand_vec_extract_even_odd, called with
;; 0 as its last argument (the sibling vec_extract_odd<mode> passes 1).
4161 (define_expand "vec_extract_even<mode>"
4162 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4163 (match_operand:SSEMODE_EO 1 "register_operand" "")
4164 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4167 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander for the SSEMODE_EO modes taking three register operands.
;; All work is delegated to ix86_expand_vec_extract_even_odd, called with
;; 1 as its last argument (the sibling vec_extract_even<mode> passes 0).
4171 (define_expand "vec_extract_odd<mode>"
4172 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4173 (match_operand:SSEMODE_EO 1 "register_operand" "")
4174 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4177 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4181 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; punpckhqdq/vpunpckhqdq on V2DI.  Operand 1 must be a register; operand 2
;; may be a register or memory.  Alternative 0 is the legacy form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; VEX form.
4183 (define_insn "vec_interleave_highv2di"
4184 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4187 (match_operand:V2DI 1 "register_operand" "0,x")
4188 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4189 (parallel [(const_int 1)
4193 punpckhqdq\t{%2, %0|%0, %2}
4194 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4195 [(set_attr "isa" "noavx,avx")
4196 (set_attr "type" "sselog")
4197 (set_attr "prefix_data16" "1,*")
4198 (set_attr "prefix" "orig,vex")
4199 (set_attr "mode" "TI")])
;; punpcklqdq/vpunpcklqdq on V2DI.  Operand 1 must be a register; operand 2
;; may be a register or memory.  Alternative 0 is the legacy form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; VEX form.
4201 (define_insn "vec_interleave_lowv2di"
4202 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4205 (match_operand:V2DI 1 "register_operand" "0,x")
4206 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4207 (parallel [(const_int 0)
4211 punpcklqdq\t{%2, %0|%0, %2}
4212 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4213 [(set_attr "isa" "noavx,avx")
4214 (set_attr "type" "sselog")
4215 (set_attr "prefix_data16" "1,*")
4216 (set_attr "prefix" "orig,vex")
4217 (set_attr "mode" "TI")])
;; shufpd/vshufpd for the SSEMODE2D modes, written as a selection from the
;; concatenation of operands 1 and 2 with two constant selectors:
;; operand 3 in 0..1 and operand 4 in 2..3.
;; The output code rebuilds the two-bit instruction immediate (operand 4
;; rebased by 2 and shifted into bit 1) and stores it in operands[3].
;; Alternative 0 is the legacy form with operand 1 tied to operand 0;
;; alternative 1 is the three-operand VEX form.
4219 (define_insn "sse2_shufpd_<mode>"
4220 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x,x")
4221 (vec_select:SSEMODE2D
4222 (vec_concat:<ssedoublesizemode>
4223 (match_operand:SSEMODE2D 1 "register_operand" "0,x")
4224 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm,xm"))
4225 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4226 (match_operand 4 "const_2_to_3_operand" "")])))]
4230 mask = INTVAL (operands[3]);
4231 mask |= (INTVAL (operands[4]) - 2) << 1;
4232 operands[3] = GEN_INT (mask);
4234 switch (which_alternative)
4237 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4239 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4244 [(set_attr "isa" "noavx,avx")
4245 (set_attr "type" "sselog")
4246 (set_attr "length_immediate" "1")
4247 (set_attr "prefix" "orig,vex")
4248 (set_attr "mode" "V2DF")])
4250 ;; Avoid combining registers from different units in a single alternative,
4251 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of the V2DF operand 1 into the DF operand 0.
;; The insn condition rejects the memory-to-memory combination.
;; Six alternatives, read from the constraint strings:
;;   0:   register source, memory destination (movhpd)
;;   1/2: register source, SSE register destination; alternative 1 ties
;;        the source to the destination (noavx), alternative 2 is the AVX
;;        form
;;   3-5: offsettable memory source, destination in an SSE register, an
;;        "f" register or an "r" register
;; "prefix_data16" is computed: the visible test is on alternative 0 when
;; TARGET_AVX is off, with "*" as the fallback value.
4252 (define_insn "sse2_storehpd"
4253 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4255 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4256 (parallel [(const_int 1)])))]
4257 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4259 %vmovhpd\t{%1, %0|%0, %1}
4261 vunpckhpd\t{%d1, %0|%0, %d1}
4265 [(set_attr "isa" "base,noavx,avx,base,base,base")
4266 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4267 (set (attr "prefix_data16")
4269 (and (eq_attr "alternative" "0")
4270 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4272 (const_string "*")))
4273 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4274 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (condition: TARGET_SSE2 && reload_completed) for
;; extracting element 1 of a V2DF held in memory into a DF register.
;; The replacement is a plain DF load from the source address plus 8 bytes.
4277 [(set (match_operand:DF 0 "register_operand" "")
4279 (match_operand:V2DF 1 "memory_operand" "")
4280 (parallel [(const_int 1)])))]
4281 "TARGET_SSE2 && reload_completed"
4282 [(set (match_dup 0) (match_dup 1))]
4283 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4285 ;; Avoid combining registers from different units in a single alternative,
4286 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of the V2DF operand 1 into the DF operand 0.
;; The insn condition rejects the memory-to-memory combination.
;; Five alternatives, read from the constraint strings:
;;   0:   register source, memory destination (movlpd)
;;   1:   register source, SSE register destination
;;   2-4: memory source, destination in an SSE register, an "f" register
;;        or an "r" register
;; Only alternative 0 sets "prefix_data16".
4287 (define_insn "sse2_storelpd"
4288 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4290 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4291 (parallel [(const_int 0)])))]
4292 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4294 %vmovlpd\t{%1, %0|%0, %1}
4299 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4300 (set_attr "prefix_data16" "1,*,*,*,*")
4301 (set_attr "prefix" "maybe_vex")
4302 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split (condition: TARGET_SSE2 && reload_completed) for
;; extracting element 0 of a V2DF into a DF register.  Operand 1 is
;; re-expressed in DFmode (gen_rtx_REG on its register number, or
;; gen_lowpart) and moved to operand 0 with an ordinary move.
4305 [(set (match_operand:DF 0 "register_operand" "")
4307 (match_operand:V2DF 1 "nonimmediate_operand" "")
4308 (parallel [(const_int 0)])))]
4309 "TARGET_SSE2 && reload_completed"
4312 rtx op1 = operands[1];
4314 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4316 op1 = gen_lowpart (DFmode, op1);
4317 emit_move_insn (operands[0], op1);
;; Expander front end for sse2_loadhpd.  It accepts nonimmediate operands,
;; lets ix86_fixup_binary_operands choose the destination actually used
;; (dst), emits sse2_loadhpd into dst, and copies dst to operands[0]
;; afterwards when the two differ.
4321 (define_expand "sse2_loadhpd_exp"
4322 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4325 (match_operand:V2DF 1 "nonimmediate_operand" "")
4326 (parallel [(const_int 0)]))
4327 (match_operand:DF 2 "nonimmediate_operand" "")))]
4330 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4332 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4334 /* Fix up the destination if needed. */
4335 if (dst != operands[0])
4336 emit_move_insn (operands[0], dst);
4341 ;; Avoid combining registers from different units in a single alternative,
4342 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_loadhpd: V2DF operand 0 is built from element 0 of V2DF operand 1
;; followed by DF operand 2.  Alternatives 0-4 target an SSE register
;; (movhpd / unpcklpd / shufpd and the VEX forms); alternatives 5-7 have an
;; offsettable memory destination tied to operand 1.  Operands 1 and 2 may
;; not both be memory.
4343 (define_insn "sse2_loadhpd"
4344 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4345 "=x,x,x,x,x,o,o ,o")
4348 (match_operand:V2DF 1 "nonimmediate_operand"
4349 " 0,x,0,x,x,0,0 ,0")
4350 (parallel [(const_int 0)]))
4351 (match_operand:DF 2 "nonimmediate_operand"
4352 " m,m,x,x,0,x,*f,r")))]
4353 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4355 movhpd\t{%2, %0|%0, %2}
4356 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4357 unpcklpd\t{%2, %0|%0, %2}
4358 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4359 shufpd\t{$1, %1, %0|%0, %1, 1}
4363 [(set_attr "isa" "noavx,avx,noavx,avx,noavx,base,base,base")
4364 (set_attr "type" "ssemov,ssemov,sselog,sselog,sselog,ssemov,fmov,imov")
4365 (set_attr "prefix_data16" "1,*,*,*,*,*,*,*")
4366 (set_attr "length_immediate" "*,*,*,*,1,*,*,*")
4367 (set_attr "prefix" "orig,vex,orig,vex,orig,*,*,*")
4368 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split for a memory destination whose element 0 is kept
;; (match_dup 0): only the other element changes, so the insn becomes a
;; DFmode store of operand 1 at byte offset 8 of the destination.
4371 [(set (match_operand:V2DF 0 "memory_operand" "")
4373 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4374 (match_operand:DF 1 "register_operand" "")))]
4375 "TARGET_SSE2 && reload_completed"
4376 [(set (match_dup 0) (match_dup 1))]
4377 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; sse2_loadlpd_exp: expander wrapper around sse2_loadlpd.  The operands are
;; passed through ix86_fixup_binary_operands, the insn is emitted into the
;; destination it returns, and that destination is copied to operand 0 when
;; the two differ.
4379 (define_expand "sse2_loadlpd_exp"
4380 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4382 (match_operand:DF 2 "nonimmediate_operand" "")
4384 (match_operand:V2DF 1 "nonimmediate_operand" "")
4385 (parallel [(const_int 1)]))))]
4388 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4390 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4392 /* Fix up the destination if needed. */
4393 if (dst != operands[0])
4394 emit_move_insn (operands[0], dst);
4399 ;; Avoid combining registers from different units in a single alternative,
4400 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_loadlpd: V2DF operand 0 is built from DF operand 2 followed by
;; element 1 of V2DF operand 1.  Alternatives 0-7 target an SSE register;
;; alternatives 8-10 have a memory destination tied to operand 1.
;; Operand 1 uses vector_move_operand and the "C" constraint in
;; alternative 0, where a single %vmovsd load of operand 2 is emitted.
;; Operands 1 and 2 may not both be memory.
4401 (define_insn "sse2_loadlpd"
4402 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4403 "=x,x,x,x,x,x,x,x,m,m ,m")
4405 (match_operand:DF 2 "nonimmediate_operand"
4406 " m,m,m,x,x,0,0,x,x,*f,r")
4408 (match_operand:V2DF 1 "vector_move_operand"
4409 " C,0,x,0,x,x,o,o,0,0 ,0")
4410 (parallel [(const_int 1)]))))]
4411 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4413 %vmovsd\t{%2, %0|%0, %2}
4414 movlpd\t{%2, %0|%0, %2}
4415 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4416 movsd\t{%2, %0|%0, %2}
4417 vmovsd\t{%2, %1, %0|%0, %1, %2}
4418 shufpd\t{$2, %1, %0|%0, %1, 2}
4419 movhpd\t{%H1, %0|%0, %H1}
4420 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4424 [(set_attr "isa" "base,noavx,avx,noavx,avx,noavx,noavx,avx,base,base,base")
4425 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov,fmov,imov")
4426 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4427 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4428 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4429 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split for sse2_loadlpd with a memory destination.  DF
;; operand 1 supplies the first element, while the second element is the
;; destination's own element 1 (match_dup 0), so the whole operation is a
;; single DFmode store of operand 1.  That store must go to byte offset 0:
;; offset 8 is where element 1 lives (compare the sse2_storehpd and
;; sse2_loadhpd splits, which use offset 8 for element 1), and storing
;; there would overwrite the half this pattern keeps while leaving
;; element 0 stale.
4432 [(set (match_operand:V2DF 0 "memory_operand" "")
4434 (match_operand:DF 1 "register_operand" "")
4435 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4436 "TARGET_SSE2 && reload_completed"
4437 [(set (match_dup 0) (match_dup 1))]
4438 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4440 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; *vec_extractv2df_1_sse: extract element 1 of V2DF operand 1 into DF
;; operand 0 when SSE is available but SSE2 is not, using the single-
;; precision moves movhps / movhlps / movlps.  The memory-source alternative
;; reads through %H1.
4442 (define_insn "*vec_extractv2df_1_sse"
4443 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4445 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4446 (parallel [(const_int 1)])))]
4447 "!TARGET_SSE2 && TARGET_SSE
4448 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4450 movhps\t{%1, %0|%0, %1}
4451 movhlps\t{%1, %0|%0, %1}
4452 movlps\t{%H1, %0|%0, %H1}"
4453 [(set_attr "type" "ssemov")
4454 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_extractv2df_0_sse: extract element 0 of V2DF operand 1 into DF
;; operand 0 when SSE is available but SSE2 is not.  Stores and loads use
;; movlps; the register-to-register alternative copies the whole register
;; with movaps.
4456 (define_insn "*vec_extractv2df_0_sse"
4457 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4459 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4460 (parallel [(const_int 0)])))]
4461 "!TARGET_SSE2 && TARGET_SSE
4462 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4464 movlps\t{%1, %0|%0, %1}
4465 movaps\t{%1, %0|%0, %1}
4466 movlps\t{%1, %0|%0, %1}"
4467 [(set_attr "type" "ssemov")
4468 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_movsd: combines V2DF operands 2 and 1 into V2DF operand 0.
;; NOTE(review): the combining operator and the insn condition are not
;; visible in this copy -- confirm against the full file.
;; There are nine alternatives, one output template each; the "isa"
;; attribute records which of them are non-AVX only, AVX only, or valid for
;; both.  The last alternative stores operand 1 through %H0.
4470 (define_insn "sse2_movsd"
4471 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4473 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4474 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4478 movsd\t{%2, %0|%0, %2}
4479 vmovsd\t{%2, %1, %0|%0, %1, %2}
4480 movlpd\t{%2, %0|%0, %2}
4481 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4482 %vmovlpd\t{%2, %0|%0, %2}
4483 shufpd\t{$2, %1, %0|%0, %1, 2}
4484 movhps\t{%H1, %0|%0, %H1}
4485 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4486 %vmovhps\t{%1, %H0|%H0, %1}"
4487 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx,avx,base")
4488 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4489 (set (attr "prefix_data16")
4491 (and (eq_attr "alternative" "2,4")
4492 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4494 (const_string "*")))
4495 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4496 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4497 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; *vec_dupv2df_sse3: produce V2DF register operand 0 from DF operand 1
;; (register or memory) with a single %vmovddup.
;; NOTE(review): the operator line and insn condition are not visible in
;; this copy; the name suggests a vec_duplicate guarded by SSE3 -- confirm.
4499 (define_insn "*vec_dupv2df_sse3"
4500 [(set (match_operand:V2DF 0 "register_operand" "=x")
4502 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4504 "%vmovddup\t{%1, %0|%0, %1}"
4505 [(set_attr "type" "sselog1")
4506 (set_attr "prefix" "maybe_vex")
4507 (set_attr "mode" "DF")])
;; vec_dupv2df: named pattern producing V2DF register operand 0 from DF
;; register operand 1, which is tied to the destination by the "0"
;; constraint.
;; NOTE(review): the operator, condition and output template lines are not
;; visible in this copy.
4509 (define_insn "vec_dupv2df"
4510 [(set (match_operand:V2DF 0 "register_operand" "=x")
4512 (match_operand:DF 1 "register_operand" "0")))]
4515 [(set_attr "type" "sselog1")
4516 (set_attr "mode" "V2DF")])
;; *vec_concatv2df_sse3: produce V2DF register operand 0 from DF operand 1
;; (register or memory) with a single %vmovddup.
;; NOTE(review): the second concatenation operand and the insn condition
;; are not visible in this copy; presumably it is (match_dup 1) -- confirm.
4518 (define_insn "*vec_concatv2df_sse3"
4519 [(set (match_operand:V2DF 0 "register_operand" "=x")
4521 (match_operand:DF 1 "nonimmediate_operand" "xm")
4524 "%vmovddup\t{%1, %0|%0, %1}"
4525 [(set_attr "type" "sselog1")
4526 (set_attr "prefix" "maybe_vex")
4527 (set_attr "mode" "DF")])
;; *vec_concatv2df: build V2DF register operand 0 from DF operands 1 and 2.
;; Seven alternatives: unpcklpd / movhpd (and VEX forms) merge operand 2
;; into a destination tied to or alongside operand 1; alternative 4 pairs a
;; memory operand 1 with the "C" constraint on operand 2 and emits a plain
;; %vmovsd load of operand 1; the last two use movlhps / movhps.
4529 (define_insn "*vec_concatv2df"
4530 [(set (match_operand:V2DF 0 "register_operand" "=Y2,x,Y2,x,Y2,x,x")
4532 (match_operand:DF 1 "nonimmediate_operand" " 0 ,x,0 ,x,m ,0,0")
4533 (match_operand:DF 2 "vector_move_operand" " Y2,x,m ,m,C ,x,m")))]
4536 unpcklpd\t{%2, %0|%0, %2}
4537 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4538 movhpd\t{%2, %0|%0, %2}
4539 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4540 %vmovsd\t{%1, %0|%0, %1}
4541 movlhps\t{%2, %0|%0, %2}
4542 movhps\t{%2, %0|%0, %2}"
4543 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx")
4544 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,ssemov,ssemov")
4545 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4546 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4547 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4549 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4551 ;; Parallel integral arithmetic
4553 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; neg<mode>2: negation expander for the 128-bit integer vector modes in
;; VI_128.  The preparation statement creates operand 2 as a register
;; holding the all-zero constant of the vector mode.
;; NOTE(review): the operator line is not visible in this copy; presumably
;; the pattern subtracts operand 1 from operand 2 -- confirm.
4555 (define_expand "neg<mode>2"
4556 [(set (match_operand:VI_128 0 "register_operand" "")
4559 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4561 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; <plusminus_insn><mode>3: named expander for the <plusminus_insn>
;; operation on the VI_128 modes.  Its only preparation step is to
;; canonicalise the operands for <CODE> with
;; ix86_fixup_binary_operands_no_copy.
4563 (define_expand "<plusminus_insn><mode>3"
4564 [(set (match_operand:VI_128 0 "register_operand" "")
4566 (match_operand:VI_128 1 "nonimmediate_operand" "")
4567 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
4569 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; *<plusminus_insn><mode>3: matching insn for the expander of the same
;; name.  Alternative 0 is the non-AVX two-operand form with operand 1 tied
;; to the destination; alternative 1 is the AVX three-operand form.
;; Operand 2 may be a register or memory in both.
4571 (define_insn "*<plusminus_insn><mode>3"
4572 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
4574 (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x")
4575 (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))]
4576 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4578 p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}
4579 vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4580 [(set_attr "isa" "noavx,avx")
4581 (set_attr "type" "sseiadd")
4582 (set_attr "prefix_data16" "1,*")
4583 (set_attr "prefix" "orig,vex")
4584 (set_attr "mode" "TI")])
;; sse2_<plusminus_insn><mode>3: expander for the sat_plusminus codes on the
;; byte and word vector modes (VI12_128).  It only canonicalises the
;; operands for <CODE> with ix86_fixup_binary_operands_no_copy.
4586 (define_expand "sse2_<plusminus_insn><mode>3"
4587 [(set (match_operand:VI12_128 0 "register_operand" "")
4588 (sat_plusminus:VI12_128
4589 (match_operand:VI12_128 1 "nonimmediate_operand" "")
4590 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
4592 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; *sse2_<plusminus_insn><mode>3: matching insn for the sat_plusminus
;; expander on VI12_128.  Alternative 0 is the non-AVX form with operand 1
;; tied to the destination; alternative 1 is the AVX three-operand form.
4594 (define_insn "*sse2_<plusminus_insn><mode>3"
4595 [(set (match_operand:VI12_128 0 "register_operand" "=x,x")
4596 (sat_plusminus:VI12_128
4597 (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x")
4598 (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))]
4599 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4601 p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}
4602 vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4603 [(set_attr "isa" "noavx,avx")
4604 (set_attr "type" "sseiadd")
4605 (set_attr "prefix_data16" "1,*")
4606 (set_attr "prefix" "orig,vex")
4607 (set_attr "mode" "TI")])
;; mulv16qi3: V16QI multiply, synthesised while pseudos can still be
;; created.  Each operand is interleaved with itself (high and low halves),
;; the resulting halves are multiplied as V8HI words via gen_mulv8hi3, and
;; the even bytes of the two products are gathered into operand 0 by
;; ix86_expand_vec_extract_even_odd.
4609 (define_insn_and_split "mulv16qi3"
4610 [(set (match_operand:V16QI 0 "register_operand" "")
4611 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4612 (match_operand:V16QI 2 "register_operand" "")))]
4614 && can_create_pseudo_p ()"
4622 for (i = 0; i < 6; ++i)
4623 t[i] = gen_reg_rtx (V16QImode);
4625 /* Unpack data such that we've got a source byte in each low byte of
4626 each word. We don't care what goes into the high byte of each word.
4627 Rather than trying to get zero in there, most convenient is to let
4628 it be a copy of the low byte. */
4629 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4630 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4631 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4632 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4634 /* Multiply words. The end-of-line annotations here give a picture of what
4635 the output of that instruction looks like. Dot means don't care; the
4636 letters are the bytes of the result with A being the most significant. */
4637 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4638 gen_lowpart (V8HImode, t[0]),
4639 gen_lowpart (V8HImode, t[1])));
4640 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4641 gen_lowpart (V8HImode, t[2]),
4642 gen_lowpart (V8HImode, t[3])));
4644 /* Extract the even bytes and merge them back together. */
4645 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; mulv8hi3: named expander for the V8HI multiply.  It only canonicalises
;; the operands for MULT with ix86_fixup_binary_operands_no_copy.
4649 (define_expand "mulv8hi3"
4650 [(set (match_operand:V8HI 0 "register_operand" "")
4651 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4652 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4654 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *mulv8hi3: V8HI multiply emitted as pmullw (non-AVX, operand 1 tied to
;; the destination) or vpmullw (AVX, three operands).  The "%" on operand 1
;; marks operands 1 and 2 as commutative.
4656 (define_insn "*mulv8hi3"
4657 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4658 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4659 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
4660 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4662 pmullw\t{%2, %0|%0, %2}
4663 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4664 [(set_attr "isa" "noavx,avx")
4665 (set_attr "type" "sseimul")
4666 (set_attr "prefix_data16" "1,*")
4667 (set_attr "prefix" "orig,vex")
4668 (set_attr "mode" "TI")])
;; <s>mulv8hi3_highpart: named expander for the V8HI high-part multiply,
;; instantiated through the <s> attribute.  It only canonicalises the
;; operands for MULT with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the extension, multiply, shift and truncate wrapper lines
;; of the pattern are not visible in this copy.
4670 (define_expand "<s>mulv8hi3_highpart"
4671 [(set (match_operand:V8HI 0 "register_operand" "")
4676 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4678 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4681 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *<s>mulv8hi3_highpart: matching insn, emitted as pmulh<u>w (non-AVX,
;; operand 1 tied to the destination) or vpmulh<u>w (AVX, three operands).
;; Operands 1 and 2 are commutative ("%").
4683 (define_insn "*<s>mulv8hi3_highpart"
4684 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4689 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
4691 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
4693 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4695 pmulh<u>w\t{%2, %0|%0, %2}
4696 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4697 [(set_attr "isa" "noavx,avx")
4698 (set_attr "type" "sseimul")
4699 (set_attr "prefix_data16" "1,*")
4700 (set_attr "prefix" "orig,vex")
4701 (set_attr "mode" "TI")])
;; sse2_umulv2siv2di3: expander producing a V2DI result from elements 0 and
;; 2 of each V4SI operand (see the two parallel selectors).  It only
;; canonicalises the operands for MULT with
;; ix86_fixup_binary_operands_no_copy.
4703 (define_expand "sse2_umulv2siv2di3"
4704 [(set (match_operand:V2DI 0 "register_operand" "")
4708 (match_operand:V4SI 1 "nonimmediate_operand" "")
4709 (parallel [(const_int 0) (const_int 2)])))
4712 (match_operand:V4SI 2 "nonimmediate_operand" "")
4713 (parallel [(const_int 0) (const_int 2)])))))]
4715 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; *sse2_umulv2siv2di3: matching insn, emitted as pmuludq (non-AVX, operand
;; 1 tied to the destination) or vpmuludq (AVX).  Elements 0 and 2 of each
;; V4SI source are selected; operands 1 and 2 are commutative ("%").
4717 (define_insn "*sse2_umulv2siv2di3"
4718 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4722 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4723 (parallel [(const_int 0) (const_int 2)])))
4726 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4727 (parallel [(const_int 0) (const_int 2)])))))]
4728 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4730 pmuludq\t{%2, %0|%0, %2}
4731 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4732 [(set_attr "isa" "noavx,avx")
4733 (set_attr "type" "sseimul")
4734 (set_attr "prefix_data16" "1,*")
4735 (set_attr "prefix" "orig,vex")
4736 (set_attr "mode" "TI")])
;; sse4_1_mulv2siv2di3: expander producing a V2DI result from elements 0 and
;; 2 of each V4SI operand.  It only canonicalises the operands for MULT with
;; ix86_fixup_binary_operands_no_copy.
4738 (define_expand "sse4_1_mulv2siv2di3"
4739 [(set (match_operand:V2DI 0 "register_operand" "")
4743 (match_operand:V4SI 1 "nonimmediate_operand" "")
4744 (parallel [(const_int 0) (const_int 2)])))
4747 (match_operand:V4SI 2 "nonimmediate_operand" "")
4748 (parallel [(const_int 0) (const_int 2)])))))]
4750 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; *sse4_1_mulv2siv2di3: matching insn, requires TARGET_SSE4_1; emitted as
;; pmuldq (non-AVX, operand 1 tied to the destination) or vpmuldq (AVX).
;; Elements 0 and 2 of each V4SI source are selected.
4752 (define_insn "*sse4_1_mulv2siv2di3"
4753 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4757 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4758 (parallel [(const_int 0) (const_int 2)])))
4761 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4762 (parallel [(const_int 0) (const_int 2)])))))]
4763 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4765 pmuldq\t{%2, %0|%0, %2}
4766 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4767 [(set_attr "isa" "noavx,avx")
4768 (set_attr "type" "sseimul")
4769 (set_attr "prefix_data16" "1,*")
4770 (set_attr "prefix_extra" "1")
4771 (set_attr "prefix" "orig,vex")
4772 (set_attr "mode" "TI")])
;; sse2_pmaddwd: expander producing a V4SI result from two V8HI operands.
;; Each operand is sampled twice as a V4HI selection: once starting at
;; element 0 and once (through match_dup) starting at element 1 and ending
;; at element 7.
;; NOTE(review): the remaining selector indices and the arithmetic
;; operators are not visible in this copy; presumably even/odd element
;; products are summed -- confirm.
;; The preparation step canonicalises the operands for MULT.
4774 (define_expand "sse2_pmaddwd"
4775 [(set (match_operand:V4SI 0 "register_operand" "")
4780 (match_operand:V8HI 1 "nonimmediate_operand" "")
4781 (parallel [(const_int 0)
4787 (match_operand:V8HI 2 "nonimmediate_operand" "")
4788 (parallel [(const_int 0)
4794 (vec_select:V4HI (match_dup 1)
4795 (parallel [(const_int 1)
4800 (vec_select:V4HI (match_dup 2)
4801 (parallel [(const_int 1)
4804 (const_int 7)]))))))]
4806 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *sse2_pmaddwd: matching insn for sse2_pmaddwd, emitted as pmaddwd
;; (non-AVX, operand 1 tied to the destination) or vpmaddwd (AVX).
;; Operands 1 and 2 are commutative ("%").
4808 (define_insn "*sse2_pmaddwd"
4809 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4814 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4815 (parallel [(const_int 0)
4821 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
4822 (parallel [(const_int 0)
4828 (vec_select:V4HI (match_dup 1)
4829 (parallel [(const_int 1)
4834 (vec_select:V4HI (match_dup 2)
4835 (parallel [(const_int 1)
4838 (const_int 7)]))))))]
4839 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4841 pmaddwd\t{%2, %0|%0, %2}
4842 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
4843 [(set_attr "isa" "noavx,avx")
4844 (set_attr "type" "sseiadd")
4845 (set_attr "atom_unit" "simul")
4846 (set_attr "prefix_data16" "1,*")
4847 (set_attr "prefix" "orig,vex")
4848 (set_attr "mode" "TI")])
;; mulv4si3: named expander for the V4SI multiply on register operands.
;; The operands are canonicalised for MULT only when SSE4.1 or AVX is
;; enabled; otherwise the pattern is emitted unchanged.
4850 (define_expand "mulv4si3"
4851 [(set (match_operand:V4SI 0 "register_operand" "")
4852 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4853 (match_operand:V4SI 2 "register_operand" "")))]
4856 if (TARGET_SSE4_1 || TARGET_AVX)
4857 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; *sse4_1_mulv4si3: V4SI multiply for TARGET_SSE4_1, emitted as pmulld
;; (non-AVX, operand 1 tied to the destination) or vpmulld (AVX).
;; Operands 1 and 2 are commutative ("%").
4860 (define_insn "*sse4_1_mulv4si3"
4861 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4862 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4863 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))]
4864 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4866 pmulld\t{%2, %0|%0, %2}
4867 vpmulld\t{%2, %1, %0|%0, %1, %2}"
4868 [(set_attr "isa" "noavx,avx")
4869 (set_attr "type" "sseimul")
4870 (set_attr "prefix_extra" "1")
4871 (set_attr "prefix" "orig,vex")
4872 (set_attr "mode" "TI")])
;; *sse2_mulv4si3: V4SI multiply for SSE2 targets that have neither SSE4.1
;; nor AVX, split while pseudos can still be created.  Two
;; gen_sse2_umulv2siv2di3 multiplies are issued -- one on the inputs as
;; given, one on the inputs shifted down by a whole-register right shift --
;; and the products are shuffled with pshufd and interleaved into the
;; destination.
4874 (define_insn_and_split "*sse2_mulv4si3"
4875 [(set (match_operand:V4SI 0 "register_operand" "")
4876 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4877 (match_operand:V4SI 2 "register_operand" "")))]
4878 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
4879 && can_create_pseudo_p ()"
4884 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4890 t1 = gen_reg_rtx (V4SImode);
4891 t2 = gen_reg_rtx (V4SImode);
4892 t3 = gen_reg_rtx (V4SImode);
4893 t4 = gen_reg_rtx (V4SImode);
4894 t5 = gen_reg_rtx (V4SImode);
4895 t6 = gen_reg_rtx (V4SImode);
4896 thirtytwo = GEN_INT (32);
4898 /* Multiply elements 2 and 0. */
4899 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
4902 /* Shift both input vectors down one element, so that elements 3
4903 and 1 are now in the slots for elements 2 and 0. For K8, at
4904 least, this is faster than using a shuffle. */
4905 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
4906 gen_lowpart (V1TImode, op1),
4908 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
4909 gen_lowpart (V1TImode, op2),
4911 /* Multiply elements 3 and 1. */
4912 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
4915 /* Move the results in element 2 down to element 1; we don't care
4916 what goes in elements 2 and 3. */
4917 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
4918 const0_rtx, const0_rtx));
4919 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
4920 const0_rtx, const0_rtx));
4922 /* Merge the parts back together. */
4923 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; mulv2di3: V2DI multiply on register operands, split while pseudos can
;; still be created.  Two instruction sequences are present.  The first is
;; built from XOP generator functions (gen_xop_phadddq, gen_xop_pmacsdql)
;; on V4SI views of the inputs.  The second uses only SSE2 helpers: it
;; multiplies the low 32-bit parts, forms the two cross products from
;; inputs shifted right by 32, shifts those products back left by 32 and
;; adds the three partial results.
;; NOTE(review): the test selecting between the sequences is not visible
;; in this copy.
4927 (define_insn_and_split "mulv2di3"
4928 [(set (match_operand:V2DI 0 "register_operand" "")
4929 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
4930 (match_operand:V2DI 2 "register_operand" "")))]
4932 && can_create_pseudo_p ()"
4937 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4946 /* op1: A,B,C,D, op2: E,F,G,H */
4947 op1 = gen_lowpart (V4SImode, op1);
4948 op2 = gen_lowpart (V4SImode, op2);
4950 t1 = gen_reg_rtx (V4SImode);
4951 t2 = gen_reg_rtx (V4SImode);
4952 t3 = gen_reg_rtx (V2DImode);
4953 t4 = gen_reg_rtx (V2DImode);
4956 emit_insn (gen_sse2_pshufd_1 (t1, op1,
4962 /* t2: (B*E),(A*F),(D*G),(C*H) */
4963 emit_insn (gen_mulv4si3 (t2, t1, op2));
4965 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
4966 emit_insn (gen_xop_phadddq (t3, t2));
4968 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
4969 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
4971 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
4972 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
4976 t1 = gen_reg_rtx (V2DImode);
4977 t2 = gen_reg_rtx (V2DImode);
4978 t3 = gen_reg_rtx (V2DImode);
4979 t4 = gen_reg_rtx (V2DImode);
4980 t5 = gen_reg_rtx (V2DImode);
4981 t6 = gen_reg_rtx (V2DImode);
4982 thirtytwo = GEN_INT (32);
4984 /* Multiply low parts. */
4985 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
4986 gen_lowpart (V4SImode, op2)));
4988 /* Shift input vectors right 32 bits so we can multiply high parts. */
4989 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
4990 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
4992 /* Multiply high parts by low parts. */
4993 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
4994 gen_lowpart (V4SImode, t3)));
4995 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
4996 gen_lowpart (V4SImode, t2)));
4998 /* Shift them back. */
4999 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5000 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5002 /* Add the three parts together. */
5003 emit_insn (gen_addv2di3 (t6, t1, t4));
5004 emit_insn (gen_addv2di3 (op0, t6, t5));
;; vec_widen_smult_hi_v8hi: V8HI x V8HI -> V4SI expander.  It computes the
;; word products with gen_mulv8hi3 (t1) and the signed high parts with
;; gen_smulv8hi3_highpart (t2), then interleaves the high halves of t1 and
;; t2 into operand 0 viewed as V8HI.
5009 (define_expand "vec_widen_smult_hi_v8hi"
5010 [(match_operand:V4SI 0 "register_operand" "")
5011 (match_operand:V8HI 1 "register_operand" "")
5012 (match_operand:V8HI 2 "register_operand" "")]
5015 rtx op1, op2, t1, t2, dest;
5019 t1 = gen_reg_rtx (V8HImode);
5020 t2 = gen_reg_rtx (V8HImode);
5021 dest = gen_lowpart (V8HImode, operands[0]);
5023 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5024 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5025 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; vec_widen_smult_lo_v8hi: V8HI x V8HI -> V4SI expander.  It computes the
;; word products with gen_mulv8hi3 (t1) and the signed high parts with
;; gen_smulv8hi3_highpart (t2), then interleaves the low halves of t1 and
;; t2 into operand 0 viewed as V8HI.
5029 (define_expand "vec_widen_smult_lo_v8hi"
5030 [(match_operand:V4SI 0 "register_operand" "")
5031 (match_operand:V8HI 1 "register_operand" "")
5032 (match_operand:V8HI 2 "register_operand" "")]
5035 rtx op1, op2, t1, t2, dest;
5039 t1 = gen_reg_rtx (V8HImode);
5040 t2 = gen_reg_rtx (V8HImode);
5041 dest = gen_lowpart (V8HImode, operands[0]);
5043 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5044 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5045 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; vec_widen_umult_hi_v8hi: V8HI x V8HI -> V4SI expander.  It computes the
;; word products with gen_mulv8hi3 (t1) and the unsigned high parts with
;; gen_umulv8hi3_highpart (t2), then interleaves the high halves of t1 and
;; t2 into operand 0 viewed as V8HI.
5049 (define_expand "vec_widen_umult_hi_v8hi"
5050 [(match_operand:V4SI 0 "register_operand" "")
5051 (match_operand:V8HI 1 "register_operand" "")
5052 (match_operand:V8HI 2 "register_operand" "")]
5055 rtx op1, op2, t1, t2, dest;
5059 t1 = gen_reg_rtx (V8HImode);
5060 t2 = gen_reg_rtx (V8HImode);
5061 dest = gen_lowpart (V8HImode, operands[0]);
5063 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5064 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5065 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; vec_widen_umult_lo_v8hi: V8HI x V8HI -> V4SI expander.  It computes the
;; word products with gen_mulv8hi3 (t1) and the unsigned high parts with
;; gen_umulv8hi3_highpart (t2), then interleaves the low halves of t1 and
;; t2 into operand 0 viewed as V8HI.
5069 (define_expand "vec_widen_umult_lo_v8hi"
5070 [(match_operand:V4SI 0 "register_operand" "")
5071 (match_operand:V8HI 1 "register_operand" "")
5072 (match_operand:V8HI 2 "register_operand" "")]
5075 rtx op1, op2, t1, t2, dest;
5079 t1 = gen_reg_rtx (V8HImode);
5080 t2 = gen_reg_rtx (V8HImode);
5081 dest = gen_lowpart (V8HImode, operands[0]);
5083 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5084 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5085 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; vec_widen_smult_hi_v4si: V4SI x V4SI -> V2DI expander.  Both inputs are
;; first permuted with gen_sse2_pshufd_1 into fresh V4SI temporaries, which
;; are then multiplied by gen_xop_mulv2div2di3_high into operand 0.
;; NOTE(review): the shuffle selectors and the enabling condition are not
;; visible in this copy.
5089 (define_expand "vec_widen_smult_hi_v4si"
5090 [(match_operand:V2DI 0 "register_operand" "")
5091 (match_operand:V4SI 1 "register_operand" "")
5092 (match_operand:V4SI 2 "register_operand" "")]
5097 t1 = gen_reg_rtx (V4SImode);
5098 t2 = gen_reg_rtx (V4SImode);
5100 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5105 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5110 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; vec_widen_smult_lo_v4si: V4SI x V4SI -> V2DI expander.  Both inputs are
;; first permuted with gen_sse2_pshufd_1 into fresh V4SI temporaries, which
;; are then multiplied by gen_xop_mulv2div2di3_low into operand 0.
;; NOTE(review): the shuffle selectors and the enabling condition are not
;; visible in this copy.
5114 (define_expand "vec_widen_smult_lo_v4si"
5115 [(match_operand:V2DI 0 "register_operand" "")
5116 (match_operand:V4SI 1 "register_operand" "")
5117 (match_operand:V4SI 2 "register_operand" "")]
5122 t1 = gen_reg_rtx (V4SImode);
5123 t2 = gen_reg_rtx (V4SImode);
5125 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5130 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5135 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; vec_widen_umult_hi_v4si: V4SI x V4SI -> V2DI expander.  Each input is
;; interleaved with itself over its high half (gen_vec_interleave_highv4si)
;; and the two results are multiplied with gen_sse2_umulv2siv2di3, which
;; uses elements 0 and 2 of its V4SI inputs.
5139 (define_expand "vec_widen_umult_hi_v4si"
5140 [(match_operand:V2DI 0 "register_operand" "")
5141 (match_operand:V4SI 1 "register_operand" "")
5142 (match_operand:V4SI 2 "register_operand" "")]
5145 rtx op1, op2, t1, t2;
5149 t1 = gen_reg_rtx (V4SImode);
5150 t2 = gen_reg_rtx (V4SImode);
5152 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5153 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5154 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; vec_widen_umult_lo_v4si: V4SI x V4SI -> V2DI expander.  Each input is
;; interleaved with itself over its low half (gen_vec_interleave_lowv4si)
;; and the two results are multiplied with gen_sse2_umulv2siv2di3, which
;; uses elements 0 and 2 of its V4SI inputs.
5158 (define_expand "vec_widen_umult_lo_v4si"
5159 [(match_operand:V2DI 0 "register_operand" "")
5160 (match_operand:V4SI 1 "register_operand" "")
5161 (match_operand:V4SI 2 "register_operand" "")]
5164 rtx op1, op2, t1, t2;
5168 t1 = gen_reg_rtx (V4SImode);
5169 t2 = gen_reg_rtx (V4SImode);
5171 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5172 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5173 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; sdot_prodv8hi: operand 0 (V4SI) = operand 3 (V4SI) plus the
;; gen_sse2_pmaddwd result of V8HI operands 1 and 2, computed into a fresh
;; temporary.
5177 (define_expand "sdot_prodv8hi"
5178 [(match_operand:V4SI 0 "register_operand" "")
5179 (match_operand:V8HI 1 "register_operand" "")
5180 (match_operand:V8HI 2 "register_operand" "")
5181 (match_operand:V4SI 3 "register_operand" "")]
5184 rtx t = gen_reg_rtx (V4SImode);
5185 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5186 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; udot_prodv4si: V2DI operand 0 = operand 3 plus the products of V4SI
;; operands 1 and 2.  t1 accumulates operand 3 and the
;; gen_sse2_umulv2siv2di3 product of the inputs as given.  t2 and t3 are
;; the inputs shifted right as whole V1TI values, so that a second
;; gen_sse2_umulv2siv2di3 multiplies the remaining elements into t4.  The
;; result is t1 + t4.
;; NOTE(review): the shift amount is not visible in this copy.
5190 (define_expand "udot_prodv4si"
5191 [(match_operand:V2DI 0 "register_operand" "")
5192 (match_operand:V4SI 1 "register_operand" "")
5193 (match_operand:V4SI 2 "register_operand" "")
5194 (match_operand:V2DI 3 "register_operand" "")]
5199 t1 = gen_reg_rtx (V2DImode);
5200 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5201 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5203 t2 = gen_reg_rtx (V4SImode);
5204 t3 = gen_reg_rtx (V4SImode);
5205 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5206 gen_lowpart (V1TImode, operands[1]),
5208 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5209 gen_lowpart (V1TImode, operands[2]),
5212 t4 = gen_reg_rtx (V2DImode);
5213 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5215 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; ashr<mode>3: shift of the V8HI/V4SI modes (VI24_128) emitted as
;; psra<ssevecsize> (non-AVX, operand 1 tied to the destination) or
;; vpsra<ssevecsize> (AVX).  The SI shift count, operand 2, uses the "xN"
;; constraint; the length_immediate attribute depends on whether it is a
;; const_int.
5219 (define_insn "ashr<mode>3"
5220 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5222 (match_operand:VI24_128 1 "register_operand" "0,x")
5223 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5226 psra<ssevecsize>\t{%2, %0|%0, %2}
5227 vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5228 [(set_attr "isa" "noavx,avx")
5229 (set_attr "type" "sseishft")
5230 (set (attr "length_immediate")
5231 (if_then_else (match_operand 2 "const_int_operand" "")
5233 (const_string "0")))
5234 (set_attr "prefix_data16" "1,*")
5235 (set_attr "prefix" "orig,vex")
5236 (set_attr "mode" "TI")])
;; lshr<mode>3: shift of the V8HI/V4SI/V2DI modes (VI248_128) emitted as
;; psrl<ssevecsize> (non-AVX, operand 1 tied to the destination) or
;; vpsrl<ssevecsize> (AVX).  The SI shift count, operand 2, uses the "xN"
;; constraint; the length_immediate attribute depends on whether it is a
;; const_int.
5238 (define_insn "lshr<mode>3"
5239 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5241 (match_operand:VI248_128 1 "register_operand" "0,x")
5242 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5245 psrl<ssevecsize>\t{%2, %0|%0, %2}
5246 vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5247 [(set_attr "isa" "noavx,avx")
5248 (set_attr "type" "sseishft")
5249 (set (attr "length_immediate")
5250 (if_then_else (match_operand 2 "const_int_operand" "")
5252 (const_string "0")))
5253 (set_attr "prefix_data16" "1,*")
5254 (set_attr "prefix" "orig,vex")
5255 (set_attr "mode" "TI")])
;; ashl<mode>3: shift of the V8HI/V4SI/V2DI modes (VI248_128) emitted as
;; psll<ssevecsize> (non-AVX, operand 1 tied to the destination) or
;; vpsll<ssevecsize> (AVX).  The SI shift count, operand 2, uses the "xN"
;; constraint; the length_immediate attribute depends on whether it is a
;; const_int.
5257 (define_insn "ashl<mode>3"
5258 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5260 (match_operand:VI248_128 1 "register_operand" "0,x")
5261 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5264 psll<ssevecsize>\t{%2, %0|%0, %2}
5265 vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5266 [(set_attr "isa" "noavx,avx")
5267 (set_attr "type" "sseishft")
5268 (set (attr "length_immediate")
5269 (if_then_else (match_operand 2 "const_int_operand" "")
5271 (const_string "0")))
5272 (set_attr "prefix_data16" "1,*")
5273 (set_attr "prefix" "orig,vex")
5274 (set_attr "mode" "TI")])
;; vec_shl_<mode>: whole-vector shift expander for the VI_128 modes.  The
;; shift amount must satisfy const_0_to_255_mul_8_operand.  Operands 0 and
;; 1 are re-viewed in V1TImode with gen_lowpart before the pattern is
;; emitted.
5276 (define_expand "vec_shl_<mode>"
5277 [(set (match_operand:VI_128 0 "register_operand" "")
5279 (match_operand:VI_128 1 "register_operand" "")
5280 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5283 operands[0] = gen_lowpart (V1TImode, operands[0]);
5284 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; sse2_ashlv1ti3: V1TI shift emitted as pslldq (non-AVX, operand 1 tied to
;; the destination) or vpslldq (AVX).  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before being printed
;; as the instruction's immediate.
5287 (define_insn "sse2_ashlv1ti3"
5288 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5290 (match_operand:V1TI 1 "register_operand" "0,x")
5291 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5294 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5296 switch (which_alternative)
5299 return "pslldq\t{%2, %0|%0, %2}";
5301 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5306 [(set_attr "isa" "noavx,avx")
5307 (set_attr "type" "sseishft")
5308 (set_attr "length_immediate" "1")
5309 (set_attr "prefix_data16" "1,*")
5310 (set_attr "prefix" "orig,vex")
5311 (set_attr "mode" "TI")])
;; vec_shr_<mode>: whole-vector shift expander for the VI_128 modes.  The
;; shift amount must satisfy const_0_to_255_mul_8_operand.  Operands 0 and
;; 1 are re-viewed in V1TImode with gen_lowpart before the pattern is
;; emitted.
5313 (define_expand "vec_shr_<mode>"
5314 [(set (match_operand:VI_128 0 "register_operand" "")
5316 (match_operand:VI_128 1 "register_operand" "")
5317 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5320 operands[0] = gen_lowpart (V1TImode, operands[0]);
5321 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; sse2_lshrv1ti3: V1TI shift emitted as psrldq (non-AVX, operand 1 tied to
;; the destination) or vpsrldq (AVX).  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before being printed
;; as the instruction's immediate.
5324 (define_insn "sse2_lshrv1ti3"
5325 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5327 (match_operand:V1TI 1 "register_operand" "0,x")
5328 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5331 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5333 switch (which_alternative)
5336 return "psrldq\t{%2, %0|%0, %2}";
5338 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5343 [(set_attr "isa" "noavx,avx")
5344 (set_attr "type" "sseishft")
5345 (set_attr "length_immediate" "1")
5346 (set_attr "atom_unit" "sishuf")
5347 (set_attr "prefix_data16" "1,*")
5348 (set_attr "prefix" "orig,vex")
5349 (set_attr "mode" "TI")])
;; *sse4_1_<code><mode>3 (VI14_128 variant): max/min insn for the V16QI and
;; V4SI modes, requires TARGET_SSE4_1.  Emitted as p<maxmin_int><ssevecsize>
;; (non-AVX, operand 1 tied to the destination) or the vp... form (AVX).
;; NOTE(review): the code-iterator line is not visible in this copy.
5351 (define_insn "*sse4_1_<code><mode>3"
5352 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5354 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5355 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5356 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5358 p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}
5359 vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5360 [(set_attr "isa" "noavx,avx")
5361 (set_attr "type" "sseiadd")
5362 (set_attr "prefix_extra" "1,*")
5363 (set_attr "prefix" "orig,vex")
5364 (set_attr "mode" "TI")])
;; *<code>v8hi3: V8HI max/min insn available with plain TARGET_SSE2.
;; Emitted as p<maxmin_int>w (non-AVX, operand 1 tied to the destination) or
;; vp<maxmin_int>w (AVX).
;; NOTE(review): the code-iterator line is not visible in this copy.
5366 (define_insn "*<code>v8hi3"
5367 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5369 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5370 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5371 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5373 p<maxmin_int>w\t{%2, %0|%0, %2}
5374 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5375 [(set_attr "isa" "noavx,avx")
5376 (set_attr "type" "sseiadd")
5377 (set_attr "prefix_data16" "1,*")
5378 (set_attr "prefix_extra" "*,1")
5379 (set_attr "prefix" "orig,vex")
5380 (set_attr "mode" "TI")])
;; smax<mode>3: signed-maximum expander for V16QI and V4SI (VI14_128).  Two
;; paths are present.  One only canonicalises the operands for SMAX.  The
;; other builds a six-entry xops array -- the destination, operands 1 and 2
;; as the values to choose between, a GT comparison of operands 1 and 2,
;; and the two compared operands -- and passes it to ix86_expand_int_vcond.
;; NOTE(review): the test selecting between the paths is not visible in
;; this copy; presumably xops[1] is chosen where the comparison holds.
5382 (define_expand "smax<mode>3"
5383 [(set (match_operand:VI14_128 0 "register_operand" "")
5384 (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5385 (match_operand:VI14_128 2 "register_operand" "")))]
5389 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5395 xops[0] = operands[0];
5396 xops[1] = operands[1];
5397 xops[2] = operands[2];
5398 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5399 xops[4] = operands[1];
5400 xops[5] = operands[2];
5401 ok = ix86_expand_int_vcond (xops);
;; smin<mode>3: signed-minimum expander for V16QI and V4SI (VI14_128).  Two
;; paths are present.  One only canonicalises the operands for SMIN.  The
;; other builds the same xops array as smax<mode>3, including the same GT
;; comparison of operands 1 and 2, but with xops[1] and xops[2] swapped
;; (operand 2 first), and passes it to ix86_expand_int_vcond.
;; NOTE(review): the test selecting between the paths is not visible in
;; this copy.
5407 (define_expand "smin<mode>3"
5408 [(set (match_operand:VI14_128 0 "register_operand" "")
5409 (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5410 (match_operand:VI14_128 2 "register_operand" "")))]
5414 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5420 xops[0] = operands[0];
5421 xops[1] = operands[2];
5422 xops[2] = operands[1];
5423 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5424 xops[4] = operands[1];
5425 xops[5] = operands[2];
5426 ok = ix86_expand_int_vcond (xops);
;; <code>v8hi3: named expander for the V8HI operation selected by <code>.
;; It only canonicalises the operands for <CODE> with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code-iterator line and condition are not visible in
;; this copy.
5432 (define_expand "<code>v8hi3"
5433 [(set (match_operand:V8HI 0 "register_operand" "")
5435 (match_operand:V8HI 1 "nonimmediate_operand" "")
5436 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5438 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Signed maximum expander for V2DI, synthesized as a vector conditional:
;; xops is filled in vcond operand order and handed to
;; ix86_expand_int_vcond, giving dest = (op1 > op2) ? op1 : op2 with a
;; signed GT comparison.
5440 (define_expand "smaxv2di3"
5441 [(set (match_operand:V2DI 0 "register_operand" "")
5442 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
5443 (match_operand:V2DI 2 "register_operand" "")))]
5449 xops[0] = operands[0];
5450 xops[1] = operands[1];
5451 xops[2] = operands[2];
5452 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5453 xops[4] = operands[1];
5454 xops[5] = operands[2];
5455 ok = ix86_expand_int_vcond (xops);
;; Signed minimum expander for V2DI, synthesized as a vector conditional.
;; The comparison stays op1 > op2 (signed GT) while the selected values
;; are swapped, giving dest = (op1 > op2) ? op2 : op1.
5460 (define_expand "sminv2di3"
5461 [(set (match_operand:V2DI 0 "register_operand" "")
5462 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
5463 (match_operand:V2DI 2 "register_operand" "")))]
5469 xops[0] = operands[0];
5470 xops[1] = operands[2];
5471 xops[2] = operands[1];
5472 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5473 xops[4] = operands[1];
5474 xops[5] = operands[2];
5475 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 <code> insn for the VI24_128 modes (V8HI, V4SI).  Operands 1 and
;; 2 are commutative.  Alternative 0 (noavx) prints the two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination;
;; alternative 1 (avx) prints the three-operand VEX form.
5480 (define_insn "*sse4_1_<code><mode>3"
5481 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5483 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5484 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5485 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5487 p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}
5488 vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5489 [(set_attr "isa" "noavx,avx")
5490 (set_attr "type" "sseiadd")
5491 (set_attr "prefix_extra" "1,*")
5492 (set_attr "prefix" "orig,vex")
5493 (set_attr "mode" "TI")])
;; V16QI <code> insn, available from SSE2.  Operands 1 and 2 are
;; commutative.  Alternative 0 (noavx) prints the two-operand
;; p<maxmin_int>b with operand 1 tied to the destination; alternative 1
;; (avx) prints the three-operand vp<maxmin_int>b.
5495 (define_insn "*<code>v16qi3"
5496 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5498 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5499 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5500 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5502 p<maxmin_int>b\t{%2, %0|%0, %2}
5503 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5504 [(set_attr "isa" "noavx,avx")
5505 (set_attr "type" "sseiadd")
5506 (set_attr "prefix_data16" "1,*")
5507 (set_attr "prefix_extra" "*,1")
5508 (set_attr "prefix" "orig,vex")
5509 (set_attr "mode" "TI")])
;; Named expander for the V16QI <code> operation.  Both sources may be
;; register or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5511 (define_expand "<code>v16qi3"
5512 [(set (match_operand:V16QI 0 "register_operand" "")
5514 (match_operand:V16QI 1 "nonimmediate_operand" "")
5515 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5517 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; Unsigned maximum expander for V8HI.  One path only legitimizes the
;; operands with ix86_fixup_binary_operands_no_copy.  The other builds the
;; result from two insns: op3 = sse2_ussubv8hi3 (op1, op2), then
;; op0 = op3 + op2.  NOTE(review): sse2_ussubv8hi3 is presumably the
;; unsigned saturating subtract, which makes the sum equal max (op1, op2)
;; -- confirm against its definition.
;; op3 normally reuses the destination; when the destination is the same
;; rtx as operand 2 a fresh pseudo is used instead, so that the first insn
;; does not overwrite op2 before the add reads it.
5519 (define_expand "umaxv8hi3"
5520 [(set (match_operand:V8HI 0 "register_operand" "")
5521 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5522 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5526 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5529 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5530 if (rtx_equal_p (op3, op2))
5531 op3 = gen_reg_rtx (V8HImode);
5532 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5533 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Unsigned maximum expander for V4SI.  Either legitimizes the operands
;; with ix86_fixup_binary_operands_no_copy, or synthesizes the maximum
;; through ix86_expand_int_vcond using an unsigned GTU comparison:
;; dest = (op1 >u op2) ? op1 : op2.
5538 (define_expand "umaxv4si3"
5539 [(set (match_operand:V4SI 0 "register_operand" "")
5540 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5541 (match_operand:V4SI 2 "register_operand" "")))]
5545 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5551 xops[0] = operands[0];
5552 xops[1] = operands[1];
5553 xops[2] = operands[2];
5554 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5555 xops[4] = operands[1];
5556 xops[5] = operands[2];
5557 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum expander for the VI24_128 modes (V8HI, V4SI).
;; Either legitimizes the operands with ix86_fixup_binary_operands_no_copy,
;; or goes through ix86_expand_int_vcond with an unsigned GTU comparison
;; and the selected values swapped: dest = (op1 >u op2) ? op2 : op1.
5563 (define_expand "umin<mode>3"
5564 [(set (match_operand:VI24_128 0 "register_operand" "")
5565 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
5566 (match_operand:VI24_128 2 "register_operand" "")))]
5570 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5576 xops[0] = operands[0];
5577 xops[1] = operands[2];
5578 xops[2] = operands[1];
5579 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5580 xops[4] = operands[1];
5581 xops[5] = operands[2];
5582 ok = ix86_expand_int_vcond (xops);
;; Unsigned maximum expander for V2DI, synthesized as a vector conditional
;; through ix86_expand_int_vcond with an unsigned GTU comparison:
;; dest = (op1 >u op2) ? op1 : op2.
5588 (define_expand "umaxv2di3"
5589 [(set (match_operand:V2DI 0 "register_operand" "")
5590 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
5591 (match_operand:V2DI 2 "register_operand" "")))]
5597 xops[0] = operands[0];
5598 xops[1] = operands[1];
5599 xops[2] = operands[2];
5600 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5601 xops[4] = operands[1];
5602 xops[5] = operands[2];
5603 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum expander for V2DI, synthesized as a vector conditional
;; through ix86_expand_int_vcond.  The comparison is op1 >u op2 (GTU) with
;; the selected values swapped: dest = (op1 >u op2) ? op2 : op1.
5608 (define_expand "uminv2di3"
5609 [(set (match_operand:V2DI 0 "register_operand" "")
5610 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
5611 (match_operand:V2DI 2 "register_operand" "")))]
5617 xops[0] = operands[0];
5618 xops[1] = operands[2];
5619 xops[2] = operands[1];
5620 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5621 xops[4] = operands[1];
5622 xops[5] = operands[2];
5623 ok = ix86_expand_int_vcond (xops);
5628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5630 ;; Parallel integral comparisons
5632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DI element-wise equality compare, SSE4.1.  Operands 1 and 2 are
;; commutative.  Alternative 0 (noavx) prints the two-operand pcmpeqq with
;; operand 1 tied to the destination; alternative 1 (avx) prints the
;; three-operand vpcmpeqq.
5634 (define_insn "*sse4_1_eqv2di3"
5635 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5637 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
5638 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5639 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5641 pcmpeqq\t{%2, %0|%0, %2}
5642 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
5643 [(set_attr "isa" "noavx,avx")
5644 (set_attr "type" "ssecmp")
5645 (set_attr "prefix_extra" "1")
5646 (set_attr "prefix" "orig,vex")
5647 (set_attr "mode" "TI")])
;; Element-wise equality compare for the VI124_128 modes (V16QI, V8HI,
;; V4SI).  Enabled for SSE2 targets only when XOP is not in use.
;; Alternative 0 (noavx) prints pcmpeq<ssevecsize> with operand 1 tied to
;; the destination; alternative 1 (avx) prints the three-operand VEX form.
5649 (define_insn "*sse2_eq<mode>3"
5650 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5652 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
5653 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5654 "TARGET_SSE2 && !TARGET_XOP
5655 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5657 pcmpeq<ssevecsize>\t{%2, %0|%0, %2}
5658 vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5659 [(set_attr "isa" "noavx,avx")
5660 (set_attr "type" "ssecmp")
5661 (set_attr "prefix_data16" "1,*")
5662 (set_attr "prefix" "orig,vex")
5663 (set_attr "mode" "TI")])
;; Named expander for the VI124_128 equality compare (V16QI, V8HI, V4SI).
;; Available for SSE2 without XOP; the preparation statement legitimizes
;; the operands with ix86_fixup_binary_operands_no_copy.
5665 (define_expand "sse2_eq<mode>3"
5666 [(set (match_operand:VI124_128 0 "register_operand" "")
5668 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5669 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5670 "TARGET_SSE2 && !TARGET_XOP "
5671 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality compare.  The preparation
;; statement legitimizes the operands with
;; ix86_fixup_binary_operands_no_copy (EQ, V2DImode, ...).
5673 (define_expand "sse4_1_eqv2di3"
5674 [(set (match_operand:V2DI 0 "register_operand" "")
5676 (match_operand:V2DI 1 "nonimmediate_operand" "")
5677 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5679 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI element-wise greater-than compare.  Unlike the equality patterns
;; the operands are not commutative: operand 1 must be a register and only
;; operand 2 may be memory.  Alternative 0 (noavx) prints pcmpgtq with
;; operand 1 tied to the destination; alternative 1 (avx) prints vpcmpgtq.
5681 (define_insn "sse4_2_gtv2di3"
5682 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5684 (match_operand:V2DI 1 "register_operand" "0,x")
5685 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5688 pcmpgtq\t{%2, %0|%0, %2}
5689 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
5690 [(set_attr "isa" "noavx,avx")
5691 (set_attr "type" "ssecmp")
5692 (set_attr "prefix_extra" "1")
5693 (set_attr "prefix" "orig,vex")
5694 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the VI124_128 modes (V16QI, V8HI,
;; V4SI), enabled for SSE2 when XOP is not in use.  Not commutative:
;; operand 1 must be a register.  Alternative 0 (noavx) prints
;; pcmpgt<ssevecsize> with operand 1 tied to the destination; alternative
;; 1 (avx) prints the three-operand VEX form.
5696 (define_insn "sse2_gt<mode>3"
5697 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5699 (match_operand:VI124_128 1 "register_operand" "0,x")
5700 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5701 "TARGET_SSE2 && !TARGET_XOP"
5703 pcmpgt<ssevecsize>\t{%2, %0|%0, %2}
5704 vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5705 [(set_attr "isa" "noavx,avx")
5706 (set_attr "type" "ssecmp")
5707 (set_attr "prefix_data16" "1,*")
5708 (set_attr "prefix" "orig,vex")
5709 (set_attr "mode" "TI")])
;; Vector conditional select for the VI124_128 modes (V16QI, V8HI, V4SI):
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; The comparison operator is accepted unrestricted (empty predicate) and
;; the whole expansion is delegated to ix86_expand_int_vcond, whose
;; success flag is captured in `ok'.
5711 (define_expand "vcond<mode>"
5712 [(set (match_operand:VI124_128 0 "register_operand" "")
5713 (if_then_else:VI124_128
5714 (match_operator 3 ""
5715 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5716 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5717 (match_operand:VI124_128 1 "general_operand" "")
5718 (match_operand:VI124_128 2 "general_operand" "")))]
5721 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V2DI.  Operand 3 is the comparison
;; operator applied to operands 4 and 5; operands 1 and 2 are the two
;; values to choose between; operand 0 is the destination.  Expansion is
;; delegated to ix86_expand_int_vcond, whose success flag is kept in `ok'.
5726 (define_expand "vcondv2di"
5727 [(set (match_operand:V2DI 0 "register_operand" "")
5729 (match_operator 3 ""
5730 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5731 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5732 (match_operand:V2DI 1 "general_operand" "")
5733 (match_operand:V2DI 2 "general_operand" "")))]
5736 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for the VI124_128 modes (V16QI, V8HI, V4SI):
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Same shape and same helper (ix86_expand_int_vcond) as the vcond
;; expander; the "vcondu" standard name is the one used for unsigned
;; comparisons, so operator 3 is presumably an unsigned comparison code.
5741 (define_expand "vcondu<mode>"
5742 [(set (match_operand:VI124_128 0 "register_operand" "")
5743 (if_then_else:VI124_128
5744 (match_operator 3 ""
5745 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5746 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5747 (match_operand:VI124_128 1 "general_operand" "")
5748 (match_operand:VI124_128 2 "general_operand" "")))]
5751 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V2DI under the "vcondu" standard name
;; (the one used for unsigned comparisons).  Operand 3 is the comparison
;; operator applied to operands 4 and 5; operands 1 and 2 are the values
;; to choose between.  Expansion is delegated to ix86_expand_int_vcond.
5756 (define_expand "vconduv2di"
5757 [(set (match_operand:V2DI 0 "register_operand" "")
5759 (match_operator 3 ""
5760 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5761 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5762 (match_operand:V2DI 1 "general_operand" "")
5763 (match_operand:V2DI 2 "general_operand" "")))]
5766 bool ok = ix86_expand_int_vcond (operands);
5771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5773 ;; Parallel bitwise logical operations
5775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an XOR involving
;; operand 1.  The preparation code builds a CONST_VECTOR with one
;; constm1_rtx (all-ones) entry per vector element, forces it into a
;; register and stores it as operands[2].
5777 (define_expand "one_cmpl<mode>2"
5778 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5779 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5783 int i, n = GET_MODE_NUNITS (<MODE>mode);
5784 rtvec v = rtvec_alloc (n);
5786 for (i = 0; i < n; ++i)
5787 RTVEC_ELT (v, i) = constm1_rtx;
5789 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for the AVX256MODEI modes: operand 1 is complemented (the
;; `not') and combined with operand 2.  Printed as the three-operand
;; float-domain vandnps; the insn mode attribute is <avxvecpsmode>.
5792 (define_insn "*avx_andnot<mode>3"
5793 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5795 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
5796 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5798 "vandnps\t{%2, %1, %0|%0, %1, %2}"
5799 [(set_attr "type" "sselog")
5800 (set_attr "prefix" "vex")
5801 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the SSEMODEI modes on targets with SSE but without SSE2.
;; Printed as the float-domain andnps (insn mode V4SF); the complemented
;; operand 1 is tied to the destination.
5803 (define_insn "*sse_andnot<mode>3"
5804 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5806 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5807 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5808 "(TARGET_SSE && !TARGET_SSE2)"
5809 "andnps\t{%2, %0|%0, %2}"
5810 [(set_attr "type" "sselog")
5811 (set_attr "mode" "V4SF")])
;; And-not for the 128-bit SSEMODEI modes, VEX encoded: operand 1 is
;; complemented and combined with operand 2.  Printed as the
;; three-operand vpandn, so the destination need not match operand 1.
5813 (define_insn "*avx_andnot<mode>3"
5814 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5816 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
5817 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5819 "vpandn\t{%2, %1, %0|%0, %1, %2}"
5820 [(set_attr "type" "sselog")
5821 (set_attr "prefix" "vex")
5822 (set_attr "mode" "TI")])
;; And-not for the SSEMODEI modes, legacy encoding: printed as the
;; two-operand pandn, with the complemented operand 1 tied to the
;; destination and operand 2 in a register or memory.
5824 (define_insn "sse2_andnot<mode>3"
5825 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5827 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5828 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5830 "pandn\t{%2, %0|%0, %2}"
5831 [(set_attr "type" "sselog")
5832 (set_attr "prefix_data16" "1")
5833 (set_attr "mode" "TI")])
;; And-not on a TFmode value held in an SSE register.  Printed as the
;; two-operand integer pandn, with the complemented operand 1 tied to the
;; destination.
5835 (define_insn "*andnottf3"
5836 [(set (match_operand:TF 0 "register_operand" "=x")
5838 (not:TF (match_operand:TF 1 "register_operand" "0"))
5839 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5841 "pandn\t{%2, %0|%0, %2}"
5842 [(set_attr "type" "sselog")
5843 (set_attr "prefix_data16" "1")
5844 (set_attr "mode" "TI")])
;; Named expander for the <code> bitwise operation on the SSEMODEI modes.
;; Both sources may be register or memory; the preparation statement
;; legitimizes them with ix86_fixup_binary_operands_no_copy.
5846 (define_expand "<code><mode>3"
5847 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5849 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5850 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5852 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logic (any_logic codes) on the AVX256MODEI modes.  Operands 1
;; and 2 are commutative.  Printed as the three-operand float-domain
;; v<logic>ps; the insn mode attribute is <avxvecpsmode>.
5854 (define_insn "*avx_<code><mode>3"
5855 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5856 (any_logic:AVX256MODEI
5857 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
5858 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5860 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5861 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
5862 [(set_attr "type" "sselog")
5863 (set_attr "prefix" "vex")
5864 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise <code> on the SSEMODEI modes for targets with SSE but without
;; SSE2.  Printed as the float-domain <logic>ps (insn mode V4SF).
;; Operands 1 and 2 are commutative, and operand 1 is tied to the
;; destination.
5866 (define_insn "*sse_<code><mode>3"
5867 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5869 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5870 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5871 "(TARGET_SSE && !TARGET_SSE2)
5872 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5873 "<logic>ps\t{%2, %0|%0, %2}"
5874 [(set_attr "type" "sselog")
5875 (set_attr "mode" "V4SF")])
;; Bitwise <code> on the 128-bit SSEMODEI modes, VEX encoded.  Operands 1
;; and 2 are commutative.  Printed as the three-operand vp<logic>, so the
;; destination need not match operand 1.
5877 (define_insn "*avx_<code><mode>3"
5878 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5880 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
5881 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5883 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5884 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
5885 [(set_attr "type" "sselog")
5886 (set_attr "prefix" "vex")
5887 (set_attr "mode" "TI")])
;; Bitwise <code> on the SSEMODEI modes for SSE2, legacy encoding.
;; Printed as the two-operand p<logic>.  Operands 1 and 2 are commutative,
;; and operand 1 is tied to the destination.
5889 (define_insn "*sse2_<code><mode>3"
5890 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5892 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5893 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5894 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5895 "p<logic>\t{%2, %0|%0, %2}"
5896 [(set_attr "type" "sselog")
5897 (set_attr "prefix_data16" "1")
5898 (set_attr "mode" "TI")])
;; Named expander for the <code> bitwise operation on TFmode values.
;; Both sources may be register or memory; the preparation statement
;; legitimizes them with ix86_fixup_binary_operands_no_copy.
5900 (define_expand "<code>tf3"
5901 [(set (match_operand:TF 0 "register_operand" "")
5903 (match_operand:TF 1 "nonimmediate_operand" "")
5904 (match_operand:TF 2 "nonimmediate_operand" "")))]
5906 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Bitwise <code> on a TFmode value held in an SSE register, SSE2 only.
;; Printed as the two-operand integer p<logic>.  Operands 1 and 2 are
;; commutative, and operand 1 is tied to the destination.
5908 (define_insn "*<code>tf3"
5909 [(set (match_operand:TF 0 "register_operand" "=x")
5911 (match_operand:TF 1 "nonimmediate_operand" "%0")
5912 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5913 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
5914 "p<logic>\t{%2, %0|%0, %2}"
5915 [(set_attr "type" "sselog")
5916 (set_attr "prefix_data16" "1")
5917 (set_attr "mode" "TI")])
5919 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5921 ;; Parallel integral element swizzling
5923 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI vectors into one V16QI result.  Both
;; inputs are reinterpreted as V16QI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the
;; even-numbered bytes, i.e. the low byte of each halfword -- confirm
;; against ix86_expand_vec_extract_even_odd.
5925 (define_expand "vec_pack_trunc_v8hi"
5926 [(match_operand:V16QI 0 "register_operand" "")
5927 (match_operand:V8HI 1 "register_operand" "")
5928 (match_operand:V8HI 2 "register_operand" "")]
5931 rtx op1 = gen_lowpart (V16QImode, operands[1]);
5932 rtx op2 = gen_lowpart (V16QImode, operands[2]);
5933 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V4SI vectors into one V8HI result.  Both inputs
;; are reinterpreted as V8HI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the
;; even-numbered halfwords, i.e. the low half of each 32-bit element --
;; confirm against ix86_expand_vec_extract_even_odd.
5937 (define_expand "vec_pack_trunc_v4si"
5938 [(match_operand:V8HI 0 "register_operand" "")
5939 (match_operand:V4SI 1 "register_operand" "")
5940 (match_operand:V4SI 2 "register_operand" "")]
5943 rtx op1 = gen_lowpart (V8HImode, operands[1]);
5944 rtx op2 = gen_lowpart (V8HImode, operands[2]);
5945 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V2DI vectors into one V4SI result.  Both inputs
;; are reinterpreted as V4SI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the
;; even-numbered 32-bit elements, i.e. the low half of each 64-bit
;; element -- confirm against ix86_expand_vec_extract_even_odd.
5949 (define_expand "vec_pack_trunc_v2di"
5950 [(match_operand:V4SI 0 "register_operand" "")
5951 (match_operand:V2DI 1 "register_operand" "")
5952 (match_operand:V2DI 2 "register_operand" "")]
5955 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5956 rtx op2 = gen_lowpart (V4SImode, operands[2]);
5957 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Narrowing pack of two V8HI sources into one V16QI result, VEX encoded.
;; Printed as the three-operand vpacksswb; operand 2 may be memory.
5961 (define_insn "*avx_packsswb"
5962 [(set (match_operand:V16QI 0 "register_operand" "=x")
5965 (match_operand:V8HI 1 "register_operand" "x"))
5967 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
5969 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
5970 [(set_attr "type" "sselog")
5971 (set_attr "prefix" "vex")
5972 (set_attr "mode" "TI")])
;; Narrowing pack of two V8HI sources into one V16QI result, legacy
;; encoding.  Printed as the two-operand packsswb with operand 1 tied to
;; the destination; operand 2 may be memory.
5974 (define_insn "sse2_packsswb"
5975 [(set (match_operand:V16QI 0 "register_operand" "=x")
5978 (match_operand:V8HI 1 "register_operand" "0"))
5980 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
5982 "packsswb\t{%2, %0|%0, %2}"
5983 [(set_attr "type" "sselog")
5984 (set_attr "prefix_data16" "1")
5985 (set_attr "mode" "TI")])
;; Narrowing pack of two V4SI sources into one V8HI result, VEX encoded.
;; Printed as the three-operand vpackssdw; operand 2 may be memory.
5987 (define_insn "*avx_packssdw"
5988 [(set (match_operand:V8HI 0 "register_operand" "=x")
5991 (match_operand:V4SI 1 "register_operand" "x"))
5993 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
5995 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
5996 [(set_attr "type" "sselog")
5997 (set_attr "prefix" "vex")
5998 (set_attr "mode" "TI")])
;; Narrowing pack of two V4SI sources into one V8HI result, legacy
;; encoding.  Printed as the two-operand packssdw with operand 1 tied to
;; the destination; operand 2 may be memory.
6000 (define_insn "sse2_packssdw"
6001 [(set (match_operand:V8HI 0 "register_operand" "=x")
6004 (match_operand:V4SI 1 "register_operand" "0"))
6006 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6008 "packssdw\t{%2, %0|%0, %2}"
6009 [(set_attr "type" "sselog")
6010 (set_attr "prefix_data16" "1")
6011 (set_attr "mode" "TI")])
;; Narrowing pack of two V8HI sources into one V16QI result, VEX encoded.
;; Printed as the three-operand vpackuswb; operand 2 may be memory.
6013 (define_insn "*avx_packuswb"
6014 [(set (match_operand:V16QI 0 "register_operand" "=x")
6017 (match_operand:V8HI 1 "register_operand" "x"))
6019 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6021 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6022 [(set_attr "type" "sselog")
6023 (set_attr "prefix" "vex")
6024 (set_attr "mode" "TI")])
;; Narrowing pack of two V8HI sources into one V16QI result, legacy
;; encoding.  Printed as the two-operand packuswb with operand 1 tied to
;; the destination; operand 2 may be memory.
6026 (define_insn "sse2_packuswb"
6027 [(set (match_operand:V16QI 0 "register_operand" "=x")
6030 (match_operand:V8HI 1 "register_operand" "0"))
6032 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6034 "packuswb\t{%2, %0|%0, %2}"
6035 [(set_attr "type" "sselog")
6036 (set_attr "prefix_data16" "1")
6037 (set_attr "mode" "TI")])
;; Interleave of the high halves of two V16QI operands, VEX encoded.
;; The selection list alternates index i and i+16 for i = 8..15, i.e.
;; byte i of operand 1 followed by byte i of operand 2.  Printed as the
;; three-operand vpunpckhbw.
6039 (define_insn "*avx_interleave_highv16qi"
6040 [(set (match_operand:V16QI 0 "register_operand" "=x")
6043 (match_operand:V16QI 1 "register_operand" "x")
6044 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6045 (parallel [(const_int 8) (const_int 24)
6046 (const_int 9) (const_int 25)
6047 (const_int 10) (const_int 26)
6048 (const_int 11) (const_int 27)
6049 (const_int 12) (const_int 28)
6050 (const_int 13) (const_int 29)
6051 (const_int 14) (const_int 30)
6052 (const_int 15) (const_int 31)])))]
6054 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6055 [(set_attr "type" "sselog")
6056 (set_attr "prefix" "vex")
6057 (set_attr "mode" "TI")])
;; Interleave of the high halves of two V16QI operands, legacy encoding.
;; The selection list alternates index i and i+16 for i = 8..15, i.e.
;; byte i of operand 1 followed by byte i of operand 2.  Printed as the
;; two-operand punpckhbw with operand 1 tied to the destination.
6059 (define_insn "vec_interleave_highv16qi"
6060 [(set (match_operand:V16QI 0 "register_operand" "=x")
6063 (match_operand:V16QI 1 "register_operand" "0")
6064 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6065 (parallel [(const_int 8) (const_int 24)
6066 (const_int 9) (const_int 25)
6067 (const_int 10) (const_int 26)
6068 (const_int 11) (const_int 27)
6069 (const_int 12) (const_int 28)
6070 (const_int 13) (const_int 29)
6071 (const_int 14) (const_int 30)
6072 (const_int 15) (const_int 31)])))]
6074 "punpckhbw\t{%2, %0|%0, %2}"
6075 [(set_attr "type" "sselog")
6076 (set_attr "prefix_data16" "1")
6077 (set_attr "mode" "TI")])
;; Interleave of the low halves of two V16QI operands, VEX encoded.
;; The selection list alternates index i and i+16 for i = 0..7, i.e.
;; byte i of operand 1 followed by byte i of operand 2.  Printed as the
;; three-operand vpunpcklbw.
6079 (define_insn "*avx_interleave_lowv16qi"
6080 [(set (match_operand:V16QI 0 "register_operand" "=x")
6083 (match_operand:V16QI 1 "register_operand" "x")
6084 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6085 (parallel [(const_int 0) (const_int 16)
6086 (const_int 1) (const_int 17)
6087 (const_int 2) (const_int 18)
6088 (const_int 3) (const_int 19)
6089 (const_int 4) (const_int 20)
6090 (const_int 5) (const_int 21)
6091 (const_int 6) (const_int 22)
6092 (const_int 7) (const_int 23)])))]
6094 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6095 [(set_attr "type" "sselog")
6096 (set_attr "prefix" "vex")
6097 (set_attr "mode" "TI")])
;; Interleave of the low halves of two V16QI operands, legacy encoding.
;; The selection list alternates index i and i+16 for i = 0..7, i.e.
;; byte i of operand 1 followed by byte i of operand 2.  Printed as the
;; two-operand punpcklbw with operand 1 tied to the destination.
6099 (define_insn "vec_interleave_lowv16qi"
6100 [(set (match_operand:V16QI 0 "register_operand" "=x")
6103 (match_operand:V16QI 1 "register_operand" "0")
6104 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6105 (parallel [(const_int 0) (const_int 16)
6106 (const_int 1) (const_int 17)
6107 (const_int 2) (const_int 18)
6108 (const_int 3) (const_int 19)
6109 (const_int 4) (const_int 20)
6110 (const_int 5) (const_int 21)
6111 (const_int 6) (const_int 22)
6112 (const_int 7) (const_int 23)])))]
6114 "punpcklbw\t{%2, %0|%0, %2}"
6115 [(set_attr "type" "sselog")
6116 (set_attr "prefix_data16" "1")
6117 (set_attr "mode" "TI")])
;; Interleave of the high halves of two V8HI operands, VEX encoded.
;; The selection list alternates index i and i+8 for i = 4..7, i.e.
;; halfword i of operand 1 followed by halfword i of operand 2.  Printed
;; as the three-operand vpunpckhwd.
6119 (define_insn "*avx_interleave_highv8hi"
6120 [(set (match_operand:V8HI 0 "register_operand" "=x")
6123 (match_operand:V8HI 1 "register_operand" "x")
6124 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6125 (parallel [(const_int 4) (const_int 12)
6126 (const_int 5) (const_int 13)
6127 (const_int 6) (const_int 14)
6128 (const_int 7) (const_int 15)])))]
6130 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6131 [(set_attr "type" "sselog")
6132 (set_attr "prefix" "vex")
6133 (set_attr "mode" "TI")])
;; Interleave of the high halves of two V8HI operands, legacy encoding.
;; The selection list alternates index i and i+8 for i = 4..7, i.e.
;; halfword i of operand 1 followed by halfword i of operand 2.  Printed
;; as the two-operand punpckhwd with operand 1 tied to the destination.
6135 (define_insn "vec_interleave_highv8hi"
6136 [(set (match_operand:V8HI 0 "register_operand" "=x")
6139 (match_operand:V8HI 1 "register_operand" "0")
6140 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6141 (parallel [(const_int 4) (const_int 12)
6142 (const_int 5) (const_int 13)
6143 (const_int 6) (const_int 14)
6144 (const_int 7) (const_int 15)])))]
6146 "punpckhwd\t{%2, %0|%0, %2}"
6147 [(set_attr "type" "sselog")
6148 (set_attr "prefix_data16" "1")
6149 (set_attr "mode" "TI")])
;; Interleave of the low halves of two V8HI operands, VEX encoded.
;; The selection list alternates index i and i+8 for i = 0..3, i.e.
;; halfword i of operand 1 followed by halfword i of operand 2.  Printed
;; as the three-operand vpunpcklwd.
6151 (define_insn "*avx_interleave_lowv8hi"
6152 [(set (match_operand:V8HI 0 "register_operand" "=x")
6155 (match_operand:V8HI 1 "register_operand" "x")
6156 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6157 (parallel [(const_int 0) (const_int 8)
6158 (const_int 1) (const_int 9)
6159 (const_int 2) (const_int 10)
6160 (const_int 3) (const_int 11)])))]
6162 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6163 [(set_attr "type" "sselog")
6164 (set_attr "prefix" "vex")
6165 (set_attr "mode" "TI")])
;; Interleave of the low halves of two V8HI operands, legacy encoding.
;; The selection list alternates index i and i+8 for i = 0..3, i.e.
;; halfword i of operand 1 followed by halfword i of operand 2.  Printed
;; as the two-operand punpcklwd with operand 1 tied to the destination.
6167 (define_insn "vec_interleave_lowv8hi"
6168 [(set (match_operand:V8HI 0 "register_operand" "=x")
6171 (match_operand:V8HI 1 "register_operand" "0")
6172 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6173 (parallel [(const_int 0) (const_int 8)
6174 (const_int 1) (const_int 9)
6175 (const_int 2) (const_int 10)
6176 (const_int 3) (const_int 11)])))]
6178 "punpcklwd\t{%2, %0|%0, %2}"
6179 [(set_attr "type" "sselog")
6180 (set_attr "prefix_data16" "1")
6181 (set_attr "mode" "TI")])
;; Interleave of the high halves of two V4SI operands, VEX encoded.
;; The selection list is 2, 6, 3, 7: element i of operand 1 followed by
;; element i of operand 2 for i = 2..3.  Printed as the three-operand
;; vpunpckhdq.
6183 (define_insn "*avx_interleave_highv4si"
6184 [(set (match_operand:V4SI 0 "register_operand" "=x")
6187 (match_operand:V4SI 1 "register_operand" "x")
6188 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6189 (parallel [(const_int 2) (const_int 6)
6190 (const_int 3) (const_int 7)])))]
6192 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6193 [(set_attr "type" "sselog")
6194 (set_attr "prefix" "vex")
6195 (set_attr "mode" "TI")])
;; Interleave of the high halves of two V4SI operands, legacy encoding.
;; The selection list is 2, 6, 3, 7: element i of operand 1 followed by
;; element i of operand 2 for i = 2..3.  Printed as the two-operand
;; punpckhdq with operand 1 tied to the destination.
6197 (define_insn "vec_interleave_highv4si"
6198 [(set (match_operand:V4SI 0 "register_operand" "=x")
6201 (match_operand:V4SI 1 "register_operand" "0")
6202 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6203 (parallel [(const_int 2) (const_int 6)
6204 (const_int 3) (const_int 7)])))]
6206 "punpckhdq\t{%2, %0|%0, %2}"
6207 [(set_attr "type" "sselog")
6208 (set_attr "prefix_data16" "1")
6209 (set_attr "mode" "TI")])
;; Interleave of the low halves of two V4SI operands, VEX encoded.
;; The selection list is 0, 4, 1, 5: element i of operand 1 followed by
;; element i of operand 2 for i = 0..1.  Printed as the three-operand
;; vpunpckldq.
6211 (define_insn "*avx_interleave_lowv4si"
6212 [(set (match_operand:V4SI 0 "register_operand" "=x")
6215 (match_operand:V4SI 1 "register_operand" "x")
6216 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6217 (parallel [(const_int 0) (const_int 4)
6218 (const_int 1) (const_int 5)])))]
6220 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6221 [(set_attr "type" "sselog")
6222 (set_attr "prefix" "vex")
6223 (set_attr "mode" "TI")])
;; Interleave of the low halves of two V4SI operands, legacy encoding.
;; The selection list is 0, 4, 1, 5: element i of operand 1 followed by
;; element i of operand 2 for i = 0..1.  Printed as the two-operand
;; punpckldq with operand 1 tied to the destination.
6225 (define_insn "vec_interleave_lowv4si"
6226 [(set (match_operand:V4SI 0 "register_operand" "=x")
6229 (match_operand:V4SI 1 "register_operand" "0")
6230 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6231 (parallel [(const_int 0) (const_int 4)
6232 (const_int 1) (const_int 5)])))]
6234 "punpckldq\t{%2, %0|%0, %2}"
6235 [(set_attr "type" "sselog")
6236 (set_attr "prefix_data16" "1")
6237 (set_attr "mode" "TI")])
;; VEX element insert for the SSEMODE124 modes: the scalar operand 2 is
;; broadcast (vec_duplicate) and merged into operand 1 under the one-hot
;; mask in operand 3 (a power of two).  The output code converts that mask
;; to an element index with exact_log2 before printing.  A memory source
;; is printed as %2, a register source as %k2.  The prefix_extra attribute
;; depends on whether the destination is V8HI.
6239 (define_insn "*avx_pinsr<ssevecsize>"
6240 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6241 (vec_merge:SSEMODE124
6242 (vec_duplicate:SSEMODE124
6243 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6244 (match_operand:SSEMODE124 1 "register_operand" "x")
6245 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6248 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6249 if (MEM_P (operands[2]))
6250 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6252 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6254 [(set_attr "type" "sselog")
6255 (set (attr "prefix_extra")
6256 (if_then_else (match_operand:V8HI 0 "" "")
6258 (const_string "1")))
6259 (set_attr "length_immediate" "1")
6260 (set_attr "prefix" "vex")
6261 (set_attr "mode" "TI")])
;; Byte insert into a V16QI vector, legacy encoding.  The QImode operand 2
;; is broadcast and merged into operand 1 (tied to the destination) under
;; the one-hot mask in operand 3 (a power of two from 1 to 32768).  The
;; output code converts the mask to a byte index with exact_log2; a
;; memory source is printed as %2, a register source as %k2.
6263 (define_insn "*sse4_1_pinsrb"
6264 [(set (match_operand:V16QI 0 "register_operand" "=x")
6266 (vec_duplicate:V16QI
6267 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6268 (match_operand:V16QI 1 "register_operand" "0")
6269 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6272 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6273 if (MEM_P (operands[2]))
6274 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6276 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6278 [(set_attr "type" "sselog")
6279 (set_attr "prefix_extra" "1")
6280 (set_attr "length_immediate" "1")
6281 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI vector, legacy encoding.  Operand 2 (HImode,
;; register or memory) goes into operand 1 (tied to the destination) at
;; the position given by the one-hot mask in operand 3 (a power of two
;; from 1 to 128).  The output code converts the mask to an element index
;; with exact_log2; a memory source is printed as %2, a register source
;; as %k2.
6283 (define_insn "*sse2_pinsrw"
6284 [(set (match_operand:V8HI 0 "register_operand" "=x")
6287 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6288 (match_operand:V8HI 1 "register_operand" "0")
6289 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6292 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6293 if (MEM_P (operands[2]))
6294 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6296 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6298 [(set_attr "type" "sselog")
6299 (set_attr "prefix_data16" "1")
6300 (set_attr "length_immediate" "1")
6301 (set_attr "mode" "TI")])
6303 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into a V4SI vector, legacy encoding.  Operand 2
;; (SImode, register or memory) goes into operand 1 (tied to the
;; destination) at the position given by the one-hot mask in operand 3
;; (1, 2, 4 or 8), which the output code converts to an element index
;; with exact_log2.
6304 (define_insn "*sse4_1_pinsrd"
6305 [(set (match_operand:V4SI 0 "register_operand" "=x")
6308 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6309 (match_operand:V4SI 1 "register_operand" "0")
6310 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6313 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6314 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6316 [(set_attr "type" "sselog")
6317 (set_attr "prefix_extra" "1")
6318 (set_attr "length_immediate" "1")
6319 (set_attr "mode" "TI")])
;; Quadword insert into a V2DI vector, VEX encoded; 64-bit AVX targets
;; only.  Operand 2 (DImode, register or memory) goes into operand 1 at
;; the position given by the one-hot mask in operand 3 (1 or 2), which
;; the output code converts to an element index with exact_log2.
;; Printed as the three-operand-plus-immediate vpinsrq.
6321 (define_insn "*avx_pinsrq"
6322 [(set (match_operand:V2DI 0 "register_operand" "=x")
6325 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6326 (match_operand:V2DI 1 "register_operand" "x")
6327 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6328 "TARGET_AVX && TARGET_64BIT"
6330 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6331 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6333 [(set_attr "type" "sselog")
6334 (set_attr "prefix_extra" "1")
6335 (set_attr "length_immediate" "1")
6336 (set_attr "prefix" "vex")
6337 (set_attr "mode" "TI")])
;; Quadword insert into a V2DI vector, legacy encoding; 64-bit SSE4.1
;; targets only.  Operand 2 (DImode, register or memory) goes into
;; operand 1 (tied to the destination) at the position given by the
;; one-hot mask in operand 3 (1 or 2), which the output code converts to
;; an element index with exact_log2.
6339 (define_insn "*sse4_1_pinsrq"
6340 [(set (match_operand:V2DI 0 "register_operand" "=x")
6343 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6344 (match_operand:V2DI 1 "register_operand" "0")
6345 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6346 "TARGET_SSE4_1 && TARGET_64BIT"
6348 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6349 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6351 [(set_attr "type" "sselog")
6352 (set_attr "prefix_rex" "1")
6353 (set_attr "prefix_extra" "1")
6354 (set_attr "length_immediate" "1")
6355 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into a general register of an SWI48
;; mode.  Operand 2 is the constant byte index (0..15).  The destination
;; is printed with the %k modifier; "%v" together with prefix maybe_vex
;; lets one pattern serve both the legacy and the VEX encoding.
6357 (define_insn "*sse4_1_pextrb_<mode>"
6358 [(set (match_operand:SWI48 0 "register_operand" "=r")
6361 (match_operand:V16QI 1 "register_operand" "x")
6362 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6364 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6365 [(set_attr "type" "sselog")
6366 (set_attr "prefix_extra" "1")
6367 (set_attr "length_immediate" "1")
6368 (set_attr "prefix" "maybe_vex")
6369 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register stored directly to a QImode memory
;; destination.  Operand 2 is the constant byte index (0..15).
6371 (define_insn "*sse4_1_pextrb_memory"
6372 [(set (match_operand:QI 0 "memory_operand" "=m")
6374 (match_operand:V16QI 1 "register_operand" "x")
6375 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6377 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6378 [(set_attr "type" "sselog")
6379 (set_attr "prefix_extra" "1")
6380 (set_attr "length_immediate" "1")
6381 (set_attr "prefix" "maybe_vex")
6382 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register into a general register of an
;; SWI48 mode.  Operand 2 is the constant element index (0..7).  The
;; destination is printed with the %k modifier.
6384 (define_insn "*sse2_pextrw_<mode>"
6385 [(set (match_operand:SWI48 0 "register_operand" "=r")
6388 (match_operand:V8HI 1 "register_operand" "x")
6389 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6391 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6392 [(set_attr "type" "sselog")
6393 (set_attr "prefix_data16" "1")
6394 (set_attr "length_immediate" "1")
6395 (set_attr "prefix" "maybe_vex")
6396 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register stored directly to an HImode
;; memory destination.  Operand 2 is the constant element index (0..7).
6398 (define_insn "*sse4_1_pextrw_memory"
6399 [(set (match_operand:HI 0 "memory_operand" "=m")
6401 (match_operand:V8HI 1 "register_operand" "x")
6402 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6404 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6405 [(set_attr "type" "sselog")
6406 (set_attr "prefix_extra" "1")
6407 (set_attr "length_immediate" "1")
6408 (set_attr "prefix" "maybe_vex")
6409 (set_attr "mode" "TI")])
;; Doubleword extract from a V4SI register into an SImode destination,
;; which may be a general register or memory.  Operand 2 is the constant
;; element index (0..3).
6411 (define_insn "*sse4_1_pextrd"
6412 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6414 (match_operand:V4SI 1 "register_operand" "x")
6415 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6417 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6418 [(set_attr "type" "sselog")
6419 (set_attr "prefix_extra" "1")
6420 (set_attr "length_immediate" "1")
6421 (set_attr "prefix" "maybe_vex")
6422 (set_attr "mode" "TI")])
;; Doubleword extract from a V4SI register into a DImode general
;; register; 64-bit SSE4.1 targets only.  Operand 2 is the constant
;; element index (0..3).  The DImode destination is printed with the %k
;; modifier, so the instruction itself names a 32-bit register.
6424 (define_insn "*sse4_1_pextrd_zext"
6425 [(set (match_operand:DI 0 "register_operand" "=r")
6428 (match_operand:V4SI 1 "register_operand" "x")
6429 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6430 "TARGET_64BIT && TARGET_SSE4_1"
6431 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6432 [(set_attr "type" "sselog")
6433 (set_attr "prefix_extra" "1")
6434 (set_attr "length_immediate" "1")
6435 (set_attr "prefix" "maybe_vex")
6436 (set_attr "mode" "TI")])
6438 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Quadword extract from a V2DI register into a DImode destination
;; (general register or memory); 64-bit SSE4.1 targets only.  Operand 2
;; is the constant element index (0 or 1).
6439 (define_insn "*sse4_1_pextrq"
6440 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6442 (match_operand:V2DI 1 "register_operand" "x")
6443 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6444 "TARGET_SSE4_1 && TARGET_64BIT"
6445 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6446 [(set_attr "type" "sselog")
6447 (set_attr "prefix_rex" "1")
6448 (set_attr "prefix_extra" "1")
6449 (set_attr "length_immediate" "1")
6450 (set_attr "prefix" "maybe_vex")
6451 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one constant (operand 2).
;; It splits the low 8 bits of that mask into four 2-bit element
;; selectors (bits 1:0, 3:2, 5:4, 7:6) and emits sse2_pshufd_1 with each
;; selector as a separate operand.
6453 (define_expand "sse2_pshufd"
6454 [(match_operand:V4SI 0 "register_operand" "")
6455 (match_operand:V4SI 1 "nonimmediate_operand" "")
6456 (match_operand:SI 2 "const_int_operand" "")]
6459 int mask = INTVAL (operands[2]);
6460 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6461 GEN_INT ((mask >> 0) & 3),
6462 GEN_INT ((mask >> 2) & 3),
6463 GEN_INT ((mask >> 4) & 3),
6464 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with the four element selectors (each 0..3) as separate
;; operands 2..5.  The output code packs them back into the instruction's
;; immediate -- op2 | op3 << 2 | op4 << 4 | op5 << 6 -- stores it in
;; operands[2] and prints %vpshufd.  The source may be register or memory.
6468 (define_insn "sse2_pshufd_1"
6469 [(set (match_operand:V4SI 0 "register_operand" "=x")
6471 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6472 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6473 (match_operand 3 "const_0_to_3_operand" "")
6474 (match_operand 4 "const_0_to_3_operand" "")
6475 (match_operand 5 "const_0_to_3_operand" "")])))]
6479 mask |= INTVAL (operands[2]) << 0;
6480 mask |= INTVAL (operands[3]) << 2;
6481 mask |= INTVAL (operands[4]) << 4;
6482 mask |= INTVAL (operands[5]) << 6;
6483 operands[2] = GEN_INT (mask);
6485 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6487 [(set_attr "type" "sselog1")
6488 (set_attr "prefix_data16" "1")
6489 (set_attr "prefix" "maybe_vex")
6490 (set_attr "length_immediate" "1")
6491 (set_attr "mode" "TI")])
;; Expander for a V8HI shuffle driven by one integer mask (operand 2).
;; The mask is split into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7)
;; which are passed unchanged, as values 0..3, to gen_sse2_pshuflw_1.
6493 (define_expand "sse2_pshuflw"
6494 [(match_operand:V8HI 0 "register_operand" "")
6495 (match_operand:V8HI 1 "nonimmediate_operand" "")
6496 (match_operand:SI 2 "const_int_operand" "")]
6499 int mask = INTVAL (operands[2]);
6500 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6501 GEN_INT ((mask >> 0) & 3),
6502 GEN_INT ((mask >> 2) & 3),
6503 GEN_INT ((mask >> 4) & 3),
6504 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose four variable selectors (operands 2-5) are each
;; restricted to 0..3.  The output code re-packs them, two bits per
;; selector with operand 2 lowest, into one immediate stored in
;; operands[2] and emits pshuflw.  The attributes request a rep prefix
;; (prefix_rep 1) and no data16 prefix (prefix_data16 0).
6508 (define_insn "sse2_pshuflw_1"
6509 [(set (match_operand:V8HI 0 "register_operand" "=x")
6511 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6512 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6513 (match_operand 3 "const_0_to_3_operand" "")
6514 (match_operand 4 "const_0_to_3_operand" "")
6515 (match_operand 5 "const_0_to_3_operand" "")
6523 mask |= INTVAL (operands[2]) << 0;
6524 mask |= INTVAL (operands[3]) << 2;
6525 mask |= INTVAL (operands[4]) << 4;
6526 mask |= INTVAL (operands[5]) << 6;
6527 operands[2] = GEN_INT (mask);
6529 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6531 [(set_attr "type" "sselog")
6532 (set_attr "prefix_data16" "0")
6533 (set_attr "prefix_rep" "1")
6534 (set_attr "prefix" "maybe_vex")
6535 (set_attr "length_immediate" "1")
6536 (set_attr "mode" "TI")])
;; Expander for a V8HI shuffle driven by one integer mask (operand 2).
;; Each 2-bit field of the mask is biased by +4 before being passed to
;; gen_sse2_pshufhw_1, so the selectors handed on lie in the range 4..7.
6538 (define_expand "sse2_pshufhw"
6539 [(match_operand:V8HI 0 "register_operand" "")
6540 (match_operand:V8HI 1 "nonimmediate_operand" "")
6541 (match_operand:SI 2 "const_int_operand" "")]
6544 int mask = INTVAL (operands[2]);
6545 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6546 GEN_INT (((mask >> 0) & 3) + 4),
6547 GEN_INT (((mask >> 2) & 3) + 4),
6548 GEN_INT (((mask >> 4) & 3) + 4),
6549 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose four variable selectors (operands 2-5) must be in
;; 4..7 (const_4_to_7_operand).  The output code subtracts the bias of 4
;; from each selector, packs the results two bits apiece into one
;; immediate (operand 2 lowest), stores it in operands[2] and emits pshufhw.
6553 (define_insn "sse2_pshufhw_1"
6554 [(set (match_operand:V8HI 0 "register_operand" "=x")
6556 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6557 (parallel [(const_int 0)
6561 (match_operand 2 "const_4_to_7_operand" "")
6562 (match_operand 3 "const_4_to_7_operand" "")
6563 (match_operand 4 "const_4_to_7_operand" "")
6564 (match_operand 5 "const_4_to_7_operand" "")])))]
6568 mask |= (INTVAL (operands[2]) - 4) << 0;
6569 mask |= (INTVAL (operands[3]) - 4) << 2;
6570 mask |= (INTVAL (operands[4]) - 4) << 4;
6571 mask |= (INTVAL (operands[5]) - 4) << 6;
6572 operands[2] = GEN_INT (mask);
6574 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6576 [(set_attr "type" "sselog")
6577 (set_attr "prefix_rep" "1")
6578 (set_attr "prefix_data16" "0")
6579 (set_attr "prefix" "maybe_vex")
6580 (set_attr "length_immediate" "1")
6581 (set_attr "mode" "TI")])
;; Expander with a V4SI register destination and an SImode source
;; (operand 1).  The preparation statement supplies operand 2 as the
;; all-zero V4SI constant, CONST0_RTX (V4SImode).
6583 (define_expand "sse2_loadd"
6584 [(set (match_operand:V4SI 0 "register_operand" "")
6587 (match_operand:SI 1 "nonimmediate_operand" ""))
6591 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX-encoded (prefix vex) insertion of SImode operand 2 into a V4SI
;; register.  Three alternatives: vmovd from memory, vmovd from a general
;; register, and vmovss from an SSE register, which additionally reads
;; vector operand 1.  In the two vmovd alternatives operand 1 uses
;; constraint C (presumably the zero constant accepted by
;; reg_or_0_operand -- confirm against the constraint definitions).
6593 (define_insn "*avx_loadld"
6594 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
6597 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
6598 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
6602 vmovd\t{%2, %0|%0, %2}
6603 vmovd\t{%2, %0|%0, %2}
6604 vmovss\t{%2, %1, %0|%0, %1, %2}"
6605 [(set_attr "type" "ssemov")
6606 (set_attr "prefix" "vex")
6607 (set_attr "mode" "TI,TI,V4SF")])
;; Insertion of SImode operand 2 into a V4SI register without a VEX
;; prefix attribute.  Four alternatives: movd from memory, movd from a
;; general register, movss from memory, and movss from an SSE register.
;; In the last alternative operand 1 is tied to the destination
;; (constraint 0); in the others it uses constraint C.
6609 (define_insn "sse2_loadld"
6610 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
6613 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
6614 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
6618 movd\t{%2, %0|%0, %2}
6619 movd\t{%2, %0|%0, %2}
6620 movss\t{%2, %0|%0, %2}
6621 movss\t{%2, %0|%0, %2}"
6622 [(set_attr "type" "ssemov")
6623 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of V4SI register operand 1 as an SImode value.
;; The split fires after reload when inter-unit moves are allowed, or the
;; destination is memory, or the destination is not a general register.
;; It replaces the pattern by a plain move whose source is the same hard
;; register re-created in SImode via gen_rtx_REG.
6625 (define_insn_and_split "sse2_stored"
6626 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
6628 (match_operand:V4SI 1 "register_operand" "x,Yi")
6629 (parallel [(const_int 0)])))]
6632 "&& reload_completed
6633 && (TARGET_INTER_UNIT_MOVES
6634 || MEM_P (operands [0])
6635 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6636 [(set (match_dup 0) (match_dup 1))]
6637 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element i (operand 2, 0..3) of a V4SI that lives in offsettable
;; memory into a general register.  The split code turns this into an
;; ordinary SImode move from the vector address displaced by i*4 bytes,
;; built with adjust_address.
6639 (define_insn_and_split "*vec_ext_v4si_mem"
6640 [(set (match_operand:SI 0 "register_operand" "=r")
6642 (match_operand:V4SI 1 "memory_operand" "o")
6643 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6649 int i = INTVAL (operands[2]);
6651 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: set DImode operand 0 from element 0 (parallel [(const_int 0)])
;; of V2DI register operand 1.
6655 (define_expand "sse_storeq"
6656 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6658 (match_operand:V2DI 1 "register_operand" "")
6659 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 DImode extraction from a V2DI operand.
;; The condition requires TARGET_64BIT and rejects the case where both
;; operands are memory.  Of the three alternatives only the last one has
;; concrete attributes (type imov, mode DI) and uses the %vmov{q}
;; template; the first two carry the default * attribute values.
6662 (define_insn "*sse2_storeq_rex64"
6663 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
6665 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6666 (parallel [(const_int 0)])))]
6667 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6671 %vmov{q}\t{%1, %0|%0, %1}"
6672 [(set_attr "type" "*,*,imov")
6673 (set_attr "prefix" "*,*,maybe_vex")
6674 (set_attr "mode" "*,*,DI")])
;; Element-0 DImode extraction from V2DI register operand 1 into memory or
;; an SSE register (constraint =mx).
;; NOTE(review): the second [(set ...)] group below, with unconstrained
;; operands, looks like the body of a separate splitter that follows this
;; insn.  It rewrites the source as the same hard register in DImode when
;; inter-unit moves are allowed, the destination is memory, or the
;; destination is not a general register -- confirm the pattern boundary
;; against the complete file.
6676 (define_insn "*sse2_storeq"
6677 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
6679 (match_operand:V2DI 1 "register_operand" "x")
6680 (parallel [(const_int 0)])))]
6685 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6687 (match_operand:V2DI 1 "register_operand" "")
6688 (parallel [(const_int 0)])))]
6691 && (TARGET_INTER_UNIT_MOVES
6692 || MEM_P (operands [0])
6693 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6694 [(set (match_dup 0) (match_dup 1))]
6695 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 of a V2DI into DImode operand 0, VEX-encoded.
;; Alternatives, in order:
;;   vmovhps  - store from an SSE register to memory;
;;   vpsrldq  - shift the source register right by 8 bytes into the
;;              destination register;
;;   vmovq    - load into an SSE register from %H1 of an offsettable
;;              memory source;
;;   vmov{q}  - load into a general register from %H1.
;; %H1 presumably addresses the upper 8 bytes of operand 1 -- confirm
;; against the operand print modifiers.  Memory-to-memory is rejected.
6697 (define_insn "*vec_extractv2di_1_rex64_avx"
6698 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
6700 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
6701 (parallel [(const_int 1)])))]
6704 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6706 vmovhps\t{%1, %0|%0, %1}
6707 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6708 vmovq\t{%H1, %0|%0, %H1}
6709 vmov{q}\t{%H1, %0|%0, %H1}"
6710 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
6711 (set_attr "length_immediate" "*,1,*,*")
6712 (set_attr "memory" "*,none,*,*")
6713 (set_attr "prefix" "vex")
6714 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI into DImode operand 0 on 64-bit targets,
;; legacy encoding.  Alternatives, in order: movhps store to memory;
;; psrldq by 8 bytes in place (source tied to the destination by
;; constraint 0); movq load into an SSE register from %H1; mov{q} load
;; into a general register from %H1.  Memory-to-memory is rejected.
6716 (define_insn "*vec_extractv2di_1_rex64"
6717 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
6719 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
6720 (parallel [(const_int 1)])))]
6721 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6723 movhps\t{%1, %0|%0, %1}
6724 psrldq\t{$8, %0|%0, 8}
6725 movq\t{%H1, %0|%0, %H1}
6726 mov{q}\t{%H1, %0|%0, %H1}"
6727 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
6728 (set_attr "length_immediate" "*,1,*,*")
6729 (set_attr "memory" "*,none,*,*")
6730 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI into DImode operand 0, VEX-encoded, with no
;; general-register alternative.  Alternatives, in order: vmovhps store
;; to memory; vpsrldq of the source register by 8 bytes; vmovq load from
;; %H1 of an offsettable memory source.  Memory-to-memory is rejected.
6732 (define_insn "*vec_extractv2di_1_avx"
6733 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
6735 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
6736 (parallel [(const_int 1)])))]
6739 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6741 vmovhps\t{%1, %0|%0, %1}
6742 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6743 vmovq\t{%H1, %0|%0, %H1}"
6744 [(set_attr "type" "ssemov,sseishft1,ssemov")
6745 (set_attr "length_immediate" "*,1,*")
6746 (set_attr "memory" "*,none,*")
6747 (set_attr "prefix" "vex")
6748 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI into DImode operand 0 for SSE2, legacy
;; encoding, with no general-register alternative.  Alternatives, in
;; order: movhps store to memory; psrldq by 8 bytes in place (source
;; tied to the destination); movq load from %H1 of an offsettable memory
;; source.  Memory-to-memory is rejected.
6750 (define_insn "*vec_extractv2di_1_sse2"
6751 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
6753 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
6754 (parallel [(const_int 1)])))]
6756 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6758 movhps\t{%1, %0|%0, %1}
6759 psrldq\t{$8, %0|%0, 8}
6760 movq\t{%H1, %0|%0, %H1}"
6761 [(set_attr "type" "ssemov,sseishft1,ssemov")
6762 (set_attr "length_immediate" "*,1,*")
6763 (set_attr "memory" "*,none,*")
6764 (set_attr "mode" "V2SF,TI,TI")])
6766 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 of a V2DI into DImode operand 0 when SSE is available
;; but SSE2 is not.  Only single-precision move instructions are used:
;; movhps store to memory, movhlps between SSE registers, and movlps load
;; from %H1 of an offsettable memory source.  Memory-to-memory is rejected.
6767 (define_insn "*vec_extractv2di_1_sse"
6768 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
6770 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
6771 (parallel [(const_int 1)])))]
6772 "!TARGET_SSE2 && TARGET_SSE
6773 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6775 movhps\t{%1, %0|%0, %1}
6776 movhlps\t{%1, %0|%0, %1}
6777 movlps\t{%H1, %0|%0, %H1}"
6778 [(set_attr "type" "ssemov")
6779 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Duplicate SImode operand 1 into a V4SI register, VEX-encoded.
;; Alternative 0 (register source) uses vpshufd with immediate 0;
;; alternative 1 (constraint m) uses vbroadcastss.
;; NOTE(review): the predicate is register_operand although the second
;; alternative has a memory constraint -- confirm this is intentional.
6781 (define_insn "*vec_dupv4si_avx"
6782 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6784 (match_operand:SI 1 "register_operand" "x,m")))]
6787 vpshufd\t{$0, %1, %0|%0, %1, 0}
6788 vbroadcastss\t{%1, %0|%0, %1}"
6789 [(set_attr "type" "sselog1,ssemov")
6790 (set_attr "length_immediate" "1,0")
6791 (set_attr "prefix_extra" "0,1")
6792 (set_attr "prefix" "vex")
6793 (set_attr "mode" "TI,V4SF")])
;; Duplicate SImode register operand 1 into a V4SI register.
;; Alternative 0 uses pshufd with immediate 0; alternative 1 has the
;; source tied to the destination and uses shufps with immediate 0 on the
;; destination register itself.
6795 (define_insn "*vec_dupv4si"
6796 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
6798 (match_operand:SI 1 "register_operand" " Y2,0")))]
6801 %vpshufd\t{$0, %1, %0|%0, %1, 0}
6802 shufps\t{$0, %0, %0|%0, %0, 0}"
6803 [(set_attr "type" "sselog1")
6804 (set_attr "length_immediate" "1")
6805 (set_attr "mode" "TI,V4SF")])
;; Duplicate DImode operand 1 into a V2DI register, VEX-encoded.
;; A register source is interleaved with itself by vpunpcklqdq; a memory
;; source is loaded with vmovddup.
6807 (define_insn "*vec_dupv2di_avx"
6808 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6810 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
6813 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
6814 vmovddup\t{%1, %0|%0, %1}"
6815 [(set_attr "type" "sselog1")
6816 (set_attr "prefix" "vex")
6817 (set_attr "mode" "TI,DF")])
;; Duplicate DImode operand 1 into a V2DI register; judging by the pattern
;; name this is the SSE3 variant.  Alternative 0 ties the source to the
;; destination; alternative 1 takes a memory source.  Only the movddup
;; template line is visible in this excerpt.
6819 (define_insn "*vec_dupv2di_sse3"
6820 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6822 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
6826 movddup\t{%1, %0|%0, %1}"
6827 [(set_attr "type" "sselog1")
6828 (set_attr "mode" "TI,DF")])
;; Duplicate DImode register operand 1 into a V2DI register, with the
;; source tied to the destination in both alternatives.  The attributes
;; describe one sselog1 alternative in TI mode and one ssemov alternative
;; in V4SF mode.
;; NOTE(review): the condition and instruction templates are not visible
;; in this excerpt.
6830 (define_insn "*vec_dupv2di"
6831 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
6833 (match_operand:DI 1 "register_operand" " 0 ,0")))]
6838 [(set_attr "type" "sselog1,ssemov")
6839 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI from SImode operands 1 and 2.  Five alternatives:
;;   0  vpinsrd with immediate 1, inserting operand 2 (register or memory);
;;   1  vpunpckldq of two SSE registers;
;;   2  vmovd of operand 1 alone, when operand 2 matches constraint C;
;;   3  punpckldq, type mmxcvt;
;;   4  movd, type mmxmov.
;; The prefix attribute is orig for alternatives 3 and 4 and vex for the
;; others.
6841 (define_insn "*vec_concatv2si_avx"
6842 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
6844 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
6845 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
6848 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6849 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
6850 vmovd\t{%1, %0|%0, %1}
6851 punpckldq\t{%2, %0|%0, %2}
6852 movd\t{%1, %0|%0, %1}"
6853 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
6854 (set_attr "prefix_extra" "1,*,*,*,*")
6855 (set_attr "length_immediate" "1,*,*,*,*")
6856 (set (attr "prefix")
6857 (if_then_else (eq_attr "alternative" "3,4")
6858 (const_string "orig")
6859 (const_string "vex")))
6860 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Build a V2SI from SImode operands 1 and 2, legacy two-operand encoding.
;; Alternatives 0 and 1 tie operand 1 to the destination and add
;; operand 2 with pinsrd (immediate 1) or punpckldq.  Alternative 2 is a
;; movd of operand 1 when operand 2 matches constraint C.  Alternatives
;; 3 and 4 are the mmxcvt / mmxmov counterparts (punpckldq and movd).
6862 (define_insn "*vec_concatv2si_sse4_1"
6863 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
6865 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
6866 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
6869 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
6870 punpckldq\t{%2, %0|%0, %2}
6871 movd\t{%1, %0|%0, %1}
6872 punpckldq\t{%2, %0|%0, %2}
6873 movd\t{%1, %0|%0, %1}"
6874 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
6875 (set_attr "prefix_extra" "1,*,*,*,*")
6876 (set_attr "length_immediate" "1,*,*,*,*")
6877 (set_attr "mode" "TI,TI,TI,DI,DI")])
6879 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6880 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6881 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from SImode operands 1 and 2.  Operand 2 is limited to a
;; register or zero (reg_or_0_operand), so no alternative takes it from
;; memory.  Alternatives: punpckldq with operand 1 tied to the
;; destination; movd of operand 1 when operand 2 matches C; and the same
;; two shapes again with types mmxcvt and mmxmov.
6882 (define_insn "*vec_concatv2si_sse2"
6883 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
6885 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
6886 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
6889 punpckldq\t{%2, %0|%0, %2}
6890 movd\t{%1, %0|%0, %1}
6891 punpckldq\t{%2, %0|%0, %2}
6892 movd\t{%1, %0|%0, %1}"
6893 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6894 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI from SImode operands 1 and 2 using single-precision
;; instructions for the first two alternatives: unpcklps with operand 1
;; tied to the destination, and movss from memory when operand 2 matches
;; C.  The last two alternatives use punpckldq and movd with types
;; mmxcvt and mmxmov.
6896 (define_insn "*vec_concatv2si_sse"
6897 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
6899 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
6900 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
6903 unpcklps\t{%2, %0|%0, %2}
6904 movss\t{%1, %0|%0, %1}
6905 punpckldq\t{%2, %0|%0, %2}
6906 movd\t{%1, %0|%0, %1}"
6907 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6908 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI from two V2SI halves, VEX-encoded three-operand form.
;; A register operand 2 is combined with vpunpcklqdq; a memory operand 2
;; is combined with vmovhps.
6910 (define_insn "*vec_concatv4si_1_avx"
6911 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6913 (match_operand:V2SI 1 "register_operand" " x,x")
6914 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
6917 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6918 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6919 [(set_attr "type" "sselog,ssemov")
6920 (set_attr "prefix" "vex")
6921 (set_attr "mode" "TI,V2SF")])
;; Build a V4SI from two V2SI halves, legacy two-operand form with
;; operand 1 tied to the destination in every alternative.  Operand 2 is
;; merged with punpcklqdq, movlhps (register) or movhps (memory).
6923 (define_insn "*vec_concatv4si_1"
6924 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
6926 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
6927 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
6930 punpcklqdq\t{%2, %0|%0, %2}
6931 movlhps\t{%2, %0|%0, %2}
6932 movhps\t{%2, %0|%0, %2}"
6933 [(set_attr "type" "sselog,ssemov,ssemov")
6934 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI from DImode operands 1 and 2 on 32-bit AVX targets.
;; Alternatives: vmovq of a memory operand 1 and movq2dq of operand 1
;; (both with operand 2 matching C); vpunpcklqdq of two SSE registers;
;; vmovhps with a memory operand 2.  The movq2dq alternative keeps the
;; orig prefix; the others are vex.
6936 (define_insn "*vec_concatv2di_avx"
6937 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
6939 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
6940 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
6941 "!TARGET_64BIT && TARGET_AVX"
6943 vmovq\t{%1, %0|%0, %1}
6944 movq2dq\t{%1, %0|%0, %1}
6945 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6946 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6947 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
6948 (set (attr "prefix")
6949 (if_then_else (eq_attr "alternative" "1")
6950 (const_string "orig")
6951 (const_string "vex")))
6952 (set_attr "mode" "TI,TI,TI,V2SF")])
;; Build a V2DI from DImode operands 1 and 2 on 32-bit SSE targets.
;; Alternatives: movq and movq2dq of operand 1 when operand 2 matches C;
;; then, with operand 1 tied to the destination, punpcklqdq, movlhps
;; (register operand 2) and movhps (memory operand 2).
6954 (define_insn "vec_concatv2di"
6955 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
6957 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
6958 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
6959 "!TARGET_64BIT && TARGET_SSE"
6961 movq\t{%1, %0|%0, %1}
6962 movq2dq\t{%1, %0|%0, %1}
6963 punpcklqdq\t{%2, %0|%0, %2}
6964 movlhps\t{%2, %0|%0, %2}
6965 movhps\t{%2, %0|%0, %2}"
6966 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
6967 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; Build a V2DI from DImode operands 1 and 2 on 64-bit AVX targets.
;; Alternatives: vpinsrq with immediate 1 (operand 2 from a general
;; register or memory); vmovq of operand 1 from memory and from a general
;; register, and movq2dq of operand 1 (all three with operand 2 matching
;; C); vpunpcklqdq of two SSE registers; vmovhps with a memory operand 2.
;; Only the movq2dq alternative keeps the orig prefix.
6969 (define_insn "*vec_concatv2di_rex64_avx"
6970 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
6972 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
6973 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
6974 "TARGET_64BIT && TARGET_AVX"
6976 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6977 vmovq\t{%1, %0|%0, %1}
6978 vmovq\t{%1, %0|%0, %1}
6979 movq2dq\t{%1, %0|%0, %1}
6980 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6981 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6982 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
6983 (set_attr "prefix_extra" "1,*,*,*,*,*")
6984 (set_attr "length_immediate" "1,*,*,*,*,*")
6985 (set (attr "prefix")
6986 (if_then_else (eq_attr "alternative" "3")
6987 (const_string "orig")
6988 (const_string "vex")))
6989 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; Build a V2DI from DImode operands 1 and 2 on 64-bit SSE4.1 targets,
;; legacy encoding.  Alternatives: pinsrq with immediate 1 (operand 1
;; tied to the destination); movq, movd and movq2dq of operand 1 when
;; operand 2 matches C; then punpcklqdq, movlhps and movhps with
;; operand 1 tied to the destination.  prefix_rex is forced to 1 for the
;; pinsrq alternative and for the movd alternative that reads a general
;; register.
6991 (define_insn "*vec_concatv2di_rex64_sse4_1"
6992 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
6994 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
6995 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
6996 "TARGET_64BIT && TARGET_SSE4_1"
6998 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
6999 movq\t{%1, %0|%0, %1}
7000 movd\t{%1, %0|%0, %1}
7001 movq2dq\t{%1, %0|%0, %1}
7002 punpcklqdq\t{%2, %0|%0, %2}
7003 movlhps\t{%2, %0|%0, %2}
7004 movhps\t{%2, %0|%0, %2}"
7005 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7006 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7007 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7008 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7009 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; Build a V2DI from DImode operands 1 and 2 on 64-bit SSE targets.
;; Alternatives: movq, movd (from a general register) and movq2dq of
;; operand 1 when operand 2 matches C; then punpcklqdq, movlhps and
;; movhps with operand 1 tied to the destination.  prefix_rex is forced
;; to 1 only for the movd alternative.
7011 (define_insn "*vec_concatv2di_rex64_sse"
7012 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7014 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7015 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7016 "TARGET_64BIT && TARGET_SSE"
7018 movq\t{%1, %0|%0, %1}
7019 movd\t{%1, %0|%0, %1}
7020 movq2dq\t{%1, %0|%0, %1}
7021 punpcklqdq\t{%2, %0|%0, %2}
7022 movlhps\t{%2, %0|%0, %2}
7023 movhps\t{%2, %0|%0, %2}"
7024 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7025 (set_attr "prefix_rex" "*,1,*,*,*,*")
7026 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Widen V16QI operand 1 into V8HI operand 0 (unsigned, high half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (true, true).
7028 (define_expand "vec_unpacku_hi_v16qi"
7029 [(match_operand:V8HI 0 "register_operand" "")
7030 (match_operand:V16QI 1 "register_operand" "")]
7034 ix86_expand_sse4_unpack (operands, true, true);
7036 ix86_expand_sse_unpack (operands, true, true);
;; Widen V16QI operand 1 into V8HI operand 0 (signed, high half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (false, true).
7040 (define_expand "vec_unpacks_hi_v16qi"
7041 [(match_operand:V8HI 0 "register_operand" "")
7042 (match_operand:V16QI 1 "register_operand" "")]
7046 ix86_expand_sse4_unpack (operands, false, true);
7048 ix86_expand_sse_unpack (operands, false, true);
;; Widen V16QI operand 1 into V8HI operand 0 (unsigned, low half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (true, false).
7052 (define_expand "vec_unpacku_lo_v16qi"
7053 [(match_operand:V8HI 0 "register_operand" "")
7054 (match_operand:V16QI 1 "register_operand" "")]
7058 ix86_expand_sse4_unpack (operands, true, false);
7060 ix86_expand_sse_unpack (operands, true, false);
;; Widen V16QI operand 1 into V8HI operand 0 (signed, low half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (false, false).
7064 (define_expand "vec_unpacks_lo_v16qi"
7065 [(match_operand:V8HI 0 "register_operand" "")
7066 (match_operand:V16QI 1 "register_operand" "")]
7070 ix86_expand_sse4_unpack (operands, false, false);
7072 ix86_expand_sse_unpack (operands, false, false);
;; Widen V8HI operand 1 into V4SI operand 0 (unsigned, high half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (true, true).
7076 (define_expand "vec_unpacku_hi_v8hi"
7077 [(match_operand:V4SI 0 "register_operand" "")
7078 (match_operand:V8HI 1 "register_operand" "")]
7082 ix86_expand_sse4_unpack (operands, true, true);
7084 ix86_expand_sse_unpack (operands, true, true);
;; Widen V8HI operand 1 into V4SI operand 0 (signed, high half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (false, true).
7088 (define_expand "vec_unpacks_hi_v8hi"
7089 [(match_operand:V4SI 0 "register_operand" "")
7090 (match_operand:V8HI 1 "register_operand" "")]
7094 ix86_expand_sse4_unpack (operands, false, true);
7096 ix86_expand_sse_unpack (operands, false, true);
;; Widen V8HI operand 1 into V4SI operand 0 (unsigned, low half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (true, false).
7100 (define_expand "vec_unpacku_lo_v8hi"
7101 [(match_operand:V4SI 0 "register_operand" "")
7102 (match_operand:V8HI 1 "register_operand" "")]
7106 ix86_expand_sse4_unpack (operands, true, false);
7108 ix86_expand_sse_unpack (operands, true, false);
;; Widen V8HI operand 1 into V4SI operand 0 (signed, low half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (false, false).
7112 (define_expand "vec_unpacks_lo_v8hi"
7113 [(match_operand:V4SI 0 "register_operand" "")
7114 (match_operand:V8HI 1 "register_operand" "")]
7118 ix86_expand_sse4_unpack (operands, false, false);
7120 ix86_expand_sse_unpack (operands, false, false);
;; Widen V4SI operand 1 into V2DI operand 0 (unsigned, high half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (true, true).
7124 (define_expand "vec_unpacku_hi_v4si"
7125 [(match_operand:V2DI 0 "register_operand" "")
7126 (match_operand:V4SI 1 "register_operand" "")]
7130 ix86_expand_sse4_unpack (operands, true, true);
7132 ix86_expand_sse_unpack (operands, true, true);
;; Widen V4SI operand 1 into V2DI operand 0 (signed, high half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (false, true).
7136 (define_expand "vec_unpacks_hi_v4si"
7137 [(match_operand:V2DI 0 "register_operand" "")
7138 (match_operand:V4SI 1 "register_operand" "")]
7142 ix86_expand_sse4_unpack (operands, false, true);
7144 ix86_expand_sse_unpack (operands, false, true);
;; Widen V4SI operand 1 into V2DI operand 0 (unsigned, low half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (true, false).
7148 (define_expand "vec_unpacku_lo_v4si"
7149 [(match_operand:V2DI 0 "register_operand" "")
7150 (match_operand:V4SI 1 "register_operand" "")]
7154 ix86_expand_sse4_unpack (operands, true, false);
7156 ix86_expand_sse_unpack (operands, true, false);
;; Widen V4SI operand 1 into V2DI operand 0 (signed, low half per the
;; pattern name).  Expands through ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack, both called with flags (false, false).
7160 (define_expand "vec_unpacks_lo_v4si"
7161 [(match_operand:V2DI 0 "register_operand" "")
7162 (match_operand:V4SI 1 "register_operand" "")]
7166 ix86_expand_sse4_unpack (operands, false, false);
7168 ix86_expand_sse_unpack (operands, false, false);
7172 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI unsigned-average pattern.  The RTL combines
;; operands 1 and 2 with a constant vector of sixteen ones, and the
;; preparation statement canonicalises the operands for a PLUS via
;; ix86_fixup_binary_operands_no_copy.
7178 (define_expand "sse2_uavgv16qi3"
7179 [(set (match_operand:V16QI 0 "register_operand" "")
7185 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7187 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7188 (const_vector:V16QI [(const_int 1) (const_int 1)
7189 (const_int 1) (const_int 1)
7190 (const_int 1) (const_int 1)
7191 (const_int 1) (const_int 1)
7192 (const_int 1) (const_int 1)
7193 (const_int 1) (const_int 1)
7194 (const_int 1) (const_int 1)
7195 (const_int 1) (const_int 1)]))
7198 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI unsigned average: three-operand vpavgb.
;; Operand 1 is marked commutative (%) and must be a register after
;; constraint matching; operand 2 may be a register or memory.  The
;; condition also requires ix86_binary_operator_ok for PLUS.
7200 (define_insn "*avx_uavgv16qi3"
7201 [(set (match_operand:V16QI 0 "register_operand" "=x")
7207 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7209 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7210 (const_vector:V16QI [(const_int 1) (const_int 1)
7211 (const_int 1) (const_int 1)
7212 (const_int 1) (const_int 1)
7213 (const_int 1) (const_int 1)
7214 (const_int 1) (const_int 1)
7215 (const_int 1) (const_int 1)
7216 (const_int 1) (const_int 1)
7217 (const_int 1) (const_int 1)]))
7219 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7220 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7221 [(set_attr "type" "sseiadd")
7222 (set_attr "prefix" "vex")
7223 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI unsigned average: two-operand pavgb with
;; operand 1 tied to the destination (constraint %0, commutative).
;; The condition also requires ix86_binary_operator_ok for PLUS.
7225 (define_insn "*sse2_uavgv16qi3"
7226 [(set (match_operand:V16QI 0 "register_operand" "=x")
7232 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7234 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7235 (const_vector:V16QI [(const_int 1) (const_int 1)
7236 (const_int 1) (const_int 1)
7237 (const_int 1) (const_int 1)
7238 (const_int 1) (const_int 1)
7239 (const_int 1) (const_int 1)
7240 (const_int 1) (const_int 1)
7241 (const_int 1) (const_int 1)
7242 (const_int 1) (const_int 1)]))
7244 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7245 "pavgb\t{%2, %0|%0, %2}"
7246 [(set_attr "type" "sseiadd")
7247 (set_attr "prefix_data16" "1")
7248 (set_attr "mode" "TI")])
;; Expander for the V8HI unsigned-average pattern.  The RTL combines
;; operands 1 and 2 with a constant vector of eight ones, and the
;; preparation statement canonicalises the operands for a PLUS via
;; ix86_fixup_binary_operands_no_copy.
7250 (define_expand "sse2_uavgv8hi3"
7251 [(set (match_operand:V8HI 0 "register_operand" "")
7257 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7259 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7260 (const_vector:V8HI [(const_int 1) (const_int 1)
7261 (const_int 1) (const_int 1)
7262 (const_int 1) (const_int 1)
7263 (const_int 1) (const_int 1)]))
7266 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI unsigned average: three-operand vpavgw.
;; Operand 1 is marked commutative (%); operand 2 may be a register or
;; memory.  The condition also requires ix86_binary_operator_ok for PLUS.
7268 (define_insn "*avx_uavgv8hi3"
7269 [(set (match_operand:V8HI 0 "register_operand" "=x")
7275 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7277 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7278 (const_vector:V8HI [(const_int 1) (const_int 1)
7279 (const_int 1) (const_int 1)
7280 (const_int 1) (const_int 1)
7281 (const_int 1) (const_int 1)]))
7283 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7284 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7285 [(set_attr "type" "sseiadd")
7286 (set_attr "prefix" "vex")
7287 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI unsigned average: two-operand pavgw with
;; operand 1 tied to the destination (constraint %0, commutative).
;; The condition also requires ix86_binary_operator_ok for PLUS.
7289 (define_insn "*sse2_uavgv8hi3"
7290 [(set (match_operand:V8HI 0 "register_operand" "=x")
7296 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7298 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7299 (const_vector:V8HI [(const_int 1) (const_int 1)
7300 (const_int 1) (const_int 1)
7301 (const_int 1) (const_int 1)
7302 (const_int 1) (const_int 1)]))
7304 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7305 "pavgw\t{%2, %0|%0, %2}"
7306 [(set_attr "type" "sseiadd")
7307 (set_attr "prefix_data16" "1")
7308 (set_attr "mode" "TI")])
7310 ;; The correct representation for this is absolutely enormous, and
7311 ;; surely not generally useful.
;; VEX-encoded vpsadbw, modelled as an opaque unspec that takes two V16QI
;; inputs (register operand 1, register-or-memory operand 2) and yields
;; a V2DI result.
7312 (define_insn "*avx_psadbw"
7313 [(set (match_operand:V2DI 0 "register_operand" "=x")
7314 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7315 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7318 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7319 [(set_attr "type" "sseiadd")
7320 (set_attr "prefix" "vex")
7321 (set_attr "mode" "TI")])
;; Legacy two-operand psadbw, modelled as an opaque unspec over two V16QI
;; inputs with a V2DI result.  Operand 1 is tied to the destination
;; register; operand 2 may be a register or memory.
7323 (define_insn "sse2_psadbw"
7324 [(set (match_operand:V2DI 0 "register_operand" "=x")
7325 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7326 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7329 "psadbw\t{%2, %0|%0, %2}"
7330 [(set_attr "type" "sseiadd")
7331 (set_attr "atom_unit" "simul")
7332 (set_attr "prefix_data16" "1")
7333 (set_attr "mode" "TI")])
;; vmovmsk for the 256-bit float modes of the AVX256MODEF2P iterator:
;; takes a vector register (operand 1) and writes an SImode general
;; register.  The mnemonic suffix comes from the ssemodesuffix attribute.
7335 (define_insn "avx_movmsk<ssemodesuffix>256"
7336 [(set (match_operand:SI 0 "register_operand" "=r")
7338 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7340 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7341 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7342 [(set_attr "type" "ssecvt")
7343 (set_attr "prefix" "vex")
7344 (set_attr "mode" "<MODE>")])
;; movmsk for the float modes of the SSEMODEF2P iterator: takes a vector
;; register (operand 1) and writes an SImode general register.  Enabled
;; when SSE_VEC_FLOAT_MODE_P accepts the mode; prefix is maybe_vex.
7346 (define_insn "<sse>_movmsk<ssemodesuffix>"
7347 [(set (match_operand:SI 0 "register_operand" "=r")
7349 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7351 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7352 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7353 [(set_attr "type" "ssemov")
7354 (set_attr "prefix" "maybe_vex")
7355 (set_attr "mode" "<MODE>")])
;; pmovmskb: takes V16QI register operand 1 and writes an SImode general
;; register.  Modelled as an unspec with a single input.
7357 (define_insn "sse2_pmovmskb"
7358 [(set (match_operand:SI 0 "register_operand" "=r")
7359 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7362 "%vpmovmskb\t{%1, %0|%0, %1}"
7363 [(set_attr "type" "ssemov")
7364 (set_attr "prefix_data16" "1")
7365 (set_attr "prefix" "maybe_vex")
7366 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) is set
;; from an unspec whose visible inputs are V16QI register operands 1
;; and 2.
7368 (define_expand "sse2_maskmovdqu"
7369 [(set (match_operand:V16QI 0 "memory_operand" "")
7370 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7371 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The store address is SImode operand 0, restricted
;; by constraint D; it does not appear in the assembler template, which
;; names only operands 1 and 2.  The previous contents of the destination
;; memory are listed as an unspec input via (mem (match_dup 0)).
;; length_vex is fixed at 3.
7376 (define_insn "*sse2_maskmovdqu"
7377 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7378 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7379 (match_operand:V16QI 2 "register_operand" "x")
7380 (mem:V16QI (match_dup 0))]
7382 "TARGET_SSE2 && !TARGET_64BIT"
7383 ;; @@@ check ordering of operands in intel/nonintel syntax
7384 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7385 [(set_attr "type" "ssemov")
7386 (set_attr "prefix_data16" "1")
7387 ;; The implicit %rdi operand confuses default length_vex computation.
7388 (set_attr "length_vex" "3")
7389 (set_attr "prefix" "maybe_vex")
7390 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern but with a DImode
;; address register (operand 0, constraint D).  length_vex is computed
;; from the register number of operand 2: 3 + 1 when it is at or above
;; FIRST_REX_SSE_REG, otherwise 2 + 1.
7392 (define_insn "*sse2_maskmovdqu_rex64"
7393 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7394 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7395 (match_operand:V16QI 2 "register_operand" "x")
7396 (mem:V16QI (match_dup 0))]
7398 "TARGET_SSE2 && TARGET_64BIT"
7399 ;; @@@ check ordering of operands in intel/nonintel syntax
7400 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7401 [(set_attr "type" "ssemov")
7402 (set_attr "prefix_data16" "1")
7403 ;; The implicit %rdi operand confuses default length_vex computation.
7404 (set (attr "length_vex")
7405 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
7406 (set_attr "prefix" "maybe_vex")
7407 (set_attr "mode" "TI")])
;; ldmxcsr: volatile unspec that consumes an SImode memory operand.
;; The memory attribute is load and atom_sse_attr is mxcsr.
7409 (define_insn "sse_ldmxcsr"
7410 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7414 [(set_attr "type" "sse")
7415 (set_attr "atom_sse_attr" "mxcsr")
7416 (set_attr "prefix" "maybe_vex")
7417 (set_attr "memory" "load")])
;; stmxcsr: sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR.  The memory attribute is store and atom_sse_attr is
;; mxcsr.
7419 (define_insn "sse_stmxcsr"
7420 [(set (match_operand:SI 0 "memory_operand" "=m")
7421 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7424 [(set_attr "type" "sse")
7425 (set_attr "atom_sse_attr" "mxcsr")
7426 (set_attr "prefix" "maybe_vex")
7427 (set_attr "memory" "store")])
;; Expander for sfence, available with SSE or 3DNow!-A.  Operand 0 is
;; created here as a volatile BLKmode MEM whose address is a Pmode
;; SCRATCH, and the UNSPEC_SFENCE unspec is expressed on that MEM.
7429 (define_expand "sse_sfence"
7431 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7432 "TARGET_SSE || TARGET_3DNOW_A"
7434 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7435 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence pattern: a BLKmode operand (no predicate) set
;; from UNSPEC_SFENCE of itself.  length_address is 0, atom_sse_attr is
;; fence and memory is unknown.
7438 (define_insn "*sse_sfence"
7439 [(set (match_operand:BLK 0 "" "")
7440 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7441 "TARGET_SSE || TARGET_3DNOW_A"
7443 [(set_attr "type" "sse")
7444 (set_attr "length_address" "0")
7445 (set_attr "atom_sse_attr" "fence")
7446 (set_attr "memory" "unknown")])
;; clflush: volatile unspec that takes an address operand (predicate
;; address_operand, constraint p).  atom_sse_attr is fence and memory is
;; unknown.
7448 (define_insn "sse2_clflush"
7449 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7453 [(set_attr "type" "sse")
7454 (set_attr "atom_sse_attr" "fence")
7455 (set_attr "memory" "unknown")])
;; Expander for mfence.  Operand 0 is created here as a volatile BLKmode
;; MEM whose address is a Pmode SCRATCH, and the UNSPEC_MFENCE unspec is
;; expressed on that MEM.
7457 (define_expand "sse2_mfence"
7459 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7462 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7463 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence pattern: a BLKmode operand set from
;; UNSPEC_MFENCE of itself.  Enabled when TARGET_64BIT or TARGET_SSE2
;; holds.  atom_sse_attr is fence and memory is unknown.
7466 (define_insn "*sse2_mfence"
7467 [(set (match_operand:BLK 0 "" "")
7468 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7469 "TARGET_64BIT || TARGET_SSE2"
7471 [(set_attr "type" "sse")
7472 (set_attr "length_address" "0")
7473 (set_attr "atom_sse_attr" "fence")
7474 (set_attr "memory" "unknown")])
;; Expander for lfence.  Operand 0 is created here as a volatile BLKmode
;; MEM whose address is a Pmode SCRATCH, and the UNSPEC_LFENCE unspec is
;; expressed on that MEM.
7476 (define_expand "sse2_lfence"
7478 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7481 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7482 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence pattern: a BLKmode operand set from
;; UNSPEC_LFENCE of itself.  Unlike the sfence and mfence matchers, which
;; use the value fence, atom_sse_attr here is lfence.
7485 (define_insn "*sse2_lfence"
7486 [(set (match_operand:BLK 0 "" "")
7487 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7490 [(set_attr "type" "sse")
7491 (set_attr "length_address" "0")
7492 (set_attr "atom_sse_attr" "lfence")
7493 (set_attr "memory" "unknown")])
;; mwait: volatile unspec taking two SImode register operands pinned by
;; constraints a and c.  The same SImode pattern serves 64-bit code, for
;; the reason given in the comment inside the pattern.  The length
;; attribute is fixed at 3.
7495 (define_insn "sse3_mwait"
7496 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7497 (match_operand:SI 1 "register_operand" "c")]
7500 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7501 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7502 ;; we only need to set up 32bit registers.
7504 [(set_attr "length" "3")])
;; 32-bit monitor: volatile unspec taking three SImode register operands
;; pinned by constraints a, c and d, printed explicitly in the template.
;; Enabled for SSE3 when not TARGET_64BIT.  The length attribute is 3.
7506 (define_insn "sse3_monitor"
7507 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7508 (match_operand:SI 1 "register_operand" "c")
7509 (match_operand:SI 2 "register_operand" "d")]
7511 "TARGET_SSE3 && !TARGET_64BIT"
7512 "monitor\t%0, %1, %2"
7513 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 (constraint a) is DImode, while operands 1
;; and 2 (constraints c and d) stay SImode for the reason given in the
;; comment inside the pattern.  Enabled for SSE3 with TARGET_64BIT.
;; The length attribute is 3.
7515 (define_insn "sse3_monitor64"
7516 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7517 (match_operand:SI 1 "register_operand" "c")
7518 (match_operand:SI 2 "register_operand" "d")]
7520 "TARGET_SSE3 && TARGET_64BIT"
7521 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7522 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7523 ;; zero extended to 64bit, we only need to set up 32bit registers.
7525 [(set_attr "length" "3")])
7527 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7529 ;; SSSE3 instructions
7531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded vphaddw on V8HI.  The RTL spells the operation out element
;; by element: HImode elements of operand 1 are selected in adjacent
;; pairs (0,1), (2,3), (4,5), (6,7), followed by the same pairs of
;; operand 2.  Operand 1 must be a register; operand 2 may be memory.
7533 (define_insn "*avx_phaddwv8hi3"
7534 [(set (match_operand:V8HI 0 "register_operand" "=x")
7540 (match_operand:V8HI 1 "register_operand" "x")
7541 (parallel [(const_int 0)]))
7542 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7544 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7545 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7548 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7549 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7551 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7552 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7557 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7558 (parallel [(const_int 0)]))
7559 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7561 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7562 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7565 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7566 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7568 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7569 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7571 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7572 [(set_attr "type" "sseiadd")
7573 (set_attr "prefix_extra" "1")
7574 (set_attr "prefix" "vex")
7575 (set_attr "mode" "TI")])
;; Legacy two-operand phaddw on V8HI.  The element-pair structure is the
;; same as in the VEX form: pairs (0,1), (2,3), (4,5), (6,7) of operand 1
;; and then of operand 2.  Operand 1 is tied to the destination register
;; (constraint 0); operand 2 may be a register or memory.
7577 (define_insn "ssse3_phaddwv8hi3"
7578 [(set (match_operand:V8HI 0 "register_operand" "=x")
7584 (match_operand:V8HI 1 "register_operand" "0")
7585 (parallel [(const_int 0)]))
7586 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7588 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7589 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7592 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7593 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7595 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7596 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7601 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7602 (parallel [(const_int 0)]))
7603 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7605 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7606 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7609 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7610 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7612 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7613 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7615 "phaddw\t{%2, %0|%0, %2}"
7616 [(set_attr "type" "sseiadd")
7617 (set_attr "atom_unit" "complex")
7618 (set_attr "prefix_data16" "1")
7619 (set_attr "prefix_extra" "1")
7620 (set_attr "mode" "TI")])
;; 64-bit PHADDW on V4HI using the "y" register class; emits "phaddw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: elements (0,1) (2,3) of operand 1, then of operand 2.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
7622 (define_insn "ssse3_phaddwv4hi3"
7623 [(set (match_operand:V4HI 0 "register_operand" "=y")
7628 (match_operand:V4HI 1 "register_operand" "0")
7629 (parallel [(const_int 0)]))
7630 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7632 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7633 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7637 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7638 (parallel [(const_int 0)]))
7639 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7641 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7642 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7644 "phaddw\t{%2, %0|%0, %2}"
7645 [(set_attr "type" "sseiadd")
7646 (set_attr "atom_unit" "complex")
7647 (set_attr "prefix_extra" "1")
7648 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7649 (set_attr "mode" "DI")])
;; VEX-encoded PHADDD on V4SI; emits the three-operand "vphaddd".
;; Operand 1 is a register, operand 2 a register or memory.  The RTL
;; selects SI element pairs (0,1) (2,3) of operand 1, then of operand 2.
7651 (define_insn "*avx_phadddv4si3"
7652 [(set (match_operand:V4SI 0 "register_operand" "=x")
7657 (match_operand:V4SI 1 "register_operand" "x")
7658 (parallel [(const_int 0)]))
7659 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7661 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7662 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7666 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7667 (parallel [(const_int 0)]))
7668 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7670 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7671 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7673 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
7674 [(set_attr "type" "sseiadd")
7675 (set_attr "prefix_extra" "1")
7676 (set_attr "prefix" "vex")
7677 (set_attr "mode" "TI")])
;; Non-VEX PHADDD on V4SI; emits the two-operand "phaddd".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: SI elements (0,1) (2,3) of operand 1, then of operand 2.
7679 (define_insn "ssse3_phadddv4si3"
7680 [(set (match_operand:V4SI 0 "register_operand" "=x")
7685 (match_operand:V4SI 1 "register_operand" "0")
7686 (parallel [(const_int 0)]))
7687 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7689 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7690 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7694 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7695 (parallel [(const_int 0)]))
7696 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7698 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7699 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7701 "phaddd\t{%2, %0|%0, %2}"
7702 [(set_attr "type" "sseiadd")
7703 (set_attr "atom_unit" "complex")
7704 (set_attr "prefix_data16" "1")
7705 (set_attr "prefix_extra" "1")
7706 (set_attr "mode" "TI")])
;; 64-bit PHADDD on V2SI using the "y" register class; emits "phaddd".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Elements 0 and 1 of operand 1 are paired, then elements 0 and 1 of
;; operand 2.  prefix_rex is computed via x86_extended_reg_mentioned_p.
7708 (define_insn "ssse3_phadddv2si3"
7709 [(set (match_operand:V2SI 0 "register_operand" "=y")
7713 (match_operand:V2SI 1 "register_operand" "0")
7714 (parallel [(const_int 0)]))
7715 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7718 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7719 (parallel [(const_int 0)]))
7720 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7722 "phaddd\t{%2, %0|%0, %2}"
7723 [(set_attr "type" "sseiadd")
7724 (set_attr "atom_unit" "complex")
7725 (set_attr "prefix_extra" "1")
7726 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7727 (set_attr "mode" "DI")])
;; VEX-encoded PHADDSW on V8HI; emits the three-operand "vphaddsw".
;; Operand 1 is a register, operand 2 a register or memory.  The RTL
;; selects adjacent HI element pairs (0,1) ... (6,7) of operand 1, then
;; of operand 2.
7729 (define_insn "*avx_phaddswv8hi3"
7730 [(set (match_operand:V8HI 0 "register_operand" "=x")
7736 (match_operand:V8HI 1 "register_operand" "x")
7737 (parallel [(const_int 0)]))
7738 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7740 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7741 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7744 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7745 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7747 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7748 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7753 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7754 (parallel [(const_int 0)]))
7755 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7757 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7758 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7761 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7762 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7764 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7765 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7767 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
7768 [(set_attr "type" "sseiadd")
7769 (set_attr "prefix_extra" "1")
7770 (set_attr "prefix" "vex")
7771 (set_attr "mode" "TI")])
;; Non-VEX PHADDSW on V8HI; emits the two-operand "phaddsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: adjacent HI elements (0,1) ... (6,7) of operand 1,
;; then of operand 2.
7773 (define_insn "ssse3_phaddswv8hi3"
7774 [(set (match_operand:V8HI 0 "register_operand" "=x")
7780 (match_operand:V8HI 1 "register_operand" "0")
7781 (parallel [(const_int 0)]))
7782 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7784 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7785 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7788 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7789 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7791 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7792 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7797 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7798 (parallel [(const_int 0)]))
7799 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7801 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7802 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7805 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7806 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7808 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7809 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7811 "phaddsw\t{%2, %0|%0, %2}"
7812 [(set_attr "type" "sseiadd")
7813 (set_attr "atom_unit" "complex")
7814 (set_attr "prefix_data16" "1")
7815 (set_attr "prefix_extra" "1")
7816 (set_attr "mode" "TI")])
;; 64-bit PHADDSW on V4HI using the "y" register class; emits "phaddsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: HI elements (0,1) (2,3) of operand 1, then of operand 2.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
7818 (define_insn "ssse3_phaddswv4hi3"
7819 [(set (match_operand:V4HI 0 "register_operand" "=y")
7824 (match_operand:V4HI 1 "register_operand" "0")
7825 (parallel [(const_int 0)]))
7826 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7828 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7829 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7833 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7834 (parallel [(const_int 0)]))
7835 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7837 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7838 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7840 "phaddsw\t{%2, %0|%0, %2}"
7841 [(set_attr "type" "sseiadd")
7842 (set_attr "atom_unit" "complex")
7843 (set_attr "prefix_extra" "1")
7844 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7845 (set_attr "mode" "DI")])
;; VEX-encoded PHSUBW on V8HI; emits the three-operand "vphsubw".
;; Operand 1 is a register, operand 2 a register or memory.  The RTL
;; selects adjacent HI element pairs (0,1) ... (6,7) of operand 1, then
;; of operand 2.
7847 (define_insn "*avx_phsubwv8hi3"
7848 [(set (match_operand:V8HI 0 "register_operand" "=x")
7854 (match_operand:V8HI 1 "register_operand" "x")
7855 (parallel [(const_int 0)]))
7856 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7858 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7859 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7862 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7863 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7865 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7866 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7871 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7872 (parallel [(const_int 0)]))
7873 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7875 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7876 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7879 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7880 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7882 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7883 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7885 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
7886 [(set_attr "type" "sseiadd")
7887 (set_attr "prefix_extra" "1")
7888 (set_attr "prefix" "vex")
7889 (set_attr "mode" "TI")])
;; Non-VEX PHSUBW on V8HI; emits the two-operand "phsubw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: adjacent HI elements (0,1) ... (6,7) of operand 1,
;; then of operand 2.
7891 (define_insn "ssse3_phsubwv8hi3"
7892 [(set (match_operand:V8HI 0 "register_operand" "=x")
7898 (match_operand:V8HI 1 "register_operand" "0")
7899 (parallel [(const_int 0)]))
7900 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7902 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7903 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7906 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7907 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7909 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7910 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7915 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7916 (parallel [(const_int 0)]))
7917 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7919 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7920 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7923 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7924 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7926 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7927 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7929 "phsubw\t{%2, %0|%0, %2}"
7930 [(set_attr "type" "sseiadd")
7931 (set_attr "atom_unit" "complex")
7932 (set_attr "prefix_data16" "1")
7933 (set_attr "prefix_extra" "1")
7934 (set_attr "mode" "TI")])
;; 64-bit PHSUBW on V4HI using the "y" register class; emits "phsubw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: HI elements (0,1) (2,3) of operand 1, then of operand 2.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
7936 (define_insn "ssse3_phsubwv4hi3"
7937 [(set (match_operand:V4HI 0 "register_operand" "=y")
7942 (match_operand:V4HI 1 "register_operand" "0")
7943 (parallel [(const_int 0)]))
7944 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7946 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7947 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7951 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7952 (parallel [(const_int 0)]))
7953 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7955 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7956 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7958 "phsubw\t{%2, %0|%0, %2}"
7959 [(set_attr "type" "sseiadd")
7960 (set_attr "atom_unit" "complex")
7961 (set_attr "prefix_extra" "1")
7962 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7963 (set_attr "mode" "DI")])
;; VEX-encoded PHSUBD on V4SI; emits the three-operand "vphsubd".
;; Operand 1 is a register, operand 2 a register or memory.  The RTL
;; selects SI element pairs (0,1) (2,3) of operand 1, then of operand 2.
7965 (define_insn "*avx_phsubdv4si3"
7966 [(set (match_operand:V4SI 0 "register_operand" "=x")
7971 (match_operand:V4SI 1 "register_operand" "x")
7972 (parallel [(const_int 0)]))
7973 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7975 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7976 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7980 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7981 (parallel [(const_int 0)]))
7982 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7984 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7985 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7987 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
7988 [(set_attr "type" "sseiadd")
7989 (set_attr "prefix_extra" "1")
7990 (set_attr "prefix" "vex")
7991 (set_attr "mode" "TI")])
;; Non-VEX PHSUBD on V4SI; emits the two-operand "phsubd".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: SI elements (0,1) (2,3) of operand 1, then of operand 2.
7993 (define_insn "ssse3_phsubdv4si3"
7994 [(set (match_operand:V4SI 0 "register_operand" "=x")
7999 (match_operand:V4SI 1 "register_operand" "0")
8000 (parallel [(const_int 0)]))
8001 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8003 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8004 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8008 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8009 (parallel [(const_int 0)]))
8010 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8012 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8013 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8015 "phsubd\t{%2, %0|%0, %2}"
8016 [(set_attr "type" "sseiadd")
8017 (set_attr "atom_unit" "complex")
8018 (set_attr "prefix_data16" "1")
8019 (set_attr "prefix_extra" "1")
8020 (set_attr "mode" "TI")])
;; 64-bit PHSUBD on V2SI using the "y" register class; emits "phsubd".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Elements 0 and 1 of operand 1 are paired, then elements 0 and 1 of
;; operand 2.  prefix_rex is computed via x86_extended_reg_mentioned_p.
8022 (define_insn "ssse3_phsubdv2si3"
8023 [(set (match_operand:V2SI 0 "register_operand" "=y")
8027 (match_operand:V2SI 1 "register_operand" "0")
8028 (parallel [(const_int 0)]))
8029 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8032 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8033 (parallel [(const_int 0)]))
8034 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8036 "phsubd\t{%2, %0|%0, %2}"
8037 [(set_attr "type" "sseiadd")
8038 (set_attr "atom_unit" "complex")
8039 (set_attr "prefix_extra" "1")
8040 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8041 (set_attr "mode" "DI")])
;; VEX-encoded PHSUBSW on V8HI; emits the three-operand "vphsubsw".
;; Operand 1 is a register, operand 2 a register or memory.  The RTL
;; selects adjacent HI element pairs (0,1) ... (6,7) of operand 1, then
;; of operand 2.
8043 (define_insn "*avx_phsubswv8hi3"
8044 [(set (match_operand:V8HI 0 "register_operand" "=x")
8050 (match_operand:V8HI 1 "register_operand" "x")
8051 (parallel [(const_int 0)]))
8052 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8054 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8055 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8058 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8059 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8061 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8062 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8067 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8068 (parallel [(const_int 0)]))
8069 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8071 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8072 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8075 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8076 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8078 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8079 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8081 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8082 [(set_attr "type" "sseiadd")
8083 (set_attr "prefix_extra" "1")
8084 (set_attr "prefix" "vex")
8085 (set_attr "mode" "TI")])
;; Non-VEX PHSUBSW on V8HI; emits the two-operand "phsubsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: adjacent HI elements (0,1) ... (6,7) of operand 1,
;; then of operand 2.
8087 (define_insn "ssse3_phsubswv8hi3"
8088 [(set (match_operand:V8HI 0 "register_operand" "=x")
8094 (match_operand:V8HI 1 "register_operand" "0")
8095 (parallel [(const_int 0)]))
8096 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8098 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8099 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8102 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8103 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8105 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8106 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8111 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8112 (parallel [(const_int 0)]))
8113 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8115 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8116 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8119 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8120 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8122 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8123 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8125 "phsubsw\t{%2, %0|%0, %2}"
8126 [(set_attr "type" "sseiadd")
8127 (set_attr "atom_unit" "complex")
8128 (set_attr "prefix_data16" "1")
8129 (set_attr "prefix_extra" "1")
8130 (set_attr "mode" "TI")])
;; 64-bit PHSUBSW on V4HI using the "y" register class; emits "phsubsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; Pairs selected: HI elements (0,1) (2,3) of operand 1, then of operand 2.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8132 (define_insn "ssse3_phsubswv4hi3"
8133 [(set (match_operand:V4HI 0 "register_operand" "=y")
8138 (match_operand:V4HI 1 "register_operand" "0")
8139 (parallel [(const_int 0)]))
8140 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8142 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8143 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8147 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8148 (parallel [(const_int 0)]))
8149 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8151 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8152 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8154 "phsubsw\t{%2, %0|%0, %2}"
8155 [(set_attr "type" "sseiadd")
8156 (set_attr "atom_unit" "complex")
8157 (set_attr "prefix_extra" "1")
8158 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8159 (set_attr "mode" "DI")])
;; VEX-encoded PMADDUBSW, 128-bit; emits the three-operand "vpmaddubsw".
;; Both sources are V16QI (operand 2 may be memory) and the result is V8HI.
;; Each source is read through two V8QI selections whose index lists start
;; at element 0 and at element 1 respectively.
8161 (define_insn "*avx_pmaddubsw128"
8162 [(set (match_operand:V8HI 0 "register_operand" "=x")
8167 (match_operand:V16QI 1 "register_operand" "x")
8168 (parallel [(const_int 0)
8178 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8179 (parallel [(const_int 0)
8189 (vec_select:V8QI (match_dup 1)
8190 (parallel [(const_int 1)
8199 (vec_select:V8QI (match_dup 2)
8200 (parallel [(const_int 1)
8207 (const_int 15)]))))))]
8209 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8210 [(set_attr "type" "sseiadd")
8211 (set_attr "prefix_extra" "1")
8212 (set_attr "prefix" "vex")
8213 (set_attr "mode" "TI")])
;; Non-VEX PMADDUBSW, 128-bit; emits the two-operand "pmaddubsw".
;; Sources are V16QI, the result is V8HI; operand 1 is tied to the
;; destination ("0") and operand 2 may be memory.  Each source is read
;; through two V8QI selections starting at element 0 and at element 1.
8215 (define_insn "ssse3_pmaddubsw128"
8216 [(set (match_operand:V8HI 0 "register_operand" "=x")
8221 (match_operand:V16QI 1 "register_operand" "0")
8222 (parallel [(const_int 0)
8232 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8233 (parallel [(const_int 0)
8243 (vec_select:V8QI (match_dup 1)
8244 (parallel [(const_int 1)
8253 (vec_select:V8QI (match_dup 2)
8254 (parallel [(const_int 1)
8261 (const_int 15)]))))))]
8263 "pmaddubsw\t{%2, %0|%0, %2}"
8264 [(set_attr "type" "sseiadd")
8265 (set_attr "atom_unit" "simul")
8266 (set_attr "prefix_data16" "1")
8267 (set_attr "prefix_extra" "1")
8268 (set_attr "mode" "TI")])
;; 64-bit PMADDUBSW using the "y" register class; emits "pmaddubsw".
;; Sources are V8QI, the result is V4HI; operand 1 is tied to the
;; destination ("0") and operand 2 may be memory.  Each source is read
;; through two V4QI selections starting at element 0 and at element 1.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8270 (define_insn "ssse3_pmaddubsw"
8271 [(set (match_operand:V4HI 0 "register_operand" "=y")
8276 (match_operand:V8QI 1 "register_operand" "0")
8277 (parallel [(const_int 0)
8283 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8284 (parallel [(const_int 0)
8290 (vec_select:V4QI (match_dup 1)
8291 (parallel [(const_int 1)
8296 (vec_select:V4QI (match_dup 2)
8297 (parallel [(const_int 1)
8300 (const_int 7)]))))))]
8302 "pmaddubsw\t{%2, %0|%0, %2}"
8303 [(set_attr "type" "sseiadd")
8304 (set_attr "atom_unit" "simul")
8305 (set_attr "prefix_extra" "1")
8306 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8307 (set_attr "mode" "DI")])
;; Named expander for PMULHRSW on V8HI.  Both sources are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands) to
;; legitimize them.  The pattern uses an all-ones V8HI constant vector.
8309 (define_expand "ssse3_pmulhrswv8hi3"
8310 [(set (match_operand:V8HI 0 "register_operand" "")
8317 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8319 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8321 (const_vector:V8HI [(const_int 1) (const_int 1)
8322 (const_int 1) (const_int 1)
8323 (const_int 1) (const_int 1)
8324 (const_int 1) (const_int 1)]))
8327 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VEX-encoded PMULHRSW on V8HI; emits the three-operand "vpmulhrsw".
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands.  The "%" in operand 1's constraint marks operands 1 and 2 as
;; commutative.  The pattern uses an all-ones V8HI constant vector.
8329 (define_insn "*avx_pmulhrswv8hi3"
8330 [(set (match_operand:V8HI 0 "register_operand" "=x")
8337 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8339 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8341 (const_vector:V8HI [(const_int 1) (const_int 1)
8342 (const_int 1) (const_int 1)
8343 (const_int 1) (const_int 1)
8344 (const_int 1) (const_int 1)]))
8346 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8347 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8348 [(set_attr "type" "sseimul")
8349 (set_attr "prefix_extra" "1")
8350 (set_attr "prefix" "vex")
8351 (set_attr "mode" "TI")])
;; Non-VEX PMULHRSW on V8HI; emits the two-operand "pmulhrsw".
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Constraint "%0" ties operand 1 to the destination and marks
;; operands 1 and 2 as commutative.  Uses an all-ones V8HI constant vector.
8353 (define_insn "*ssse3_pmulhrswv8hi3"
8354 [(set (match_operand:V8HI 0 "register_operand" "=x")
8361 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8363 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8365 (const_vector:V8HI [(const_int 1) (const_int 1)
8366 (const_int 1) (const_int 1)
8367 (const_int 1) (const_int 1)
8368 (const_int 1) (const_int 1)]))
8370 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8371 "pmulhrsw\t{%2, %0|%0, %2}"
8372 [(set_attr "type" "sseimul")
8373 (set_attr "prefix_data16" "1")
8374 (set_attr "prefix_extra" "1")
8375 (set_attr "mode" "TI")])
;; Named expander for PMULHRSW on V4HI.  Both sources are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands) to
;; legitimize them.  The pattern uses an all-ones V4HI constant vector.
8377 (define_expand "ssse3_pmulhrswv4hi3"
8378 [(set (match_operand:V4HI 0 "register_operand" "")
8385 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8387 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8389 (const_vector:V4HI [(const_int 1) (const_int 1)
8390 (const_int 1) (const_int 1)]))
8393 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; 64-bit PMULHRSW on V4HI using the "y" register class; emits "pmulhrsw".
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Constraint "%0" ties operand 1 to the destination and marks
;; operands 1 and 2 as commutative.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
8395 (define_insn "*ssse3_pmulhrswv4hi3"
8396 [(set (match_operand:V4HI 0 "register_operand" "=y")
8403 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8405 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8407 (const_vector:V4HI [(const_int 1) (const_int 1)
8408 (const_int 1) (const_int 1)]))
8410 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8411 "pmulhrsw\t{%2, %0|%0, %2}"
8412 [(set_attr "type" "sseimul")
8413 (set_attr "prefix_extra" "1")
8414 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8415 (set_attr "mode" "DI")])
;; VEX-encoded PSHUFB on V16QI; emits the three-operand "vpshufb".
;; Modelled as an unspec over two V16QI inputs: operand 1 must be a
;; register, operand 2 may be a register or memory.
8417 (define_insn "*avx_pshufbv16qi3"
8418 [(set (match_operand:V16QI 0 "register_operand" "=x")
8419 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8420 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8423 "vpshufb\t{%2, %1, %0|%0, %1, %2}"
8424 [(set_attr "type" "sselog1")
8425 (set_attr "prefix_extra" "1")
8426 (set_attr "prefix" "vex")
8427 (set_attr "mode" "TI")])
;; Non-VEX PSHUFB on V16QI; emits the two-operand "pshufb".
;; Modelled as an unspec over two V16QI inputs: operand 1 is tied to the
;; destination ("0"), operand 2 may be a register or memory.
8429 (define_insn "ssse3_pshufbv16qi3"
8430 [(set (match_operand:V16QI 0 "register_operand" "=x")
8431 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8432 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8435 "pshufb\t{%2, %0|%0, %2}"
8436 [(set_attr "type" "sselog1")
8437 (set_attr "prefix_data16" "1")
8438 (set_attr "prefix_extra" "1")
8439 (set_attr "mode" "TI")])
;; 64-bit PSHUFB on V8QI using the "y" register class; emits "pshufb".
;; Modelled as an unspec over two V8QI inputs: operand 1 is tied to the
;; destination ("0"), operand 2 may be a register or memory.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8441 (define_insn "ssse3_pshufbv8qi3"
8442 [(set (match_operand:V8QI 0 "register_operand" "=y")
8443 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8444 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8447 "pshufb\t{%2, %0|%0, %2}"
8448 [(set_attr "type" "sselog1")
8449 (set_attr "prefix_extra" "1")
8450 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8451 (set_attr "mode" "DI")])
;; VEX-encoded PSIGN, instantiated for each mode of the SSEMODE124
;; iterator; emits three-operand "vpsign" with the element-size suffix
;; supplied by <ssevecsize>.  Operand 1 must be a register, operand 2 may
;; be a register or memory.
8453 (define_insn "*avx_psign<mode>3"
8454 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8456 [(match_operand:SSEMODE124 1 "register_operand" "x")
8457 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8460 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8461 [(set_attr "type" "sselog1")
8462 (set_attr "prefix_extra" "1")
8463 (set_attr "prefix" "vex")
8464 (set_attr "mode" "TI")])
;; Non-VEX PSIGN, instantiated for each mode of the SSEMODE124 iterator;
;; emits two-operand "psign" with the element-size suffix supplied by
;; <ssevecsize>.  Operand 1 is tied to the destination ("0"); operand 2
;; may be a register or memory.
8466 (define_insn "ssse3_psign<mode>3"
8467 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8469 [(match_operand:SSEMODE124 1 "register_operand" "0")
8470 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8473 "psign<ssevecsize>\t{%2, %0|%0, %2}"
8474 [(set_attr "type" "sselog1")
8475 (set_attr "prefix_data16" "1")
8476 (set_attr "prefix_extra" "1")
8477 (set_attr "mode" "TI")])
;; 64-bit PSIGN using the "y" register class, instantiated for each mode
;; of the MMXMODEI iterator; emits "psign" with the element-size suffix
;; supplied by <mmxvecsize>.  Operand 1 is tied to the destination ("0");
;; operand 2 may be memory.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
8479 (define_insn "ssse3_psign<mode>3"
8480 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8482 [(match_operand:MMXMODEI 1 "register_operand" "0")
8483 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8486 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
8487 [(set_attr "type" "sselog1")
8488 (set_attr "prefix_extra" "1")
8489 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8490 (set_attr "mode" "DI")])
;; VEX-encoded PALIGNR on TImode; emits the four-operand "vpalignr".
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the RTL-level value is eight times
;; the immediate that reaches the assembler.
8492 (define_insn "*avx_palignrti"
8493 [(set (match_operand:TI 0 "register_operand" "=x")
8494 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8495 (match_operand:TI 2 "nonimmediate_operand" "xm")
8496 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8500 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8501 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8503 [(set_attr "type" "sseishft")
8504 (set_attr "prefix_extra" "1")
8505 (set_attr "length_immediate" "1")
8506 (set_attr "prefix" "vex")
8507 (set_attr "mode" "TI")])
;; Non-VEX PALIGNR on TImode; emits the three-operand "palignr".
;; Operand 1 is tied to the destination ("0").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the RTL-level value is eight times the emitted immediate.
8509 (define_insn "ssse3_palignrti"
8510 [(set (match_operand:TI 0 "register_operand" "=x")
8511 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
8512 (match_operand:TI 2 "nonimmediate_operand" "xm")
8513 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8517 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8518 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8520 [(set_attr "type" "sseishft")
8521 (set_attr "atom_unit" "sishuf")
8522 (set_attr "prefix_data16" "1")
8523 (set_attr "prefix_extra" "1")
8524 (set_attr "length_immediate" "1")
8525 (set_attr "mode" "TI")])
;; 64-bit PALIGNR on DImode using the "y" register class; emits "palignr".
;; Operand 1 is tied to the destination ("0").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing.  prefix_rex is computed via x86_extended_reg_mentioned_p.
8527 (define_insn "ssse3_palignrdi"
8528 [(set (match_operand:DI 0 "register_operand" "=y")
8529 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8530 (match_operand:DI 2 "nonimmediate_operand" "ym")
8531 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8535 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8536 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8538 [(set_attr "type" "sseishft")
8539 (set_attr "atom_unit" "sishuf")
8540 (set_attr "prefix_extra" "1")
8541 (set_attr "length_immediate" "1")
8542 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8543 (set_attr "mode" "DI")])
;; Vector absolute value (RTL "abs") for each mode of the SSEMODE124
;; iterator; emits "pabs" with the element-size suffix from <ssevecsize>.
;; The source may be a register or memory.  The template starts with %v
;; and the prefix attribute is "maybe_vex", so this one pattern serves
;; both encodings (presumably %v prints the "v" mnemonic prefix when the
;; VEX form is used -- confirm against the i386 operand printer).
8545 (define_insn "abs<mode>2"
8546 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8547 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
8549 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
8550 [(set_attr "type" "sselog1")
8551 (set_attr "prefix_data16" "1")
8552 (set_attr "prefix_extra" "1")
8553 (set_attr "prefix" "maybe_vex")
8554 (set_attr "mode" "TI")])
;; Vector absolute value (RTL "abs") for each mode of the MMXMODEI
;; iterator, using the "y" register class; emits "pabs" with the
;; element-size suffix from <mmxvecsize>.  The source may be a register or
;; memory.  prefix_rep is forced to 0 and prefix_rex is computed per insn
;; via x86_extended_reg_mentioned_p.
8556 (define_insn "abs<mode>2"
8557 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8558 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8560 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
8561 [(set_attr "type" "sselog1")
8562 (set_attr "prefix_rep" "0")
8563 (set_attr "prefix_extra" "1")
8564 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8565 (set_attr "mode" "DI")])
8567 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8569 ;; AMD SSE4A instructions
8571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store pattern for each mode of the MODEF iterator; emits
;; "movnts" with the suffix from <ssemodefsuffix>.  The destination must
;; be memory and the source an "x"-class register.
8573 (define_insn "sse4a_movnt<mode>"
8574 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8576 [(match_operand:MODEF 1 "register_operand" "x")]
8579 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
8580 [(set_attr "type" "ssemov")
8581 (set_attr "mode" "<MODE>")])
;; SSE4A store of element 0 of a vector register (SSEMODEF2P iterator) to
;; a memory destination of the corresponding scalar mode; emits "movnt"
;; with the suffix from <ssescalarmodesuffix>.
8583 (define_insn "sse4a_vmmovnt<mode>"
8584 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8585 (unspec:<ssescalarmode>
8586 [(vec_select:<ssescalarmode>
8587 (match_operand:SSEMODEF2P 1 "register_operand" "x")
8588 (parallel [(const_int 0)]))]
8591 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
8592 [(set_attr "type" "ssemov")
8593 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A EXTRQ, immediate form; emits "extrq" with two integer constants
;; (operands 2 and 3, both const_int_operand).  Operand 1 is tied to the
;; destination ("0").  length_immediate is 2, one byte per constant.
8595 (define_insn "sse4a_extrqi"
8596 [(set (match_operand:V2DI 0 "register_operand" "=x")
8597 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8598 (match_operand 2 "const_int_operand" "")
8599 (match_operand 3 "const_int_operand" "")]
8602 "extrq\t{%3, %2, %0|%0, %2, %3}"
8603 [(set_attr "type" "sse")
8604 (set_attr "prefix_data16" "1")
8605 (set_attr "length_immediate" "2")
8606 (set_attr "mode" "TI")])
;; SSE4A EXTRQ, register form; emits two-operand "extrq".  Operand 1
;; (V2DI) is tied to the destination ("0"); operand 2 is a V16QI register.
8608 (define_insn "sse4a_extrq"
8609 [(set (match_operand:V2DI 0 "register_operand" "=x")
8610 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8611 (match_operand:V16QI 2 "register_operand" "x")]
8614 "extrq\t{%2, %0|%0, %2}"
8615 [(set_attr "type" "sse")
8616 (set_attr "prefix_data16" "1")
8617 (set_attr "mode" "TI")])
;; SSE4A INSERTQ, immediate form; emits "insertq" with a register source
;; (operand 2) and two integer constants (operands 3 and 4).  Operand 1 is
;; tied to the destination ("0").  The attributes select a rep prefix
;; (prefix_rep 1) instead of a data16 prefix (prefix_data16 0);
;; length_immediate is 2, one byte per constant.
8619 (define_insn "sse4a_insertqi"
8620 [(set (match_operand:V2DI 0 "register_operand" "=x")
8621 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8622 (match_operand:V2DI 2 "register_operand" "x")
8623 (match_operand 3 "const_int_operand" "")
8624 (match_operand 4 "const_int_operand" "")]
8627 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8628 [(set_attr "type" "sseins")
8629 (set_attr "prefix_data16" "0")
8630 (set_attr "prefix_rep" "1")
8631 (set_attr "length_immediate" "2")
8632 (set_attr "mode" "TI")])
;; SSE4A INSERTQ, register form; emits two-operand "insertq".  Operand 1
;; is tied to the destination ("0"); operand 2 is a V2DI register.  The
;; attributes select a rep prefix (prefix_rep 1) and no data16 prefix.
8634 (define_insn "sse4a_insertq"
8635 [(set (match_operand:V2DI 0 "register_operand" "=x")
8636 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8637 (match_operand:V2DI 2 "register_operand" "x")]
8640 "insertq\t{%2, %0|%0, %2}"
8641 [(set_attr "type" "sseins")
8642 (set_attr "prefix_data16" "0")
8643 (set_attr "prefix_rep" "1")
8644 (set_attr "mode" "TI")])
8646 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8648 ;; Intel SSE4.1 instructions
8650 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded immediate blend for each mode of the AVXMODEF2P iterator;
;; emits four-operand "vblend" with the suffix from <ssemodesuffix>.
;; Expressed as vec_merge with operand 2 listed first and operand 1
;; second, selected by the immediate mask in operand 3, whose range
;; predicate is built from <blendbits>.
8652 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
8653 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8654 (vec_merge:AVXMODEF2P
8655 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8656 (match_operand:AVXMODEF2P 1 "register_operand" "x")
8657 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8659 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8660 [(set_attr "type" "ssemov")
8661 (set_attr "prefix_extra" "1")
8662 (set_attr "length_immediate" "1")
8663 (set_attr "prefix" "vex")
8664 (set_attr "mode" "<avxvecmode>")])
;; VEX-encoded variable blend for each mode of the AVXMODEF2P iterator;
;; emits four-operand "vblendv" with the suffix from <ssemodesuffix>.
;; All three inputs are vectors: operands 1 and 3 must be registers,
;; operand 2 may be memory.  length_immediate is 1 even though operand 3
;; is a register (presumably because the encoding carries that register
;; in a trailing byte -- confirm against the instruction reference).
8666 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
8667 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8669 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
8670 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8671 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
8674 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8675 [(set_attr "type" "ssemov")
8676 (set_attr "prefix_extra" "1")
8677 (set_attr "length_immediate" "1")
8678 (set_attr "prefix" "vex")
8679 (set_attr "mode" "<avxvecmode>")])
;; Non-VEX immediate blend for each mode of the SSEMODEF2P iterator;
;; emits three-operand "blend" with the suffix from <ssemodesuffix>.
;; Expressed as vec_merge with operand 2 listed first and operand 1 (tied
;; to the destination by "0") second, selected by the immediate mask in
;; operand 3, whose range predicate is built from <blendbits>.
8681 (define_insn "sse4_1_blend<ssemodesuffix>"
8682 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8683 (vec_merge:SSEMODEF2P
8684 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8685 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8686 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8688 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
8689 [(set_attr "type" "ssemov")
8690 (set_attr "prefix_data16" "1")
8691 (set_attr "prefix_extra" "1")
8692 (set_attr "length_immediate" "1")
8693 (set_attr "mode" "<MODE>")])
;; Non-VEX variable blend for each mode of the SSEMODEF2P iterator; emits
;; "blendv" with the suffix from <ssemodesuffix>.  Operand 1 is tied to
;; the destination ("0").  Operands 0-2 use the *_not_xmm0 predicates
;; while operand 3 uses the "Yz" constraint, which keeps the selector
;; register distinct from the data operands (presumably "Yz" is xmm0,
;; the register BLENDV reads implicitly -- confirm in the constraint
;; definitions).
8695 (define_insn "sse4_1_blendv<ssemodesuffix>"
8696 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
8698 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
8699 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
8700 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
8703 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
8704 [(set_attr "type" "ssemov")
8705 (set_attr "prefix_data16" "1")
8706 (set_attr "prefix_extra" "1")
8707 (set_attr "mode" "<MODE>")])
;; VEX-encoded DP (dot product) pattern for each mode of the AVXMODEF2P
;; iterator; emits four-operand "vdp" with the suffix from
;; <ssemodesuffix>.  The "%" in operand 1's constraint marks operands 1
;; and 2 as commutative; operand 3 is an 8-bit immediate (0..255).
8709 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
8710 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8712 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
8713 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8714 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8717 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8718 [(set_attr "type" "ssemul")
8719 (set_attr "prefix" "vex")
8720 (set_attr "prefix_extra" "1")
8721 (set_attr "length_immediate" "1")
8722 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 dot product: emits dp<ssemodesuffix>.  Operand 1 is tied to the
;; destination ("%0", commutative with operand 2), so the template names
;; only %0, %2 and the 8-bit immediate %3.
8724 (define_insn "sse4_1_dp<ssemodesuffix>"
8725 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8727 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
8728 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8729 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8732 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
8733 [(set_attr "type" "ssemul")
8734 (set_attr "prefix_data16" "1")
8735 (set_attr "prefix_extra" "1")
8736 (set_attr "length_immediate" "1")
8737 (set_attr "mode" "<MODE>")])
;; movntdqa: V2DI load whose source must be a memory operand ("m") and
;; whose destination is an SSE register.  The template uses the %v prefix
;; together with prefix "maybe_vex".
8739 (define_insn "sse4_1_movntdqa"
8740 [(set (match_operand:V2DI 0 "register_operand" "=x")
8741 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
8744 "%vmovntdqa\t{%1, %0|%0, %1}"
8745 [(set_attr "type" "ssemov")
8746 (set_attr "prefix_extra" "1")
8747 (set_attr "prefix" "maybe_vex")
8748 (set_attr "mode" "TI")])
;; VEX form of mpsadbw on V16QI: separate destination, register operand 1,
;; register-or-memory operand 2 and an 8-bit immediate operand 3.
8750 (define_insn "*avx_mpsadbw"
8751 [(set (match_operand:V16QI 0 "register_operand" "=x")
8752 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8753 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8754 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8757 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8758 [(set_attr "type" "sselog1")
8759 (set_attr "prefix" "vex")
8760 (set_attr "prefix_extra" "1")
8761 (set_attr "length_immediate" "1")
8762 (set_attr "mode" "TI")])
;; Legacy-encoded mpsadbw on V16QI.  Operand 1 is tied to the destination
;; ("0"); operand 2 may be memory; operand 3 is an 8-bit immediate.
8764 (define_insn "sse4_1_mpsadbw"
8765 [(set (match_operand:V16QI 0 "register_operand" "=x")
8766 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8767 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8768 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8771 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
8772 [(set_attr "type" "sselog1")
8773 (set_attr "prefix_extra" "1")
8774 (set_attr "length_immediate" "1")
8775 (set_attr "mode" "TI")])
;; VEX form of packusdw: two V4SI inputs (operand 2 may be memory) produce
;; one V8HI result in a separate destination register.
8777 (define_insn "*avx_packusdw"
8778 [(set (match_operand:V8HI 0 "register_operand" "=x")
8781 (match_operand:V4SI 1 "register_operand" "x"))
8783 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
8785 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8786 [(set_attr "type" "sselog")
8787 (set_attr "prefix_extra" "1")
8788 (set_attr "prefix" "vex")
8789 (set_attr "mode" "TI")])
;; Legacy-encoded packusdw: two V4SI inputs produce one V8HI result.
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
8791 (define_insn "sse4_1_packusdw"
8792 [(set (match_operand:V8HI 0 "register_operand" "=x")
8795 (match_operand:V4SI 1 "register_operand" "0"))
8797 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
8799 "packusdw\t{%2, %0|%0, %2}"
8800 [(set_attr "type" "sselog")
8801 (set_attr "prefix_extra" "1")
8802 (set_attr "mode" "TI")])
;; VEX form of pblendvb on V16QI: four explicit operands, with the selector
;; (operand 3) in an ordinary SSE register and operand 2 optionally in memory.
;; NOTE(review): length_immediate is 1 although no operand is a constant;
;; presumably the operand-3 register is encoded in an immediate byte -- confirm.
8804 (define_insn "*avx_pblendvb"
8805 [(set (match_operand:V16QI 0 "register_operand" "=x")
8806 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8807 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8808 (match_operand:V16QI 3 "register_operand" "x")]
8811 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8812 [(set_attr "type" "ssemov")
8813 (set_attr "prefix_extra" "1")
8814 (set_attr "length_immediate" "1")
8815 (set_attr "prefix" "vex")
8816 (set_attr "mode" "TI")])
;; Legacy-encoded pblendvb on V16QI.  Operands 0-2 use the *_not_xmm0
;; predicates, operand 1 is tied to the destination, and the selector
;; (operand 3) uses constraint "Yz".
;; NOTE(review): "Yz" presumably pins the selector to %xmm0 -- confirm
;; against the i386 constraint definitions.
8818 (define_insn "sse4_1_pblendvb"
8819 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
8820 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
8821 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
8822 (match_operand:V16QI 3 "register_operand" "Yz")]
8825 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
8826 [(set_attr "type" "ssemov")
8827 (set_attr "prefix_extra" "1")
8828 (set_attr "mode" "TI")])
;; VEX form of pblendw on V8HI.  Note the operand order in the RTL: operand 2
;; (register or memory) is listed before operand 1 (register), followed by
;; the 8-bit immediate mask operand 3.
8830 (define_insn "*avx_pblendw"
8831 [(set (match_operand:V8HI 0 "register_operand" "=x")
8833 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8834 (match_operand:V8HI 1 "register_operand" "x")
8835 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
8837 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8838 [(set_attr "type" "ssemov")
8839 (set_attr "prefix" "vex")
8840 (set_attr "prefix_extra" "1")
8841 (set_attr "length_immediate" "1")
8842 (set_attr "mode" "TI")])
;; Legacy-encoded pblendw on V8HI.  Operand 2 (register or memory) is listed
;; before operand 1, which is tied to the destination ("0"); operand 3 is
;; the 8-bit immediate mask.
8844 (define_insn "sse4_1_pblendw"
8845 [(set (match_operand:V8HI 0 "register_operand" "=x")
8847 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8848 (match_operand:V8HI 1 "register_operand" "0")
8849 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
8851 "pblendw\t{%3, %2, %0|%0, %2, %3}"
8852 [(set_attr "type" "ssemov")
8853 (set_attr "prefix_extra" "1")
8854 (set_attr "length_immediate" "1")
8855 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single
;; register-or-memory source.  The template uses the %v prefix together
;; with prefix "maybe_vex".
8857 (define_insn "sse4_1_phminposuw"
8858 [(set (match_operand:V8HI 0 "register_operand" "=x")
8859 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
8860 UNSPEC_PHMINPOSUW))]
8862 "%vphminposuw\t{%1, %0|%0, %1}"
8863 [(set_attr "type" "sselog1")
8864 (set_attr "prefix_extra" "1")
8865 (set_attr "prefix" "maybe_vex")
8866 (set_attr "mode" "TI")])
;; Byte-to-word extension: elements selected from the V16QI source
;; (selection starts at element 0) are extended into a V8HI result.
;; <extsuffix> expands to "sx" or "zx", giving pmovsxbw / pmovzxbw.
8868 (define_insn "sse4_1_<code>v8qiv8hi2"
8869 [(set (match_operand:V8HI 0 "register_operand" "=x")
8872 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8873 (parallel [(const_int 0)
8882 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8883 [(set_attr "type" "ssemov")
8884 (set_attr "prefix_extra" "1")
8885 (set_attr "prefix" "maybe_vex")
8886 (set_attr "mode" "TI")])
;; Byte-to-doubleword extension: elements selected from the V16QI source
;; (selection starts at element 0) are extended into a V4SI result.
;; <extsuffix> expands to "sx" or "zx", giving pmovsxbd / pmovzxbd.
8888 (define_insn "sse4_1_<code>v4qiv4si2"
8889 [(set (match_operand:V4SI 0 "register_operand" "=x")
8892 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8893 (parallel [(const_int 0)
8898 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
8899 [(set_attr "type" "ssemov")
8900 (set_attr "prefix_extra" "1")
8901 (set_attr "prefix" "maybe_vex")
8902 (set_attr "mode" "TI")])
;; Word-to-doubleword extension: elements selected from the V8HI source
;; (selection starts at element 0) are extended into a V4SI result.
;; <extsuffix> expands to "sx" or "zx", giving pmovsxwd / pmovzxwd.
8904 (define_insn "sse4_1_<code>v4hiv4si2"
8905 [(set (match_operand:V4SI 0 "register_operand" "=x")
8908 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8909 (parallel [(const_int 0)
8914 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
8915 [(set_attr "type" "ssemov")
8916 (set_attr "prefix_extra" "1")
8917 (set_attr "prefix" "maybe_vex")
8918 (set_attr "mode" "TI")])
;; Byte-to-quadword extension: elements selected from the V16QI source
;; (selection starts at element 0) are extended into a V2DI result.
;; <extsuffix> expands to "sx" or "zx", giving pmovsxbq / pmovzxbq.
8920 (define_insn "sse4_1_<code>v2qiv2di2"
8921 [(set (match_operand:V2DI 0 "register_operand" "=x")
8924 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8925 (parallel [(const_int 0)
8928 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
8929 [(set_attr "type" "ssemov")
8930 (set_attr "prefix_extra" "1")
8931 (set_attr "prefix" "maybe_vex")
8932 (set_attr "mode" "TI")])
;; Word-to-quadword extension: elements selected from the V8HI source
;; (selection starts at element 0) are extended into a V2DI result.
;; <extsuffix> expands to "sx" or "zx", giving pmovsxwq / pmovzxwq.
8934 (define_insn "sse4_1_<code>v2hiv2di2"
8935 [(set (match_operand:V2DI 0 "register_operand" "=x")
8938 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8939 (parallel [(const_int 0)
8942 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
8943 [(set_attr "type" "ssemov")
8944 (set_attr "prefix_extra" "1")
8945 (set_attr "prefix" "maybe_vex")
8946 (set_attr "mode" "TI")])
;; Doubleword-to-quadword extension: elements selected from the V4SI source
;; (selection starts at element 0) are extended into a V2DI result.
;; <extsuffix> expands to "sx" or "zx", giving pmovsxdq / pmovzxdq.
8948 (define_insn "sse4_1_<code>v2siv2di2"
8949 [(set (match_operand:V2DI 0 "register_operand" "=x")
8952 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8953 (parallel [(const_int 0)
8956 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8957 [(set_attr "type" "ssemov")
8958 (set_attr "prefix_extra" "1")
8959 (set_attr "prefix" "maybe_vex")
8960 (set_attr "mode" "TI")])
8962 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
8963 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Emits vtest<ssemodesuffix>.  The only result is FLAGS_REG in CCmode;
;; both vector operands are inputs (operand 0 a register, operand 1 a
;; register or memory).
8964 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
8965 [(set (reg:CC FLAGS_REG)
8966 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
8967 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
8970 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8971 [(set_attr "type" "ssecomi")
8972 (set_attr "prefix_extra" "1")
8973 (set_attr "prefix" "vex")
8974 (set_attr "mode" "<MODE>")])
8976 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
8977 ;; but it is not really a compare instruction.
;; 256-bit vptest on V4DI inputs.  The only result is FLAGS_REG in CCmode;
;; operand 0 must be a register, operand 1 may be memory.
8978 (define_insn "avx_ptest256"
8979 [(set (reg:CC FLAGS_REG)
8980 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8981 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8984 "vptest\t{%1, %0|%0, %1}"
8985 [(set_attr "type" "ssecomi")
8986 (set_attr "prefix_extra" "1")
8987 (set_attr "prefix" "vex")
8988 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI inputs.  The only result is FLAGS_REG in CCmode.
;; The template uses the %v prefix together with prefix "maybe_vex".
8990 (define_insn "sse4_1_ptest"
8991 [(set (reg:CC FLAGS_REG)
8992 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8993 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8996 "%vptest\t{%1, %0|%0, %1}"
8997 [(set_attr "type" "ssecomi")
8998 (set_attr "prefix_extra" "1")
8999 (set_attr "prefix" "maybe_vex")
9000 (set_attr "mode" "TI")])
;; 256-bit packed rounding: emits vround<ssemodesuffix> with a
;; register-or-memory source and a rounding-control immediate restricted
;; to 0..15 (const_0_to_15_operand).
9002 (define_insn "avx_round<ssemodesuffix>256"
9003 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9004 (unspec:AVX256MODEF2P
9005 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9006 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9009 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9010 [(set_attr "type" "ssecvt")
9011 (set_attr "prefix_extra" "1")
9012 (set_attr "length_immediate" "1")
9013 (set_attr "prefix" "vex")
9014 (set_attr "mode" "<MODE>")])
;; 128-bit packed rounding: emits round<ssemodesuffix> (with the %v prefix,
;; prefix "maybe_vex").  The source may be memory; the rounding-control
;; immediate is restricted to 0..15.
9016 (define_insn "sse4_1_round<ssemodesuffix>"
9017 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9019 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9020 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9023 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9024 [(set_attr "type" "ssecvt")
9025 (set_attr "prefix_data16" "1")
9026 (set_attr "prefix_extra" "1")
9027 (set_attr "length_immediate" "1")
9028 (set_attr "prefix" "maybe_vex")
9029 (set_attr "mode" "<MODE>")])
;; VEX scalar rounding: emits vround<ssescalarmodesuffix>.  The pattern is a
;; vec_merge of the rounded operand 2 (immediate operand 3 in 0..15) with
;; register operand 1; all vector inputs must be registers.
9031 (define_insn "*avx_round<ssescalarmodesuffix>"
9032 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9033 (vec_merge:SSEMODEF2P
9035 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9036 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9038 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9041 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9042 [(set_attr "type" "ssecvt")
9043 (set_attr "prefix_extra" "1")
9044 (set_attr "length_immediate" "1")
9045 (set_attr "prefix" "vex")
9046 (set_attr "mode" "<MODE>")])
;; Legacy-encoded scalar rounding: emits round<ssescalarmodesuffix>.  Same
;; vec_merge shape as the VEX variant, but operand 1 is tied to the
;; destination ("0"), so the template names only %0, %2 and %3.
9048 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9049 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9050 (vec_merge:SSEMODEF2P
9052 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9053 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9055 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9058 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9059 [(set_attr "type" "ssecvt")
9060 (set_attr "prefix_data16" "1")
9061 (set_attr "prefix_extra" "1")
9062 (set_attr "length_immediate" "1")
9063 (set_attr "mode" "<MODE>")])
9065 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9067 ;; Intel SSE4.2 string/text processing instructions
9069 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI index (operand 0,
;; constraint "c"), a V16QI mask (operand 1, constraint "Yz") and FLAGS_REG.
;; Inputs are two V16QI strings (operands 2 and 4, *_not_xmm0 predicates),
;; their SI lengths (operands 3 and 5, constraints "a" and "d") and an
;; 8-bit immediate (operand 6).
;; The split body checks, via REG_UNUSED notes on curr_insn, which of the
;; three results are live (locals ecx, xmm0, flags) and emits the matching
;; sse4_2_pcmpestri / sse4_2_pcmpestrm insns; the flags-only variant is
;; emitted when flags is live and neither ecx nor xmm0 is.
9071 (define_insn_and_split "sse4_2_pcmpestr"
9072 [(set (match_operand:SI 0 "register_operand" "=c,c")
9074 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9075 (match_operand:SI 3 "register_operand" "a,a")
9076 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9077 (match_operand:SI 5 "register_operand" "d,d")
9078 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9080 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9088 (set (reg:CC FLAGS_REG)
9097 && can_create_pseudo_p ()"
9102 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9103 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9104 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9107 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9108 operands[3], operands[4],
9109 operands[5], operands[6]));
9111 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9112 operands[3], operands[4],
9113 operands[5], operands[6]));
9114 if (flags && !(ecx || xmm0))
9115 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9116 operands[2], operands[3],
9117 operands[4], operands[5],
9121 [(set_attr "type" "sselog")
9122 (set_attr "prefix_data16" "1")
9123 (set_attr "prefix_extra" "1")
9124 (set_attr "length_immediate" "1")
9125 (set_attr "memory" "none,load")
9126 (set_attr "mode" "TI")])
;; pcmpestri: produces the SI index result (operand 0, constraint "c") and
;; sets FLAGS_REG.  The two alternatives differ only in whether the second
;; string (operand 3) is a register or memory, mirrored by the "memory"
;; attribute (none,load).  The length operands 2 and 4 are not printed in
;; the template.
9128 (define_insn "sse4_2_pcmpestri"
9129 [(set (match_operand:SI 0 "register_operand" "=c,c")
9131 [(match_operand:V16QI 1 "register_operand" "x,x")
9132 (match_operand:SI 2 "register_operand" "a,a")
9133 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9134 (match_operand:SI 4 "register_operand" "d,d")
9135 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9137 (set (reg:CC FLAGS_REG)
9146 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9147 [(set_attr "type" "sselog")
9148 (set_attr "prefix_data16" "1")
9149 (set_attr "prefix_extra" "1")
9150 (set_attr "prefix" "maybe_vex")
9151 (set_attr "length_immediate" "1")
9152 (set_attr "memory" "none,load")
9153 (set_attr "mode" "TI")])
;; pcmpestrm: produces the V16QI mask result (operand 0, constraint "Yz")
;; and sets FLAGS_REG.  The two alternatives differ only in whether the
;; second string (operand 3) is a register or memory.  The length operands
;; 2 and 4 are not printed in the template.
9155 (define_insn "sse4_2_pcmpestrm"
9156 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9158 [(match_operand:V16QI 1 "register_operand" "x,x")
9159 (match_operand:SI 2 "register_operand" "a,a")
9160 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9161 (match_operand:SI 4 "register_operand" "d,d")
9162 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9164 (set (reg:CC FLAGS_REG)
9173 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9174 [(set_attr "type" "sselog")
9175 (set_attr "prefix_data16" "1")
9176 (set_attr "prefix_extra" "1")
9177 (set_attr "length_immediate" "1")
9178 (set_attr "prefix" "maybe_vex")
9179 (set_attr "memory" "none,load")
9180 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the only set is FLAGS_REG; the other results are
;; clobbered scratches.  Four alternatives:
;;   0,1: clobber a V16QI scratch ("Yz") and emit pcmpestrm (reg / mem source);
;;   2,3: clobber an SI scratch ("c") and emit pcmpestri (reg / mem source).
;; The scratch that an alternative does not need is given constraint "X".
9182 (define_insn "sse4_2_pcmpestr_cconly"
9183 [(set (reg:CC FLAGS_REG)
9185 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9186 (match_operand:SI 3 "register_operand" "a,a,a,a")
9187 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9188 (match_operand:SI 5 "register_operand" "d,d,d,d")
9189 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9191 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9192 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9195 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9196 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9197 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9198 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9199 [(set_attr "type" "sselog")
9200 (set_attr "prefix_data16" "1")
9201 (set_attr "prefix_extra" "1")
9202 (set_attr "length_immediate" "1")
9203 (set_attr "memory" "none,load,none,load")
9204 (set_attr "prefix" "maybe_vex")
9205 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI index (operand 0,
;; constraint "c"), a V16QI mask (operand 1, constraint "Yz") and FLAGS_REG.
;; Inputs are two V16QI strings (operands 2 and 3, *_not_xmm0 predicates)
;; and an 8-bit immediate (operand 4); unlike pcmpestr there are no
;; explicit length operands.
;; The split body checks, via REG_UNUSED notes on curr_insn, which of the
;; three results are live (locals ecx, xmm0, flags) and emits the matching
;; sse4_2_pcmpistri / sse4_2_pcmpistrm insns; the flags-only variant is
;; emitted when flags is live and neither ecx nor xmm0 is.
9207 (define_insn_and_split "sse4_2_pcmpistr"
9208 [(set (match_operand:SI 0 "register_operand" "=c,c")
9210 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9211 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9212 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9214 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9220 (set (reg:CC FLAGS_REG)
9227 && can_create_pseudo_p ()"
9232 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9233 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9234 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9237 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9238 operands[3], operands[4]));
9240 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9241 operands[3], operands[4]));
9242 if (flags && !(ecx || xmm0))
9243 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9244 operands[2], operands[3],
9248 [(set_attr "type" "sselog")
9249 (set_attr "prefix_data16" "1")
9250 (set_attr "prefix_extra" "1")
9251 (set_attr "length_immediate" "1")
9252 (set_attr "memory" "none,load")
9253 (set_attr "mode" "TI")])
;; pcmpistri: produces the SI index result (operand 0, constraint "c") and
;; sets FLAGS_REG.  The two alternatives differ only in whether the second
;; string (operand 2) is a register or memory.
9255 (define_insn "sse4_2_pcmpistri"
9256 [(set (match_operand:SI 0 "register_operand" "=c,c")
9258 [(match_operand:V16QI 1 "register_operand" "x,x")
9259 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9260 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9262 (set (reg:CC FLAGS_REG)
9269 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9270 [(set_attr "type" "sselog")
9271 (set_attr "prefix_data16" "1")
9272 (set_attr "prefix_extra" "1")
9273 (set_attr "length_immediate" "1")
9274 (set_attr "prefix" "maybe_vex")
9275 (set_attr "memory" "none,load")
9276 (set_attr "mode" "TI")])
;; pcmpistrm: produces the V16QI mask result (operand 0, constraint "Yz")
;; and sets FLAGS_REG.  The two alternatives differ only in whether the
;; second string (operand 2) is a register or memory.
9278 (define_insn "sse4_2_pcmpistrm"
9279 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9281 [(match_operand:V16QI 1 "register_operand" "x,x")
9282 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9283 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9285 (set (reg:CC FLAGS_REG)
9292 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9293 [(set_attr "type" "sselog")
9294 (set_attr "prefix_data16" "1")
9295 (set_attr "prefix_extra" "1")
9296 (set_attr "length_immediate" "1")
9297 (set_attr "prefix" "maybe_vex")
9298 (set_attr "memory" "none,load")
9299 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the only set is FLAGS_REG; the other results are
;; clobbered scratches.  Four alternatives:
;;   0,1: clobber a V16QI scratch ("Yz") and emit pcmpistrm (reg / mem source);
;;   2,3: clobber an SI scratch ("c") and emit pcmpistri (reg / mem source).
;; The scratch that an alternative does not need is given constraint "X".
9301 (define_insn "sse4_2_pcmpistr_cconly"
9302 [(set (reg:CC FLAGS_REG)
9304 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9305 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9306 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9308 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9309 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9312 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9313 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9314 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9315 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9316 [(set_attr "type" "sselog")
9317 (set_attr "prefix_data16" "1")
9318 (set_attr "prefix_extra" "1")
9319 (set_attr "length_immediate" "1")
9320 (set_attr "memory" "none,load,none,load")
9321 (set_attr "prefix" "maybe_vex")
9322 (set_attr "mode" "TI")])
9324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9330 ;; XOP parallel integer multiply/add instructions.
9331 ;; Note the XOP multiply/add instructions
9332 ;; a[i] = b[i] * c[i] + d[i];
9333 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI: operands 1 and 2 are the multiply inputs (operand 1 is
;; marked commutative with "%", operand 2 may be memory); operand 3 is the
;; addend and is restricted to a register by its constraint.
;; NOTE(review): operand 3's predicate (nonimmediate_operand) accepts memory
;; while its constraint ("x") does not; register_operand would state the
;; restriction directly -- confirm before changing.
9334 (define_insn "xop_pmacsww"
9335 [(set (match_operand:V8HI 0 "register_operand" "=x")
9338 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9339 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
9340 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
9342 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9343 [(set_attr "type" "ssemuladd")
9344 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a V8HI multiply of operands 1 and 2 (operand 1 marked
;; commutative, operand 2 may be memory) combined with the register-only
;; addend operand 3.
9346 (define_insn "xop_pmacssww"
9347 [(set (match_operand:V8HI 0 "register_operand" "=x")
9349 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9350 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
9351 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
9353 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9354 [(set_attr "type" "ssemuladd")
9355 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI: operands 1 and 2 are the multiply inputs (operand 1
;; marked commutative, operand 2 may be memory); operand 3 is the
;; register-only addend.
9357 (define_insn "xop_pmacsdd"
9358 [(set (match_operand:V4SI 0 "register_operand" "=x")
9361 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9362 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
9363 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
9365 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9366 [(set_attr "type" "ssemuladd")
9367 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a V4SI multiply of operands 1 and 2 (operand 1 marked
;; commutative, operand 2 may be memory) combined with the register-only
;; addend operand 3.
9369 (define_insn "xop_pmacssdd"
9370 [(set (match_operand:V4SI 0 "register_operand" "=x")
9372 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9373 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
9374 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
9376 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9377 [(set_attr "type" "ssemuladd")
9378 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI multiply inputs (operands 1 and 2), each with an element
;; selection starting at index 1, accumulated with the V2DI register
;; addend operand 3 into a V2DI result.
9380 (define_insn "xop_pmacssdql"
9381 [(set (match_operand:V2DI 0 "register_operand" "=x")
9386 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9387 (parallel [(const_int 1)
9390 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9391 (parallel [(const_int 1)
9393 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
9395 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9396 [(set_attr "type" "ssemuladd")
9397 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI multiply inputs (operands 1 and 2), each with an element
;; selection starting at index 0, accumulated with the V2DI register
;; addend operand 3 into a V2DI result.
9399 (define_insn "xop_pmacssdqh"
9400 [(set (match_operand:V2DI 0 "register_operand" "=x")
9405 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9406 (parallel [(const_int 0)
9410 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9411 (parallel [(const_int 0)
9413 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
9415 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9416 [(set_attr "type" "ssemuladd")
9417 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI multiply inputs (operands 1 and 2), each with an element
;; selection starting at index 1, accumulated with the V2DI register
;; addend operand 3 into a V2DI result.
9419 (define_insn "xop_pmacsdql"
9420 [(set (match_operand:V2DI 0 "register_operand" "=x")
9425 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9426 (parallel [(const_int 1)
9430 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9431 (parallel [(const_int 1)
9433 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
9435 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9436 [(set_attr "type" "ssemuladd")
9437 (set_attr "mode" "TI")])
9439 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
9440 ;; fake it with a multiply/add. In general, we expect the define_split to
9441 ;; occur before register allocation, so we have to handle the corner case where
9442 ;; the target is the same as operands 1/2
;; Widening V4SI multiply into V2DI using the element selection (1, 3) of
;; each input.  The destination is earlyclobber ("=&x"), so it cannot be
;; allocated to the same register as an input.  The split condition adds
;; reload_completed, and the preparation statement sets operands[3] to the
;; V2DI zero vector for use by the replacement pattern.
9443 (define_insn_and_split "xop_mulv2div2di3_low"
9444 [(set (match_operand:V2DI 0 "register_operand" "=&x")
9448 (match_operand:V4SI 1 "register_operand" "%x")
9449 (parallel [(const_int 1)
9453 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9454 (parallel [(const_int 1)
9455 (const_int 3)])))))]
9458 "&& reload_completed"
9467 (parallel [(const_int 1)
9472 (parallel [(const_int 1)
9476 operands[3] = CONST0_RTX (V2DImode);
9478 [(set_attr "type" "ssemul")
9479 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI multiply inputs (operands 1 and 2), each with an element
;; selection starting at index 0, accumulated with the V2DI register
;; addend operand 3 into a V2DI result.
9481 (define_insn "xop_pmacsdqh"
9482 [(set (match_operand:V2DI 0 "register_operand" "=x")
9487 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9488 (parallel [(const_int 0)
9492 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9493 (parallel [(const_int 0)
9495 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
9497 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9498 [(set_attr "type" "ssemuladd")
9499 (set_attr "mode" "TI")])
9501 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
9502 ;; fake it with a multiply/add. In general, we expect the define_split to
9503 ;; occur before register allocation, so we have to handle the corner case where
9504 ;; the target is the same as either operands[1] or operands[2]
;; Widening V4SI multiply into V2DI using the element selection (0, 2) of
;; each input.  The destination is earlyclobber ("=&x"), so it cannot be
;; allocated to the same register as an input.  The split condition adds
;; reload_completed, and the preparation statement sets operands[3] to the
;; V2DI zero vector for use by the replacement pattern.
9505 (define_insn_and_split "xop_mulv2div2di3_high"
9506 [(set (match_operand:V2DI 0 "register_operand" "=&x")
9510 (match_operand:V4SI 1 "register_operand" "%x")
9511 (parallel [(const_int 0)
9515 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9516 (parallel [(const_int 0)
9517 (const_int 2)])))))]
9520 "&& reload_completed"
9529 (parallel [(const_int 0)
9534 (parallel [(const_int 0)
9538 operands[3] = CONST0_RTX (V2DImode);
9540 [(set_attr "type" "ssemul")
9541 (set_attr "mode" "TI")])
9543 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI multiply inputs (operands 1 and 2), each with an element
;; selection starting at index 1, accumulated with the V4SI register
;; addend operand 3 into a V4SI result.
9544 (define_insn "xop_pmacsswd"
9545 [(set (match_operand:V4SI 0 "register_operand" "=x")
9550 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9551 (parallel [(const_int 1)
9557 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9558 (parallel [(const_int 1)
9562 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
9564 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9565 [(set_attr "type" "ssemuladd")
9566 (set_attr "mode" "TI")])
;; vpmacswd: V8HI multiply inputs (operands 1 and 2), each with an element
;; selection starting at index 1, accumulated with the V4SI register
;; addend operand 3 into a V4SI result.
9568 (define_insn "xop_pmacswd"
9569 [(set (match_operand:V4SI 0 "register_operand" "=x")
9574 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9575 (parallel [(const_int 1)
9581 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9582 (parallel [(const_int 1)
9586 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
9588 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9589 [(set_attr "type" "ssemuladd")
9590 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI inputs (operands 1 and 2) are used through element
;; selections starting at index 0 and at index 1; the combined value is
;; accumulated with the V4SI register addend operand 3 into a V4SI result.
9592 (define_insn "xop_pmadcsswd"
9593 [(set (match_operand:V4SI 0 "register_operand" "=x")
9599 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9600 (parallel [(const_int 0)
9606 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9607 (parallel [(const_int 0)
9615 (parallel [(const_int 1)
9622 (parallel [(const_int 1)
9626 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
9628 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9629 [(set_attr "type" "ssemuladd")
9630 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI inputs (operands 1 and 2) are used through element
;; selections starting at index 0 and at index 1; the combined value is
;; accumulated with the V4SI register addend operand 3 into a V4SI result.
9632 (define_insn "xop_pmadcswd"
9633 [(set (match_operand:V4SI 0 "register_operand" "=x")
9639 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9640 (parallel [(const_int 0)
9646 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9647 (parallel [(const_int 0)
9655 (parallel [(const_int 1)
9662 (parallel [(const_int 1)
9666 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
9668 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9669 [(set_attr "type" "ssemuladd")
9670 (set_attr "mode" "TI")])
9672 ;; XOP parallel XMM conditional moves
;; 128-bit vpcmov, modelled as if_then_else with operand 3 as the selector
;; and operands 1 / 2 as the two arms.  The two alternatives allow memory
;; for either operand 2 (alternative 0) or operand 3 (alternative 1), never
;; both; operand 1 is always a register.
9673 (define_insn "xop_pcmov_<mode>"
9674 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
9675 (if_then_else:SSEMODE
9676 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
9677 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
9678 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
9680 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9681 [(set_attr "type" "sse4arg")])
;; 256-bit vpcmov, modelled as if_then_else with operand 3 as the selector
;; and operands 1 / 2 as the two arms.  The two alternatives allow memory
;; for either operand 2 (alternative 0) or operand 3 (alternative 1), never
;; both; operand 1 is always a register.
9683 (define_insn "xop_pcmov_<mode>256"
9684 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
9685 (if_then_else:AVX256MODE
9686 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
9687 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
9688 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
9690 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9691 [(set_attr "type" "sse4arg")])
9693 ;; XOP horizontal add/subtract instructions
;; vphaddbw: single V16QI source (register or memory), V8HI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
9694 (define_insn "xop_phaddbw"
9695 [(set (match_operand:V8HI 0 "register_operand" "=x")
9699 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9700 (parallel [(const_int 0)
9711 (parallel [(const_int 1)
9718 (const_int 15)])))))]
9720 "vphaddbw\t{%1, %0|%0, %1}"
9721 [(set_attr "type" "sseiadd1")])
;; vphaddbd: single V16QI source (register or memory), V4SI result.  The
;; pattern combines four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3.
9723 (define_insn "xop_phaddbd"
9724 [(set (match_operand:V4SI 0 "register_operand" "=x")
9729 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9730 (parallel [(const_int 0)
9737 (parallel [(const_int 1)
9745 (parallel [(const_int 2)
9752 (parallel [(const_int 3)
9755 (const_int 15)]))))))]
9757 "vphaddbd\t{%1, %0|%0, %1}"
9758 [(set_attr "type" "sseiadd1")])
;; vphaddbq: single V16QI source (register or memory), V2DI result.  The
;; pattern combines eight element selections of operand 1, starting at
;; indices 0-3 and 8-11.
9760 (define_insn "xop_phaddbq"
9761 [(set (match_operand:V2DI 0 "register_operand" "=x")
9767 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9768 (parallel [(const_int 0)
9773 (parallel [(const_int 1)
9779 (parallel [(const_int 2)
9784 (parallel [(const_int 3)
9791 (parallel [(const_int 8)
9796 (parallel [(const_int 9)
9802 (parallel [(const_int 10)
9807 (parallel [(const_int 11)
9808 (const_int 15)])))))))]
9810 "vphaddbq\t{%1, %0|%0, %1}"
9811 [(set_attr "type" "sseiadd1")])
;; vphaddwd: single V8HI source (register or memory), V4SI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
9813 (define_insn "xop_phaddwd"
9814 [(set (match_operand:V4SI 0 "register_operand" "=x")
9818 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9819 (parallel [(const_int 0)
9826 (parallel [(const_int 1)
9829 (const_int 7)])))))]
9831 "vphaddwd\t{%1, %0|%0, %1}"
9832 [(set_attr "type" "sseiadd1")])
;; vphaddwq: single V8HI source (register or memory), V2DI result.  The
;; pattern combines four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3.
9834 (define_insn "xop_phaddwq"
9835 [(set (match_operand:V2DI 0 "register_operand" "=x")
9840 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9841 (parallel [(const_int 0)
9846 (parallel [(const_int 1)
9852 (parallel [(const_int 2)
9857 (parallel [(const_int 3)
9858 (const_int 7)]))))))]
9860 "vphaddwq\t{%1, %0|%0, %1}"
9861 [(set_attr "type" "sseiadd1")])
;; vphadddq: single V4SI source (register or memory), V2DI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
9863 (define_insn "xop_phadddq"
9864 [(set (match_operand:V2DI 0 "register_operand" "=x")
9868 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9869 (parallel [(const_int 0)
9874 (parallel [(const_int 1)
9875 (const_int 3)])))))]
9877 "vphadddq\t{%1, %0|%0, %1}"
9878 [(set_attr "type" "sseiadd1")])
;; vphaddubw: single V16QI source (register or memory), V8HI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
9880 (define_insn "xop_phaddubw"
9881 [(set (match_operand:V8HI 0 "register_operand" "=x")
9885 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9886 (parallel [(const_int 0)
9897 (parallel [(const_int 1)
9904 (const_int 15)])))))]
9906 "vphaddubw\t{%1, %0|%0, %1}"
9907 [(set_attr "type" "sseiadd1")])
;; vphaddubd: single V16QI source (register or memory), V4SI result.  The
;; pattern combines four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3.
9909 (define_insn "xop_phaddubd"
9910 [(set (match_operand:V4SI 0 "register_operand" "=x")
9915 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9916 (parallel [(const_int 0)
9923 (parallel [(const_int 1)
9931 (parallel [(const_int 2)
9938 (parallel [(const_int 3)
9941 (const_int 15)]))))))]
9943 "vphaddubd\t{%1, %0|%0, %1}"
9944 [(set_attr "type" "sseiadd1")])
;; vphaddubq: single V16QI source (register or memory), V2DI result.  The
;; pattern combines eight element selections of operand 1, starting at
;; indices 0-3 and 8-11.
9946 (define_insn "xop_phaddubq"
9947 [(set (match_operand:V2DI 0 "register_operand" "=x")
9953 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9954 (parallel [(const_int 0)
9959 (parallel [(const_int 1)
9965 (parallel [(const_int 2)
9970 (parallel [(const_int 3)
9977 (parallel [(const_int 8)
9982 (parallel [(const_int 9)
9988 (parallel [(const_int 10)
9993 (parallel [(const_int 11)
9994 (const_int 15)])))))))]
9996 "vphaddubq\t{%1, %0|%0, %1}"
9997 [(set_attr "type" "sseiadd1")])
;; vphadduwd: single V8HI source (register or memory), V4SI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
9999 (define_insn "xop_phadduwd"
10000 [(set (match_operand:V4SI 0 "register_operand" "=x")
10004 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10005 (parallel [(const_int 0)
10012 (parallel [(const_int 1)
10015 (const_int 7)])))))]
10017 "vphadduwd\t{%1, %0|%0, %1}"
10018 [(set_attr "type" "sseiadd1")])
;; vphadduwq: single V8HI source (register or memory), V2DI result.  The
;; pattern combines four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3.
10020 (define_insn "xop_phadduwq"
10021 [(set (match_operand:V2DI 0 "register_operand" "=x")
10026 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10027 (parallel [(const_int 0)
10032 (parallel [(const_int 1)
10038 (parallel [(const_int 2)
10043 (parallel [(const_int 3)
10044 (const_int 7)]))))))]
10046 "vphadduwq\t{%1, %0|%0, %1}"
10047 [(set_attr "type" "sseiadd1")])
;; vphaddudq: single V4SI source (register or memory), V2DI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
10049 (define_insn "xop_phaddudq"
10050 [(set (match_operand:V2DI 0 "register_operand" "=x")
10054 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10055 (parallel [(const_int 0)
10060 (parallel [(const_int 1)
10061 (const_int 3)])))))]
10063 "vphaddudq\t{%1, %0|%0, %1}"
10064 [(set_attr "type" "sseiadd1")])
;; vphsubbw: single V16QI source (register or memory), V8HI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
10066 (define_insn "xop_phsubbw"
10067 [(set (match_operand:V8HI 0 "register_operand" "=x")
10071 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10072 (parallel [(const_int 0)
10083 (parallel [(const_int 1)
10090 (const_int 15)])))))]
10092 "vphsubbw\t{%1, %0|%0, %1}"
10093 [(set_attr "type" "sseiadd1")])
;; vphsubwd: single V8HI source (register or memory), V4SI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
10095 (define_insn "xop_phsubwd"
10096 [(set (match_operand:V4SI 0 "register_operand" "=x")
10100 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10101 (parallel [(const_int 0)
10108 (parallel [(const_int 1)
10111 (const_int 7)])))))]
10113 "vphsubwd\t{%1, %0|%0, %1}"
10114 [(set_attr "type" "sseiadd1")])
;; vphsubdq: single V4SI source (register or memory), V2DI result.  The
;; pattern combines element selections of operand 1 that start at index 0
;; and at index 1.
10116 (define_insn "xop_phsubdq"
10117 [(set (match_operand:V2DI 0 "register_operand" "=x")
10121 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10122 (parallel [(const_int 0)
10127 (parallel [(const_int 1)
10128 (const_int 3)])))))]
10130 "vphsubdq\t{%1, %0|%0, %1}"
10131 [(set_attr "type" "sseiadd1")])
10133 ;; XOP permute instructions
;; vpperm on V16QI, modelled as UNSPEC_XOP_PERMUTE of operands 1-3.
;; Either operand 2 or operand 3 may be memory (one per alternative); the
;; insn condition additionally rejects the case where both are MEMs.
10134 (define_insn "xop_pperm"
10135 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10137 [(match_operand:V16QI 1 "register_operand" "x,x")
10138 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10139 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10140 UNSPEC_XOP_PERMUTE))]
10141 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10142 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10143 [(set_attr "type" "sse4arg")
10144 (set_attr "mode" "TI")])
10146 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result via vpperm.
;; Operand 3 is a V16QI value that appears only in a (use ...); it is passed
;; to vpperm as the last source.  At most one of operands 2 and 3 may be a MEM.
10147 (define_insn "xop_pperm_pack_v2di_v4si"
10148 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10151 (match_operand:V2DI 1 "register_operand" "x,x"))
10153 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10154 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10155 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10156 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10157 [(set_attr "type" "sse4arg")
10158 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result via vpperm.
;; Operand 3 is a V16QI value that appears only in a (use ...); it is passed
;; to vpperm as the last source.  At most one of operands 2 and 3 may be a MEM.
10160 (define_insn "xop_pperm_pack_v4si_v8hi"
10161 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10164 (match_operand:V4SI 1 "register_operand" "x,x"))
10166 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10167 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10168 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10169 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10170 [(set_attr "type" "sse4arg")
10171 (set_attr "mode" "TI")])
;; Combine two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is the V16QI selector, referenced through (use ...).
;; At most one of operands 2 and 3 may be a memory reference.
10173 (define_insn "xop_pperm_pack_v8hi_v16qi"
10174 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10177 (match_operand:V8HI 1 "register_operand" "x,x"))
10179 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10180 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10181 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10182 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10183 [(set_attr "type" "sse4arg")
10184 (set_attr "mode" "TI")])
10186 ;; XOP packed rotate instructions
;; Rotate-left expander for the SSEMODE1248 vector modes with an SImode
;; count (operand 2).  When the count is not accepted by
;; const_0_to_<sserotatemax>_operand, the scalar count is converted to
;; <ssescalarmode> if its mode differs, replicated into every element of a
;; fresh vector through vec_init<mode>, and the rotate is emitted as
;; xop_vrotl<mode>3 with that vector as the per-element count.
10187 (define_expand "rotl<mode>3"
10188 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10189 (rotate:SSEMODE1248
10190 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10191 (match_operand:SI 2 "general_operand")))]
10194 /* If we were given a scalar, convert it to parallel */
10195 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10197 rtvec vs = rtvec_alloc (<ssescalarnum>);
10198 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10199 rtx reg = gen_reg_rtx (<MODE>mode);
10200 rtx op2 = operands[2];
10203 if (GET_MODE (op2) != <ssescalarmode>mode)
10205 op2 = gen_reg_rtx (<ssescalarmode>mode);
10206 convert_move (op2, operands[2], false);
10209 for (i = 0; i < <ssescalarnum>; i++)
10210 RTVEC_ELT (vs, i) = op2;
10212 emit_insn (gen_vec_init<mode> (reg, par));
10213 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the SSEMODE1248 vector modes with an SImode
;; count (operand 2).  When the count is not accepted by
;; const_0_to_<sserotatemax>_operand, the scalar count is converted to
;; <ssescalarmode> if its mode differs and replicated into a vector through
;; vec_init<mode>.  That vector is negated with neg<mode>2 and the rotate
;; is emitted as xop_vrotl<mode>3 using the negated counts.
10218 (define_expand "rotr<mode>3"
10219 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10220 (rotatert:SSEMODE1248
10221 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10222 (match_operand:SI 2 "general_operand")))]
10225 /* If we were given a scalar, convert it to parallel */
10226 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10228 rtvec vs = rtvec_alloc (<ssescalarnum>);
10229 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10230 rtx neg = gen_reg_rtx (<MODE>mode);
10231 rtx reg = gen_reg_rtx (<MODE>mode);
10232 rtx op2 = operands[2];
10235 if (GET_MODE (op2) != <ssescalarmode>mode)
10237 op2 = gen_reg_rtx (<ssescalarmode>mode);
10238 convert_move (op2, operands[2], false);
10241 for (i = 0; i < <ssescalarnum>; i++)
10242 RTVEC_ELT (vs, i) = op2;
10244 emit_insn (gen_vec_init<mode> (reg, par));
10245 emit_insn (gen_neg<mode>2 (neg, reg));
10246 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: vprot<ssevecsize> with operand 2 as the
;; count.  The count must satisfy const_0_to_<sserotatemax>_operand; the
;; source may be a register or memory.
10251 (define_insn "xop_rotl<mode>3"
10252 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10253 (rotate:SSEMODE1248
10254 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10255 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10257 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10258 [(set_attr "type" "sseishft")
10259 (set_attr "length_immediate" "1")
10260 (set_attr "mode" "TI")])
;; Rotate right by an immediate.  The pattern emits vprot<ssevecsize> with
;; a left-rotate count, so a right rotate by N is emitted as a left rotate
;; by (element width in bits) - N, stored in operands[3].  The element
;; width is taken from <ssescalarmode>.  Note that <ssescalarnum> * 8
;; (element count times eight) equals the element width only for V4SI,
;; so it must not be used here.
10262 (define_insn "xop_rotr<mode>3"
10263 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10264 (rotatert:SSEMODE1248
10265 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10266 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10269 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10270 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
10272 [(set_attr "type" "sseishft")
10273 (set_attr "length_immediate" "1")
10274 (set_attr "mode" "TI")])
;; Rotate right with a per-element vector count (operand 2).  The count
;; vector is negated into a fresh register with neg<mode>2 and the result
;; is emitted as xop_vrotl<mode>3.
10276 (define_expand "vrotr<mode>3"
10277 [(match_operand:SSEMODE1248 0 "register_operand" "")
10278 (match_operand:SSEMODE1248 1 "register_operand" "")
10279 (match_operand:SSEMODE1248 2 "register_operand" "")]
10282 rtx reg = gen_reg_rtx (<MODE>mode);
10283 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10284 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate left with a per-element vector count (operand 2).  Forwards the
;; three operands unchanged to xop_vrotl<mode>3.
10288 (define_expand "vrotl<mode>3"
10289 [(match_operand:SSEMODE1248 0 "register_operand" "")
10290 (match_operand:SSEMODE1248 1 "register_operand" "")
10291 (match_operand:SSEMODE1248 2 "register_operand" "")]
10294 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssevecsize> with a vector count (operand 2).  The RTL is an
;; if_then_else involving operand 2 whose arms are a rotate of operand 1
;; and a rotatert by the negation of operand 2.  At most one of operands 1
;; and 2 may be a memory reference.
10298 (define_insn "xop_vrotl<mode>3"
10299 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10300 (if_then_else:SSEMODE1248
10302 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
10304 (rotate:SSEMODE1248
10305 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
10307 (rotatert:SSEMODE1248
10309 (neg:SSEMODE1248 (match_dup 2)))))]
10310 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10311 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10312 [(set_attr "type" "sseishft")
10313 (set_attr "prefix_data16" "0")
10314 (set_attr "prefix_extra" "2")
10315 (set_attr "mode" "TI")])
10317 ;; XOP packed shift instructions.
10318 ;; FIXME: add V2DI back in
;; Logical shift right with a per-element vector count (SSEMODE124 modes).
;; The count vector is negated with neg<mode>2 and the shift is emitted as
;; xop_lshl<mode>3 using the negated counts.
10319 (define_expand "vlshr<mode>3"
10320 [(match_operand:SSEMODE124 0 "register_operand" "")
10321 (match_operand:SSEMODE124 1 "register_operand" "")
10322 (match_operand:SSEMODE124 2 "register_operand" "")]
10325 rtx neg = gen_reg_rtx (<MODE>mode);
10326 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10327 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift right with a per-element vector count (SSEMODE124
;; modes).  The count vector is negated with neg<mode>2 and the shift is
;; emitted as xop_ashl<mode>3 using the negated counts.
10331 (define_expand "vashr<mode>3"
10332 [(match_operand:SSEMODE124 0 "register_operand" "")
10333 (match_operand:SSEMODE124 1 "register_operand" "")
10334 (match_operand:SSEMODE124 2 "register_operand" "")]
10337 rtx neg = gen_reg_rtx (<MODE>mode);
10338 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10339 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift left with a per-element vector count (SSEMODE124 modes).
;; Forwards the three operands unchanged to xop_ashl<mode>3.
10343 (define_expand "vashl<mode>3"
10344 [(match_operand:SSEMODE124 0 "register_operand" "")
10345 (match_operand:SSEMODE124 1 "register_operand" "")
10346 (match_operand:SSEMODE124 2 "register_operand" "")]
10349 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha<ssevecsize> with a vector count (operand 2).  The RTL is an
;; if_then_else involving operand 2 whose arms are an ashift of operand 1
;; and an ashiftrt by the negation of operand 2.  At most one of operands 1
;; and 2 may be a memory reference.
10353 (define_insn "xop_ashl<mode>3"
10354 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10355 (if_then_else:SSEMODE1248
10357 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
10359 (ashift:SSEMODE1248
10360 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
10362 (ashiftrt:SSEMODE1248
10364 (neg:SSEMODE1248 (match_dup 2)))))]
10365 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10366 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10367 [(set_attr "type" "sseishft")
10368 (set_attr "prefix_data16" "0")
10369 (set_attr "prefix_extra" "2")
10370 (set_attr "mode" "TI")])
;; vpshl<ssevecsize> with a vector count (operand 2).  The RTL is an
;; if_then_else involving operand 2 whose arms are an ashift of operand 1
;; and an lshiftrt by the negation of operand 2.  At most one of operands 1
;; and 2 may be a memory reference.
10372 (define_insn "xop_lshl<mode>3"
10373 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10374 (if_then_else:SSEMODE1248
10376 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
10378 (ashift:SSEMODE1248
10379 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
10381 (lshiftrt:SSEMODE1248
10383 (neg:SSEMODE1248 (match_dup 2)))))]
10384 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10385 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10386 [(set_attr "type" "sseishft")
10387 (set_attr "prefix_data16" "0")
10388 (set_attr "prefix_extra" "2")
10389 (set_attr "mode" "TI")])
10391 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count (operand 2).  The count is placed in
;; all 16 slots of a PARALLEL, materialised into a V16QI register with
;; vec_initv16qi, and the shift is emitted as xop_ashlv16qi3.
;; NOTE(review): operand 2 is declared SImode and is stored into the V16QI
;; PARALLEL without the convert_move step that rotl<mode>3 applies --
;; confirm vec_initv16qi accepts a non-QImode element here.
10392 (define_expand "ashlv16qi3"
10393 [(match_operand:V16QI 0 "register_operand" "")
10394 (match_operand:V16QI 1 "register_operand" "")
10395 (match_operand:SI 2 "nonmemory_operand" "")]
10398 rtvec vs = rtvec_alloc (16);
10399 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10400 rtx reg = gen_reg_rtx (V16QImode);
10402 for (i = 0; i < 16; i++)
10403 RTVEC_ELT (vs, i) = operands[2];
10405 emit_insn (gen_vec_initv16qi (reg, par));
10406 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar count (operand 2).  The count is placed
;; in all 16 slots of a PARALLEL, materialised into a V16QI register with
;; vec_initv16qi, and the shift is emitted as xop_lshlv16qi3.
;; NOTE(review): operand 2 is declared SImode and is stored into the V16QI
;; PARALLEL without the convert_move step that rotl<mode>3 applies --
;; confirm vec_initv16qi accepts a non-QImode element here.
10410 (define_expand "lshlv16qi3"
10411 [(match_operand:V16QI 0 "register_operand" "")
10412 (match_operand:V16QI 1 "register_operand" "")
10413 (match_operand:SI 2 "nonmemory_operand" "")]
10416 rtvec vs = rtvec_alloc (16);
10417 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10418 rtx reg = gen_reg_rtx (V16QImode);
10420 for (i = 0; i < 16; i++)
10421 RTVEC_ELT (vs, i) = operands[2];
10423 emit_insn (gen_vec_initv16qi (reg, par));
10424 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count (operand 2), implemented
;; with xop_ashlv16qi3 and a negated count.  A CONST_INT count is negated
;; at expand time with GEN_INT before being replicated; otherwise the
;; replicated count vector is negated with negv16qi2 before the shift is
;; emitted.
10428 (define_expand "ashrv16qi3"
10429 [(match_operand:V16QI 0 "register_operand" "")
10430 (match_operand:V16QI 1 "register_operand" "")
10431 (match_operand:SI 2 "nonmemory_operand" "")]
10434 rtvec vs = rtvec_alloc (16);
10435 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10436 rtx reg = gen_reg_rtx (V16QImode);
10438 rtx ele = ((CONST_INT_P (operands[2]))
10439 ? GEN_INT (- INTVAL (operands[2]))
10442 for (i = 0; i < 16; i++)
10443 RTVEC_ELT (vs, i) = ele;
10445 emit_insn (gen_vec_initv16qi (reg, par));
10447 if (!CONST_INT_P (operands[2]))
10449 rtx neg = gen_reg_rtx (V16QImode);
10450 emit_insn (gen_negv16qi2 (neg, reg));
10451 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
10454 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count (operand 2),
;; implemented with xop_ashlv2di3 and a negated count.  A CONST_INT count
;; is negated with GEN_INT; a non-DImode count is first converted with
;; convert_move and then negated with negdi2; a DImode count is negated
;; with negdi2 directly.  The negated scalar fills both elements of the
;; vector built by vec_initv2di.
10459 (define_expand "ashrv2di3"
10460 [(match_operand:V2DI 0 "register_operand" "")
10461 (match_operand:V2DI 1 "register_operand" "")
10462 (match_operand:DI 2 "nonmemory_operand" "")]
10465 rtvec vs = rtvec_alloc (2);
10466 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
10467 rtx reg = gen_reg_rtx (V2DImode);
10470 if (CONST_INT_P (operands[2]))
10471 ele = GEN_INT (- INTVAL (operands[2]));
10472 else if (GET_MODE (operands[2]) != DImode)
10474 rtx move = gen_reg_rtx (DImode);
10475 ele = gen_reg_rtx (DImode);
10476 convert_move (move, operands[2], false);
10477 emit_insn (gen_negdi2 (ele, move));
10481 ele = gen_reg_rtx (DImode);
10482 emit_insn (gen_negdi2 (ele, operands[2]));
10485 RTVEC_ELT (vs, 0) = ele;
10486 RTVEC_ELT (vs, 1) = ele;
10487 emit_insn (gen_vec_initv2di (reg, par));
10488 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
10492 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> over the FMAMODE modes: one source operand
;; (register or memory) and a register destination.
10493 (define_insn "xop_frcz<mode>2"
10494 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
10496 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
10499 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
10500 [(set_attr "type" "ssecvt1")
10501 (set_attr "mode" "<MODE>")])
;; Expander for the vec_merge form of frcz over the SSEMODEF2P modes.
;; The preparation code sets operands[3] to the zero constant of
;; <MODE>mode.
10504 (define_expand "xop_vmfrcz<mode>2"
10505 [(set (match_operand:SSEMODEF2P 0 "register_operand")
10506 (vec_merge:SSEMODEF2P
10508 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
10514 operands[3] = CONST0_RTX (<MODE>mode);
;; Matcher for the vec_merge form of frcz; emits
;; vfrcz<ssescalarmodesuffix>.  Operand 2 must satisfy const0_operand.
10517 (define_insn "*xop_vmfrcz_<mode>"
10518 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10519 (vec_merge:SSEMODEF2P
10521 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
10523 (match_operand:SSEMODEF2P 2 "const0_operand")
10526 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
10527 [(set_attr "type" "ssecvt1")
10528 (set_attr "mode" "<MODE>")])
;; vpcom<cond><ssevecsize>: vector integer compare.  Operator 1 must be
;; accepted by ix86_comparison_int_operator and supplies the condition
;; text through %Y1.  Operand 2 is a register; operand 3 may be a register
;; or memory.
10530 (define_insn "xop_maskcmp<mode>3"
10531 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10532 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
10533 [(match_operand:SSEMODE1248 2 "register_operand" "x")
10534 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
10536 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
10537 [(set_attr "type" "sse4arg")
10538 (set_attr "prefix_data16" "0")
10539 (set_attr "prefix_rep" "0")
10540 (set_attr "prefix_extra" "2")
10541 (set_attr "length_immediate" "1")
10542 (set_attr "mode" "TI")])
;; vpcom<cond>u<ssevecsize>: the "u"-suffixed compare.  Operator 1 must be
;; accepted by ix86_comparison_uns_operator and supplies the condition
;; text through %Y1.  Operand 2 is a register; operand 3 may be a register
;; or memory.
10544 (define_insn "xop_maskcmp_uns<mode>3"
10545 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10546 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
10547 [(match_operand:SSEMODE1248 2 "register_operand" "x")
10548 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
10550 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
10551 [(set_attr "type" "ssecmp")
10552 (set_attr "prefix_data16" "0")
10553 (set_attr "prefix_rep" "0")
10554 (set_attr "prefix_extra" "2")
10555 (set_attr "length_immediate" "1")
10556 (set_attr "mode" "TI")])
10558 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
10559 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
10560 ;; the exact instruction generated for the intrinsic.
;; Same operands and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
10561 (define_insn "xop_maskcmp_uns2<mode>3"
10562 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10563 (unspec:SSEMODE1248
10564 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
10565 [(match_operand:SSEMODE1248 2 "register_operand" "x")
10566 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
10567 UNSPEC_XOP_UNSIGNED_CMP))]
10569 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
10570 [(set_attr "type" "ssecmp")
10571 (set_attr "prefix_data16" "0")
10572 (set_attr "prefix_extra" "2")
10573 (set_attr "length_immediate" "1")
10574 (set_attr "mode" "TI")])
10576 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
10577 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE over two vector inputs and a constant selector
;; (operand 3).  A nonzero selector emits vpcomtrue<ssevecsize>; zero emits
;; vpcomfalse<ssevecsize>.
10578 (define_insn "xop_pcom_tf<mode>3"
10579 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10580 (unspec:SSEMODE1248
10581 [(match_operand:SSEMODE1248 1 "register_operand" "x")
10582 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
10583 (match_operand:SI 3 "const_int_operand" "n")]
10584 UNSPEC_XOP_TRUEFALSE))]
10587 return ((INTVAL (operands[3]) != 0)
10588 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10589 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
10591 [(set_attr "type" "ssecmp")
10592 (set_attr "prefix_data16" "0")
10593 (set_attr "prefix_extra" "2")
10594 (set_attr "length_immediate" "1")
10595 (set_attr "mode" "TI")])
;; vpermil2<ssemodesuffix> over the AVXMODEF2P modes: two data inputs
;; (operands 1 and 2), a selector in <avxpermvecmode> (operand 3, register
;; or memory) and an immediate in the range 0..3 (operand 4).
10597 (define_insn "xop_vpermil2<mode>3"
10598 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
10600 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
10601 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
10602 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
10603 (match_operand:SI 4 "const_0_to_3_operand" "n")]
10606 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
10607 [(set_attr "type" "sse4arg")
10608 (set_attr "length_immediate" "1")
10609 (set_attr "mode" "<MODE>")])
10611 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vaesenc with the vex prefix attribute, enabled when both
;; TARGET_AES and TARGET_AVX hold.  Operand 2 may be a register or memory.
10612 (define_insn "*avx_aesenc"
10613 [(set (match_operand:V2DI 0 "register_operand" "=x")
10614 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
10615 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
10617 "TARGET_AES && TARGET_AVX"
10618 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
10619 [(set_attr "type" "sselog1")
10620 (set_attr "prefix_extra" "1")
10621 (set_attr "prefix" "vex")
10622 (set_attr "mode" "TI")])
;; Two-operand aesenc.  Operand 1 is tied to the destination by the "0"
;; constraint; operand 2 may be a register or memory.
10624 (define_insn "aesenc"
10625 [(set (match_operand:V2DI 0 "register_operand" "=x")
10626 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
10627 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
10630 "aesenc\t{%2, %0|%0, %2}"
10631 [(set_attr "type" "sselog1")
10632 (set_attr "prefix_extra" "1")
10633 (set_attr "mode" "TI")])
;; Three-operand vaesenclast (UNSPEC_AESENCLAST) with the vex prefix
;; attribute, enabled when both TARGET_AES and TARGET_AVX hold.
10635 (define_insn "*avx_aesenclast"
10636 [(set (match_operand:V2DI 0 "register_operand" "=x")
10637 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
10638 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
10639 UNSPEC_AESENCLAST))]
10640 "TARGET_AES && TARGET_AVX"
10641 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
10642 [(set_attr "type" "sselog1")
10643 (set_attr "prefix_extra" "1")
10644 (set_attr "prefix" "vex")
10645 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST).  Operand 1 is tied to the
;; destination by the "0" constraint; operand 2 may be register or memory.
10647 (define_insn "aesenclast"
10648 [(set (match_operand:V2DI 0 "register_operand" "=x")
10649 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
10650 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
10651 UNSPEC_AESENCLAST))]
10653 "aesenclast\t{%2, %0|%0, %2}"
10654 [(set_attr "type" "sselog1")
10655 (set_attr "prefix_extra" "1")
10656 (set_attr "mode" "TI")])
;; Three-operand vaesdec with the vex prefix attribute, enabled when both
;; TARGET_AES and TARGET_AVX hold.  Operand 2 may be a register or memory.
10658 (define_insn "*avx_aesdec"
10659 [(set (match_operand:V2DI 0 "register_operand" "=x")
10660 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
10661 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
10663 "TARGET_AES && TARGET_AVX"
10664 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
10665 [(set_attr "type" "sselog1")
10666 (set_attr "prefix_extra" "1")
10667 (set_attr "prefix" "vex")
10668 (set_attr "mode" "TI")])
;; Two-operand aesdec.  Operand 1 is tied to the destination by the "0"
;; constraint; operand 2 may be a register or memory.
10670 (define_insn "aesdec"
10671 [(set (match_operand:V2DI 0 "register_operand" "=x")
10672 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
10673 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
10676 "aesdec\t{%2, %0|%0, %2}"
10677 [(set_attr "type" "sselog1")
10678 (set_attr "prefix_extra" "1")
10679 (set_attr "mode" "TI")])
;; Three-operand vaesdeclast (UNSPEC_AESDECLAST) with the vex prefix
;; attribute, enabled when both TARGET_AES and TARGET_AVX hold.
10681 (define_insn "*avx_aesdeclast"
10682 [(set (match_operand:V2DI 0 "register_operand" "=x")
10683 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
10684 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
10685 UNSPEC_AESDECLAST))]
10686 "TARGET_AES && TARGET_AVX"
10687 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
10688 [(set_attr "type" "sselog1")
10689 (set_attr "prefix_extra" "1")
10690 (set_attr "prefix" "vex")
10691 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST).  Operand 1 is tied to the
;; destination by the "0" constraint; operand 2 may be register or memory.
10693 (define_insn "aesdeclast"
10694 [(set (match_operand:V2DI 0 "register_operand" "=x")
10695 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
10696 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
10697 UNSPEC_AESDECLAST))]
10699 "aesdeclast\t{%2, %0|%0, %2}"
10700 [(set_attr "type" "sselog1")
10701 (set_attr "prefix_extra" "1")
10702 (set_attr "mode" "TI")])
;; aesimc with a single source operand (register or memory).  The template
;; uses the %v marker and the prefix attribute is maybe_vex, so this one
;; pattern serves both encodings.
10704 (define_insn "aesimc"
10705 [(set (match_operand:V2DI 0 "register_operand" "=x")
10706 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10709 "%vaesimc\t{%1, %0|%0, %1}"
10710 [(set_attr "type" "sselog1")
10711 (set_attr "prefix_extra" "1")
10712 (set_attr "prefix" "maybe_vex")
10713 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): one source operand (register
;; or memory) plus an immediate in the range 0..255.  The template uses the
;; %v marker and the prefix attribute is maybe_vex.
10715 (define_insn "aeskeygenassist"
10716 [(set (match_operand:V2DI 0 "register_operand" "=x")
10717 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
10718 (match_operand:SI 2 "const_0_to_255_operand" "n")]
10719 UNSPEC_AESKEYGENASSIST))]
10721 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
10722 [(set_attr "type" "sselog1")
10723 (set_attr "prefix_extra" "1")
10724 (set_attr "length_immediate" "1")
10725 (set_attr "prefix" "maybe_vex")
10726 (set_attr "mode" "TI")])
;; Three-operand vpclmulqdq with an immediate in the range 0..255
;; (operand 3), enabled when both TARGET_PCLMUL and TARGET_AVX hold.
10728 (define_insn "*vpclmulqdq"
10729 [(set (match_operand:V2DI 0 "register_operand" "=x")
10730 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
10731 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
10732 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10734 "TARGET_PCLMUL && TARGET_AVX"
10735 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10736 [(set_attr "type" "sselog1")
10737 (set_attr "prefix_extra" "1")
10738 (set_attr "length_immediate" "1")
10739 (set_attr "prefix" "vex")
10740 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq with an immediate in the range 0..255
;; (operand 3).  Operand 1 is tied to the destination by the "0"
;; constraint.
10742 (define_insn "pclmulqdq"
10743 [(set (match_operand:V2DI 0 "register_operand" "=x")
10744 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
10745 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
10746 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10749 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
10750 [(set_attr "type" "sselog1")
10751 (set_attr "prefix_extra" "1")
10752 (set_attr "length_immediate" "1")
10753 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall by hand.  Element 0 is an
;; unspec_volatile; it is followed by one SET per SSE register assigning
;; the V8SImode zero constant.  The register count is 16 when TARGET_64BIT
;; and 8 otherwise.
10755 (define_expand "avx_vzeroall"
10756 [(match_par_dup 0 [(const_int 0)])]
10759 int nregs = TARGET_64BIT ? 16 : 8;
10762 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
10764 XVECEXP (operands[0], 0, 0)
10765 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
10768 for (regno = 0; regno < nregs; regno++)
10769 XVECEXP (operands[0], 0, regno + 1)
10770 = gen_rtx_SET (VOIDmode,
10771 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
10772 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by vzeroall_operation whose first element
;; is the UNSPECV_VZEROALL unspec_volatile.  The attributes mark it as
;; having no modrm byte and no memory access.
10775 (define_insn "*avx_vzeroall"
10776 [(match_parallel 0 "vzeroall_operation"
10777 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
10780 [(set_attr "type" "sse")
10781 (set_attr "modrm" "0")
10782 (set_attr "memory" "none")
10783 (set_attr "prefix" "vex")
10784 (set_attr "mode" "OI")])
10786 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
10787 ;; if the upper 128bits are unused.
;; UNSPECV_VZEROUPPER as an unspec_volatile carrying one const_int
;; operand.  The attributes mark it as having no modrm byte and no memory
;; access.
10788 (define_insn "avx_vzeroupper"
10789 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
10790 UNSPECV_VZEROUPPER)]
10793 [(set_attr "type" "sse")
10794 (set_attr "modrm" "0")
10795 (set_attr "memory" "none")
10796 (set_attr "prefix" "vex")
10797 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of an AVX256MODE24P vector.  The
;; visible template line emits vbroadcast<ssescalarmodesuffix>.  When the
;; source is a register, the pattern is split after reload into a
;; vec_duplicate in <avxhalfvecmode> followed by a vec_concat of that half
;; with itself.  operands[2] is the destination hard register viewed in
;; <avxhalfvecmode>.
10799 (define_insn_and_split "vec_dup<mode>"
10800 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
10801 (vec_duplicate:AVX256MODE24P
10802 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
10805 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
10807 "&& reload_completed && REG_P (operands[1])"
10808 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
10809 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
10810 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
10811 [(set_attr "type" "ssemov")
10812 (set_attr "prefix_extra" "1")
10813 (set_attr "prefix" "vex")
10814 (set_attr "mode" "V8SF")])
;; Build a 256-bit vector as a vec_concat from a half-width source
;; (operand 1).  Three alternatives are offered: a memory source uses
;; vbroadcastf128; a source tied to the destination ("0") uses vinsertf128
;; with immediate 1; any other register source uses vperm2f128 with
;; immediate 0.
10816 (define_insn "avx_vbroadcastf128_<mode>"
10817 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
10818 (vec_concat:AVX256MODE
10819 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
10823 vbroadcastf128\t{%1, %0|%0, %1}
10824 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
10825 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
10826 [(set_attr "type" "ssemov,sselog1,sselog1")
10827 (set_attr "prefix_extra" "1")
10828 (set_attr "length_immediate" "0,1,1")
10829 (set_attr "prefix" "vex")
10830 (set_attr "mode" "V4SF,V8SF,V8SF")])
10832 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
10833 ;; If it so happens that the input is in memory, use vbroadcast.
10834 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element ELT (operand 3) selected by a parallel that
;; avx_vbroadcast_operand accepts.  One visible branch re-addresses the
;; source as an SFmode reference at byte offset elt * 4 and emits
;; vbroadcastss.  The other emits vpermilps with immediate elt * 0x55,
;; i.e. the 2-bit index repeated in all four fields.
10835 (define_insn "*avx_vperm_broadcast_v4sf"
10836 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
10838 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
10839 (match_parallel 2 "avx_vbroadcast_operand"
10840 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10843 int elt = INTVAL (operands[3]);
10844 switch (which_alternative)
10848 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
10849 return "vbroadcastss\t{%1, %0|%0, %1}";
10851 operands[2] = GEN_INT (elt * 0x55);
10852 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
10854 gcc_unreachable ();
10857 [(set_attr "type" "ssemov,ssemov,sselog1")
10858 (set_attr "prefix_extra" "1")
10859 (set_attr "length_immediate" "0,0,1")
10860 (set_attr "prefix" "vex")
10861 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast of element ELT (operand 3), split after reload.
;; One visible path emits avx_vpermil<mode> to replicate the element
;; within its 128-bit lane, then avx_vperm2f128<mode>3 to copy that lane
;; into both halves of the destination.  The other path re-addresses the
;; source as a scalar at the element's byte offset, leaving the
;; vec_duplicate replacement pattern to perform the broadcast.
10863 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
10864 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
10865 (vec_select:AVX256MODEF2P
10866 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
10867 (match_parallel 2 "avx_vbroadcast_operand"
10868 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10871 "&& reload_completed"
10872 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
10874 rtx op0 = operands[0], op1 = operands[1];
10875 int elt = INTVAL (operands[3]);
10881 /* Shuffle element we care about into all elements of the 128-bit lane.
10882 The other lane gets shuffled too, but we don't care. */
10883 if (<MODE>mode == V4DFmode)
10884 mask = (elt & 1 ? 15 : 0);
10886 mask = (elt & 3) * 0x55;
10887 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
10889 /* Shuffle the lane we care about into both lanes of the dest. */
10890 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
10891 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
10895 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
10896 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; Expander for the AVXMODEFDP modes.  It turns the immediate (operand 2)
;; into an explicit vec_select PARALLEL with one mask bit per element.
;; Bits 0 and 1 select elements 0 and 1; for V4DF, bits 2 and 3 select
;; within the upper pair, hence the + 2.
10899 (define_expand "avx_vpermil<mode>"
10900 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
10901 (vec_select:AVXMODEFDP
10902 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
10903 (match_operand:SI 2 "const_0_to_255_operand" "")))]
10906 int mask = INTVAL (operands[2]);
10907 rtx perm[<ssescalarnum>];
10909 perm[0] = GEN_INT (mask & 1);
10910 perm[1] = GEN_INT ((mask >> 1) & 1);
10911 if (<MODE>mode == V4DFmode)
10913 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
10914 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
10918 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the AVXMODEFSP modes.  It turns the immediate (operand 2)
;; into an explicit vec_select PARALLEL with two mask bits per element.
;; For V8SF the same four 2-bit fields are reused for elements 4..7 with
;; an offset of 4.
10921 (define_expand "avx_vpermil<mode>"
10922 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
10923 (vec_select:AVXMODEFSP
10924 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
10925 (match_operand:SI 2 "const_0_to_255_operand" "")))]
10928 int mask = INTVAL (operands[2]);
10929 rtx perm[<ssescalarnum>];
10931 perm[0] = GEN_INT (mask & 3);
10932 perm[1] = GEN_INT ((mask >> 2) & 3);
10933 perm[2] = GEN_INT ((mask >> 4) & 3);
10934 perm[3] = GEN_INT ((mask >> 6) & 3);
10935 if (<MODE>mode == V8SFmode)
10937 perm[4] = GEN_INT ((mask & 3) + 4);
10938 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
10939 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
10940 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
10944 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a vec_select whose PARALLEL is accepted by
;; avx_vpermilp_<mode>_operand and emits vpermil<ssemodesuffix> with an
;; immediate.  The immediate is avx_vpermilp_parallel (...) - 1, computed
;; at output time and stored back into operands[2].
10947 (define_insn "*avx_vpermilp<mode>"
10948 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
10949 (vec_select:AVXMODEF2P
10950 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
10951 (match_parallel 2 "avx_vpermilp_<mode>_operand"
10952 [(match_operand 3 "const_int_operand" "")])))]
10955 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
10956 operands[2] = GEN_INT (mask);
10957 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10959 [(set_attr "type" "sselog")
10960 (set_attr "prefix_extra" "1")
10961 (set_attr "length_immediate" "1")
10962 (set_attr "prefix" "vex")
10963 (set_attr "mode" "<MODE>")])
;; vpermil<ssemodesuffix> with a variable selector: operand 2 is a
;; <avxpermvecmode> vector in a register or memory, operand 1 is the data
;; register.
10965 (define_insn "avx_vpermilvar<mode>3"
10966 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
10968 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
10969 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
10972 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10973 [(set_attr "type" "sselog")
10974 (set_attr "prefix_extra" "1")
10975 (set_attr "prefix" "vex")
10976 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  The default pattern is the
;; UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor bit 7 of the
;; immediate is set ((mask & 0x88) == 0), the preparation code builds an
;; explicit SET of operand 0 to a vec_select over the vec_concat of
;; operands 1 and 2.  Mask bits 1:0 choose the half-vector for the low
;; half of the result and bits 5:4 choose it for the high half.
10978 (define_expand "avx_vperm2f128<mode>3"
10979 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
10980 (unspec:AVX256MODE2P
10981 [(match_operand:AVX256MODE2P 1 "register_operand" "")
10982 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
10983 (match_operand:SI 3 "const_0_to_255_operand" "")]
10984 UNSPEC_VPERMIL2F128))]
10987 int mask = INTVAL (operands[3]);
10988 if ((mask & 0x88) == 0)
10990 rtx perm[<ssescalarnum>], t1, t2;
10991 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
10993 base = (mask & 3) * nelt2;
10994 for (i = 0; i < nelt2; ++i)
10995 perm[i] = GEN_INT (base + i);
10997 base = ((mask >> 4) & 3) * nelt2;
10998 for (i = 0; i < nelt2; ++i)
10999 perm[i + nelt2] = GEN_INT (base + i);
11001 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11002 operands[1], operands[2]);
11003 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11004 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11005 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11011 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11012 ;; means that in order to represent this properly in rtl we'd have to
11013 ;; nest *another* vec_concat with a zero operand and do the select from
11014 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128.  It accepts any immediate in
;; the range 0..255 and passes it to the instruction unchanged.
11015 (define_insn "*avx_vperm2f128<mode>_full"
11016 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11017 (unspec:AVX256MODE2P
11018 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11019 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11020 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11021 UNSPEC_VPERMIL2F128))]
11023 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11024 [(set_attr "type" "sselog")
11025 (set_attr "prefix_extra" "1")
11026 (set_attr "length_immediate" "1")
11027 (set_attr "prefix" "vex")
11028 (set_attr "mode" "V8SF")])
;; vec_select-over-vec_concat form of vperm2f128.  The PARALLEL must be
;; accepted by avx_vperm2f128_<mode>_operand.  The immediate is
;; avx_vperm2f128_parallel (...) - 1, computed at output time and stored
;; back into operands[3].
11030 (define_insn "*avx_vperm2f128<mode>_nozero"
11031 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11032 (vec_select:AVX256MODE2P
11033 (vec_concat:<ssedoublesizemode>
11034 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11035 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11036 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11037 [(match_operand 4 "const_int_operand" "")])))]
11040 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11041 operands[3] = GEN_INT (mask);
11042 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11044 [(set_attr "type" "sselog")
11045 (set_attr "prefix_extra" "1")
11046 (set_attr "length_immediate" "1")
11047 (set_attr "prefix" "vex")
11048 (set_attr "mode" "V8SF")])
;; Insert a half-width vector (operand 2) into a 256-bit vector
;; (operand 1).  Operand 3 is an immediate in the range 0..1; the switch
;; on it chooses between gen_vec_set_lo_<mode> and gen_vec_set_hi_<mode>,
;; and the chosen generator is then called with operands 0, 1 and 2.
11050 (define_expand "avx_vinsertf128<mode>"
11051 [(match_operand:AVX256MODE 0 "register_operand" "")
11052 (match_operand:AVX256MODE 1 "register_operand" "")
11053 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11054 (match_operand:SI 3 "const_0_to_1_operand" "")]
11057 rtx (*insn)(rtx, rtx, rtx);
11059 switch (INTVAL (operands[3]))
11062 insn = gen_vec_set_lo_<mode>;
11065 insn = gen_vec_set_hi_<mode>;
11068 gcc_unreachable ();
11071 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Four-element 256-bit modes: replace the low half.  The result is the
;; vec_concat of operand 2 with elements 2..3 of operand 1, emitted as
;; vinsertf128 with immediate 0.
11075 (define_insn "vec_set_lo_<mode>"
11076 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11077 (vec_concat:AVX256MODE4P
11078 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11079 (vec_select:<avxhalfvecmode>
11080 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11081 (parallel [(const_int 2) (const_int 3)]))))]
11083 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11084 [(set_attr "type" "sselog")
11085 (set_attr "prefix_extra" "1")
11086 (set_attr "length_immediate" "1")
11087 (set_attr "prefix" "vex")
11088 (set_attr "mode" "V8SF")])
;; Four-element 256-bit modes: replace the high half.  The result is the
;; vec_concat of elements 0..1 of operand 1 with operand 2, emitted as
;; vinsertf128 with immediate 1.
11090 (define_insn "vec_set_hi_<mode>"
11091 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11092 (vec_concat:AVX256MODE4P
11093 (vec_select:<avxhalfvecmode>
11094 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11095 (parallel [(const_int 0) (const_int 1)]))
11096 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11098 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11099 [(set_attr "type" "sselog")
11100 (set_attr "prefix_extra" "1")
11101 (set_attr "length_immediate" "1")
11102 (set_attr "prefix" "vex")
11103 (set_attr "mode" "V8SF")])
;; Eight-element 256-bit modes: replace the low half.  The result is the
;; vec_concat of operand 2 with elements 4..7 of operand 1, emitted as
;; vinsertf128 with immediate 0.
11105 (define_insn "vec_set_lo_<mode>"
11106 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11107 (vec_concat:AVX256MODE8P
11108 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11109 (vec_select:<avxhalfvecmode>
11110 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11111 (parallel [(const_int 4) (const_int 5)
11112 (const_int 6) (const_int 7)]))))]
11114 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11115 [(set_attr "type" "sselog")
11116 (set_attr "prefix_extra" "1")
11117 (set_attr "length_immediate" "1")
11118 (set_attr "prefix" "vex")
11119 (set_attr "mode" "V8SF")])
;; Eight-element 256-bit modes: replace the high half.  The result is the
;; vec_concat of elements 0..3 of operand 1 with operand 2, emitted as
;; vinsertf128 with immediate 1.
11121 (define_insn "vec_set_hi_<mode>"
11122 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11123 (vec_concat:AVX256MODE8P
11124 (vec_select:<avxhalfvecmode>
11125 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11126 (parallel [(const_int 0) (const_int 1)
11127 (const_int 2) (const_int 3)]))
11128 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11130 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11131 [(set_attr "type" "sselog")
11132 (set_attr "prefix_extra" "1")
11133 (set_attr "length_immediate" "1")
11134 (set_attr "prefix" "vex")
11135 (set_attr "mode" "V8SF")])
;; V16HI: replace the low half with the V8HI operand 2.  Elements 8..15 of
;; operand 1 are the ones selected from the original vector.  Emitted as
;; vinsertf128 with immediate 0.
11137 (define_insn "vec_set_lo_v16hi"
11138 [(set (match_operand:V16HI 0 "register_operand" "=x")
11140 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11142 (match_operand:V16HI 1 "register_operand" "x")
11143 (parallel [(const_int 8) (const_int 9)
11144 (const_int 10) (const_int 11)
11145 (const_int 12) (const_int 13)
11146 (const_int 14) (const_int 15)]))))]
11148 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11149 [(set_attr "type" "sselog")
11150 (set_attr "prefix_extra" "1")
11151 (set_attr "length_immediate" "1")
11152 (set_attr "prefix" "vex")
11153 (set_attr "mode" "V8SF")])
;; V16HI variant of the high-half insert.
;; The selector keeps elements 0-7 of operand 1 (V16HI register); operand 2
;; (V8HI, register or memory) supplies the new half.
;; Emitted as vinsertf128 with immediate 1; the "mode" attribute is V8SF
;; even though the operands are integer vectors.
11155 (define_insn "vec_set_hi_v16hi"
11156 [(set (match_operand:V16HI 0 "register_operand" "=x")
11159 (match_operand:V16HI 1 "register_operand" "x")
11160 (parallel [(const_int 0) (const_int 1)
11161 (const_int 2) (const_int 3)
11162 (const_int 4) (const_int 5)
11163 (const_int 6) (const_int 7)]))
11164 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11166 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11167 [(set_attr "type" "sselog")
11168 (set_attr "prefix_extra" "1")
11169 (set_attr "length_immediate" "1")
11170 (set_attr "prefix" "vex")
11171 (set_attr "mode" "V8SF")])
;; V32QI variant of the low-half insert.
;; Operand 2 (V16QI, register or memory) supplies the new half; the selector
;; keeps elements 16-31 of operand 1 (V32QI register).
;; Emitted as vinsertf128 with immediate 0; the "mode" attribute is V8SF
;; even though the operands are integer vectors.
11173 (define_insn "vec_set_lo_v32qi"
11174 [(set (match_operand:V32QI 0 "register_operand" "=x")
11176 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11178 (match_operand:V32QI 1 "register_operand" "x")
11179 (parallel [(const_int 16) (const_int 17)
11180 (const_int 18) (const_int 19)
11181 (const_int 20) (const_int 21)
11182 (const_int 22) (const_int 23)
11183 (const_int 24) (const_int 25)
11184 (const_int 26) (const_int 27)
11185 (const_int 28) (const_int 29)
11186 (const_int 30) (const_int 31)]))))]
11188 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11189 [(set_attr "type" "sselog")
11190 (set_attr "prefix_extra" "1")
11191 (set_attr "length_immediate" "1")
11192 (set_attr "prefix" "vex")
11193 (set_attr "mode" "V8SF")])
;; V32QI variant of the high-half insert.
;; The selector keeps elements 0-15 of operand 1 (V32QI register); operand 2
;; (V16QI, register or memory) supplies the new half.
;; Emitted as vinsertf128 with immediate 1; the "mode" attribute is V8SF
;; even though the operands are integer vectors.
11195 (define_insn "vec_set_hi_v32qi"
11196 [(set (match_operand:V32QI 0 "register_operand" "=x")
11199 (match_operand:V32QI 1 "register_operand" "x")
11200 (parallel [(const_int 0) (const_int 1)
11201 (const_int 2) (const_int 3)
11202 (const_int 4) (const_int 5)
11203 (const_int 6) (const_int 7)
11204 (const_int 8) (const_int 9)
11205 (const_int 10) (const_int 11)
11206 (const_int 12) (const_int 13)
11207 (const_int 14) (const_int 15)]))
11208 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11210 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11211 [(set_attr "type" "sselog")
11212 (set_attr "prefix_extra" "1")
11213 (set_attr "length_immediate" "1")
11214 (set_attr "prefix" "vex")
11215 (set_attr "mode" "V8SF")])
;; Load into register operand 0 (AVXMODEF2P) from memory operand 1, together
;; with register operand 2 in <avxpermvecmode>.
;; NOTE(review): operand 2 is presumably the per-element mask of the
;; vmaskmov instruction -- confirm against the instruction reference.
;; Emitted as vmaskmov<ssemodesuffix>; the "mode" attribute follows the
;; iterator mode.
11217 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
11218 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11220 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11221 (match_operand:<avxpermvecmode> 2 "register_operand" "x")
11225 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11226 [(set_attr "type" "sselog1")
11227 (set_attr "prefix_extra" "1")
11228 (set_attr "prefix" "vex")
11229 (set_attr "mode" "<MODE>")])
;; Store to memory operand 0 (AVXMODEF2P) an UNSPEC_MASKSTORE of register
;; operand 1 (<avxpermvecmode>) and register operand 2 (AVXMODEF2P).
;; NOTE(review): operand 1 is presumably the per-element mask and operand 2
;; the data -- confirm against the instruction reference.
;; Emitted as vmaskmov<ssemodesuffix>; the "mode" attribute follows the
;; iterator mode.
11231 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
11232 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11234 [(match_operand:<avxpermvecmode> 1 "register_operand" "x")
11235 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11237 UNSPEC_MASKSTORE))]
11239 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11240 [(set_attr "type" "sselog1")
11241 (set_attr "prefix_extra" "1")
11242 (set_attr "prefix" "vex")
11243 (set_attr "mode" "<MODE>")])
;; Insn-and-split that sets an AVX256MODE2P destination (register or memory)
;; from an unspec of a half-width <avxhalfvecmode> source (memory/register
;; for the register alternative, register for the memory alternative).
;; The split is enabled once reload has completed.  It re-expresses
;; operand 1 in <MODE>mode, using gen_rtx_REG on its register number or
;; gen_lowpart, and then emits an ordinary move into operand 0.
11245 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11246 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11247 (unspec:AVX256MODE2P
11248 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11252 "&& reload_completed"
11255 rtx op1 = operands[1];
11257 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11259 op1 = gen_lowpart (<MODE>mode, op1);
11260 emit_move_insn (operands[0], op1);
;; Vector initialisation expander for the AVX256MODE modes.
;; Operand 0 is the destination register; operand 1 is passed through
;; without a mode or predicate.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
11264 (define_expand "vec_init<mode>"
11265 [(match_operand:AVX256MODE 0 "register_operand" "")
11266 (match_operand 1 "" "")]
11269 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Build an AVX256MODE vector by concatenating two <avxhalfvecmode> halves.
;; Operand 1 (register) is the first arm; operand 2 is the second arm.
;; Two alternatives, matching the "type" attribute order sselog,ssemov:
;;   - operand 2 in a register or memory ("xm"): vinsertf128 with
;;     immediate 1, using the %t1 form of operand 1;
;;   - operand 2 matching constraint "C": a vmovaps/vmovapd/vmovdqa of
;;     operand 1 into the %x0 form of the destination, chosen by the insn's
;;     "mode" attribute.
;; NOTE(review): "C" is presumably the all-zero vector constant, which
;; would make the second alternative rely on the move clearing the upper
;; half -- confirm against the constraint definitions.
11273 (define_insn "*vec_concat<mode>_avx"
11274 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11275 (vec_concat:AVX256MODE
11276 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11277 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11280 switch (which_alternative)
11283 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11285 switch (get_attr_mode (insn))
11288 return "vmovaps\t{%1, %x0|%x0, %1}";
11290 return "vmovapd\t{%1, %x0|%x0, %1}";
11292 return "vmovdqa\t{%1, %x0|%x0, %1}";
11295 gcc_unreachable ();
11298 [(set_attr "type" "sselog,ssemov")
11299 (set_attr "prefix_extra" "1,*")
11300 (set_attr "length_immediate" "1,*")
11301 (set_attr "prefix" "vex")
11302 (set_attr "mode" "<avxvecmode>")])
;; Half-precision to single-precision conversion, 128-bit register form.
;; Operand 1 is a V8HI register; the result in operand 0 is V4SF.  The
;; pattern takes the V8SF unspec of operand 1 and keeps four of its
;; elements through the (parallel ...) selector below.
;; The selector must name the four low elements 0, 1, 2, 3: the instruction
;; converts the four low half-precision values, one per result element.
;; (A selector of 0, 1, 1, 2 would duplicate element 1 and drop element 3.)
11304 (define_insn "vcvtph2ps"
11305 [(set (match_operand:V4SF 0 "register_operand" "=x")
11307 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
11309 (parallel [(const_int 0) (const_int 1)
11310 (const_int 2) (const_int 3)])))]
11312 "vcvtph2ps\t{%1, %0|%0, %1}"
11313 [(set_attr "type" "ssecvt")
11314 (set_attr "prefix" "vex")
11315 (set_attr "mode" "V4SF")])
;; Memory-source form of the half-to-single conversion: operand 1 is a V4HI
;; memory operand and the V4SF result is its UNSPEC_VCVTPH2PS conversion.
;; NOTE(review): the "mode" attribute here is V8SF although the result is
;; V4SF and the register-source "vcvtph2ps" pattern uses V4SF -- confirm
;; whether this difference is intentional.
11317 (define_insn "*vcvtph2ps_load"
11318 [(set (match_operand:V4SF 0 "register_operand" "=x")
11319 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
11320 UNSPEC_VCVTPH2PS))]
11322 "vcvtph2ps\t{%1, %0|%0, %1}"
11323 [(set_attr "type" "ssecvt")
11324 (set_attr "prefix" "vex")
11325 (set_attr "mode" "V8SF")])
;; 256-bit half-to-single conversion: the V8SF result is the
;; UNSPEC_VCVTPH2PS conversion of operand 1, a V8HI register or memory
;; operand.  All eight elements are converted; no selector is applied.
11327 (define_insn "vcvtph2ps256"
11328 [(set (match_operand:V8SF 0 "register_operand" "=x")
11329 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11330 UNSPEC_VCVTPH2PS))]
11332 "vcvtph2ps\t{%1, %0|%0, %1}"
11333 [(set_attr "type" "ssecvt")
11334 (set_attr "prefix" "vex")
11335 (set_attr "mode" "V8SF")])
;; Expander for the single-to-half conversion with a V8HI register result.
;; Operand 1 is the V4SF source and operand 2 an SImode immediate; together
;; they form a V4HI unspec.  The preparation statement sets operands[3] to
;; the V4HImode zero constant before the pattern is generated.
11337 (define_expand "vcvtps2ph"
11338 [(set (match_operand:V8HI 0 "register_operand" "")
11340 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
11341 (match_operand:SI 2 "immediate_operand" "")]
11345 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination form of the single-to-half conversion.
;; The V8HI result combines the V4HI unspec of operand 1 (V4SF register)
;; and immediate operand 2 (constraint "N") with operand 3, which must
;; satisfy const0_operand in V4HImode.
11347 (define_insn "*vcvtps2ph"
11348 [(set (match_operand:V8HI 0 "register_operand" "=x")
11350 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11351 (match_operand:SI 2 "immediate_operand" "N")]
11353 (match_operand:V4HI 3 "const0_operand" "")))]
11355 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11356 [(set_attr "type" "ssecvt")
11357 (set_attr "prefix" "vex")
11358 (set_attr "mode" "V4SF")])
;; Memory-destination form of the single-to-half conversion: stores the
;; V4HI UNSPEC_VCVTPS2PH of operand 1 (V4SF register) and immediate
;; operand 2 (constraint "N") directly to V4HI memory operand 0.
11360 (define_insn "*vcvtps2ph_store"
11361 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11362 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11363 (match_operand:SI 2 "immediate_operand" "N")]
11364 UNSPEC_VCVTPS2PH))]
11366 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11367 [(set_attr "type" "ssecvt")
11368 (set_attr "prefix" "vex")
11369 (set_attr "mode" "V4SF")])
;; 256-bit single-to-half conversion: the V8HI destination (register or
;; memory) receives the UNSPEC_VCVTPS2PH of operand 1 (V8SF register) and
;; immediate operand 2 (constraint "N").
11371 (define_insn "vcvtps2ph256"
11372 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
11373 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
11374 (match_operand:SI 2 "immediate_operand" "N")]
11375 UNSPEC_VCVTPS2PH))]
11377 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11378 [(set_attr "type" "ssecvt")
11379 (set_attr "prefix" "vex")
11380 (set_attr "mode" "V8SF")])