1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte integer vector modes.  The digits in each
;; iterator name match the element widths, in bytes, of the modes it
;; lists (1 = QI, 2 = HI, 4 = SI, 8 = DI).
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; SSEMODEF4: the scalar float modes plus the 16-byte float vector modes.
;; SSEMODEF2P: the 16-byte float vector modes only.
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX mode subsets.
;;   AVX256MODEF2P    - 32-byte float vector modes.
;;   AVX256MODE2P     - 32-byte modes V8SI, V8SF and V4DF.
;;   AVX256MODE4P     - 32-byte modes with four elements.
;;   AVX256MODE8P     - 32-byte modes with eight elements.
;;   AVXMODEF2P       - all 16-byte and 32-byte float vector modes.
;;   AVXMODEF4P       - float vector modes with four elements.
;;   AVXMODEDCVTDQ2PS - SF vector modes (NOTE(review): name suggests the
;;                      cvtdq2ps result modes; confirm at the use site).
;;   AVXMODEDCVTPS2DQ - SI vector modes (NOTE(review): name suggests the
;;                      cvtps2dq result modes; confirm at the use site).
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
63 ;; Int-float size matches
64 (define_mode_iterator SSEMODE4S [V4SF V4SI])
65 (define_mode_iterator SSEMODE2D [V2DF V2DI])
67 ;; Mapping from float mode to required SSE level
68 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
70 ;; Mapping from integer vector mode to mnemonic suffix
71 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
73 ;; Mapping of the sse5 suffix
74 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
75 (V4SF "ps") (V2DF "pd")])
76 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
77 (V4SF "ss") (V2DF "sd")])
78 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
80 ;; Mapping of the max integer size for sse5 rotate immediate constraint
81 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
83 ;; Mapping of vector modes back to the scalar modes
84 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
85 (V16QI "QI") (V8HI "HI")
86 (V4SI "SI") (V2DI "DI")])
88 ;; Mapping of vector modes to a vector mode of double size
89 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
90 (V4SF "V8SF") (V4SI "V8SI")])
92 ;; Number of scalar elements in each vector type
93 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
94 (V16QI "16") (V8HI "8")
95 (V4SI "4") (V2DI "2")])
;; Mapping from vector mode to the value used for the insn "mode"
;; attribute in AVX patterns: integer vector modes map to TI (16 bytes)
;; or OI (32 bytes), float vector modes map to themselves.
98 (define_mode_attr avxvecmode
99 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
100 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
101 (V8SF "V8SF") (V4DF "V4DF")])
;; Mapping from integer vector mode to the single-precision float vector
;; mode of the same total size (V4SF for 16 bytes, V8SF for 32 bytes).
102 (define_mode_attr avxvecpsmode
103 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
104 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Mapping from vector mode to the vector mode with the same element
;; type and half as many elements.
105 (define_mode_attr avxhalfvecmode
106 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
107 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; Mapping from vector mode to the scalar mode of its elements.  Only
;; the modes listed here have an entry.
108 (define_mode_attr avxscalarmode
109 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
110 (V8SF "SF") (V4DF "DF")])
;; Mapping between SF and SI vector modes with the same element count,
;; in both directions.
111 (define_mode_attr avxcvtvecmode
112 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Mapping from float vector mode to the integer vector mode with the
;; same element count and element width.
113 (define_mode_attr avxpermvecmode
114 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Mapping from float vector mode to the precision letter appended to
;; AVX mnemonics and pattern names ("s" for SF elements, "d" for DF
;; elements), e.g. "vmovup<avxmodesuffixf2c>".
115 (define_mode_attr avxmodesuffixf2c
116 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
117 (define_mode_attr avxmodesuffixp
118 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Mapping from 16-byte integer vector mode to an element-size suffix
;; letter ("b" for QI, "w" for HI, "d" for SI elements).
120 (define_mode_attr avxmodesuffixs
121 [(V16QI "b") (V8HI "w") (V4SI "d")])
;; Mapping from vector mode to a pattern-name suffix: empty for the
;; 16-byte modes listed, "256" for the 32-byte modes listed.  Used in
;; names such as "avx_movdqu<avxmodesuffix>".
122 (define_mode_attr avxmodesuffix
123 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
124 (V8SI "256") (V8SF "256") (V4DF "256")])
126 ;; Mapping of immediate bits for blend instructions
127 (define_mode_attr blendbits
128 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
130 ;; Mapping of immediate bits for vpermil instructions
131 (define_mode_attr vpermilbits
132 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
134 ;; Mapping of immediate bits for pinsr instructions
135 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
137 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
139 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
145 (define_expand "mov<mode>"
146 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
147 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
150 ix86_expand_vector_move (<MODE>mode, operands);
154 (define_insn "*avx_mov<mode>_internal"
155 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
156 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
158 && (register_operand (operands[0], <MODE>mode)
159 || register_operand (operands[1], <MODE>mode))"
161 switch (which_alternative)
164 return standard_sse_constant_opcode (insn, operands[1]);
167 switch (get_attr_mode (insn))
171 return "vmovaps\t{%1, %0|%0, %1}";
174 return "vmovapd\t{%1, %0|%0, %1}";
176 return "vmovdqa\t{%1, %0|%0, %1}";
182 [(set_attr "type" "sselog1,ssemov,ssemov")
183 (set_attr "prefix" "vex")
184 (set_attr "mode" "<avxvecmode>")])
186 ;; All of these patterns are enabled for SSE1 as well as SSE2.
187 ;; This is essential for maintaining stable calling conventions.
189 (define_expand "mov<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move (<MODE>mode, operands);
198 (define_insn "*mov<mode>_internal"
199 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
200 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
202 && (register_operand (operands[0], <MODE>mode)
203 || register_operand (operands[1], <MODE>mode))"
205 switch (which_alternative)
208 return standard_sse_constant_opcode (insn, operands[1]);
211 switch (get_attr_mode (insn))
214 return "movaps\t{%1, %0|%0, %1}";
216 return "movapd\t{%1, %0|%0, %1}";
218 return "movdqa\t{%1, %0|%0, %1}";
224 [(set_attr "type" "sselog1,ssemov,ssemov")
226 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
227 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
228 (and (eq_attr "alternative" "2")
229 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
231 (const_string "V4SF")
232 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
233 (const_string "V4SF")
234 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
235 (const_string "V2DF")
237 (const_string "TI")))])
239 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
240 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
241 ;; from memory, we'd prefer to load the memory directly into the %xmm
242 ;; register. To facilitate this happy circumstance, this pattern won't
243 ;; split until after register allocation. If the 64-bit value didn't
244 ;; come from memory, this is the best we can do. This is much better
245 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from there.
248 (define_insn_and_split "movdi_to_sse"
250 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
251 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
252 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
253 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
255 "&& reload_completed"
258 if (register_operand (operands[1], DImode))
260 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
261 Assemble the 64-bit DImode value in an xmm register. */
262 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
263 gen_rtx_SUBREG (SImode, operands[1], 0)));
264 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
265 gen_rtx_SUBREG (SImode, operands[1], 4)));
266 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
268 else if (memory_operand (operands[1], DImode))
269 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
275 [(set (match_operand:V4SF 0 "register_operand" "")
276 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
277 "TARGET_SSE && reload_completed"
280 (vec_duplicate:V4SF (match_dup 1))
284 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
285 operands[2] = CONST0_RTX (V4SFmode);
289 [(set (match_operand:V2DF 0 "register_operand" "")
290 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
291 "TARGET_SSE2 && reload_completed"
292 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
294 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
295 operands[2] = CONST0_RTX (DFmode);
298 (define_expand "push<mode>1"
299 [(match_operand:AVX256MODE 0 "register_operand" "")]
302 ix86_expand_push (<MODE>mode, operands[0]);
306 (define_expand "push<mode>1"
307 [(match_operand:SSEMODE 0 "register_operand" "")]
310 ix86_expand_push (<MODE>mode, operands[0]);
314 (define_expand "movmisalign<mode>"
315 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
316 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
319 ix86_expand_vector_move_misalign (<MODE>mode, operands);
323 (define_expand "movmisalign<mode>"
324 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
325 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
328 ix86_expand_vector_move_misalign (<MODE>mode, operands);
332 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
333 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
335 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
337 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
338 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
339 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
340 [(set_attr "type" "ssemov")
341 (set_attr "prefix" "vex")
342 (set_attr "mode" "<MODE>")])
344 (define_insn "sse2_movq128"
345 [(set (match_operand:V2DI 0 "register_operand" "=x")
348 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
349 (parallel [(const_int 0)]))
352 "%vmovq\t{%1, %0|%0, %1}"
353 [(set_attr "type" "ssemov")
354 (set_attr "prefix" "maybe_vex")
355 (set_attr "mode" "TI")])
357 (define_insn "<sse>_movup<ssemodesuffixf2c>"
358 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
360 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
362 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
363 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
364 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
365 [(set_attr "type" "ssemov")
366 (set_attr "mode" "<MODE>")])
368 (define_insn "avx_movdqu<avxmodesuffix>"
369 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
371 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
373 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
374 "vmovdqu\t{%1, %0|%0, %1}"
375 [(set_attr "type" "ssemov")
376 (set_attr "prefix" "vex")
377 (set_attr "mode" "<avxvecmode>")])
379 (define_insn "sse2_movdqu"
380 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
381 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
383 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
384 "movdqu\t{%1, %0|%0, %1}"
385 [(set_attr "type" "ssemov")
386 (set_attr "prefix_data16" "1")
387 (set_attr "mode" "TI")])
389 (define_insn "avx_movnt<mode>"
390 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
392 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
394 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
395 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
396 [(set_attr "type" "ssemov")
397 (set_attr "prefix" "vex")
398 (set_attr "mode" "<MODE>")])
400 (define_insn "<sse>_movnt<mode>"
401 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
403 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
405 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
406 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
407 [(set_attr "type" "ssemov")
408 (set_attr "mode" "<MODE>")])
410 (define_insn "avx_movnt<mode>"
411 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
413 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
416 "vmovntdq\t{%1, %0|%0, %1}"
417 [(set_attr "type" "ssecvt")
418 (set_attr "prefix" "vex")
419 (set_attr "mode" "<avxvecmode>")])
421 (define_insn "sse2_movntv2di"
422 [(set (match_operand:V2DI 0 "memory_operand" "=m")
423 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
426 "movntdq\t{%1, %0|%0, %1}"
427 [(set_attr "type" "ssecvt")
428 (set_attr "prefix_data16" "1")
429 (set_attr "mode" "TI")])
431 (define_insn "sse2_movntsi"
432 [(set (match_operand:SI 0 "memory_operand" "=m")
433 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
436 "movnti\t{%1, %0|%0, %1}"
437 [(set_attr "type" "ssecvt")
438 (set_attr "mode" "V2DF")])
440 (define_insn "avx_lddqu<avxmodesuffix>"
441 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
443 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
446 "vlddqu\t{%1, %0|%0, %1}"
447 [(set_attr "type" "ssecvt")
448 (set_attr "prefix" "vex")
449 (set_attr "mode" "<avxvecmode>")])
451 (define_insn "sse3_lddqu"
452 [(set (match_operand:V16QI 0 "register_operand" "=x")
453 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
456 "lddqu\t{%1, %0|%0, %1}"
457 [(set_attr "type" "ssecvt")
458 (set_attr "prefix_rep" "1")
459 (set_attr "mode" "TI")])
461 ; Expand patterns for non-temporal stores. At the moment, only those
462 ; that directly map to insns are defined; it would be possible to
463 ; define patterns for other modes that would expand to several insns.
465 (define_expand "storent<mode>"
466 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
468 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
470 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
473 (define_expand "storent<mode>"
474 [(set (match_operand:MODEF 0 "memory_operand" "")
476 [(match_operand:MODEF 1 "register_operand" "")]
481 (define_expand "storentv2di"
482 [(set (match_operand:V2DI 0 "memory_operand" "")
483 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
488 (define_expand "storentsi"
489 [(set (match_operand:SI 0 "memory_operand" "")
490 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
497 ;; Parallel floating point arithmetic
499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 (define_expand "<code><mode>2"
502 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
504 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
505 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
506 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for the plusminus operations on the 32-byte float vector
;; modes (AVX256MODEF2P).  Enabled when AVX256_VEC_FLOAT_MODE_P holds for
;; the mode.  Operands 1 and 2 may be registers or memory; the
;; preparation statement passes them to
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
;; NOTE(review): plusminus / plusminus_insn are defined outside this
;; chunk; presumably they iterate over plus and minus -- confirm.
508 (define_expand "<plusminus_insn><mode>3"
509 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
510 (plusminus:AVX256MODEF2P
511 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
512 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
513 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
514 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX insn for the packed plusminus operations on every mode in
;; AVXMODEF2P (16-byte and 32-byte float vectors).  The output template
;; is the three-operand form: the destination (operand 0) is printed
;; separately from the first source (operand 1), and operand 2 may be a
;; register or memory.  Matches only when ix86_binary_operator_ok
;; accepts the operands.
;; NOTE(review): <comm> is defined outside this chunk; presumably it
;; yields "%" for the commutative code and nothing otherwise -- confirm.
516 (define_insn "*avx_<plusminus_insn><mode>3"
517 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
518 (plusminus:AVXMODEF2P
519 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
520 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
521 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
522 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
523 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
524 [(set_attr "type" "sseadd")
525 (set_attr "prefix" "vex")
526 (set_attr "mode" "<avxvecmode>")])
;; Expander for the plusminus operations on the 16-byte float vector
;; modes (SSEMODEF2P).  Enabled when SSE_VEC_FLOAT_MODE_P holds for the
;; mode.  Operands 1 and 2 may be registers or memory; the preparation
;; statement passes them to ix86_fixup_binary_operands_no_copy before
;; the template is emitted.
528 (define_expand "<plusminus_insn><mode>3"
529 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
530 (plusminus:SSEMODEF2P
531 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
532 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
533 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
534 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE insn for the packed plusminus operations on the 16-byte float
;; vector modes.  Operand 1 is tied to the destination by the "0"
;; constraint, so the output template prints only operands 2 and 0
;; (two-operand form).  Matches only when ix86_binary_operator_ok
;; accepts the operands.
;; NOTE(review): <comm> is defined outside this chunk; presumably it
;; yields "%" for the commutative code and nothing otherwise -- confirm.
536 (define_insn "*<plusminus_insn><mode>3"
537 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
538 (plusminus:SSEMODEF2P
539 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
540 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
541 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
542 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
543 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
544 [(set_attr "type" "sseadd")
545 (set_attr "mode" "<MODE>")])
547 (define_insn "*avx_vm<plusminus_insn><mode>3"
548 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
549 (vec_merge:SSEMODEF2P
550 (plusminus:SSEMODEF2P
551 (match_operand:SSEMODEF2P 1 "register_operand" "x")
552 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
555 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
556 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
557 [(set_attr "type" "sseadd")
558 (set_attr "prefix" "vex")
559 (set_attr "mode" "<ssescalarmode>")])
561 (define_insn "<sse>_vm<plusminus_insn><mode>3"
562 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
563 (vec_merge:SSEMODEF2P
564 (plusminus:SSEMODEF2P
565 (match_operand:SSEMODEF2P 1 "register_operand" "0")
566 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
569 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
570 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
571 [(set_attr "type" "sseadd")
572 (set_attr "mode" "<ssescalarmode>")])
574 (define_expand "mul<mode>3"
575 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
577 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
578 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
579 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
580 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
582 (define_insn "*avx_mul<mode>3"
583 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
585 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
586 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
587 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
588 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
589 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
590 [(set_attr "type" "ssemul")
591 (set_attr "prefix" "vex")
592 (set_attr "mode" "<avxvecmode>")])
594 (define_expand "mul<mode>3"
595 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
597 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
598 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
599 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
600 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
602 (define_insn "*mul<mode>3"
603 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
605 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
606 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
607 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
608 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
609 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
610 [(set_attr "type" "ssemul")
611 (set_attr "mode" "<MODE>")])
613 (define_insn "*avx_vmmul<mode>3"
614 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
615 (vec_merge:SSEMODEF2P
617 (match_operand:SSEMODEF2P 1 "register_operand" "x")
618 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
621 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
622 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
623 [(set_attr "type" "ssemul")
624 (set_attr "prefix" "vex")
625 (set_attr "mode" "<ssescalarmode>")])
627 (define_insn "<sse>_vmmul<mode>3"
628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
629 (vec_merge:SSEMODEF2P
631 (match_operand:SSEMODEF2P 1 "register_operand" "0")
632 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
635 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
636 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
637 [(set_attr "type" "ssemul")
638 (set_attr "mode" "<ssescalarmode>")])
;; Expander for V8SF division.  After fixing up the operands, the
;; software reciprocal sequence (ix86_emit_swdivsf) is used instead of a
;; plain divide when SSE math and TARGET_RECIP are on, the insn is being
;; optimized for speed, and finite-only, non-trapping, unsafe-math
;; optimizations are all enabled.
;; The speed test uses optimize_insn_for_speed_p (), matching the
;; divv4sf3 expander, rather than the global optimize_size flag.
640 (define_expand "divv8sf3"
641 [(set (match_operand:V8SF 0 "register_operand" "")
642 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
643 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
646 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
648 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
649 && flag_finite_math_only && !flag_trapping_math
650 && flag_unsafe_math_optimizations)
652 ix86_emit_swdivsf (operands[0], operands[1],
653 operands[2], V8SFmode);
658 (define_expand "divv4df3"
659 [(set (match_operand:V4DF 0 "register_operand" "")
660 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
661 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
663 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
665 (define_insn "avx_div<mode>3"
666 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
668 (match_operand:AVXMODEF2P 1 "register_operand" "x")
669 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
670 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
671 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
672 [(set_attr "type" "ssediv")
673 (set_attr "prefix" "vex")
674 (set_attr "mode" "<MODE>")])
676 (define_expand "divv4sf3"
677 [(set (match_operand:V4SF 0 "register_operand" "")
678 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
679 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
682 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
683 && flag_finite_math_only && !flag_trapping_math
684 && flag_unsafe_math_optimizations)
686 ix86_emit_swdivsf (operands[0], operands[1],
687 operands[2], V4SFmode);
692 (define_expand "divv2df3"
693 [(set (match_operand:V2DF 0 "register_operand" "")
694 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
695 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
699 (define_insn "*avx_div<mode>3"
700 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
702 (match_operand:SSEMODEF2P 1 "register_operand" "x")
703 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
704 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
705 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
706 [(set_attr "type" "ssediv")
707 (set_attr "prefix" "vex")
708 (set_attr "mode" "<MODE>")])
710 (define_insn "<sse>_div<mode>3"
711 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
713 (match_operand:SSEMODEF2P 1 "register_operand" "0")
714 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
715 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
716 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
717 [(set_attr "type" "ssediv")
718 (set_attr "mode" "<MODE>")])
720 (define_insn "*avx_vmdiv<mode>3"
721 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
722 (vec_merge:SSEMODEF2P
724 (match_operand:SSEMODEF2P 1 "register_operand" "x")
725 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
728 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
729 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
730 [(set_attr "type" "ssediv")
731 (set_attr "prefix" "vex")
732 (set_attr "mode" "<ssescalarmode>")])
734 (define_insn "<sse>_vmdiv<mode>3"
735 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
736 (vec_merge:SSEMODEF2P
738 (match_operand:SSEMODEF2P 1 "register_operand" "0")
739 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
742 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
743 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
744 [(set_attr "type" "ssediv")
745 (set_attr "mode" "<ssescalarmode>")])
747 (define_insn "avx_rcpv8sf2"
748 [(set (match_operand:V8SF 0 "register_operand" "=x")
750 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
752 "vrcpps\t{%1, %0|%0, %1}"
753 [(set_attr "type" "sse")
754 (set_attr "prefix" "vex")
755 (set_attr "mode" "V8SF")])
757 (define_insn "sse_rcpv4sf2"
758 [(set (match_operand:V4SF 0 "register_operand" "=x")
760 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
762 "%vrcpps\t{%1, %0|%0, %1}"
763 [(set_attr "type" "sse")
764 (set_attr "prefix" "maybe_vex")
765 (set_attr "mode" "V4SF")])
767 (define_insn "*avx_vmrcpv4sf2"
768 [(set (match_operand:V4SF 0 "register_operand" "=x")
770 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
772 (match_operand:V4SF 2 "register_operand" "x")
775 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
776 [(set_attr "type" "sse")
777 (set_attr "prefix" "vex")
778 (set_attr "mode" "SF")])
780 (define_insn "sse_vmrcpv4sf2"
781 [(set (match_operand:V4SF 0 "register_operand" "=x")
783 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
785 (match_operand:V4SF 2 "register_operand" "0")
788 "rcpss\t{%1, %0|%0, %1}"
789 [(set_attr "type" "sse")
790 (set_attr "mode" "SF")])
;; Expander for V8SF square root.  The software sequence
;; (ix86_emit_swsqrtsf with a final argument of 0) is used when SSE math
;; and TARGET_RECIP are on, the insn is being optimized for speed, and
;; finite-only, non-trapping, unsafe-math optimizations are all enabled.
;; The speed test uses optimize_insn_for_speed_p (), matching the
;; sqrtv4sf2 expander, rather than the global optimize_size flag.
792 (define_expand "sqrtv8sf2"
793 [(set (match_operand:V8SF 0 "register_operand" "")
794 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
797 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
798 && flag_finite_math_only && !flag_trapping_math
799 && flag_unsafe_math_optimizations)
801 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
806 (define_insn "avx_sqrtv8sf2"
807 [(set (match_operand:V8SF 0 "register_operand" "=x")
808 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
810 "vsqrtps\t{%1, %0|%0, %1}"
811 [(set_attr "type" "sse")
812 (set_attr "prefix" "vex")
813 (set_attr "mode" "V8SF")])
815 (define_expand "sqrtv4sf2"
816 [(set (match_operand:V4SF 0 "register_operand" "")
817 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
820 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
821 && flag_finite_math_only && !flag_trapping_math
822 && flag_unsafe_math_optimizations)
824 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
829 (define_insn "sse_sqrtv4sf2"
830 [(set (match_operand:V4SF 0 "register_operand" "=x")
831 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
833 "%vsqrtps\t{%1, %0|%0, %1}"
834 [(set_attr "type" "sse")
835 (set_attr "prefix" "maybe_vex")
836 (set_attr "mode" "V4SF")])
838 (define_insn "sqrtv4df2"
839 [(set (match_operand:V4DF 0 "register_operand" "=x")
840 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
842 "vsqrtpd\t{%1, %0|%0, %1}"
843 [(set_attr "type" "sse")
844 (set_attr "prefix" "vex")
845 (set_attr "mode" "V4DF")])
847 (define_insn "sqrtv2df2"
848 [(set (match_operand:V2DF 0 "register_operand" "=x")
849 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
851 "%vsqrtpd\t{%1, %0|%0, %1}"
852 [(set_attr "type" "sse")
853 (set_attr "prefix" "maybe_vex")
854 (set_attr "mode" "V2DF")])
856 (define_insn "*avx_vmsqrt<mode>2"
857 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
858 (vec_merge:SSEMODEF2P
860 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
861 (match_operand:SSEMODEF2P 2 "register_operand" "x")
863 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
864 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
865 [(set_attr "type" "sse")
866 (set_attr "prefix" "vex")
867 (set_attr "mode" "<ssescalarmode>")])
869 (define_insn "<sse>_vmsqrt<mode>2"
870 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
871 (vec_merge:SSEMODEF2P
873 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
874 (match_operand:SSEMODEF2P 2 "register_operand" "0")
876 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
877 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
878 [(set_attr "type" "sse")
879 (set_attr "mode" "<ssescalarmode>")])
881 (define_expand "rsqrtv8sf2"
882 [(set (match_operand:V8SF 0 "register_operand" "")
884 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
885 "TARGET_AVX && TARGET_SSE_MATH"
887 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
891 (define_insn "avx_rsqrtv8sf2"
892 [(set (match_operand:V8SF 0 "register_operand" "=x")
894 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
896 "vrsqrtps\t{%1, %0|%0, %1}"
897 [(set_attr "type" "sse")
898 (set_attr "prefix" "vex")
899 (set_attr "mode" "V8SF")])
901 (define_expand "rsqrtv4sf2"
902 [(set (match_operand:V4SF 0 "register_operand" "")
904 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
907 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
911 (define_insn "sse_rsqrtv4sf2"
912 [(set (match_operand:V4SF 0 "register_operand" "=x")
914 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
916 "%vrsqrtps\t{%1, %0|%0, %1}"
917 [(set_attr "type" "sse")
918 (set_attr "prefix" "maybe_vex")
919 (set_attr "mode" "V4SF")])
921 (define_insn "*avx_vmrsqrtv4sf2"
922 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
926 (match_operand:V4SF 2 "register_operand" "x")
929 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
930 [(set_attr "type" "sse")
931 (set_attr "prefix" "vex")
932 (set_attr "mode" "SF")])
934 (define_insn "sse_vmrsqrtv4sf2"
935 [(set (match_operand:V4SF 0 "register_operand" "=x")
937 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
939 (match_operand:V4SF 2 "register_operand" "0")
942 "rsqrtss\t{%1, %0|%0, %1}"
943 [(set_attr "type" "sse")
944 (set_attr "mode" "SF")])
946 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
947 ;; isn't really correct, as those rtl operators aren't defined when
948 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
950 (define_expand "<code><mode>3"
951 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
952 (smaxmin:AVX256MODEF2P
953 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
954 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
955 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
957 if (!flag_finite_math_only)
958 operands[1] = force_reg (<MODE>mode, operands[1]);
959 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
962 (define_expand "<code><mode>3"
963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
965 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
966 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
967 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
969 if (!flag_finite_math_only)
970 operands[1] = force_reg (<MODE>mode, operands[1]);
971 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
974 (define_insn "*avx_<code><mode>3_finite"
975 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
977 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
978 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
979 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
980 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
981 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
982 [(set_attr "type" "sseadd")
983 (set_attr "prefix" "vex")
984 (set_attr "mode" "<MODE>")])
;; Packed float min/max for V4SF/V2DF, matched only when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is tied to the output ("0") and marked commutative
;; with operand 2 ('%'); the template is the two-operand destructive form.
986 (define_insn "*<code><mode>3_finite"
987 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
989 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
990 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
991 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
992 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
993 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
994 [(set_attr "type" "sseadd")
995 (set_attr "mode" "<MODE>")])
;; VEX-encoded packed float min/max for the case where
;; flag_finite_math_only is NOT set (the *avx_<code><mode>3_finite pattern
;; takes the finite-math case).  The hardware min/max instructions are not
;; commutative in the presence of NaN and -0.0, so operand 1 must be a
;; register and must NOT carry the '%' commutative marker; otherwise the
;; operands could be swapped and the result changed.  This matches the SSE
;; *<code><mode>3 pattern, which uses register_operand for operand 1, and
;; the expanders, which force operand 1 into a register in this case.
997 (define_insn "*avx_<code><mode>3"
998 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1000 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1001 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1002 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1003 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1004 [(set_attr "type" "sseadd")
1005 (set_attr "prefix" "vex")
1006 (set_attr "mode" "<avxvecmode>")])
;; Packed float min/max for V4SF/V2DF without the finite-math restriction.
;; Operand 1 must be a register tied to the output ("0") and has no '%'
;; marker, so the operand order is preserved.
1008 (define_insn "*<code><mode>3"
1009 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1011 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1012 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1013 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1014 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1015 [(set_attr "type" "sseadd")
1016 (set_attr "mode" "<MODE>")])
;; VEX-encoded scalar ("vm") min/max: the result is expressed as a
;; vec_merge and the template emits the scalar v<maxminfprefix>s<suffix>
;; mnemonic in three-operand form.  Enabled for AVX128_VEC_FLOAT_MODE_P.
1018 (define_insn "*avx_vm<code><mode>3"
1019 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1020 (vec_merge:SSEMODEF2P
1022 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1023 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1026 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1027 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1028 [(set_attr "type" "sse")
1029 (set_attr "prefix" "vex")
1030 (set_attr "mode" "<ssescalarmode>")])
;; Scalar ("vm") min/max for V4SF/V2DF: the result is expressed as a
;; vec_merge, operand 1 is tied to the output ("0"), and the template
;; emits the two-operand <maxminfprefix>s<suffix> mnemonic.
1032 (define_insn "<sse>_vm<code><mode>3"
1033 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1034 (vec_merge:SSEMODEF2P
1036 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1037 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1040 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1041 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1042 [(set_attr "type" "sse")
1043 (set_attr "mode" "<ssescalarmode>")])
1045 ;; These versions of the min/max patterns implement exactly the operations
1046 ;; min = (op1 < op2 ? op1 : op2)
1047 ;; max = (!(op1 < op2) ? op1 : op2)
1048 ;; Their operands are not commutative, and thus they may be used in the
1049 ;; presence of -0.0 and NaN.
;; IEEE-exact packed minimum, VEX form (vminp<suffix>).  Operand 1 is a
;; register with no '%' marker, so the operand order is never swapped.
1051 (define_insn "*avx_ieee_smin<mode>3"
1052 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1054 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1055 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1057 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1058 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1059 [(set_attr "type" "sseadd")
1060 (set_attr "prefix" "vex")
1061 (set_attr "mode" "<avxvecmode>")])
;; IEEE-exact packed maximum, VEX form (vmaxp<suffix>).  Operand 1 is a
;; register with no '%' marker, so the operand order is never swapped.
1063 (define_insn "*avx_ieee_smax<mode>3"
1064 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1066 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1067 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1069 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1070 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1071 [(set_attr "type" "sseadd")
1072 (set_attr "prefix" "vex")
1073 (set_attr "mode" "<avxvecmode>")])
;; IEEE-exact packed minimum for V4SF/V2DF (minp<suffix>), two-operand
;; form with operand 1 tied to the output ("0"); operands are not marked
;; commutative.
1075 (define_insn "*ieee_smin<mode>3"
1076 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1078 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1079 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1081 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1082 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1083 [(set_attr "type" "sseadd")
1084 (set_attr "mode" "<MODE>")])
;; IEEE-exact packed maximum for V4SF/V2DF (maxp<suffix>), two-operand
;; form with operand 1 tied to the output ("0"); operands are not marked
;; commutative.
1086 (define_insn "*ieee_smax<mode>3"
1087 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1089 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1090 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1092 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1093 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1094 [(set_attr "type" "sseadd")
1095 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract: emits vaddsubps in three-operand VEX form.  The RTL
;; combines an operation on operands 1 and 2 with a (minus ...) of the
;; same two operands (match_dup 1 / match_dup 2).
1097 (define_insn "avx_addsubv8sf3"
1098 [(set (match_operand:V8SF 0 "register_operand" "=x")
1101 (match_operand:V8SF 1 "register_operand" "x")
1102 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1103 (minus:V8SF (match_dup 1) (match_dup 2))
1106 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1107 [(set_attr "type" "sseadd")
1108 (set_attr "prefix" "vex")
1109 (set_attr "mode" "V8SF")])
;; V4DF add/subtract: emits vaddsubpd in three-operand VEX form.  The RTL
;; combines an operation on operands 1 and 2 with a (minus ...) of the
;; same two operands (match_dup 1 / match_dup 2).
1111 (define_insn "avx_addsubv4df3"
1112 [(set (match_operand:V4DF 0 "register_operand" "=x")
1115 (match_operand:V4DF 1 "register_operand" "x")
1116 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1117 (minus:V4DF (match_dup 1) (match_dup 2))
1120 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1121 [(set_attr "type" "sseadd")
1122 (set_attr "prefix" "vex")
1123 (set_attr "mode" "V4DF")])
;; V4SF add/subtract, VEX form: emits vaddsubps with separate source and
;; destination registers.  The RTL includes a (minus ...) of the same two
;; operands via match_dup.
1125 (define_insn "*avx_addsubv4sf3"
1126 [(set (match_operand:V4SF 0 "register_operand" "=x")
1129 (match_operand:V4SF 1 "register_operand" "x")
1130 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1131 (minus:V4SF (match_dup 1) (match_dup 2))
1134 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1135 [(set_attr "type" "sseadd")
1136 (set_attr "prefix" "vex")
1137 (set_attr "mode" "V4SF")])
;; V4SF add/subtract, two-operand form: emits addsubps with operand 1 tied
;; to the output ("0").  The insn is tagged with prefix_rep "1".
1139 (define_insn "sse3_addsubv4sf3"
1140 [(set (match_operand:V4SF 0 "register_operand" "=x")
1143 (match_operand:V4SF 1 "register_operand" "0")
1144 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1145 (minus:V4SF (match_dup 1) (match_dup 2))
1148 "addsubps\t{%2, %0|%0, %2}"
1149 [(set_attr "type" "sseadd")
1150 (set_attr "prefix_rep" "1")
1151 (set_attr "mode" "V4SF")])
;; V2DF add/subtract, VEX form: emits vaddsubpd with separate source and
;; destination registers.  The RTL includes a (minus ...) of the same two
;; operands via match_dup.
1153 (define_insn "*avx_addsubv2df3"
1154 [(set (match_operand:V2DF 0 "register_operand" "=x")
1157 (match_operand:V2DF 1 "register_operand" "x")
1158 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1159 (minus:V2DF (match_dup 1) (match_dup 2))
1162 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1163 [(set_attr "type" "sseadd")
1164 (set_attr "prefix" "vex")
1165 (set_attr "mode" "V2DF")])
;; V2DF add/subtract, two-operand form: emits addsubpd with operand 1 tied
;; to the output ("0").
1167 (define_insn "sse3_addsubv2df3"
1168 [(set (match_operand:V2DF 0 "register_operand" "=x")
1171 (match_operand:V2DF 1 "register_operand" "0")
1172 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1173 (minus:V2DF (match_dup 1) (match_dup 2))
1176 "addsubpd\t{%2, %0|%0, %2}"
1177 [(set_attr "type" "sseadd")
1178 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/subtract (vh<plusminus_mnemonic>pd).  The RTL is
;; built from DF elements picked with vec_select: elements 0..3 of
;; operand 1, followed by elements 0..3 of operand 2.
1180 (define_insn "avx_h<plusminus_insn>v4df3"
1181 [(set (match_operand:V4DF 0 "register_operand" "=x")
1186 (match_operand:V4DF 1 "register_operand" "x")
1187 (parallel [(const_int 0)]))
1188 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1190 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1191 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1195 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1196 (parallel [(const_int 0)]))
1197 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1199 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1200 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1202 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1203 [(set_attr "type" "sseadd")
1204 (set_attr "prefix" "vex")
1205 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/subtract (vh<plusminus_mnemonic>ps).  The RTL is
;; built from SF elements picked with vec_select, in this order:
;; elements 0..3 of operand 1, elements 0..3 of operand 2, then
;; elements 4..7 of operand 1 and elements 4..7 of operand 2.
1207 (define_insn "avx_h<plusminus_insn>v8sf3"
1208 [(set (match_operand:V8SF 0 "register_operand" "=x")
1214 (match_operand:V8SF 1 "register_operand" "x")
1215 (parallel [(const_int 0)]))
1216 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1218 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1219 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1223 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1224 (parallel [(const_int 0)]))
1225 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1227 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1228 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1232 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1233 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1235 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1236 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1239 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1240 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1242 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1243 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1245 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1246 [(set_attr "type" "sseadd")
1247 (set_attr "prefix" "vex")
1248 (set_attr "mode" "V8SF")])
;; V4SF horizontal add/subtract, VEX form (vh<plusminus_mnemonic>ps).
;; The RTL is built from SF elements picked with vec_select: elements
;; 0..3 of operand 1, followed by elements 0..3 of operand 2.
1250 (define_insn "*avx_h<plusminus_insn>v4sf3"
1251 [(set (match_operand:V4SF 0 "register_operand" "=x")
1256 (match_operand:V4SF 1 "register_operand" "x")
1257 (parallel [(const_int 0)]))
1258 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1260 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1261 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1265 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1266 (parallel [(const_int 0)]))
1267 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1269 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1270 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1272 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1273 [(set_attr "type" "sseadd")
1274 (set_attr "prefix" "vex")
1275 (set_attr "mode" "V4SF")])
;; V4SF horizontal add/subtract, two-operand form
;; (h<plusminus_mnemonic>ps) with operand 1 tied to the output ("0").
;; Same element selection as the VEX variant: elements 0..3 of operand 1,
;; then elements 0..3 of operand 2.  Tagged with prefix_rep "1".
1277 (define_insn "sse3_h<plusminus_insn>v4sf3"
1278 [(set (match_operand:V4SF 0 "register_operand" "=x")
1283 (match_operand:V4SF 1 "register_operand" "0")
1284 (parallel [(const_int 0)]))
1285 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1287 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1288 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1292 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1293 (parallel [(const_int 0)]))
1294 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1296 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1297 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1299 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1300 [(set_attr "type" "sseadd")
1301 (set_attr "prefix_rep" "1")
1302 (set_attr "mode" "V4SF")])
;; V2DF horizontal add/subtract, VEX form (vh<plusminus_mnemonic>pd).
;; The RTL uses DF elements 0 and 1 of operand 1, followed by elements
;; 0 and 1 of operand 2.
1304 (define_insn "*avx_h<plusminus_insn>v2df3"
1305 [(set (match_operand:V2DF 0 "register_operand" "=x")
1309 (match_operand:V2DF 1 "register_operand" "x")
1310 (parallel [(const_int 0)]))
1311 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1314 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1315 (parallel [(const_int 0)]))
1316 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1318 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1319 [(set_attr "type" "sseadd")
1320 (set_attr "prefix" "vex")
1321 (set_attr "mode" "V2DF")])
;; V2DF horizontal add/subtract, two-operand form
;; (h<plusminus_mnemonic>pd) with operand 1 tied to the output ("0").
;; The RTL uses DF elements 0 and 1 of operand 1, then of operand 2.
1323 (define_insn "sse3_h<plusminus_insn>v2df3"
1324 [(set (match_operand:V2DF 0 "register_operand" "=x")
1328 (match_operand:V2DF 1 "register_operand" "0")
1329 (parallel [(const_int 0)]))
1330 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1333 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1334 (parallel [(const_int 0)]))
1335 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1337 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1338 [(set_attr "type" "sseadd")
1339 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector (operand 1) into operand 0.  Two code
;; paths are visible: two chained sse3 horizontal adds through a fresh
;; temporary, and a generic ix86_expand_reduc_v4sf call driven by
;; gen_addv4sf3.
;; NOTE(review): the condition that selects between the two paths is not
;; visible in this listing -- confirm against the full file.
1341 (define_expand "reduc_splus_v4sf"
1342 [(match_operand:V4SF 0 "register_operand" "")
1343 (match_operand:V4SF 1 "register_operand" "")]
1348 rtx tmp = gen_reg_rtx (V4SFmode);
1349 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1350 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1353 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: a single sse3 horizontal add of
;; operand 1 with itself, written to operand 0.
1357 (define_expand "reduc_splus_v2df"
1358 [(match_operand:V2DF 0 "register_operand" "")
1359 (match_operand:V2DF 1 "register_operand" "")]
1362 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector: delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining generator.
1366 (define_expand "reduc_smax_v4sf"
1367 [(match_operand:V4SF 0 "register_operand" "")
1368 (match_operand:V4SF 1 "register_operand" "")]
1371 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector: delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining generator.
1375 (define_expand "reduc_smin_v4sf"
1376 [(match_operand:V4SF 0 "register_operand" "")
1377 (match_operand:V4SF 1 "register_operand" "")]
1380 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1384 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1386 ;; Parallel floating point comparisons
1388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed AVX compare with an explicit predicate: operand 3 is an SI
;; immediate accepted by const_0_to_31_operand and is emitted as the
;; first (AT&T) / last (Intel) operand of vcmpp<suffix>.
1390 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1391 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1393 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1394 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1395 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1398 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1399 [(set_attr "type" "ssecmp")
1400 (set_attr "prefix" "vex")
1401 (set_attr "mode" "<MODE>")])
;; Scalar AVX compare (vcmps<suffix>) with an explicit predicate: operand 3
;; is an SI immediate accepted by const_0_to_31_operand.  The result is
;; expressed as a vec_merge.
;; Operand 0 is the destination and therefore carries the "=x" output
;; constraint, like the packed avx_cmpp pattern; with an empty constraint
;; string the destination is not restricted to an SSE register class.
1403 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1404 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1405 (vec_merge:SSEMODEF2P
1407 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1408 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1409 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1414 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1415 [(set_attr "type" "ssecmp")
1416 (set_attr "prefix" "vex")
1417 (set_attr "mode" "<ssescalarmode>")])
1419 ;; We don't promote 128-bit vector compare intrinsics.  But the vectorizer
1420 ;; may generate 256-bit vector compare instructions.
;; The comparison itself is operand 3 (a match_operator accepted by
;; avx_comparison_float_operator); its condition is printed into the
;; mnemonic through %D3.
1421 (define_insn "*avx_maskcmp<mode>3"
1422 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1423 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1424 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1425 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1426 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1427 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1428 [(set_attr "type" "ssecmp")
1429 (set_attr "prefix" "vex")
1430 (set_attr "mode" "<avxvecmode>")])
;; Mask-producing float compare over SSEMODEF4 (SF, DF, V4SF, V2DF).
;; Operand 3 is a match_operator accepted by sse_comparison_operator and
;; is printed into the mnemonic through %D3; operand 1 is tied to the
;; output ("0").
1432 (define_insn "<sse>_maskcmp<mode>3"
1433 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1434 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1435 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1436 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1437 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1439 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1440 [(set_attr "type" "ssecmp")
1441 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") mask-producing compare for V4SF/V2DF: the comparison
;; (operand 3, printed through %D3) is wrapped in a vec_merge and emitted
;; as cmp<cond>s<suffix>.  Disabled when TARGET_SSE5 is set.
1443 (define_insn "<sse>_vmmaskcmp<mode>3"
1444 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1445 (vec_merge:SSEMODEF2P
1446 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1447 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1448 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1451 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1452 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1453 [(set_attr "type" "ssecmp")
1454 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode.  Element 0 of each
;; vector operand is selected (parallel [(const_int 0)]); operand 1 may be
;; in memory.  The template uses the %v prefix and the insn is tagged
;; "maybe_vex".
1456 (define_insn "<sse>_comi"
1457 [(set (reg:CCFP FLAGS_REG)
1460 (match_operand:<ssevecmode> 0 "register_operand" "x")
1461 (parallel [(const_int 0)]))
1463 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1464 (parallel [(const_int 0)]))))]
1465 "SSE_FLOAT_MODE_P (<MODE>mode)"
1466 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1467 [(set_attr "type" "ssecomi")
1468 (set_attr "prefix" "maybe_vex")
1469 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets FLAGS_REG in CCFPU mode (ucomis<suffix>).
;; Element 0 of each vector operand is selected; operand 1 may be in
;; memory.  The template uses the %v prefix and the insn is tagged
;; "maybe_vex".
1471 (define_insn "<sse>_ucomi"
1472 [(set (reg:CCFPU FLAGS_REG)
1475 (match_operand:<ssevecmode> 0 "register_operand" "x")
1476 (parallel [(const_int 0)]))
1478 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1479 (parallel [(const_int 0)]))))]
1480 "SSE_FLOAT_MODE_P (<MODE>mode)"
1481 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1482 [(set_attr "type" "ssecomi")
1483 (set_attr "prefix" "maybe_vex")
1484 (set_attr "mode" "<MODE>")])
;; Vector conditional select for V4SF/V2DF: operand 0 receives operand 1
;; or operand 2 depending on the comparison (operand 3) of operands 4 and
;; 5.  The expansion is delegated to ix86_expand_fp_vcond, whose return
;; value is tested.
1486 (define_expand "vcond<mode>"
1487 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1488 (if_then_else:SSEMODEF2P
1489 (match_operator 3 ""
1490 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1491 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1492 (match_operand:SSEMODEF2P 1 "general_operand" "")
1493 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1494 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1496 if (ix86_expand_fp_vcond (operands))
1502 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1504 ;; Parallel floating point logical operations
1506 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed float and-not, VEX form (vandnp<suffix>): three-operand
;; template with register operand 1 and register-or-memory operand 2.
1508 (define_insn "avx_andnot<mode>3"
1509 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1512 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1513 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1514 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1515 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1516 [(set_attr "type" "sselog")
1517 (set_attr "prefix" "vex")
1518 (set_attr "mode" "<avxvecmode>")])
;; Packed float and-not for V4SF/V2DF (andnp<suffix>): two-operand
;; template with operand 1 tied to the output ("0").
1520 (define_insn "<sse>_andnot<mode>3"
1521 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1524 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1525 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1526 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1527 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1528 [(set_attr "type" "sselog")
1529 (set_attr "mode" "<MODE>")])
;; Expander for the plogic codes over the AVX256MODEF2P modes, enabled
;; when AVX256_VEC_FLOAT_MODE_P holds.  The only preparation step is a
;; call to ix86_fixup_binary_operands_no_copy.
1531 (define_expand "<code><mode>3"
1532 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1533 (plogic:AVX256MODEF2P
1534 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1535 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1536 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1537 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VEX-encoded packed float logical operation (v<plogicprefix>p<suffix>).
;; Operands 1 and 2 are marked commutative ('%'); the pattern also
;; requires ix86_binary_operator_ok to accept the operands.
1539 (define_insn "*avx_<code><mode>3"
1540 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1542 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1543 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1544 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1545 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1546 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1547 [(set_attr "type" "sselog")
1548 (set_attr "prefix" "vex")
1549 (set_attr "mode" "<avxvecmode>")])
;; Expander for <code><mode>3 over V4SF/V2DF, enabled when
;; SSE_VEC_FLOAT_MODE_P holds.  The only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy.
1551 (define_expand "<code><mode>3"
1552 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1554 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1555 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1556 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1557 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float logical operation for V4SF/V2DF
;; (<plogicprefix>p<suffix>), two-operand form.  Operand 1 is tied to the
;; output and marked commutative with operand 2 ("%0"); the pattern also
;; requires ix86_binary_operator_ok to accept the operands.
1559 (define_insn "*<code><mode>3"
1560 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1562 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1563 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1564 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1565 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1566 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1567 [(set_attr "type" "sselog")
1568 (set_attr "mode" "<MODE>")])
1570 ;; Also define scalar versions. These are used for abs, neg, and
1571 ;; conditional move. Using subregs into vector modes causes register
1572 ;; allocation lossage. These patterns do not allow memory operands
1573 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not, VEX form.  All three operands are
;; registers; the packed vandnp<suffix> mnemonic is emitted and the insn
;; "mode" attribute is the corresponding vector mode (<ssevecmode>).
1575 (define_insn "*avx_andnot<mode>3"
1576 [(set (match_operand:MODEF 0 "register_operand" "=x")
1579 (match_operand:MODEF 1 "register_operand" "x"))
1580 (match_operand:MODEF 2 "register_operand" "x")))]
1581 "AVX_FLOAT_MODE_P (<MODE>mode)"
1582 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1583 [(set_attr "type" "sselog")
1584 (set_attr "prefix" "vex")
1585 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) and-not, two-operand form.  Register operands
;; only, with operand 1 tied to the output ("0"); emits the packed
;; andnp<suffix> mnemonic.
1587 (define_insn "*andnot<mode>3"
1588 [(set (match_operand:MODEF 0 "register_operand" "=x")
1591 (match_operand:MODEF 1 "register_operand" "0"))
1592 (match_operand:MODEF 2 "register_operand" "x")))]
1593 "SSE_FLOAT_MODE_P (<MODE>mode)"
1594 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1595 [(set_attr "type" "sselog")
1596 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical operation, VEX form.  Register operands
;; only; emits the packed v<plogicprefix>p<suffix> mnemonic in
;; three-operand form.
1598 (define_insn "*avx_<code><mode>3"
1599 [(set (match_operand:MODEF 0 "register_operand" "=x")
1601 (match_operand:MODEF 1 "register_operand" "x")
1602 (match_operand:MODEF 2 "register_operand" "x")))]
1603 "AVX_FLOAT_MODE_P (<MODE>mode)"
1604 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1605 [(set_attr "type" "sselog")
1606 (set_attr "prefix" "vex")
1607 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical operation, two-operand form.  Register
;; operands only, with operand 1 tied to the output ("0"); emits the
;; packed <plogicprefix>p<suffix> mnemonic.
1609 (define_insn "*<code><mode>3"
1610 [(set (match_operand:MODEF 0 "register_operand" "=x")
1612 (match_operand:MODEF 1 "register_operand" "0")
1613 (match_operand:MODEF 2 "register_operand" "x")))]
1614 "SSE_FLOAT_MODE_P (<MODE>mode)"
1615 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1616 [(set_attr "type" "sselog")
1617 (set_attr "mode" "<ssevecmode>")])
1619 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1621 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1622 ;; scalar version of the instructions as well as the vector version.
1624 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1626 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1627 ;; combine to generate a multiply/add with two memory references. We then
1628 ;; split this insn, into loading up the destination register with one of the
1629 ;; memory operations. If we don't manage to split the insn, reload will
1630 ;; generate the appropriate moves. The reason this is needed, is that combine
1631 ;; has already folded one of the memory references into both the multiply and
1632 ;; add insns, and it can't generate a new pseudo. I.e.:
1633 ;; (set (reg1) (mem (addr1)))
1634 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1635 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fused multiply-add over SSEMODEF4 (SF, DF, V4SF, V2DF), with four
;; constraint alternatives.  In each alternative exactly one input
;; (operand 1 or operand 3) is tied to the output via "0".  Requires
;; TARGET_SSE5 and TARGET_FUSED_MADD, and ix86_sse5_valid_op_p must accept
;; the operands.
1637 (define_insn "sse5_fmadd<mode>4"
1638 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1641 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1642 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1643 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1644 "TARGET_SSE5 && TARGET_FUSED_MADD
1645 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1646 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1647 [(set_attr "type" "ssemuladd")
1648 (set_attr "mode" "<MODE>")])
1650 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with argument 1 but
;; pass it with argument 2 (presumably the permitted number of memory
;; operands -- confirm against ix86_sse5_valid_op_p), and operand 0 is not
;; mentioned in any input.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fmadd<mode>4.
1652 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1655 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1656 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1657 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1659 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1660 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1661 && !reg_mentioned_p (operands[0], operands[1])
1662 && !reg_mentioned_p (operands[0], operands[2])
1663 && !reg_mentioned_p (operands[0], operands[3])"
1666 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1667 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1668 operands[2], operands[3]));
1672 ;; For the scalar operations, use operand1 for the upper words that aren't
1673 ;; modified, so restrict the forms that are generated.
1674 ;; Scalar version of fmadd
;; Scalar ("vm") SSE5 fused multiply-add for V4SF/V2DF, expressed as a
;; vec_merge.  Operand 1 is tied to the output in both alternatives; the
;; alternatives differ in whether operand 2 or operand 3 may be in memory.
1675 (define_insn "sse5_vmfmadd<mode>4"
1676 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1677 (vec_merge:SSEMODEF2P
1680 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1681 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1682 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1685 "TARGET_SSE5 && TARGET_FUSED_MADD
1686 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1687 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1688 [(set_attr "type" "ssemuladd")
1689 (set_attr "mode" "<MODE>")])
1691 ;; Floating multiply and subtract
1692 ;; Allow two memory operands the same as fmadd
;; SSE5 fused multiply-subtract over SSEMODEF4, with the same four
;; constraint alternatives and the same enabling condition as
;; sse5_fmadd<mode>4.  Emits fmsub<suffix>.
1693 (define_insn "sse5_fmsub<mode>4"
1694 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1697 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1698 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1699 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1700 "TARGET_SSE5 && TARGET_FUSED_MADD
1701 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1702 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1703 [(set_attr "type" "ssemuladd")
1704 (set_attr "mode" "<MODE>")])
1706 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with argument 1 but
;; pass it with argument 2 (presumably the permitted number of memory
;; operands -- confirm against ix86_sse5_valid_op_p), and operand 0 is not
;; mentioned in any input.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fmsub<mode>4.
1708 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1711 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1712 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1713 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1715 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1716 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1717 && !reg_mentioned_p (operands[0], operands[1])
1718 && !reg_mentioned_p (operands[0], operands[2])
1719 && !reg_mentioned_p (operands[0], operands[3])"
1722 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1723 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1724 operands[2], operands[3]));
1728 ;; For the scalar operations, use operand1 for the upper words that aren't
1729 ;; modified, so restrict the forms that are generated.
1730 ;; Scalar version of fmsub
;; Scalar ("vm") SSE5 fused multiply-subtract for V4SF/V2DF, expressed as
;; a vec_merge with operand 1 tied to the output in both alternatives.
;; NOTE(review): the last argument to ix86_sse5_valid_op_p is false here
;; but true in sse5_vmfmadd<mode>4 -- confirm the difference is intended.
1731 (define_insn "sse5_vmfmsub<mode>4"
1732 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1733 (vec_merge:SSEMODEF2P
1736 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1737 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1738 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1741 "TARGET_SSE5 && TARGET_FUSED_MADD
1742 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1743 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1744 [(set_attr "type" "ssemuladd")
1745 (set_attr "mode" "<MODE>")])
1747 ;; Floating point negative multiply and add
1748 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1749 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1750 ;; Allow two memory operands to help in optimizing.
;; SSE5 negated multiply-add over SSEMODEF4.  Operand 3 appears first in
;; the RTL, ahead of operands 1 and 2, while the output template still
;; prints the operands in numeric order.  Same four constraint
;; alternatives and enabling condition as sse5_fmadd<mode>4.
1751 (define_insn "sse5_fnmadd<mode>4"
1752 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1754 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1756 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1757 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1758 "TARGET_SSE5 && TARGET_FUSED_MADD
1759 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1760 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1761 [(set_attr "type" "ssemuladd")
1762 (set_attr "mode" "<MODE>")])
1764 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with argument 1 but
;; pass it with argument 2 (presumably the permitted number of memory
;; operands -- confirm against ix86_sse5_valid_op_p), and operand 0 is not
;; mentioned in any input.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fnmadd<mode>4.
1766 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1768 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1770 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1771 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1773 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1774 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1775 && !reg_mentioned_p (operands[0], operands[1])
1776 && !reg_mentioned_p (operands[0], operands[2])
1777 && !reg_mentioned_p (operands[0], operands[3])"
1780 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1781 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1782 operands[2], operands[3]));
1786 ;; For the scalar operations, use operand1 for the upper words that aren't
1787 ;; modified, so restrict the forms that are generated.
1788 ;; Scalar version of fnmadd
;; Scalar ("vm") SSE5 negated multiply-add for V4SF/V2DF, expressed as a
;; vec_merge.  Operand 3 appears first in the RTL; operand 1 is tied to
;; the output in both alternatives.
1789 (define_insn "sse5_vmfnmadd<mode>4"
1790 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1791 (vec_merge:SSEMODEF2P
1793 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1795 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1796 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1799 "TARGET_SSE5 && TARGET_FUSED_MADD
1800 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1801 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1802 [(set_attr "type" "ssemuladd")
1803 (set_attr "mode" "<MODE>")])
1805 ;; Floating point negative multiply and subtract
1806 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1807 ;; Allow 2 memory operands to help with optimization
;; SSE5 negated multiply-subtract over SSEMODEF4.  Only two constraint
;; alternatives exist, both with operand 1 tied to the output, and the
;; last argument to ix86_sse5_valid_op_p is false (it is true in the
;; fmadd/fmsub/fnmadd patterns).
1808 (define_insn "sse5_fnmsub<mode>4"
1809 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1813 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1814 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1815 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1816 "TARGET_SSE5 && TARGET_FUSED_MADD
1817 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1818 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1819 [(set_attr "type" "ssemuladd")
1820 (set_attr "mode" "<MODE>")])
1822 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with argument 1 but
;; pass it with argument 2 (presumably the permitted number of memory
;; operands -- confirm against ix86_sse5_valid_op_p), and operand 0 is not
;; mentioned in any input.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fnmsub<mode>4.
1824 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1828 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1829 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1830 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1832 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1833 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1834 && !reg_mentioned_p (operands[0], operands[1])
1835 && !reg_mentioned_p (operands[0], operands[2])
1836 && !reg_mentioned_p (operands[0], operands[3])"
1839 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1840 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1841 operands[2], operands[3]));
1845 ;; For the scalar operations, use operand1 for the upper words that aren't
1846 ;; modified, so restrict the forms that are generated.
1847 ;; Scalar version of fnmsub
;; Scalar ("vm") SSE5 negated multiply-subtract for V4SF/V2DF, expressed
;; as a vec_merge with operand 1 tied to the output in both alternatives.
;; NOTE(review): this scalar pattern passes 2 to ix86_sse5_valid_op_p
;; where the other scalar (vm) patterns pass 1 -- confirm this is
;; intended.
1848 (define_insn "sse5_vmfnmsub<mode>4"
1849 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1850 (vec_merge:SSEMODEF2P
1854 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1855 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1856 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1859 "TARGET_SSE5 && TARGET_FUSED_MADD
1860 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1861 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1862 [(set_attr "type" "ssemuladd")
1863 (set_attr "mode" "<MODE>")])
1865 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1866 ;; even if the user used -mno-fused-madd
1867 ;; Parallel instructions. During instruction generation, just default
1868 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for fmadd on V4SF/V2DF; all operands are registers
;; and the RTL is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, sse5_fmadd<mode>4 is emitted in place of the
;; UNSPEC form.
1869 (define_expand "sse5i_fmadd<mode>4"
1870 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1874 (match_operand:SSEMODEF2P 1 "register_operand" "")
1875 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1876 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1877 UNSPEC_SSE5_INTRINSIC))]
1880 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1881 if (TARGET_FUSED_MADD)
1883 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1884 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmadd for V4SF/V2DF.  The condition
;; requires only TARGET_SSE5 (not TARGET_FUSED_MADD) and passes 1 to
;; ix86_sse5_valid_op_p.  Same four constraint alternatives as
;; sse5_fmadd<mode>4.
1889 (define_insn "*sse5i_fmadd<mode>4"
1890 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1894 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1895 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1896 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1897 UNSPEC_SSE5_INTRINSIC))]
1898 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1899 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1900 [(set_attr "type" "ssemuladd")
1901 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for fmsub on V4SF/V2DF; all operands are registers
;; and the RTL is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, sse5_fmsub<mode>4 is emitted in place of the
;; UNSPEC form.
1903 (define_expand "sse5i_fmsub<mode>4"
1904 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1908 (match_operand:SSEMODEF2P 1 "register_operand" "")
1909 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1910 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1911 UNSPEC_SSE5_INTRINSIC))]
1914 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1915 if (TARGET_FUSED_MADD)
1917 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1918 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of fmsub.  Same four constraint
;; alternatives as the fmadd intrinsic insn (operand 1 or operand 3 tied to
;; the output).  Enabled when TARGET_SSE5 holds and ix86_sse5_valid_op_p
;; accepts the operands.
1923 (define_insn "*sse5i_fmsub<mode>4"
1924 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1928 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1929 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1930 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1931 UNSPEC_SSE5_INTRINSIC))]
1932 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1933 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1934 [(set_attr "type" "ssemuladd")
1935 (set_attr "mode" "<MODE>")])
1937 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1938 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the intrinsic (UNSPEC_SSE5_INTRINSIC) form of fnmadd.
;; Operand 3 appears in the pattern before operands 1 and 2.  With
;; TARGET_FUSED_MADD the preparation code emits gen_sse5_fnmadd<mode>4,
;; passing operands 0-3 in numeric order.
1939 (define_expand "sse5i_fnmadd<mode>4"
1940 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1943 (match_operand:SSEMODEF2P 3 "register_operand" "")
1945 (match_operand:SSEMODEF2P 1 "register_operand" "")
1946 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1947 UNSPEC_SSE5_INTRINSIC))]
1950 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1951 if (TARGET_FUSED_MADD)
1953 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1954 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of fnmadd.  Operand 3 is listed
;; first in the pattern; the output template still prints operands in
;; numeric order.  Operand 1 or operand 3 is tied to the output depending on
;; the alternative.
1959 (define_insn "*sse5i_fnmadd<mode>4"
1960 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1963 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1965 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1966 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1967 UNSPEC_SSE5_INTRINSIC))]
1968 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1969 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1970 [(set_attr "type" "ssemuladd")
1971 (set_attr "mode" "<MODE>")])
1973 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the intrinsic (UNSPEC_SSE5_INTRINSIC) form of fnmsub; all
;; operands are registers at expand time.  With TARGET_FUSED_MADD the
;; preparation code emits gen_sse5_fnmsub<mode>4 instead.
1974 (define_expand "sse5i_fnmsub<mode>4"
1975 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1980 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1981 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1982 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1983 UNSPEC_SSE5_INTRINSIC))]
1986 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1987 if (TARGET_FUSED_MADD)
1989 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1990 operands[2], operands[3]));
;; Insn for the UNSPEC_SSE5_INTRINSIC form of fnmsub.  Unlike the other
;; parallel intrinsic insns, operand 1 carries no '%' (commutative) marker
;; and the last argument to ix86_sse5_valid_op_p is false.
1995 (define_insn "*sse5i_fnmsub<mode>4"
1996 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2001 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2002 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2003 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2004 UNSPEC_SSE5_INTRINSIC))]
2005 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2006 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2007 [(set_attr "type" "ssemuladd")
2008 (set_attr "mode" "<MODE>")])
2010 ;; Scalar instructions
;; Expander for the scalar (vec_merge) intrinsic form of fmadd, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time.  With
;; TARGET_FUSED_MADD the preparation code emits gen_sse5_vmfmadd<mode>4.
2011 (define_expand "sse5i_vmfmadd<mode>4"
2012 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2014 [(vec_merge:SSEMODEF2P
2017 (match_operand:SSEMODEF2P 1 "register_operand" "")
2018 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2019 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2022 UNSPEC_SSE5_INTRINSIC))]
2025 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2026 if (TARGET_FUSED_MADD)
2028 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2029 operands[2], operands[3]));
2034 ;; For the scalar operations, use operand1 for the upper words that aren't
2035 ;; modified, so restrict the forms that are accepted.
;; Insn for the scalar intrinsic form of fmadd.  Two alternatives only;
;; operand 1 must be a register tied to the output in both, and memory is
;; allowed for operand 2 or operand 3 (one per alternative).  The "mode"
;; attribute is the scalar mode.
2036 (define_insn "*sse5i_vmfmadd<mode>4"
2037 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2039 [(vec_merge:SSEMODEF2P
2042 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2043 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2044 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2047 UNSPEC_SSE5_INTRINSIC))]
2048 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2049 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2050 [(set_attr "type" "ssemuladd")
2051 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) intrinsic form of fmsub, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time.  With
;; TARGET_FUSED_MADD the preparation code emits gen_sse5_vmfmsub<mode>4.
2053 (define_expand "sse5i_vmfmsub<mode>4"
2054 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2056 [(vec_merge:SSEMODEF2P
2059 (match_operand:SSEMODEF2P 1 "register_operand" "")
2060 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2061 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2064 UNSPEC_SSE5_INTRINSIC))]
2067 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2068 if (TARGET_FUSED_MADD)
2070 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2071 operands[2], operands[3]));
;; Insn for the scalar intrinsic form of fmsub.  Two alternatives; operand 1
;; is a register tied to the output in both, with memory allowed for
;; operand 2 or operand 3.  The "mode" attribute is the scalar mode.
2076 (define_insn "*sse5i_vmfmsub<mode>4"
2077 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2079 [(vec_merge:SSEMODEF2P
2082 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2083 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2084 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2087 UNSPEC_SSE5_INTRINSIC))]
2088 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2089 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2090 [(set_attr "type" "ssemuladd")
2091 (set_attr "mode" "<ssescalarmode>")])
2093 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar (vec_merge) intrinsic form of fnmadd.  Operand 3
;; appears in the pattern before operands 1 and 2.  With TARGET_FUSED_MADD
;; the preparation code emits gen_sse5_vmfnmadd<mode>4 with operands 0-3 in
;; numeric order.
2094 (define_expand "sse5i_vmfnmadd<mode>4"
2095 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2097 [(vec_merge:SSEMODEF2P
2099 (match_operand:SSEMODEF2P 3 "register_operand" "")
2101 (match_operand:SSEMODEF2P 1 "register_operand" "")
2102 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2105 UNSPEC_SSE5_INTRINSIC))]
2108 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2109 if (TARGET_FUSED_MADD)
2111 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2112 operands[2], operands[3]));
;; Insn for the scalar intrinsic form of fnmadd.  Two alternatives; operand 1
;; is tied to the output in both.
;; NOTE(review): operand 1 uses the nonimmediate_operand predicate and a '%'
;; marker here, while the other scalar intrinsic insns use register_operand
;; without '%' -- confirm this difference is intended.
2117 (define_insn "*sse5i_vmfnmadd<mode>4"
2118 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2120 [(vec_merge:SSEMODEF2P
2122 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2124 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2125 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2128 UNSPEC_SSE5_INTRINSIC))]
2129 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2130 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2131 [(set_attr "type" "ssemuladd")
2132 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) intrinsic form of fnmsub, wrapped in
;; UNSPEC_SSE5_INTRINSIC.  All operands are registers at expand time.  With
;; TARGET_FUSED_MADD the preparation code emits gen_sse5_vmfnmsub<mode>4.
2134 (define_expand "sse5i_vmfnmsub<mode>4"
2135 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2137 [(vec_merge:SSEMODEF2P
2141 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2142 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2143 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2146 UNSPEC_SSE5_INTRINSIC))]
2149 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2150 if (TARGET_FUSED_MADD)
2152 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2153 operands[2], operands[3]));
;; Insn for the scalar intrinsic form of fnmsub.  Two alternatives; operand 1
;; is a register tied to the output in both, with memory allowed for
;; operand 2 or operand 3.  The "mode" attribute is the scalar mode.
2158 (define_insn "*sse5i_vmfnmsub<mode>4"
2159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2161 [(vec_merge:SSEMODEF2P
2165 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2166 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2167 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2170 UNSPEC_SSE5_INTRINSIC))]
2171 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2172 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2173 [(set_attr "type" "ssemuladd")
2174 (set_attr "mode" "<ssescalarmode>")])
2176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2178 ;; Parallel single-precision floating point conversion operations
2180 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 ("ym" constraint) with float:V2SF into
;; V4SF operand 0; operand 1 is tied to the output register.
2182 (define_insn "sse_cvtpi2ps"
2183 [(set (match_operand:V4SF 0 "register_operand" "=x")
2186 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2187 (match_operand:V4SF 1 "register_operand" "0")
2190 "cvtpi2ps\t{%2, %0|%0, %2}"
2191 [(set_attr "type" "ssecvt")
2192 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 ("xm") goes through a V4SI unspec; the parallel
;; names elements 0 and 1, and the V2SI result uses constraint "y".
;; Tagged with unit "mmx".
2194 (define_insn "sse_cvtps2pi"
2195 [(set (match_operand:V2SI 0 "register_operand" "=y")
2197 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2199 (parallel [(const_int 0) (const_int 1)])))]
2201 "cvtps2pi\t{%1, %0|%0, %1}"
2202 [(set_attr "type" "ssecvt")
2203 (set_attr "unit" "mmx")
2204 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF operand 1 ("xm") is converted with fix:V4SI; the parallel
;; names elements 0 and 1, and the V2SI result uses constraint "y".
;; NOTE(review): "mode" is SF here but DI on sse_cvtps2pi -- confirm intended.
2206 (define_insn "sse_cvttps2pi"
2207 [(set (match_operand:V2SI 0 "register_operand" "=y")
2209 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2210 (parallel [(const_int 0) (const_int 1)])))]
2212 "cvttps2pi\t{%1, %0|%0, %1}"
2213 [(set_attr "type" "ssecvt")
2214 (set_attr "unit" "mmx")
2215 (set_attr "mode" "SF")])
;; vcvtsi2ss (VEX, three-operand): converts SI operand 2 ("rm") with float:SF;
;; operand 1 is a separate V4SF source register, not tied to the output.
2217 (define_insn "*avx_cvtsi2ss"
2218 [(set (match_operand:V4SF 0 "register_operand" "=x")
2221 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2222 (match_operand:V4SF 1 "register_operand" "x")
2225 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2226 [(set_attr "type" "sseicvt")
2227 (set_attr "prefix" "vex")
2228 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SI operand 2 with float:SF; operand 1 is tied to the
;; output.  Register and memory sources are separate alternatives so the
;; athlon/amdfam10 decode attributes can differ per alternative.
2230 (define_insn "sse_cvtsi2ss"
2231 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2234 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2235 (match_operand:V4SF 1 "register_operand" "0,0")
2238 "cvtsi2ss\t{%2, %0|%0, %2}"
2239 [(set_attr "type" "sseicvt")
2240 (set_attr "athlon_decode" "vector,double")
2241 (set_attr "amdfam10_decode" "vector,double")
2242 (set_attr "mode" "SF")])
;; vcvtsi2ssq (VEX, three-operand): DI-source variant of the int-to-SF
;; conversion; only enabled for TARGET_AVX && TARGET_64BIT.
2244 (define_insn "*avx_cvtsi2ssq"
2245 [(set (match_operand:V4SF 0 "register_operand" "=x")
2248 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2249 (match_operand:V4SF 1 "register_operand" "x")
2251 "TARGET_AVX && TARGET_64BIT"
2252 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2253 [(set_attr "type" "sseicvt")
2254 (set_attr "prefix" "vex")
2255 (set_attr "mode" "SF")])
;; cvtsi2ssq: DI-source variant of cvtsi2ss; operand 1 is tied to the output.
;; Only enabled for TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative of operand 2 is "rm", whereas
;; sse_cvtsi2ss and sse2_cvtsi2sdq use "m" there -- confirm this is intended,
;; since the decode attributes are given per alternative.
2257 (define_insn "sse_cvtsi2ssq"
2258 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2261 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2262 (match_operand:V4SF 1 "register_operand" "0,0")
2264 "TARGET_SSE && TARGET_64BIT"
2265 "cvtsi2ssq\t{%2, %0|%0, %2}"
2266 [(set_attr "type" "sseicvt")
2267 (set_attr "athlon_decode" "vector,double")
2268 (set_attr "amdfam10_decode" "vector,double")
2269 (set_attr "mode" "SF")])
;; cvtss2si on a V4SF operand (element 0 per the parallel), using
;; UNSPEC_FIX_NOTRUNC; SI result in a general register.  The "%v" template
;; prefix and "maybe_vex" attribute cover both the SSE and VEX encodings.
2271 (define_insn "sse_cvtss2si"
2272 [(set (match_operand:SI 0 "register_operand" "=r,r")
2275 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2276 (parallel [(const_int 0)]))]
2277 UNSPEC_FIX_NOTRUNC))]
2279 "%vcvtss2si\t{%1, %0|%0, %1}"
2280 [(set_attr "type" "sseicvt")
2281 (set_attr "athlon_decode" "double,vector")
2282 (set_attr "prefix_rep" "1")
2283 (set_attr "prefix" "maybe_vex")
2284 (set_attr "mode" "SI")])
;; cvtss2si variant whose source is a plain SF operand rather than a V4SF
;; vector; same UNSPEC_FIX_NOTRUNC wrapper and output template.
2286 (define_insn "sse_cvtss2si_2"
2287 [(set (match_operand:SI 0 "register_operand" "=r,r")
2288 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2289 UNSPEC_FIX_NOTRUNC))]
2291 "%vcvtss2si\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "sseicvt")
2293 (set_attr "athlon_decode" "double,vector")
2294 (set_attr "amdfam10_decode" "double,double")
2295 (set_attr "prefix_rep" "1")
2296 (set_attr "prefix" "maybe_vex")
2297 (set_attr "mode" "SI")])
;; cvtss2siq: DI-result form of sse_cvtss2si (V4SF source, element 0,
;; UNSPEC_FIX_NOTRUNC).  Only enabled for TARGET_SSE && TARGET_64BIT.
2299 (define_insn "sse_cvtss2siq"
2300 [(set (match_operand:DI 0 "register_operand" "=r,r")
2303 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2304 (parallel [(const_int 0)]))]
2305 UNSPEC_FIX_NOTRUNC))]
2306 "TARGET_SSE && TARGET_64BIT"
2307 "%vcvtss2siq\t{%1, %0|%0, %1}"
2308 [(set_attr "type" "sseicvt")
2309 (set_attr "athlon_decode" "double,vector")
2310 (set_attr "prefix_rep" "1")
2311 (set_attr "prefix" "maybe_vex")
2312 (set_attr "mode" "DI")])
;; cvtss2siq variant whose source is a plain SF operand; DI result.
;; Only enabled for TARGET_SSE && TARGET_64BIT.
2314 (define_insn "sse_cvtss2siq_2"
2315 [(set (match_operand:DI 0 "register_operand" "=r,r")
2316 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2317 UNSPEC_FIX_NOTRUNC))]
2318 "TARGET_SSE && TARGET_64BIT"
2319 "%vcvtss2siq\t{%1, %0|%0, %1}"
2320 [(set_attr "type" "sseicvt")
2321 (set_attr "athlon_decode" "double,vector")
2322 (set_attr "amdfam10_decode" "double,double")
2323 (set_attr "prefix_rep" "1")
2324 (set_attr "prefix" "maybe_vex")
2325 (set_attr "mode" "DI")])
;; cvttss2si on element 0 of a V4SF operand (register or memory alternative);
;; SI result in a general register.  Not wrapped in UNSPEC_FIX_NOTRUNC.
2327 (define_insn "sse_cvttss2si"
2328 [(set (match_operand:SI 0 "register_operand" "=r,r")
2331 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2332 (parallel [(const_int 0)]))))]
2334 "%vcvttss2si\t{%1, %0|%0, %1}"
2335 [(set_attr "type" "sseicvt")
2336 (set_attr "athlon_decode" "double,vector")
2337 (set_attr "amdfam10_decode" "double,double")
2338 (set_attr "prefix_rep" "1")
2339 (set_attr "prefix" "maybe_vex")
2340 (set_attr "mode" "SI")])
;; cvttss2siq: DI-result form of sse_cvttss2si.  Only enabled for
;; TARGET_SSE && TARGET_64BIT.
2342 (define_insn "sse_cvttss2siq"
2343 [(set (match_operand:DI 0 "register_operand" "=r,r")
2346 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2347 (parallel [(const_int 0)]))))]
2348 "TARGET_SSE && TARGET_64BIT"
2349 "%vcvttss2siq\t{%1, %0|%0, %1}"
2350 [(set_attr "type" "sseicvt")
2351 (set_attr "athlon_decode" "double,vector")
2352 (set_attr "amdfam10_decode" "double,double")
2353 (set_attr "prefix_rep" "1")
2354 (set_attr "prefix" "maybe_vex")
2355 (set_attr "mode" "DI")])
;; vcvtdq2ps over the AVXMODEDCVTDQ2PS modes: float conversion of operand 1,
;; whose mode is <avxcvtvecmode>.  VEX-encoded.
2357 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2358 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2359 (float:AVXMODEDCVTDQ2PS
2360 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2362 "vcvtdq2ps\t{%1, %0|%0, %1}"
2363 [(set_attr "type" "ssecvt")
2364 (set_attr "prefix" "vex")
2365 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: float:V4SF conversion of V4SI operand 1 (register or memory).
2367 (define_insn "sse2_cvtdq2ps"
2368 [(set (match_operand:V4SF 0 "register_operand" "=x")
2369 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2371 "cvtdq2ps\t{%1, %0|%0, %1}"
2372 [(set_attr "type" "ssecvt")
2373 (set_attr "mode" "V4SF")])
;; vcvtps2dq over the AVXMODEDCVTPS2DQ modes, expressed as
;; UNSPEC_FIX_NOTRUNC of operand 1 (mode <avxcvtvecmode>).  VEX-encoded.
2375 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2376 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2377 (unspec:AVXMODEDCVTPS2DQ
2378 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2379 UNSPEC_FIX_NOTRUNC))]
2381 "vcvtps2dq\t{%1, %0|%0, %1}"
2382 [(set_attr "type" "ssecvt")
2383 (set_attr "prefix" "vex")
2384 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: UNSPEC_FIX_NOTRUNC of V4SF operand 1 into a V4SI register.
;; Marked as needing the data16 prefix.
2386 (define_insn "sse2_cvtps2dq"
2387 [(set (match_operand:V4SI 0 "register_operand" "=x")
2388 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2389 UNSPEC_FIX_NOTRUNC))]
2391 "cvtps2dq\t{%1, %0|%0, %1}"
2392 [(set_attr "type" "ssecvt")
2393 (set_attr "prefix_data16" "1")
2394 (set_attr "mode" "TI")])
;; vcvttps2dq over the AVXMODEDCVTPS2DQ modes, expressed with the RTL "fix"
;; operator on operand 1 (mode <avxcvtvecmode>).  VEX-encoded.
2396 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2397 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2398 (fix:AVXMODEDCVTPS2DQ
2399 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2401 "vcvttps2dq\t{%1, %0|%0, %1}"
2402 [(set_attr "type" "ssecvt")
2403 (set_attr "prefix" "vex")
2404 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: fix:V4SI of V4SF operand 1.  Marked as needing the rep prefix.
2406 (define_insn "sse2_cvttps2dq"
2407 [(set (match_operand:V4SI 0 "register_operand" "=x")
2408 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2410 "cvttps2dq\t{%1, %0|%0, %1}"
2411 [(set_attr "type" "ssecvt")
2412 (set_attr "prefix_rep" "1")
2413 (set_attr "mode" "TI")])
2415 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2417 ;; Parallel double-precision floating point conversion operations
2419 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of V2SI operand 1.  The "y" and "m" sources are
;; separate alternatives; only the first is tagged with unit "mmx".
2421 (define_insn "sse2_cvtpi2pd"
2422 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2423 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2425 "cvtpi2pd\t{%1, %0|%0, %1}"
2426 [(set_attr "type" "ssecvt")
2427 (set_attr "unit" "mmx,*")
2428 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC of V2DF operand 1 into a V2SI destination
;; with constraint "y".  Tagged with unit "mmx" and the data16 prefix.
2430 (define_insn "sse2_cvtpd2pi"
2431 [(set (match_operand:V2SI 0 "register_operand" "=y")
2432 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2433 UNSPEC_FIX_NOTRUNC))]
2435 "cvtpd2pi\t{%1, %0|%0, %1}"
2436 [(set_attr "type" "ssecvt")
2437 (set_attr "unit" "mmx")
2438 (set_attr "prefix_data16" "1")
2439 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI of V2DF operand 1 into a V2SI destination with
;; constraint "y".  Tagged with unit "mmx" and the data16 prefix.
;; NOTE(review): "mode" is TI here but DI on sse2_cvtpd2pi -- confirm intended.
2441 (define_insn "sse2_cvttpd2pi"
2442 [(set (match_operand:V2SI 0 "register_operand" "=y")
2443 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2445 "cvttpd2pi\t{%1, %0|%0, %1}"
2446 [(set_attr "type" "ssecvt")
2447 (set_attr "unit" "mmx")
2448 (set_attr "prefix_data16" "1")
2449 (set_attr "mode" "TI")])
;; vcvtsi2sd (VEX, three-operand): converts SI operand 2 ("rm") with float:DF;
;; operand 1 is a separate V2DF source register, not tied to the output.
2451 (define_insn "*avx_cvtsi2sd"
2452 [(set (match_operand:V2DF 0 "register_operand" "=x")
2455 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2456 (match_operand:V2DF 1 "register_operand" "x")
2459 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2460 [(set_attr "type" "sseicvt")
2461 (set_attr "prefix" "vex")
2462 (set_attr "mode" "DF")])
;; cvtsi2sd: converts SI operand 2 with float:DF; operand 1 is tied to the
;; output.  Register and memory sources are separate alternatives with their
;; own athlon/amdfam10 decode attributes.
2464 (define_insn "sse2_cvtsi2sd"
2465 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2468 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2469 (match_operand:V2DF 1 "register_operand" "0,0")
2472 "cvtsi2sd\t{%2, %0|%0, %2}"
2473 [(set_attr "type" "sseicvt")
2474 (set_attr "mode" "DF")
2475 (set_attr "athlon_decode" "double,direct")
2476 (set_attr "amdfam10_decode" "vector,double")])
;; vcvtsi2sdq (VEX, three-operand): DI-source variant of the int-to-DF
;; conversion; only enabled for TARGET_AVX && TARGET_64BIT.
2478 (define_insn "*avx_cvtsi2sdq"
2479 [(set (match_operand:V2DF 0 "register_operand" "=x")
2482 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2483 (match_operand:V2DF 1 "register_operand" "x")
2485 "TARGET_AVX && TARGET_64BIT"
2486 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2487 [(set_attr "type" "sseicvt")
2488 (set_attr "prefix" "vex")
2489 (set_attr "mode" "DF")])
;; cvtsi2sdq: DI-source variant of cvtsi2sd; operand 1 is tied to the output.
;; Only enabled for TARGET_SSE2 && TARGET_64BIT.
2491 (define_insn "sse2_cvtsi2sdq"
2492 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2495 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2496 (match_operand:V2DF 1 "register_operand" "0,0")
2498 "TARGET_SSE2 && TARGET_64BIT"
2499 "cvtsi2sdq\t{%2, %0|%0, %2}"
2500 [(set_attr "type" "sseicvt")
2501 (set_attr "mode" "DF")
2502 (set_attr "athlon_decode" "double,direct")
2503 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on a V2DF operand (element 0 per the parallel), using
;; UNSPEC_FIX_NOTRUNC; SI result in a general register.  "%v"/"maybe_vex"
;; cover both the SSE and VEX encodings.
2505 (define_insn "sse2_cvtsd2si"
2506 [(set (match_operand:SI 0 "register_operand" "=r,r")
2509 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2510 (parallel [(const_int 0)]))]
2511 UNSPEC_FIX_NOTRUNC))]
2513 "%vcvtsd2si\t{%1, %0|%0, %1}"
2514 [(set_attr "type" "sseicvt")
2515 (set_attr "athlon_decode" "double,vector")
2516 (set_attr "prefix_rep" "1")
2517 (set_attr "prefix" "maybe_vex")
2518 (set_attr "mode" "SI")])
;; cvtsd2si variant whose source is a plain DF operand rather than a V2DF
;; vector; same UNSPEC_FIX_NOTRUNC wrapper and output template.
2520 (define_insn "sse2_cvtsd2si_2"
2521 [(set (match_operand:SI 0 "register_operand" "=r,r")
2522 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2523 UNSPEC_FIX_NOTRUNC))]
2525 "%vcvtsd2si\t{%1, %0|%0, %1}"
2526 [(set_attr "type" "sseicvt")
2527 (set_attr "athlon_decode" "double,vector")
2528 (set_attr "amdfam10_decode" "double,double")
2529 (set_attr "prefix_rep" "1")
2530 (set_attr "prefix" "maybe_vex")
2531 (set_attr "mode" "SI")])
;; cvtsd2siq: DI-result form of sse2_cvtsd2si (V2DF source, element 0,
;; UNSPEC_FIX_NOTRUNC).  Only enabled for TARGET_SSE2 && TARGET_64BIT.
2533 (define_insn "sse2_cvtsd2siq"
2534 [(set (match_operand:DI 0 "register_operand" "=r,r")
2537 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2538 (parallel [(const_int 0)]))]
2539 UNSPEC_FIX_NOTRUNC))]
2540 "TARGET_SSE2 && TARGET_64BIT"
2541 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2542 [(set_attr "type" "sseicvt")
2543 (set_attr "athlon_decode" "double,vector")
2544 (set_attr "prefix_rep" "1")
2545 (set_attr "prefix" "maybe_vex")
2546 (set_attr "mode" "DI")])
;; cvtsd2siq variant whose source is a plain DF operand; DI result.
;; Only enabled for TARGET_SSE2 && TARGET_64BIT.
2548 (define_insn "sse2_cvtsd2siq_2"
2549 [(set (match_operand:DI 0 "register_operand" "=r,r")
2550 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2551 UNSPEC_FIX_NOTRUNC))]
2552 "TARGET_SSE2 && TARGET_64BIT"
2553 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2554 [(set_attr "type" "sseicvt")
2555 (set_attr "athlon_decode" "double,vector")
2556 (set_attr "amdfam10_decode" "double,double")
2557 (set_attr "prefix_rep" "1")
2558 (set_attr "prefix" "maybe_vex")
2559 (set_attr "mode" "DI")])
;; cvttsd2si on element 0 of a V2DF operand (register or memory alternative);
;; SI result in a general register.  Not wrapped in UNSPEC_FIX_NOTRUNC.
2561 (define_insn "sse2_cvttsd2si"
2562 [(set (match_operand:SI 0 "register_operand" "=r,r")
2565 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2566 (parallel [(const_int 0)]))))]
2568 "%vcvttsd2si\t{%1, %0|%0, %1}"
2569 [(set_attr "type" "sseicvt")
2570 (set_attr "prefix_rep" "1")
2571 (set_attr "prefix" "maybe_vex")
2572 (set_attr "mode" "SI")
2573 (set_attr "athlon_decode" "double,vector")
2574 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DI-result form of sse2_cvttsd2si.  Only enabled for
;; TARGET_SSE2 && TARGET_64BIT.
2576 (define_insn "sse2_cvttsd2siq"
2577 [(set (match_operand:DI 0 "register_operand" "=r,r")
2580 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2581 (parallel [(const_int 0)]))))]
2582 "TARGET_SSE2 && TARGET_64BIT"
2583 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2584 [(set_attr "type" "sseicvt")
2585 (set_attr "prefix_rep" "1")
2586 (set_attr "prefix" "maybe_vex")
2587 (set_attr "mode" "DI")
2588 (set_attr "athlon_decode" "double,vector")
2589 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd, 256-bit result: float:V4DF of V4SI operand 1.  VEX-encoded.
2591 (define_insn "avx_cvtdq2pd256"
2592 [(set (match_operand:V4DF 0 "register_operand" "=x")
2593 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2595 "vcvtdq2pd\t{%1, %0|%0, %1}"
2596 [(set_attr "type" "ssecvt")
2597 (set_attr "prefix" "vex")
2598 (set_attr "mode" "V4DF")])
;; cvtdq2pd: V2DF result from V4SI operand 1; the parallel names elements
;; 0 and 1.  "%v"/"maybe_vex" cover both the SSE and VEX encodings.
2600 (define_insn "sse2_cvtdq2pd"
2601 [(set (match_operand:V2DF 0 "register_operand" "=x")
2604 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2605 (parallel [(const_int 0) (const_int 1)]))))]
2607 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2608 [(set_attr "type" "ssecvt")
2609 (set_attr "prefix" "maybe_vex")
2610 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a V4DF source: UNSPEC_FIX_NOTRUNC into V4SI.  The "{y}"
;; in the template is a mnemonic suffix emitted in AT&T syntax only.
2612 (define_insn "avx_cvtpd2dq256"
2613 [(set (match_operand:V4SI 0 "register_operand" "=x")
2614 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2615 UNSPEC_FIX_NOTRUNC))]
2617 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2618 [(set_attr "type" "ssecvt")
2619 (set_attr "prefix" "vex")
2620 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq with a V2DF source and V4SI destination.  The
;; preparation code sets operand 2 to the V2SI zero constant.
2622 (define_expand "sse2_cvtpd2dq"
2623 [(set (match_operand:V4SI 0 "register_operand" "")
2625 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2629 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvtpd2dq: V2SI unspec of V2DF operand 1 combined with a V2SI
;; zero constant (operand 2) into a V4SI register.  The C output code picks
;; the VEX mnemonic (with AT&T "{x}" suffix) when TARGET_AVX, else the SSE one.
2631 (define_insn "*sse2_cvtpd2dq"
2632 [(set (match_operand:V4SI 0 "register_operand" "=x")
2634 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2636 (match_operand:V2SI 2 "const0_operand" "")))]
2638 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2639 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2640 [(set_attr "type" "ssecvt")
2641 (set_attr "prefix_rep" "1")
2642 (set_attr "prefix" "maybe_vex")
2643 (set_attr "mode" "TI")
2644 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq with a V4DF source: fix:V4SI of operand 1.  The "{y}" in the
;; template is a mnemonic suffix emitted in AT&T syntax only.
2646 (define_insn "avx_cvttpd2dq256"
2647 [(set (match_operand:V4SI 0 "register_operand" "=x")
2648 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2650 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2651 [(set_attr "type" "ssecvt")
2652 (set_attr "prefix" "vex")
2653 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq with a V2DF source and V4SI destination.  The
;; preparation code sets operand 2 to the V2SI zero constant.
2655 (define_expand "sse2_cvttpd2dq"
2656 [(set (match_operand:V4SI 0 "register_operand" "")
2658 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2661 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvttpd2dq: fix:V2SI of V2DF operand 1 combined with a V2SI zero
;; constant (operand 2) into a V4SI register.  The C output code picks the
;; VEX mnemonic (with AT&T "{x}" suffix) when TARGET_AVX, else the SSE one.
2663 (define_insn "*sse2_cvttpd2dq"
2664 [(set (match_operand:V4SI 0 "register_operand" "=x")
2666 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2667 (match_operand:V2SI 2 "const0_operand" "")))]
2669 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2670 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2671 [(set_attr "type" "ssecvt")
2672 (set_attr "prefix_rep" "1")
2673 (set_attr "prefix" "maybe_vex")
2674 (set_attr "mode" "TI")
2675 (set_attr "amdfam10_decode" "double")])
;; vcvtsd2ss (VEX, three-operand): float_truncate:V2SF of V2DF operand 2;
;; operand 1 is a separate V4SF source register, not tied to the output.
2677 (define_insn "*avx_cvtsd2ss"
2678 [(set (match_operand:V4SF 0 "register_operand" "=x")
2681 (float_truncate:V2SF
2682 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2683 (match_operand:V4SF 1 "register_operand" "x")
2686 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2687 [(set_attr "type" "ssecvt")
2688 (set_attr "prefix" "vex")
2689 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate:V2SF of V2DF operand 2; operand 1 is tied to the
;; output.  Register and memory sources are separate alternatives with their
;; own athlon/amdfam10 decode attributes.
2691 (define_insn "sse2_cvtsd2ss"
2692 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2695 (float_truncate:V2SF
2696 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2697 (match_operand:V4SF 1 "register_operand" "0,0")
2700 "cvtsd2ss\t{%2, %0|%0, %2}"
2701 [(set_attr "type" "ssecvt")
2702 (set_attr "athlon_decode" "vector,double")
2703 (set_attr "amdfam10_decode" "vector,double")
2704 (set_attr "mode" "SF")])
;; vcvtss2sd (VEX, three-operand): source is V4SF operand 2 (the parallel
;; names elements 0 and 1); operand 1 is a separate V2DF source register.
2706 (define_insn "*avx_cvtss2sd"
2707 [(set (match_operand:V2DF 0 "register_operand" "=x")
2711 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2712 (parallel [(const_int 0) (const_int 1)])))
2713 (match_operand:V2DF 1 "register_operand" "x")
2716 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2717 [(set_attr "type" "ssecvt")
2718 (set_attr "prefix" "vex")
2719 (set_attr "mode" "DF")])
;; cvtss2sd: source is V4SF operand 2 (the parallel names elements 0 and 1);
;; operand 1 is tied to the output.  Register and memory sources are
;; separate alternatives with their own amdfam10 decode attribute.
2721 (define_insn "sse2_cvtss2sd"
2722 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2726 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2727 (parallel [(const_int 0) (const_int 1)])))
2728 (match_operand:V2DF 1 "register_operand" "0,0")
2731 "cvtss2sd\t{%2, %0|%0, %2}"
2732 [(set_attr "type" "ssecvt")
2733 (set_attr "amdfam10_decode" "vector,double")
2734 (set_attr "mode" "DF")])
;; vcvtpd2ps with a V4DF source: float_truncate:V4SF of operand 1.  The
;; "{y}" in the template is a mnemonic suffix emitted in AT&T syntax only.
2736 (define_insn "avx_cvtpd2ps256"
2737 [(set (match_operand:V4SF 0 "register_operand" "=x")
2738 (float_truncate:V4SF
2739 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2741 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2742 [(set_attr "type" "ssecvt")
2743 (set_attr "prefix" "vex")
2744 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps with a V2DF source and V4SF destination.  The
;; preparation code sets operand 2 to the V2SF zero constant.
2746 (define_expand "sse2_cvtpd2ps"
2747 [(set (match_operand:V4SF 0 "register_operand" "")
2749 (float_truncate:V2SF
2750 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2753 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for cvtpd2ps: float_truncate:V2SF of V2DF operand 1 combined with a
;; V2SF zero constant (operand 2) into a V4SF register.  The C output code
;; picks the VEX mnemonic (with AT&T "{x}" suffix) when TARGET_AVX, else the
;; SSE one.
2755 (define_insn "*sse2_cvtpd2ps"
2756 [(set (match_operand:V4SF 0 "register_operand" "=x")
2758 (float_truncate:V2SF
2759 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2760 (match_operand:V2SF 2 "const0_operand" "")))]
2762 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2763 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2764 [(set_attr "type" "ssecvt")
2765 (set_attr "prefix_data16" "1")
2766 (set_attr "prefix" "maybe_vex")
2767 (set_attr "mode" "V4SF")
2768 (set_attr "amdfam10_decode" "double")])
;; vcvtps2pd, 256-bit result: V4DF destination from V4SF operand 1
;; (register or memory).  VEX-encoded.
2770 (define_insn "avx_cvtps2pd256"
2771 [(set (match_operand:V4DF 0 "register_operand" "=x")
2773 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2775 "vcvtps2pd\t{%1, %0|%0, %1}"
2776 [(set_attr "type" "ssecvt")
2777 (set_attr "prefix" "vex")
2778 (set_attr "mode" "V4DF")])
;; cvtps2pd: V2DF result from V4SF operand 1; the parallel names elements
;; 0 and 1.  "%v"/"maybe_vex" cover both the SSE and VEX encodings.
2780 (define_insn "sse2_cvtps2pd"
2781 [(set (match_operand:V2DF 0 "register_operand" "=x")
2784 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2785 (parallel [(const_int 0) (const_int 1)]))))]
2787 "%vcvtps2pd\t{%1, %0|%0, %1}"
2788 [(set_attr "type" "ssecvt")
2789 (set_attr "prefix" "maybe_vex")
2790 (set_attr "mode" "V2DF")
2791 (set_attr "amdfam10_decode" "direct")])
;; Two-set expander producing a V2DF result (operand 0) from V4SF operand 1.
;; Operand 2 is a fresh V4SF pseudo allocated by the preparation code; the
;; second set takes elements 0 and 1 per its parallel.
2793 (define_expand "vec_unpacks_hi_v4sf"
2798 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2799 (parallel [(const_int 6)
2803 (set (match_operand:V2DF 0 "register_operand" "")
2807 (parallel [(const_int 0) (const_int 1)]))))]
2810 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF result from V4SF operand 1; the parallel names
;; elements 0 and 1 of the source.
2813 (define_expand "vec_unpacks_lo_v4sf"
2814 [(set (match_operand:V2DF 0 "register_operand" "")
2817 (match_operand:V4SF 1 "nonimmediate_operand" "")
2818 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF expander: runs gen_vec_unpacks_hi_v8hi on operand 1 into a
;; fresh V4SI temporary, then gen_sse2_cvtdq2ps from it into operand 0.
2821 (define_expand "vec_unpacks_float_hi_v8hi"
2822 [(match_operand:V4SF 0 "register_operand" "")
2823 (match_operand:V8HI 1 "register_operand" "")]
2826 rtx tmp = gen_reg_rtx (V4SImode);
2828 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2829 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: runs gen_vec_unpacks_lo_v8hi on operand 1 into a
;; fresh V4SI temporary, then gen_sse2_cvtdq2ps from it into operand 0.
2833 (define_expand "vec_unpacks_float_lo_v8hi"
2834 [(match_operand:V4SF 0 "register_operand" "")
2835 (match_operand:V8HI 1 "register_operand" "")]
2838 rtx tmp = gen_reg_rtx (V4SImode);
2840 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2841 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: runs gen_vec_unpacku_hi_v8hi on operand 1 into a
;; fresh V4SI temporary, then gen_sse2_cvtdq2ps from it into operand 0.
2845 (define_expand "vec_unpacku_float_hi_v8hi"
2846 [(match_operand:V4SF 0 "register_operand" "")
2847 (match_operand:V8HI 1 "register_operand" "")]
2850 rtx tmp = gen_reg_rtx (V4SImode);
2852 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2853 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: runs gen_vec_unpacku_lo_v8hi on operand 1 into a
;; fresh V4SI temporary, then gen_sse2_cvtdq2ps from it into operand 0.
2857 (define_expand "vec_unpacku_float_lo_v8hi"
2858 [(match_operand:V4SF 0 "register_operand" "")
2859 (match_operand:V8HI 1 "register_operand" "")]
2862 rtx tmp = gen_reg_rtx (V4SImode);
2864 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2865 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-set expander producing a V2DF result (operand 0) from V4SI operand 1.
;; Operand 2 is a fresh V4SI pseudo allocated by the preparation code; the
;; second set takes elements 0 and 1 per its parallel.
2869 (define_expand "vec_unpacks_float_hi_v4si"
2872 (match_operand:V4SI 1 "nonimmediate_operand" "")
2873 (parallel [(const_int 2)
2877 (set (match_operand:V2DF 0 "register_operand" "")
2881 (parallel [(const_int 0) (const_int 1)]))))]
2884 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF result from V4SI operand 1; the parallel names
;; elements 0 and 1 of the source.
2887 (define_expand "vec_unpacks_float_lo_v4si"
2888 [(set (match_operand:V2DF 0 "register_operand" "")
2891 (match_operand:V4SI 1 "nonimmediate_operand" "")
2892 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs into one V4SF result: each input goes through
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and
;; gen_sse_movlhps combines r1 and r2 into operand 0.
2895 (define_expand "vec_pack_trunc_v2df"
2896 [(match_operand:V4SF 0 "register_operand" "")
2897 (match_operand:V2DF 1 "nonimmediate_operand" "")
2898 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2903 r1 = gen_reg_rtx (V4SFmode);
2904 r2 = gen_reg_rtx (V4SFmode);
2906 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2907 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2908 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result: each input goes through
;; gen_sse2_cvttpd2dq into its own V4SI temporary, and gen_sse2_punpcklqdq
;; combines the temporaries, with all three operands viewed as V2DImode via
;; gen_lowpart.
2912 (define_expand "vec_pack_sfix_trunc_v2df"
2913 [(match_operand:V4SI 0 "register_operand" "")
2914 (match_operand:V2DF 1 "nonimmediate_operand" "")
2915 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2920 r1 = gen_reg_rtx (V4SImode);
2921 r2 = gen_reg_rtx (V4SImode);
2923 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2924 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2925 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2926 gen_lowpart (V2DImode, r1),
2927 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into one V4SI result: each input goes through
;; gen_sse2_cvtpd2dq into its own V4SI temporary, and gen_sse2_punpcklqdq
;; combines the temporaries, with all three operands viewed as V2DImode via
;; gen_lowpart.
2931 (define_expand "vec_pack_sfix_v2df"
2932 [(match_operand:V4SI 0 "register_operand" "")
2933 (match_operand:V2DF 1 "nonimmediate_operand" "")
2934 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2939 r1 = gen_reg_rtx (V4SImode);
2940 r2 = gen_reg_rtx (V4SImode);
2942 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2943 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2944 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2945 gen_lowpart (V2DImode, r1),
2946 gen_lowpart (V2DImode, r2)));
2950 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2952 ;; Parallel single-precision floating point element swizzling
2954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper for movhlps on V4SF operands; any operand may be in
;; memory on entry.  The preparation code calls ix86_fixup_binary_operands
;; with UNKNOWN / V4SFmode on the operand array before the pattern is emitted.
2956 (define_expand "sse_movhlps_exp"
2957 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2960 (match_operand:V4SF 1 "nonimmediate_operand" "")
2961 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2962 (parallel [(const_int 6)
2967 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of movhlps with three alternatives, in template order:
;; vmovhlps (register sources), vmovlps using the %H2 operand modifier on an
;; "o" memory source, and vmovhps to a memory destination.  Rejected when
;; operands 1 and 2 are both in memory.
2969 (define_insn "*avx_movhlps"
2970 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2973 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
2974 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2975 (parallel [(const_int 6)
2979 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2981 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2982 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2983 vmovhps\t{%2, %0|%0, %2}"
2984 [(set_attr "type" "ssemov")
2985 (set_attr "prefix" "vex")
2986 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE form of movhlps; operand 1 is tied to the output in every alternative.
;; Alternatives, in template order: movhlps, movlps using %H2 on an "o"
;; memory source, and movhps to a memory destination.  Rejected when
;; operands 1 and 2 are both in memory.
2988 (define_insn "sse_movhlps"
2989 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2992 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2993 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2994 (parallel [(const_int 6)
2998 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3000 movhlps\t{%2, %0|%0, %2}
3001 movlps\t{%H2, %0|%0, %H2}
3002 movhps\t{%2, %0|%0, %2}"
3003 [(set_attr "type" "ssemov")
3004 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander wrapper for movlhps on V4SF operands; any operand may be in
;; memory on entry.  The preparation code calls ix86_fixup_binary_operands
;; with UNKNOWN / V4SFmode on the operand array before the pattern is emitted.
3006 (define_expand "sse_movlhps_exp"
3007 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3010 (match_operand:V4SF 1 "nonimmediate_operand" "")
3011 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3012 (parallel [(const_int 0)
3017 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3019 (define_insn "*avx_movlhps"
3020 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3023 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3024 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3025 (parallel [(const_int 0)
3029 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3031 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3032 vmovhps\t{%2, %1, %0|%0, %1, %2}
3033 vmovlps\t{%2, %H0|%H0, %2}"
3034 [(set_attr "type" "ssemov")
3035 (set_attr "prefix" "vex")
3036 (set_attr "mode" "V4SF,V2SF,V2SF")])
3038 (define_insn "sse_movlhps"
3039 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3042 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3043 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3044 (parallel [(const_int 0)
3048 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3050 movlhps\t{%2, %0|%0, %2}
3051 movhps\t{%2, %0|%0, %2}
3052 movlps\t{%2, %H0|%H0, %2}"
3053 [(set_attr "type" "ssemov")
3054 (set_attr "mode" "V4SF,V2SF,V2SF")])
3056 (define_insn "avx_unpckhps256"
3057 [(set (match_operand:V8SF 0 "register_operand" "=x")
3060 (match_operand:V8SF 1 "register_operand" "x")
3061 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3062 (parallel [(const_int 2) (const_int 10)
3063 (const_int 3) (const_int 11)
3064 (const_int 6) (const_int 14)
3065 (const_int 7) (const_int 15)])))]
3067 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3068 [(set_attr "type" "sselog")
3069 (set_attr "prefix" "vex")
3070 (set_attr "mode" "V8SF")])
3072 (define_insn "*avx_unpckhps"
3073 [(set (match_operand:V4SF 0 "register_operand" "=x")
3076 (match_operand:V4SF 1 "register_operand" "x")
3077 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3078 (parallel [(const_int 2) (const_int 6)
3079 (const_int 3) (const_int 7)])))]
3081 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3082 [(set_attr "type" "sselog")
3083 (set_attr "prefix" "vex")
3084 (set_attr "mode" "V4SF")])
3086 (define_insn "sse_unpckhps"
3087 [(set (match_operand:V4SF 0 "register_operand" "=x")
3090 (match_operand:V4SF 1 "register_operand" "0")
3091 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3092 (parallel [(const_int 2) (const_int 6)
3093 (const_int 3) (const_int 7)])))]
3095 "unpckhps\t{%2, %0|%0, %2}"
3096 [(set_attr "type" "sselog")
3097 (set_attr "mode" "V4SF")])
3099 (define_insn "avx_unpcklps256"
3100 [(set (match_operand:V8SF 0 "register_operand" "=x")
3103 (match_operand:V8SF 1 "register_operand" "x")
3104 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3105 (parallel [(const_int 0) (const_int 8)
3106 (const_int 1) (const_int 9)
3107 (const_int 4) (const_int 12)
3108 (const_int 5) (const_int 13)])))]
3110 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3111 [(set_attr "type" "sselog")
3112 (set_attr "prefix" "vex")
3113 (set_attr "mode" "V8SF")])
3115 (define_insn "*avx_unpcklps"
3116 [(set (match_operand:V4SF 0 "register_operand" "=x")
3119 (match_operand:V4SF 1 "register_operand" "x")
3120 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3121 (parallel [(const_int 0) (const_int 4)
3122 (const_int 1) (const_int 5)])))]
3124 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3125 [(set_attr "type" "sselog")
3126 (set_attr "prefix" "vex")
3127 (set_attr "mode" "V4SF")])
3129 (define_insn "sse_unpcklps"
3130 [(set (match_operand:V4SF 0 "register_operand" "=x")
3133 (match_operand:V4SF 1 "register_operand" "0")
3134 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3135 (parallel [(const_int 0) (const_int 4)
3136 (const_int 1) (const_int 5)])))]
3138 "unpcklps\t{%2, %0|%0, %2}"
3139 [(set_attr "type" "sselog")
3140 (set_attr "mode" "V4SF")])
3142 ;; These are modeled with the same vec_concat as the others so that we
3143 ;; capture users of shufps that can use the new instructions
3144 (define_insn "avx_movshdup256"
3145 [(set (match_operand:V8SF 0 "register_operand" "=x")
3148 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3150 (parallel [(const_int 1) (const_int 1)
3151 (const_int 3) (const_int 3)
3152 (const_int 5) (const_int 5)
3153 (const_int 7) (const_int 7)])))]
3155 "vmovshdup\t{%1, %0|%0, %1}"
3156 [(set_attr "type" "sse")
3157 (set_attr "prefix" "vex")
3158 (set_attr "mode" "V8SF")])
3160 (define_insn "sse3_movshdup"
3161 [(set (match_operand:V4SF 0 "register_operand" "=x")
3164 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3166 (parallel [(const_int 1)
3171 "%vmovshdup\t{%1, %0|%0, %1}"
3172 [(set_attr "type" "sse")
3173 (set_attr "prefix_rep" "1")
3174 (set_attr "prefix" "maybe_vex")
3175 (set_attr "mode" "V4SF")])
3177 (define_insn "avx_movsldup256"
3178 [(set (match_operand:V8SF 0 "register_operand" "=x")
3181 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3183 (parallel [(const_int 0) (const_int 0)
3184 (const_int 2) (const_int 2)
3185 (const_int 4) (const_int 4)
3186 (const_int 6) (const_int 6)])))]
3188 "vmovsldup\t{%1, %0|%0, %1}"
3189 [(set_attr "type" "sse")
3190 (set_attr "prefix" "vex")
3191 (set_attr "mode" "V8SF")])
3193 (define_insn "sse3_movsldup"
3194 [(set (match_operand:V4SF 0 "register_operand" "=x")
3197 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3199 (parallel [(const_int 0)
3204 "%vmovsldup\t{%1, %0|%0, %1}"
3205 [(set_attr "type" "sse")
3206 (set_attr "prefix_rep" "1")
3207 (set_attr "prefix" "maybe_vex")
3208 (set_attr "mode" "V4SF")])
;; Expand the 8-bit vshufps immediate (operand 3) into the eight explicit
;; element selectors taken by avx_shufps256_1.  The immediate holds four
;; 2-bit fields; each field is emitted twice, once for the low half of the
;; result and once, offset by 4, for the high half.  Fields 0 and 1 are
;; emitted in the ranges 0-3 / 4-7, fields 2 and 3 in the ranges
;; 8-11 / 12-15, matching the operand predicates of avx_shufps256_1.
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  /* The four 2-bit selector fields of the immediate.  */
  int f0 = (mask >> 0) & 3;
  int f1 = (mask >> 2) & 3;
  int f2 = (mask >> 4) & 3;
  int f3 = (mask >> 6) & 3;

  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
                                  GEN_INT (f0),
                                  GEN_INT (f1),
                                  GEN_INT (f2 + 8),
                                  GEN_INT (f3 + 8),
                                  GEN_INT (f0 + 4),
                                  GEN_INT (f1 + 4),
                                  GEN_INT (f2 + 12),
                                  GEN_INT (f3 + 12)));
  DONE;
})
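;; Each 2-bit field of the mask selects 2 elements: the same position in
;; the low and in the high 128-bit half (enforced by the insn condition).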
3231 (define_insn "avx_shufps256_1"
3232 [(set (match_operand:V8SF 0 "register_operand" "=x")
3235 (match_operand:V8SF 1 "register_operand" "x")
3236 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3237 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3238 (match_operand 4 "const_0_to_3_operand" "")
3239 (match_operand 5 "const_8_to_11_operand" "")
3240 (match_operand 6 "const_8_to_11_operand" "")
3241 (match_operand 7 "const_4_to_7_operand" "")
3242 (match_operand 8 "const_4_to_7_operand" "")
3243 (match_operand 9 "const_12_to_15_operand" "")
3244 (match_operand 10 "const_12_to_15_operand" "")])))]
3246 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3247 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3248 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3249 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3252 mask = INTVAL (operands[3]);
3253 mask |= INTVAL (operands[4]) << 2;
3254 mask |= (INTVAL (operands[5]) - 8) << 4;
3255 mask |= (INTVAL (operands[6]) - 8) << 6;
3256 operands[3] = GEN_INT (mask);
3258 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3260 [(set_attr "type" "sselog")
3261 (set_attr "prefix" "vex")
3262 (set_attr "mode" "V8SF")])
;; Expand the 8-bit shufps immediate (operand 3) into the four explicit
;; element selectors taken by sse_shufps_v4sf.  The two low 2-bit fields
;; are passed as 0-3; the two high fields are offset by 4, giving 4-7.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int mask = INTVAL (operands[3]);
  /* Selectors for result elements 0..3, in that order.  */
  rtx sel0 = GEN_INT ((mask >> 0) & 3);
  rtx sel1 = GEN_INT ((mask >> 2) & 3);
  rtx sel2 = GEN_INT (((mask >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((mask >> 6) & 3) + 4);

  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
                                  sel0, sel1, sel2, sel3));
  DONE;
})
3280 (define_insn "*avx_shufps_<mode>"
3281 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3282 (vec_select:SSEMODE4S
3283 (vec_concat:<ssedoublesizemode>
3284 (match_operand:SSEMODE4S 1 "register_operand" "x")
3285 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3286 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3287 (match_operand 4 "const_0_to_3_operand" "")
3288 (match_operand 5 "const_4_to_7_operand" "")
3289 (match_operand 6 "const_4_to_7_operand" "")])))]
3293 mask |= INTVAL (operands[3]) << 0;
3294 mask |= INTVAL (operands[4]) << 2;
3295 mask |= (INTVAL (operands[5]) - 4) << 4;
3296 mask |= (INTVAL (operands[6]) - 4) << 6;
3297 operands[3] = GEN_INT (mask);
3299 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3301 [(set_attr "type" "sselog")
3302 (set_attr "prefix" "vex")
3303 (set_attr "mode" "V4SF")])
3305 (define_insn "sse_shufps_<mode>"
3306 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3307 (vec_select:SSEMODE4S
3308 (vec_concat:<ssedoublesizemode>
3309 (match_operand:SSEMODE4S 1 "register_operand" "0")
3310 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3311 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3312 (match_operand 4 "const_0_to_3_operand" "")
3313 (match_operand 5 "const_4_to_7_operand" "")
3314 (match_operand 6 "const_4_to_7_operand" "")])))]
3318 mask |= INTVAL (operands[3]) << 0;
3319 mask |= INTVAL (operands[4]) << 2;
3320 mask |= (INTVAL (operands[5]) - 4) << 4;
3321 mask |= (INTVAL (operands[6]) - 4) << 6;
3322 operands[3] = GEN_INT (mask);
3324 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3326 [(set_attr "type" "sselog")
3327 (set_attr "mode" "V4SF")])
3329 (define_insn "sse_storehps"
3330 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3332 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3333 (parallel [(const_int 2) (const_int 3)])))]
3336 %vmovhps\t{%1, %0|%0, %1}
3337 %vmovhlps\t{%1, %d0|%d0, %1}
3338 %vmovlps\t{%H1, %d0|%d0, %H1}"
3339 [(set_attr "type" "ssemov")
3340 (set_attr "prefix" "maybe_vex")
3341 (set_attr "mode" "V2SF,V4SF,V2SF")])
3343 (define_expand "sse_loadhps_exp"
3344 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3347 (match_operand:V4SF 1 "nonimmediate_operand" "")
3348 (parallel [(const_int 0) (const_int 1)]))
3349 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3351 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3353 (define_insn "*avx_loadhps"
3354 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3357 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3358 (parallel [(const_int 0) (const_int 1)]))
3359 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3362 vmovhps\t{%2, %1, %0|%0, %1, %2}
3363 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3364 vmovlps\t{%2, %H0|%H0, %2}"
3365 [(set_attr "type" "ssemov")
3366 (set_attr "prefix" "vex")
3367 (set_attr "mode" "V2SF,V4SF,V2SF")])
3369 (define_insn "sse_loadhps"
3370 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3373 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3374 (parallel [(const_int 0) (const_int 1)]))
3375 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3378 movhps\t{%2, %0|%0, %2}
3379 movlhps\t{%2, %0|%0, %2}
3380 movlps\t{%2, %H0|%H0, %2}"
3381 [(set_attr "type" "ssemov")
3382 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Store/extract the low two floats (elements 0 and 1) of a V4SF value.
;;   alt 0: register -> memory, 64-bit store with vmovlps
;;   alt 1: register -> register, whole-register copy with vmovaps
;;   alt 2: memory -> register, 64-bit load with vmovlps
;; The "mode" attribute of the vmovaps alternative is V4SF, matching the
;; instruction actually emitted and the non-AVX sse_storelps pattern.
(define_insn "*avx_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX"
  "@
   vmovlps\t{%1, %0|%0, %1}
   vmovaps\t{%1, %0|%0, %1}
   vmovlps\t{%1, %0, %0|%0, %0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3398 (define_insn "sse_storelps"
3399 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3401 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3402 (parallel [(const_int 0) (const_int 1)])))]
3405 movlps\t{%1, %0|%0, %1}
3406 movaps\t{%1, %0|%0, %1}
3407 movlps\t{%1, %0|%0, %1}"
3408 [(set_attr "type" "ssemov")
3409 (set_attr "mode" "V2SF,V4SF,V2SF")])
3411 (define_expand "sse_loadlps_exp"
3412 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3414 (match_operand:V2SF 2 "nonimmediate_operand" "")
3416 (match_operand:V4SF 1 "nonimmediate_operand" "")
3417 (parallel [(const_int 2) (const_int 3)]))))]
3419 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Build a V4SF whose low two floats come from the V2SF operand 2 and
;; whose high two floats (elements 2 and 3) come from operand 1.
;;   alt 0: all registers; a shuffle with selector 0xe4 (fields 0,1,2,3)
;;          takes the low pair from operand 2 and the high pair from
;;          operand 1.  This is a three-operand VEX instruction, so the
;;          mnemonic must be vshufps.
;;   alt 1: operand 2 in memory, 64-bit load into the low half.
;;   alt 2: destination in memory (operand 1 tied to it), 64-bit store.
(define_insn "*avx_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "@
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   vmovlps\t{%2, %1, %0|%0, %1, %2}
   vmovlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3437 (define_insn "sse_loadlps"
3438 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3440 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3442 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3443 (parallel [(const_int 2) (const_int 3)]))))]
3446 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3447 movlps\t{%2, %0|%0, %2}
3448 movlps\t{%2, %0|%0, %2}"
3449 [(set_attr "type" "sselog,ssemov,ssemov")
3450 (set_attr "mode" "V4SF,V2SF,V2SF")])
3452 (define_insn "*avx_movss"
3453 [(set (match_operand:V4SF 0 "register_operand" "=x")
3455 (match_operand:V4SF 2 "register_operand" "x")
3456 (match_operand:V4SF 1 "register_operand" "x")
3459 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3460 [(set_attr "type" "ssemov")
3461 (set_attr "prefix" "vex")
3462 (set_attr "mode" "SF")])
3464 (define_insn "sse_movss"
3465 [(set (match_operand:V4SF 0 "register_operand" "=x")
3467 (match_operand:V4SF 2 "register_operand" "x")
3468 (match_operand:V4SF 1 "register_operand" "0")
3471 "movss\t{%2, %0|%0, %2}"
3472 [(set_attr "type" "ssemov")
3473 (set_attr "mode" "SF")])
3475 (define_insn "*vec_dupv4sf_avx"
3476 [(set (match_operand:V4SF 0 "register_operand" "=x")
3478 (match_operand:SF 1 "register_operand" "x")))]
3480 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3481 [(set_attr "type" "sselog1")
3482 (set_attr "prefix" "vex")
3483 (set_attr "mode" "V4SF")])
3485 (define_insn "*vec_dupv4sf"
3486 [(set (match_operand:V4SF 0 "register_operand" "=x")
3488 (match_operand:SF 1 "register_operand" "0")))]
3490 "shufps\t{$0, %0, %0|%0, %0, 0}"
3491 [(set_attr "type" "sselog1")
3492 (set_attr "mode" "V4SF")])
3494 (define_insn "*vec_concatv2sf_avx"
3495 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3497 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3498 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3501 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3502 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3503 vmovss\t{%1, %0|%0, %1}
3504 punpckldq\t{%2, %0|%0, %2}
3505 movd\t{%1, %0|%0, %1}"
3506 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3507 (set (attr "prefix")
3508 (if_then_else (eq_attr "alternative" "3,4")
3509 (const_string "orig")
3510 (const_string "vex")))
3511 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3513 ;; Although insertps takes register source, we prefer
3514 ;; unpcklps with register source since it is shorter.
3515 (define_insn "*vec_concatv2sf_sse4_1"
3516 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3518 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3519 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3522 unpcklps\t{%2, %0|%0, %2}
3523 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3524 movss\t{%1, %0|%0, %1}
3525 punpckldq\t{%2, %0|%0, %2}
3526 movd\t{%1, %0|%0, %1}"
3527 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3528 (set_attr "prefix_extra" "*,1,*,*,*")
3529 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3531 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3532 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3533 ;; alternatives pretty much forces the MMX alternative to be chosen.
3534 (define_insn "*vec_concatv2sf_sse"
3535 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3537 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3538 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3541 unpcklps\t{%2, %0|%0, %2}
3542 movss\t{%1, %0|%0, %1}
3543 punpckldq\t{%2, %0|%0, %2}
3544 movd\t{%1, %0|%0, %1}"
3545 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3546 (set_attr "mode" "V4SF,SF,DI,DI")])
3548 (define_insn "*vec_concatv4sf_avx"
3549 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3551 (match_operand:V2SF 1 "register_operand" " x,x")
3552 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3555 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3556 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3557 [(set_attr "type" "ssemov")
3558 (set_attr "prefix" "vex")
3559 (set_attr "mode" "V4SF,V2SF")])
3561 (define_insn "*vec_concatv4sf_sse"
3562 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3564 (match_operand:V2SF 1 "register_operand" " 0,0")
3565 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3568 movlhps\t{%2, %0|%0, %2}
3569 movhps\t{%2, %0|%0, %2}"
3570 [(set_attr "type" "ssemov")
3571 (set_attr "mode" "V4SF,V2SF")])
3573 (define_expand "vec_init<mode>"
3574 [(match_operand:SSEMODE 0 "register_operand" "")
3575 (match_operand 1 "" "")]
3578 ix86_expand_vector_init (false, operands[0], operands[1]);
3582 (define_insn "*vec_setv4sf_0_avx"
3583 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3586 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3587 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3591 vmovss\t{%2, %1, %0|%0, %1, %2}
3592 vmovss\t{%2, %0|%0, %2}
3593 vmovd\t{%2, %0|%0, %2}
3595 [(set_attr "type" "ssemov")
3596 (set_attr "prefix" "vex")
3597 (set_attr "mode" "SF")])
3599 (define_insn "vec_setv4sf_0"
3600 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3603 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3604 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3608 movss\t{%2, %0|%0, %2}
3609 movss\t{%2, %0|%0, %2}
3610 movd\t{%2, %0|%0, %2}
3612 [(set_attr "type" "ssemov")
3613 (set_attr "mode" "SF")])
3615 ;; A subset is vec_setv4sf.
3616 (define_insn "*vec_setv4sf_avx"
3617 [(set (match_operand:V4SF 0 "register_operand" "=x")
3620 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3621 (match_operand:V4SF 1 "register_operand" "x")
3622 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3625 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3626 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3628 [(set_attr "type" "sselog")
3629 (set_attr "prefix" "vex")
3630 (set_attr "mode" "V4SF")])
3632 (define_insn "*vec_setv4sf_sse4_1"
3633 [(set (match_operand:V4SF 0 "register_operand" "=x")
3636 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3637 (match_operand:V4SF 1 "register_operand" "0")
3638 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3641 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3642 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3644 [(set_attr "type" "sselog")
3645 (set_attr "prefix_extra" "1")
3646 (set_attr "mode" "V4SF")])
;; AVX form of insertps, kept as an unspec: operand 3 is passed through
;; unchanged as the 8-bit immediate, operand 2 is the source vector
;; (register or memory) and operand 1 the vector being inserted into.
(define_insn "*avx_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
                      (match_operand:V4SF 1 "register_operand" "x")
                      (match_operand:SI 3 "const_0_to_255_operand" "n")]
                     UNSPEC_INSERTPS))]
  "TARGET_AVX"
  "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
;; SSE4.1 insertps, kept as an unspec: operand 3 is passed through
;; unchanged as the 8-bit immediate, operand 2 is the source register and
;; operand 1 (tied to the destination) the vector being inserted into.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
                      (match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:SI 3 "const_0_to_255_operand" "n")]
                     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
  "insertps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
3673 [(set (match_operand:V4SF 0 "memory_operand" "")
3676 (match_operand:SF 1 "nonmemory_operand" ""))
3679 "TARGET_SSE && reload_completed"
3682 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
3686 (define_expand "vec_set<mode>"
3687 [(match_operand:SSEMODE 0 "register_operand" "")
3688 (match_operand:<ssescalarmode> 1 "register_operand" "")
3689 (match_operand 2 "const_int_operand" "")]
3692 ix86_expand_vector_set (false, operands[0], operands[1],
3693 INTVAL (operands[2]));
3697 (define_insn_and_split "*vec_extractv4sf_0"
3698 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3700 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3701 (parallel [(const_int 0)])))]
3702 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3704 "&& reload_completed"
3707 rtx op1 = operands[1];
3709 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3711 op1 = gen_lowpart (SFmode, op1);
3712 emit_move_insn (operands[0], op1);
;; Expander for extracting one 128-bit half of a 256-bit vector.
;; Operand 2 (0 or 1, per its predicate) chooses the half: 0 dispatches
;; to vec_extract_lo_<mode>, 1 to vec_extract_hi_<mode>.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
   (match_operand:AVX256MODE 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  HOST_WIDE_INT half = INTVAL (operands[2]);

  if (half == 0)
    emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
  else if (half == 1)
    emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
  else
    /* The operand predicate admits only 0 and 1.  */
    gcc_unreachable ();
  DONE;
})
3736 (define_insn "vec_extract_lo_<mode>"
3737 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3738 (vec_select:<avxhalfvecmode>
3739 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3740 (parallel [(const_int 0) (const_int 1)])))]
3742 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3743 [(set_attr "type" "sselog")
3744 (set_attr "memory" "none,store")
3745 (set_attr "prefix" "vex")
3746 (set_attr "mode" "V8SF")])
3748 (define_insn "vec_extract_hi_<mode>"
3749 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3750 (vec_select:<avxhalfvecmode>
3751 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3752 (parallel [(const_int 2) (const_int 3)])))]
3754 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3755 [(set_attr "type" "sselog")
3756 (set_attr "memory" "none,store")
3757 (set_attr "prefix" "vex")
3758 (set_attr "mode" "V8SF")])
;; Extract the low half (elements 0-3) of a 256-bit vector of the
;; AVX256MODE8P modes into a register or memory.  The vextractf128
;; immediate selects the 128-bit half: 0 is the low half, 1 the high
;; half, so the low-half pattern must use 0.
(define_insn "vec_extract_lo_<mode>"
  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<avxhalfvecmode>
          (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX"
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3773 (define_insn "vec_extract_hi_<mode>"
3774 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3775 (vec_select:<avxhalfvecmode>
3776 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3777 (parallel [(const_int 4) (const_int 5)
3778 (const_int 6) (const_int 7)])))]
3780 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3781 [(set_attr "type" "sselog")
3782 (set_attr "memory" "none,store")
3783 (set_attr "prefix" "vex")
3784 (set_attr "mode" "V8SF")])
;; Extract the low half (elements 0-7) of a V16HI vector as V8HI into a
;; register or memory.  The vextractf128 immediate selects the 128-bit
;; half: 0 is the low half, 1 the high half, so the low-half pattern
;; must use 0.
(define_insn "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "register_operand" "x,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3801 (define_insn "vec_extract_hi_v16hi"
3802 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3804 (match_operand:V16HI 1 "register_operand" "x,x")
3805 (parallel [(const_int 8) (const_int 9)
3806 (const_int 10) (const_int 11)
3807 (const_int 12) (const_int 13)
3808 (const_int 14) (const_int 15)])))]
3810 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3811 [(set_attr "type" "sselog")
3812 (set_attr "memory" "none,store")
3813 (set_attr "prefix" "vex")
3814 (set_attr "mode" "V8SF")])
;; Extract the low half (elements 0-15) of a V32QI vector as V16QI into a
;; register or memory.  The vextractf128 immediate selects the 128-bit
;; half: 0 is the low half, 1 the high half, so the low-half pattern
;; must use 0.
(define_insn "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "register_operand" "x,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3835 (define_insn "vec_extract_hi_v32qi"
3836 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3838 (match_operand:V32QI 1 "register_operand" "x,x")
3839 (parallel [(const_int 16) (const_int 17)
3840 (const_int 18) (const_int 19)
3841 (const_int 20) (const_int 21)
3842 (const_int 22) (const_int 23)
3843 (const_int 24) (const_int 25)
3844 (const_int 26) (const_int 27)
3845 (const_int 28) (const_int 29)
3846 (const_int 30) (const_int 31)])))]
3848 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3849 [(set_attr "type" "sselog")
3850 (set_attr "memory" "none,store")
3851 (set_attr "prefix" "vex")
3852 (set_attr "mode" "V8SF")])
3854 (define_insn "*sse4_1_extractps"
3855 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3857 (match_operand:V4SF 1 "register_operand" "x")
3858 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3860 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3861 [(set_attr "type" "sselog")
3862 (set_attr "prefix_extra" "1")
3863 (set_attr "prefix" "maybe_vex")
3864 (set_attr "mode" "V4SF")])
3866 (define_insn_and_split "*vec_extract_v4sf_mem"
3867 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3869 (match_operand:V4SF 1 "memory_operand" "o")
3870 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3876 int i = INTVAL (operands[2]);
3878 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3882 (define_expand "vec_extract<mode>"
3883 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3884 (match_operand:SSEMODE 1 "register_operand" "")
3885 (match_operand 2 "const_int_operand" "")]
3888 ix86_expand_vector_extract (false, operands[0], operands[1],
3889 INTVAL (operands[2]));
3893 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3895 ;; Parallel double-precision floating point element swizzling
3897 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3899 (define_insn "avx_unpckhpd256"
3900 [(set (match_operand:V4DF 0 "register_operand" "=x")
3903 (match_operand:V4DF 1 "register_operand" "x")
3904 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3905 (parallel [(const_int 1) (const_int 5)
3906 (const_int 3) (const_int 7)])))]
3908 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3909 [(set_attr "type" "sselog")
3910 (set_attr "prefix" "vex")
3911 (set_attr "mode" "V4DF")])
3913 (define_expand "sse2_unpckhpd_exp"
3914 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
3917 (match_operand:V2DF 1 "nonimmediate_operand" "")
3918 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3919 (parallel [(const_int 1)
3922 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
3924 (define_insn "*avx_unpckhpd"
3925 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3928 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
3929 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
3930 (parallel [(const_int 1)
3932 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3934 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3935 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3936 vmovhpd\t{%1, %0|%0, %1}"
3937 [(set_attr "type" "sselog,ssemov,ssemov")
3938 (set_attr "prefix" "vex")
3939 (set_attr "mode" "V2DF,V1DF,V1DF")])
3941 (define_insn "sse2_unpckhpd"
3942 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3945 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3946 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3947 (parallel [(const_int 1)
3949 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3951 unpckhpd\t{%2, %0|%0, %2}
3952 movlpd\t{%H1, %0|%0, %H1}
3953 movhpd\t{%1, %0|%0, %1}"
3954 [(set_attr "type" "sselog,ssemov,ssemov")
3955 (set_attr "mode" "V2DF,V1DF,V1DF")])
3957 (define_insn "avx_movddup256"
3958 [(set (match_operand:V4DF 0 "register_operand" "=x")
3961 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
3963 (parallel [(const_int 0) (const_int 2)
3964 (const_int 4) (const_int 6)])))]
3966 "vmovddup\t{%1, %0|%0, %1}"
3967 [(set_attr "type" "sselog1")
3968 (set_attr "prefix" "vex")
3969 (set_attr "mode" "V4DF")])
3971 (define_insn "*avx_movddup"
3972 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3975 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3977 (parallel [(const_int 0)
3979 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3981 vmovddup\t{%1, %0|%0, %1}
3983 [(set_attr "type" "sselog1,ssemov")
3984 (set_attr "prefix" "vex")
3985 (set_attr "mode" "V2DF")])
3987 (define_insn "*sse3_movddup"
3988 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3991 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3993 (parallel [(const_int 0)
3995 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3997 movddup\t{%1, %0|%0, %1}
3999 [(set_attr "type" "sselog1,ssemov")
4000 (set_attr "mode" "V2DF")])
4003 [(set (match_operand:V2DF 0 "memory_operand" "")
4006 (match_operand:V2DF 1 "register_operand" "")
4008 (parallel [(const_int 0)
4010 "TARGET_SSE3 && reload_completed"
4013 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4014 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4015 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4019 (define_insn "avx_unpcklpd256"
4020 [(set (match_operand:V4DF 0 "register_operand" "=x")
4023 (match_operand:V4DF 1 "register_operand" "x")
4024 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4025 (parallel [(const_int 0) (const_int 4)
4026 (const_int 2) (const_int 6)])))]
4028 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4029 [(set_attr "type" "sselog")
4030 (set_attr "prefix" "vex")
4031 (set_attr "mode" "V4DF")])
4033 (define_expand "sse2_unpcklpd_exp"
4034 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4037 (match_operand:V2DF 1 "nonimmediate_operand" "")
4038 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4039 (parallel [(const_int 0)
4042 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4044 (define_insn "*avx_unpcklpd"
4045 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4048 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4049 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4050 (parallel [(const_int 0)
4052 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4054 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4055 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4056 vmovlpd\t{%2, %H0|%H0, %2}"
4057 [(set_attr "type" "sselog,ssemov,ssemov")
4058 (set_attr "prefix" "vex")
4059 (set_attr "mode" "V2DF,V1DF,V1DF")])
4061 (define_insn "sse2_unpcklpd"
4062 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4065 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4066 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4067 (parallel [(const_int 0)
4069 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4071 unpcklpd\t{%2, %0|%0, %2}
4072 movhpd\t{%2, %0|%0, %2}
4073 movlpd\t{%2, %H0|%H0, %2}"
4074 [(set_attr "type" "sselog,ssemov,ssemov")
4075 (set_attr "mode" "V2DF,V1DF,V1DF")])
4077 (define_expand "avx_shufpd256"
4078 [(match_operand:V4DF 0 "register_operand" "")
4079 (match_operand:V4DF 1 "register_operand" "")
4080 (match_operand:V4DF 2 "nonimmediate_operand" "")
4081 (match_operand:SI 3 "const_int_operand" "")]
4084 int mask = INTVAL (operands[3]);
4085 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4087 GEN_INT (mask & 2 ? 5 : 4),
4088 GEN_INT (mask & 4 ? 3 : 2),
4089 GEN_INT (mask & 8 ? 7 : 6)));
4093 (define_insn "avx_shufpd256_1"
4094 [(set (match_operand:V4DF 0 "register_operand" "=x")
4097 (match_operand:V4DF 1 "register_operand" "x")
4098 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4099 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4100 (match_operand 4 "const_4_to_5_operand" "")
4101 (match_operand 5 "const_2_to_3_operand" "")
4102 (match_operand 6 "const_6_to_7_operand" "")])))]
4106 mask = INTVAL (operands[3]);
4107 mask |= (INTVAL (operands[4]) - 4) << 1;
4108 mask |= (INTVAL (operands[5]) - 2) << 2;
4109 mask |= (INTVAL (operands[6]) - 6) << 3;
4110 operands[3] = GEN_INT (mask);
4112 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4114 [(set_attr "type" "sselog")
4115 (set_attr "prefix" "vex")
4116 (set_attr "mode" "V4DF")])
4118 (define_expand "sse2_shufpd"
4119 [(match_operand:V2DF 0 "register_operand" "")
4120 (match_operand:V2DF 1 "register_operand" "")
4121 (match_operand:V2DF 2 "nonimmediate_operand" "")
4122 (match_operand:SI 3 "const_int_operand" "")]
4125 int mask = INTVAL (operands[3]);
4126 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4128 GEN_INT (mask & 2 ? 3 : 2)));
4132 (define_expand "vec_extract_even<mode>"
4133 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4134 (vec_select:SSEMODE4S
4135 (vec_concat:<ssedoublesizemode>
4136 (match_operand:SSEMODE4S 1 "register_operand" "")
4137 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4138 (parallel [(const_int 0)
4144 (define_expand "vec_extract_odd<mode>"
4145 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4146 (vec_select:SSEMODE4S
4147 (vec_concat:<ssedoublesizemode>
4148 (match_operand:SSEMODE4S 1 "register_operand" "")
4149 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4150 (parallel [(const_int 1)
4156 (define_expand "vec_extract_even<mode>"
4157 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4158 (vec_select:SSEMODE2D
4159 (vec_concat:<ssedoublesizemode>
4160 (match_operand:SSEMODE2D 1 "register_operand" "")
4161 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4162 (parallel [(const_int 0)
4166 (define_expand "vec_extract_odd<mode>"
4167 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4168 (vec_select:SSEMODE2D
4169 (vec_concat:<ssedoublesizemode>
4170 (match_operand:SSEMODE2D 1 "register_operand" "")
4171 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4172 (parallel [(const_int 1)
4176 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
4177 (define_insn "*avx_punpckhqdq"
4178 [(set (match_operand:V2DI 0 "register_operand" "=x")
4181 (match_operand:V2DI 1 "register_operand" "x")
4182 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4183 (parallel [(const_int 1)
4186 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4187 [(set_attr "type" "sselog")
4188 (set_attr "prefix" "vex")
4189 (set_attr "mode" "TI")])
4191 (define_insn "sse2_punpckhqdq"
4192 [(set (match_operand:V2DI 0 "register_operand" "=x")
4195 (match_operand:V2DI 1 "register_operand" "0")
4196 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4197 (parallel [(const_int 1)
4200 "punpckhqdq\t{%2, %0|%0, %2}"
4201 [(set_attr "type" "sselog")
4202 (set_attr "prefix_data16" "1")
4203 (set_attr "mode" "TI")])
4205 (define_insn "*avx_punpcklqdq"
4206 [(set (match_operand:V2DI 0 "register_operand" "=x")
4209 (match_operand:V2DI 1 "register_operand" "x")
4210 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4211 (parallel [(const_int 0)
4214 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4215 [(set_attr "type" "sselog")
4216 (set_attr "prefix" "vex")
4217 (set_attr "mode" "TI")])
4219 (define_insn "sse2_punpcklqdq"
4220 [(set (match_operand:V2DI 0 "register_operand" "=x")
4223 (match_operand:V2DI 1 "register_operand" "0")
4224 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4225 (parallel [(const_int 0)
4228 "punpcklqdq\t{%2, %0|%0, %2}"
4229 [(set_attr "type" "sselog")
4230 (set_attr "prefix_data16" "1")
4231 (set_attr "mode" "TI")])
4233 (define_insn "*avx_shufpd_<mode>"
4234 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4235 (vec_select:SSEMODE2D
4236 (vec_concat:<ssedoublesizemode>
4237 (match_operand:SSEMODE2D 1 "register_operand" "x")
4238 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4239 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4240 (match_operand 4 "const_2_to_3_operand" "")])))]
4244 mask = INTVAL (operands[3]);
4245 mask |= (INTVAL (operands[4]) - 2) << 1;
4246 operands[3] = GEN_INT (mask);
4248 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4250 [(set_attr "type" "sselog")
4251 (set_attr "prefix" "vex")
4252 (set_attr "mode" "V2DF")])
4254 (define_insn "sse2_shufpd_<mode>"
4255 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4256 (vec_select:SSEMODE2D
4257 (vec_concat:<ssedoublesizemode>
4258 (match_operand:SSEMODE2D 1 "register_operand" "0")
4259 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4260 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4261 (match_operand 4 "const_2_to_3_operand" "")])))]
4265 mask = INTVAL (operands[3]);
4266 mask |= (INTVAL (operands[4]) - 2) << 1;
4267 operands[3] = GEN_INT (mask);
4269 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4271 [(set_attr "type" "sselog")
4272 (set_attr "mode" "V2DF")])
4274 ;; Avoid combining registers from different units in a single alternative,
4275 ;; see comment above inline_secondary_memory_needed function in i386.c
4276 (define_insn "*avx_storehpd"
4277 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4279 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4280 (parallel [(const_int 1)])))]
4281 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4283 vmovhpd\t{%1, %0|%0, %1}
4284 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4288 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4289 (set_attr "prefix" "vex")
4290 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4292 (define_insn "sse2_storehpd"
4293 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4295 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4296 (parallel [(const_int 1)])))]
4297 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4299 movhpd\t{%1, %0|%0, %1}
4304 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4305 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4308 [(set (match_operand:DF 0 "register_operand" "")
4310 (match_operand:V2DF 1 "memory_operand" "")
4311 (parallel [(const_int 1)])))]
4312 "TARGET_SSE2 && reload_completed"
4313 [(set (match_dup 0) (match_dup 1))]
4315 operands[1] = adjust_address (operands[1], DFmode, 8);
4318 ;; Avoid combining registers from different units in a single alternative,
4319 ;; see comment above inline_secondary_memory_needed function in i386.c
4320 (define_insn "sse2_storelpd"
4321 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4323 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4324 (parallel [(const_int 0)])))]
4325 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4327 %vmovlpd\t{%1, %0|%0, %1}
4332 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4333 (set_attr "prefix" "maybe_vex")
4334 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4337 [(set (match_operand:DF 0 "register_operand" "")
4339 (match_operand:V2DF 1 "nonimmediate_operand" "")
4340 (parallel [(const_int 0)])))]
4341 "TARGET_SSE2 && reload_completed"
4344 rtx op1 = operands[1];
4346 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4348 op1 = gen_lowpart (DFmode, op1);
4349 emit_move_insn (operands[0], op1);
4353 (define_expand "sse2_loadhpd_exp"
4354 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4357 (match_operand:V2DF 1 "nonimmediate_operand" "")
4358 (parallel [(const_int 0)]))
4359 (match_operand:DF 2 "nonimmediate_operand" "")))]
4361 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4363 ;; Avoid combining registers from different units in a single alternative,
4364 ;; see comment above inline_secondary_memory_needed function in i386.c
4365 (define_insn "*avx_loadhpd"
4366 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4369 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4370 (parallel [(const_int 0)]))
4371 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4372 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4374 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4375 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4379 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4380 (set_attr "prefix" "vex")
4381 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4383 (define_insn "sse2_loadhpd"
4384 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4387 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4388 (parallel [(const_int 0)]))
4389 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4390 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4392 movhpd\t{%2, %0|%0, %2}
4393 unpcklpd\t{%2, %0|%0, %2}
4394 shufpd\t{$1, %1, %0|%0, %1, 1}
4398 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4399 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
4402 [(set (match_operand:V2DF 0 "memory_operand" "")
4404 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4405 (match_operand:DF 1 "register_operand" "")))]
4406 "TARGET_SSE2 && reload_completed"
4407 [(set (match_dup 0) (match_dup 1))]
4409 operands[0] = adjust_address (operands[0], DFmode, 8);
4412 (define_expand "sse2_loadlpd_exp"
4413 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4415 (match_operand:DF 2 "nonimmediate_operand" "")
4417 (match_operand:V2DF 1 "nonimmediate_operand" "")
4418 (parallel [(const_int 1)]))))]
4420 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4422 ;; Avoid combining registers from different units in a single alternative,
4423 ;; see comment above inline_secondary_memory_needed function in i386.c
4424 (define_insn "*avx_loadlpd"
4425 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4427 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4429 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4430 (parallel [(const_int 1)]))))]
4431 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4433 vmovsd\t{%2, %0|%0, %2}
4434 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4435 vmovsd\t{%2, %1, %0|%0, %1, %2}
4436 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4440 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4441 (set_attr "prefix" "vex")
4442 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
4444 (define_insn "sse2_loadlpd"
4445 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4447 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4449 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4450 (parallel [(const_int 1)]))))]
4451 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4453 movsd\t{%2, %0|%0, %2}
4454 movlpd\t{%2, %0|%0, %2}
4455 movsd\t{%2, %0|%0, %2}
4456 shufpd\t{$2, %2, %0|%0, %2, 2}
4457 movhpd\t{%H1, %0|%0, %H1}
4461 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4462 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
4465 [(set (match_operand:V2DF 0 "memory_operand" "")
4467 (match_operand:DF 1 "register_operand" "")
4468 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4469 "TARGET_SSE2 && reload_completed"
4470 [(set (match_dup 0) (match_dup 1))]
4472 operands[0] = adjust_address (operands[0], DFmode, 8);
4475 ;; Not sure these two are ever used, but it doesn't hurt to have them.
4477 (define_insn "*vec_extractv2df_1_sse"
4478 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4480 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4481 (parallel [(const_int 1)])))]
4482 "!TARGET_SSE2 && TARGET_SSE
4483 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4485 movhps\t{%1, %0|%0, %1}
4486 movhlps\t{%1, %0|%0, %1}
4487 movlps\t{%H1, %0|%0, %H1}"
4488 [(set_attr "type" "ssemov")
4489 (set_attr "mode" "V2SF,V4SF,V2SF")])
4491 (define_insn "*vec_extractv2df_0_sse"
4492 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4494 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4495 (parallel [(const_int 0)])))]
4496 "!TARGET_SSE2 && TARGET_SSE
4497 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4499 movlps\t{%1, %0|%0, %1}
4500 movaps\t{%1, %0|%0, %1}
4501 movlps\t{%1, %0|%0, %1}"
4502 [(set_attr "type" "ssemov")
4503 (set_attr "mode" "V2SF,V4SF,V2SF")])
4505 (define_insn "*avx_movsd"
4506 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4508 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4509 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4513 vmovsd\t{%2, %1, %0|%0, %1, %2}
4514 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4515 vmovlpd\t{%2, %0|%0, %2}
4516 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4517 vmovhps\t{%1, %H0|%H0, %1}"
4518 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4519 (set_attr "prefix" "vex")
4520 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
4522 (define_insn "sse2_movsd"
4523 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4525 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4526 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4530 movsd\t{%2, %0|%0, %2}
4531 movlpd\t{%2, %0|%0, %2}
4532 movlpd\t{%2, %0|%0, %2}
4533 shufpd\t{$2, %2, %0|%0, %2, 2}
4534 movhps\t{%H1, %0|%0, %H1}
4535 movhps\t{%1, %H0|%H0, %1}"
4536 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4537 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
4539 (define_insn "*vec_dupv2df_sse3"
4540 [(set (match_operand:V2DF 0 "register_operand" "=x")
4542 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4544 "%vmovddup\t{%1, %0|%0, %1}"
4545 [(set_attr "type" "sselog1")
4546 (set_attr "prefix" "maybe_vex")
4547 (set_attr "mode" "DF")])
4549 (define_insn "vec_dupv2df"
4550 [(set (match_operand:V2DF 0 "register_operand" "=x")
4552 (match_operand:DF 1 "register_operand" "0")))]
4555 [(set_attr "type" "sselog1")
4556 (set_attr "mode" "V2DF")])
4558 (define_insn "*vec_concatv2df_sse3"
4559 [(set (match_operand:V2DF 0 "register_operand" "=x")
4561 (match_operand:DF 1 "nonimmediate_operand" "xm")
4564 "%vmovddup\t{%1, %0|%0, %1}"
4565 [(set_attr "type" "sselog1")
4566 (set_attr "prefix" "maybe_vex")
4567 (set_attr "mode" "DF")])
4569 (define_insn "*vec_concatv2df_avx"
4570 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4572 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4573 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4576 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4577 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4578 vmovsd\t{%1, %0|%0, %1}"
4579 [(set_attr "type" "ssemov")
4580 (set_attr "prefix" "vex")
4581 (set_attr "mode" "DF,V1DF,DF")])
4583 (define_insn "*vec_concatv2df"
4584 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4586 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4587 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4590 unpcklpd\t{%2, %0|%0, %2}
4591 movhpd\t{%2, %0|%0, %2}
4592 movsd\t{%1, %0|%0, %1}
4593 movlhps\t{%2, %0|%0, %2}
4594 movhps\t{%2, %0|%0, %2}"
4595 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4596 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4598 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4600 ;; Parallel integral arithmetic
4602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4604 (define_expand "neg<mode>2"
4605 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4608 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4610 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
4612 (define_expand "<plusminus_insn><mode>3"
4613 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4615 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4616 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4618 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4620 (define_insn "*avx_<plusminus_insn><mode>3"
4621 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4623 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4624 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4625 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4626 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4627 [(set_attr "type" "sseiadd")
4628 (set_attr "prefix" "vex")
4629 (set_attr "mode" "TI")])
4631 (define_insn "*<plusminus_insn><mode>3"
4632 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4634 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4635 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4636 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4637 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4638 [(set_attr "type" "sseiadd")
4639 (set_attr "prefix_data16" "1")
4640 (set_attr "mode" "TI")])
4642 (define_expand "sse2_<plusminus_insn><mode>3"
4643 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4644 (sat_plusminus:SSEMODE12
4645 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4646 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4648 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX variant of the SSEMODE12 (V16QI, V8HI) arithmetic patterns built on
;; the sat_plusminus code iterator.  The iterator is defined outside this
;; part of the file; presumably it enumerates the saturating add/subtract
;; codes -- confirm against its definition.
;; Operand 0 is the destination register, operand 1 a register source
;; (constraint "<comm>x") and operand 2 a register or memory source ("xm").
;; The insn is available only when TARGET_AVX holds and
;; ix86_binary_operator_ok accepts the operands.  It is output in the
;; three-operand form vp<plusminus_mnemonic><ssevecsize> and carries the
;; "vex" prefix attribute.
4650 (define_insn "*avx_<plusminus_insn><mode>3"
4651 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4652 (sat_plusminus:SSEMODE12
4653 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4654 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4655 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4656 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4657 [(set_attr "type" "sseiadd")
4658 (set_attr "prefix" "vex")
4659 (set_attr "mode" "TI")])
;; Two-operand form of the sat_plusminus patterns for the SSEMODE12 modes
;; (V16QI, V8HI), enabled by TARGET_SSE2 when ix86_binary_operator_ok
;; accepts the operands.
;; Operand 1 is tied to the destination through the "0" constraint, so the
;; output template names only %0 and %2; operand 2 may be a register or
;; memory ("xm").  The insn is marked as needing the data16 prefix.
4661 (define_insn "*sse2_<plusminus_insn><mode>3"
4662 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4663 (sat_plusminus:SSEMODE12
4664 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4665 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4666 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4667 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4668 [(set_attr "type" "sseiadd")
4669 (set_attr "prefix_data16" "1")
4670 (set_attr "mode" "TI")])
4672 (define_insn_and_split "mulv16qi3"
4673 [(set (match_operand:V16QI 0 "register_operand" "")
4674 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4675 (match_operand:V16QI 2 "register_operand" "")))]
4677 && !(reload_completed || reload_in_progress)"
4682 rtx t[12], op0, op[3];
4687 /* On SSE5, we can take advantage of the pperm instruction to pack and
4688 unpack the bytes. Unpack data such that we've got a source byte in
4689 each low byte of each word. We don't care what goes into the high
4690 byte, so put 0 there. */
4691 for (i = 0; i < 6; ++i)
4692 t[i] = gen_reg_rtx (V8HImode);
4694 for (i = 0; i < 2; i++)
4697 op[1] = operands[i+1];
4698 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4701 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4704 /* Multiply words. */
4705 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4706 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4708 /* Pack the low byte of each word back into a single xmm */
4709 op[0] = operands[0];
4712 ix86_expand_sse5_pack (op);
4716 for (i = 0; i < 12; ++i)
4717 t[i] = gen_reg_rtx (V16QImode);
4719 /* Unpack data such that we've got a source byte in each low byte of
4720 each word. We don't care what goes into the high byte of each word.
4721 Rather than trying to get zero in there, most convenient is to let
4722 it be a copy of the low byte. */
4723 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4724 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4725 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4726 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4728 /* Multiply words. The end-of-line annotations here give a picture of what
4729 the output of that instruction looks like. Dot means don't care; the
4730 letters are the bytes of the result with A being the most significant. */
4731 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4732 gen_lowpart (V8HImode, t[0]),
4733 gen_lowpart (V8HImode, t[1])));
4734 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4735 gen_lowpart (V8HImode, t[2]),
4736 gen_lowpart (V8HImode, t[3])));
4738 /* Extract the relevant bytes and merge them back together. */
4739 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4740 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4741 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4742 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4743 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4744 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4747 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
4751 (define_expand "mulv8hi3"
4752 [(set (match_operand:V8HI 0 "register_operand" "")
4753 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4754 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4756 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI element-wise multiply (mult:V8HI), output as the
;; three-operand vpmullw.
;; The "%" in operand 1's constraint marks operands 1 and 2 as commutative.
;; Operand 1 must end up in a register ("x"); operand 2 may be a register
;; or memory ("xm").  Requires TARGET_AVX and operands accepted by
;; ix86_binary_operator_ok.
4758 (define_insn "*avx_mulv8hi3"
4759 [(set (match_operand:V8HI 0 "register_operand" "=x")
4760 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4761 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4762 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4763 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4764 [(set_attr "type" "sseimul")
4765 (set_attr "prefix" "vex")
4766 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI element-wise multiply (mult:V8HI), output as the
;; two-operand pmullw.
;; Operand 1 is tied to the destination ("0") and is commutative with
;; operand 2 ("%"); operand 2 may be a register or memory ("xm").
;; Requires TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
4768 (define_insn "*mulv8hi3"
4769 [(set (match_operand:V8HI 0 "register_operand" "=x")
4770 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4771 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4772 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4773 "pmullw\t{%2, %0|%0, %2}"
4774 [(set_attr "type" "sseimul")
4775 (set_attr "prefix_data16" "1")
4776 (set_attr "mode" "TI")])
4778 (define_expand "smulv8hi3_highpart"
4779 [(set (match_operand:V8HI 0 "register_operand" "")
4784 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4786 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4789 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the signed V8HI high-part multiply, output as the
;; three-operand vpmulhw.  The pattern is named *avx_smulv8hi3_highpart so
;; that it lines up with its siblings *smulv8hi3_highpart (SSE2, pmulhw)
;; and *avx_umulv8hi3_highpart (AVX, vpmulhuw); a name starting with "*"
;; is not used to generate a gen_* function, so only dumps and debugging
;; output see the name.
;; Operand 1 must be in a register and is commutative with operand 2
;; ("%x"); operand 2 may be a register or memory ("xm").
4791 (define_insn "*avx_smulv8hi3_highpart"
4792 [(set (match_operand:V8HI 0 "register_operand" "=x")
4797 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4799 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4801 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4802 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4803 [(set_attr "type" "sseimul")
4804 (set_attr "prefix" "vex")
4805 (set_attr "mode" "TI")])
4807 (define_insn "*smulv8hi3_highpart"
4808 [(set (match_operand:V8HI 0 "register_operand" "=x")
4813 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4815 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4817 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4818 "pmulhw\t{%2, %0|%0, %2}"
4819 [(set_attr "type" "sseimul")
4820 (set_attr "prefix_data16" "1")
4821 (set_attr "mode" "TI")])
4823 (define_expand "umulv8hi3_highpart"
4824 [(set (match_operand:V8HI 0 "register_operand" "")
4829 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4831 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4834 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4836 (define_insn "*avx_umulv8hi3_highpart"
4837 [(set (match_operand:V8HI 0 "register_operand" "=x")
4842 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4844 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4846 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4847 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
4848 [(set_attr "type" "sseimul")
4849 (set_attr "prefix" "vex")
4850 (set_attr "mode" "TI")])
4852 (define_insn "*umulv8hi3_highpart"
4853 [(set (match_operand:V8HI 0 "register_operand" "=x")
4858 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4860 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4862 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4863 "pmulhuw\t{%2, %0|%0, %2}"
4864 [(set_attr "type" "sseimul")
4865 (set_attr "prefix_data16" "1")
4866 (set_attr "mode" "TI")])
4868 (define_expand "sse2_umulv2siv2di3"
4869 [(set (match_operand:V2DI 0 "register_operand" "")
4873 (match_operand:V4SI 1 "nonimmediate_operand" "")
4874 (parallel [(const_int 0) (const_int 2)])))
4877 (match_operand:V4SI 2 "nonimmediate_operand" "")
4878 (parallel [(const_int 0) (const_int 2)])))))]
4880 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
4882 (define_insn "*avx_umulv2siv2di3"
4883 [(set (match_operand:V2DI 0 "register_operand" "=x")
4887 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4888 (parallel [(const_int 0) (const_int 2)])))
4891 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4892 (parallel [(const_int 0) (const_int 2)])))))]
4893 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4894 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4895 [(set_attr "type" "sseimul")
4896 (set_attr "prefix" "vex")
4897 (set_attr "mode" "TI")])
4899 (define_insn "*sse2_umulv2siv2di3"
4900 [(set (match_operand:V2DI 0 "register_operand" "=x")
4904 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4905 (parallel [(const_int 0) (const_int 2)])))
4908 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4909 (parallel [(const_int 0) (const_int 2)])))))]
4910 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4911 "pmuludq\t{%2, %0|%0, %2}"
4912 [(set_attr "type" "sseimul")
4913 (set_attr "prefix_data16" "1")
4914 (set_attr "mode" "TI")])
4916 (define_expand "sse4_1_mulv2siv2di3"
4917 [(set (match_operand:V2DI 0 "register_operand" "")
4921 (match_operand:V4SI 1 "nonimmediate_operand" "")
4922 (parallel [(const_int 0) (const_int 2)])))
4925 (match_operand:V4SI 2 "nonimmediate_operand" "")
4926 (parallel [(const_int 0) (const_int 2)])))))]
4928 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
4930 (define_insn "*avx_mulv2siv2di3"
4931 [(set (match_operand:V2DI 0 "register_operand" "=x")
4935 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4936 (parallel [(const_int 0) (const_int 2)])))
4939 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4940 (parallel [(const_int 0) (const_int 2)])))))]
4941 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4942 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4943 [(set_attr "type" "sseimul")
4944 (set_attr "prefix" "vex")
4945 (set_attr "mode" "TI")])
4947 (define_insn "*sse4_1_mulv2siv2di3"
4948 [(set (match_operand:V2DI 0 "register_operand" "=x")
4952 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4953 (parallel [(const_int 0) (const_int 2)])))
4956 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4957 (parallel [(const_int 0) (const_int 2)])))))]
4958 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4959 "pmuldq\t{%2, %0|%0, %2}"
4960 [(set_attr "type" "sseimul")
4961 (set_attr "prefix_extra" "1")
4962 (set_attr "mode" "TI")])
4964 (define_expand "sse2_pmaddwd"
4965 [(set (match_operand:V4SI 0 "register_operand" "")
4970 (match_operand:V8HI 1 "nonimmediate_operand" "")
4971 (parallel [(const_int 0)
4977 (match_operand:V8HI 2 "nonimmediate_operand" "")
4978 (parallel [(const_int 0)
4984 (vec_select:V4HI (match_dup 1)
4985 (parallel [(const_int 1)
4990 (vec_select:V4HI (match_dup 2)
4991 (parallel [(const_int 1)
4994 (const_int 7)]))))))]
4996 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4998 (define_insn "*avx_pmaddwd"
4999 [(set (match_operand:V4SI 0 "register_operand" "=x")
5004 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5005 (parallel [(const_int 0)
5011 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5012 (parallel [(const_int 0)
5018 (vec_select:V4HI (match_dup 1)
5019 (parallel [(const_int 1)
5024 (vec_select:V4HI (match_dup 2)
5025 (parallel [(const_int 1)
5028 (const_int 7)]))))))]
5029 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5030 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5031 [(set_attr "type" "sseiadd")
5032 (set_attr "prefix" "vex")
5033 (set_attr "mode" "TI")])
5035 (define_insn "*sse2_pmaddwd"
5036 [(set (match_operand:V4SI 0 "register_operand" "=x")
5041 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5042 (parallel [(const_int 0)
5048 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5049 (parallel [(const_int 0)
5055 (vec_select:V4HI (match_dup 1)
5056 (parallel [(const_int 1)
5061 (vec_select:V4HI (match_dup 2)
5062 (parallel [(const_int 1)
5065 (const_int 7)]))))))]
5066 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5067 "pmaddwd\t{%2, %0|%0, %2}"
5068 [(set_attr "type" "sseiadd")
5069 (set_attr "prefix_data16" "1")
5070 (set_attr "mode" "TI")])
5072 (define_expand "mulv4si3"
5073 [(set (match_operand:V4SI 0 "register_operand" "")
5074 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5075 (match_operand:V4SI 2 "register_operand" "")))]
5078 if (TARGET_SSE4_1 || TARGET_SSE5)
5079 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI element-wise multiply (mult:V4SI), output as the
;; three-operand vpmulld.
;; Operand 1 must be in a register and is commutative with operand 2
;; ("%x"); operand 2 may be a register or memory ("xm").  Requires
;; TARGET_AVX and operands accepted by ix86_binary_operator_ok.
5082 (define_insn "*avx_mulv4si3"
5083 [(set (match_operand:V4SI 0 "register_operand" "=x")
5084 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5085 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5086 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5087 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5088 [(set_attr "type" "sseimul")
5089 (set_attr "prefix" "vex")
5090 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI element-wise multiply (mult:V4SI), output as
;; the two-operand pmulld.
;; Operand 1 is tied to the destination ("0") and is commutative with
;; operand 2 ("%"); operand 2 may be a register or memory ("xm").
;; Requires TARGET_SSE4_1 and operands accepted by
;; ix86_binary_operator_ok.  The insn sets the prefix_extra attribute.
5092 (define_insn "*sse4_1_mulv4si3"
5093 [(set (match_operand:V4SI 0 "register_operand" "=x")
5094 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5095 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5096 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5097 "pmulld\t{%2, %0|%0, %2}"
5098 [(set_attr "type" "sseimul")
5099 (set_attr "prefix_extra" "1")
5100 (set_attr "mode" "TI")])
5102 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5103 ;; multiply/add. In general, we expect the define_split to occur before
5104 ;; register allocation, so we have to handle the corner case where the target
5105 ;; is the same as one of the inputs.
5106 (define_insn_and_split "*sse5_mulv4si3"
5107 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5108 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5109 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5112 "&& (reload_completed
5113 || (!reg_mentioned_p (operands[0], operands[1])
5114 && !reg_mentioned_p (operands[0], operands[2])))"
5118 (plus:V4SI (mult:V4SI (match_dup 1)
5122 operands[3] = CONST0_RTX (V4SImode);
5124 [(set_attr "type" "ssemuladd")
5125 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor SSE5.
;; Only valid before reload.  The split emits: a widening multiply of
;; elements 2 and 0, TImode right shifts of both inputs so elements 3 and 1
;; land in the slots for 2 and 0, a second widening multiply, two pshufd
;; moves of the wanted results, and a final punpckldq merge into op0.
5127 (define_insn_and_split "*sse2_mulv4si3"
5128 [(set (match_operand:V4SI 0 "register_operand" "")
5129 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5130 (match_operand:V4SI 2 "register_operand" "")))]
5131 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5132 && !(reload_completed || reload_in_progress)"
5137 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5143 t1 = gen_reg_rtx (V4SImode);
5144 t2 = gen_reg_rtx (V4SImode);
5145 t3 = gen_reg_rtx (V4SImode);
5146 t4 = gen_reg_rtx (V4SImode);
5147 t5 = gen_reg_rtx (V4SImode);
5148 t6 = gen_reg_rtx (V4SImode);
5149 thirtytwo = GEN_INT (32);
5151 /* Multiply elements 2 and 0. */
5152 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5155 /* Shift both input vectors down one element, so that elements 3
5156 and 1 are now in the slots for elements 2 and 0. For K8, at
5157 least, this is faster than using a shuffle. */
5158 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5159 gen_lowpart (TImode, op1),
5161 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5162 gen_lowpart (TImode, op2),
5164 /* Multiply elements 3 and 1. */
5165 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5168 /* Move the results in element 2 down to element 1; we don't care
5169 what goes in elements 2 and 3. */
5170 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5171 const0_rtx, const0_rtx));
5172 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5173 const0_rtx, const0_rtx));
5175 /* Merge the parts back together. */
5176 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, only valid before reload.  Two expansion sequences are
;; visible.  The first uses SSE5 helpers (pshufd, pmacsdd, phadddq, a
;; 32-bit left shift and pmacsdql).  The second forms the product from three
;; 32x32->64 partial products (gen_sse2_umulv2siv2di3): low*low, plus the two
;; cross products shifted left by 32, then summed.
;; NOTE(review): the guard choosing between the two sequences is not
;; visible in this excerpt -- presumably TARGET_SSE5; confirm.
5180 (define_insn_and_split "mulv2di3"
5181 [(set (match_operand:V2DI 0 "register_operand" "")
5182 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5183 (match_operand:V2DI 2 "register_operand" "")))]
5185 && !(reload_completed || reload_in_progress)"
5190 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5195 /* op1: A,B,C,D, op2: E,F,G,H */
5197 op1 = gen_lowpart (V4SImode, operands[1]);
5198 op2 = gen_lowpart (V4SImode, operands[2]);
5199 t1 = gen_reg_rtx (V4SImode);
5200 t2 = gen_reg_rtx (V4SImode);
5201 t3 = gen_reg_rtx (V4SImode);
5202 t4 = gen_reg_rtx (V2DImode);
5203 t5 = gen_reg_rtx (V2DImode);
5206 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5213 emit_move_insn (t2, CONST0_RTX (V4SImode));
5215 /* t3: (B*E),(A*F),(D*G),(C*H) */
5216 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5218 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5219 emit_insn (gen_sse5_phadddq (t4, t3));
5221 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5222 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5224 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5225 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5232 t1 = gen_reg_rtx (V2DImode);
5233 t2 = gen_reg_rtx (V2DImode);
5234 t3 = gen_reg_rtx (V2DImode);
5235 t4 = gen_reg_rtx (V2DImode);
5236 t5 = gen_reg_rtx (V2DImode);
5237 t6 = gen_reg_rtx (V2DImode);
5238 thirtytwo = GEN_INT (32);
5240 /* Multiply low parts. */
5241 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5242 gen_lowpart (V4SImode, op2)));
5244 /* Shift input vectors right 32 bits so we can multiply high parts. */
5245 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5246 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5248 /* Multiply high parts by low parts. */
5249 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5250 gen_lowpart (V4SImode, t3)));
5251 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5252 gen_lowpart (V4SImode, t2)));
5254 /* Shift them back. */
5255 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5256 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5258 /* Add the three parts together. */
5259 emit_insn (gen_addv2di3 (t6, t1, t4));
5260 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply V8HI x V8HI -> V4SI, high half.  The low
;; 16-bit products (gen_mulv8hi3) and the signed high 16-bit products
;; (gen_smulv8hi3_highpart) are interleaved with
;; gen_vec_interleave_highv8hi into operand 0 viewed as V8HI.
5264 (define_expand "vec_widen_smult_hi_v8hi"
5265 [(match_operand:V4SI 0 "register_operand" "")
5266 (match_operand:V8HI 1 "register_operand" "")
5267 (match_operand:V8HI 2 "register_operand" "")]
5270 rtx op1, op2, t1, t2, dest;
5274 t1 = gen_reg_rtx (V8HImode);
5275 t2 = gen_reg_rtx (V8HImode);
5276 dest = gen_lowpart (V8HImode, operands[0]);
5278 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5279 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5280 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply V8HI x V8HI -> V4SI, low half.  Same scheme as
;; the high-half expander, but the low and signed-high products are merged
;; with gen_vec_interleave_lowv8hi.
5284 (define_expand "vec_widen_smult_lo_v8hi"
5285 [(match_operand:V4SI 0 "register_operand" "")
5286 (match_operand:V8HI 1 "register_operand" "")
5287 (match_operand:V8HI 2 "register_operand" "")]
5290 rtx op1, op2, t1, t2, dest;
5294 t1 = gen_reg_rtx (V8HImode);
5295 t2 = gen_reg_rtx (V8HImode);
5296 dest = gen_lowpart (V8HImode, operands[0]);
5298 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5299 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5300 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply V8HI x V8HI -> V4SI, high half.  The low
;; 16-bit products (gen_mulv8hi3) and the unsigned high 16-bit products
;; (gen_umulv8hi3_highpart) are interleaved with
;; gen_vec_interleave_highv8hi into operand 0 viewed as V8HI.
5304 (define_expand "vec_widen_umult_hi_v8hi"
5305 [(match_operand:V4SI 0 "register_operand" "")
5306 (match_operand:V8HI 1 "register_operand" "")
5307 (match_operand:V8HI 2 "register_operand" "")]
5310 rtx op1, op2, t1, t2, dest;
5314 t1 = gen_reg_rtx (V8HImode);
5315 t2 = gen_reg_rtx (V8HImode);
5316 dest = gen_lowpart (V8HImode, operands[0]);
5318 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5319 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5320 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply V8HI x V8HI -> V4SI, low half.  Same scheme
;; as the high-half expander, but the low and unsigned-high products are
;; merged with gen_vec_interleave_lowv8hi.
5324 (define_expand "vec_widen_umult_lo_v8hi"
5325 [(match_operand:V4SI 0 "register_operand" "")
5326 (match_operand:V8HI 1 "register_operand" "")
5327 (match_operand:V8HI 2 "register_operand" "")]
5330 rtx op1, op2, t1, t2, dest;
5334 t1 = gen_reg_rtx (V8HImode);
5335 t2 = gen_reg_rtx (V8HImode);
5336 dest = gen_lowpart (V8HImode, operands[0]);
5338 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5339 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5340 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply V4SI x V4SI -> V2DI, high half.  Both inputs
;; are first permuted with pshufd into fresh V4SI temporaries, which are
;; then fed to gen_sse5_mulv2div2di3_high.
5344 (define_expand "vec_widen_smult_hi_v4si"
5345 [(match_operand:V2DI 0 "register_operand" "")
5346 (match_operand:V4SI 1 "register_operand" "")
5347 (match_operand:V4SI 2 "register_operand" "")]
5352 t1 = gen_reg_rtx (V4SImode);
5353 t2 = gen_reg_rtx (V4SImode);
5355 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5360 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5365 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply V4SI x V4SI -> V2DI, low half.  Both inputs
;; are first permuted with pshufd into fresh V4SI temporaries, which are
;; then fed to gen_sse5_mulv2div2di3_low.
5369 (define_expand "vec_widen_smult_lo_v4si"
5370 [(match_operand:V2DI 0 "register_operand" "")
5371 (match_operand:V4SI 1 "register_operand" "")
5372 (match_operand:V4SI 2 "register_operand" "")]
5377 t1 = gen_reg_rtx (V4SImode);
5378 t2 = gen_reg_rtx (V4SImode);
5380 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5385 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5390 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI, high half.  Each input
;; is interleaved with itself (gen_vec_interleave_highv4si) and the two
;; results are multiplied with gen_sse2_umulv2siv2di3.
5395 (define_expand "vec_widen_umult_hi_v4si"
5396 [(match_operand:V2DI 0 "register_operand" "")
5397 (match_operand:V4SI 1 "register_operand" "")
5398 (match_operand:V4SI 2 "register_operand" "")]
5401 rtx op1, op2, t1, t2;
5405 t1 = gen_reg_rtx (V4SImode);
5406 t2 = gen_reg_rtx (V4SImode);
5408 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5409 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5410 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI, low half.  Each input
;; is interleaved with itself (gen_vec_interleave_lowv4si) and the two
;; results are multiplied with gen_sse2_umulv2siv2di3.
5414 (define_expand "vec_widen_umult_lo_v4si"
5415 [(match_operand:V2DI 0 "register_operand" "")
5416 (match_operand:V4SI 1 "register_operand" "")
5417 (match_operand:V4SI 2 "register_operand" "")]
5420 rtx op1, op2, t1, t2;
5424 t1 = gen_reg_rtx (V4SImode);
5425 t2 = gen_reg_rtx (V4SImode);
5427 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5428 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5429 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product of two V8HI vectors accumulated into V4SI:
;; t = pmaddwd (operand 1, operand 2); operand 0 = operand 3 + t.
5433 (define_expand "sdot_prodv8hi"
5434 [(match_operand:V4SI 0 "register_operand" "")
5435 (match_operand:V8HI 1 "register_operand" "")
5436 (match_operand:V8HI 2 "register_operand" "")
5437 (match_operand:V4SI 3 "register_operand" "")]
5440 rtx t = gen_reg_rtx (V4SImode);
5441 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5442 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of two V4SI vectors accumulated into V2DI.
;; t1 = umul (op1, op2) + op3; t2/t3 are TImode right shifts of the inputs;
;; t4 = umul (t2, t3); operand 0 = t1 + t4.
5446 (define_expand "udot_prodv4si"
5447 [(match_operand:V2DI 0 "register_operand" "")
5448 (match_operand:V4SI 1 "register_operand" "")
5449 (match_operand:V4SI 2 "register_operand" "")
5450 (match_operand:V2DI 3 "register_operand" "")]
5455 t1 = gen_reg_rtx (V2DImode);
5456 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5457 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5459 t2 = gen_reg_rtx (V4SImode);
5460 t3 = gen_reg_rtx (V4SImode);
5461 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5462 gen_lowpart (TImode, operands[1]),
5464 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5465 gen_lowpart (TImode, operands[2]),
5468 t4 = gen_reg_rtx (V2DImode);
5469 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5471 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX variant of the SSEMODE24 (V8HI, V4SI) ashr pattern.  Emits the
;; three-operand vpsra<ssevecsize>; the destination need not match
;; operand 1.  Operand 2 is an SImode count with constraint "xN".
5475 (define_insn "*avx_ashr<mode>3"
5476 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5478 (match_operand:SSEMODE24 1 "register_operand" "x")
5479 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5481 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5482 [(set_attr "type" "sseishft")
5483 (set_attr "prefix" "vex")
5484 (set_attr "mode" "TI")])
;; ashr pattern for SSEMODE24 (V8HI, V4SI).  Emits the two-operand
;; psra<ssevecsize>, so operand 1 is tied to the destination ("0").
;; Operand 2 is an SImode count with constraint "xN".
5486 (define_insn "ashr<mode>3"
5487 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5489 (match_operand:SSEMODE24 1 "register_operand" "0")
5490 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5492 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5493 [(set_attr "type" "sseishft")
5494 (set_attr "prefix_data16" "1")
5495 (set_attr "mode" "TI")])
;; AVX variant of the logical right shift for SSEMODE248 (V8HI, V4SI,
;; V2DI).  Emits the three-operand vpsrl<ssevecsize>.  Operand 2 is an
;; SImode count with constraint "xN".
5497 (define_insn "*avx_lshr<mode>3"
5498 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5499 (lshiftrt:SSEMODE248
5500 (match_operand:SSEMODE248 1 "register_operand" "x")
5501 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5503 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5504 [(set_attr "type" "sseishft")
5505 (set_attr "prefix" "vex")
5506 (set_attr "mode" "TI")])
;; Logical right shift for SSEMODE248 (V8HI, V4SI, V2DI).  Emits the
;; two-operand psrl<ssevecsize>, so operand 1 is tied to the destination
;; ("0").  Operand 2 is an SImode count with constraint "xN".
5508 (define_insn "lshr<mode>3"
5509 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5510 (lshiftrt:SSEMODE248
5511 (match_operand:SSEMODE248 1 "register_operand" "0")
5512 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5514 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5515 [(set_attr "type" "sseishft")
5516 (set_attr "prefix_data16" "1")
5517 (set_attr "mode" "TI")])
;; AVX variant of the ashl pattern for SSEMODE248 (V8HI, V4SI, V2DI).
;; Emits the three-operand vpsll<ssevecsize>.  Operand 2 is an SImode
;; count with constraint "xN".
5519 (define_insn "*avx_ashl<mode>3"
5520 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5522 (match_operand:SSEMODE248 1 "register_operand" "x")
5523 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5525 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5526 [(set_attr "type" "sseishft")
5527 (set_attr "prefix" "vex")
5528 (set_attr "mode" "TI")])
;; ashl pattern for SSEMODE248 (V8HI, V4SI, V2DI).  Emits the two-operand
;; psll<ssevecsize>, so operand 1 is tied to the destination ("0").
;; Operand 2 is an SImode count with constraint "xN".
5530 (define_insn "ashl<mode>3"
5531 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5533 (match_operand:SSEMODE248 1 "register_operand" "0")
5534 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5536 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5537 [(set_attr "type" "sseishft")
5538 (set_attr "prefix_data16" "1")
5539 (set_attr "mode" "TI")])
;; Whole-vector left shift for the 16-byte integer modes, expressed as a
;; TImode ashift.  The preparation code replaces operands 0 and 1 with
;; their TImode lowparts; operand 2 must satisfy
;; const_0_to_255_mul_8_operand.
5541 (define_expand "vec_shl_<mode>"
5542 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5543 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5544 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5547 operands[0] = gen_lowpart (TImode, operands[0]);
5548 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the 16-byte integer modes,
;; expressed as a TImode lshiftrt.  The preparation code replaces operands
;; 0 and 1 with their TImode lowparts; operand 2 must satisfy
;; const_0_to_255_mul_8_operand.
5551 (define_expand "vec_shr_<mode>"
5552 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5553 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5554 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5557 operands[0] = gen_lowpart (TImode, operands[0]);
5558 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX integer min/max over SSEMODE124 (V16QI, V8HI, V4SI), instantiated
;; per <code>.  Emits the three-operand vp<maxminiprefix><ssevecsize>;
;; operands 1 and 2 are commutative ("%x").
5561 (define_insn "*avx_<code><mode>3"
5562 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5564 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5565 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5566 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5567 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5568 [(set_attr "type" "sseiadd")
5569 (set_attr "prefix" "vex")
5570 (set_attr "mode" "TI")])
;; Expander for the V16QI <code> operation.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy.
5572 (define_expand "<code>v16qi3"
5573 [(set (match_operand:V16QI 0 "register_operand" "")
5575 (match_operand:V16QI 1 "nonimmediate_operand" "")
5576 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5578 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 insn for the V16QI <code> operation.  Emits the two-operand
;; p<maxminiprefix>b; operand 1 is tied to the destination and commutes
;; with operand 2 ("%0").
5580 (define_insn "*<code>v16qi3"
5581 [(set (match_operand:V16QI 0 "register_operand" "=x")
5583 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5584 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5585 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5586 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5587 [(set_attr "type" "sseiadd")
5588 (set_attr "prefix_data16" "1")
5589 (set_attr "mode" "TI")])
;; Expander for the V8HI <code> operation.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy.
5591 (define_expand "<code>v8hi3"
5592 [(set (match_operand:V8HI 0 "register_operand" "")
5594 (match_operand:V8HI 1 "nonimmediate_operand" "")
5595 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5597 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 insn for the V8HI <code> operation.  Emits the two-operand
;; p<maxminiprefix>w; operand 1 is tied to the destination and commutes
;; with operand 2 ("%0").
5599 (define_insn "*<code>v8hi3"
5600 [(set (match_operand:V8HI 0 "register_operand" "=x")
5602 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5603 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5604 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5605 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5606 [(set_attr "type" "sseiadd")
5607 (set_attr "prefix_data16" "1")
5608 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two paths are visible: one only fixes up the
;; operands for a native UMAX insn; the other computes
;; ussub (op1, op2) + op2, using a fresh temporary for the intermediate
;; result when operand 0 is the same rtx as operand 2.
;; NOTE(review): the condition selecting between the paths is not visible
;; in this excerpt -- presumably TARGET_SSE4_1; confirm.
5610 (define_expand "umaxv8hi3"
5611 [(set (match_operand:V8HI 0 "register_operand" "")
5612 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5613 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5617 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5620 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5621 if (rtx_equal_p (op3, op2))
5622 op3 = gen_reg_rtx (V8HImode);
5623 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5624 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for SSEMODE14 (V16QI, V4SI).  One visible path fixes up
;; the operands for a native SMAX insn.  The other goes through
;; ix86_expand_int_vcond with the comparison op1 > op2, selecting op1 when
;; it holds and op2 otherwise.
5629 (define_expand "smax<mode>3"
5630 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5631 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5632 (match_operand:SSEMODE14 2 "register_operand" "")))]
5636 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5642 xops[0] = operands[0];
5643 xops[1] = operands[1];
5644 xops[2] = operands[2];
5645 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5646 xops[4] = operands[1];
5647 xops[5] = operands[2];
5648 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max insn for SSEMODE14 (V16QI, V4SI), instantiated per
;; <code>.  Emits the two-operand p<maxminiprefix><ssevecsize>; operand 1
;; is tied to the destination and commutes with operand 2 ("%0").
5654 (define_insn "*sse4_1_<code><mode>3"
5655 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5657 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5658 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5659 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5660 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5661 [(set_attr "type" "sseiadd")
5662 (set_attr "prefix_extra" "1")
5663 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  One visible path fixes up the operands for a
;; native UMAX insn.  The other goes through ix86_expand_int_vcond with
;; the unsigned comparison op1 > op2 (GTU), selecting op1 when it holds and
;; op2 otherwise.
5665 (define_expand "umaxv4si3"
5666 [(set (match_operand:V4SI 0 "register_operand" "")
5667 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5668 (match_operand:V4SI 2 "register_operand" "")))]
5672 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5678 xops[0] = operands[0];
5679 xops[1] = operands[1];
5680 xops[2] = operands[2];
5681 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5682 xops[4] = operands[1];
5683 xops[5] = operands[2];
5684 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max insn for SSEMODE24 (V8HI, V4SI), instantiated per
;; <code>.  Emits the two-operand p<maxminiprefix><ssevecsize>; operand 1
;; is tied to the destination and commutes with operand 2 ("%0").
5690 (define_insn "*sse4_1_<code><mode>3"
5691 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5693 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5694 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5695 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5696 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5697 [(set_attr "type" "sseiadd")
5698 (set_attr "prefix_extra" "1")
5699 (set_attr "mode" "TI")])
;; Signed minimum for SSEMODE14 (V16QI, V4SI).  One visible path fixes up
;; the operands for a native SMIN insn.  The other goes through
;; ix86_expand_int_vcond with the comparison op1 > op2, selecting op2 when
;; it holds and op1 otherwise (the arms are swapped relative to smax).
5701 (define_expand "smin<mode>3"
5702 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5703 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5704 (match_operand:SSEMODE14 2 "register_operand" "")))]
5708 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5714 xops[0] = operands[0];
5715 xops[1] = operands[2];
5716 xops[2] = operands[1];
5717 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5718 xops[4] = operands[1];
5719 xops[5] = operands[2];
5720 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for SSEMODE24 (V8HI, V4SI).  One visible path fixes up
;; the operands for a native UMIN insn.  The other goes through
;; ix86_expand_int_vcond with the unsigned comparison op1 > op2 (GTU),
;; selecting op2 when it holds and op1 otherwise.
5726 (define_expand "umin<mode>3"
5727 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5728 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5729 (match_operand:SSEMODE24 2 "register_operand" "")))]
5733 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5739 xops[0] = operands[0];
5740 xops[1] = operands[2];
5741 xops[2] = operands[1];
5742 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5743 xops[4] = operands[1];
5744 xops[5] = operands[2];
5745 ok = ix86_expand_int_vcond (xops);
5751 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5753 ;; Parallel integral comparisons
5755 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on SSEMODE124 (V16QI, V8HI, V4SI).
;; Enabled for SSE2 when SSE5 is not; it only fixes up the operands via
;; ix86_fixup_binary_operands_no_copy.
5757 (define_expand "sse2_eq<mode>3"
5758 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5760 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5761 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5762 "TARGET_SSE2 && !TARGET_SSE5"
5763 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX element-wise equality for SSEMODE1248 (V16QI, V8HI, V4SI, V2DI).
;; Emits the three-operand vpcmpeq<ssevecsize>; operands 1 and 2 are
;; commutative ("%x").
5765 (define_insn "*avx_eq<mode>3"
5766 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5768 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5769 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5770 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5771 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5772 [(set_attr "type" "ssecmp")
5773 (set_attr "prefix" "vex")
5774 (set_attr "mode" "TI")])
;; SSE2 element-wise equality for SSEMODE124 (V16QI, V8HI, V4SI), disabled
;; when SSE5 is enabled.  Emits the two-operand pcmpeq<ssevecsize>;
;; operand 1 is tied to the destination and commutes with operand 2.
5776 (define_insn "*sse2_eq<mode>3"
5777 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5779 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
5780 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5781 "TARGET_SSE2 && !TARGET_SSE5
5782 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5783 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
5784 [(set_attr "type" "ssecmp")
5785 (set_attr "prefix_data16" "1")
5786 (set_attr "mode" "TI")])
;; Expander for V2DI element-wise equality.  It only fixes up the operands
;; via ix86_fixup_binary_operands_no_copy.
5788 (define_expand "sse4_1_eqv2di3"
5789 [(set (match_operand:V2DI 0 "register_operand" "")
5791 (match_operand:V2DI 1 "nonimmediate_operand" "")
5792 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5794 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI element-wise equality.  Emits the two-operand pcmpeqq;
;; operand 1 is tied to the destination and commutes with operand 2.
5796 (define_insn "*sse4_1_eqv2di3"
5797 [(set (match_operand:V2DI 0 "register_operand" "=x")
5799 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
5800 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5801 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5802 "pcmpeqq\t{%2, %0|%0, %2}"
5803 [(set_attr "type" "ssecmp")
5804 (set_attr "prefix_extra" "1")
5805 (set_attr "mode" "TI")])
;; AVX element-wise greater-than for SSEMODE1248 (V16QI, V8HI, V4SI,
;; V2DI).  Emits the three-operand vpcmpgt<ssevecsize>.  Operand 1 must be
;; a register and carries no commutative marker.
5807 (define_insn "*avx_gt<mode>3"
5808 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5810 (match_operand:SSEMODE1248 1 "register_operand" "x")
5811 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5813 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5814 [(set_attr "type" "ssecmp")
5815 (set_attr "prefix" "vex")
5816 (set_attr "mode" "TI")])
;; SSE2 element-wise greater-than for SSEMODE124 (V16QI, V8HI, V4SI),
;; disabled when SSE5 is enabled.  Emits the two-operand
;; pcmpgt<ssevecsize>; operand 1 is tied to the destination ("0").
5818 (define_insn "sse2_gt<mode>3"
5819 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5821 (match_operand:SSEMODE124 1 "register_operand" "0")
5822 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5823 "TARGET_SSE2 && !TARGET_SSE5"
5824 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
5825 [(set_attr "type" "ssecmp")
5826 (set_attr "prefix_data16" "1")
5827 (set_attr "mode" "TI")])
;; V2DI element-wise greater-than.  Emits the two-operand pcmpgtq; operand
;; 1 is tied to the destination ("0").
5829 (define_insn "sse4_2_gtv2di3"
5830 [(set (match_operand:V2DI 0 "register_operand" "=x")
5832 (match_operand:V2DI 1 "register_operand" "0")
5833 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5835 "pcmpgtq\t{%2, %0|%0, %2}"
5836 [(set_attr "type" "ssecmp")
5837 (set_attr "mode" "TI")])
;; Vector conditional select for the 16-byte integer modes.  Operand 3 is
;; the comparison operator applied to operands 4 and 5; operands 1 and 2
;; are the two arms.  Expansion is delegated to ix86_expand_int_vcond.
5839 (define_expand "vcond<mode>"
5840 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5841 (if_then_else:SSEMODEI
5842 (match_operator 3 ""
5843 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
5844 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
5845 (match_operand:SSEMODEI 1 "general_operand" "")
5846 (match_operand:SSEMODEI 2 "general_operand" "")))]
5849 if (ix86_expand_int_vcond (operands))
;; Variant of the vector conditional select under the vcondu name, for the
;; 16-byte integer modes.  Same operand layout as vcond<mode>; expansion is
;; delegated to ix86_expand_int_vcond.
5855 (define_expand "vcondu<mode>"
5856 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5857 (if_then_else:SSEMODEI
5858 (match_operator 3 ""
5859 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
5860 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
5861 (match_operand:SSEMODEI 1 "general_operand" "")
5862 (match_operand:SSEMODEI 2 "general_operand" "")))]
5865 if (ix86_expand_int_vcond (operands))
5871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5873 ;; Parallel bitwise logical operations
5875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer modes, expressed as an XOR.
;; The preparation code builds a constant vector with every element set to
;; -1 and forces it into a register as operands[2].
5877 (define_expand "one_cmpl<mode>2"
5878 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5879 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5883 int i, n = GET_MODE_NUNITS (<MODE>mode);
5884 rtvec v = rtvec_alloc (n);
5886 for (i = 0; i < n; ++i)
5887 RTVEC_ELT (v, i) = constm1_rtx;
5889 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 32-byte integer modes: operand 1 is complemented
;; before being combined with operand 2.  Emits the three-operand vandnps,
;; with the insn mode taken from <avxvecpsmode>.
5892 (define_insn "*avx_andnot<mode>3"
5893 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5895 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
5896 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5898 "vandnps\t{%2, %1, %0|%0, %1, %2}"
5899 [(set_attr "type" "sselog")
5900 (set_attr "prefix" "vex")
5901 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the 16-byte integer modes on SSE targets without SSE2.
;; Emits the two-operand andnps; the complemented operand 1 is tied to the
;; destination ("0").
5903 (define_insn "*sse_andnot<mode>3"
5904 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5906 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5907 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5908 "(TARGET_SSE && !TARGET_SSE2)"
5909 "andnps\t{%2, %0|%0, %2}"
5910 [(set_attr "type" "sselog")
5911 (set_attr "mode" "V4SF")])
;; AVX and-not for the 16-byte integer modes: operand 1 is complemented
;; before being combined with operand 2.  Emits the three-operand vpandn.
5913 (define_insn "*avx_andnot<mode>3"
5914 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5916 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
5917 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5919 "vpandn\t{%2, %1, %0|%0, %1, %2}"
5920 [(set_attr "type" "sselog")
5921 (set_attr "prefix" "vex")
5922 (set_attr "mode" "TI")])
;; And-not for the 16-byte integer modes.  Emits the two-operand pandn;
;; the complemented operand 1 is tied to the destination ("0").
5924 (define_insn "sse2_andnot<mode>3"
5925 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5927 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5928 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5930 "pandn\t{%2, %0|%0, %2}"
5931 [(set_attr "type" "sselog")
5932 (set_attr "prefix_data16" "1")
5933 (set_attr "mode" "TI")])
;; And-not on TFmode values.  Emits the two-operand pandn; the
;; complemented operand 1 is tied to the destination ("0").
5935 (define_insn "*andnottf3"
5936 [(set (match_operand:TF 0 "register_operand" "=x")
5938 (not:TF (match_operand:TF 1 "register_operand" "0"))
5939 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5941 "pandn\t{%2, %0|%0, %2}"
5942 [(set_attr "type" "sselog")
5943 (set_attr "prefix_data16" "1")
5944 (set_attr "mode" "TI")])
;; Expander for the <code> logical operations on the 16-byte integer
;; modes.  Its only preparation step is ix86_fixup_binary_operands_no_copy.
5946 (define_expand "<code><mode>3"
5947 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5949 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5950 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5952 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX <code> logical operation on the 32-byte integer modes.  Emits the
;; three-operand v<plogicprefix>ps, with the insn mode taken from
;; <avxvecpsmode>; operands 1 and 2 are commutative ("%x").
5954 (define_insn "*avx_<code><mode>3"
5955 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5957 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
5958 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5960 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5961 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
5962 [(set_attr "type" "sselog")
5963 (set_attr "prefix" "vex")
5964 (set_attr "mode" "<avxvecpsmode>")])
;; <code> logical operation on the 16-byte integer modes for SSE targets
;; without SSE2.  Emits the two-operand <plogicprefix>ps; operand 1 is tied
;; to the destination and commutes with operand 2 ("%0").
5966 (define_insn "*sse_<code><mode>3"
5967 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5969 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5970 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5971 "(TARGET_SSE && !TARGET_SSE2)
5972 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5973 "<plogicprefix>ps\t{%2, %0|%0, %2}"
5974 [(set_attr "type" "sselog")
5975 (set_attr "mode" "V4SF")])
;; AVX <code> logical operation on the 16-byte integer modes.  Emits the
;; three-operand vp<plogicprefix>; operands 1 and 2 are commutative ("%x").
5977 (define_insn "*avx_<code><mode>3"
5978 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5980 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
5981 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5983 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5984 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
5985 [(set_attr "type" "sselog")
5986 (set_attr "prefix" "vex")
5987 (set_attr "mode" "TI")])
;; SSE2 <code> logical operation on the 16-byte integer modes.  Emits the
;; two-operand p<plogicprefix>; operand 1 is tied to the destination and
;; commutes with operand 2 ("%0").
5989 (define_insn "*sse2_<code><mode>3"
5990 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5992 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5993 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5994 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5995 "p<plogicprefix>\t{%2, %0|%0, %2}"
5996 [(set_attr "type" "sselog")
5997 (set_attr "prefix_data16" "1")
5998 (set_attr "mode" "TI")])
;; Expander for the <code> logical operations on TFmode values.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy.
6000 (define_expand "<code>tf3"
6001 [(set (match_operand:TF 0 "register_operand" "")
6003 (match_operand:TF 1 "nonimmediate_operand" "")
6004 (match_operand:TF 2 "nonimmediate_operand" "")))]
6006 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 <code> logical operation on TFmode values.  Emits the two-operand
;; p<plogicprefix>; operand 1 is tied to the destination and commutes with
;; operand 2 ("%0").
6008 (define_insn "*<code>tf3"
6009 [(set (match_operand:TF 0 "register_operand" "=x")
6011 (match_operand:TF 1 "nonimmediate_operand" "%0")
6012 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6013 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6014 "p<plogicprefix>\t{%2, %0|%0, %2}"
6015 [(set_attr "type" "sselog")
6016 (set_attr "prefix_data16" "1")
6017 (set_attr "mode" "TI")])
6019 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6021 ;; Parallel integral element swizzling
6023 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI vectors into one V16QI vector.  One visible
;; path hands the operands to ix86_expand_sse5_pack.  The other views the
;; inputs as V16QI and applies three rounds of high/low byte interleaves
;; followed by a final low interleave; the letter trace below follows that
;; sequence.
6026 ;; op1 = abcdefghijklmnop
6027 ;; op2 = qrstuvwxyz012345
6028 ;; h1 = aqbrcsdteufvgwhx
6029 ;; l1 = iyjzk0l1m2n3o4p5
6030 ;; h2 = aiqybjrzcks0dlt1
6031 ;; l2 = emu2fnv3gow4hpx5
6032 ;; h3 = aeimquy2bfjnrvz3
6033 ;; l3 = cgkosw04dhlptx15
6034 ;; result = bdfhjlnprtvxz135
6035 (define_expand "vec_pack_trunc_v8hi"
6036 [(match_operand:V16QI 0 "register_operand" "")
6037 (match_operand:V8HI 1 "register_operand" "")
6038 (match_operand:V8HI 2 "register_operand" "")]
6041 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6045 ix86_expand_sse5_pack (operands);
6049 op1 = gen_lowpart (V16QImode, operands[1]);
6050 op2 = gen_lowpart (V16QImode, operands[2]);
6051 h1 = gen_reg_rtx (V16QImode);
6052 l1 = gen_reg_rtx (V16QImode);
6053 h2 = gen_reg_rtx (V16QImode);
6054 l2 = gen_reg_rtx (V16QImode);
6055 h3 = gen_reg_rtx (V16QImode);
6056 l3 = gen_reg_rtx (V16QImode);
6058 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6059 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6060 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6061 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6062 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6063 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6064 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
;; Truncating pack of two V4SI vectors into one V8HI vector.  One visible
;; path hands the operands to ix86_expand_sse5_pack.  The other views the
;; inputs as V8HI and applies two rounds of high/low interleaves followed
;; by a final low interleave.
6075 ;; result = bdfhjlnp
6076 (define_expand "vec_pack_trunc_v4si"
6077 [(match_operand:V8HI 0 "register_operand" "")
6078 (match_operand:V4SI 1 "register_operand" "")
6079 (match_operand:V4SI 2 "register_operand" "")]
6082 rtx op1, op2, h1, l1, h2, l2;
6086 ix86_expand_sse5_pack (operands);
6090 op1 = gen_lowpart (V8HImode, operands[1]);
6091 op2 = gen_lowpart (V8HImode, operands[2]);
6092 h1 = gen_reg_rtx (V8HImode);
6093 l1 = gen_reg_rtx (V8HImode);
6094 h2 = gen_reg_rtx (V8HImode);
6095 l2 = gen_reg_rtx (V8HImode);
6097 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6098 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6099 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6100 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6101 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Truncating pack of two V2DI vectors into one V4SI vector.  One visible
;; path hands the operands to ix86_expand_sse5_pack.  The other views the
;; inputs as V4SI, interleaves their high and low halves, then low
;; interleaves the two results into operand 0.
6111 (define_expand "vec_pack_trunc_v2di"
6112 [(match_operand:V4SI 0 "register_operand" "")
6113 (match_operand:V2DI 1 "register_operand" "")
6114 (match_operand:V2DI 2 "register_operand" "")]
6117 rtx op1, op2, h1, l1;
6121 ix86_expand_sse5_pack (operands);
6125 op1 = gen_lowpart (V4SImode, operands[1]);
6126 op2 = gen_lowpart (V4SImode, operands[2]);
6127 h1 = gen_reg_rtx (V4SImode);
6128 l1 = gen_reg_rtx (V4SImode);
6130 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6131 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6132 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Interleave the high halves of two V16QI vectors: the selection picks
;; indices 8,24,9,25,...,15,31 from operands 1 and 2.  Expansion emits
;; sse2_punpckhbw.
6136 (define_expand "vec_interleave_highv16qi"
6137 [(set (match_operand:V16QI 0 "register_operand" "")
6140 (match_operand:V16QI 1 "register_operand" "")
6141 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6142 (parallel [(const_int 8) (const_int 24)
6143 (const_int 9) (const_int 25)
6144 (const_int 10) (const_int 26)
6145 (const_int 11) (const_int 27)
6146 (const_int 12) (const_int 28)
6147 (const_int 13) (const_int 29)
6148 (const_int 14) (const_int 30)
6149 (const_int 15) (const_int 31)])))]
6152 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V16QI vectors: the selection picks
;; indices 0,16,1,17,...,7,23 from operands 1 and 2.  Expansion emits
;; sse2_punpcklbw.
6156 (define_expand "vec_interleave_lowv16qi"
6157 [(set (match_operand:V16QI 0 "register_operand" "")
6160 (match_operand:V16QI 1 "register_operand" "")
6161 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6162 (parallel [(const_int 0) (const_int 16)
6163 (const_int 1) (const_int 17)
6164 (const_int 2) (const_int 18)
6165 (const_int 3) (const_int 19)
6166 (const_int 4) (const_int 20)
6167 (const_int 5) (const_int 21)
6168 (const_int 6) (const_int 22)
6169 (const_int 7) (const_int 23)])))]
6172 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V8HI vectors: the selection picks
;; indices 4,12,5,13,6,14,7,15 from operands 1 and 2.  Expansion emits
;; sse2_punpckhwd.  Operand 0 carries an empty constraint string, matching
;; the sibling vec_interleave expanders.
6176 (define_expand "vec_interleave_highv8hi"
6177 [(set (match_operand:V8HI 0 "register_operand" "")
6180 (match_operand:V8HI 1 "register_operand" "")
6181 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6182 (parallel [(const_int 4) (const_int 12)
6183 (const_int 5) (const_int 13)
6184 (const_int 6) (const_int 14)
6185 (const_int 7) (const_int 15)])))]
6188 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V8HI vectors: the selection picks
;; indices 0,8,1,9,2,10,3,11 from operands 1 and 2.  Expansion emits
;; sse2_punpcklwd.
6192 (define_expand "vec_interleave_lowv8hi"
6193 [(set (match_operand:V8HI 0 "register_operand" "")
6196 (match_operand:V8HI 1 "register_operand" "")
6197 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6198 (parallel [(const_int 0) (const_int 8)
6199 (const_int 1) (const_int 9)
6200 (const_int 2) (const_int 10)
6201 (const_int 3) (const_int 11)])))]
6204 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V4SI vectors: the selection picks
;; indices 2,6,3,7 from operands 1 and 2.  Expansion emits sse2_punpckhdq.
6208 (define_expand "vec_interleave_highv4si"
6209 [(set (match_operand:V4SI 0 "register_operand" "")
6212 (match_operand:V4SI 1 "register_operand" "")
6213 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6214 (parallel [(const_int 2) (const_int 6)
6215 (const_int 3) (const_int 7)])))]
6218 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V4SI vectors: the selection picks
;; indices 0,4,1,5 from operands 1 and 2.  Expansion emits sse2_punpckldq.
6222 (define_expand "vec_interleave_lowv4si"
6223 [(set (match_operand:V4SI 0 "register_operand" "")
6226 (match_operand:V4SI 1 "register_operand" "")
6227 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6228 (parallel [(const_int 0) (const_int 4)
6229 (const_int 1) (const_int 5)])))]
6232 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Interleave the high elements of two V2DI vectors; the visible part of
;; the selection starts at index 1.  Expansion emits sse2_punpckhqdq.
6236 (define_expand "vec_interleave_highv2di"
6237 [(set (match_operand:V2DI 0 "register_operand" "")
6240 (match_operand:V2DI 1 "register_operand" "")
6241 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6242 (parallel [(const_int 1)
6246 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Interleave the low elements of two V2DI vectors; the visible part of
;; the selection starts at index 0.  Expansion emits sse2_punpcklqdq.
6250 (define_expand "vec_interleave_lowv2di"
6251 [(set (match_operand:V2DI 0 "register_operand" "")
6254 (match_operand:V2DI 1 "register_operand" "")
6255 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6256 (parallel [(const_int 0)
6260 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V4SF vectors: the selection picks
;; indices 2,6,3,7 from operands 1 and 2.  No preparation code is visible
;; for this expander.
6264 (define_expand "vec_interleave_highv4sf"
6265 [(set (match_operand:V4SF 0 "register_operand" "")
6268 (match_operand:V4SF 1 "register_operand" "")
6269 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6270 (parallel [(const_int 2) (const_int 6)
6271 (const_int 3) (const_int 7)])))]
;; Interleave the low halves of two V4SF vectors: the selection picks
;; indices 0,4,1,5 from operands 1 and 2.  No preparation code is visible
;; for this expander.
6274 (define_expand "vec_interleave_lowv4sf"
6275 [(set (match_operand:V4SF 0 "register_operand" "")
6278 (match_operand:V4SF 1 "register_operand" "")
6279 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6280 (parallel [(const_int 0) (const_int 4)
6281 (const_int 1) (const_int 5)])))]
;; Expander for vec_interleave_highv2df: V2DF operands, selector starting
;; with index 1 (the rest of the selector is not visible in this listing).
6284 (define_expand "vec_interleave_highv2df"
6285 [(set (match_operand:V2DF 0 "register_operand" "")
6288 (match_operand:V2DF 1 "register_operand" "")
6289 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6290 (parallel [(const_int 1)
;; Expander for vec_interleave_lowv2df: V2DF operands, selector starting
;; with index 0 (the rest of the selector is not visible in this listing).
6294 (define_expand "vec_interleave_lowv2df"
6295 [(set (match_operand:V2DF 0 "register_operand" "")
6298 (match_operand:V2DF 1 "register_operand" "")
6299 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6300 (parallel [(const_int 0)
;; AVX form of packsswb: three-operand vpacksswb.  Sources are V8HI
;; operand 1 (register) and operand 2 (register or memory); the result is
;; V16QI in operand 0.  Operand 1 is not tied to the destination.
6304 (define_insn "*avx_packsswb"
6305 [(set (match_operand:V16QI 0 "register_operand" "=x")
6308 (match_operand:V8HI 1 "register_operand" "x"))
6310 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6312 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6313 [(set_attr "type" "sselog")
6314 (set_attr "prefix" "vex")
6315 (set_attr "mode" "TI")])
;; Legacy two-operand packsswb.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 0 and 2 are printed.
6317 (define_insn "sse2_packsswb"
6318 [(set (match_operand:V16QI 0 "register_operand" "=x")
6321 (match_operand:V8HI 1 "register_operand" "0"))
6323 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6325 "packsswb\t{%2, %0|%0, %2}"
6326 [(set_attr "type" "sselog")
6327 (set_attr "prefix_data16" "1")
6328 (set_attr "mode" "TI")])
;; AVX form of packssdw: three-operand vpackssdw.  Sources are V4SI
;; operand 1 (register) and operand 2 (register or memory); the result is
;; V8HI in operand 0.
6330 (define_insn "*avx_packssdw"
6331 [(set (match_operand:V8HI 0 "register_operand" "=x")
6334 (match_operand:V4SI 1 "register_operand" "x"))
6336 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6338 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6339 [(set_attr "type" "sselog")
6340 (set_attr "prefix" "vex")
6341 (set_attr "mode" "TI")])
;; Legacy two-operand packssdw.  Operand 1 is tied to the destination
;; (constraint "0"); V4SI sources, V8HI result.
6343 (define_insn "sse2_packssdw"
6344 [(set (match_operand:V8HI 0 "register_operand" "=x")
6347 (match_operand:V4SI 1 "register_operand" "0"))
6349 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6351 "packssdw\t{%2, %0|%0, %2}"
6352 [(set_attr "type" "sselog")
6353 (set_attr "prefix_data16" "1")
6354 (set_attr "mode" "TI")])
;; AVX form of packuswb: three-operand vpackuswb.  Sources are V8HI
;; operand 1 (register) and operand 2 (register or memory); the result is
;; V16QI in operand 0.
6356 (define_insn "*avx_packuswb"
6357 [(set (match_operand:V16QI 0 "register_operand" "=x")
6360 (match_operand:V8HI 1 "register_operand" "x"))
6362 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6364 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6365 [(set_attr "type" "sselog")
6366 (set_attr "prefix" "vex")
6367 (set_attr "mode" "TI")])
;; Legacy two-operand packuswb.  Operand 1 is tied to the destination
;; (constraint "0"); V8HI sources, V16QI result.
6369 (define_insn "sse2_packuswb"
6370 [(set (match_operand:V16QI 0 "register_operand" "=x")
6373 (match_operand:V8HI 1 "register_operand" "0"))
6375 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6377 "packuswb\t{%2, %0|%0, %2}"
6378 [(set_attr "type" "sselog")
6379 (set_attr "prefix_data16" "1")
6380 (set_attr "mode" "TI")])
;; AVX form of punpckhbw (three-operand vpunpckhbw) on V16QI operands.
;; The selector pairs index i with i+16 for i = 8..15.
;; NOTE(review): presumably indices 16-31 address operand 2 through a
;; vec_concat that is not visible in this listing -- confirm.
6382 (define_insn "*avx_punpckhbw"
6383 [(set (match_operand:V16QI 0 "register_operand" "=x")
6386 (match_operand:V16QI 1 "register_operand" "x")
6387 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6388 (parallel [(const_int 8) (const_int 24)
6389 (const_int 9) (const_int 25)
6390 (const_int 10) (const_int 26)
6391 (const_int 11) (const_int 27)
6392 (const_int 12) (const_int 28)
6393 (const_int 13) (const_int 29)
6394 (const_int 14) (const_int 30)
6395 (const_int 15) (const_int 31)])))]
6397 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6398 [(set_attr "type" "sselog")
6399 (set_attr "prefix" "vex")
6400 (set_attr "mode" "TI")])
;; Legacy two-operand punpckhbw on V16QI operands; operand 1 is tied to
;; the destination (constraint "0").  The selector pairs index i with
;; i+16 for i = 8..15.
6402 (define_insn "sse2_punpckhbw"
6403 [(set (match_operand:V16QI 0 "register_operand" "=x")
6406 (match_operand:V16QI 1 "register_operand" "0")
6407 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6408 (parallel [(const_int 8) (const_int 24)
6409 (const_int 9) (const_int 25)
6410 (const_int 10) (const_int 26)
6411 (const_int 11) (const_int 27)
6412 (const_int 12) (const_int 28)
6413 (const_int 13) (const_int 29)
6414 (const_int 14) (const_int 30)
6415 (const_int 15) (const_int 31)])))]
6417 "punpckhbw\t{%2, %0|%0, %2}"
6418 [(set_attr "type" "sselog")
6419 (set_attr "prefix_data16" "1")
6420 (set_attr "mode" "TI")])
;; AVX form of punpcklbw (three-operand vpunpcklbw) on V16QI operands.
;; The selector pairs index i with i+16 for i = 0..7.
6422 (define_insn "*avx_punpcklbw"
6423 [(set (match_operand:V16QI 0 "register_operand" "=x")
6426 (match_operand:V16QI 1 "register_operand" "x")
6427 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6428 (parallel [(const_int 0) (const_int 16)
6429 (const_int 1) (const_int 17)
6430 (const_int 2) (const_int 18)
6431 (const_int 3) (const_int 19)
6432 (const_int 4) (const_int 20)
6433 (const_int 5) (const_int 21)
6434 (const_int 6) (const_int 22)
6435 (const_int 7) (const_int 23)])))]
6437 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6438 [(set_attr "type" "sselog")
6439 (set_attr "prefix" "vex")
6440 (set_attr "mode" "TI")])
;; Legacy two-operand punpcklbw on V16QI operands; operand 1 is tied to
;; the destination (constraint "0").  The selector pairs index i with
;; i+16 for i = 0..7.
6442 (define_insn "sse2_punpcklbw"
6443 [(set (match_operand:V16QI 0 "register_operand" "=x")
6446 (match_operand:V16QI 1 "register_operand" "0")
6447 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6448 (parallel [(const_int 0) (const_int 16)
6449 (const_int 1) (const_int 17)
6450 (const_int 2) (const_int 18)
6451 (const_int 3) (const_int 19)
6452 (const_int 4) (const_int 20)
6453 (const_int 5) (const_int 21)
6454 (const_int 6) (const_int 22)
6455 (const_int 7) (const_int 23)])))]
6457 "punpcklbw\t{%2, %0|%0, %2}"
6458 [(set_attr "type" "sselog")
6459 (set_attr "prefix_data16" "1")
6460 (set_attr "mode" "TI")])
;; AVX form of punpckhwd (three-operand vpunpckhwd) on V8HI operands.
;; The selector pairs index i with i+8 for i = 4..7.
6462 (define_insn "*avx_punpckhwd"
6463 [(set (match_operand:V8HI 0 "register_operand" "=x")
6466 (match_operand:V8HI 1 "register_operand" "x")
6467 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6468 (parallel [(const_int 4) (const_int 12)
6469 (const_int 5) (const_int 13)
6470 (const_int 6) (const_int 14)
6471 (const_int 7) (const_int 15)])))]
6473 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6474 [(set_attr "type" "sselog")
6475 (set_attr "prefix" "vex")
6476 (set_attr "mode" "TI")])
;; Legacy two-operand punpckhwd on V8HI operands; operand 1 is tied to
;; the destination.  The selector pairs index i with i+8 for i = 4..7.
6478 (define_insn "sse2_punpckhwd"
6479 [(set (match_operand:V8HI 0 "register_operand" "=x")
6482 (match_operand:V8HI 1 "register_operand" "0")
6483 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6484 (parallel [(const_int 4) (const_int 12)
6485 (const_int 5) (const_int 13)
6486 (const_int 6) (const_int 14)
6487 (const_int 7) (const_int 15)])))]
6489 "punpckhwd\t{%2, %0|%0, %2}"
6490 [(set_attr "type" "sselog")
6491 (set_attr "prefix_data16" "1")
6492 (set_attr "mode" "TI")])
;; AVX form of punpcklwd (three-operand vpunpcklwd) on V8HI operands.
;; The selector pairs index i with i+8 for i = 0..3.
6494 (define_insn "*avx_punpcklwd"
6495 [(set (match_operand:V8HI 0 "register_operand" "=x")
6498 (match_operand:V8HI 1 "register_operand" "x")
6499 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6500 (parallel [(const_int 0) (const_int 8)
6501 (const_int 1) (const_int 9)
6502 (const_int 2) (const_int 10)
6503 (const_int 3) (const_int 11)])))]
6505 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6506 [(set_attr "type" "sselog")
6507 (set_attr "prefix" "vex")
6508 (set_attr "mode" "TI")])
;; Legacy two-operand punpcklwd on V8HI operands; operand 1 is tied to
;; the destination.  The selector pairs index i with i+8 for i = 0..3.
6510 (define_insn "sse2_punpcklwd"
6511 [(set (match_operand:V8HI 0 "register_operand" "=x")
6514 (match_operand:V8HI 1 "register_operand" "0")
6515 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6516 (parallel [(const_int 0) (const_int 8)
6517 (const_int 1) (const_int 9)
6518 (const_int 2) (const_int 10)
6519 (const_int 3) (const_int 11)])))]
6521 "punpcklwd\t{%2, %0|%0, %2}"
6522 [(set_attr "type" "sselog")
6523 (set_attr "prefix_data16" "1")
6524 (set_attr "mode" "TI")])
;; AVX form of punpckhdq (three-operand vpunpckhdq) on V4SI operands,
;; selector indices 2, 6, 3, 7.
6526 (define_insn "*avx_punpckhdq"
6527 [(set (match_operand:V4SI 0 "register_operand" "=x")
6530 (match_operand:V4SI 1 "register_operand" "x")
6531 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6532 (parallel [(const_int 2) (const_int 6)
6533 (const_int 3) (const_int 7)])))]
6535 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6536 [(set_attr "type" "sselog")
6537 (set_attr "prefix" "vex")
6538 (set_attr "mode" "TI")])
;; Legacy two-operand punpckhdq on V4SI operands; operand 1 is tied to the
;; destination.  Selector indices 2, 6, 3, 7.  This is the insn emitted by
;; the vec_interleave_highv4si expander.
6540 (define_insn "sse2_punpckhdq"
6541 [(set (match_operand:V4SI 0 "register_operand" "=x")
6544 (match_operand:V4SI 1 "register_operand" "0")
6545 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6546 (parallel [(const_int 2) (const_int 6)
6547 (const_int 3) (const_int 7)])))]
6549 "punpckhdq\t{%2, %0|%0, %2}"
6550 [(set_attr "type" "sselog")
6551 (set_attr "prefix_data16" "1")
6552 (set_attr "mode" "TI")])
;; AVX form of punpckldq (three-operand vpunpckldq) on V4SI operands,
;; selector indices 0, 4, 1, 5.
6554 (define_insn "*avx_punpckldq"
6555 [(set (match_operand:V4SI 0 "register_operand" "=x")
6558 (match_operand:V4SI 1 "register_operand" "x")
6559 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6560 (parallel [(const_int 0) (const_int 4)
6561 (const_int 1) (const_int 5)])))]
6563 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6564 [(set_attr "type" "sselog")
6565 (set_attr "prefix" "vex")
6566 (set_attr "mode" "TI")])
;; Legacy two-operand punpckldq on V4SI operands; operand 1 is tied to the
;; destination.  Selector indices 0, 4, 1, 5.  This is the insn emitted by
;; the vec_interleave_lowv4si expander.
6568 (define_insn "sse2_punpckldq"
6569 [(set (match_operand:V4SI 0 "register_operand" "=x")
6572 (match_operand:V4SI 1 "register_operand" "0")
6573 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6574 (parallel [(const_int 0) (const_int 4)
6575 (const_int 1) (const_int 5)])))]
6577 "punpckldq\t{%2, %0|%0, %2}"
6578 [(set_attr "type" "sselog")
6579 (set_attr "prefix_data16" "1")
6580 (set_attr "mode" "TI")])
;; AVX element insert for the SSEMODE124 modes (V16QI, V8HI, V4SI).
;; Scalar operand 2 is duplicated and merged into vector operand 1 under
;; the single-bit mask in operand 3.  The output code converts that mask to
;; an element index with exact_log2 before printing the vpinsr instruction.
6582 (define_insn "*avx_pinsr<avxmodesuffixs>"
6583 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6584 (vec_merge:SSEMODE124
6585 (vec_duplicate:SSEMODE124
6586 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6587 (match_operand:SSEMODE124 1 "register_operand" "x")
6588 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6591 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6592 return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6594 [(set_attr "type" "sselog")
6595 (set_attr "prefix" "vex")
6596 (set_attr "mode" "TI")])
;; Legacy pinsrb: inserts QI operand 2 into V16QI operand 1, which is tied
;; to the destination.  Operand 3 is a power-of-two mask (1..32768) that
;; the output code converts to a byte index with exact_log2.
6598 (define_insn "*sse4_1_pinsrb"
6599 [(set (match_operand:V16QI 0 "register_operand" "=x")
6601 (vec_duplicate:V16QI
6602 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6603 (match_operand:V16QI 1 "register_operand" "0")
6604 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6607 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6608 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6610 [(set_attr "type" "sselog")
6611 (set_attr "prefix_extra" "1")
6612 (set_attr "mode" "TI")])
;; Legacy pinsrw: inserts HI operand 2 into V8HI operand 1, which is tied
;; to the destination.  Operand 3 is a power-of-two mask (1..128) that the
;; output code converts to a word index with exact_log2.
6614 (define_insn "*sse2_pinsrw"
6615 [(set (match_operand:V8HI 0 "register_operand" "=x")
6618 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6619 (match_operand:V8HI 1 "register_operand" "0")
6620 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6623 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6624 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6626 [(set_attr "type" "sselog")
6627 (set_attr "prefix_data16" "1")
6628 (set_attr "mode" "TI")])
6630 ;; It must come before sse2_loadld since it is preferred.
;; Legacy pinsrd: inserts SI operand 2 into V4SI operand 1, which is tied
;; to the destination.  Operand 3 is a power-of-two mask (1..8) that the
;; output code converts to a dword index with exact_log2.
6631 (define_insn "*sse4_1_pinsrd"
6632 [(set (match_operand:V4SI 0 "register_operand" "=x")
6635 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6636 (match_operand:V4SI 1 "register_operand" "0")
6637 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6640 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6641 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6643 [(set_attr "type" "sselog")
6644 (set_attr "prefix_extra" "1")
6645 (set_attr "mode" "TI")])
;; AVX pinsrq, enabled only for TARGET_AVX && TARGET_64BIT.  Inserts DI
;; operand 2 into V2DI operand 1 (a separate source register).  Operand 3
;; is a mask of 1 or 2, converted to index 0 or 1 with exact_log2.
6647 (define_insn "*avx_pinsrq"
6648 [(set (match_operand:V2DI 0 "register_operand" "=x")
6651 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6652 (match_operand:V2DI 1 "register_operand" "x")
6653 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6654 "TARGET_AVX && TARGET_64BIT"
6656 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6657 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6659 [(set_attr "type" "sselog")
6660 (set_attr "prefix" "vex")
6661 (set_attr "mode" "TI")])
;; Legacy pinsrq, enabled only for TARGET_SSE4_1 && TARGET_64BIT.  Operand
;; 1 is tied to the destination.  Operand 3 is a mask of 1 or 2, converted
;; to index 0 or 1 with exact_log2.
6663 (define_insn "*sse4_1_pinsrq"
6664 [(set (match_operand:V2DI 0 "register_operand" "=x")
6667 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6668 (match_operand:V2DI 1 "register_operand" "0")
6669 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6670 "TARGET_SSE4_1 && TARGET_64BIT"
6672 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6673 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6675 [(set_attr "type" "sselog")
6676 (set_attr "prefix_extra" "1")
6677 (set_attr "mode" "TI")])
;; pextrb to a general register: extracts the byte of V16QI operand 1
;; selected by immediate operand 2 (0..15) into SImode register operand 0.
;; The "%v" template prefix pairs with prefix "maybe_vex".
6679 (define_insn "*sse4_1_pextrb"
6680 [(set (match_operand:SI 0 "register_operand" "=r")
6683 (match_operand:V16QI 1 "register_operand" "x")
6684 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6686 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6687 [(set_attr "type" "sselog")
6688 (set_attr "prefix_extra" "1")
6689 (set_attr "prefix" "maybe_vex")
6690 (set_attr "mode" "TI")])
;; pextrb to memory: stores the byte of V16QI operand 1 selected by
;; immediate operand 2 (0..15) into QImode memory operand 0.
6692 (define_insn "*sse4_1_pextrb_memory"
6693 [(set (match_operand:QI 0 "memory_operand" "=m")
6695 (match_operand:V16QI 1 "register_operand" "x")
6696 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6698 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6699 [(set_attr "type" "sselog")
6700 (set_attr "prefix_extra" "1")
6701 (set_attr "prefix" "maybe_vex")
6702 (set_attr "mode" "TI")])
;; pextrw to a general register: extracts the word of V8HI operand 1
;; selected by immediate operand 2 (0..7) into SImode register operand 0.
6704 (define_insn "*sse2_pextrw"
6705 [(set (match_operand:SI 0 "register_operand" "=r")
6708 (match_operand:V8HI 1 "register_operand" "x")
6709 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6711 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6712 [(set_attr "type" "sselog")
6713 (set_attr "prefix_data16" "1")
6714 (set_attr "prefix" "maybe_vex")
6715 (set_attr "mode" "TI")])
;; pextrw to memory: stores the word of V8HI operand 1 selected by
;; immediate operand 2 (0..7) into HImode memory operand 0.
6717 (define_insn "*sse4_1_pextrw_memory"
6718 [(set (match_operand:HI 0 "memory_operand" "=m")
6720 (match_operand:V8HI 1 "register_operand" "x")
6721 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6723 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6724 [(set_attr "type" "sselog")
6725 (set_attr "prefix_extra" "1")
6726 (set_attr "prefix" "maybe_vex")
6727 (set_attr "mode" "TI")])
;; pextrd: extracts the dword of V4SI operand 1 selected by immediate
;; operand 2 (0..3) into SImode operand 0, a general register or memory.
6729 (define_insn "*sse4_1_pextrd"
6730 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6732 (match_operand:V4SI 1 "register_operand" "x")
6733 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6735 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6736 [(set_attr "type" "sselog")
6737 (set_attr "prefix_extra" "1")
6738 (set_attr "prefix" "maybe_vex")
6739 (set_attr "mode" "TI")])
6741 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq, enabled only for TARGET_SSE4_1 && TARGET_64BIT: extracts the
;; qword of V2DI operand 1 selected by immediate operand 2 (0..1) into
;; DImode operand 0, a general register or memory.
6742 (define_insn "*sse4_1_pextrq"
6743 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6745 (match_operand:V2DI 1 "register_operand" "x")
6746 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6747 "TARGET_SSE4_1 && TARGET_64BIT"
6748 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6749 [(set_attr "type" "sselog")
6750 (set_attr "prefix_extra" "1")
6751 (set_attr "prefix" "maybe_vex")
6752 (set_attr "mode" "TI")])
;; Expander for pshufd taking the shuffle control as one integer constant
;; (operand 2).  It splits that mask into four 2-bit fields, lowest bits
;; first, and hands them to sse2_pshufd_1 as separate element selectors.
6754 (define_expand "sse2_pshufd"
6755 [(match_operand:V4SI 0 "register_operand" "")
6756 (match_operand:V4SI 1 "nonimmediate_operand" "")
6757 (match_operand:SI 2 "const_int_operand" "")]
6760 int mask = INTVAL (operands[2]);
6761 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6762 GEN_INT ((mask >> 0) & 3),
6763 GEN_INT ((mask >> 2) & 3),
6764 GEN_INT ((mask >> 4) & 3),
6765 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle control carried as four separate element
;; selectors (operands 2-5, each in 0..3).  The output code packs them back
;; into a single immediate, two bits per selector with operand 2 in the low
;; bits, and stores it in operands[2] for printing.
;; The template uses the "%v" mnemonic prefix, so the instruction is
;; VEX-encoded only when AVX is enabled.  The "prefix" attribute is
;; therefore "maybe_vex", as in sse2_pshuflw_1 and sse2_pshufhw_1; an
;; unconditional "vex" would describe the wrong encoding for non-AVX code.
6769 (define_insn "sse2_pshufd_1"
6770 [(set (match_operand:V4SI 0 "register_operand" "=x")
6772 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6773 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6774 (match_operand 3 "const_0_to_3_operand" "")
6775 (match_operand 4 "const_0_to_3_operand" "")
6776 (match_operand 5 "const_0_to_3_operand" "")])))]
6780 mask |= INTVAL (operands[2]) << 0;
6781 mask |= INTVAL (operands[3]) << 2;
6782 mask |= INTVAL (operands[4]) << 4;
6783 mask |= INTVAL (operands[5]) << 6;
6784 operands[2] = GEN_INT (mask);
6786 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6788 [(set_attr "type" "sselog1")
6789 (set_attr "prefix_data16" "1")
6790 (set_attr "prefix" "maybe_vex")
6791 (set_attr "mode" "TI")])
;; Expander for pshuflw taking the shuffle control as one integer constant
;; (operand 2).  It splits that mask into four 2-bit fields, lowest bits
;; first, and hands them to sse2_pshuflw_1 as separate element selectors.
6793 (define_expand "sse2_pshuflw"
6794 [(match_operand:V8HI 0 "register_operand" "")
6795 (match_operand:V8HI 1 "nonimmediate_operand" "")
6796 (match_operand:SI 2 "const_int_operand" "")]
6799 int mask = INTVAL (operands[2]);
6800 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6801 GEN_INT ((mask >> 0) & 3),
6802 GEN_INT ((mask >> 2) & 3),
6803 GEN_INT ((mask >> 4) & 3),
6804 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with the shuffle control carried as four element selectors
;; (operands 2-5, each in 0..3).  The output code packs them into a single
;; immediate, two bits per selector with operand 2 lowest, and stores it in
;; operands[2] for printing.  The rest of the selector vector is not
;; visible in this listing.
6808 (define_insn "sse2_pshuflw_1"
6809 [(set (match_operand:V8HI 0 "register_operand" "=x")
6811 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6812 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6813 (match_operand 3 "const_0_to_3_operand" "")
6814 (match_operand 4 "const_0_to_3_operand" "")
6815 (match_operand 5 "const_0_to_3_operand" "")
6823 mask |= INTVAL (operands[2]) << 0;
6824 mask |= INTVAL (operands[3]) << 2;
6825 mask |= INTVAL (operands[4]) << 4;
6826 mask |= INTVAL (operands[5]) << 6;
6827 operands[2] = GEN_INT (mask);
6829 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6831 [(set_attr "type" "sselog")
6832 (set_attr "prefix_rep" "1")
6833 (set_attr "prefix" "maybe_vex")
6834 (set_attr "mode" "TI")])
;; Expander for pshufhw taking the shuffle control as one integer constant
;; (operand 2).  Each 2-bit field of the mask is biased by 4 before being
;; passed on, so sse2_pshufhw_1 receives selectors in the range 4..7.
6836 (define_expand "sse2_pshufhw"
6837 [(match_operand:V8HI 0 "register_operand" "")
6838 (match_operand:V8HI 1 "nonimmediate_operand" "")
6839 (match_operand:SI 2 "const_int_operand" "")]
6842 int mask = INTVAL (operands[2]);
6843 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6844 GEN_INT (((mask >> 0) & 3) + 4),
6845 GEN_INT (((mask >> 2) & 3) + 4),
6846 GEN_INT (((mask >> 4) & 3) + 4),
6847 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with the shuffle control carried as four element selectors
;; (operands 2-5, each in 4..7) following fixed leading selector entries.
;; The output code removes the bias of 4 from each selector, packs the
;; results two bits apiece into one immediate, and stores it in operands[2].
6851 (define_insn "sse2_pshufhw_1"
6852 [(set (match_operand:V8HI 0 "register_operand" "=x")
6854 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6855 (parallel [(const_int 0)
6859 (match_operand 2 "const_4_to_7_operand" "")
6860 (match_operand 3 "const_4_to_7_operand" "")
6861 (match_operand 4 "const_4_to_7_operand" "")
6862 (match_operand 5 "const_4_to_7_operand" "")])))]
6866 mask |= (INTVAL (operands[2]) - 4) << 0;
6867 mask |= (INTVAL (operands[3]) - 4) << 2;
6868 mask |= (INTVAL (operands[4]) - 4) << 4;
6869 mask |= (INTVAL (operands[5]) - 4) << 6;
6870 operands[2] = GEN_INT (mask);
6872 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6874 [(set_attr "type" "sselog")
6875 (set_attr "prefix_rep" "1")
6876 (set_attr "prefix" "maybe_vex")
6877 (set_attr "mode" "TI")])
;; Expander that loads SImode operand 1 into a V4SI register.  The
;; preparation statement supplies operands[2] as the all-zero V4SI constant.
6879 (define_expand "sse2_loadd"
6880 [(set (match_operand:V4SI 0 "register_operand" "")
6883 (match_operand:SI 1 "nonimmediate_operand" ""))
6887 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX scalar SI load into a V4SI register.  Alternatives:
;;   0: vmovd from memory, operand 1 being constant zero ("C");
;;   1: vmovd from a general register into a "Yi" register, operand 1 zero;
;;   2: vmovss merging SSE-register operand 2 with vector operand 1.
6889 (define_insn "*avx_loadld"
6890 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
6893 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
6894 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
6898 vmovd\t{%2, %0|%0, %2}
6899 vmovd\t{%2, %0|%0, %2}
6900 vmovss\t{%2, %1, %0|%0, %1, %2}"
6901 [(set_attr "type" "ssemov")
6902 (set_attr "prefix" "vex")
6903 (set_attr "mode" "TI,TI,V4SF")])
;; Legacy scalar SI load into a V4SI register.  Alternatives:
;;   0: movd from memory into a "Y2" register, operand 1 zero;
;;   1: movd from a general register into a "Yi" register, operand 1 zero;
;;   2: movss from memory, operand 1 zero;
;;   3: movss from an SSE register, operand 1 tied to the destination.
6905 (define_insn "sse2_loadld"
6906 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
6909 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
6910 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
6914 movd\t{%2, %0|%0, %2}
6915 movd\t{%2, %0|%0, %2}
6916 movss\t{%2, %0|%0, %2}
6917 movss\t{%2, %0|%0, %2}"
6918 [(set_attr "type" "ssemov")
6919 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of V4SI register operand 1 into SImode operand 0.
;; After reload the insn is split into a plain SImode move, provided that
;; inter-unit moves are allowed, the destination is memory, or the
;; destination is not a general register.  The preparation statement
;; re-creates operand 1 as an SImode REG with the same register number.
6921 (define_insn_and_split "sse2_stored"
6922 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
6924 (match_operand:V4SI 1 "register_operand" "x,Yi")
6925 (parallel [(const_int 0)])))]
6928 "&& reload_completed
6929 && (TARGET_INTER_UNIT_MOVES
6930 || MEM_P (operands [0])
6931 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6932 [(set (match_dup 0) (match_dup 1))]
6934 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element operand 2 (0..3) of a V4SI value in offsettable memory
;; into a general register.  The split replaces the extract with an SImode
;; move from the source address displaced by 4 bytes per element index.
6937 (define_insn_and_split "*vec_ext_v4si_mem"
6938 [(set (match_operand:SI 0 "register_operand" "=r")
6940 (match_operand:V4SI 1 "memory_operand" "o")
6941 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6947 int i = INTVAL (operands[2]);
6949 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of V2DI register operand 1 into DImode
;; operand 0 (register or memory).  No preparation code is visible here.
6953 (define_expand "sse_storeq"
6954 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6956 (match_operand:V2DI 1 "register_operand" "")
6957 (parallel [(const_int 0)])))]
;; 64-bit store of element 0 of V2DI operand 1 into DImode operand 0;
;; memory-to-memory combinations are rejected by the condition.  Only the
;; third alternative has a visible template, as the "type", "prefix" and
;; "mode" attributes are "*" for the first two.
;; The third alternative loads a general register from offsettable memory
;; and is typed "imov", so it must print the plain integer mov{q}.  With a
;; "%v" prefix it would become vmovq/vmov under AVX, and no such
;; memory-to-general-register instruction exists.  Its prefix attribute is
;; accordingly "orig".
6961 (define_insn "*sse2_storeq_rex64"
6962 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
6964 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6965 (parallel [(const_int 0)])))]
6966 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6970 mov{q}\t{%1, %0|%0, %1}"
6971 [(set_attr "type" "*,*,imov")
6972 (set_attr "prefix" "*,*,orig")
6973 (set_attr "mode" "*,*,DI")])
;; Store of element 0 of V2DI register operand 1 into DImode operand 0
;; (memory or SSE register).  The condition and output template are not
;; visible in this listing.
6975 (define_insn "*sse2_storeq"
6976 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
6978 (match_operand:V2DI 1 "register_operand" "x")
6979 (parallel [(const_int 0)])))]
;; Split for the element-0 V2DI store pattern: rewrites it as a plain DImode
;; move when inter-unit moves are allowed, the destination is memory, or
;; the destination is not a general register.  Operand 1 is re-created as
;; a DImode REG with the same register number.
;; NOTE(review): the defining header line of this split is not visible in
;; this listing.
6984 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6986 (match_operand:V2DI 1 "register_operand" "")
6987 (parallel [(const_int 0)])))]
6990 && (TARGET_INTER_UNIT_MOVES
6991 || MEM_P (operands [0])
6992 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6993 [(set (match_dup 0) (match_dup 1))]
6995 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; AVX extraction of element 1 (the high qword) of V2DI operand 1 into
;; DImode operand 0; memory-to-memory combinations are rejected.
;; Alternatives:
;;   0: vmovhps, SSE register -> memory;
;;   1: vpsrldq by 8 bytes, SSE register -> SSE register;
;;   2: vmovq from the high half (%H1) of offsettable memory -> SSE register;
;;   3: integer mov{q} from the high half of offsettable memory -> general
;;      register.
;; Alternative 3 is an ordinary integer load (type "imov"), so it prints
;; mov{q} exactly as *vec_extractv2di_1_rex64 does; there is no VEX-encoded
;; memory-to-general-register move.  The "prefix" attribute is listed per
;; alternative so that this one is "orig".
6998 (define_insn "*vec_extractv2di_1_rex64_avx"
6999 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7001 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7002 (parallel [(const_int 1)])))]
7005 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7007 vmovhps\t{%1, %0|%0, %1}
7008 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7009 vmovq\t{%H1, %0|%0, %H1}
7010 mov{q}\t{%H1, %0|%0, %H1}"
7011 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7012 (set_attr "memory" "*,none,*,*")
7013 (set_attr "prefix" "vex,vex,vex,orig")
7014 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit extraction of element 1 of V2DI operand 1 into DImode operand 0;
;; memory-to-memory combinations are rejected.  Alternatives:
;;   0: movhps, SSE register -> memory;
;;   1: psrldq by 8 bytes in place (operand 1 tied to the destination);
;;   2: movq from the high half (%H1) of offsettable memory -> SSE register;
;;   3: mov{q} from the high half of offsettable memory -> general register.
7016 (define_insn "*vec_extractv2di_1_rex64"
7017 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7019 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7020 (parallel [(const_int 1)])))]
7021 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7023 movhps\t{%1, %0|%0, %1}
7024 psrldq\t{$8, %0|%0, 8}
7025 movq\t{%H1, %0|%0, %H1}
7026 mov{q}\t{%H1, %0|%0, %H1}"
7027 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7028 (set_attr "memory" "*,none,*,*")
7029 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX extraction of element 1 of V2DI operand 1 into DImode operand 0
;; without a general-register alternative.  Alternatives:
;;   0: vmovhps, SSE register -> memory;
;;   1: vpsrldq by 8 bytes, SSE register -> SSE register;
;;   2: vmovq from the high half (%H1) of offsettable memory -> SSE register.
7031 (define_insn "*vec_extractv2di_1_avx"
7032 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7034 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7035 (parallel [(const_int 1)])))]
7038 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7040 vmovhps\t{%1, %0|%0, %1}
7041 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7042 vmovq\t{%H1, %0|%0, %H1}"
7043 [(set_attr "type" "ssemov,sseishft,ssemov")
7044 (set_attr "memory" "*,none,*")
7045 (set_attr "prefix" "vex")
7046 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 extraction of element 1 of V2DI operand 1 into DImode operand 0
;; (the condition requires TARGET_SSE2 and rejects memory-to-memory).
;; Alternatives:
;;   0: movhps, SSE register -> memory;
;;   1: psrldq by 8 bytes in place (operand 1 tied to the destination);
;;   2: movq from the high half (%H1) of offsettable memory -> SSE register.
7048 (define_insn "*vec_extractv2di_1_sse2"
7049 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7051 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7052 (parallel [(const_int 1)])))]
7054 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7056 movhps\t{%1, %0|%0, %1}
7057 psrldq\t{$8, %0|%0, 8}
7058 movq\t{%H1, %0|%0, %H1}"
7059 [(set_attr "type" "ssemov,sseishft,ssemov")
7060 (set_attr "memory" "*,none,*")
7061 (set_attr "mode" "V2SF,TI,TI")])
7063 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 extraction of element 1 of V2DI operand 1, using only
;; single-precision move instructions.  Alternatives:
;;   0: movhps, SSE register -> memory;
;;   1: movhlps, SSE register -> SSE register;
;;   2: movlps from the high half (%H1) of offsettable memory.
7064 (define_insn "*vec_extractv2di_1_sse"
7065 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7067 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7068 (parallel [(const_int 1)])))]
7069 "!TARGET_SSE2 && TARGET_SSE
7070 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7072 movhps\t{%1, %0|%0, %1}
7073 movhlps\t{%1, %0|%0, %1}
7074 movlps\t{%H1, %0|%0, %H1}"
7075 [(set_attr "type" "ssemov")
7076 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcast of SImode register operand 1 into a V4SI register.
;; Alternative 0 uses pshufd with immediate 0 on "Y2" registers (optionally
;; VEX-encoded); alternative 1 uses shufps with immediate 0 in place, with
;; operand 1 tied to the destination, and is always the legacy encoding.
7078 (define_insn "*vec_dupv4si"
7079 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7081 (match_operand:SI 1 "register_operand" " Y2,0")))]
7084 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7085 shufps\t{$0, %0, %0|%0, %0, 0}"
7086 [(set_attr "type" "sselog1")
7087 (set_attr "prefix" "maybe_vex,orig")
7088 (set_attr "mode" "TI,V4SF")])
;; AVX broadcast of DImode register operand 1 into a V2DI register,
;; implemented as vpunpcklqdq with operand 1 used for both sources.
7090 (define_insn "*vec_dupv2di_avx"
7091 [(set (match_operand:V2DI 0 "register_operand" "=x")
7093 (match_operand:DI 1 "register_operand" "x")))]
7095 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7096 [(set_attr "type" "sselog1")
7097 (set_attr "prefix" "vex")
7098 (set_attr "mode" "TI")])
;; Legacy broadcast of DImode register operand 1 into a V2DI register; in
;; both alternatives operand 1 is tied to the destination.  The attributes
;; give types sselog1/ssemov and modes TI/V4SF; the output templates are
;; not visible in this listing.
7100 (define_insn "*vec_dupv2di"
7101 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7103 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7108 [(set_attr "type" "sselog1,ssemov")
7109 (set_attr "mode" "TI,V4SF")])
;; AVX-era concatenation of two SImode values into V2SI.  Alternatives:
;;   0: vpinsrd inserting operand 2 (GPR or memory) at index 1;
;;   1: vpunpckldq of two SSE registers;
;;   2: vmovd of operand 1 when operand 2 is constant zero;
;;   3: MMX punpckldq (operand 1 tied to the destination);
;;   4: MMX movd when operand 2 is constant zero.
;; The two MMX alternatives keep the legacy ("orig") prefix.
7111 (define_insn "*vec_concatv2si_avx"
7112 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7114 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7115 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7118 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7119 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7120 vmovd\t{%1, %0|%0, %1}
7121 punpckldq\t{%2, %0|%0, %2}
7122 movd\t{%1, %0|%0, %1}"
7123 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7124 (set (attr "prefix")
7125 (if_then_else (eq_attr "alternative" "3,4")
7126 (const_string "orig")
7127 (const_string "vex")))
7128 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE4.1 concatenation of two SImode values into V2SI.  Alternatives:
;;   0: pinsrd inserting operand 2 at index 1 (operand 1 tied);
;;   1: punpckldq with an SSE register (operand 1 tied);
;;   2: movd of operand 1 when operand 2 is constant zero;
;;   3: MMX punpckldq (operand 1 tied);
;;   4: MMX movd when operand 2 is constant zero.
7130 (define_insn "*vec_concatv2si_sse4_1"
7131 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7133 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7134 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7137 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7138 punpckldq\t{%2, %0|%0, %2}
7139 movd\t{%1, %0|%0, %1}
7140 punpckldq\t{%2, %0|%0, %2}
7141 movd\t{%1, %0|%0, %1}"
7142 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7143 (set_attr "prefix_extra" "1,*,*,*,*")
7144 (set_attr "mode" "TI,TI,TI,DI,DI")])
7146 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7147 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7148 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 concatenation of two SImode values into V2SI; operand 2 must be a
;; register or zero.  Alternatives:
;;   0: punpckldq with an SSE register (operand 1 tied);
;;   1: movd of operand 1 when operand 2 is constant zero;
;;   2: MMX punpckldq (operand 1 tied);
;;   3: MMX movd when operand 2 is constant zero.
7149 (define_insn "*vec_concatv2si_sse2"
7150 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7152 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7153 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7156 punpckldq\t{%2, %0|%0, %2}
7157 movd\t{%1, %0|%0, %1}
7158 punpckldq\t{%2, %0|%0, %2}
7159 movd\t{%1, %0|%0, %1}"
7160 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7161 (set_attr "mode" "TI,TI,DI,DI")])
;; SSE concatenation of two SImode values into V2SI using single-precision
;; instructions for the SSE alternatives.  Alternatives:
;;   0: unpcklps with an SSE register (operand 1 tied);
;;   1: movss from memory when operand 2 is constant zero;
;;   2: MMX punpckldq (operand 1 tied);
;;   3: MMX movd when operand 2 is constant zero.
7163 (define_insn "*vec_concatv2si_sse"
7164 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7166 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7167 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7170 unpcklps\t{%2, %0|%0, %2}
7171 movss\t{%1, %0|%0, %1}
7172 punpckldq\t{%2, %0|%0, %2}
7173 movd\t{%1, %0|%0, %1}"
7174 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7175 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX concatenation of two V2SI values into V4SI.  Alternative 0 uses
;; vpunpcklqdq on two registers; alternative 1 uses vmovhps to bring
;; operand 2 in from memory.
7177 (define_insn "*vec_concatv4si_1_avx"
7178 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7180 (match_operand:V2SI 1 "register_operand" " x,x")
7181 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7184 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7185 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7186 [(set_attr "type" "sselog,ssemov")
7187 (set_attr "prefix" "vex")
7188 (set_attr "mode" "TI,V2SF")])
;; Legacy concatenation of two V2SI values into V4SI; operand 1 is tied to
;; the destination in every alternative.  Alternatives:
;;   0: punpcklqdq on "Y2" registers;
;;   1: movlhps from an SSE register;
;;   2: movhps from memory.
7190 (define_insn "*vec_concatv4si_1"
7191 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7193 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7194 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7197 punpcklqdq\t{%2, %0|%0, %2}
7198 movlhps\t{%2, %0|%0, %2}
7199 movhps\t{%2, %0|%0, %2}"
7200 [(set_attr "type" "sselog,ssemov,ssemov")
7201 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX concatenation of two DImode values into V2DI
;; (!TARGET_64BIT && TARGET_AVX).  Alternatives:
;;   0: vmovq from memory when operand 2 is constant zero;
;;   1: movq2dq from an MMX register when operand 2 is zero (legacy prefix);
;;   2: vpunpcklqdq of two SSE registers;
;;   3: vmovhps bringing operand 2 in from memory.
7203 (define_insn "*vec_concatv2di_avx"
7204 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7206 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7207 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7208 "!TARGET_64BIT && TARGET_AVX"
7210 vmovq\t{%1, %0|%0, %1}
7211 movq2dq\t{%1, %0|%0, %1}
7212 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7213 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7214 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7215 (set (attr "prefix")
7216 (if_then_else (eq_attr "alternative" "1")
7217 (const_string "orig")
7218 (const_string "vex")))
7219 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit legacy concatenation of two DImode values into V2DI
;; (!TARGET_64BIT && TARGET_SSE).  Alternatives:
;;   0: movq from memory or a "Y2" register when operand 2 is zero;
;;   1: movq2dq from an MMX register when operand 2 is zero;
;;   2: punpcklqdq on "Y2" registers (operand 1 tied);
;;   3: movlhps from an SSE register (operand 1 tied);
;;   4: movhps from memory (operand 1 tied).
7221 (define_insn "vec_concatv2di"
7222 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7224 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7225 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7226 "!TARGET_64BIT && TARGET_SSE"
7228 movq\t{%1, %0|%0, %1}
7229 movq2dq\t{%1, %0|%0, %1}
7230 punpcklqdq\t{%2, %0|%0, %2}
7231 movlhps\t{%2, %0|%0, %2}
7232 movhps\t{%2, %0|%0, %2}"
7233 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7234 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX concatenation of two DImode values into V2DI
;; (TARGET_64BIT && TARGET_AVX).  Alternatives:
;;   0: vpinsrq inserting operand 2 (GPR or memory) at index 1;
;;   1: vmovq from memory when operand 2 is zero;
;;   2: vmovq from a general register into a "Yi" register, operand 2 zero;
;;   3: movq2dq from an MMX register, operand 2 zero (legacy prefix);
;;   4: vpunpcklqdq of two SSE registers;
;;   5: vmovhps bringing operand 2 in from memory.
7236 (define_insn "*vec_concatv2di_rex64_avx"
7237 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7239 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7240 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7241 "TARGET_64BIT && TARGET_AVX"
7243 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7244 vmovq\t{%1, %0|%0, %1}
7245 vmovq\t{%1, %0|%0, %1}
7246 movq2dq\t{%1, %0|%0, %1}
7247 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7248 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7249 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7250 (set (attr "prefix")
7251 (if_then_else (eq_attr "alternative" "3")
7252 (const_string "orig")
7253 (const_string "vex")))
7254 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 concatenation of two DImode values into V2DI
;; (TARGET_64BIT && TARGET_SSE4_1).  Alternatives:
;;   0: pinsrq inserting operand 2 at index 1 (operand 1 tied);
;;   1: movq from memory or an SSE register when operand 2 is zero;
;;   2: movq from a general register into a "Yi" register, operand 2 zero;
;;   3: movq2dq from an MMX register, operand 2 zero;
;;   4: punpcklqdq with an SSE register (operand 1 tied);
;;   5: movlhps from an SSE register (operand 1 tied);
;;   6: movhps from memory (operand 1 tied).
7256 (define_insn "*vec_concatv2di_rex64_sse4_1"
7257 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7259 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7260 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7261 "TARGET_64BIT && TARGET_SSE4_1"
7263 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7264 movq\t{%1, %0|%0, %1}
7265 movq\t{%1, %0|%0, %1}
7266 movq2dq\t{%1, %0|%0, %1}
7267 punpcklqdq\t{%2, %0|%0, %2}
7268 movlhps\t{%2, %0|%0, %2}
7269 movhps\t{%2, %0|%0, %2}"
7270 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7271 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7272 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit legacy concatenation of two DImode values into V2DI
;; (TARGET_64BIT && TARGET_SSE).  Alternatives:
;;   0: movq from memory or a "Y2" register when operand 2 is zero;
;;   1: movq from a general register into a "Yi" register, operand 2 zero;
;;   2: movq2dq from an MMX register, operand 2 zero;
;;   3: punpcklqdq on "Y2" registers (operand 1 tied);
;;   4: movlhps from an SSE register (operand 1 tied);
;;   5: movhps from memory (operand 1 tied).
7274 (define_insn "*vec_concatv2di_rex64_sse"
7275 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7277 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7278 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7279 "TARGET_64BIT && TARGET_SSE"
7281 movq\t{%1, %0|%0, %1}
7282 movq\t{%1, %0|%0, %1}
7283 movq2dq\t{%1, %0|%0, %1}
7284 punpcklqdq\t{%2, %0|%0, %2}
7285 movlhps\t{%2, %0|%0, %2}
7286 movhps\t{%2, %0|%0, %2}"
7287 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7288 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander for vec_unpacku_hi_v16qi (V16QI source, V8HI result).  It
;; dispatches to one of three helpers, passing (true, true); the sse5
;; helper is chosen under TARGET_SSE5 and the sse helper is the fallback.
;; NOTE(review): the flags presumably mean unsigned and high-half, matching
;; the pattern name; the guard of the first call is not visible here.
7290 (define_expand "vec_unpacku_hi_v16qi"
7291 [(match_operand:V8HI 0 "register_operand" "")
7292 (match_operand:V16QI 1 "register_operand" "")]
7296 ix86_expand_sse4_unpack (operands, true, true);
7297 else if (TARGET_SSE5)
7298 ix86_expand_sse5_unpack (operands, true, true);
7300 ix86_expand_sse_unpack (operands, true, true);
;; Expander for vec_unpacks_hi_v16qi (V16QI source, V8HI result).  Same
;; three-way dispatch as the unsigned variant, passing (false, true).
;; NOTE(review): flags presumably mean signed and high-half -- confirm.
7304 (define_expand "vec_unpacks_hi_v16qi"
7305 [(match_operand:V8HI 0 "register_operand" "")
7306 (match_operand:V16QI 1 "register_operand" "")]
7310 ix86_expand_sse4_unpack (operands, false, true);
7311 else if (TARGET_SSE5)
7312 ix86_expand_sse5_unpack (operands, false, true);
7314 ix86_expand_sse_unpack (operands, false, true);
;; Expander for vec_unpacku_lo_v16qi (V16QI source, V8HI result).
;; Three-way dispatch to the sse4/sse5/sse unpack helpers, passing
;; (true, false).
;; NOTE(review): flags presumably mean unsigned and low-half -- confirm.
7318 (define_expand "vec_unpacku_lo_v16qi"
7319 [(match_operand:V8HI 0 "register_operand" "")
7320 (match_operand:V16QI 1 "register_operand" "")]
7324 ix86_expand_sse4_unpack (operands, true, false);
7325 else if (TARGET_SSE5)
7326 ix86_expand_sse5_unpack (operands, true, false);
7328 ix86_expand_sse_unpack (operands, true, false);
;; Expander for vec_unpacks_lo_v16qi (V16QI source, V8HI result).
;; Three-way dispatch to the sse4/sse5/sse unpack helpers, passing
;; (false, false).
;; NOTE(review): flags presumably mean signed and low-half -- confirm.
7332 (define_expand "vec_unpacks_lo_v16qi"
7333 [(match_operand:V8HI 0 "register_operand" "")
7334 (match_operand:V16QI 1 "register_operand" "")]
7338 ix86_expand_sse4_unpack (operands, false, false);
7339 else if (TARGET_SSE5)
7340 ix86_expand_sse5_unpack (operands, false, false);
7342 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v8hi: V8HI register source (operand 1),
;; V4SI register destination (operand 0).  The preparation code hands
;; the operands to ix86_expand_sse4_unpack, to ix86_expand_sse5_unpack
;; (when TARGET_SSE5), or to ix86_expand_sse_unpack, always with the
;; flag pair (true, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; (unsigned, high half) -- confirm against the helper definitions.
7346 (define_expand "vec_unpacku_hi_v8hi"
7347 [(match_operand:V4SI 0 "register_operand" "")
7348 (match_operand:V8HI 1 "register_operand" "")]
7352 ix86_expand_sse4_unpack (operands, true, true);
7353 else if (TARGET_SSE5)
7354 ix86_expand_sse5_unpack (operands, true, true);
7356 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v8hi: V8HI register source (operand 1),
;; V4SI register destination (operand 0).  The preparation code hands
;; the operands to ix86_expand_sse4_unpack, to ix86_expand_sse5_unpack
;; (when TARGET_SSE5), or to ix86_expand_sse_unpack, always with the
;; flag pair (false, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; (signed, high half) -- confirm against the helper definitions.
7360 (define_expand "vec_unpacks_hi_v8hi"
7361 [(match_operand:V4SI 0 "register_operand" "")
7362 (match_operand:V8HI 1 "register_operand" "")]
7366 ix86_expand_sse4_unpack (operands, false, true);
7367 else if (TARGET_SSE5)
7368 ix86_expand_sse5_unpack (operands, false, true);
7370 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v8hi: V8HI register source (operand 1),
;; V4SI register destination (operand 0).  The preparation code hands
;; the operands to ix86_expand_sse4_unpack, to ix86_expand_sse5_unpack
;; (when TARGET_SSE5), or to ix86_expand_sse_unpack, always with the
;; flag pair (true, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; (unsigned, low half) -- confirm against the helper definitions.
7374 (define_expand "vec_unpacku_lo_v8hi"
7375 [(match_operand:V4SI 0 "register_operand" "")
7376 (match_operand:V8HI 1 "register_operand" "")]
7380 ix86_expand_sse4_unpack (operands, true, false);
7381 else if (TARGET_SSE5)
7382 ix86_expand_sse5_unpack (operands, true, false);
7384 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v8hi: V8HI register source (operand 1),
;; V4SI register destination (operand 0).  The preparation code hands
;; the operands to ix86_expand_sse4_unpack, to ix86_expand_sse5_unpack
;; (when TARGET_SSE5), or to ix86_expand_sse_unpack, always with the
;; flag pair (false, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; (signed, low half) -- confirm against the helper definitions.
7388 (define_expand "vec_unpacks_lo_v8hi"
7389 [(match_operand:V4SI 0 "register_operand" "")
7390 (match_operand:V8HI 1 "register_operand" "")]
7394 ix86_expand_sse4_unpack (operands, false, false);
7395 else if (TARGET_SSE5)
7396 ix86_expand_sse5_unpack (operands, false, false);
7398 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v4si: V4SI register source (operand 1),
;; V2DI register destination (operand 0).  The preparation code hands
;; the operands to ix86_expand_sse4_unpack, to ix86_expand_sse5_unpack
;; (when TARGET_SSE5), or to ix86_expand_sse_unpack, always with the
;; flag pair (true, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; (unsigned, high half) -- confirm against the helper definitions.
7402 (define_expand "vec_unpacku_hi_v4si"
7403 [(match_operand:V2DI 0 "register_operand" "")
7404 (match_operand:V4SI 1 "register_operand" "")]
7408 ix86_expand_sse4_unpack (operands, true, true);
7409 else if (TARGET_SSE5)
7410 ix86_expand_sse5_unpack (operands, true, true);
7412 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v4si: V4SI register source (operand 1),
;; V2DI register destination (operand 0).  The preparation code hands
;; the operands to ix86_expand_sse4_unpack, to ix86_expand_sse5_unpack
;; (when TARGET_SSE5), or to ix86_expand_sse_unpack, always with the
;; flag pair (false, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; (signed, high half) -- confirm against the helper definitions.
7416 (define_expand "vec_unpacks_hi_v4si"
7417 [(match_operand:V2DI 0 "register_operand" "")
7418 (match_operand:V4SI 1 "register_operand" "")]
7422 ix86_expand_sse4_unpack (operands, false, true);
7423 else if (TARGET_SSE5)
7424 ix86_expand_sse5_unpack (operands, false, true);
7426 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v4si: V4SI register source (operand 1),
;; V2DI register destination (operand 0).  The preparation code hands
;; the operands to ix86_expand_sse4_unpack, to ix86_expand_sse5_unpack
;; (when TARGET_SSE5), or to ix86_expand_sse_unpack, always with the
;; flag pair (true, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; (unsigned, low half) -- confirm against the helper definitions.
7430 (define_expand "vec_unpacku_lo_v4si"
7431 [(match_operand:V2DI 0 "register_operand" "")
7432 (match_operand:V4SI 1 "register_operand" "")]
7436 ix86_expand_sse4_unpack (operands, true, false);
7437 else if (TARGET_SSE5)
7438 ix86_expand_sse5_unpack (operands, true, false);
7440 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v4si: V4SI register source (operand 1),
;; V2DI register destination (operand 0).  The preparation code hands
;; the operands to ix86_expand_sse4_unpack, to ix86_expand_sse5_unpack
;; (when TARGET_SSE5), or to ix86_expand_sse_unpack, always with the
;; flag pair (false, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; (signed, low half) -- confirm against the helper definitions.
7444 (define_expand "vec_unpacks_lo_v4si"
7445 [(match_operand:V2DI 0 "register_operand" "")
7446 (match_operand:V4SI 1 "register_operand" "")]
7450 ix86_expand_sse4_unpack (operands, false, false);
7451 else if (TARGET_SSE5)
7452 ix86_expand_sse5_unpack (operands, false, false);
7454 ix86_expand_sse_unpack (operands, false, false);
7458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander sse2_uavgv16qi3: V16QI operands 1 and 2 (register or
;; memory) produce the V16QI register operand 0.  The RTL template
;; contains a V16QI const_vector of sixteen ones, and the trailing C
;; statement calls ix86_fixup_binary_operands_no_copy with PLUS and
;; V16QImode to adjust the operands before the pattern is generated.
7464 (define_expand "sse2_uavgv16qi3"
7465 [(set (match_operand:V16QI 0 "register_operand" "")
7471 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7473 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7474 (const_vector:V16QI [(const_int 1) (const_int 1)
7475 (const_int 1) (const_int 1)
7476 (const_int 1) (const_int 1)
7477 (const_int 1) (const_int 1)
7478 (const_int 1) (const_int 1)
7479 (const_int 1) (const_int 1)
7480 (const_int 1) (const_int 1)
7481 (const_int 1) (const_int 1)]))
7484 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI uavg pattern; emits three-operand vpavgb.
;; Operand 1 is a register marked commutative with operand 2 ("%x");
;; operand 2 may be a register or memory ("xm").  Matches only when
;; TARGET_AVX holds and ix86_binary_operator_ok (PLUS, V16QImode,
;; operands) accepts the operand combination.
7486 (define_insn "*avx_uavgv16qi3"
7487 [(set (match_operand:V16QI 0 "register_operand" "=x")
7493 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7495 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7496 (const_vector:V16QI [(const_int 1) (const_int 1)
7497 (const_int 1) (const_int 1)
7498 (const_int 1) (const_int 1)
7499 (const_int 1) (const_int 1)
7500 (const_int 1) (const_int 1)
7501 (const_int 1) (const_int 1)
7502 (const_int 1) (const_int 1)
7503 (const_int 1) (const_int 1)]))
7505 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7506 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7507 [(set_attr "type" "sseiadd")
7508 (set_attr "prefix" "vex")
7509 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI uavg pattern; emits two-operand pavgb.
;; Operand 1 is tied to the destination and marked commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory ("xm").
;; Matches only when TARGET_SSE2 holds and ix86_binary_operator_ok
;; (PLUS, V16QImode, operands) accepts the operand combination.
7511 (define_insn "*sse2_uavgv16qi3"
7512 [(set (match_operand:V16QI 0 "register_operand" "=x")
7518 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7520 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7521 (const_vector:V16QI [(const_int 1) (const_int 1)
7522 (const_int 1) (const_int 1)
7523 (const_int 1) (const_int 1)
7524 (const_int 1) (const_int 1)
7525 (const_int 1) (const_int 1)
7526 (const_int 1) (const_int 1)
7527 (const_int 1) (const_int 1)
7528 (const_int 1) (const_int 1)]))
7530 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7531 "pavgb\t{%2, %0|%0, %2}"
7532 [(set_attr "type" "sseiadd")
7533 (set_attr "prefix_data16" "1")
7534 (set_attr "mode" "TI")])
;; Expander sse2_uavgv8hi3: V8HI operands 1 and 2 (register or memory)
;; produce the V8HI register operand 0.  The RTL template contains a
;; V8HI const_vector of eight ones, and the trailing C statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS and V8HImode to adjust
;; the operands before the pattern is generated.
7536 (define_expand "sse2_uavgv8hi3"
7537 [(set (match_operand:V8HI 0 "register_operand" "")
7543 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7545 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7546 (const_vector:V8HI [(const_int 1) (const_int 1)
7547 (const_int 1) (const_int 1)
7548 (const_int 1) (const_int 1)
7549 (const_int 1) (const_int 1)]))
7552 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI uavg pattern; emits three-operand vpavgw.
;; Operand 1 is a register marked commutative with operand 2 ("%x");
;; operand 2 may be a register or memory ("xm").  Matches only when
;; TARGET_AVX holds and ix86_binary_operator_ok (PLUS, V8HImode,
;; operands) accepts the operand combination.
7554 (define_insn "*avx_uavgv8hi3"
7555 [(set (match_operand:V8HI 0 "register_operand" "=x")
7561 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7563 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7564 (const_vector:V8HI [(const_int 1) (const_int 1)
7565 (const_int 1) (const_int 1)
7566 (const_int 1) (const_int 1)
7567 (const_int 1) (const_int 1)]))
7569 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7570 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7571 [(set_attr "type" "sseiadd")
7572 (set_attr "prefix" "vex")
7573 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI uavg pattern; emits two-operand pavgw.
;; Operand 1 is tied to the destination and marked commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory ("xm").
;; Matches only when TARGET_SSE2 holds and ix86_binary_operator_ok
;; (PLUS, V8HImode, operands) accepts the operand combination.
7575 (define_insn "*sse2_uavgv8hi3"
7576 [(set (match_operand:V8HI 0 "register_operand" "=x")
7582 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7584 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7585 (const_vector:V8HI [(const_int 1) (const_int 1)
7586 (const_int 1) (const_int 1)
7587 (const_int 1) (const_int 1)
7588 (const_int 1) (const_int 1)]))
7590 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7591 "pavgw\t{%2, %0|%0, %2}"
7592 [(set_attr "type" "sseiadd")
7593 (set_attr "prefix_data16" "1")
7594 (set_attr "mode" "TI")])
7596 ;; The correct representation for this is absolutely enormous, and
7597 ;; surely not generally useful.
;; For that reason the operation is wrapped in an opaque unspec: two
;; V16QI inputs (operand 1 in a register, operand 2 in a register or
;; memory) yield a V2DI register result.  This AVX form emits the
;; three-operand vpsadbw, so operand 1 need not share the destination.
7598 (define_insn "*avx_psadbw"
7599 [(set (match_operand:V2DI 0 "register_operand" "=x")
7600 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7601 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7604 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7605 [(set_attr "type" "sseiadd")
7606 (set_attr "prefix" "vex")
7607 (set_attr "mode" "TI")])
;; SSE2 psadbw, modelled as an unspec that maps two V16QI inputs to a
;; V2DI register result.  Operand 1 is tied to the destination ("0"),
;; matching the two-operand psadbw template; operand 2 may be a
;; register or memory ("xm").
7609 (define_insn "sse2_psadbw"
7610 [(set (match_operand:V2DI 0 "register_operand" "=x")
7611 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7612 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7615 "psadbw\t{%2, %0|%0, %2}"
7616 [(set_attr "type" "sseiadd")
7617 (set_attr "prefix_data16" "1")
7618 (set_attr "mode" "TI")])
;; vmovmskps/vmovmskpd on 256-bit vectors: one instance is generated
;; per mode of the AVX256MODEF2P iterator, and the mnemonic suffix
;; comes from <avxmodesuffixf2c>.  The vector register operand 1
;; yields an SI result in a general register ("=r").  Enabled when
;; AVX256_VEC_FLOAT_MODE_P (<MODE>mode) holds.
7620 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7621 [(set (match_operand:SI 0 "register_operand" "=r")
7623 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7625 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7626 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7627 [(set_attr "type" "ssecvt")
7628 (set_attr "prefix" "vex")
7629 (set_attr "mode" "<MODE>")])
;; movmskps/movmskpd on 128-bit vectors: one instance is generated per
;; mode of the SSEMODEF2P iterator (V4SF, V2DF); the pattern-name
;; prefix comes from <sse> and the mnemonic suffix from
;; <ssemodesuffixf2c>.  The vector register operand 1 yields an SI
;; result in a general register ("=r").  The "%v" in the template and
;; the "maybe_vex" prefix attribute cover both the legacy and the
;; VEX-encoded spelling.  Enabled when SSE_VEC_FLOAT_MODE_P
;; (<MODE>mode) holds.
7631 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7632 [(set (match_operand:SI 0 "register_operand" "=r")
7634 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7636 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7637 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7638 [(set_attr "type" "ssecvt")
7639 (set_attr "prefix" "maybe_vex")
7640 (set_attr "mode" "<MODE>")])
;; pmovmskb: an unspec that maps the V16QI register operand 1 to an SI
;; result in a general register ("=r").  The "%v" in the template and
;; the "maybe_vex" prefix attribute cover both the legacy and the
;; VEX-encoded spelling.
7642 (define_insn "sse2_pmovmskb"
7643 [(set (match_operand:SI 0 "register_operand" "=r")
7644 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7647 "%vpmovmskb\t{%1, %0|%0, %1}"
7648 [(set_attr "type" "ssecvt")
7649 (set_attr "prefix_data16" "1")
7650 (set_attr "prefix" "maybe_vex")
7651 (set_attr "mode" "SI")])
;; Expander sse2_maskmovdqu: the V16QI memory operand 0 is set from an
;; unspec whose inputs include the V16QI register operands 1 and 2.
;; NOTE(review): presumably this is matched by the *sse2_maskmovdqu
;; and *sse2_maskmovdqu_rex64 insns -- confirm.
7653 (define_expand "sse2_maskmovdqu"
7654 [(set (match_operand:V16QI 0 "memory_operand" "")
7655 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7656 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The store target
;; is the V16QI memory addressed by the SI register operand 0, which
;; constraint "D" restricts to a single register class.  The unspec
;; takes the V16QI register operands 1 and 2 plus the previous
;; contents of the same memory, (mem (match_dup 0)).  Only operands 2
;; and 1 appear in the output template; the address register does not.
7662 (define_insn "*sse2_maskmovdqu"
7663 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7664 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7665 (match_operand:V16QI 2 "register_operand" "x")
7666 (mem:V16QI (match_dup 0))]
7668 "TARGET_SSE2 && !TARGET_64BIT"
7669 ;; @@@ check ordering of operands in intel/nonintel syntax
7670 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7671 [(set_attr "type" "ssecvt")
7672 (set_attr "prefix_data16" "1")
7673 (set_attr "prefix" "maybe_vex")
7674 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Same shape as the
;; 32-bit pattern except that the address register operand 0 is DImode.
;; The unspec takes the V16QI register operands 1 and 2 plus the
;; previous contents of the destination memory, (mem (match_dup 0)).
;; Only operands 2 and 1 appear in the output template.
7676 (define_insn "*sse2_maskmovdqu_rex64"
7677 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7678 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7679 (match_operand:V16QI 2 "register_operand" "x")
7680 (mem:V16QI (match_dup 0))]
7682 "TARGET_SSE2 && TARGET_64BIT"
7683 ;; @@@ check ordering of operands in intel/nonintel syntax
7684 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7685 [(set_attr "type" "ssecvt")
7686 (set_attr "prefix_data16" "1")
7687 (set_attr "prefix" "maybe_vex")
7688 (set_attr "mode" "TI")])
;; sse_ldmxcsr: an unspec_volatile whose only input is the SI memory
;; operand 0.  The "memory" attribute is "load", and the "maybe_vex"
;; prefix attribute allows a VEX-encoded form.
7690 (define_insn "sse_ldmxcsr"
7691 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7695 [(set_attr "type" "sse")
7696 (set_attr "prefix" "maybe_vex")
7697 (set_attr "memory" "load")])
;; sse_stmxcsr: sets the SI memory operand 0 from the volatile unspec
;; UNSPECV_STMXCSR, whose only input is the placeholder (const_int 0).
;; The "memory" attribute is "store", and the "maybe_vex" prefix
;; attribute allows a VEX-encoded form.
7699 (define_insn "sse_stmxcsr"
7700 [(set (match_operand:SI 0 "memory_operand" "=m")
7701 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7704 [(set_attr "type" "sse")
7705 (set_attr "prefix" "maybe_vex")
7706 (set_attr "memory" "store")])
;; Expander sse_sfence (TARGET_SSE || TARGET_3DNOW_A).  The preparation
;; code makes operands[0] a BLKmode MEM whose address is a Pmode
;; SCRATCH and flags it volatile; the template then applies
;; UNSPEC_SFENCE to that same operand via match_dup.
7708 (define_expand "sse_sfence"
7710 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7711 "TARGET_SSE || TARGET_3DNOW_A"
7713 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7714 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand 0 set to UNSPEC_SFENCE of itself.
;; Operand 0 has an empty predicate and an empty constraint.  Enabled
;; for TARGET_SSE || TARGET_3DNOW_A; the "memory" attribute is
;; "unknown".
7717 (define_insn "*sse_sfence"
7718 [(set (match_operand:BLK 0 "" "")
7719 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7720 "TARGET_SSE || TARGET_3DNOW_A"
7722 [(set_attr "type" "sse")
7723 (set_attr "memory" "unknown")])
;; sse2_clflush: an unspec_volatile whose only input is operand 0, a
;; modeless address (predicate address_operand, constraint "p").  The
;; "memory" attribute is "unknown".
7725 (define_insn "sse2_clflush"
7726 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7730 [(set_attr "type" "sse")
7731 (set_attr "memory" "unknown")])
;; Expander sse2_mfence.  The preparation code makes operands[0] a
;; BLKmode MEM whose address is a Pmode SCRATCH and flags it volatile;
;; the template then applies UNSPEC_MFENCE to that same operand via
;; match_dup.
7733 (define_expand "sse2_mfence"
7735 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7738 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7739 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand 0 set to UNSPEC_MFENCE of itself.
;; Operand 0 has an empty predicate and an empty constraint.  Enabled
;; for TARGET_64BIT || TARGET_SSE2; the "memory" attribute is
;; "unknown".
7742 (define_insn "*sse2_mfence"
7743 [(set (match_operand:BLK 0 "" "")
7744 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7745 "TARGET_64BIT || TARGET_SSE2"
7747 [(set_attr "type" "sse")
7748 (set_attr "memory" "unknown")])
;; Expander sse2_lfence.  The preparation code makes operands[0] a
;; BLKmode MEM whose address is a Pmode SCRATCH and flags it volatile;
;; the template then applies UNSPEC_LFENCE to that same operand via
;; match_dup.
7750 (define_expand "sse2_lfence"
7752 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7755 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7756 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand 0 set to UNSPEC_LFENCE of itself.
;; Operand 0 has an empty predicate and an empty constraint; the
;; "memory" attribute is "unknown".
7759 (define_insn "*sse2_lfence"
7760 [(set (match_operand:BLK 0 "" "")
7761 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7764 [(set_attr "type" "sse")
7765 (set_attr "memory" "unknown")])
;; sse3_mwait: an unspec_volatile taking two SI register operands,
;; operand 0 under constraint "a" and operand 1 under constraint "c".
;; The "length" attribute is fixed at 3.
7767 (define_insn "sse3_mwait"
7768 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7769 (match_operand:SI 1 "register_operand" "c")]
7772 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7773 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7774 ;; we only need to set up 32bit registers.
7776 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): an unspec_volatile
;; taking three SI register operands under constraints "a", "c" and
;; "d".  All three are printed in the template, and the "length"
;; attribute is fixed at 3.
7778 (define_insn "sse3_monitor"
7779 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7780 (match_operand:SI 1 "register_operand" "c")
7781 (match_operand:SI 2 "register_operand" "d")]
7783 "TARGET_SSE3 && !TARGET_64BIT"
7784 "monitor\t%0, %1, %2"
7785 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT): operand 0 is a DI
;; register under constraint "a", while operands 1 and 2 remain SI
;; registers under "c" and "d" for the reason given in the comment
;; inside the pattern.  The "length" attribute is fixed at 3.
7787 (define_insn "sse3_monitor64"
7788 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7789 (match_operand:SI 1 "register_operand" "c")
7790 (match_operand:SI 2 "register_operand" "d")]
7792 "TARGET_SSE3 && TARGET_64BIT"
7793 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7794 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7795 ;; zero extended to 64bit, we only need to set up 32bit registers.
7797 [(set_attr "length" "3")])
7799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7801 ;; SSSE3 instructions
7803 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX pattern for vphaddw on V8HI.  The RTL selects HI elements in
;; adjacent pairs (0,1), (2,3), (4,5), (6,7), first from operand 1 and
;; then from operand 2 (register or memory).  Operand 1 is an
;; independent register ("x"), matching the three-operand template.
7805 (define_insn "*avx_phaddwv8hi3"
7806 [(set (match_operand:V8HI 0 "register_operand" "=x")
7812 (match_operand:V8HI 1 "register_operand" "x")
7813 (parallel [(const_int 0)]))
7814 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7816 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7817 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7820 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7821 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7823 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7824 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7829 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7830 (parallel [(const_int 0)]))
7831 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7833 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7834 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7837 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7838 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7840 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7841 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7843 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7844 [(set_attr "type" "sseiadd")
7845 (set_attr "prefix" "vex")
7846 (set_attr "mode" "TI")])
;; SSSE3 pattern for phaddw on V8HI (128-bit registers).  The RTL
;; selects HI elements in adjacent pairs (0,1), (2,3), (4,5), (6,7),
;; first from operand 1 and then from operand 2 (register or memory).
;; Operand 1 is tied to the destination ("0"), matching the
;; two-operand template.
7848 (define_insn "ssse3_phaddwv8hi3"
7849 [(set (match_operand:V8HI 0 "register_operand" "=x")
7855 (match_operand:V8HI 1 "register_operand" "0")
7856 (parallel [(const_int 0)]))
7857 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7859 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7860 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7863 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7864 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7866 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7867 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7872 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7873 (parallel [(const_int 0)]))
7874 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7876 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7877 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7880 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7881 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7883 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7884 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7886 "phaddw\t{%2, %0|%0, %2}"
7887 [(set_attr "type" "sseiadd")
7888 (set_attr "prefix_data16" "1")
7889 (set_attr "prefix_extra" "1")
7890 (set_attr "mode" "TI")])
;; SSSE3 pattern for phaddw on V4HI, using "y"-constrained registers.
;; The RTL selects HI elements in adjacent pairs (0,1), (2,3), first
;; from operand 1 and then from operand 2 (register or memory).
;; Operand 1 is tied to the destination ("0"); the "mode" attribute is
;; DI.
7892 (define_insn "ssse3_phaddwv4hi3"
7893 [(set (match_operand:V4HI 0 "register_operand" "=y")
7898 (match_operand:V4HI 1 "register_operand" "0")
7899 (parallel [(const_int 0)]))
7900 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7902 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7903 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7907 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7908 (parallel [(const_int 0)]))
7909 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7911 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7912 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7914 "phaddw\t{%2, %0|%0, %2}"
7915 [(set_attr "type" "sseiadd")
7916 (set_attr "prefix_extra" "1")
7917 (set_attr "mode" "DI")])
;; AVX pattern for vphaddd on V4SI.  The RTL selects SI elements in
;; adjacent pairs (0,1), (2,3), first from operand 1 and then from
;; operand 2 (register or memory).  Operand 1 is an independent
;; register ("x"), matching the three-operand template.
7919 (define_insn "*avx_phadddv4si3"
7920 [(set (match_operand:V4SI 0 "register_operand" "=x")
7925 (match_operand:V4SI 1 "register_operand" "x")
7926 (parallel [(const_int 0)]))
7927 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7929 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7930 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7934 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7935 (parallel [(const_int 0)]))
7936 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7938 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7939 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7941 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
7942 [(set_attr "type" "sseiadd")
7943 (set_attr "prefix" "vex")
7944 (set_attr "mode" "TI")])
;; SSSE3 pattern for phaddd on V4SI (128-bit registers).  The RTL
;; selects SI elements in adjacent pairs (0,1), (2,3), first from
;; operand 1 and then from operand 2 (register or memory).  Operand 1
;; is tied to the destination ("0"), matching the two-operand template.
7946 (define_insn "ssse3_phadddv4si3"
7947 [(set (match_operand:V4SI 0 "register_operand" "=x")
7952 (match_operand:V4SI 1 "register_operand" "0")
7953 (parallel [(const_int 0)]))
7954 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7956 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7957 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7961 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7962 (parallel [(const_int 0)]))
7963 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7965 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7966 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7968 "phaddd\t{%2, %0|%0, %2}"
7969 [(set_attr "type" "sseiadd")
7970 (set_attr "prefix_data16" "1")
7971 (set_attr "prefix_extra" "1")
7972 (set_attr "mode" "TI")])
;; SSSE3 pattern for phaddd on V2SI, using "y"-constrained registers.
;; The RTL selects SI elements 0 and 1 of operand 1 and then of
;; operand 2 (register or memory).  Operand 1 is tied to the
;; destination ("0"); the "mode" attribute is DI.
7974 (define_insn "ssse3_phadddv2si3"
7975 [(set (match_operand:V2SI 0 "register_operand" "=y")
7979 (match_operand:V2SI 1 "register_operand" "0")
7980 (parallel [(const_int 0)]))
7981 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7984 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7985 (parallel [(const_int 0)]))
7986 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7988 "phaddd\t{%2, %0|%0, %2}"
7989 [(set_attr "type" "sseiadd")
7990 (set_attr "prefix_extra" "1")
7991 (set_attr "mode" "DI")])
;; AVX pattern for vphaddsw on V8HI.  The RTL selects HI elements in
;; adjacent pairs (0,1), (2,3), (4,5), (6,7), first from operand 1 and
;; then from operand 2 (register or memory).  Operand 1 is an
;; independent register ("x"), matching the three-operand template.
7993 (define_insn "*avx_phaddswv8hi3"
7994 [(set (match_operand:V8HI 0 "register_operand" "=x")
8000 (match_operand:V8HI 1 "register_operand" "x")
8001 (parallel [(const_int 0)]))
8002 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8004 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8005 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8008 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8009 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8011 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8012 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8017 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8018 (parallel [(const_int 0)]))
8019 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8021 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8022 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8025 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8026 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8028 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8029 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8031 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8032 [(set_attr "type" "sseiadd")
8033 (set_attr "prefix" "vex")
8034 (set_attr "mode" "TI")])
;; SSSE3 pattern for phaddsw on V8HI (128-bit registers).  The RTL
;; selects HI elements in adjacent pairs (0,1), (2,3), (4,5), (6,7),
;; first from operand 1 and then from operand 2 (register or memory).
;; Operand 1 is tied to the destination ("0"), matching the
;; two-operand template.
8036 (define_insn "ssse3_phaddswv8hi3"
8037 [(set (match_operand:V8HI 0 "register_operand" "=x")
8043 (match_operand:V8HI 1 "register_operand" "0")
8044 (parallel [(const_int 0)]))
8045 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8047 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8048 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8051 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8052 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8054 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8055 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8060 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8061 (parallel [(const_int 0)]))
8062 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8064 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8065 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8068 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8069 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8071 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8072 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8074 "phaddsw\t{%2, %0|%0, %2}"
8075 [(set_attr "type" "sseiadd")
8076 (set_attr "prefix_data16" "1")
8077 (set_attr "prefix_extra" "1")
8078 (set_attr "mode" "TI")])
;; SSSE3 pattern for phaddsw on V4HI, using "y"-constrained registers.
;; The RTL selects HI elements in adjacent pairs (0,1), (2,3), first
;; from operand 1 and then from operand 2 (register or memory).
;; Operand 1 is tied to the destination ("0"); the "mode" attribute is
;; DI.
8080 (define_insn "ssse3_phaddswv4hi3"
8081 [(set (match_operand:V4HI 0 "register_operand" "=y")
8086 (match_operand:V4HI 1 "register_operand" "0")
8087 (parallel [(const_int 0)]))
8088 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8090 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8091 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8095 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8096 (parallel [(const_int 0)]))
8097 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8099 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8100 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8102 "phaddsw\t{%2, %0|%0, %2}"
8103 [(set_attr "type" "sseiadd")
8104 (set_attr "prefix_extra" "1")
8105 (set_attr "mode" "DI")])
;; AVX pattern for vphsubw on V8HI.  The RTL selects HI elements in
;; adjacent pairs (0,1), (2,3), (4,5), (6,7), first from operand 1 and
;; then from operand 2 (register or memory).  Operand 1 is an
;; independent register ("x"), matching the three-operand template.
8107 (define_insn "*avx_phsubwv8hi3"
8108 [(set (match_operand:V8HI 0 "register_operand" "=x")
8114 (match_operand:V8HI 1 "register_operand" "x")
8115 (parallel [(const_int 0)]))
8116 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8118 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8119 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8122 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8123 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8125 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8126 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8131 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8132 (parallel [(const_int 0)]))
8133 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8135 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8136 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8139 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8140 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8142 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8143 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8145 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8146 [(set_attr "type" "sseiadd")
8147 (set_attr "prefix" "vex")
8148 (set_attr "mode" "TI")])
;; SSSE3 pattern for phsubw on V8HI (128-bit registers).  The RTL
;; selects HI elements in adjacent pairs (0,1), (2,3), (4,5), (6,7),
;; first from operand 1 and then from operand 2 (register or memory).
;; Operand 1 is tied to the destination ("0"), matching the
;; two-operand template.
8150 (define_insn "ssse3_phsubwv8hi3"
8151 [(set (match_operand:V8HI 0 "register_operand" "=x")
8157 (match_operand:V8HI 1 "register_operand" "0")
8158 (parallel [(const_int 0)]))
8159 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8161 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8162 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8165 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8166 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8168 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8169 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8174 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8175 (parallel [(const_int 0)]))
8176 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8178 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8179 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8182 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8183 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8185 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8186 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8188 "phsubw\t{%2, %0|%0, %2}"
8189 [(set_attr "type" "sseiadd")
8190 (set_attr "prefix_data16" "1")
8191 (set_attr "prefix_extra" "1")
8192 (set_attr "mode" "TI")])
;; SSSE3 pattern for phsubw on V4HI, using "y"-constrained registers.
;; The RTL selects HI elements in adjacent pairs (0,1), (2,3), first
;; from operand 1 and then from operand 2 (register or memory).
;; Operand 1 is tied to the destination ("0"); the "mode" attribute is
;; DI.
8194 (define_insn "ssse3_phsubwv4hi3"
8195 [(set (match_operand:V4HI 0 "register_operand" "=y")
8200 (match_operand:V4HI 1 "register_operand" "0")
8201 (parallel [(const_int 0)]))
8202 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8204 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8205 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8209 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8210 (parallel [(const_int 0)]))
8211 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8213 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8216 "phsubw\t{%2, %0|%0, %2}"
8217 [(set_attr "type" "sseiadd")
8218 (set_attr "prefix_extra" "1")
8219 (set_attr "mode" "DI")])
;; AVX pattern for vphsubd on V4SI.  The RTL selects SI elements in
;; adjacent pairs (0,1), (2,3), first from operand 1 and then from
;; operand 2 (register or memory).  Operand 1 is an independent
;; register ("x"), matching the three-operand template.
8221 (define_insn "*avx_phsubdv4si3"
8222 [(set (match_operand:V4SI 0 "register_operand" "=x")
8227 (match_operand:V4SI 1 "register_operand" "x")
8228 (parallel [(const_int 0)]))
8229 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8231 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8232 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8236 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8237 (parallel [(const_int 0)]))
8238 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8240 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8241 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8243 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8244 [(set_attr "type" "sseiadd")
8245 (set_attr "prefix" "vex")
8246 (set_attr "mode" "TI")])
;; SSSE3 pattern for phsubd on V4SI (128-bit registers).  The RTL
;; selects SI elements in adjacent pairs (0,1), (2,3), first from
;; operand 1 and then from operand 2 (register or memory).  Operand 1
;; is tied to the destination ("0"), matching the two-operand template.
8248 (define_insn "ssse3_phsubdv4si3"
8249 [(set (match_operand:V4SI 0 "register_operand" "=x")
8254 (match_operand:V4SI 1 "register_operand" "0")
8255 (parallel [(const_int 0)]))
8256 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8258 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8259 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8263 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8264 (parallel [(const_int 0)]))
8265 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8267 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8268 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8270 "phsubd\t{%2, %0|%0, %2}"
8271 [(set_attr "type" "sseiadd")
8272 (set_attr "prefix_data16" "1")
8273 (set_attr "prefix_extra" "1")
8274 (set_attr "mode" "TI")])
;; SSSE3 pattern for phsubd on V2SI, using "y"-constrained registers.
;; The RTL selects SI elements 0 and 1 of operand 1 and then of
;; operand 2 (register or memory).  Operand 1 is tied to the
;; destination ("0"); the "mode" attribute is DI.
8276 (define_insn "ssse3_phsubdv2si3"
8277 [(set (match_operand:V2SI 0 "register_operand" "=y")
8281 (match_operand:V2SI 1 "register_operand" "0")
8282 (parallel [(const_int 0)]))
8283 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8286 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8287 (parallel [(const_int 0)]))
8288 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8290 "phsubd\t{%2, %0|%0, %2}"
8291 [(set_attr "type" "sseiadd")
8292 (set_attr "prefix_extra" "1")
8293 (set_attr "mode" "DI")])
;; AVX pattern for vphsubsw on V8HI.  The RTL selects HI elements in
;; adjacent pairs (0,1), (2,3), (4,5), (6,7), first from operand 1 and
;; then from operand 2 (register or memory).  Operand 1 is an
;; independent register ("x"), matching the three-operand template.
8295 (define_insn "*avx_phsubswv8hi3"
8296 [(set (match_operand:V8HI 0 "register_operand" "=x")
8302 (match_operand:V8HI 1 "register_operand" "x")
8303 (parallel [(const_int 0)]))
8304 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8306 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8307 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8310 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8311 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8313 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8314 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8319 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8320 (parallel [(const_int 0)]))
8321 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8323 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8324 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8327 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8328 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8330 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8331 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8333 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8334 [(set_attr "type" "sseiadd")
8335 (set_attr "prefix" "vex")
8336 (set_attr "mode" "TI")])
;; SSSE3 pattern for phsubsw on V8HI (128-bit registers).  The RTL
;; selects HI elements in adjacent pairs (0,1), (2,3), (4,5), (6,7),
;; first from operand 1 and then from operand 2 (register or memory).
;; Operand 1 is tied to the destination ("0"), matching the
;; two-operand template.
8338 (define_insn "ssse3_phsubswv8hi3"
8339 [(set (match_operand:V8HI 0 "register_operand" "=x")
8345 (match_operand:V8HI 1 "register_operand" "0")
8346 (parallel [(const_int 0)]))
8347 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8349 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8350 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8353 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8354 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8356 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8357 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8362 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8363 (parallel [(const_int 0)]))
8364 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8366 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8367 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8370 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8371 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8373 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8374 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8376 "phsubsw\t{%2, %0|%0, %2}"
8377 [(set_attr "type" "sseiadd")
8378 (set_attr "prefix_data16" "1")
8379 (set_attr "prefix_extra" "1")
8380 (set_attr "mode" "TI")])
;; SSSE3 pattern for phsubsw on V4HI, using "y"-constrained registers.
;; The RTL selects HI elements in adjacent pairs (0,1), (2,3), first
;; from operand 1 and then from operand 2 (register or memory).
;; Operand 1 is tied to the destination ("0"); the "mode" attribute is
;; DI.
8382 (define_insn "ssse3_phsubswv4hi3"
8383 [(set (match_operand:V4HI 0 "register_operand" "=y")
8388 (match_operand:V4HI 1 "register_operand" "0")
8389 (parallel [(const_int 0)]))
8390 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8392 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8393 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8397 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8398 (parallel [(const_int 0)]))
8399 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8401 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8402 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8404 "phsubsw\t{%2, %0|%0, %2}"
8405 [(set_attr "type" "sseiadd")
8406 (set_attr "prefix_extra" "1")
8407 (set_attr "mode" "DI")])
;; AVX pattern for vpmaddubsw: V16QI operands 1 (register) and 2
;; (register or memory) produce a V8HI register result.  The RTL
;; selects two element sets from each source, one whose index list
;; begins at 0 and one whose index list begins at 1 and ends at 15.
;; Operand 1 is an independent register ("x"), matching the
;; three-operand template.
;; NOTE(review): the index-1 selections are written vec_select:V16QI,
;; i.e. with the full source vector mode although they pick a subset
;; of elements -- confirm the intended selection mode.
8409 (define_insn "*avx_pmaddubsw128"
8410 [(set (match_operand:V8HI 0 "register_operand" "=x")
8415 (match_operand:V16QI 1 "register_operand" "x")
8416 (parallel [(const_int 0)
8426 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8427 (parallel [(const_int 0)
8437 (vec_select:V16QI (match_dup 1)
8438 (parallel [(const_int 1)
8447 (vec_select:V16QI (match_dup 2)
8448 (parallel [(const_int 1)
8455 (const_int 15)]))))))]
8457 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8458 [(set_attr "type" "sseiadd")
8459 (set_attr "prefix" "vex")
8460 (set_attr "mode" "TI")])
;; SSSE3 pattern for 128-bit pmaddubsw: V16QI operands 1 (register)
;; and 2 (register or memory) produce a V8HI register result.  The RTL
;; selects two element sets from each source, one whose index list
;; begins at 0 and one whose index list begins at 1 and ends at 15.
;; Operand 1 is tied to the destination ("0"), matching the
;; two-operand template.
;; NOTE(review): the index-1 selections are written vec_select:V16QI,
;; i.e. with the full source vector mode although they pick a subset
;; of elements -- confirm the intended selection mode.
8462 (define_insn "ssse3_pmaddubsw128"
8463 [(set (match_operand:V8HI 0 "register_operand" "=x")
8468 (match_operand:V16QI 1 "register_operand" "0")
8469 (parallel [(const_int 0)
8479 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8480 (parallel [(const_int 0)
8490 (vec_select:V16QI (match_dup 1)
8491 (parallel [(const_int 1)
8500 (vec_select:V16QI (match_dup 2)
8501 (parallel [(const_int 1)
8508 (const_int 15)]))))))]
8510 "pmaddubsw\t{%2, %0|%0, %2}"
8511 [(set_attr "type" "sseiadd")
8512 (set_attr "prefix_data16" "1")
8513 (set_attr "prefix_extra" "1")
8514 (set_attr "mode" "TI")])
;; SSSE3 pattern for 64-bit pmaddubsw using "y"-constrained registers:
;; V8QI operands 1 (register) and 2 (register or memory) produce a
;; V4HI register result.  The RTL selects two element sets from each
;; source, one whose index list begins at 0 and one whose index list
;; begins at 1 and ends at 7.  Operand 1 is tied to the destination
;; ("0"); the "mode" attribute is DI.
;; NOTE(review): the index-1 selections are written vec_select:V8QI,
;; i.e. with the full source vector mode although they pick a subset
;; of elements -- confirm the intended selection mode.
8516 (define_insn "ssse3_pmaddubsw"
8517 [(set (match_operand:V4HI 0 "register_operand" "=y")
8522 (match_operand:V8QI 1 "register_operand" "0")
8523 (parallel [(const_int 0)
8529 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8530 (parallel [(const_int 0)
8536 (vec_select:V8QI (match_dup 1)
8537 (parallel [(const_int 1)
8542 (vec_select:V8QI (match_dup 2)
8543 (parallel [(const_int 1)
8546 (const_int 7)]))))))]
8548 "pmaddubsw\t{%2, %0|%0, %2}"
8549 [(set_attr "type" "sseiadd")
8550 (set_attr "prefix_extra" "1")
8551 (set_attr "mode" "DI")])
;; Expander for pmulhrsw on V8HI.  The operands carry no constraints; both
;; inputs are accepted as nonimmediate_operand.  The RTL contains a V8HI
;; const_vector of all ones.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands) before the
;; insn is generated.
8553 (define_expand "ssse3_pmulhrswv8hi3"
8554 [(set (match_operand:V8HI 0 "register_operand" "")
8561 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8563 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8565 (const_vector:V8HI [(const_int 1) (const_int 1)
8566 (const_int 1) (const_int 1)
8567 (const_int 1) (const_int 1)
8568 (const_int 1) (const_int 1)]))
8571 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of pmulhrsw on V8HI.  Enabled when TARGET_AVX holds and
;; ix86_binary_operator_ok accepts the operands.  The '%' in operand 1's
;; constraint marks operands 1 and 2 as commutative.  Emits the
;; three-operand vpmulhrsw.
8573 (define_insn "*avx_pmulhrswv8hi3"
8574 [(set (match_operand:V8HI 0 "register_operand" "=x")
8581 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8583 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8585 (const_vector:V8HI [(const_int 1) (const_int 1)
8586 (const_int 1) (const_int 1)
8587 (const_int 1) (const_int 1)
8588 (const_int 1) (const_int 1)]))
8590 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8591 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8592 [(set_attr "type" "sseimul")
8593 (set_attr "prefix" "vex")
8594 (set_attr "mode" "TI")])
;; Legacy-encoded pmulhrsw on V8HI.  Enabled when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands.  Constraint "%0" ties
;; operand 1 to the destination and lets operands 1 and 2 be swapped.
8596 (define_insn "*ssse3_pmulhrswv8hi3"
8597 [(set (match_operand:V8HI 0 "register_operand" "=x")
8604 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8606 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8608 (const_vector:V8HI [(const_int 1) (const_int 1)
8609 (const_int 1) (const_int 1)
8610 (const_int 1) (const_int 1)
8611 (const_int 1) (const_int 1)]))
8613 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8614 "pmulhrsw\t{%2, %0|%0, %2}"
8615 [(set_attr "type" "sseimul")
8616 (set_attr "prefix_data16" "1")
8617 (set_attr "prefix_extra" "1")
8618 (set_attr "mode" "TI")])
;; Expander for the 64-bit (V4HI) pmulhrsw.  Same shape as the V8HI
;; expander: unconstrained operands, a V4HI const_vector of all ones, and a
;; preparation statement calling
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands).
8620 (define_expand "ssse3_pmulhrswv4hi3"
8621 [(set (match_operand:V4HI 0 "register_operand" "")
8628 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8630 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8632 (const_vector:V4HI [(const_int 1) (const_int 1)
8633 (const_int 1) (const_int 1)]))
8636 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; 64-bit pmulhrsw on V4HI in "y" class registers.  Enabled when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
;; Constraint "%0" ties operand 1 to the destination and allows the two
;; inputs to be swapped.
8638 (define_insn "*ssse3_pmulhrswv4hi3"
8639 [(set (match_operand:V4HI 0 "register_operand" "=y")
8646 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8648 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8650 (const_vector:V4HI [(const_int 1) (const_int 1)
8651 (const_int 1) (const_int 1)]))
8653 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8654 "pmulhrsw\t{%2, %0|%0, %2}"
8655 [(set_attr "type" "sseimul")
8656 (set_attr "prefix_extra" "1")
8657 (set_attr "mode" "DI")])
;; AVX form of pshufb on V16QI, modelled as an unspec over operands 1 and 2.
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; The ';' after the output template merely opens an empty comment.
8659 (define_insn "*avx_pshufbv16qi3"
8660 [(set (match_operand:V16QI 0 "register_operand" "=x")
8661 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8662 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8665 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8666 [(set_attr "type" "sselog1")
8667 (set_attr "prefix" "vex")
8668 (set_attr "mode" "TI")])
;; Legacy-encoded pshufb on V16QI, modelled as an unspec.  Operand 1 is
;; tied to the destination (constraint "0"), so only %0 and %2 are printed.
;; The ';' after the output template merely opens an empty comment.
8670 (define_insn "ssse3_pshufbv16qi3"
8671 [(set (match_operand:V16QI 0 "register_operand" "=x")
8672 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8673 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8676 "pshufb\t{%2, %0|%0, %2}";
8677 [(set_attr "type" "sselog1")
8678 (set_attr "prefix_data16" "1")
8679 (set_attr "prefix_extra" "1")
8680 (set_attr "mode" "TI")])
;; 64-bit pshufb on V8QI in "y" class registers (operand 2 may be memory).
;; Operand 1 is tied to the destination.  Mode attribute is DI.
;; The ';' after the output template merely opens an empty comment.
8682 (define_insn "ssse3_pshufbv8qi3"
8683 [(set (match_operand:V8QI 0 "register_operand" "=y")
8684 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8685 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8688 "pshufb\t{%2, %0|%0, %2}";
8689 [(set_attr "type" "sselog1")
8690 (set_attr "prefix_extra" "1")
8691 (set_attr "mode" "DI")])
;; AVX form of psign, instantiated for every mode in SSEMODE124.  The
;; mnemonic suffix comes from the <ssevecsize> mode attribute.  Operand 1
;; must be a register; operand 2 may be a register or memory.
;; The ';' after the output template merely opens an empty comment.
8693 (define_insn "*avx_psign<mode>3"
8694 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8696 [(match_operand:SSEMODE124 1 "register_operand" "x")
8697 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8700 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
8701 [(set_attr "type" "sselog1")
8702 (set_attr "prefix" "vex")
8703 (set_attr "mode" "TI")])
;; Legacy-encoded psign for the SSEMODE124 modes.  Operand 1 is tied to the
;; destination (constraint "0"); the suffix comes from <ssevecsize>.
;; The ';' after the output template merely opens an empty comment.
8705 (define_insn "ssse3_psign<mode>3"
8706 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8708 [(match_operand:SSEMODE124 1 "register_operand" "0")
8709 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8712 "psign<ssevecsize>\t{%2, %0|%0, %2}";
8713 [(set_attr "type" "sselog1")
8714 (set_attr "prefix_data16" "1")
8715 (set_attr "prefix_extra" "1")
8716 (set_attr "mode" "TI")])
;; 64-bit psign, instantiated over MMXMODEI with "y" class registers.  It
;; shares its name template with the SSEMODE124 pattern; the iterator modes
;; make the expanded names distinct.  The suffix comes from <mmxvecsize>.
;; The ';' after the output template merely opens an empty comment.
8718 (define_insn "ssse3_psign<mode>3"
8719 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8721 [(match_operand:MMXMODEI 1 "register_operand" "0")
8722 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8725 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8726 [(set_attr "type" "sselog1")
8727 (set_attr "prefix_extra" "1")
8728 (set_attr "mode" "DI")])
;; AVX form of palignr on TImode values.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code replaces it with its value
;; divided by 8 before printing vpalignr (presumably converting a bit count
;; into the byte count the instruction encodes -- confirm against the
;; predicate definition).
8730 (define_insn "*avx_palignrti"
8731 [(set (match_operand:TI 0 "register_operand" "=x")
8732 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8733 (match_operand:TI 2 "nonimmediate_operand" "xm")
8734 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8738 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8739 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8741 [(set_attr "type" "sseishft")
8742 (set_attr "prefix" "vex")
8743 (set_attr "mode" "TI")])
;; Legacy-encoded palignr on TImode values.  Operand 1 is tied to the
;; destination.  As in the AVX variant, operand 3 is rewritten to its value
;; divided by 8 before the template is printed.
8745 (define_insn "ssse3_palignrti"
8746 [(set (match_operand:TI 0 "register_operand" "=x")
8747 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
8748 (match_operand:TI 2 "nonimmediate_operand" "xm")
8749 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8753 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8754 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8756 [(set_attr "type" "sseishft")
8757 (set_attr "prefix_data16" "1")
8758 (set_attr "prefix_extra" "1")
8759 (set_attr "mode" "TI")])
;; 64-bit palignr on DImode values in "y" class registers.  Operand 1 is
;; tied to the destination; operand 3 is rewritten to its value divided by
;; 8 before the template is printed.
8761 (define_insn "ssse3_palignrdi"
8762 [(set (match_operand:DI 0 "register_operand" "=y")
8763 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8764 (match_operand:DI 2 "nonimmediate_operand" "ym")
8765 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8769 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8770 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8772 [(set_attr "type" "sseishft")
8773 (set_attr "prefix_extra" "1")
8774 (set_attr "mode" "DI")])
;; Vector absolute value for the SSEMODE124 modes, written with the generic
;; abs RTL code.  The source may be a register or memory.  The template
;; starts with %v and the prefix attribute is maybe_vex, so one pattern
;; presumably serves both the legacy and the VEX encoding.
8776 (define_insn "abs<mode>2"
8777 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8778 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
8780 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
8781 [(set_attr "type" "sselog1")
8782 (set_attr "prefix_data16" "1")
8783 (set_attr "prefix_extra" "1")
8784 (set_attr "prefix" "maybe_vex")
8785 (set_attr "mode" "TI")])
;; Vector absolute value for the MMXMODEI modes in "y" class registers.
;; There is no %v prefix here: only the legacy pabs<mmxvecsize> is printed.
;; The ';' after the output template merely opens an empty comment.
8787 (define_insn "abs<mode>2"
8788 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8789 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8791 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8792 [(set_attr "type" "sselog1")
8793 (set_attr "prefix_extra" "1")
8794 (set_attr "mode" "DI")])
8796 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8798 ;; AMD SSE4A instructions
8800 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store movnts<suffix> for the MODEF modes.  The destination
;; must be memory and the source an "x" register.  The mode attribute
;; follows the iterator.
8802 (define_insn "sse4a_movnt<mode>"
8803 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8805 [(match_operand:MODEF 1 "register_operand" "x")]
8808 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
8809 [(set_attr "type" "ssemov")
8810 (set_attr "mode" "<MODE>")])
;; Vector-source variant of movnts: element 0 of an SSEMODEF2P register is
;; selected (vec_select with index 0) and stored to a memory operand of the
;; corresponding scalar mode <ssescalarmode>.
8812 (define_insn "sse4a_vmmovnt<mode>"
8813 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8814 (unspec:<ssescalarmode>
8815 [(vec_select:<ssescalarmode>
8816 (match_operand:SSEMODEF2P 1 "register_operand" "x")
8817 (parallel [(const_int 0)]))]
8820 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8821 [(set_attr "type" "ssemov")
8822 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of SSE4A extrq on V2DI.  Operand 1 is tied to the
;; destination.  Operands 2 and 3 are modeless integer constants
;; (const_int_operand with empty constraints) and are printed as immediates.
8824 (define_insn "sse4a_extrqi"
8825 [(set (match_operand:V2DI 0 "register_operand" "=x")
8826 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8827 (match_operand 2 "const_int_operand" "")
8828 (match_operand 3 "const_int_operand" "")]
8831 "extrq\t{%3, %2, %0|%0, %2, %3}"
8832 [(set_attr "type" "sse")
8833 (set_attr "prefix_data16" "1")
8834 (set_attr "mode" "TI")])
;; Register form of SSE4A extrq on V2DI.  Operand 1 is tied to the
;; destination; operand 2 is a V16QI value that must be in an "x" register.
8836 (define_insn "sse4a_extrq"
8837 [(set (match_operand:V2DI 0 "register_operand" "=x")
8838 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8839 (match_operand:V16QI 2 "register_operand" "x")]
8842 "extrq\t{%2, %0|%0, %2}"
8843 [(set_attr "type" "sse")
8844 (set_attr "prefix_data16" "1")
8845 (set_attr "mode" "TI")])
;; Immediate form of SSE4A insertq on V2DI.  Operand 1 is tied to the
;; destination, operand 2 is a register, and operands 3 and 4 are modeless
;; integer constants printed as immediates.
8847 (define_insn "sse4a_insertqi"
8848 [(set (match_operand:V2DI 0 "register_operand" "=x")
8849 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8850 (match_operand:V2DI 2 "register_operand" "x")
8851 (match_operand 3 "const_int_operand" "")
8852 (match_operand 4 "const_int_operand" "")]
8855 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8856 [(set_attr "type" "sseins")
8857 (set_attr "prefix_rep" "1")
8858 (set_attr "mode" "TI")])
;; Register form of SSE4A insertq on V2DI.  Operand 1 is tied to the
;; destination; operand 2 must be an "x" register.
8860 (define_insn "sse4a_insertq"
8861 [(set (match_operand:V2DI 0 "register_operand" "=x")
8862 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8863 (match_operand:V2DI 2 "register_operand" "x")]
8866 "insertq\t{%2, %0|%0, %2}"
8867 [(set_attr "type" "sseins")
8868 (set_attr "prefix_rep" "1")
8869 (set_attr "mode" "TI")])
8871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8873 ;; Intel SSE4.1 instructions
8875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate blend for the AVXMODEF2P modes, expressed as vec_merge.
;; Note the operand order inside vec_merge: operand 2 comes first and
;; operand 1 second, with the selector in operand 3.  The selector's
;; predicate is built from the <blendbits> mode attribute.
8877 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
8878 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8879 (vec_merge:AVXMODEF2P
8880 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8881 (match_operand:AVXMODEF2P 1 "register_operand" "x")
8882 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8884 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8885 [(set_attr "type" "ssemov")
8886 (set_attr "prefix" "vex")
8887 (set_attr "mode" "<avxvecmode>")])
;; AVX variable blend for the AVXMODEF2P modes.  The selector (operand 3)
;; is a vector in any "x" register rather than an immediate, and it is
;; printed as an explicit fourth operand of vblendvp<suffix>.
8889 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
8890 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8892 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
8893 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8894 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
8897 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8898 [(set_attr "type" "ssemov")
8899 (set_attr "prefix" "vex")
8900 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 immediate blend for the SSEMODEF2P modes, expressed as vec_merge
;; with operand 2 first and operand 1 second.  Operand 1 is tied to the
;; destination, so the template prints %3, %2 and %0 only.
8902 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
8903 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8904 (vec_merge:SSEMODEF2P
8905 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8906 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8907 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8909 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8910 [(set_attr "type" "ssemov")
8911 (set_attr "prefix_extra" "1")
8912 (set_attr "mode" "<MODE>")])
;; SSE4.1 variable blend for the SSEMODEF2P modes.  The selector (operand 3)
;; is pinned by constraint "Yz", while operands 0-2 use the *_not_xmm0
;; predicates so they are kept out of that register.  Operand 1 is tied to
;; the destination.
8914 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
8915 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
8917 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
8918 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
8919 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
8922 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8923 [(set_attr "type" "ssemov")
8924 (set_attr "prefix_extra" "1")
8925 (set_attr "mode" "<MODE>")])
;; AVX dot-product pattern (vdpp<suffix>) for the AVXMODEF2P modes.  The
;; '%' in operand 1's constraint marks operands 1 and 2 as commutative.
;; Operand 3 is an immediate accepted by const_0_to_255_operand.
8927 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
8928 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8930 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
8931 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8932 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8935 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8936 [(set_attr "type" "ssemul")
8937 (set_attr "prefix" "vex")
8938 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 dot-product pattern (dpp<suffix>) for the SSEMODEF2P modes.
;; Constraint "%0" ties operand 1 to the destination and lets operands 1
;; and 2 be swapped.  Operand 3 is an immediate.
8940 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
8941 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8943 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
8944 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8945 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8948 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8949 [(set_attr "type" "ssemul")
8950 (set_attr "prefix_extra" "1")
8951 (set_attr "mode" "<MODE>")])
;; movntdqa load: the V2DI source must be memory ("m" with memory_operand)
;; and the result goes to an "x" register.  Modelled as an unspec.  The
;; template starts with %v and the prefix attribute is maybe_vex.
8953 (define_insn "sse4_1_movntdqa"
8954 [(set (match_operand:V2DI 0 "register_operand" "=x")
8955 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
8958 "%vmovntdqa\t{%1, %0|%0, %1}"
8959 [(set_attr "type" "ssecvt")
8960 (set_attr "prefix_extra" "1")
8961 (set_attr "prefix" "maybe_vex")
8962 (set_attr "mode" "TI")])
;; AVX form of mpsadbw on V16QI, modelled as an unspec of two vector
;; inputs plus an immediate (operand 3, const_0_to_255_operand).
;; Three-operand, non-destructive encoding.
8964 (define_insn "*avx_mpsadbw"
8965 [(set (match_operand:V16QI 0 "register_operand" "=x")
8966 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8967 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8968 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8971 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8972 [(set_attr "type" "sselog1")
8973 (set_attr "prefix" "vex")
8974 (set_attr "mode" "TI")])
;; Legacy-encoded mpsadbw on V16QI.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory; operand 3 is an immediate.
8976 (define_insn "sse4_1_mpsadbw"
8977 [(set (match_operand:V16QI 0 "register_operand" "=x")
8978 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8979 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8980 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8983 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
8984 [(set_attr "type" "sselog1")
8985 (set_attr "prefix_extra" "1")
8986 (set_attr "mode" "TI")])
;; AVX form of packusdw: two V4SI inputs (operand 1 a register, operand 2 a
;; register or memory) produce one V8HI result.  Three-operand encoding.
8988 (define_insn "*avx_packusdw"
8989 [(set (match_operand:V8HI 0 "register_operand" "=x")
8992 (match_operand:V4SI 1 "register_operand" "x"))
8994 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
8996 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8997 [(set_attr "type" "sselog")
8998 (set_attr "prefix" "vex")
8999 (set_attr "mode" "TI")])
;; Legacy-encoded packusdw: two V4SI inputs produce one V8HI result.
;; Operand 1 is tied to the destination by constraint "0".
9001 (define_insn "sse4_1_packusdw"
9002 [(set (match_operand:V8HI 0 "register_operand" "=x")
9005 (match_operand:V4SI 1 "register_operand" "0"))
9007 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9009 "packusdw\t{%2, %0|%0, %2}"
9010 [(set_attr "type" "sselog")
9011 (set_attr "prefix_extra" "1")
9012 (set_attr "mode" "TI")])
;; AVX form of pblendvb on V16QI.  The selector (operand 3) may live in any
;; "x" register and is printed as an explicit fourth operand.
9014 (define_insn "*avx_pblendvb"
9015 [(set (match_operand:V16QI 0 "register_operand" "=x")
9016 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9017 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9018 (match_operand:V16QI 3 "register_operand" "x")]
9021 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9022 [(set_attr "type" "ssemov")
9023 (set_attr "prefix" "vex")
9024 (set_attr "mode" "TI")])
;; Legacy-encoded pblendvb on V16QI.  The selector (operand 3) is pinned by
;; constraint "Yz"; operands 0-2 use the *_not_xmm0 predicates so they are
;; kept out of that register.  Operand 1 is tied to the destination.
9026 (define_insn "sse4_1_pblendvb"
9027 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9028 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9029 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9030 (match_operand:V16QI 3 "register_operand" "Yz")]
9033 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9034 [(set_attr "type" "ssemov")
9035 (set_attr "prefix_extra" "1")
9036 (set_attr "mode" "TI")])
;; AVX form of pblendw on V8HI.  Operand 2 is listed before operand 1 in
;; the RTL; operand 3 is the immediate selector (const_0_to_255_operand).
9038 (define_insn "*avx_pblendw"
9039 [(set (match_operand:V8HI 0 "register_operand" "=x")
9041 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9042 (match_operand:V8HI 1 "register_operand" "x")
9043 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9045 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9046 [(set_attr "type" "ssemov")
9047 (set_attr "prefix" "vex")
9048 (set_attr "mode" "TI")])
;; Legacy-encoded pblendw on V8HI.  Operand 2 is listed before operand 1 in
;; the RTL; operand 1 is tied to the destination; operand 3 is the
;; immediate selector.
9050 (define_insn "sse4_1_pblendw"
9051 [(set (match_operand:V8HI 0 "register_operand" "=x")
9053 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9054 (match_operand:V8HI 1 "register_operand" "0")
9055 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9057 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9058 [(set_attr "type" "ssemov")
9059 (set_attr "prefix_extra" "1")
9060 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single source
;; that may be a register or memory.  %v / maybe_vex: one pattern is used
;; for both the legacy and the VEX spelling.
9062 (define_insn "sse4_1_phminposuw"
9063 [(set (match_operand:V8HI 0 "register_operand" "=x")
9064 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9065 UNSPEC_PHMINPOSUW))]
9067 "%vphminposuw\t{%1, %0|%0, %1}"
9068 [(set_attr "type" "sselog1")
9069 (set_attr "prefix_extra" "1")
9070 (set_attr "prefix" "maybe_vex")
9071 (set_attr "mode" "TI")])
;; Named pmovsxbw pattern with a register source: elements are selected
;; from a full V16QI register (selection starts at index 0) and the result
;; is V8HI.
9073 (define_insn "sse4_1_extendv8qiv8hi2"
9074 [(set (match_operand:V8HI 0 "register_operand" "=x")
9077 (match_operand:V16QI 1 "register_operand" "x")
9078 (parallel [(const_int 0)
9087 "%vpmovsxbw\t{%1, %0|%0, %1}"
9088 [(set_attr "type" "ssemov")
9089 (set_attr "prefix_extra" "1")
9090 (set_attr "prefix" "maybe_vex")
9091 (set_attr "mode" "TI")])
;; Anonymous pmovsxbw variant whose source is a V8QI value that may be a
;; register or memory.  The narrow operand is wrapped in vec_duplicate:V16QI
;; so the element selection has the same shape as the register-only pattern.
9093 (define_insn "*sse4_1_extendv8qiv8hi2"
9094 [(set (match_operand:V8HI 0 "register_operand" "=x")
9097 (vec_duplicate:V16QI
9098 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9099 (parallel [(const_int 0)
9108 "%vpmovsxbw\t{%1, %0|%0, %1}"
9109 [(set_attr "type" "ssemov")
9110 (set_attr "prefix_extra" "1")
9111 (set_attr "prefix" "maybe_vex")
9112 (set_attr "mode" "TI")])
;; Named pmovsxbd pattern with a register source: elements are selected
;; from a full V16QI register and the result is V4SI.
9114 (define_insn "sse4_1_extendv4qiv4si2"
9115 [(set (match_operand:V4SI 0 "register_operand" "=x")
9118 (match_operand:V16QI 1 "register_operand" "x")
9119 (parallel [(const_int 0)
9124 "%vpmovsxbd\t{%1, %0|%0, %1}"
9125 [(set_attr "type" "ssemov")
9126 (set_attr "prefix_extra" "1")
9127 (set_attr "prefix" "maybe_vex")
9128 (set_attr "mode" "TI")])
;; Anonymous pmovsxbd variant whose source is a V4QI value (register or
;; memory), wrapped in vec_duplicate:V16QI before element selection.
9130 (define_insn "*sse4_1_extendv4qiv4si2"
9131 [(set (match_operand:V4SI 0 "register_operand" "=x")
9134 (vec_duplicate:V16QI
9135 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9136 (parallel [(const_int 0)
9141 "%vpmovsxbd\t{%1, %0|%0, %1}"
9142 [(set_attr "type" "ssemov")
9143 (set_attr "prefix_extra" "1")
9144 (set_attr "prefix" "maybe_vex")
9145 (set_attr "mode" "TI")])
;; Named pmovsxbq pattern with a register source: elements are selected
;; from a full V16QI register and the result is V2DI.
9147 (define_insn "sse4_1_extendv2qiv2di2"
9148 [(set (match_operand:V2DI 0 "register_operand" "=x")
9151 (match_operand:V16QI 1 "register_operand" "x")
9152 (parallel [(const_int 0)
9155 "%vpmovsxbq\t{%1, %0|%0, %1}"
9156 [(set_attr "type" "ssemov")
9157 (set_attr "prefix_extra" "1")
9158 (set_attr "prefix" "maybe_vex")
9159 (set_attr "mode" "TI")])
;; Anonymous pmovsxbq variant whose source is a V2QI value (register or
;; memory), wrapped in vec_duplicate:V16QI before element selection.
9161 (define_insn "*sse4_1_extendv2qiv2di2"
9162 [(set (match_operand:V2DI 0 "register_operand" "=x")
9165 (vec_duplicate:V16QI
9166 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9167 (parallel [(const_int 0)
9170 "%vpmovsxbq\t{%1, %0|%0, %1}"
9171 [(set_attr "type" "ssemov")
9172 (set_attr "prefix_extra" "1")
9173 (set_attr "prefix" "maybe_vex")
9174 (set_attr "mode" "TI")])
;; Named pmovsxwd pattern with a register source: elements are selected
;; from a full V8HI register and the result is V4SI.
9176 (define_insn "sse4_1_extendv4hiv4si2"
9177 [(set (match_operand:V4SI 0 "register_operand" "=x")
9180 (match_operand:V8HI 1 "register_operand" "x")
9181 (parallel [(const_int 0)
9186 "%vpmovsxwd\t{%1, %0|%0, %1}"
9187 [(set_attr "type" "ssemov")
9188 (set_attr "prefix_extra" "1")
9189 (set_attr "prefix" "maybe_vex")
9190 (set_attr "mode" "TI")])
;; Anonymous pmovsxwd variant whose source may be a register or memory.
;; Operand 1 is V4HI: the pattern converts four HI elements (v4hi -> v4si),
;; matching the zero-extending twin *sse4_1_zero_extendv4hiv4si2 and the
;; V8QI/V4QI/V2QI/V2SI siblings, whose operand mode always carries the
;; element count from the pattern name.  The former V2HI mode described
;; only two of the four source elements.
9192 (define_insn "*sse4_1_extendv4hiv4si2"
9193 [(set (match_operand:V4SI 0 "register_operand" "=x")
9197 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9198 (parallel [(const_int 0)
9203 "%vpmovsxwd\t{%1, %0|%0, %1}"
9204 [(set_attr "type" "ssemov")
9205 (set_attr "prefix_extra" "1")
9206 (set_attr "prefix" "maybe_vex")
9207 (set_attr "mode" "TI")])
;; Named pmovsxwq pattern with a register source: elements are selected
;; from a full V8HI register and the result is V2DI.
9209 (define_insn "sse4_1_extendv2hiv2di2"
9210 [(set (match_operand:V2DI 0 "register_operand" "=x")
9213 (match_operand:V8HI 1 "register_operand" "x")
9214 (parallel [(const_int 0)
9217 "%vpmovsxwq\t{%1, %0|%0, %1}"
9218 [(set_attr "type" "ssemov")
9219 (set_attr "prefix_extra" "1")
9220 (set_attr "prefix" "maybe_vex")
9221 (set_attr "mode" "TI")])
;; Anonymous pmovsxwq variant whose source may be a register or memory.
;; Operand 1 is V2HI: the pattern converts two HI elements (v2hi -> v2di),
;; matching the zero-extending twin *sse4_1_zero_extendv2hiv2di2 and the
;; other narrow-source siblings.  The former V8HI mode made this a full
;; 128-bit operand, which is the shape the register-only named pattern
;; already covers and is wider than the two elements being converted.
9223 (define_insn "*sse4_1_extendv2hiv2di2"
9224 [(set (match_operand:V2DI 0 "register_operand" "=x")
9228 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9229 (parallel [(const_int 0)
9232 "%vpmovsxwq\t{%1, %0|%0, %1}"
9233 [(set_attr "type" "ssemov")
9234 (set_attr "prefix_extra" "1")
9235 (set_attr "prefix" "maybe_vex")
9236 (set_attr "mode" "TI")])
;; Named pmovsxdq pattern with a register source: elements are selected
;; from a full V4SI register and the result is V2DI.
9238 (define_insn "sse4_1_extendv2siv2di2"
9239 [(set (match_operand:V2DI 0 "register_operand" "=x")
9242 (match_operand:V4SI 1 "register_operand" "x")
9243 (parallel [(const_int 0)
9246 "%vpmovsxdq\t{%1, %0|%0, %1}"
9247 [(set_attr "type" "ssemov")
9248 (set_attr "prefix_extra" "1")
9249 (set_attr "prefix" "maybe_vex")
9250 (set_attr "mode" "TI")])
;; Anonymous pmovsxdq variant whose source is a V2SI value that may be a
;; register or memory.
9252 (define_insn "*sse4_1_extendv2siv2di2"
9253 [(set (match_operand:V2DI 0 "register_operand" "=x")
9257 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9258 (parallel [(const_int 0)
9261 "%vpmovsxdq\t{%1, %0|%0, %1}"
9262 [(set_attr "type" "ssemov")
9263 (set_attr "prefix_extra" "1")
9264 (set_attr "prefix" "maybe_vex")
9265 (set_attr "mode" "TI")])
;; Named pmovzxbw pattern with a register source: elements are selected
;; from a full V16QI register and the result is V8HI.
9267 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9268 [(set (match_operand:V8HI 0 "register_operand" "=x")
9271 (match_operand:V16QI 1 "register_operand" "x")
9272 (parallel [(const_int 0)
9281 "%vpmovzxbw\t{%1, %0|%0, %1}"
9282 [(set_attr "type" "ssemov")
9283 (set_attr "prefix_extra" "1")
9284 (set_attr "prefix" "maybe_vex")
9285 (set_attr "mode" "TI")])
;; Anonymous pmovzxbw variant whose source is a V8QI value (register or
;; memory), wrapped in vec_duplicate:V16QI before element selection.
9287 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9288 [(set (match_operand:V8HI 0 "register_operand" "=x")
9291 (vec_duplicate:V16QI
9292 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9293 (parallel [(const_int 0)
9302 "%vpmovzxbw\t{%1, %0|%0, %1}"
9303 [(set_attr "type" "ssemov")
9304 (set_attr "prefix_extra" "1")
9305 (set_attr "prefix" "maybe_vex")
9306 (set_attr "mode" "TI")])
;; Named pmovzxbd pattern with a register source: elements are selected
;; from a full V16QI register and the result is V4SI.
9308 (define_insn "sse4_1_zero_extendv4qiv4si2"
9309 [(set (match_operand:V4SI 0 "register_operand" "=x")
9312 (match_operand:V16QI 1 "register_operand" "x")
9313 (parallel [(const_int 0)
9318 "%vpmovzxbd\t{%1, %0|%0, %1}"
9319 [(set_attr "type" "ssemov")
9320 (set_attr "prefix_extra" "1")
9321 (set_attr "prefix" "maybe_vex")
9322 (set_attr "mode" "TI")])
;; Anonymous pmovzxbd variant whose source is a V4QI value (register or
;; memory), wrapped in vec_duplicate:V16QI before element selection.
9324 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9325 [(set (match_operand:V4SI 0 "register_operand" "=x")
9328 (vec_duplicate:V16QI
9329 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9330 (parallel [(const_int 0)
9335 "%vpmovzxbd\t{%1, %0|%0, %1}"
9336 [(set_attr "type" "ssemov")
9337 (set_attr "prefix_extra" "1")
9338 (set_attr "prefix" "maybe_vex")
9339 (set_attr "mode" "TI")])
;; Named pmovzxbq pattern with a register source: elements are selected
;; from a full V16QI register and the result is V2DI.
9341 (define_insn "sse4_1_zero_extendv2qiv2di2"
9342 [(set (match_operand:V2DI 0 "register_operand" "=x")
9345 (match_operand:V16QI 1 "register_operand" "x")
9346 (parallel [(const_int 0)
9349 "%vpmovzxbq\t{%1, %0|%0, %1}"
9350 [(set_attr "type" "ssemov")
9351 (set_attr "prefix_extra" "1")
9352 (set_attr "prefix" "maybe_vex")
9353 (set_attr "mode" "TI")])
;; Anonymous pmovzxbq variant whose source is a V2QI value (register or
;; memory), wrapped in vec_duplicate:V16QI before element selection.
9355 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9356 [(set (match_operand:V2DI 0 "register_operand" "=x")
9359 (vec_duplicate:V16QI
9360 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9361 (parallel [(const_int 0)
9364 "%vpmovzxbq\t{%1, %0|%0, %1}"
9365 [(set_attr "type" "ssemov")
9366 (set_attr "prefix_extra" "1")
9367 (set_attr "prefix" "maybe_vex")
9368 (set_attr "mode" "TI")])
;; Named pmovzxwd pattern with a register source: elements are selected
;; from a full V8HI register and the result is V4SI.
9370 (define_insn "sse4_1_zero_extendv4hiv4si2"
9371 [(set (match_operand:V4SI 0 "register_operand" "=x")
9374 (match_operand:V8HI 1 "register_operand" "x")
9375 (parallel [(const_int 0)
9380 "%vpmovzxwd\t{%1, %0|%0, %1}"
9381 [(set_attr "type" "ssemov")
9382 (set_attr "prefix_extra" "1")
9383 (set_attr "prefix" "maybe_vex")
9384 (set_attr "mode" "TI")])
;; Anonymous pmovzxwd variant whose source is a V4HI value that may be a
;; register or memory.
9386 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9387 [(set (match_operand:V4SI 0 "register_operand" "=x")
9391 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9392 (parallel [(const_int 0)
9397 "%vpmovzxwd\t{%1, %0|%0, %1}"
9398 [(set_attr "type" "ssemov")
9399 (set_attr "prefix_extra" "1")
9400 (set_attr "prefix" "maybe_vex")
9401 (set_attr "mode" "TI")])
;; Named pmovzxwq pattern with a register source: elements are selected
;; from a full V8HI register and the result is V2DI.
9403 (define_insn "sse4_1_zero_extendv2hiv2di2"
9404 [(set (match_operand:V2DI 0 "register_operand" "=x")
9407 (match_operand:V8HI 1 "register_operand" "x")
9408 (parallel [(const_int 0)
9411 "%vpmovzxwq\t{%1, %0|%0, %1}"
9412 [(set_attr "type" "ssemov")
9413 (set_attr "prefix_extra" "1")
9414 (set_attr "prefix" "maybe_vex")
9415 (set_attr "mode" "TI")])
;; Anonymous pmovzxwq variant whose source is a V2HI value that may be a
;; register or memory.
9417 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9418 [(set (match_operand:V2DI 0 "register_operand" "=x")
9422 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9423 (parallel [(const_int 0)
9426 "%vpmovzxwq\t{%1, %0|%0, %1}"
9427 [(set_attr "type" "ssemov")
9428 (set_attr "prefix_extra" "1")
9429 (set_attr "prefix" "maybe_vex")
9430 (set_attr "mode" "TI")])
;; Named pmovzxdq pattern with a register source: elements are selected
;; from a full V4SI register and the result is V2DI.
9432 (define_insn "sse4_1_zero_extendv2siv2di2"
9433 [(set (match_operand:V2DI 0 "register_operand" "=x")
9436 (match_operand:V4SI 1 "register_operand" "x")
9437 (parallel [(const_int 0)
9440 "%vpmovzxdq\t{%1, %0|%0, %1}"
9441 [(set_attr "type" "ssemov")
9442 (set_attr "prefix_extra" "1")
9443 (set_attr "prefix" "maybe_vex")
9444 (set_attr "mode" "TI")])
;; Anonymous pmovzxdq variant whose source is a V2SI value that may be a
;; register or memory.
9446 (define_insn "*sse4_1_zero_extendv2siv2di2"
9447 [(set (match_operand:V2DI 0 "register_operand" "=x")
9451 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9452 (parallel [(const_int 0)
9455 "%vpmovzxdq\t{%1, %0|%0, %1}"
9456 [(set_attr "type" "ssemov")
9457 (set_attr "prefix_extra" "1")
9458 (set_attr "prefix" "maybe_vex")
9459 (set_attr "mode" "TI")])
9461 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9462 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The pattern has no output operand: its only effect is to set FLAGS_REG
;; (in CC mode) from an unspec of operands 0 and 1.  Operand 0 must be a
;; register; operand 1 may be a register or memory.  Instantiated for the
;; AVXMODEF2P modes.
9463 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9464 [(set (reg:CC FLAGS_REG)
9465 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9466 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9469 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9470 [(set_attr "type" "ssecomi")
9471 (set_attr "prefix" "vex")
9472 (set_attr "mode" "<MODE>")])
9474 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9475 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CC mode) from an unspec of two V4DI
;; operands.  Operand 0 must be a register; operand 1 may be memory.  The
;; mode attribute is OI.
9476 (define_insn "avx_ptest256"
9477 [(set (reg:CC FLAGS_REG)
9478 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9479 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9482 "vptest\t{%1, %0|%0, %1}"
9483 [(set_attr "type" "ssecomi")
9484 (set_attr "prefix" "vex")
9485 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (CC mode) from an unspec of two V2DI
;; operands.  %v / maybe_vex: the same pattern prints either ptest or
;; vptest.
9487 (define_insn "sse4_1_ptest"
9488 [(set (reg:CC FLAGS_REG)
9489 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9490 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9493 "%vptest\t{%1, %0|%0, %1}"
9494 [(set_attr "type" "ssecomi")
9495 (set_attr "prefix_extra" "1")
9496 (set_attr "prefix" "maybe_vex")
9497 (set_attr "mode" "TI")])
;; 256-bit packed rounding for the AVX256MODEF2P modes, modelled as an
;; unspec.  Operand 1 is the source (register or memory); operand 2 is an
;; immediate accepted by const_0_to_15_operand.
9499 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9500 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9501 (unspec:AVX256MODEF2P
9502 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9503 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9506 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9507 [(set_attr "type" "ssecvt")
9508 (set_attr "prefix" "vex")
9509 (set_attr "mode" "<MODE>")])
;; 128-bit packed rounding for the SSEMODEF2P modes.  The destination is
;; not tied to the source, so the template prints three operands; %v /
;; maybe_vex lets the same pattern print roundp<suffix> or vroundp<suffix>.
9511 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9512 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9514 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9515 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9518 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9519 [(set_attr "type" "ssecvt")
9520 (set_attr "prefix_extra" "1")
9521 (set_attr "prefix" "maybe_vex")
9522 (set_attr "mode" "<MODE>")])
;; AVX scalar rounding for the SSEMODEF2P modes, expressed as a vec_merge
;; of the rounded operand 2 with operand 1.  Both inputs must be registers
;; here; operand 3 is the rounding immediate.  Four-operand vrounds<suffix>.
9524 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9525 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9526 (vec_merge:SSEMODEF2P
9528 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9529 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9531 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9534 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9535 [(set_attr "type" "ssecvt")
9536 (set_attr "prefix" "vex")
9537 (set_attr "mode" "<MODE>")])
;; Legacy-encoded scalar rounding for the SSEMODEF2P modes, expressed as a
;; vec_merge of the rounded operand 2 with operand 1.  Operand 1 is tied to
;; the destination, so the template prints %3, %2 and %0 only.
9539 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9540 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9541 (vec_merge:SSEMODEF2P
9543 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9544 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9546 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9549 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9550 [(set_attr "type" "ssecvt")
9551 (set_attr "prefix_extra" "1")
9552 (set_attr "mode" "<MODE>")])
9554 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9556 ;; Intel SSE4.2 string/text processing instructions
9558 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare.  The pattern sets three things
;; at once: an SI result in a "c" register (operand 0), a V16QI result in a
;; "Yz" register (operand 1) and FLAGS_REG.  Its condition only holds while
;; neither reload_completed nor reload_in_progress is set.
;;
;; The split code reads REG_UNUSED notes on curr_insn to compute three
;; liveness flags (ecx for operand 0, xmm0 for operand 1, flags for
;; FLAGS_REG) and emits the index form (gen_sse4_2_pcmpestri), the mask
;; form (gen_sse4_2_pcmpestrm) and/or the flags-only form.  The flags-only
;; form is emitted when flags is set and neither ecx nor xmm0 is, and it is
;; given NULL for its two scratch operands.
9560 (define_insn_and_split "sse4_2_pcmpestr"
9561 [(set (match_operand:SI 0 "register_operand" "=c,c")
9563 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9564 (match_operand:SI 3 "register_operand" "a,a")
9565 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9566 (match_operand:SI 5 "register_operand" "d,d")
9567 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9569 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9577 (set (reg:CC FLAGS_REG)
9586 && !(reload_completed || reload_in_progress)"
9591 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9592 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9593 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9596 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9597 operands[3], operands[4],
9598 operands[5], operands[6]));
9600 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9601 operands[3], operands[4],
9602 operands[5], operands[6]));
9603 if (flags && !(ecx || xmm0))
9604 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9605 operands[2], operands[3],
9606 operands[4], operands[5],
9610 [(set_attr "type" "sselog")
9611 (set_attr "prefix_data16" "1")
9612 (set_attr "prefix_extra" "1")
9613 (set_attr "memory" "none,load")
9614 (set_attr "mode" "TI")])
;; Index form of the explicit-length string compare.  The SI result goes
;; to a "c" register and FLAGS_REG is set as well.  The two SI inputs are
;; pinned to the "a" and "d" registers and are not printed in the template;
;; only operands 1, 3 and 5 are.  Two alternatives: operand 3 in a register
;; (memory attribute "none") or in memory ("load").
9616 (define_insn "sse4_2_pcmpestri"
9617 [(set (match_operand:SI 0 "register_operand" "=c,c")
9619 [(match_operand:V16QI 1 "register_operand" "x,x")
9620 (match_operand:SI 2 "register_operand" "a,a")
9621 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9622 (match_operand:SI 4 "register_operand" "d,d")
9623 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9625 (set (reg:CC FLAGS_REG)
9634 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9635 [(set_attr "type" "sselog")
9636 (set_attr "prefix_data16" "1")
9637 (set_attr "prefix_extra" "1")
9638 (set_attr "prefix" "maybe_vex")
9639 (set_attr "memory" "none,load")
9640 (set_attr "mode" "TI")])
;; Mask form of the explicit-length string compare.  The V16QI result goes
;; to a "Yz" register and FLAGS_REG is set as well.  The two SI inputs are
;; pinned to the "a" and "d" registers and are not printed in the template.
;; Two alternatives distinguish a register from a memory operand 3.
9642 (define_insn "sse4_2_pcmpestrm"
9643 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9645 [(match_operand:V16QI 1 "register_operand" "x,x")
9646 (match_operand:SI 2 "register_operand" "a,a")
9647 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9648 (match_operand:SI 4 "register_operand" "d,d")
9649 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9651 (set (reg:CC FLAGS_REG)
9660 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9661 [(set_attr "type" "sselog")
9662 (set_attr "prefix_data16" "1")
9663 (set_attr "prefix_extra" "1")
9664 (set_attr "prefix" "maybe_vex")
9665 (set_attr "memory" "none,load")
9666 (set_attr "mode" "TI")])
;; Flags-only form of the explicit-length string compare: the only set is
;; FLAGS_REG, while operands 0 and 1 are scratch clobbers.  There are four
;; alternatives.  Alternatives 0 and 1 clobber a "Yz" V16QI scratch and
;; print pcmpestrm; alternatives 2 and 3 clobber a "c" SI scratch and print
;; pcmpestri.  Within each pair, operand 4 is a register in the first
;; alternative and memory in the second, as the memory attribute
;; "none,load,none,load" records.
9668 (define_insn "sse4_2_pcmpestr_cconly"
9669 [(set (reg:CC FLAGS_REG)
9671 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9672 (match_operand:SI 3 "register_operand" "a,a,a,a")
9673 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9674 (match_operand:SI 5 "register_operand" "d,d,d,d")
9675 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9677 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9678 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9681 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9682 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9683 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9684 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9685 [(set_attr "type" "sselog")
9686 (set_attr "prefix_data16" "1")
9687 (set_attr "prefix_extra" "1")
9688 (set_attr "memory" "none,load,none,load")
9689 (set_attr "prefix" "maybe_vex")
9690 (set_attr "mode" "TI")])
;; Combined implicit-length string compare.  The pattern sets an SI result
;; in a "c" register (operand 0), a V16QI result in a "Yz" register
;; (operand 1) and FLAGS_REG.  Unlike the explicit-length pattern there are
;; no SI length inputs.  Its condition only holds while neither
;; reload_completed nor reload_in_progress is set.
;;
;; The split code reads REG_UNUSED notes on curr_insn to compute three
;; liveness flags (ecx, xmm0, flags) and emits the index form
;; (gen_sse4_2_pcmpistri), the mask form (gen_sse4_2_pcmpistrm) and/or the
;; flags-only form.  The flags-only form is emitted when flags is set and
;; neither ecx nor xmm0 is, and it is given NULL for its two scratch
;; operands.
9692 (define_insn_and_split "sse4_2_pcmpistr"
9693 [(set (match_operand:SI 0 "register_operand" "=c,c")
9695 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9696 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9697 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9699 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9705 (set (reg:CC FLAGS_REG)
9712 && !(reload_completed || reload_in_progress)"
9717 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9718 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9719 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9722 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9723 operands[3], operands[4]));
9725 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9726 operands[3], operands[4]));
9727 if (flags && !(ecx || xmm0))
9728 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9729 operands[2], operands[3],
9733 [(set_attr "type" "sselog")
9734 (set_attr "prefix_data16" "1")
9735 (set_attr "prefix_extra" "1")
9736 (set_attr "memory" "none,load")
9737 (set_attr "mode" "TI")])
;; pcmpistri: the SImode result lands in operand 0 (constraint "c") and
;; FLAGS_REG is set as well.  Operand 3 is an immediate accepted by
;; const_0_to_255_operand; operand 2 may be a register or memory.
9739 (define_insn "sse4_2_pcmpistri"
9740 [(set (match_operand:SI 0 "register_operand" "=c,c")
9742 [(match_operand:V16QI 1 "register_operand" "x,x")
9743 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9744 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9746 (set (reg:CC FLAGS_REG)
9753 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9754 [(set_attr "type" "sselog")
9755 (set_attr "prefix_data16" "1")
9756 (set_attr "prefix_extra" "1")
9757 (set_attr "prefix" "maybe_vex")
9758 (set_attr "memory" "none,load")
9759 (set_attr "mode" "TI")])
;; pcmpistrm: the V16QI result lands in operand 0 (constraint "Yz") and
;; FLAGS_REG is set as well.  Operand 3 is an immediate accepted by
;; const_0_to_255_operand; operand 2 may be a register or memory.
9761 (define_insn "sse4_2_pcmpistrm"
9762 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9764 [(match_operand:V16QI 1 "register_operand" "x,x")
9765 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9766 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9768 (set (reg:CC FLAGS_REG)
9775 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9776 [(set_attr "type" "sselog")
9777 (set_attr "prefix_data16" "1")
9778 (set_attr "prefix_extra" "1")
9779 (set_attr "prefix" "maybe_vex")
9780 (set_attr "memory" "none,load")
9781 (set_attr "mode" "TI")])
;; Flags-only form of pcmpistr: only FLAGS_REG is set; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 take a V16QI "Yz" scratch and emit
;; pcmpistrm; alternatives 2-3 take an SImode "c" scratch and emit pcmpistri.
;; Within each pair, operand 3 is a register or memory.
9783 (define_insn "sse4_2_pcmpistr_cconly"
9784 [(set (reg:CC FLAGS_REG)
9786 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9787 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9788 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9790 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9791 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9794 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9795 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9796 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9797 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9798 [(set_attr "type" "sselog")
9799 (set_attr "prefix_data16" "1")
9800 (set_attr "prefix_extra" "1")
9801 (set_attr "memory" "none,load,none,load")
9802 (set_attr "prefix" "maybe_vex")
9803 (set_attr "mode" "TI")])
9805 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9807 ;; SSE5 instructions
9809 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9811 ;; SSE5 parallel integer multiply/add instructions.
9812 ;; Note the instruction does not allow the value being added to be a memory
9813 ;; operand.  However, pretending via the nonimmediate_operand predicate that
9814 ;; it does, and splitting it later, allows the following to be recognized:
9815 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI: operands 1 and 2 are the (commutative, "%") multiply
;; inputs and operand 3 is a register tied to the destination ("0").
;; The third alternative has the memory-capable operand in position 1, so its
;; template prints %1 and %2 in swapped order.
9816 (define_insn "sse5_pmacsww"
9817 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9820 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
9821 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
9822 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9823 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9825 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9826 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9827 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9828 [(set_attr "type" "ssemuladd")
9829 (set_attr "mode" "TI")])
9831 ;; Split pmacsww with two memory operands into a load and the pmacsww.
9833 [(set (match_operand:V8HI 0 "register_operand" "")
9835 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
9836 (match_operand:V8HI 2 "nonimmediate_operand" ""))
9837 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
9839 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9840 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9841 && !reg_mentioned_p (operands[0], operands[1])
9842 && !reg_mentioned_p (operands[0], operands[2])
9843 && !reg_mentioned_p (operands[0], operands[3])"
9846 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
9847 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww on V8HI: a V8HI multiply of operands 1 and 2 (commutative, "%")
;; combined with operand 3, which is tied to the destination ("0").
;; The third alternative takes operand 1 from memory and swaps %1/%2 in the
;; output template.
9852 (define_insn "sse5_pmacssww"
9853 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9855 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
9856 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
9857 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9858 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9860 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9861 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9862 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9863 [(set_attr "type" "ssemuladd")
9864 (set_attr "mode" "TI")])
9866 ;; Note the instruction does not allow the value being added to be a memory
9867 ;; operand.  However, pretending via the nonimmediate_operand predicate that
9868 ;; it does, and splitting it later, allows the following to be recognized:
9869 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI: operands 1 and 2 are the (commutative, "%") multiply
;; inputs and operand 3 is a register tied to the destination ("0").
;; The third alternative takes operand 1 from memory and swaps %1/%2 in the
;; output template.
9870 (define_insn "sse5_pmacsdd"
9871 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9874 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9875 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9876 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9877 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9879 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9880 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9881 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9882 [(set_attr "type" "ssemuladd")
9883 (set_attr "mode" "TI")])
9885 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
9887 [(set (match_operand:V4SI 0 "register_operand" "")
9889 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
9890 (match_operand:V4SI 2 "nonimmediate_operand" ""))
9891 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
9893 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9894 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9895 && !reg_mentioned_p (operands[0], operands[1])
9896 && !reg_mentioned_p (operands[0], operands[2])
9897 && !reg_mentioned_p (operands[0], operands[3])"
9900 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
9901 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd on V4SI: a V4SI multiply of operands 1 and 2 (commutative, "%")
;; combined with operand 3, which is tied to the destination ("0").
;; The third alternative takes operand 1 from memory and swaps %1/%2 in the
;; output template.
9906 (define_insn "sse5_pmacssdd"
9907 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9909 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9910 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9911 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9912 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9914 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9915 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9916 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9917 [(set_attr "type" "ssemuladd")
9918 (set_attr "mode" "TI")])
;; pmacssdql: V2DI result built from elements of the V4SI operands 1 and 2
;; picked by a selector parallel that begins at index 1; the V2DI operand 3 is
;; tied to the destination ("0").  Third alternative: operand 1 in memory,
;; %1/%2 swapped in the template.
9920 (define_insn "sse5_pmacssdql"
9921 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9926 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9927 (parallel [(const_int 1)
9930 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9931 (parallel [(const_int 1)
9933 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9934 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9936 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9937 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9938 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9939 [(set_attr "type" "ssemuladd")
9940 (set_attr "mode" "TI")])
;; pmacssdqh: V2DI result built from elements of the V4SI operands 1 and 2
;; picked by a selector parallel that begins at index 0; the V2DI operand 3 is
;; tied to the destination ("0").  Third alternative: operand 1 in memory,
;; %1/%2 swapped in the template.
9942 (define_insn "sse5_pmacssdqh"
9943 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9948 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9949 (parallel [(const_int 0)
9953 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9954 (parallel [(const_int 0)
9956 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9957 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9959 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9960 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9961 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9962 [(set_attr "type" "ssemuladd")
9963 (set_attr "mode" "TI")])
;; pmacsdql: V2DI result built from elements of the V4SI operands 1 and 2
;; picked by a selector parallel that begins at index 1; the V2DI operand 3 is
;; a register tied to the destination ("0").  Third alternative: operand 1 in
;; memory, %1/%2 swapped in the template.
9965 (define_insn "sse5_pmacsdql"
9966 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9971 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9972 (parallel [(const_int 1)
9976 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9977 (parallel [(const_int 1)
9979 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9980 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9982 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9983 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9984 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9985 [(set_attr "type" "ssemuladd")
9986 (set_attr "mode" "TI")])
;; Variant of pmacsdql whose operand 3 is a memory operand.  The destination
;; is earlyclobber ("=&x").  The pattern is split once reload has completed,
;; or earlier if operand 0 is not mentioned in operand 1 or operand 2.
9988 (define_insn_and_split "*sse5_pmacsdql_mem"
9989 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
9994 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9995 (parallel [(const_int 1)
9999 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10000 (parallel [(const_int 1)
10002 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10003 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10005 "&& (reload_completed
10006 || (!reg_mentioned_p (operands[0], operands[1])
10007 && !reg_mentioned_p (operands[0], operands[2])))"
10008 [(set (match_dup 0)
10016 (parallel [(const_int 1)
10021 (parallel [(const_int 1)
10025 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10026 ;; fake it with a multiply/add. In general, we expect the define_split to
10027 ;; occur before register allocation, so we have to handle the corner case where
10028 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of the V4SI elements picked by the selector (1, 3) into
;; a V2DI result.  The destination is earlyclobber ("=&x").  The split runs
;; after reload, or earlier when operand 0 is not mentioned in operands 1/2,
;; and supplies operands[3] = CONST0_RTX (V2DImode) for the replacement RTL.
10029 (define_insn_and_split "sse5_mulv2div2di3_low"
10030 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10034 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10035 (parallel [(const_int 1)
10039 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10040 (parallel [(const_int 1)
10041 (const_int 3)])))))]
10044 "&& (reload_completed
10045 || (!reg_mentioned_p (operands[0], operands[1])
10046 && !reg_mentioned_p (operands[0], operands[2])))"
10047 [(set (match_dup 0)
10055 (parallel [(const_int 1)
10060 (parallel [(const_int 1)
10064 operands[3] = CONST0_RTX (V2DImode);
10066 [(set_attr "type" "ssemuladd")
10067 (set_attr "mode" "TI")])
;; pmacsdqh: V2DI result built from elements of the V4SI operands 1 and 2
;; picked by a selector parallel that begins at index 0; the V2DI operand 3 is
;; a register tied to the destination ("0").  Third alternative: operand 1 in
;; memory, %1/%2 swapped in the template.
10069 (define_insn "sse5_pmacsdqh"
10070 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10075 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10076 (parallel [(const_int 0)
10080 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10081 (parallel [(const_int 0)
10083 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10084 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10086 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10087 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10088 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10089 [(set_attr "type" "ssemuladd")
10090 (set_attr "mode" "TI")])
;; Variant of pmacsdqh whose operand 3 is a memory operand.  The destination
;; is earlyclobber ("=&x").  The pattern is split once reload has completed,
;; or earlier if operand 0 is not mentioned in operand 1 or operand 2.
10092 (define_insn_and_split "*sse5_pmacsdqh_mem"
10093 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10098 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10099 (parallel [(const_int 0)
10103 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10104 (parallel [(const_int 0)
10106 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10107 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10109 "&& (reload_completed
10110 || (!reg_mentioned_p (operands[0], operands[1])
10111 && !reg_mentioned_p (operands[0], operands[2])))"
10112 [(set (match_dup 0)
10120 (parallel [(const_int 0)
10125 (parallel [(const_int 0)
10129 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10130 ;; fake it with a multiply/add. In general, we expect the define_split to
10131 ;; occur before register allocation, so we have to handle the corner case where
10132 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of the V4SI elements picked by the selector (0, 2) into
;; a V2DI result.  The destination is earlyclobber ("=&x").  The split runs
;; after reload, or earlier when operand 0 is not mentioned in operands 1/2,
;; and supplies operands[3] = CONST0_RTX (V2DImode) for the replacement RTL.
10133 (define_insn_and_split "sse5_mulv2div2di3_high"
10134 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10138 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10139 (parallel [(const_int 0)
10143 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10144 (parallel [(const_int 0)
10145 (const_int 2)])))))]
10148 "&& (reload_completed
10149 || (!reg_mentioned_p (operands[0], operands[1])
10150 && !reg_mentioned_p (operands[0], operands[2])))"
10151 [(set (match_dup 0)
10159 (parallel [(const_int 0)
10164 (parallel [(const_int 0)
10168 operands[3] = CONST0_RTX (V2DImode);
10170 [(set_attr "type" "ssemuladd")
10171 (set_attr "mode" "TI")])
10173 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V4SI result built from elements of the V8HI operands 1 and 2
;; picked by a selector parallel that begins at index 1; the V4SI operand 3 is
;; tied to the destination ("0").  Third alternative: operand 1 in memory,
;; %1/%2 swapped in the template.
10174 (define_insn "sse5_pmacsswd"
10175 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10180 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10181 (parallel [(const_int 1)
10187 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10188 (parallel [(const_int 1)
10192 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10193 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10195 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10196 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10197 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10198 [(set_attr "type" "ssemuladd")
10199 (set_attr "mode" "TI")])
;; pmacswd: V4SI result built from elements of the V8HI operands 1 and 2
;; picked by a selector parallel that begins at index 1; the V4SI operand 3 is
;; tied to the destination ("0").  Third alternative: operand 1 in memory,
;; %1/%2 swapped in the template.
10201 (define_insn "sse5_pmacswd"
10202 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10207 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10208 (parallel [(const_int 1)
10214 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10215 (parallel [(const_int 1)
10219 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10220 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10222 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10223 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10224 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10225 [(set_attr "type" "ssemuladd")
10226 (set_attr "mode" "TI")])
;; pmadcsswd: V4SI result from the V8HI operands 1 and 2.  Unlike the pmacs*wd
;; patterns, the RTL uses two sets of selector parallels, one beginning at
;; index 0 and one at index 1 (ending at 7).  The V4SI operand 3 is tied to
;; the destination ("0").
10228 (define_insn "sse5_pmadcsswd"
10229 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10235 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10236 (parallel [(const_int 0)
10242 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10243 (parallel [(const_int 0)
10251 (parallel [(const_int 1)
10258 (parallel [(const_int 1)
10261 (const_int 7)])))))
10262 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10263 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10265 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10266 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10267 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10268 [(set_attr "type" "ssemuladd")
10269 (set_attr "mode" "TI")])
;; pmadcswd: V4SI result from the V8HI operands 1 and 2.  The RTL uses two
;; sets of selector parallels, one beginning at index 0 and one at index 1
;; (ending at 7).  The V4SI operand 3 is tied to the destination ("0").
10271 (define_insn "sse5_pmadcswd"
10272 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10278 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10279 (parallel [(const_int 0)
10285 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10286 (parallel [(const_int 0)
10294 (parallel [(const_int 1)
10301 (parallel [(const_int 1)
10304 (const_int 7)])))))
10305 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10306 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10308 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10309 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10310 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10311 [(set_attr "type" "ssemuladd")
10312 (set_attr "mode" "TI")])
10314 ;; SSE5 parallel XMM conditional moves
;; pcmov for every mode in SSEMODE: an if_then_else with operand 3 as the
;; condition and operands 1/2 as the two arms.  In each of the four
;; alternatives one source is tied to the destination ("0"): operand 3 in
;; alternatives 0-1, operand 1 in alternatives 2-3.  All alternatives print
;; the same operand order.
10315 (define_insn "sse5_pcmov_<mode>"
10316 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10317 (if_then_else:SSEMODE
10318 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10319 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10320 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10321 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10323 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10324 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10325 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10326 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10327 [(set_attr "type" "sse4arg")])
10329 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: V8HI result from a single V16QI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
10330 (define_insn "sse5_phaddbw"
10331 [(set (match_operand:V8HI 0 "register_operand" "=x")
10335 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10336 (parallel [(const_int 0)
10347 (parallel [(const_int 1)
10354 (const_int 15)])))))]
10356 "phaddbw\t{%1, %0|%0, %1}"
10357 [(set_attr "type" "sseiadd1")])
;; phaddbd: V4SI result from a single V16QI source (register or memory),
;; using four selector parallels that begin at indices 0, 1, 2 and 3.
10359 (define_insn "sse5_phaddbd"
10360 [(set (match_operand:V4SI 0 "register_operand" "=x")
10365 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10366 (parallel [(const_int 0)
10373 (parallel [(const_int 1)
10376 (const_int 13)]))))
10381 (parallel [(const_int 2)
10388 (parallel [(const_int 3)
10391 (const_int 15)]))))))]
10393 "phaddbd\t{%1, %0|%0, %1}"
10394 [(set_attr "type" "sseiadd1")])
;; phaddbq: V2DI result from a single V16QI source (register or memory),
;; using eight selector parallels that begin at indices 0-3 and 8-11.
10396 (define_insn "sse5_phaddbq"
10397 [(set (match_operand:V2DI 0 "register_operand" "=x")
10403 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10404 (parallel [(const_int 0)
10409 (parallel [(const_int 1)
10415 (parallel [(const_int 2)
10420 (parallel [(const_int 3)
10421 (const_int 7)])))))
10427 (parallel [(const_int 8)
10432 (parallel [(const_int 9)
10433 (const_int 13)]))))
10438 (parallel [(const_int 10)
10443 (parallel [(const_int 11)
10444 (const_int 15)])))))))]
10446 "phaddbq\t{%1, %0|%0, %1}"
10447 [(set_attr "type" "sseiadd1")])
;; phaddwd: V4SI result from a single V8HI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
10449 (define_insn "sse5_phaddwd"
10450 [(set (match_operand:V4SI 0 "register_operand" "=x")
10454 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10455 (parallel [(const_int 0)
10462 (parallel [(const_int 1)
10465 (const_int 7)])))))]
10467 "phaddwd\t{%1, %0|%0, %1}"
10468 [(set_attr "type" "sseiadd1")])
;; phaddwq: V2DI result from a single V8HI source (register or memory),
;; using four selector parallels that begin at indices 0, 1, 2 and 3.
10470 (define_insn "sse5_phaddwq"
10471 [(set (match_operand:V2DI 0 "register_operand" "=x")
10476 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10477 (parallel [(const_int 0)
10482 (parallel [(const_int 1)
10488 (parallel [(const_int 2)
10493 (parallel [(const_int 3)
10494 (const_int 7)]))))))]
10496 "phaddwq\t{%1, %0|%0, %1}"
10497 [(set_attr "type" "sseiadd1")])
;; phadddq: V2DI result from a single V4SI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
10499 (define_insn "sse5_phadddq"
10500 [(set (match_operand:V2DI 0 "register_operand" "=x")
10504 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10505 (parallel [(const_int 0)
10510 (parallel [(const_int 1)
10511 (const_int 3)])))))]
10513 "phadddq\t{%1, %0|%0, %1}"
10514 [(set_attr "type" "sseiadd1")])
;; phaddubw: V8HI result from a single V16QI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of phaddbw -- confirm.
10516 (define_insn "sse5_phaddubw"
10517 [(set (match_operand:V8HI 0 "register_operand" "=x")
10521 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10522 (parallel [(const_int 0)
10533 (parallel [(const_int 1)
10540 (const_int 15)])))))]
10542 "phaddubw\t{%1, %0|%0, %1}"
10543 [(set_attr "type" "sseiadd1")])
;; phaddubd: V4SI result from a single V16QI source (register or memory),
;; using four selector parallels that begin at indices 0, 1, 2 and 3.
;; NOTE(review): presumably the unsigned counterpart of phaddbd -- confirm.
10545 (define_insn "sse5_phaddubd"
10546 [(set (match_operand:V4SI 0 "register_operand" "=x")
10551 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10552 (parallel [(const_int 0)
10559 (parallel [(const_int 1)
10562 (const_int 13)]))))
10567 (parallel [(const_int 2)
10574 (parallel [(const_int 3)
10577 (const_int 15)]))))))]
10579 "phaddubd\t{%1, %0|%0, %1}"
10580 [(set_attr "type" "sseiadd1")])
;; phaddubq: V2DI result from a single V16QI source (register or memory),
;; using eight selector parallels that begin at indices 0-3 and 8-11.
;; NOTE(review): presumably the unsigned counterpart of phaddbq -- confirm.
10582 (define_insn "sse5_phaddubq"
10583 [(set (match_operand:V2DI 0 "register_operand" "=x")
10589 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10590 (parallel [(const_int 0)
10595 (parallel [(const_int 1)
10601 (parallel [(const_int 2)
10606 (parallel [(const_int 3)
10607 (const_int 7)])))))
10613 (parallel [(const_int 8)
10618 (parallel [(const_int 9)
10619 (const_int 13)]))))
10624 (parallel [(const_int 10)
10629 (parallel [(const_int 11)
10630 (const_int 15)])))))))]
10632 "phaddubq\t{%1, %0|%0, %1}"
10633 [(set_attr "type" "sseiadd1")])
;; phadduwd: V4SI result from a single V8HI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of phaddwd -- confirm.
10635 (define_insn "sse5_phadduwd"
10636 [(set (match_operand:V4SI 0 "register_operand" "=x")
10640 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10641 (parallel [(const_int 0)
10648 (parallel [(const_int 1)
10651 (const_int 7)])))))]
10653 "phadduwd\t{%1, %0|%0, %1}"
10654 [(set_attr "type" "sseiadd1")])
;; phadduwq: V2DI result from a single V8HI source (register or memory),
;; using four selector parallels that begin at indices 0, 1, 2 and 3.
;; NOTE(review): presumably the unsigned counterpart of phaddwq -- confirm.
10656 (define_insn "sse5_phadduwq"
10657 [(set (match_operand:V2DI 0 "register_operand" "=x")
10662 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10663 (parallel [(const_int 0)
10668 (parallel [(const_int 1)
10674 (parallel [(const_int 2)
10679 (parallel [(const_int 3)
10680 (const_int 7)]))))))]
10682 "phadduwq\t{%1, %0|%0, %1}"
10683 [(set_attr "type" "sseiadd1")])
;; phaddudq: V2DI result from a single V4SI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of phadddq -- confirm.
10685 (define_insn "sse5_phaddudq"
10686 [(set (match_operand:V2DI 0 "register_operand" "=x")
10690 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10691 (parallel [(const_int 0)
10696 (parallel [(const_int 1)
10697 (const_int 3)])))))]
10699 "phaddudq\t{%1, %0|%0, %1}"
10700 [(set_attr "type" "sseiadd1")])
;; phsubbw: V8HI result from a single V16QI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
10702 (define_insn "sse5_phsubbw"
10703 [(set (match_operand:V8HI 0 "register_operand" "=x")
10707 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10708 (parallel [(const_int 0)
10719 (parallel [(const_int 1)
10726 (const_int 15)])))))]
10728 "phsubbw\t{%1, %0|%0, %1}"
10729 [(set_attr "type" "sseiadd1")])
;; phsubwd: V4SI result from a single V8HI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
10731 (define_insn "sse5_phsubwd"
10732 [(set (match_operand:V4SI 0 "register_operand" "=x")
10736 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10737 (parallel [(const_int 0)
10744 (parallel [(const_int 1)
10747 (const_int 7)])))))]
10749 "phsubwd\t{%1, %0|%0, %1}"
10750 [(set_attr "type" "sseiadd1")])
;; phsubdq: V2DI result from a single V4SI source (register or memory),
;; using selector parallels that begin at indices 0 and 1.
10752 (define_insn "sse5_phsubdq"
10753 [(set (match_operand:V2DI 0 "register_operand" "=x")
10757 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10758 (parallel [(const_int 0)
10763 (parallel [(const_int 1)
10764 (const_int 3)])))))]
10766 "phsubdq\t{%1, %0|%0, %1}"
10767 [(set_attr "type" "sseiadd1")])
10769 ;; SSE5 permute instructions
;; Generic pperm: an UNSPEC_SSE5_PERMUTE over three V16QI operands.
;; In every alternative one source is tied to the destination ("0"):
;; operand 1 in alternatives 0-1, operand 3 in alternatives 2-3.
10770 (define_insn "sse5_pperm"
10771 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10773 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
10774 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
10775 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10776 UNSPEC_SSE5_PERMUTE))]
10777 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10778 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10779 [(set_attr "type" "sse4arg")
10780 (set_attr "mode" "TI")])
10782 ;; The following are for the various unpack insns which don't need the first
10783 ;; source operand, so we can just use the output operand for the first operand.
10784 ;; This allows either of the other two operands to be a memory operand. We
10785 ;; can't just use the first operand as an argument to the normal pperm because
10786 ;; then an output-only argument suddenly becomes an input operand.
;; pperm-based unpack ("zero" variant), V16QI source -> V8HI result.
;; Operand 2 is the selector parallel of const_ints; operand 3 is the V16QI
;; operand wrapped in the (use ...).  The destination is printed twice in the
;; template so no separate first source is required.
;; The insn condition must let at most one of operand 1 and operand 3 be in
;; memory, matching the two constraint alternatives ("xm"/"x" and "x"/"xm").
;; The register test is therefore made on operand 3, not on operand 2, which
;; is never a register.
10787 (define_insn "sse5_pperm_zero_v16qi_v8hi"
10788 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10791 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10792 (match_operand 2 "" "")))) ;; parallel with const_int's
10793 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10795 && (register_operand (operands[1], V16QImode)
10796 || register_operand (operands[3], V16QImode))"
10797 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10798 [(set_attr "type" "sseadd")
10799 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant), V16QI source -> V8HI result.
;; Operand 2 is the selector parallel of const_ints; operand 3 is the V16QI
;; operand wrapped in the (use ...).  The destination is printed twice in the
;; template so no separate first source is required.
;; The insn condition must let at most one of operand 1 and operand 3 be in
;; memory, matching the two constraint alternatives ("xm"/"x" and "x"/"xm").
;; The register test is therefore made on operand 3, not on operand 2, which
;; is never a register.
10801 (define_insn "sse5_pperm_sign_v16qi_v8hi"
10802 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10805 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10806 (match_operand 2 "" "")))) ;; parallel with const_int's
10807 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10809 && (register_operand (operands[1], V16QImode)
10810 || register_operand (operands[3], V16QImode))"
10811 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10812 [(set_attr "type" "sseadd")
10813 (set_attr "mode" "TI")])
;; pperm-based unpack ("zero" variant), V8HI source -> V4SI result.
;; Operand 2 is the selector parallel of const_ints; operand 3 is the V16QI
;; operand wrapped in the (use ...).  The destination is printed twice in the
;; template so no separate first source is required.
;; The insn condition must let at most one of operand 1 and operand 3 be in
;; memory, matching the two constraint alternatives ("xm"/"x" and "x"/"xm").
;; The register test is therefore made on operand 3, not on operand 2, which
;; is never a register.
10815 (define_insn "sse5_pperm_zero_v8hi_v4si"
10816 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10819 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10820 (match_operand 2 "" "")))) ;; parallel with const_int's
10821 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10823 && (register_operand (operands[1], V8HImode)
10824 || register_operand (operands[3], V16QImode))"
10825 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10826 [(set_attr "type" "sseadd")
10827 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant), V8HI source -> V4SI result.
;; Operand 2 is the selector parallel of const_ints; operand 3 is the V16QI
;; operand wrapped in the (use ...).  The destination is printed twice in the
;; template so no separate first source is required.
;; The insn condition must let at most one of operand 1 and operand 3 be in
;; memory, matching the two constraint alternatives ("xm"/"x" and "x"/"xm").
;; The register test is therefore made on operand 3, not on operand 2, which
;; is never a register.
10829 (define_insn "sse5_pperm_sign_v8hi_v4si"
10830 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10833 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10834 (match_operand 2 "" "")))) ;; parallel with const_int's
10835 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10837 && (register_operand (operands[1], V8HImode)
10838 || register_operand (operands[3], V16QImode))"
10839 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10840 [(set_attr "type" "sseadd")
10841 (set_attr "mode" "TI")])
;; pperm-based unpack ("zero" variant), V4SI source -> V2DI result.
;; Operand 2 is the selector parallel of const_ints; operand 3 is the V16QI
;; operand wrapped in the (use ...).  The destination is printed twice in the
;; template so no separate first source is required.
;; The insn condition must let at most one of operand 1 and operand 3 be in
;; memory, matching the two constraint alternatives ("xm"/"x" and "x"/"xm").
;; The register test is therefore made on operand 3, not on operand 2, which
;; is never a register.
10843 (define_insn "sse5_pperm_zero_v4si_v2di"
10844 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10847 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10848 (match_operand 2 "" "")))) ;; parallel with const_int's
10849 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10851 && (register_operand (operands[1], V4SImode)
10852 || register_operand (operands[3], V16QImode))"
10853 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10854 [(set_attr "type" "sseadd")
10855 (set_attr "mode" "TI")])
;; pperm-based unpack ("sign" variant), V4SI source -> V2DI result.
;; Operand 2 is the selector parallel of const_ints; operand 3 is the V16QI
;; operand wrapped in the (use ...).  The destination is printed twice in the
;; template so no separate first source is required.
;; The insn condition must let at most one of operand 1 and operand 3 be in
;; memory, matching the two constraint alternatives ("xm"/"x" and "x"/"xm").
;; The register test is therefore made on operand 3, not on operand 2, which
;; is never a register.
10857 (define_insn "sse5_pperm_sign_v4si_v2di"
10858 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10861 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10862 (match_operand 2 "" "")))) ;; parallel with const_int's
10863 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10865 && (register_operand (operands[1], V4SImode)
10866 || register_operand (operands[3], V16QImode))"
10867 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10868 [(set_attr "type" "sseadd")
10869 (set_attr "mode" "TI")])
10871 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI operands (1 and 2) into one V4SI result via pperm.
;; Operand 3 is a V16QI operand wrapped in a (use ...).  One of operand 1 or
;; operand 3 is tied to the destination ("0") in every alternative.
10872 (define_insn "sse5_pperm_pack_v2di_v4si"
10873 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
10876 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
10878 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10879 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10880 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10881 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10882 [(set_attr "type" "sse4arg")
10883 (set_attr "mode" "TI")])
;; Pack two V4SI operands (1 and 2) into one V8HI result via pperm.
;; Operand 3 is a V16QI operand wrapped in a (use ...).  One of operand 1 or
;; operand 3 is tied to the destination ("0") in every alternative.
10885 (define_insn "sse5_pperm_pack_v4si_v8hi"
10886 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
10889 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
10891 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10892 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10893 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10894 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10895 [(set_attr "type" "sse4arg")
10896 (set_attr "mode" "TI")])
;; Pack two V8HI operands (1 and 2) into one V16QI result via pperm.
;; Operand 3 is a V16QI operand wrapped in a (use ...).  One of operand 1 or
;; operand 3 is tied to the destination ("0") in every alternative.
10898 (define_insn "sse5_pperm_pack_v8hi_v16qi"
10899 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10902 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
10904 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10905 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10906 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10907 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10908 [(set_attr "type" "sse4arg")
10909 (set_attr "mode" "TI")])
10911 ;; Floating point permutation (permps, permpd)
;; Permute for the modes in SSEMODEF2P: an UNSPEC_SSE5_PERMUTE of two
;; <MODE> operands (1 and 2) and a V16QI operand 3.  One of operand 1 or
;; operand 3 is tied to the destination ("0") in every alternative; the
;; mnemonic suffix comes from <ssemodesuffixf4>.
10912 (define_insn "sse5_perm<mode>"
10913 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
10915 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
10916 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
10917 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10918 UNSPEC_SSE5_PERMUTE))]
10919 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10920 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10921 [(set_attr "type" "sse4arg")
10922 (set_attr "mode" "<MODE>")])
10924 ;; SSE5 packed rotate instructions
;; Rotate-left expander for SSEMODE1248 with an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, it is
;; converted to <ssescalarmode> if needed, replicated into every element of a
;; fresh vector register through vec_init, and the rotate is emitted with
;; sse5_vrotl<mode>3.
10925 (define_expand "rotl<mode>3"
10926 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10927 (rotate:SSEMODE1248
10928 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10929 (match_operand:SI 2 "general_operand")))]
10932 /* The count is not an in-range immediate: broadcast the scalar into a
   vector of per-element counts.  */
10933 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10935 rtvec vs = rtvec_alloc (<ssescalarnum>);
10936 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10937 rtx reg = gen_reg_rtx (<MODE>mode);
10938 rtx op2 = operands[2];
10941 if (GET_MODE (op2) != <ssescalarmode>mode)
10943 op2 = gen_reg_rtx (<ssescalarmode>mode);
10944 convert_move (op2, operands[2], false);
10947 for (i = 0; i < <ssescalarnum>; i++)
10948 RTVEC_ELT (vs, i) = op2;
10950 emit_insn (gen_vec_init<mode> (reg, par));
10951 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for SSEMODE1248 with an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, it is
;; converted to <ssescalarmode> if needed and replicated into a vector
;; register through vec_init.  That vector is then negated, and the rotate is
;; emitted as sse5_vrotl<mode>3 with the negated counts.
10956 (define_expand "rotr<mode>3"
10957 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10958 (rotatert:SSEMODE1248
10959 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10960 (match_operand:SI 2 "general_operand")))]
10963 /* The count is not an in-range immediate: broadcast the scalar into a
   vector of per-element counts.  */
10964 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10966 rtvec vs = rtvec_alloc (<ssescalarnum>);
10967 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10968 rtx neg = gen_reg_rtx (<MODE>mode);
10969 rtx reg = gen_reg_rtx (<MODE>mode);
10970 rtx op2 = operands[2];
10973 if (GET_MODE (op2) != <ssescalarmode>mode)
10975 op2 = gen_reg_rtx (<ssescalarmode>mode);
10976 convert_move (op2, operands[2], false);
10979 for (i = 0; i < <ssescalarnum>; i++)
10980 RTVEC_ELT (vs, i) = op2;
10982 emit_insn (gen_vec_init<mode> (reg, par));
10983 emit_insn (gen_neg<mode>2 (neg, reg));
10984 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand; operand 1 may be a register or memory.
;; Emitted as prot<ssevecsize> with the immediate as the count.
10989 (define_insn "sse5_rotl<mode>3"
10990 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10991 (rotate:SSEMODE1248
10992 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10993 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10995 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10996 [(set_attr "type" "sseishft")
10997 (set_attr "mode" "TI")])
;; Rotate right by an immediate, emitted as a left rotate (prot<ssevecsize>)
;; by the complementary count.  The complement is taken against the bit width
;; of one vector element, GET_MODE_BITSIZE (<ssescalarmode>mode).
;; <ssescalarnum> is the number of elements, so "<ssescalarnum> * 8" is not
;; the element width: it gives 16 for V2DI and 128 for V16QI.
;; The computed count is stored in operands[3], which only the output
;; template references.
10999 (define_insn "sse5_rotr<mode>3"
11000 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11001 (rotatert:SSEMODE1248
11002 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11003 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11006 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11007 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11009 [(set_attr "type" "sseishft")
11010 (set_attr "mode" "TI")])
;; Vector rotate right with per-element counts in operand 2: the counts are
;; negated into a fresh register and the rotate is emitted as
;; sse5_vrotl<mode>3.
11012 (define_expand "vrotr<mode>3"
11013 [(match_operand:SSEMODE1248 0 "register_operand" "")
11014 (match_operand:SSEMODE1248 1 "register_operand" "")
11015 (match_operand:SSEMODE1248 2 "register_operand" "")]
11018 rtx reg = gen_reg_rtx (<MODE>mode);
11019 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11020 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Vector rotate left with per-element counts in operand 2: forwards the
;; three operands unchanged to sse5_vrotl<mode>3.
11024 (define_expand "vrotl<mode>3"
11025 [(match_operand:SSEMODE1248 0 "register_operand" "")
11026 (match_operand:SSEMODE1248 1 "register_operand" "")
11027 (match_operand:SSEMODE1248 2 "register_operand" "")]
11030 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate (prot<ssevecsize>) with a vector of counts in operand 2.
;; The RTL is an if_then_else whose arms are a rotate of operand 1 and a
;; rotatert that uses the negation of operand 2.  Either operand 1 or
;; operand 2 may be in memory, one per alternative.
11034 (define_insn "sse5_vrotl<mode>3"
11035 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11036 (if_then_else:SSEMODE1248
11038 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11040 (rotate:SSEMODE1248
11041 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11043 (rotatert:SSEMODE1248
11045 (neg:SSEMODE1248 (match_dup 2)))))]
11046 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11047 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11048 [(set_attr "type" "sseishft")
11049 (set_attr "mode" "TI")])
11051 ;; SSE5 packed shift instructions.
11052 ;; FIXME: add V2DI back in
;; Expand a vector logical shift right with a vector count: negate the
;; count (operand 2) into a fresh register and emit sse5_lshl<mode>3 with
;; the negated counts.
11053 (define_expand "vlshr<mode>3"
11054 [(match_operand:SSEMODE124 0 "register_operand" "")
11055 (match_operand:SSEMODE124 1 "register_operand" "")
11056 (match_operand:SSEMODE124 2 "register_operand" "")]
11059 rtx neg = gen_reg_rtx (<MODE>mode);
11060 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11061 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Expand a vector arithmetic shift right with a vector count: negate the
;; count (operand 2) into a fresh register and emit sse5_ashl<mode>3 with
;; the negated counts.
11065 (define_expand "vashr<mode>3"
11066 [(match_operand:SSEMODE124 0 "register_operand" "")
11067 (match_operand:SSEMODE124 1 "register_operand" "")
11068 (match_operand:SSEMODE124 2 "register_operand" "")]
11071 rtx neg = gen_reg_rtx (<MODE>mode);
11072 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11073 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Expand a vector shift left with a vector count: forwards the three
;; operands unchanged to sse5_ashl<mode>3.
11077 (define_expand "vashl<mode>3"
11078 [(match_operand:SSEMODE124 0 "register_operand" "")
11079 (match_operand:SSEMODE124 1 "register_operand" "")
11080 (match_operand:SSEMODE124 2 "register_operand" "")]
11083 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift (psha<ssevecsize>) with a vector count in
;; operand 2.  The RTL is an if_then_else between an ashift by operand 2 and
;; an ashiftrt by (neg operand 2).  Either the count ("xm,x") or the source
;; ("x,xm") may be a memory operand; the insn condition additionally checks
;; ix86_sse5_valid_op_p.
11087 (define_insn "sse5_ashl<mode>3"
11088 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11089 (if_then_else:SSEMODE1248
11091 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11093 (ashift:SSEMODE1248
11094 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11096 (ashiftrt:SSEMODE1248
11098 (neg:SSEMODE1248 (match_dup 2)))))]
11099 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11100 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11101 [(set_attr "type" "sseishft")
11102 (set_attr "mode" "TI")])
;; Variable logical shift (pshl<ssevecsize>) with a vector count in
;; operand 2.  The RTL is an if_then_else between an ashift by operand 2 and
;; an lshiftrt by (neg operand 2).  Either the count ("xm,x") or the source
;; ("x,xm") may be a memory operand; the insn condition additionally checks
;; ix86_sse5_valid_op_p.
11104 (define_insn "sse5_lshl<mode>3"
11105 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11106 (if_then_else:SSEMODE1248
11108 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11110 (ashift:SSEMODE1248
11111 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11113 (lshiftrt:SSEMODE1248
11115 (neg:SSEMODE1248 (match_dup 2)))))]
11116 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11117 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11118 [(set_attr "type" "sseishft")
11119 (set_attr "mode" "TI")])
11121 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; V16QI shift left by a scalar count (operand 2, SImode).  The count is
;; placed in all 16 slots of a PARALLEL, materialized as a V16QI register
;; with vec_initv16qi, and used as the count vector for sse5_ashlv16qi3.
11122 (define_expand "ashlv16qi3"
11123 [(match_operand:V16QI 0 "register_operand" "")
11124 (match_operand:V16QI 1 "register_operand" "")
11125 (match_operand:SI 2 "nonmemory_operand" "")]
11128 rtvec vs = rtvec_alloc (16);
11129 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11130 rtx reg = gen_reg_rtx (V16QImode);
11132 for (i = 0; i < 16; i++)
11133 RTVEC_ELT (vs, i) = operands[2];
11135 emit_insn (gen_vec_initv16qi (reg, par));
11136 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift left by a scalar count (operand 2, SImode).  The
;; count is placed in all 16 slots of a PARALLEL, materialized as a V16QI
;; register with vec_initv16qi, and used as the count vector for
;; sse5_lshlv16qi3.
11140 (define_expand "lshlv16qi3"
11141 [(match_operand:V16QI 0 "register_operand" "")
11142 (match_operand:V16QI 1 "register_operand" "")
11143 (match_operand:SI 2 "nonmemory_operand" "")]
11146 rtvec vs = rtvec_alloc (16);
11147 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11148 rtx reg = gen_reg_rtx (V16QImode);
11150 for (i = 0; i < 16; i++)
11151 RTVEC_ELT (vs, i) = operands[2];
11153 emit_insn (gen_vec_initv16qi (reg, par));
11154 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count (operand 2, SImode),
;; implemented as sse5_ashlv16qi3 with a negated count vector.  A CONST_INT
;; count is negated up front with GEN_INT (- INTVAL (...)) before being
;; placed in all 16 element slots; for a non-constant count the vector
;; built by vec_initv16qi is negated with negv16qi2 and the negated vector
;; is used instead.
11158 (define_expand "ashrv16qi3"
11159 [(match_operand:V16QI 0 "register_operand" "")
11160 (match_operand:V16QI 1 "register_operand" "")
11161 (match_operand:SI 2 "nonmemory_operand" "")]
11164 rtvec vs = rtvec_alloc (16);
11165 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11166 rtx reg = gen_reg_rtx (V16QImode);
11168 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
11169 ? GEN_INT (- INTVAL (operands[2]))
11172 for (i = 0; i < 16; i++)
11173 RTVEC_ELT (vs, i) = ele;
11175 emit_insn (gen_vec_initv16qi (reg, par));
11177 if (GET_CODE (operands[2]) != CONST_INT)
11179 rtx neg = gen_reg_rtx (V16QImode);
11180 emit_insn (gen_negv16qi2 (neg, reg));
11181 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
11184 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count (operand 2), implemented
;; as sse5_ashlv2di3 with a negated count in both elements.  The negated
;; element is obtained in one of three ways: a CONST_INT is negated
;; directly; a count whose mode is not DImode is first converted to DImode
;; with convert_move and then negated with negdi2; otherwise negdi2 is
;; applied to operand 2 as-is.
11189 (define_expand "ashrv2di3"
11190 [(match_operand:V2DI 0 "register_operand" "")
11191 (match_operand:V2DI 1 "register_operand" "")
11192 (match_operand:DI 2 "nonmemory_operand" "")]
11195 rtvec vs = rtvec_alloc (2);
11196 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11197 rtx reg = gen_reg_rtx (V2DImode);
11200 if (GET_CODE (operands[2]) == CONST_INT)
11201 ele = GEN_INT (- INTVAL (operands[2]));
11202 else if (GET_MODE (operands[2]) != DImode)
11204 rtx move = gen_reg_rtx (DImode);
11205 ele = gen_reg_rtx (DImode);
11206 convert_move (move, operands[2], false);
11207 emit_insn (gen_negdi2 (ele, move));
11211 ele = gen_reg_rtx (DImode);
11212 emit_insn (gen_negdi2 (ele, operands[2]));
11215 RTVEC_ELT (vs, 0) = ele;
11216 RTVEC_ELT (vs, 1) = ele;
11217 emit_insn (gen_vec_initv2di (reg, par));
11218 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11222 ;; SSE5 FRCZ support
;; Packed frcz on V4SF/V2DF: register destination, register-or-memory
;; source.  Emits frcz<ssemodesuffixf4>.
11224 (define_insn "sse5_frcz<mode>2"
11225 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11227 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11230 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11231 [(set_attr "type" "ssecvt1")
11232 (set_attr "prefix_extra" "1")
11233 (set_attr "mode" "<MODE>")])
;; Scalar (vec_merge) form of frcz.  Operand 2 is the register-or-memory
;; source; operand 1 is tied to the destination (constraint "0") and
;; supplies the merged-in elements.  Emits frcz<ssemodesuffixf2s>.
11236 (define_insn "sse5_vmfrcz<mode>2"
11237 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11238 (vec_merge:SSEMODEF2P
11240 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11242 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11245 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11246 [(set_attr "type" "ssecvt1")
11247 (set_attr "prefix_extra" "1")
11248 (set_attr "mode" "<MODE>")])
;; cvtph2ps: V4HI register-or-memory source to V4SF register destination,
;; modelled as an unspec.
11250 (define_insn "sse5_cvtph2ps"
11251 [(set (match_operand:V4SF 0 "register_operand" "=x")
11252 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11255 "cvtph2ps\t{%1, %0|%0, %1}"
11256 [(set_attr "type" "ssecvt")
11257 (set_attr "mode" "V4SF")])
;; cvtps2ph: V4SF register source to V4HI destination, which may be a
;; register or memory ("=xm"); modelled as an unspec.
11259 (define_insn "sse5_cvtps2ph"
11260 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11261 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11264 "cvtps2ph\t{%1, %0|%0, %1}"
11265 [(set_attr "type" "ssecvt")
11266 (set_attr "mode" "V4SF")])
11268 ;; Scalar versions of the com instructions that use vector types that are
11269 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11270 ;; com instructions fill in 0's in the upper bits instead of leaving them
11271 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com instructions: a vec_merge of the floating
;; comparison (operator 1 on operands 2 and 3).  The preparation code
;; supplies operand 4 as the all-zero vector of <MODE>mode.
11272 (define_expand "sse5_vmmaskcmp<mode>3"
11273 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11274 (vec_merge:SSEMODEF2P
11275 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11276 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11277 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11282 operands[4] = CONST0_RTX (<MODE>mode);
;; Insn matching the scalar com expansion.  Operand 2 must be a register,
;; operand 3 may be memory; operand 4 is matched with no predicate.  The
;; comparison code is printed via %Y1 as part of the mnemonic
;; com<cond><ssemodesuffixf2s>.
11285 (define_insn "*sse5_vmmaskcmp<mode>3"
11286 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11287 (vec_merge:SSEMODEF2P
11288 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11289 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11290 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11291 (match_operand:SSEMODEF2P 4 "")
11294 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11295 [(set_attr "type" "sse4arg")
11296 (set_attr "mode" "<ssescalarmode>")])
11298 ;; We don't have a comparison operator that always returns true/false, so
11299 ;; handle comfalse and comtrue specially.
;; Operand 3 is a constant selector consumed by the switch in the output
;; code; it picks one of four mnemonics: scalar/packed comfalse and
;; scalar/packed comtrue.  The last two arms must emit the comtrue forms;
;; repeating the comfalse mnemonics there would make a comtrue request
;; assemble to a comfalse instruction.
11300 (define_insn "sse5_com_tf<mode>3"
11301 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11303 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11304 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11305 (match_operand:SI 3 "const_int_operand" "n")]
11306 UNSPEC_SSE5_TRUEFALSE))]
11309 const char *ret = NULL;
11311 switch (INTVAL (operands[3]))
11314 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11318 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11322 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11326 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11330 gcc_unreachable ();
11335 [(set_attr "type" "ssecmp")
11336 (set_attr "mode" "<MODE>")])
;; Packed floating-point mask compare on V4SF/V2DF.  The comparison code of
;; operator 1 is printed via %Y1, giving com<cond><ssemodesuffixf4>.
;; Operand 2 must be a register; operand 3 may be memory.
11338 (define_insn "sse5_maskcmp<mode>3"
11339 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11340 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11341 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11342 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11344 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11345 [(set_attr "type" "ssecmp")
11346 (set_attr "mode" "<MODE>")])
;; Packed integer mask compare on the SSEMODE1248 modes, using operators
;; accepted by ix86_comparison_int_operator.  Emits pcom<cond><ssevecsize>.
;; Operand 2 must be a register; operand 3 may be memory.
11348 (define_insn "sse5_maskcmp<mode>3"
11349 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11350 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11351 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11352 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11354 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11355 [(set_attr "type" "sse4arg")
11356 (set_attr "mode" "TI")])
;; Packed integer mask compare for operators accepted by
;; ix86_comparison_uns_operator.  Emits pcom<cond>u<ssevecsize> (note the
;; "u" between the condition and the size suffix).
11358 (define_insn "sse5_maskcmp_uns<mode>3"
11359 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11360 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11361 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11362 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11364 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11365 [(set_attr "type" "ssecmp")
11366 (set_attr "mode" "TI")])
11368 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11369 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11370 ;; the exact instruction generated for the intrinsic.
;; Same output template as sse5_maskcmp_uns<mode>3, but the comparison is
;; wrapped in an UNSPEC_SSE5_UNSIGNED_CMP unspec.
11371 (define_insn "sse5_maskcmp_uns2<mode>3"
11372 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11373 (unspec:SSEMODE1248
11374 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11375 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11376 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11377 UNSPEC_SSE5_UNSIGNED_CMP))]
11379 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11380 [(set_attr "type" "ssecmp")
11381 (set_attr "mode" "TI")])
11383 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11384 ;; being added here to be complete.
;; Operand 3 is a constant selector: a non-zero value emits
;; pcomtrue<ssevecsize>, zero emits pcomfalse<ssevecsize>.
11385 (define_insn "sse5_pcom_tf<mode>3"
11386 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11387 (unspec:SSEMODE1248
11388 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11389 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11390 (match_operand:SI 3 "const_int_operand" "n")]
11391 UNSPEC_SSE5_TRUEFALSE))]
11394 return ((INTVAL (operands[3]) != 0)
11395 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11396 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11398 [(set_attr "type" "ssecmp")
11399 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesenc.  Enabled only when both TARGET_AES
;; and TARGET_AVX hold; operand 2 may be memory.
11401 (define_insn "*avx_aesenc"
11402 [(set (match_operand:V2DI 0 "register_operand" "=x")
11403 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11404 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11406 "TARGET_AES && TARGET_AVX"
11407 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11408 [(set_attr "type" "sselog1")
11409 (set_attr "prefix" "vex")
11410 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination (constraint
;; "0"), so only %0 and %2 appear in the template.
11412 (define_insn "aesenc"
11413 [(set (match_operand:V2DI 0 "register_operand" "=x")
11414 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11415 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11418 "aesenc\t{%2, %0|%0, %2}"
11419 [(set_attr "type" "sselog1")
11420 (set_attr "prefix_extra" "1")
11421 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesenclast (UNSPEC_AESENCLAST).  Enabled
;; only when both TARGET_AES and TARGET_AVX hold.
11423 (define_insn "*avx_aesenclast"
11424 [(set (match_operand:V2DI 0 "register_operand" "=x")
11425 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11426 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11427 UNSPEC_AESENCLAST))]
11428 "TARGET_AES && TARGET_AVX"
11429 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11430 [(set_attr "type" "sselog1")
11431 (set_attr "prefix" "vex")
11432 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination (constraint "0").
11434 (define_insn "aesenclast"
11435 [(set (match_operand:V2DI 0 "register_operand" "=x")
11436 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11437 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11438 UNSPEC_AESENCLAST))]
11440 "aesenclast\t{%2, %0|%0, %2}"
11441 [(set_attr "type" "sselog1")
11442 (set_attr "prefix_extra" "1")
11443 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesdec.  Enabled only when both TARGET_AES
;; and TARGET_AVX hold; operand 2 may be memory.
11445 (define_insn "*avx_aesdec"
11446 [(set (match_operand:V2DI 0 "register_operand" "=x")
11447 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11448 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11450 "TARGET_AES && TARGET_AVX"
11451 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11452 [(set_attr "type" "sselog1")
11453 (set_attr "prefix" "vex")
11454 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination (constraint
;; "0"), so only %0 and %2 appear in the template.
11456 (define_insn "aesdec"
11457 [(set (match_operand:V2DI 0 "register_operand" "=x")
11458 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11459 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11462 "aesdec\t{%2, %0|%0, %2}"
11463 [(set_attr "type" "sselog1")
11464 (set_attr "prefix_extra" "1")
11465 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesdeclast (UNSPEC_AESDECLAST).  Enabled
;; only when both TARGET_AES and TARGET_AVX hold.
11467 (define_insn "*avx_aesdeclast"
11468 [(set (match_operand:V2DI 0 "register_operand" "=x")
11469 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11470 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11471 UNSPEC_AESDECLAST))]
11472 "TARGET_AES && TARGET_AVX"
11473 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11474 [(set_attr "type" "sselog1")
11475 (set_attr "prefix" "vex")
11476 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination (constraint "0").
11478 (define_insn "aesdeclast"
11479 [(set (match_operand:V2DI 0 "register_operand" "=x")
11480 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11481 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11482 UNSPEC_AESDECLAST))]
11484 "aesdeclast\t{%2, %0|%0, %2}"
11485 [(set_attr "type" "sselog1")
11486 (set_attr "prefix_extra" "1")
11487 (set_attr "mode" "TI")])
;; aesimc with a single register-or-memory source.  One pattern serves both
;; encodings: the template starts with %v and the "prefix" attribute is
;; "maybe_vex".
11489 (define_insn "aesimc"
11490 [(set (match_operand:V2DI 0 "register_operand" "=x")
11491 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11494 "%vaesimc\t{%1, %0|%0, %1}"
11495 [(set_attr "type" "sselog1")
11496 (set_attr "prefix_extra" "1")
11497 (set_attr "prefix" "maybe_vex")
11498 (set_attr "mode" "TI")])
;; aeskeygenassist: register-or-memory source plus an 8-bit immediate
;; (operand 2, const_0_to_255_operand).  One pattern serves both encodings
;; via the %v template prefix and the "maybe_vex" prefix attribute.
11500 (define_insn "aeskeygenassist"
11501 [(set (match_operand:V2DI 0 "register_operand" "=x")
11502 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11503 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11504 UNSPEC_AESKEYGENASSIST))]
11506 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11507 [(set_attr "type" "sselog1")
11508 (set_attr "prefix_extra" "1")
11509 (set_attr "prefix" "maybe_vex")
11510 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq: two vector sources (operand 2 may be memory) and
;; an 8-bit immediate in operand 3.  Enabled only when both TARGET_PCLMUL
;; and TARGET_AVX hold.
11512 (define_insn "*vpclmulqdq"
11513 [(set (match_operand:V2DI 0 "register_operand" "=x")
11514 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11515 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11516 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11518 "TARGET_PCLMUL && TARGET_AVX"
11519 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11520 [(set_attr "type" "sselog1")
11521 (set_attr "prefix" "vex")
11522 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination (constraint
;; "0"); operand 3 is an 8-bit immediate.
11524 (define_insn "pclmulqdq"
11525 [(set (match_operand:V2DI 0 "register_operand" "=x")
11526 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11527 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11528 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11531 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11532 [(set_attr "type" "sselog1")
11533 (set_attr "prefix_extra" "1")
11534 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall in operand 0.  It has nregs + 1
;; elements, where nregs is 16 for TARGET_64BIT and 8 otherwise: element 0
;; is an UNSPEC_VOLATILE marker, followed by one SET per SSE register that
;; stores the V8SImode zero vector into it.
11536 (define_expand "avx_vzeroall"
11537 [(match_par_dup 0 [(const_int 0)])]
11540 int nregs = TARGET_64BIT ? 16 : 8;
11543 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11545 XVECEXP (operands[0], 0, 0)
11546 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11549 for (regno = 0; regno < nregs; regno++)
11550 XVECEXP (operands[0], 0, regno + 1)
11551 = gen_rtx_SET (VOIDmode,
11552 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11553 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate.  Only
;; the leading elements are spelled out here: the UNSPECV_VZEROALL marker
;; and one SET of a register to a const0_operand.
11556 (define_insn "*avx_vzeroall"
11557 [(match_parallel 0 "vzeroall_operation"
11558 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11559 (set (match_operand 1 "register_operand" "=x")
11560 (match_operand 2 "const0_operand" "X"))])]
11563 [(set_attr "type" "sse")
11564 (set_attr "memory" "none")
11565 (set_attr "prefix" "vex")
11566 (set_attr "mode" "OI")])
11568 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; 32-bit variant (TARGET_AVX && !TARGET_64BIT): an UNSPECV_VZEROUPPER
;; marker plus V8SImode clobbers of XMM0 through XMM7.
11569 (define_insn "avx_vzeroupper"
11570 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11571 (clobber (reg:V8SI XMM0_REG))
11572 (clobber (reg:V8SI XMM1_REG))
11573 (clobber (reg:V8SI XMM2_REG))
11574 (clobber (reg:V8SI XMM3_REG))
11575 (clobber (reg:V8SI XMM4_REG))
11576 (clobber (reg:V8SI XMM5_REG))
11577 (clobber (reg:V8SI XMM6_REG))
11578 (clobber (reg:V8SI XMM7_REG))]
11579 "TARGET_AVX && !TARGET_64BIT"
11581 [(set_attr "type" "sse")
11582 (set_attr "memory" "none")
11583 (set_attr "prefix" "vex")
11584 (set_attr "mode" "OI")])
;; 64-bit variant (TARGET_AVX && TARGET_64BIT): an UNSPECV_VZEROUPPER
;; marker plus V8SImode clobbers of XMM0 through XMM15.
11586 (define_insn "avx_vzeroupper_rex64"
11587 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11588 (clobber (reg:V8SI XMM0_REG))
11589 (clobber (reg:V8SI XMM1_REG))
11590 (clobber (reg:V8SI XMM2_REG))
11591 (clobber (reg:V8SI XMM3_REG))
11592 (clobber (reg:V8SI XMM4_REG))
11593 (clobber (reg:V8SI XMM5_REG))
11594 (clobber (reg:V8SI XMM6_REG))
11595 (clobber (reg:V8SI XMM7_REG))
11596 (clobber (reg:V8SI XMM8_REG))
11597 (clobber (reg:V8SI XMM9_REG))
11598 (clobber (reg:V8SI XMM10_REG))
11599 (clobber (reg:V8SI XMM11_REG))
11600 (clobber (reg:V8SI XMM12_REG))
11601 (clobber (reg:V8SI XMM13_REG))
11602 (clobber (reg:V8SI XMM14_REG))
11603 (clobber (reg:V8SI XMM15_REG))]
11604 "TARGET_AVX && TARGET_64BIT"
11606 [(set_attr "type" "sse")
11607 (set_attr "memory" "none")
11608 (set_attr "prefix" "vex")
11609 (set_attr "mode" "OI")])
;; vpermilps/vpermilpd with an immediate selector in operand 2.  The source
;; (operand 1) may be a register or memory, as its "xm" constraint already
;; states, so its predicate is nonimmediate_operand; a register_operand
;; predicate would reject memory sources before the constraint is consulted.
11611 (define_insn "avx_vpermil<mode>"
11612 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11614 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11615 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
11618 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11619 [(set_attr "type" "sselog")
11620 (set_attr "prefix" "vex")
11621 (set_attr "mode" "<MODE>")])
;; vpermilps/vpermilpd with a vector operand 2 of mode <avxpermvecmode>,
;; which may be a register or memory; operand 1 must be a register.
11623 (define_insn "avx_vpermilvar<mode>3"
11624 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11626 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11627 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11630 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11631 [(set_attr "type" "sselog")
11632 (set_attr "prefix" "vex")
11633 (set_attr "mode" "<MODE>")])
;; vperm2f128 on the AVX256MODE2P modes: two vector sources (operand 2 may
;; be memory) and an 8-bit immediate in operand 3, modelled as
;; UNSPEC_VPERMIL2F128.
11635 (define_insn "avx_vperm2f128<mode>3"
11636 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11637 (unspec:AVX256MODE2P
11638 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11639 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11640 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11641 UNSPEC_VPERMIL2F128))]
11643 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11644 [(set_attr "type" "sselog")
11645 (set_attr "prefix" "vex")
11646 (set_attr "mode" "V8SF")])
;; vbroadcasts{s,d}: the source (operand 1) is a scalar of mode
;; <avxscalarmode> and must be in memory.  The result is expressed as
;; nested vec_concats of half-vector mode.
11648 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
11649 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
11650 (vec_concat:AVXMODEF4P
11651 (vec_concat:<avxhalfvecmode>
11652 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
11654 (vec_concat:<avxhalfvecmode>
11658 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
11659 [(set_attr "type" "ssemov")
11660 (set_attr "prefix" "vex")
11661 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss into a V8SF register; the SFmode source must be in memory.
11663 (define_insn "avx_vbroadcastss256"
11664 [(set (match_operand:V8SF 0 "register_operand" "=x")
11668 (match_operand:SF 1 "memory_operand" "m")
11681 "vbroadcastss\t{%1, %0|%0, %1}"
11682 [(set_attr "type" "ssemov")
11683 (set_attr "prefix" "vex")
11684 (set_attr "mode" "SF")])
;; vbroadcastf128: the half-width vector source (operand 1, mode
;; <avxhalfvecmode>) must be in memory; the result is a vec_concat in the
;; full 256-bit mode.
11686 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
11687 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
11688 (vec_concat:AVX256MODEF2P
11689 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
11692 "vbroadcastf128\t{%1, %0|%0, %1}"
11693 [(set_attr "type" "ssemov")
11694 (set_attr "prefix" "vex")
11695 (set_attr "mode" "V4SF")])
;; Expander for vinsertf128 on any AVX256MODE.  Operand 3 is a constant
;; accepted by const_0_to_1_operand; the switch on its value dispatches to
;; vec_set_lo_<mode> or vec_set_hi_<mode>, and any other value is
;; unreachable.
11697 (define_expand "avx_vinsertf128<mode>"
11698 [(match_operand:AVX256MODE 0 "register_operand" "")
11699 (match_operand:AVX256MODE 1 "register_operand" "")
11700 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11701 (match_operand:SI 3 "const_0_to_1_operand" "")]
11704 switch (INTVAL (operands[3]))
11707 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11711 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11715 gcc_unreachable ();
;; Low-half insert for the 4-element 256-bit modes: the result is operand 2
;; concatenated with elements 2..3 of operand 1.  Emits vinsertf128 with
;; immediate 0.
11720 (define_insn "vec_set_lo_<mode>"
11721 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11722 (vec_concat:AVX256MODE4P
11723 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11724 (vec_select:<avxhalfvecmode>
11725 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11726 (parallel [(const_int 2) (const_int 3)]))))]
11728 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11729 [(set_attr "type" "sselog")
11730 (set_attr "prefix" "vex")
11731 (set_attr "mode" "V8SF")])
;; High-half insert for the 4-element 256-bit modes: the result is elements
;; 0..1 of operand 1 concatenated with operand 2.  Emits vinsertf128 with
;; immediate 1.
11733 (define_insn "vec_set_hi_<mode>"
11734 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11735 (vec_concat:AVX256MODE4P
11736 (vec_select:<avxhalfvecmode>
11737 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11738 (parallel [(const_int 0) (const_int 1)]))
11739 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11741 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11742 [(set_attr "type" "sselog")
11743 (set_attr "prefix" "vex")
11744 (set_attr "mode" "V8SF")])
;; Low-half insert for the 8-element 256-bit modes: the result is operand 2
;; concatenated with elements 4..7 of operand 1.  Emits vinsertf128 with
;; immediate 0.
11746 (define_insn "vec_set_lo_<mode>"
11747 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11748 (vec_concat:AVX256MODE8P
11749 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11750 (vec_select:<avxhalfvecmode>
11751 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11752 (parallel [(const_int 4) (const_int 5)
11753 (const_int 6) (const_int 7)]))))]
11755 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11756 [(set_attr "type" "sselog")
11757 (set_attr "prefix" "vex")
11758 (set_attr "mode" "V8SF")])
;; High-half insert for the 8-element 256-bit modes: the result is elements
;; 0..3 of operand 1 concatenated with operand 2.  Emits vinsertf128 with
;; immediate 1.
11760 (define_insn "vec_set_hi_<mode>"
11761 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11762 (vec_concat:AVX256MODE8P
11763 (vec_select:<avxhalfvecmode>
11764 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11765 (parallel [(const_int 0) (const_int 1)
11766 (const_int 2) (const_int 3)]))
11767 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11769 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11770 [(set_attr "type" "sselog")
11771 (set_attr "prefix" "vex")
11772 (set_attr "mode" "V8SF")])
;; V16HI low-half insert: combines operand 2 (V8HI, register or memory)
;; with elements 8..15 selected from operand 1.  Emits vinsertf128 with
;; immediate 0.
11774 (define_insn "vec_set_lo_v16hi"
11775 [(set (match_operand:V16HI 0 "register_operand" "=x")
11777 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11779 (match_operand:V16HI 1 "register_operand" "x")
11780 (parallel [(const_int 8) (const_int 9)
11781 (const_int 10) (const_int 11)
11782 (const_int 12) (const_int 13)
11783 (const_int 14) (const_int 15)]))))]
11785 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11786 [(set_attr "type" "sselog")
11787 (set_attr "prefix" "vex")
11788 (set_attr "mode" "V8SF")])
;; V16HI high-half insert: combines elements 0..7 selected from operand 1
;; with operand 2 (V8HI, register or memory).  Emits vinsertf128 with
;; immediate 1.
11790 (define_insn "vec_set_hi_v16hi"
11791 [(set (match_operand:V16HI 0 "register_operand" "=x")
11794 (match_operand:V16HI 1 "register_operand" "x")
11795 (parallel [(const_int 0) (const_int 1)
11796 (const_int 2) (const_int 3)
11797 (const_int 4) (const_int 5)
11798 (const_int 6) (const_int 7)]))
11799 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11801 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11802 [(set_attr "type" "sselog")
11803 (set_attr "prefix" "vex")
11804 (set_attr "mode" "V8SF")])
;; V32QI low-half insert: combines operand 2 (V16QI, register or memory)
;; with elements 16..31 selected from operand 1.  Emits vinsertf128 with
;; immediate 0.
11806 (define_insn "vec_set_lo_v32qi"
11807 [(set (match_operand:V32QI 0 "register_operand" "=x")
11809 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11811 (match_operand:V32QI 1 "register_operand" "x")
11812 (parallel [(const_int 16) (const_int 17)
11813 (const_int 18) (const_int 19)
11814 (const_int 20) (const_int 21)
11815 (const_int 22) (const_int 23)
11816 (const_int 24) (const_int 25)
11817 (const_int 26) (const_int 27)
11818 (const_int 28) (const_int 29)
11819 (const_int 30) (const_int 31)]))))]
11821 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11822 [(set_attr "type" "sselog")
11823 (set_attr "prefix" "vex")
11824 (set_attr "mode" "V8SF")])
;; V32QI high-half insert: combines elements 0..15 selected from operand 1
;; with operand 2 (V16QI, register or memory).  Emits vinsertf128 with
;; immediate 1.
11826 (define_insn "vec_set_hi_v32qi"
11827 [(set (match_operand:V32QI 0 "register_operand" "=x")
11830 (match_operand:V32QI 1 "register_operand" "x")
11831 (parallel [(const_int 0) (const_int 1)
11832 (const_int 2) (const_int 3)
11833 (const_int 4) (const_int 5)
11834 (const_int 6) (const_int 7)
11835 (const_int 8) (const_int 9)
11836 (const_int 10) (const_int 11)
11837 (const_int 12) (const_int 13)
11838 (const_int 14) (const_int 15)]))
11839 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11841 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11842 [(set_attr "type" "sselog")
11843 (set_attr "prefix" "vex")
11844 (set_attr "mode" "V8SF")])
;; vmaskmovp{s,d} load form: operand 1 is the memory source and operand 2 a
;; register operand (presumably the mask -- confirm against the unspec
;; definition); the destination is a register.
11846 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
11847 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11849 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11850 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11854 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
11855 [(set_attr "type" "sselog1")
11856 (set_attr "prefix" "vex")
11857 (set_attr "mode" "<MODE>")])
;; vmaskmovp{s,d} store form (UNSPEC_MASKSTORE): the destination is a
;; memory operand and both inputs (operands 1 and 2) are registers.
11859 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
11860 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11862 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11863 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11865 UNSPEC_MASKSTORE))]
11867 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11868 [(set_attr "type" "sselog1")
11869 (set_attr "prefix" "vex")
11870 (set_attr "mode" "<MODE>")])
;; Half-width to 256-bit pattern, modelled as an unspec.  Alternative 0
;; ties the source to the destination (constraint "0"); alternative 1
;; takes a register or memory source.  The visible move templates
;; (vmovaps/vmovapd/vmovdqa, chosen by get_attr_mode) write %x0, and the
;; "length" attribute is special-cased for alternative 0.
11872 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11873 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
11874 (unspec:AVX256MODE2P
11875 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
11879 switch (which_alternative)
11884 switch (get_attr_mode (insn))
11887 return "vmovaps\t{%1, %x0|%x0, %1}";
11889 return "vmovapd\t{%1, %x0|%x0, %1}";
11891 return "vmovdqa\t{%1, %x0|%x0, %1}";
11898 gcc_unreachable ();
11900 [(set_attr "type" "ssemov")
11901 (set_attr "prefix" "vex")
11902 (set_attr "mode" "<avxvecmode>")
11903 (set (attr "length")
11904 (if_then_else (eq_attr "alternative" "0")
11906 (const_string "*")))])
;; 256-bit to half-width pattern, modelled as an unspec.  Alternative 0
;; ties the source to the destination (constraint "0"); alternative 1
;; takes a register or memory source.  The visible move templates
;; (vmovaps/vmovapd/vmovdqa, chosen by get_attr_mode) read %x1, and the
;; "length" attribute is special-cased for alternative 0.
11908 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
11909 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
11910 (unspec:<avxhalfvecmode>
11911 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
11915 switch (which_alternative)
11920 switch (get_attr_mode (insn))
11923 return "vmovaps\t{%x1, %0|%0, %x1}";
11925 return "vmovapd\t{%x1, %0|%0, %x1}";
11927 return "vmovdqa\t{%x1, %0|%0, %x1}";
11934 gcc_unreachable ();
11936 [(set_attr "type" "ssemov")
11937 (set_attr "prefix" "vex")
11938 (set_attr "mode" "<avxvecmode>")
11939 (set (attr "length")
11940 (if_then_else (eq_attr "alternative" "0")
11942 (const_string "*")))])
;; Vector initialization for the 256-bit AVX modes: hands operand 0
;; (destination register) and operand 1 (unconstrained initializer) to
;; ix86_expand_vector_init, passing false as its first argument.
11944 (define_expand "vec_init<mode>"
11945 [(match_operand:AVX256MODE 0 "register_operand" "")
11946 (match_operand 1 "" "")]
11949 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 256-bit register.  There are
;; two alternatives, typed "sselog" and "ssemov" respectively: with a
;; register-or-memory second half ("xm") the vinsertf128 template is used
;; (immediate 1, %t1 as first source); with the "C" constraint on operand 2
;; the move templates (vmovaps/vmovapd/vmovdqa, chosen by get_attr_mode)
;; copy operand 1 into %x0.
11953 (define_insn "*vec_concat<mode>_avx"
11954 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11955 (vec_concat:AVX256MODE
11956 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11957 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11960 switch (which_alternative)
11963 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11965 switch (get_attr_mode (insn))
11968 return "vmovaps\t{%1, %x0|%x0, %1}";
11970 return "vmovapd\t{%1, %x0|%x0, %1}";
11972 return "vmovdqa\t{%1, %x0|%x0, %1}";
11975 gcc_unreachable ();
11978 [(set_attr "type" "sselog,ssemov")
11979 (set_attr "prefix" "vex")
11980 (set_attr "mode" "<avxvecmode>")])