1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 128-bit integer vector modes.  The digits in each name
;; appear to list the element sizes in bytes (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI) of the modes it contains.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point modes: SSEMODEF2P holds the two packed 128-bit modes,
;; SSEMODEF4 additionally holds the scalar SF and DF modes.
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Subsets of the AVX modes.  AVXMODEF2P holds every packed float mode
;; (128- and 256-bit); the AVX256* iterators hold 256-bit modes only.
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
63 ;; Int-float size matches
64 (define_mode_iterator SSEMODE4S [V4SF V4SI])
65 (define_mode_iterator SSEMODE2D [V2DF V2DI])
67 ;; Modes handled by integer vcond pattern
68 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
69 (V2DI "TARGET_SSE4_2 || TARGET_SSE5")])
71 ;; Mapping from float mode to required SSE level
72 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
74 ;; Mapping from integer vector mode to mnemonic suffix
75 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
77 ;; Mapping of the sse5 suffix
78 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
79 (V4SF "ps") (V2DF "pd")])
80 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
81 (V4SF "ss") (V2DF "sd")])
82 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
84 ;; Mapping of the max integer size for sse5 rotate immediate constraint
85 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
87 ;; Mapping of vector modes back to the scalar modes
88 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
89 (V16QI "QI") (V8HI "HI")
90 (V4SI "SI") (V2DI "DI")])
92 ;; Mapping of vector modes to a vector mode of double size
93 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
94 (V4SF "V8SF") (V4SI "V8SI")])
96 ;; Number of scalar elements in each vector type
97 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
98 (V16QI "16") (V8HI "8")
99 (V4SI "4") (V2DI "2")])
;; Value used for the "mode" insn attribute of AVX patterns: integer
;; vectors map to TI (128-bit) or OI (256-bit), float vectors to themselves.
102 (define_mode_attr avxvecmode
103 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
104 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
105 (V8SF "V8SF") (V4DF "V4DF")])
;; Mapping from integer vector mode to the single-precision float vector
;; mode of the same total size.
106 (define_mode_attr avxvecpsmode
107 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
108 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Mapping from a vector mode to the mode with half as many elements.
109 (define_mode_attr avxhalfvecmode
110 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
111 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; Mapping from AVX vector mode to its element (scalar) mode.
112 (define_mode_attr avxscalarmode
113 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
114 (V8SF "SF") (V4DF "DF")])
;; Mapping between SI and SF vector modes with the same element count,
;; in both directions.
115 (define_mode_attr avxcvtvecmode
116 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Mapping from float vector mode to the integer vector mode with the
;; same element count and element size.
117 (define_mode_attr avxpermvecmode
118 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Single-letter precision suffix ("s" or "d") for packed float modes,
;; spliced into mnemonics such as vmovup<avxmodesuffixf2c>.
119 (define_mode_attr avxmodesuffixf2c
120 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
;; Two-letter packed suffix ("pd", "ps" or "si") per mode.
;; NOTE(review): this definition is not closed before the next
;; define_mode_attr begins and has no V4DF entry -- a line appears to be
;; missing here; verify against the upstream file.
121 (define_mode_attr avxmodesuffixp
122 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Element-size suffix ("b", "w" or "d") for the listed integer modes.
124 (define_mode_attr avxmodesuffixs
125 [(V16QI "b") (V8HI "w") (V4SI "d")])
;; Pattern-name suffix: empty for 128-bit modes, "256" for 256-bit modes.
126 (define_mode_attr avxmodesuffix
127 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
128 (V8SI "256") (V8SF "256") (V4DF "256")])
130 ;; Mapping of immediate bits for blend instructions
131 (define_mode_attr blendbits
132 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
134 ;; Mapping of immediate bits for vpermil instructions
135 (define_mode_attr vpermilbits
136 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
138 ;; Mapping of immediate bits for pinsr instructions
139 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
141 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move patterns
147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 256-bit AVX vector modes (AVX256MODE).  The
;; actual expansion is delegated to ix86_expand_vector_move.
149 (define_expand "mov<mode>"
150 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
151 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
154 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every AVX vector mode (AVXMODE).  The three alternatives
;; are: SSE constant (C) into a register, register/memory into a register,
;; and register into memory; at least one operand must be a register.
;; The output code dispatches on which_alternative and the insn's "mode"
;; attribute, returning either the opcode from standard_sse_constant_opcode
;; or one of vmovaps / vmovapd / vmovdqa.
158 (define_insn "*avx_mov<mode>_internal"
159 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
160 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
162 && (register_operand (operands[0], <MODE>mode)
163 || register_operand (operands[1], <MODE>mode))"
165 switch (which_alternative)
168 return standard_sse_constant_opcode (insn, operands[1]);
171 switch (get_attr_mode (insn))
175 return "vmovaps\t{%1, %0|%0, %1}";
178 return "vmovapd\t{%1, %0|%0, %1}";
180 return "vmovdqa\t{%1, %0|%0, %1}";
186 [(set_attr "type" "sselog1,ssemov,ssemov")
187 (set_attr "prefix" "vex")
188 (set_attr "mode" "<avxvecmode>")])
190 ;; All of these patterns are enabled for SSE1 as well as SSE2.
191 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 128-bit SSE vector modes (SSEMODE); expansion is
;; delegated to ix86_expand_vector_move.
193 (define_expand "mov<mode>"
194 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
195 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
198 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the 128-bit SSE vector modes, with the same three
;; alternatives as the AVX variant (constant -> reg, reg/mem -> reg,
;; reg -> mem).  Emits movaps / movapd / movdqa according to the "mode"
;; attribute, or the opcode from standard_sse_constant_opcode.
;; The "mode" attribute is computed: V4SF when the function is optimized
;; for size, when TARGET_SSE2 is off, or for alternative 2 (the store)
;; under TARGET_SSE_TYPELESS_STORES; otherwise V4SF or V2DF for those two
;; modes and TI for everything else.
202 (define_insn "*mov<mode>_internal"
203 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
204 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
206 && (register_operand (operands[0], <MODE>mode)
207 || register_operand (operands[1], <MODE>mode))"
209 switch (which_alternative)
212 return standard_sse_constant_opcode (insn, operands[1]);
215 switch (get_attr_mode (insn))
218 return "movaps\t{%1, %0|%0, %1}";
220 return "movapd\t{%1, %0|%0, %1}";
222 return "movdqa\t{%1, %0|%0, %1}";
228 [(set_attr "type" "sselog1,ssemov,ssemov")
230 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
231 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
232 (and (eq_attr "alternative" "2")
233 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
235 (const_string "V4SF")
236 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
237 (const_string "V4SF")
238 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
239 (const_string "V2DF")
241 (const_string "TI")))])
243 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
244 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
245 ;; from memory, we'd prefer to load the memory directly into the %xmm
246 ;; register. To facilitate this happy circumstance, this pattern won't
247 ;; split until after register allocation. If the 64-bit value didn't
248 ;; come from memory, this is the best we can do. This is much better
249 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;; Insn-and-split that places a DImode value into a V4SI xmm register on
;; 32-bit targets with SSE2 and inter-unit moves.  The source is either a
;; general register pair (alternative 0, which also claims an xmm scratch
;; as operand 2) or memory (alternative 1).  The split runs only after
;; reload:
;;  - register source: each SImode half is loaded with sse2_loadld (low
;;    half into operand 0, high half into the scratch) and the two are
;;    interleaved with sse2_punpckldq;
;;  - memory source: a single vec_concatv2di of the DImode memory with zero.
252 (define_insn_and_split "movdi_to_sse"
254 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
255 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
256 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
257 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
259 "&& reload_completed"
262 if (register_operand (operands[1], DImode))
264 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
265 Assemble the 64-bit DImode value in an xmm register. */
266 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
267 gen_rtx_SUBREG (SImode, operands[1], 0)));
268 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
269 gen_rtx_SUBREG (SImode, operands[1], 4)));
270 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
272 else if (memory_operand (operands[1], DImode))
273 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload rewrite (the defining header line is not visible in this
;; view) of a V4SF register set from a zero_extended_scalar_load_operand.
;; Operand 1 is narrowed to its SFmode subreg at offset 0 and operand 2 is
;; set to the V4SF zero constant; the replacement uses a vec_duplicate of
;; the scalar.
279 [(set (match_operand:V4SF 0 "register_operand" "")
280 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
281 "TARGET_SSE && reload_completed"
284 (vec_duplicate:V4SF (match_dup 1))
288 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
289 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite (the defining header line is not visible in this
;; view) of a V2DF register set from a zero_extended_scalar_load_operand
;; into a vec_concat of the DFmode scalar (operand 1, narrowed with
;; simplify_gen_subreg) and a DFmode zero (operand 2).
293 [(set (match_operand:V2DF 0 "register_operand" "")
294 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
295 "TARGET_SSE2 && reload_completed"
296 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
298 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
299 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a 256-bit AVX vector register; delegates to
;; ix86_expand_push.
302 (define_expand "push<mode>1"
303 [(match_operand:AVX256MODE 0 "register_operand" "")]
306 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a 128-bit SSE vector register; delegates to
;; ix86_expand_push.
310 (define_expand "push<mode>1"
311 [(match_operand:SSEMODE 0 "register_operand" "")]
314 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 256-bit AVX vector modes; delegates to
;; ix86_expand_vector_move_misalign.
318 (define_expand "movmisalign<mode>"
319 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
320 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
323 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 128-bit SSE vector modes; delegates to
;; ix86_expand_vector_move_misalign.
327 (define_expand "movmisalign<mode>"
328 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
329 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
332 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX vmovups / vmovupd for all packed float modes (128- and 256-bit).
;; Either register/memory -> register or register -> memory; a
;; memory-to-memory move is rejected by the insn condition.
336 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
337 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
339 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
341 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
342 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
343 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
344 [(set_attr "type" "ssemov")
345 (set_attr "prefix" "vex")
346 (set_attr "mode" "<MODE>")])
;; movq (vmovq when VEX-encoded, via the %v prefix) into a V2DI register.
;; The visible RTL selects element 0 of the V2DI source, which may be a
;; register or memory.
348 (define_insn "sse2_movq128"
349 [(set (match_operand:V2DI 0 "register_operand" "=x")
352 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
353 (parallel [(const_int 0)]))
356 "%vmovq\t{%1, %0|%0, %1}"
357 [(set_attr "type" "ssemov")
358 (set_attr "prefix" "maybe_vex")
359 (set_attr "mode" "TI")])
;; SSE movups / movupd for V4SF / V2DF.  Either register/memory ->
;; register or register -> memory; memory-to-memory is rejected by the
;; insn condition.
361 (define_insn "<sse>_movup<ssemodesuffixf2c>"
362 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
364 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
366 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
367 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
368 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
369 [(set_attr "type" "ssemov")
370 (set_attr "mode" "<MODE>")])
;; AVX vmovdqu for the QI vector modes (V16QI and V32QI).  Requires
;; TARGET_AVX and rejects memory-to-memory moves.
372 (define_insn "avx_movdqu<avxmodesuffix>"
373 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
375 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
377 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
378 "vmovdqu\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "prefix" "vex")
381 (set_attr "mode" "<avxvecmode>")])
;; SSE2 movdqu on V16QI, expressed as an unspec.  Requires TARGET_SSE2 and
;; rejects memory-to-memory moves.
383 (define_insn "sse2_movdqu"
384 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
385 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
387 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
388 "movdqu\t{%1, %0|%0, %1}"
389 [(set_attr "type" "ssemov")
390 (set_attr "prefix_data16" "1")
391 (set_attr "mode" "TI")])
;; AVX vmovntps / vmovntpd: store of a packed-float register (any
;; AVXMODEF2P mode) to memory.
393 (define_insn "avx_movnt<mode>"
394 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
396 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
398 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
399 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
400 [(set_attr "type" "ssemov")
401 (set_attr "prefix" "vex")
402 (set_attr "mode" "<MODE>")])
;; SSE movntps / movntpd: store of a V4SF / V2DF register to memory.
404 (define_insn "<sse>_movnt<mode>"
405 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
407 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
409 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
410 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
411 [(set_attr "type" "ssemov")
412 (set_attr "mode" "<MODE>")])
;; AVX vmovntdq: store of a DI-element vector register (V2DI or V4DI) to
;; memory.
414 (define_insn "avx_movnt<mode>"
415 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
417 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
420 "vmovntdq\t{%1, %0|%0, %1}"
421 [(set_attr "type" "ssecvt")
422 (set_attr "prefix" "vex")
423 (set_attr "mode" "<avxvecmode>")])
;; SSE2 movntdq: store of a V2DI register to memory, expressed as an
;; unspec.
425 (define_insn "sse2_movntv2di"
426 [(set (match_operand:V2DI 0 "memory_operand" "=m")
427 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
430 "movntdq\t{%1, %0|%0, %1}"
431 [(set_attr "type" "ssecvt")
432 (set_attr "prefix_data16" "1")
433 (set_attr "mode" "TI")])
;; movnti: store of an SImode general register ("r") to memory, expressed
;; as an unspec.
435 (define_insn "sse2_movntsi"
436 [(set (match_operand:SI 0 "memory_operand" "=m")
437 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
440 "movnti\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssecvt")
442 (set_attr "mode" "V2DF")])
;; AVX vlddqu: load of a QI vector (V16QI or V32QI) from memory into a
;; register.  The source must be memory.
444 (define_insn "avx_lddqu<avxmodesuffix>"
445 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
447 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
450 "vlddqu\t{%1, %0|%0, %1}"
451 [(set_attr "type" "ssecvt")
452 (set_attr "prefix" "vex")
453 (set_attr "mode" "<avxvecmode>")])
;; lddqu: load of a V16QI vector from memory into a register, expressed as
;; an unspec.  The source must be memory.
455 (define_insn "sse3_lddqu"
456 [(set (match_operand:V16QI 0 "register_operand" "=x")
457 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
460 "lddqu\t{%1, %0|%0, %1}"
461 [(set_attr "type" "ssecvt")
462 (set_attr "prefix_rep" "1")
463 (set_attr "mode" "TI")])
465 ; Expand patterns for non-temporal stores. At the moment, only those
466 ; that directly map to insns are defined; it would be possible to
467 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store expander for V4SF / V2DF: memory destination,
;; register source, enabled under SSE_VEC_FLOAT_MODE_P.
469 (define_expand "storent<mode>"
470 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
472 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
474 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Non-temporal store expander for the MODEF modes (iterator defined
;; outside this view): memory destination, register source.
477 (define_expand "storent<mode>"
478 [(set (match_operand:MODEF 0 "memory_operand" "")
480 [(match_operand:MODEF 1 "register_operand" "")]
;; Non-temporal store expander for V2DI: memory destination, register
;; source, expressed as an unspec.
485 (define_expand "storentv2di"
486 [(set (match_operand:V2DI 0 "memory_operand" "")
487 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for SImode: memory destination, register
;; source, expressed as an unspec.
492 (define_expand "storentsi"
493 [(set (match_operand:SI 0 "memory_operand" "")
494 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 ;; Parallel floating point arithmetic
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander for V4SF / V2DF whose whole expansion is performed by
;; ix86_expand_fp_absneg_operator, called with the iterated rtx code.
505 (define_expand "<code><mode>2"
506 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
508 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
509 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
510 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for the 256-bit packed float modes.  Operands are
;; legitimized with ix86_fixup_binary_operands_no_copy and the standard
;; pattern is then emitted.
512 (define_expand "<plusminus_insn><mode>3"
513 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
514 (plusminus:AVX256MODEF2P
515 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
516 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
517 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
518 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Three-operand AVX packed add/subtract for all packed float modes.
;; Operand 1's constraint is prefixed by <comm>, operand 2 may be memory,
;; and the operand combination must pass ix86_binary_operator_ok.
520 (define_insn "*avx_<plusminus_insn><mode>3"
521 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
522 (plusminus:AVXMODEF2P
523 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
524 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
525 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
526 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
527 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
528 [(set_attr "type" "sseadd")
529 (set_attr "prefix" "vex")
530 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for V4SF / V2DF.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy and the standard pattern is emitted.
532 (define_expand "<plusminus_insn><mode>3"
533 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
534 (plusminus:SSEMODEF2P
535 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
536 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
537 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
538 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Two-operand SSE packed add/subtract for V4SF / V2DF.  Operand 1 is tied
;; to the destination ("0"), so the template names only %0 and %2.
540 (define_insn "*<plusminus_insn><mode>3"
541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
542 (plusminus:SSEMODEF2P
543 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
544 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
545 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
546 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
547 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
548 [(set_attr "type" "sseadd")
549 (set_attr "mode" "<MODE>")])
;; AVX scalar add/subtract (v...ss / v...sd) on 128-bit float vectors,
;; modelled as a vec_merge around the packed operation.  The "mode"
;; attribute is the scalar element mode.
551 (define_insn "*avx_vm<plusminus_insn><mode>3"
552 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
553 (vec_merge:SSEMODEF2P
554 (plusminus:SSEMODEF2P
555 (match_operand:SSEMODEF2P 1 "register_operand" "x")
556 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
559 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
560 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
561 [(set_attr "type" "sseadd")
562 (set_attr "prefix" "vex")
563 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar add/subtract (...ss / ...sd), modelled as a vec_merge around
;; the packed operation.  Operand 1 is tied to the destination.
565 (define_insn "<sse>_vm<plusminus_insn><mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
567 (vec_merge:SSEMODEF2P
568 (plusminus:SSEMODEF2P
569 (match_operand:SSEMODEF2P 1 "register_operand" "0")
570 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
573 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
574 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
575 [(set_attr "type" "sseadd")
576 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 256-bit packed float modes; operands are
;; legitimized with ix86_fixup_binary_operands_no_copy for MULT.
578 (define_expand "mul<mode>3"
579 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
581 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
582 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
583 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
584 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Three-operand AVX packed multiply (vmulps / vmulpd).  Operand 1 is
;; marked commutative with "%"; operand 2 may be memory.
586 (define_insn "*avx_mul<mode>3"
587 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
589 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
590 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
591 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
592 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
593 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
594 [(set_attr "type" "ssemul")
595 (set_attr "prefix" "vex")
596 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for V4SF / V2DF; operands are legitimized with
;; ix86_fixup_binary_operands_no_copy for MULT.
598 (define_expand "mul<mode>3"
599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
601 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
602 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
603 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
604 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Two-operand SSE packed multiply (mulps / mulpd).  Operand 1 is
;; commutative and tied to the destination ("%0").
606 (define_insn "*mul<mode>3"
607 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
609 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
610 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
611 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
612 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
613 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
614 [(set_attr "type" "ssemul")
615 (set_attr "mode" "<MODE>")])
;; AVX scalar multiply (vmulss / vmulsd) on 128-bit float vectors,
;; modelled as a vec_merge.  Note the condition here is
;; AVX_VEC_FLOAT_MODE_P, whereas the neighbouring scalar AVX patterns for
;; add/subtract and divide use AVX128_VEC_FLOAT_MODE_P.
617 (define_insn "*avx_vmmul<mode>3"
618 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
619 (vec_merge:SSEMODEF2P
621 (match_operand:SSEMODEF2P 1 "register_operand" "x")
622 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
625 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
626 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "type" "ssemul")
628 (set_attr "prefix" "vex")
629 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar multiply (mulss / mulsd), modelled as a vec_merge.
;; Operand 1 is tied to the destination.
631 (define_insn "<sse>_vmmul<mode>3"
632 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
633 (vec_merge:SSEMODEF2P
635 (match_operand:SSEMODEF2P 1 "register_operand" "0")
636 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
639 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
640 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
641 [(set_attr "type" "ssemul")
642 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first legitimized with
;; ix86_fixup_binary_operands_no_copy.  When SSE math and TARGET_RECIP are
;; enabled, the insn is being optimized for speed, and the finite-only /
;; non-trapping / unsafe-math flags are all set, the division is emitted
;; through ix86_emit_swdivsf instead of the plain pattern.
;; The speed test uses optimize_insn_for_speed_p (), matching divv4sf3,
;; rather than the global optimize_size flag.
644 (define_expand "divv8sf3"
645 [(set (match_operand:V8SF 0 "register_operand" "")
646 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
647 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
650 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
652 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
653 && flag_finite_math_only && !flag_trapping_math
654 && flag_unsafe_math_optimizations)
656 ix86_emit_swdivsf (operands[0], operands[1],
657 operands[2], V8SFmode);
;; V4DF division expander; operands are legitimized with
;; ix86_fixup_binary_operands_no_copy for DIV.
662 (define_expand "divv4df3"
663 [(set (match_operand:V4DF 0 "register_operand" "")
664 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
665 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
667 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; Three-operand AVX packed divide (vdivps / vdivpd) for all packed float
;; modes.  The dividend must be a register; the divisor may be memory.
669 (define_insn "avx_div<mode>3"
670 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
672 (match_operand:AVXMODEF2P 1 "register_operand" "x")
673 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
674 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
675 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
676 [(set_attr "type" "ssediv")
677 (set_attr "prefix" "vex")
678 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is being optimized for speed, and the finite-only /
;; non-trapping / unsafe-math flags are all set, the division is emitted
;; through ix86_emit_swdivsf instead of the plain pattern.
680 (define_expand "divv4sf3"
681 [(set (match_operand:V4SF 0 "register_operand" "")
682 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
683 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
686 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
687 && flag_finite_math_only && !flag_trapping_math
688 && flag_unsafe_math_optimizations)
690 ix86_emit_swdivsf (operands[0], operands[1],
691 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
696 (define_expand "divv2df3"
697 [(set (match_operand:V2DF 0 "register_operand" "")
698 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
699 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Three-operand AVX packed divide restricted to the 128-bit float modes
;; (V4SF / V2DF) under AVX128_VEC_FLOAT_MODE_P.
703 (define_insn "*avx_div<mode>3"
704 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
706 (match_operand:SSEMODEF2P 1 "register_operand" "x")
707 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
708 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
709 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
710 [(set_attr "type" "ssediv")
711 (set_attr "prefix" "vex")
712 (set_attr "mode" "<MODE>")])
;; Two-operand SSE packed divide (divps / divpd).  The dividend is tied to
;; the destination register; the divisor may be memory.
714 (define_insn "<sse>_div<mode>3"
715 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
717 (match_operand:SSEMODEF2P 1 "register_operand" "0")
718 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
719 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
720 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
721 [(set_attr "type" "ssediv")
722 (set_attr "mode" "<MODE>")])
;; AVX scalar divide (vdivss / vdivsd) on 128-bit float vectors, modelled
;; as a vec_merge.  The "mode" attribute is the scalar element mode.
724 (define_insn "*avx_vmdiv<mode>3"
725 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
726 (vec_merge:SSEMODEF2P
728 (match_operand:SSEMODEF2P 1 "register_operand" "x")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
732 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
733 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
734 [(set_attr "type" "ssediv")
735 (set_attr "prefix" "vex")
736 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar divide (divss / divsd), modelled as a vec_merge.  Operand 1
;; is tied to the destination.
738 (define_insn "<sse>_vmdiv<mode>3"
739 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
740 (vec_merge:SSEMODEF2P
742 (match_operand:SSEMODEF2P 1 "register_operand" "0")
743 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
746 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
747 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
748 [(set_attr "type" "ssediv")
749 (set_attr "mode" "<ssescalarmode>")])
;; AVX vrcpps on V8SF, expressed as UNSPEC_RCP.  The source may be a
;; register or memory.
751 (define_insn "avx_rcpv8sf2"
752 [(set (match_operand:V8SF 0 "register_operand" "=x")
754 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
756 "vrcpps\t{%1, %0|%0, %1}"
757 [(set_attr "type" "sse")
758 (set_attr "prefix" "vex")
759 (set_attr "mode" "V8SF")])
;; rcpps on V4SF (vrcpps when VEX-encoded, via the %v prefix), expressed
;; as UNSPEC_RCP.
761 (define_insn "sse_rcpv4sf2"
762 [(set (match_operand:V4SF 0 "register_operand" "=x")
764 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
766 "%vrcpps\t{%1, %0|%0, %1}"
767 [(set_attr "type" "sse")
768 (set_attr "prefix" "maybe_vex")
769 (set_attr "mode" "V4SF")])
;; AVX scalar vrcpss.  Operand 1 (register or memory) goes through the
;; unspec; operand 2 is a second register input that appears as the
;; middle operand of the three-operand template.
771 (define_insn "*avx_vmrcpv4sf2"
772 [(set (match_operand:V4SF 0 "register_operand" "=x")
774 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
776 (match_operand:V4SF 2 "register_operand" "x")
779 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
780 [(set_attr "type" "sse")
781 (set_attr "prefix" "vex")
782 (set_attr "mode" "SF")])
;; SSE scalar rcpss.  Operand 1 (register or memory) goes through the
;; unspec; operand 2 is tied to the destination register.
784 (define_insn "sse_vmrcpv4sf2"
785 [(set (match_operand:V4SF 0 "register_operand" "=x")
787 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
789 (match_operand:V4SF 2 "register_operand" "0")
792 "rcpss\t{%1, %0|%0, %1}"
793 [(set_attr "type" "sse")
794 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is being optimized for speed, and the finite-only /
;; non-trapping / unsafe-math flags are all set, the square root is
;; emitted through ix86_emit_swsqrtsf (last argument 0) instead of the
;; plain pattern.
;; The speed test uses optimize_insn_for_speed_p (), matching sqrtv4sf2,
;; rather than the global optimize_size flag.
796 (define_expand "sqrtv8sf2"
797 [(set (match_operand:V8SF 0 "register_operand" "")
798 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
801 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
802 && flag_finite_math_only && !flag_trapping_math
803 && flag_unsafe_math_optimizations)
805 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; AVX vsqrtps on V8SF; the source may be a register or memory.
810 (define_insn "avx_sqrtv8sf2"
811 [(set (match_operand:V8SF 0 "register_operand" "=x")
812 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
814 "vsqrtps\t{%1, %0|%0, %1}"
815 [(set_attr "type" "sse")
816 (set_attr "prefix" "vex")
817 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is being optimized for speed, and the finite-only /
;; non-trapping / unsafe-math flags are all set, the square root is
;; emitted through ix86_emit_swsqrtsf (last argument 0).
819 (define_expand "sqrtv4sf2"
820 [(set (match_operand:V4SF 0 "register_operand" "")
821 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
824 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
825 && flag_finite_math_only && !flag_trapping_math
826 && flag_unsafe_math_optimizations)
828 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps on V4SF (vsqrtps when VEX-encoded, via the %v prefix).
833 (define_insn "sse_sqrtv4sf2"
834 [(set (match_operand:V4SF 0 "register_operand" "=x")
835 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
837 "%vsqrtps\t{%1, %0|%0, %1}"
838 [(set_attr "type" "sse")
839 (set_attr "prefix" "maybe_vex")
840 (set_attr "mode" "V4SF")])
;; AVX vsqrtpd on V4DF; the source may be a register or memory.
842 (define_insn "sqrtv4df2"
843 [(set (match_operand:V4DF 0 "register_operand" "=x")
844 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
846 "vsqrtpd\t{%1, %0|%0, %1}"
847 [(set_attr "type" "sse")
848 (set_attr "prefix" "vex")
849 (set_attr "mode" "V4DF")])
;; sqrtpd on V2DF (vsqrtpd when VEX-encoded, via the %v prefix).
851 (define_insn "sqrtv2df2"
852 [(set (match_operand:V2DF 0 "register_operand" "=x")
853 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
855 "%vsqrtpd\t{%1, %0|%0, %1}"
856 [(set_attr "type" "sse")
857 (set_attr "prefix" "maybe_vex")
858 (set_attr "mode" "V2DF")])
;; AVX scalar square root (vsqrtss / vsqrtsd), modelled as a vec_merge.
;; Operand 1 is the value operated on (register or memory); operand 2 is
;; a second register input, the middle operand of the template.
860 (define_insn "*avx_vmsqrt<mode>2"
861 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
862 (vec_merge:SSEMODEF2P
864 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
865 (match_operand:SSEMODEF2P 2 "register_operand" "x")
867 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
868 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "prefix" "vex")
871 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar square root (sqrtss / sqrtsd), modelled as a vec_merge.
;; Operand 2 is tied to the destination register.
873 (define_insn "<sse>_vmsqrt<mode>2"
874 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
875 (vec_merge:SSEMODEF2P
877 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
878 (match_operand:SSEMODEF2P 2 "register_operand" "0")
880 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
881 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
882 [(set_attr "type" "sse")
883 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (UNSPEC_RSQRT), enabled for
;; TARGET_AVX && TARGET_SSE_MATH.  The expansion is produced by
;; ix86_emit_swsqrtsf with last argument 1.
885 (define_expand "rsqrtv8sf2"
886 [(set (match_operand:V8SF 0 "register_operand" "")
888 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
889 "TARGET_AVX && TARGET_SSE_MATH"
891 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; AVX vrsqrtps on V8SF, expressed as UNSPEC_RSQRT.
895 (define_insn "avx_rsqrtv8sf2"
896 [(set (match_operand:V8SF 0 "register_operand" "=x")
898 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
900 "vrsqrtps\t{%1, %0|%0, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  The expansion is
;; produced by ix86_emit_swsqrtsf with last argument 1.
905 (define_expand "rsqrtv4sf2"
906 [(set (match_operand:V4SF 0 "register_operand" "")
908 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
911 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on V4SF (vrsqrtps when VEX-encoded, via the %v prefix),
;; expressed as UNSPEC_RSQRT.
915 (define_insn "sse_rsqrtv4sf2"
916 [(set (match_operand:V4SF 0 "register_operand" "=x")
918 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
920 "%vrsqrtps\t{%1, %0|%0, %1}"
921 [(set_attr "type" "sse")
922 (set_attr "prefix" "maybe_vex")
923 (set_attr "mode" "V4SF")])
;; AVX scalar vrsqrtss.  Operand 1 (register or memory) goes through the
;; unspec; operand 2 is a second register input, the middle operand of
;; the three-operand template.
925 (define_insn "*avx_vmrsqrtv4sf2"
926 [(set (match_operand:V4SF 0 "register_operand" "=x")
928 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
930 (match_operand:V4SF 2 "register_operand" "x")
933 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "vex")
936 (set_attr "mode" "SF")])
;; SSE scalar rsqrtss.  Operand 1 (register or memory) goes through the
;; unspec; operand 2 is tied to the destination register.
938 (define_insn "sse_vmrsqrtv4sf2"
939 [(set (match_operand:V4SF 0 "register_operand" "=x")
941 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
943 (match_operand:V4SF 2 "register_operand" "0")
946 "rsqrtss\t{%1, %0|%0, %1}"
947 [(set_attr "type" "sse")
948 (set_attr "mode" "SF")])
950 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
951 ;; isn't really correct, as those rtl operators aren't defined when
952 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander for the 256-bit packed float modes.  Unless
;; flag_finite_math_only is set, operand 1 is forced into a register
;; before ix86_fixup_binary_operands_no_copy legitimizes the operands.
954 (define_expand "<code><mode>3"
955 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
956 (smaxmin:AVX256MODEF2P
957 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
958 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
959 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
961 if (!flag_finite_math_only)
962 operands[1] = force_reg (<MODE>mode, operands[1]);
963 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max expander for V4SF / V2DF.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy legitimizes the operands.
966 (define_expand "<code><mode>3"
967 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
969 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
970 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
971 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
973 if (!flag_finite_math_only)
974 operands[1] = force_reg (<MODE>mode, operands[1]);
975 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max used only under flag_finite_math_only.  Operand 1 is
;; marked commutative ("%") and the operand combination must pass
;; ix86_binary_operator_ok.
978 (define_insn "*avx_<code><mode>3_finite"
979 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
981 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
982 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
983 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
984 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
985 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
986 [(set_attr "type" "sseadd")
987 (set_attr "prefix" "vex")
988 (set_attr "mode" "<MODE>")])
;; SSE packed min/max used only under flag_finite_math_only.  Operand 1 is
;; commutative and tied to the destination ("%0").
990 (define_insn "*<code><mode>3_finite"
991 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
993 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
994 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
995 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
996 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
997 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
998 [(set_attr "type" "sseadd")
999 (set_attr "mode" "<MODE>")])
;; AVX three-operand packed form used when the "_finite" variant does not
;; apply (its condition has no flag_finite_math_only test).  In that case
;; NaN and -0.0 inputs are possible and the hardware min/max result
;; depends on operand order, so operand 1 must NOT be commutative: it is a
;; plain register operand without the "%" modifier, matching the SSE
;; counterpart of this pattern, which uses register_operand "0".  The
;; expander forces operand 1 into a register when !flag_finite_math_only,
;; so a register_operand predicate is sufficient here.
1001 (define_insn "*avx_<code><mode>3"
1002 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1004 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1005 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1006 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1007 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1008 [(set_attr "type" "sseadd")
1009 (set_attr "prefix" "vex")
1010 (set_attr "mode" "<avxvecmode>")])
;; SSE two-operand packed form without a flag_finite_math_only test.
;; Operand 1 must be a register tied to the destination ("0") and has no
;; "%" modifier, so the operand order is preserved.
1012 (define_insn "*<code><mode>3"
1013 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1015 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1016 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1017 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1018 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1019 [(set_attr "type" "sseadd")
1020 (set_attr "mode" "<MODE>")])
;; AVX scalar form (vec_merge pattern): emits the "s"-suffixed mnemonic
;; with three operands.  Operand 1 is a register, operand 2 a register or
;; memory; the insn mode attribute is the scalar mode <ssescalarmode>.
1022 (define_insn "*avx_vm<code><mode>3"
1023 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1024 (vec_merge:SSEMODEF2P
1026 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1027 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1030 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1031 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1032 [(set_attr "type" "sse")
1033 (set_attr "prefix" "vex")
1034 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar form (vec_merge pattern): emits the "s"-suffixed mnemonic
;; in two-operand form.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
1036 (define_insn "<sse>_vm<code><mode>3"
1037 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1038 (vec_merge:SSEMODEF2P
1040 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1041 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1044 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1045 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1046 [(set_attr "type" "sse")
1047 (set_attr "mode" "<ssescalarmode>")])
1049 ;; These versions of the min/max patterns implement exactly the operations
1050 ;; min = (op1 < op2 ? op1 : op2)
1051 ;; max = (!(op1 < op2) ? op1 : op2)
1052 ;; Their operands are not commutative, and thus they may be used in the
1053 ;; presence of -0.0 and NaN.
;; AVX order-preserving minimum: emits vminp<suffix> with operands in the
;; fixed order %1, %2.  Operand 1 is a register with no "%" modifier;
;; operand 2 may be a register or memory.
1055 (define_insn "*avx_ieee_smin<mode>3"
1056 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1058 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1059 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1061 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1062 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1063 [(set_attr "type" "sseadd")
1064 (set_attr "prefix" "vex")
1065 (set_attr "mode" "<avxvecmode>")])
;; AVX order-preserving maximum: emits vmaxp<suffix> with operands in the
;; fixed order %1, %2.  Operand 1 is a register with no "%" modifier;
;; operand 2 may be a register or memory.
1067 (define_insn "*avx_ieee_smax<mode>3"
1068 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1070 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1071 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1073 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1074 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1075 [(set_attr "type" "sseadd")
1076 (set_attr "prefix" "vex")
1077 (set_attr "mode" "<avxvecmode>")])
;; SSE order-preserving minimum: emits minp<suffix> in two-operand form.
;; Operand 1 is tied to the destination ("0") with no "%" modifier;
;; operand 2 may be a register or memory.
1079 (define_insn "*ieee_smin<mode>3"
1080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1082 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1083 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1085 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1086 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1087 [(set_attr "type" "sseadd")
1088 (set_attr "mode" "<MODE>")])
;; SSE order-preserving maximum: emits maxp<suffix> in two-operand form.
;; Operand 1 is tied to the destination ("0") with no "%" modifier;
;; operand 2 may be a register or memory.
1090 (define_insn "*ieee_smax<mode>3"
1091 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1093 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1094 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1096 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1097 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1098 [(set_attr "type" "sseadd")
1099 (set_attr "mode" "<MODE>")])
;; 256-bit V8SF add/subtract: emits vaddsubps in three-operand form.
;; Operand 1 is a register, operand 2 a register or memory; the minus
;; half of the pattern reuses both inputs through match_dup.
1101 (define_insn "avx_addsubv8sf3"
1102 [(set (match_operand:V8SF 0 "register_operand" "=x")
1105 (match_operand:V8SF 1 "register_operand" "x")
1106 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1107 (minus:V8SF (match_dup 1) (match_dup 2))
1110 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1111 [(set_attr "type" "sseadd")
1112 (set_attr "prefix" "vex")
1113 (set_attr "mode" "V8SF")])
;; 256-bit V4DF add/subtract: emits vaddsubpd in three-operand form.
;; Operand 1 is a register, operand 2 a register or memory; the minus
;; half of the pattern reuses both inputs through match_dup.
1115 (define_insn "avx_addsubv4df3"
1116 [(set (match_operand:V4DF 0 "register_operand" "=x")
1119 (match_operand:V4DF 1 "register_operand" "x")
1120 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1121 (minus:V4DF (match_dup 1) (match_dup 2))
1124 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1125 [(set_attr "type" "sseadd")
1126 (set_attr "prefix" "vex")
1127 (set_attr "mode" "V4DF")])
;; 128-bit V4SF add/subtract, VEX-encoded: emits vaddsubps in
;; three-operand form.  Operand 1 is a register, operand 2 a register or
;; memory; the minus half reuses both inputs through match_dup.
1129 (define_insn "*avx_addsubv4sf3"
1130 [(set (match_operand:V4SF 0 "register_operand" "=x")
1133 (match_operand:V4SF 1 "register_operand" "x")
1134 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1135 (minus:V4SF (match_dup 1) (match_dup 2))
1138 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1139 [(set_attr "type" "sseadd")
1140 (set_attr "prefix" "vex")
1141 (set_attr "mode" "V4SF")])
;; V4SF add/subtract, legacy encoding: emits addsubps in two-operand
;; form.  Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.  The prefix_rep attribute is set to 1.
1143 (define_insn "sse3_addsubv4sf3"
1144 [(set (match_operand:V4SF 0 "register_operand" "=x")
1147 (match_operand:V4SF 1 "register_operand" "0")
1148 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1149 (minus:V4SF (match_dup 1) (match_dup 2))
1152 "addsubps\t{%2, %0|%0, %2}"
1153 [(set_attr "type" "sseadd")
1154 (set_attr "prefix_rep" "1")
1155 (set_attr "mode" "V4SF")])
;; 128-bit V2DF add/subtract, VEX-encoded: emits vaddsubpd in
;; three-operand form.  Operand 1 is a register, operand 2 a register or
;; memory; the minus half reuses both inputs through match_dup.
1157 (define_insn "*avx_addsubv2df3"
1158 [(set (match_operand:V2DF 0 "register_operand" "=x")
1161 (match_operand:V2DF 1 "register_operand" "x")
1162 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1163 (minus:V2DF (match_dup 1) (match_dup 2))
1166 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1167 [(set_attr "type" "sseadd")
1168 (set_attr "prefix" "vex")
1169 (set_attr "mode" "V2DF")])
;; V2DF add/subtract, legacy encoding: emits addsubpd in two-operand
;; form.  Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
1171 (define_insn "sse3_addsubv2df3"
1172 [(set (match_operand:V2DF 0 "register_operand" "=x")
1175 (match_operand:V2DF 1 "register_operand" "0")
1176 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1177 (minus:V2DF (match_dup 1) (match_dup 2))
1180 "addsubpd\t{%2, %0|%0, %2}"
1181 [(set_attr "type" "sseadd")
1182 (set_attr "mode" "V2DF")])
;; 256-bit V4DF horizontal operation: emits vh<plusminus_mnemonic>pd.
;; The pattern selects DF elements 0..3 of operand 1 (register) and of
;; operand 2 (register or memory) individually via vec_select.
1184 (define_insn "avx_h<plusminus_insn>v4df3"
1185 [(set (match_operand:V4DF 0 "register_operand" "=x")
1190 (match_operand:V4DF 1 "register_operand" "x")
1191 (parallel [(const_int 0)]))
1192 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1194 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1195 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1199 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1200 (parallel [(const_int 0)]))
1201 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1203 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1204 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1206 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1207 [(set_attr "type" "sseadd")
1208 (set_attr "prefix" "vex")
1209 (set_attr "mode" "V4DF")])
;; 256-bit V8SF horizontal operation: emits vh<plusminus_mnemonic>ps.
;; The pattern first selects SF elements 0..3 of operands 1 and 2, then
;; elements 4..7 of operands 1 and 2, each via vec_select.
1211 (define_insn "avx_h<plusminus_insn>v8sf3"
1212 [(set (match_operand:V8SF 0 "register_operand" "=x")
1218 (match_operand:V8SF 1 "register_operand" "x")
1219 (parallel [(const_int 0)]))
1220 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1222 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1223 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1227 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1228 (parallel [(const_int 0)]))
1229 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1231 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1232 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1236 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1237 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1239 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1240 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1243 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1244 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1246 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1247 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1249 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1250 [(set_attr "type" "sseadd")
1251 (set_attr "prefix" "vex")
1252 (set_attr "mode" "V8SF")])
;; 128-bit V4SF horizontal operation, VEX-encoded: emits
;; vh<plusminus_mnemonic>ps in three-operand form.  The pattern selects
;; SF elements 0..3 of operand 1 (register) and operand 2 (register or
;; memory) via vec_select.
1254 (define_insn "*avx_h<plusminus_insn>v4sf3"
1255 [(set (match_operand:V4SF 0 "register_operand" "=x")
1260 (match_operand:V4SF 1 "register_operand" "x")
1261 (parallel [(const_int 0)]))
1262 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1264 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1265 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1269 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1270 (parallel [(const_int 0)]))
1271 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1273 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1274 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1276 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1277 [(set_attr "type" "sseadd")
1278 (set_attr "prefix" "vex")
1279 (set_attr "mode" "V4SF")])
;; V4SF horizontal operation, legacy encoding: emits
;; h<plusminus_mnemonic>ps in two-operand form.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  The
;; prefix_rep attribute is set to 1.
1281 (define_insn "sse3_h<plusminus_insn>v4sf3"
1282 [(set (match_operand:V4SF 0 "register_operand" "=x")
1287 (match_operand:V4SF 1 "register_operand" "0")
1288 (parallel [(const_int 0)]))
1289 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1291 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1292 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1296 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1297 (parallel [(const_int 0)]))
1298 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1300 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1301 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1303 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1304 [(set_attr "type" "sseadd")
1305 (set_attr "prefix_rep" "1")
1306 (set_attr "mode" "V4SF")])
;; 128-bit V2DF horizontal operation, VEX-encoded: emits
;; vh<plusminus_mnemonic>pd in three-operand form.  The pattern selects
;; DF elements 0 and 1 of operand 1 (register) and of operand 2 (register
;; or memory) via vec_select.
1308 (define_insn "*avx_h<plusminus_insn>v2df3"
1309 [(set (match_operand:V2DF 0 "register_operand" "=x")
1313 (match_operand:V2DF 1 "register_operand" "x")
1314 (parallel [(const_int 0)]))
1315 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1318 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1319 (parallel [(const_int 0)]))
1320 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1322 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1323 [(set_attr "type" "sseadd")
1324 (set_attr "prefix" "vex")
1325 (set_attr "mode" "V2DF")])
;; V2DF horizontal operation, legacy encoding: emits
;; h<plusminus_mnemonic>pd in two-operand form.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
1327 (define_insn "sse3_h<plusminus_insn>v2df3"
1328 [(set (match_operand:V2DF 0 "register_operand" "=x")
1332 (match_operand:V2DF 1 "register_operand" "0")
1333 (parallel [(const_int 0)]))
1334 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1337 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1338 (parallel [(const_int 0)]))
1339 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1341 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1342 [(set_attr "type" "sseadd")
1343 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF register (operand 1) into operand 0.
;; Two strategies are visible in the body: two gen_sse3_haddv4sf3 steps
;; through a fresh temporary, and a call to ix86_expand_reduc_v4sf with
;; gen_addv4sf3.
;; NOTE(review): the condition selecting between them is not visible in
;; this excerpt; presumably it tests for SSE3 availability -- confirm.
1345 (define_expand "reduc_splus_v4sf"
1346 [(match_operand:V4SF 0 "register_operand" "")
1347 (match_operand:V4SF 1 "register_operand" "")]
1352 rtx tmp = gen_reg_rtx (V4SFmode);
1353 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1354 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1357 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF register: emits one gen_sse3_haddv2df3 insn
;; that uses operand 1 as both inputs and writes operand 0.
1361 (define_expand "reduc_splus_v2df"
1362 [(match_operand:V2DF 0 "register_operand" "")
1363 (match_operand:V2DF 1 "register_operand" "")]
1366 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF register: delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining generator.
1370 (define_expand "reduc_smax_v4sf"
1371 [(match_operand:V4SF 0 "register_operand" "")
1372 (match_operand:V4SF 1 "register_operand" "")]
1375 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register: delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining generator.
1379 (define_expand "reduc_smin_v4sf"
1380 [(match_operand:V4SF 0 "register_operand" "")
1381 (match_operand:V4SF 1 "register_operand" "")]
1384 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1390 ;; Parallel floating point comparisons
1392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an immediate selector: emits vcmpp<suffix>.
;; Operand 3 is an SImode immediate accepted by const_0_to_31_operand;
;; operand 1 is a register and operand 2 a register or memory.
1394 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1395 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1397 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1398 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1399 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1402 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1403 [(set_attr "type" "ssecmp")
1404 (set_attr "prefix" "vex")
1405 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an immediate selector: emits vcmps<suffix>.
;; Operand 3 is an SImode immediate accepted by const_0_to_31_operand;
;; operand 1 is a register and operand 2 a register or memory.
;; Operand 0 is the result and carries the "=x" output constraint, like
;; the destination of the packed form, so that register allocation sees
;; it as a written SSE register (an empty constraint gives no register
;; class and no output marking).
1407 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1408 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1409 (vec_merge:SSEMODEF2P
1411 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1412 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1413 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1418 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1419 [(set_attr "type" "ssecmp")
1420 (set_attr "prefix" "vex")
1421 (set_attr "mode" "<ssescalarmode>")])
1423 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1424 ;; may generate 256-bit vector compare instructions.
;; AVX packed mask compare.  The comparison is matched by operator 3
;; (predicate avx_comparison_float_operator) and printed into the
;; mnemonic through the %D3 output modifier.
1425 (define_insn "*avx_maskcmp<mode>3"
1426 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1427 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1428 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1429 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1430 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1431 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1432 [(set_attr "type" "ssecmp")
1433 (set_attr "prefix" "vex")
1434 (set_attr "mode" "<avxvecmode>")])
;; SSE mask compare over the SSEMODEF4 modes (scalar and vector).  The
;; comparison is matched by operator 3 (sse_comparison_operator) and
;; printed via %D3.  Operand 1 is tied to the destination ("0").
;; NOTE(review): the tail of the condition string is not visible in this
;; excerpt.
1436 (define_insn "<sse>_maskcmp<mode>3"
1437 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1438 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1439 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1440 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1441 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1443 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1444 [(set_attr "type" "ssecmp")
1445 (set_attr "mode" "<MODE>")])
;; SSE scalar mask compare (vec_merge pattern), disabled when TARGET_SSE5
;; is set.  The comparison is matched by operator 3 and printed via %D3;
;; operand 1 is tied to the destination ("0").
1447 (define_insn "<sse>_vmmaskcmp<mode>3"
1448 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1449 (vec_merge:SSEMODEF2P
1450 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1451 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1452 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1455 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1456 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1457 [(set_attr "type" "ssecmp")
1458 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode: emits
;; %vcomis<suffix>.  Both inputs are vector-mode operands from which
;; element 0 is selected; operand 0 is a register, operand 1 a register
;; or memory.  The "maybe_vex" prefix attribute pairs with the %v in the
;; template.
1460 (define_insn "<sse>_comi"
1461 [(set (reg:CCFP FLAGS_REG)
1464 (match_operand:<ssevecmode> 0 "register_operand" "x")
1465 (parallel [(const_int 0)]))
1467 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1468 (parallel [(const_int 0)]))))]
1469 "SSE_FLOAT_MODE_P (<MODE>mode)"
1470 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1471 [(set_attr "type" "ssecomi")
1472 (set_attr "prefix" "maybe_vex")
1473 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets FLAGS_REG in CCFPU mode: emits
;; %vucomis<suffix>.  Both inputs are vector-mode operands from which
;; element 0 is selected; operand 0 is a register, operand 1 a register
;; or memory.
1475 (define_insn "<sse>_ucomi"
1476 [(set (reg:CCFPU FLAGS_REG)
1479 (match_operand:<ssevecmode> 0 "register_operand" "x")
1480 (parallel [(const_int 0)]))
1482 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1483 (parallel [(const_int 0)]))))]
1484 "SSE_FLOAT_MODE_P (<MODE>mode)"
1485 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1486 [(set_attr "type" "ssecomi")
1487 (set_attr "prefix" "maybe_vex")
1488 (set_attr "mode" "<MODE>")])
;; Vector conditional select for SSEMODEF2P modes: operands 4 and 5 are
;; compared by operator 3, and operands 1 and 2 are the two arms of the
;; if_then_else.  Expansion is delegated to ix86_expand_fp_vcond, whose
;; boolean result is stored in `ok'.
;; NOTE(review): what is done with `ok' is not visible in this excerpt.
1490 (define_expand "vcond<mode>"
1491 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1492 (if_then_else:SSEMODEF2P
1493 (match_operator 3 ""
1494 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1495 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1496 (match_operand:SSEMODEF2P 1 "general_operand" "")
1497 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1498 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1500 bool ok = ix86_expand_fp_vcond (operands);
1505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1507 ;; Parallel floating point logical operations
1509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not: emits vandnp<suffix> in three-operand form.
;; Operand 1 is a register, operand 2 a register or memory.
1511 (define_insn "avx_andnot<mode>3"
1512 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1515 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1516 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1517 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1518 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1519 [(set_attr "type" "sselog")
1520 (set_attr "prefix" "vex")
1521 (set_attr "mode" "<avxvecmode>")])
;; SSE packed and-not: emits andnp<suffix> in two-operand form.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
1523 (define_insn "<sse>_andnot<mode>3"
1524 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1527 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1528 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1529 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1530 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1531 [(set_attr "type" "sselog")
1532 (set_attr "mode" "<MODE>")])
;; Expander for the plogic codes on the AVX256MODEF2P modes; its only
;; preparation step is ix86_fixup_binary_operands_no_copy.
1534 (define_expand "<code><mode>3"
1535 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1536 (plogic:AVX256MODEF2P
1537 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1538 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1539 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1540 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical operation: emits v<plogicprefix>p<suffix> in
;; three-operand form when ix86_binary_operator_ok accepts the operands.
;; Operand 1 carries the "%" modifier (commutative with operand 2).
1542 (define_insn "*avx_<code><mode>3"
1543 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1545 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1546 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1547 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1548 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1549 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1550 [(set_attr "type" "sselog")
1551 (set_attr "prefix" "vex")
1552 (set_attr "mode" "<avxvecmode>")])
;; Expander "<code><mode>3" for the SSEMODEF2P modes; its only
;; preparation step is ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the rtx code line is not visible in this excerpt;
;; presumably this is the plogic (and/ior/xor) expander -- confirm.
1554 (define_expand "<code><mode>3"
1555 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1557 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1559 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1560 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed logical operation: emits <plogicprefix>p<suffix> in
;; two-operand form when ix86_binary_operator_ok accepts the operands.
;; Operand 1 is tied to the destination and commutative ("%0").
1562 (define_insn "*<code><mode>3"
1563 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1565 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1566 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1567 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1568 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1569 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1570 [(set_attr "type" "sselog")
1571 (set_attr "mode" "<MODE>")])
1573 ;; Also define scalar versions. These are used for abs, neg, and
1574 ;; conditional move. Using subregs into vector modes causes register
1575 ;; allocation lossage. These patterns do not allow memory operands
1576 ;; because the native instructions read the full 128-bits.
;; AVX and-not on scalar MODEF values held in SSE registers: emits
;; vandnp<suffix>.  All three operands must be registers; the insn mode
;; attribute is the corresponding vector mode <ssevecmode>.
1578 (define_insn "*avx_andnot<mode>3"
1579 [(set (match_operand:MODEF 0 "register_operand" "=x")
1582 (match_operand:MODEF 1 "register_operand" "x"))
1583 (match_operand:MODEF 2 "register_operand" "x")))]
1584 "AVX_FLOAT_MODE_P (<MODE>mode)"
1585 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1586 [(set_attr "type" "sselog")
1587 (set_attr "prefix" "vex")
1588 (set_attr "mode" "<ssevecmode>")])
;; SSE and-not on scalar MODEF values held in SSE registers: emits
;; andnp<suffix> in two-operand form.  Operand 1 is tied to the
;; destination ("0") and operand 2 must be a register.
1590 (define_insn "*andnot<mode>3"
1591 [(set (match_operand:MODEF 0 "register_operand" "=x")
1594 (match_operand:MODEF 1 "register_operand" "0"))
1595 (match_operand:MODEF 2 "register_operand" "x")))]
1596 "SSE_FLOAT_MODE_P (<MODE>mode)"
1597 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1598 [(set_attr "type" "sselog")
1599 (set_attr "mode" "<ssevecmode>")])
;; AVX logical operation on scalar MODEF values held in SSE registers:
;; emits v<plogicprefix>p<suffix>.  All three operands must be registers.
1601 (define_insn "*avx_<code><mode>3"
1602 [(set (match_operand:MODEF 0 "register_operand" "=x")
1604 (match_operand:MODEF 1 "register_operand" "x")
1605 (match_operand:MODEF 2 "register_operand" "x")))]
1606 "AVX_FLOAT_MODE_P (<MODE>mode)"
1607 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1608 [(set_attr "type" "sselog")
1609 (set_attr "prefix" "vex")
1610 (set_attr "mode" "<ssevecmode>")])
;; SSE logical operation on scalar MODEF values held in SSE registers:
;; emits <plogicprefix>p<suffix> in two-operand form.  Operand 1 is tied
;; to the destination ("0") and operand 2 must be a register.
1612 (define_insn "*<code><mode>3"
1613 [(set (match_operand:MODEF 0 "register_operand" "=x")
1615 (match_operand:MODEF 1 "register_operand" "0")
1616 (match_operand:MODEF 2 "register_operand" "x")))]
1617 "SSE_FLOAT_MODE_P (<MODE>mode)"
1618 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1619 [(set_attr "type" "sselog")
1620 (set_attr "mode" "<ssevecmode>")])
1622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1624 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1625 ;; scalar versions of the instructions as well as the vector versions.
1627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1629 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1630 ;; combine to generate a multiply/add with two memory references. We then
1631 ;; split this insn, into loading up the destination register with one of the
1632 ;; memory operations. If we don't manage to split the insn, reload will
1633 ;; generate the appropriate moves. The reason this is needed, is that combine
1634 ;; has already folded one of the memory references into both the multiply and
1635 ;; add insns, and it can't generate a new pseudo. I.e.:
1636 ;; (set (reg1) (mem (addr1)))
1637 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1638 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fused multiply-add over the SSEMODEF4 modes, with four constraint
;; alternatives.  Requires TARGET_SSE5 and TARGET_FUSED_MADD, and the
;; operands must pass ix86_sse5_valid_op_p (called here with 2 as its
;; fifth argument).
1640 (define_insn "sse5_fmadd<mode>4"
1641 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1644 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1645 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1646 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1647 "TARGET_SSE5 && TARGET_FUSED_MADD
1648 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1649 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1650 [(set_attr "type" "ssemuladd")
1651 (set_attr "mode" "<MODE>")])
1653 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Splitter body for fmadd (its header line is not visible in this
;; excerpt).  It applies when ix86_sse5_valid_op_p rejects the operands
;; with 1 as its fifth argument but accepts them with 2, and operand 0 is
;; not mentioned in any input.  It calls ix86_expand_sse5_multiple_memory
;; and then re-emits the insn through gen_sse5_fmadd<mode>4.
1655 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1658 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1659 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1660 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1662 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1663 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1664 && !reg_mentioned_p (operands[0], operands[1])
1665 && !reg_mentioned_p (operands[0], operands[2])
1666 && !reg_mentioned_p (operands[0], operands[3])"
1669 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1670 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1671 operands[2], operands[3]));
1675 ;; For the scalar operations, use operand1 for the upper words that aren't
1676 ;; modified, so restrict the forms that are generated.
1677 ;; Scalar version of fmadd
;; Scalar (vec_merge) SSE5 fused multiply-add with two constraint
;; alternatives; operand 1 is tied to the destination in both.  The
;; operands must pass ix86_sse5_valid_op_p (called here with 1 as its
;; fifth argument).
1678 (define_insn "sse5_vmfmadd<mode>4"
1679 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1680 (vec_merge:SSEMODEF2P
1683 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1684 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1685 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1688 "TARGET_SSE5 && TARGET_FUSED_MADD
1689 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1690 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1691 [(set_attr "type" "ssemuladd")
1692 (set_attr "mode" "<MODE>")])
1694 ;; Floating multiply and subtract
1695 ;; Allow two memory operands the same as fmadd
;; SSE5 fused multiply-subtract over the SSEMODEF4 modes, with four
;; constraint alternatives.  Requires TARGET_SSE5 and TARGET_FUSED_MADD,
;; and the operands must pass ix86_sse5_valid_op_p (fifth argument 2).
1696 (define_insn "sse5_fmsub<mode>4"
1697 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1700 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1701 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1702 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1703 "TARGET_SSE5 && TARGET_FUSED_MADD
1704 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1705 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1706 [(set_attr "type" "ssemuladd")
1707 (set_attr "mode" "<MODE>")])
1709 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Splitter body for fmsub (its header line is not visible in this
;; excerpt).  It applies when ix86_sse5_valid_op_p rejects the operands
;; with 1 as its fifth argument but accepts them with 2, and operand 0 is
;; not mentioned in any input.  It calls ix86_expand_sse5_multiple_memory
;; and then re-emits the insn through gen_sse5_fmsub<mode>4.
1711 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1714 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1715 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1716 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1718 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1719 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1720 && !reg_mentioned_p (operands[0], operands[1])
1721 && !reg_mentioned_p (operands[0], operands[2])
1722 && !reg_mentioned_p (operands[0], operands[3])"
1725 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1726 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1727 operands[2], operands[3]));
1731 ;; For the scalar operations, use operand1 for the upper words that aren't
1732 ;; modified, so restrict the forms that are generated.
1733 ;; Scalar version of fmsub
;; Scalar (vec_merge) SSE5 fused multiply-subtract with two constraint
;; alternatives; operand 1 is tied to the destination in both.
;; NOTE(review): the last ix86_sse5_valid_op_p argument is false here but
;; true in the scalar fmadd pattern -- confirm this is intended.
1734 (define_insn "sse5_vmfmsub<mode>4"
1735 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1736 (vec_merge:SSEMODEF2P
1739 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1740 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1741 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1744 "TARGET_SSE5 && TARGET_FUSED_MADD
1745 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1746 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1747 [(set_attr "type" "ssemuladd")
1748 (set_attr "mode" "<MODE>")])
1750 ;; Floating point negative multiply and add
1751 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1752 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1753 ;; Allow two memory operands to help in optimizing.
;; SSE5 fused negative-multiply-add over the SSEMODEF4 modes, with four
;; constraint alternatives.  Operand 3 appears first in the RTL, ahead of
;; operands 1 and 2, while the output template still prints %1, %2, %3.
1754 (define_insn "sse5_fnmadd<mode>4"
1755 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1757 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1759 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1760 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1761 "TARGET_SSE5 && TARGET_FUSED_MADD
1762 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1763 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1764 [(set_attr "type" "ssemuladd")
1765 (set_attr "mode" "<MODE>")])
1767 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Splitter body for fnmadd (its header line is not visible in this
;; excerpt).  It applies when ix86_sse5_valid_op_p rejects the operands
;; with 1 as its fifth argument but accepts them with 2, and operand 0 is
;; not mentioned in any input.  It calls ix86_expand_sse5_multiple_memory
;; and then re-emits the insn through gen_sse5_fnmadd<mode>4.
1769 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1771 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1773 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1774 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1776 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1777 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1778 && !reg_mentioned_p (operands[0], operands[1])
1779 && !reg_mentioned_p (operands[0], operands[2])
1780 && !reg_mentioned_p (operands[0], operands[3])"
1783 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1784 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1785 operands[2], operands[3]));
1789 ;; For the scalar operations, use operand1 for the upper words that aren't
1790 ;; modified, so restrict the forms that are generated.
1791 ;; Scalar version of fnmadd
;; Scalar (vec_merge) SSE5 fused negative-multiply-add with two
;; constraint alternatives.  Operand 3 appears first in the RTL; operand
;; 1 is tied to the destination in both alternatives.
1792 (define_insn "sse5_vmfnmadd<mode>4"
1793 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1794 (vec_merge:SSEMODEF2P
1796 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1798 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1799 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1802 "TARGET_SSE5 && TARGET_FUSED_MADD
1803 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1804 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1805 [(set_attr "type" "ssemuladd")
1806 (set_attr "mode" "<MODE>")])
1808 ;; Floating point negative multiply and subtract
1809 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1810 ;; Allow 2 memory operands to help with optimization
;; SSE5 fused negative-multiply-subtract over the SSEMODEF4 modes.
;; Unlike the fmadd/fmsub/fnmadd patterns it has only two constraint
;; alternatives, both with operand 1 tied to the destination, and it
;; passes false as the last ix86_sse5_valid_op_p argument.
1811 (define_insn "sse5_fnmsub<mode>4"
1812 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1816 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1817 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1818 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1819 "TARGET_SSE5 && TARGET_FUSED_MADD
1820 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1821 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1822 [(set_attr "type" "ssemuladd")
1823 (set_attr "mode" "<MODE>")])
1825 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Splitter body for fnmsub (its header line is not visible in this
;; excerpt).  It applies when ix86_sse5_valid_op_p rejects the operands
;; with 1 as its fifth argument but accepts them with 2, and operand 0 is
;; not mentioned in any input.  It calls ix86_expand_sse5_multiple_memory
;; and then re-emits the insn through gen_sse5_fnmsub<mode>4.
1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1833 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1835 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1836 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1837 && !reg_mentioned_p (operands[0], operands[1])
1838 && !reg_mentioned_p (operands[0], operands[2])
1839 && !reg_mentioned_p (operands[0], operands[3])"
1842 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1843 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1844 operands[2], operands[3]));
1848 ;; For the scalar operations, use operand1 for the upper words that aren't
1849 ;; modified, so restrict the forms that are generated.
1850 ;; Scalar version of fnmsub
;; Scalar (vec_merge) SSE5 fused negative-multiply-subtract with two
;; constraint alternatives; operand 1 is tied to the destination in both.
;; NOTE(review): ix86_sse5_valid_op_p is called with 2 as its fifth
;; argument here, whereas the other scalar patterns in this group pass
;; 1 -- confirm this is intended.
1851 (define_insn "sse5_vmfnmsub<mode>4"
1852 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1853 (vec_merge:SSEMODEF2P
1857 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1858 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1859 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1862 "TARGET_SSE5 && TARGET_FUSED_MADD
1863 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1864 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1865 [(set_attr "type" "ssemuladd")
1866 (set_attr "mode" "<MODE>")])
1868 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1869 ;; even if the user used -mno-fused-madd.
1870 ;; Parallel instructions. During instruction generation, just default
1871 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for fmadd: all operands are registers and the
;; pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD
;; is set, the body instead emits the regular gen_sse5_fmadd<mode>4 insn.
1872 (define_expand "sse5i_fmadd<mode>4"
1873 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1877 (match_operand:SSEMODEF2P 1 "register_operand" "")
1878 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1879 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1880 UNSPEC_SSE5_INTRINSIC))]
1883 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1884 if (TARGET_FUSED_MADD)
1886 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1887 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fmadd.  Its condition
;; needs only TARGET_SSE5 (no TARGET_FUSED_MADD test) plus
;; ix86_sse5_valid_op_p with 1 as its fifth argument.
1892 (define_insn "*sse5i_fmadd<mode>4"
1893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1897 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1898 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1899 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1900 UNSPEC_SSE5_INTRINSIC))]
1901 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1902 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1903 [(set_attr "type" "ssemuladd")
1904 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for fmsub: all operands are registers and the
;; pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD
;; is set, the body instead emits the regular gen_sse5_fmsub<mode>4 insn.
1906 (define_expand "sse5i_fmsub<mode>4"
1907 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1911 (match_operand:SSEMODEF2P 1 "register_operand" "")
1912 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1913 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1914 UNSPEC_SSE5_INTRINSIC))]
1917 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1918 if (TARGET_FUSED_MADD)
1920 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1921 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of packed fmsub.
;; Uses the same four constraint alternatives as the fmadd insn: the
;; destination is tied to operand 1 or operand 3, and each alternative
;; lists "xm" for exactly one input.
1926 (define_insn "*sse5i_fmsub<mode>4"
1927 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1931 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1932 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1933 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1934 UNSPEC_SSE5_INTRINSIC))]
1935 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1936 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1937 [(set_attr "type" "ssemuladd")
1938 (set_attr "mode" "<MODE>")])
1940 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1941 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the packed fnmadd intrinsic on SSEMODEF2P (V4SF/V2DF).
;; Operand 3 is listed before operands 1 and 2 in the RTL template.
;; With TARGET_FUSED_MADD the preparation code emits
;; gen_sse5_fnmadd<mode>4 with operands in numeric order 0..3.
1942 (define_expand "sse5i_fnmadd<mode>4"
1943 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1946 (match_operand:SSEMODEF2P 3 "register_operand" "")
1948 (match_operand:SSEMODEF2P 1 "register_operand" "")
1949 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1950 UNSPEC_SSE5_INTRINSIC))]
1953 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1954 if (TARGET_FUSED_MADD)
1956 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1957 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of packed fnmadd.
;; Operand 3 comes first in the RTL template, followed by operands 1 and
;; 2; the output template still prints operands in numeric order.
;; The per-operand constraint alternatives are the same as for fmadd.
1962 (define_insn "*sse5i_fnmadd<mode>4"
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1966 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1968 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1969 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1970 UNSPEC_SSE5_INTRINSIC))]
1971 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1972 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
1976 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the packed fnmsub intrinsic on SSEMODEF2P (V4SF/V2DF).
;; All operands are registers and the pattern is tagged with
;; UNSPEC_SSE5_INTRINSIC.  With TARGET_FUSED_MADD the preparation code
;; emits gen_sse5_fnmsub<mode>4 on the same four operands.
1977 (define_expand "sse5i_fnmsub<mode>4"
1978 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1983 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1984 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1985 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1986 UNSPEC_SSE5_INTRINSIC))]
1989 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1990 if (TARGET_FUSED_MADD)
1992 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1993 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of packed fnmsub.
;; Unlike the fmadd/fmsub/fnmadd insns, operand 1 carries no "%"
;; commutative marker, and the last argument passed to
;; ix86_sse5_valid_op_p is false rather than true.
1998 (define_insn "*sse5i_fnmsub<mode>4"
1999 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2007 UNSPEC_SSE5_INTRINSIC))]
2008 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2009 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010 [(set_attr "type" "ssemuladd")
2011 (set_attr "mode" "<MODE>")])
2013 ;; Scalar instructions
;; Expander for the scalar (vec_merge) fmadd intrinsic.  All operands are
;; registers and the pattern is tagged with UNSPEC_SSE5_INTRINSIC.  With
;; TARGET_FUSED_MADD the preparation code emits gen_sse5_vmfmadd<mode>4.
2014 (define_expand "sse5i_vmfmadd<mode>4"
2015 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2017 [(vec_merge:SSEMODEF2P
2020 (match_operand:SSEMODEF2P 1 "register_operand" "")
2021 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2022 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2025 UNSPEC_SSE5_INTRINSIC))]
2028 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2029 if (TARGET_FUSED_MADD)
2031 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2032 operands[2], operands[3]));
2037 ;; For the scalar operations, use operand1 for the upper words that aren't
2038 ;; modified, so restrict the forms that are accepted.
;; Insn matching the UNSPEC form of scalar fmadd.  Operand 1 must be a
;; register tied to the destination ("0" in both alternatives); memory is
;; accepted only for operand 2 or operand 3, one per alternative.  The
;; mnemonic uses the scalar suffix and the mode attribute is
;; <ssescalarmode>.
2039 (define_insn "*sse5i_vmfmadd<mode>4"
2040 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2042 [(vec_merge:SSEMODEF2P
2045 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2046 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2047 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2050 UNSPEC_SSE5_INTRINSIC))]
2051 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2052 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2053 [(set_attr "type" "ssemuladd")
2054 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) fmsub intrinsic.  All operands are
;; registers and the pattern is tagged with UNSPEC_SSE5_INTRINSIC.  With
;; TARGET_FUSED_MADD the preparation code emits gen_sse5_vmfmsub<mode>4.
2056 (define_expand "sse5i_vmfmsub<mode>4"
2057 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2059 [(vec_merge:SSEMODEF2P
2062 (match_operand:SSEMODEF2P 1 "register_operand" "")
2063 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2064 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2067 UNSPEC_SSE5_INTRINSIC))]
2070 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2071 if (TARGET_FUSED_MADD)
2073 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2074 operands[2], operands[3]));
;; Insn matching the UNSPEC form of scalar fmsub.  Operand 1 is a register
;; tied to the destination in both alternatives; memory is accepted only
;; for operand 2 or operand 3, one per alternative.
2079 (define_insn "*sse5i_vmfmsub<mode>4"
2080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2082 [(vec_merge:SSEMODEF2P
2085 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2087 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2090 UNSPEC_SSE5_INTRINSIC))]
2091 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2092 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2093 [(set_attr "type" "ssemuladd")
2094 (set_attr "mode" "<ssescalarmode>")])
2096 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar (vec_merge) fnmadd intrinsic.  Operand 3 is
;; listed before operands 1 and 2 in the RTL template.  With
;; TARGET_FUSED_MADD the preparation code emits gen_sse5_vmfnmadd<mode>4
;; with operands in numeric order 0..3.
2097 (define_expand "sse5i_vmfnmadd<mode>4"
2098 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2100 [(vec_merge:SSEMODEF2P
2102 (match_operand:SSEMODEF2P 3 "register_operand" "")
2104 (match_operand:SSEMODEF2P 1 "register_operand" "")
2105 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2108 UNSPEC_SSE5_INTRINSIC))]
2111 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2112 if (TARGET_FUSED_MADD)
2114 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2115 operands[2], operands[3]));
;; Insn matching the UNSPEC form of scalar fnmadd.  Operand 3 comes first
;; in the RTL template.
;; NOTE(review): operand 1 uses the "nonimmediate_operand" predicate with
;; "%0,0" here, whereas the other scalar insns in this group use
;; "register_operand" "0,0" -- confirm this difference is intended.
2120 (define_insn "*sse5i_vmfnmadd<mode>4"
2121 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2123 [(vec_merge:SSEMODEF2P
2125 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2127 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2131 UNSPEC_SSE5_INTRINSIC))]
2132 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2133 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2134 [(set_attr "type" "ssemuladd")
2135 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) fnmsub intrinsic.  All operands are
;; registers and the pattern is tagged with UNSPEC_SSE5_INTRINSIC.  With
;; TARGET_FUSED_MADD the preparation code emits gen_sse5_vmfnmsub<mode>4.
2137 (define_expand "sse5i_vmfnmsub<mode>4"
2138 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2140 [(vec_merge:SSEMODEF2P
2144 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2145 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2146 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2149 UNSPEC_SSE5_INTRINSIC))]
2152 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2153 if (TARGET_FUSED_MADD)
2155 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2156 operands[2], operands[3]));
;; Insn matching the UNSPEC form of scalar fnmsub.  Operand 1 is a register
;; tied to the destination in both alternatives; memory is accepted only
;; for operand 2 or operand 3, one per alternative.
2161 (define_insn "*sse5i_vmfnmsub<mode>4"
2162 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2164 [(vec_merge:SSEMODEF2P
2168 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2169 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2170 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2173 UNSPEC_SSE5_INTRINSIC))]
2174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2175 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2176 [(set_attr "type" "ssemuladd")
2177 (set_attr "mode" "<ssescalarmode>")])
2179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2181 ;; Parallel single-precision floating point conversion operations
2183 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF via
;; float:V2SF.  Operand 1 is a V4SF register tied to the destination ("0").
2185 (define_insn "sse_cvtpi2ps"
2186 [(set (match_operand:V4SF 0 "register_operand" "=x")
2189 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2190 (match_operand:V4SF 1 "register_operand" "0")
2193 "cvtpi2ps\t{%2, %0|%0, %2}"
2194 [(set_attr "type" "ssecvt")
2195 (set_attr "mode" "V4SF")])
;; cvtps2pi: V2SI result (constraint "=y") taken as elements 0 and 1 of a
;; V4SI unspec applied to the V4SF operand 1.  Marked as unit "mmx".
2197 (define_insn "sse_cvtps2pi"
2198 [(set (match_operand:V2SI 0 "register_operand" "=y")
2200 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2202 (parallel [(const_int 0) (const_int 1)])))]
2204 "cvtps2pi\t{%1, %0|%0, %1}"
2205 [(set_attr "type" "ssecvt")
2206 (set_attr "unit" "mmx")
2207 (set_attr "mode" "DI")])
;; cvttps2pi: V2SI result (constraint "=y") taken as elements 0 and 1 of
;; fix:V4SI applied to the V4SF operand 1.  Marked as unit "mmx".
;; NOTE(review): the mode attribute is "SF" here but "DI" on sse_cvtps2pi
;; -- confirm which is intended.
2209 (define_insn "sse_cvttps2pi"
2210 [(set (match_operand:V2SI 0 "register_operand" "=y")
2212 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2213 (parallel [(const_int 0) (const_int 1)])))]
2215 "cvttps2pi\t{%1, %0|%0, %1}"
2216 [(set_attr "type" "ssecvt")
2217 (set_attr "unit" "mmx")
2218 (set_attr "mode" "SF")])
;; vcvtsi2ss (VEX, three-operand form): converts the SImode operand 2
;; (register or memory) to SF.  Operand 1 is a separate V4SF register
;; input ("x"), not tied to the destination.
2220 (define_insn "*avx_cvtsi2ss"
2221 [(set (match_operand:V4SF 0 "register_operand" "=x")
2224 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2225 (match_operand:V4SF 1 "register_operand" "x")
2228 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2229 [(set_attr "type" "sseicvt")
2230 (set_attr "prefix" "vex")
2231 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF; operand 1 is tied to the
;; destination.  The register ("r") and memory ("m") sources are separate
;; alternatives so the decode attributes can differ per alternative.
2233 (define_insn "sse_cvtsi2ss"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2237 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2238 (match_operand:V4SF 1 "register_operand" "0,0")
2241 "cvtsi2ss\t{%2, %0|%0, %2}"
2242 [(set_attr "type" "sseicvt")
2243 (set_attr "athlon_decode" "vector,double")
2244 (set_attr "amdfam10_decode" "vector,double")
2245 (set_attr "mode" "SF")])
;; vcvtsi2ssq: as the AVX cvtsi2ss pattern but with a DImode source
;; operand 2; enabled only for TARGET_AVX && TARGET_64BIT.
2247 (define_insn "*avx_cvtsi2ssq"
2248 [(set (match_operand:V4SF 0 "register_operand" "=x")
2251 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2252 (match_operand:V4SF 1 "register_operand" "x")
2254 "TARGET_AVX && TARGET_64BIT"
2255 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2256 [(set_attr "type" "sseicvt")
2257 (set_attr "prefix" "vex")
2258 (set_attr "mode" "SF")])
;; cvtsi2ssq: converts the DImode operand 2 to SF; operand 1 is tied to
;; the destination.  Enabled only for TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative of operand 2 is "rm", whereas
;; sse_cvtsi2ss and sse2_cvtsi2sdq use "r,m" -- confirm whether the
;; overlap with the first alternative is intended.
2260 (define_insn "sse_cvtsi2ssq"
2261 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2264 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2265 (match_operand:V4SF 1 "register_operand" "0,0")
2267 "TARGET_SSE && TARGET_64BIT"
2268 "cvtsi2ssq\t{%2, %0|%0, %2}"
2269 [(set_attr "type" "sseicvt")
2270 (set_attr "athlon_decode" "vector,double")
2271 (set_attr "amdfam10_decode" "vector,double")
2272 (set_attr "mode" "SF")])
;; cvtss2si: SImode result from element 0 of the V4SF operand 1, wrapped
;; in UNSPEC_FIX_NOTRUNC.  The "%v" in the template goes together with the
;; "maybe_vex" prefix attribute.  No amdfam10_decode attribute is set here.
2274 (define_insn "sse_cvtss2si"
2275 [(set (match_operand:SI 0 "register_operand" "=r,r")
2278 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2279 (parallel [(const_int 0)]))]
2280 UNSPEC_FIX_NOTRUNC))]
2282 "%vcvtss2si\t{%1, %0|%0, %1}"
2283 [(set_attr "type" "sseicvt")
2284 (set_attr "athlon_decode" "double,vector")
2285 (set_attr "prefix_rep" "1")
2286 (set_attr "prefix" "maybe_vex")
2287 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source operand 1 is a scalar SF value rather
;; than an element selected from a V4SF vector; same UNSPEC_FIX_NOTRUNC
;; wrapper and same output template.
2289 (define_insn "sse_cvtss2si_2"
2290 [(set (match_operand:SI 0 "register_operand" "=r,r")
2291 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2292 UNSPEC_FIX_NOTRUNC))]
2294 "%vcvtss2si\t{%1, %0|%0, %1}"
2295 [(set_attr "type" "sseicvt")
2296 (set_attr "athlon_decode" "double,vector")
2297 (set_attr "amdfam10_decode" "double,double")
2298 (set_attr "prefix_rep" "1")
2299 (set_attr "prefix" "maybe_vex")
2300 (set_attr "mode" "SI")])
;; 64-bit cvtss2si: DImode result from element 0 of the V4SF operand 1,
;; wrapped in UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE && TARGET_64BIT.
;; The "{q}" in the template is a dialect-specific mnemonic suffix.
2302 (define_insn "sse_cvtss2siq"
2303 [(set (match_operand:DI 0 "register_operand" "=r,r")
2306 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2307 (parallel [(const_int 0)]))]
2308 UNSPEC_FIX_NOTRUNC))]
2309 "TARGET_SSE && TARGET_64BIT"
2310 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2311 [(set_attr "type" "sseicvt")
2312 (set_attr "athlon_decode" "double,vector")
2313 (set_attr "prefix_rep" "1")
2314 (set_attr "prefix" "maybe_vex")
2315 (set_attr "mode" "DI")])
;; Variant of the 64-bit cvtss2si whose source operand 1 is a scalar SF
;; value; DImode result via UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE && TARGET_64BIT.
2317 (define_insn "sse_cvtss2siq_2"
2318 [(set (match_operand:DI 0 "register_operand" "=r,r")
2319 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2320 UNSPEC_FIX_NOTRUNC))]
2321 "TARGET_SSE && TARGET_64BIT"
2322 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "sseicvt")
2324 (set_attr "athlon_decode" "double,vector")
2325 (set_attr "amdfam10_decode" "double,double")
2326 (set_attr "prefix_rep" "1")
2327 (set_attr "prefix" "maybe_vex")
2328 (set_attr "mode" "DI")])
;; cvttss2si: SImode result computed from element 0 of the V4SF operand 1
;; (register or memory alternative).
2330 (define_insn "sse_cvttss2si"
2331 [(set (match_operand:SI 0 "register_operand" "=r,r")
2334 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2335 (parallel [(const_int 0)]))))]
2337 "%vcvttss2si\t{%1, %0|%0, %1}"
2338 [(set_attr "type" "sseicvt")
2339 (set_attr "athlon_decode" "double,vector")
2340 (set_attr "amdfam10_decode" "double,double")
2341 (set_attr "prefix_rep" "1")
2342 (set_attr "prefix" "maybe_vex")
2343 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: DImode result computed from element 0 of the V4SF
;; operand 1.  Requires TARGET_SSE && TARGET_64BIT.
2345 (define_insn "sse_cvttss2siq"
2346 [(set (match_operand:DI 0 "register_operand" "=r,r")
2349 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2350 (parallel [(const_int 0)]))))]
2351 "TARGET_SSE && TARGET_64BIT"
2352 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2353 [(set_attr "type" "sseicvt")
2354 (set_attr "athlon_decode" "double,vector")
2355 (set_attr "amdfam10_decode" "double,double")
2356 (set_attr "prefix_rep" "1")
2357 (set_attr "prefix" "maybe_vex")
2358 (set_attr "mode" "DI")])
;; vcvtdq2ps over the AVXMODEDCVTDQ2PS iterator: float conversion of
;; operand 1, whose mode is the <avxcvtvecmode> counterpart of the result.
2360 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2361 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2362 (float:AVXMODEDCVTDQ2PS
2363 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2365 "vcvtdq2ps\t{%1, %0|%0, %1}"
2366 [(set_attr "type" "ssecvt")
2367 (set_attr "prefix" "vex")
2368 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: V4SI -> V4SF float conversion; the source may be a register
;; or memory.
2370 (define_insn "sse2_cvtdq2ps"
2371 [(set (match_operand:V4SF 0 "register_operand" "=x")
2372 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2374 "cvtdq2ps\t{%1, %0|%0, %1}"
2375 [(set_attr "type" "ssecvt")
2376 (set_attr "mode" "V4SF")])
;; vcvtps2dq over the AVXMODEDCVTPS2DQ iterator: conversion of operand 1
;; (mode <avxcvtvecmode>) expressed as an UNSPEC_FIX_NOTRUNC unspec.
2378 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2379 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2380 (unspec:AVXMODEDCVTPS2DQ
2381 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2382 UNSPEC_FIX_NOTRUNC))]
2384 "vcvtps2dq\t{%1, %0|%0, %1}"
2385 [(set_attr "type" "ssecvt")
2386 (set_attr "prefix" "vex")
2387 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF -> V4SI conversion expressed as an UNSPEC_FIX_NOTRUNC
;; unspec; sets the prefix_data16 attribute.
2389 (define_insn "sse2_cvtps2dq"
2390 [(set (match_operand:V4SI 0 "register_operand" "=x")
2391 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2392 UNSPEC_FIX_NOTRUNC))]
2394 "cvtps2dq\t{%1, %0|%0, %1}"
2395 [(set_attr "type" "ssecvt")
2396 (set_attr "prefix_data16" "1")
2397 (set_attr "mode" "TI")])
;; vcvttps2dq over the AVXMODEDCVTPS2DQ iterator: conversion of operand 1
;; (mode <avxcvtvecmode>) expressed with the RTL "fix" code.
2399 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2400 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2401 (fix:AVXMODEDCVTPS2DQ
2402 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2404 "vcvttps2dq\t{%1, %0|%0, %1}"
2405 [(set_attr "type" "ssecvt")
2406 (set_attr "prefix" "vex")
2407 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: V4SF -> V4SI conversion expressed with the RTL "fix" code;
;; sets the prefix_rep attribute.
2409 (define_insn "sse2_cvttps2dq"
2410 [(set (match_operand:V4SI 0 "register_operand" "=x")
2411 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2413 "cvttps2dq\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "ssecvt")
2415 (set_attr "prefix_rep" "1")
2416 (set_attr "mode" "TI")])
2418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420 ;; Parallel double-precision floating point conversion operations
2422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI -> V2DF float conversion.  The source is either "y" or
;; memory; the "unit" attribute is "mmx" only for the "y" alternative.
2424 (define_insn "sse2_cvtpi2pd"
2425 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2426 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2428 "cvtpi2pd\t{%1, %0|%0, %1}"
2429 [(set_attr "type" "ssecvt")
2430 (set_attr "unit" "mmx,*")
2431 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF -> V2SI conversion expressed as an UNSPEC_FIX_NOTRUNC
;; unspec; the result uses constraint "=y" and the unit is "mmx".
2433 (define_insn "sse2_cvtpd2pi"
2434 [(set (match_operand:V2SI 0 "register_operand" "=y")
2435 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2436 UNSPEC_FIX_NOTRUNC))]
2438 "cvtpd2pi\t{%1, %0|%0, %1}"
2439 [(set_attr "type" "ssecvt")
2440 (set_attr "unit" "mmx")
2441 (set_attr "prefix_data16" "1")
2442 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF -> V2SI conversion expressed with the RTL "fix" code;
;; the result uses constraint "=y" and the unit is "mmx".
;; NOTE(review): the mode attribute is "TI" here but "DI" on sse2_cvtpd2pi
;; -- confirm which is intended.
2444 (define_insn "sse2_cvttpd2pi"
2445 [(set (match_operand:V2SI 0 "register_operand" "=y")
2446 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2448 "cvttpd2pi\t{%1, %0|%0, %1}"
2449 [(set_attr "type" "ssecvt")
2450 (set_attr "unit" "mmx")
2451 (set_attr "prefix_data16" "1")
2452 (set_attr "mode" "TI")])
;; vcvtsi2sd (VEX, three-operand form): converts the SImode operand 2 to
;; DF.  Operand 1 is a separate V2DF register input, not tied to the
;; destination.
2454 (define_insn "*avx_cvtsi2sd"
2455 [(set (match_operand:V2DF 0 "register_operand" "=x")
2458 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2459 (match_operand:V2DF 1 "register_operand" "x")
2462 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2463 [(set_attr "type" "sseicvt")
2464 (set_attr "prefix" "vex")
2465 (set_attr "mode" "DF")])
;; cvtsi2sd: converts the SImode operand 2 to DF; operand 1 is tied to the
;; destination.  Register and memory sources are separate alternatives
;; with distinct athlon/amdfam10 decode attributes.
2467 (define_insn "sse2_cvtsi2sd"
2468 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2471 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2472 (match_operand:V2DF 1 "register_operand" "0,0")
2475 "cvtsi2sd\t{%2, %0|%0, %2}"
2476 [(set_attr "type" "sseicvt")
2477 (set_attr "mode" "DF")
2478 (set_attr "athlon_decode" "double,direct")
2479 (set_attr "amdfam10_decode" "vector,double")])
;; vcvtsi2sdq: as the AVX cvtsi2sd pattern but with a DImode source
;; operand 2; enabled only for TARGET_AVX && TARGET_64BIT.
2481 (define_insn "*avx_cvtsi2sdq"
2482 [(set (match_operand:V2DF 0 "register_operand" "=x")
2485 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2486 (match_operand:V2DF 1 "register_operand" "x")
2488 "TARGET_AVX && TARGET_64BIT"
2489 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2490 [(set_attr "type" "sseicvt")
2491 (set_attr "prefix" "vex")
2492 (set_attr "mode" "DF")])
;; cvtsi2sdq: converts the DImode operand 2 to DF; operand 1 is tied to
;; the destination.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2494 (define_insn "sse2_cvtsi2sdq"
2495 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2498 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2499 (match_operand:V2DF 1 "register_operand" "0,0")
2501 "TARGET_SSE2 && TARGET_64BIT"
2502 "cvtsi2sdq\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "mode" "DF")
2505 (set_attr "athlon_decode" "double,direct")
2506 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: SImode result from element 0 of the V2DF operand 1, wrapped
;; in UNSPEC_FIX_NOTRUNC.  No amdfam10_decode attribute is set here.
2508 (define_insn "sse2_cvtsd2si"
2509 [(set (match_operand:SI 0 "register_operand" "=r,r")
2512 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2513 (parallel [(const_int 0)]))]
2514 UNSPEC_FIX_NOTRUNC))]
2516 "%vcvtsd2si\t{%1, %0|%0, %1}"
2517 [(set_attr "type" "sseicvt")
2518 (set_attr "athlon_decode" "double,vector")
2519 (set_attr "prefix_rep" "1")
2520 (set_attr "prefix" "maybe_vex")
2521 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source operand 1 is a scalar DF value rather
;; than an element selected from a V2DF vector; same UNSPEC_FIX_NOTRUNC
;; wrapper and same output template.
2523 (define_insn "sse2_cvtsd2si_2"
2524 [(set (match_operand:SI 0 "register_operand" "=r,r")
2525 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2526 UNSPEC_FIX_NOTRUNC))]
2528 "%vcvtsd2si\t{%1, %0|%0, %1}"
2529 [(set_attr "type" "sseicvt")
2530 (set_attr "athlon_decode" "double,vector")
2531 (set_attr "amdfam10_decode" "double,double")
2532 (set_attr "prefix_rep" "1")
2533 (set_attr "prefix" "maybe_vex")
2534 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: DImode result from element 0 of the V2DF operand 1,
;; wrapped in UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT.
2536 (define_insn "sse2_cvtsd2siq"
2537 [(set (match_operand:DI 0 "register_operand" "=r,r")
2540 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2541 (parallel [(const_int 0)]))]
2542 UNSPEC_FIX_NOTRUNC))]
2543 "TARGET_SSE2 && TARGET_64BIT"
2544 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2545 [(set_attr "type" "sseicvt")
2546 (set_attr "athlon_decode" "double,vector")
2547 (set_attr "prefix_rep" "1")
2548 (set_attr "prefix" "maybe_vex")
2549 (set_attr "mode" "DI")])
;; Variant of the 64-bit cvtsd2si whose source operand 1 is a scalar DF
;; value; DImode result via UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2551 (define_insn "sse2_cvtsd2siq_2"
2552 [(set (match_operand:DI 0 "register_operand" "=r,r")
2553 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2554 UNSPEC_FIX_NOTRUNC))]
2555 "TARGET_SSE2 && TARGET_64BIT"
2556 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2557 [(set_attr "type" "sseicvt")
2558 (set_attr "athlon_decode" "double,vector")
2559 (set_attr "amdfam10_decode" "double,double")
2560 (set_attr "prefix_rep" "1")
2561 (set_attr "prefix" "maybe_vex")
2562 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result computed from element 0 of the V2DF operand 1
;; (register or memory alternative).
2564 (define_insn "sse2_cvttsd2si"
2565 [(set (match_operand:SI 0 "register_operand" "=r,r")
2568 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2569 (parallel [(const_int 0)]))))]
2571 "%vcvttsd2si\t{%1, %0|%0, %1}"
2572 [(set_attr "type" "sseicvt")
2573 (set_attr "prefix_rep" "1")
2574 (set_attr "prefix" "maybe_vex")
2575 (set_attr "mode" "SI")
2576 (set_attr "athlon_decode" "double,vector")
2577 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit cvttsd2si: DImode result computed from element 0 of the V2DF
;; operand 1.  Requires TARGET_SSE2 && TARGET_64BIT.
2579 (define_insn "sse2_cvttsd2siq"
2580 [(set (match_operand:DI 0 "register_operand" "=r,r")
2583 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2584 (parallel [(const_int 0)]))))]
2585 "TARGET_SSE2 && TARGET_64BIT"
2586 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2587 [(set_attr "type" "sseicvt")
2588 (set_attr "prefix_rep" "1")
2589 (set_attr "prefix" "maybe_vex")
2590 (set_attr "mode" "DI")
2591 (set_attr "athlon_decode" "double,vector")
2592 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd (256-bit result): V4SI -> V4DF float conversion; the source
;; may be a register or memory.
2594 (define_insn "avx_cvtdq2pd256"
2595 [(set (match_operand:V4DF 0 "register_operand" "=x")
2596 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2598 "vcvtdq2pd\t{%1, %0|%0, %1}"
2599 [(set_attr "type" "ssecvt")
2600 (set_attr "prefix" "vex")
2601 (set_attr "mode" "V4DF")])
;; cvtdq2pd: V2DF result computed from elements 0 and 1 of the V4SI
;; operand 1.  "%v" in the template goes with the "maybe_vex" prefix.
2603 (define_insn "sse2_cvtdq2pd"
2604 [(set (match_operand:V2DF 0 "register_operand" "=x")
2607 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2608 (parallel [(const_int 0) (const_int 1)]))))]
2610 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2611 [(set_attr "type" "ssecvt")
2612 (set_attr "prefix" "maybe_vex")
2613 (set_attr "mode" "V2DF")])
;; vcvtpd2dq (256-bit source): V4DF -> V4SI conversion expressed as an
;; UNSPEC_FIX_NOTRUNC unspec.  The "{y}" in the template is a
;; dialect-specific mnemonic suffix.
2615 (define_insn "avx_cvtpd2dq256"
2616 [(set (match_operand:V4SI 0 "register_operand" "=x")
2617 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2618 UNSPEC_FIX_NOTRUNC))]
2620 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2621 [(set_attr "type" "ssecvt")
2622 (set_attr "prefix" "vex")
2623 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq with a V4SI destination: the V2DF source is
;; converted through a V2SI unspec, and the preparation statement sets
;; operands[2] to the V2SImode zero constant.
2625 (define_expand "sse2_cvtpd2dq"
2626 [(set (match_operand:V4SI 0 "register_operand" "")
2628 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2632 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvtpd2dq: V2SI unspec of the V2DF operand 1 combined with
;; operand 2, which must satisfy const0_operand.  The C output statement
;; returns the "vcvtpd2dq{x}" template under TARGET_AVX and plain
;; "cvtpd2dq" otherwise.
2634 (define_insn "*sse2_cvtpd2dq"
2635 [(set (match_operand:V4SI 0 "register_operand" "=x")
2637 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2639 (match_operand:V2SI 2 "const0_operand" "")))]
2641 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2642 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2643 [(set_attr "type" "ssecvt")
2644 (set_attr "prefix_rep" "1")
2645 (set_attr "prefix" "maybe_vex")
2646 (set_attr "mode" "TI")
2647 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq (256-bit source): V4DF -> V4SI conversion expressed with the
;; RTL "fix" code.
2649 (define_insn "avx_cvttpd2dq256"
2650 [(set (match_operand:V4SI 0 "register_operand" "=x")
2651 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2653 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2654 [(set_attr "type" "ssecvt")
2655 (set_attr "prefix" "vex")
2656 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq with a V4SI destination: the V2DF source goes
;; through fix:V2SI, and the preparation statement sets operands[2] to the
;; V2SImode zero constant.
2658 (define_expand "sse2_cvttpd2dq"
2659 [(set (match_operand:V4SI 0 "register_operand" "")
2661 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2664 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvttpd2dq: fix:V2SI of the V2DF operand 1 combined with
;; operand 2, which must satisfy const0_operand.  The C output statement
;; returns the "vcvttpd2dq{x}" template under TARGET_AVX and plain
;; "cvttpd2dq" otherwise.
2666 (define_insn "*sse2_cvttpd2dq"
2667 [(set (match_operand:V4SI 0 "register_operand" "=x")
2669 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2670 (match_operand:V2SI 2 "const0_operand" "")))]
2672 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2673 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2674 [(set_attr "type" "ssecvt")
2675 (set_attr "prefix_rep" "1")
2676 (set_attr "prefix" "maybe_vex")
2677 (set_attr "mode" "TI")
2678 (set_attr "amdfam10_decode" "double")])
;; vcvtsd2ss (VEX, three-operand form): float_truncate:V2SF of the V2DF
;; operand 2.  Operand 1 is a separate V4SF register input, not tied to
;; the destination.
2680 (define_insn "*avx_cvtsd2ss"
2681 [(set (match_operand:V4SF 0 "register_operand" "=x")
2684 (float_truncate:V2SF
2685 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2686 (match_operand:V4SF 1 "register_operand" "x")
2689 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2690 [(set_attr "type" "ssecvt")
2691 (set_attr "prefix" "vex")
2692 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate:V2SF of the V2DF operand 2; operand 1 is tied
;; to the destination.  Register and memory sources are separate
;; alternatives with distinct decode attributes.
2694 (define_insn "sse2_cvtsd2ss"
2695 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2698 (float_truncate:V2SF
2699 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2700 (match_operand:V4SF 1 "register_operand" "0,0")
2703 "cvtsd2ss\t{%2, %0|%0, %2}"
2704 [(set_attr "type" "ssecvt")
2705 (set_attr "athlon_decode" "vector,double")
2706 (set_attr "amdfam10_decode" "vector,double")
2707 (set_attr "mode" "SF")])
;; vcvtss2sd (VEX, three-operand form): uses elements 0 and 1 of the V4SF
;; operand 2.  Operand 1 is a separate V2DF register input, not tied to
;; the destination.
2709 (define_insn "*avx_cvtss2sd"
2710 [(set (match_operand:V2DF 0 "register_operand" "=x")
2714 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2715 (parallel [(const_int 0) (const_int 1)])))
2716 (match_operand:V2DF 1 "register_operand" "x")
2719 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2720 [(set_attr "type" "ssecvt")
2721 (set_attr "prefix" "vex")
2722 (set_attr "mode" "DF")])
;; cvtss2sd: uses elements 0 and 1 of the V4SF operand 2; operand 1 is
;; tied to the destination.  Only amdfam10_decode is set per alternative.
2724 (define_insn "sse2_cvtss2sd"
2725 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2729 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2730 (parallel [(const_int 0) (const_int 1)])))
2731 (match_operand:V2DF 1 "register_operand" "0,0")
2734 "cvtss2sd\t{%2, %0|%0, %2}"
2735 [(set_attr "type" "ssecvt")
2736 (set_attr "amdfam10_decode" "vector,double")
2737 (set_attr "mode" "DF")])
;; vcvtpd2ps (256-bit source): V4DF -> V4SF float_truncate; the source may
;; be a register or memory.
2739 (define_insn "avx_cvtpd2ps256"
2740 [(set (match_operand:V4SF 0 "register_operand" "=x")
2741 (float_truncate:V4SF
2742 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2744 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2745 [(set_attr "type" "ssecvt")
2746 (set_attr "prefix" "vex")
2747 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps with a V4SF destination: the V2DF source goes
;; through float_truncate:V2SF, and the preparation statement sets
;; operands[2] to the V2SFmode zero constant.
2749 (define_expand "sse2_cvtpd2ps"
2750 [(set (match_operand:V4SF 0 "register_operand" "")
2752 (float_truncate:V2SF
2753 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2756 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for cvtpd2ps: float_truncate:V2SF of the V2DF operand 1 combined
;; with operand 2, which must satisfy const0_operand.  The C output
;; statement returns the "vcvtpd2ps{x}" template under TARGET_AVX and
;; plain "cvtpd2ps" otherwise.
2758 (define_insn "*sse2_cvtpd2ps"
2759 [(set (match_operand:V4SF 0 "register_operand" "=x")
2761 (float_truncate:V2SF
2762 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2763 (match_operand:V2SF 2 "const0_operand" "")))]
2765 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2766 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2767 [(set_attr "type" "ssecvt")
2768 (set_attr "prefix_data16" "1")
2769 (set_attr "prefix" "maybe_vex")
2770 (set_attr "mode" "V4SF")
2771 (set_attr "amdfam10_decode" "double")])
;; vcvtps2pd (256-bit result): V4DF destination computed from the V4SF
;; operand 1 (register or memory).
2773 (define_insn "avx_cvtps2pd256"
2774 [(set (match_operand:V4DF 0 "register_operand" "=x")
2776 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2778 "vcvtps2pd\t{%1, %0|%0, %1}"
2779 [(set_attr "type" "ssecvt")
2780 (set_attr "prefix" "vex")
2781 (set_attr "mode" "V4DF")])
;; cvtps2pd: V2DF result computed from elements 0 and 1 of the V4SF
;; operand 1.  "%v" in the template goes with the "maybe_vex" prefix.
2783 (define_insn "sse2_cvtps2pd"
2784 [(set (match_operand:V2DF 0 "register_operand" "=x")
2787 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2788 (parallel [(const_int 0) (const_int 1)]))))]
2790 "%vcvtps2pd\t{%1, %0|%0, %1}"
2791 [(set_attr "type" "ssecvt")
2792 (set_attr "prefix" "maybe_vex")
2793 (set_attr "mode" "V2DF")
2794 (set_attr "amdfam10_decode" "direct")])
;; Expander producing a V2DF result from the V4SF operand 1 in two sets:
;; a selection step whose index list begins with element 6, followed by a
;; set of operand 0 that uses elements 0 and 1.  The preparation code
;; allocates operands[2] as a fresh V4SF pseudo register.
2796 (define_expand "vec_unpacks_hi_v4sf"
2801 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2802 (parallel [(const_int 6)
2806 (set (match_operand:V2DF 0 "register_operand" "")
2810 (parallel [(const_int 0) (const_int 1)]))))]
2813 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF result from elements 0 and 1 of the V4SF
;; operand 1.
2816 (define_expand "vec_unpacks_lo_v4sf"
2817 [(set (match_operand:V2DF 0 "register_operand" "")
2820 (match_operand:V4SF 1 "nonimmediate_operand" "")
2821 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then converts the temporary into operand 0
;; with gen_sse2_cvtdq2ps.
2824 (define_expand "vec_unpacks_float_hi_v8hi"
2825 [(match_operand:V4SF 0 "register_operand" "")
2826 (match_operand:V8HI 1 "register_operand" "")]
2829 rtx tmp = gen_reg_rtx (V4SImode);
2831 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2832 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then converts the temporary into operand 0
;; with gen_sse2_cvtdq2ps.
2836 (define_expand "vec_unpacks_float_lo_v8hi"
2837 [(match_operand:V4SF 0 "register_operand" "")
2838 (match_operand:V8HI 1 "register_operand" "")]
2841 rtx tmp = gen_reg_rtx (V4SImode);
2843 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2844 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then converts the temporary into operand 0
;; with gen_sse2_cvtdq2ps.
2848 (define_expand "vec_unpacku_float_hi_v8hi"
2849 [(match_operand:V4SF 0 "register_operand" "")
2850 (match_operand:V8HI 1 "register_operand" "")]
2853 rtx tmp = gen_reg_rtx (V4SImode);
2855 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2856 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then converts the temporary into operand 0
;; with gen_sse2_cvtdq2ps.
2860 (define_expand "vec_unpacku_float_lo_v8hi"
2861 [(match_operand:V4SF 0 "register_operand" "")
2862 (match_operand:V8HI 1 "register_operand" "")]
2865 rtx tmp = gen_reg_rtx (V4SImode);
2867 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2868 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing a V2DF result from the V4SI operand 1 in two sets:
;; a selection step whose index list begins with element 2, followed by a
;; set of operand 0 that uses elements 0 and 1.  The preparation code
;; allocates operands[2] as a fresh V4SI pseudo register.
2872 (define_expand "vec_unpacks_float_hi_v4si"
2875 (match_operand:V4SI 1 "nonimmediate_operand" "")
2876 (parallel [(const_int 2)
2880 (set (match_operand:V2DF 0 "register_operand" "")
2884 (parallel [(const_int 0) (const_int 1)]))))]
2887 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF result from elements 0 and 1 of the V4SI
;; operand 1.
2890 (define_expand "vec_unpacks_float_lo_v4si"
2891 [(set (match_operand:V2DF 0 "register_operand" "")
2894 (match_operand:V4SI 1 "nonimmediate_operand" "")
2895 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs into one V4SF result: each input is converted
;; into its own V4SF temporary with gen_sse2_cvtpd2ps, and the two
;; temporaries are combined into operand 0 with gen_sse_movlhps.
2898 (define_expand "vec_pack_trunc_v2df"
2899 [(match_operand:V4SF 0 "register_operand" "")
2900 (match_operand:V2DF 1 "nonimmediate_operand" "")
2901 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2906 r1 = gen_reg_rtx (V4SFmode);
2907 r2 = gen_reg_rtx (V4SFmode);
2909 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2910 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2911 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result: each input is converted
;; into its own V4SI temporary with gen_sse2_cvttpd2dq, and the two
;; temporaries are combined with gen_sse2_punpcklqdq, with all three
;; operands viewed as V2DI via gen_lowpart.
2915 (define_expand "vec_pack_sfix_trunc_v2df"
2916 [(match_operand:V4SI 0 "register_operand" "")
2917 (match_operand:V2DF 1 "nonimmediate_operand" "")
2918 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2923 r1 = gen_reg_rtx (V4SImode);
2924 r2 = gen_reg_rtx (V4SImode);
2926 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2927 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2928 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2929 gen_lowpart (V2DImode, r1),
2930 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into one V4SI result: each input is converted
;; into its own V4SI temporary with gen_sse2_cvtpd2dq, and the two
;; temporaries are combined with gen_sse2_punpcklqdq, with all three
;; operands viewed as V2DI via gen_lowpart.
2934 (define_expand "vec_pack_sfix_v2df"
2935 [(match_operand:V4SI 0 "register_operand" "")
2936 (match_operand:V2DF 1 "nonimmediate_operand" "")
2937 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2942 r1 = gen_reg_rtx (V4SImode);
2943 r2 = gen_reg_rtx (V4SImode);
2945 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2946 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2947 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2948 gen_lowpart (V2DImode, r1),
2949 gen_lowpart (V2DImode, r2)));
2953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2955 ;; Parallel single-precision floating point element swizzling
2957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for movhlps on V4SF: all three operands are accepted as
;; nonimmediate_operand, and the preparation statement calls
;; ix86_fixup_binary_operands on them before the pattern is emitted.
2959 (define_expand "sse_movhlps_exp"
2960 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2963 (match_operand:V4SF 1 "nonimmediate_operand" "")
2964 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2965 (parallel [(const_int 6)
2970 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps insn with three alternatives and one template each:
;; register-register (vmovhlps), offsettable-memory source (vmovlps
;; reading %H2), and memory destination tied to operand 1 (vmovhps store).
;; The condition rejects operands 1 and 2 both being memory.
2972 (define_insn "*avx_movhlps"
2973 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2976 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
2977 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2978 (parallel [(const_int 6)
2982 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2984 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2985 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2986 vmovhps\t{%2, %0|%0, %2}"
2987 [(set_attr "type" "ssemov")
2988 (set_attr "prefix" "vex")
2989 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movhlps insn: same three alternatives as the AVX form, but operand
;; 1 is tied to the destination in every alternative and the templates
;; are two-operand (movhlps / movlps reading %H2 / movhps store).
2991 (define_insn "sse_movhlps"
2992 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2995 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2996 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2997 (parallel [(const_int 6)
3001 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3003 movhlps\t{%2, %0|%0, %2}
3004 movlps\t{%H2, %0|%0, %H2}
3005 movhps\t{%2, %0|%0, %2}"
3006 [(set_attr "type" "ssemov")
3007 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the V4SF movlhps pattern.  Like the movhlps
;; expander, it only runs ix86_fixup_binary_operands over the operand
;; array before the insn pattern is emitted.
3009 (define_expand "sse_movlhps_exp"
3010 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3013 (match_operand:V4SF 1 "nonimmediate_operand" "")
3014 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3015 (parallel [(const_int 0)
3020 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; VEX-encoded movlhps on V4SF, valid when ix86_binary_operator_ok
;; accepts the operands.  Alternatives:
;;   0: all registers                                    -> vmovlhps
;;   1: operand 2 in memory                              -> vmovhps load
;;   2: offsettable memory destination tied to operand 1 -> vmovlps
;;      storing operand 2 at %H0
3022 (define_insn "*avx_movlhps"
3023 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3026 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3027 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3028 (parallel [(const_int 0)
3032 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3034 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3035 vmovhps\t{%2, %1, %0|%0, %1, %2}
3036 vmovlps\t{%2, %H0|%H0, %2}"
3037 [(set_attr "type" "ssemov")
3038 (set_attr "prefix" "vex")
3039 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Two-operand SSE movlhps on V4SF; operand 1 is tied to the destination
;; in every alternative.  Alternatives:
;;   0: register source               -> movlhps
;;   1: memory source                 -> movhps load
;;   2: offsettable memory destination -> movlps storing operand 2 at %H0
3041 (define_insn "sse_movlhps"
3042 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3045 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3046 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3047 (parallel [(const_int 0)
3051 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3053 movlhps\t{%2, %0|%0, %2}
3054 movhps\t{%2, %0|%0, %2}
3055 movlps\t{%2, %H0|%H0, %2}"
3056 [(set_attr "type" "ssemov")
3057 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps on V8SF.  The selector picks elements 2,10,3,11 and
;; 6,14,7,15 of the operand pair, i.e. the upper two floats of each
;; 128-bit half, alternating between operand 1 and operand 2.
3059 (define_insn "avx_unpckhps256"
3060 [(set (match_operand:V8SF 0 "register_operand" "=x")
3063 (match_operand:V8SF 1 "register_operand" "x")
3064 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3065 (parallel [(const_int 2) (const_int 10)
3066 (const_int 3) (const_int 11)
3067 (const_int 6) (const_int 14)
3068 (const_int 7) (const_int 15)])))]
3070 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3071 [(set_attr "type" "sselog")
3072 (set_attr "prefix" "vex")
3073 (set_attr "mode" "V8SF")])
;; 128-bit VEX-encoded vunpckhps on V4SF: selects elements 2,6,3,7 of
;; the operand pair.  Three-operand form, so the destination is not tied
;; to operand 1.
3075 (define_insn "*avx_unpckhps"
3076 [(set (match_operand:V4SF 0 "register_operand" "=x")
3079 (match_operand:V4SF 1 "register_operand" "x")
3080 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3081 (parallel [(const_int 2) (const_int 6)
3082 (const_int 3) (const_int 7)])))]
3084 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3085 [(set_attr "type" "sselog")
3086 (set_attr "prefix" "vex")
3087 (set_attr "mode" "V4SF")])
;; Two-operand SSE unpckhps on V4SF: selects elements 2,6,3,7 of the
;; operand pair.  Operand 1 is tied to the destination ("0").
3089 (define_insn "sse_unpckhps"
3090 [(set (match_operand:V4SF 0 "register_operand" "=x")
3093 (match_operand:V4SF 1 "register_operand" "0")
3094 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3095 (parallel [(const_int 2) (const_int 6)
3096 (const_int 3) (const_int 7)])))]
3098 "unpckhps\t{%2, %0|%0, %2}"
3099 [(set_attr "type" "sselog")
3100 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps on V8SF.  The selector picks elements 0,8,1,9 and
;; 4,12,5,13 of the operand pair, i.e. the lower two floats of each
;; 128-bit half, alternating between operand 1 and operand 2.
3102 (define_insn "avx_unpcklps256"
3103 [(set (match_operand:V8SF 0 "register_operand" "=x")
3106 (match_operand:V8SF 1 "register_operand" "x")
3107 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3108 (parallel [(const_int 0) (const_int 8)
3109 (const_int 1) (const_int 9)
3110 (const_int 4) (const_int 12)
3111 (const_int 5) (const_int 13)])))]
3113 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3114 [(set_attr "type" "sselog")
3115 (set_attr "prefix" "vex")
3116 (set_attr "mode" "V8SF")])
;; 128-bit VEX-encoded vunpcklps on V4SF: selects elements 0,4,1,5 of
;; the operand pair.  Three-operand form, destination not tied.
3118 (define_insn "*avx_unpcklps"
3119 [(set (match_operand:V4SF 0 "register_operand" "=x")
3122 (match_operand:V4SF 1 "register_operand" "x")
3123 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3124 (parallel [(const_int 0) (const_int 4)
3125 (const_int 1) (const_int 5)])))]
3127 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3128 [(set_attr "type" "sselog")
3129 (set_attr "prefix" "vex")
3130 (set_attr "mode" "V4SF")])
;; Two-operand SSE unpcklps on V4SF: selects elements 0,4,1,5 of the
;; operand pair.  Operand 1 is tied to the destination ("0").
3132 (define_insn "sse_unpcklps"
3133 [(set (match_operand:V4SF 0 "register_operand" "=x")
3136 (match_operand:V4SF 1 "register_operand" "0")
3137 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3138 (parallel [(const_int 0) (const_int 4)
3139 (const_int 1) (const_int 5)])))]
3141 "unpcklps\t{%2, %0|%0, %2}"
3142 [(set_attr "type" "sselog")
3143 (set_attr "mode" "V4SF")])
3145 ;; These are modeled with the same vec_concat as the others so that we
3146 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup on V8SF: a single source operand (register or
;; memory); the selector 1,1,3,3,5,5,7,7 duplicates each odd-numbered
;; element into the even slot below it.
3147 (define_insn "avx_movshdup256"
3148 [(set (match_operand:V8SF 0 "register_operand" "=x")
3151 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3153 (parallel [(const_int 1) (const_int 1)
3154 (const_int 3) (const_int 3)
3155 (const_int 5) (const_int 5)
3156 (const_int 7) (const_int 7)])))]
3158 "vmovshdup\t{%1, %0|%0, %1}"
3159 [(set_attr "type" "sse")
3160 (set_attr "prefix" "vex")
3161 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF with a single register-or-memory source.
;; The template starts with %v and the "prefix" attribute is maybe_vex,
;; so one pattern serves both the legacy and the VEX encoding.
3163 (define_insn "sse3_movshdup"
3164 [(set (match_operand:V4SF 0 "register_operand" "=x")
3167 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3169 (parallel [(const_int 1)
3174 "%vmovshdup\t{%1, %0|%0, %1}"
3175 [(set_attr "type" "sse")
3176 (set_attr "prefix_rep" "1")
3177 (set_attr "prefix" "maybe_vex")
3178 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup on V8SF: a single source operand (register or
;; memory); the selector 0,0,2,2,4,4,6,6 duplicates each even-numbered
;; element into the odd slot above it.
3180 (define_insn "avx_movsldup256"
3181 [(set (match_operand:V8SF 0 "register_operand" "=x")
3184 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3186 (parallel [(const_int 0) (const_int 0)
3187 (const_int 2) (const_int 2)
3188 (const_int 4) (const_int 4)
3189 (const_int 6) (const_int 6)])))]
3191 "vmovsldup\t{%1, %0|%0, %1}"
3192 [(set_attr "type" "sse")
3193 (set_attr "prefix" "vex")
3194 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF with a single register-or-memory source.
;; The template starts with %v and the "prefix" attribute is maybe_vex,
;; so one pattern serves both the legacy and the VEX encoding.
3196 (define_insn "sse3_movsldup"
3197 [(set (match_operand:V4SF 0 "register_operand" "=x")
3200 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3202 (parallel [(const_int 0)
3207 "%vmovsldup\t{%1, %0|%0, %1}"
3208 [(set_attr "type" "sse")
3209 (set_attr "prefix_rep" "1")
3210 (set_attr "prefix" "maybe_vex")
3211 (set_attr "mode" "V4SF")])
;; Expander for 256-bit shufps taking the 8-bit immediate as operand 3.
;; It decodes the four 2-bit fields of the mask into eight explicit
;; element indices for avx_shufps256_1: fields 0 and 1 index operand 1
;; (offsets +0 and +4 for the two 128-bit halves), fields 2 and 3 index
;; operand 2 (offsets +8 and +12).
3213 (define_expand "avx_shufps256"
3214 [(match_operand:V8SF 0 "register_operand" "")
3215 (match_operand:V8SF 1 "register_operand" "")
3216 (match_operand:V8SF 2 "nonimmediate_operand" "")
3217 (match_operand:SI 3 "const_int_operand" "")]
3220 int mask = INTVAL (operands[3]);
3221 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3222 GEN_INT ((mask >> 0) & 3),
3223 GEN_INT ((mask >> 2) & 3),
3224 GEN_INT (((mask >> 4) & 3) + 8),
3225 GEN_INT (((mask >> 6) & 3) + 8),
3226 GEN_INT (((mask >> 0) & 3) + 4),
3227 GEN_INT (((mask >> 2) & 3) + 4),
3228 GEN_INT (((mask >> 4) & 3) + 12),
3229 GEN_INT (((mask >> 6) & 3) + 12)));
3233 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with the selection spelled out as eight element
;; indices (operands 3-10).  The condition requires the indices for the
;; upper 128-bit half (operands 7-10) to equal those for the lower half
;; (operands 3-6) plus 4, since one immediate controls both halves.
;; The output code rebuilds the 8-bit immediate from operands 3-6 and
;; stores it back into operands[3] for printing.
3234 (define_insn "avx_shufps256_1"
3235 [(set (match_operand:V8SF 0 "register_operand" "=x")
3238 (match_operand:V8SF 1 "register_operand" "x")
3239 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3240 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3241 (match_operand 4 "const_0_to_3_operand" "")
3242 (match_operand 5 "const_8_to_11_operand" "")
3243 (match_operand 6 "const_8_to_11_operand" "")
3244 (match_operand 7 "const_4_to_7_operand" "")
3245 (match_operand 8 "const_4_to_7_operand" "")
3246 (match_operand 9 "const_12_to_15_operand" "")
3247 (match_operand 10 "const_12_to_15_operand" "")])))]
3249 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3250 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3251 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3252 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3255 mask = INTVAL (operands[3]);
3256 mask |= INTVAL (operands[4]) << 2;
3257 mask |= (INTVAL (operands[5]) - 8) << 4;
3258 mask |= (INTVAL (operands[6]) - 8) << 6;
3259 operands[3] = GEN_INT (mask);
3261 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3263 [(set_attr "type" "sselog")
3264 (set_attr "prefix" "vex")
3265 (set_attr "mode" "V8SF")])
;; Expander for 128-bit shufps taking the 8-bit immediate as operand 3.
;; It decodes the four 2-bit fields of the mask into explicit element
;; indices for sse_shufps_v4sf: fields 0 and 1 index operand 1 (0..3),
;; fields 2 and 3 index operand 2 (offset +4).
3267 (define_expand "sse_shufps"
3268 [(match_operand:V4SF 0 "register_operand" "")
3269 (match_operand:V4SF 1 "register_operand" "")
3270 (match_operand:V4SF 2 "nonimmediate_operand" "")
3271 (match_operand:SI 3 "const_int_operand" "")]
3274 int mask = INTVAL (operands[3]);
3275 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3276 GEN_INT ((mask >> 0) & 3),
3277 GEN_INT ((mask >> 2) & 3),
3278 GEN_INT (((mask >> 4) & 3) + 4),
3279 GEN_INT (((mask >> 6) & 3) + 4)));
;; VEX-encoded vshufps for every mode in the SSEMODE4S iterator.
;; Operands 3-6 are the four selected element indices (two from
;; operand 1, two from operand 2).  The output code folds them back
;; into the 8-bit immediate and stores it in operands[3] for printing.
3283 (define_insn "*avx_shufps_<mode>"
3284 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3285 (vec_select:SSEMODE4S
3286 (vec_concat:<ssedoublesizemode>
3287 (match_operand:SSEMODE4S 1 "register_operand" "x")
3288 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3289 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3290 (match_operand 4 "const_0_to_3_operand" "")
3291 (match_operand 5 "const_4_to_7_operand" "")
3292 (match_operand 6 "const_4_to_7_operand" "")])))]
3296 mask |= INTVAL (operands[3]) << 0;
3297 mask |= INTVAL (operands[4]) << 2;
3298 mask |= (INTVAL (operands[5]) - 4) << 4;
3299 mask |= (INTVAL (operands[6]) - 4) << 6;
3300 operands[3] = GEN_INT (mask);
3302 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3304 [(set_attr "type" "sselog")
3305 (set_attr "prefix" "vex")
3306 (set_attr "mode" "V4SF")])
;; Two-operand SSE shufps for every mode in the SSEMODE4S iterator;
;; operand 1 is tied to the destination.  Operands 3-6 are the four
;; selected element indices; the output code folds them back into the
;; 8-bit immediate and stores it in operands[3] for printing.
3308 (define_insn "sse_shufps_<mode>"
3309 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3310 (vec_select:SSEMODE4S
3311 (vec_concat:<ssedoublesizemode>
3312 (match_operand:SSEMODE4S 1 "register_operand" "0")
3313 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3314 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3315 (match_operand 4 "const_0_to_3_operand" "")
3316 (match_operand 5 "const_4_to_7_operand" "")
3317 (match_operand 6 "const_4_to_7_operand" "")])))]
3321 mask |= INTVAL (operands[3]) << 0;
3322 mask |= INTVAL (operands[4]) << 2;
3323 mask |= (INTVAL (operands[5]) - 4) << 4;
3324 mask |= (INTVAL (operands[6]) - 4) << 6;
3325 operands[3] = GEN_INT (mask);
3327 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3329 [(set_attr "type" "sselog")
3330 (set_attr "mode" "V4SF")])
;; Extract the high two floats (elements 2 and 3) of V4SF operand 1
;; into V2SF operand 0.  Alternatives:
;;   0: register -> memory              movhps store
;;   1: register -> register            movhlps
;;   2: offsettable memory -> register  movlps reading %H1
;; Every template carries the %v prefix (prefix attribute maybe_vex).
3332 (define_insn "sse_storehps"
3333 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3335 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3336 (parallel [(const_int 2) (const_int 3)])))]
3339 %vmovhps\t{%1, %0|%0, %1}
3340 %vmovhlps\t{%1, %d0|%d0, %1}
3341 %vmovlps\t{%H1, %d0|%d0, %H1}"
3342 [(set_attr "type" "ssemov")
3343 (set_attr "prefix" "maybe_vex")
3344 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for loadhps: the result combines elements 0 and 1
;; of V4SF operand 1 with V2SF operand 2.  The preparation statement
;; runs ix86_fixup_binary_operands over the operand array.
3346 (define_expand "sse_loadhps_exp"
3347 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3350 (match_operand:V4SF 1 "nonimmediate_operand" "")
3351 (parallel [(const_int 0) (const_int 1)]))
3352 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3354 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; VEX-encoded loadhps: keeps elements 0 and 1 of operand 1 and takes
;; the other half from V2SF operand 2.  Alternatives:
;;   0: operand 2 in memory      -> vmovhps load
;;   1: operand 2 in a register  -> vmovlhps
;;   2: offsettable memory destination tied to operand 1 -> vmovlps
;;      storing operand 2 at %H0
3356 (define_insn "*avx_loadhps"
3357 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3360 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3361 (parallel [(const_int 0) (const_int 1)]))
3362 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3365 vmovhps\t{%2, %1, %0|%0, %1, %2}
3366 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3367 vmovlps\t{%2, %H0|%H0, %2}"
3368 [(set_attr "type" "ssemov")
3369 (set_attr "prefix" "vex")
3370 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Two-operand SSE loadhps; operand 1 is tied to the destination in
;; every alternative.  Alternatives:
;;   0: operand 2 in memory      -> movhps load
;;   1: operand 2 in a register  -> movlhps
;;   2: offsettable memory destination -> movlps storing operand 2 at %H0
3372 (define_insn "sse_loadhps"
3373 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3376 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3377 (parallel [(const_int 0) (const_int 1)]))
3378 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3381 movhps\t{%2, %0|%0, %2}
3382 movlhps\t{%2, %0|%0, %2}
3383 movlps\t{%2, %H0|%H0, %2}"
3384 [(set_attr "type" "ssemov")
3385 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded extraction of the low two floats (elements 0 and 1) of
;; V4SF operand 1 into V2SF operand 0.  Alternatives:
;;   0: register -> memory    vmovlps store
;;   1: register -> register  full-width vmovaps copy
;;   2: memory -> register    three-operand vmovlps load
;; The "mode" attribute of alternative 1 is V4SF, which is what vmovaps
;; operates on and what the non-AVX sse_storelps pattern declares.
3387 (define_insn "*avx_storelps"
3388 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3390 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3391 (parallel [(const_int 0) (const_int 1)])))]
3394 vmovlps\t{%1, %0|%0, %1}
3395 vmovaps\t{%1, %0|%0, %1}
3396 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3397 [(set_attr "type" "ssemov")
3398 (set_attr "prefix" "vex")
3399 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE extraction of the low two floats (elements 0 and 1) of V4SF
;; operand 1 into V2SF operand 0.  Alternatives:
;;   0: register -> memory    movlps store
;;   1: register -> register  full-width movaps copy
;;   2: memory -> register    movlps load
3401 (define_insn "sse_storelps"
3402 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3404 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3405 (parallel [(const_int 0) (const_int 1)])))]
3408 movlps\t{%1, %0|%0, %1}
3409 movaps\t{%1, %0|%0, %1}
3410 movlps\t{%1, %0|%0, %1}"
3411 [(set_attr "type" "ssemov")
3412 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for loadlps: the result combines V2SF operand 2
;; with elements 2 and 3 of V4SF operand 1.  The preparation statement
;; runs ix86_fixup_binary_operands over the operand array.
3414 (define_expand "sse_loadlps_exp"
3415 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3417 (match_operand:V2SF 2 "nonimmediate_operand" "")
3419 (match_operand:V4SF 1 "nonimmediate_operand" "")
3420 (parallel [(const_int 2) (const_int 3)]))))]
3422 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; VEX-encoded loadlps: the result takes V2SF operand 2 together with
;; elements 2 and 3 of V4SF operand 1.  Alternatives:
;;   0: both inputs in registers -> vshufps with immediate 0xe4
;;   1: operand 2 in memory      -> three-operand vmovlps load
;;   2: memory destination tied to operand 1 -> vmovlps store of operand 2
;; Alternative 0 must use the v-prefixed mnemonic: the template has
;; three register operands and the insn is marked prefix "vex", which
;; the legacy two-operand shufps cannot express.
3424 (define_insn "*avx_loadlps"
3425 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3427 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3429 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3430 (parallel [(const_int 2) (const_int 3)]))))]
3433 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3434 vmovlps\t{%2, %1, %0|%0, %1, %2}
3435 vmovlps\t{%2, %0|%0, %2}"
3436 [(set_attr "type" "sselog,ssemov,ssemov")
3437 (set_attr "prefix" "vex")
3438 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Two-operand SSE loadlps: the result takes V2SF operand 2 together
;; with elements 2 and 3 of V4SF operand 1.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied -> movlps load
;;   2: memory destination, operand 1 tied  -> movlps store of operand 2
3440 (define_insn "sse_loadlps"
3441 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3443 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3445 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3446 (parallel [(const_int 2) (const_int 3)]))))]
3449 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3450 movlps\t{%2, %0|%0, %2}
3451 movlps\t{%2, %0|%0, %2}"
3452 [(set_attr "type" "sselog,ssemov,ssemov")
3453 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; VEX-encoded register-to-register vmovss on V4SF operands, printed in
;; three-operand form (operand 2, operand 1, destination).
3455 (define_insn "*avx_movss"
3456 [(set (match_operand:V4SF 0 "register_operand" "=x")
3458 (match_operand:V4SF 2 "register_operand" "x")
3459 (match_operand:V4SF 1 "register_operand" "x")
3462 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3463 [(set_attr "type" "ssemov")
3464 (set_attr "prefix" "vex")
3465 (set_attr "mode" "SF")])
;; Two-operand SSE register-to-register movss on V4SF operands;
;; operand 1 is tied to the destination and operand 2 is the source.
3467 (define_insn "sse_movss"
3468 [(set (match_operand:V4SF 0 "register_operand" "=x")
3470 (match_operand:V4SF 2 "register_operand" "x")
3471 (match_operand:V4SF 1 "register_operand" "0")
3474 "movss\t{%2, %0|%0, %2}"
3475 [(set_attr "type" "ssemov")
3476 (set_attr "mode" "SF")])
;; VEX-encoded broadcast of SF register operand 1 into a V4SF register:
;; vshufps with immediate 0 using operand 1 as both sources.
3478 (define_insn "*vec_dupv4sf_avx"
3479 [(set (match_operand:V4SF 0 "register_operand" "=x")
3481 (match_operand:SF 1 "register_operand" "x")))]
3483 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3484 [(set_attr "type" "sselog1")
3485 (set_attr "prefix" "vex")
3486 (set_attr "mode" "V4SF")])
;; SSE broadcast of SF operand 1 into a V4SF register.  The source is
;; tied to the destination ("0"), so the in-place shufps with
;; immediate 0 uses operand 0 for both sources.
3488 (define_insn "*vec_dupv4sf"
3489 [(set (match_operand:V4SF 0 "register_operand" "=x")
3491 (match_operand:SF 1 "register_operand" "0")))]
3493 "shufps\t{$0, %0, %0|%0, %0, 0}"
3494 [(set_attr "type" "sselog1")
3495 (set_attr "mode" "V4SF")])
;; Build a V2SF value from two SF operands, AVX variant.  Alternatives:
;;   0: both in SSE registers            -> vunpcklps
;;   1: operand 2 in memory              -> vinsertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 "C" constant -> vmovss load
;;   3: MMX register destination         -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd load
;; The MMX alternatives (3 and 4) keep the original, non-VEX encoding.
3497 (define_insn "*vec_concatv2sf_avx"
3498 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3500 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3501 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3504 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3505 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3506 vmovss\t{%1, %0|%0, %1}
3507 punpckldq\t{%2, %0|%0, %2}
3508 movd\t{%1, %0|%0, %1}"
3509 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3510 (set (attr "prefix")
3511 (if_then_else (eq_attr "alternative" "3,4")
3512 (const_string "orig")
3513 (const_string "vex")))
3514 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3516 ;; Although insertps takes register source, we prefer
3517 ;; unpcklps with register source since it is shorter.
;; Build a V2SF value from two SF operands, SSE4.1 variant; operand 1 is
;; tied to the destination except in the load alternatives.
;;   0: operand 2 in a register -> unpcklps
;;   1: operand 2 in memory     -> insertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 "C" constant -> movss load
;;   3: MMX register destination -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd load
3518 (define_insn "*vec_concatv2sf_sse4_1"
3519 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3521 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3522 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3525 unpcklps\t{%2, %0|%0, %2}
3526 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3527 movss\t{%1, %0|%0, %1}
3528 punpckldq\t{%2, %0|%0, %2}
3529 movd\t{%1, %0|%0, %1}"
3530 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3531 (set_attr "prefix_extra" "*,1,*,*,*")
3532 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3534 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3535 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3536 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF value from two SF operands, plain SSE variant.
;;   0: operand 1 tied, operand 2 in a register     -> unpcklps
;;   1: operand 1 in memory, operand 2 "C" constant -> movss load
;;   2: MMX registers                               -> punpckldq
;;   3: MMX destination, operand 1 in memory, operand 2 "C" -> movd load
3537 (define_insn "*vec_concatv2sf_sse"
3538 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3540 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3541 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3544 unpcklps\t{%2, %0|%0, %2}
3545 movss\t{%1, %0|%0, %1}
3546 punpckldq\t{%2, %0|%0, %2}
3547 movd\t{%1, %0|%0, %1}"
3548 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3549 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF value from two V2SF halves, AVX variant.
;;   0: operand 2 in a register -> vmovlhps
;;   1: operand 2 in memory     -> vmovhps load
3551 (define_insn "*vec_concatv4sf_avx"
3552 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3554 (match_operand:V2SF 1 "register_operand" " x,x")
3555 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3558 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3559 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3560 [(set_attr "type" "ssemov")
3561 (set_attr "prefix" "vex")
3562 (set_attr "mode" "V4SF,V2SF")])
;; Build a V4SF value from two V2SF halves, SSE variant; operand 1 is
;; tied to the destination.
;;   0: operand 2 in a register -> movlhps
;;   1: operand 2 in memory     -> movhps load
3564 (define_insn "*vec_concatv4sf_sse"
3565 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3567 (match_operand:V2SF 1 "register_operand" " 0,0")
3568 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3571 movlhps\t{%2, %0|%0, %2}
3572 movhps\t{%2, %0|%0, %2}"
3573 [(set_attr "type" "ssemov")
3574 (set_attr "mode" "V4SF,V2SF")])
;; Standard vec_init expander for every mode in SSEMODE.  All the work
;; is delegated to ix86_expand_vector_init, called with `false' as its
;; first argument, the destination register and the initializer.
3576 (define_expand "vec_init<mode>"
3577 [(match_operand:SSEMODE 0 "register_operand" "")
3578 (match_operand 1 "" "")]
3581 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Set element 0 of a V4SF value from SF operand 2, AVX variant.
;;   0: register source merged with register operand 1 -> vmovss
;;   1: memory source, operand 1 "C" constant          -> vmovss load
;;   2: general-register source, operand 1 "C"         -> vmovd
;;   3: memory destination tied to operand 1 (no template is listed for
;;      this alternative here; presumably it is handled by a split)
3585 (define_insn "*vec_setv4sf_0_avx"
3586 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3589 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3590 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3594 vmovss\t{%2, %1, %0|%0, %1, %2}
3595 vmovss\t{%2, %0|%0, %2}
3596 vmovd\t{%2, %0|%0, %2}
3598 [(set_attr "type" "ssemov")
3599 (set_attr "prefix" "vex")
3600 (set_attr "mode" "SF")])
;; Set element 0 of a V4SF value from SF operand 2, SSE variant.
;;   0: register source, operand 1 tied to destination -> movss
;;   1: memory source, operand 1 "C" constant          -> movss load
;;   2: general-register source into a Y2-class register -> movd
;;   3: memory destination tied to operand 1 (no template is listed for
;;      this alternative here; presumably it is handled by a split)
3602 (define_insn "vec_setv4sf_0"
3603 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3606 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3607 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3611 movss\t{%2, %0|%0, %2}
3612 movss\t{%2, %0|%0, %2}
3613 movd\t{%2, %0|%0, %2}
3615 [(set_attr "type" "ssemov")
3616 (set_attr "mode" "SF")])
3618 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 with
;; vinsertps.  Operand 3 is a power of two in 1..8; the output code
;; converts it to the element number with exact_log2 and shifts that
;; left by 4 to form the vinsertps immediate.
3619 (define_insn "*vec_setv4sf_avx"
3620 [(set (match_operand:V4SF 0 "register_operand" "=x")
3623 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3624 (match_operand:V4SF 1 "register_operand" "x")
3625 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3628 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3629 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3631 [(set_attr "type" "sselog")
3632 (set_attr "prefix" "vex")
3633 (set_attr "mode" "V4SF")])
;; Insert SF operand 2 into one element of V4SF operand 1 (tied to the
;; destination) with insertps.  Operand 3 is a power of two in 1..8;
;; the output code converts it to the element number with exact_log2
;; and shifts that left by 4 to form the insertps immediate.
3635 (define_insn "*vec_setv4sf_sse4_1"
3636 [(set (match_operand:V4SF 0 "register_operand" "=x")
3639 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3640 (match_operand:V4SF 1 "register_operand" "0")
3641 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3644 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3645 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3647 [(set_attr "type" "sselog")
3648 (set_attr "prefix_extra" "1")
3649 (set_attr "mode" "V4SF")])
;; VEX-encoded vinsertps as an unspec with the raw 8-bit immediate in
;; operand 3.  When operand 2 is in memory, the output code takes the
;; top two bits of the immediate (count_s), clears them from the
;; immediate, and instead re-addresses operand 2 as an SFmode memory
;; reference at byte offset count_s * 4.
3651 (define_insn "*avx_insertps"
3652 [(set (match_operand:V4SF 0 "register_operand" "=x")
3653 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3654 (match_operand:V4SF 1 "register_operand" "x")
3655 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3659 if (MEM_P (operands[2]))
3661 unsigned count_s = INTVAL (operands[3]) >> 6;
3663 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3664 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3666 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3668 [(set_attr "type" "sselog")
3669 (set_attr "prefix" "vex")
3670 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps as an unspec with the raw 8-bit immediate in
;; operand 3; operand 1 is tied to the destination.  Operand 2 may be a
;; register or memory, matching the AVX twin of this pattern.  When it
;; is in memory, the output code takes the top two bits of the
;; immediate (count_s), clears them from the immediate, and instead
;; re-addresses operand 2 as an SFmode memory reference at byte offset
;; count_s * 4.  (With a register-only predicate that MEM_P path could
;; never be reached.)
3672 (define_insn "sse4_1_insertps"
3673 [(set (match_operand:V4SF 0 "register_operand" "=x")
3674 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3675 (match_operand:V4SF 1 "register_operand" "0")
3676 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3680 if (MEM_P (operands[2]))
3682 unsigned count_s = INTVAL (operands[3]) >> 6;
3684 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3685 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3687 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3689 [(set_attr "type" "sselog")
3690 (set_attr "prefix_extra" "1")
3691 (set_attr "mode" "V4SF")])
;; Post-reload handling of a V4SF memory destination whose element is
;; set from a non-memory SF operand: the replacement code is a plain
;; SFmode move of operand 1 to byte offset 0 of the memory operand.
3694 [(set (match_operand:V4SF 0 "memory_operand" "")
3697 (match_operand:SF 1 "nonmemory_operand" ""))
3700 "TARGET_SSE && reload_completed"
3703 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Standard vec_set expander for every mode in SSEMODE: operand 0 is
;; the vector, operand 1 the scalar and operand 2 a constant element
;; index.  The work is delegated to ix86_expand_vector_set, called with
;; `false' as its first argument.
3707 (define_expand "vec_set<mode>"
3708 [(match_operand:SSEMODE 0 "register_operand" "")
3709 (match_operand:<ssescalarmode> 1 "register_operand" "")
3710 (match_operand 2 "const_int_operand" "")]
3713 ix86_expand_vector_set (false, operands[0], operands[1],
3714 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 as an SF value.  Matched when
;; the operands are not both memory, and split once reload has
;; completed: operand 1 is re-expressed in SFmode (either as the
;; same-numbered SFmode hard register or through gen_lowpart) and an
;; ordinary move into operand 0 is emitted.
3718 (define_insn_and_split "*vec_extractv4sf_0"
3719 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3721 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3722 (parallel [(const_int 0)])))]
3723 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3725 "&& reload_completed"
3728 rtx op1 = operands[1];
3730 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3732 op1 = gen_lowpart (SFmode, op1);
3733 emit_move_insn (operands[0], op1);
;; Expander for vextractf128 on every mode in AVX256MODE.  Operand 2 is
;; the constant half selector (0 or 1); the switch dispatches to the
;; vec_extract_lo_<mode> or vec_extract_hi_<mode> pattern accordingly.
3737 (define_expand "avx_vextractf128<mode>"
3738 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3739 (match_operand:AVX256MODE 1 "register_operand" "")
3740 (match_operand:SI 2 "const_0_to_1_operand" "")]
3743 switch (INTVAL (operands[2]))
3746 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
3749 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low half (elements 0 and 1) of a four-element 256-bit vector
;; (AVX256MODE4P), to a register or to memory, via vextractf128 with
;; immediate 0.
3757 (define_insn "vec_extract_lo_<mode>"
3758 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3759 (vec_select:<avxhalfvecmode>
3760 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3761 (parallel [(const_int 0) (const_int 1)])))]
3763 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3764 [(set_attr "type" "sselog")
3765 (set_attr "memory" "none,store")
3766 (set_attr "prefix" "vex")
3767 (set_attr "mode" "V8SF")])
;; High half (elements 2 and 3) of a four-element 256-bit vector
;; (AVX256MODE4P), to a register or to memory, via vextractf128 with
;; immediate 1.
3769 (define_insn "vec_extract_hi_<mode>"
3770 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3771 (vec_select:<avxhalfvecmode>
3772 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3773 (parallel [(const_int 2) (const_int 3)])))]
3775 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3776 [(set_attr "type" "sselog")
3777 (set_attr "memory" "none,store")
3778 (set_attr "prefix" "vex")
3779 (set_attr "mode" "V8SF")])
;; Low half (elements 0-3) of an eight-element 256-bit vector
;; (AVX256MODE8P), to a register or to memory, via vextractf128 with
;; immediate 0.
3781 (define_insn "vec_extract_lo_<mode>"
3782 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3783 (vec_select:<avxhalfvecmode>
3784 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3785 (parallel [(const_int 0) (const_int 1)
3786 (const_int 2) (const_int 3)])))]
3788 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3789 [(set_attr "type" "sselog")
3790 (set_attr "memory" "none,store")
3791 (set_attr "prefix" "vex")
3792 (set_attr "mode" "V8SF")])
;; High half (elements 4-7) of an eight-element 256-bit vector
;; (AVX256MODE8P), to a register or to memory, via vextractf128 with
;; immediate 1.
3794 (define_insn "vec_extract_hi_<mode>"
3795 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3796 (vec_select:<avxhalfvecmode>
3797 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3798 (parallel [(const_int 4) (const_int 5)
3799 (const_int 6) (const_int 7)])))]
3801 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3802 [(set_attr "type" "sselog")
3803 (set_attr "memory" "none,store")
3804 (set_attr "prefix" "vex")
3805 (set_attr "mode" "V8SF")])
;; Low half (elements 0-7) of a V16HI vector as V8HI, to a register or
;; to memory, via vextractf128 with immediate 0.
3807 (define_insn "vec_extract_lo_v16hi"
3808 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3810 (match_operand:V16HI 1 "register_operand" "x,x")
3811 (parallel [(const_int 0) (const_int 1)
3812 (const_int 2) (const_int 3)
3813 (const_int 4) (const_int 5)
3814 (const_int 6) (const_int 7)])))]
3816 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3817 [(set_attr "type" "sselog")
3818 (set_attr "memory" "none,store")
3819 (set_attr "prefix" "vex")
3820 (set_attr "mode" "V8SF")])
;; High half (elements 8-15) of a V16HI vector as V8HI, to a register
;; or to memory, via vextractf128 with immediate 1.
3822 (define_insn "vec_extract_hi_v16hi"
3823 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3825 (match_operand:V16HI 1 "register_operand" "x,x")
3826 (parallel [(const_int 8) (const_int 9)
3827 (const_int 10) (const_int 11)
3828 (const_int 12) (const_int 13)
3829 (const_int 14) (const_int 15)])))]
3831 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3832 [(set_attr "type" "sselog")
3833 (set_attr "memory" "none,store")
3834 (set_attr "prefix" "vex")
3835 (set_attr "mode" "V8SF")])
;; Low half (elements 0-15) of a V32QI vector as V16QI, to a register
;; or to memory, via vextractf128 with immediate 0.
3837 (define_insn "vec_extract_lo_v32qi"
3838 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3840 (match_operand:V32QI 1 "register_operand" "x,x")
3841 (parallel [(const_int 0) (const_int 1)
3842 (const_int 2) (const_int 3)
3843 (const_int 4) (const_int 5)
3844 (const_int 6) (const_int 7)
3845 (const_int 8) (const_int 9)
3846 (const_int 10) (const_int 11)
3847 (const_int 12) (const_int 13)
3848 (const_int 14) (const_int 15)])))]
3850 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3851 [(set_attr "type" "sselog")
3852 (set_attr "memory" "none,store")
3853 (set_attr "prefix" "vex")
3854 (set_attr "mode" "V8SF")])
;; High half (elements 16-31) of a V32QI vector as V16QI, to a register
;; or to memory, via vextractf128 with immediate 1.
3856 (define_insn "vec_extract_hi_v32qi"
3857 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3859 (match_operand:V32QI 1 "register_operand" "x,x")
3860 (parallel [(const_int 16) (const_int 17)
3861 (const_int 18) (const_int 19)
3862 (const_int 20) (const_int 21)
3863 (const_int 22) (const_int 23)
3864 (const_int 24) (const_int 25)
3865 (const_int 26) (const_int 27)
3866 (const_int 28) (const_int 29)
3867 (const_int 30) (const_int 31)])))]
3869 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3870 [(set_attr "type" "sselog")
3871 (set_attr "memory" "none,store")
3872 (set_attr "prefix" "vex")
3873 (set_attr "mode" "V8SF")])
;; Extract element number operand 2 (0..3) of V4SF register operand 1
;; into a general register or memory with extractps.  The %v template
;; prefix and prefix attribute maybe_vex cover both encodings.
3875 (define_insn "*sse4_1_extractps"
3876 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3878 (match_operand:V4SF 1 "register_operand" "x")
3879 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3881 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3882 [(set_attr "type" "sselog")
3883 (set_attr "prefix_extra" "1")
3884 (set_attr "prefix" "maybe_vex")
3885 (set_attr "mode" "V4SF")])
;; Extract element number operand 2 (0..3) from a V4SF value that lives
;; in offsettable memory.  The split turns it into a plain SFmode load
;; from the memory operand at byte offset index * 4.
3887 (define_insn_and_split "*vec_extract_v4sf_mem"
3888 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3890 (match_operand:V4SF 1 "memory_operand" "o")
3891 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3897 int i = INTVAL (operands[2]);
3899 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Standard vec_extract expander for every mode in SSEMODE: operand 0
;; is the scalar result, operand 1 the vector and operand 2 a constant
;; element index.  The work is delegated to ix86_expand_vector_extract,
;; called with `false' as its first argument.
3903 (define_expand "vec_extract<mode>"
3904 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3905 (match_operand:SSEMODE 1 "register_operand" "")
3906 (match_operand 2 "const_int_operand" "")]
3909 ix86_expand_vector_extract (false, operands[0], operands[1],
3910 INTVAL (operands[2]));
3914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3916 ;; Parallel double-precision floating point element swizzling
3918 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vunpckhpd on V4DF.  The selector picks elements 1,5 and 3,7
;; of the operand pair, i.e. the upper double of each 128-bit half from
;; operand 1 followed by the matching one from operand 2.
3920 (define_insn "avx_unpckhpd256"
3921 [(set (match_operand:V4DF 0 "register_operand" "=x")
3924 (match_operand:V4DF 1 "register_operand" "x")
3925 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3926 (parallel [(const_int 1) (const_int 5)
3927 (const_int 3) (const_int 7)])))]
3929 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3930 [(set_attr "type" "sselog")
3931 (set_attr "prefix" "vex")
3932 (set_attr "mode" "V4DF")])
;; Expander front end for the V2DF unpckhpd pattern.  Its preparation
;; statement runs ix86_fixup_binary_operands over the operand array
;; before the insn pattern is emitted.
3934 (define_expand "sse2_unpckhpd_exp"
3935 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
3938 (match_operand:V2DF 1 "nonimmediate_operand" "")
3939 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3940 (parallel [(const_int 1)
3943 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; VEX-encoded unpckhpd on V2DF.  Alternatives:
;;   0: all registers                         -> vunpckhpd
;;   1: operand 1 in offsettable memory       -> vmovlpd reading %H1,
;;      merged with register operand 2
;;   2: memory destination tied to operand 2  -> vmovhpd storing operand 1
;; The condition rejects operands 1 and 2 both being memory.
3945 (define_insn "*avx_unpckhpd"
3946 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3949 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
3950 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
3951 (parallel [(const_int 1)
3953 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3955 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3956 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3957 vmovhpd\t{%1, %0|%0, %1}"
3958 [(set_attr "type" "sselog,ssemov,ssemov")
3959 (set_attr "prefix" "vex")
3960 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Two-operand SSE2 unpckhpd on V2DF.  Alternatives:
;;   0: operand 1 tied to destination, operand 2 in a register -> unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied -> movlpd
;;      reading %H1
;;   2: memory destination tied to operand 2 -> movhpd storing operand 1
;; The condition rejects operands 1 and 2 both being memory.
3962 (define_insn "sse2_unpckhpd"
3963 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3966 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3967 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3968 (parallel [(const_int 1)
3970 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3972 unpckhpd\t{%2, %0|%0, %2}
3973 movlpd\t{%H1, %0|%0, %H1}
3974 movhpd\t{%1, %0|%0, %1}"
3975 [(set_attr "type" "sselog,ssemov,ssemov")
3976 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup on V4DF with a single register-or-memory source.
;; vmovddup duplicates the even-numbered double within each 128-bit
;; half, producing {s0, s0, s2, s2}.  The selector therefore has the
;; same 0,4,2,6 shape as avx_unpcklpd256 applied to two copies of the
;; source (assuming, as for the sibling dup patterns, that the selected
;; vector is operand 1 paired with itself); a selector of 0,2,4,6 would
;; instead describe {s0, s2, s0, s2}, which is not what the
;; instruction computes.
3978 (define_insn "avx_movddup256"
3979 [(set (match_operand:V4DF 0 "register_operand" "=x")
3982 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
3984 (parallel [(const_int 0) (const_int 4)
3985 (const_int 2) (const_int 6)])))]
3987 "vmovddup\t{%1, %0|%0, %1}"
3988 [(set_attr "type" "sselog1")
3989 (set_attr "prefix" "vex")
3990 (set_attr "mode" "V4DF")])
;; VEX-encoded 128-bit movddup on V2DF.  Alternative 0 (register
;; destination, register or memory source) emits vmovddup; alternative
;; 1 (offsettable memory destination, register source) has no template
;; listed here and is presumably handled by a split.  The condition
;; rejects source and destination both being memory.
3992 (define_insn "*avx_movddup"
3993 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3996 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3998 (parallel [(const_int 0)
4000 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4002 vmovddup\t{%1, %0|%0, %1}
4004 [(set_attr "type" "sselog1,ssemov")
4005 (set_attr "prefix" "vex")
4006 (set_attr "mode" "V2DF")])
;; SSE3 128-bit movddup on V2DF.  Alternative 0 (register destination,
;; register or memory source) emits movddup; alternative 1 (offsettable
;; memory destination, register source) has no template listed here and
;; is presumably handled by a split.  The condition rejects source and
;; destination both being memory.
4008 (define_insn "*sse3_movddup"
4009 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4012 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4014 (parallel [(const_int 0)
4016 "TARGET_SSE3 && reload_completed"
4018 movddup\t{%1, %0|%0, %1}
4020 [(set_attr "type" "sselog1,ssemov")
4021 (set_attr "mode" "V2DF")])
;; Post-reload handling of movddup with a memory destination and a
;; register source: the low double of the source register (the same
;; hard register viewed in DFmode) is stored twice, at byte offsets 0
;; and 8 of the destination.
4024 [(set (match_operand:V2DF 0 "memory_operand" "")
4027 (match_operand:V2DF 1 "register_operand" "")
4029 (parallel [(const_int 0)
4031 "TARGET_SSE3 && reload_completed"
4034 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4035 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4036 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; 256-bit vunpcklpd on V4DF.  The selector picks elements 0,4 and 2,6
;; of the operand pair, i.e. the lower double of each 128-bit half from
;; operand 1 followed by the matching one from operand 2.
4040 (define_insn "avx_unpcklpd256"
4041 [(set (match_operand:V4DF 0 "register_operand" "=x")
4044 (match_operand:V4DF 1 "register_operand" "x")
4045 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4046 (parallel [(const_int 0) (const_int 4)
4047 (const_int 2) (const_int 6)])))]
4049 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4050 [(set_attr "type" "sselog")
4051 (set_attr "prefix" "vex")
4052 (set_attr "mode" "V4DF")])
;; Expander front end for the V2DF unpcklpd pattern.  Its preparation
;; statement runs ix86_fixup_binary_operands over the operand array
;; before the insn pattern is emitted.
4054 (define_expand "sse2_unpcklpd_exp"
4055 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4058 (match_operand:V2DF 1 "nonimmediate_operand" "")
4059 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4060 (parallel [(const_int 0)
4063 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; VEX-encoded unpcklpd on V2DF.  Alternatives:
;;   0: all registers        -> vunpcklpd
;;   1: operand 2 in memory  -> vmovhpd load
;;   2: offsettable memory destination tied to operand 1 -> vmovlpd
;;      storing operand 2 at %H0
;; The condition rejects operands 1 and 2 both being memory.
4065 (define_insn "*avx_unpcklpd"
4066 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4069 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4070 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4071 (parallel [(const_int 0)
4073 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4075 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4076 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4077 vmovlpd\t{%2, %H0|%H0, %2}"
4078 [(set_attr "type" "sselog,ssemov,ssemov")
4079 (set_attr "prefix" "vex")
4080 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Two-operand SSE2 unpcklpd on V2DF; operand 1 is tied to the
;; destination in every alternative.  Alternatives:
;;   0: operand 2 in a register -> unpcklpd
;;   1: operand 2 in memory     -> movhpd load
;;   2: offsettable memory destination -> movlpd storing operand 2 at %H0
;; The condition rejects operands 1 and 2 both being memory.
4082 (define_insn "sse2_unpcklpd"
4083 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4086 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4087 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4088 (parallel [(const_int 0)
4090 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4092 unpcklpd\t{%2, %0|%0, %2}
4093 movhpd\t{%2, %0|%0, %2}
4094 movlpd\t{%2, %H0|%H0, %2}"
4095 [(set_attr "type" "sselog,ssemov,ssemov")
4096 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for the 256-bit shufpd.  Operand 3 is the integer immediate; it
;; is decoded bit by bit into explicit element selectors that are passed to
;; avx_shufpd256_1.  Bit 1 chooses element 5 or 4, bit 2 chooses 3 or 2 and
;; bit 3 chooses 7 or 6.
4098 (define_expand "avx_shufpd256"
4099 [(match_operand:V4DF 0 "register_operand" "")
4100 (match_operand:V4DF 1 "register_operand" "")
4101 (match_operand:V4DF 2 "nonimmediate_operand" "")
4102 (match_operand:SI 3 "const_int_operand" "")]
4105 int mask = INTVAL (operands[3]);
4106 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4108 GEN_INT (mask & 2 ? 5 : 4),
4109 GEN_INT (mask & 4 ? 3 : 2),
4110 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with explicit element selectors.  Operands 3..6 are
;; constants restricted to 0-1, 4-5, 2-3 and 6-7 respectively.  The output
;; code rebases each selector to 0/1, packs them into bits 0..3 of an
;; immediate, and overwrites operands[3] with that immediate before
;; returning the vshufpd template.
4114 (define_insn "avx_shufpd256_1"
4115 [(set (match_operand:V4DF 0 "register_operand" "=x")
4118 (match_operand:V4DF 1 "register_operand" "x")
4119 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4120 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4121 (match_operand 4 "const_4_to_5_operand" "")
4122 (match_operand 5 "const_2_to_3_operand" "")
4123 (match_operand 6 "const_6_to_7_operand" "")])))]
4127 mask = INTVAL (operands[3]);
4128 mask |= (INTVAL (operands[4]) - 4) << 1;
4129 mask |= (INTVAL (operands[5]) - 2) << 2;
4130 mask |= (INTVAL (operands[6]) - 6) << 3;
4131 operands[3] = GEN_INT (mask);
4133 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4135 [(set_attr "type" "sselog")
4136 (set_attr "prefix" "vex")
4137 (set_attr "mode" "V4DF")])
;; Expander for the 128-bit shufpd.  The integer immediate in operand 3 is
;; decoded into explicit element selectors for sse2_shufpd_v2df; bit 1 of
;; the immediate chooses element 3 or 2.
4139 (define_expand "sse2_shufpd"
4140 [(match_operand:V2DF 0 "register_operand" "")
4141 (match_operand:V2DF 1 "register_operand" "")
4142 (match_operand:V2DF 2 "nonimmediate_operand" "")
4143 (match_operand:SI 3 "const_int_operand" "")]
4146 int mask = INTVAL (operands[3]);
4147 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4149 GEN_INT (mask & 2 ? 3 : 2)));
;; Even-element extraction for the SSEMODE4S modes: a vec_select from the
;; double-width concatenation of operands 1 and 2, beginning at element 0.
4153 (define_expand "vec_extract_even<mode>"
4154 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4155 (vec_select:SSEMODE4S
4156 (vec_concat:<ssedoublesizemode>
4157 (match_operand:SSEMODE4S 1 "register_operand" "")
4158 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4159 (parallel [(const_int 0)
;; Odd-element extraction for the SSEMODE4S modes: a vec_select from the
;; double-width concatenation of operands 1 and 2, beginning at element 1.
4165 (define_expand "vec_extract_odd<mode>"
4166 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4167 (vec_select:SSEMODE4S
4168 (vec_concat:<ssedoublesizemode>
4169 (match_operand:SSEMODE4S 1 "register_operand" "")
4170 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4171 (parallel [(const_int 1)
;; Even-element extraction for the SSEMODE2D modes: a vec_select from the
;; double-width concatenation of operands 1 and 2, beginning at element 0.
4177 (define_expand "vec_extract_even<mode>"
4178 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4179 (vec_select:SSEMODE2D
4180 (vec_concat:<ssedoublesizemode>
4181 (match_operand:SSEMODE2D 1 "register_operand" "")
4182 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4183 (parallel [(const_int 0)
;; Odd-element extraction for the SSEMODE2D modes: a vec_select from the
;; double-width concatenation of operands 1 and 2, beginning at element 1.
4187 (define_expand "vec_extract_odd<mode>"
4188 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4189 (vec_select:SSEMODE2D
4190 (vec_concat:<ssedoublesizemode>
4191 (match_operand:SSEMODE2D 1 "register_operand" "")
4192 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4193 (parallel [(const_int 1)
4197 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand vpunpckhqdq on V2DI; the selection begins at element 1.
;; Operand 2 may be a register or memory.
4198 (define_insn "*avx_punpckhqdq"
4199 [(set (match_operand:V2DI 0 "register_operand" "=x")
4202 (match_operand:V2DI 1 "register_operand" "x")
4203 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4204 (parallel [(const_int 1)
4207 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4208 [(set_attr "type" "sselog")
4209 (set_attr "prefix" "vex")
4210 (set_attr "mode" "TI")])
;; SSE2 two-operand punpckhqdq on V2DI; operand 1 is tied to the destination
;; and the selection begins at element 1.
4212 (define_insn "sse2_punpckhqdq"
4213 [(set (match_operand:V2DI 0 "register_operand" "=x")
4216 (match_operand:V2DI 1 "register_operand" "0")
4217 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4218 (parallel [(const_int 1)
4221 "punpckhqdq\t{%2, %0|%0, %2}"
4222 [(set_attr "type" "sselog")
4223 (set_attr "prefix_data16" "1")
4224 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklqdq on V2DI; the selection begins at element 0.
;; Operand 2 may be a register or memory.
4226 (define_insn "*avx_punpcklqdq"
4227 [(set (match_operand:V2DI 0 "register_operand" "=x")
4230 (match_operand:V2DI 1 "register_operand" "x")
4231 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4232 (parallel [(const_int 0)
4235 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4236 [(set_attr "type" "sselog")
4237 (set_attr "prefix" "vex")
4238 (set_attr "mode" "TI")])
;; SSE2 two-operand punpcklqdq on V2DI; operand 1 is tied to the destination
;; and the selection begins at element 0.
4240 (define_insn "sse2_punpcklqdq"
4241 [(set (match_operand:V2DI 0 "register_operand" "=x")
4244 (match_operand:V2DI 1 "register_operand" "0")
4245 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4246 (parallel [(const_int 0)
4249 "punpcklqdq\t{%2, %0|%0, %2}"
4250 [(set_attr "type" "sselog")
4251 (set_attr "prefix_data16" "1")
4252 (set_attr "mode" "TI")])
;; AVX vshufpd over the SSEMODE2D modes with explicit selectors.  Operand 3
;; is restricted to 0-1 and operand 4 to 2-3; the output code packs them
;; into a 2-bit immediate (operand 4 rebased by 2 and shifted into bit 1)
;; and stores it back in operands[3] for the template.
4254 (define_insn "*avx_shufpd_<mode>"
4255 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4256 (vec_select:SSEMODE2D
4257 (vec_concat:<ssedoublesizemode>
4258 (match_operand:SSEMODE2D 1 "register_operand" "x")
4259 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4260 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4261 (match_operand 4 "const_2_to_3_operand" "")])))]
4265 mask = INTVAL (operands[3]);
4266 mask |= (INTVAL (operands[4]) - 2) << 1;
4267 operands[3] = GEN_INT (mask);
4269 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4271 [(set_attr "type" "sselog")
4272 (set_attr "prefix" "vex")
4273 (set_attr "mode" "V2DF")])
;; SSE2 two-operand shufpd over the SSEMODE2D modes; operand 1 is tied to
;; the destination.  Operand 3 is restricted to 0-1 and operand 4 to 2-3;
;; the output code packs them into a 2-bit immediate and stores it back in
;; operands[3] for the template.
4275 (define_insn "sse2_shufpd_<mode>"
4276 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4277 (vec_select:SSEMODE2D
4278 (vec_concat:<ssedoublesizemode>
4279 (match_operand:SSEMODE2D 1 "register_operand" "0")
4280 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4281 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4282 (match_operand 4 "const_2_to_3_operand" "")])))]
4286 mask = INTVAL (operands[3]);
4287 mask |= (INTVAL (operands[4]) - 2) << 1;
4288 operands[3] = GEN_INT (mask);
4290 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4292 [(set_attr "type" "sselog")
4293 (set_attr "mode" "V2DF")])
4295 ;; Avoid combining registers from different units in a single alternative,
4296 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: extract element 1 of V2DF operand 1 into DF operand 0.  The
;; destination alternatives are memory, SSE register (twice), x87 register
;; (*f) and general register (r).  The first two alternatives emit vmovhpd
;; and vunpckhpd.  Source and destination may not both be memory.
4297 (define_insn "*avx_storehpd"
4298 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4300 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4301 (parallel [(const_int 1)])))]
4302 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4304 vmovhpd\t{%1, %0|%0, %1}
4305 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4309 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4310 (set_attr "prefix" "vex")
4311 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: extract element 1 of V2DF operand 1 into DF operand 0.  Same
;; destination alternatives as the AVX variant, but the register-to-register
;; alternative ties the source to the destination (constraint 0).  The first
;; alternative stores with movhpd.  Source and destination may not both be
;; memory.
4313 (define_insn "sse2_storehpd"
4314 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4316 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4317 (parallel [(const_int 1)])))]
4318 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4320 movhpd\t{%1, %0|%0, %1}
4325 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4326 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Split after reload (TARGET_SSE2): extracting element 1 of a V2DF held in
;; memory into a DF register becomes a plain DFmode move from byte offset 8
;; of that memory operand.
4329 [(set (match_operand:DF 0 "register_operand" "")
4331 (match_operand:V2DF 1 "memory_operand" "")
4332 (parallel [(const_int 1)])))]
4333 "TARGET_SSE2 && reload_completed"
4334 [(set (match_dup 0) (match_dup 1))]
4336 operands[1] = adjust_address (operands[1], DFmode, 8);
4339 ;; Avoid combining registers from different units in a single alternative,
4340 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of V2DF operand 1 into DF operand 0.  The destination
;; alternatives are memory, SSE register (twice), x87 register (*f) and
;; general register (r).  The first alternative stores with %vmovlpd; the
;; "maybe_vex" prefix attribute covers both SSE2 and AVX encodings.  Source
;; and destination may not both be memory.
4341 (define_insn "sse2_storelpd"
4342 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4344 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4345 (parallel [(const_int 0)])))]
4346 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4348 %vmovlpd\t{%1, %0|%0, %1}
4353 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4354 (set_attr "prefix" "maybe_vex")
4355 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Split after reload (TARGET_SSE2): extracting element 0 of a V2DF into a
;; DF register becomes a plain move.  The source is re-expressed in DFmode,
;; either as a DFmode register with the same register number (gen_rtx_REG)
;; or through gen_lowpart, and then moved into operand 0.
4358 [(set (match_operand:DF 0 "register_operand" "")
4360 (match_operand:V2DF 1 "nonimmediate_operand" "")
4361 (parallel [(const_int 0)])))]
4362 "TARGET_SSE2 && reload_completed"
4365 rtx op1 = operands[1];
4367 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4369 op1 = gen_lowpart (DFmode, op1);
4370 emit_move_insn (operands[0], op1);
;; Expander for the loadhpd pattern: combines element 0 of V2DF operand 1
;; with DF operand 2.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands (code UNKNOWN, V2DFmode).
4374 (define_expand "sse2_loadhpd_exp"
4375 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4378 (match_operand:V2DF 1 "nonimmediate_operand" "")
4379 (parallel [(const_int 0)]))
4380 (match_operand:DF 2 "nonimmediate_operand" "")))]
4382 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4384 ;; Avoid combining registers from different units in a single alternative,
4385 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: combine element 0 of V2DF operand 1 with DF operand 2.  With a
;; register destination, operand 2 in memory uses vmovhpd and operand 2 in
;; an SSE register uses vunpcklpd.  The remaining three alternatives have an
;; offsettable-memory destination tied to operand 1, with operand 2 in an
;; SSE, x87 (*f) or general (r) register.  Operands 1 and 2 may not both be
;; memory.
4386 (define_insn "*avx_loadhpd"
4387 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4390 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4391 (parallel [(const_int 0)]))
4392 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4393 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4395 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4396 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4400 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4401 (set_attr "prefix" "vex")
4402 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: combine element 0 of V2DF operand 1 with DF operand 2; operand 1 is
;; tied to the destination in every alternative.  With a register
;; destination, operand 2 in memory uses movhpd and operand 2 in an SSE
;; register uses unpcklpd.  The remaining alternatives have a memory
;; destination.  Operands 1 and 2 may not both be memory.
4404 (define_insn "sse2_loadhpd"
4405 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4408 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4409 (parallel [(const_int 0)]))
4410 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4411 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4413 movhpd\t{%2, %0|%0, %2}
4414 unpcklpd\t{%2, %0|%0, %2}
4418 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4419 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Split after reload (TARGET_SSE2): the V2DF destination is memory and its
;; element 0 is taken from that same location (match_dup 0), so only the
;; other element changes.  The operation reduces to storing DF register
;; operand 1 at byte offset 8 of the destination.
4422 [(set (match_operand:V2DF 0 "memory_operand" "")
4424 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4425 (match_operand:DF 1 "register_operand" "")))]
4426 "TARGET_SSE2 && reload_completed"
4427 [(set (match_dup 0) (match_dup 1))]
4429 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander for the loadlpd pattern: combines DF operand 2 with element 1 of
;; V2DF operand 1.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands (code UNKNOWN, V2DFmode).
4432 (define_expand "sse2_loadlpd_exp"
4433 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4435 (match_operand:DF 2 "nonimmediate_operand" "")
4437 (match_operand:V2DF 1 "nonimmediate_operand" "")
4438 (parallel [(const_int 1)]))))]
4440 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4442 ;; Avoid combining registers from different units in a single alternative,
4443 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: combine DF operand 2 with element 1 of V2DF operand 1.  Register
;; destination alternatives:
;;   0: operand 1 is constant (C), operand 2 in memory -> vmovsd load
;;   1: operand 2 in memory                            -> vmovlpd
;;   2: operand 2 in an SSE register                   -> vmovsd
;;   3: operand 1 in offsettable memory                -> vmovhpd from %H1
;; The last three alternatives have a memory destination tied to operand 1,
;; with operand 2 in an SSE, x87 (*f) or general (r) register.  Operands 1
;; and 2 may not both be memory.
4444 (define_insn "*avx_loadlpd"
4445 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4447 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4449 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4450 (parallel [(const_int 1)]))))]
4451 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4453 vmovsd\t{%2, %0|%0, %2}
4454 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4455 vmovsd\t{%2, %1, %0|%0, %1, %2}
4456 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4460 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4461 (set_attr "prefix" "vex")
4462 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2: combine DF operand 2 with element 1 of V2DF operand 1.  Register
;; destination alternatives:
;;   0: operand 1 is constant (C), operand 2 in memory   -> movsd load
;;   1: operand 1 tied, operand 2 in memory              -> movlpd
;;   2: operand 1 tied, operand 2 in an SSE register     -> movsd
;;   3: operand 2 tied, operand 1 in an SSE register     -> shufpd $2
;;   4: operand 2 tied, operand 1 in offsettable memory  -> movhpd from %H1
;; The last three alternatives have a memory destination tied to operand 1.
;; Operands 1 and 2 may not both be memory.
4464 (define_insn "sse2_loadlpd"
4465 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4467 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4469 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4470 (parallel [(const_int 1)]))))]
4471 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4473 movsd\t{%2, %0|%0, %2}
4474 movlpd\t{%2, %0|%0, %2}
4475 movsd\t{%2, %0|%0, %2}
4476 shufpd\t{$2, %1, %0|%0, %1, 2}
4477 movhpd\t{%H1, %0|%0, %H1}
4481 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4482 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Split after reload (TARGET_SSE2): the V2DF destination is memory and its
;; element 1 is taken from that same location (match_dup 0), so only
;; element 0 changes.  Element 0 is at byte offset 0 of the vector, so DF
;; register operand 1 is stored there; element 1 at byte offset 8 must be
;; left untouched.  (The mirror-image loadhpd split keeps element 0 and
;; therefore stores at offset 8.)
4485 [(set (match_operand:V2DF 0 "memory_operand" "")
4487 (match_operand:DF 1 "register_operand" "")
4488 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4489 "TARGET_SSE2 && reload_completed"
4490 [(set (match_dup 0) (match_dup 1))]
4492 operands[0] = adjust_address (operands[0], DFmode, 0);
4495 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2: extract element 1 of V2DF operand 1 into DF operand 0
;; using single-precision move instructions.  Alternatives:
;;   0: memory destination        -> movhps store
;;   1: register to register      -> movhlps
;;   2: offsettable-memory source -> movlps load from %H1
;; Source and destination may not both be memory.
4497 (define_insn "*vec_extractv2df_1_sse"
4498 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4500 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4501 (parallel [(const_int 1)])))]
4502 "!TARGET_SSE2 && TARGET_SSE
4503 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4505 movhps\t{%1, %0|%0, %1}
4506 movhlps\t{%1, %0|%0, %1}
4507 movlps\t{%H1, %0|%0, %H1}"
4508 [(set_attr "type" "ssemov")
4509 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2: extract element 0 of V2DF operand 1 into DF operand 0
;; using single-precision move instructions.  Alternatives:
;;   0: memory destination   -> movlps store
;;   1: register to register -> movaps
;;   2: memory source        -> movlps load
;; Source and destination may not both be memory.
4511 (define_insn "*vec_extractv2df_0_sse"
4512 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4514 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4515 (parallel [(const_int 0)])))]
4516 "!TARGET_SSE2 && TARGET_SSE
4517 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4519 movlps\t{%1, %0|%0, %1}
4520 movaps\t{%1, %0|%0, %1}
4521 movlps\t{%1, %0|%0, %1}"
4522 [(set_attr "type" "ssemov")
4523 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX V2DF pattern over operand 2 and operand 1.  The five alternatives
;; emit, in order: vmovsd (all registers), vmovlpd with operand 2 in memory,
;; vmovlpd storing operand 2 to a memory destination tied to operand 1,
;; vmovhps loading from %H1, and vmovhps storing operand 1 to %H0.
4525 (define_insn "*avx_movsd"
4526 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4528 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4529 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4533 vmovsd\t{%2, %1, %0|%0, %1, %2}
4534 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4535 vmovlpd\t{%2, %0|%0, %2}
4536 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4537 vmovhps\t{%1, %H0|%H0, %1}"
4538 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4539 (set_attr "prefix" "vex")
4540 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 two-operand counterpart of the movsd pattern.  The six alternatives
;; emit, in order: movsd (registers), movlpd with operand 2 in memory,
;; movlpd storing to a memory destination, shufpd $2 with operand 2 tied to
;; the destination, movhps loading from %H1, and movhps storing operand 1
;; to %H0.
4542 (define_insn "sse2_movsd"
4543 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4545 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4546 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4550 movsd\t{%2, %0|%0, %2}
4551 movlpd\t{%2, %0|%0, %2}
4552 movlpd\t{%2, %0|%0, %2}
4553 shufpd\t{$2, %1, %0|%0, %1, 2}
4554 movhps\t{%H1, %0|%0, %H1}
4555 movhps\t{%1, %H0|%H0, %1}"
4556 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4557 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF register from a single DF operand (register or memory)
;; with %vmovddup; the "maybe_vex" prefix covers both legacy and VEX
;; encodings.
4559 (define_insn "*vec_dupv2df_sse3"
4560 [(set (match_operand:V2DF 0 "register_operand" "=x")
4562 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4564 "%vmovddup\t{%1, %0|%0, %1}"
4565 [(set_attr "type" "sselog1")
4566 (set_attr "prefix" "maybe_vex")
4567 (set_attr "mode" "DF")])
;; Builds a V2DF register from DF register operand 1, which is tied to the
;; destination (constraint 0).
4569 (define_insn "vec_dupv2df"
4570 [(set (match_operand:V2DF 0 "register_operand" "=x")
4572 (match_operand:DF 1 "register_operand" "0")))]
4575 [(set_attr "type" "sselog1")
4576 (set_attr "mode" "V2DF")])
;; V2DF construction from DF operand 1 (register or memory) emitted as
;; %vmovddup.
4578 (define_insn "*vec_concatv2df_sse3"
4579 [(set (match_operand:V2DF 0 "register_operand" "=x")
4581 (match_operand:DF 1 "nonimmediate_operand" "xm")
4584 "%vmovddup\t{%1, %0|%0, %1}"
4585 [(set_attr "type" "sselog1")
4586 (set_attr "prefix" "maybe_vex")
4587 (set_attr "mode" "DF")])
;; AVX V2DF construction from DF operands 1 and 2.  Alternatives:
;;   0: both in registers                        -> vunpcklpd
;;   1: operand 2 in memory                      -> vmovhpd
;;   2: operand 1 in memory, operand 2 constant (C) -> vmovsd load
4589 (define_insn "*vec_concatv2df_avx"
4590 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4592 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4593 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4596 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4597 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4598 vmovsd\t{%1, %0|%0, %1}"
4599 [(set_attr "type" "ssemov")
4600 (set_attr "prefix" "vex")
4601 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX V2DF construction from DF operands 1 and 2.  The first three
;; alternatives use the Y2 register class and emit unpcklpd, movhpd and a
;; movsd load (operand 2 constant).  The last two use the x class and emit
;; the single-precision moves movlhps and movhps.
4603 (define_insn "*vec_concatv2df"
4604 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4606 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4607 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4610 unpcklpd\t{%2, %0|%0, %2}
4611 movhpd\t{%2, %0|%0, %2}
4612 movsd\t{%1, %0|%0, %1}
4613 movlhps\t{%2, %0|%0, %2}
4614 movhps\t{%2, %0|%0, %2}"
4615 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4616 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4618 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4620 ;; Parallel integral arithmetic
4622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander over SSEMODEI.  The preparation
;; statement sets operands[2] to a register holding the all-zero vector of
;; <MODE>mode.
4624 (define_expand "neg<mode>2"
4625 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4628 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4630 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander over SSEMODEI.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy with the operation's <CODE>.
4632 (define_expand "<plusminus_insn><mode>3"
4633 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4635 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4636 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4638 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer vector add/subtract: vp<op><size> with
;; operand 2 in a register or memory.  Accepted only when
;; ix86_binary_operator_ok approves the operand combination.
4640 (define_insn "*avx_<plusminus_insn><mode>3"
4641 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4643 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4644 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4645 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4646 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4647 [(set_attr "type" "sseiadd")
4648 (set_attr "prefix" "vex")
4649 (set_attr "mode" "TI")])
;; SSE2 two-operand integer vector add/subtract: p<op><size> with operand 1
;; tied to the destination and operand 2 in a register or memory.
4651 (define_insn "*<plusminus_insn><mode>3"
4652 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4654 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4655 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4656 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4657 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4658 [(set_attr "type" "sseiadd")
4659 (set_attr "prefix_data16" "1")
4660 (set_attr "mode" "TI")])
;; Expander for the saturating add/subtract codes (sat_plusminus) on the
;; byte and word vector modes (SSEMODE12).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
4662 (define_expand "sse2_<plusminus_insn><mode>3"
4663 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4664 (sat_plusminus:SSEMODE12
4665 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4666 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4668 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand saturating add/subtract on SSEMODE12:
;; vp<op><size> with operand 2 in a register or memory.
4670 (define_insn "*avx_<plusminus_insn><mode>3"
4671 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4672 (sat_plusminus:SSEMODE12
4673 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4674 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4675 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4676 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4677 [(set_attr "type" "sseiadd")
4678 (set_attr "prefix" "vex")
4679 (set_attr "mode" "TI")])
;; SSE2 two-operand saturating add/subtract on SSEMODE12: p<op><size> with
;; operand 1 tied to the destination.
4681 (define_insn "*sse2_<plusminus_insn><mode>3"
4682 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4683 (sat_plusminus:SSEMODE12
4684 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4685 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4686 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4687 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4688 [(set_attr "type" "sseiadd")
4689 (set_attr "prefix_data16" "1")
4690 (set_attr "mode" "TI")])
;; V16QI multiply, implemented entirely by its splitter, which runs only
;; before reload has started.  There is no byte multiply instruction, so the
;; work is done with V8HI word multiplies (mulv8hi3):
;;  - SSE5 path: ix86_expand_sse5_unpack widens the high and low bytes of
;;    each source into V8HI temporaries, the halves are multiplied, and
;;    ix86_expand_sse5_pack packs the low byte of each word into operand 0.
;;  - Other path: punpck{h,l}bw duplicates each source byte into both bytes
;;    of a word, two word multiplies are done, and a cascade of
;;    punpck{h,l}bw merges the low result bytes back into byte order (see
;;    the per-instruction layout annotations below).
4692 (define_insn_and_split "mulv16qi3"
4693 [(set (match_operand:V16QI 0 "register_operand" "")
4694 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4695 (match_operand:V16QI 2 "register_operand" "")))]
4697 && !(reload_completed || reload_in_progress)"
4702 rtx t[12], op0, op[3];
4707 /* On SSE5, we can take advantage of the pperm instruction to pack and
4708 unpack the bytes. Unpack data such that we've got a source byte in
4709 each low byte of each word. We don't care what goes into the high
4710 byte, so put 0 there. */
4711 for (i = 0; i < 6; ++i)
4712 t[i] = gen_reg_rtx (V8HImode);
4714 for (i = 0; i < 2; i++)
4717 op[1] = operands[i+1];
4718 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4721 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4724 /* Multiply words. */
4725 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4726 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4728 /* Pack the low byte of each word back into a single xmm */
4729 op[0] = operands[0];
4732 ix86_expand_sse5_pack (op);
4736 for (i = 0; i < 12; ++i)
4737 t[i] = gen_reg_rtx (V16QImode);
4739 /* Unpack data such that we've got a source byte in each low byte of
4740 each word. We don't care what goes into the high byte of each word.
4741 Rather than trying to get zero in there, most convenient is to let
4742 it be a copy of the low byte. */
4743 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4744 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4745 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4746 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4748 /* Multiply words. The end-of-line annotations here give a picture of what
4749 the output of that instruction looks like. Dot means don't care; the
4750 letters are the bytes of the result with A being the most significant. */
4751 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4752 gen_lowpart (V8HImode, t[0]),
4753 gen_lowpart (V8HImode, t[1])));
4754 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4755 gen_lowpart (V8HImode, t[2]),
4756 gen_lowpart (V8HImode, t[3])));
4758 /* Extract the relevant bytes and merge them back together. */
4759 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4760 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4761 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4762 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4763 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4764 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4767 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
4771 (define_expand "mulv8hi3"
4772 [(set (match_operand:V8HI 0 "register_operand" "")
4773 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4774 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4776 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI multiply: vpmullw.  Operands 1 and 2 are
;; commutative (%).
4778 (define_insn "*avx_mulv8hi3"
4779 [(set (match_operand:V8HI 0 "register_operand" "=x")
4780 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4781 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4782 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4783 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4784 [(set_attr "type" "sseimul")
4785 (set_attr "prefix" "vex")
4786 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI multiply: pmullw, with operand 1 tied to the
;; destination and commutative with operand 2.
4788 (define_insn "*mulv8hi3"
4789 [(set (match_operand:V8HI 0 "register_operand" "=x")
4790 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4791 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4792 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4793 "pmullw\t{%2, %0|%0, %2}"
4794 [(set_attr "type" "sseimul")
4795 (set_attr "prefix_data16" "1")
4796 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply (matched by the pmulhw
;; patterns).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
4798 (define_expand "smulv8hi3_highpart"
4799 [(set (match_operand:V8HI 0 "register_operand" "")
4804 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4806 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4809 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand signed V8HI high-part multiply: vpmulhw.  Operands 1
;; and 2 are commutative (%).  The pattern name follows the same scheme as
;; *smulv8hi3_highpart and *avx_umulv8hi3_highpart; names starting with '*'
;; are only used for identification in dumps, not for gen_* functions.
4811 (define_insn "*avx_smulv8hi3_highpart"
4812 [(set (match_operand:V8HI 0 "register_operand" "=x")
4817 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4819 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4821 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4822 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4823 [(set_attr "type" "sseimul")
4824 (set_attr "prefix" "vex")
4825 (set_attr "mode" "TI")])
;; SSE2 two-operand signed V8HI high-part multiply: pmulhw, with operand 1
;; tied to the destination and commutative with operand 2.
4827 (define_insn "*smulv8hi3_highpart"
4828 [(set (match_operand:V8HI 0 "register_operand" "=x")
4833 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4835 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4837 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4838 "pmulhw\t{%2, %0|%0, %2}"
4839 [(set_attr "type" "sseimul")
4840 (set_attr "prefix_data16" "1")
4841 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply (matched by the
;; pmulhuw patterns).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
4843 (define_expand "umulv8hi3_highpart"
4844 [(set (match_operand:V8HI 0 "register_operand" "")
4849 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4851 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4854 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand unsigned V8HI high-part multiply: vpmulhuw.  Operands
;; 1 and 2 are commutative (%).
4856 (define_insn "*avx_umulv8hi3_highpart"
4857 [(set (match_operand:V8HI 0 "register_operand" "=x")
4862 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4864 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4866 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4867 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
4868 [(set_attr "type" "sseimul")
4869 (set_attr "prefix" "vex")
4870 (set_attr "mode" "TI")])
;; SSE2 two-operand unsigned V8HI high-part multiply: pmulhuw, with
;; operand 1 tied to the destination and commutative with operand 2.
4872 (define_insn "*umulv8hi3_highpart"
4873 [(set (match_operand:V8HI 0 "register_operand" "=x")
4878 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4880 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4882 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4883 "pmulhuw\t{%2, %0|%0, %2}"
4884 [(set_attr "type" "sseimul")
4885 (set_attr "prefix_data16" "1")
4886 (set_attr "mode" "TI")])
;; Expander for the widening multiply that takes elements 0 and 2 of each
;; V4SI source and produces a V2DI result (matched by the pmuludq patterns).
;; Operands are passed through ix86_fixup_binary_operands_no_copy.
4888 (define_expand "sse2_umulv2siv2di3"
4889 [(set (match_operand:V2DI 0 "register_operand" "")
4893 (match_operand:V4SI 1 "nonimmediate_operand" "")
4894 (parallel [(const_int 0) (const_int 2)])))
4897 (match_operand:V4SI 2 "nonimmediate_operand" "")
4898 (parallel [(const_int 0) (const_int 2)])))))]
4900 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuludq: multiplies elements 0 and 2 of the two V4SI
;; sources into a V2DI result.  Operands 1 and 2 are commutative (%).
4902 (define_insn "*avx_umulv2siv2di3"
4903 [(set (match_operand:V2DI 0 "register_operand" "=x")
4907 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4908 (parallel [(const_int 0) (const_int 2)])))
4911 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4912 (parallel [(const_int 0) (const_int 2)])))))]
4913 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4914 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4915 [(set_attr "type" "sseimul")
4916 (set_attr "prefix" "vex")
4917 (set_attr "mode" "TI")])
;; SSE2 two-operand pmuludq: multiplies elements 0 and 2 of the two V4SI
;; sources into a V2DI result; operand 1 is tied to the destination.
4919 (define_insn "*sse2_umulv2siv2di3"
4920 [(set (match_operand:V2DI 0 "register_operand" "=x")
4924 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4925 (parallel [(const_int 0) (const_int 2)])))
4928 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4929 (parallel [(const_int 0) (const_int 2)])))))]
4930 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4931 "pmuludq\t{%2, %0|%0, %2}"
4932 [(set_attr "type" "sseimul")
4933 (set_attr "prefix_data16" "1")
4934 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 widening multiply that takes elements 0 and 2 of
;; each V4SI source and produces a V2DI result (matched by the pmuldq
;; patterns).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
4936 (define_expand "sse4_1_mulv2siv2di3"
4937 [(set (match_operand:V2DI 0 "register_operand" "")
4941 (match_operand:V4SI 1 "nonimmediate_operand" "")
4942 (parallel [(const_int 0) (const_int 2)])))
4945 (match_operand:V4SI 2 "nonimmediate_operand" "")
4946 (parallel [(const_int 0) (const_int 2)])))))]
4948 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuldq: multiplies elements 0 and 2 of the two V4SI
;; sources into a V2DI result.  Operands 1 and 2 are commutative (%).
4950 (define_insn "*avx_mulv2siv2di3"
4951 [(set (match_operand:V2DI 0 "register_operand" "=x")
4955 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4956 (parallel [(const_int 0) (const_int 2)])))
4959 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4960 (parallel [(const_int 0) (const_int 2)])))))]
4961 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4962 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4963 [(set_attr "type" "sseimul")
4964 (set_attr "prefix" "vex")
4965 (set_attr "mode" "TI")])
;; SSE4.1 two-operand pmuldq: multiplies elements 0 and 2 of the two V4SI
;; sources into a V2DI result; operand 1 is tied to the destination.
4967 (define_insn "*sse4_1_mulv2siv2di3"
4968 [(set (match_operand:V2DI 0 "register_operand" "=x")
4972 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4973 (parallel [(const_int 0) (const_int 2)])))
4976 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4977 (parallel [(const_int 0) (const_int 2)])))))]
4978 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4979 "pmuldq\t{%2, %0|%0, %2}"
4980 [(set_attr "type" "sseimul")
4981 (set_attr "prefix_extra" "1")
4982 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V8HI sources, V4SI result.  The pattern takes one
;; V4HI selection of each source starting at element 0 and a second
;; selection of each source starting at element 1 and ending at element 7.
;; Operands are passed through ix86_fixup_binary_operands_no_copy.
4984 (define_expand "sse2_pmaddwd"
4985 [(set (match_operand:V4SI 0 "register_operand" "")
4990 (match_operand:V8HI 1 "nonimmediate_operand" "")
4991 (parallel [(const_int 0)
4997 (match_operand:V8HI 2 "nonimmediate_operand" "")
4998 (parallel [(const_int 0)
5004 (vec_select:V4HI (match_dup 1)
5005 (parallel [(const_int 1)
5010 (vec_select:V4HI (match_dup 2)
5011 (parallel [(const_int 1)
5014 (const_int 7)]))))))]
5016 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand vpmaddwd: V8HI sources, V4SI result, using the same
;; element selections as the sse2_pmaddwd expander.  Operands 1 and 2 are
;; commutative (%).
5018 (define_insn "*avx_pmaddwd"
5019 [(set (match_operand:V4SI 0 "register_operand" "=x")
5024 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5025 (parallel [(const_int 0)
5031 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5032 (parallel [(const_int 0)
5038 (vec_select:V4HI (match_dup 1)
5039 (parallel [(const_int 1)
5044 (vec_select:V4HI (match_dup 2)
5045 (parallel [(const_int 1)
5048 (const_int 7)]))))))]
5049 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5050 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5051 [(set_attr "type" "sseiadd")
5052 (set_attr "prefix" "vex")
5053 (set_attr "mode" "TI")])
;; SSE2 two-operand pmaddwd: V8HI sources, V4SI result, using the same
;; element selections as the sse2_pmaddwd expander.  Operand 1 is tied to
;; the destination.
5055 (define_insn "*sse2_pmaddwd"
5056 [(set (match_operand:V4SI 0 "register_operand" "=x")
5061 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5062 (parallel [(const_int 0)
5068 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5069 (parallel [(const_int 0)
5075 (vec_select:V4HI (match_dup 1)
5076 (parallel [(const_int 1)
5081 (vec_select:V4HI (match_dup 2)
5082 (parallel [(const_int 1)
5085 (const_int 7)]))))))]
5086 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5087 "pmaddwd\t{%2, %0|%0, %2}"
5088 [(set_attr "type" "sseiadd")
5089 (set_attr "prefix_data16" "1")
5090 (set_attr "mode" "TI")])
;; V4SI multiply expander.  Both sources must be registers.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy only when
;; TARGET_SSE4_1 or TARGET_SSE5 is set.
5092 (define_expand "mulv4si3"
5093 [(set (match_operand:V4SI 0 "register_operand" "")
5094 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5095 (match_operand:V4SI 2 "register_operand" "")))]
5098 if (TARGET_SSE4_1 || TARGET_SSE5)
5099 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX three-operand V4SI multiply: vpmulld.  Operands 1 and 2 are
;; commutative (%).
5102 (define_insn "*avx_mulv4si3"
5103 [(set (match_operand:V4SI 0 "register_operand" "=x")
5104 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5105 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5106 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5107 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5108 [(set_attr "type" "sseimul")
5109 (set_attr "prefix" "vex")
5110 (set_attr "mode" "TI")])
;; SSE4.1 two-operand V4SI multiply: pmulld, with operand 1 tied to the
;; destination and commutative with operand 2.
5112 (define_insn "*sse4_1_mulv4si3"
5113 [(set (match_operand:V4SI 0 "register_operand" "=x")
5114 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5115 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5116 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5117 "pmulld\t{%2, %0|%0, %2}"
5118 [(set_attr "type" "sseimul")
5119 (set_attr "prefix_extra" "1")
5120 (set_attr "mode" "TI")])
5122 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5123 ;; multiply/add. In general, we expect the define_split to occur before
5124 ;; register allocation, so we have to handle the corner case where the target
5125 ;; is the same as one of the inputs.
;; V4SI multiply rewritten as a multiply-add.  The destination is
;; early-clobber (=&x).  The split fires once reload has completed, or
;; earlier when operand 0 is not mentioned in either source.  The
;; replacement is a plus of the product and an addend, and the preparation
;; code sets operands[3] to the V4SI zero vector.
5126 (define_insn_and_split "*sse5_mulv4si3"
5127 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5128 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5129 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5132 "&& (reload_completed
5133 || (!reg_mentioned_p (operands[0], operands[1])
5134 && !reg_mentioned_p (operands[0], operands[2])))"
5138 (plus:V4SI (mult:V4SI (match_dup 1)
5142 operands[3] = CONST0_RTX (V4SImode);
5144 [(set_attr "type" "ssemuladd")
5145 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or SSE5, implemented by
;; its splitter, which runs only before reload has started.  Steps:
;;  1. Multiply elements 2 and 0 with sse2_umulv2siv2di3 (result in t1).
;;  2. Shift both inputs right as 128-bit values (sse2_lshrti3) so that
;;     elements 3 and 1 land in the slots for 2 and 0 (t2, t3).
;;  3. Multiply those with sse2_umulv2siv2di3 (result in t4).
;;  4. Use sse2_pshufd_1 with selectors 0,2,0,0 to move the wanted product
;;     words into elements 0 and 1 of t5 and t6.
;;  5. Interleave t5 and t6 with sse2_punpckldq into the destination.
5147 (define_insn_and_split "*sse2_mulv4si3"
5148 [(set (match_operand:V4SI 0 "register_operand" "")
5149 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5150 (match_operand:V4SI 2 "register_operand" "")))]
5151 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5152 && !(reload_completed || reload_in_progress)"
5157 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5163 t1 = gen_reg_rtx (V4SImode);
5164 t2 = gen_reg_rtx (V4SImode);
5165 t3 = gen_reg_rtx (V4SImode);
5166 t4 = gen_reg_rtx (V4SImode);
5167 t5 = gen_reg_rtx (V4SImode);
5168 t6 = gen_reg_rtx (V4SImode);
5169 thirtytwo = GEN_INT (32);
5171 /* Multiply elements 2 and 0. */
5172 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5175 /* Shift both input vectors down one element, so that elements 3
5176 and 1 are now in the slots for elements 2 and 0. For K8, at
5177 least, this is faster than using a shuffle. */
5178 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5179 gen_lowpart (TImode, op1),
5181 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5182 gen_lowpart (TImode, op2),
5184 /* Multiply elements 3 and 1. */
5185 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5188 /* Move the results in element 2 down to element 1; we don't care
5189 what goes in elements 2 and 3. */
5190 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5191 const0_rtx, const0_rtx));
5192 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5193 const0_rtx, const0_rtx));
5195 /* Merge the parts back together. */
5196 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply; only matches while reload is neither in progress nor
;; completed.  Two expansion sequences are visible in the body:
;;  - an SSE5 sequence built from pshufd, pmacsdd, phadddq, a left shift
;;    by 32 and pmacsdql, as described by the per-step comments;
;;  - a sequence built from three gen_sse2_umulv2siv2di3 products (low
;;    parts, plus the two high-by-low cross terms) whose cross terms are
;;    shifted left by 32 and added to the low product.
5200 (define_insn_and_split "mulv2di3"
5201 [(set (match_operand:V2DI 0 "register_operand" "")
5202 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5203 (match_operand:V2DI 2 "register_operand" "")))]
5205 && !(reload_completed || reload_in_progress)"
5210 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5215 /* op1: A,B,C,D, op2: E,F,G,H */
5217 op1 = gen_lowpart (V4SImode, operands[1]);
5218 op2 = gen_lowpart (V4SImode, operands[2]);
5219 t1 = gen_reg_rtx (V4SImode);
5220 t2 = gen_reg_rtx (V4SImode);
5221 t3 = gen_reg_rtx (V4SImode);
5222 t4 = gen_reg_rtx (V2DImode);
5223 t5 = gen_reg_rtx (V2DImode);
5226 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5233 emit_move_insn (t2, CONST0_RTX (V4SImode));
5235 /* t3: (B*E),(A*F),(D*G),(C*H) */
5236 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5238 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5239 emit_insn (gen_sse5_phadddq (t4, t3));
5241 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5242 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5244 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5245 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5252 t1 = gen_reg_rtx (V2DImode);
5253 t2 = gen_reg_rtx (V2DImode);
5254 t3 = gen_reg_rtx (V2DImode);
5255 t4 = gen_reg_rtx (V2DImode);
5256 t5 = gen_reg_rtx (V2DImode);
5257 t6 = gen_reg_rtx (V2DImode);
5258 thirtytwo = GEN_INT (32);
5260 /* Multiply low parts. */
5261 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5262 gen_lowpart (V4SImode, op2)));
5264 /* Shift input vectors right 32 bits so we can multiply high parts. */
5265 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5266 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5268 /* Multiply high parts by low parts. */
5269 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5270 gen_lowpart (V4SImode, t3)));
5271 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5272 gen_lowpart (V4SImode, t2)));
5274 /* Shift them back. */
5275 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5276 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5278 /* Add the three parts together. */
5279 emit_insn (gen_addv2di3 (t6, t1, t4));
5280 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, V8HI inputs to a V4SI result ("hi" variant).
;; Emits gen_mulv8hi3 and gen_smulv8hi3_highpart on the two inputs into
;; fresh V8HI temporaries, then combines them with
;; gen_vec_interleave_highv8hi into operand 0 viewed as V8HI.
5284 (define_expand "vec_widen_smult_hi_v8hi"
5285 [(match_operand:V4SI 0 "register_operand" "")
5286 (match_operand:V8HI 1 "register_operand" "")
5287 (match_operand:V8HI 2 "register_operand" "")]
5290 rtx op1, op2, t1, t2, dest;
5294 t1 = gen_reg_rtx (V8HImode);
5295 t2 = gen_reg_rtx (V8HImode);
5296 dest = gen_lowpart (V8HImode, operands[0]);
5298 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5299 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5300 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, V8HI inputs to a V4SI result ("lo" variant).
;; Emits gen_mulv8hi3 and gen_smulv8hi3_highpart on the two inputs into
;; fresh V8HI temporaries, then combines them with
;; gen_vec_interleave_lowv8hi into operand 0 viewed as V8HI.
5304 (define_expand "vec_widen_smult_lo_v8hi"
5305 [(match_operand:V4SI 0 "register_operand" "")
5306 (match_operand:V8HI 1 "register_operand" "")
5307 (match_operand:V8HI 2 "register_operand" "")]
5310 rtx op1, op2, t1, t2, dest;
5314 t1 = gen_reg_rtx (V8HImode);
5315 t2 = gen_reg_rtx (V8HImode);
5316 dest = gen_lowpart (V8HImode, operands[0]);
5318 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5319 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5320 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, V8HI inputs to a V4SI result ("hi" variant).
;; Same shape as the signed expander but uses gen_umulv8hi3_highpart for
;; the second temporary; the two temporaries are combined with
;; gen_vec_interleave_highv8hi into operand 0 viewed as V8HI.
5324 (define_expand "vec_widen_umult_hi_v8hi"
5325 [(match_operand:V4SI 0 "register_operand" "")
5326 (match_operand:V8HI 1 "register_operand" "")
5327 (match_operand:V8HI 2 "register_operand" "")]
5330 rtx op1, op2, t1, t2, dest;
5334 t1 = gen_reg_rtx (V8HImode);
5335 t2 = gen_reg_rtx (V8HImode);
5336 dest = gen_lowpart (V8HImode, operands[0]);
5338 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5339 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5340 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, V8HI inputs to a V4SI result ("lo" variant).
;; Emits gen_mulv8hi3 and gen_umulv8hi3_highpart on the two inputs into
;; fresh V8HI temporaries, then combines them with
;; gen_vec_interleave_lowv8hi into operand 0 viewed as V8HI.
5344 (define_expand "vec_widen_umult_lo_v8hi"
5345 [(match_operand:V4SI 0 "register_operand" "")
5346 (match_operand:V8HI 1 "register_operand" "")
5347 (match_operand:V8HI 2 "register_operand" "")]
5350 rtx op1, op2, t1, t2, dest;
5354 t1 = gen_reg_rtx (V8HImode);
5355 t2 = gen_reg_rtx (V8HImode);
5356 dest = gen_lowpart (V8HImode, operands[0]);
5358 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5359 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5360 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply, V4SI inputs to a V2DI result ("hi" variant).
;; Each input is first shuffled with gen_sse2_pshufd_1 into a fresh V4SI
;; temporary; the temporaries are then passed to
;; gen_sse5_mulv2div2di3_high, which writes operand 0.
5364 (define_expand "vec_widen_smult_hi_v4si"
5365 [(match_operand:V2DI 0 "register_operand" "")
5366 (match_operand:V4SI 1 "register_operand" "")
5367 (match_operand:V4SI 2 "register_operand" "")]
5372 t1 = gen_reg_rtx (V4SImode);
5373 t2 = gen_reg_rtx (V4SImode);
5375 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5380 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5385 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed multiply, V4SI inputs to a V2DI result ("lo" variant).
;; Each input is first shuffled with gen_sse2_pshufd_1 into a fresh V4SI
;; temporary; the temporaries are then passed to
;; gen_sse5_mulv2div2di3_low, which writes operand 0.
5389 (define_expand "vec_widen_smult_lo_v4si"
5390 [(match_operand:V2DI 0 "register_operand" "")
5391 (match_operand:V4SI 1 "register_operand" "")
5392 (match_operand:V4SI 2 "register_operand" "")]
5397 t1 = gen_reg_rtx (V4SImode);
5398 t2 = gen_reg_rtx (V4SImode);
5400 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5405 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5410 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply, V4SI inputs to a V2DI result ("hi" variant).
;; Each input is interleaved with itself via gen_vec_interleave_highv4si,
;; and the two results are multiplied with gen_sse2_umulv2siv2di3 into
;; operand 0.
5415 (define_expand "vec_widen_umult_hi_v4si"
5416 [(match_operand:V2DI 0 "register_operand" "")
5417 (match_operand:V4SI 1 "register_operand" "")
5418 (match_operand:V4SI 2 "register_operand" "")]
5421 rtx op1, op2, t1, t2;
5425 t1 = gen_reg_rtx (V4SImode);
5426 t2 = gen_reg_rtx (V4SImode);
5428 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5429 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5430 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, V4SI inputs to a V2DI result ("lo" variant).
;; Each input is interleaved with itself via gen_vec_interleave_lowv4si,
;; and the two results are multiplied with gen_sse2_umulv2siv2di3 into
;; operand 0.
5434 (define_expand "vec_widen_umult_lo_v4si"
5435 [(match_operand:V2DI 0 "register_operand" "")
5436 (match_operand:V4SI 1 "register_operand" "")
5437 (match_operand:V4SI 2 "register_operand" "")]
5440 rtx op1, op2, t1, t2;
5444 t1 = gen_reg_rtx (V4SImode);
5445 t2 = gen_reg_rtx (V4SImode);
5447 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5448 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5449 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product of two V8HI vectors accumulated into V4SI:
;; a temporary receives gen_sse2_pmaddwd (operand 1, operand 2), and
;; operand 0 is set to operand 3 plus that temporary via gen_addv4si3.
5453 (define_expand "sdot_prodv8hi"
5454 [(match_operand:V4SI 0 "register_operand" "")
5455 (match_operand:V8HI 1 "register_operand" "")
5456 (match_operand:V8HI 2 "register_operand" "")
5457 (match_operand:V4SI 3 "register_operand" "")]
5460 rtx t = gen_reg_rtx (V4SImode);
5461 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5462 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of two V4SI vectors accumulated into V2DI.
;; t1 = umulv2siv2di3 (operand 1, operand 2) + operand 3.  Both inputs are
;; then shifted right as TImode values (gen_sse2_lshrti3) into t2 and t3,
;; multiplied again into t4, and operand 0 is set to t1 + t4.
5466 (define_expand "udot_prodv4si"
5467 [(match_operand:V2DI 0 "register_operand" "")
5468 (match_operand:V4SI 1 "register_operand" "")
5469 (match_operand:V4SI 2 "register_operand" "")
5470 (match_operand:V2DI 3 "register_operand" "")]
5475 t1 = gen_reg_rtx (V2DImode);
5476 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5477 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5479 t2 = gen_reg_rtx (V4SImode);
5480 t3 = gen_reg_rtx (V4SImode);
5481 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5482 gen_lowpart (TImode, operands[1]),
5484 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5485 gen_lowpart (TImode, operands[2]),
5488 t4 = gen_reg_rtx (V2DImode);
5489 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5491 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX (VEX-prefixed) form of the vector shift emitted as vpsra, for the
;; SSEMODE24 modes (V8HI, V4SI).  Three-operand template: the source
;; (operand 1) need not share the destination register.  The SImode shift
;; count may be a register or an immediate accepted by "N".
5495 (define_insn "*avx_ashr<mode>3"
5496 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5498 (match_operand:SSEMODE24 1 "register_operand" "x")
5499 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5501 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5502 [(set_attr "type" "sseishft")
5503 (set_attr "prefix" "vex")
5504 (set_attr "mode" "TI")])
;; Two-operand form of the vector shift emitted as psra, for the SSEMODE24
;; modes (V8HI, V4SI).  Operand 1 is tied to the destination ("0"), so the
;; template only names %0 and the SImode shift count %2.
5506 (define_insn "ashr<mode>3"
5507 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5509 (match_operand:SSEMODE24 1 "register_operand" "0")
5510 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5512 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5513 [(set_attr "type" "sseishft")
5514 (set_attr "prefix_data16" "1")
5515 (set_attr "mode" "TI")])
;; AVX (VEX-prefixed) logical right shift (lshiftrt) for the SSEMODE248
;; modes (V8HI, V4SI, V2DI), emitted as the three-operand vpsrl form.
;; The SImode shift count may be a register or an "N" immediate.
5517 (define_insn "*avx_lshr<mode>3"
5518 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5519 (lshiftrt:SSEMODE248
5520 (match_operand:SSEMODE248 1 "register_operand" "x")
5521 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5523 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5524 [(set_attr "type" "sseishft")
5525 (set_attr "prefix" "vex")
5526 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt) for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI), emitted as the two-operand psrl form.  Operand 1 is tied to the
;; destination ("0"); operand 2 is the SImode shift count.
5528 (define_insn "lshr<mode>3"
5529 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5530 (lshiftrt:SSEMODE248
5531 (match_operand:SSEMODE248 1 "register_operand" "0")
5532 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5534 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5535 [(set_attr "type" "sseishft")
5536 (set_attr "prefix_data16" "1")
5537 (set_attr "mode" "TI")])
;; AVX (VEX-prefixed) form of the vector shift emitted as vpsll, for the
;; SSEMODE248 modes (V8HI, V4SI, V2DI).  Three-operand template; the SImode
;; shift count may be a register or an "N" immediate.
5539 (define_insn "*avx_ashl<mode>3"
5540 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5542 (match_operand:SSEMODE248 1 "register_operand" "x")
5543 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5545 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5546 [(set_attr "type" "sseishft")
5547 (set_attr "prefix" "vex")
5548 (set_attr "mode" "TI")])
;; Two-operand form of the vector shift emitted as psll, for the
;; SSEMODE248 modes (V8HI, V4SI, V2DI).  Operand 1 is tied to the
;; destination ("0"); operand 2 is the SImode shift count.
5550 (define_insn "ashl<mode>3"
5551 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5553 (match_operand:SSEMODE248 1 "register_operand" "0")
5554 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5556 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5557 [(set_attr "type" "sseishft")
5558 (set_attr "prefix_data16" "1")
5559 (set_attr "mode" "TI")])
;; Whole-vector left shift for the SSEMODEI modes, expressed as a TImode
;; ashift.  The shift count must satisfy const_0_to_255_mul_8_operand.
;; The preparation code re-views operands 0 and 1 as TImode with
;; gen_lowpart so that they fit the TImode shift in the template.
5561 (define_expand "vec_shl_<mode>"
5562 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5563 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5564 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5567 operands[0] = gen_lowpart (TImode, operands[0]);
5568 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector right shift for the SSEMODEI modes, expressed as a TImode
;; lshiftrt.  The shift count must satisfy const_0_to_255_mul_8_operand.
;; The preparation code re-views operands 0 and 1 as TImode with
;; gen_lowpart so that they fit the TImode shift in the template.
5571 (define_expand "vec_shr_<mode>"
5572 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5573 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5574 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5577 operands[0] = gen_lowpart (TImode, operands[0]);
5578 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX (VEX-prefixed) form of the <code> operation on the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  Matched when TARGET_AVX is set and
;; ix86_binary_operator_ok accepts the operands; emits the three-operand
;; vp<maxminiprefix><ssevecsize> instruction.
5581 (define_insn "*avx_<code><mode>3"
5582 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5584 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5585 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5586 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5587 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5588 [(set_attr "type" "sseiadd")
5589 (set_attr "prefix" "vex")
5590 (set_attr "mode" "TI")])
;; Expander for the <code> operation on V16QI.  Its only preparation step
;; is ix86_fixup_binary_operands_no_copy, which is handed the operand
;; array before the template is emitted.
5592 (define_expand "<code>v16qi3"
5593 [(set (match_operand:V16QI 0 "register_operand" "")
5595 (match_operand:V16QI 1 "nonimmediate_operand" "")
5596 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5598 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 insn for the <code> operation on V16QI.  Matched when TARGET_SSE2
;; is set and ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; tied to the destination ("%0"); emits p<maxminiprefix>b.
5600 (define_insn "*<code>v16qi3"
5601 [(set (match_operand:V16QI 0 "register_operand" "=x")
5603 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5604 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5605 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5606 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5607 [(set_attr "type" "sseiadd")
5608 (set_attr "prefix_data16" "1")
5609 (set_attr "mode" "TI")])
;; Expander for the <code> operation on V8HI.  Its only preparation step
;; is ix86_fixup_binary_operands_no_copy, which is handed the operand
;; array before the template is emitted.
5611 (define_expand "<code>v8hi3"
5612 [(set (match_operand:V8HI 0 "register_operand" "")
5614 (match_operand:V8HI 1 "nonimmediate_operand" "")
5615 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5617 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 insn for the <code> operation on V8HI.  Matched when TARGET_SSE2
;; is set and ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; tied to the destination ("%0"); emits p<maxminiprefix>w.
5619 (define_insn "*<code>v8hi3"
5620 [(set (match_operand:V8HI 0 "register_operand" "=x")
5622 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5623 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5624 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5625 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5626 [(set_attr "type" "sseiadd")
5627 (set_attr "prefix_data16" "1")
5628 (set_attr "mode" "TI")])
;; Unsigned maximum on V8HI.  Two strategies are visible in the body:
;; a call to ix86_fixup_binary_operands_no_copy for the umax template, and
;; an explicit sequence computing ussub (operand 1, operand 2) + operand 2.
;; In the explicit sequence the intermediate goes to operand 0 unless
;; operand 0 is rtx_equal_p to operand 2, in which case a fresh V8HI
;; pseudo is used so operand 2 is still intact for the final add.
5630 (define_expand "umaxv8hi3"
5631 [(set (match_operand:V8HI 0 "register_operand" "")
5632 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5633 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5637 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5640 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5641 if (rtx_equal_p (op3, op2))
5642 op3 = gen_reg_rtx (V8HImode);
5643 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5644 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum on the SSEMODE14 modes (V16QI, V4SI).  Two strategies are
;; visible in the body: a call to ix86_fixup_binary_operands_no_copy for
;; the smax template, and a conditional-select expansion.  The latter fills
;; an operand array for ix86_expand_int_vcond: destination operand 0,
;; select operand 1 when (operand 1 GT operand 2) holds, else operand 2.
5649 (define_expand "smax<mode>3"
5650 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5651 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5652 (match_operand:SSEMODE14 2 "register_operand" "")))]
5656 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5662 xops[0] = operands[0];
5663 xops[1] = operands[1];
5664 xops[2] = operands[2];
5665 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5666 xops[4] = operands[1];
5667 xops[5] = operands[2];
5668 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operation on the SSEMODE14 modes (V16QI,
;; V4SI).  Matched when TARGET_SSE4_1 is set and ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is tied to the destination ("%0");
;; emits p<maxminiprefix><ssevecsize>.
5674 (define_insn "*sse4_1_<code><mode>3"
5675 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5677 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5678 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5679 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5680 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5681 [(set_attr "type" "sseiadd")
5682 (set_attr "prefix_extra" "1")
5683 (set_attr "mode" "TI")])
;; Unsigned maximum on V4SI.  Two strategies are visible in the body:
;; a call to ix86_fixup_binary_operands_no_copy for the umax template, and
;; a conditional-select expansion.  The latter fills an operand array for
;; ix86_expand_int_vcond: destination operand 0, select operand 1 when
;; (operand 1 GTU operand 2) holds, else operand 2.
5685 (define_expand "umaxv4si3"
5686 [(set (match_operand:V4SI 0 "register_operand" "")
5687 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5688 (match_operand:V4SI 2 "register_operand" "")))]
5692 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5698 xops[0] = operands[0];
5699 xops[1] = operands[1];
5700 xops[2] = operands[2];
5701 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5702 xops[4] = operands[1];
5703 xops[5] = operands[2];
5704 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operation on the SSEMODE24 modes (V8HI,
;; V4SI).  Matched when TARGET_SSE4_1 is set and ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is tied to the destination ("%0");
;; emits p<maxminiprefix><ssevecsize>.
5710 (define_insn "*sse4_1_<code><mode>3"
5711 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5713 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5714 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5715 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5716 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5717 [(set_attr "type" "sseiadd")
5718 (set_attr "prefix_extra" "1")
5719 (set_attr "mode" "TI")])
;; Signed minimum on the SSEMODE14 modes (V16QI, V4SI).  Two strategies are
;; visible in the body: a call to ix86_fixup_binary_operands_no_copy for
;; the smin template, and a conditional-select expansion.  The latter uses
;; the same (operand 1 GT operand 2) comparison as the maximum expander but
;; swaps the select arms: operand 2 when the comparison holds, else
;; operand 1.
5721 (define_expand "smin<mode>3"
5722 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5723 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5724 (match_operand:SSEMODE14 2 "register_operand" "")))]
5728 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5734 xops[0] = operands[0];
5735 xops[1] = operands[2];
5736 xops[2] = operands[1];
5737 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5738 xops[4] = operands[1];
5739 xops[5] = operands[2];
5740 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum on the SSEMODE24 modes (V8HI, V4SI).  Two strategies
;; are visible in the body: a call to ix86_fixup_binary_operands_no_copy
;; for the umin template, and a conditional-select expansion.  The latter
;; compares (operand 1 GTU operand 2) and selects operand 2 when the
;; comparison holds, else operand 1, via ix86_expand_int_vcond.
5746 (define_expand "umin<mode>3"
5747 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5748 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5749 (match_operand:SSEMODE24 2 "register_operand" "")))]
5753 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5759 xops[0] = operands[0];
5760 xops[1] = operands[2];
5761 xops[2] = operands[1];
5762 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5763 xops[4] = operands[1];
5764 xops[5] = operands[2];
5765 ok = ix86_expand_int_vcond (xops);
5771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5773 ;; Parallel integral comparisons
5775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on the SSEMODE124 modes (V16QI,
;; V8HI, V4SI).  Enabled for TARGET_SSE2 without TARGET_SSE5; its only
;; preparation step is ix86_fixup_binary_operands_no_copy with code EQ.
5777 (define_expand "sse2_eq<mode>3"
5778 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5780 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5781 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5782 "TARGET_SSE2 && !TARGET_SSE5"
5783 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX (VEX-prefixed) element-wise equality for the SSEMODE1248 modes
;; (V16QI, V8HI, V4SI, V2DI).  Matched when TARGET_AVX is set and
;; ix86_binary_operator_ok accepts the operands; emits the three-operand
;; vpcmpeq<ssevecsize> instruction.
5785 (define_insn "*avx_eq<mode>3"
5786 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5788 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5789 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5790 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5791 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5792 [(set_attr "type" "ssecmp")
5793 (set_attr "prefix" "vex")
5794 (set_attr "mode" "TI")])
;; SSE2 element-wise equality for the SSEMODE124 modes (V16QI, V8HI, V4SI).
;; Matched for TARGET_SSE2 without TARGET_SSE5 when
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination ("%0"); emits pcmpeq<ssevecsize>.
5796 (define_insn "*sse2_eq<mode>3"
5797 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5799 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
5800 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5801 "TARGET_SSE2 && !TARGET_SSE5
5802 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5803 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
5804 [(set_attr "type" "ssecmp")
5805 (set_attr "prefix_data16" "1")
5806 (set_attr "mode" "TI")])
;; Expander for element-wise equality on V2DI.  Its only preparation step
;; is ix86_fixup_binary_operands_no_copy with code EQ.
5808 (define_expand "sse4_1_eqv2di3"
5809 [(set (match_operand:V2DI 0 "register_operand" "")
5811 (match_operand:V2DI 1 "nonimmediate_operand" "")
5812 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5814 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 element-wise equality on V2DI.  Matched when TARGET_SSE4_1 is
;; set and ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; tied to the destination ("%0"); emits pcmpeqq.
5816 (define_insn "*sse4_1_eqv2di3"
5817 [(set (match_operand:V2DI 0 "register_operand" "=x")
5819 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
5820 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5821 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5822 "pcmpeqq\t{%2, %0|%0, %2}"
5823 [(set_attr "type" "ssecmp")
5824 (set_attr "prefix_extra" "1")
5825 (set_attr "mode" "TI")])
;; AVX (VEX-prefixed) element-wise compare for the SSEMODE1248 modes
;; (V16QI, V8HI, V4SI, V2DI), emitted as the three-operand
;; vpcmpgt<ssevecsize> form.  Operand 1 must be a register; operand 2 may
;; be a register or memory.
5827 (define_insn "*avx_gt<mode>3"
5828 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5830 (match_operand:SSEMODE1248 1 "register_operand" "x")
5831 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5833 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5834 [(set_attr "type" "ssecmp")
5835 (set_attr "prefix" "vex")
5836 (set_attr "mode" "TI")])
;; SSE2 element-wise compare for the SSEMODE124 modes (V16QI, V8HI, V4SI),
;; enabled for TARGET_SSE2 without TARGET_SSE5.  Operand 1 is tied to the
;; destination ("0", with no commutative marker); emits
;; pcmpgt<ssevecsize>.
5838 (define_insn "sse2_gt<mode>3"
5839 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5841 (match_operand:SSEMODE124 1 "register_operand" "0")
5842 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5843 "TARGET_SSE2 && !TARGET_SSE5"
5844 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
5845 [(set_attr "type" "ssecmp")
5846 (set_attr "prefix_data16" "1")
5847 (set_attr "mode" "TI")])
;; Element-wise compare on V2DI, emitted as pcmpgtq.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
5849 (define_insn "sse4_2_gtv2di3"
5850 [(set (match_operand:V2DI 0 "register_operand" "=x")
5852 (match_operand:V2DI 1 "register_operand" "0")
5853 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5855 "pcmpgtq\t{%2, %0|%0, %2}"
5856 [(set_attr "type" "ssecmp")
5857 (set_attr "mode" "TI")])
;; Vector conditional select over the SSEMODE124C8 modes:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; The whole expansion is delegated to ix86_expand_int_vcond, whose
;; boolean result is captured in `ok'.
5859 (define_expand "vcond<mode>"
5860 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
5861 (if_then_else:SSEMODE124C8
5862 (match_operator 3 ""
5863 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
5864 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
5865 (match_operand:SSEMODE124C8 1 "general_operand" "")
5866 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
5869 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select (the "vcondu" named pattern) over the
;; SSEMODE124C8 modes, with the same operand layout as vcond<mode>:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; The whole expansion is delegated to ix86_expand_int_vcond.
5874 (define_expand "vcondu<mode>"
5875 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
5876 (if_then_else:SSEMODE124C8
5877 (match_operator 3 ""
5878 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
5879 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
5880 (match_operand:SSEMODE124C8 1 "general_operand" "")
5881 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
5884 bool ok = ix86_expand_int_vcond (operands);
5889 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5891 ;; Parallel bitwise logical operations
5893 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an XOR.  The
;; preparation code builds a constant vector with every one of the mode's
;; GET_MODE_NUNITS elements set to constm1_rtx (all ones), forces it into
;; a register, and stores it as operands[2] for the XOR template.
5895 (define_expand "one_cmpl<mode>2"
5896 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5897 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5901 int i, n = GET_MODE_NUNITS (<MODE>mode);
5902 rtvec v = rtvec_alloc (n);
5904 for (i = 0; i < n; ++i)
5905 RTVEC_ELT (v, i) = constm1_rtx;
5907 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 256-bit integer modes (AVX256MODEI): operand 1 is
;; complemented (not) before being combined with operand 2.  Emitted with
;; the vandnps mnemonic; the insn "mode" attribute is <avxvecpsmode>.
5910 (define_insn "*avx_andnot<mode>3"
5911 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5913 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
5914 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5916 "vandnps\t{%2, %1, %0|%0, %1, %2}"
5917 [(set_attr "type" "sselog")
5918 (set_attr "prefix" "vex")
5919 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the SSEMODEI modes on targets with SSE but without SSE2.
;; Operand 1 is complemented and tied to the destination ("0"); the
;; operation is emitted with the andnps mnemonic and attribute mode V4SF.
5921 (define_insn "*sse_andnot<mode>3"
5922 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5924 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5925 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5926 "(TARGET_SSE && !TARGET_SSE2)"
5927 "andnps\t{%2, %0|%0, %2}"
5928 [(set_attr "type" "sselog")
5929 (set_attr "mode" "V4SF")])
;; AVX (VEX-prefixed) and-not for the 128-bit integer modes (SSEMODEI):
;; operand 1 is complemented before being combined with operand 2.
;; Emitted as the three-operand vpandn instruction.
5931 (define_insn "*avx_andnot<mode>3"
5932 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5934 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
5935 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5937 "vpandn\t{%2, %1, %0|%0, %1, %2}"
5938 [(set_attr "type" "sselog")
5939 (set_attr "prefix" "vex")
5940 (set_attr "mode" "TI")])
;; And-not for the SSEMODEI modes, emitted as the two-operand pandn
;; instruction.  Operand 1 is complemented and tied to the destination
;; ("0"); operand 2 may be a register or memory.
5942 (define_insn "sse2_andnot<mode>3"
5943 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5945 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5946 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5948 "pandn\t{%2, %0|%0, %2}"
5949 [(set_attr "type" "sselog")
5950 (set_attr "prefix_data16" "1")
5951 (set_attr "mode" "TI")])
;; And-not on TFmode values, emitted with the integer pandn instruction.
;; Operand 1 is complemented and tied to the destination ("0"); operand 2
;; may be a register or memory.
5953 (define_insn "*andnottf3"
5954 [(set (match_operand:TF 0 "register_operand" "=x")
5956 (not:TF (match_operand:TF 1 "register_operand" "0"))
5957 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5959 "pandn\t{%2, %0|%0, %2}"
5960 [(set_attr "type" "sselog")
5961 (set_attr "prefix_data16" "1")
5962 (set_attr "mode" "TI")])
;; Expander for the <code> operation on the SSEMODEI modes.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy, which is handed
;; the operand array before the template is emitted.
5964 (define_expand "<code><mode>3"
5965 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5967 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5968 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5970 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX form of the <code> logical operation on the 256-bit integer modes
;; (AVX256MODEI).  Requires ix86_binary_operator_ok to accept the operands;
;; emitted with the v<plogicprefix>ps mnemonic, and the insn "mode"
;; attribute is <avxvecpsmode>.
5972 (define_insn "*avx_<code><mode>3"
5973 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5975 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
5976 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5978 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5979 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
5980 [(set_attr "type" "sselog")
5981 (set_attr "prefix" "vex")
5982 (set_attr "mode" "<avxvecpsmode>")])
;; <code> logical operation on the SSEMODEI modes for targets with SSE but
;; without SSE2.  Requires ix86_binary_operator_ok; operand 1 is tied to
;; the destination ("%0").  Emitted with the <plogicprefix>ps mnemonic and
;; attribute mode V4SF.
5984 (define_insn "*sse_<code><mode>3"
5985 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5987 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5988 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5989 "(TARGET_SSE && !TARGET_SSE2)
5990 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5991 "<plogicprefix>ps\t{%2, %0|%0, %2}"
5992 [(set_attr "type" "sselog")
5993 (set_attr "mode" "V4SF")])
;; AVX (VEX-prefixed) form of the <code> logical operation on the 128-bit
;; integer modes (SSEMODEI).  Requires ix86_binary_operator_ok to accept
;; the operands; emitted as the three-operand vp<plogicprefix> instruction.
5995 (define_insn "*avx_<code><mode>3"
5996 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5998 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
5999 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6001 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6002 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
6003 [(set_attr "type" "sselog")
6004 (set_attr "prefix" "vex")
6005 (set_attr "mode" "TI")])
;; SSE2 form of the <code> logical operation on the SSEMODEI modes.
;; Matched when TARGET_SSE2 is set and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is tied to the destination ("%0"); emits
;; p<plogicprefix>.
6007 (define_insn "*sse2_<code><mode>3"
6008 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6010 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6011 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6012 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6013 "p<plogicprefix>\t{%2, %0|%0, %2}"
6014 [(set_attr "type" "sselog")
6015 (set_attr "prefix_data16" "1")
6016 (set_attr "mode" "TI")])
;; Expander for the <code> logical operation on TFmode values.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy.
6018 (define_expand "<code>tf3"
6019 [(set (match_operand:TF 0 "register_operand" "")
6021 (match_operand:TF 1 "nonimmediate_operand" "")
6022 (match_operand:TF 2 "nonimmediate_operand" "")))]
6024 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; <code> logical operation on TFmode values, emitted with the integer
;; p<plogicprefix> instruction.  Matched when TARGET_SSE2 is set and
;; ix86_binary_operator_ok accepts the operands; operand 1 is tied to the
;; destination ("%0").
6026 (define_insn "*<code>tf3"
6027 [(set (match_operand:TF 0 "register_operand" "=x")
6029 (match_operand:TF 1 "nonimmediate_operand" "%0")
6030 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6031 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6032 "p<plogicprefix>\t{%2, %0|%0, %2}"
6033 [(set_attr "type" "sselog")
6034 (set_attr "prefix_data16" "1")
6035 (set_attr "mode" "TI")])
6037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6039 ;; Parallel integral element swizzling
6041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI vectors into one V16QI vector.  One path
;; hands the operands to ix86_expand_sse5_pack.  The other views both
;; inputs as V16QI and applies three rounds of high/low byte interleaves
;; followed by a final low interleave; the letter diagram below traces the
;; byte positions through each round.
6044 ;; op1 = abcdefghijklmnop
6045 ;; op2 = qrstuvwxyz012345
6046 ;; h1 = aqbrcsdteufvgwhx
6047 ;; l1 = iyjzk0l1m2n3o4p5
6048 ;; h2 = aiqybjrzcks0dlt1
6049 ;; l2 = emu2fnv3gow4hpx5
6050 ;; h3 = aeimquy2bfjnrvz3
6051 ;; l3 = cgkosw04dhlptx15
6052 ;; result = bdfhjlnprtvxz135
6053 (define_expand "vec_pack_trunc_v8hi"
6054 [(match_operand:V16QI 0 "register_operand" "")
6055 (match_operand:V8HI 1 "register_operand" "")
6056 (match_operand:V8HI 2 "register_operand" "")]
6059 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6063 ix86_expand_sse5_pack (operands);
6067 op1 = gen_lowpart (V16QImode, operands[1]);
6068 op2 = gen_lowpart (V16QImode, operands[2]);
6069 h1 = gen_reg_rtx (V16QImode);
6070 l1 = gen_reg_rtx (V16QImode);
6071 h2 = gen_reg_rtx (V16QImode);
6072 l2 = gen_reg_rtx (V16QImode);
6073 h3 = gen_reg_rtx (V16QImode);
6074 l3 = gen_reg_rtx (V16QImode);
6076 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6077 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6078 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6079 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6080 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6081 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6082 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
;; Truncating pack of two V4SI vectors into one V8HI vector.  One path
;; hands the operands to ix86_expand_sse5_pack.  The other views both
;; inputs as V8HI and applies two rounds of high/low halfword interleaves
;; followed by a final low interleave into operand 0.
6093 ;; result = bdfhjlnp
6094 (define_expand "vec_pack_trunc_v4si"
6095 [(match_operand:V8HI 0 "register_operand" "")
6096 (match_operand:V4SI 1 "register_operand" "")
6097 (match_operand:V4SI 2 "register_operand" "")]
6100 rtx op1, op2, h1, l1, h2, l2;
6104 ix86_expand_sse5_pack (operands);
6108 op1 = gen_lowpart (V8HImode, operands[1]);
6109 op2 = gen_lowpart (V8HImode, operands[2]);
6110 h1 = gen_reg_rtx (V8HImode);
6111 l1 = gen_reg_rtx (V8HImode);
6112 h2 = gen_reg_rtx (V8HImode);
6113 l2 = gen_reg_rtx (V8HImode);
6115 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6116 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6117 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6118 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6119 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Truncating pack of two V2DI vectors into one V4SI vector.  One path
;; hands the operands to ix86_expand_sse5_pack.  The other views both
;; inputs as V4SI, forms their high and low interleaves, and then
;; low-interleaves those two results into operand 0.
6129 (define_expand "vec_pack_trunc_v2di"
6130 [(match_operand:V4SI 0 "register_operand" "")
6131 (match_operand:V2DI 1 "register_operand" "")
6132 (match_operand:V2DI 2 "register_operand" "")]
6135 rtx op1, op2, h1, l1;
6139 ix86_expand_sse5_pack (operands);
6143 op1 = gen_lowpart (V4SImode, operands[1]);
6144 op2 = gen_lowpart (V4SImode, operands[2]);
6145 h1 = gen_reg_rtx (V4SImode);
6146 l1 = gen_reg_rtx (V4SImode);
6148 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6149 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6150 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Interleave the high halves of two V16QI vectors.  The selector picks
;; elements 8..15 of operand 1 alternating with elements 24..31 (that is,
;; elements 8..15 of operand 2 in the concatenated numbering).  The
;; expansion itself is emitted through gen_sse2_punpckhbw.
6154 (define_expand "vec_interleave_highv16qi"
6155 [(set (match_operand:V16QI 0 "register_operand" "")
6158 (match_operand:V16QI 1 "register_operand" "")
6159 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6160 (parallel [(const_int 8) (const_int 24)
6161 (const_int 9) (const_int 25)
6162 (const_int 10) (const_int 26)
6163 (const_int 11) (const_int 27)
6164 (const_int 12) (const_int 28)
6165 (const_int 13) (const_int 29)
6166 (const_int 14) (const_int 30)
6167 (const_int 15) (const_int 31)])))]
6170 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V16QI vectors.  The selector picks
;; elements 0..7 of operand 1 alternating with elements 16..23 (that is,
;; elements 0..7 of operand 2 in the concatenated numbering).  The
;; expansion itself is emitted through gen_sse2_punpcklbw.
6174 (define_expand "vec_interleave_lowv16qi"
6175 [(set (match_operand:V16QI 0 "register_operand" "")
6178 (match_operand:V16QI 1 "register_operand" "")
6179 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6180 (parallel [(const_int 0) (const_int 16)
6181 (const_int 1) (const_int 17)
6182 (const_int 2) (const_int 18)
6183 (const_int 3) (const_int 19)
6184 (const_int 4) (const_int 20)
6185 (const_int 5) (const_int 21)
6186 (const_int 6) (const_int 22)
6187 (const_int 7) (const_int 23)])))]
6190 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V8HI vectors.  The selector picks
;; elements 4..7 of operand 1 alternating with elements 12..15 (that is,
;; elements 4..7 of operand 2 in the concatenated numbering).  The
;; expansion itself is emitted through gen_sse2_punpckhwd.
;; Operand 0 uses an empty constraint string like every other expander
;; in this group; a lone "=" carries no register class.
6194 (define_expand "vec_interleave_highv8hi"
6195 [(set (match_operand:V8HI 0 "register_operand" "")
6198 (match_operand:V8HI 1 "register_operand" "")
6199 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6200 (parallel [(const_int 4) (const_int 12)
6201 (const_int 5) (const_int 13)
6202 (const_int 6) (const_int 14)
6203 (const_int 7) (const_int 15)])))]
6206 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V8HI vectors.  The selector picks
;; elements 0..3 of operand 1 alternating with elements 8..11 (that is,
;; elements 0..3 of operand 2 in the concatenated numbering).  The
;; expansion itself is emitted through gen_sse2_punpcklwd.
6210 (define_expand "vec_interleave_lowv8hi"
6211 [(set (match_operand:V8HI 0 "register_operand" "")
6214 (match_operand:V8HI 1 "register_operand" "")
6215 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6216 (parallel [(const_int 0) (const_int 8)
6217 (const_int 1) (const_int 9)
6218 (const_int 2) (const_int 10)
6219 (const_int 3) (const_int 11)])))]
6222 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V4SI vectors.  The selector is
;; (2, 6, 3, 7): elements 2 and 3 of operand 1 alternating with elements
;; 2 and 3 of operand 2 in the concatenated numbering.  The expansion
;; itself is emitted through gen_sse2_punpckhdq.
6226 (define_expand "vec_interleave_highv4si"
6227 [(set (match_operand:V4SI 0 "register_operand" "")
6230 (match_operand:V4SI 1 "register_operand" "")
6231 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6232 (parallel [(const_int 2) (const_int 6)
6233 (const_int 3) (const_int 7)])))]
6236 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V4SI vectors.  The selector is
;; (0, 4, 1, 5): elements 0 and 1 of operand 1 alternating with elements
;; 0 and 1 of operand 2 in the concatenated numbering.  The expansion
;; itself is emitted through gen_sse2_punpckldq.
6240 (define_expand "vec_interleave_lowv4si"
6241 [(set (match_operand:V4SI 0 "register_operand" "")
6244 (match_operand:V4SI 1 "register_operand" "")
6245 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6246 (parallel [(const_int 0) (const_int 4)
6247 (const_int 1) (const_int 5)])))]
6250 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Interleave the high elements of two V2DI vectors; the selector starts
;; with element 1 of operand 1.  The expansion itself is emitted through
;; gen_sse2_punpckhqdq.
6254 (define_expand "vec_interleave_highv2di"
6255 [(set (match_operand:V2DI 0 "register_operand" "")
6258 (match_operand:V2DI 1 "register_operand" "")
6259 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6260 (parallel [(const_int 1)
6264 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Interleave the low elements of two V2DI vectors; the selector starts
;; with element 0 of operand 1.  The expansion itself is emitted through
;; gen_sse2_punpcklqdq.
6268 (define_expand "vec_interleave_lowv2di"
6269 [(set (match_operand:V2DI 0 "register_operand" "")
6272 (match_operand:V2DI 1 "register_operand" "")
6273 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6274 (parallel [(const_int 0)
6278 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Named expander vec_interleave_highv4sf.  V4SF destination register,
;; V4SF register operand 1, V4SF register-or-memory operand 2.  The selector
;; lists elements 2,6,3,7.  No explicit emit_insn call appears here.
6282 (define_expand "vec_interleave_highv4sf"
6283 [(set (match_operand:V4SF 0 "register_operand" "")
6286 (match_operand:V4SF 1 "register_operand" "")
6287 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6288 (parallel [(const_int 2) (const_int 6)
6289 (const_int 3) (const_int 7)])))]
;; Named expander vec_interleave_lowv4sf.  V4SF destination register,
;; V4SF register operand 1, V4SF register-or-memory operand 2.  The selector
;; lists elements 0,4,1,5.  No explicit emit_insn call appears here.
6292 (define_expand "vec_interleave_lowv4sf"
6293 [(set (match_operand:V4SF 0 "register_operand" "")
6296 (match_operand:V4SF 1 "register_operand" "")
6297 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6298 (parallel [(const_int 0) (const_int 4)
6299 (const_int 1) (const_int 5)])))]
;; Named expander vec_interleave_highv2df.  V2DF destination register,
;; V2DF register operand 1, V2DF register-or-memory operand 2.  The selector
;; begins with element 1.
6302 (define_expand "vec_interleave_highv2df"
6303 [(set (match_operand:V2DF 0 "register_operand" "")
6306 (match_operand:V2DF 1 "register_operand" "")
6307 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6308 (parallel [(const_int 1)
;; Named expander vec_interleave_lowv2df.  V2DF destination register,
;; V2DF register operand 1, V2DF register-or-memory operand 2.  The selector
;; begins with element 0.
6312 (define_expand "vec_interleave_lowv2df"
6313 [(set (match_operand:V2DF 0 "register_operand" "")
6316 (match_operand:V2DF 1 "register_operand" "")
6317 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6318 (parallel [(const_int 0)
;; AVX form of packsswb: V16QI result built from two V8HI inputs (operand 2
;; may be memory).  Emits the three-operand vpacksswb, so the destination is
;; not tied to operand 1; the prefix attribute is "vex".
6322 (define_insn "*avx_packsswb"
6323 [(set (match_operand:V16QI 0 "register_operand" "=x")
6326 (match_operand:V8HI 1 "register_operand" "x"))
6328 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6330 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6331 [(set_attr "type" "sselog")
6332 (set_attr "prefix" "vex")
6333 (set_attr "mode" "TI")])
;; SSE2 packsswb: V16QI result built from two V8HI inputs.  Two-operand
;; form: operand 1 carries the matching constraint "0", so it shares the
;; destination register; operand 2 may be a register or memory.
6335 (define_insn "sse2_packsswb"
6336 [(set (match_operand:V16QI 0 "register_operand" "=x")
6339 (match_operand:V8HI 1 "register_operand" "0"))
6341 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6343 "packsswb\t{%2, %0|%0, %2}"
6344 [(set_attr "type" "sselog")
6345 (set_attr "prefix_data16" "1")
6346 (set_attr "mode" "TI")])
;; AVX form of packssdw: V8HI result built from two V4SI inputs (operand 2
;; may be memory).  Emits the three-operand vpackssdw with prefix "vex".
6348 (define_insn "*avx_packssdw"
6349 [(set (match_operand:V8HI 0 "register_operand" "=x")
6352 (match_operand:V4SI 1 "register_operand" "x"))
6354 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6356 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6357 [(set_attr "type" "sselog")
6358 (set_attr "prefix" "vex")
6359 (set_attr "mode" "TI")])
;; SSE2 packssdw: V8HI result built from two V4SI inputs.  Two-operand form:
;; operand 1 is tied to the destination through the matching constraint "0".
6361 (define_insn "sse2_packssdw"
6362 [(set (match_operand:V8HI 0 "register_operand" "=x")
6365 (match_operand:V4SI 1 "register_operand" "0"))
6367 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6369 "packssdw\t{%2, %0|%0, %2}"
6370 [(set_attr "type" "sselog")
6371 (set_attr "prefix_data16" "1")
6372 (set_attr "mode" "TI")])
;; AVX form of packuswb: V16QI result built from two V8HI inputs (operand 2
;; may be memory).  Emits the three-operand vpackuswb with prefix "vex".
6374 (define_insn "*avx_packuswb"
6375 [(set (match_operand:V16QI 0 "register_operand" "=x")
6378 (match_operand:V8HI 1 "register_operand" "x"))
6380 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6382 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6383 [(set_attr "type" "sselog")
6384 (set_attr "prefix" "vex")
6385 (set_attr "mode" "TI")])
;; SSE2 packuswb: V16QI result built from two V8HI inputs.  Two-operand
;; form: operand 1 is tied to the destination through the matching
;; constraint "0".
6387 (define_insn "sse2_packuswb"
6388 [(set (match_operand:V16QI 0 "register_operand" "=x")
6391 (match_operand:V8HI 1 "register_operand" "0"))
6393 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6395 "packuswb\t{%2, %0|%0, %2}"
6396 [(set_attr "type" "sselog")
6397 (set_attr "prefix_data16" "1")
6398 (set_attr "mode" "TI")])
;; AVX vpunpckhbw on V16QI.  The selector pairs element k with element k+16
;; for k = 8..15.  Three-operand template: destination is independent of
;; operand 1, and operand 2 may be memory.
6400 (define_insn "*avx_punpckhbw"
6401 [(set (match_operand:V16QI 0 "register_operand" "=x")
6404 (match_operand:V16QI 1 "register_operand" "x")
6405 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6406 (parallel [(const_int 8) (const_int 24)
6407 (const_int 9) (const_int 25)
6408 (const_int 10) (const_int 26)
6409 (const_int 11) (const_int 27)
6410 (const_int 12) (const_int 28)
6411 (const_int 13) (const_int 29)
6412 (const_int 14) (const_int 30)
6413 (const_int 15) (const_int 31)])))]
6415 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6416 [(set_attr "type" "sselog")
6417 (set_attr "prefix" "vex")
6418 (set_attr "mode" "TI")])
;; SSE2 punpckhbw on V16QI.  The selector pairs element k with element k+16
;; for k = 8..15.  Two-operand template: operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be memory.
6420 (define_insn "sse2_punpckhbw"
6421 [(set (match_operand:V16QI 0 "register_operand" "=x")
6424 (match_operand:V16QI 1 "register_operand" "0")
6425 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6426 (parallel [(const_int 8) (const_int 24)
6427 (const_int 9) (const_int 25)
6428 (const_int 10) (const_int 26)
6429 (const_int 11) (const_int 27)
6430 (const_int 12) (const_int 28)
6431 (const_int 13) (const_int 29)
6432 (const_int 14) (const_int 30)
6433 (const_int 15) (const_int 31)])))]
6435 "punpckhbw\t{%2, %0|%0, %2}"
6436 [(set_attr "type" "sselog")
6437 (set_attr "prefix_data16" "1")
6438 (set_attr "mode" "TI")])
;; AVX vpunpcklbw on V16QI.  The selector pairs element k with element k+16
;; for k = 0..7.  Three-operand template; operand 2 may be memory.
6440 (define_insn "*avx_punpcklbw"
6441 [(set (match_operand:V16QI 0 "register_operand" "=x")
6444 (match_operand:V16QI 1 "register_operand" "x")
6445 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6446 (parallel [(const_int 0) (const_int 16)
6447 (const_int 1) (const_int 17)
6448 (const_int 2) (const_int 18)
6449 (const_int 3) (const_int 19)
6450 (const_int 4) (const_int 20)
6451 (const_int 5) (const_int 21)
6452 (const_int 6) (const_int 22)
6453 (const_int 7) (const_int 23)])))]
6455 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6456 [(set_attr "type" "sselog")
6457 (set_attr "prefix" "vex")
6458 (set_attr "mode" "TI")])
;; SSE2 punpcklbw on V16QI.  The selector pairs element k with element k+16
;; for k = 0..7.  Two-operand template: operand 1 is tied to the destination
;; (constraint "0"); operand 2 may be memory.
6460 (define_insn "sse2_punpcklbw"
6461 [(set (match_operand:V16QI 0 "register_operand" "=x")
6464 (match_operand:V16QI 1 "register_operand" "0")
6465 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6466 (parallel [(const_int 0) (const_int 16)
6467 (const_int 1) (const_int 17)
6468 (const_int 2) (const_int 18)
6469 (const_int 3) (const_int 19)
6470 (const_int 4) (const_int 20)
6471 (const_int 5) (const_int 21)
6472 (const_int 6) (const_int 22)
6473 (const_int 7) (const_int 23)])))]
6475 "punpcklbw\t{%2, %0|%0, %2}"
6476 [(set_attr "type" "sselog")
6477 (set_attr "prefix_data16" "1")
6478 (set_attr "mode" "TI")])
;; AVX vpunpckhwd on V8HI.  The selector pairs element k with element k+8
;; for k = 4..7.  Three-operand template; operand 2 may be memory.
6480 (define_insn "*avx_punpckhwd"
6481 [(set (match_operand:V8HI 0 "register_operand" "=x")
6484 (match_operand:V8HI 1 "register_operand" "x")
6485 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6486 (parallel [(const_int 4) (const_int 12)
6487 (const_int 5) (const_int 13)
6488 (const_int 6) (const_int 14)
6489 (const_int 7) (const_int 15)])))]
6491 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6492 [(set_attr "type" "sselog")
6493 (set_attr "prefix" "vex")
6494 (set_attr "mode" "TI")])
;; SSE2 punpckhwd on V8HI.  The selector pairs element k with element k+8
;; for k = 4..7.  Two-operand template: operand 1 is tied to the destination
;; (constraint "0"); operand 2 may be memory.
6496 (define_insn "sse2_punpckhwd"
6497 [(set (match_operand:V8HI 0 "register_operand" "=x")
6500 (match_operand:V8HI 1 "register_operand" "0")
6501 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6502 (parallel [(const_int 4) (const_int 12)
6503 (const_int 5) (const_int 13)
6504 (const_int 6) (const_int 14)
6505 (const_int 7) (const_int 15)])))]
6507 "punpckhwd\t{%2, %0|%0, %2}"
6508 [(set_attr "type" "sselog")
6509 (set_attr "prefix_data16" "1")
6510 (set_attr "mode" "TI")])
;; AVX vpunpcklwd on V8HI.  The selector pairs element k with element k+8
;; for k = 0..3.  Three-operand template; operand 2 may be memory.
6512 (define_insn "*avx_punpcklwd"
6513 [(set (match_operand:V8HI 0 "register_operand" "=x")
6516 (match_operand:V8HI 1 "register_operand" "x")
6517 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6518 (parallel [(const_int 0) (const_int 8)
6519 (const_int 1) (const_int 9)
6520 (const_int 2) (const_int 10)
6521 (const_int 3) (const_int 11)])))]
6523 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6524 [(set_attr "type" "sselog")
6525 (set_attr "prefix" "vex")
6526 (set_attr "mode" "TI")])
;; SSE2 punpcklwd on V8HI.  The selector pairs element k with element k+8
;; for k = 0..3.  Two-operand template: operand 1 is tied to the destination
;; (constraint "0"); operand 2 may be memory.
6528 (define_insn "sse2_punpcklwd"
6529 [(set (match_operand:V8HI 0 "register_operand" "=x")
6532 (match_operand:V8HI 1 "register_operand" "0")
6533 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6534 (parallel [(const_int 0) (const_int 8)
6535 (const_int 1) (const_int 9)
6536 (const_int 2) (const_int 10)
6537 (const_int 3) (const_int 11)])))]
6539 "punpcklwd\t{%2, %0|%0, %2}"
6540 [(set_attr "type" "sselog")
6541 (set_attr "prefix_data16" "1")
6542 (set_attr "mode" "TI")])
;; AVX vpunpckhdq on V4SI.  The selector lists elements 2,6,3,7.
;; Three-operand template; operand 2 may be memory.
6544 (define_insn "*avx_punpckhdq"
6545 [(set (match_operand:V4SI 0 "register_operand" "=x")
6548 (match_operand:V4SI 1 "register_operand" "x")
6549 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6550 (parallel [(const_int 2) (const_int 6)
6551 (const_int 3) (const_int 7)])))]
6553 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6554 [(set_attr "type" "sselog")
6555 (set_attr "prefix" "vex")
6556 (set_attr "mode" "TI")])
;; SSE2 punpckhdq on V4SI.  The selector lists elements 2,6,3,7.
;; Two-operand template: operand 1 is tied to the destination (constraint
;; "0"); operand 2 may be memory.
6558 (define_insn "sse2_punpckhdq"
6559 [(set (match_operand:V4SI 0 "register_operand" "=x")
6562 (match_operand:V4SI 1 "register_operand" "0")
6563 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6564 (parallel [(const_int 2) (const_int 6)
6565 (const_int 3) (const_int 7)])))]
6567 "punpckhdq\t{%2, %0|%0, %2}"
6568 [(set_attr "type" "sselog")
6569 (set_attr "prefix_data16" "1")
6570 (set_attr "mode" "TI")])
;; AVX vpunpckldq on V4SI.  The selector lists elements 0,4,1,5.
;; Three-operand template; operand 2 may be memory.
6572 (define_insn "*avx_punpckldq"
6573 [(set (match_operand:V4SI 0 "register_operand" "=x")
6576 (match_operand:V4SI 1 "register_operand" "x")
6577 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6578 (parallel [(const_int 0) (const_int 4)
6579 (const_int 1) (const_int 5)])))]
6581 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6582 [(set_attr "type" "sselog")
6583 (set_attr "prefix" "vex")
6584 (set_attr "mode" "TI")])
;; SSE2 punpckldq on V4SI.  The selector lists elements 0,4,1,5.
;; Two-operand template: operand 1 is tied to the destination (constraint
;; "0"); operand 2 may be memory.
6586 (define_insn "sse2_punpckldq"
6587 [(set (match_operand:V4SI 0 "register_operand" "=x")
6590 (match_operand:V4SI 1 "register_operand" "0")
6591 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6592 (parallel [(const_int 0) (const_int 4)
6593 (const_int 1) (const_int 5)])))]
6595 "punpckldq\t{%2, %0|%0, %2}"
6596 [(set_attr "type" "sselog")
6597 (set_attr "prefix_data16" "1")
6598 (set_attr "mode" "TI")])
;; AVX element insert, iterated over SSEMODE124 (V16QI, V8HI, V4SI).
;; Operand 2 (scalar, register or memory) is duplicated and merged into
;; operand 1 under the mask in operand 3.  Operand 3 must be a power of two
;; accepted by the const_pow2_1_to_<pinsrbits> predicate; the output code
;; converts it to an element index with exact_log2 before printing.
;; A memory source is printed as %2, a register source as %k2
;; (presumably to force the 32-bit register name -- confirm in print_operand).
6600 (define_insn "*avx_pinsr<avxmodesuffixs>"
6601 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6602 (vec_merge:SSEMODE124
6603 (vec_duplicate:SSEMODE124
6604 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6605 (match_operand:SSEMODE124 1 "register_operand" "x")
6606 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6609 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6610 if (MEM_P (operands[2]))
6611 return "vpinsr<avxmodesuffixs>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6613 return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6615 [(set_attr "type" "sselog")
6616 (set_attr "prefix" "vex")
6617 (set_attr "mode" "TI")])
;; SSE4.1 pinsrb: inserts the QI value in operand 2 into the V16QI vector in
;; operand 1 (tied to the destination).  Operand 3 is a power-of-two mask in
;; the range 1..32768 and is turned into an element index with exact_log2.
;; A memory source is printed as %2, a register source as %k2.
6619 (define_insn "*sse4_1_pinsrb"
6620 [(set (match_operand:V16QI 0 "register_operand" "=x")
6622 (vec_duplicate:V16QI
6623 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6624 (match_operand:V16QI 1 "register_operand" "0")
6625 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6628 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6629 if (MEM_P (operands[2]))
6630 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6632 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6634 [(set_attr "type" "sselog")
6635 (set_attr "prefix_extra" "1")
6636 (set_attr "mode" "TI")])
;; SSE2 pinsrw: inserts the HI value in operand 2 into the V8HI vector in
;; operand 1 (tied to the destination).  Operand 3 is a power-of-two mask in
;; the range 1..128 and is turned into an element index with exact_log2.
;; A memory source is printed as %2, a register source as %k2.
6638 (define_insn "*sse2_pinsrw"
6639 [(set (match_operand:V8HI 0 "register_operand" "=x")
6642 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6643 (match_operand:V8HI 1 "register_operand" "0")
6644 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6647 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6648 if (MEM_P (operands[2]))
6649 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6651 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6653 [(set_attr "type" "sselog")
6654 (set_attr "prefix_data16" "1")
6655 (set_attr "mode" "TI")])
6657 ;; It must come before sse2_loadld since it is preferred.
;; SSE4.1 pinsrd: inserts the SI value in operand 2 (register or memory)
;; into the V4SI vector in operand 1 (tied to the destination).  Operand 3
;; is a power-of-two mask in the range 1..8, converted to an element index
;; with exact_log2 before the template is returned.
6658 (define_insn "*sse4_1_pinsrd"
6659 [(set (match_operand:V4SI 0 "register_operand" "=x")
6662 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6663 (match_operand:V4SI 1 "register_operand" "0")
6664 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6667 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6668 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6670 [(set_attr "type" "sselog")
6671 (set_attr "prefix_extra" "1")
6672 (set_attr "mode" "TI")])
;; AVX vpinsrq, enabled only for TARGET_AVX && TARGET_64BIT.  Inserts the DI
;; value in operand 2 into the V2DI vector in operand 1; the destination is
;; a separate register.  Operand 3 is a power-of-two mask (1 or 2) converted
;; to an element index with exact_log2.
6674 (define_insn "*avx_pinsrq"
6675 [(set (match_operand:V2DI 0 "register_operand" "=x")
6678 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6679 (match_operand:V2DI 1 "register_operand" "x")
6680 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6681 "TARGET_AVX && TARGET_64BIT"
6683 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6684 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6686 [(set_attr "type" "sselog")
6687 (set_attr "prefix" "vex")
6688 (set_attr "mode" "TI")])
;; SSE4.1 pinsrq, enabled only for TARGET_SSE4_1 && TARGET_64BIT.  Inserts
;; the DI value in operand 2 into the V2DI vector in operand 1 (tied to the
;; destination).  Operand 3 is a power-of-two mask (1 or 2) converted to an
;; element index with exact_log2.
6690 (define_insn "*sse4_1_pinsrq"
6691 [(set (match_operand:V2DI 0 "register_operand" "=x")
6694 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6695 (match_operand:V2DI 1 "register_operand" "0")
6696 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6697 "TARGET_SSE4_1 && TARGET_64BIT"
6699 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6700 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6702 [(set_attr "type" "sselog")
6703 (set_attr "prefix_extra" "1")
6704 (set_attr "mode" "TI")])
;; pextrb into a general register: selects one byte (index operand 2, 0..15)
;; of the V16QI register in operand 1 and writes an SI result.  The template
;; starts with %v and the prefix attribute is "maybe_vex".
6706 (define_insn "*sse4_1_pextrb"
6707 [(set (match_operand:SI 0 "register_operand" "=r")
6710 (match_operand:V16QI 1 "register_operand" "x")
6711 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6713 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6714 [(set_attr "type" "sselog")
6715 (set_attr "prefix_extra" "1")
6716 (set_attr "prefix" "maybe_vex")
6717 (set_attr "mode" "TI")])
;; pextrb straight to memory: selects one byte (index operand 2, 0..15) of
;; the V16QI register in operand 1 and stores it to the QI memory operand 0.
6719 (define_insn "*sse4_1_pextrb_memory"
6720 [(set (match_operand:QI 0 "memory_operand" "=m")
6722 (match_operand:V16QI 1 "register_operand" "x")
6723 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6725 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6726 [(set_attr "type" "sselog")
6727 (set_attr "prefix_extra" "1")
6728 (set_attr "prefix" "maybe_vex")
6729 (set_attr "mode" "TI")])
;; pextrw into a general register: selects one halfword (index operand 2,
;; 0..7) of the V8HI register in operand 1 and writes an SI result.
6731 (define_insn "*sse2_pextrw"
6732 [(set (match_operand:SI 0 "register_operand" "=r")
6735 (match_operand:V8HI 1 "register_operand" "x")
6736 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6738 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6739 [(set_attr "type" "sselog")
6740 (set_attr "prefix_data16" "1")
6741 (set_attr "prefix" "maybe_vex")
6742 (set_attr "mode" "TI")])
;; pextrw straight to memory: selects one halfword (index operand 2, 0..7)
;; of the V8HI register in operand 1 and stores it to the HI memory
;; operand 0.  Unlike *sse2_pextrw this variant sets prefix_extra.
6744 (define_insn "*sse4_1_pextrw_memory"
6745 [(set (match_operand:HI 0 "memory_operand" "=m")
6747 (match_operand:V8HI 1 "register_operand" "x")
6748 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6750 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6751 [(set_attr "type" "sselog")
6752 (set_attr "prefix_extra" "1")
6753 (set_attr "prefix" "maybe_vex")
6754 (set_attr "mode" "TI")])
;; pextrd: selects one SI element (index operand 2, 0..3) of the V4SI
;; register in operand 1.  The destination may be a general register or
;; memory (constraint "=rm").
6756 (define_insn "*sse4_1_pextrd"
6757 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6759 (match_operand:V4SI 1 "register_operand" "x")
6760 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6762 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6763 [(set_attr "type" "sselog")
6764 (set_attr "prefix_extra" "1")
6765 (set_attr "prefix" "maybe_vex")
6766 (set_attr "mode" "TI")])
6768 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq, enabled only for TARGET_SSE4_1 && TARGET_64BIT: selects one DI
;; element (index operand 2, 0..1) of the V2DI register in operand 1.  The
;; destination may be a general register or memory.
6769 (define_insn "*sse4_1_pextrq"
6770 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6772 (match_operand:V2DI 1 "register_operand" "x")
6773 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6774 "TARGET_SSE4_1 && TARGET_64BIT"
6775 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6776 [(set_attr "type" "sselog")
6777 (set_attr "prefix_extra" "1")
6778 (set_attr "prefix" "maybe_vex")
6779 (set_attr "mode" "TI")])
;; Expander for pshufd taking a single immediate.  Operand 2 is the packed
;; shuffle mask; it is split into four 2-bit fields (bits 0-1, 2-3, 4-5,
;; 6-7) which are passed as separate selector operands to
;; gen_sse2_pshufd_1.
6781 (define_expand "sse2_pshufd"
6782 [(match_operand:V4SI 0 "register_operand" "")
6783 (match_operand:V4SI 1 "nonimmediate_operand" "")
6784 (match_operand:SI 2 "const_int_operand" "")]
6787 int mask = INTVAL (operands[2]);
6788 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6789 GEN_INT ((mask >> 0) & 3),
6790 GEN_INT ((mask >> 2) & 3),
6791 GEN_INT ((mask >> 4) & 3),
6792 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit selectors (operands
;; 2..5, each 0..3).  The output code packs the selectors back into one
;; 8-bit immediate (2 bits per selector, operand 2 in the low bits) and
;; reuses operands[2] to print it.
;; The template begins with %v, so the prefix attribute is "maybe_vex" to
;; match the sibling pshuflw/pshufhw insns rather than an unconditional
;; "vex".
6796 (define_insn "sse2_pshufd_1"
6797 [(set (match_operand:V4SI 0 "register_operand" "=x")
6799 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6800 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6801 (match_operand 3 "const_0_to_3_operand" "")
6802 (match_operand 4 "const_0_to_3_operand" "")
6803 (match_operand 5 "const_0_to_3_operand" "")])))]
6807 mask |= INTVAL (operands[2]) << 0;
6808 mask |= INTVAL (operands[3]) << 2;
6809 mask |= INTVAL (operands[4]) << 4;
6810 mask |= INTVAL (operands[5]) << 6;
6811 operands[2] = GEN_INT (mask);
6813 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6815 [(set_attr "type" "sselog1")
6816 (set_attr "prefix_data16" "1")
6817 (set_attr "prefix" "maybe_vex")
6818 (set_attr "mode" "TI")])
;; Expander for pshuflw taking a single immediate.  Operand 2 is split into
;; four 2-bit fields which are passed as separate selector operands to
;; gen_sse2_pshuflw_1.
6820 (define_expand "sse2_pshuflw"
6821 [(match_operand:V8HI 0 "register_operand" "")
6822 (match_operand:V8HI 1 "nonimmediate_operand" "")
6823 (match_operand:SI 2 "const_int_operand" "")]
6826 int mask = INTVAL (operands[2]);
6827 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6828 GEN_INT ((mask >> 0) & 3),
6829 GEN_INT ((mask >> 2) & 3),
6830 GEN_INT ((mask >> 4) & 3),
6831 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four explicit selectors (operands 2..5, each 0..3) on a V8HI
;; source that may be a register or memory.  The output code packs the
;; selectors into one 8-bit immediate (2 bits each, operand 2 lowest) and
;; reuses operands[2] to print it.
6835 (define_insn "sse2_pshuflw_1"
6836 [(set (match_operand:V8HI 0 "register_operand" "=x")
6838 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6839 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6840 (match_operand 3 "const_0_to_3_operand" "")
6841 (match_operand 4 "const_0_to_3_operand" "")
6842 (match_operand 5 "const_0_to_3_operand" "")
6850 mask |= INTVAL (operands[2]) << 0;
6851 mask |= INTVAL (operands[3]) << 2;
6852 mask |= INTVAL (operands[4]) << 4;
6853 mask |= INTVAL (operands[5]) << 6;
6854 operands[2] = GEN_INT (mask);
6856 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6858 [(set_attr "type" "sselog")
6859 (set_attr "prefix_rep" "1")
6860 (set_attr "prefix" "maybe_vex")
6861 (set_attr "mode" "TI")])
;; Expander for pshufhw taking a single immediate.  Operand 2 is split into
;; four 2-bit fields; 4 is added to each so the selectors passed to
;; gen_sse2_pshufhw_1 fall in the range 4..7.
6863 (define_expand "sse2_pshufhw"
6864 [(match_operand:V8HI 0 "register_operand" "")
6865 (match_operand:V8HI 1 "nonimmediate_operand" "")
6866 (match_operand:SI 2 "const_int_operand" "")]
6869 int mask = INTVAL (operands[2]);
6870 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6871 GEN_INT (((mask >> 0) & 3) + 4),
6872 GEN_INT (((mask >> 2) & 3) + 4),
6873 GEN_INT (((mask >> 4) & 3) + 4),
6874 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four explicit selectors (operands 2..5, each 4..7) on a V8HI
;; source that may be a register or memory.  The output code subtracts 4
;; from each selector, packs the results into one 8-bit immediate (2 bits
;; each, operand 2 lowest) and reuses operands[2] to print it.
6878 (define_insn "sse2_pshufhw_1"
6879 [(set (match_operand:V8HI 0 "register_operand" "=x")
6881 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6882 (parallel [(const_int 0)
6886 (match_operand 2 "const_4_to_7_operand" "")
6887 (match_operand 3 "const_4_to_7_operand" "")
6888 (match_operand 4 "const_4_to_7_operand" "")
6889 (match_operand 5 "const_4_to_7_operand" "")])))]
6893 mask |= (INTVAL (operands[2]) - 4) << 0;
6894 mask |= (INTVAL (operands[3]) - 4) << 2;
6895 mask |= (INTVAL (operands[4]) - 4) << 4;
6896 mask |= (INTVAL (operands[5]) - 4) << 6;
6897 operands[2] = GEN_INT (mask);
6899 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6901 [(set_attr "type" "sselog")
6902 (set_attr "prefix_rep" "1")
6903 (set_attr "prefix" "maybe_vex")
6904 (set_attr "mode" "TI")])
;; Expander that loads an SI value (operand 1, register or memory) into a
;; V4SI register.  The preparation statement sets operands[2] to the V4SI
;; zero constant.
6906 (define_expand "sse2_loadd"
6907 [(set (match_operand:V4SI 0 "register_operand" "")
6910 (match_operand:SI 1 "nonimmediate_operand" ""))
6914 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX scalar SI load into a V4SI register.  Three alternatives:
;;   0: SI from memory, operand 1 constant ("C")   -> vmovd
;;   1: SI from a general register, operand 1 "C"  -> vmovd
;;   2: SI from an SSE register, operand 1 an SSE register
;;      -> three-operand vmovss (mode V4SF)
6916 (define_insn "*avx_loadld"
6917 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
6920 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
6921 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
6925 vmovd\t{%2, %0|%0, %2}
6926 vmovd\t{%2, %0|%0, %2}
6927 vmovss\t{%2, %1, %0|%0, %1, %2}"
6928 [(set_attr "type" "ssemov")
6929 (set_attr "prefix" "vex")
6930 (set_attr "mode" "TI,TI,V4SF")])
;; Non-AVX scalar SI load into a V4SI register.  Four alternatives:
;;   0: memory source, operand 1 "C"            -> movd
;;   1: general-register source, operand 1 "C"  -> movd
;;   2: memory source, operand 1 "C"            -> movss (mode V4SF)
;;   3: SSE-register source, operand 1 tied to the destination
;;      -> movss (mode SF)
6932 (define_insn "sse2_loadld"
6933 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
6936 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
6937 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
6941 movd\t{%2, %0|%0, %2}
6942 movd\t{%2, %0|%0, %2}
6943 movss\t{%2, %0|%0, %2}
6944 movss\t{%2, %0|%0, %2}"
6945 [(set_attr "type" "ssemov")
6946 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extracts element 0 of a V4SI register into an SI destination (memory,
;; SSE register or general register).  After reload the insn is split into
;; a plain SI move when inter-unit moves are allowed, the destination is
;; memory, or the destination is not a general register; the split code
;; rewrites operand 1 as an SImode REG with the same register number.
6948 (define_insn_and_split "sse2_stored"
6949 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
6951 (match_operand:V4SI 1 "register_operand" "x,Yi")
6952 (parallel [(const_int 0)])))]
6955 "&& reload_completed
6956 && (TARGET_INTER_UNIT_MOVES
6957 || MEM_P (operands [0])
6958 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6959 [(set (match_dup 0) (match_dup 1))]
6961 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extracts element operand 2 (0..3) of a V4SI value held in offsettable
;; memory into a general register.  The split code emits an ordinary SI
;; move from the address of operand 1 adjusted by 4 * index bytes.
6964 (define_insn_and_split "*vec_ext_v4si_mem"
6965 [(set (match_operand:SI 0 "register_operand" "=r")
6967 (match_operand:V4SI 1 "memory_operand" "o")
6968 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6974 int i = INTVAL (operands[2]);
6976 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that extracts element 0 of a V2DI register into a DI
;; destination, which may be a register or memory.
6980 (define_expand "sse_storeq"
6981 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6983 (match_operand:V2DI 1 "register_operand" "")
6984 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DI extract from V2DI.  Rejected when both
;; operands are memory.  Alternatives 0 and 1 leave type/prefix/mode as "*";
;; alternative 2 reads the value from offsettable memory into a general
;; register with an integer move (type imov, mode DI).
6988 (define_insn "*sse2_storeq_rex64"
6989 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
6991 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6992 (parallel [(const_int 0)])))]
6993 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6997 %vmov{q}\t{%1, %0|%0, %1}"
6998 [(set_attr "type" "*,*,imov")
6999 (set_attr "prefix" "*,*,maybe_vex")
7000 (set_attr "mode" "*,*,DI")])
;; Element-0 DI extract from a V2DI register into memory or an SSE register.
7002 (define_insn "*sse2_storeq"
7003 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7005 (match_operand:V2DI 1 "register_operand" "x")
7006 (parallel [(const_int 0)])))]
;; NOTE(review): the pattern below repeats the same extract without
;; constraints and rewrites it as a plain DI move, with operand 1 rebuilt
;; as a DImode REG of the same register number.  It is presumably a
;; separate define_split -- confirm where its header sits.
7011 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7013 (match_operand:V2DI 1 "register_operand" "")
7014 (parallel [(const_int 0)])))]
7017 && (TARGET_INTER_UNIT_MOVES
7018 || MEM_P (operands [0])
7019 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7020 [(set (match_dup 0) (match_dup 1))]
7022 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; AVX extract of element 1 from V2DI; rejected when both operands are
;; memory.  Four alternatives:
;;   0: SSE register -> memory          vmovhps
;;   1: SSE register -> SSE register    vpsrldq by 8 bytes
;;   2: memory -> SSE register          vmovq from %H1
;;   3: memory -> general register      vmov{q} from %H1
;; (%H1 presumably addresses the upper 8 bytes of operand 1 -- confirm in
;; print_operand.)
7025 (define_insn "*vec_extractv2di_1_rex64_avx"
7026 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7028 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7029 (parallel [(const_int 1)])))]
7032 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7034 vmovhps\t{%1, %0|%0, %1}
7035 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7036 vmovq\t{%H1, %0|%0, %H1}
7037 vmov{q}\t{%H1, %0|%0, %H1}"
7038 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7039 (set_attr "memory" "*,none,*,*")
7040 (set_attr "prefix" "vex")
7041 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit non-AVX extract of element 1 from V2DI; rejected when both
;; operands are memory.  Four alternatives:
;;   0: SSE register -> memory          movhps
;;   1: in-place on the destination     psrldq by 8 bytes (operand 1 tied)
;;   2: memory -> SSE register          movq from %H1
;;   3: memory -> general register      mov{q} from %H1
7043 (define_insn "*vec_extractv2di_1_rex64"
7044 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7046 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7047 (parallel [(const_int 1)])))]
7048 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7050 movhps\t{%1, %0|%0, %1}
7051 psrldq\t{$8, %0|%0, 8}
7052 movq\t{%H1, %0|%0, %H1}
7053 mov{q}\t{%H1, %0|%0, %H1}"
7054 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7055 (set_attr "memory" "*,none,*,*")
7056 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX extract of element 1 from V2DI without a general-register
;; alternative; rejected when both operands are memory.  Alternatives:
;;   0: SSE register -> memory          vmovhps
;;   1: SSE register -> SSE register    vpsrldq by 8 bytes
;;   2: memory -> SSE register          vmovq from %H1
7058 (define_insn "*vec_extractv2di_1_avx"
7059 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7061 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7062 (parallel [(const_int 1)])))]
7065 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7067 vmovhps\t{%1, %0|%0, %1}
7068 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7069 vmovq\t{%H1, %0|%0, %H1}"
7070 [(set_attr "type" "ssemov,sseishft,ssemov")
7071 (set_attr "memory" "*,none,*")
7072 (set_attr "prefix" "vex")
7073 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 extract of element 1 from V2DI; the condition requires TARGET_SSE2
;; and rejects the case where both operands are memory.  Alternatives:
;;   0: SSE register -> memory          movhps
;;   1: in-place on the destination     psrldq by 8 bytes (operand 1 tied)
;;   2: memory -> SSE register          movq from %H1
7075 (define_insn "*vec_extractv2di_1_sse2"
7076 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7078 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7079 (parallel [(const_int 1)])))]
7081 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7083 movhps\t{%1, %0|%0, %1}
7084 psrldq\t{$8, %0|%0, 8}
7085 movq\t{%H1, %0|%0, %H1}"
7086 [(set_attr "type" "ssemov,sseishft,ssemov")
7087 (set_attr "memory" "*,none,*")
7088 (set_attr "mode" "V2SF,TI,TI")])
7090 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 extract of element 1 from V2DI; rejected when both
;; operands are memory.  Alternatives:
;;   0: SSE register -> memory          movhps
;;   1: SSE register -> SSE register    movhlps
;;   2: memory -> SSE register          movlps from %H1
7091 (define_insn "*vec_extractv2di_1_sse"
7092 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7094 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7095 (parallel [(const_int 1)])))]
7096 "!TARGET_SSE2 && TARGET_SSE
7097 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7099 movhps\t{%1, %0|%0, %1}
7100 movhlps\t{%1, %0|%0, %1}
7101 movlps\t{%H1, %0|%0, %H1}"
7102 [(set_attr "type" "ssemov")
7103 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produces a V4SI register from an SI register operand.  Alternative 0
;; uses %vpshufd with immediate 0 (prefix maybe_vex); alternative 1 ties the
;; source to the destination and uses shufps with immediate 0 (prefix orig).
7105 (define_insn "*vec_dupv4si"
7106 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7108 (match_operand:SI 1 "register_operand" " Y2,0")))]
7111 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7112 shufps\t{$0, %0, %0|%0, %0, 0}"
7113 [(set_attr "type" "sselog1")
7114 (set_attr "prefix" "maybe_vex,orig")
7115 (set_attr "mode" "TI,V4SF")])
;; AVX pattern producing a V2DI register from a DI held in an SSE register.
;; Emits vpunpcklqdq with operand 1 used as both source operands.
7117 (define_insn "*vec_dupv2di_avx"
7118 [(set (match_operand:V2DI 0 "register_operand" "=x")
7120 (match_operand:DI 1 "register_operand" "x")))]
7122 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7123 [(set_attr "type" "sselog1")
7124 (set_attr "prefix" "vex")
7125 (set_attr "mode" "TI")])
;; Non-AVX pattern producing a V2DI register from a DI register.  Both
;; alternatives tie operand 1 to the destination; alternative 0 is typed
;; sselog1 in TI mode, alternative 1 ssemov in V4SF mode.
7127 (define_insn "*vec_dupv2di"
7128 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7130 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7135 [(set_attr "type" "sselog1,ssemov")
7136 (set_attr "mode" "TI,V4SF")])
;; AVX pattern building a V2SI from two SI operands.  Five alternatives:
;;   0: vpinsrd $1 of a register/memory operand 2 into operand 1
;;   1: vpunpckldq of two SSE registers
;;   2: vmovd of operand 1 when operand 2 is "C"
;;   3: punpckldq on "*y" registers, operand 1 tied to the destination
;;   4: movd into a "*y" register when operand 2 is "C"
;; Alternatives 3 and 4 use prefix "orig"; the others use "vex".
7138 (define_insn "*vec_concatv2si_avx"
7139 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7141 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7142 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7145 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7146 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7147 vmovd\t{%1, %0|%0, %1}
7148 punpckldq\t{%2, %0|%0, %2}
7149 movd\t{%1, %0|%0, %1}"
7150 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7151 (set (attr "prefix")
7152 (if_then_else (eq_attr "alternative" "3,4")
7153 (const_string "orig")
7154 (const_string "vex")))
7155 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE4.1 pattern building a V2SI from two SI operands.  Five alternatives:
;;   0: pinsrd $1 of operand 2 into the tied operand 1 (prefix_extra 1)
;;   1: punpckldq with an SSE register, operand 1 tied
;;   2: movd of operand 1 when operand 2 is "C"
;;   3: punpckldq on "*y" registers, operand 1 tied
;;   4: movd into a "*y" register when operand 2 is "C"
7157 (define_insn "*vec_concatv2si_sse4_1"
7158 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7160 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7161 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7164 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7165 punpckldq\t{%2, %0|%0, %2}
7166 movd\t{%1, %0|%0, %1}
7167 punpckldq\t{%2, %0|%0, %2}
7168 movd\t{%1, %0|%0, %1}"
7169 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7170 (set_attr "prefix_extra" "1,*,*,*,*")
7171 (set_attr "mode" "TI,TI,TI,DI,DI")])
7173 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7174 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7175 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 pattern building a V2SI from two SI operands; operand 2 is limited
;; to a register or zero.  Four alternatives:
;;   0: punpckldq with an SSE register, operand 1 tied
;;   1: movd of operand 1 when operand 2 is "C"
;;   2: punpckldq on "*y" registers, operand 1 tied
;;   3: movd into a "*y" register when operand 2 is "C"
7176 (define_insn "*vec_concatv2si_sse2"
7177 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7179 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7180 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7183 punpckldq\t{%2, %0|%0, %2}
7184 movd\t{%1, %0|%0, %1}
7185 punpckldq\t{%2, %0|%0, %2}
7186 movd\t{%1, %0|%0, %1}"
7187 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7188 (set_attr "mode" "TI,TI,DI,DI")])
;; SSE pattern building a V2SI from two SI operands using float-domain
;; instructions for the SSE alternatives.  Four alternatives:
;;   0: unpcklps with an SSE register, operand 1 tied (mode V4SF)
;;   1: movss of a memory operand 1 when operand 2 is "C" (mode V4SF)
;;   2: punpckldq on "*y" registers, operand 1 tied
;;   3: movd into a "*y" register when operand 2 is "C"
7190 (define_insn "*vec_concatv2si_sse"
7191 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7193 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7194 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7197 unpcklps\t{%2, %0|%0, %2}
7198 movss\t{%1, %0|%0, %1}
7199 punpckldq\t{%2, %0|%0, %2}
7200 movd\t{%1, %0|%0, %1}"
7201 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7202 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX pattern building a V4SI from two V2SI operands.  Alternative 0 takes
;; operand 2 in an SSE register and emits vpunpcklqdq; alternative 1 takes
;; operand 2 from memory and emits vmovhps.  Both are three-operand forms.
7204 (define_insn "*vec_concatv4si_1_avx"
7205 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7207 (match_operand:V2SI 1 "register_operand" " x,x")
7208 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7211 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7212 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7213 [(set_attr "type" "sselog,ssemov")
7214 (set_attr "prefix" "vex")
7215 (set_attr "mode" "TI,V2SF")])
;; Non-AVX pattern building a V4SI from two V2SI operands; operand 1 is tied
;; to the destination in every alternative.  Alternatives:
;;   0: punpcklqdq with a "Y2" register
;;   1: movlhps with an SSE register
;;   2: movhps from memory
7217 (define_insn "*vec_concatv4si_1"
7218 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7220 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7221 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7224 punpcklqdq\t{%2, %0|%0, %2}
7225 movlhps\t{%2, %0|%0, %2}
7226 movhps\t{%2, %0|%0, %2}"
7227 [(set_attr "type" "sselog,ssemov,ssemov")
7228 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX pattern building a V2DI from two DI operands.  Alternatives:
;;   0: vmovq of a memory operand 1, operand 2 "C"
;;   1: movq2dq of a "*y" register, operand 2 "C" (prefix orig)
;;   2: vpunpcklqdq of two SSE registers
;;   3: vmovhps with operand 2 from memory
7230 (define_insn "*vec_concatv2di_avx"
7231 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7233 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7234 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7235 "!TARGET_64BIT && TARGET_AVX"
7237 vmovq\t{%1, %0|%0, %1}
7238 movq2dq\t{%1, %0|%0, %1}
7239 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7240 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7241 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7242 (set (attr "prefix")
7243 (if_then_else (eq_attr "alternative" "1")
7244 (const_string "orig")
7245 (const_string "vex")))
7246 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit SSE pattern building a V2DI from two DI operands.  Alternatives:
;;   0: movq of operand 1 (memory or "Y2" register), operand 2 "C"
;;   1: movq2dq of a "*y" register, operand 2 "C"
;;   2: punpcklqdq with a "Y2" register, operand 1 tied
;;   3: movlhps with an SSE register, operand 1 tied
;;   4: movhps from memory, operand 1 tied
7248 (define_insn "vec_concatv2di"
7249 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7251 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7252 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7253 "!TARGET_64BIT && TARGET_SSE"
7255 movq\t{%1, %0|%0, %1}
7256 movq2dq\t{%1, %0|%0, %1}
7257 punpcklqdq\t{%2, %0|%0, %2}
7258 movlhps\t{%2, %0|%0, %2}
7259 movhps\t{%2, %0|%0, %2}"
7260 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7261 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX pattern building a V2DI from two DI operands.  Alternatives:
;;   0: vpinsrq $1 of a register/memory operand 2 into operand 1
;;   1: vmovq of a memory operand 1, operand 2 "C"
;;   2: vmovq of a general register, operand 2 "C"
;;   3: movq2dq of a "*y" register, operand 2 "C" (prefix orig)
;;   4: vpunpcklqdq of two SSE registers
;;   5: vmovhps with operand 2 from memory
7263 (define_insn "*vec_concatv2di_rex64_avx"
7264 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7266 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7267 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7268 "TARGET_64BIT && TARGET_AVX"
7270 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7271 vmovq\t{%1, %0|%0, %1}
7272 vmovq\t{%1, %0|%0, %1}
7273 movq2dq\t{%1, %0|%0, %1}
7274 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7275 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7276 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7277 (set (attr "prefix")
7278 (if_then_else (eq_attr "alternative" "3")
7279 (const_string "orig")
7280 (const_string "vex")))
7281 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 pattern building a V2DI from two DI operands.  Alternatives:
;;   0: pinsrq $1 of operand 2 into the tied operand 1 (prefix_extra 1)
;;   1: movq of operand 1 (memory or SSE register), operand 2 "C"
;;   2: movq of a general register, operand 2 "C"
;;   3: movq2dq of a "*y" register, operand 2 "C"
;;   4: punpcklqdq with an SSE register, operand 1 tied
;;   5: movlhps with an SSE register, operand 1 tied
;;   6: movhps from memory, operand 1 tied
7283 (define_insn "*vec_concatv2di_rex64_sse4_1"
7284 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7286 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7287 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7288 "TARGET_64BIT && TARGET_SSE4_1"
7290 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7291 movq\t{%1, %0|%0, %1}
7292 movq\t{%1, %0|%0, %1}
7293 movq2dq\t{%1, %0|%0, %1}
7294 punpcklqdq\t{%2, %0|%0, %2}
7295 movlhps\t{%2, %0|%0, %2}
7296 movhps\t{%2, %0|%0, %2}"
7297 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7298 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7299 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE pattern building a V2DI from two DI operands.  Alternatives:
;;   0: movq of operand 1 (memory or "Y2" register), operand 2 "C"
;;   1: movq of a general register, operand 2 "C"
;;   2: movq2dq of a "*y" register, operand 2 "C"
;;   3: punpcklqdq with a "Y2" register, operand 1 tied
;;   4: movlhps with an SSE register, operand 1 tied
;;   5: movhps from memory, operand 1 tied
7301 (define_insn "*vec_concatv2di_rex64_sse"
7302 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7304 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7305 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7306 "TARGET_64BIT && TARGET_SSE"
7308 movq\t{%1, %0|%0, %1}
7309 movq\t{%1, %0|%0, %1}
7310 movq2dq\t{%1, %0|%0, %1}
7311 punpcklqdq\t{%2, %0|%0, %2}
7312 movlhps\t{%2, %0|%0, %2}
7313 movhps\t{%2, %0|%0, %2}"
7314 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7315 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander vec_unpacku_hi_v16qi: V16QI register input, V8HI register
;; result.  Dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always with flags (true, true).
;; NOTE(review): flags are presumably (unsigned_p, high_p) -- confirm
;; against the helpers' prototypes.
7317 (define_expand "vec_unpacku_hi_v16qi"
7318 [(match_operand:V8HI 0 "register_operand" "")
7319 (match_operand:V16QI 1 "register_operand" "")]
7323 ix86_expand_sse4_unpack (operands, true, true);
7324 else if (TARGET_SSE5)
7325 ix86_expand_sse5_unpack (operands, true, true);
7327 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v16qi: V16QI register input, V8HI register
;; result.  Dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always with flags (false, true).
;; NOTE(review): flags are presumably (unsigned_p, high_p) -- confirm
;; against the helpers' prototypes.
7331 (define_expand "vec_unpacks_hi_v16qi"
7332 [(match_operand:V8HI 0 "register_operand" "")
7333 (match_operand:V16QI 1 "register_operand" "")]
7337 ix86_expand_sse4_unpack (operands, false, true);
7338 else if (TARGET_SSE5)
7339 ix86_expand_sse5_unpack (operands, false, true);
7341 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V16QI register source (operand 1) and a V8HI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7345 (define_expand "vec_unpacku_lo_v16qi"
7346 [(match_operand:V8HI 0 "register_operand" "")
7347 (match_operand:V16QI 1 "register_operand" "")]
7351 ix86_expand_sse4_unpack (operands, true, false);
7352 else if (TARGET_SSE5)
7353 ix86_expand_sse5_unpack (operands, true, false);
7355 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V16QI register source (operand 1) and a V8HI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7359 (define_expand "vec_unpacks_lo_v16qi"
7360 [(match_operand:V8HI 0 "register_operand" "")
7361 (match_operand:V16QI 1 "register_operand" "")]
7365 ix86_expand_sse4_unpack (operands, false, false);
7366 else if (TARGET_SSE5)
7367 ix86_expand_sse5_unpack (operands, false, false);
7369 ix86_expand_sse_unpack (operands, false, false);
;; Expander with a V8HI register source (operand 1) and a V4SI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, true, true).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7373 (define_expand "vec_unpacku_hi_v8hi"
7374 [(match_operand:V4SI 0 "register_operand" "")
7375 (match_operand:V8HI 1 "register_operand" "")]
7379 ix86_expand_sse4_unpack (operands, true, true);
7380 else if (TARGET_SSE5)
7381 ix86_expand_sse5_unpack (operands, true, true);
7383 ix86_expand_sse_unpack (operands, true, true);
;; Expander with a V8HI register source (operand 1) and a V4SI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, false, true).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7387 (define_expand "vec_unpacks_hi_v8hi"
7388 [(match_operand:V4SI 0 "register_operand" "")
7389 (match_operand:V8HI 1 "register_operand" "")]
7393 ix86_expand_sse4_unpack (operands, false, true);
7394 else if (TARGET_SSE5)
7395 ix86_expand_sse5_unpack (operands, false, true);
7397 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V8HI register source (operand 1) and a V4SI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7401 (define_expand "vec_unpacku_lo_v8hi"
7402 [(match_operand:V4SI 0 "register_operand" "")
7403 (match_operand:V8HI 1 "register_operand" "")]
7407 ix86_expand_sse4_unpack (operands, true, false);
7408 else if (TARGET_SSE5)
7409 ix86_expand_sse5_unpack (operands, true, false);
7411 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V8HI register source (operand 1) and a V4SI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7415 (define_expand "vec_unpacks_lo_v8hi"
7416 [(match_operand:V4SI 0 "register_operand" "")
7417 (match_operand:V8HI 1 "register_operand" "")]
7421 ix86_expand_sse4_unpack (operands, false, false);
7422 else if (TARGET_SSE5)
7423 ix86_expand_sse5_unpack (operands, false, false);
7425 ix86_expand_sse_unpack (operands, false, false);
;; Expander with a V4SI register source (operand 1) and a V2DI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, true, true).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7429 (define_expand "vec_unpacku_hi_v4si"
7430 [(match_operand:V2DI 0 "register_operand" "")
7431 (match_operand:V4SI 1 "register_operand" "")]
7435 ix86_expand_sse4_unpack (operands, true, true);
7436 else if (TARGET_SSE5)
7437 ix86_expand_sse5_unpack (operands, true, true);
7439 ix86_expand_sse_unpack (operands, true, true);
;; Expander with a V4SI register source (operand 1) and a V2DI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, false, true).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7443 (define_expand "vec_unpacks_hi_v4si"
7444 [(match_operand:V2DI 0 "register_operand" "")
7445 (match_operand:V4SI 1 "register_operand" "")]
7449 ix86_expand_sse4_unpack (operands, false, true);
7450 else if (TARGET_SSE5)
7451 ix86_expand_sse5_unpack (operands, false, true);
7453 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V4SI register source (operand 1) and a V2DI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7457 (define_expand "vec_unpacku_lo_v4si"
7458 [(match_operand:V2DI 0 "register_operand" "")
7459 (match_operand:V4SI 1 "register_operand" "")]
7463 ix86_expand_sse4_unpack (operands, true, false);
7464 else if (TARGET_SSE5)
7465 ix86_expand_sse5_unpack (operands, true, false);
7467 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V4SI register source (operand 1) and a V2DI register
;; destination (operand 0).  Delegates to one of the ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (TARGET_SSE5) or ix86_expand_sse_unpack helpers,
;; passing (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned / high half, going
;; by the pattern name -- confirm against the helper definitions.
7471 (define_expand "vec_unpacks_lo_v4si"
7472 [(match_operand:V2DI 0 "register_operand" "")
7473 (match_operand:V4SI 1 "register_operand" "")]
7477 ix86_expand_sse4_unpack (operands, false, false);
7478 else if (TARGET_SSE5)
7479 ix86_expand_sse5_unpack (operands, false, false);
7481 ix86_expand_sse_unpack (operands, false, false);
7485 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7489 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander over V16QI operands 0-2.  The template contains a V16QI
;; const_vector of sixteen 1s, and the preparation statement fixes up the
;; operands for a PLUS via ix86_fixup_binary_operands_no_copy.
7491 (define_expand "sse2_uavgv16qi3"
7492 [(set (match_operand:V16QI 0 "register_operand" "")
7498 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7500 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7501 (const_vector:V16QI [(const_int 1) (const_int 1)
7502 (const_int 1) (const_int 1)
7503 (const_int 1) (const_int 1)
7504 (const_int 1) (const_int 1)
7505 (const_int 1) (const_int 1)
7506 (const_int 1) (const_int 1)
7507 (const_int 1) (const_int 1)
7508 (const_int 1) (const_int 1)]))
7511 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI average pattern: three-operand vpavgb with a VEX
;; prefix.  Operand 1 is commutative ("%x") and need not match the
;; destination.  Requires TARGET_AVX and ix86_binary_operator_ok for PLUS.
7513 (define_insn "*avx_uavgv16qi3"
7514 [(set (match_operand:V16QI 0 "register_operand" "=x")
7520 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7522 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7523 (const_vector:V16QI [(const_int 1) (const_int 1)
7524 (const_int 1) (const_int 1)
7525 (const_int 1) (const_int 1)
7526 (const_int 1) (const_int 1)
7527 (const_int 1) (const_int 1)
7528 (const_int 1) (const_int 1)
7529 (const_int 1) (const_int 1)
7530 (const_int 1) (const_int 1)]))
7532 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7533 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7534 [(set_attr "type" "sseiadd")
7535 (set_attr "prefix" "vex")
7536 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI average pattern: two-operand pavgb, so operand 1
;; is tied to the destination ("%0", commutative).  Requires TARGET_SSE2
;; and ix86_binary_operator_ok for PLUS.
7538 (define_insn "*sse2_uavgv16qi3"
7539 [(set (match_operand:V16QI 0 "register_operand" "=x")
7545 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7547 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7548 (const_vector:V16QI [(const_int 1) (const_int 1)
7549 (const_int 1) (const_int 1)
7550 (const_int 1) (const_int 1)
7551 (const_int 1) (const_int 1)
7552 (const_int 1) (const_int 1)
7553 (const_int 1) (const_int 1)
7554 (const_int 1) (const_int 1)
7555 (const_int 1) (const_int 1)]))
7557 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7558 "pavgb\t{%2, %0|%0, %2}"
7559 [(set_attr "type" "sseiadd")
7560 (set_attr "prefix_data16" "1")
7561 (set_attr "mode" "TI")])
;; Named expander over V8HI operands 0-2.  The template contains a V8HI
;; const_vector of eight 1s, and the preparation statement fixes up the
;; operands for a PLUS via ix86_fixup_binary_operands_no_copy.
7563 (define_expand "sse2_uavgv8hi3"
7564 [(set (match_operand:V8HI 0 "register_operand" "")
7570 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7572 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7573 (const_vector:V8HI [(const_int 1) (const_int 1)
7574 (const_int 1) (const_int 1)
7575 (const_int 1) (const_int 1)
7576 (const_int 1) (const_int 1)]))
7579 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI average pattern: three-operand vpavgw with a VEX
;; prefix.  Operand 1 is commutative ("%x") and need not match the
;; destination.  Requires TARGET_AVX and ix86_binary_operator_ok for PLUS.
7581 (define_insn "*avx_uavgv8hi3"
7582 [(set (match_operand:V8HI 0 "register_operand" "=x")
7588 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7590 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7591 (const_vector:V8HI [(const_int 1) (const_int 1)
7592 (const_int 1) (const_int 1)
7593 (const_int 1) (const_int 1)
7594 (const_int 1) (const_int 1)]))
7596 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7597 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7598 [(set_attr "type" "sseiadd")
7599 (set_attr "prefix" "vex")
7600 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI average pattern: two-operand pavgw, so operand 1
;; is tied to the destination ("%0", commutative).  Requires TARGET_SSE2
;; and ix86_binary_operator_ok for PLUS.
7602 (define_insn "*sse2_uavgv8hi3"
7603 [(set (match_operand:V8HI 0 "register_operand" "=x")
7609 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7611 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7612 (const_vector:V8HI [(const_int 1) (const_int 1)
7613 (const_int 1) (const_int 1)
7614 (const_int 1) (const_int 1)
7615 (const_int 1) (const_int 1)]))
7617 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7618 "pavgw\t{%2, %0|%0, %2}"
7619 [(set_attr "type" "sseiadd")
7620 (set_attr "prefix_data16" "1")
7621 (set_attr "mode" "TI")])
;; AVX form: three-operand vpsadbw.  The operation is modelled as a V2DI
;; unspec over two V16QI inputs rather than spelled out in RTL.
7623 ;; The correct representation for this is absolutely enormous, and
7624 ;; surely not generally useful.
7625 (define_insn "*avx_psadbw"
7626 [(set (match_operand:V2DI 0 "register_operand" "=x")
7627 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7628 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7631 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7632 [(set_attr "type" "sseiadd")
7633 (set_attr "prefix" "vex")
7634 (set_attr "mode" "TI")])
;; SSE2 form: two-operand psadbw, with operand 1 tied to the destination
;; ("0").  Modelled as a V2DI unspec over two V16QI inputs.
7636 (define_insn "sse2_psadbw"
7637 [(set (match_operand:V2DI 0 "register_operand" "=x")
7638 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7639 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7642 "psadbw\t{%2, %0|%0, %2}"
7643 [(set_attr "type" "sseiadd")
7644 (set_attr "prefix_data16" "1")
7645 (set_attr "mode" "TI")])
;; Iterated over AVX256MODEF2P: emits vmovmskp<suffix> from vector register
;; operand 1 into the SI general register operand 0.  Always VEX-encoded;
;; guarded by AVX256_VEC_FLOAT_MODE_P.
7647 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7648 [(set (match_operand:SI 0 "register_operand" "=r")
7650 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7652 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7653 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7654 [(set_attr "type" "ssecvt")
7655 (set_attr "prefix" "vex")
7656 (set_attr "mode" "<MODE>")])
;; Iterated over SSEMODEF2P: emits movmskp<suffix> from vector register
;; operand 1 into the SI general register operand 0.  The %v in the
;; template and the "maybe_vex" prefix attribute cover the VEX spelling.
7658 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7659 [(set (match_operand:SI 0 "register_operand" "=r")
7661 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7663 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7664 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7665 [(set_attr "type" "ssecvt")
7666 (set_attr "prefix" "maybe_vex")
7667 (set_attr "mode" "<MODE>")])
;; Emits pmovmskb (optionally VEX-prefixed via %v) from V16QI register
;; operand 1 into the SI general register operand 0; modelled as an unspec.
7669 (define_insn "sse2_pmovmskb"
7670 [(set (match_operand:SI 0 "register_operand" "=r")
7671 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7674 "%vpmovmskb\t{%1, %0|%0, %1}"
7675 [(set_attr "type" "ssecvt")
7676 (set_attr "prefix_data16" "1")
7677 (set_attr "prefix" "maybe_vex")
7678 (set_attr "mode" "SI")])
;; Named expander: a V16QI memory destination set from a V16QI unspec whose
;; inputs include the two V16QI register operands 1 and 2.
7680 (define_expand "sse2_maskmovdqu"
7681 [(set (match_operand:V16QI 0 "memory_operand" "")
7682 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7683 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit variant (TARGET_SSE2 && !TARGET_64BIT): the store address is an
;; SImode register with constraint "D".  The destination memory is also
;; listed as an unspec input through (match_dup 0).  The address register
;; does not appear in the output template; only operands 1 and 2 do.
7689 (define_insn "*sse2_maskmovdqu"
7690 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7691 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7692 (match_operand:V16QI 2 "register_operand" "x")
7693 (mem:V16QI (match_dup 0))]
7695 "TARGET_SSE2 && !TARGET_64BIT"
7696 ;; @@@ check ordering of operands in intel/nonintel syntax
7697 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7698 [(set_attr "type" "ssecvt")
7699 (set_attr "prefix_data16" "1")
7700 (set_attr "prefix" "maybe_vex")
7701 (set_attr "mode" "TI")])
;; 64-bit variant (TARGET_SSE2 && TARGET_64BIT): same shape as the 32-bit
;; pattern, but the store address is a DImode register with constraint "D".
7703 (define_insn "*sse2_maskmovdqu_rex64"
7704 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7705 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7706 (match_operand:V16QI 2 "register_operand" "x")
7707 (mem:V16QI (match_dup 0))]
7709 "TARGET_SSE2 && TARGET_64BIT"
7710 ;; @@@ check ordering of operands in intel/nonintel syntax
7711 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7712 [(set_attr "type" "ssecvt")
7713 (set_attr "prefix_data16" "1")
7714 (set_attr "prefix" "maybe_vex")
7715 (set_attr "mode" "TI")])
;; unspec_volatile over an SImode memory operand; the "memory" attribute is
;; "load", so the insn is treated as reading that memory.
7717 (define_insn "sse_ldmxcsr"
7718 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7722 [(set_attr "type" "sse")
7723 (set_attr "prefix" "maybe_vex")
7724 (set_attr "memory" "load")])
;; Sets an SImode memory operand from the UNSPECV_STMXCSR unspec_volatile;
;; the "memory" attribute is "store".
7726 (define_insn "sse_stmxcsr"
7727 [(set (match_operand:SI 0 "memory_operand" "=m")
7728 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7731 [(set_attr "type" "sse")
7732 (set_attr "prefix" "maybe_vex")
7733 (set_attr "memory" "store")])
;; Expander for UNSPEC_SFENCE (TARGET_SSE || TARGET_3DNOW_A).  The
;; preparation code creates operand 0 as a volatile BLKmode MEM whose
;; address is a Pmode SCRATCH, so the fence is expressed as a BLK
;; read-modify-write of that MEM.
7735 (define_expand "sse_sfence"
7737 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7738 "TARGET_SSE || TARGET_3DNOW_A"
7740 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7741 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the UNSPEC_SFENCE set: operand 0 is a BLK operand with an
;; empty predicate.  The "memory" attribute is "unknown".
7744 (define_insn "*sse_sfence"
7745 [(set (match_operand:BLK 0 "" "")
7746 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7747 "TARGET_SSE || TARGET_3DNOW_A"
7749 [(set_attr "type" "sse")
7750 (set_attr "memory" "unknown")])
;; unspec_volatile over a single address operand (predicate
;; address_operand, constraint "p"); the "memory" attribute is "unknown".
7752 (define_insn "sse2_clflush"
7753 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7757 [(set_attr "type" "sse")
7758 (set_attr "memory" "unknown")])
;; Expander for UNSPEC_MFENCE.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH.
7760 (define_expand "sse2_mfence"
7762 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7765 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7766 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the UNSPEC_MFENCE set.  The condition is
;; TARGET_64BIT || TARGET_SSE2, so it is accepted on any 64-bit target.
7769 (define_insn "*sse2_mfence"
7770 [(set (match_operand:BLK 0 "" "")
7771 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7772 "TARGET_64BIT || TARGET_SSE2"
7774 [(set_attr "type" "sse")
7775 (set_attr "memory" "unknown")])
;; Expander for UNSPEC_LFENCE.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH.
7777 (define_expand "sse2_lfence"
7779 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7782 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7783 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the UNSPEC_LFENCE set: operand 0 is a BLK operand with an
;; empty predicate.  The "memory" attribute is "unknown".
7786 (define_insn "*sse2_lfence"
7787 [(set (match_operand:BLK 0 "" "")
7788 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7791 [(set_attr "type" "sse")
7792 (set_attr "memory" "unknown")])
;; unspec_volatile over two SImode register operands, pinned by constraints
;; "a" and "c".  The length attribute is fixed at 3 bytes.
7794 (define_insn "sse3_mwait"
7795 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7796 (match_operand:SI 1 "register_operand" "c")]
7799 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7800 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7801 ;; we only need to set up 32bit registers.
7803 [(set_attr "length" "3")])
;; 32-bit variant (TARGET_SSE3 && !TARGET_64BIT): three SImode register
;; operands pinned by constraints "a", "c" and "d", printed explicitly in
;; the template.  The length attribute is fixed at 3 bytes.
7805 (define_insn "sse3_monitor"
7806 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7807 (match_operand:SI 1 "register_operand" "c")
7808 (match_operand:SI 2 "register_operand" "d")]
7810 "TARGET_SSE3 && !TARGET_64BIT"
7811 "monitor\t%0, %1, %2"
7812 [(set_attr "length" "3")])
;; 64-bit variant (TARGET_SSE3 && TARGET_64BIT): operand 0 is DImode
;; (constraint "a"), while operands 1 and 2 stay SImode (constraints "c"
;; and "d").  The length attribute is fixed at 3 bytes.
7814 (define_insn "sse3_monitor64"
7815 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7816 (match_operand:SI 1 "register_operand" "c")
7817 (match_operand:SI 2 "register_operand" "d")]
7819 "TARGET_SSE3 && TARGET_64BIT"
7820 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7821 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7822 ;; zero extended to 64bit, we only need to set up 32bit registers.
7824 [(set_attr "length" "3")])
7826 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7828 ;; SSSE3 instructions
7830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX form: three-operand vphaddw on V8HI.  The template selects adjacent
;; HI element pairs (0/1, 2/3, 4/5, 6/7), first from operand 1 and then
;; from operand 2.  Operand 1 is a free register ("x"), not tied to the
;; destination.
7832 (define_insn "*avx_phaddwv8hi3"
7833 [(set (match_operand:V8HI 0 "register_operand" "=x")
7839 (match_operand:V8HI 1 "register_operand" "x")
7840 (parallel [(const_int 0)]))
7841 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7843 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7844 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7847 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7848 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7850 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7851 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7856 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7857 (parallel [(const_int 0)]))
7858 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7860 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7861 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7864 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7865 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7867 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7868 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7870 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7871 [(set_attr "type" "sseiadd")
7872 (set_attr "prefix" "vex")
7873 (set_attr "mode" "TI")])
;; SSSE3 128-bit form: two-operand phaddw on V8HI, with operand 1 tied to
;; the destination ("0").  The template selects adjacent HI element pairs
;; (0/1 .. 6/7), first from operand 1 and then from operand 2.
7875 (define_insn "ssse3_phaddwv8hi3"
7876 [(set (match_operand:V8HI 0 "register_operand" "=x")
7882 (match_operand:V8HI 1 "register_operand" "0")
7883 (parallel [(const_int 0)]))
7884 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7886 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7887 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7890 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7891 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7893 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7894 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7899 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7900 (parallel [(const_int 0)]))
7901 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7903 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7904 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7907 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7908 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7910 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7911 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7913 "phaddw\t{%2, %0|%0, %2}"
7914 [(set_attr "type" "sseiadd")
7915 (set_attr "prefix_data16" "1")
7916 (set_attr "prefix_extra" "1")
7917 (set_attr "mode" "TI")])
;; SSSE3 64-bit form: two-operand phaddw on V4HI using "y"-class
;; registers, with operand 1 tied to the destination.  The template selects
;; HI element pairs 0/1 and 2/3 of operand 1, then of operand 2.
7919 (define_insn "ssse3_phaddwv4hi3"
7920 [(set (match_operand:V4HI 0 "register_operand" "=y")
7925 (match_operand:V4HI 1 "register_operand" "0")
7926 (parallel [(const_int 0)]))
7927 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7929 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7930 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7934 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7935 (parallel [(const_int 0)]))
7936 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7938 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7939 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7941 "phaddw\t{%2, %0|%0, %2}"
7942 [(set_attr "type" "sseiadd")
7943 (set_attr "prefix_extra" "1")
7944 (set_attr "mode" "DI")])
;; AVX form: three-operand vphaddd on V4SI.  The template selects SI
;; element pairs 0/1 and 2/3 of operand 1, then of operand 2.
7946 (define_insn "*avx_phadddv4si3"
7947 [(set (match_operand:V4SI 0 "register_operand" "=x")
7952 (match_operand:V4SI 1 "register_operand" "x")
7953 (parallel [(const_int 0)]))
7954 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7956 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7957 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7961 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7962 (parallel [(const_int 0)]))
7963 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7965 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7966 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7968 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
7969 [(set_attr "type" "sseiadd")
7970 (set_attr "prefix" "vex")
7971 (set_attr "mode" "TI")])
;; SSSE3 128-bit form: two-operand phaddd on V4SI, with operand 1 tied to
;; the destination ("0").  The template selects SI element pairs 0/1 and
;; 2/3 of operand 1, then of operand 2.
7973 (define_insn "ssse3_phadddv4si3"
7974 [(set (match_operand:V4SI 0 "register_operand" "=x")
7979 (match_operand:V4SI 1 "register_operand" "0")
7980 (parallel [(const_int 0)]))
7981 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7983 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7984 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7988 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7989 (parallel [(const_int 0)]))
7990 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7992 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7993 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7995 "phaddd\t{%2, %0|%0, %2}"
7996 [(set_attr "type" "sseiadd")
7997 (set_attr "prefix_data16" "1")
7998 (set_attr "prefix_extra" "1")
7999 (set_attr "mode" "TI")])
;; SSSE3 64-bit form: two-operand phaddd on V2SI using "y"-class
;; registers, with operand 1 tied to the destination.  The template selects
;; SI elements 0 and 1 of operand 1, then of operand 2.
8001 (define_insn "ssse3_phadddv2si3"
8002 [(set (match_operand:V2SI 0 "register_operand" "=y")
8006 (match_operand:V2SI 1 "register_operand" "0")
8007 (parallel [(const_int 0)]))
8008 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8011 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8012 (parallel [(const_int 0)]))
8013 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8015 "phaddd\t{%2, %0|%0, %2}"
8016 [(set_attr "type" "sseiadd")
8017 (set_attr "prefix_extra" "1")
8018 (set_attr "mode" "DI")])
;; AVX form: three-operand vphaddsw on V8HI.  The template selects adjacent
;; HI element pairs (0/1 .. 6/7), first from operand 1 and then from
;; operand 2.
8020 (define_insn "*avx_phaddswv8hi3"
8021 [(set (match_operand:V8HI 0 "register_operand" "=x")
8027 (match_operand:V8HI 1 "register_operand" "x")
8028 (parallel [(const_int 0)]))
8029 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8031 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8032 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8035 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8036 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8038 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8039 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8044 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8045 (parallel [(const_int 0)]))
8046 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8048 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8049 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8052 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8053 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8055 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8056 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8058 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8059 [(set_attr "type" "sseiadd")
8060 (set_attr "prefix" "vex")
8061 (set_attr "mode" "TI")])
;; SSSE3 128-bit form: two-operand phaddsw on V8HI, with operand 1 tied to
;; the destination ("0").  The template selects adjacent HI element pairs
;; (0/1 .. 6/7), first from operand 1 and then from operand 2.
8063 (define_insn "ssse3_phaddswv8hi3"
8064 [(set (match_operand:V8HI 0 "register_operand" "=x")
8070 (match_operand:V8HI 1 "register_operand" "0")
8071 (parallel [(const_int 0)]))
8072 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8074 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8075 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8078 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8079 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8081 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8082 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8087 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8088 (parallel [(const_int 0)]))
8089 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8091 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8092 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8095 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8096 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8098 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8099 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8101 "phaddsw\t{%2, %0|%0, %2}"
8102 [(set_attr "type" "sseiadd")
8103 (set_attr "prefix_data16" "1")
8104 (set_attr "prefix_extra" "1")
8105 (set_attr "mode" "TI")])
;; SSSE3 64-bit form: two-operand phaddsw on V4HI using "y"-class
;; registers, with operand 1 tied to the destination.  The template selects
;; HI element pairs 0/1 and 2/3 of operand 1, then of operand 2.
8107 (define_insn "ssse3_phaddswv4hi3"
8108 [(set (match_operand:V4HI 0 "register_operand" "=y")
8113 (match_operand:V4HI 1 "register_operand" "0")
8114 (parallel [(const_int 0)]))
8115 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8117 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8118 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8122 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8123 (parallel [(const_int 0)]))
8124 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8126 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8127 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8129 "phaddsw\t{%2, %0|%0, %2}"
8130 [(set_attr "type" "sseiadd")
8131 (set_attr "prefix_extra" "1")
8132 (set_attr "mode" "DI")])
;; AVX form: three-operand vphsubw on V8HI.  The template selects adjacent
;; HI element pairs (0/1 .. 6/7), first from operand 1 and then from
;; operand 2.
8134 (define_insn "*avx_phsubwv8hi3"
8135 [(set (match_operand:V8HI 0 "register_operand" "=x")
8141 (match_operand:V8HI 1 "register_operand" "x")
8142 (parallel [(const_int 0)]))
8143 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8145 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8146 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8149 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8150 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8152 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8153 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8158 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8159 (parallel [(const_int 0)]))
8160 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8162 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8163 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8166 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8167 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8169 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8170 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8172 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8173 [(set_attr "type" "sseiadd")
8174 (set_attr "prefix" "vex")
8175 (set_attr "mode" "TI")])
;; SSSE3 128-bit form: two-operand phsubw on V8HI, with operand 1 tied to
;; the destination ("0").  The template selects adjacent HI element pairs
;; (0/1 .. 6/7), first from operand 1 and then from operand 2.
8177 (define_insn "ssse3_phsubwv8hi3"
8178 [(set (match_operand:V8HI 0 "register_operand" "=x")
8184 (match_operand:V8HI 1 "register_operand" "0")
8185 (parallel [(const_int 0)]))
8186 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8188 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8189 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8192 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8193 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8195 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8196 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8201 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8202 (parallel [(const_int 0)]))
8203 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8205 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8206 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8209 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8210 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8212 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8213 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8215 "phsubw\t{%2, %0|%0, %2}"
8216 [(set_attr "type" "sseiadd")
8217 (set_attr "prefix_data16" "1")
8218 (set_attr "prefix_extra" "1")
8219 (set_attr "mode" "TI")])
;; SSSE3 64-bit form: two-operand phsubw on V4HI using "y"-class
;; registers, with operand 1 tied to the destination.  The template selects
;; HI element pairs 0/1 and 2/3 of operand 1, then of operand 2.
8221 (define_insn "ssse3_phsubwv4hi3"
8222 [(set (match_operand:V4HI 0 "register_operand" "=y")
8227 (match_operand:V4HI 1 "register_operand" "0")
8228 (parallel [(const_int 0)]))
8229 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8231 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8232 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8236 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8237 (parallel [(const_int 0)]))
8238 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8240 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8241 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8243 "phsubw\t{%2, %0|%0, %2}"
8244 [(set_attr "type" "sseiadd")
8245 (set_attr "prefix_extra" "1")
8246 (set_attr "mode" "DI")])
;; AVX form: three-operand vphsubd on V4SI.  The template selects SI
;; element pairs 0/1 and 2/3 of operand 1, then of operand 2.
8248 (define_insn "*avx_phsubdv4si3"
8249 [(set (match_operand:V4SI 0 "register_operand" "=x")
8254 (match_operand:V4SI 1 "register_operand" "x")
8255 (parallel [(const_int 0)]))
8256 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8258 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8259 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8263 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8264 (parallel [(const_int 0)]))
8265 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8267 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8268 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8270 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8271 [(set_attr "type" "sseiadd")
8272 (set_attr "prefix" "vex")
8273 (set_attr "mode" "TI")])
;; SSSE3 128-bit form: two-operand phsubd on V4SI, with operand 1 tied to
;; the destination ("0").  The template selects SI element pairs 0/1 and
;; 2/3 of operand 1, then of operand 2.
8275 (define_insn "ssse3_phsubdv4si3"
8276 [(set (match_operand:V4SI 0 "register_operand" "=x")
8281 (match_operand:V4SI 1 "register_operand" "0")
8282 (parallel [(const_int 0)]))
8283 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8285 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8286 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8290 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8291 (parallel [(const_int 0)]))
8292 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8294 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8295 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8297 "phsubd\t{%2, %0|%0, %2}"
8298 [(set_attr "type" "sseiadd")
8299 (set_attr "prefix_data16" "1")
8300 (set_attr "prefix_extra" "1")
8301 (set_attr "mode" "TI")])
;; SSSE3 64-bit form: two-operand phsubd on V2SI using "y"-class
;; registers, with operand 1 tied to the destination.  The template selects
;; SI elements 0 and 1 of operand 1, then of operand 2.
8303 (define_insn "ssse3_phsubdv2si3"
8304 [(set (match_operand:V2SI 0 "register_operand" "=y")
8308 (match_operand:V2SI 1 "register_operand" "0")
8309 (parallel [(const_int 0)]))
8310 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8313 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8314 (parallel [(const_int 0)]))
8315 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8317 "phsubd\t{%2, %0|%0, %2}"
8318 [(set_attr "type" "sseiadd")
8319 (set_attr "prefix_extra" "1")
8320 (set_attr "mode" "DI")])
;; AVX form: three-operand vphsubsw on V8HI.  The template selects adjacent
;; HI element pairs (0/1 .. 6/7), first from operand 1 and then from
;; operand 2.
8322 (define_insn "*avx_phsubswv8hi3"
8323 [(set (match_operand:V8HI 0 "register_operand" "=x")
8329 (match_operand:V8HI 1 "register_operand" "x")
8330 (parallel [(const_int 0)]))
8331 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8333 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8334 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8337 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8338 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8340 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8341 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8346 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8347 (parallel [(const_int 0)]))
8348 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8350 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8351 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8354 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8355 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8357 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8358 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8360 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8361 [(set_attr "type" "sseiadd")
8362 (set_attr "prefix" "vex")
8363 (set_attr "mode" "TI")])
;; SSSE3 128-bit form: two-operand phsubsw on V8HI, with operand 1 tied to
;; the destination ("0").  The template selects adjacent HI element pairs
;; (0/1 .. 6/7), first from operand 1 and then from operand 2.
8365 (define_insn "ssse3_phsubswv8hi3"
8366 [(set (match_operand:V8HI 0 "register_operand" "=x")
8372 (match_operand:V8HI 1 "register_operand" "0")
8373 (parallel [(const_int 0)]))
8374 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8376 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8377 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8380 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8381 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8383 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8384 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8389 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8390 (parallel [(const_int 0)]))
8391 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8393 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8394 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8397 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8398 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8400 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8401 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8403 "phsubsw\t{%2, %0|%0, %2}"
8404 [(set_attr "type" "sseiadd")
8405 (set_attr "prefix_data16" "1")
8406 (set_attr "prefix_extra" "1")
8407 (set_attr "mode" "TI")])
;; SSSE3 64-bit form: two-operand phsubsw on V4HI using "y"-class
;; registers, with operand 1 tied to the destination.  The template selects
;; HI element pairs 0/1 and 2/3 of operand 1, then of operand 2.
8409 (define_insn "ssse3_phsubswv4hi3"
8410 [(set (match_operand:V4HI 0 "register_operand" "=y")
8415 (match_operand:V4HI 1 "register_operand" "0")
8416 (parallel [(const_int 0)]))
8417 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8419 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8420 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8424 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8425 (parallel [(const_int 0)]))
8426 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8428 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8429 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8431 "phsubsw\t{%2, %0|%0, %2}"
8432 [(set_attr "type" "sseiadd")
8433 (set_attr "prefix_extra" "1")
8434 (set_attr "mode" "DI")])
;; AVX form: three-operand vpmaddubsw with V16QI inputs and a V8HI result.
;; The template takes V8QI sub-vectors of operands 1 and 2 via vec_select;
;; one pair of index lists starts at element 0 and the other at element 1
;; (ending at 15).
8436 (define_insn "*avx_pmaddubsw128"
8437 [(set (match_operand:V8HI 0 "register_operand" "=x")
8442 (match_operand:V16QI 1 "register_operand" "x")
8443 (parallel [(const_int 0)
8453 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8454 (parallel [(const_int 0)
8464 (vec_select:V8QI (match_dup 1)
8465 (parallel [(const_int 1)
8474 (vec_select:V8QI (match_dup 2)
8475 (parallel [(const_int 1)
8482 (const_int 15)]))))))]
8484 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8485 [(set_attr "type" "sseiadd")
8486 (set_attr "prefix" "vex")
8487 (set_attr "mode" "TI")])
;; SSSE3 128-bit form: two-operand pmaddubsw with V16QI inputs and a V8HI
;; result; operand 1 is tied to the destination ("0").  The template takes
;; V8QI sub-vectors of operands 1 and 2 via vec_select, with index lists
;; starting at element 0 and at element 1 (ending at 15).
8489 (define_insn "ssse3_pmaddubsw128"
8490 [(set (match_operand:V8HI 0 "register_operand" "=x")
8495 (match_operand:V16QI 1 "register_operand" "0")
8496 (parallel [(const_int 0)
8506 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8507 (parallel [(const_int 0)
8517 (vec_select:V8QI (match_dup 1)
8518 (parallel [(const_int 1)
8527 (vec_select:V8QI (match_dup 2)
8528 (parallel [(const_int 1)
8535 (const_int 15)]))))))]
8537 "pmaddubsw\t{%2, %0|%0, %2}"
8538 [(set_attr "type" "sseiadd")
8539 (set_attr "prefix_data16" "1")
8540 (set_attr "prefix_extra" "1")
8541 (set_attr "mode" "TI")])
;; MMX-register pmaddubsw: V8QI inputs, V4HI result ("y" constraints).
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory.  Unlike the 128-bit form, no prefix_data16 attribute
;; is set here.
8543 (define_insn "ssse3_pmaddubsw"
8544 [(set (match_operand:V4HI 0 "register_operand" "=y")
8549 (match_operand:V8QI 1 "register_operand" "0")
8550 (parallel [(const_int 0)
8556 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8557 (parallel [(const_int 0)
8563 (vec_select:V4QI (match_dup 1)
8564 (parallel [(const_int 1)
8569 (vec_select:V4QI (match_dup 2)
8570 (parallel [(const_int 1)
8573 (const_int 7)]))))))]
8575 "pmaddubsw\t{%2, %0|%0, %2}"
8576 [(set_attr "type" "sseiadd")
8577 (set_attr "prefix_extra" "1")
8578 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw patterns.  Both inputs are accepted as
;; nonimmediate operands with no constraints; the RTL includes an all-ones
;; V8HI const_vector.  The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy with MULT and V8HImode.
;; NOTE(review): presumably that helper legitimizes the operand pair for the
;; matching insn patterns -- confirm against its definition.
8580 (define_expand "ssse3_pmulhrswv8hi3"
8581 [(set (match_operand:V8HI 0 "register_operand" "")
8588 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8590 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8592 (const_vector:V8HI [(const_int 1) (const_int 1)
8593 (const_int 1) (const_int 1)
8594 (const_int 1) (const_int 1)
8595 (const_int 1) (const_int 1)]))
8598 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VEX three-operand form, emitting vpmulhrsw on V8HI.  Enabled only when
;; TARGET_AVX holds and ix86_binary_operator_ok accepts the operands.
;; The "%" modifier on operand 1 marks operands 1 and 2 as commutative;
;; operand 1 is not tied to the destination.
8600 (define_insn "*avx_pmulhrswv8hi3"
8601 [(set (match_operand:V8HI 0 "register_operand" "=x")
8608 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8610 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8612 (const_vector:V8HI [(const_int 1) (const_int 1)
8613 (const_int 1) (const_int 1)
8614 (const_int 1) (const_int 1)
8615 (const_int 1) (const_int 1)]))
8617 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8618 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8619 [(set_attr "type" "sseimul")
8620 (set_attr "prefix" "vex")
8621 (set_attr "mode" "TI")])
;; Two-operand form, emitting pmulhrsw on V8HI.  Enabled only when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination ("0"), so only operands 2 and 0 appear in the template.
8623 (define_insn "*ssse3_pmulhrswv8hi3"
8624 [(set (match_operand:V8HI 0 "register_operand" "=x")
8631 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8633 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8635 (const_vector:V8HI [(const_int 1) (const_int 1)
8636 (const_int 1) (const_int 1)
8637 (const_int 1) (const_int 1)
8638 (const_int 1) (const_int 1)]))
8640 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8641 "pmulhrsw\t{%2, %0|%0, %2}"
8642 [(set_attr "type" "sseimul")
8643 (set_attr "prefix_data16" "1")
8644 (set_attr "prefix_extra" "1")
8645 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands with no constraints; the RTL includes an all-ones
;; V4HI const_vector.  The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy with MULT and V4HImode.
;; NOTE(review): presumably that helper legitimizes the operand pair for the
;; matching insn pattern -- confirm against its definition.
8647 (define_expand "ssse3_pmulhrswv4hi3"
8648 [(set (match_operand:V4HI 0 "register_operand" "")
8655 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8657 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8659 (const_vector:V4HI [(const_int 1) (const_int 1)
8660 (const_int 1) (const_int 1)]))
8663 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; MMX-register form, emitting pmulhrsw on V4HI ("y" constraints).
;; Enabled only when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts
;; the operands.  Operand 1 is commutative ("%") and tied to the
;; destination ("0").
8665 (define_insn "*ssse3_pmulhrswv4hi3"
8666 [(set (match_operand:V4HI 0 "register_operand" "=y")
8673 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8675 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8677 (const_vector:V4HI [(const_int 1) (const_int 1)
8678 (const_int 1) (const_int 1)]))
8680 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8681 "pmulhrsw\t{%2, %0|%0, %2}"
8682 [(set_attr "type" "sseimul")
8683 (set_attr "prefix_extra" "1")
8684 (set_attr "mode" "DI")])
;; VEX three-operand byte shuffle on V16QI, emitting vpshufb.  The operation
;; is expressed as an unspec over operand 1 (SSE register) and operand 2
;; (SSE register or memory).
;; The ';' after the output template merely starts an empty md comment.
8686 (define_insn "*avx_pshufbv16qi3"
8687 [(set (match_operand:V16QI 0 "register_operand" "=x")
8688 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8689 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8692 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8693 [(set_attr "type" "sselog1")
8694 (set_attr "prefix" "vex")
8695 (set_attr "mode" "TI")])
;; Two-operand byte shuffle on V16QI, emitting pshufb.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be an SSE register or memory.
;; The ';' after the output template merely starts an empty md comment.
8697 (define_insn "ssse3_pshufbv16qi3"
8698 [(set (match_operand:V16QI 0 "register_operand" "=x")
8699 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8700 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8703 "pshufb\t{%2, %0|%0, %2}";
8704 [(set_attr "type" "sselog1")
8705 (set_attr "prefix_data16" "1")
8706 (set_attr "prefix_extra" "1")
8707 (set_attr "mode" "TI")])
;; MMX-register byte shuffle on V8QI, emitting pshufb ("y" constraints).
;; Operand 1 is tied to the destination ("0"); operand 2 may be an MMX
;; register or memory.
;; The ';' after the output template merely starts an empty md comment.
8709 (define_insn "ssse3_pshufbv8qi3"
8710 [(set (match_operand:V8QI 0 "register_operand" "=y")
8711 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8712 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8715 "pshufb\t{%2, %0|%0, %2}";
8716 [(set_attr "type" "sselog1")
8717 (set_attr "prefix_extra" "1")
8718 (set_attr "mode" "DI")])
;; VEX three-operand psign, instantiated for each mode in SSEMODE124
;; (V16QI, V8HI, V4SI).  The mnemonic suffix comes from <ssevecsize>.
;; Operand 1 is an independent SSE register; operand 2 is register or memory.
;; The ';' after the output template merely starts an empty md comment.
8720 (define_insn "*avx_psign<mode>3"
8721 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8723 [(match_operand:SSEMODE124 1 "register_operand" "x")
8724 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8727 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
8728 [(set_attr "type" "sselog1")
8729 (set_attr "prefix" "vex")
8730 (set_attr "mode" "TI")])
;; Two-operand psign, instantiated for each mode in SSEMODE124
;; (V16QI, V8HI, V4SI).  The mnemonic suffix comes from <ssevecsize>.
;; Operand 1 is tied to the destination ("0").
;; The ';' after the output template merely starts an empty md comment.
8732 (define_insn "ssse3_psign<mode>3"
8733 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8735 [(match_operand:SSEMODE124 1 "register_operand" "0")
8736 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8739 "psign<ssevecsize>\t{%2, %0|%0, %2}";
8740 [(set_attr "type" "sselog1")
8741 (set_attr "prefix_data16" "1")
8742 (set_attr "prefix_extra" "1")
8743 (set_attr "mode" "TI")])
;; MMX-register psign, instantiated over the MMXMODEI iterator (defined
;; outside this excerpt).  It shares the "ssse3_psign<mode>3" name template
;; with the SSE pattern but uses "y" constraints and the <mmxvecsize> suffix.
;; Operand 1 is tied to the destination ("0").
;; The ';' after the output template merely starts an empty md comment.
8745 (define_insn "ssse3_psign<mode>3"
8746 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8748 [(match_operand:MMXMODEI 1 "register_operand" "0")
8749 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8752 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8753 [(set_attr "type" "sselog1")
8754 (set_attr "prefix_extra" "1")
8755 (set_attr "mode" "DI")])
;; VEX three-operand palignr on TImode, emitting vpalignr.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the immediate.
;; NOTE(review): the predicate name suggests operand 3 is a bit count and the
;; printed immediate a byte count -- confirm against the predicate.
8757 (define_insn "*avx_palignrti"
8758 [(set (match_operand:TI 0 "register_operand" "=x")
8759 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8760 (match_operand:TI 2 "nonimmediate_operand" "xm")
8761 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8765 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8766 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8768 [(set_attr "type" "sseishft")
8769 (set_attr "prefix" "vex")
8770 (set_attr "mode" "TI")])
;; Two-operand palignr on TImode; operand 1 is tied to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the immediate.
;; NOTE(review): the predicate name suggests operand 3 is a bit count and the
;; printed immediate a byte count -- confirm against the predicate.
8772 (define_insn "ssse3_palignrti"
8773 [(set (match_operand:TI 0 "register_operand" "=x")
8774 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
8775 (match_operand:TI 2 "nonimmediate_operand" "xm")
8776 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8780 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8781 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8783 [(set_attr "type" "sseishft")
8784 (set_attr "prefix_data16" "1")
8785 (set_attr "prefix_extra" "1")
8786 (set_attr "mode" "TI")])
;; MMX-register palignr on DImode ("y" constraints); operand 1 is tied to the
;; destination ("0").  Operand 3 must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before printing it as the immediate.
;; NOTE(review): the same 0..255 predicate is used as for the TImode form
;; although the data is only 64 bits wide -- confirm this range is intended.
8788 (define_insn "ssse3_palignrdi"
8789 [(set (match_operand:DI 0 "register_operand" "=y")
8790 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8791 (match_operand:DI 2 "nonimmediate_operand" "ym")
8792 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8796 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8797 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8799 [(set_attr "type" "sseishft")
8800 (set_attr "prefix_extra" "1")
8801 (set_attr "mode" "DI")])
;; Vector absolute value for each mode in SSEMODE124 (V16QI, V8HI, V4SI),
;; expressed with the generic RTL `abs' operator and emitted as
;; pabs<ssevecsize>.  The source may be an SSE register or memory.
;; The template starts with %v and the prefix attribute is maybe_vex, so the
;; same pattern presumably also covers the VEX-encoded mnemonic.
8803 (define_insn "abs<mode>2"
8804 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8805 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
8807 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
8808 [(set_attr "type" "sselog1")
8809 (set_attr "prefix_data16" "1")
8810 (set_attr "prefix_extra" "1")
8811 (set_attr "prefix" "maybe_vex")
8812 (set_attr "mode" "TI")])
;; MMX-register vector absolute value over the MMXMODEI iterator (defined
;; outside this excerpt), emitted as pabs<mmxvecsize>.  The source may be an
;; MMX register or memory.  No %v prefix is used in this template.
;; The ';' after the output template merely starts an empty md comment.
8814 (define_insn "abs<mode>2"
8815 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8816 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8818 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8819 [(set_attr "type" "sselog1")
8820 (set_attr "prefix_extra" "1")
8821 (set_attr "mode" "DI")])
8823 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8825 ;; AMD SSE4A instructions
8827 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Scalar store from an SSE register (operand 1, "x") to memory (operand 0),
;; instantiated over the MODEF iterator and emitted as
;; movnts<ssemodefsuffix>.  The destination must be a memory operand.
8829 (define_insn "sse4a_movnt<mode>"
8830 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8832 [(match_operand:MODEF 1 "register_operand" "x")]
8835 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
8836 [(set_attr "type" "ssemov")
8837 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the movnts store: element 0 of the SSEMODEF2P
;; vector in operand 1 (V4SF or V2DF) is selected and stored to the
;; scalar-mode memory operand 0.  Emitted as movnts<ssemodesuffixf2c>.
8839 (define_insn "sse4a_vmmovnt<mode>"
8840 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8841 (unspec:<ssescalarmode>
8842 [(vec_select:<ssescalarmode>
8843 (match_operand:SSEMODEF2P 1 "register_operand" "x")
8844 (parallel [(const_int 0)]))]
8847 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8848 [(set_attr "type" "ssemov")
8849 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq on V2DI.  Operand 1 is tied to the destination
;; ("0"); operands 2 and 3 are mode-less integer constants with no
;; constraint letters, printed as the two immediates of the instruction.
8851 (define_insn "sse4a_extrqi"
8852 [(set (match_operand:V2DI 0 "register_operand" "=x")
8853 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8854 (match_operand 2 "const_int_operand" "")
8855 (match_operand 3 "const_int_operand" "")]
8858 "extrq\t{%3, %2, %0|%0, %2, %3}"
8859 [(set_attr "type" "sse")
8860 (set_attr "prefix_data16" "1")
8861 (set_attr "mode" "TI")])
;; Register form of extrq on V2DI.  Operand 1 is tied to the destination
;; ("0"); operand 2 is a V16QI SSE register (memory is not accepted).
8863 (define_insn "sse4a_extrq"
8864 [(set (match_operand:V2DI 0 "register_operand" "=x")
8865 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8866 (match_operand:V16QI 2 "register_operand" "x")]
8869 "extrq\t{%2, %0|%0, %2}"
8870 [(set_attr "type" "sse")
8871 (set_attr "prefix_data16" "1")
8872 (set_attr "mode" "TI")])
;; Immediate form of insertq on V2DI.  Operand 1 is tied to the destination
;; ("0"), operand 2 is an SSE register, and operands 3 and 4 are mode-less
;; integer constants printed as the two immediates.
8874 (define_insn "sse4a_insertqi"
8875 [(set (match_operand:V2DI 0 "register_operand" "=x")
8876 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8877 (match_operand:V2DI 2 "register_operand" "x")
8878 (match_operand 3 "const_int_operand" "")
8879 (match_operand 4 "const_int_operand" "")]
8882 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8883 [(set_attr "type" "sseins")
8884 (set_attr "prefix_rep" "1")
8885 (set_attr "mode" "TI")])
;; Register form of insertq on V2DI.  Operand 1 is tied to the destination
;; ("0"); operand 2 is an SSE register (memory is not accepted).
8887 (define_insn "sse4a_insertq"
8888 [(set (match_operand:V2DI 0 "register_operand" "=x")
8889 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8890 (match_operand:V2DI 2 "register_operand" "x")]
8893 "insertq\t{%2, %0|%0, %2}"
8894 [(set_attr "type" "sseins")
8895 (set_attr "prefix_rep" "1")
8896 (set_attr "mode" "TI")])
8898 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8900 ;; Intel SSE4.1 instructions
8902 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX immediate blend over the AVXMODEF2P iterator, emitting vblendp*.
;; Modelled as vec_merge: operand 2 is the first vec_merge input, operand 1
;; the second, and operand 3 the immediate selection mask, whose range is
;; limited by the const_0_to_<blendbits>_operand predicate.
8904 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
8905 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8906 (vec_merge:AVXMODEF2P
8907 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8908 (match_operand:AVXMODEF2P 1 "register_operand" "x")
8909 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8911 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8912 [(set_attr "type" "ssemov")
8913 (set_attr "prefix" "vex")
8914 (set_attr "mode" "<avxvecmode>")])
;; VEX variable blend over the AVXMODEF2P iterator, emitting vblendvp*.
;; The selector (operand 3) is an ordinary SSE register ("x") printed
;; explicitly in the template, not a fixed register.
8916 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
8917 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8919 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
8920 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8921 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
8924 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8925 [(set_attr "type" "ssemov")
8926 (set_attr "prefix" "vex")
8927 (set_attr "mode" "<avxvecmode>")])
;; Two-operand immediate blend for V4SF/V2DF (SSEMODEF2P), emitting blendp*.
;; Modelled as vec_merge: operand 2 is the first input, operand 1 (tied to
;; the destination, "0") the second, and operand 3 the immediate mask.
8929 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
8930 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8931 (vec_merge:SSEMODEF2P
8932 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8933 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8934 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8936 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8937 [(set_attr "type" "ssemov")
8938 (set_attr "prefix_extra" "1")
8939 (set_attr "mode" "<MODE>")])
;; Two-operand variable blend for V4SF/V2DF, emitting blendvp*.
;; The selector (operand 3) is constrained to "Yz" (the first SSE register,
;; xmm0), while operands 0-2 use the *_not_xmm0 predicates so that they do
;; not collide with it.  Operand 1 is tied to the destination ("0").
8941 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
8942 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
8944 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
8945 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
8946 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
8949 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8950 [(set_attr "type" "ssemov")
8951 (set_attr "prefix_extra" "1")
8952 (set_attr "mode" "<MODE>")])
;; VEX three-operand dot product over the AVXMODEF2P iterator, emitting
;; vdpp*.  Operands 1 and 2 are marked commutative ("%"); operand 3 is an
;; 8-bit immediate (const_0_to_255_operand).
8954 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
8955 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8957 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
8958 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8959 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8962 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8963 [(set_attr "type" "ssemul")
8964 (set_attr "prefix" "vex")
8965 (set_attr "mode" "<avxvecmode>")])
;; Two-operand dot product for V4SF/V2DF, emitting dpp*.  Operand 1 is
;; commutative with operand 2 ("%") and tied to the destination ("0");
;; operand 3 is an 8-bit immediate (const_0_to_255_operand).
8967 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
8968 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8970 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
8971 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8972 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8975 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8976 [(set_attr "type" "ssemul")
8977 (set_attr "prefix_extra" "1")
8978 (set_attr "mode" "<MODE>")])
;; Load of a V2DI value from memory (operand 1 must be a memory operand)
;; into an SSE register, emitted as movntdqa and wrapped in an unspec.
;; The template starts with %v and the prefix attribute is maybe_vex, so the
;; same pattern presumably also covers the VEX-encoded mnemonic.
8980 (define_insn "sse4_1_movntdqa"
8981 [(set (match_operand:V2DI 0 "register_operand" "=x")
8982 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
8985 "%vmovntdqa\t{%1, %0|%0, %1}"
8986 [(set_attr "type" "ssecvt")
8987 (set_attr "prefix_extra" "1")
8988 (set_attr "prefix" "maybe_vex")
8989 (set_attr "mode" "TI")])
;; VEX three-operand mpsadbw on V16QI, emitting vmpsadbw.  Operand 1 is an
;; independent SSE register, operand 2 register or memory, and operand 3 an
;; 8-bit immediate (const_0_to_255_operand).
8991 (define_insn "*avx_mpsadbw"
8992 [(set (match_operand:V16QI 0 "register_operand" "=x")
8993 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8994 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8995 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8998 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8999 [(set_attr "type" "sselog1")
9000 (set_attr "prefix" "vex")
9001 (set_attr "mode" "TI")])
;; Two-operand mpsadbw on V16QI.  Operand 1 is tied to the destination
;; ("0"), operand 2 is register or memory, and operand 3 is an 8-bit
;; immediate (const_0_to_255_operand).
9003 (define_insn "sse4_1_mpsadbw"
9004 [(set (match_operand:V16QI 0 "register_operand" "=x")
9005 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9006 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9007 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9010 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9011 [(set_attr "type" "sselog1")
9012 (set_attr "prefix_extra" "1")
9013 (set_attr "mode" "TI")])
;; VEX three-operand pack of two V4SI inputs into one V8HI result, emitting
;; vpackusdw.  Operand 1 is an independent SSE register; operand 2 may be a
;; register or memory.
9015 (define_insn "*avx_packusdw"
9016 [(set (match_operand:V8HI 0 "register_operand" "=x")
9019 (match_operand:V4SI 1 "register_operand" "x"))
9021 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9023 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9024 [(set_attr "type" "sselog")
9025 (set_attr "prefix" "vex")
9026 (set_attr "mode" "TI")])
;; Two-operand pack of two V4SI inputs into one V8HI result, emitting
;; packusdw.  Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
9028 (define_insn "sse4_1_packusdw"
9029 [(set (match_operand:V8HI 0 "register_operand" "=x")
9032 (match_operand:V4SI 1 "register_operand" "0"))
9034 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9036 "packusdw\t{%2, %0|%0, %2}"
9037 [(set_attr "type" "sselog")
9038 (set_attr "prefix_extra" "1")
9039 (set_attr "mode" "TI")])
;; VEX variable byte blend on V16QI, emitting vpblendvb.  The selector
;; (operand 3) is an ordinary SSE register ("x") printed explicitly in the
;; template.
9041 (define_insn "*avx_pblendvb"
9042 [(set (match_operand:V16QI 0 "register_operand" "=x")
9043 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9044 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9045 (match_operand:V16QI 3 "register_operand" "x")]
9048 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9049 [(set_attr "type" "ssemov")
9050 (set_attr "prefix" "vex")
9051 (set_attr "mode" "TI")])
;; Two-operand variable byte blend on V16QI, emitting pblendvb.
;; The selector (operand 3) is constrained to "Yz" (the first SSE register,
;; xmm0), while operands 0-2 use the *_not_xmm0 predicates so that they do
;; not collide with it.  Operand 1 is tied to the destination ("0").
9053 (define_insn "sse4_1_pblendvb"
9054 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9055 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9056 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9057 (match_operand:V16QI 3 "register_operand" "Yz")]
9060 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9061 [(set_attr "type" "ssemov")
9062 (set_attr "prefix_extra" "1")
9063 (set_attr "mode" "TI")])
;; VEX immediate word blend on V8HI, emitting vpblendw.  Operand 2 is listed
;; before operand 1 in the RTL; operand 3 is an 8-bit immediate mask
;; (const_0_to_255_operand).
9065 (define_insn "*avx_pblendw"
9066 [(set (match_operand:V8HI 0 "register_operand" "=x")
9068 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9069 (match_operand:V8HI 1 "register_operand" "x")
9070 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9072 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9073 [(set_attr "type" "ssemov")
9074 (set_attr "prefix" "vex")
9075 (set_attr "mode" "TI")])
;; Two-operand immediate word blend on V8HI, emitting pblendw.  Operand 2 is
;; listed before operand 1 in the RTL; operand 1 is tied to the destination
;; ("0") and operand 3 is an 8-bit immediate mask.
9077 (define_insn "sse4_1_pblendw"
9078 [(set (match_operand:V8HI 0 "register_operand" "=x")
9080 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9081 (match_operand:V8HI 1 "register_operand" "0")
9082 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9084 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9085 [(set_attr "type" "ssemov")
9086 (set_attr "prefix_extra" "1")
9087 (set_attr "mode" "TI")])
;; Single-input V8HI operation wrapped in UNSPEC_PHMINPOSUW and emitted as
;; phminposuw.  The source may be an SSE register or memory.
;; The template starts with %v and the prefix attribute is maybe_vex, so the
;; same pattern presumably also covers the VEX-encoded mnemonic.
9089 (define_insn "sse4_1_phminposuw"
9090 [(set (match_operand:V8HI 0 "register_operand" "=x")
9091 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9092 UNSPEC_PHMINPOSUW))]
9094 "%vphminposuw\t{%1, %0|%0, %1}"
9095 [(set_attr "type" "sselog1")
9096 (set_attr "prefix_extra" "1")
9097 (set_attr "prefix" "maybe_vex")
9098 (set_attr "mode" "TI")])
;; Widening move emitted as pmovsxbw: elements selected from the V16QI
;; source (register or memory), starting at index 0, produce a V8HI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9100 (define_insn "sse4_1_extendv8qiv8hi2"
9101 [(set (match_operand:V8HI 0 "register_operand" "=x")
9104 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9105 (parallel [(const_int 0)
9114 "%vpmovsxbw\t{%1, %0|%0, %1}"
9115 [(set_attr "type" "ssemov")
9116 (set_attr "prefix_extra" "1")
9117 (set_attr "prefix" "maybe_vex")
9118 (set_attr "mode" "TI")])
;; Widening move emitted as pmovsxbd: elements selected from the V16QI
;; source (register or memory), starting at index 0, produce a V4SI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9120 (define_insn "sse4_1_extendv4qiv4si2"
9121 [(set (match_operand:V4SI 0 "register_operand" "=x")
9124 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9125 (parallel [(const_int 0)
9130 "%vpmovsxbd\t{%1, %0|%0, %1}"
9131 [(set_attr "type" "ssemov")
9132 (set_attr "prefix_extra" "1")
9133 (set_attr "prefix" "maybe_vex")
9134 (set_attr "mode" "TI")])
;; Widening move emitted as pmovsxbq: elements selected from the V16QI
;; source (register or memory), starting at index 0, produce a V2DI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9136 (define_insn "sse4_1_extendv2qiv2di2"
9137 [(set (match_operand:V2DI 0 "register_operand" "=x")
9140 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9141 (parallel [(const_int 0)
9144 "%vpmovsxbq\t{%1, %0|%0, %1}"
9145 [(set_attr "type" "ssemov")
9146 (set_attr "prefix_extra" "1")
9147 (set_attr "prefix" "maybe_vex")
9148 (set_attr "mode" "TI")])
;; Widening move emitted as pmovsxwd: elements selected from the V8HI
;; source (register or memory), starting at index 0, produce a V4SI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9150 (define_insn "sse4_1_extendv4hiv4si2"
9151 [(set (match_operand:V4SI 0 "register_operand" "=x")
9154 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9155 (parallel [(const_int 0)
9160 "%vpmovsxwd\t{%1, %0|%0, %1}"
9161 [(set_attr "type" "ssemov")
9162 (set_attr "prefix_extra" "1")
9163 (set_attr "prefix" "maybe_vex")
9164 (set_attr "mode" "TI")])
;; Widening move emitted as pmovsxwq: elements selected from the V8HI
;; source (register or memory), starting at index 0, produce a V2DI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9166 (define_insn "sse4_1_extendv2hiv2di2"
9167 [(set (match_operand:V2DI 0 "register_operand" "=x")
9170 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9171 (parallel [(const_int 0)
9174 "%vpmovsxwq\t{%1, %0|%0, %1}"
9175 [(set_attr "type" "ssemov")
9176 (set_attr "prefix_extra" "1")
9177 (set_attr "prefix" "maybe_vex")
9178 (set_attr "mode" "TI")])
;; Widening move emitted as pmovsxdq: elements selected from the V4SI
;; source (register or memory), starting at index 0, produce a V2DI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9180 (define_insn "sse4_1_extendv2siv2di2"
9181 [(set (match_operand:V2DI 0 "register_operand" "=x")
9184 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9185 (parallel [(const_int 0)
9188 "%vpmovsxdq\t{%1, %0|%0, %1}"
9189 [(set_attr "type" "ssemov")
9190 (set_attr "prefix_extra" "1")
9191 (set_attr "prefix" "maybe_vex")
9192 (set_attr "mode" "TI")])
;; Widening move emitted as pmovzxbw: elements selected from the V16QI
;; source (register or memory), starting at index 0, produce a V8HI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9194 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9195 [(set (match_operand:V8HI 0 "register_operand" "=x")
9198 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9199 (parallel [(const_int 0)
9208 "%vpmovzxbw\t{%1, %0|%0, %1}"
9209 [(set_attr "type" "ssemov")
9210 (set_attr "prefix_extra" "1")
9211 (set_attr "prefix" "maybe_vex")
9212 (set_attr "mode" "TI")])
;; Widening move emitted as pmovzxbd: elements selected from the V16QI
;; source (register or memory), starting at index 0, produce a V4SI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9214 (define_insn "sse4_1_zero_extendv4qiv4si2"
9215 [(set (match_operand:V4SI 0 "register_operand" "=x")
9218 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9219 (parallel [(const_int 0)
9224 "%vpmovzxbd\t{%1, %0|%0, %1}"
9225 [(set_attr "type" "ssemov")
9226 (set_attr "prefix_extra" "1")
9227 (set_attr "prefix" "maybe_vex")
9228 (set_attr "mode" "TI")])
;; Widening move emitted as pmovzxbq: elements selected from the V16QI
;; source (register or memory), starting at index 0, produce a V2DI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9230 (define_insn "sse4_1_zero_extendv2qiv2di2"
9231 [(set (match_operand:V2DI 0 "register_operand" "=x")
9234 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9235 (parallel [(const_int 0)
9238 "%vpmovzxbq\t{%1, %0|%0, %1}"
9239 [(set_attr "type" "ssemov")
9240 (set_attr "prefix_extra" "1")
9241 (set_attr "prefix" "maybe_vex")
9242 (set_attr "mode" "TI")])
;; Widening move emitted as pmovzxwd: elements selected from the V8HI
;; source (register or memory), starting at index 0, produce a V4SI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9244 (define_insn "sse4_1_zero_extendv4hiv4si2"
9245 [(set (match_operand:V4SI 0 "register_operand" "=x")
9248 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9249 (parallel [(const_int 0)
9254 "%vpmovzxwd\t{%1, %0|%0, %1}"
9255 [(set_attr "type" "ssemov")
9256 (set_attr "prefix_extra" "1")
9257 (set_attr "prefix" "maybe_vex")
9258 (set_attr "mode" "TI")])
;; Widening move emitted as pmovzxwq: elements selected from the V8HI
;; source (register or memory), starting at index 0, produce a V2DI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9260 (define_insn "sse4_1_zero_extendv2hiv2di2"
9261 [(set (match_operand:V2DI 0 "register_operand" "=x")
9264 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9265 (parallel [(const_int 0)
9268 "%vpmovzxwq\t{%1, %0|%0, %1}"
9269 [(set_attr "type" "ssemov")
9270 (set_attr "prefix_extra" "1")
9271 (set_attr "prefix" "maybe_vex")
9272 (set_attr "mode" "TI")])
;; Widening move emitted as pmovzxdq: elements selected from the V4SI
;; source (register or memory), starting at index 0, produce a V2DI result.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9274 (define_insn "sse4_1_zero_extendv2siv2di2"
9275 [(set (match_operand:V2DI 0 "register_operand" "=x")
9278 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9279 (parallel [(const_int 0)
9282 "%vpmovzxdq\t{%1, %0|%0, %1}"
9283 [(set_attr "type" "ssemov")
9284 (set_attr "prefix_extra" "1")
9285 (set_attr "prefix" "maybe_vex")
9286 (set_attr "mode" "TI")])
9288 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9289 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Flag-setting test over the AVXMODEF2P iterator, emitting vtestp*.
;; The only output is FLAGS_REG (CCmode); operands 0 and 1 are both inputs,
;; with operand 1 allowed to be memory.
9290 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9291 [(set (reg:CC FLAGS_REG)
9292 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9293 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9296 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9297 [(set_attr "type" "ssecomi")
9298 (set_attr "prefix" "vex")
9299 (set_attr "mode" "<MODE>")])
9301 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9302 ;; But it is not really a compare instruction.
;; 256-bit flag-setting test on V4DI operands, emitting vptest (mode OI).
;; The only output is FLAGS_REG (CCmode); operands 0 and 1 are both inputs,
;; with operand 1 allowed to be memory.
9303 (define_insn "avx_ptest256"
9304 [(set (reg:CC FLAGS_REG)
9305 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9306 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9309 "vptest\t{%1, %0|%0, %1}"
9310 [(set_attr "type" "ssecomi")
9311 (set_attr "prefix" "vex")
9312 (set_attr "mode" "OI")])
;; 128-bit flag-setting test on V2DI operands, emitting ptest.
;; The only output is FLAGS_REG (CCmode); operands 0 and 1 are both inputs,
;; with operand 1 allowed to be memory.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9314 (define_insn "sse4_1_ptest"
9315 [(set (reg:CC FLAGS_REG)
9316 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9317 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9320 "%vptest\t{%1, %0|%0, %1}"
9321 [(set_attr "type" "ssecomi")
9322 (set_attr "prefix_extra" "1")
9323 (set_attr "prefix" "maybe_vex")
9324 (set_attr "mode" "TI")])
;; 256-bit packed rounding over the AVX256MODEF2P iterator, emitting
;; vroundp*.  Operand 1 may be a register or memory; operand 2 is an
;; immediate restricted to 0..15 by const_0_to_15_operand.
9326 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9327 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9328 (unspec:AVX256MODEF2P
9329 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9330 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9333 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9334 [(set_attr "type" "ssecvt")
9335 (set_attr "prefix" "vex")
9336 (set_attr "mode" "<MODE>")])
;; 128-bit packed rounding for V4SF/V2DF, emitting roundp*.  Operand 1 may
;; be a register or memory and is not tied to the destination; operand 2 is
;; an immediate restricted to 0..15 by const_0_to_15_operand.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9338 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9339 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9341 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9342 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9345 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9346 [(set_attr "type" "ssecvt")
9347 (set_attr "prefix_extra" "1")
9348 (set_attr "prefix" "maybe_vex")
9349 (set_attr "mode" "<MODE>")])
;; VEX three-operand scalar rounding, emitting vrounds*.  Modelled as a
;; vec_merge of the rounded operand 2 (immediate mode selector in operand 3,
;; range 0..15) with operand 1, which is an independent SSE register here.
9351 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9352 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9353 (vec_merge:SSEMODEF2P
9355 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9356 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9358 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9361 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9362 [(set_attr "type" "ssecvt")
9363 (set_attr "prefix" "vex")
9364 (set_attr "mode" "<MODE>")])
;; Two-operand scalar rounding, emitting rounds*.  Modelled as a vec_merge
;; of the rounded operand 2 (immediate mode selector in operand 3, range
;; 0..15) with operand 1, which is tied to the destination ("0").
9366 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9367 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9368 (vec_merge:SSEMODEF2P
9370 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9371 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9373 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9376 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9377 [(set_attr "type" "ssecvt")
9378 (set_attr "prefix_extra" "1")
9379 (set_attr "mode" "<MODE>")])
9381 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9383 ;; Intel SSE4.2 string/text processing instructions
9385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI index in operand 0
;; ("c"), a V16QI mask in operand 1 ("Yz", the first SSE register) and
;; FLAGS_REG.  Inputs are operands 2-6; the explicit lengths are pinned to
;; the "a" and "d" registers.  The condition only holds before reload.
;;
;; The split code inspects REG_UNUSED notes on curr_insn: ecx, xmm0 and
;; flags are nonzero when the corresponding result is actually used.  It
;; emits the pcmpestri / pcmpestrm forms, and falls back to the flags-only
;; pattern when the flags are used but neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are not visible
;; in this excerpt -- confirm they are conditional on ecx and xmm0.
9387 (define_insn_and_split "sse4_2_pcmpestr"
9388 [(set (match_operand:SI 0 "register_operand" "=c,c")
9390 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9391 (match_operand:SI 3 "register_operand" "a,a")
9392 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9393 (match_operand:SI 5 "register_operand" "d,d")
9394 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9396 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9404 (set (reg:CC FLAGS_REG)
9413 && !(reload_completed || reload_in_progress)"
9418 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9419 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9420 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9423 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9424 operands[3], operands[4],
9425 operands[5], operands[6]));
9427 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9428 operands[3], operands[4],
9429 operands[5], operands[6]));
9430 if (flags && !(ecx || xmm0))
9431 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9432 operands[2], operands[3],
9433 operands[4], operands[5],
9437 [(set_attr "type" "sselog")
9438 (set_attr "prefix_data16" "1")
9439 (set_attr "prefix_extra" "1")
9440 (set_attr "memory" "none,load")
9441 (set_attr "mode" "TI")])
;; Index-producing form, emitting pcmpestri.  The SI result lives in the "c"
;; register and FLAGS_REG is also set.  The explicit lengths (operands 2 and
;; 4) are pinned to "a" and "d" and are not printed in the template; only
;; operands 1, 3 and the immediate 5 are.  The two alternatives differ in
;; whether operand 3 is a register or memory (memory attr "none,load").
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9443 (define_insn "sse4_2_pcmpestri"
9444 [(set (match_operand:SI 0 "register_operand" "=c,c")
9446 [(match_operand:V16QI 1 "register_operand" "x,x")
9447 (match_operand:SI 2 "register_operand" "a,a")
9448 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9449 (match_operand:SI 4 "register_operand" "d,d")
9450 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9452 (set (reg:CC FLAGS_REG)
9461 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9462 [(set_attr "type" "sselog")
9463 (set_attr "prefix_data16" "1")
9464 (set_attr "prefix_extra" "1")
9465 (set_attr "prefix" "maybe_vex")
9466 (set_attr "memory" "none,load")
9467 (set_attr "mode" "TI")])
;; Mask-producing form, emitting pcmpestrm.  The V16QI result is constrained
;; to "Yz" (the first SSE register) and FLAGS_REG is also set.  The explicit
;; lengths (operands 2 and 4) are pinned to "a" and "d" and are not printed
;; in the template.  The two alternatives differ in whether operand 3 is a
;; register or memory (memory attr "none,load").
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9469 (define_insn "sse4_2_pcmpestrm"
9470 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9472 [(match_operand:V16QI 1 "register_operand" "x,x")
9473 (match_operand:SI 2 "register_operand" "a,a")
9474 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9475 (match_operand:SI 4 "register_operand" "d,d")
9476 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9478 (set (reg:CC FLAGS_REG)
9487 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9488 [(set_attr "type" "sselog")
9489 (set_attr "prefix_data16" "1")
9490 (set_attr "prefix_extra" "1")
9491 (set_attr "prefix" "maybe_vex")
9492 (set_attr "memory" "none,load")
9493 (set_attr "mode" "TI")])
;; Flags-only form: FLAGS_REG is the sole result; operands 0 and 1 are
;; clobbered scratches.  There are four alternatives:
;;   0,1: scratch 0 is "Yz" (first SSE register), scratch 1 unused ("X");
;;        emits pcmpestrm with operand 4 in a register / in memory.
;;   2,3: scratch 1 is "c", scratch 0 unused ("X");
;;        emits pcmpestri with operand 4 in a register / in memory.
;; The memory attribute "none,load,none,load" follows operand 4.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9495 (define_insn "sse4_2_pcmpestr_cconly"
9496 [(set (reg:CC FLAGS_REG)
9498 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9499 (match_operand:SI 3 "register_operand" "a,a,a,a")
9500 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9501 (match_operand:SI 5 "register_operand" "d,d,d,d")
9502 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9504 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9505 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9508 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9509 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9510 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9511 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9512 [(set_attr "type" "sselog")
9513 (set_attr "prefix_data16" "1")
9514 (set_attr "prefix_extra" "1")
9515 (set_attr "memory" "none,load,none,load")
9516 (set_attr "prefix" "maybe_vex")
9517 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI index in operand 0
;; ("c"), a V16QI mask in operand 1 ("Yz", the first SSE register) and
;; FLAGS_REG.  Inputs are operands 2-4; unlike pcmpestr there are no
;; explicit length operands.  The condition only holds before reload.
;;
;; The split code inspects REG_UNUSED notes on curr_insn: ecx, xmm0 and
;; flags are nonzero when the corresponding result is actually used.  It
;; emits the pcmpistri / pcmpistrm forms, and falls back to the flags-only
;; pattern when the flags are used but neither register result is.
;; NOTE(review): the guards on the first two emit_insn calls are not visible
;; in this excerpt -- confirm they are conditional on ecx and xmm0.
9519 (define_insn_and_split "sse4_2_pcmpistr"
9520 [(set (match_operand:SI 0 "register_operand" "=c,c")
9522 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9523 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9524 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9526 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9532 (set (reg:CC FLAGS_REG)
9539 && !(reload_completed || reload_in_progress)"
9544 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9545 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9546 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9549 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9550 operands[3], operands[4]));
9552 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9553 operands[3], operands[4]));
9554 if (flags && !(ecx || xmm0))
9555 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9556 operands[2], operands[3],
9560 [(set_attr "type" "sselog")
9561 (set_attr "prefix_data16" "1")
9562 (set_attr "prefix_extra" "1")
9563 (set_attr "memory" "none,load")
9564 (set_attr "mode" "TI")])
;; Index-producing form, emitting pcmpistri.  The SI result lives in the "c"
;; register and FLAGS_REG is also set.  The two alternatives differ in
;; whether operand 2 is a register or memory (memory attr "none,load").
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9566 (define_insn "sse4_2_pcmpistri"
9567 [(set (match_operand:SI 0 "register_operand" "=c,c")
9569 [(match_operand:V16QI 1 "register_operand" "x,x")
9570 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9571 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9573 (set (reg:CC FLAGS_REG)
9580 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9581 [(set_attr "type" "sselog")
9582 (set_attr "prefix_data16" "1")
9583 (set_attr "prefix_extra" "1")
9584 (set_attr "prefix" "maybe_vex")
9585 (set_attr "memory" "none,load")
9586 (set_attr "mode" "TI")])
;; Mask-producing form, emitting pcmpistrm.  The V16QI result is constrained
;; to "Yz" (the first SSE register) and FLAGS_REG is also set.  The two
;; alternatives differ in whether operand 2 is a register or memory
;; (memory attr "none,load").
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9588 (define_insn "sse4_2_pcmpistrm"
9589 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9591 [(match_operand:V16QI 1 "register_operand" "x,x")
9592 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9593 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9595 (set (reg:CC FLAGS_REG)
9602 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9603 [(set_attr "type" "sselog")
9604 (set_attr "prefix_data16" "1")
9605 (set_attr "prefix_extra" "1")
9606 (set_attr "prefix" "maybe_vex")
9607 (set_attr "memory" "none,load")
9608 (set_attr "mode" "TI")])
;; Flags-only form: FLAGS_REG is the sole result; operands 0 and 1 are
;; clobbered scratches.  There are four alternatives:
;;   0,1: scratch 0 is "Yz" (first SSE register), scratch 1 unused ("X");
;;        emits pcmpistrm with operand 3 in a register / in memory.
;;   2,3: scratch 1 is "c", scratch 0 unused ("X");
;;        emits pcmpistri with operand 3 in a register / in memory.
;; The memory attribute "none,load,none,load" follows operand 3.
;; %v with prefix=maybe_vex presumably also covers the VEX-encoded form.
9610 (define_insn "sse4_2_pcmpistr_cconly"
9611 [(set (reg:CC FLAGS_REG)
9613 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9614 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9615 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9617 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9618 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9621 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9622 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9623 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9624 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9625 [(set_attr "type" "sselog")
9626 (set_attr "prefix_data16" "1")
9627 (set_attr "prefix_extra" "1")
9628 (set_attr "memory" "none,load,none,load")
9629 (set_attr "prefix" "maybe_vex")
9630 (set_attr "mode" "TI")])
9632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9634 ;; SSE5 instructions
9636 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9638 ;; SSE5 parallel integer multiply/add instructions.
9639 ;; Note that the instruction does not allow the value being added to be a
9640 ;; memory operand. However, pretending via the nonimmediate_operand predicate
9641 ;; that it does, and splitting it later, allows the following to be recognized:
9642 ;; a[i] = b[i] * c[i] + d[i];
;; V8HI multiply-accumulate, emitting pmacsww.  Operands 1 and 2 are the
;; commutative ("%") multiplicands; operand 3 is the addend and is tied to
;; the destination ("0") in all three alternatives.
;; Enabled when TARGET_SSE5 holds and ix86_sse5_valid_op_p accepts the
;; operands.  The third alternative (operand 1 may be memory) prints
;; operands 1 and 2 in swapped order relative to the first two.
9643 (define_insn "sse5_pmacsww"
9644 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9647 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
9648 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
9649 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9650 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9652 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9653 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9654 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9655 [(set_attr "type" "ssemuladd")
9656 (set_attr "mode" "TI")])
9658 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split condition requires ix86_sse5_valid_op_p to reject the operands
;; when its fifth argument is 1 but accept them when it is 2, and requires
;; that operand 0 is not mentioned in any of operands 1-3.  The preparation
;; code calls ix86_expand_sse5_multiple_memory on the operands and then
;; re-emits the operation through gen_sse5_pmacsww.
9660 [(set (match_operand:V8HI 0 "register_operand" "")
9662 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
9663 (match_operand:V8HI 2 "nonimmediate_operand" ""))
9664 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
9666 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9667 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9668 && !reg_mentioned_p (operands[0], operands[1])
9669 && !reg_mentioned_p (operands[0], operands[2])
9670 && !reg_mentioned_p (operands[0], operands[3])"
9673 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
9674 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; V8HI multiply/accumulate printed as pmacssww.  Operand 3 is tied to the
;; destination ("0").  At most one of the commutative operands 1 and 2 is a
;; memory reference in any alternative; the third alternative prints the
;; two multiplicands in swapped order.
9679 (define_insn "sse5_pmacssww"
9680 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9682 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
9683 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
9684 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9685 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9687 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9688 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9689 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9690 [(set_attr "type" "ssemuladd")
9691 (set_attr "mode" "TI")])
9693 ;; Note the instruction does not allow the value being added to be a memory
9694 ;; operation. However by pretending via the nonimmediate_operand predicate
9695 ;; that it does and splitting it later allows the following to be recognized:
9696 ;; a[i] = b[i] * c[i] + d[i];
;; V4SI multiply/accumulate printed as pmacsdd.  Operand 3 (the value being
;; added) is tied to the destination through the "0" constraint.  Operands 1
;; and 2 are commutative ("%"); the third alternative prints them swapped.
9697 (define_insn "sse5_pmacsdd"
9698 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9701 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9702 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9703 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9704 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9706 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9707 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9708 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9709 [(set_attr "type" "ssemuladd")
9710 (set_attr "mode" "TI")])
9712 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split condition requires ix86_sse5_valid_op_p to reject the operands
;; when its fifth argument is 1 but accept them when it is 2, and requires
;; that operand 0 is not mentioned in any of operands 1-3.  The preparation
;; code calls ix86_expand_sse5_multiple_memory on the operands and then
;; re-emits the operation through gen_sse5_pmacsdd.
9714 [(set (match_operand:V4SI 0 "register_operand" "")
9716 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
9717 (match_operand:V4SI 2 "nonimmediate_operand" ""))
9718 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
9720 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9721 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9722 && !reg_mentioned_p (operands[0], operands[1])
9723 && !reg_mentioned_p (operands[0], operands[2])
9724 && !reg_mentioned_p (operands[0], operands[3])"
9727 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
9728 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; V4SI multiply/accumulate printed as pmacssdd.  Operand 3 is tied to the
;; destination ("0").  At most one of the commutative operands 1 and 2 is a
;; memory reference in any alternative; the third alternative prints the
;; two multiplicands in swapped order.
9733 (define_insn "sse5_pmacssdd"
9734 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9736 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9737 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9738 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9739 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9741 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9742 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9743 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9744 [(set_attr "type" "ssemuladd")
9745 (set_attr "mode" "TI")])
;; Multiply/accumulate from two V4SI inputs into a V2DI destination, printed
;; as pmacssdql.  Each V4SI input is paired with a parallel selector that
;; starts at element 1.  Operand 3 (V2DI) is tied to the destination ("0");
;; the third alternative prints operands 1 and 2 in swapped order.
9747 (define_insn "sse5_pmacssdql"
9748 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9753 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9754 (parallel [(const_int 1)
9757 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9758 (parallel [(const_int 1)
9760 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9761 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9763 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9764 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9765 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9766 [(set_attr "type" "ssemuladd")
9767 (set_attr "mode" "TI")])
;; Multiply/accumulate from two V4SI inputs into a V2DI destination, printed
;; as pmacssdqh.  Each V4SI input is paired with a parallel selector that
;; starts at element 0.  Operand 3 (V2DI) is tied to the destination ("0");
;; the third alternative prints operands 1 and 2 in swapped order.
9769 (define_insn "sse5_pmacssdqh"
9770 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9775 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9776 (parallel [(const_int 0)
9780 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9781 (parallel [(const_int 0)
9783 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9784 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9786 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9787 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9788 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9789 [(set_attr "type" "ssemuladd")
9790 (set_attr "mode" "TI")])
;; Multiply/accumulate from two V4SI inputs into a V2DI destination, printed
;; as pmacsdql.  Each V4SI input is paired with a parallel selector that
;; starts at element 1.  Operand 3 (V2DI) must be a register tied to the
;; destination ("0"); the third alternative prints operands 1 and 2 swapped.
9792 (define_insn "sse5_pmacsdql"
9793 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9798 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9799 (parallel [(const_int 1)
9803 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9804 (parallel [(const_int 1)
9806 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9807 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9809 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9810 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9811 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9812 [(set_attr "type" "ssemuladd")
9813 (set_attr "mode" "TI")])
;; Variant of the pmacsdql pattern in which operand 3 is a memory operand
;; ("m") rather than a register tied to the destination.  The destination is
;; earlyclobber ("=&x").  The insn condition passes -1 as the fifth argument
;; of ix86_sse5_valid_op_p.  The pattern is split once reload has completed,
;; or earlier when operand 0 is not mentioned in operand 1 or operand 2.
9815 (define_insn_and_split "*sse5_pmacsdql_mem"
9816 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
9821 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9822 (parallel [(const_int 1)
9826 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9827 (parallel [(const_int 1)
9829 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
9830 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
9832 "&& (reload_completed
9833 || (!reg_mentioned_p (operands[0], operands[1])
9834 && !reg_mentioned_p (operands[0], operands[2])))"
9843 (parallel [(const_int 1)
9848 (parallel [(const_int 1)
9852 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
9853 ;; fake it with a multiply/add. In general, we expect the define_split to
9854 ;; occur before register allocation, so we have to handle the corner case where
9855 ;; the target is the same as operands 1/2
;; V4SI x V4SI -> V2DI multiply using the selector (1 3) on both inputs.
;; The destination is earlyclobber ("=&x").  The pattern is split once reload
;; has completed, or earlier when operand 0 is not mentioned in operand 1 or
;; operand 2; the split preparation sets operands[3] to the V2DI zero
;; constant, which serves as the addend of the multiply/add it is rewritten to.
9856 (define_insn_and_split "sse5_mulv2div2di3_low"
9857 [(set (match_operand:V2DI 0 "register_operand" "=&x")
9861 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9862 (parallel [(const_int 1)
9866 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9867 (parallel [(const_int 1)
9868 (const_int 3)])))))]
9871 "&& (reload_completed
9872 || (!reg_mentioned_p (operands[0], operands[1])
9873 && !reg_mentioned_p (operands[0], operands[2])))"
9882 (parallel [(const_int 1)
9887 (parallel [(const_int 1)
9891 operands[3] = CONST0_RTX (V2DImode);
9893 [(set_attr "type" "ssemuladd")
9894 (set_attr "mode" "TI")])
;; Multiply/accumulate from two V4SI inputs into a V2DI destination, printed
;; as pmacsdqh.  Each V4SI input is paired with a parallel selector that
;; starts at element 0.  Operand 3 (V2DI) must be a register tied to the
;; destination ("0"); the third alternative prints operands 1 and 2 swapped.
9896 (define_insn "sse5_pmacsdqh"
9897 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9902 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9903 (parallel [(const_int 0)
9907 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9908 (parallel [(const_int 0)
9910 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9911 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9913 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9914 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9915 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9916 [(set_attr "type" "ssemuladd")
9917 (set_attr "mode" "TI")])
;; Variant of the pmacsdqh pattern in which operand 3 is a memory operand
;; ("m") rather than a register tied to the destination.  The destination is
;; earlyclobber ("=&x").  The insn condition passes -1 as the fifth argument
;; of ix86_sse5_valid_op_p.  The pattern is split once reload has completed,
;; or earlier when operand 0 is not mentioned in operand 1 or operand 2.
9919 (define_insn_and_split "*sse5_pmacsdqh_mem"
9920 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
9925 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9926 (parallel [(const_int 0)
9930 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9931 (parallel [(const_int 0)
9933 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
9934 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
9936 "&& (reload_completed
9937 || (!reg_mentioned_p (operands[0], operands[1])
9938 && !reg_mentioned_p (operands[0], operands[2])))"
9947 (parallel [(const_int 0)
9952 (parallel [(const_int 0)
9956 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
9957 ;; fake it with a multiply/add. In general, we expect the define_split to
9958 ;; occur before register allocation, so we have to handle the corner case where
9959 ;; the target is the same as either operands[1] or operands[2]
;; V4SI x V4SI -> V2DI multiply using the selector (0 2) on both inputs.
;; The destination is earlyclobber ("=&x").  The pattern is split once reload
;; has completed, or earlier when operand 0 is not mentioned in operand 1 or
;; operand 2; the split preparation sets operands[3] to the V2DI zero
;; constant, which serves as the addend of the multiply/add it is rewritten to.
9960 (define_insn_and_split "sse5_mulv2div2di3_high"
9961 [(set (match_operand:V2DI 0 "register_operand" "=&x")
9965 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9966 (parallel [(const_int 0)
9970 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9971 (parallel [(const_int 0)
9972 (const_int 2)])))))]
9975 "&& (reload_completed
9976 || (!reg_mentioned_p (operands[0], operands[1])
9977 && !reg_mentioned_p (operands[0], operands[2])))"
9986 (parallel [(const_int 0)
9991 (parallel [(const_int 0)
9995 operands[3] = CONST0_RTX (V2DImode);
9997 [(set_attr "type" "ssemuladd")
9998 (set_attr "mode" "TI")])
10000 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; Multiply/accumulate from two V8HI inputs into a V4SI destination, printed
;; as pmacsswd.  Each V8HI input is paired with a parallel selector that
;; starts at element 1.  Operand 3 (V4SI) is tied to the destination ("0");
;; the third alternative prints operands 1 and 2 in swapped order.
10001 (define_insn "sse5_pmacsswd"
10002 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10007 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10008 (parallel [(const_int 1)
10014 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10015 (parallel [(const_int 1)
10019 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10020 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10022 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10023 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10024 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10025 [(set_attr "type" "ssemuladd")
10026 (set_attr "mode" "TI")])
;; Multiply/accumulate from two V8HI inputs into a V4SI destination, printed
;; as pmacswd.  Each V8HI input is paired with a parallel selector that
;; starts at element 1.  Operand 3 (V4SI) is tied to the destination ("0");
;; the third alternative prints operands 1 and 2 in swapped order.
10028 (define_insn "sse5_pmacswd"
10029 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10034 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10035 (parallel [(const_int 1)
10041 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10042 (parallel [(const_int 1)
10046 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10047 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10049 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10050 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10051 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10052 [(set_attr "type" "ssemuladd")
10053 (set_attr "mode" "TI")])
;; Multiply/add/accumulate from two V8HI inputs into a V4SI destination,
;; printed as pmadcsswd.  The pattern uses two groups of parallel selectors,
;; one starting at element 0 and one starting at element 1 (ending at 7).
;; Operand 3 (V4SI) is tied to the destination ("0"); the third alternative
;; prints operands 1 and 2 in swapped order.
10055 (define_insn "sse5_pmadcsswd"
10056 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10062 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10063 (parallel [(const_int 0)
10069 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10070 (parallel [(const_int 0)
10078 (parallel [(const_int 1)
10085 (parallel [(const_int 1)
10088 (const_int 7)])))))
10089 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10090 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10092 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10093 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10094 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10095 [(set_attr "type" "ssemuladd")
10096 (set_attr "mode" "TI")])
;; Multiply/add/accumulate from two V8HI inputs into a V4SI destination,
;; printed as pmadcswd.  The pattern uses two groups of parallel selectors,
;; one starting at element 0 and one starting at element 1 (ending at 7).
;; Operand 3 (V4SI) is tied to the destination ("0"); the third alternative
;; prints operands 1 and 2 in swapped order.
10098 (define_insn "sse5_pmadcswd"
10099 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10105 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10106 (parallel [(const_int 0)
10112 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10113 (parallel [(const_int 0)
10121 (parallel [(const_int 1)
10128 (parallel [(const_int 1)
10131 (const_int 7)])))))
10132 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10133 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10135 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10136 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10137 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10138 [(set_attr "type" "ssemuladd")
10139 (set_attr "mode" "TI")])
10141 ;; SSE5 parallel XMM conditional moves
;; Conditional move over every 16-byte SSE vector mode (SSEMODE): an
;; if_then_else whose condition is operand 3 and whose arms are operands 1
;; and 2.  In each of the four alternatives either operand 3 (alternatives
;; 0-1) or operand 1 (alternatives 2-3) is tied to the destination, and at
;; most one operand may be in memory.  All alternatives print the same
;; pcmov template.
10142 (define_insn "sse5_pcmov_<mode>"
10143 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10144 (if_then_else:SSEMODE
10145 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10146 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10147 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10148 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10150 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10151 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10152 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10153 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10154 [(set_attr "type" "sse4arg")])
10156 ;; SSE5 horizontal add/subtract instructions
;; Horizontal add from a V16QI source (register or memory) into a V8HI
;; register, printed as phaddbw.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and one starting at element 1
;; (ending at 15).
10157 (define_insn "sse5_phaddbw"
10158 [(set (match_operand:V8HI 0 "register_operand" "=x")
10162 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10163 (parallel [(const_int 0)
10174 (parallel [(const_int 1)
10181 (const_int 15)])))))]
10183 "phaddbw\t{%1, %0|%0, %1}"
10184 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V16QI source (register or memory) into a V4SI
;; register, printed as phaddbd.  The pattern combines four element lists of
;; operand 1, starting at elements 0, 1, 2 and 3 respectively.
10186 (define_insn "sse5_phaddbd"
10187 [(set (match_operand:V4SI 0 "register_operand" "=x")
10192 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10193 (parallel [(const_int 0)
10200 (parallel [(const_int 1)
10203 (const_int 13)]))))
10208 (parallel [(const_int 2)
10215 (parallel [(const_int 3)
10218 (const_int 15)]))))))]
10220 "phaddbd\t{%1, %0|%0, %1}"
10221 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V16QI source (register or memory) into a V2DI
;; register, printed as phaddbq.  The pattern combines eight element lists of
;; operand 1, starting at elements 0-3 and 8-11.
10223 (define_insn "sse5_phaddbq"
10224 [(set (match_operand:V2DI 0 "register_operand" "=x")
10230 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10231 (parallel [(const_int 0)
10236 (parallel [(const_int 1)
10242 (parallel [(const_int 2)
10247 (parallel [(const_int 3)
10248 (const_int 7)])))))
10254 (parallel [(const_int 8)
10259 (parallel [(const_int 9)
10260 (const_int 13)]))))
10265 (parallel [(const_int 10)
10270 (parallel [(const_int 11)
10271 (const_int 15)])))))))]
10273 "phaddbq\t{%1, %0|%0, %1}"
10274 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V8HI source (register or memory) into a V4SI
;; register, printed as phaddwd.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and one starting at element 1
;; (ending at 7).
10276 (define_insn "sse5_phaddwd"
10277 [(set (match_operand:V4SI 0 "register_operand" "=x")
10281 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10282 (parallel [(const_int 0)
10289 (parallel [(const_int 1)
10292 (const_int 7)])))))]
10294 "phaddwd\t{%1, %0|%0, %1}"
10295 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V8HI source (register or memory) into a V2DI
;; register, printed as phaddwq.  The pattern combines four element lists of
;; operand 1, starting at elements 0, 1, 2 and 3 respectively.
10297 (define_insn "sse5_phaddwq"
10298 [(set (match_operand:V2DI 0 "register_operand" "=x")
10303 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10304 (parallel [(const_int 0)
10309 (parallel [(const_int 1)
10315 (parallel [(const_int 2)
10320 (parallel [(const_int 3)
10321 (const_int 7)]))))))]
10323 "phaddwq\t{%1, %0|%0, %1}"
10324 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V4SI source (register or memory) into a V2DI
;; register, printed as phadddq.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and the other being (1 3).
10326 (define_insn "sse5_phadddq"
10327 [(set (match_operand:V2DI 0 "register_operand" "=x")
10331 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10332 (parallel [(const_int 0)
10337 (parallel [(const_int 1)
10338 (const_int 3)])))))]
10340 "phadddq\t{%1, %0|%0, %1}"
10341 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V16QI source (register or memory) into a V8HI
;; register, printed as phaddubw.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and one starting at element 1
;; (ending at 15).
10343 (define_insn "sse5_phaddubw"
10344 [(set (match_operand:V8HI 0 "register_operand" "=x")
10348 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10349 (parallel [(const_int 0)
10360 (parallel [(const_int 1)
10367 (const_int 15)])))))]
10369 "phaddubw\t{%1, %0|%0, %1}"
10370 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V16QI source (register or memory) into a V4SI
;; register, printed as phaddubd.  The pattern combines four element lists of
;; operand 1, starting at elements 0, 1, 2 and 3 respectively.
10372 (define_insn "sse5_phaddubd"
10373 [(set (match_operand:V4SI 0 "register_operand" "=x")
10378 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10379 (parallel [(const_int 0)
10386 (parallel [(const_int 1)
10389 (const_int 13)]))))
10394 (parallel [(const_int 2)
10401 (parallel [(const_int 3)
10404 (const_int 15)]))))))]
10406 "phaddubd\t{%1, %0|%0, %1}"
10407 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V16QI source (register or memory) into a V2DI
;; register, printed as phaddubq.  The pattern combines eight element lists
;; of operand 1, starting at elements 0-3 and 8-11.
10409 (define_insn "sse5_phaddubq"
10410 [(set (match_operand:V2DI 0 "register_operand" "=x")
10416 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10417 (parallel [(const_int 0)
10422 (parallel [(const_int 1)
10428 (parallel [(const_int 2)
10433 (parallel [(const_int 3)
10434 (const_int 7)])))))
10440 (parallel [(const_int 8)
10445 (parallel [(const_int 9)
10446 (const_int 13)]))))
10451 (parallel [(const_int 10)
10456 (parallel [(const_int 11)
10457 (const_int 15)])))))))]
10459 "phaddubq\t{%1, %0|%0, %1}"
10460 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V8HI source (register or memory) into a V4SI
;; register, printed as phadduwd.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and one starting at element 1
;; (ending at 7).
10462 (define_insn "sse5_phadduwd"
10463 [(set (match_operand:V4SI 0 "register_operand" "=x")
10467 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10468 (parallel [(const_int 0)
10475 (parallel [(const_int 1)
10478 (const_int 7)])))))]
10480 "phadduwd\t{%1, %0|%0, %1}"
10481 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V8HI source (register or memory) into a V2DI
;; register, printed as phadduwq.  The pattern combines four element lists of
;; operand 1, starting at elements 0, 1, 2 and 3 respectively.
10483 (define_insn "sse5_phadduwq"
10484 [(set (match_operand:V2DI 0 "register_operand" "=x")
10489 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10490 (parallel [(const_int 0)
10495 (parallel [(const_int 1)
10501 (parallel [(const_int 2)
10506 (parallel [(const_int 3)
10507 (const_int 7)]))))))]
10509 "phadduwq\t{%1, %0|%0, %1}"
10510 [(set_attr "type" "sseiadd1")])
;; Horizontal add from a V4SI source (register or memory) into a V2DI
;; register, printed as phaddudq.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and the other being (1 3).
10512 (define_insn "sse5_phaddudq"
10513 [(set (match_operand:V2DI 0 "register_operand" "=x")
10517 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10518 (parallel [(const_int 0)
10523 (parallel [(const_int 1)
10524 (const_int 3)])))))]
10526 "phaddudq\t{%1, %0|%0, %1}"
10527 [(set_attr "type" "sseiadd1")])
;; Horizontal subtract from a V16QI source (register or memory) into a V8HI
;; register, printed as phsubbw.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and one starting at element 1
;; (ending at 15).
10529 (define_insn "sse5_phsubbw"
10530 [(set (match_operand:V8HI 0 "register_operand" "=x")
10534 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10535 (parallel [(const_int 0)
10546 (parallel [(const_int 1)
10553 (const_int 15)])))))]
10555 "phsubbw\t{%1, %0|%0, %1}"
10556 [(set_attr "type" "sseiadd1")])
;; Horizontal subtract from a V8HI source (register or memory) into a V4SI
;; register, printed as phsubwd.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and one starting at element 1
;; (ending at 7).
10558 (define_insn "sse5_phsubwd"
10559 [(set (match_operand:V4SI 0 "register_operand" "=x")
10563 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10564 (parallel [(const_int 0)
10571 (parallel [(const_int 1)
10574 (const_int 7)])))))]
10576 "phsubwd\t{%1, %0|%0, %1}"
10577 [(set_attr "type" "sseiadd1")])
;; Horizontal subtract from a V4SI source (register or memory) into a V2DI
;; register, printed as phsubdq.  The pattern combines two element lists of
;; operand 1, one starting at element 0 and the other being (1 3).
10579 (define_insn "sse5_phsubdq"
10580 [(set (match_operand:V2DI 0 "register_operand" "=x")
10584 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10585 (parallel [(const_int 0)
10590 (parallel [(const_int 1)
10591 (const_int 3)])))))]
10593 "phsubdq\t{%1, %0|%0, %1}"
10594 [(set_attr "type" "sseiadd1")])
10596 ;; SSE5 permute instructions
;; Generic byte permute: an UNSPEC_SSE5_PERMUTE over three V16QI inputs,
;; printed as pperm.  In every alternative either operand 1 (alternatives
;; 0-1) or operand 3 (alternatives 2-3) is tied to the destination, and at
;; most one of the remaining operands may be in memory.
10597 (define_insn "sse5_pperm"
10598 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10600 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
10601 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
10602 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10603 UNSPEC_SSE5_PERMUTE))]
10604 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10605 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10606 [(set_attr "type" "sse4arg")
10607 (set_attr "mode" "TI")])
10609 ;; The following are for the various unpack insns, which don't need the first
10610 ;; source operand, so we can just use the output operand for the first operand.
10611 ;; This allows either of the other two operands to be a memory operand. We
10612 ;; can't just use the first operand as an argument to the normal pperm because
10613 ;; then an output-only argument suddenly becomes an input operand.
;; "zero" unpack from V16QI to V8HI, printed as pperm with the destination
;; register standing in for the unused first source.  Operand 2 is the
;; parallel of const_ints choosing the elements; operand 3 is the V16QI
;; permute control carried in a `use'.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; mirroring the two constraint alternatives ("xm"/"x" and "x"/"xm").  It
;; must test operand 3, not operand 2: the selector in operand 2 is never a
;; register, so testing it would reject every form with operand 1 in memory.
10614 (define_insn "sse5_pperm_zero_v16qi_v8hi"
10615 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10618 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10619 (match_operand 2 "" "")))) ;; parallel with const_int's
10620 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10622 && (register_operand (operands[1], V16QImode)
10623 || register_operand (operands[3], V16QImode))"
10624 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10625 [(set_attr "type" "sseadd")
10626 (set_attr "mode" "TI")])
;; "sign" unpack from V16QI to V8HI, printed as pperm with the destination
;; register standing in for the unused first source.  Operand 2 is the
;; parallel of const_ints choosing the elements; operand 3 is the V16QI
;; permute control carried in a `use'.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; mirroring the two constraint alternatives ("xm"/"x" and "x"/"xm").  It
;; must test operand 3, not operand 2: the selector in operand 2 is never a
;; register, so testing it would reject every form with operand 1 in memory.
10628 (define_insn "sse5_pperm_sign_v16qi_v8hi"
10629 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10632 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10633 (match_operand 2 "" "")))) ;; parallel with const_int's
10634 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10636 && (register_operand (operands[1], V16QImode)
10637 || register_operand (operands[3], V16QImode))"
10638 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10639 [(set_attr "type" "sseadd")
10640 (set_attr "mode" "TI")])
;; "zero" unpack from V8HI to V4SI, printed as pperm with the destination
;; register standing in for the unused first source.  Operand 2 is the
;; parallel of const_ints choosing the elements; operand 3 is the V16QI
;; permute control carried in a `use'.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; mirroring the two constraint alternatives ("xm"/"x" and "x"/"xm").  It
;; must test operand 3, not operand 2: the selector in operand 2 is never a
;; register, so testing it would reject every form with operand 1 in memory.
10642 (define_insn "sse5_pperm_zero_v8hi_v4si"
10643 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10646 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10647 (match_operand 2 "" "")))) ;; parallel with const_int's
10648 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10650 && (register_operand (operands[1], V8HImode)
10651 || register_operand (operands[3], V16QImode))"
10652 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10653 [(set_attr "type" "sseadd")
10654 (set_attr "mode" "TI")])
;; "sign" unpack from V8HI to V4SI, printed as pperm with the destination
;; register standing in for the unused first source.  Operand 2 is the
;; parallel of const_ints choosing the elements; operand 3 is the V16QI
;; permute control carried in a `use'.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; mirroring the two constraint alternatives ("xm"/"x" and "x"/"xm").  It
;; must test operand 3, not operand 2: the selector in operand 2 is never a
;; register, so testing it would reject every form with operand 1 in memory.
10656 (define_insn "sse5_pperm_sign_v8hi_v4si"
10657 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10660 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10661 (match_operand 2 "" "")))) ;; parallel with const_int's
10662 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10664 && (register_operand (operands[1], V8HImode)
10665 || register_operand (operands[3], V16QImode))"
10666 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10667 [(set_attr "type" "sseadd")
10668 (set_attr "mode" "TI")])
;; "zero" unpack from V4SI to V2DI, printed as pperm with the destination
;; register standing in for the unused first source.  Operand 2 is the
;; parallel of const_ints choosing the elements; operand 3 is the V16QI
;; permute control carried in a `use'.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; mirroring the two constraint alternatives ("xm"/"x" and "x"/"xm").  It
;; must test operand 3, not operand 2: the selector in operand 2 is never a
;; register, so testing it would reject every form with operand 1 in memory.
10670 (define_insn "sse5_pperm_zero_v4si_v2di"
10671 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10674 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10675 (match_operand 2 "" "")))) ;; parallel with const_int's
10676 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10678 && (register_operand (operands[1], V4SImode)
10679 || register_operand (operands[3], V16QImode))"
10680 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10681 [(set_attr "type" "sseadd")
10682 (set_attr "mode" "TI")])
;; "sign" unpack from V4SI to V2DI, printed as pperm with the destination
;; register standing in for the unused first source.  Operand 2 is the
;; parallel of const_ints choosing the elements; operand 3 is the V16QI
;; permute control carried in a `use'.
;; The insn condition requires operand 1 or operand 3 to be a register,
;; mirroring the two constraint alternatives ("xm"/"x" and "x"/"xm").  It
;; must test operand 3, not operand 2: the selector in operand 2 is never a
;; register, so testing it would reject every form with operand 1 in memory.
10684 (define_insn "sse5_pperm_sign_v4si_v2di"
10685 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10688 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10689 (match_operand 2 "" "")))) ;; parallel with const_int's
10690 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10692 && (register_operand (operands[1], V4SImode)
10693 || register_operand (operands[3], V16QImode))"
10694 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10695 [(set_attr "type" "sseadd")
10696 (set_attr "mode" "TI")])
10698 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result via pperm.
;; Operand 3 is the V16QI permute control, carried in a `use'.  In every
;; alternative either operand 1 (alternatives 0-1) or operand 3
;; (alternatives 2-3) is tied to the destination, and at most one of the
;; remaining operands may be in memory.
10699 (define_insn "sse5_pperm_pack_v2di_v4si"
10700 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
10703 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
10705 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10706 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10707 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10708 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10709 [(set_attr "type" "sse4arg")
10710 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result via pperm.
;; Operand 3 is the V16QI permute control, carried in a `use'.  In every
;; alternative either operand 1 (alternatives 0-1) or operand 3
;; (alternatives 2-3) is tied to the destination, and at most one of the
;; remaining operands may be in memory.
10712 (define_insn "sse5_pperm_pack_v4si_v8hi"
10713 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
10716 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
10718 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10719 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10720 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10721 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10722 [(set_attr "type" "sse4arg")
10723 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result via pperm.
;; Operand 3 is the V16QI permute control, carried in a `use'.  In every
;; alternative either operand 1 (alternatives 0-1) or operand 3
;; (alternatives 2-3) is tied to the destination, and at most one of the
;; remaining operands may be in memory.
10725 (define_insn "sse5_pperm_pack_v8hi_v16qi"
10726 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10729 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
10731 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10732 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10733 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10734 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10735 [(set_attr "type" "sse4arg")
10736 (set_attr "mode" "TI")])
10738 ;; Floating point permutation (permps, permpd)
;; Floating-point permute over V4SF and V2DF (SSEMODEF2P): an
;; UNSPEC_SSE5_PERMUTE of two float vectors (operands 1 and 2) and a V16QI
;; control vector (operand 3).  In every alternative either operand 1
;; (alternatives 0-1) or operand 3 (alternatives 2-3) is tied to the
;; destination.  The mnemonic suffix comes from <ssemodesuffixf4>.
10739 (define_insn "sse5_perm<mode>"
10740 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
10742 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
10743 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
10744 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10745 UNSPEC_SSE5_PERMUTE))]
10746 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10747 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10748 [(set_attr "type" "sse4arg")
10749 (set_attr "mode" "<MODE>")])
10751 ;; SSE5 packed rotate instructions
;; Expander for a vector rotate-left whose count (operand 2) is an SImode
;; general operand.  When the count does not satisfy
;; const_0_to_<sserotatemax>_operand, the preparation code converts it to
;; the element mode if necessary (convert_move), builds a vector with that
;; value in every element via gen_vec_init<mode>, and emits the
;; per-element variable rotate gen_sse5_vrotl<mode>3 instead.
10752 (define_expand "rotl<mode>3"
10753 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10754 (rotate:SSEMODE1248
10755 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10756 (match_operand:SI 2 "general_operand")))]
10759 /* If we were given a scalar, convert it to parallel */
10760 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10762 rtvec vs = rtvec_alloc (<ssescalarnum>);
10763 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10764 rtx reg = gen_reg_rtx (<MODE>mode);
10765 rtx op2 = operands[2];
10768 if (GET_MODE (op2) != <ssescalarmode>mode)
10770 op2 = gen_reg_rtx (<ssescalarmode>mode);
10771 convert_move (op2, operands[2], false);
10774 for (i = 0; i < <ssescalarnum>; i++)
10775 RTVEC_ELT (vs, i) = op2;
10777 emit_insn (gen_vec_init<mode> (reg, par));
10778 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right whose count (operand 2) is an SImode
;; general operand.  When the count does not satisfy
;; const_0_to_<sserotatemax>_operand, the preparation code converts it to
;; the element mode if necessary, builds a vector with that value in every
;; element, negates that vector with gen_neg<mode>2, and emits
;; gen_sse5_vrotl<mode>3 with the negated counts.
10783 (define_expand "rotr<mode>3"
10784 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10785 (rotatert:SSEMODE1248
10786 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10787 (match_operand:SI 2 "general_operand")))]
10790 /* If we were given a scalar, convert it to parallel */
10791 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10793 rtvec vs = rtvec_alloc (<ssescalarnum>);
10794 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10795 rtx neg = gen_reg_rtx (<MODE>mode);
10796 rtx reg = gen_reg_rtx (<MODE>mode);
10797 rtx op2 = operands[2];
10800 if (GET_MODE (op2) != <ssescalarmode>mode)
10802 op2 = gen_reg_rtx (<ssescalarmode>mode);
10803 convert_move (op2, operands[2], false);
10806 for (i = 0; i < <ssescalarnum>; i++)
10807 RTVEC_ELT (vs, i) = op2;
10809 emit_insn (gen_vec_init<mode> (reg, par));
10810 emit_insn (gen_neg<mode>2 (neg, reg));
10811 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left of every element of operand 1 (register or memory) by the
;; immediate count in operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand.  Printed as prot<ssevecsize>.
10816 (define_insn "sse5_rotl<mode>3"
10817 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10818 (rotate:SSEMODE1248
10819 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10820 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10822 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10823 [(set_attr "type" "sseishft")
10824 (set_attr "mode" "TI")])
;; Rotate-right of every element of operand 1 (register or memory) by the
;; immediate count in operand 2.  The output code prints the same
;; prot<ssevecsize> mnemonic as the rotate-left pattern, using operand 3 as
;; the count: the element width in bits minus the requested count.
;; The element width is taken from the scalar element mode; the number of
;; elements times 8 only coincides with it for V4SI.
10826 (define_insn "sse5_rotr<mode>3"
10827 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10828 (rotatert:SSEMODE1248
10829 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10830 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10833 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10834 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
10836 [(set_attr "type" "sseishft")
10837 (set_attr "mode" "TI")])
;; Expander for a rotate-right with a per-element count vector (operand 2):
;; negates the count vector into a fresh register with gen_neg<mode>2 and
;; emits gen_sse5_vrotl<mode>3 with the negated counts.
10839 (define_expand "vrotr<mode>3"
10840 [(match_operand:SSEMODE1248 0 "register_operand" "")
10841 (match_operand:SSEMODE1248 1 "register_operand" "")
10842 (match_operand:SSEMODE1248 2 "register_operand" "")]
10845 rtx reg = gen_reg_rtx (<MODE>mode);
10846 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10847 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate-left with a per-element count vector (operand 2):
;; forwards all three operands unchanged to gen_sse5_vrotl<mode>3.
10851 (define_expand "vrotl<mode>3"
10852 [(match_operand:SSEMODE1248 0 "register_operand" "")
10853 (match_operand:SSEMODE1248 1 "register_operand" "")
10854 (match_operand:SSEMODE1248 2 "register_operand" "")]
10857 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate with a per-element count vector in operand 2, printed as
;; prot<ssevecsize>.  The RTL is an if_then_else keyed on operand 2 with a
;; `rotate' arm and a `rotatert' arm whose count is (neg operand 2).
;; Either the count (alternative 0) or the data operand 1 (alternative 1)
;; may be in memory.
10861 (define_insn "sse5_vrotl<mode>3"
10862 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10863 (if_then_else:SSEMODE1248
10865 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
10867 (rotate:SSEMODE1248
10868 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
10870 (rotatert:SSEMODE1248
10872 (neg:SSEMODE1248 (match_dup 2)))))]
10873 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
10874 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10875 [(set_attr "type" "sseishft")
10876 (set_attr "mode" "TI")])
10878 ;; SSE5 packed shift instructions.
10879 ;; FIXME: add V2DI back in
;; Expander for a logical shift right with a per-element count vector
;; (operand 2), for V16QI/V8HI/V4SI (SSEMODE124): negates the count vector
;; with gen_neg<mode>2 and emits gen_sse5_lshl<mode>3 with the negated counts.
10880 (define_expand "vlshr<mode>3"
10881 [(match_operand:SSEMODE124 0 "register_operand" "")
10882 (match_operand:SSEMODE124 1 "register_operand" "")
10883 (match_operand:SSEMODE124 2 "register_operand" "")]
10886 rtx neg = gen_reg_rtx (<MODE>mode);
10887 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10888 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for an arithmetic shift right with a per-element count vector
;; (operand 2), for V16QI/V8HI/V4SI (SSEMODE124): negates the count vector
;; with gen_neg<mode>2 and emits gen_sse5_ashl<mode>3 with the negated counts.
10892 (define_expand "vashr<mode>3"
10893 [(match_operand:SSEMODE124 0 "register_operand" "")
10894 (match_operand:SSEMODE124 1 "register_operand" "")
10895 (match_operand:SSEMODE124 2 "register_operand" "")]
10898 rtx neg = gen_reg_rtx (<MODE>mode);
10899 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10900 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a shift left with a per-element count vector (operand 2),
;; for V16QI/V8HI/V4SI (SSEMODE124): forwards all three operands unchanged
;; to gen_sse5_ashl<mode>3.
10904 (define_expand "vashl<mode>3"
10905 [(match_operand:SSEMODE124 0 "register_operand" "")
10906 (match_operand:SSEMODE124 1 "register_operand" "")
10907 (match_operand:SSEMODE124 2 "register_operand" "")]
10910 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift with a per-element count vector in operand 2,
;; printed as psha<ssevecsize>.  The RTL is an if_then_else keyed on
;; operand 2 with an `ashift' arm and an `ashiftrt' arm whose count is
;; (neg operand 2).  Either the count (alternative 0) or the data operand 1
;; (alternative 1) may be in memory.
10914 (define_insn "sse5_ashl<mode>3"
10915 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10916 (if_then_else:SSEMODE1248
10918 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
10920 (ashift:SSEMODE1248
10921 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
10923 (ashiftrt:SSEMODE1248
10925 (neg:SSEMODE1248 (match_dup 2)))))]
10926 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
10927 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10928 [(set_attr "type" "sseishft")
10929 (set_attr "mode" "TI")])
;; Variable logical shift with a per-element count vector in operand 2,
;; printed as pshl<ssevecsize>.  The RTL is an if_then_else keyed on
;; operand 2 with an `ashift' arm and an `lshiftrt' arm whose count is
;; (neg operand 2).  Either the count (alternative 0) or the data operand 1
;; (alternative 1) may be in memory.
10931 (define_insn "sse5_lshl<mode>3"
10932 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10933 (if_then_else:SSEMODE1248
10935 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
10937 (ashift:SSEMODE1248
10938 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
10940 (lshiftrt:SSEMODE1248
10942 (neg:SSEMODE1248 (match_dup 2)))))]
10943 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
10944 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10945 [(set_attr "type" "sseishft")
10946 (set_attr "mode" "TI")])
10948 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; Expander for a V16QI shift left by a scalar SImode count (operand 2):
;; places the count in all 16 elements of a PARALLEL, initializes a V16QI
;; register from it with gen_vec_initv16qi, and emits gen_sse5_ashlv16qi3.
;; NOTE(review): operand 2 is used as the element value without an explicit
;; conversion from SImode to the element mode -- confirm vec_init accepts it.
10949 (define_expand "ashlv16qi3"
10950 [(match_operand:V16QI 0 "register_operand" "")
10951 (match_operand:V16QI 1 "register_operand" "")
10952 (match_operand:SI 2 "nonmemory_operand" "")]
10955 rtvec vs = rtvec_alloc (16);
10956 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10957 rtx reg = gen_reg_rtx (V16QImode);
10959 for (i = 0; i < 16; i++)
10960 RTVEC_ELT (vs, i) = operands[2];
10962 emit_insn (gen_vec_initv16qi (reg, par));
10963 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI logical shift by a scalar SImode count (operand 2):
;; places the count in all 16 elements of a PARALLEL, initializes a V16QI
;; register from it with gen_vec_initv16qi, and emits gen_sse5_lshlv16qi3.
;; NOTE(review): operand 2 is used as the element value without an explicit
;; conversion from SImode to the element mode -- confirm vec_init accepts it.
10967 (define_expand "lshlv16qi3"
10968 [(match_operand:V16QI 0 "register_operand" "")
10969 (match_operand:V16QI 1 "register_operand" "")
10970 (match_operand:SI 2 "nonmemory_operand" "")]
10973 rtvec vs = rtvec_alloc (16);
10974 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10975 rtx reg = gen_reg_rtx (V16QImode);
10977 for (i = 0; i < 16; i++)
10978 RTVEC_ELT (vs, i) = operands[2];
10980 emit_insn (gen_vec_initv16qi (reg, par));
10981 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic shift right by a scalar SImode count
;; (operand 2), implemented as a shift left by the negated count.
;; For a CONST_INT count the negated constant is used as the element value
;; and the broadcast vector is passed straight to gen_sse5_ashlv16qi3.
;; Otherwise the broadcast vector is negated with gen_negv16qi2 first.
10985 (define_expand "ashrv16qi3"
10986 [(match_operand:V16QI 0 "register_operand" "")
10987 (match_operand:V16QI 1 "register_operand" "")
10988 (match_operand:SI 2 "nonmemory_operand" "")]
10991 rtvec vs = rtvec_alloc (16);
10992 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10993 rtx reg = gen_reg_rtx (V16QImode);
10995 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
10996 ? GEN_INT (- INTVAL (operands[2]))
10999 for (i = 0; i < 16; i++)
11000 RTVEC_ELT (vs, i) = ele;
11002 emit_insn (gen_vec_initv16qi (reg, par));
11004 if (GET_CODE (operands[2]) != CONST_INT)
11006 rtx neg = gen_reg_rtx (V16QImode);
11007 emit_insn (gen_negv16qi2 (neg, reg));
11008 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
11011 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic shift right, implemented as
;; sse5_ashlv2di3 with a negated count placed in both elements.
;; Three ways of producing the negated scalar count `ele' are visible:
;;   - CONST_INT count: negated at expand time with GEN_INT;
;;   - count whose mode is not DImode: converted into a DImode temporary
;;     with convert_move, then negated with negdi2;
;;   - otherwise: negated directly with negdi2.
;; `ele' is then stored in both PARALLEL slots and loaded with vec_initv2di.
11016 (define_expand "ashrv2di3"
11017 [(match_operand:V2DI 0 "register_operand" "")
11018 (match_operand:V2DI 1 "register_operand" "")
11019 (match_operand:DI 2 "nonmemory_operand" "")]
11022 rtvec vs = rtvec_alloc (2);
11023 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11024 rtx reg = gen_reg_rtx (V2DImode);
11027 if (GET_CODE (operands[2]) == CONST_INT)
11028 ele = GEN_INT (- INTVAL (operands[2]));
11029 else if (GET_MODE (operands[2]) != DImode)
11031 rtx move = gen_reg_rtx (DImode);
11032 ele = gen_reg_rtx (DImode);
11033 convert_move (move, operands[2], false);
11034 emit_insn (gen_negdi2 (ele, move));
11038 ele = gen_reg_rtx (DImode);
11039 emit_insn (gen_negdi2 (ele, operands[2]));
11042 RTVEC_ELT (vs, 0) = ele;
11043 RTVEC_ELT (vs, 1) = ele;
11044 emit_insn (gen_vec_initv2di (reg, par));
11045 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11049 ;; SSE5 FRCZ support
;; Packed frcz on V4SF/V2DF: one source (register or memory, operand 1),
;; result in an SSE register.  The mnemonic suffix comes from
;; <ssemodesuffixf4>.
11051 (define_insn "sse5_frcz<mode>2"
11052 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11054 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11057 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11058 [(set_attr "type" "ssecvt1")
11059 (set_attr "prefix_extra" "1")
11060 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") frcz expressed as a vec_merge.  Operand 2 is the frcz
;; source (register or memory); operand 1 is tied to the destination by the
;; "0" constraint.  Only operands 2 and 0 appear in the output template.
11063 (define_insn "sse5_vmfrcz<mode>2"
11064 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11065 (vec_merge:SSEMODEF2P
11067 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11069 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11072 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11073 [(set_attr "type" "ssecvt1")
11074 (set_attr "prefix_extra" "1")
11075 (set_attr "mode" "<MODE>")])
;; cvtph2ps: produces a V4SF register from a V4HI source (register or
;; memory).  The conversion is modelled as an unspec.
11077 (define_insn "sse5_cvtph2ps"
11078 [(set (match_operand:V4SF 0 "register_operand" "=x")
11079 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11082 "cvtph2ps\t{%1, %0|%0, %1}"
11083 [(set_attr "type" "ssecvt")
11084 (set_attr "mode" "V4SF")])
;; cvtps2ph: produces a V4HI result (register or memory destination) from a
;; V4SF register source.  The conversion is modelled as an unspec.
11086 (define_insn "sse5_cvtps2ph"
11087 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11088 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11091 "cvtps2ph\t{%1, %0|%0, %1}"
11092 [(set_attr "type" "ssecvt")
11093 (set_attr "mode" "V4SF")])
11095 ;; Scalar versions of the com instructions that use vector types that are
11096 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11097 ;; com instructions fill in 0's in the upper bits instead of leaving them
11098 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com* comparison on vector types.  It wraps the
;; comparison (operator 1 applied to operands 2 and 3) in a vec_merge and
;; sets operand 4 to the all-zero vector of the mode (CONST0_RTX) in the
;; preparation code.
11099 (define_expand "sse5_vmmaskcmp<mode>3"
11100 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11101 (vec_merge:SSEMODEF2P
11102 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11103 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11104 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11109 operands[4] = CONST0_RTX (<MODE>mode);
;; Insn matching the RTL produced by the sse5_vmmaskcmp<mode>3 expander.
;; The comparison code is printed via %Y1 and the scalar suffix via
;; <ssemodesuffixf2s>.  Operand 4 has no predicate string and does not
;; appear in the output template.
11112 (define_insn "*sse5_vmmaskcmp<mode>3"
11113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11114 (vec_merge:SSEMODEF2P
11115 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11116 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11117 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11118 (match_operand:SSEMODEF2P 4 "")
11121 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11122 [(set_attr "type" "sse4arg")
11123 (set_attr "mode" "<ssescalarmode>")])
11125 ;; We don't have a comparison operator that always returns true/false, so
11126 ;; handle comfalse and comtrue specially.
;; comfalse/comtrue on float vectors, modelled as UNSPEC_SSE5_TRUEFALSE.
;; Operand 3 is a compile-time integer that selects which of four mnemonics
;; is printed: scalar comfalse, packed comfalse, scalar comtrue, packed
;; comtrue (in that order in the switch).  Any other selector reaches
;; gcc_unreachable.
;; The comtrue arms must print comtrues/comtruep; printing the comfalse
;; mnemonics there would make a comtrue request assemble to comfalse.
11127 (define_insn "sse5_com_tf<mode>3"
11128 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11130 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11131 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11132 (match_operand:SI 3 "const_int_operand" "n")]
11133 UNSPEC_SSE5_TRUEFALSE))]
11136 const char *ret = NULL;
11138 switch (INTVAL (operands[3]))
11141 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11145 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11149 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11153 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11157 gcc_unreachable ();
11162 [(set_attr "type" "ssecmp")
11163 (set_attr "mode" "<MODE>")])
;; Packed float comparison (com*).  The comparison code is taken from
;; operator 1 and printed via %Y1; operand 2 must be a register, operand 3
;; may be a register or memory.
11165 (define_insn "sse5_maskcmp<mode>3"
11166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11167 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11168 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11169 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11171 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11172 [(set_attr "type" "ssecmp")
11173 (set_attr "mode" "<MODE>")])
;; Integer vector comparison (pcom*) for operators accepted by
;; ix86_comparison_int_operator.  The comparison code is printed via %Y1 and
;; the element size via <ssevecsize>.
11175 (define_insn "sse5_maskcmp<mode>3"
11176 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11177 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11178 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11179 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11181 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11182 [(set_attr "type" "sse4arg")
11183 (set_attr "mode" "TI")])
;; Integer vector comparison for operators accepted by
;; ix86_comparison_uns_operator.  Same operand layout as the pcom pattern
;; for ix86_comparison_int_operator, but the template adds a "u" between the
;; comparison code and the element size (pcom<cc>u<size>).
11185 (define_insn "sse5_maskcmp_uns<mode>3"
11186 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11187 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11188 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11189 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11191 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11192 [(set_attr "type" "ssecmp")
11193 (set_attr "mode" "TI")])
11195 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11196 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11197 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as sse5_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_SSE5_UNSIGNED_CMP so the RTL is distinct
;; from the plain comparison form.
11198 (define_insn "sse5_maskcmp_uns2<mode>3"
11199 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11200 (unspec:SSEMODE1248
11201 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11202 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11203 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11204 UNSPEC_SSE5_UNSIGNED_CMP))]
11206 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11207 [(set_attr "type" "ssecmp")
11208 (set_attr "mode" "TI")])
11210 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11211 ;; being added here to be complete.
;; pcomtrue/pcomfalse on integer vectors, modelled as
;; UNSPEC_SSE5_TRUEFALSE.  Operand 3 is a compile-time integer: a non-zero
;; value prints pcomtrue<size>, zero prints pcomfalse<size>.
11212 (define_insn "sse5_pcom_tf<mode>3"
11213 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11214 (unspec:SSEMODE1248
11215 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11216 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11217 (match_operand:SI 3 "const_int_operand" "n")]
11218 UNSPEC_SSE5_TRUEFALSE))]
11221 return ((INTVAL (operands[3]) != 0)
11222 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11223 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11225 [(set_attr "type" "ssecmp")
11226 (set_attr "mode" "TI")])
;; VEX-encoded aesenc (vaesenc), enabled under TARGET_AES && TARGET_AVX.
;; Three-operand form: operand 1 is an independent source register and is
;; not tied to the destination.
11228 (define_insn "*avx_aesenc"
11229 [(set (match_operand:V2DI 0 "register_operand" "=x")
11230 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11231 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11233 "TARGET_AES && TARGET_AVX"
11234 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11235 [(set_attr "type" "sselog1")
11236 (set_attr "prefix" "vex")
11237 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination by the "0"
;; constraint, so only operands 2 and 0 appear in the output template.
11239 (define_insn "aesenc"
11240 [(set (match_operand:V2DI 0 "register_operand" "=x")
11241 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11242 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11245 "aesenc\t{%2, %0|%0, %2}"
11246 [(set_attr "type" "sselog1")
11247 (set_attr "prefix_extra" "1")
11248 (set_attr "mode" "TI")])
;; VEX-encoded aesenclast (vaesenclast), enabled under
;; TARGET_AES && TARGET_AVX.  Three-operand form; operand 1 is not tied to
;; the destination.  Modelled as UNSPEC_AESENCLAST.
11250 (define_insn "*avx_aesenclast"
11251 [(set (match_operand:V2DI 0 "register_operand" "=x")
11252 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11253 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11254 UNSPEC_AESENCLAST))]
11255 "TARGET_AES && TARGET_AVX"
11256 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11257 [(set_attr "type" "sselog1")
11258 (set_attr "prefix" "vex")
11259 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination by the "0" constraint.
11261 (define_insn "aesenclast"
11262 [(set (match_operand:V2DI 0 "register_operand" "=x")
11263 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11264 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11265 UNSPEC_AESENCLAST))]
11267 "aesenclast\t{%2, %0|%0, %2}"
11268 [(set_attr "type" "sselog1")
11269 (set_attr "prefix_extra" "1")
11270 (set_attr "mode" "TI")])
;; VEX-encoded aesdec (vaesdec), enabled under TARGET_AES && TARGET_AVX.
;; Three-operand form; operand 1 is not tied to the destination.
11272 (define_insn "*avx_aesdec"
11273 [(set (match_operand:V2DI 0 "register_operand" "=x")
11274 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11275 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11277 "TARGET_AES && TARGET_AVX"
11278 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11279 [(set_attr "type" "sselog1")
11280 (set_attr "prefix" "vex")
11281 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination by the "0"
;; constraint, so only operands 2 and 0 appear in the output template.
11283 (define_insn "aesdec"
11284 [(set (match_operand:V2DI 0 "register_operand" "=x")
11285 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11286 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11289 "aesdec\t{%2, %0|%0, %2}"
11290 [(set_attr "type" "sselog1")
11291 (set_attr "prefix_extra" "1")
11292 (set_attr "mode" "TI")])
;; VEX-encoded aesdeclast (vaesdeclast), enabled under
;; TARGET_AES && TARGET_AVX.  Three-operand form; operand 1 is not tied to
;; the destination.  Modelled as UNSPEC_AESDECLAST.
11294 (define_insn "*avx_aesdeclast"
11295 [(set (match_operand:V2DI 0 "register_operand" "=x")
11296 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11297 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11298 UNSPEC_AESDECLAST))]
11299 "TARGET_AES && TARGET_AVX"
11300 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11301 [(set_attr "type" "sselog1")
11302 (set_attr "prefix" "vex")
11303 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination by the "0" constraint.
11305 (define_insn "aesdeclast"
11306 [(set (match_operand:V2DI 0 "register_operand" "=x")
11307 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11308 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11309 UNSPEC_AESDECLAST))]
11311 "aesdeclast\t{%2, %0|%0, %2}"
11312 [(set_attr "type" "sselog1")
11313 (set_attr "prefix_extra" "1")
11314 (set_attr "mode" "TI")])
;; aesimc: single source (register or memory), register destination.
;; One pattern serves both encodings: the template starts with %v and the
;; "prefix" attribute is maybe_vex.
11316 (define_insn "aesimc"
11317 [(set (match_operand:V2DI 0 "register_operand" "=x")
11318 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11321 "%vaesimc\t{%1, %0|%0, %1}"
11322 [(set_attr "type" "sselog1")
11323 (set_attr "prefix_extra" "1")
11324 (set_attr "prefix" "maybe_vex")
11325 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): source operand 1 (register or
;; memory) plus an immediate operand 2 restricted to 0..255 by
;; const_0_to_255_operand.  Uses the %v / maybe_vex form like aesimc.
11327 (define_insn "aeskeygenassist"
11328 [(set (match_operand:V2DI 0 "register_operand" "=x")
11329 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11330 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11331 UNSPEC_AESKEYGENASSIST))]
11333 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11334 [(set_attr "type" "sselog1")
11335 (set_attr "prefix_extra" "1")
11336 (set_attr "prefix" "maybe_vex")
11337 (set_attr "mode" "TI")])
;; VEX-encoded pclmulqdq (vpclmulqdq), enabled under
;; TARGET_PCLMUL && TARGET_AVX.  Two vector sources (operand 2 may be
;; memory) plus an immediate operand 3 in 0..255; operand 1 is not tied to
;; the destination.
11339 (define_insn "*vpclmulqdq"
11340 [(set (match_operand:V2DI 0 "register_operand" "=x")
11341 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11342 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11343 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11345 "TARGET_PCLMUL && TARGET_AVX"
11346 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11347 [(set_attr "type" "sselog1")
11348 (set_attr "prefix" "vex")
11349 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination by the "0"
;; constraint; operand 2 may be memory; operand 3 is an immediate in 0..255.
11351 (define_insn "pclmulqdq"
11352 [(set (match_operand:V2DI 0 "register_operand" "=x")
11353 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11354 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11355 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11358 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11359 [(set_attr "type" "sselog1")
11360 (set_attr "prefix_extra" "1")
11361 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.
;; The PARALLEL has nregs + 1 elements, where nregs is 16 when TARGET_64BIT
;; and 8 otherwise: element 0 is an unspec_volatile, and elements 1..nregs
;; each set one SSE register (in V8SImode) to the V8SImode zero constant.
11363 (define_expand "avx_vzeroall"
11364 [(match_par_dup 0 [(const_int 0)])]
11367 int nregs = TARGET_64BIT ? 16 : 8;
11370 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11372 XVECEXP (operands[0], 0, 0)
11373 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11376 for (regno = 0; regno < nregs; regno++)
11377 XVECEXP (operands[0], 0, regno + 1)
11378 = gen_rtx_SET (VOIDmode,
11379 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11380 CONST0_RTX (V8SImode));
;; Insn matching a PARALLEL accepted by the vzeroall_operation predicate.
;; Its leading elements are the UNSPECV_VZEROALL unspec_volatile followed by
;; a set of a register to a zero constant.
11383 (define_insn "*avx_vzeroall"
11384 [(match_parallel 0 "vzeroall_operation"
11385 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11386 (set (match_operand 1 "register_operand" "=x")
11387 (match_operand 2 "const0_operand" "X"))])]
11390 [(set_attr "type" "sse")
11391 (set_attr "memory" "none")
11392 (set_attr "prefix" "vex")
11393 (set_attr "mode" "OI")])
11395 ;; vzeroupper clobbers the upper 128 bits of AVX registers.
;; vzeroupper for 32-bit targets (TARGET_AVX && !TARGET_64BIT).
;; Modelled as an UNSPECV_VZEROUPPER unspec_volatile plus a V8SImode clobber
;; of each of XMM0..XMM7.
11396 (define_insn "avx_vzeroupper"
11397 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11398 (clobber (reg:V8SI XMM0_REG))
11399 (clobber (reg:V8SI XMM1_REG))
11400 (clobber (reg:V8SI XMM2_REG))
11401 (clobber (reg:V8SI XMM3_REG))
11402 (clobber (reg:V8SI XMM4_REG))
11403 (clobber (reg:V8SI XMM5_REG))
11404 (clobber (reg:V8SI XMM6_REG))
11405 (clobber (reg:V8SI XMM7_REG))]
11406 "TARGET_AVX && !TARGET_64BIT"
11408 [(set_attr "type" "sse")
11409 (set_attr "memory" "none")
11410 (set_attr "prefix" "vex")
11411 (set_attr "mode" "OI")])
;; vzeroupper for 64-bit targets (TARGET_AVX && TARGET_64BIT).
;; Same shape as avx_vzeroupper, but the V8SImode clobber list covers
;; XMM0..XMM15.
11413 (define_insn "avx_vzeroupper_rex64"
11414 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11415 (clobber (reg:V8SI XMM0_REG))
11416 (clobber (reg:V8SI XMM1_REG))
11417 (clobber (reg:V8SI XMM2_REG))
11418 (clobber (reg:V8SI XMM3_REG))
11419 (clobber (reg:V8SI XMM4_REG))
11420 (clobber (reg:V8SI XMM5_REG))
11421 (clobber (reg:V8SI XMM6_REG))
11422 (clobber (reg:V8SI XMM7_REG))
11423 (clobber (reg:V8SI XMM8_REG))
11424 (clobber (reg:V8SI XMM9_REG))
11425 (clobber (reg:V8SI XMM10_REG))
11426 (clobber (reg:V8SI XMM11_REG))
11427 (clobber (reg:V8SI XMM12_REG))
11428 (clobber (reg:V8SI XMM13_REG))
11429 (clobber (reg:V8SI XMM14_REG))
11430 (clobber (reg:V8SI XMM15_REG))]
11431 "TARGET_AVX && TARGET_64BIT"
11433 [(set_attr "type" "sse")
11434 (set_attr "memory" "none")
11435 (set_attr "prefix" "vex")
11436 (set_attr "mode" "OI")])
;; vpermilps/vpermilpd with an immediate control (operand 2).  The
;; immediate's range is set by the const_0_to_<vpermilbits>_operand
;; predicate.
;; NOTE(review): operand 1 uses the register_operand predicate while its
;; constraint is "xm"; confirm whether nonimmediate_operand was intended so
;; that a memory source can be matched directly.
11438 (define_insn "avx_vpermil<mode>"
11439 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11441 [(match_operand:AVXMODEF2P 1 "register_operand" "xm")
11442 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
11445 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11446 [(set_attr "type" "sselog")
11447 (set_attr "prefix" "vex")
11448 (set_attr "mode" "<MODE>")])
;; vpermilps/vpermilpd with a vector control operand: operand 2 has mode
;; <avxpermvecmode> and may be a register or memory; operand 1 is the data
;; register.
11450 (define_insn "avx_vpermilvar<mode>3"
11451 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11453 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11454 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11457 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11458 [(set_attr "type" "sselog")
11459 (set_attr "prefix" "vex")
11460 (set_attr "mode" "<MODE>")])
;; vperm2f128 on 256-bit modes (UNSPEC_VPERMIL2F128): two vector sources
;; (operand 2 may be memory) and an immediate operand 3 in 0..255.
11462 (define_insn "avx_vperm2f128<mode>3"
11463 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11464 (unspec:AVX256MODE2P
11465 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11466 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11467 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11468 UNSPEC_VPERMIL2F128))]
11470 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11471 [(set_attr "type" "sselog")
11472 (set_attr "prefix" "vex")
11473 (set_attr "mode" "V8SF")])
;; vbroadcastss/vbroadcastsd from a scalar memory operand (operand 1, mode
;; <avxscalarmode>).  The result is described as nested vec_concat
;; expressions built from that scalar.
11475 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
11476 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
11477 (vec_concat:AVXMODEF4P
11478 (vec_concat:<avxhalfvecmode>
11479 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
11481 (vec_concat:<avxhalfvecmode>
11485 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
11486 [(set_attr "type" "ssemov")
11487 (set_attr "prefix" "vex")
11488 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss producing a V8SF register from an SFmode memory operand.
11490 (define_insn "avx_vbroadcastss256"
11491 [(set (match_operand:V8SF 0 "register_operand" "=x")
11495 (match_operand:SF 1 "memory_operand" "m")
11508 "vbroadcastss\t{%1, %0|%0, %1}"
11509 [(set_attr "type" "ssemov")
11510 (set_attr "prefix" "vex")
11511 (set_attr "mode" "SF")])
;; vbroadcastf128: builds a 256-bit float vector as a vec_concat whose
;; visible input is a half-width (<avxhalfvecmode>) memory operand.
11513 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
11514 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
11515 (vec_concat:AVX256MODEF2P
11516 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
11519 "vbroadcastf128\t{%1, %0|%0, %1}"
11520 [(set_attr "type" "ssemov")
11521 (set_attr "prefix" "vex")
11522 (set_attr "mode" "V4SF")])
;; Expander for vinsertf128 on any 256-bit mode.  Operand 3 is restricted to
;; 0 or 1 by const_0_to_1_operand; the switch on its value emits either
;; vec_set_lo_<mode> or vec_set_hi_<mode>, and any other value reaches
;; gcc_unreachable.
11524 (define_expand "avx_vinsertf128<mode>"
11525 [(match_operand:AVX256MODE 0 "register_operand" "")
11526 (match_operand:AVX256MODE 1 "register_operand" "")
11527 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11528 (match_operand:SI 3 "const_0_to_1_operand" "")]
11531 switch (INTVAL (operands[3]))
11534 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11538 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11542 gcc_unreachable ();
;; Replace the low half of a 4-element 256-bit vector: the result is
;; operand 2 concatenated with elements 2 and 3 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
11547 (define_insn "vec_set_lo_<mode>"
11548 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11549 (vec_concat:AVX256MODE4P
11550 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11551 (vec_select:<avxhalfvecmode>
11552 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11553 (parallel [(const_int 2) (const_int 3)]))))]
11555 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11556 [(set_attr "type" "sselog")
11557 (set_attr "prefix" "vex")
11558 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector: the result is
;; elements 0 and 1 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
11560 (define_insn "vec_set_hi_<mode>"
11561 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11562 (vec_concat:AVX256MODE4P
11563 (vec_select:<avxhalfvecmode>
11564 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11565 (parallel [(const_int 0) (const_int 1)]))
11566 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11568 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11569 [(set_attr "type" "sselog")
11570 (set_attr "prefix" "vex")
11571 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector: the result is
;; operand 2 concatenated with elements 4..7 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
11573 (define_insn "vec_set_lo_<mode>"
11574 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11575 (vec_concat:AVX256MODE8P
11576 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11577 (vec_select:<avxhalfvecmode>
11578 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11579 (parallel [(const_int 4) (const_int 5)
11580 (const_int 6) (const_int 7)]))))]
11582 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11583 [(set_attr "type" "sselog")
11584 (set_attr "prefix" "vex")
11585 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector: the result is
;; elements 0..3 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
11587 (define_insn "vec_set_hi_<mode>"
11588 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11589 (vec_concat:AVX256MODE8P
11590 (vec_select:<avxhalfvecmode>
11591 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11592 (parallel [(const_int 0) (const_int 1)
11593 (const_int 2) (const_int 3)]))
11594 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11596 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11597 [(set_attr "type" "sselog")
11598 (set_attr "prefix" "vex")
11599 (set_attr "mode" "V8SF")])
;; V16HI low-half replacement: operand 2 (V8HI, register or memory) is
;; combined with elements 8..15 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
11601 (define_insn "vec_set_lo_v16hi"
11602 [(set (match_operand:V16HI 0 "register_operand" "=x")
11604 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11606 (match_operand:V16HI 1 "register_operand" "x")
11607 (parallel [(const_int 8) (const_int 9)
11608 (const_int 10) (const_int 11)
11609 (const_int 12) (const_int 13)
11610 (const_int 14) (const_int 15)]))))]
11612 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11613 [(set_attr "type" "sselog")
11614 (set_attr "prefix" "vex")
11615 (set_attr "mode" "V8SF")])
;; V16HI high-half replacement: elements 0..7 of operand 1 are combined
;; with operand 2 (V8HI, register or memory).
;; Emitted as vinsertf128 with immediate 1.
11617 (define_insn "vec_set_hi_v16hi"
11618 [(set (match_operand:V16HI 0 "register_operand" "=x")
11621 (match_operand:V16HI 1 "register_operand" "x")
11622 (parallel [(const_int 0) (const_int 1)
11623 (const_int 2) (const_int 3)
11624 (const_int 4) (const_int 5)
11625 (const_int 6) (const_int 7)]))
11626 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11628 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11629 [(set_attr "type" "sselog")
11630 (set_attr "prefix" "vex")
11631 (set_attr "mode" "V8SF")])
;; V32QI low-half replacement: operand 2 (V16QI, register or memory) is
;; combined with elements 16..31 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
11633 (define_insn "vec_set_lo_v32qi"
11634 [(set (match_operand:V32QI 0 "register_operand" "=x")
11636 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11638 (match_operand:V32QI 1 "register_operand" "x")
11639 (parallel [(const_int 16) (const_int 17)
11640 (const_int 18) (const_int 19)
11641 (const_int 20) (const_int 21)
11642 (const_int 22) (const_int 23)
11643 (const_int 24) (const_int 25)
11644 (const_int 26) (const_int 27)
11645 (const_int 28) (const_int 29)
11646 (const_int 30) (const_int 31)]))))]
11648 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11649 [(set_attr "type" "sselog")
11650 (set_attr "prefix" "vex")
11651 (set_attr "mode" "V8SF")])
;; V32QI high-half replacement: elements 0..15 of operand 1 are combined
;; with operand 2 (V16QI, register or memory).
;; Emitted as vinsertf128 with immediate 1.
11653 (define_insn "vec_set_hi_v32qi"
11654 [(set (match_operand:V32QI 0 "register_operand" "=x")
11657 (match_operand:V32QI 1 "register_operand" "x")
11658 (parallel [(const_int 0) (const_int 1)
11659 (const_int 2) (const_int 3)
11660 (const_int 4) (const_int 5)
11661 (const_int 6) (const_int 7)
11662 (const_int 8) (const_int 9)
11663 (const_int 10) (const_int 11)
11664 (const_int 12) (const_int 13)
11665 (const_int 14) (const_int 15)]))
11666 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11668 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11669 [(set_attr "type" "sselog")
11670 (set_attr "prefix" "vex")
11671 (set_attr "mode" "V8SF")])
;; vmaskmovps/vmaskmovpd, load form: operand 1 is the memory source and
;; operand 2 is a register of mode <avxpermvecmode> (presumably the mask;
;; confirm against the intrinsic definitions).  The destination is a
;; register.
11673 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
11674 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11676 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11677 (match_operand:<avxpermvecmode> 2 "register_operand" "x")]
11680 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
11681 [(set_attr "type" "sselog1")
11682 (set_attr "prefix" "vex")
11683 (set_attr "mode" "<MODE>")])
;; vmaskmovps/vmaskmovpd, store form (UNSPEC_MASKSTORE): the destination is
;; memory; operand 1 is a register of mode <avxpermvecmode> (presumably the
;; mask; confirm against the intrinsic definitions) and operand 2 is the
;; data register.
11685 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
11686 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11688 [(match_operand:<avxpermvecmode> 1 "register_operand" "x")
11689 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11691 UNSPEC_MASKSTORE))]
11693 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11694 [(set_attr "type" "sselog1")
11695 (set_attr "prefix" "vex")
11696 (set_attr "mode" "<MODE>")])
;; Produces a 256-bit value from a half-width (<avxhalfvecmode>) operand,
;; modelled as an unspec.  Alternative 0 ties the input to the output
;; register ("0"); alternative 1 accepts a register or memory.
;; The visible templates move operand 1 into %x0 with vmovaps, vmovapd or
;; vmovdqa, chosen by the insn's mode attribute.  The length attribute is
;; special-cased for alternative 0.
11698 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11699 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
11700 (unspec:AVX256MODE2P
11701 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
11705 switch (which_alternative)
11710 switch (get_attr_mode (insn))
11713 return "vmovaps\t{%1, %x0|%x0, %1}";
11715 return "vmovapd\t{%1, %x0|%x0, %1}";
11717 return "vmovdqa\t{%1, %x0|%x0, %1}";
11724 gcc_unreachable ();
11726 [(set_attr "type" "ssemov")
11727 (set_attr "prefix" "vex")
11728 (set_attr "mode" "<avxvecmode>")
11729 (set (attr "length")
11730 (if_then_else (eq_attr "alternative" "0")
11732 (const_string "*")))])
;; Produces a half-width (<avxhalfvecmode>) value from a 256-bit operand,
;; modelled as an unspec.  Alternative 0 ties the input to the output
;; register ("0"); alternative 1 accepts a register or memory.
;; The visible templates move %x1 into operand 0 with vmovaps, vmovapd or
;; vmovdqa, chosen by the insn's mode attribute.  The length attribute is
;; special-cased for alternative 0.
11734 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
11735 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
11736 (unspec:<avxhalfvecmode>
11737 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
11741 switch (which_alternative)
11746 switch (get_attr_mode (insn))
11749 return "vmovaps\t{%x1, %0|%0, %x1}";
11751 return "vmovapd\t{%x1, %0|%0, %x1}";
11753 return "vmovdqa\t{%x1, %0|%0, %x1}";
11760 gcc_unreachable ();
11762 [(set_attr "type" "ssemov")
11763 (set_attr "prefix" "vex")
11764 (set_attr "mode" "<avxvecmode>")
11765 (set (attr "length")
11766 (if_then_else (eq_attr "alternative" "0")
11768 (const_string "*")))])
;; vec_init expander for every 256-bit AVX mode.  All of the work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument, operand 0 as the target and operand 1 as the initialiser.
11770 (define_expand "vec_init<mode>"
11771 [(match_operand:AVX256MODE 0 "register_operand" "")
11772 (match_operand 1 "" "")]
11775 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 256-bit register.
;; Alternative 0 (operand 2 in a register or memory) has type sselog and a
;; vinsertf128 template that puts operand 2 in the upper half of %t1.
;; Alternative 1 (operand 2 satisfies constraint "C") has type ssemov; the
;; visible templates just move operand 1 into %x0 with vmovaps, vmovapd or
;; vmovdqa, chosen by the insn's mode attribute.
11779 (define_insn "*vec_concat<mode>_avx"
11780 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11781 (vec_concat:AVX256MODE
11782 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11783 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11786 switch (which_alternative)
11789 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11791 switch (get_attr_mode (insn))
11794 return "vmovaps\t{%1, %x0|%x0, %1}";
11796 return "vmovapd\t{%1, %x0|%x0, %1}";
11798 return "vmovdqa\t{%1, %x0|%x0, %1}";
11801 gcc_unreachable ();
11804 [(set_attr "type" "sselog,ssemov")
11805 (set_attr "prefix" "vex")
11806 (set_attr "mode" "<avxvecmode>")])