1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
;; Iterates over V16QI, V8HI, V4SI and V2DI, in that order.  SSEMODE1248
;; further down lists exactly the same four modes.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
;; These are the four integer modes of SSEMODEI followed by the two
;; floating-point modes V4SF and V2DF.
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
;; The 256-bit counterparts of the SSEMODEI list: V32QI, V16HI, V8SI, V4DI.
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
;; The four integer modes of AVX256MODEI followed by the floating-point
;; modes V8SF and V4DF.
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
;; Byte-element vectors in both widths: 32 bytes (V32QI) and 16 bytes
;; (V16QI).
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
;; 64-bit-element integer vectors in both widths: V4DI (32 bytes) and
;; V2DI (16 bytes).
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
;; The six 16-byte modes of SSEMODE followed by the six 32-byte modes of
;; AVX256MODE, written out explicitly.
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte modes.  Going by the lists themselves, the
;; digits in each SSEMODE<digits> name are the element sizes in bytes
;; that the iterator covers: 1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point modes: SSEMODEF4 covers the scalar modes SF and DF as
;; well as the vectors V4SF and V2DF; SSEMODEF2P covers the two vector
;; modes only.
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX-specific subsets.
;;   AVX256MODEF2P  - the two 32-byte floating-point vector modes.
;;   AVX256MODE2P   - V8SI plus the two 32-byte floating-point modes.
;;   AVX256MODE4P   - 32-byte modes with four elements (V4DI, V4DF).
;;   AVX256MODE8P   - 32-byte modes with eight elements (V8SI, V8SF).
;;   AVXMODEF2P     - every floating-point vector mode, 16-byte first.
;;   AVXMODEF4P     - four-element floating-point vectors (V4SF, V4DF).
;;   AVXMODEDCVTDQ2PS / AVXMODEDCVTPS2DQ - single-float and SI vectors in
;;     both widths; presumably the modes iterated by the cvtdq2ps and
;;     cvtps2dq patterns (those patterns are not visible in this excerpt).
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
63 ;; Integer and floating-point vector modes of matching element size
;; and element count: four 32-bit elements (SSEMODE4S) and two 64-bit
;; elements (SSEMODE2D).
64 (define_mode_iterator SSEMODE4S [V4SF V4SI])
65 (define_mode_iterator SSEMODE2D [V2DF V2DI])
67 ;; Modes handled by integer vcond pattern
;; V16QI, V8HI and V4SI are always included; V2DI takes part only when
;; its condition string "TARGET_SSE4_2 || TARGET_SSE5" holds.
68 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
69 (V2DI "TARGET_SSE4_2 || TARGET_SSE5")])
72 ;; Mapping from float mode to required SSE level
;; Single-precision modes (SF, V4SF) give "sse"; double-precision modes
;; (DF, V2DF) give "sse2".  The attribute is spliced into pattern names
;; in this file, e.g. "<sse>_movup<ssemodesuffixf2c>".
73 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
74 ;; Mapping from integer vector mode to mnemonic suffix
;; One letter per element width: "b" (V16QI), "w" (V8HI), "d" (V4SI),
;; "q" (V2DI).
75 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
77 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4 gives the full two-letter suffix: scalar modes map to
;; "ss"/"sd" and vector modes to "ps"/"pd".
78 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
79 (V4SF "ps") (V2DF "pd")])
;; ssemodesuffixf2s gives the scalar suffix "ss"/"sd" for the vector modes
;; as well as for the scalar modes.
80 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
81 (V4SF "ss") (V2DF "sd")])
;; ssemodesuffixf2c gives only the precision letter, "s" or "d".  Output
;; templates in this file append it after a literal "p" or "s", as in
;; "movup<ssemodesuffixf2c>" and "<plusminus_mnemonic>s<ssemodesuffixf2c>".
82 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
84 ;; Mapping of the max integer size for sse5 rotate immediate constraint
;; Each value is the element width in bits minus one: 7 for 8-bit, 15 for
;; 16-bit, 31 for 32-bit and 63 for 64-bit elements.
85 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
87 ;; Mapping of vector modes back to the scalar modes
;; Covers all six 16-byte vector modes.  The result is the element mode
;; and is used, for example, as the "mode" attribute of the scalar (vm*)
;; arithmetic patterns in this file.
88 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
89 (V16QI "QI") (V8HI "HI")
90 (V4SI "SI") (V2DI "DI")])
92 ;; Mapping of vector modes to a vector mode of double size
;; Only 16-byte modes with 32-bit or 64-bit elements have an entry; each
;; maps to the 32-byte mode with the same element type and twice as many
;; elements.
93 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
94 (V4SF "V8SF") (V4SI "V8SI")])
96 ;; Number of scalar elements in each vector type
;; Given as a decimal string for each of the six 16-byte vector modes.
97 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
98 (V16QI "16") (V8HI "8")
99 (V4SI "4") (V2DI "2")])
;; Mapping from an AVX vector mode to the value used for the insn "mode"
;; attribute (see the set_attr "mode" "<avxvecmode>" uses in this file).
;; Integer vectors collapse to a single integer mode of the same width
;; (TI for 16 bytes, OI for 32 bytes); floating-point vectors map to
;; themselves.
102 (define_mode_attr avxvecmode
103 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
104 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
105 (V8SF "V8SF") (V4DF "V4DF")])
;; Mapping from an integer vector mode to the single-precision float
;; vector mode of the same total size: V4SF for the 16-byte modes, V8SF
;; for the 32-byte modes.
106 (define_mode_attr avxvecpsmode
107 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
108 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Mapping from a vector mode to the vector mode with the same element
;; type and half as many elements.  All six 32-byte modes have an entry;
;; V4SF (-> V2SF) is the only 16-byte mode listed.
109 (define_mode_attr avxhalfvecmode
110 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
111 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; Mapping from a vector mode to its element mode, for the modes used by
;; the AVX patterns.  Note that V2DI and the 32-byte integer modes have
;; no entry here.
112 (define_mode_attr avxscalarmode
113 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
114 (V8SF "SF") (V4DF "DF")])
;; Mapping between SI and SF vector modes of equal element count, in both
;; directions (V4SF <-> V4SI, V8SF <-> V8SI).
115 (define_mode_attr avxcvtvecmode
116 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Mapping from a floating-point vector mode to the integer vector mode
;; with the same element size and count.  Judging by the name this is the
;; mode of a permute control operand; the users are not visible in this
;; excerpt.
117 (define_mode_attr avxpermvecmode
118 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Precision letter for a floating-point vector mode, independent of the
;; vector width: "s" for single precision, "d" for double precision.
;; This is the AVX counterpart of ssemodesuffixf2c, extended to the
;; 32-byte modes.
119 (define_mode_attr avxmodesuffixf2c
120 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
121 (define_mode_attr avxmodesuffixp
122 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Element-size letter for the 16-byte integer modes with 8-, 16- and
;; 32-bit elements: "b", "w" and "d" respectively.
124 (define_mode_attr avxmodesuffixs
125 [(V16QI "b") (V8HI "w") (V4SI "d")])
;; Pattern-name suffix that tells 32-byte variants from 16-byte ones: the
;; empty string for 16-byte modes and "256" for 32-byte modes.  It is
;; used in names such as "avx_movdqu<avxmodesuffix>".
126 (define_mode_attr avxmodesuffix
127 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
128 (V8SI "256") (V8SF "256") (V4DF "256")])
130 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1, where N is the number of elements in the mode:
;; 255 for eight elements, 15 for four, 3 for two.
131 (define_mode_attr blendbits
132 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
134 ;; Mapping of immediate bits for vpermil instructions
;; Unlike blendbits, V4SF maps to 255 here rather than 15; the other three
;; entries are identical.  Presumably each value is the largest valid
;; immediate for the mode -- confirm against the vpermil patterns, which
;; are not visible in this excerpt.
135 (define_mode_attr vpermilbits
136 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
138 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 1 << (N - 1), where N is the number of elements in the
;; mode: 32768 = 1 << 15, 128 = 1 << 7, 8 = 1 << 3.
139 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
141 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
144 ;;
145 ;; Move patterns
146 ;;
147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
149 (define_expand "mov<mode>"
150 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
151 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
154 ix86_expand_vector_move (<MODE>mode, operands);
158 (define_insn "*avx_mov<mode>_internal"
159 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
160 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
162 && (register_operand (operands[0], <MODE>mode)
163 || register_operand (operands[1], <MODE>mode))"
165 switch (which_alternative)
168 return standard_sse_constant_opcode (insn, operands[1]);
171 switch (get_attr_mode (insn))
175 return "vmovaps\t{%1, %0|%0, %1}";
178 return "vmovapd\t{%1, %0|%0, %1}";
180 return "vmovdqa\t{%1, %0|%0, %1}";
186 [(set_attr "type" "sselog1,ssemov,ssemov")
187 (set_attr "prefix" "vex")
188 (set_attr "mode" "<avxvecmode>")])
190 ;; All of these patterns are enabled for SSE1 as well as SSE2.
191 ;; This is essential for maintaining stable calling conventions.
193 (define_expand "mov<mode>"
194 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
195 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
198 ix86_expand_vector_move (<MODE>mode, operands);
202 (define_insn "*mov<mode>_internal"
203 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
204 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
206 && (register_operand (operands[0], <MODE>mode)
207 || register_operand (operands[1], <MODE>mode))"
209 switch (which_alternative)
212 return standard_sse_constant_opcode (insn, operands[1]);
215 switch (get_attr_mode (insn))
218 return "movaps\t{%1, %0|%0, %1}";
220 return "movapd\t{%1, %0|%0, %1}";
222 return "movdqa\t{%1, %0|%0, %1}";
228 [(set_attr "type" "sselog1,ssemov,ssemov")
230 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
231 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
232 (and (eq_attr "alternative" "2")
233 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
235 (const_string "V4SF")
236 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
237 (const_string "V4SF")
238 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
239 (const_string "V2DF")
241 (const_string "TI")))])
243 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
244 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
245 ;; from memory, we'd prefer to load the memory directly into the %xmm
246 ;; register. To facilitate this happy circumstance, this pattern won't
247 ;; split until after register allocation. If the 64-bit value didn't
248 ;; come from memory, this is the best we can do. This is much better
249 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
250 ;; from there.
252 (define_insn_and_split "movdi_to_sse"
254 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
255 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
256 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
257 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
259 "&& reload_completed"
262 if (register_operand (operands[1], DImode))
264 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
265 Assemble the 64-bit DImode value in an xmm register. */
266 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
267 gen_rtx_SUBREG (SImode, operands[1], 0)));
268 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
269 gen_rtx_SUBREG (SImode, operands[1], 4)));
270 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
272 else if (memory_operand (operands[1], DImode))
273 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
279 [(set (match_operand:V4SF 0 "register_operand" "")
280 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
281 "TARGET_SSE && reload_completed"
284 (vec_duplicate:V4SF (match_dup 1))
288 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
289 operands[2] = CONST0_RTX (V4SFmode);
293 [(set (match_operand:V2DF 0 "register_operand" "")
294 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
295 "TARGET_SSE2 && reload_completed"
296 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
298 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
299 operands[2] = CONST0_RTX (DFmode);
302 (define_expand "push<mode>1"
303 [(match_operand:AVX256MODE 0 "register_operand" "")]
306 ix86_expand_push (<MODE>mode, operands[0]);
310 (define_expand "push<mode>1"
311 [(match_operand:SSEMODE 0 "register_operand" "")]
314 ix86_expand_push (<MODE>mode, operands[0]);
318 (define_expand "movmisalign<mode>"
319 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
320 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
323 ix86_expand_vector_move_misalign (<MODE>mode, operands);
327 (define_expand "movmisalign<mode>"
328 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
329 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
332 ix86_expand_vector_move_misalign (<MODE>mode, operands);
336 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
337 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
339 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
341 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
342 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
343 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
344 [(set_attr "type" "ssemov")
345 (set_attr "prefix" "vex")
346 (set_attr "mode" "<MODE>")])
348 (define_insn "sse2_movq128"
349 [(set (match_operand:V2DI 0 "register_operand" "=x")
352 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
353 (parallel [(const_int 0)]))
356 "%vmovq\t{%1, %0|%0, %1}"
357 [(set_attr "type" "ssemov")
358 (set_attr "prefix" "maybe_vex")
359 (set_attr "mode" "TI")])
361 (define_insn "<sse>_movup<ssemodesuffixf2c>"
362 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
364 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
366 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
367 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
368 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
369 [(set_attr "type" "ssemov")
370 (set_attr "mode" "<MODE>")])
372 (define_insn "avx_movdqu<avxmodesuffix>"
373 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
375 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
377 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
378 "vmovdqu\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "prefix" "vex")
381 (set_attr "mode" "<avxvecmode>")])
383 (define_insn "sse2_movdqu"
384 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
385 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
387 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
388 "movdqu\t{%1, %0|%0, %1}"
389 [(set_attr "type" "ssemov")
390 (set_attr "prefix_data16" "1")
391 (set_attr "mode" "TI")])
393 (define_insn "avx_movnt<mode>"
394 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
396 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
398 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
399 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
400 [(set_attr "type" "ssemov")
401 (set_attr "prefix" "vex")
402 (set_attr "mode" "<MODE>")])
404 (define_insn "<sse>_movnt<mode>"
405 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
407 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
409 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
410 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
411 [(set_attr "type" "ssemov")
412 (set_attr "mode" "<MODE>")])
414 (define_insn "avx_movnt<mode>"
415 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
417 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
420 "vmovntdq\t{%1, %0|%0, %1}"
421 [(set_attr "type" "ssecvt")
422 (set_attr "prefix" "vex")
423 (set_attr "mode" "<avxvecmode>")])
425 (define_insn "sse2_movntv2di"
426 [(set (match_operand:V2DI 0 "memory_operand" "=m")
427 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
430 "movntdq\t{%1, %0|%0, %1}"
431 [(set_attr "type" "ssecvt")
432 (set_attr "prefix_data16" "1")
433 (set_attr "mode" "TI")])
435 (define_insn "sse2_movntsi"
436 [(set (match_operand:SI 0 "memory_operand" "=m")
437 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
440 "movnti\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssecvt")
442 (set_attr "mode" "V2DF")])
444 (define_insn "avx_lddqu<avxmodesuffix>"
445 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
447 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
450 "vlddqu\t{%1, %0|%0, %1}"
451 [(set_attr "type" "ssecvt")
452 (set_attr "prefix" "vex")
453 (set_attr "mode" "<avxvecmode>")])
455 (define_insn "sse3_lddqu"
456 [(set (match_operand:V16QI 0 "register_operand" "=x")
457 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
460 "lddqu\t{%1, %0|%0, %1}"
461 [(set_attr "type" "ssecvt")
462 (set_attr "prefix_rep" "1")
463 (set_attr "mode" "TI")])
465 ; Expand patterns for non-temporal stores. At the moment, only those
466 ; that directly map to insns are defined; it would be possible to
467 ; define patterns for other modes that would expand to several insns.
469 (define_expand "storent<mode>"
470 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
472 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
474 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
477 (define_expand "storent<mode>"
478 [(set (match_operand:MODEF 0 "memory_operand" "")
480 [(match_operand:MODEF 1 "register_operand" "")]
485 (define_expand "storentv2di"
486 [(set (match_operand:V2DI 0 "memory_operand" "")
487 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
492 (define_expand "storentsi"
493 [(set (match_operand:SI 0 "memory_operand" "")
494 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 ;; Parallel floating point arithmetic
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505 (define_expand "<code><mode>2"
506 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
508 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
509 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
510 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for addition/subtraction (the plusminus code iterator) on the
;; 32-byte floating-point vector modes.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the mode.  The preparation statement
;; only calls ix86_fixup_binary_operands_no_copy to legitimize the
;; operands and contains no DONE, so the RTL template above it is what
;; gets emitted.
512 (define_expand "<plusminus_insn><mode>3"
513 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
514 (plusminus:AVX256MODEF2P
515 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
516 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
517 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
518 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VEX-encoded packed add/subtract for every floating-point vector mode
;; in AVXMODEF2P (16-byte and 32-byte).  This is the three-operand form:
;; the destination (operand 0) is a separate register from source
;; operand 1, and operand 2 may be a register or memory.
;; The <comm> prefix on operand 1's constraint comes from a code attribute
;; defined outside this excerpt -- presumably "%" for plus and empty for
;; minus; confirm against its definition.
;; The output template gives both assembler dialects in {att|intel} form.
520 (define_insn "*avx_<plusminus_insn><mode>3"
521 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
522 (plusminus:AVXMODEF2P
523 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
524 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
525 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
526 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
527 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
528 [(set_attr "type" "sseadd")
529 (set_attr "prefix" "vex")
530 (set_attr "mode" "<avxvecmode>")])
;; Expander for addition/subtraction on the 16-byte floating-point vector
;; modes (V4SF, V2DF).  Enabled when SSE_VEC_FLOAT_MODE_P holds for the
;; mode.  As in the 32-byte expander of the same name, the preparation
;; statement only legitimizes the operands through
;; ix86_fixup_binary_operands_no_copy and then lets the template be
;; emitted.
532 (define_expand "<plusminus_insn><mode>3"
533 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
534 (plusminus:SSEMODEF2P
535 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
536 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
537 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
538 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Legacy (non-VEX) packed add/subtract for V4SF and V2DF.  This is the
;; two-operand form: the "0" constraint ties operand 1 to the destination
;; register, so the template prints only %2 and %0.
;; The <comm> prefix on operand 1's constraint comes from a code attribute
;; defined outside this excerpt -- presumably "%" for plus and empty for
;; minus; confirm against its definition.
540 (define_insn "*<plusminus_insn><mode>3"
541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
542 (plusminus:SSEMODEF2P
543 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
544 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
545 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
546 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
547 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
548 [(set_attr "type" "sseadd")
549 (set_attr "mode" "<MODE>")])
551 (define_insn "*avx_vm<plusminus_insn><mode>3"
552 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
553 (vec_merge:SSEMODEF2P
554 (plusminus:SSEMODEF2P
555 (match_operand:SSEMODEF2P 1 "register_operand" "x")
556 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
559 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
560 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
561 [(set_attr "type" "sseadd")
562 (set_attr "prefix" "vex")
563 (set_attr "mode" "<ssescalarmode>")])
565 (define_insn "<sse>_vm<plusminus_insn><mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
567 (vec_merge:SSEMODEF2P
568 (plusminus:SSEMODEF2P
569 (match_operand:SSEMODEF2P 1 "register_operand" "0")
570 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
573 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
574 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
575 [(set_attr "type" "sseadd")
576 (set_attr "mode" "<ssescalarmode>")])
578 (define_expand "mul<mode>3"
579 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
581 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
582 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
583 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
584 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
586 (define_insn "*avx_mul<mode>3"
587 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
589 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
590 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
591 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
592 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
593 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
594 [(set_attr "type" "ssemul")
595 (set_attr "prefix" "vex")
596 (set_attr "mode" "<avxvecmode>")])
598 (define_expand "mul<mode>3"
599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
601 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
602 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
603 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
604 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
606 (define_insn "*mul<mode>3"
607 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
609 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
610 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
611 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
612 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
613 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
614 [(set_attr "type" "ssemul")
615 (set_attr "mode" "<MODE>")])
617 (define_insn "*avx_vmmul<mode>3"
618 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
619 (vec_merge:SSEMODEF2P
621 (match_operand:SSEMODEF2P 1 "register_operand" "x")
622 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
625 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
626 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "type" "ssemul")
628 (set_attr "prefix" "vex")
629 (set_attr "mode" "<ssescalarmode>")])
631 (define_insn "<sse>_vmmul<mode>3"
632 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
633 (vec_merge:SSEMODEF2P
635 (match_operand:SSEMODEF2P 1 "register_operand" "0")
636 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
639 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
640 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
641 [(set_attr "type" "ssemul")
642 (set_attr "mode" "<ssescalarmode>")])
;; Expander for V8SF division.  After legitimizing the operands it may
;; replace the divide with the software sequence emitted by
;; ix86_emit_swdivsf.  That path is taken only when SSE math and
;; TARGET_RECIP are enabled, the insn is being optimized for speed, and
;; the finite-math-only, non-trapping and unsafe-math flags all permit it.
;; The speed test uses optimize_insn_for_speed_p (), the same per-insn
;; predicate as the 128-bit divv4sf3 expander, in place of the global
;; optimize_size flag, so the two widths make the same decision.
644 (define_expand "divv8sf3"
645 [(set (match_operand:V8SF 0 "register_operand" "")
646 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
647 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
650 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
652 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
653 && flag_finite_math_only && !flag_trapping_math
654 && flag_unsafe_math_optimizations)
656 ix86_emit_swdivsf (operands[0], operands[1],
657 operands[2], V8SFmode);
662 (define_expand "divv4df3"
663 [(set (match_operand:V4DF 0 "register_operand" "")
664 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
665 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
667 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
669 (define_insn "avx_div<mode>3"
670 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
672 (match_operand:AVXMODEF2P 1 "register_operand" "x")
673 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
674 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
675 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
676 [(set_attr "type" "ssediv")
677 (set_attr "prefix" "vex")
678 (set_attr "mode" "<MODE>")])
680 (define_expand "divv4sf3"
681 [(set (match_operand:V4SF 0 "register_operand" "")
682 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
683 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
686 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
687 && flag_finite_math_only && !flag_trapping_math
688 && flag_unsafe_math_optimizations)
690 ix86_emit_swdivsf (operands[0], operands[1],
691 operands[2], V4SFmode);
696 (define_expand "divv2df3"
697 [(set (match_operand:V2DF 0 "register_operand" "")
698 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
699 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
703 (define_insn "*avx_div<mode>3"
704 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
706 (match_operand:SSEMODEF2P 1 "register_operand" "x")
707 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
708 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
709 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
710 [(set_attr "type" "ssediv")
711 (set_attr "prefix" "vex")
712 (set_attr "mode" "<MODE>")])
714 (define_insn "<sse>_div<mode>3"
715 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
717 (match_operand:SSEMODEF2P 1 "register_operand" "0")
718 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
719 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
720 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
721 [(set_attr "type" "ssediv")
722 (set_attr "mode" "<MODE>")])
724 (define_insn "*avx_vmdiv<mode>3"
725 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
726 (vec_merge:SSEMODEF2P
728 (match_operand:SSEMODEF2P 1 "register_operand" "x")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
732 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
733 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
734 [(set_attr "type" "ssediv")
735 (set_attr "prefix" "vex")
736 (set_attr "mode" "<ssescalarmode>")])
738 (define_insn "<sse>_vmdiv<mode>3"
739 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
740 (vec_merge:SSEMODEF2P
742 (match_operand:SSEMODEF2P 1 "register_operand" "0")
743 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
746 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
747 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
748 [(set_attr "type" "ssediv")
749 (set_attr "mode" "<ssescalarmode>")])
751 (define_insn "avx_rcpv8sf2"
752 [(set (match_operand:V8SF 0 "register_operand" "=x")
754 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
756 "vrcpps\t{%1, %0|%0, %1}"
757 [(set_attr "type" "sse")
758 (set_attr "prefix" "vex")
759 (set_attr "mode" "V8SF")])
761 (define_insn "sse_rcpv4sf2"
762 [(set (match_operand:V4SF 0 "register_operand" "=x")
764 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
766 "%vrcpps\t{%1, %0|%0, %1}"
767 [(set_attr "type" "sse")
768 (set_attr "prefix" "maybe_vex")
769 (set_attr "mode" "V4SF")])
771 (define_insn "*avx_vmrcpv4sf2"
772 [(set (match_operand:V4SF 0 "register_operand" "=x")
774 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
776 (match_operand:V4SF 2 "register_operand" "x")
779 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
780 [(set_attr "type" "sse")
781 (set_attr "prefix" "vex")
782 (set_attr "mode" "SF")])
784 (define_insn "sse_vmrcpv4sf2"
785 [(set (match_operand:V4SF 0 "register_operand" "=x")
787 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
789 (match_operand:V4SF 2 "register_operand" "0")
792 "rcpss\t{%1, %0|%0, %1}"
793 [(set_attr "type" "sse")
794 (set_attr "mode" "SF")])
;; Expander for V8SF square root.  It may replace the sqrt with the
;; software sequence emitted by ix86_emit_swsqrtsf (last argument 0; the
;; rsqrt expanders in this file pass 1).  That path is taken only when SSE
;; math and TARGET_RECIP are enabled, the insn is being optimized for
;; speed, and the finite-math-only, non-trapping and unsafe-math flags all
;; permit it.
;; The speed test uses optimize_insn_for_speed_p (), the same per-insn
;; predicate as the 128-bit sqrtv4sf2 expander, in place of the global
;; optimize_size flag, so the two widths make the same decision.
796 (define_expand "sqrtv8sf2"
797 [(set (match_operand:V8SF 0 "register_operand" "")
798 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
801 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
802 && flag_finite_math_only && !flag_trapping_math
803 && flag_unsafe_math_optimizations)
805 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
810 (define_insn "avx_sqrtv8sf2"
811 [(set (match_operand:V8SF 0 "register_operand" "=x")
812 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
814 "vsqrtps\t{%1, %0|%0, %1}"
815 [(set_attr "type" "sse")
816 (set_attr "prefix" "vex")
817 (set_attr "mode" "V8SF")])
819 (define_expand "sqrtv4sf2"
820 [(set (match_operand:V4SF 0 "register_operand" "")
821 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
824 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
825 && flag_finite_math_only && !flag_trapping_math
826 && flag_unsafe_math_optimizations)
828 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
833 (define_insn "sse_sqrtv4sf2"
834 [(set (match_operand:V4SF 0 "register_operand" "=x")
835 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
837 "%vsqrtps\t{%1, %0|%0, %1}"
838 [(set_attr "type" "sse")
839 (set_attr "prefix" "maybe_vex")
840 (set_attr "mode" "V4SF")])
842 (define_insn "sqrtv4df2"
843 [(set (match_operand:V4DF 0 "register_operand" "=x")
844 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
846 "vsqrtpd\t{%1, %0|%0, %1}"
847 [(set_attr "type" "sse")
848 (set_attr "prefix" "vex")
849 (set_attr "mode" "V4DF")])
851 (define_insn "sqrtv2df2"
852 [(set (match_operand:V2DF 0 "register_operand" "=x")
853 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
855 "%vsqrtpd\t{%1, %0|%0, %1}"
856 [(set_attr "type" "sse")
857 (set_attr "prefix" "maybe_vex")
858 (set_attr "mode" "V2DF")])
860 (define_insn "*avx_vmsqrt<mode>2"
861 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
862 (vec_merge:SSEMODEF2P
864 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
865 (match_operand:SSEMODEF2P 2 "register_operand" "x")
867 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
868 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "prefix" "vex")
871 (set_attr "mode" "<ssescalarmode>")])
873 (define_insn "<sse>_vmsqrt<mode>2"
874 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
875 (vec_merge:SSEMODEF2P
877 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
878 (match_operand:SSEMODEF2P 2 "register_operand" "0")
880 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
881 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
882 [(set_attr "type" "sse")
883 (set_attr "mode" "<ssescalarmode>")])
885 (define_expand "rsqrtv8sf2"
886 [(set (match_operand:V8SF 0 "register_operand" "")
888 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
889 "TARGET_AVX && TARGET_SSE_MATH"
891 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
895 (define_insn "avx_rsqrtv8sf2"
896 [(set (match_operand:V8SF 0 "register_operand" "=x")
898 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
900 "vrsqrtps\t{%1, %0|%0, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "V8SF")])
905 (define_expand "rsqrtv4sf2"
906 [(set (match_operand:V4SF 0 "register_operand" "")
908 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
911 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
915 (define_insn "sse_rsqrtv4sf2"
916 [(set (match_operand:V4SF 0 "register_operand" "=x")
918 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
920 "%vrsqrtps\t{%1, %0|%0, %1}"
921 [(set_attr "type" "sse")
922 (set_attr "prefix" "maybe_vex")
923 (set_attr "mode" "V4SF")])
925 (define_insn "*avx_vmrsqrtv4sf2"
926 [(set (match_operand:V4SF 0 "register_operand" "=x")
928 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
930 (match_operand:V4SF 2 "register_operand" "x")
933 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "vex")
936 (set_attr "mode" "SF")])
938 (define_insn "sse_vmrsqrtv4sf2"
939 [(set (match_operand:V4SF 0 "register_operand" "=x")
941 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
943 (match_operand:V4SF 2 "register_operand" "0")
946 "rsqrtss\t{%1, %0|%0, %1}"
947 [(set_attr "type" "sse")
948 (set_attr "mode" "SF")])
950 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
951 ;; isn't really correct, as those rtl operators aren't defined when
952 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
954 (define_expand "<code><mode>3"
955 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
956 (smaxmin:AVX256MODEF2P
957 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
958 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
959 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
961 if (!flag_finite_math_only)
962 operands[1] = force_reg (<MODE>mode, operands[1]);
963 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
966 (define_expand "<code><mode>3"
967 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
969 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
970 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
971 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
973 if (!flag_finite_math_only)
974 operands[1] = force_reg (<MODE>mode, operands[1]);
975 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
978 (define_insn "*avx_<code><mode>3_finite"
979 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
981 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
982 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
983 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
984 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
985 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
986 [(set_attr "type" "sseadd")
987 (set_attr "prefix" "vex")
988 (set_attr "mode" "<MODE>")])
;; SSE packed max/min that only matches when flag_finite_math_only is set.
;; Two-operand form: operand 1 is commutative ("%") and tied to the
;; destination ("0"); operand 2 may be a register or memory.
990 (define_insn "*<code><mode>3_finite"
991 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
993 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
994 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
995 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
996 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
997 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
998 [(set_attr "type" "sseadd")
999 (set_attr "mode" "<MODE>")])
;; AVX packed max/min without the flag_finite_math_only requirement;
;; emits the three-operand VEX form.
1001 (define_insn "*avx_<code><mode>3"
1002 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1004 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1005 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1006 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1007 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1008 [(set_attr "type" "sseadd")
1009 (set_attr "prefix" "vex")
1010 (set_attr "mode" "<avxvecmode>")])
;; SSE packed max/min without the flag_finite_math_only requirement.
;; Operand 1 must be a register and is tied to the destination; it carries
;; no "%" marker, so the inputs are not treated as commutative here.
1012 (define_insn "*<code><mode>3"
1013 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1015 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1016 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1017 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1018 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1019 [(set_attr "type" "sseadd")
1020 (set_attr "mode" "<MODE>")])
;; AVX scalar max/min expressed as a vec_merge on the vector mode; emits
;; the three-operand VEX scalar instruction.
1022 (define_insn "*avx_vm<code><mode>3"
1023 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1024 (vec_merge:SSEMODEF2P
1026 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1027 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1030 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1031 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1032 [(set_attr "type" "sse")
1033 (set_attr "prefix" "vex")
1034 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar max/min expressed as a vec_merge on the vector mode.
;; Two-operand form: operand 1 is tied to the destination register.
1036 (define_insn "<sse>_vm<code><mode>3"
1037 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1038 (vec_merge:SSEMODEF2P
1040 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1041 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1044 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1045 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1046 [(set_attr "type" "sse")
1047 (set_attr "mode" "<ssescalarmode>")])
1049 ;; These versions of the min/max patterns implement exactly the operations
1050 ;; min = (op1 < op2 ? op1 : op2)
1051 ;; max = (!(op1 < op2) ? op1 : op2)
1052 ;; Their operands are not commutative, and thus they may be used in the
1053 ;; presence of -0.0 and NaN.
;; AVX form of the non-commutative min: operand order is preserved
;; (no "%" on operand 1) and vminp<suffix> is emitted.
1055 (define_insn "*avx_ieee_smin<mode>3"
1056 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1058 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1059 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1061 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1062 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1063 [(set_attr "type" "sseadd")
1064 (set_attr "prefix" "vex")
1065 (set_attr "mode" "<avxvecmode>")])
;; AVX form of the non-commutative max: operand order is preserved
;; (no "%" on operand 1) and vmaxp<suffix> is emitted.
1067 (define_insn "*avx_ieee_smax<mode>3"
1068 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1070 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1071 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1073 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1074 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1075 [(set_attr "type" "sseadd")
1076 (set_attr "prefix" "vex")
1077 (set_attr "mode" "<avxvecmode>")])
;; SSE form of the non-commutative min: operand 1 is tied to the
;; destination and minp<suffix> is emitted with operand 2 as the source.
1079 (define_insn "*ieee_smin<mode>3"
1080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1082 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1083 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1085 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1086 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1087 [(set_attr "type" "sseadd")
1088 (set_attr "mode" "<MODE>")])
;; SSE form of the non-commutative max: operand 1 is tied to the
;; destination and maxp<suffix> is emitted with operand 2 as the source.
1090 (define_insn "*ieee_smax<mode>3"
1091 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1093 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1094 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1096 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1097 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1098 [(set_attr "type" "sseadd")
1099 (set_attr "mode" "<MODE>")])
;; 256-bit single-precision add/subtract (vaddsubps).  The pattern
;; combines an operation on operands 1 and 2 with (minus op1 op2) on the
;; same two operands (match_dup).
1101 (define_insn "avx_addsubv8sf3"
1102 [(set (match_operand:V8SF 0 "register_operand" "=x")
1105 (match_operand:V8SF 1 "register_operand" "x")
1106 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1107 (minus:V8SF (match_dup 1) (match_dup 2))
1110 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1111 [(set_attr "type" "sseadd")
1112 (set_attr "prefix" "vex")
1113 (set_attr "mode" "V8SF")])
;; 256-bit double-precision add/subtract (vaddsubpd), using the same
;; operands for both halves of the pattern via match_dup.
1115 (define_insn "avx_addsubv4df3"
1116 [(set (match_operand:V4DF 0 "register_operand" "=x")
1119 (match_operand:V4DF 1 "register_operand" "x")
1120 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1121 (minus:V4DF (match_dup 1) (match_dup 2))
1124 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1125 [(set_attr "type" "sseadd")
1126 (set_attr "prefix" "vex")
1127 (set_attr "mode" "V4DF")])
;; 128-bit single-precision add/subtract in its VEX-encoded,
;; three-operand form (vaddsubps).
1129 (define_insn "*avx_addsubv4sf3"
1130 [(set (match_operand:V4SF 0 "register_operand" "=x")
1133 (match_operand:V4SF 1 "register_operand" "x")
1134 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1135 (minus:V4SF (match_dup 1) (match_dup 2))
1138 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1139 [(set_attr "type" "sseadd")
1140 (set_attr "prefix" "vex")
1141 (set_attr "mode" "V4SF")])
;; 128-bit single-precision add/subtract (addsubps), two-operand form:
;; operand 1 is tied to the destination.  The prefix_rep attribute is
;; set to "1" for this insn.
1143 (define_insn "sse3_addsubv4sf3"
1144 [(set (match_operand:V4SF 0 "register_operand" "=x")
1147 (match_operand:V4SF 1 "register_operand" "0")
1148 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1149 (minus:V4SF (match_dup 1) (match_dup 2))
1152 "addsubps\t{%2, %0|%0, %2}"
1153 [(set_attr "type" "sseadd")
1154 (set_attr "prefix_rep" "1")
1155 (set_attr "mode" "V4SF")])
;; 128-bit double-precision add/subtract in its VEX-encoded,
;; three-operand form (vaddsubpd).
1157 (define_insn "*avx_addsubv2df3"
1158 [(set (match_operand:V2DF 0 "register_operand" "=x")
1161 (match_operand:V2DF 1 "register_operand" "x")
1162 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1163 (minus:V2DF (match_dup 1) (match_dup 2))
1166 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1167 [(set_attr "type" "sseadd")
1168 (set_attr "prefix" "vex")
1169 (set_attr "mode" "V2DF")])
;; 128-bit double-precision add/subtract (addsubpd), two-operand form:
;; operand 1 is tied to the destination.
1171 (define_insn "sse3_addsubv2df3"
1172 [(set (match_operand:V2DF 0 "register_operand" "=x")
1175 (match_operand:V2DF 1 "register_operand" "0")
1176 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1177 (minus:V2DF (match_dup 1) (match_dup 2))
1180 "addsubpd\t{%2, %0|%0, %2}"
1181 [(set_attr "type" "sseadd")
1182 (set_attr "mode" "V2DF")])
;; 256-bit double-precision horizontal add/subtract (vh<op>pd).
;; The pattern pairs up adjacent DF elements (selectors 0/1 and 2/3) of
;; operand 1 and of operand 2.
1184 (define_insn "avx_h<plusminus_insn>v4df3"
1185 [(set (match_operand:V4DF 0 "register_operand" "=x")
1190 (match_operand:V4DF 1 "register_operand" "x")
1191 (parallel [(const_int 0)]))
1192 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1194 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1195 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1199 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1200 (parallel [(const_int 0)]))
1201 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1203 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1204 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1206 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1207 [(set_attr "type" "sseadd")
1208 (set_attr "prefix" "vex")
1209 (set_attr "mode" "V4DF")])
;; 256-bit single-precision horizontal add/subtract (vh<op>ps).
;; Elements 0-3 of operand 1 and operand 2 are listed first, followed by
;; elements 4-7 of operand 1 and operand 2, always as adjacent pairs.
1211 (define_insn "avx_h<plusminus_insn>v8sf3"
1212 [(set (match_operand:V8SF 0 "register_operand" "=x")
1218 (match_operand:V8SF 1 "register_operand" "x")
1219 (parallel [(const_int 0)]))
1220 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1222 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1223 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1227 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1228 (parallel [(const_int 0)]))
1229 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1231 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1232 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1236 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1237 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1239 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1240 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1243 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1244 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1246 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1247 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1249 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1250 [(set_attr "type" "sseadd")
1251 (set_attr "prefix" "vex")
1252 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract, VEX three-operand
;; form (vh<op>ps).  Adjacent SF pairs of operand 1 come first, then
;; adjacent pairs of operand 2.
1254 (define_insn "*avx_h<plusminus_insn>v4sf3"
1255 [(set (match_operand:V4SF 0 "register_operand" "=x")
1260 (match_operand:V4SF 1 "register_operand" "x")
1261 (parallel [(const_int 0)]))
1262 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1264 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1265 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1269 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1270 (parallel [(const_int 0)]))
1271 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1273 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1274 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1276 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1277 [(set_attr "type" "sseadd")
1278 (set_attr "prefix" "vex")
1279 (set_attr "mode" "V4SF")])
;; 128-bit single-precision horizontal add/subtract, two-operand form
;; (h<op>ps): operand 1 is tied to the destination.  The prefix_rep
;; attribute is set to "1" for this insn.
1281 (define_insn "sse3_h<plusminus_insn>v4sf3"
1282 [(set (match_operand:V4SF 0 "register_operand" "=x")
1287 (match_operand:V4SF 1 "register_operand" "0")
1288 (parallel [(const_int 0)]))
1289 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1291 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1292 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1296 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1297 (parallel [(const_int 0)]))
1298 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1300 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1301 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1303 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1304 [(set_attr "type" "sseadd")
1305 (set_attr "prefix_rep" "1")
1306 (set_attr "mode" "V4SF")])
;; 128-bit double-precision horizontal add/subtract, VEX three-operand
;; form (vh<op>pd): elements 0 and 1 of operand 1, then of operand 2.
1308 (define_insn "*avx_h<plusminus_insn>v2df3"
1309 [(set (match_operand:V2DF 0 "register_operand" "=x")
1313 (match_operand:V2DF 1 "register_operand" "x")
1314 (parallel [(const_int 0)]))
1315 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1318 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1319 (parallel [(const_int 0)]))
1320 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1322 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1323 [(set_attr "type" "sseadd")
1324 (set_attr "prefix" "vex")
1325 (set_attr "mode" "V2DF")])
;; 128-bit double-precision horizontal add/subtract, two-operand form
;; (h<op>pd): operand 1 is tied to the destination.
1327 (define_insn "sse3_h<plusminus_insn>v2df3"
1328 [(set (match_operand:V2DF 0 "register_operand" "=x")
1332 (match_operand:V2DF 1 "register_operand" "0")
1333 (parallel [(const_int 0)]))
1334 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1337 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1338 (parallel [(const_int 0)]))
1339 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1341 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1342 [(set_attr "type" "sseadd")
1343 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector.  Two strategies appear in the body:
;; two chained haddv4sf3 insns through a fresh temporary register, or
;; the ix86_expand_reduc_v4sf helper driven by gen_addv4sf3.
;; NOTE(review): presumably the hadd path is selected when SSE3 is
;; available -- confirm against the guarding condition.
1345 (define_expand "reduc_splus_v4sf"
1346 [(match_operand:V4SF 0 "register_operand" "")
1347 (match_operand:V4SF 1 "register_operand" "")]
1352 rtx tmp = gen_reg_rtx (V4SFmode);
1353 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1354 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1357 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: a single haddv2df3 with operand 1
;; supplied as both inputs.
1361 (define_expand "reduc_splus_v2df"
1362 [(match_operand:V2DF 0 "register_operand" "")
1363 (match_operand:V2DF 1 "register_operand" "")]
1366 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector, delegated to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining insn.
1370 (define_expand "reduc_smax_v4sf"
1371 [(match_operand:V4SF 0 "register_operand" "")
1372 (match_operand:V4SF 1 "register_operand" "")]
1375 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector, delegated to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining insn.
1379 (define_expand "reduc_smin_v4sf"
1380 [(match_operand:V4SF 0 "register_operand" "")
1381 (match_operand:V4SF 1 "register_operand" "")]
1384 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1390 ;; Parallel floating point comparisons
1392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate immediate (vcmpp<suffix>).
;; Operand 3 is an SImode constant accepted by const_0_to_31_operand and
;; is printed as the immediate of the instruction.
1394 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1395 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1397 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1398 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1399 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1402 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1403 [(set_attr "type" "ssecmp")
1404 (set_attr "prefix" "vex")
1405 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit predicate immediate (vcmps<suffix>),
;; expressed as a vec_merge on the vector mode.  Operand 3 is an SImode
;; constant accepted by const_0_to_31_operand.
;; Operand 0 is the output and must carry the "=x" constraint like every
;; other pattern here: an empty constraint gives the register allocator
;; no register class and does not mark the operand as written.
1407 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1408 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1409 (vec_merge:SSEMODEF2P
1411 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1412 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1413 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1418 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1419 [(set_attr "type" "ssecmp")
1420 (set_attr "prefix" "vex")
1421 (set_attr "mode" "<ssescalarmode>")])
1423 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1424 ;; may generate 256-bit vector compare instructions.
;; AVX packed mask compare: operand 3 is the comparison operator itself
;; (matched by avx_comparison_float_operator) and is printed into the
;; mnemonic through %D3.
1425 (define_insn "*avx_maskcmp<mode>3"
1426 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1427 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1428 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1429 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1430 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1431 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1432 [(set_attr "type" "ssecmp")
1433 (set_attr "prefix" "vex")
1434 (set_attr "mode" "<avxvecmode>")])
;; SSE mask compare over SSEMODEF4 (SF, DF, V4SF, V2DF).  Operand 3 is the
;; comparison operator (sse_comparison_operator), printed through %D3;
;; operand 1 is tied to the destination.
1436 (define_insn "<sse>_maskcmp<mode>3"
1437 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1438 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1439 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1440 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1441 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1443 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1444 [(set_attr "type" "ssecmp")
1445 (set_attr "mode" "<MODE>")])
;; SSE scalar mask compare expressed as a vec_merge on the vector mode.
;; Disabled when TARGET_SSE5 is set; operand 1 is tied to the destination.
1447 (define_insn "<sse>_vmmaskcmp<mode>3"
1448 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1449 (vec_merge:SSEMODEF2P
1450 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1451 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1452 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1455 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1456 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1457 [(set_attr "type" "ssecmp")
1458 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare (comis<suffix>) that sets FLAGS_REG in CCFP mode.
;; Element 0 of each vector operand is selected for the comparison.
;; The prefix attribute is maybe_vex and the template starts with %v.
1460 (define_insn "<sse>_comi"
1461 [(set (reg:CCFP FLAGS_REG)
1464 (match_operand:<ssevecmode> 0 "register_operand" "x")
1465 (parallel [(const_int 0)]))
1467 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1468 (parallel [(const_int 0)]))))]
1469 "SSE_FLOAT_MODE_P (<MODE>mode)"
1470 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1471 [(set_attr "type" "ssecomi")
1472 (set_attr "prefix" "maybe_vex")
1473 (set_attr "mode" "<MODE>")])
;; Scalar compare (ucomis<suffix>) that sets FLAGS_REG in CCFPU mode.
;; Element 0 of each vector operand is selected for the comparison.
;; The prefix attribute is maybe_vex and the template starts with %v.
1475 (define_insn "<sse>_ucomi"
1476 [(set (reg:CCFPU FLAGS_REG)
1479 (match_operand:<ssevecmode> 0 "register_operand" "x")
1480 (parallel [(const_int 0)]))
1482 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1483 (parallel [(const_int 0)]))))]
1484 "SSE_FLOAT_MODE_P (<MODE>mode)"
1485 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1486 [(set_attr "type" "ssecomi")
1487 (set_attr "prefix" "maybe_vex")
1488 (set_attr "mode" "<MODE>")])
;; Vector conditional select for V4SF/V2DF: operand 3 compares operands 4
;; and 5, and the result takes operand 1 or operand 2 accordingly.
;; Expansion is delegated to ix86_expand_fp_vcond, whose result is
;; captured in `ok'.
1490 (define_expand "vcond<mode>"
1491 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1492 (if_then_else:SSEMODEF2P
1493 (match_operator 3 ""
1494 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1495 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1496 (match_operand:SSEMODEF2P 1 "general_operand" "")
1497 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1498 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1500 bool ok = ix86_expand_fp_vcond (operands);
1505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1507 ;; Parallel floating point logical operations
1509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point and-not (vandnp<suffix>), three-operand VEX
;; form.  Operand 1 must be a register; operand 2 may be memory.
1511 (define_insn "avx_andnot<mode>3"
1512 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1515 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1516 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1517 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1518 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1519 [(set_attr "type" "sselog")
1520 (set_attr "prefix" "vex")
1521 (set_attr "mode" "<avxvecmode>")])
;; SSE packed floating-point and-not (andnp<suffix>), two-operand form:
;; operand 1 is tied to the destination; operand 2 may be memory.
1523 (define_insn "<sse>_andnot<mode>3"
1524 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1527 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1528 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1529 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1530 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1531 [(set_attr "type" "sselog")
1532 (set_attr "mode" "<MODE>")])
;; Expander for the plogic operations on 256-bit AVX float vectors; the
;; only preparation is ix86_fixup_binary_operands_no_copy.
1534 (define_expand "<code><mode>3"
1535 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1536 (plogic:AVX256MODEF2P
1537 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1538 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1539 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1540 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed floating-point logic insn (v<op>p<suffix>).  Operand 1 is
;; commutative ("%"); the insn additionally requires
;; ix86_binary_operator_ok to accept the operands.
1542 (define_insn "*avx_<code><mode>3"
1543 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1545 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1546 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1547 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1548 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1549 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1550 [(set_attr "type" "sselog")
1551 (set_attr "prefix" "vex")
1552 (set_attr "mode" "<avxvecmode>")])
;; Expander for binary operations on the SSEMODEF2P (V4SF/V2DF) modes,
;; enabled by SSE_VEC_FLOAT_MODE_P; the only preparation is
;; ix86_fixup_binary_operands_no_copy.
1554 (define_expand "<code><mode>3"
1555 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1557 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1559 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1560 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed floating-point logic insn (<op>p<suffix>), two-operand
;; form.  Operand 1 is commutative ("%") and tied to the destination; the
;; insn additionally requires ix86_binary_operator_ok.
1562 (define_insn "*<code><mode>3"
1563 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1565 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1566 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1567 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1568 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1569 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1570 [(set_attr "type" "sselog")
1571 (set_attr "mode" "<MODE>")])
1573 ;; Also define scalar versions. These are used for abs, neg, and
1574 ;; conditional move. Using subregs into vector modes causes register
1575 ;; allocation lossage. These patterns do not allow memory operands
1576 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not using the packed AVX instruction.  All
;; operands are registers only; the insn mode is the containing vector
;; mode (<ssevecmode>).
1578 (define_insn "*avx_andnot<mode>3"
1579 [(set (match_operand:MODEF 0 "register_operand" "=x")
1582 (match_operand:MODEF 1 "register_operand" "x"))
1583 (match_operand:MODEF 2 "register_operand" "x")))]
1584 "AVX_FLOAT_MODE_P (<MODE>mode)"
1585 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1586 [(set_attr "type" "sselog")
1587 (set_attr "prefix" "vex")
1588 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) and-not using the packed SSE instruction.
;; Registers only; operand 1 is tied to the destination.
1590 (define_insn "*andnot<mode>3"
1591 [(set (match_operand:MODEF 0 "register_operand" "=x")
1594 (match_operand:MODEF 1 "register_operand" "0"))
1595 (match_operand:MODEF 2 "register_operand" "x")))]
1596 "SSE_FLOAT_MODE_P (<MODE>mode)"
1597 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1598 [(set_attr "type" "sselog")
1599 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logic operation using the packed AVX instruction
;; (v<op>p<suffix>).  All operands are registers only.
1601 (define_insn "*avx_<code><mode>3"
1602 [(set (match_operand:MODEF 0 "register_operand" "=x")
1604 (match_operand:MODEF 1 "register_operand" "x")
1605 (match_operand:MODEF 2 "register_operand" "x")))]
1606 "AVX_FLOAT_MODE_P (<MODE>mode)"
1607 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1608 [(set_attr "type" "sselog")
1609 (set_attr "prefix" "vex")
1610 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logic operation using the packed SSE instruction
;; (<op>p<suffix>).  Registers only; operand 1 is tied to the destination.
1612 (define_insn "*<code><mode>3"
1613 [(set (match_operand:MODEF 0 "register_operand" "=x")
1615 (match_operand:MODEF 1 "register_operand" "0")
1616 (match_operand:MODEF 2 "register_operand" "x")))]
1617 "SSE_FLOAT_MODE_P (<MODE>mode)"
1618 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1619 [(set_attr "type" "sselog")
1620 (set_attr "mode" "<ssevecmode>")])
1622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1624 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1625 ;; scalar version of the instructions as well as the vector version.
1627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1629 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1630 ;; combine to generate a multiply/add with two memory references. We then
1631 ;; split this insn, into loading up the destination register with one of the
1632 ;; memory operations. If we don't manage to split the insn, reload will
1633 ;; generate the appropriate moves. The reason this is needed, is that combine
1634 ;; has already folded one of the memory references into both the multiply and
1635 ;; add insns, and it can't generate a new pseudo. I.e.:
1636 ;; (set (reg1) (mem (addr1)))
1637 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1638 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fused multiply-add over SSEMODEF4.  Four alternatives: the
;; destination is tied to operand 1 in the first two and to operand 3 in
;; the last two.  Requires TARGET_SSE5 and TARGET_FUSED_MADD.
;; NOTE(review): the "2" passed to ix86_sse5_valid_op_p presumably is the
;; number of memory operands tolerated -- confirm against i386.c.
1640 (define_insn "sse5_fmadd<mode>4"
1641 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1644 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1645 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1646 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1647 "TARGET_SSE5 && TARGET_FUSED_MADD
1648 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1649 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1650 [(set_attr "type" "ssemuladd")
1651 (set_attr "mode" "<MODE>")])
1653 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; limit of 1 but pass it with a limit of 2, and the destination register
;; is not mentioned in any of the three inputs.  It then calls
;; ix86_expand_sse5_multiple_memory and re-emits sse5_fmadd<mode>4.
1655 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1658 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1659 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1660 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1662 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1663 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1664 && !reg_mentioned_p (operands[0], operands[1])
1665 && !reg_mentioned_p (operands[0], operands[2])
1666 && !reg_mentioned_p (operands[0], operands[3])"
1669 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1670 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1671 operands[2], operands[3]));
1675 ;; For the scalar operations, use operand1 for the upper words that aren't
1676 ;; modified, so restrict the forms that are generated.
1677 ;; Scalar version of fmadd
;; Two alternatives; operand 1 is tied to the destination in both, and
;; ix86_sse5_valid_op_p is called with a limit of 1 rather than 2.
1678 (define_insn "sse5_vmfmadd<mode>4"
1679 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1680 (vec_merge:SSEMODEF2P
1683 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1684 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1685 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1688 "TARGET_SSE5 && TARGET_FUSED_MADD
1689 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1690 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1691 [(set_attr "type" "ssemuladd")
1692 (set_attr "mode" "<MODE>")])
1694 ;; Floating multiply and subtract
1695 ;; Allow two memory operands the same as fmadd
;; Four alternatives with the same constraint layout as the fmadd
;; pattern: destination tied to operand 1 in the first two, to operand 3
;; in the last two.  Requires TARGET_SSE5 and TARGET_FUSED_MADD.
1696 (define_insn "sse5_fmsub<mode>4"
1697 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1700 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1701 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1702 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1703 "TARGET_SSE5 && TARGET_FUSED_MADD
1704 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1705 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1706 [(set_attr "type" "ssemuladd")
1707 (set_attr "mode" "<MODE>")])
1709 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; limit of 1 but pass it with a limit of 2, and the destination register
;; is not mentioned in any of the three inputs.  It then calls
;; ix86_expand_sse5_multiple_memory and re-emits sse5_fmsub<mode>4.
1711 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1714 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1715 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1716 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1718 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1719 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1720 && !reg_mentioned_p (operands[0], operands[1])
1721 && !reg_mentioned_p (operands[0], operands[2])
1722 && !reg_mentioned_p (operands[0], operands[3])"
1725 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1726 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1727 operands[2], operands[3]));
1731 ;; For the scalar operations, use operand1 for the upper words that aren't
1732 ;; modified, so restrict the forms that are generated.
1733 ;; Scalar version of fmsub
;; Two alternatives; operand 1 is tied to the destination in both.  The
;; final argument to ix86_sse5_valid_op_p is false here.
1734 (define_insn "sse5_vmfmsub<mode>4"
1735 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1736 (vec_merge:SSEMODEF2P
1739 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1740 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1741 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1744 "TARGET_SSE5 && TARGET_FUSED_MADD
1745 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1746 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1747 [(set_attr "type" "ssemuladd")
1748 (set_attr "mode" "<MODE>")])
1750 ;; Floating point negative multiply and add
1751 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1752 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1753 ;; Allow two memory operands to help in optimizing.
;; Operand 3 appears first in the RTL, ahead of the operand 1 / operand 2
;; pair, while the output template still prints %3, %2, %1, %0.  Four
;; alternatives: destination tied to operand 1 in the first two, to
;; operand 3 in the last two.
1754 (define_insn "sse5_fnmadd<mode>4"
1755 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1757 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1759 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1760 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1761 "TARGET_SSE5 && TARGET_FUSED_MADD
1762 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1763 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1764 [(set_attr "type" "ssemuladd")
1765 (set_attr "mode" "<MODE>")])
1767 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; limit of 1 but pass it with a limit of 2, and the destination register
;; is not mentioned in any of the three inputs.  It then calls
;; ix86_expand_sse5_multiple_memory and re-emits sse5_fnmadd<mode>4.
1769 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1771 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1773 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1774 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1776 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1777 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1778 && !reg_mentioned_p (operands[0], operands[1])
1779 && !reg_mentioned_p (operands[0], operands[2])
1780 && !reg_mentioned_p (operands[0], operands[3])"
1783 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1784 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1785 operands[2], operands[3]));
1789 ;; For the scalar operations, use operand1 for the upper words that aren't
1790 ;; modified, so restrict the forms that are generated.
1791 ;; Scalar version of fnmadd
;; Two alternatives; operand 1 is tied to the destination in both, and
;; operand 3 appears first in the RTL.
1792 (define_insn "sse5_vmfnmadd<mode>4"
1793 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1794 (vec_merge:SSEMODEF2P
1796 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1798 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1799 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1802 "TARGET_SSE5 && TARGET_FUSED_MADD
1803 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1804 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1805 [(set_attr "type" "ssemuladd")
1806 (set_attr "mode" "<MODE>")])
1808 ;; Floating point negative multiply and subtract
1809 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1810 ;; Allow 2 memory operands to help with optimization
;; Only two alternatives here, and operand 1 is tied to the destination in
;; both.  ix86_sse5_valid_op_p is called with a limit of 2 and a final
;; argument of false.
1811 (define_insn "sse5_fnmsub<mode>4"
1812 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1816 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1817 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1818 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1819 "TARGET_SSE5 && TARGET_FUSED_MADD
1820 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1821 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1822 [(set_attr "type" "ssemuladd")
1823 (set_attr "mode" "<MODE>")])
1825 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a
;; limit of 1 but pass it with a limit of 2 (final argument false in both
;; calls), and the destination register is not mentioned in any input.
;; It then calls ix86_expand_sse5_multiple_memory and re-emits
;; sse5_fnmsub<mode>4.
1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1833 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1835 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1836 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1837 && !reg_mentioned_p (operands[0], operands[1])
1838 && !reg_mentioned_p (operands[0], operands[2])
1839 && !reg_mentioned_p (operands[0], operands[3])"
1842 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1843 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1844 operands[2], operands[3]));
1848 ;; For the scalar operations, use operand1 for the upper words that aren't
1849 ;; modified, so restrict the forms that are generated.
1850 ;; Scalar version of fnmsub
;; Two alternatives; operand 1 is tied to the destination in both.
;; NOTE(review): this scalar pattern passes 2 to ix86_sse5_valid_op_p,
;; whereas the other scalar (vm) patterns pass 1 -- confirm this is
;; intentional.
1851 (define_insn "sse5_vmfnmsub<mode>4"
1852 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1853 (vec_merge:SSEMODEF2P
1857 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1858 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1859 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1862 "TARGET_SSE5 && TARGET_FUSED_MADD
1863 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1864 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1865 [(set_attr "type" "ssemuladd")
1866 (set_attr "mode" "<MODE>")])
1868 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1869 ;; even if the user used -mno-fused-madd
1870 ;; Parallel instructions. During instruction generation, just default
1871 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for fmadd: all operands are registers and the RTL
;; is wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the
;; body emits the plain sse5_fmadd<mode>4 insn instead.
1872 (define_expand "sse5i_fmadd<mode>4"
1873 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1877 (match_operand:SSEMODEF2P 1 "register_operand" "")
1878 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1879 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1880 UNSPEC_SSE5_INTRINSIC))]
1883 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1884 if (TARGET_FUSED_MADD)
1886 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1887 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fmadd.  It requires
;; only TARGET_SSE5 (not TARGET_FUSED_MADD) and calls
;; ix86_sse5_valid_op_p with a limit of 1.
1892 (define_insn "*sse5i_fmadd<mode>4"
1893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1897 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1898 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1899 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1900 UNSPEC_SSE5_INTRINSIC))]
1901 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1902 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1903 [(set_attr "type" "ssemuladd")
1904 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for fmsub: all operands are registers and the RTL
;; is wrapped in UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is set the
;; body emits the plain sse5_fmsub<mode>4 insn instead.
1906 (define_expand "sse5i_fmsub<mode>4"
1907 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1911 (match_operand:SSEMODEF2P 1 "register_operand" "")
1912 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1913 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1914 UNSPEC_SSE5_INTRINSIC))]
1917 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1918 if (TARGET_FUSED_MADD)
1920 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1921 operands[2], operands[3]));
;; Insn that matches the UNSPEC_SSE5_INTRINSIC form of fmsub.  Same four
;; constraint alternatives as the fmadd intrinsic insn; validated by
;; ix86_sse5_valid_op_p and emitted as fmsub<ssemodesuffixf4>.
1926 (define_insn "*sse5i_fmsub<mode>4"
1927 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1931 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1932 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1933 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1934 UNSPEC_SSE5_INTRINSIC))]
1935 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1936 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1937 [(set_attr "type" "ssemuladd")
1938 (set_attr "mode" "<MODE>")])
1940 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1941 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the intrinsic form of fnmadd.  Operand 3 appears in the
;; pattern before operands 1 and 2.  When TARGET_FUSED_MADD is set the
;; preparation code emits gen_sse5_fnmadd<mode>4 with operands in 0..3 order.
1942 (define_expand "sse5i_fnmadd<mode>4"
1943 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1946 (match_operand:SSEMODEF2P 3 "register_operand" "")
1948 (match_operand:SSEMODEF2P 1 "register_operand" "")
1949 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1950 UNSPEC_SSE5_INTRINSIC))]
1953 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1954 if (TARGET_FUSED_MADD)
1956 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1957 operands[2], operands[3]));
;; Insn that matches the UNSPEC_SSE5_INTRINSIC form of fnmadd.  Operand 3 is
;; listed first in the pattern; the output template still prints operands in
;; %0..%3 order.  Validated by ix86_sse5_valid_op_p.
1962 (define_insn "*sse5i_fnmadd<mode>4"
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1966 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1968 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1969 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1970 UNSPEC_SSE5_INTRINSIC))]
1971 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1972 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
1976 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the intrinsic form of fnmsub.  Operands are registers and the
;; pattern is tagged with UNSPEC_SSE5_INTRINSIC.  When TARGET_FUSED_MADD is
;; set, gen_sse5_fnmsub<mode>4 is emitted instead.
1977 (define_expand "sse5i_fnmsub<mode>4"
1978 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1983 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1984 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1985 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1986 UNSPEC_SSE5_INTRINSIC))]
1989 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1990 if (TARGET_FUSED_MADD)
1992 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1993 operands[2], operands[3]));
;; Insn that matches the UNSPEC_SSE5_INTRINSIC form of fnmsub.  Unlike the
;; other parallel intrinsic insns, operand 1 has no "%" marker and the last
;; argument passed to ix86_sse5_valid_op_p is false.
1998 (define_insn "*sse5i_fnmsub<mode>4"
1999 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2007 UNSPEC_SSE5_INTRINSIC))]
2008 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2009 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010 [(set_attr "type" "ssemuladd")
2011 (set_attr "mode" "<MODE>")])
2013 ;; Scalar instructions
;; Expander for the scalar (vec_merge) intrinsic form of fmadd.  Operands are
;; registers and the pattern is tagged with UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, gen_sse5_vmfmadd<mode>4 is emitted instead.
2014 (define_expand "sse5i_vmfmadd<mode>4"
2015 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2017 [(vec_merge:SSEMODEF2P
2020 (match_operand:SSEMODEF2P 1 "register_operand" "")
2021 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2022 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2025 UNSPEC_SSE5_INTRINSIC))]
2028 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2029 if (TARGET_FUSED_MADD)
2031 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2032 operands[2], operands[3]));
2037 ;; For the scalar operations, use operand1 for the upper words that aren't
2038 ;; modified, so restrict the forms that are accepted.
;; Insn matching the scalar intrinsic form of fmadd.  Operand 1 must be a
;; register tied to the destination ("0") in both alternatives.  The template
;; uses the <ssemodesuffixf2s> suffix and the mode attribute is
;; <ssescalarmode>.
2039 (define_insn "*sse5i_vmfmadd<mode>4"
2040 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2042 [(vec_merge:SSEMODEF2P
2045 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2046 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2047 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2050 UNSPEC_SSE5_INTRINSIC))]
2051 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2052 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2053 [(set_attr "type" "ssemuladd")
2054 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) intrinsic form of fmsub.  When
;; TARGET_FUSED_MADD is set, gen_sse5_vmfmsub<mode>4 is emitted instead of
;; the UNSPEC_SSE5_INTRINSIC pattern.
2056 (define_expand "sse5i_vmfmsub<mode>4"
2057 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2059 [(vec_merge:SSEMODEF2P
2062 (match_operand:SSEMODEF2P 1 "register_operand" "")
2063 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2064 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2067 UNSPEC_SSE5_INTRINSIC))]
2070 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2071 if (TARGET_FUSED_MADD)
2073 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2074 operands[2], operands[3]));
;; Insn matching the scalar intrinsic form of fmsub.  Operand 1 is a register
;; tied to the destination; two alternatives differ in which of operands 2
;; and 3 may be in memory.
2079 (define_insn "*sse5i_vmfmsub<mode>4"
2080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2082 [(vec_merge:SSEMODEF2P
2085 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2087 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2090 UNSPEC_SSE5_INTRINSIC))]
2091 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2092 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2093 [(set_attr "type" "ssemuladd")
2094 (set_attr "mode" "<ssescalarmode>")])
2096 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar (vec_merge) intrinsic form of fnmadd.  Operand 3
;; is listed before operands 1 and 2 in the pattern.  When TARGET_FUSED_MADD
;; is set, gen_sse5_vmfnmadd<mode>4 is emitted instead.
2097 (define_expand "sse5i_vmfnmadd<mode>4"
2098 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2100 [(vec_merge:SSEMODEF2P
2102 (match_operand:SSEMODEF2P 3 "register_operand" "")
2104 (match_operand:SSEMODEF2P 1 "register_operand" "")
2105 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2108 UNSPEC_SSE5_INTRINSIC))]
2111 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2112 if (TARGET_FUSED_MADD)
2114 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2115 operands[2], operands[3]));
;; Insn matching the scalar intrinsic form of fnmadd.
;; NOTE(review): unlike the other scalar intrinsic insns here, operand 1 uses
;; "nonimmediate_operand" with a "%" marker and the last argument to
;; ix86_sse5_valid_op_p is true -- confirm this is intended.
2120 (define_insn "*sse5i_vmfnmadd<mode>4"
2121 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2123 [(vec_merge:SSEMODEF2P
2125 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2127 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2131 UNSPEC_SSE5_INTRINSIC))]
2132 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2133 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2134 [(set_attr "type" "ssemuladd")
2135 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) intrinsic form of fnmsub.  When
;; TARGET_FUSED_MADD is set, gen_sse5_vmfnmsub<mode>4 is emitted instead of
;; the UNSPEC_SSE5_INTRINSIC pattern.
2137 (define_expand "sse5i_vmfnmsub<mode>4"
2138 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2140 [(vec_merge:SSEMODEF2P
2144 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2145 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2146 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2149 UNSPEC_SSE5_INTRINSIC))]
2152 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2153 if (TARGET_FUSED_MADD)
2155 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2156 operands[2], operands[3]));
;; Insn matching the scalar intrinsic form of fnmsub.  Operand 1 is a
;; register tied to the destination in both alternatives; the mode attribute
;; is <ssescalarmode>.
2161 (define_insn "*sse5i_vmfnmsub<mode>4"
2162 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2164 [(vec_merge:SSEMODEF2P
2168 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2169 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2170 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2173 UNSPEC_SSE5_INTRINSIC))]
2174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2175 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2176 [(set_attr "type" "ssemuladd")
2177 (set_attr "mode" "<ssescalarmode>")])
2179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2181 ;; Parallel single-precision floating point conversion operations
2183 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 (constraint "ym") with float:V2SF.
;; Operand 1 is tied to the V4SF destination ("0"), so only %2 and %0 appear
;; in the output template.
2185 (define_insn "sse_cvtpi2ps"
2186 [(set (match_operand:V4SF 0 "register_operand" "=x")
2189 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2190 (match_operand:V4SF 1 "register_operand" "0")
2193 "cvtpi2ps\t{%2, %0|%0, %2}"
2194 [(set_attr "type" "ssecvt")
2195 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source wrapped in an unspec:V4SI, with a (parallel [0 1])
;; selector producing the V2SI destination (constraint "y").  The "unit"
;; attribute is set to mmx.
2197 (define_insn "sse_cvtps2pi"
2198 [(set (match_operand:V2SI 0 "register_operand" "=y")
2200 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2202 (parallel [(const_int 0) (const_int 1)])))]
2204 "cvtps2pi\t{%1, %0|%0, %1}"
2205 [(set_attr "type" "ssecvt")
2206 (set_attr "unit" "mmx")
2207 (set_attr "mode" "DI")])
;; cvttps2pi: same shape as sse_cvtps2pi but expressed with fix:V4SI rather
;; than an unspec.  Elements 0 and 1 are selected into the V2SI destination.
2209 (define_insn "sse_cvttps2pi"
2210 [(set (match_operand:V2SI 0 "register_operand" "=y")
2212 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2213 (parallel [(const_int 0) (const_int 1)])))]
2215 "cvttps2pi\t{%1, %0|%0, %1}"
2216 [(set_attr "type" "ssecvt")
2217 (set_attr "unit" "mmx")
2218 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ss: three-operand template, so operand 1 is any "x"
;; register rather than being tied to the destination.  Operand 2 is an SI
;; value in a general register or memory ("rm").
2220 (define_insn "*avx_cvtsi2ss"
2221 [(set (match_operand:V4SF 0 "register_operand" "=x")
2224 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2225 (match_operand:V4SF 1 "register_operand" "x")
2228 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2229 [(set_attr "type" "sseicvt")
2230 (set_attr "prefix" "vex")
2231 (set_attr "mode" "SF")])
;; cvtsi2ss: operand 1 is tied to the destination.  The two alternatives
;; separate the register ("r") and memory ("m") source so that the per-CPU
;; decode attributes can differ between them.
2233 (define_insn "sse_cvtsi2ss"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2237 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2238 (match_operand:V4SF 1 "register_operand" "0,0")
2241 "cvtsi2ss\t{%2, %0|%0, %2}"
2242 [(set_attr "type" "sseicvt")
2243 (set_attr "athlon_decode" "vector,double")
2244 (set_attr "amdfam10_decode" "vector,double")
2245 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ssq: DImode source, enabled only when both TARGET_AVX
;; and TARGET_64BIT hold.
2247 (define_insn "*avx_cvtsi2ssq"
2248 [(set (match_operand:V4SF 0 "register_operand" "=x")
2251 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2252 (match_operand:V4SF 1 "register_operand" "x")
2254 "TARGET_AVX && TARGET_64BIT"
2255 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2256 [(set_attr "type" "sseicvt")
2257 (set_attr "prefix" "vex")
2258 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode source, 64-bit targets only.
;; NOTE(review): the second alternative of operand 2 is "rm" whereas the
;; sibling sse_cvtsi2ss and sse2_cvtsi2sdq use "m" -- confirm whether the
;; overlap with the first ("r") alternative is intended.
2260 (define_insn "sse_cvtsi2ssq"
2261 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2264 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2265 (match_operand:V4SF 1 "register_operand" "0,0")
2267 "TARGET_SSE && TARGET_64BIT"
2268 "cvtsi2ssq\t{%2, %0|%0, %2}"
2269 [(set_attr "type" "sseicvt")
2270 (set_attr "athlon_decode" "vector,double")
2271 (set_attr "amdfam10_decode" "vector,double")
2272 (set_attr "mode" "SF")])
;; cvtss2si on element 0 of a V4SF source, expressed with UNSPEC_FIX_NOTRUNC.
;; The "%v" template prefix and "maybe_vex" attribute let one pattern serve
;; both encodings.
2274 (define_insn "sse_cvtss2si"
2275 [(set (match_operand:SI 0 "register_operand" "=r,r")
2278 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2279 (parallel [(const_int 0)]))]
2280 UNSPEC_FIX_NOTRUNC))]
2282 "%vcvtss2si\t{%1, %0|%0, %1}"
2283 [(set_attr "type" "sseicvt")
2284 (set_attr "athlon_decode" "double,vector")
2285 (set_attr "prefix_rep" "1")
2286 (set_attr "prefix" "maybe_vex")
2287 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SF operand rather than an
;; element selected out of a V4SF vector.
2289 (define_insn "sse_cvtss2si_2"
2290 [(set (match_operand:SI 0 "register_operand" "=r,r")
2291 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2292 UNSPEC_FIX_NOTRUNC))]
2294 "%vcvtss2si\t{%1, %0|%0, %1}"
2295 [(set_attr "type" "sseicvt")
2296 (set_attr "athlon_decode" "double,vector")
2297 (set_attr "amdfam10_decode" "double,double")
2298 (set_attr "prefix_rep" "1")
2299 (set_attr "prefix" "maybe_vex")
2300 (set_attr "mode" "SI")])
;; DImode-result form of cvtss2si on element 0 of a V4SF source; enabled only
;; for TARGET_SSE && TARGET_64BIT.  The "{q}" in the template is the
;; dialect-dependent size suffix.
2302 (define_insn "sse_cvtss2siq"
2303 [(set (match_operand:DI 0 "register_operand" "=r,r")
2306 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2307 (parallel [(const_int 0)]))]
2308 UNSPEC_FIX_NOTRUNC))]
2309 "TARGET_SSE && TARGET_64BIT"
2310 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2311 [(set_attr "type" "sseicvt")
2312 (set_attr "athlon_decode" "double,vector")
2313 (set_attr "prefix_rep" "1")
2314 (set_attr "prefix" "maybe_vex")
2315 (set_attr "mode" "DI")])
;; DImode-result variant of sse_cvtss2si_2: plain SF source wrapped in
;; UNSPEC_FIX_NOTRUNC, 64-bit targets only.
2317 (define_insn "sse_cvtss2siq_2"
2318 [(set (match_operand:DI 0 "register_operand" "=r,r")
2319 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2320 UNSPEC_FIX_NOTRUNC))]
2321 "TARGET_SSE && TARGET_64BIT"
2322 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "sseicvt")
2324 (set_attr "athlon_decode" "double,vector")
2325 (set_attr "amdfam10_decode" "double,double")
2326 (set_attr "prefix_rep" "1")
2327 (set_attr "prefix" "maybe_vex")
2328 (set_attr "mode" "DI")])
;; cvttss2si on element 0 of a V4SF source, SImode result.  No
;; UNSPEC_FIX_NOTRUNC wrapper appears in this pattern, unlike sse_cvtss2si.
2330 (define_insn "sse_cvttss2si"
2331 [(set (match_operand:SI 0 "register_operand" "=r,r")
2334 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2335 (parallel [(const_int 0)]))))]
2337 "%vcvttss2si\t{%1, %0|%0, %1}"
2338 [(set_attr "type" "sseicvt")
2339 (set_attr "athlon_decode" "double,vector")
2340 (set_attr "amdfam10_decode" "double,double")
2341 (set_attr "prefix_rep" "1")
2342 (set_attr "prefix" "maybe_vex")
2343 (set_attr "mode" "SI")])
;; DImode-result form of cvttss2si on element 0 of a V4SF source; enabled
;; only for TARGET_SSE && TARGET_64BIT.
2345 (define_insn "sse_cvttss2siq"
2346 [(set (match_operand:DI 0 "register_operand" "=r,r")
2349 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2350 (parallel [(const_int 0)]))))]
2351 "TARGET_SSE && TARGET_64BIT"
2352 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2353 [(set_attr "type" "sseicvt")
2354 (set_attr "athlon_decode" "double,vector")
2355 (set_attr "amdfam10_decode" "double,double")
2356 (set_attr "prefix_rep" "1")
2357 (set_attr "prefix" "maybe_vex")
2358 (set_attr "mode" "DI")])
;; vcvtdq2ps over the AVXMODEDCVTDQ2PS iterator: the source mode is taken
;; from the <avxcvtvecmode> attribute and converted with float.
2360 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2361 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2362 (float:AVXMODEDCVTDQ2PS
2363 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2365 "vcvtdq2ps\t{%1, %0|%0, %1}"
2366 [(set_attr "type" "ssecvt")
2367 (set_attr "prefix" "vex")
2368 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: V4SI register-or-memory source converted to V4SF with float.
2370 (define_insn "sse2_cvtdq2ps"
2371 [(set (match_operand:V4SF 0 "register_operand" "=x")
2372 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2374 "cvtdq2ps\t{%1, %0|%0, %1}"
2375 [(set_attr "type" "ssecvt")
2376 (set_attr "mode" "V4SF")])
;; vcvtps2dq over the AVXMODEDCVTPS2DQ iterator, expressed with
;; UNSPEC_FIX_NOTRUNC on an <avxcvtvecmode> source.
2378 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2379 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2380 (unspec:AVXMODEDCVTPS2DQ
2381 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2382 UNSPEC_FIX_NOTRUNC))]
2384 "vcvtps2dq\t{%1, %0|%0, %1}"
2385 [(set_attr "type" "ssecvt")
2386 (set_attr "prefix" "vex")
2387 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF source to V4SI destination via UNSPEC_FIX_NOTRUNC.
2389 (define_insn "sse2_cvtps2dq"
2390 [(set (match_operand:V4SI 0 "register_operand" "=x")
2391 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2392 UNSPEC_FIX_NOTRUNC))]
2394 "cvtps2dq\t{%1, %0|%0, %1}"
2395 [(set_attr "type" "ssecvt")
2396 (set_attr "prefix_data16" "1")
2397 (set_attr "mode" "TI")])
;; vcvttps2dq over the AVXMODEDCVTPS2DQ iterator, expressed with fix on an
;; <avxcvtvecmode> source.
2399 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2400 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2401 (fix:AVXMODEDCVTPS2DQ
2402 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2404 "vcvttps2dq\t{%1, %0|%0, %1}"
2405 [(set_attr "type" "ssecvt")
2406 (set_attr "prefix" "vex")
2407 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: V4SF source to V4SI destination, expressed with fix:V4SI.
2409 (define_insn "sse2_cvttps2dq"
2410 [(set (match_operand:V4SI 0 "register_operand" "=x")
2411 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2413 "cvttps2dq\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "ssecvt")
2415 (set_attr "prefix_rep" "1")
2416 (set_attr "mode" "TI")])
2418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420 ;; Parallel double-precision floating point conversion operations
2422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI source to V2DF with float.  The "y" and "m" sources are
;; separate alternatives so the "unit" attribute is mmx only for the first.
2424 (define_insn "sse2_cvtpi2pd"
2425 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2426 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2428 "cvtpi2pd\t{%1, %0|%0, %1}"
2429 [(set_attr "type" "ssecvt")
2430 (set_attr "unit" "mmx,*")
2431 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF source to a V2SI destination (constraint "y") via
;; UNSPEC_FIX_NOTRUNC; "unit" is mmx.
2433 (define_insn "sse2_cvtpd2pi"
2434 [(set (match_operand:V2SI 0 "register_operand" "=y")
2435 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2436 UNSPEC_FIX_NOTRUNC))]
2438 "cvtpd2pi\t{%1, %0|%0, %1}"
2439 [(set_attr "type" "ssecvt")
2440 (set_attr "unit" "mmx")
2441 (set_attr "prefix_data16" "1")
2442 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF source to a V2SI destination (constraint "y"), expressed
;; with fix:V2SI.
;; NOTE(review): "mode" is TI here but DI in sse2_cvtpd2pi -- confirm.
2444 (define_insn "sse2_cvttpd2pi"
2445 [(set (match_operand:V2SI 0 "register_operand" "=y")
2446 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2448 "cvttpd2pi\t{%1, %0|%0, %1}"
2449 [(set_attr "type" "ssecvt")
2450 (set_attr "unit" "mmx")
2451 (set_attr "prefix_data16" "1")
2452 (set_attr "mode" "TI")])
;; VEX form of cvtsi2sd: three-operand template with operand 1 in any "x"
;; register and an SI source in a general register or memory.
2454 (define_insn "*avx_cvtsi2sd"
2455 [(set (match_operand:V2DF 0 "register_operand" "=x")
2458 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2459 (match_operand:V2DF 1 "register_operand" "x")
2462 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2463 [(set_attr "type" "sseicvt")
2464 (set_attr "prefix" "vex")
2465 (set_attr "mode" "DF")])
;; cvtsi2sd: operand 1 tied to the destination; register and memory sources
;; are separate alternatives with distinct decode attributes.
2467 (define_insn "sse2_cvtsi2sd"
2468 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2471 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2472 (match_operand:V2DF 1 "register_operand" "0,0")
2475 "cvtsi2sd\t{%2, %0|%0, %2}"
2476 [(set_attr "type" "sseicvt")
2477 (set_attr "mode" "DF")
2478 (set_attr "athlon_decode" "double,direct")
2479 (set_attr "amdfam10_decode" "vector,double")])
;; VEX form of cvtsi2sdq: DImode source, enabled only when both TARGET_AVX
;; and TARGET_64BIT hold.
2481 (define_insn "*avx_cvtsi2sdq"
2482 [(set (match_operand:V2DF 0 "register_operand" "=x")
2485 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2486 (match_operand:V2DF 1 "register_operand" "x")
2488 "TARGET_AVX && TARGET_64BIT"
2489 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2490 [(set_attr "type" "sseicvt")
2491 (set_attr "prefix" "vex")
2492 (set_attr "mode" "DF")])
;; cvtsi2sdq: DImode source, enabled only for TARGET_SSE2 && TARGET_64BIT.
;; Operand 1 is tied to the destination.
2494 (define_insn "sse2_cvtsi2sdq"
2495 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2498 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2499 (match_operand:V2DF 1 "register_operand" "0,0")
2501 "TARGET_SSE2 && TARGET_64BIT"
2502 "cvtsi2sdq\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "mode" "DF")
2505 (set_attr "athlon_decode" "double,direct")
2506 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on element 0 of a V2DF source, SImode result, expressed with
;; UNSPEC_FIX_NOTRUNC.  "%v"/"maybe_vex" cover both encodings.
2508 (define_insn "sse2_cvtsd2si"
2509 [(set (match_operand:SI 0 "register_operand" "=r,r")
2512 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2513 (parallel [(const_int 0)]))]
2514 UNSPEC_FIX_NOTRUNC))]
2516 "%vcvtsd2si\t{%1, %0|%0, %1}"
2517 [(set_attr "type" "sseicvt")
2518 (set_attr "athlon_decode" "double,vector")
2519 (set_attr "prefix_rep" "1")
2520 (set_attr "prefix" "maybe_vex")
2521 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a plain DF operand rather than
;; an element selected out of a V2DF vector.
2523 (define_insn "sse2_cvtsd2si_2"
2524 [(set (match_operand:SI 0 "register_operand" "=r,r")
2525 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2526 UNSPEC_FIX_NOTRUNC))]
2528 "%vcvtsd2si\t{%1, %0|%0, %1}"
2529 [(set_attr "type" "sseicvt")
2530 (set_attr "athlon_decode" "double,vector")
2531 (set_attr "amdfam10_decode" "double,double")
2532 (set_attr "prefix_rep" "1")
2533 (set_attr "prefix" "maybe_vex")
2534 (set_attr "mode" "SI")])
;; DImode-result form of cvtsd2si on element 0 of a V2DF source; enabled only
;; for TARGET_SSE2 && TARGET_64BIT.
2536 (define_insn "sse2_cvtsd2siq"
2537 [(set (match_operand:DI 0 "register_operand" "=r,r")
2540 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2541 (parallel [(const_int 0)]))]
2542 UNSPEC_FIX_NOTRUNC))]
2543 "TARGET_SSE2 && TARGET_64BIT"
2544 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2545 [(set_attr "type" "sseicvt")
2546 (set_attr "athlon_decode" "double,vector")
2547 (set_attr "prefix_rep" "1")
2548 (set_attr "prefix" "maybe_vex")
2549 (set_attr "mode" "DI")])
;; DImode-result variant of sse2_cvtsd2si_2: plain DF source wrapped in
;; UNSPEC_FIX_NOTRUNC, 64-bit targets only.
2551 (define_insn "sse2_cvtsd2siq_2"
2552 [(set (match_operand:DI 0 "register_operand" "=r,r")
2553 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2554 UNSPEC_FIX_NOTRUNC))]
2555 "TARGET_SSE2 && TARGET_64BIT"
2556 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2557 [(set_attr "type" "sseicvt")
2558 (set_attr "athlon_decode" "double,vector")
2559 (set_attr "amdfam10_decode" "double,double")
2560 (set_attr "prefix_rep" "1")
2561 (set_attr "prefix" "maybe_vex")
2562 (set_attr "mode" "DI")])
;; cvttsd2si on element 0 of a V2DF source, SImode result.  No
;; UNSPEC_FIX_NOTRUNC wrapper appears in this pattern.
2564 (define_insn "sse2_cvttsd2si"
2565 [(set (match_operand:SI 0 "register_operand" "=r,r")
2568 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2569 (parallel [(const_int 0)]))))]
2571 "%vcvttsd2si\t{%1, %0|%0, %1}"
2572 [(set_attr "type" "sseicvt")
2573 (set_attr "prefix_rep" "1")
2574 (set_attr "prefix" "maybe_vex")
2575 (set_attr "mode" "SI")
2576 (set_attr "athlon_decode" "double,vector")
2577 (set_attr "amdfam10_decode" "double,double")])
;; DImode-result form of cvttsd2si on element 0 of a V2DF source; enabled
;; only for TARGET_SSE2 && TARGET_64BIT.
2579 (define_insn "sse2_cvttsd2siq"
2580 [(set (match_operand:DI 0 "register_operand" "=r,r")
2583 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2584 (parallel [(const_int 0)]))))]
2585 "TARGET_SSE2 && TARGET_64BIT"
2586 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2587 [(set_attr "type" "sseicvt")
2588 (set_attr "prefix_rep" "1")
2589 (set_attr "prefix" "maybe_vex")
2590 (set_attr "mode" "DI")
2591 (set_attr "athlon_decode" "double,vector")
2592 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd, 256-bit result: all four V4SI elements are converted to V4DF
;; with float.
2594 (define_insn "avx_cvtdq2pd256"
2595 [(set (match_operand:V4DF 0 "register_operand" "=x")
2596 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2598 "vcvtdq2pd\t{%1, %0|%0, %1}"
2599 [(set_attr "type" "ssecvt")
2600 (set_attr "prefix" "vex")
2601 (set_attr "mode" "V4DF")])
;; cvtdq2pd: elements 0 and 1 of the V4SI source feed the V2DF result.
;; "%v"/"maybe_vex" cover both encodings.
2603 (define_insn "sse2_cvtdq2pd"
2604 [(set (match_operand:V2DF 0 "register_operand" "=x")
2607 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2608 (parallel [(const_int 0) (const_int 1)]))))]
2610 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2611 [(set_attr "type" "ssecvt")
2612 (set_attr "prefix" "maybe_vex")
2613 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit V4DF source and V4SI result, via
;; UNSPEC_FIX_NOTRUNC.  The "{y}" suffix appears only in one assembler
;; dialect of the template.
2615 (define_insn "avx_cvtpd2dq256"
2616 [(set (match_operand:V4SI 0 "register_operand" "=x")
2617 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2618 UNSPEC_FIX_NOTRUNC))]
2620 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2621 [(set_attr "type" "ssecvt")
2622 (set_attr "prefix" "vex")
2623 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq.  The preparation statement sets operands[2] to the
;; V2SImode zero constant, which the matching insn *sse2_cvtpd2dq expects as
;; a const0_operand.
2625 (define_expand "sse2_cvtpd2dq"
2626 [(set (match_operand:V4SI 0 "register_operand" "")
2628 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2632 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvtpd2dq: V2DF source, with operand 2 required to be a V2SI zero
;; constant.  The output is chosen in C: the "v"-prefixed mnemonic with an
;; "{x}" suffix under TARGET_AVX, the plain mnemonic otherwise.
2634 (define_insn "*sse2_cvtpd2dq"
2635 [(set (match_operand:V4SI 0 "register_operand" "=x")
2637 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2639 (match_operand:V2SI 2 "const0_operand" "")))]
2641 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2642 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2643 [(set_attr "type" "ssecvt")
2644 (set_attr "prefix_rep" "1")
2645 (set_attr "prefix" "maybe_vex")
2646 (set_attr "mode" "TI")
2647 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq with a 256-bit V4DF source and V4SI result, expressed with
;; fix:V4SI.
2649 (define_insn "avx_cvttpd2dq256"
2650 [(set (match_operand:V4SI 0 "register_operand" "=x")
2651 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2653 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2654 [(set_attr "type" "ssecvt")
2655 (set_attr "prefix" "vex")
2656 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq.  The preparation statement sets operands[2] to
;; the V2SImode zero constant, which *sse2_cvttpd2dq expects as a
;; const0_operand.
2658 (define_expand "sse2_cvttpd2dq"
2659 [(set (match_operand:V4SI 0 "register_operand" "")
2661 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2664 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for cvttpd2dq: fix:V2SI of a V2DF source, with operand 2 required to
;; be a V2SI zero constant.  The mnemonic is chosen in C on TARGET_AVX.
2666 (define_insn "*sse2_cvttpd2dq"
2667 [(set (match_operand:V4SI 0 "register_operand" "=x")
2669 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2670 (match_operand:V2SI 2 "const0_operand" "")))]
2672 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2673 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2674 [(set_attr "type" "ssecvt")
2675 (set_attr "prefix_rep" "1")
2676 (set_attr "prefix" "maybe_vex")
2677 (set_attr "mode" "TI")
2678 (set_attr "amdfam10_decode" "double")])
;; VEX form of cvtsd2ss: float_truncate:V2SF of V2DF operand 2, combined with
;; V4SF operand 1, which may be any "x" register (three-operand template).
2680 (define_insn "*avx_cvtsd2ss"
2681 [(set (match_operand:V4SF 0 "register_operand" "=x")
2684 (float_truncate:V2SF
2685 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2686 (match_operand:V4SF 1 "register_operand" "x")
2689 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2690 [(set_attr "type" "ssecvt")
2691 (set_attr "prefix" "vex")
2692 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate:V2SF of V2DF operand 2; operand 1 is tied to the
;; destination.  Register and memory sources are separate alternatives with
;; distinct decode attributes.
2694 (define_insn "sse2_cvtsd2ss"
2695 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2698 (float_truncate:V2SF
2699 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2700 (match_operand:V4SF 1 "register_operand" "0,0")
2703 "cvtsd2ss\t{%2, %0|%0, %2}"
2704 [(set_attr "type" "ssecvt")
2705 (set_attr "athlon_decode" "vector,double")
2706 (set_attr "amdfam10_decode" "vector,double")
2707 (set_attr "mode" "SF")])
;; VEX form of cvtss2sd: elements 0 and 1 of V4SF operand 2 are selected;
;; V2DF operand 1 may be any "x" register (three-operand template).
2709 (define_insn "*avx_cvtss2sd"
2710 [(set (match_operand:V2DF 0 "register_operand" "=x")
2714 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2715 (parallel [(const_int 0) (const_int 1)])))
2716 (match_operand:V2DF 1 "register_operand" "x")
2719 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2720 [(set_attr "type" "ssecvt")
2721 (set_attr "prefix" "vex")
2722 (set_attr "mode" "DF")])
;; cvtss2sd: elements 0 and 1 of V4SF operand 2 are selected; operand 1 is
;; tied to the V2DF destination.
2724 (define_insn "sse2_cvtss2sd"
2725 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2729 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2730 (parallel [(const_int 0) (const_int 1)])))
2731 (match_operand:V2DF 1 "register_operand" "0,0")
2734 "cvtss2sd\t{%2, %0|%0, %2}"
2735 [(set_attr "type" "ssecvt")
2736 (set_attr "amdfam10_decode" "vector,double")
2737 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit V4DF source: float_truncate to a V4SF result.
2739 (define_insn "avx_cvtpd2ps256"
2740 [(set (match_operand:V4SF 0 "register_operand" "=x")
2741 (float_truncate:V4SF
2742 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2744 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2745 [(set_attr "type" "ssecvt")
2746 (set_attr "prefix" "vex")
2747 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps.  The preparation statement sets operands[2] to the
;; V2SFmode zero constant, which *sse2_cvtpd2ps expects as a const0_operand.
2749 (define_expand "sse2_cvtpd2ps"
2750 [(set (match_operand:V4SF 0 "register_operand" "")
2752 (float_truncate:V2SF
2753 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2756 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for cvtpd2ps: float_truncate:V2SF of a V2DF source, with operand 2
;; required to be a V2SF zero constant.  The mnemonic is chosen in C on
;; TARGET_AVX.
2758 (define_insn "*sse2_cvtpd2ps"
2759 [(set (match_operand:V4SF 0 "register_operand" "=x")
2761 (float_truncate:V2SF
2762 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2763 (match_operand:V2SF 2 "const0_operand" "")))]
2765 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2766 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2767 [(set_attr "type" "ssecvt")
2768 (set_attr "prefix_data16" "1")
2769 (set_attr "prefix" "maybe_vex")
2770 (set_attr "mode" "V4SF")
2771 (set_attr "amdfam10_decode" "double")])
;; vcvtps2pd with a 256-bit result: V4SF source, V4DF destination.
2773 (define_insn "avx_cvtps2pd256"
2774 [(set (match_operand:V4DF 0 "register_operand" "=x")
2776 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2778 "vcvtps2pd\t{%1, %0|%0, %1}"
2779 [(set_attr "type" "ssecvt")
2780 (set_attr "prefix" "vex")
2781 (set_attr "mode" "V4DF")])
;; cvtps2pd: elements 0 and 1 of the V4SF source feed the V2DF result.
;; "%v"/"maybe_vex" cover both encodings.
2783 (define_insn "sse2_cvtps2pd"
2784 [(set (match_operand:V2DF 0 "register_operand" "=x")
2787 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2788 (parallel [(const_int 0) (const_int 1)]))))]
2790 "%vcvtps2pd\t{%1, %0|%0, %1}"
2791 [(set_attr "type" "ssecvt")
2792 (set_attr "prefix" "maybe_vex")
2793 (set_attr "mode" "V2DF")
2794 (set_attr "amdfam10_decode" "direct")])
;; Expander producing a V2DF from a V4SF operand in two sets: a permutation
;; into a fresh V4SF temporary (operands[2], allocated with gen_reg_rtx),
;; followed by a set of operand 0 that selects elements 0 and 1.
2796 (define_expand "vec_unpacks_hi_v4sf"
2801 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2802 (parallel [(const_int 6)
2806 (set (match_operand:V2DF 0 "register_operand" "")
2810 (parallel [(const_int 0) (const_int 1)]))))]
2813 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF from elements 0 and 1 of a V4SF operand; no
;; temporary is needed for the low half.
2816 (define_expand "vec_unpacks_lo_v4sf"
2817 [(set (match_operand:V2DF 0 "register_operand" "")
2820 (match_operand:V4SF 1 "nonimmediate_operand" "")
2821 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: V8HI -> V4SF.  Unpacks via gen_vec_unpacks_hi_v8hi into a V4SI
;; temporary, then converts that temporary with gen_sse2_cvtdq2ps.
2824 (define_expand "vec_unpacks_float_hi_v8hi"
2825 [(match_operand:V4SF 0 "register_operand" "")
2826 (match_operand:V8HI 1 "register_operand" "")]
2829 rtx tmp = gen_reg_rtx (V4SImode);
2831 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2832 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF.  Unpacks via gen_vec_unpacks_lo_v8hi into a V4SI
;; temporary, then converts that temporary with gen_sse2_cvtdq2ps.
2836 (define_expand "vec_unpacks_float_lo_v8hi"
2837 [(match_operand:V4SF 0 "register_operand" "")
2838 (match_operand:V8HI 1 "register_operand" "")]
2841 rtx tmp = gen_reg_rtx (V4SImode);
2843 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2844 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF.  Unpacks via gen_vec_unpacku_hi_v8hi into a V4SI
;; temporary, then converts that temporary with gen_sse2_cvtdq2ps.
2848 (define_expand "vec_unpacku_float_hi_v8hi"
2849 [(match_operand:V4SF 0 "register_operand" "")
2850 (match_operand:V8HI 1 "register_operand" "")]
2853 rtx tmp = gen_reg_rtx (V4SImode);
2855 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2856 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF.  Unpacks via gen_vec_unpacku_lo_v8hi into a V4SI
;; temporary, then converts that temporary with gen_sse2_cvtdq2ps.
2860 (define_expand "vec_unpacku_float_lo_v8hi"
2861 [(match_operand:V4SF 0 "register_operand" "")
2862 (match_operand:V8HI 1 "register_operand" "")]
2865 rtx tmp = gen_reg_rtx (V4SImode);
2867 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2868 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing a V2DF from a V4SI operand in two sets: a permutation
;; into a fresh V4SI temporary (operands[2], allocated with gen_reg_rtx),
;; followed by a set of operand 0 that selects elements 0 and 1.
2872 (define_expand "vec_unpacks_float_hi_v4si"
2875 (match_operand:V4SI 1 "nonimmediate_operand" "")
2876 (parallel [(const_int 2)
2880 (set (match_operand:V2DF 0 "register_operand" "")
2884 (parallel [(const_int 0) (const_int 1)]))))]
2887 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF from elements 0 and 1 of a V4SI operand; no
;; temporary is needed for the low half.
2890 (define_expand "vec_unpacks_float_lo_v4si"
2891 [(set (match_operand:V2DF 0 "register_operand" "")
2894 (match_operand:V4SI 1 "nonimmediate_operand" "")
2895 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: two V2DF inputs -> one V4SF.  Each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary, and gen_sse_movlhps then
;; combines r1 and r2 into operand 0.
2898 (define_expand "vec_pack_trunc_v2df"
2899 [(match_operand:V4SF 0 "register_operand" "")
2900 (match_operand:V2DF 1 "nonimmediate_operand" "")
2901 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2906 r1 = gen_reg_rtx (V4SFmode);
2907 r2 = gen_reg_rtx (V4SFmode);
2909 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2910 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2911 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander: two V2DF inputs -> one V4SI.  Each input is converted with
;; gen_sse2_cvttpd2dq into a V4SI temporary; the temporaries and the
;; destination are then viewed as V2DI (gen_lowpart) and combined with
;; gen_sse2_punpcklqdq.
2915 (define_expand "vec_pack_sfix_trunc_v2df"
2916 [(match_operand:V4SI 0 "register_operand" "")
2917 (match_operand:V2DF 1 "nonimmediate_operand" "")
2918 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2923 r1 = gen_reg_rtx (V4SImode);
2924 r2 = gen_reg_rtx (V4SImode);
2926 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2927 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2928 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2929 gen_lowpart (V2DImode, r1),
2930 gen_lowpart (V2DImode, r2)));
;; Expander: two V2DF inputs -> one V4SI.  Same sequence as
;; vec_pack_sfix_trunc_v2df, but each input is converted with
;; gen_sse2_cvtpd2dq before the V2DI-viewed gen_sse2_punpcklqdq combine.
2934 (define_expand "vec_pack_sfix_v2df"
2935 [(match_operand:V4SI 0 "register_operand" "")
2936 (match_operand:V2DF 1 "nonimmediate_operand" "")
2937 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2942 r1 = gen_reg_rtx (V4SImode);
2943 r2 = gen_reg_rtx (V4SImode);
2945 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2946 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2947 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2948 gen_lowpart (V2DImode, r1),
2949 gen_lowpart (V2DImode, r2)));
2953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2955 ;; Parallel single-precision floating point element swizzling
2957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for movhlps.  All three operands are accepted as
;; nonimmediate_operand and then passed through ix86_fixup_binary_operands
;; before the pattern is matched.
2959 (define_expand "sse_movhlps_exp"
2960 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2963 (match_operand:V4SF 1 "nonimmediate_operand" "")
2964 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2965 (parallel [(const_int 6)
2970 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; VEX movhlps pattern with three alternatives, each with its own template:
;; register/register -> vmovhlps, offsettable-memory source -> vmovlps on
;; %H2, memory destination -> vmovhps.  The condition rejects the case where
;; operands 1 and 2 are both in memory.
2972 (define_insn "*avx_movhlps"
2973 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2976 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
2977 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2978 (parallel [(const_int 6)
2982 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2984 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2985 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2986 vmovhps\t{%2, %0|%0, %2}"
2987 [(set_attr "type" "ssemov")
2988 (set_attr "prefix" "vex")
2989 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Non-VEX movhlps pattern.  Operand 1 is tied to the destination in every
;; alternative; the three templates are movhlps, movlps on %H2, and movhps.
;; The condition rejects the case where operands 1 and 2 are both in memory.
2991 (define_insn "sse_movhlps"
2992 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2995 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2996 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2997 (parallel [(const_int 6)
3001 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3003 movhlps\t{%2, %0|%0, %2}
3004 movlps\t{%H2, %0|%0, %H2}
3005 movhps\t{%2, %0|%0, %2}"
3006 [(set_attr "type" "ssemov")
3007 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the V4SF movlhps pattern.  All three operands accept
;; registers or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands.
3009 (define_expand "sse_movlhps_exp"
3010 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3013 (match_operand:V4SF 1 "nonimmediate_operand" "")
3014 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3015 (parallel [(const_int 0)
3020 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of movlhps on V4SF.  Three alternatives:
;;   0: all registers                          -> vmovlhps (3 operands)
;;   1: operand 2 in memory                    -> vmovhps load
;;   2: offsettable memory dest tied to op 1   -> vmovlps store to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
3022 (define_insn "*avx_movlhps"
3023 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3026 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3027 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3028 (parallel [(const_int 0)
3032 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3034 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3035 vmovhps\t{%2, %1, %0|%0, %1, %2}
3036 vmovlps\t{%2, %H0|%H0, %2}"
3037 [(set_attr "type" "ssemov")
3038 (set_attr "prefix" "vex")
3039 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE form of movlhps on V4SF.  Operand 1 is tied to the
;; destination in every alternative:
;;   0: register source                -> movlhps
;;   1: operand 2 in memory            -> movhps load
;;   2: offsettable memory destination -> movlps store to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
3041 (define_insn "sse_movlhps"
3042 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3045 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3046 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3047 (parallel [(const_int 0)
3051 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3053 movlhps\t{%2, %0|%0, %2}
3054 movhps\t{%2, %0|%0, %2}
3055 movlps\t{%2, %H0|%H0, %2}"
3056 [(set_attr "type" "ssemov")
3057 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps on V8SF.  The selector interleaves elements 2,3 and
;; 6,7 of operand 1 with the matching elements of operand 2 (indices 10,11
;; and 14,15).  Operand 1 must be a register; operand 2 may be memory.
3059 (define_insn "avx_unpckhps256"
3060 [(set (match_operand:V8SF 0 "register_operand" "=x")
3063 (match_operand:V8SF 1 "register_operand" "x")
3064 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3065 (parallel [(const_int 2) (const_int 10)
3066 (const_int 3) (const_int 11)
3067 (const_int 6) (const_int 14)
3068 (const_int 7) (const_int 15)])))]
3070 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3071 [(set_attr "type" "sselog")
3072 (set_attr "prefix" "vex")
3073 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpckhps on V4SF: selector 2,6,3,7 interleaves the upper
;; two elements of operand 1 with the upper two of operand 2.
;; Three-operand VEX form, so operand 1 is not tied to the destination.
3075 (define_insn "*avx_unpckhps"
3076 [(set (match_operand:V4SF 0 "register_operand" "=x")
3079 (match_operand:V4SF 1 "register_operand" "x")
3080 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3081 (parallel [(const_int 2) (const_int 6)
3082 (const_int 3) (const_int 7)])))]
3084 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3085 [(set_attr "type" "sselog")
3086 (set_attr "prefix" "vex")
3087 (set_attr "mode" "V4SF")])
;; Legacy-SSE unpckhps on V4SF: same selector as the AVX pattern
;; (2,6,3,7) but operand 1 is tied to the destination ("0" constraint).
3089 (define_insn "sse_unpckhps"
3090 [(set (match_operand:V4SF 0 "register_operand" "=x")
3093 (match_operand:V4SF 1 "register_operand" "0")
3094 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3095 (parallel [(const_int 2) (const_int 6)
3096 (const_int 3) (const_int 7)])))]
3098 "unpckhps\t{%2, %0|%0, %2}"
3099 [(set_attr "type" "sselog")
3100 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps on V8SF.  The selector interleaves elements 0,1 and
;; 4,5 of operand 1 with the matching elements of operand 2 (indices 8,9
;; and 12,13).  Operand 1 must be a register; operand 2 may be memory.
3102 (define_insn "avx_unpcklps256"
3103 [(set (match_operand:V8SF 0 "register_operand" "=x")
3106 (match_operand:V8SF 1 "register_operand" "x")
3107 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3108 (parallel [(const_int 0) (const_int 8)
3109 (const_int 1) (const_int 9)
3110 (const_int 4) (const_int 12)
3111 (const_int 5) (const_int 13)])))]
3113 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3114 [(set_attr "type" "sselog")
3115 (set_attr "prefix" "vex")
3116 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpcklps on V4SF: selector 0,4,1,5 interleaves the lower
;; two elements of operand 1 with the lower two of operand 2.
3118 (define_insn "*avx_unpcklps"
3119 [(set (match_operand:V4SF 0 "register_operand" "=x")
3122 (match_operand:V4SF 1 "register_operand" "x")
3123 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3124 (parallel [(const_int 0) (const_int 4)
3125 (const_int 1) (const_int 5)])))]
3127 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3128 [(set_attr "type" "sselog")
3129 (set_attr "prefix" "vex")
3130 (set_attr "mode" "V4SF")])
;; Legacy-SSE unpcklps on V4SF: selector 0,4,1,5 with operand 1 tied to
;; the destination ("0" constraint).
3132 (define_insn "sse_unpcklps"
3133 [(set (match_operand:V4SF 0 "register_operand" "=x")
3136 (match_operand:V4SF 1 "register_operand" "0")
3137 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3138 (parallel [(const_int 0) (const_int 4)
3139 (const_int 1) (const_int 5)])))]
3141 "unpcklps\t{%2, %0|%0, %2}"
3142 [(set_attr "type" "sselog")
3143 (set_attr "mode" "V4SF")])
3145 ;; These are modeled with the same vec_concat as the others so that we
3146 ;; capture users of shufps that can use the new instructions
;; avx_movshdup256: single-source V8SF pattern whose selector repeats each
;; odd-numbered element (1,1,3,3,5,5,7,7); emitted as vmovshdup.
3147 (define_insn "avx_movshdup256"
3148 [(set (match_operand:V8SF 0 "register_operand" "=x")
3151 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3153 (parallel [(const_int 1) (const_int 1)
3154 (const_int 3) (const_int 3)
3155 (const_int 5) (const_int 5)
3156 (const_int 7) (const_int 7)])))]
3158 "vmovshdup\t{%1, %0|%0, %1}"
3159 [(set_attr "type" "sse")
3160 (set_attr "prefix" "vex")
3161 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF with a single register-or-memory source.
;; The "%v" template prefix together with prefix "maybe_vex" lets the same
;; pattern serve both the legacy and the VEX encoding.
3163 (define_insn "sse3_movshdup"
3164 [(set (match_operand:V4SF 0 "register_operand" "=x")
3167 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3169 (parallel [(const_int 1)
3174 "%vmovshdup\t{%1, %0|%0, %1}"
3175 [(set_attr "type" "sse")
3176 (set_attr "prefix_rep" "1")
3177 (set_attr "prefix" "maybe_vex")
3178 (set_attr "mode" "V4SF")])
;; avx_movsldup256: single-source V8SF pattern whose selector repeats each
;; even-numbered element (0,0,2,2,4,4,6,6); emitted as vmovsldup.
3180 (define_insn "avx_movsldup256"
3181 [(set (match_operand:V8SF 0 "register_operand" "=x")
3184 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3186 (parallel [(const_int 0) (const_int 0)
3187 (const_int 2) (const_int 2)
3188 (const_int 4) (const_int 4)
3189 (const_int 6) (const_int 6)])))]
3191 "vmovsldup\t{%1, %0|%0, %1}"
3192 [(set_attr "type" "sse")
3193 (set_attr "prefix" "vex")
3194 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF with a single register-or-memory source.
;; The "%v" template prefix together with prefix "maybe_vex" lets the same
;; pattern serve both the legacy and the VEX encoding.
3196 (define_insn "sse3_movsldup"
3197 [(set (match_operand:V4SF 0 "register_operand" "=x")
3200 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3202 (parallel [(const_int 0)
3207 "%vmovsldup\t{%1, %0|%0, %1}"
3208 [(set_attr "type" "sse")
3209 (set_attr "prefix_rep" "1")
3210 (set_attr "prefix" "maybe_vex")
3211 (set_attr "mode" "V4SF")])
;; Expander avx_shufps256: splits the immediate in operand 3 into eight
;; explicit element indices for avx_shufps256_1.  Each 2-bit field of the
;; mask is used twice: once for the first four result elements (offsets
;; +0 for fields 0/1, +8 for fields 2/3) and once more, shifted by 4, for
;; the last four result elements (+4 and +12).
3213 (define_expand "avx_shufps256"
3214 [(match_operand:V8SF 0 "register_operand" "")
3215 (match_operand:V8SF 1 "register_operand" "")
3216 (match_operand:V8SF 2 "nonimmediate_operand" "")
3217 (match_operand:SI 3 "const_int_operand" "")]
3220 int mask = INTVAL (operands[3]);
3221 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3222 GEN_INT ((mask >> 0) & 3),
3223 GEN_INT ((mask >> 2) & 3),
3224 GEN_INT (((mask >> 4) & 3) + 8),
3225 GEN_INT (((mask >> 6) & 3) + 8),
3226 GEN_INT (((mask >> 0) & 3) + 4),
3227 GEN_INT (((mask >> 2) & 3) + 4),
3228 GEN_INT (((mask >> 4) & 3) + 12),
3229 GEN_INT (((mask >> 6) & 3) + 12)));
3233 ;; Each 2-bit field of the mask selects 2 elements.
;; avx_shufps256_1: V8SF shuffle with eight explicit selector operands.
;; The condition requires operands 7..10 to equal operands 3..6 plus 4,
;; so both groups of four result elements use the same 2-bit fields.  The
;; output code rebuilds the 8-bit immediate from operands 3..6 only and
;; stores it back into operands[3] for the vshufps template.
3234 (define_insn "avx_shufps256_1"
3235 [(set (match_operand:V8SF 0 "register_operand" "=x")
3238 (match_operand:V8SF 1 "register_operand" "x")
3239 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3240 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3241 (match_operand 4 "const_0_to_3_operand" "")
3242 (match_operand 5 "const_8_to_11_operand" "")
3243 (match_operand 6 "const_8_to_11_operand" "")
3244 (match_operand 7 "const_4_to_7_operand" "")
3245 (match_operand 8 "const_4_to_7_operand" "")
3246 (match_operand 9 "const_12_to_15_operand" "")
3247 (match_operand 10 "const_12_to_15_operand" "")])))]
3249 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3250 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3251 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3252 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3255 mask = INTVAL (operands[3]);
3256 mask |= INTVAL (operands[4]) << 2;
3257 mask |= (INTVAL (operands[5]) - 8) << 4;
3258 mask |= (INTVAL (operands[6]) - 8) << 6;
3259 operands[3] = GEN_INT (mask);
3261 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3263 [(set_attr "type" "sselog")
3264 (set_attr "prefix" "vex")
3265 (set_attr "mode" "V8SF")])
;; Expander sse_shufps: splits the 8-bit immediate in operand 3 into four
;; element indices for sse_shufps_v4sf.  Fields 0 and 1 index 0..3;
;; fields 2 and 3 are biased by 4 so they index 4..7.
3267 (define_expand "sse_shufps"
3268 [(match_operand:V4SF 0 "register_operand" "")
3269 (match_operand:V4SF 1 "register_operand" "")
3270 (match_operand:V4SF 2 "nonimmediate_operand" "")
3271 (match_operand:SI 3 "const_int_operand" "")]
3274 int mask = INTVAL (operands[3]);
3275 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3276 GEN_INT ((mask >> 0) & 3),
3277 GEN_INT ((mask >> 2) & 3),
3278 GEN_INT (((mask >> 4) & 3) + 4),
3279 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX vshufps for every mode in SSEMODE4S.  Selects two elements from
;; operand 1 (indices 0..3) and two from operand 2 (indices 4..7) of the
;; concatenated pair.  The output code packs the four selector operands
;; back into one immediate (2 bits each, second pair rebased by -4) and
;; overwrites operands[3] with it before returning the template.
3283 (define_insn "*avx_shufps_<mode>"
3284 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3285 (vec_select:SSEMODE4S
3286 (vec_concat:<ssedoublesizemode>
3287 (match_operand:SSEMODE4S 1 "register_operand" "x")
3288 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3289 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3290 (match_operand 4 "const_0_to_3_operand" "")
3291 (match_operand 5 "const_4_to_7_operand" "")
3292 (match_operand 6 "const_4_to_7_operand" "")])))]
3296 mask |= INTVAL (operands[3]) << 0;
3297 mask |= INTVAL (operands[4]) << 2;
3298 mask |= (INTVAL (operands[5]) - 4) << 4;
3299 mask |= (INTVAL (operands[6]) - 4) << 6;
3300 operands[3] = GEN_INT (mask);
3302 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3304 [(set_attr "type" "sselog")
3305 (set_attr "prefix" "vex")
3306 (set_attr "mode" "V4SF")])
;; Legacy-SSE shufps for every mode in SSEMODE4S.  Same RTL shape as the
;; AVX pattern, but operand 1 is tied to the destination.  The output code
;; packs the four selector operands into one immediate (2 bits each,
;; second pair rebased by -4) and overwrites operands[3] with it.
3308 (define_insn "sse_shufps_<mode>"
3309 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3310 (vec_select:SSEMODE4S
3311 (vec_concat:<ssedoublesizemode>
3312 (match_operand:SSEMODE4S 1 "register_operand" "0")
3313 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3314 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3315 (match_operand 4 "const_0_to_3_operand" "")
3316 (match_operand 5 "const_4_to_7_operand" "")
3317 (match_operand 6 "const_4_to_7_operand" "")])))]
3321 mask |= INTVAL (operands[3]) << 0;
3322 mask |= INTVAL (operands[4]) << 2;
3323 mask |= (INTVAL (operands[5]) - 4) << 4;
3324 mask |= (INTVAL (operands[6]) - 4) << 6;
3325 operands[3] = GEN_INT (mask);
3327 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3329 [(set_attr "type" "sselog")
3330 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of a V4SF into a V2SF destination.
;;   0: register -> memory            : movhps store
;;   1: register -> register          : movhlps
;;   2: offsettable memory -> register: movlps load from %H1
;; "%v" plus prefix "maybe_vex" covers both legacy and VEX encodings; %d0
;; is used for the destination in the register alternatives.
3332 (define_insn "sse_storehps"
3333 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3335 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3336 (parallel [(const_int 2) (const_int 3)])))]
3339 %vmovhps\t{%1, %0|%0, %1}
3340 %vmovhlps\t{%1, %d0|%d0, %1}
3341 %vmovlps\t{%H1, %d0|%d0, %H1}"
3342 [(set_attr "type" "ssemov")
3343 (set_attr "prefix" "maybe_vex")
3344 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for the loadhps pattern: elements 0 and 1 of V4SF operand 1
;; are combined with V2SF operand 2.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands.
3346 (define_expand "sse_loadhps_exp"
3347 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3350 (match_operand:V4SF 1 "nonimmediate_operand" "")
3351 (parallel [(const_int 0) (const_int 1)]))
3352 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3354 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of loadhps: keeps elements 0,1 of operand 1 and takes the
;; other half from V2SF operand 2.
;;   0: operand 2 in memory                     -> vmovhps load
;;   1: operand 2 in a register                 -> vmovlhps
;;   2: offsettable memory dest tied to op 1    -> vmovlps store to %H0
3356 (define_insn "*avx_loadhps"
3357 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3360 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3361 (parallel [(const_int 0) (const_int 1)]))
3362 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3365 vmovhps\t{%2, %1, %0|%0, %1, %2}
3366 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3367 vmovlps\t{%2, %H0|%H0, %2}"
3368 [(set_attr "type" "ssemov")
3369 (set_attr "prefix" "vex")
3370 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy-SSE form of loadhps; operand 1 is tied to the destination in
;; every alternative.
;;   0: operand 2 in memory            -> movhps load
;;   1: operand 2 in a register        -> movlhps
;;   2: offsettable memory destination -> movlps store to %H0
3372 (define_insn "sse_loadhps"
3373 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3376 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3377 (parallel [(const_int 0) (const_int 1)]))
3378 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3381 movhps\t{%2, %0|%0, %2}
3382 movlhps\t{%2, %0|%0, %2}
3383 movlps\t{%2, %H0|%H0, %2}"
3384 [(set_attr "type" "ssemov")
3385 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX extraction of elements 0 and 1 of a V4SF into a V2SF destination.
;;   0: register -> memory   : vmovlps store
;;   1: register -> register : vmovaps (whole-register copy)
;;   2: memory   -> register : vmovlps load, with %0 also used as the
;;      second source of the three-operand form
;; NOTE(review): alternative 1 is tagged mode V2DF here while the matching
;; alternative of sse_storelps is tagged V4SF -- confirm which is intended.
3387 (define_insn "*avx_storelps"
3388 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3390 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3391 (parallel [(const_int 0) (const_int 1)])))]
3394 vmovlps\t{%1, %0|%0, %1}
3395 vmovaps\t{%1, %0|%0, %1}
3396 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3397 [(set_attr "type" "ssemov")
3398 (set_attr "prefix" "vex")
3399 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy-SSE extraction of elements 0 and 1 of a V4SF into a V2SF
;; destination.
;;   0: register -> memory   : movlps store
;;   1: register -> register : movaps (whole-register copy)
;;   2: memory   -> register : movlps load
3401 (define_insn "sse_storelps"
3402 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3404 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3405 (parallel [(const_int 0) (const_int 1)])))]
3408 movlps\t{%1, %0|%0, %1}
3409 movaps\t{%1, %0|%0, %1}
3410 movlps\t{%1, %0|%0, %1}"
3411 [(set_attr "type" "ssemov")
3412 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for the loadlps pattern: V2SF operand 2 is combined with
;; elements 2 and 3 of V4SF operand 1.  The preparation statement passes
;; the operands through ix86_fixup_binary_operands.
3414 (define_expand "sse_loadlps_exp"
3415 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3417 (match_operand:V2SF 2 "nonimmediate_operand" "")
3419 (match_operand:V4SF 1 "nonimmediate_operand" "")
3420 (parallel [(const_int 2) (const_int 3)]))))]
3422 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of loadlps: V2SF operand 2 supplies one half of the result and
;; elements 2,3 of V4SF operand 1 supply the other.
;;   0: both sources in registers        -> vshufps with immediate 0xe4
;;   1: operand 2 in memory              -> vmovlps load (3 operands)
;;   2: memory destination tied to op 1  -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: the template has three
;; register operands plus the immediate and the insn is marked prefix
;; "vex", which the legacy "shufps" spelling cannot express.
3424 (define_insn "*avx_loadlps"
3425 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3427 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3429 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3430 (parallel [(const_int 2) (const_int 3)]))))]
3433 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3434 vmovlps\t{%2, %1, %0|%0, %1, %2}
3435 vmovlps\t{%2, %0|%0, %2}"
3436 [(set_attr "type" "sselog,ssemov,ssemov")
3437 (set_attr "prefix" "vex")
3438 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy-SSE form of loadlps.
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied -> movlps load
;;   2: memory destination, operand 1 tied  -> movlps store
3440 (define_insn "sse_loadlps"
3441 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3443 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3445 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3446 (parallel [(const_int 2) (const_int 3)]))))]
3449 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3450 movlps\t{%2, %0|%0, %2}
3451 movlps\t{%2, %0|%0, %2}"
3452 [(set_attr "type" "sselog,ssemov,ssemov")
3453 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX register-to-register vmovss on V4SF (three-operand form); all
;; operands are registers and the insn is tagged scalar mode SF.
3455 (define_insn "*avx_movss"
3456 [(set (match_operand:V4SF 0 "register_operand" "=x")
3458 (match_operand:V4SF 2 "register_operand" "x")
3459 (match_operand:V4SF 1 "register_operand" "x")
3462 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3463 [(set_attr "type" "ssemov")
3464 (set_attr "prefix" "vex")
3465 (set_attr "mode" "SF")])
;; Legacy-SSE register-to-register movss on V4SF; operand 1 is tied to the
;; destination and the insn is tagged scalar mode SF.
3467 (define_insn "sse_movss"
3468 [(set (match_operand:V4SF 0 "register_operand" "=x")
3470 (match_operand:V4SF 2 "register_operand" "x")
3471 (match_operand:V4SF 1 "register_operand" "0")
3474 "movss\t{%2, %0|%0, %2}"
3475 [(set_attr "type" "ssemov")
3476 (set_attr "mode" "SF")])
;; AVX broadcast of an SF register into a V4SF register, implemented as
;; vshufps with immediate 0 using operand 1 for both sources.
3478 (define_insn "*vec_dupv4sf_avx"
3479 [(set (match_operand:V4SF 0 "register_operand" "=x")
3481 (match_operand:SF 1 "register_operand" "x")))]
3483 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3484 [(set_attr "type" "sselog1")
3485 (set_attr "prefix" "vex")
3486 (set_attr "mode" "V4SF")])
;; Legacy-SSE broadcast of an SF register into a V4SF register.  Operand 1
;; is tied to the destination, so shufps with immediate 0 is applied to %0
;; against itself.
3488 (define_insn "*vec_dupv4sf"
3489 [(set (match_operand:V4SF 0 "register_operand" "=x")
3491 (match_operand:SF 1 "register_operand" "0")))]
3493 "shufps\t{$0, %0, %0|%0, %0, 0}"
3494 [(set_attr "type" "sselog1")
3495 (set_attr "mode" "V4SF")])
;; Builds a V2SF from two SF operands when AVX is in use.
;;   0: both in SSE registers         -> vunpcklps
;;   1: operand 2 in memory           -> vinsertps with immediate 0x10
;;   2: operand 1 in memory, op 2 "C" -> vmovss load
;;   3: MMX destination               -> punpckldq
;;   4: MMX destination, op 2 "C"     -> movd load
;; The prefix attribute is "orig" for the two MMX alternatives (3,4) and
;; "vex" for the rest.
3497 (define_insn "*vec_concatv2sf_avx"
3498 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3500 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3501 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3504 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3505 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3506 vmovss\t{%1, %0|%0, %1}
3507 punpckldq\t{%2, %0|%0, %2}
3508 movd\t{%1, %0|%0, %1}"
3509 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3510 (set (attr "prefix")
3511 (if_then_else (eq_attr "alternative" "3,4")
3512 (const_string "orig")
3513 (const_string "vex")))
3514 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3516 ;; Although insertps takes register source, we prefer
3517 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from two SF operands; operand 1 is tied to the
;; destination in the register alternatives.
;;   0: register operand 2            -> unpcklps
;;   1: operand 2 in memory           -> insertps with immediate 0x10
;;   2: operand 1 in memory, op 2 "C" -> movss load
;;   3: MMX destination               -> punpckldq
;;   4: MMX destination, op 2 "C"     -> movd load
;; Only the insertps alternative sets prefix_extra.
3518 (define_insn "*vec_concatv2sf_sse4_1"
3519 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3521 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3522 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3525 unpcklps\t{%2, %0|%0, %2}
3526 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3527 movss\t{%1, %0|%0, %1}
3528 punpckldq\t{%2, %0|%0, %2}
3529 movd\t{%1, %0|%0, %1}"
3530 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3531 (set_attr "prefix_extra" "*,1,*,*,*")
3532 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3534 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3535 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3536 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands without insertps.  Operand 2 is
;; restricted to a register or the "C" constant.
;;   0: SSE registers, op 1 tied      -> unpcklps
;;   1: operand 1 in memory, op 2 "C" -> movss load
;;   2: MMX registers, op 1 tied      -> punpckldq
;;   3: MMX destination, op 2 "C"     -> movd load
3537 (define_insn "*vec_concatv2sf_sse"
3538 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3540 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3541 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3544 unpcklps\t{%2, %0|%0, %2}
3545 movss\t{%1, %0|%0, %1}
3546 punpckldq\t{%2, %0|%0, %2}
3547 movd\t{%1, %0|%0, %1}"
3548 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3549 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX: builds a V4SF from two V2SF operands.  Register operand 2 uses
;; vmovlhps; memory operand 2 uses a vmovhps load.
3551 (define_insn "*vec_concatv4sf_avx"
3552 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3554 (match_operand:V2SF 1 "register_operand" " x,x")
3555 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3558 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3559 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3560 [(set_attr "type" "ssemov")
3561 (set_attr "prefix" "vex")
3562 (set_attr "mode" "V4SF,V2SF")])
;; Legacy SSE: builds a V4SF from two V2SF operands with operand 1 tied
;; to the destination.  Register operand 2 uses movlhps; memory operand 2
;; uses a movhps load.
3564 (define_insn "*vec_concatv4sf_sse"
3565 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3567 (match_operand:V2SF 1 "register_operand" " 0,0")
3568 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3571 movlhps\t{%2, %0|%0, %2}
3572 movhps\t{%2, %0|%0, %2}"
3573 [(set_attr "type" "ssemov")
3574 (set_attr "mode" "V4SF,V2SF")])
;; Expander vec_init for every mode in SSEMODE: delegates to
;; ix86_expand_vector_init with operand 0 as the target and operand 1 as
;; the initializer; the first argument is false.
3576 (define_expand "vec_init<mode>"
3577 [(match_operand:SSEMODE 0 "register_operand" "")
3578 (match_operand 1 "" "")]
3581 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX pattern combining SF operand 2 with V4SF operand 1.
;;   0: registers                         -> vmovss (3 operands)
;;   1: operand 2 in memory, op 1 "C"     -> vmovss load
;;   2: operand 2 in a GPR, op 1 "C"      -> vmovd
;;   3: memory destination tied to op 1   (constraints "m"/"x*rfF"/"0")
3585 (define_insn "*vec_setv4sf_0_avx"
3586 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3589 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3590 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3594 vmovss\t{%2, %1, %0|%0, %1, %2}
3595 vmovss\t{%2, %0|%0, %2}
3596 vmovd\t{%2, %0|%0, %2}
3598 [(set_attr "type" "ssemov")
3599 (set_attr "prefix" "vex")
3600 (set_attr "mode" "SF")])
;; Legacy-SSE pattern combining SF operand 2 with V4SF operand 1.
;;   0: registers, op 1 tied              -> movss
;;   1: operand 2 in memory, op 1 "C"     -> movss load
;;   2: operand 2 in a GPR, op 1 "C"      -> movd (destination class Y2)
;;   3: memory destination tied to op 1   (constraints "m"/"x*rfF"/"0")
3602 (define_insn "vec_setv4sf_0"
3603 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3606 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3607 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3611 movss\t{%2, %0|%0, %2}
3612 movss\t{%2, %0|%0, %2}
3613 movd\t{%2, %0|%0, %2}
3615 [(set_attr "type" "ssemov")
3616 (set_attr "mode" "SF")])
3618 ;; A subset is vec_setv4sf.
;; AVX element insert into a V4SF.  Operand 3 is a power of two in 1..8;
;; the output code converts it to an element index with exact_log2 and
;; shifts that index left by 4 to form the vinsertps immediate.
3619 (define_insn "*vec_setv4sf_avx"
3620 [(set (match_operand:V4SF 0 "register_operand" "=x")
3623 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3624 (match_operand:V4SF 1 "register_operand" "x")
3625 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3628 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3629 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3631 [(set_attr "type" "sselog")
3632 (set_attr "prefix" "vex")
3633 (set_attr "mode" "V4SF")])
;; SSE4.1 element insert into a V4SF with operand 1 tied to the
;; destination.  Operand 3 is a power of two in 1..8; the output code
;; converts it to an element index with exact_log2 and shifts that index
;; left by 4 to form the insertps immediate.
3635 (define_insn "*vec_setv4sf_sse4_1"
3636 [(set (match_operand:V4SF 0 "register_operand" "=x")
3639 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3640 (match_operand:V4SF 1 "register_operand" "0")
3641 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3644 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3645 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3647 [(set_attr "type" "sselog")
3648 (set_attr "prefix_extra" "1")
3649 (set_attr "mode" "V4SF")])
;; AVX vinsertps expressed as an unspec over operand 2 (register or
;; memory), operand 1 (register) and an 8-bit immediate in operand 3,
;; which is passed to the instruction unchanged.
3651 (define_insn "*avx_insertps"
3652 [(set (match_operand:V4SF 0 "register_operand" "=x")
3653 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3654 (match_operand:V4SF 1 "register_operand" "x")
3655 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3658 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3659 [(set_attr "type" "sselog")
3660 (set_attr "prefix" "vex")
3661 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps expressed as an unspec.  Operand 1 is tied to the
;; destination, operand 2 must be a register here, and the 8-bit immediate
;; in operand 3 is passed to the instruction unchanged.
3663 (define_insn "sse4_1_insertps"
3664 [(set (match_operand:V4SF 0 "register_operand" "=x")
3665 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
3666 (match_operand:V4SF 1 "register_operand" "0")
3667 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3670 "insertps\t{%3, %2, %0|%0, %2, %3}";
3671 [(set_attr "type" "sselog")
3672 (set_attr "prefix_extra" "1")
3673 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a V4SF memory
;; destination with a non-memory SF operand 1: it becomes one SFmode move
;; of operand 1 to offset 0 of operand 0.
3676 [(set (match_operand:V4SF 0 "memory_operand" "")
3679 (match_operand:SF 1 "nonmemory_operand" ""))
3682 "TARGET_SSE && reload_completed"
3685 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander vec_set for every mode in SSEMODE: delegates to
;; ix86_expand_vector_set with operand 0 as the vector, operand 1 as the
;; scalar value and the integer value of operand 2 as the element index;
;; the first argument is false.
3689 (define_expand "vec_set<mode>"
3690 [(match_operand:SSEMODE 0 "register_operand" "")
3691 (match_operand:<ssescalarmode> 1 "register_operand" "")
3692 (match_operand 2 "const_int_operand" "")]
3695 ix86_expand_vector_set (false, operands[0], operands[1],
3696 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF into an SF destination (SSE register,
;; memory, x87 register or GPR).  Memory-to-memory is rejected by the insn
;; condition.  After reload the insn is split into a plain move: operand 1
;; is re-expressed in SFmode, either as a REG with the same register
;; number or through gen_lowpart, and moved into operand 0.
3700 (define_insn_and_split "*vec_extractv4sf_0"
3701 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3703 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3704 (parallel [(const_int 0)])))]
3705 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3707 "&& reload_completed"
3710 rtx op1 = operands[1];
3712 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3714 op1 = gen_lowpart (SFmode, op1);
3715 emit_move_insn (operands[0], op1);
;; Expander avx_vextractf128 for every mode in AVX256MODE: switches on the
;; 0/1 immediate in operand 2 and emits either vec_extract_lo_<mode> or
;; vec_extract_hi_<mode> with operands 0 and 1.
3719 (define_expand "avx_vextractf128<mode>"
3720 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3721 (match_operand:AVX256MODE 1 "register_operand" "")
3722 (match_operand:SI 2 "const_0_to_1_operand" "")]
3725 switch (INTVAL (operands[2]))
3728 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
3731 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low-half extract for the 4-element 256-bit modes (AVX256MODE4P):
;; selects elements 0 and 1 and emits vextractf128 with immediate 0.
;; The destination may be a register or memory (memory attr none/store).
3739 (define_insn "vec_extract_lo_<mode>"
3740 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3741 (vec_select:<avxhalfvecmode>
3742 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3743 (parallel [(const_int 0) (const_int 1)])))]
3745 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3746 [(set_attr "type" "sselog")
3747 (set_attr "memory" "none,store")
3748 (set_attr "prefix" "vex")
3749 (set_attr "mode" "V8SF")])
;; High-half extract for the 4-element 256-bit modes (AVX256MODE4P):
;; selects elements 2 and 3 and emits vextractf128 with immediate 1.
;; The destination may be a register or memory (memory attr none/store).
3751 (define_insn "vec_extract_hi_<mode>"
3752 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3753 (vec_select:<avxhalfvecmode>
3754 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3755 (parallel [(const_int 2) (const_int 3)])))]
3757 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3758 [(set_attr "type" "sselog")
3759 (set_attr "memory" "none,store")
3760 (set_attr "prefix" "vex")
3761 (set_attr "mode" "V8SF")])
;; Low-half extract for the 8-element 256-bit modes (AVX256MODE8P):
;; selects elements 0..3 and emits vextractf128 with immediate 0.
;; The destination may be a register or memory (memory attr none/store).
3763 (define_insn "vec_extract_lo_<mode>"
3764 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3765 (vec_select:<avxhalfvecmode>
3766 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3767 (parallel [(const_int 0) (const_int 1)
3768 (const_int 2) (const_int 3)])))]
3770 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3771 [(set_attr "type" "sselog")
3772 (set_attr "memory" "none,store")
3773 (set_attr "prefix" "vex")
3774 (set_attr "mode" "V8SF")])
;; High-half extract for the 8-element 256-bit modes (AVX256MODE8P):
;; selects elements 4..7 and emits vextractf128 with immediate 1.
;; The destination may be a register or memory (memory attr none/store).
3776 (define_insn "vec_extract_hi_<mode>"
3777 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3778 (vec_select:<avxhalfvecmode>
3779 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3780 (parallel [(const_int 4) (const_int 5)
3781 (const_int 6) (const_int 7)])))]
3783 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3784 [(set_attr "type" "sselog")
3785 (set_attr "memory" "none,store")
3786 (set_attr "prefix" "vex")
3787 (set_attr "mode" "V8SF")])
;; Low-half extract of a V16HI into a V8HI: selects elements 0..7 and
;; emits vextractf128 with immediate 0.  Destination register or memory.
3789 (define_insn "vec_extract_lo_v16hi"
3790 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3792 (match_operand:V16HI 1 "register_operand" "x,x")
3793 (parallel [(const_int 0) (const_int 1)
3794 (const_int 2) (const_int 3)
3795 (const_int 4) (const_int 5)
3796 (const_int 6) (const_int 7)])))]
3798 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3799 [(set_attr "type" "sselog")
3800 (set_attr "memory" "none,store")
3801 (set_attr "prefix" "vex")
3802 (set_attr "mode" "V8SF")])
;; High-half extract of a V16HI into a V8HI: selects elements 8..15 and
;; emits vextractf128 with immediate 1.  Destination register or memory.
3804 (define_insn "vec_extract_hi_v16hi"
3805 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3807 (match_operand:V16HI 1 "register_operand" "x,x")
3808 (parallel [(const_int 8) (const_int 9)
3809 (const_int 10) (const_int 11)
3810 (const_int 12) (const_int 13)
3811 (const_int 14) (const_int 15)])))]
3813 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3814 [(set_attr "type" "sselog")
3815 (set_attr "memory" "none,store")
3816 (set_attr "prefix" "vex")
3817 (set_attr "mode" "V8SF")])
;; Low-half extract of a V32QI into a V16QI: selects elements 0..15 and
;; emits vextractf128 with immediate 0.  Destination register or memory.
3819 (define_insn "vec_extract_lo_v32qi"
3820 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3822 (match_operand:V32QI 1 "register_operand" "x,x")
3823 (parallel [(const_int 0) (const_int 1)
3824 (const_int 2) (const_int 3)
3825 (const_int 4) (const_int 5)
3826 (const_int 6) (const_int 7)
3827 (const_int 8) (const_int 9)
3828 (const_int 10) (const_int 11)
3829 (const_int 12) (const_int 13)
3830 (const_int 14) (const_int 15)])))]
3832 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3833 [(set_attr "type" "sselog")
3834 (set_attr "memory" "none,store")
3835 (set_attr "prefix" "vex")
3836 (set_attr "mode" "V8SF")])
;; High-half extract of a V32QI into a V16QI: selects elements 16..31 and
;; emits vextractf128 with immediate 1.  Destination register or memory.
3838 (define_insn "vec_extract_hi_v32qi"
3839 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3841 (match_operand:V32QI 1 "register_operand" "x,x")
3842 (parallel [(const_int 16) (const_int 17)
3843 (const_int 18) (const_int 19)
3844 (const_int 20) (const_int 21)
3845 (const_int 22) (const_int 23)
3846 (const_int 24) (const_int 25)
3847 (const_int 26) (const_int 27)
3848 (const_int 28) (const_int 29)
3849 (const_int 30) (const_int 31)])))]
3851 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3852 [(set_attr "type" "sselog")
3853 (set_attr "memory" "none,store")
3854 (set_attr "prefix" "vex")
3855 (set_attr "mode" "V8SF")])
;; extractps: moves the V4SF element chosen by the 0..3 immediate in
;; operand 2 from register operand 1 to a GPR or memory ("=rm").  "%v"
;; plus prefix "maybe_vex" covers both legacy and VEX encodings.
3857 (define_insn "*sse4_1_extractps"
3858 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3860 (match_operand:V4SF 1 "register_operand" "x")
3861 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3863 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3864 [(set_attr "type" "sselog")
3865 (set_attr "prefix_extra" "1")
3866 (set_attr "prefix" "maybe_vex")
3867 (set_attr "mode" "V4SF")])
;; Extraction of one SF element from a V4SF in offsettable memory.  The
;; split turns it into a scalar SFmode load from byte offset i*4 of
;; operand 1, where i is the element index in operand 2.
3869 (define_insn_and_split "*vec_extract_v4sf_mem"
3870 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3872 (match_operand:V4SF 1 "memory_operand" "o")
3873 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3879 int i = INTVAL (operands[2]);
3881 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander vec_extract for every mode in SSEMODE: delegates to
;; ix86_expand_vector_extract with operand 0 as the scalar target,
;; operand 1 as the vector and the integer value of operand 2 as the
;; element index; the first argument is false.
3885 (define_expand "vec_extract<mode>"
3886 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3887 (match_operand:SSEMODE 1 "register_operand" "")
3888 (match_operand 2 "const_int_operand" "")]
3891 ix86_expand_vector_extract (false, operands[0], operands[1],
3892 INTVAL (operands[2]));
3896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3898 ;; Parallel double-precision floating point element swizzling
3900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vunpckhpd on V4DF: selector 1,5,3,7 pairs each odd-numbered
;; element of operand 1 with the same-numbered element of operand 2.
3902 (define_insn "avx_unpckhpd256"
3903 [(set (match_operand:V4DF 0 "register_operand" "=x")
3906 (match_operand:V4DF 1 "register_operand" "x")
3907 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3908 (parallel [(const_int 1) (const_int 5)
3909 (const_int 3) (const_int 7)])))]
3911 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3912 [(set_attr "type" "sselog")
3913 (set_attr "prefix" "vex")
3914 (set_attr "mode" "V4DF")])
;; Expander for the V2DF unpckhpd pattern.  All three operands accept
;; registers or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands.
3916 (define_expand "sse2_unpckhpd_exp"
3917 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
3920 (match_operand:V2DF 1 "nonimmediate_operand" "")
3921 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3922 (parallel [(const_int 1)
3925 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX form of unpckhpd on V2DF.  Three alternatives:
;;   0: all registers                        -> vunpckhpd
;;   1: operand 1 in offsettable memory      -> vmovlpd reading %H1
;;   2: memory destination tied to operand 2 -> vmovhpd store of operand 1
;; The condition rejects operands 1 and 2 both being memory.
3927 (define_insn "*avx_unpckhpd"
3928 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3931 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
3932 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
3933 (parallel [(const_int 1)
3935 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3937 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3938 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3939 vmovhpd\t{%1, %0|%0, %1}"
3940 [(set_attr "type" "sselog,ssemov,ssemov")
3941 (set_attr "prefix" "vex")
3942 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Legacy-SSE2 form of unpckhpd on V2DF.  Three alternatives:
;;   0: operand 1 tied to the destination     -> unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied
;;      -> movlpd load from %H1
;;   2: memory destination tied to operand 2  -> movhpd store of operand 1
;; The condition rejects operands 1 and 2 both being memory.
3944 (define_insn "sse2_unpckhpd"
3945 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3948 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3949 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3950 (parallel [(const_int 1)
3952 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3954 unpckhpd\t{%2, %0|%0, %2}
3955 movlpd\t{%H1, %0|%0, %H1}
3956 movhpd\t{%1, %0|%0, %1}"
3957 [(set_attr "type" "sselog,ssemov,ssemov")
3958 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup on V4DF with a single register-or-memory source.
;; The instruction copies each even-numbered double into the slot above
;; it, giving { op1[0], op1[0], op1[2], op1[2] }.  Assuming the source is
;; operand 1 concatenated with itself (as for the movshdup/movsldup
;; patterns; confirm against the full pattern), indices 4..7 mirror 0..3,
;; so the selector has to be 0 4 2 6 -- the same order avx_unpcklpd256
;; uses.  The previous order 0 2 4 6 described
;; { op1[0], op1[2], op1[0], op1[2] }, which is not what vmovddup computes.
3960 (define_insn "avx_movddup256"
3961 [(set (match_operand:V4DF 0 "register_operand" "=x")
3964 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
3966 (parallel [(const_int 0) (const_int 4)
3967 (const_int 2) (const_int 6)])))]
3969 "vmovddup\t{%1, %0|%0, %1}"
3970 [(set_attr "type" "sselog1")
3971 (set_attr "prefix" "vex")
3972 (set_attr "mode" "V4DF")])
;; 128-bit AVX movddup on V2DF.  Alternative 0 (register destination,
;; register-or-memory source) emits vmovddup; alternative 1 has an
;; offsettable memory destination with a register source.  The condition
;; rejects memory-to-memory.
3974 (define_insn "*avx_movddup"
3975 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3978 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3980 (parallel [(const_int 0)
3982 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3984 vmovddup\t{%1, %0|%0, %1}
3986 [(set_attr "type" "sselog1,ssemov")
3987 (set_attr "prefix" "vex")
3988 (set_attr "mode" "V2DF")])
;; SSE3 movddup on V2DF.  Alternative 0 (register destination,
;; register-or-memory source) emits movddup; alternative 1 has an
;; offsettable memory destination with a register source.  The condition
;; rejects memory-to-memory.
3990 (define_insn "*sse3_movddup"
3991 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3994 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3996 (parallel [(const_int 0)
3998 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4000 movddup\t{%1, %0|%0, %1}
4002 [(set_attr "type" "sselog1,ssemov")
4003 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination with a register source: the source register is viewed as a
;; DFmode REG and stored twice, at byte offsets 0 and 8 of operand 0.
4006 [(set (match_operand:V2DF 0 "memory_operand" "")
4009 (match_operand:V2DF 1 "register_operand" "")
4011 (parallel [(const_int 0)
4013 "TARGET_SSE3 && reload_completed"
4016 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4017 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4018 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; 256-bit vunpcklpd on V4DF: selector 0,4,2,6 pairs each even-numbered
;; element of operand 1 with the same-numbered element of operand 2.
4022 (define_insn "avx_unpcklpd256"
4023 [(set (match_operand:V4DF 0 "register_operand" "=x")
4026 (match_operand:V4DF 1 "register_operand" "x")
4027 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4028 (parallel [(const_int 0) (const_int 4)
4029 (const_int 2) (const_int 6)])))]
4031 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4032 [(set_attr "type" "sselog")
4033 (set_attr "prefix" "vex")
4034 (set_attr "mode" "V4DF")])
;; Expander for the V2DF unpcklpd pattern.  All three operands accept
;; registers or memory; the preparation statement passes them through
;; ix86_fixup_binary_operands.
4036 (define_expand "sse2_unpcklpd_exp"
4037 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4040 (match_operand:V2DF 1 "nonimmediate_operand" "")
4041 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4042 (parallel [(const_int 0)
4045 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX form of unpcklpd on V2DF.  Three alternatives:
;;   0: all registers                          -> vunpcklpd
;;   1: operand 2 in memory                    -> vmovhpd load
;;   2: offsettable memory dest tied to op 1   -> vmovlpd store to %H0
;; The condition rejects operands 1 and 2 both being memory.
4047 (define_insn "*avx_unpcklpd"
4048 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4051 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4052 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4053 (parallel [(const_int 0)
4055 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4057 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4058 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4059 vmovlpd\t{%2, %H0|%H0, %2}"
4060 [(set_attr "type" "sselog,ssemov,ssemov")
4061 (set_attr "prefix" "vex")
4062 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Legacy-SSE2 form of unpcklpd on V2DF; operand 1 is tied to the
;; destination in every alternative.
;;   0: register source                -> unpcklpd
;;   1: operand 2 in memory            -> movhpd load
;;   2: offsettable memory destination -> movlpd store to %H0
;; The condition rejects operands 1 and 2 both being memory.
4064 (define_insn "sse2_unpcklpd"
4065 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4068 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4069 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4070 (parallel [(const_int 0)
4072 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4074 unpcklpd\t{%2, %0|%0, %2}
4075 movhpd\t{%2, %0|%0, %2}
4076 movlpd\t{%2, %H0|%H0, %2}"
4077 [(set_attr "type" "sselog,ssemov,ssemov")
4078 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander avx_shufpd256: turns individual bits of the immediate in
;; operand 3 into explicit element indices for avx_shufpd256_1.  Bit 1
;; picks 5 or 4, bit 2 picks 3 or 2, bit 3 picks 7 or 6.
4080 (define_expand "avx_shufpd256"
4081 [(match_operand:V4DF 0 "register_operand" "")
4082 (match_operand:V4DF 1 "register_operand" "")
4083 (match_operand:V4DF 2 "nonimmediate_operand" "")
4084 (match_operand:SI 3 "const_int_operand" "")]
4087 int mask = INTVAL (operands[3]);
4088 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4090 GEN_INT (mask & 2 ? 5 : 4),
4091 GEN_INT (mask & 4 ? 3 : 2),
4092 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the selection spelled out as four index operands
;; (operands 3-6).  The output code folds those indices back into a 4-bit
;; immediate, stores it in operands[3], and then prints the instruction.
4096 (define_insn "avx_shufpd256_1"
4097 [(set (match_operand:V4DF 0 "register_operand" "=x")
4100 (match_operand:V4DF 1 "register_operand" "x")
4101 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4102 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4103 (match_operand 4 "const_4_to_5_operand" "")
4104 (match_operand 5 "const_2_to_3_operand" "")
4105 (match_operand 6 "const_6_to_7_operand" "")])))]
4109 mask = INTVAL (operands[3]);
4110 mask |= (INTVAL (operands[4]) - 4) << 1;
4111 mask |= (INTVAL (operands[5]) - 2) << 2;
4112 mask |= (INTVAL (operands[6]) - 6) << 3;
4113 operands[3] = GEN_INT (mask);
4115 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4117 [(set_attr "type" "sselog")
4118 (set_attr "prefix" "vex")
4119 (set_attr "mode" "V4DF")])
;; Expander that converts the shuffle immediate in operand 3 into index
;; operands for gen_sse2_shufpd_v2df; bit 1 of the mask selects index 3 or 2.
4121 (define_expand "sse2_shufpd"
4122 [(match_operand:V2DF 0 "register_operand" "")
4123 (match_operand:V2DF 1 "register_operand" "")
4124 (match_operand:V2DF 2 "nonimmediate_operand" "")
4125 (match_operand:SI 3 "const_int_operand" "")]
4128 int mask = INTVAL (operands[3]);
4129 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4131 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander over the SSEMODE4S modes: selects from the vec_concat of operands
;; 1 and 2 (operand 2 may be memory); the selection list begins at index 0.
4135 (define_expand "vec_extract_even<mode>"
4136 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4137 (vec_select:SSEMODE4S
4138 (vec_concat:<ssedoublesizemode>
4139 (match_operand:SSEMODE4S 1 "register_operand" "")
4140 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4141 (parallel [(const_int 0)
;; Expander over the SSEMODE4S modes: selects from the vec_concat of operands
;; 1 and 2 (operand 2 may be memory); the selection list begins at index 1.
4147 (define_expand "vec_extract_odd<mode>"
4148 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4149 (vec_select:SSEMODE4S
4150 (vec_concat:<ssedoublesizemode>
4151 (match_operand:SSEMODE4S 1 "register_operand" "")
4152 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4153 (parallel [(const_int 1)
;; Expander over the SSEMODE2D modes: selects from the vec_concat of operands
;; 1 and 2 (operand 2 may be memory); the selection list begins at index 0.
4159 (define_expand "vec_extract_even<mode>"
4160 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4161 (vec_select:SSEMODE2D
4162 (vec_concat:<ssedoublesizemode>
4163 (match_operand:SSEMODE2D 1 "register_operand" "")
4164 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4165 (parallel [(const_int 0)
;; Expander over the SSEMODE2D modes: selects from the vec_concat of operands
;; 1 and 2 (operand 2 may be memory); the selection list begins at index 1.
4169 (define_expand "vec_extract_odd<mode>"
4170 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4171 (vec_select:SSEMODE2D
4172 (vec_concat:<ssedoublesizemode>
4173 (match_operand:SSEMODE2D 1 "register_operand" "")
4174 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4175 (parallel [(const_int 1)
4179 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand vpunpckhqdq on V2DI; operand 2 may be memory and the
;; selection list begins at index 1.
4180 (define_insn "*avx_punpckhqdq"
4181 [(set (match_operand:V2DI 0 "register_operand" "=x")
4184 (match_operand:V2DI 1 "register_operand" "x")
4185 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4186 (parallel [(const_int 1)
4189 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4190 [(set_attr "type" "sselog")
4191 (set_attr "prefix" "vex")
4192 (set_attr "mode" "TI")])
;; SSE2 two-operand punpckhqdq on V2DI; operand 1 is tied to the destination
;; and the selection list begins at index 1.
4194 (define_insn "sse2_punpckhqdq"
4195 [(set (match_operand:V2DI 0 "register_operand" "=x")
4198 (match_operand:V2DI 1 "register_operand" "0")
4199 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4200 (parallel [(const_int 1)
4203 "punpckhqdq\t{%2, %0|%0, %2}"
4204 [(set_attr "type" "sselog")
4205 (set_attr "prefix_data16" "1")
4206 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklqdq on V2DI; operand 2 may be memory and the
;; selection list begins at index 0.
4208 (define_insn "*avx_punpcklqdq"
4209 [(set (match_operand:V2DI 0 "register_operand" "=x")
4212 (match_operand:V2DI 1 "register_operand" "x")
4213 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4214 (parallel [(const_int 0)
4217 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4218 [(set_attr "type" "sselog")
4219 (set_attr "prefix" "vex")
4220 (set_attr "mode" "TI")])
;; SSE2 two-operand punpcklqdq on V2DI; operand 1 is tied to the destination
;; and the selection list begins at index 0.
4222 (define_insn "sse2_punpcklqdq"
4223 [(set (match_operand:V2DI 0 "register_operand" "=x")
4226 (match_operand:V2DI 1 "register_operand" "0")
4227 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4228 (parallel [(const_int 0)
4231 "punpcklqdq\t{%2, %0|%0, %2}"
4232 [(set_attr "type" "sselog")
4233 (set_attr "prefix_data16" "1")
4234 (set_attr "mode" "TI")])
;; AVX vshufpd over the SSEMODE2D modes.  Result element 0 is chosen from
;; operand 1 (index 0 or 1) and element 1 from operand 2 (index 2 or 3); the
;; output code packs both choices into a 2-bit immediate stored in operands[3].
4236 (define_insn "*avx_shufpd_<mode>"
4237 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4238 (vec_select:SSEMODE2D
4239 (vec_concat:<ssedoublesizemode>
4240 (match_operand:SSEMODE2D 1 "register_operand" "x")
4241 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4242 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4243 (match_operand 4 "const_2_to_3_operand" "")])))]
4247 mask = INTVAL (operands[3]);
4248 mask |= (INTVAL (operands[4]) - 2) << 1;
4249 operands[3] = GEN_INT (mask);
4251 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4253 [(set_attr "type" "sselog")
4254 (set_attr "prefix" "vex")
4255 (set_attr "mode" "V2DF")])
;; SSE2 shufpd over the SSEMODE2D modes; operand 1 is tied to the destination.
;; Result element 0 is chosen from operand 1 (index 0 or 1) and element 1 from
;; operand 2 (index 2 or 3); the output code packs both choices into a 2-bit
;; immediate stored in operands[3].
4257 (define_insn "sse2_shufpd_<mode>"
4258 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4259 (vec_select:SSEMODE2D
4260 (vec_concat:<ssedoublesizemode>
4261 (match_operand:SSEMODE2D 1 "register_operand" "0")
4262 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4263 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4264 (match_operand 4 "const_2_to_3_operand" "")])))]
4268 mask = INTVAL (operands[3]);
4269 mask |= (INTVAL (operands[4]) - 2) << 1;
4270 operands[3] = GEN_INT (mask);
4272 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4274 [(set_attr "type" "sselog")
4275 (set_attr "mode" "V2DF")])
4277 ;; Avoid combining registers from different units in a single alternative,
4278 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF into a DF destination.  The first two
;; alternatives print vmovhpd (store to memory) and vunpckhpd (register to
;; register).  Destination and source may not both be memory.
4279 (define_insn "*avx_storehpd"
4280 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4282 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4283 (parallel [(const_int 1)])))]
4284 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4286 vmovhpd\t{%1, %0|%0, %1}
4287 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4291 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4292 (set_attr "prefix" "vex")
4293 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF into a DF destination.  The first
;; alternative prints movhpd (store to memory); in the second, operand 1 is
;; tied to the destination.  Destination and source may not both be memory.
4295 (define_insn "sse2_storehpd"
4296 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4298 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4299 (parallel [(const_int 1)])))]
4300 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4302 movhpd\t{%1, %0|%0, %1}
4307 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4308 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, extracting element 1 of a V2DF in memory into a register
;; becomes a plain DF move from byte offset 8 of that memory operand.
4311 [(set (match_operand:DF 0 "register_operand" "")
4313 (match_operand:V2DF 1 "memory_operand" "")
4314 (parallel [(const_int 1)])))]
4315 "TARGET_SSE2 && reload_completed"
4316 [(set (match_dup 0) (match_dup 1))]
4318 operands[1] = adjust_address (operands[1], DFmode, 8);
4321 ;; Avoid combining registers from different units in a single alternative,
4322 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF into a DF destination.  The first
;; alternative (store to memory) prints %vmovlpd; the prefix attribute is
;; maybe_vex.  Destination and source may not both be memory.
4323 (define_insn "sse2_storelpd"
4324 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4326 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4327 (parallel [(const_int 0)])))]
4328 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4330 %vmovlpd\t{%1, %0|%0, %1}
4335 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4336 (set_attr "prefix" "maybe_vex")
4337 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, extracting element 0 into a register is rewritten as a plain
;; move: operand 1 is re-expressed in DFmode (via gen_rtx_REG on its register
;; number, or via gen_lowpart) and moved into operand 0.
4340 [(set (match_operand:DF 0 "register_operand" "")
4342 (match_operand:V2DF 1 "nonimmediate_operand" "")
4343 (parallel [(const_int 0)])))]
4344 "TARGET_SSE2 && reload_completed"
4347 rtx op1 = operands[1];
4349 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4351 op1 = gen_lowpart (DFmode, op1);
4352 emit_move_insn (operands[0], op1);
;; Expander for the load-high pattern: combines element 0 of V2DF operand 1
;; with DF operand 2.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands (code UNKNOWN, V2DFmode).
4356 (define_expand "sse2_loadhpd_exp"
4357 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4360 (match_operand:V2DF 1 "nonimmediate_operand" "")
4361 (parallel [(const_int 0)]))
4362 (match_operand:DF 2 "nonimmediate_operand" "")))]
4364 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4366 ;; Avoid combining registers from different units in a single alternative,
4367 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX load-high: element 0 of operand 1 combined with DF operand 2.  The first
;; two alternatives print vmovhpd (operand 2 in memory) and vunpcklpd (operand
;; 2 in a register); the remaining three have a memory destination tied to
;; operand 1.  Operands 1 and 2 may not both be memory.
4368 (define_insn "*avx_loadhpd"
4369 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4372 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4373 (parallel [(const_int 0)]))
4374 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4375 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4377 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4378 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4382 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4383 (set_attr "prefix" "vex")
4384 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 load-high: the result keeps element 0 of V2DF operand 1 and takes DF
;; operand 2 as element 1.  Operand 1 is tied to the destination in every
;; alternative: movhpd when operand 2 is in memory, unpcklpd when it is in an
;; SSE register, and three memory-destination alternatives.
;; An earlier alternative tied operand 2 to the destination and printed
;; shufpd with immediate 1.  shufpd always takes its low result element from
;; the destination register and its high element from the source register, so
;; that form yielded (dest[1], op1[0]) rather than (op1[0], op2); no single
;; shufpd can compute this pattern with operand 2 tied to the destination, so
;; the alternative has been removed.
4386 (define_insn "sse2_loadhpd"
4387 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4390 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4391 (parallel [(const_int 0)]))
4392 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4393 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4395 movhpd\t{%2, %0|%0, %2}
4396 unpcklpd\t{%2, %0|%0, %2}
4401 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4402 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, replacing the high element of a V2DF in memory with a DF
;; register becomes a plain DF store to byte offset 8 of that memory operand.
4405 [(set (match_operand:V2DF 0 "memory_operand" "")
4407 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4408 (match_operand:DF 1 "register_operand" "")))]
4409 "TARGET_SSE2 && reload_completed"
4410 [(set (match_dup 0) (match_dup 1))]
4412 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander for the load-low pattern: combines DF operand 2 with element 1 of
;; V2DF operand 1.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands (code UNKNOWN, V2DFmode).
4415 (define_expand "sse2_loadlpd_exp"
4416 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4418 (match_operand:DF 2 "nonimmediate_operand" "")
4420 (match_operand:V2DF 1 "nonimmediate_operand" "")
4421 (parallel [(const_int 1)]))))]
4423 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4425 ;; Avoid combining registers from different units in a single alternative,
4426 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX load-low: DF operand 2 becomes element 0 and element 1 comes from
;; operand 1.  Register-destination alternatives print vmovsd (load, operand 1
;; constrained to C), vmovlpd (operand 2 in memory), three-operand vmovsd
;; (both in registers) and vmovhpd reading %H1 (operand 1 in memory).  The
;; last three alternatives have a memory destination tied to operand 1.
4427 (define_insn "*avx_loadlpd"
4428 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4430 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4432 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4433 (parallel [(const_int 1)]))))]
4434 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4436 vmovsd\t{%2, %0|%0, %2}
4437 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4438 vmovsd\t{%2, %1, %0|%0, %1, %2}
4439 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4443 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4444 (set_attr "prefix" "vex")
4445 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 load-low: DF operand 2 becomes element 0 and element 1 comes from
;; V2DF operand 1.  Register-destination alternatives print movsd (load with
;; operand 1 constrained to C), movlpd, movsd (register), shufpd and movhpd
;; reading %H1; the last three alternatives have a memory destination tied to
;; operand 1.
;; In the shufpd alternative the destination is tied to operand 2 and operand
;; 1 is a separate register.  shufpd with immediate 2 keeps the destination's
;; low element and takes the source's high element, so the source must be
;; operand 1.  Printing %2 there named the destination itself and left the
;; high element unchanged.
4447 (define_insn "sse2_loadlpd"
4448 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4450 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4452 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4453 (parallel [(const_int 1)]))))]
4454 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4456 movsd\t{%2, %0|%0, %2}
4457 movlpd\t{%2, %0|%0, %2}
4458 movsd\t{%2, %0|%0, %2}
4459 shufpd\t{$2, %1, %0|%0, %1, 2}
4460 movhpd\t{%H1, %0|%0, %H1}
4464 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4465 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; After reload, replacing the low element of a V2DF in memory with a DF
;; register becomes a plain DF store.  The pattern keeps element 1 of the
;; memory operand and overwrites element 0, so the store goes to byte offset 0;
;; offset 8 would overwrite the element that must be preserved.
4468 [(set (match_operand:V2DF 0 "memory_operand" "")
4470 (match_operand:DF 1 "register_operand" "")
4471 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4472 "TARGET_SSE2 && reload_completed"
4473 [(set (match_dup 0) (match_dup 1))]
4475 operands[0] = adjust_address (operands[0], DFmode, 0);
4478 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extraction of element 1 of a V2DF into a DF destination,
;; using single-precision moves: movhps (store), movhlps (register to
;; register) and movlps from %H1 (load).
4480 (define_insn "*vec_extractv2df_1_sse"
4481 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4483 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4484 (parallel [(const_int 1)])))]
4485 "!TARGET_SSE2 && TARGET_SSE
4486 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4488 movhps\t{%1, %0|%0, %1}
4489 movhlps\t{%1, %0|%0, %1}
4490 movlps\t{%H1, %0|%0, %H1}"
4491 [(set_attr "type" "ssemov")
4492 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF into a DF destination,
;; using single-precision moves: movlps (store), movaps (register to
;; register) and movlps (load).
4494 (define_insn "*vec_extractv2df_0_sse"
4495 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4497 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4498 (parallel [(const_int 0)])))]
4499 "!TARGET_SSE2 && TARGET_SSE
4500 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4502 movlps\t{%1, %0|%0, %1}
4503 movaps\t{%1, %0|%0, %1}
4504 movlps\t{%1, %0|%0, %1}"
4505 [(set_attr "type" "ssemov")
4506 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX merge of V2DF operands 2 and 1.  Five alternatives print vmovsd,
;; vmovlpd (operand 2 in memory), vmovlpd (store to a memory destination tied
;; to operand 1), vmovhps reading %H1 (operand 1 in memory) and vmovhps
;; writing %H0 (memory destination tied to operand 2).
4508 (define_insn "*avx_movsd"
4509 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4511 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4512 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4516 vmovsd\t{%2, %1, %0|%0, %1, %2}
4517 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4518 vmovlpd\t{%2, %0|%0, %2}
4519 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4520 vmovhps\t{%1, %H0|%H0, %1}"
4521 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4522 (set_attr "prefix" "vex")
4523 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 merge of V2DF operands 2 and 1.  Six alternatives print movsd, movlpd
;; (operand 2 in memory), movlpd (store), shufpd, movhps reading %H1 and
;; movhps writing %H0.
;; In the shufpd alternative the destination is tied to operand 2 and operand
;; 1 is a separate register.  shufpd with immediate 2 keeps the destination's
;; low element and takes the source's high element, so the source must be
;; operand 1.  Printing %2 there named the destination itself and made the
;; instruction a no-op.
4525 (define_insn "sse2_movsd"
4526 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4528 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4529 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4533 movsd\t{%2, %0|%0, %2}
4534 movlpd\t{%2, %0|%0, %2}
4535 movlpd\t{%2, %0|%0, %2}
4536 shufpd\t{$2, %1, %0|%0, %1, 2}
4537 movhps\t{%H1, %0|%0, %H1}
4538 movhps\t{%1, %H0|%H0, %1}"
4539 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4540 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result built from a single DF operand (register or memory) with
;; %vmovddup; the prefix attribute is maybe_vex.
4542 (define_insn "*vec_dupv2df_sse3"
4543 [(set (match_operand:V2DF 0 "register_operand" "=x")
4545 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4547 "%vmovddup\t{%1, %0|%0, %1}"
4548 [(set_attr "type" "sselog1")
4549 (set_attr "prefix" "maybe_vex")
4550 (set_attr "mode" "DF")])
;; V2DF result built from a DF register operand that is tied to the
;; destination register.
4552 (define_insn "vec_dupv2df"
4553 [(set (match_operand:V2DF 0 "register_operand" "=x")
4555 (match_operand:DF 1 "register_operand" "0")))]
4558 [(set_attr "type" "sselog1")
4559 (set_attr "mode" "V2DF")])
;; V2DF concatenation form that is printed as %vmovddup of DF operand 1
;; (register or memory); the prefix attribute is maybe_vex.
4561 (define_insn "*vec_concatv2df_sse3"
4562 [(set (match_operand:V2DF 0 "register_operand" "=x")
4564 (match_operand:DF 1 "nonimmediate_operand" "xm")
4567 "%vmovddup\t{%1, %0|%0, %1}"
4568 [(set_attr "type" "sselog1")
4569 (set_attr "prefix" "maybe_vex")
4570 (set_attr "mode" "DF")])
;; AVX concatenation of two DF operands into a V2DF register.  Alternatives:
;; vunpcklpd (both in registers), vmovhpd (operand 2 in memory) and vmovsd
;; (operand 1 in memory, operand 2 constrained to C).
4572 (define_insn "*vec_concatv2df_avx"
4573 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4575 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4576 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4579 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4580 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4581 vmovsd\t{%1, %0|%0, %1}"
4582 [(set_attr "type" "ssemov")
4583 (set_attr "prefix" "vex")
4584 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX concatenation of two DF operands into a V2DF register.  The Y2
;; alternatives print unpcklpd, movhpd and movsd; the x alternatives use the
;; single-precision moves movlhps and movhps.
4586 (define_insn "*vec_concatv2df"
4587 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4589 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4590 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4593 unpcklpd\t{%2, %0|%0, %2}
4594 movhpd\t{%2, %0|%0, %2}
4595 movsd\t{%1, %0|%0, %1}
4596 movlhps\t{%2, %0|%0, %2}
4597 movhps\t{%2, %0|%0, %2}"
4598 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4599 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4603 ;; Parallel integral arithmetic
4605 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander over the SSEMODEI modes.  The preparation statement puts
;; the zero vector of the mode into a fresh register as operands[2]
;; (presumably the minuend of a 0 - x subtraction; confirm against the full
;; pattern).
4607 (define_expand "neg<mode>2"
4608 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4611 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4613 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander over the SSEMODEI modes; the preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
4615 (define_expand "<plusminus_insn><mode>3"
4616 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4618 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4619 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4621 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer add/subtract over the SSEMODEI modes; prints
;; vp<plusminus_mnemonic><ssevecsize> and is gated on
;; ix86_binary_operator_ok.
4623 (define_insn "*avx_<plusminus_insn><mode>3"
4624 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4626 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4627 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4628 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4629 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4630 [(set_attr "type" "sseiadd")
4631 (set_attr "prefix" "vex")
4632 (set_attr "mode" "TI")])
;; SSE2 two-operand integer add/subtract over the SSEMODEI modes; operand 1 is
;; tied to the destination and the insn prints
;; p<plusminus_mnemonic><ssevecsize>.
4634 (define_insn "*<plusminus_insn><mode>3"
4635 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4637 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4638 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4639 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4640 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4641 [(set_attr "type" "sseiadd")
4642 (set_attr "prefix_data16" "1")
4643 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes over the SSEMODE12 modes (V16QI and
;; V8HI); the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.
4645 (define_expand "sse2_<plusminus_insn><mode>3"
4646 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4647 (sat_plusminus:SSEMODE12
4648 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4649 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4651 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus codes over the SSEMODE12 modes;
;; prints vp<plusminus_mnemonic><ssevecsize>, with operand 2 allowed in
;; memory.
4653 (define_insn "*avx_<plusminus_insn><mode>3"
4654 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4655 (sat_plusminus:SSEMODE12
4656 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4657 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4658 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4659 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4660 [(set_attr "type" "sseiadd")
4661 (set_attr "prefix" "vex")
4662 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus codes over the SSEMODE12 modes;
;; operand 1 is tied to the destination and the insn prints
;; p<plusminus_mnemonic><ssevecsize>.
4664 (define_insn "*sse2_<plusminus_insn><mode>3"
4665 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4666 (sat_plusminus:SSEMODE12
4667 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4668 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4669 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4670 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4671 [(set_attr "type" "sseiadd")
4672 (set_attr "prefix_data16" "1")
4673 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split that is only valid before reload
;; (the condition excludes reload_completed and reload_in_progress).  Two code
;; sequences appear in the body:
;;  - an SSE5 one that widens bytes to words with ix86_expand_sse5_unpack,
;;    multiplies the halves with gen_mulv8hi3 and repacks with
;;    ix86_expand_sse5_pack;
;;  - a generic one that interleaves each input with itself using
;;    punpckhbw/punpcklbw, performs two V8HI multiplies, and then merges the
;;    low result bytes back with three rounds of punpck{h,l}bw plus a final
;;    punpcklbw into op0.
4675 (define_insn_and_split "mulv16qi3"
4676 [(set (match_operand:V16QI 0 "register_operand" "")
4677 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4678 (match_operand:V16QI 2 "register_operand" "")))]
4680 && !(reload_completed || reload_in_progress)"
4685 rtx t[12], op0, op[3];
4690 /* On SSE5, we can take advantage of the pperm instruction to pack and
4691 unpack the bytes. Unpack data such that we've got a source byte in
4692 each low byte of each word. We don't care what goes into the high
4693 byte, so put 0 there. */
4694 for (i = 0; i < 6; ++i)
4695 t[i] = gen_reg_rtx (V8HImode);
4697 for (i = 0; i < 2; i++)
4700 op[1] = operands[i+1];
4701 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4704 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4707 /* Multiply words. */
4708 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4709 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4711 /* Pack the low byte of each word back into a single xmm */
4712 op[0] = operands[0];
4715 ix86_expand_sse5_pack (op);
4719 for (i = 0; i < 12; ++i)
4720 t[i] = gen_reg_rtx (V16QImode);
4722 /* Unpack data such that we've got a source byte in each low byte of
4723 each word. We don't care what goes into the high byte of each word.
4724 Rather than trying to get zero in there, most convenient is to let
4725 it be a copy of the low byte. */
4726 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4727 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4728 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4729 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4731 /* Multiply words. The end-of-line annotations here give a picture of what
4732 the output of that instruction looks like. Dot means don't care; the
4733 letters are the bytes of the result with A being the most significant. */
4734 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4735 gen_lowpart (V8HImode, t[0]),
4736 gen_lowpart (V8HImode, t[1])));
4737 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4738 gen_lowpart (V8HImode, t[2]),
4739 gen_lowpart (V8HImode, t[3])));
4741 /* Extract the relevant bytes and merge them back together. */
4742 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4743 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4744 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4745 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4746 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4747 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4750 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy with code MULT.
4754 (define_expand "mulv8hi3"
4755 [(set (match_operand:V8HI 0 "register_operand" "")
4756 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4757 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4759 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI multiply: prints vpmullw.  Operands 1 and 2 are
;; marked commutative (%) and operand 2 may be memory.
4761 (define_insn "*avx_mulv8hi3"
4762 [(set (match_operand:V8HI 0 "register_operand" "=x")
4763 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4764 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4765 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4766 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4767 [(set_attr "type" "sseimul")
4768 (set_attr "prefix" "vex")
4769 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI multiply: prints pmullw.  Operand 1 is commutative
;; and tied to the destination; operand 2 may be memory.
4771 (define_insn "*mulv8hi3"
4772 [(set (match_operand:V8HI 0 "register_operand" "=x")
4773 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4774 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4775 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4776 "pmullw\t{%2, %0|%0, %2}"
4777 [(set_attr "type" "sseimul")
4778 (set_attr "prefix_data16" "1")
4779 (set_attr "mode" "TI")])
;; Expander named smulv8hi3_highpart on V8HI operands; the preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy
;; with code MULT.
4781 (define_expand "smulv8hi3_highpart"
4782 [(set (match_operand:V8HI 0 "register_operand" "")
4787 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4789 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4792 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand form of the V8HI high-part multiply that prints vpmulhw.
;; Operands 1 and 2 are marked commutative and operand 2 may be memory.
;; The pattern name now carries the smul stem so that it lines up with
;; *smulv8hi3_highpart and *avx_umulv8hi3_highpart; the leading star means the
;; name is not used to generate a gen_ function, so only dumps are affected.
4794 (define_insn "*avx_smulv8hi3_highpart"
4795 [(set (match_operand:V8HI 0 "register_operand" "=x")
4800 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4802 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4804 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4805 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4806 [(set_attr "type" "sseimul")
4807 (set_attr "prefix" "vex")
4808 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the V8HI high-part multiply that prints pmulhw;
;; operand 1 is commutative and tied to the destination.
4810 (define_insn "*smulv8hi3_highpart"
4811 [(set (match_operand:V8HI 0 "register_operand" "=x")
4816 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4818 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4820 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4821 "pmulhw\t{%2, %0|%0, %2}"
4822 [(set_attr "type" "sseimul")
4823 (set_attr "prefix_data16" "1")
4824 (set_attr "mode" "TI")])
;; Expander named umulv8hi3_highpart on V8HI operands; the preparation
;; statement passes the operands through ix86_fixup_binary_operands_no_copy
;; with code MULT.
4826 (define_expand "umulv8hi3_highpart"
4827 [(set (match_operand:V8HI 0 "register_operand" "")
4832 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4834 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4837 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand form of the V8HI high-part multiply that prints vpmulhuw;
;; operands 1 and 2 are marked commutative and operand 2 may be memory.
4839 (define_insn "*avx_umulv8hi3_highpart"
4840 [(set (match_operand:V8HI 0 "register_operand" "=x")
4845 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4847 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4849 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4850 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
4851 [(set_attr "type" "sseimul")
4852 (set_attr "prefix" "vex")
4853 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the V8HI high-part multiply that prints pmulhuw;
;; operand 1 is commutative and tied to the destination.
4855 (define_insn "*umulv8hi3_highpart"
4856 [(set (match_operand:V8HI 0 "register_operand" "=x")
4861 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4863 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4865 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4866 "pmulhuw\t{%2, %0|%0, %2}"
4867 [(set_attr "type" "sseimul")
4868 (set_attr "prefix_data16" "1")
4869 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; operands; the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy with code MULT.
4871 (define_expand "sse2_umulv2siv2di3"
4872 [(set (match_operand:V2DI 0 "register_operand" "")
4876 (match_operand:V4SI 1 "nonimmediate_operand" "")
4877 (parallel [(const_int 0) (const_int 2)])))
4880 (match_operand:V4SI 2 "nonimmediate_operand" "")
4881 (parallel [(const_int 0) (const_int 2)])))))]
4883 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuludq: V2DI result from elements 0 and 2 of two V4SI
;; operands; operand 2 may be memory.
4885 (define_insn "*avx_umulv2siv2di3"
4886 [(set (match_operand:V2DI 0 "register_operand" "=x")
4890 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4891 (parallel [(const_int 0) (const_int 2)])))
4894 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4895 (parallel [(const_int 0) (const_int 2)])))))]
4896 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4897 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4898 [(set_attr "type" "sseimul")
4899 (set_attr "prefix" "vex")
4900 (set_attr "mode" "TI")])
;; SSE2 two-operand pmuludq: V2DI result from elements 0 and 2 of two V4SI
;; operands; operand 1 is commutative and tied to the destination.
4902 (define_insn "*sse2_umulv2siv2di3"
4903 [(set (match_operand:V2DI 0 "register_operand" "=x")
4907 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4908 (parallel [(const_int 0) (const_int 2)])))
4911 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4912 (parallel [(const_int 0) (const_int 2)])))))]
4913 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4914 "pmuludq\t{%2, %0|%0, %2}"
4915 [(set_attr "type" "sseimul")
4916 (set_attr "prefix_data16" "1")
4917 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; operands (SSE4.1 naming); the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy with code MULT.
4919 (define_expand "sse4_1_mulv2siv2di3"
4920 [(set (match_operand:V2DI 0 "register_operand" "")
4924 (match_operand:V4SI 1 "nonimmediate_operand" "")
4925 (parallel [(const_int 0) (const_int 2)])))
4928 (match_operand:V4SI 2 "nonimmediate_operand" "")
4929 (parallel [(const_int 0) (const_int 2)])))))]
4931 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuldq: V2DI result from elements 0 and 2 of two V4SI
;; operands; operand 2 may be memory.
4933 (define_insn "*avx_mulv2siv2di3"
4934 [(set (match_operand:V2DI 0 "register_operand" "=x")
4938 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4939 (parallel [(const_int 0) (const_int 2)])))
4942 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4943 (parallel [(const_int 0) (const_int 2)])))))]
4944 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4945 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4946 [(set_attr "type" "sseimul")
4947 (set_attr "prefix" "vex")
4948 (set_attr "mode" "TI")])
;; SSE4.1 two-operand pmuldq: V2DI result from elements 0 and 2 of two V4SI
;; operands; operand 1 is commutative and tied to the destination.
4950 (define_insn "*sse4_1_mulv2siv2di3"
4951 [(set (match_operand:V2DI 0 "register_operand" "=x")
4955 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4956 (parallel [(const_int 0) (const_int 2)])))
4959 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4960 (parallel [(const_int 0) (const_int 2)])))))]
4961 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4962 "pmuldq\t{%2, %0|%0, %2}"
4963 [(set_attr "type" "sseimul")
4964 (set_attr "prefix_extra" "1")
4965 (set_attr "mode" "TI")])
;; Expander for pmaddwd: a V4SI result built from V4HI selections of the two
;; V8HI operands, one pair of selections starting at index 0 and the other at
;; index 1.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy with code MULT.
4967 (define_expand "sse2_pmaddwd"
4968 [(set (match_operand:V4SI 0 "register_operand" "")
4973 (match_operand:V8HI 1 "nonimmediate_operand" "")
4974 (parallel [(const_int 0)
4980 (match_operand:V8HI 2 "nonimmediate_operand" "")
4981 (parallel [(const_int 0)
4987 (vec_select:V4HI (match_dup 1)
4988 (parallel [(const_int 1)
4993 (vec_select:V4HI (match_dup 2)
4994 (parallel [(const_int 1)
4997 (const_int 7)]))))))]
4999 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand vpmaddwd: V4SI result built from V4HI selections of the
;; two V8HI operands (selections starting at index 0 and at index 1); operand
;; 2 may be memory.
5001 (define_insn "*avx_pmaddwd"
5002 [(set (match_operand:V4SI 0 "register_operand" "=x")
5007 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5008 (parallel [(const_int 0)
5014 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5015 (parallel [(const_int 0)
5021 (vec_select:V4HI (match_dup 1)
5022 (parallel [(const_int 1)
5027 (vec_select:V4HI (match_dup 2)
5028 (parallel [(const_int 1)
5031 (const_int 7)]))))))]
5032 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5033 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5034 [(set_attr "type" "sseiadd")
5035 (set_attr "prefix" "vex")
5036 (set_attr "mode" "TI")])
;; SSE2 two-operand pmaddwd: V4SI result built from V4HI selections of the two
;; V8HI operands (selections starting at index 0 and at index 1); operand 1 is
;; commutative and tied to the destination.
5038 (define_insn "*sse2_pmaddwd"
5039 [(set (match_operand:V4SI 0 "register_operand" "=x")
5044 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5045 (parallel [(const_int 0)
5051 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5052 (parallel [(const_int 0)
5058 (vec_select:V4HI (match_dup 1)
5059 (parallel [(const_int 1)
5064 (vec_select:V4HI (match_dup 2)
5065 (parallel [(const_int 1)
5068 (const_int 7)]))))))]
5069 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5070 "pmaddwd\t{%2, %0|%0, %2}"
5071 [(set_attr "type" "sseiadd")
5072 (set_attr "prefix_data16" "1")
5073 (set_attr "mode" "TI")])
;; V4SI multiply expander on register operands.  When SSE4.1 or SSE5 is
;; enabled, the operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT.
5075 (define_expand "mulv4si3"
5076 [(set (match_operand:V4SI 0 "register_operand" "")
5077 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5078 (match_operand:V4SI 2 "register_operand" "")))]
5081 if (TARGET_SSE4_1 || TARGET_SSE5)
5082 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX three-operand V4SI multiply: prints vpmulld.  Operands 1 and 2 are
;; marked commutative (%) and operand 2 may be memory.
5085 (define_insn "*avx_mulv4si3"
5086 [(set (match_operand:V4SI 0 "register_operand" "=x")
5087 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5088 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5089 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5090 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5091 [(set_attr "type" "sseimul")
5092 (set_attr "prefix" "vex")
5093 (set_attr "mode" "TI")])
;; SSE4.1 two-operand V4SI multiply: prints pmulld.  Operand 1 is commutative
;; and tied to the destination; operand 2 may be memory.
5095 (define_insn "*sse4_1_mulv4si3"
5096 [(set (match_operand:V4SI 0 "register_operand" "=x")
5097 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5098 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5099 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5100 "pmulld\t{%2, %0|%0, %2}"
5101 [(set_attr "type" "sseimul")
5102 (set_attr "prefix_extra" "1")
5103 (set_attr "mode" "TI")])
5105 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5106 ;; multiply/add. In general, we expect the define_split to occur before
5107 ;; register allocation, so we have to handle the corner case where the target
5108 ;; is the same as one of the inputs.
;; V4SI multiply with an earlyclobber destination (=&x).  The split fires
;; after reload, or earlier when the destination register is not mentioned in
;; either input; it rewrites the multiply into a plus-of-mult form and sets
;; operands[3] to the V4SI zero vector (presumably the addend; confirm against
;; the full replacement pattern).
5109 (define_insn_and_split "*sse5_mulv4si3"
5110 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5111 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5112 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5115 "&& (reload_completed
5116 || (!reg_mentioned_p (operands[0], operands[1])
5117 && !reg_mentioned_p (operands[0], operands[2])))"
5121 (plus:V4SI (mult:V4SI (match_dup 1)
5125 operands[3] = CONST0_RTX (V4SImode);
5127 [(set_attr "type" "ssemuladd")
5128 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or SSE5, split before reload.
;; Sequence visible in the body: multiply elements 2 and 0 with
;; gen_sse2_umulv2siv2di3; shift both inputs with gen_sse2_lshrti3 so that
;; elements 3 and 1 land in the even slots; multiply again; move each
;; product's element 2 down to element 1 with gen_sse2_pshufd_1; and
;; interleave the two partial results into op0 with gen_sse2_punpckldq.
5130 (define_insn_and_split "*sse2_mulv4si3"
5131 [(set (match_operand:V4SI 0 "register_operand" "")
5132 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5133 (match_operand:V4SI 2 "register_operand" "")))]
5134 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5135 && !(reload_completed || reload_in_progress)"
5140 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5146 t1 = gen_reg_rtx (V4SImode);
5147 t2 = gen_reg_rtx (V4SImode);
5148 t3 = gen_reg_rtx (V4SImode);
5149 t4 = gen_reg_rtx (V4SImode);
5150 t5 = gen_reg_rtx (V4SImode);
5151 t6 = gen_reg_rtx (V4SImode);
5152 thirtytwo = GEN_INT (32);
5154 /* Multiply elements 2 and 0. */
5155 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5158 /* Shift both input vectors down one element, so that elements 3
5159 and 1 are now in the slots for elements 2 and 0. For K8, at
5160 least, this is faster than using a shuffle. */
5161 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5162 gen_lowpart (TImode, op1),
5164 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5165 gen_lowpart (TImode, op2),
5167 /* Multiply elements 3 and 1. */
5168 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5171 /* Move the results in element 2 down to element 1; we don't care
5172 what goes in elements 2 and 3. */
5173 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5174 const0_rtx, const0_rtx));
5175 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5176 const0_rtx, const0_rtx));
5178 /* Merge the parts back together. */
5179 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, split before reload.  Two expansion sequences appear below:
;; one built from the SSE5 helpers pmacsdd / phadddq / pmacsdql, and one that
;; forms each 64-bit product from 32-bit partial products using widening
;; unsigned multiplies (sse2_umulv2siv2di3), 32-bit shifts and V2DI adds.
5183 (define_insn_and_split "mulv2di3"
5184 [(set (match_operand:V2DI 0 "register_operand" "")
5185 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5186 (match_operand:V2DI 2 "register_operand" "")))]
5188 && !(reload_completed || reload_in_progress)"
5193 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5198 /* op1: A,B,C,D, op2: E,F,G,H */
5200 op1 = gen_lowpart (V4SImode, operands[1]);
5201 op2 = gen_lowpart (V4SImode, operands[2]);
5202 t1 = gen_reg_rtx (V4SImode);
5203 t2 = gen_reg_rtx (V4SImode);
5204 t3 = gen_reg_rtx (V4SImode);
5205 t4 = gen_reg_rtx (V2DImode);
5206 t5 = gen_reg_rtx (V2DImode);
5209 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5216 emit_move_insn (t2, CONST0_RTX (V4SImode));
5218 /* t3: (B*E),(A*F),(D*G),(C*H) */
5219 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5221 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5222 emit_insn (gen_sse5_phadddq (t4, t3));
5224 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5225 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5227 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5228 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5235 t1 = gen_reg_rtx (V2DImode);
5236 t2 = gen_reg_rtx (V2DImode);
5237 t3 = gen_reg_rtx (V2DImode);
5238 t4 = gen_reg_rtx (V2DImode);
5239 t5 = gen_reg_rtx (V2DImode);
5240 t6 = gen_reg_rtx (V2DImode);
5241 thirtytwo = GEN_INT (32);
5243 /* Multiply low parts. */
5244 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5245 gen_lowpart (V4SImode, op2)));
5247 /* Shift input vectors right 32 bits so we can multiply high parts. */
5248 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5249 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5251 /* Multiply high parts by low parts. */
5252 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5253 gen_lowpart (V4SImode, t3)));
5254 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5255 gen_lowpart (V4SImode, t2)));
5257 /* Shift them back. */
5258 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5259 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5261 /* Add the three parts together. */
5262 emit_insn (gen_addv2di3 (t6, t1, t4));
5263 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, V8HI x V8HI -> V4SI, "hi" variant.  Computes the
;; V8HI product (mulv8hi3) and its signed high part (smulv8hi3_highpart),
;; then combines the two with vec_interleave_highv8hi into operand 0 viewed
;; as V8HI.
5267 (define_expand "vec_widen_smult_hi_v8hi"
5268 [(match_operand:V4SI 0 "register_operand" "")
5269 (match_operand:V8HI 1 "register_operand" "")
5270 (match_operand:V8HI 2 "register_operand" "")]
5273 rtx op1, op2, t1, t2, dest;
5277 t1 = gen_reg_rtx (V8HImode);
5278 t2 = gen_reg_rtx (V8HImode);
5279 dest = gen_lowpart (V8HImode, operands[0]);
5281 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5282 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5283 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, V8HI x V8HI -> V4SI, "lo" variant.  Same scheme
;; as the "hi" variant (mulv8hi3 plus smulv8hi3_highpart) but the two results
;; are combined with vec_interleave_lowv8hi.
5287 (define_expand "vec_widen_smult_lo_v8hi"
5288 [(match_operand:V4SI 0 "register_operand" "")
5289 (match_operand:V8HI 1 "register_operand" "")
5290 (match_operand:V8HI 2 "register_operand" "")]
5293 rtx op1, op2, t1, t2, dest;
5297 t1 = gen_reg_rtx (V8HImode);
5298 t2 = gen_reg_rtx (V8HImode);
5299 dest = gen_lowpart (V8HImode, operands[0]);
5301 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5302 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5303 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, V8HI x V8HI -> V4SI, "hi" variant.  Computes
;; the V8HI product (mulv8hi3) and its unsigned high part
;; (umulv8hi3_highpart), then combines them with vec_interleave_highv8hi.
5307 (define_expand "vec_widen_umult_hi_v8hi"
5308 [(match_operand:V4SI 0 "register_operand" "")
5309 (match_operand:V8HI 1 "register_operand" "")
5310 (match_operand:V8HI 2 "register_operand" "")]
5313 rtx op1, op2, t1, t2, dest;
5317 t1 = gen_reg_rtx (V8HImode);
5318 t2 = gen_reg_rtx (V8HImode);
5319 dest = gen_lowpart (V8HImode, operands[0]);
5321 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5322 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5323 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, V8HI x V8HI -> V4SI, "lo" variant.  Same scheme
;; as the "hi" variant (mulv8hi3 plus umulv8hi3_highpart) but the two results
;; are combined with vec_interleave_lowv8hi.
5327 (define_expand "vec_widen_umult_lo_v8hi"
5328 [(match_operand:V4SI 0 "register_operand" "")
5329 (match_operand:V8HI 1 "register_operand" "")
5330 (match_operand:V8HI 2 "register_operand" "")]
5333 rtx op1, op2, t1, t2, dest;
5337 t1 = gen_reg_rtx (V8HImode);
5338 t2 = gen_reg_rtx (V8HImode);
5339 dest = gen_lowpart (V8HImode, operands[0]);
5341 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5342 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5343 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply, V4SI x V4SI -> V2DI, "hi" variant.  Both inputs
;; are first rearranged with pshufd into fresh V4SI temporaries, which are
;; then multiplied by sse5_mulv2div2di3_high.
5347 (define_expand "vec_widen_smult_hi_v4si"
5348 [(match_operand:V2DI 0 "register_operand" "")
5349 (match_operand:V4SI 1 "register_operand" "")
5350 (match_operand:V4SI 2 "register_operand" "")]
5355 t1 = gen_reg_rtx (V4SImode);
5356 t2 = gen_reg_rtx (V4SImode);
5358 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5363 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5368 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed multiply, V4SI x V4SI -> V2DI, "lo" variant.  Both inputs
;; are first rearranged with pshufd into fresh V4SI temporaries, which are
;; then multiplied by sse5_mulv2div2di3_low.
5372 (define_expand "vec_widen_smult_lo_v4si"
5373 [(match_operand:V2DI 0 "register_operand" "")
5374 (match_operand:V4SI 1 "register_operand" "")
5375 (match_operand:V4SI 2 "register_operand" "")]
5380 t1 = gen_reg_rtx (V4SImode);
5381 t2 = gen_reg_rtx (V4SImode);
5383 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5388 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5393 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply, V4SI x V4SI -> V2DI, "hi" variant.  Each input
;; is interleaved with itself (vec_interleave_highv4si) and the two results
;; are multiplied with sse2_umulv2siv2di3.
5398 (define_expand "vec_widen_umult_hi_v4si"
5399 [(match_operand:V2DI 0 "register_operand" "")
5400 (match_operand:V4SI 1 "register_operand" "")
5401 (match_operand:V4SI 2 "register_operand" "")]
5404 rtx op1, op2, t1, t2;
5408 t1 = gen_reg_rtx (V4SImode);
5409 t2 = gen_reg_rtx (V4SImode);
5411 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5412 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5413 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, V4SI x V4SI -> V2DI, "lo" variant.  Each input
;; is interleaved with itself (vec_interleave_lowv4si) and the two results
;; are multiplied with sse2_umulv2siv2di3.
5417 (define_expand "vec_widen_umult_lo_v4si"
5418 [(match_operand:V2DI 0 "register_operand" "")
5419 (match_operand:V4SI 1 "register_operand" "")
5420 (match_operand:V4SI 2 "register_operand" "")]
5423 rtx op1, op2, t1, t2;
5427 t1 = gen_reg_rtx (V4SImode);
5428 t2 = gen_reg_rtx (V4SImode);
5430 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5431 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5432 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product of two V8HI vectors accumulated into V4SI:
;; operand 0 = operand 3 + pmaddwd (operand 1, operand 2).
5436 (define_expand "sdot_prodv8hi"
5437 [(match_operand:V4SI 0 "register_operand" "")
5438 (match_operand:V8HI 1 "register_operand" "")
5439 (match_operand:V8HI 2 "register_operand" "")
5440 (match_operand:V4SI 3 "register_operand" "")]
5443 rtx t = gen_reg_rtx (V4SImode);
5444 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5445 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of two V4SI vectors accumulated into V2DI.  The
;; widening product of operands 1 and 2 (sse2_umulv2siv2di3) is added to the
;; accumulator operand 3; both inputs are then shifted right as whole TImode
;; values, multiplied the same way, and that second product is added in to
;; give operand 0.
5449 (define_expand "udot_prodv4si"
5450 [(match_operand:V2DI 0 "register_operand" "")
5451 (match_operand:V4SI 1 "register_operand" "")
5452 (match_operand:V4SI 2 "register_operand" "")
5453 (match_operand:V2DI 3 "register_operand" "")]
5458 t1 = gen_reg_rtx (V2DImode);
5459 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5460 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5462 t2 = gen_reg_rtx (V4SImode);
5463 t3 = gen_reg_rtx (V4SImode);
5464 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5465 gen_lowpart (TImode, operands[1]),
5467 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5468 gen_lowpart (TImode, operands[2]),
5471 t4 = gen_reg_rtx (V2DImode);
5472 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5474 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX (VEX-prefixed, three-operand) form of the SSEMODE24 right shift that
;; emits vpsra<ssevecsize>.  The SImode count is operand 2 ("xN").
5478 (define_insn "*avx_ashr<mode>3"
5479 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5481 (match_operand:SSEMODE24 1 "register_operand" "x")
5482 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5484 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5485 [(set_attr "type" "sseishft")
5486 (set_attr "prefix" "vex")
5487 (set_attr "mode" "TI")])
;; Two-operand form of the SSEMODE24 right shift that emits
;; psra<ssevecsize>; operand 1 is tied to the destination ("0").
5489 (define_insn "ashr<mode>3"
5490 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5492 (match_operand:SSEMODE24 1 "register_operand" "0")
5493 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5495 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5496 [(set_attr "type" "sseishft")
5497 (set_attr "prefix_data16" "1")
5498 (set_attr "mode" "TI")])
;; AVX (VEX-prefixed, three-operand) logical right shift for SSEMODE248:
;; emits vpsrl<ssevecsize> with the SImode count in operand 2.
5500 (define_insn "*avx_lshr<mode>3"
5501 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5502 (lshiftrt:SSEMODE248
5503 (match_operand:SSEMODE248 1 "register_operand" "x")
5504 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5506 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5507 [(set_attr "type" "sseishft")
5508 (set_attr "prefix" "vex")
5509 (set_attr "mode" "TI")])
;; Two-operand logical right shift for SSEMODE248: emits psrl<ssevecsize>;
;; operand 1 is tied to the destination ("0").
5511 (define_insn "lshr<mode>3"
5512 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5513 (lshiftrt:SSEMODE248
5514 (match_operand:SSEMODE248 1 "register_operand" "0")
5515 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5517 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5518 [(set_attr "type" "sseishft")
5519 (set_attr "prefix_data16" "1")
5520 (set_attr "mode" "TI")])
;; AVX (VEX-prefixed, three-operand) form of the SSEMODE248 left shift that
;; emits vpsll<ssevecsize>.  The SImode count is operand 2 ("xN").
5522 (define_insn "*avx_ashl<mode>3"
5523 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5525 (match_operand:SSEMODE248 1 "register_operand" "x")
5526 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5528 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5529 [(set_attr "type" "sseishft")
5530 (set_attr "prefix" "vex")
5531 (set_attr "mode" "TI")])
;; Two-operand form of the SSEMODE248 left shift that emits
;; psll<ssevecsize>; operand 1 is tied to the destination ("0").
5533 (define_insn "ashl<mode>3"
5534 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5536 (match_operand:SSEMODE248 1 "register_operand" "0")
5537 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5539 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5540 [(set_attr "type" "sseishft")
5541 (set_attr "prefix_data16" "1")
5542 (set_attr "mode" "TI")])
;; Whole-vector left shift for the 16-byte integer modes.  Operands 0 and 1
;; are re-viewed as TImode so the pattern becomes a TImode ashift; the count
;; must satisfy const_0_to_255_mul_8_operand.
5544 (define_expand "vec_shl_<mode>"
5545 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5546 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5547 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5550 operands[0] = gen_lowpart (TImode, operands[0]);
5551 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the 16-byte integer modes.  Operands
;; 0 and 1 are re-viewed as TImode so the pattern becomes a TImode lshiftrt;
;; the count must satisfy const_0_to_255_mul_8_operand.
5554 (define_expand "vec_shr_<mode>"
5555 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5556 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5557 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5560 operands[0] = gen_lowpart (TImode, operands[0]);
5561 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX three-operand integer min/max for V16QI/V8HI/V4SI: emits
;; vp<maxminiprefix><ssevecsize>.  Operands 1 and 2 are commutative ("%").
5564 (define_insn "*avx_<code><mode>3"
5565 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5567 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5568 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5569 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5570 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5571 [(set_attr "type" "sseiadd")
5572 (set_attr "prefix" "vex")
5573 (set_attr "mode" "TI")])
;; Named expander for the V16QI <code> operation; it only legitimizes the
;; operands through ix86_fixup_binary_operands_no_copy.
5575 (define_expand "<code>v16qi3"
5576 [(set (match_operand:V16QI 0 "register_operand" "")
5578 (match_operand:V16QI 1 "nonimmediate_operand" "")
5579 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5581 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 two-operand V16QI min/max: emits p<maxminiprefix>b.  Operand 1 is
;; tied to the destination and commutative with operand 2 ("%0").
5583 (define_insn "*<code>v16qi3"
5584 [(set (match_operand:V16QI 0 "register_operand" "=x")
5586 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5587 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5588 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5589 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5590 [(set_attr "type" "sseiadd")
5591 (set_attr "prefix_data16" "1")
5592 (set_attr "mode" "TI")])
;; Named expander for the V8HI <code> operation; it only legitimizes the
;; operands through ix86_fixup_binary_operands_no_copy.
5594 (define_expand "<code>v8hi3"
5595 [(set (match_operand:V8HI 0 "register_operand" "")
5597 (match_operand:V8HI 1 "nonimmediate_operand" "")
5598 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5600 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 two-operand V8HI min/max: emits p<maxminiprefix>w.  Operand 1 is
;; tied to the destination and commutative with operand 2 ("%0").
5602 (define_insn "*<code>v8hi3"
5603 [(set (match_operand:V8HI 0 "register_operand" "=x")
5605 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5606 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5607 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5608 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5609 [(set_attr "type" "sseiadd")
5610 (set_attr "prefix_data16" "1")
5611 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  One path just fixes up the operands for UMAX.
;; The other computes an unsigned saturating subtract (operand 1 minus
;; operand 2) and then adds operand 2 back, which yields the maximum.  A fresh
;; temporary holds the intermediate result when the destination is the same
;; rtx as operand 2, so operand 2 is still intact for the final add.
5613 (define_expand "umaxv8hi3"
5614 [(set (match_operand:V8HI 0 "register_operand" "")
5615 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5616 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5620 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5623 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5624 if (rtx_equal_p (op3, op2))
5625 op3 = gen_reg_rtx (V8HImode);
5626 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5627 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for V16QI/V4SI.  One path just fixes up the operands for
;; SMAX.  The other expands through ix86_expand_int_vcond as the conditional
;; select (operand 1 > operand 2) ? operand 1 : operand 2.
5632 (define_expand "smax<mode>3"
5633 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5634 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5635 (match_operand:SSEMODE14 2 "register_operand" "")))]
5639 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5645 xops[0] = operands[0];
5646 xops[1] = operands[1];
5647 xops[2] = operands[2];
5648 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5649 xops[4] = operands[1];
5650 xops[5] = operands[2];
5651 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand min/max for V16QI/V4SI: emits
;; p<maxminiprefix><ssevecsize>.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0").
5657 (define_insn "*sse4_1_<code><mode>3"
5658 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5660 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5661 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5662 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5663 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5664 [(set_attr "type" "sseiadd")
5665 (set_attr "prefix_extra" "1")
5666 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  One path just fixes up the operands for UMAX.
;; The other expands through ix86_expand_int_vcond as the conditional select
;; (operand 1 >u operand 2) ? operand 1 : operand 2.
5668 (define_expand "umaxv4si3"
5669 [(set (match_operand:V4SI 0 "register_operand" "")
5670 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5671 (match_operand:V4SI 2 "register_operand" "")))]
5675 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5681 xops[0] = operands[0];
5682 xops[1] = operands[1];
5683 xops[2] = operands[2];
5684 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5685 xops[4] = operands[1];
5686 xops[5] = operands[2];
5687 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand min/max for V8HI/V4SI: emits
;; p<maxminiprefix><ssevecsize>.  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0").
5693 (define_insn "*sse4_1_<code><mode>3"
5694 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5696 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5697 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5698 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5699 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5700 [(set_attr "type" "sseiadd")
5701 (set_attr "prefix_extra" "1")
5702 (set_attr "mode" "TI")])
;; Signed minimum for V16QI/V4SI.  One path just fixes up the operands for
;; SMIN.  The other expands through ix86_expand_int_vcond with the select
;; arms swapped: (operand 1 > operand 2) ? operand 2 : operand 1.
5704 (define_expand "smin<mode>3"
5705 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5706 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5707 (match_operand:SSEMODE14 2 "register_operand" "")))]
5711 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5717 xops[0] = operands[0];
5718 xops[1] = operands[2];
5719 xops[2] = operands[1];
5720 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5721 xops[4] = operands[1];
5722 xops[5] = operands[2];
5723 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for V8HI/V4SI.  One path just fixes up the operands for
;; UMIN.  The other expands through ix86_expand_int_vcond with the select
;; arms swapped: (operand 1 >u operand 2) ? operand 2 : operand 1.
5729 (define_expand "umin<mode>3"
5730 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5731 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5732 (match_operand:SSEMODE24 2 "register_operand" "")))]
5736 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5742 xops[0] = operands[0];
5743 xops[1] = operands[2];
5744 xops[2] = operands[1];
5745 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5746 xops[4] = operands[1];
5747 xops[5] = operands[2];
5748 ok = ix86_expand_int_vcond (xops);
5754 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5756 ;; Parallel integral comparisons
5758 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on V16QI/V8HI/V4SI, enabled for SSE2
;; when SSE5 is not in use; it only legitimizes the operands for EQ.
5760 (define_expand "sse2_eq<mode>3"
5761 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5763 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5764 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5765 "TARGET_SSE2 && !TARGET_SSE5"
5766 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX three-operand element-wise equality for V16QI/V8HI/V4SI/V2DI: emits
;; vpcmpeq<ssevecsize>.  Operands 1 and 2 are commutative ("%").
5768 (define_insn "*avx_eq<mode>3"
5769 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5771 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5772 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5773 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5774 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5775 [(set_attr "type" "ssecmp")
5776 (set_attr "prefix" "vex")
5777 (set_attr "mode" "TI")])
;; SSE2 two-operand element-wise equality for V16QI/V8HI/V4SI (not used when
;; SSE5 is enabled): emits pcmpeq<ssevecsize>, operand 1 tied to the
;; destination.
5779 (define_insn "*sse2_eq<mode>3"
5780 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5782 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
5783 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5784 "TARGET_SSE2 && !TARGET_SSE5
5785 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5786 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
5787 [(set_attr "type" "ssecmp")
5788 (set_attr "prefix_data16" "1")
5789 (set_attr "mode" "TI")])
;; Expander for V2DI element-wise equality; it only legitimizes the operands
;; for EQ through ix86_fixup_binary_operands_no_copy.
5791 (define_expand "sse4_1_eqv2di3"
5792 [(set (match_operand:V2DI 0 "register_operand" "")
5794 (match_operand:V2DI 1 "nonimmediate_operand" "")
5795 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5797 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand V2DI equality: emits pcmpeqq, operand 1 tied to the
;; destination and commutative with operand 2 ("%0").
5799 (define_insn "*sse4_1_eqv2di3"
5800 [(set (match_operand:V2DI 0 "register_operand" "=x")
5802 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
5803 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5804 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5805 "pcmpeqq\t{%2, %0|%0, %2}"
5806 [(set_attr "type" "ssecmp")
5807 (set_attr "prefix_extra" "1")
5808 (set_attr "mode" "TI")])
;; AVX three-operand element-wise greater-than for V16QI/V8HI/V4SI/V2DI:
;; emits vpcmpgt<ssevecsize>.  Operand 1 must be a register; operand 2 may be
;; memory.
5810 (define_insn "*avx_gt<mode>3"
5811 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5813 (match_operand:SSEMODE1248 1 "register_operand" "x")
5814 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5816 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5817 [(set_attr "type" "ssecmp")
5818 (set_attr "prefix" "vex")
5819 (set_attr "mode" "TI")])
;; SSE2 two-operand element-wise greater-than for V16QI/V8HI/V4SI (not used
;; when SSE5 is enabled): emits pcmpgt<ssevecsize>, operand 1 tied to the
;; destination.
5821 (define_insn "sse2_gt<mode>3"
5822 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5824 (match_operand:SSEMODE124 1 "register_operand" "0")
5825 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5826 "TARGET_SSE2 && !TARGET_SSE5"
5827 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
5828 [(set_attr "type" "ssecmp")
5829 (set_attr "prefix_data16" "1")
5830 (set_attr "mode" "TI")])
;; Two-operand V2DI greater-than: emits pcmpgtq, operand 1 tied to the
;; destination.
5832 (define_insn "sse4_2_gtv2di3"
5833 [(set (match_operand:V2DI 0 "register_operand" "=x")
5835 (match_operand:V2DI 1 "register_operand" "0")
5836 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5838 "pcmpgtq\t{%2, %0|%0, %2}"
5839 [(set_attr "type" "ssecmp")
5840 (set_attr "mode" "TI")])
;; Vector conditional select: operand 0 = (operand 4 <op3> operand 5)
;; ? operand 1 : operand 2, element-wise.  The whole expansion is delegated
;; to ix86_expand_int_vcond.
5842 (define_expand "vcond<mode>"
5843 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
5844 (if_then_else:SSEMODE124C8
5845 (match_operator 3 ""
5846 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
5847 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
5848 (match_operand:SSEMODE124C8 1 "general_operand" "")
5849 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
5852 bool ok = ix86_expand_int_vcond (operands);
;; Counterpart of vcond<mode> under the "vcondu" name, with the same operand
;; layout; the expansion is likewise delegated to ix86_expand_int_vcond.
5857 (define_expand "vcondu<mode>"
5858 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
5859 (if_then_else:SSEMODE124C8
5860 (match_operator 3 ""
5861 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
5862 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
5863 (match_operand:SSEMODE124C8 1 "general_operand" "")
5864 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
5867 bool ok = ix86_expand_int_vcond (operands);
5872 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5874 ;; Parallel bitwise logical operations
5876 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer modes, expressed as an xor.  The
;; preparation code builds a constant vector whose every element is -1 and
;; forces it into a register as operand 2.
5878 (define_expand "one_cmpl<mode>2"
5879 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5880 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5884 int i, n = GET_MODE_NUNITS (<MODE>mode);
5885 rtvec v = rtvec_alloc (n);
5887 for (i = 0; i < n; ++i)
5888 RTVEC_ELT (v, i) = constm1_rtx;
5890 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 32-byte integer modes: operand 1 is complemented and
;; combined with operand 2.  Emitted as the floating-point form vandnps,
;; with the insn mode taken from <avxvecpsmode>.
5893 (define_insn "*avx_andnot<mode>3"
5894 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5896 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
5897 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5899 "vandnps\t{%2, %1, %0|%0, %1, %2}"
5900 [(set_attr "type" "sselog")
5901 (set_attr "prefix" "vex")
5902 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the 16-byte integer modes on SSE targets without SSE2:
;; emitted as andnps (mode V4SF), with the complemented operand 1 tied to
;; the destination.
5904 (define_insn "*sse_andnot<mode>3"
5905 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5907 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5908 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5909 "(TARGET_SSE && !TARGET_SSE2)"
5910 "andnps\t{%2, %0|%0, %2}"
5911 [(set_attr "type" "sselog")
5912 (set_attr "mode" "V4SF")])
;; AVX three-operand and-not for the 16-byte integer modes: emits vpandn
;; with operand 1 as the complemented input.
5914 (define_insn "*avx_andnot<mode>3"
5915 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5917 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
5918 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5920 "vpandn\t{%2, %1, %0|%0, %1, %2}"
5921 [(set_attr "type" "sselog")
5922 (set_attr "prefix" "vex")
5923 (set_attr "mode" "TI")])
;; Two-operand and-not for the 16-byte integer modes: emits pandn, with the
;; complemented operand 1 tied to the destination.
5925 (define_insn "sse2_andnot<mode>3"
5926 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5928 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5929 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5931 "pandn\t{%2, %0|%0, %2}"
5932 [(set_attr "type" "sselog")
5933 (set_attr "prefix_data16" "1")
5934 (set_attr "mode" "TI")])
;; And-not on a TFmode value held in an SSE register: emits pandn, with the
;; complemented operand 1 tied to the destination.
5936 (define_insn "*andnottf3"
5937 [(set (match_operand:TF 0 "register_operand" "=x")
5939 (not:TF (match_operand:TF 1 "register_operand" "0"))
5940 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5942 "pandn\t{%2, %0|%0, %2}"
5943 [(set_attr "type" "sselog")
5944 (set_attr "prefix_data16" "1")
5945 (set_attr "mode" "TI")])
;; Named expander for the <code> operation on the 16-byte integer modes; it
;; only legitimizes the operands through ix86_fixup_binary_operands_no_copy.
5947 (define_expand "<code><mode>3"
5948 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5950 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5951 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5953 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX logical operation on the 32-byte integer modes, emitted as the
;; floating-point form v<plogicprefix>ps with the insn mode taken from
;; <avxvecpsmode>.  Operands 1 and 2 are commutative ("%").
5955 (define_insn "*avx_<code><mode>3"
5956 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5958 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
5959 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5961 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5962 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
5963 [(set_attr "type" "sselog")
5964 (set_attr "prefix" "vex")
5965 (set_attr "mode" "<avxvecpsmode>")])
;; Logical operation on the 16-byte integer modes for SSE targets without
;; SSE2: emitted as <plogicprefix>ps (mode V4SF), operand 1 tied to the
;; destination.
5967 (define_insn "*sse_<code><mode>3"
5968 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5970 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5971 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5972 "(TARGET_SSE && !TARGET_SSE2)
5973 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5974 "<plogicprefix>ps\t{%2, %0|%0, %2}"
5975 [(set_attr "type" "sselog")
5976 (set_attr "mode" "V4SF")])
;; AVX three-operand logical operation on the 16-byte integer modes: emits
;; vp<plogicprefix>.  Operands 1 and 2 are commutative ("%").
5978 (define_insn "*avx_<code><mode>3"
5979 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5981 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
5982 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5984 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5985 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
5986 [(set_attr "type" "sselog")
5987 (set_attr "prefix" "vex")
5988 (set_attr "mode" "TI")])
;; SSE2 two-operand logical operation on the 16-byte integer modes: emits
;; p<plogicprefix>, operand 1 tied to the destination and commutative with
;; operand 2 ("%0").
5990 (define_insn "*sse2_<code><mode>3"
5991 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5993 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5994 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5995 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5996 "p<plogicprefix>\t{%2, %0|%0, %2}"
5997 [(set_attr "type" "sselog")
5998 (set_attr "prefix_data16" "1")
5999 (set_attr "mode" "TI")])
;; Named expander for the <code> operation on TFmode; it only legitimizes
;; the operands through ix86_fixup_binary_operands_no_copy.
6001 (define_expand "<code>tf3"
6002 [(set (match_operand:TF 0 "register_operand" "")
6004 (match_operand:TF 1 "nonimmediate_operand" "")
6005 (match_operand:TF 2 "nonimmediate_operand" "")))]
6007 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 logical operation on a TFmode value held in an SSE register: emits
;; p<plogicprefix>, operand 1 tied to the destination.
6009 (define_insn "*<code>tf3"
6010 [(set (match_operand:TF 0 "register_operand" "=x")
6012 (match_operand:TF 1 "nonimmediate_operand" "%0")
6013 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6014 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6015 "p<plogicprefix>\t{%2, %0|%0, %2}"
6016 [(set_attr "type" "sselog")
6017 (set_attr "prefix_data16" "1")
6018 (set_attr "mode" "TI")])
6020 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6022 ;; Parallel integral element swizzling
6024 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6027 ;; op1 = abcdefghijklmnop
6028 ;; op2 = qrstuvwxyz012345
6029 ;; h1 = aqbrcsdteufvgwhx
6030 ;; l1 = iyjzk0l1m2n3o4p5
6031 ;; h2 = aiqybjrzcks0dlt1
6032 ;; l2 = emu2fnv3gow4hpx5
6033 ;; h3 = aeimquy2bfjnrvz3
6034 ;; l3 = cgkosw04dhlptx15
6035 ;; result = bdfhjlnprtvxz135
;; Truncating pack of two V8HI vectors into one V16QI.  One path hands the
;; operands to ix86_expand_sse5_pack.  The other views both inputs as V16QI
;; and applies three rounds of high/low byte interleaves followed by a final
;; low interleave, as traced by the lettered diagram preceding this pattern.
6036 (define_expand "vec_pack_trunc_v8hi"
6037 [(match_operand:V16QI 0 "register_operand" "")
6038 (match_operand:V8HI 1 "register_operand" "")
6039 (match_operand:V8HI 2 "register_operand" "")]
6042 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6046 ix86_expand_sse5_pack (operands);
6050 op1 = gen_lowpart (V16QImode, operands[1]);
6051 op2 = gen_lowpart (V16QImode, operands[2]);
6052 h1 = gen_reg_rtx (V16QImode);
6053 l1 = gen_reg_rtx (V16QImode);
6054 h2 = gen_reg_rtx (V16QImode);
6055 l2 = gen_reg_rtx (V16QImode);
6056 h3 = gen_reg_rtx (V16QImode);
6057 l3 = gen_reg_rtx (V16QImode);
6059 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6060 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6061 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6062 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6063 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6064 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6065 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6076 ;; result = bdfhjlnp
;; Truncating pack of two V4SI vectors into one V8HI.  One path hands the
;; operands to ix86_expand_sse5_pack.  The other views both inputs as V8HI
;; and applies two rounds of high/low word interleaves followed by a final
;; low interleave.
6077 (define_expand "vec_pack_trunc_v4si"
6078 [(match_operand:V8HI 0 "register_operand" "")
6079 (match_operand:V4SI 1 "register_operand" "")
6080 (match_operand:V4SI 2 "register_operand" "")]
6083 rtx op1, op2, h1, l1, h2, l2;
6087 ix86_expand_sse5_pack (operands);
6091 op1 = gen_lowpart (V8HImode, operands[1]);
6092 op2 = gen_lowpart (V8HImode, operands[2]);
6093 h1 = gen_reg_rtx (V8HImode);
6094 l1 = gen_reg_rtx (V8HImode);
6095 h2 = gen_reg_rtx (V8HImode);
6096 l2 = gen_reg_rtx (V8HImode);
6098 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6099 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6100 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6101 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6102 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Truncating pack of two V2DI vectors into one V4SI.  One path hands the
;; operands to ix86_expand_sse5_pack.  The other views both inputs as V4SI
;; and applies one round of high/low dword interleaves followed by a final
;; low interleave.
6112 (define_expand "vec_pack_trunc_v2di"
6113 [(match_operand:V4SI 0 "register_operand" "")
6114 (match_operand:V2DI 1 "register_operand" "")
6115 (match_operand:V2DI 2 "register_operand" "")]
6118 rtx op1, op2, h1, l1;
6122 ix86_expand_sse5_pack (operands);
6126 op1 = gen_lowpart (V4SImode, operands[1]);
6127 op2 = gen_lowpart (V4SImode, operands[2]);
6128 h1 = gen_reg_rtx (V4SImode);
6129 l1 = gen_reg_rtx (V4SImode);
6131 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6132 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6133 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Interleave the high halves of two V16QI vectors.  The selection pairs
;; indices 8..15 with 24..31 in alternation; the expansion emits
;; sse2_punpckhbw.
6137 (define_expand "vec_interleave_highv16qi"
6138 [(set (match_operand:V16QI 0 "register_operand" "")
6141 (match_operand:V16QI 1 "register_operand" "")
6142 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6143 (parallel [(const_int 8) (const_int 24)
6144 (const_int 9) (const_int 25)
6145 (const_int 10) (const_int 26)
6146 (const_int 11) (const_int 27)
6147 (const_int 12) (const_int 28)
6148 (const_int 13) (const_int 29)
6149 (const_int 14) (const_int 30)
6150 (const_int 15) (const_int 31)])))]
6153 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V16QI vectors.  The selection pairs
;; indices 0..7 with 16..23 in alternation; the expansion emits
;; sse2_punpcklbw.
6157 (define_expand "vec_interleave_lowv16qi"
6158 [(set (match_operand:V16QI 0 "register_operand" "")
6161 (match_operand:V16QI 1 "register_operand" "")
6162 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6163 (parallel [(const_int 0) (const_int 16)
6164 (const_int 1) (const_int 17)
6165 (const_int 2) (const_int 18)
6166 (const_int 3) (const_int 19)
6167 (const_int 4) (const_int 20)
6168 (const_int 5) (const_int 21)
6169 (const_int 6) (const_int 22)
6170 (const_int 7) (const_int 23)])))]
6173 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V8HI vectors.  The selection pairs
;; indices 4..7 with 12..15 in alternation; the expansion emits
;; sse2_punpckhwd.  Operand 0 carries an empty constraint string, matching
;; the other vec_interleave expanders.
6177 (define_expand "vec_interleave_highv8hi"
6178 [(set (match_operand:V8HI 0 "register_operand" "")
6181 (match_operand:V8HI 1 "register_operand" "")
6182 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6183 (parallel [(const_int 4) (const_int 12)
6184 (const_int 5) (const_int 13)
6185 (const_int 6) (const_int 14)
6186 (const_int 7) (const_int 15)])))]
6189 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V8HI vectors.  The selection pairs
;; indices 0..3 with 8..11 in alternation; the expansion emits
;; sse2_punpcklwd.
6193 (define_expand "vec_interleave_lowv8hi"
6194 [(set (match_operand:V8HI 0 "register_operand" "")
6197 (match_operand:V8HI 1 "register_operand" "")
6198 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6199 (parallel [(const_int 0) (const_int 8)
6200 (const_int 1) (const_int 9)
6201 (const_int 2) (const_int 10)
6202 (const_int 3) (const_int 11)])))]
6205 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V4SI vectors (selection indices
;; 2, 6, 3, 7); the expansion emits sse2_punpckhdq.
6209 (define_expand "vec_interleave_highv4si"
6210 [(set (match_operand:V4SI 0 "register_operand" "")
6213 (match_operand:V4SI 1 "register_operand" "")
6214 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6215 (parallel [(const_int 2) (const_int 6)
6216 (const_int 3) (const_int 7)])))]
6219 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Interleave the low halves of two V4SI vectors (selection indices
;; 0, 4, 1, 5); the expansion emits sse2_punpckldq.
6223 (define_expand "vec_interleave_lowv4si"
6224 [(set (match_operand:V4SI 0 "register_operand" "")
6227 (match_operand:V4SI 1 "register_operand" "")
6228 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6229 (parallel [(const_int 0) (const_int 4)
6230 (const_int 1) (const_int 5)])))]
6233 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Interleave the high elements of two V2DI vectors (the selection starts at
;; index 1); the expansion emits sse2_punpckhqdq.
6237 (define_expand "vec_interleave_highv2di"
6238 [(set (match_operand:V2DI 0 "register_operand" "")
6241 (match_operand:V2DI 1 "register_operand" "")
6242 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6243 (parallel [(const_int 1)
6247 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Interleave the low elements of two V2DI vectors (the selection starts at
;; index 0); the expansion emits sse2_punpcklqdq.
6251 (define_expand "vec_interleave_lowv2di"
6252 [(set (match_operand:V2DI 0 "register_operand" "")
6255 (match_operand:V2DI 1 "register_operand" "")
6256 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6257 (parallel [(const_int 0)
6261 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Interleave the high halves of two V4SF vectors (selection indices
;; 2, 6, 3, 7), expressed directly as the RTL template.
6265 (define_expand "vec_interleave_highv4sf"
6266 [(set (match_operand:V4SF 0 "register_operand" "")
6269 (match_operand:V4SF 1 "register_operand" "")
6270 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6271 (parallel [(const_int 2) (const_int 6)
6272 (const_int 3) (const_int 7)])))]
;; Named expander "vec_interleave_lowv4sf": V4SF result from V4SF operand 1
;; (register) and operand 2 (register or memory), using the index list
;; 0,4,1,5 (element i paired with element i+4 for i = 0..1).
6275 (define_expand "vec_interleave_lowv4sf"
6276 [(set (match_operand:V4SF 0 "register_operand" "")
6279 (match_operand:V4SF 1 "register_operand" "")
6280 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6281 (parallel [(const_int 0) (const_int 4)
6282 (const_int 1) (const_int 5)])))]
;; Named expander "vec_interleave_highv2df": V2DF result from V2DF operand 1
;; (register) and operand 2 (register or memory).  The index list begins
;; with element 1.
6285 (define_expand "vec_interleave_highv2df"
6286 [(set (match_operand:V2DF 0 "register_operand" "")
6289 (match_operand:V2DF 1 "register_operand" "")
6290 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6291 (parallel [(const_int 1)
;; Named expander "vec_interleave_lowv2df": V2DF result from V2DF operand 1
;; (register) and operand 2 (register or memory).  The index list begins
;; with element 0.
6295 (define_expand "vec_interleave_lowv2df"
6296 [(set (match_operand:V2DF 0 "register_operand" "")
6299 (match_operand:V2DF 1 "register_operand" "")
6300 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6301 (parallel [(const_int 0)
;; AVX three-operand vpacksswb: two V8HI sources (operand 1 in an SSE
;; register, operand 2 in a register or memory) produce a V16QI result in a
;; separate destination register.  Marked as VEX-encoded.
6305 (define_insn "*avx_packsswb"
6306 [(set (match_operand:V16QI 0 "register_operand" "=x")
6309 (match_operand:V8HI 1 "register_operand" "x"))
6311 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6313 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6314 [(set_attr "type" "sselog")
6315 (set_attr "prefix" "vex")
6316 (set_attr "mode" "TI")])
;; Two-operand packsswb: operand 1 (V8HI) is tied to the destination by the
;; "0" constraint, operand 2 (V8HI) may be a register or memory; the result
;; is V16QI.  Uses the data16 (0x66) prefix.
6318 (define_insn "sse2_packsswb"
6319 [(set (match_operand:V16QI 0 "register_operand" "=x")
6322 (match_operand:V8HI 1 "register_operand" "0"))
6324 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6326 "packsswb\t{%2, %0|%0, %2}"
6327 [(set_attr "type" "sselog")
6328 (set_attr "prefix_data16" "1")
6329 (set_attr "mode" "TI")])
;; AVX three-operand vpackssdw: two V4SI sources (operand 1 in an SSE
;; register, operand 2 in a register or memory) produce a V8HI result in a
;; separate destination register.  Marked as VEX-encoded.
6331 (define_insn "*avx_packssdw"
6332 [(set (match_operand:V8HI 0 "register_operand" "=x")
6335 (match_operand:V4SI 1 "register_operand" "x"))
6337 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6339 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6340 [(set_attr "type" "sselog")
6341 (set_attr "prefix" "vex")
6342 (set_attr "mode" "TI")])
;; Two-operand packssdw: operand 1 (V4SI) is tied to the destination by the
;; "0" constraint, operand 2 (V4SI) may be a register or memory; the result
;; is V8HI.  Uses the data16 (0x66) prefix.
6344 (define_insn "sse2_packssdw"
6345 [(set (match_operand:V8HI 0 "register_operand" "=x")
6348 (match_operand:V4SI 1 "register_operand" "0"))
6350 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6352 "packssdw\t{%2, %0|%0, %2}"
6353 [(set_attr "type" "sselog")
6354 (set_attr "prefix_data16" "1")
6355 (set_attr "mode" "TI")])
;; AVX three-operand vpackuswb: two V8HI sources (operand 1 in an SSE
;; register, operand 2 in a register or memory) produce a V16QI result in a
;; separate destination register.  Marked as VEX-encoded.
6357 (define_insn "*avx_packuswb"
6358 [(set (match_operand:V16QI 0 "register_operand" "=x")
6361 (match_operand:V8HI 1 "register_operand" "x"))
6363 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6365 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6366 [(set_attr "type" "sselog")
6367 (set_attr "prefix" "vex")
6368 (set_attr "mode" "TI")])
;; Two-operand packuswb: operand 1 (V8HI) is tied to the destination by the
;; "0" constraint, operand 2 (V8HI) may be a register or memory; the result
;; is V16QI.  Uses the data16 (0x66) prefix.
6370 (define_insn "sse2_packuswb"
6371 [(set (match_operand:V16QI 0 "register_operand" "=x")
6374 (match_operand:V8HI 1 "register_operand" "0"))
6376 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6378 "packuswb\t{%2, %0|%0, %2}"
6379 [(set_attr "type" "sselog")
6380 (set_attr "prefix_data16" "1")
6381 (set_attr "mode" "TI")])
;; AVX three-operand vpunpckhbw on V16QI.  The index list pairs element i
;; with element i+16 for i = 8..15, i.e. the upper eight bytes of each
;; source are interleaved.  Operand 2 may be memory; VEX-encoded.
6383 (define_insn "*avx_punpckhbw"
6384 [(set (match_operand:V16QI 0 "register_operand" "=x")
6387 (match_operand:V16QI 1 "register_operand" "x")
6388 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6389 (parallel [(const_int 8) (const_int 24)
6390 (const_int 9) (const_int 25)
6391 (const_int 10) (const_int 26)
6392 (const_int 11) (const_int 27)
6393 (const_int 12) (const_int 28)
6394 (const_int 13) (const_int 29)
6395 (const_int 14) (const_int 30)
6396 (const_int 15) (const_int 31)])))]
6398 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6399 [(set_attr "type" "sselog")
6400 (set_attr "prefix" "vex")
6401 (set_attr "mode" "TI")])
;; Two-operand punpckhbw on V16QI; operand 1 is tied to the destination
;; ("0").  The index list pairs element i with element i+16 for i = 8..15.
;; Uses the data16 (0x66) prefix.
6403 (define_insn "sse2_punpckhbw"
6404 [(set (match_operand:V16QI 0 "register_operand" "=x")
6407 (match_operand:V16QI 1 "register_operand" "0")
6408 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6409 (parallel [(const_int 8) (const_int 24)
6410 (const_int 9) (const_int 25)
6411 (const_int 10) (const_int 26)
6412 (const_int 11) (const_int 27)
6413 (const_int 12) (const_int 28)
6414 (const_int 13) (const_int 29)
6415 (const_int 14) (const_int 30)
6416 (const_int 15) (const_int 31)])))]
6418 "punpckhbw\t{%2, %0|%0, %2}"
6419 [(set_attr "type" "sselog")
6420 (set_attr "prefix_data16" "1")
6421 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklbw on V16QI.  The index list pairs element i
;; with element i+16 for i = 0..7, i.e. the lower eight bytes of each
;; source are interleaved.  Operand 2 may be memory; VEX-encoded.
6423 (define_insn "*avx_punpcklbw"
6424 [(set (match_operand:V16QI 0 "register_operand" "=x")
6427 (match_operand:V16QI 1 "register_operand" "x")
6428 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6429 (parallel [(const_int 0) (const_int 16)
6430 (const_int 1) (const_int 17)
6431 (const_int 2) (const_int 18)
6432 (const_int 3) (const_int 19)
6433 (const_int 4) (const_int 20)
6434 (const_int 5) (const_int 21)
6435 (const_int 6) (const_int 22)
6436 (const_int 7) (const_int 23)])))]
6438 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6439 [(set_attr "type" "sselog")
6440 (set_attr "prefix" "vex")
6441 (set_attr "mode" "TI")])
;; Two-operand punpcklbw on V16QI; operand 1 is tied to the destination
;; ("0").  The index list pairs element i with element i+16 for i = 0..7.
;; Uses the data16 (0x66) prefix.
6443 (define_insn "sse2_punpcklbw"
6444 [(set (match_operand:V16QI 0 "register_operand" "=x")
6447 (match_operand:V16QI 1 "register_operand" "0")
6448 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6449 (parallel [(const_int 0) (const_int 16)
6450 (const_int 1) (const_int 17)
6451 (const_int 2) (const_int 18)
6452 (const_int 3) (const_int 19)
6453 (const_int 4) (const_int 20)
6454 (const_int 5) (const_int 21)
6455 (const_int 6) (const_int 22)
6456 (const_int 7) (const_int 23)])))]
6458 "punpcklbw\t{%2, %0|%0, %2}"
6459 [(set_attr "type" "sselog")
6460 (set_attr "prefix_data16" "1")
6461 (set_attr "mode" "TI")])
;; AVX three-operand vpunpckhwd on V8HI.  The index list pairs element i
;; with element i+8 for i = 4..7 (upper four words of each source).
;; Operand 2 may be memory; VEX-encoded.
6463 (define_insn "*avx_punpckhwd"
6464 [(set (match_operand:V8HI 0 "register_operand" "=x")
6467 (match_operand:V8HI 1 "register_operand" "x")
6468 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6469 (parallel [(const_int 4) (const_int 12)
6470 (const_int 5) (const_int 13)
6471 (const_int 6) (const_int 14)
6472 (const_int 7) (const_int 15)])))]
6474 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6475 [(set_attr "type" "sselog")
6476 (set_attr "prefix" "vex")
6477 (set_attr "mode" "TI")])
;; Two-operand punpckhwd on V8HI; operand 1 is tied to the destination
;; ("0").  The index list pairs element i with element i+8 for i = 4..7.
;; Uses the data16 (0x66) prefix.
6479 (define_insn "sse2_punpckhwd"
6480 [(set (match_operand:V8HI 0 "register_operand" "=x")
6483 (match_operand:V8HI 1 "register_operand" "0")
6484 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6485 (parallel [(const_int 4) (const_int 12)
6486 (const_int 5) (const_int 13)
6487 (const_int 6) (const_int 14)
6488 (const_int 7) (const_int 15)])))]
6490 "punpckhwd\t{%2, %0|%0, %2}"
6491 [(set_attr "type" "sselog")
6492 (set_attr "prefix_data16" "1")
6493 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklwd on V8HI.  The index list pairs element i
;; with element i+8 for i = 0..3 (lower four words of each source).
;; Operand 2 may be memory; VEX-encoded.
6495 (define_insn "*avx_punpcklwd"
6496 [(set (match_operand:V8HI 0 "register_operand" "=x")
6499 (match_operand:V8HI 1 "register_operand" "x")
6500 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6501 (parallel [(const_int 0) (const_int 8)
6502 (const_int 1) (const_int 9)
6503 (const_int 2) (const_int 10)
6504 (const_int 3) (const_int 11)])))]
6506 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6507 [(set_attr "type" "sselog")
6508 (set_attr "prefix" "vex")
6509 (set_attr "mode" "TI")])
;; Two-operand punpcklwd on V8HI; operand 1 is tied to the destination
;; ("0").  The index list pairs element i with element i+8 for i = 0..3.
;; Uses the data16 (0x66) prefix.
6511 (define_insn "sse2_punpcklwd"
6512 [(set (match_operand:V8HI 0 "register_operand" "=x")
6515 (match_operand:V8HI 1 "register_operand" "0")
6516 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6517 (parallel [(const_int 0) (const_int 8)
6518 (const_int 1) (const_int 9)
6519 (const_int 2) (const_int 10)
6520 (const_int 3) (const_int 11)])))]
6522 "punpcklwd\t{%2, %0|%0, %2}"
6523 [(set_attr "type" "sselog")
6524 (set_attr "prefix_data16" "1")
6525 (set_attr "mode" "TI")])
;; AVX three-operand vpunpckhdq on V4SI.  The index list is 2,6,3,7
;; (element i paired with element i+4 for i = 2..3).  Operand 2 may be
;; memory; VEX-encoded.
6527 (define_insn "*avx_punpckhdq"
6528 [(set (match_operand:V4SI 0 "register_operand" "=x")
6531 (match_operand:V4SI 1 "register_operand" "x")
6532 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6533 (parallel [(const_int 2) (const_int 6)
6534 (const_int 3) (const_int 7)])))]
6536 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6537 [(set_attr "type" "sselog")
6538 (set_attr "prefix" "vex")
6539 (set_attr "mode" "TI")])
;; Two-operand punpckhdq on V4SI; operand 1 is tied to the destination
;; ("0").  The index list is 2,6,3,7.  Uses the data16 (0x66) prefix.
6541 (define_insn "sse2_punpckhdq"
6542 [(set (match_operand:V4SI 0 "register_operand" "=x")
6545 (match_operand:V4SI 1 "register_operand" "0")
6546 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6547 (parallel [(const_int 2) (const_int 6)
6548 (const_int 3) (const_int 7)])))]
6550 "punpckhdq\t{%2, %0|%0, %2}"
6551 [(set_attr "type" "sselog")
6552 (set_attr "prefix_data16" "1")
6553 (set_attr "mode" "TI")])
;; AVX three-operand vpunpckldq on V4SI.  The index list is 0,4,1,5
;; (element i paired with element i+4 for i = 0..1).  Operand 2 may be
;; memory; VEX-encoded.
6555 (define_insn "*avx_punpckldq"
6556 [(set (match_operand:V4SI 0 "register_operand" "=x")
6559 (match_operand:V4SI 1 "register_operand" "x")
6560 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6561 (parallel [(const_int 0) (const_int 4)
6562 (const_int 1) (const_int 5)])))]
6564 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6565 [(set_attr "type" "sselog")
6566 (set_attr "prefix" "vex")
6567 (set_attr "mode" "TI")])
;; Two-operand punpckldq on V4SI; operand 1 is tied to the destination
;; ("0").  The index list is 0,4,1,5.  Uses the data16 (0x66) prefix.
6569 (define_insn "sse2_punpckldq"
6570 [(set (match_operand:V4SI 0 "register_operand" "=x")
6573 (match_operand:V4SI 1 "register_operand" "0")
6574 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6575 (parallel [(const_int 0) (const_int 4)
6576 (const_int 1) (const_int 5)])))]
6578 "punpckldq\t{%2, %0|%0, %2}"
6579 [(set_attr "type" "sselog")
6580 (set_attr "prefix_data16" "1")
6581 (set_attr "mode" "TI")])
;; AVX vpinsr{b,w,d} over the SSEMODE124 modes: the scalar operand 2
;; (register or memory) is duplicated and merged into vector operand 1 under
;; the vec_merge mask operand 3.  The mask is a single power of two; the
;; output code converts it to an element index with exact_log2 before
;; printing.  A memory source is printed as-is, a register source through
;; the %k modifier (presumably the 32-bit register name -- confirm against
;; the i386 print_operand documentation).
6583 (define_insn "*avx_pinsr<avxmodesuffixs>"
6584 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6585 (vec_merge:SSEMODE124
6586 (vec_duplicate:SSEMODE124
6587 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6588 (match_operand:SSEMODE124 1 "register_operand" "x")
6589 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6592 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6593 if (MEM_P (operands[2]))
6594 return "vpinsr<avxmodesuffixs>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6596 return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6598 [(set_attr "type" "sselog")
6599 (set_attr "prefix" "vex")
6600 (set_attr "mode" "TI")])
;; pinsrb: insert the QI operand 2 (register or memory) into V16QI operand 1,
;; which is tied to the destination ("0").  Operand 3 is a power-of-two mask
;; in the range 1..32768; the output code converts it to an element index
;; with exact_log2.  A register source is printed through %k (presumably the
;; 32-bit register name -- confirm against print_operand).
6602 (define_insn "*sse4_1_pinsrb"
6603 [(set (match_operand:V16QI 0 "register_operand" "=x")
6605 (vec_duplicate:V16QI
6606 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6607 (match_operand:V16QI 1 "register_operand" "0")
6608 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6611 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6612 if (MEM_P (operands[2]))
6613 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6615 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6617 [(set_attr "type" "sselog")
6618 (set_attr "prefix_extra" "1")
6619 (set_attr "mode" "TI")])
;; pinsrw: insert the HI operand 2 (register or memory) into V8HI operand 1,
;; which is tied to the destination ("0").  Operand 3 is a power-of-two mask
;; in the range 1..128; the output code converts it to an element index with
;; exact_log2.  A register source is printed through %k (presumably the
;; 32-bit register name -- confirm against print_operand).
6621 (define_insn "*sse2_pinsrw"
6622 [(set (match_operand:V8HI 0 "register_operand" "=x")
6625 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6626 (match_operand:V8HI 1 "register_operand" "0")
6627 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6630 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6631 if (MEM_P (operands[2]))
6632 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6634 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6636 [(set_attr "type" "sselog")
6637 (set_attr "prefix_data16" "1")
6638 (set_attr "mode" "TI")])
;; pinsrd: insert the SI operand 2 (register or memory) into V4SI operand 1,
;; which is tied to the destination ("0").  Operand 3 is a power-of-two mask
;; in the range 1..8; the output code converts it to an element index with
;; exact_log2 before printing.
6640 ;; It must come before sse2_loadld since it is preferred.
6641 (define_insn "*sse4_1_pinsrd"
6642 [(set (match_operand:V4SI 0 "register_operand" "=x")
6645 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6646 (match_operand:V4SI 1 "register_operand" "0")
6647 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6650 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6651 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6653 [(set_attr "type" "sselog")
6654 (set_attr "prefix_extra" "1")
6655 (set_attr "mode" "TI")])
;; AVX vpinsrq, enabled only for TARGET_AVX && TARGET_64BIT: insert the DI
;; operand 2 (register or memory) into V2DI operand 1, writing a separate
;; destination.  Operand 3 is a power-of-two mask (1 or 2) converted to an
;; element index with exact_log2.
6657 (define_insn "*avx_pinsrq"
6658 [(set (match_operand:V2DI 0 "register_operand" "=x")
6661 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6662 (match_operand:V2DI 1 "register_operand" "x")
6663 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6664 "TARGET_AVX && TARGET_64BIT"
6666 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6667 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6669 [(set_attr "type" "sselog")
6670 (set_attr "prefix" "vex")
6671 (set_attr "mode" "TI")])
;; pinsrq, enabled only for TARGET_SSE4_1 && TARGET_64BIT: insert the DI
;; operand 2 (register or memory) into V2DI operand 1, which is tied to the
;; destination ("0").  Operand 3 is a power-of-two mask (1 or 2) converted
;; to an element index with exact_log2.
6673 (define_insn "*sse4_1_pinsrq"
6674 [(set (match_operand:V2DI 0 "register_operand" "=x")
6677 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6678 (match_operand:V2DI 1 "register_operand" "0")
6679 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6680 "TARGET_SSE4_1 && TARGET_64BIT"
6682 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6683 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6685 [(set_attr "type" "sselog")
6686 (set_attr "prefix_extra" "1")
6687 (set_attr "mode" "TI")])
;; pextrb into a general register: the element of V16QI operand 1 selected
;; by operand 2 (0..15) is written to SI register operand 0.  The %v
;; modifier with prefix "maybe_vex" lets one pattern serve both the legacy
;; and the VEX encoding.
6689 (define_insn "*sse4_1_pextrb"
6690 [(set (match_operand:SI 0 "register_operand" "=r")
6693 (match_operand:V16QI 1 "register_operand" "x")
6694 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6696 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6697 [(set_attr "type" "sselog")
6698 (set_attr "prefix_extra" "1")
6699 (set_attr "prefix" "maybe_vex")
6700 (set_attr "mode" "TI")])
;; pextrb straight to memory: the element of V16QI operand 1 selected by
;; operand 2 (0..15) is stored to the QI memory operand 0.
6702 (define_insn "*sse4_1_pextrb_memory"
6703 [(set (match_operand:QI 0 "memory_operand" "=m")
6705 (match_operand:V16QI 1 "register_operand" "x")
6706 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6708 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6709 [(set_attr "type" "sselog")
6710 (set_attr "prefix_extra" "1")
6711 (set_attr "prefix" "maybe_vex")
6712 (set_attr "mode" "TI")])
;; pextrw into a general register: the element of V8HI operand 1 selected by
;; operand 2 (0..7) is written to SI register operand 0.  Uses the data16
;; prefix; "maybe_vex" covers the AVX encoding.
6714 (define_insn "*sse2_pextrw"
6715 [(set (match_operand:SI 0 "register_operand" "=r")
6718 (match_operand:V8HI 1 "register_operand" "x")
6719 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6721 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6722 [(set_attr "type" "sselog")
6723 (set_attr "prefix_data16" "1")
6724 (set_attr "prefix" "maybe_vex")
6725 (set_attr "mode" "TI")])
;; pextrw straight to memory: the element of V8HI operand 1 selected by
;; operand 2 (0..7) is stored to the HI memory operand 0.
6727 (define_insn "*sse4_1_pextrw_memory"
6728 [(set (match_operand:HI 0 "memory_operand" "=m")
6730 (match_operand:V8HI 1 "register_operand" "x")
6731 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6733 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6734 [(set_attr "type" "sselog")
6735 (set_attr "prefix_extra" "1")
6736 (set_attr "prefix" "maybe_vex")
6737 (set_attr "mode" "TI")])
;; pextrd: the element of V4SI operand 1 selected by operand 2 (0..3) is
;; written to SI operand 0, which may be a general register or memory.
6739 (define_insn "*sse4_1_pextrd"
6740 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6742 (match_operand:V4SI 1 "register_operand" "x")
6743 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6745 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6746 [(set_attr "type" "sselog")
6747 (set_attr "prefix_extra" "1")
6748 (set_attr "prefix" "maybe_vex")
6749 (set_attr "mode" "TI")])
;; pextrq, enabled only for TARGET_SSE4_1 && TARGET_64BIT: the element of
;; V2DI operand 1 selected by operand 2 (0..1) is written to DI operand 0,
;; which may be a general register or memory.
6751 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
6752 (define_insn "*sse4_1_pextrq"
6753 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6755 (match_operand:V2DI 1 "register_operand" "x")
6756 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6757 "TARGET_SSE4_1 && TARGET_64BIT"
6758 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6759 [(set_attr "type" "sselog")
6760 (set_attr "prefix_extra" "1")
6761 (set_attr "prefix" "maybe_vex")
6762 (set_attr "mode" "TI")])
;; Expander for the pshufd builtin form: operand 2 is the immediate shuffle
;; mask.  It is split into four 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6),
;; which are passed as separate operands to gen_sse2_pshufd_1.
6764 (define_expand "sse2_pshufd"
6765 [(match_operand:V4SI 0 "register_operand" "")
6766 (match_operand:V4SI 1 "nonimmediate_operand" "")
6767 (match_operand:SI 2 "const_int_operand" "")]
6770 int mask = INTVAL (operands[2]);
6771 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6772 GEN_INT ((mask >> 0) & 3),
6773 GEN_INT ((mask >> 2) & 3),
6774 GEN_INT ((mask >> 4) & 3),
6775 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit element selectors
;; (operands 2-5, each 0..3).  The output code packs them back into the
;; 8-bit immediate (2 bits per selector, operand 2 in the low bits) and
;; prints it as operand 2.
;; The template uses the %v modifier, so the instruction is VEX-encoded only
;; when AVX is enabled; the "prefix" attribute must therefore be
;; "maybe_vex" (as in sse2_pshuflw_1 and sse2_pshufhw_1), not an
;; unconditional "vex".
6779 (define_insn "sse2_pshufd_1"
6780 [(set (match_operand:V4SI 0 "register_operand" "=x")
6782 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6783 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6784 (match_operand 3 "const_0_to_3_operand" "")
6785 (match_operand 4 "const_0_to_3_operand" "")
6786 (match_operand 5 "const_0_to_3_operand" "")])))]
6790 mask |= INTVAL (operands[2]) << 0;
6791 mask |= INTVAL (operands[3]) << 2;
6792 mask |= INTVAL (operands[4]) << 4;
6793 mask |= INTVAL (operands[5]) << 6;
6794 operands[2] = GEN_INT (mask);
6796 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6798 [(set_attr "type" "sselog1")
6799 (set_attr "prefix_data16" "1")
6800 (set_attr "prefix" "maybe_vex")
6801 (set_attr "mode" "TI")])
;; Expander for the pshuflw builtin form: operand 2 is the immediate shuffle
;; mask.  It is split into four 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6),
;; which are passed as separate operands to gen_sse2_pshuflw_1.
6803 (define_expand "sse2_pshuflw"
6804 [(match_operand:V8HI 0 "register_operand" "")
6805 (match_operand:V8HI 1 "nonimmediate_operand" "")
6806 (match_operand:SI 2 "const_int_operand" "")]
6809 int mask = INTVAL (operands[2]);
6810 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6811 GEN_INT ((mask >> 0) & 3),
6812 GEN_INT ((mask >> 2) & 3),
6813 GEN_INT ((mask >> 4) & 3),
6814 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four explicit selectors (operands 2-5, each 0..3) at the
;; start of the V8HI index list.  The output code packs them back into the
;; 8-bit immediate (2 bits per selector, operand 2 in the low bits) and
;; prints it as operand 2.  %v with "maybe_vex" covers both encodings.
6818 (define_insn "sse2_pshuflw_1"
6819 [(set (match_operand:V8HI 0 "register_operand" "=x")
6821 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6822 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6823 (match_operand 3 "const_0_to_3_operand" "")
6824 (match_operand 4 "const_0_to_3_operand" "")
6825 (match_operand 5 "const_0_to_3_operand" "")
6833 mask |= INTVAL (operands[2]) << 0;
6834 mask |= INTVAL (operands[3]) << 2;
6835 mask |= INTVAL (operands[4]) << 4;
6836 mask |= INTVAL (operands[5]) << 6;
6837 operands[2] = GEN_INT (mask);
6839 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6841 [(set_attr "type" "sselog")
6842 (set_attr "prefix_rep" "1")
6843 (set_attr "prefix" "maybe_vex")
6844 (set_attr "mode" "TI")])
;; Expander for the pshufhw builtin form: operand 2 is the immediate shuffle
;; mask.  Each 2-bit selector is extracted and biased by +4, so the values
;; handed to gen_sse2_pshufhw_1 are element indices in the range 4..7.
6846 (define_expand "sse2_pshufhw"
6847 [(match_operand:V8HI 0 "register_operand" "")
6848 (match_operand:V8HI 1 "nonimmediate_operand" "")
6849 (match_operand:SI 2 "const_int_operand" "")]
6852 int mask = INTVAL (operands[2]);
6853 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6854 GEN_INT (((mask >> 0) & 3) + 4),
6855 GEN_INT (((mask >> 2) & 3) + 4),
6856 GEN_INT (((mask >> 4) & 3) + 4),
6857 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four explicit selectors (operands 2-5, each 4..7) at the end
;; of the V8HI index list, which starts at element 0.  The output code
;; removes the +4 bias from each selector, packs the results into the 8-bit
;; immediate (2 bits each, operand 2 in the low bits) and prints it as
;; operand 2.  %v with "maybe_vex" covers both encodings.
6861 (define_insn "sse2_pshufhw_1"
6862 [(set (match_operand:V8HI 0 "register_operand" "=x")
6864 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6865 (parallel [(const_int 0)
6869 (match_operand 2 "const_4_to_7_operand" "")
6870 (match_operand 3 "const_4_to_7_operand" "")
6871 (match_operand 4 "const_4_to_7_operand" "")
6872 (match_operand 5 "const_4_to_7_operand" "")])))]
6876 mask |= (INTVAL (operands[2]) - 4) << 0;
6877 mask |= (INTVAL (operands[3]) - 4) << 2;
6878 mask |= (INTVAL (operands[4]) - 4) << 4;
6879 mask |= (INTVAL (operands[5]) - 4) << 6;
6880 operands[2] = GEN_INT (mask);
6882 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6884 [(set_attr "type" "sselog")
6885 (set_attr "prefix_rep" "1")
6886 (set_attr "prefix" "maybe_vex")
6887 (set_attr "mode" "TI")])
;; Expander that loads the SI operand 1 (register or memory) into a V4SI
;; register.  The preparation statement supplies operands[2] as the all-zero
;; V4SI constant.
6889 (define_expand "sse2_loadd"
6890 [(set (match_operand:V4SI 0 "register_operand" "")
6893 (match_operand:SI 1 "nonimmediate_operand" ""))
6897 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX form of the SI-into-V4SI load/merge.  Operand 1 is a register or zero
;; (reg_or_0_operand).  Three alternatives:
;;   0: SI from memory, operand 1 constrained to "C"  -> vmovd
;;   1: SI from a general register, operand 1 "C"     -> vmovd
;;   2: SI in an SSE register, operand 1 a register   -> vmovss (3-operand)
6899 (define_insn "*avx_loadld"
6900 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
6903 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
6904 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
6908 vmovd\t{%2, %0|%0, %2}
6909 vmovd\t{%2, %0|%0, %2}
6910 vmovss\t{%2, %1, %0|%0, %1, %2}"
6911 [(set_attr "type" "ssemov")
6912 (set_attr "prefix" "vex")
6913 (set_attr "mode" "TI,TI,V4SF")])
;; Non-AVX form of the SI-into-V4SI load/merge.  Operand 1 is a register or
;; zero (reg_or_0_operand).  Four alternatives:
;;   0: SI from memory, operand 1 "C"            -> movd
;;   1: SI from a general register, operand 1 "C" -> movd
;;   2: SI from memory, operand 1 "C"            -> movss
;;   3: SI in an SSE register, operand 1 tied to the destination -> movss
6915 (define_insn "sse2_loadld"
6916 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
6919 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
6920 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
6924 movd\t{%2, %0|%0, %2}
6925 movd\t{%2, %0|%0, %2}
6926 movss\t{%2, %0|%0, %2}
6927 movss\t{%2, %0|%0, %2}"
6928 [(set_attr "type" "ssemov")
6929 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of V4SI operand 1 into SI operand 0.  After reload the
;; insn is split into a plain SImode move when inter-unit moves are allowed,
;; or the destination is memory, or the destination is not a general
;; register; the split code re-expresses operand 1 as an SImode register
;; with the same register number.
6931 (define_insn_and_split "sse2_stored"
6932 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
6934 (match_operand:V4SI 1 "register_operand" "x,Yi")
6935 (parallel [(const_int 0)])))]
6938 "&& reload_completed
6939 && (TARGET_INTER_UNIT_MOVES
6940 || MEM_P (operands [0])
6941 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6942 [(set (match_dup 0) (match_dup 1))]
6944 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element i (operand 2, 0..3) of a V4SI held in offsettable memory
;; ("o") into an SI general register.  The split code turns this into an
;; ordinary SImode load from the same address plus i*4 bytes.
6947 (define_insn_and_split "*vec_ext_v4si_mem"
6948 [(set (match_operand:SI 0 "register_operand" "=r")
6950 (match_operand:V4SI 1 "memory_operand" "o")
6951 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6957 int i = INTVAL (operands[2]);
6959 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that stores element 0 of V2DI register operand 1 into DI
;; operand 0 (register or memory).
6963 (define_expand "sse_storeq"
6964 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6966 (match_operand:V2DI 1 "register_operand" "")
6967 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 V2DI store; rejected when both operands
;; are memory.  Three alternatives: SSE register to memory/SSE register,
;; SSE register to general register, and offsettable memory to general
;; register.  The third is a plain integer move (%vmov{q}, type imov); the
;; first two leave type, prefix and mode at their defaults ("*").
6971 (define_insn "*sse2_storeq_rex64"
6972 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
6974 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6975 (parallel [(const_int 0)])))]
6976 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6980 %vmov{q}\t{%1, %0|%0, %1}"
6981 [(set_attr "type" "*,*,imov")
6982 (set_attr "prefix" "*,*,maybe_vex")
6983 (set_attr "mode" "*,*,DI")])
;; Element-0 V2DI store from an SSE register to memory or an SSE register,
;; followed by the pattern that splits it.  The split fires when inter-unit
;; moves are allowed, or the destination is memory, or the destination is
;; not a general register; it rewrites the extract as a plain DImode move,
;; with operand 1 re-expressed as a DImode register of the same register
;; number.
6985 (define_insn "*sse2_storeq"
6986 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
6988 (match_operand:V2DI 1 "register_operand" "x")
6989 (parallel [(const_int 0)])))]
6994 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6996 (match_operand:V2DI 1 "register_operand" "")
6997 (parallel [(const_int 0)])))]
7000 && (TARGET_INTER_UNIT_MOVES
7001 || MEM_P (operands [0])
7002 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7003 [(set (match_dup 0) (match_dup 1))]
7005 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI (AVX mnemonics); never memory-to-memory.
;; Alternatives:
;;   0: SSE register -> memory          vmovhps
;;   1: SSE register -> SSE register    vpsrldq by 8 bytes
;;   2: offsettable memory -> SSE reg   vmovq from %H1
;;   3: offsettable memory -> GPR       vmov{q} from %H1
;; %H1 presumably addresses the high 8 bytes of the memory operand --
;; confirm against the i386 print_operand documentation.
7008 (define_insn "*vec_extractv2di_1_rex64_avx"
7009 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7011 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7012 (parallel [(const_int 1)])))]
7015 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7017 vmovhps\t{%1, %0|%0, %1}
7018 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7019 vmovq\t{%H1, %0|%0, %H1}
7020 vmov{q}\t{%H1, %0|%0, %H1}"
7021 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7022 (set_attr "memory" "*,none,*,*")
7023 (set_attr "prefix" "vex")
7024 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI on 64-bit targets; never memory-to-memory.
;; Alternatives:
;;   0: SSE register -> memory          movhps
;;   1: in place (operand 1 tied "0")   psrldq by 8 bytes
;;   2: offsettable memory -> SSE reg   movq from %H1
;;   3: offsettable memory -> GPR       mov{q} from %H1
;; %H1 presumably addresses the high 8 bytes of the memory operand --
;; confirm against the i386 print_operand documentation.
7026 (define_insn "*vec_extractv2di_1_rex64"
7027 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7029 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7030 (parallel [(const_int 1)])))]
7031 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7033 movhps\t{%1, %0|%0, %1}
7034 psrldq\t{$8, %0|%0, 8}
7035 movq\t{%H1, %0|%0, %H1}
7036 mov{q}\t{%H1, %0|%0, %H1}"
7037 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7038 (set_attr "memory" "*,none,*,*")
7039 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI (AVX mnemonics, no general-register
;; alternative); never memory-to-memory.  Alternatives:
;;   0: SSE register -> memory          vmovhps
;;   1: SSE register -> SSE register    vpsrldq by 8 bytes
;;   2: offsettable memory -> SSE reg   vmovq from %H1
7041 (define_insn "*vec_extractv2di_1_avx"
7042 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7044 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7045 (parallel [(const_int 1)])))]
7048 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7050 vmovhps\t{%1, %0|%0, %1}
7051 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7052 vmovq\t{%H1, %0|%0, %H1}"
7053 [(set_attr "type" "ssemov,sseishft,ssemov")
7054 (set_attr "memory" "*,none,*")
7055 (set_attr "prefix" "vex")
7056 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI, requiring TARGET_SSE2 (no general-register
;; alternative); never memory-to-memory.  Alternatives:
;;   0: SSE register -> memory          movhps
;;   1: in place (operand 1 tied "0")   psrldq by 8 bytes
;;   2: offsettable memory -> SSE reg   movq from %H1
7058 (define_insn "*vec_extractv2di_1_sse2"
7059 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7061 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7062 (parallel [(const_int 1)])))]
7064 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7066 movhps\t{%1, %0|%0, %1}
7067 psrldq\t{$8, %0|%0, 8}
7068 movq\t{%H1, %0|%0, %H1}"
7069 [(set_attr "type" "ssemov,sseishft,ssemov")
7070 (set_attr "memory" "*,none,*")
7071 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI using only SSE1 instructions (enabled when
;; TARGET_SSE but not TARGET_SSE2); never memory-to-memory.  Alternatives:
;;   0: SSE register -> memory          movhps
;;   1: SSE register -> SSE register    movhlps
;;   2: offsettable memory -> SSE reg   movlps from %H1
7073 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7074 (define_insn "*vec_extractv2di_1_sse"
7075 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7077 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7078 (parallel [(const_int 1)])))]
7079 "!TARGET_SSE2 && TARGET_SSE
7080 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7082 movhps\t{%1, %0|%0, %1}
7083 movhlps\t{%1, %0|%0, %1}
7084 movlps\t{%H1, %0|%0, %H1}"
7085 [(set_attr "type" "ssemov")
7086 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcast the SI register operand 1 into a V4SI register.  Alternative 0
;; uses pshufd with immediate 0 (VEX form when AVX is enabled); alternative
;; 1 requires the source tied to the destination and uses shufps with
;; immediate 0, always in the legacy encoding (prefix "orig").
7088 (define_insn "*vec_dupv4si"
7089 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7091 (match_operand:SI 1 "register_operand" " Y2,0")))]
7094 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7095 shufps\t{$0, %0, %0|%0, %0, 0}"
7096 [(set_attr "type" "sselog1")
7097 (set_attr "prefix" "maybe_vex,orig")
7098 (set_attr "mode" "TI,V4SF")])
;; AVX broadcast of the DI register operand 1 into a V2DI register,
;; implemented as vpunpcklqdq with operand 1 used for both sources.
7100 (define_insn "*vec_dupv2di_avx"
7101 [(set (match_operand:V2DI 0 "register_operand" "=x")
7103 (match_operand:DI 1 "register_operand" "x")))]
7105 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7106 [(set_attr "type" "sselog1")
7107 (set_attr "prefix" "vex")
7108 (set_attr "mode" "TI")])
;; Non-AVX broadcast of the DI register operand 1 into a V2DI register.
;; Both alternatives tie the source to the destination ("0"); the first is
;; typed sselog1 in TI mode, the second ssemov in V4SF mode.
7110 (define_insn "*vec_dupv2di"
7111 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7113 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7118 [(set_attr "type" "sselog1,ssemov")
7119 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI from two SI operands, AVX variant.  Alternatives:
;;   0: vpinsrd $1   (operand 2 from a general register or memory)
;;   1: vpunpckldq   (both operands in SSE registers)
;;   2: vmovd        (operand 2 constrained to "C")
;;   3: punpckldq    (MMX registers, operand 1 tied to the destination)
;;   4: movd         (MMX destination, operand 2 "C")
;; The MMX alternatives (3 and 4) keep the legacy encoding (prefix "orig");
;; the others are VEX-encoded.
7121 (define_insn "*vec_concatv2si_avx"
7122 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7124 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7125 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7128 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7129 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7130 vmovd\t{%1, %0|%0, %1}
7131 punpckldq\t{%2, %0|%0, %2}
7132 movd\t{%1, %0|%0, %1}"
7133 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7134 (set (attr "prefix")
7135 (if_then_else (eq_attr "alternative" "3,4")
7136 (const_string "orig")
7137 (const_string "vex")))
7138 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Build a V2SI from two SI operands, SSE4.1 variant.  Alternatives:
;;   0: pinsrd $1    (operand 1 tied to the destination; needs the extra
;;                    opcode prefix byte, prefix_extra = 1)
;;   1: punpckldq    (SSE registers, operand 1 tied)
;;   2: movd         (operand 2 constrained to "C")
;;   3: punpckldq    (MMX registers, operand 1 tied)
;;   4: movd         (MMX destination, operand 2 "C")
7140 (define_insn "*vec_concatv2si_sse4_1"
7141 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7143 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7144 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7147 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7148 punpckldq\t{%2, %0|%0, %2}
7149 movd\t{%1, %0|%0, %1}
7150 punpckldq\t{%2, %0|%0, %2}
7151 movd\t{%1, %0|%0, %1}"
7152 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7153 (set_attr "prefix_extra" "1,*,*,*,*")
7154 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Build a V2SI from two SI operands, SSE2 variant; operand 2 is a register
;; or zero (reg_or_0_operand).  Alternatives:
;;   0: punpckldq    (SSE registers, operand 1 tied to the destination)
;;   1: movd         (operand 2 constrained to "C")
;;   2: punpckldq    (MMX registers, operand 1 tied)
;;   3: movd         (MMX destination, operand 2 "C")
7156 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7157 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7158 ;; alternatives pretty much forces the MMX alternative to be chosen.
7159 (define_insn "*vec_concatv2si_sse2"
7160 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7162 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7163 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7166 punpckldq\t{%2, %0|%0, %2}
7167 movd\t{%1, %0|%0, %1}
7168 punpckldq\t{%2, %0|%0, %2}
7169 movd\t{%1, %0|%0, %1}"
7170 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7171 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI from two SI operands using SSE1 float instructions for the
;; SSE-register alternatives; operand 2 is a register or zero.
;; Alternatives:
;;   0: unpcklps     (SSE registers, operand 1 tied to the destination)
;;   1: movss        (operand 1 from memory, operand 2 "C")
;;   2: punpckldq    (MMX registers, operand 1 tied)
;;   3: movd         (MMX destination, operand 2 "C")
7173 (define_insn "*vec_concatv2si_sse"
7174 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7176 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7177 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7180 unpcklps\t{%2, %0|%0, %2}
7181 movss\t{%1, %0|%0, %1}
7182 punpckldq\t{%2, %0|%0, %2}
7183 movd\t{%1, %0|%0, %1}"
7184 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7185 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI from two V2SI operands, AVX variant.  Operand 2 in an SSE
;; register uses vpunpcklqdq; operand 2 in memory uses vmovhps.
7187 (define_insn "*vec_concatv4si_1_avx"
7188 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7190 (match_operand:V2SI 1 "register_operand" " x,x")
7191 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7194 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7195 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7196 [(set_attr "type" "sselog,ssemov")
7197 (set_attr "prefix" "vex")
7198 (set_attr "mode" "TI,V2SF")])
;; Build a V4SI from two V2SI operands, non-AVX variant; operand 1 is tied
;; to the destination in every alternative.  Alternatives: punpcklqdq
;; ("Y2" registers), movlhps (SSE registers), movhps (operand 2 in memory).
7200 (define_insn "*vec_concatv4si_1"
7201 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7203 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7204 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7207 punpcklqdq\t{%2, %0|%0, %2}
7208 movlhps\t{%2, %0|%0, %2}
7209 movhps\t{%2, %0|%0, %2}"
7210 [(set_attr "type" "sselog,ssemov,ssemov")
7211 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI from two DI operands on 32-bit AVX targets
;; (!TARGET_64BIT && TARGET_AVX).  Alternatives:
;;   0: vmovq        (operand 1 from memory, operand 2 "C")
;;   1: movq2dq      (operand 1 in an MMX register, operand 2 "C";
;;                    legacy encoding, prefix "orig")
;;   2: vpunpcklqdq  (both operands in SSE registers)
;;   3: vmovhps      (operand 2 in memory)
7213 (define_insn "*vec_concatv2di_avx"
7214 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7216 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7217 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7218 "!TARGET_64BIT && TARGET_AVX"
7220 vmovq\t{%1, %0|%0, %1}
7221 movq2dq\t{%1, %0|%0, %1}
7222 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7223 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7224 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7225 (set (attr "prefix")
7226 (if_then_else (eq_attr "alternative" "1")
7227 (const_string "orig")
7228 (const_string "vex")))
7229 (set_attr "mode" "TI,TI,TI,V2SF")])
;; Build a V2DI from two DI operands on 32-bit SSE targets
;; (!TARGET_64BIT && TARGET_SSE).  Alternatives:
;;   0: movq         (operand 1 from memory or a "Y2" register, operand 2 "C")
;;   1: movq2dq      (operand 1 in an MMX register, operand 2 "C")
;;   2: punpcklqdq   (operand 1 tied to the destination, "Y2" registers)
;;   3: movlhps      (operand 1 tied, operand 2 in an SSE register)
;;   4: movhps       (operand 1 tied, operand 2 in memory)
7231 (define_insn "vec_concatv2di"
7232 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7234 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7235 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7236 "!TARGET_64BIT && TARGET_SSE"
7238 movq\t{%1, %0|%0, %1}
7239 movq2dq\t{%1, %0|%0, %1}
7240 punpcklqdq\t{%2, %0|%0, %2}
7241 movlhps\t{%2, %0|%0, %2}
7242 movhps\t{%2, %0|%0, %2}"
7243 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7244 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; Build a V2DI from two DI operands on 64-bit AVX targets
;; (TARGET_64BIT && TARGET_AVX).  Alternatives:
;;   0: vpinsrq $1   (operand 2 from a general register or memory)
;;   1: vmovq        (operand 1 from memory, operand 2 "C")
;;   2: vmovq        (operand 1 from a general register, operand 2 "C")
;;   3: movq2dq      (operand 1 in an MMX register; legacy encoding,
;;                    prefix "orig")
;;   4: vpunpcklqdq  (both operands in SSE registers)
;;   5: vmovhps      (operand 2 in memory)
7246 (define_insn "*vec_concatv2di_rex64_avx"
7247 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7249 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7250 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7251 "TARGET_64BIT && TARGET_AVX"
7253 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7254 vmovq\t{%1, %0|%0, %1}
7255 vmovq\t{%1, %0|%0, %1}
7256 movq2dq\t{%1, %0|%0, %1}
7257 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7258 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7259 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7260 (set (attr "prefix")
7261 (if_then_else (eq_attr "alternative" "3")
7262 (const_string "orig")
7263 (const_string "vex")))
7264 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; Build a V2DI from two DI operands on 64-bit SSE4.1 targets
;; (TARGET_64BIT && TARGET_SSE4_1).  Alternatives:
;;   0: pinsrq $1    (operand 1 tied to the destination; prefix_extra = 1)
;;   1: movq         (operand 1 from memory or an SSE register, operand 2 "C")
;;   2: movq         (operand 1 from a general register, operand 2 "C")
;;   3: movq2dq      (operand 1 in an MMX register, operand 2 "C")
;;   4: punpcklqdq   (operand 1 tied, operand 2 in an SSE register)
;;   5: movlhps      (operand 1 tied, operand 2 in an SSE register)
;;   6: movhps       (operand 1 tied, operand 2 in memory)
7266 (define_insn "*vec_concatv2di_rex64_sse4_1"
7267 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7269 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7270 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7271 "TARGET_64BIT && TARGET_SSE4_1"
7273 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7274 movq\t{%1, %0|%0, %1}
7275 movq\t{%1, %0|%0, %1}
7276 movq2dq\t{%1, %0|%0, %1}
7277 punpcklqdq\t{%2, %0|%0, %2}
7278 movlhps\t{%2, %0|%0, %2}
7279 movhps\t{%2, %0|%0, %2}"
7280 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7281 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7282 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; Build a V2DI from two DI operands on 64-bit SSE targets
;; (TARGET_64BIT && TARGET_SSE).  Alternatives:
;;   0: movq         (operand 1 from memory or a "Y2" register, operand 2 "C")
;;   1: movq         (operand 1 from a general register, operand 2 "C")
;;   2: movq2dq      (operand 1 in an MMX register, operand 2 "C")
;;   3: punpcklqdq   (operand 1 tied to the destination, "Y2" registers)
;;   4: movlhps      (operand 1 tied, operand 2 in an SSE register)
;;   5: movhps       (operand 1 tied, operand 2 in memory)
7284 (define_insn "*vec_concatv2di_rex64_sse"
7285 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7287 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7288 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7289 "TARGET_64BIT && TARGET_SSE"
7291 movq\t{%1, %0|%0, %1}
7292 movq\t{%1, %0|%0, %1}
7293 movq2dq\t{%1, %0|%0, %1}
7294 punpcklqdq\t{%2, %0|%0, %2}
7295 movlhps\t{%2, %0|%0, %2}
7296 movhps\t{%2, %0|%0, %2}"
7297 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7298 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Named expander widening V16QI operand 1 into V8HI operand 0.  It
;; dispatches to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, passing the flags
;; (true, true) -- presumably "unsigned" and "high half", matching the
;; pattern name; confirm against the helpers' definitions.
7300 (define_expand "vec_unpacku_hi_v16qi"
7301 [(match_operand:V8HI 0 "register_operand" "")
7302 (match_operand:V16QI 1 "register_operand" "")]
7306 ix86_expand_sse4_unpack (operands, true, true);
7307 else if (TARGET_SSE5)
7308 ix86_expand_sse5_unpack (operands, true, true);
7310 ix86_expand_sse_unpack (operands, true, true);
;; Named expander widening V16QI operand 1 into V8HI operand 0.  It
;; dispatches to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, passing the flags
;; (false, true) -- presumably "signed" and "high half", matching the
;; pattern name; confirm against the helpers' definitions.
7314 (define_expand "vec_unpacks_hi_v16qi"
7315 [(match_operand:V8HI 0 "register_operand" "")
7316 (match_operand:V16QI 1 "register_operand" "")]
7320 ix86_expand_sse4_unpack (operands, false, true);
7321 else if (TARGET_SSE5)
7322 ix86_expand_sse5_unpack (operands, false, true);
7324 ix86_expand_sse_unpack (operands, false, true);
;; Named expander widening V16QI operand 1 into V8HI operand 0.  It
;; dispatches to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack
;; (when TARGET_SSE5) or ix86_expand_sse_unpack, passing the flags
;; (true, false) -- presumably "unsigned" and "low half", matching the
;; pattern name; confirm against the helpers' definitions.
7328 (define_expand "vec_unpacku_lo_v16qi"
7329 [(match_operand:V8HI 0 "register_operand" "")
7330 (match_operand:V16QI 1 "register_operand" "")]
7334 ix86_expand_sse4_unpack (operands, true, false);
7335 else if (TARGET_SSE5)
7336 ix86_expand_sse5_unpack (operands, true, false);
7338 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v16qi: operand 0 is a V8HI register, operand 1 a
;; V16QI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, false, false).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7342 (define_expand "vec_unpacks_lo_v16qi"
7343 [(match_operand:V8HI 0 "register_operand" "")
7344 (match_operand:V16QI 1 "register_operand" "")]
7348 ix86_expand_sse4_unpack (operands, false, false);
7349 else if (TARGET_SSE5)
7350 ix86_expand_sse5_unpack (operands, false, false);
7352 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v8hi: operand 0 is a V4SI register, operand 1 a
;; V8HI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, true, true).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7356 (define_expand "vec_unpacku_hi_v8hi"
7357 [(match_operand:V4SI 0 "register_operand" "")
7358 (match_operand:V8HI 1 "register_operand" "")]
7362 ix86_expand_sse4_unpack (operands, true, true);
7363 else if (TARGET_SSE5)
7364 ix86_expand_sse5_unpack (operands, true, true);
7366 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v8hi: operand 0 is a V4SI register, operand 1 a
;; V8HI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, false, true).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7370 (define_expand "vec_unpacks_hi_v8hi"
7371 [(match_operand:V4SI 0 "register_operand" "")
7372 (match_operand:V8HI 1 "register_operand" "")]
7376 ix86_expand_sse4_unpack (operands, false, true);
7377 else if (TARGET_SSE5)
7378 ix86_expand_sse5_unpack (operands, false, true);
7380 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v8hi: operand 0 is a V4SI register, operand 1 a
;; V8HI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, true, false).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7384 (define_expand "vec_unpacku_lo_v8hi"
7385 [(match_operand:V4SI 0 "register_operand" "")
7386 (match_operand:V8HI 1 "register_operand" "")]
7390 ix86_expand_sse4_unpack (operands, true, false);
7391 else if (TARGET_SSE5)
7392 ix86_expand_sse5_unpack (operands, true, false);
7394 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v8hi: operand 0 is a V4SI register, operand 1 a
;; V8HI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, false, false).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7398 (define_expand "vec_unpacks_lo_v8hi"
7399 [(match_operand:V4SI 0 "register_operand" "")
7400 (match_operand:V8HI 1 "register_operand" "")]
7404 ix86_expand_sse4_unpack (operands, false, false);
7405 else if (TARGET_SSE5)
7406 ix86_expand_sse5_unpack (operands, false, false);
7408 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v4si: operand 0 is a V2DI register, operand 1 a
;; V4SI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, true, true).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7412 (define_expand "vec_unpacku_hi_v4si"
7413 [(match_operand:V2DI 0 "register_operand" "")
7414 (match_operand:V4SI 1 "register_operand" "")]
7418 ix86_expand_sse4_unpack (operands, true, true);
7419 else if (TARGET_SSE5)
7420 ix86_expand_sse5_unpack (operands, true, true);
7422 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v4si: operand 0 is a V2DI register, operand 1 a
;; V4SI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, false, true).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7426 (define_expand "vec_unpacks_hi_v4si"
7427 [(match_operand:V2DI 0 "register_operand" "")
7428 (match_operand:V4SI 1 "register_operand" "")]
7432 ix86_expand_sse4_unpack (operands, false, true);
7433 else if (TARGET_SSE5)
7434 ix86_expand_sse5_unpack (operands, false, true);
7436 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v4si: operand 0 is a V2DI register, operand 1 a
;; V4SI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, true, false).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7440 (define_expand "vec_unpacku_lo_v4si"
7441 [(match_operand:V2DI 0 "register_operand" "")
7442 (match_operand:V4SI 1 "register_operand" "")]
7446 ix86_expand_sse4_unpack (operands, true, false);
7447 else if (TARGET_SSE5)
7448 ix86_expand_sse5_unpack (operands, true, false);
7450 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v4si: operand 0 is a V2DI register, operand 1 a
;; V4SI register.  The visible C body dispatches to one of three helpers
;; (ix86_expand_sse4_unpack, ix86_expand_sse5_unpack when TARGET_SSE5, or
;; ix86_expand_sse_unpack), each called with (operands, false, false).
;; NOTE(review): the two booleans track the u/s and hi/lo parts of the
;; pattern name -- presumably unsigned_p and high_p; confirm in the helpers.
;; The insn condition and the guard of the first branch are not visible in
;; this listing (numbering gaps).
7454 (define_expand "vec_unpacks_lo_v4si"
7455 [(match_operand:V2DI 0 "register_operand" "")
7456 (match_operand:V4SI 1 "register_operand" "")]
7460 ix86_expand_sse4_unpack (operands, false, false);
7461 else if (TARGET_SSE5)
7462 ix86_expand_sse5_unpack (operands, false, false);
7464 ix86_expand_sse_unpack (operands, false, false);
7468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander sse2_uavgv16qi3 on V16QI operands (0: register; 1 and 2:
;; nonimmediate).  The visible RTL includes an all-ones V16QI const_vector
;; (sixteen const_int 1 entries), and the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands).
;; NOTE(review): presumably a rounding average, (a + b + 1) >> 1 -- the
;; enclosing arithmetic RTL and the condition are missing from this listing
;; (numbering gaps); confirm against the upstream file.
7474 (define_expand "sse2_uavgv16qi3"
7475 [(set (match_operand:V16QI 0 "register_operand" "")
7481 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7483 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7484 (const_vector:V16QI [(const_int 1) (const_int 1)
7485 (const_int 1) (const_int 1)
7486 (const_int 1) (const_int 1)
7487 (const_int 1) (const_int 1)
7488 (const_int 1) (const_int 1)
7489 (const_int 1) (const_int 1)
7490 (const_int 1) (const_int 1)
7491 (const_int 1) (const_int 1)]))
7494 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX matcher for the V16QI pattern above; emits three-operand vpavgb
;; (destination %0, sources %1 and %2).  Operand 1 carries the commutative
;; marker "%" with constraint "x"; operand 2 allows "xm".  Matches only when
;; TARGET_AVX and ix86_binary_operator_ok (PLUS, V16QImode, operands) hold.
;; NOTE(review): the arithmetic RTL wrapping the operands is missing from
;; this listing (numbering gaps) -- confirm against the upstream file.
7496 (define_insn "*avx_uavgv16qi3"
7497 [(set (match_operand:V16QI 0 "register_operand" "=x")
7503 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7505 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7506 (const_vector:V16QI [(const_int 1) (const_int 1)
7507 (const_int 1) (const_int 1)
7508 (const_int 1) (const_int 1)
7509 (const_int 1) (const_int 1)
7510 (const_int 1) (const_int 1)
7511 (const_int 1) (const_int 1)
7512 (const_int 1) (const_int 1)
7513 (const_int 1) (const_int 1)]))
7515 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7516 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7517 [(set_attr "type" "sseiadd")
7518 (set_attr "prefix" "vex")
7519 (set_attr "mode" "TI")])
;; SSE2 matcher for the V16QI pattern; emits two-operand pavgb.  Operand 1
;; is tied to the destination ("%0", commutative), so only %0 and %2 appear
;; in the template.  Matches only when TARGET_SSE2 and
;; ix86_binary_operator_ok (PLUS, V16QImode, operands) hold.
;; NOTE(review): the arithmetic RTL wrapping the operands is missing from
;; this listing (numbering gaps) -- confirm against the upstream file.
7521 (define_insn "*sse2_uavgv16qi3"
7522 [(set (match_operand:V16QI 0 "register_operand" "=x")
7528 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7530 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7531 (const_vector:V16QI [(const_int 1) (const_int 1)
7532 (const_int 1) (const_int 1)
7533 (const_int 1) (const_int 1)
7534 (const_int 1) (const_int 1)
7535 (const_int 1) (const_int 1)
7536 (const_int 1) (const_int 1)
7537 (const_int 1) (const_int 1)
7538 (const_int 1) (const_int 1)]))
7540 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7541 "pavgb\t{%2, %0|%0, %2}"
7542 [(set_attr "type" "sseiadd")
7543 (set_attr "prefix_data16" "1")
7544 (set_attr "mode" "TI")])
;; Expander sse2_uavgv8hi3 on V8HI operands (0: register; 1 and 2:
;; nonimmediate).  The visible RTL includes an all-ones V8HI const_vector
;; (eight const_int 1 entries), and the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
;; NOTE(review): presumably a rounding average, (a + b + 1) >> 1 -- the
;; enclosing arithmetic RTL and the condition are missing from this listing
;; (numbering gaps); confirm against the upstream file.
7546 (define_expand "sse2_uavgv8hi3"
7547 [(set (match_operand:V8HI 0 "register_operand" "")
7553 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7555 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7556 (const_vector:V8HI [(const_int 1) (const_int 1)
7557 (const_int 1) (const_int 1)
7558 (const_int 1) (const_int 1)
7559 (const_int 1) (const_int 1)]))
7562 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX matcher for the V8HI pattern; emits three-operand vpavgw
;; (destination %0, sources %1 and %2).  Operand 1 carries the commutative
;; marker "%" with constraint "x"; operand 2 allows "xm".  Matches only when
;; TARGET_AVX and ix86_binary_operator_ok (PLUS, V8HImode, operands) hold.
;; NOTE(review): the arithmetic RTL wrapping the operands is missing from
;; this listing (numbering gaps) -- confirm against the upstream file.
7564 (define_insn "*avx_uavgv8hi3"
7565 [(set (match_operand:V8HI 0 "register_operand" "=x")
7571 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7573 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7574 (const_vector:V8HI [(const_int 1) (const_int 1)
7575 (const_int 1) (const_int 1)
7576 (const_int 1) (const_int 1)
7577 (const_int 1) (const_int 1)]))
7579 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7580 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7581 [(set_attr "type" "sseiadd")
7582 (set_attr "prefix" "vex")
7583 (set_attr "mode" "TI")])
;; SSE2 matcher for the V8HI pattern; emits two-operand pavgw.  Operand 1 is
;; tied to the destination ("%0", commutative), so only %0 and %2 appear in
;; the template.  Matches only when TARGET_SSE2 and
;; ix86_binary_operator_ok (PLUS, V8HImode, operands) hold.
;; NOTE(review): the arithmetic RTL wrapping the operands is missing from
;; this listing (numbering gaps) -- confirm against the upstream file.
7585 (define_insn "*sse2_uavgv8hi3"
7586 [(set (match_operand:V8HI 0 "register_operand" "=x")
7592 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7594 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7595 (const_vector:V8HI [(const_int 1) (const_int 1)
7596 (const_int 1) (const_int 1)
7597 (const_int 1) (const_int 1)
7598 (const_int 1) (const_int 1)]))
7600 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7601 "pavgw\t{%2, %0|%0, %2}"
7602 [(set_attr "type" "sseiadd")
7603 (set_attr "prefix_data16" "1")
7604 (set_attr "mode" "TI")])
7606 ;; The correct representation for this is absolutely enormous, and
7607 ;; surely not generally useful.
;; Hence the operation is modelled as an opaque unspec: a V2DI result in
;; operand 0 computed from two V16QI inputs (operand 1 a register, operand 2
;; a register or memory).  This AVX form emits three-operand vpsadbw.
;; NOTE(review): the unspec identifier and the insn condition are missing
;; from this listing (numbering gaps) -- confirm against the upstream file.
7608 (define_insn "*avx_psadbw"
7609 [(set (match_operand:V2DI 0 "register_operand" "=x")
7610 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7611 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7614 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7615 [(set_attr "type" "sseiadd")
7616 (set_attr "prefix" "vex")
7617 (set_attr "mode" "TI")])
;; Named pattern sse2_psadbw: a V2DI result from an unspec over two V16QI
;; inputs.  Operand 1 is tied to the destination (constraint "0"), so the
;; two-operand psadbw template prints only %0 and %2.
;; NOTE(review): the unspec identifier and the insn condition are missing
;; from this listing (numbering gaps) -- confirm against the upstream file.
7619 (define_insn "sse2_psadbw"
7620 [(set (match_operand:V2DI 0 "register_operand" "=x")
7621 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7622 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7625 "psadbw\t{%2, %0|%0, %2}"
7626 [(set_attr "type" "sseiadd")
7627 (set_attr "prefix_data16" "1")
7628 (set_attr "mode" "TI")])
;; Iterated over AVX256MODEF2P: writes an SI general register (operand 0)
;; from a 256-bit float vector register (operand 1) and emits
;; vmovmskp<avxmodesuffixf2c>.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P (<MODE>mode) holds.
;; NOTE(review): the RTL operator wrapping operand 1 is missing from this
;; listing (numbering gaps) -- confirm against the upstream file.
7630 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7631 [(set (match_operand:SI 0 "register_operand" "=r")
7633 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7635 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7636 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7637 [(set_attr "type" "ssecvt")
7638 (set_attr "prefix" "vex")
7639 (set_attr "mode" "<MODE>")])
;; Iterated over SSEMODEF2P (V4SF, V2DF): writes an SI general register
;; (operand 0) from a float vector register (operand 1).  The template
;; starts with "%v" and the "prefix" attribute is "maybe_vex".  Enabled when
;; SSE_VEC_FLOAT_MODE_P (<MODE>mode) holds.
;; NOTE(review): the RTL operator wrapping operand 1 is missing from this
;; listing (numbering gaps) -- confirm against the upstream file.
7641 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7642 [(set (match_operand:SI 0 "register_operand" "=r")
7644 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7646 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7647 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7648 [(set_attr "type" "ssecvt")
7649 (set_attr "prefix" "maybe_vex")
7650 (set_attr "mode" "<MODE>")])
;; Named pattern sse2_pmovmskb: an SI general register (operand 0) is set
;; from an unspec over one V16QI register (operand 1); the template is
;; "%vpmovmskb" with "prefix" set to "maybe_vex".
;; NOTE(review): the unspec identifier and the insn condition are missing
;; from this listing (numbering gaps) -- confirm against the upstream file.
7652 (define_insn "sse2_pmovmskb"
7653 [(set (match_operand:SI 0 "register_operand" "=r")
7654 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7657 "%vpmovmskb\t{%1, %0|%0, %1}"
7658 [(set_attr "type" "ssecvt")
7659 (set_attr "prefix_data16" "1")
7660 (set_attr "prefix" "maybe_vex")
7661 (set_attr "mode" "SI")])
;; Expander sse2_maskmovdqu: a V16QI memory destination (operand 0) is set
;; from an unspec whose visible inputs are two V16QI registers (operands 1
;; and 2).
;; NOTE(review): the rest of the unspec, its identifier and the condition
;; are missing from this listing (numbering gaps) -- confirm upstream.
7663 (define_expand "sse2_maskmovdqu"
7664 [(set (match_operand:V16QI 0 "memory_operand" "")
7665 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7666 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit matcher (TARGET_SSE2 && !TARGET_64BIT).  The destination is
;; V16QI memory addressed by an SI register with constraint "D"; the unspec
;; takes operands 1 and 2 plus the previous contents of that same memory
;; (mem (match_dup 0)).  Operand 0 never appears in the output template --
;; only %1 and %2 are printed.
;; NOTE(review): the unspec identifier is missing from this listing
;; (numbering gap) -- confirm against the upstream file.
7672 (define_insn "*sse2_maskmovdqu"
7673 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7674 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7675 (match_operand:V16QI 2 "register_operand" "x")
7676 (mem:V16QI (match_dup 0))]
7678 "TARGET_SSE2 && !TARGET_64BIT"
7679 ;; @@@ check ordering of operands in intel/nonintel syntax
7680 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7681 [(set_attr "type" "ssecvt")
7682 (set_attr "prefix_data16" "1")
7683 (set_attr "prefix" "maybe_vex")
7684 (set_attr "mode" "TI")])
;; 64-bit matcher (TARGET_SSE2 && TARGET_64BIT).  Same shape as the 32-bit
;; form except that the address register (operand 0, constraint "D") is DI.
;; The unspec takes operands 1 and 2 plus the previous contents of the
;; destination memory (mem (match_dup 0)); the template prints only %1 and
;; %2.
;; NOTE(review): the unspec identifier is missing from this listing
;; (numbering gap) -- confirm against the upstream file.
7686 (define_insn "*sse2_maskmovdqu_rex64"
7687 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7688 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7689 (match_operand:V16QI 2 "register_operand" "x")
7690 (mem:V16QI (match_dup 0))]
7692 "TARGET_SSE2 && TARGET_64BIT"
7693 ;; @@@ check ordering of operands in intel/nonintel syntax
7694 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7695 [(set_attr "type" "ssecvt")
7696 (set_attr "prefix_data16" "1")
7697 (set_attr "prefix" "maybe_vex")
7698 (set_attr "mode" "TI")])
;; Named pattern sse_ldmxcsr: an unspec_volatile over one SI memory operand
;; (operand 0, constraint "m").  Attributes mark it as type "sse" with
;; "memory" set to "load" and "prefix" set to "maybe_vex".
;; NOTE(review): the unspec identifier, condition and output template are
;; missing from this listing (numbering gaps) -- confirm upstream.
7700 (define_insn "sse_ldmxcsr"
7701 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7705 [(set_attr "type" "sse")
7706 (set_attr "prefix" "maybe_vex")
7707 (set_attr "memory" "load")])
;; Named pattern sse_stmxcsr: sets an SI memory operand (operand 0) from
;; the volatile unspec UNSPECV_STMXCSR, which takes only a dummy
;; (const_int 0) input.  Attributes mark it as type "sse" with "memory" set
;; to "store" and "prefix" set to "maybe_vex".
;; NOTE(review): the condition and output template are missing from this
;; listing (numbering gaps) -- confirm against the upstream file.
7709 (define_insn "sse_stmxcsr"
7710 [(set (match_operand:SI 0 "memory_operand" "=m")
7711 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7714 [(set_attr "type" "sse")
7715 (set_attr "prefix" "maybe_vex")
7716 (set_attr "memory" "store")])
;; Expander sse_sfence, enabled when TARGET_SSE || TARGET_3DNOW_A.  The
;; preparation code builds operands[0] as a BLKmode MEM whose address is a
;; Pmode SCRATCH and marks it volatile; the pattern then uses that MEM as
;; the input of an UNSPEC_SFENCE unspec.
;; NOTE(review): the "set" destination line and the braces of the C body
;; are missing from this listing (numbering gaps) -- confirm upstream.
7718 (define_expand "sse_sfence"
7720 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7721 "TARGET_SSE || TARGET_3DNOW_A"
7723 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7724 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for a BLK operand 0 (no predicate, no constraint) being set to
;; UNSPEC_SFENCE of itself; enabled when TARGET_SSE || TARGET_3DNOW_A.
;; The "memory" attribute is "unknown".
;; NOTE(review): the output template is missing from this listing
;; (numbering gap) -- confirm against the upstream file.
7727 (define_insn "*sse_sfence"
7728 [(set (match_operand:BLK 0 "" "")
7729 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7730 "TARGET_SSE || TARGET_3DNOW_A"
7732 [(set_attr "type" "sse")
7733 (set_attr "memory" "unknown")])
;; Named pattern sse2_clflush: an unspec_volatile over one modeless address
;; operand (predicate address_operand, constraint "p").  The "memory"
;; attribute is "unknown".
;; NOTE(review): the unspec identifier, condition and output template are
;; missing from this listing (numbering gaps) -- confirm upstream.
7735 (define_insn "sse2_clflush"
7736 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7740 [(set_attr "type" "sse")
7741 (set_attr "memory" "unknown")])
;; Expander sse2_mfence.  The preparation code builds operands[0] as a
;; BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile; the
;; pattern uses that MEM as the input of an UNSPEC_MFENCE unspec.
;; NOTE(review): the "set" destination line, the condition and the braces
;; of the C body are missing from this listing -- confirm upstream.
7743 (define_expand "sse2_mfence"
7745 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7748 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7749 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for a BLK operand 0 (no predicate, no constraint) being set to
;; UNSPEC_MFENCE of itself; enabled when TARGET_64BIT || TARGET_SSE2.
;; The "memory" attribute is "unknown".
;; NOTE(review): the output template is missing from this listing
;; (numbering gap) -- confirm against the upstream file.
7752 (define_insn "*sse2_mfence"
7753 [(set (match_operand:BLK 0 "" "")
7754 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7755 "TARGET_64BIT || TARGET_SSE2"
7757 [(set_attr "type" "sse")
7758 (set_attr "memory" "unknown")])
;; Expander sse2_lfence.  The preparation code builds operands[0] as a
;; BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile; the
;; pattern uses that MEM as the input of an UNSPEC_LFENCE unspec.
;; NOTE(review): the "set" destination line, the condition and the braces
;; of the C body are missing from this listing -- confirm upstream.
7760 (define_expand "sse2_lfence"
7762 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7765 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7766 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for a BLK operand 0 (no predicate, no constraint) being set to
;; UNSPEC_LFENCE of itself.  The "memory" attribute is "unknown".
;; NOTE(review): the condition and output template are missing from this
;; listing (numbering gaps) -- confirm against the upstream file.
7769 (define_insn "*sse2_lfence"
7770 [(set (match_operand:BLK 0 "" "")
7771 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7774 [(set_attr "type" "sse")
7775 (set_attr "memory" "unknown")])
;; Named pattern sse3_mwait: an unspec_volatile over two SI registers with
;; constraints "a" (operand 0) and "c" (operand 1).  A single pattern with
;; SI operands is used for both 32-bit and 64-bit targets, for the reason
;; given in the comment below.  The "length" attribute is fixed at 3.
;; NOTE(review): the unspec identifier, condition and output template are
;; missing from this listing (numbering gaps) -- confirm upstream.
7777 (define_insn "sse3_mwait"
7778 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7779 (match_operand:SI 1 "register_operand" "c")]
7782 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7783 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7784 ;; we only need to set up 32bit registers.
7786 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): an unspec_volatile over
;; three SI registers with constraints "a", "c" and "d", printed in that
;; order by the "monitor" template.  The "length" attribute is fixed at 3.
;; NOTE(review): the unspec identifier is missing from this listing
;; (numbering gap) -- confirm against the upstream file.
7788 (define_insn "sse3_monitor"
7789 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7790 (match_operand:SI 1 "register_operand" "c")
7791 (match_operand:SI 2 "register_operand" "d")]
7793 "TARGET_SSE3 && !TARGET_64BIT"
7794 "monitor\t%0, %1, %2"
7795 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT): operand 0 (constraint "a")
;; is DI, while operands 1 ("c") and 2 ("d") stay SI for the reason given
;; in the comment below.  The "length" attribute is fixed at 3.
;; NOTE(review): the unspec identifier and the output template are missing
;; from this listing (numbering gaps) -- confirm against the upstream file.
7797 (define_insn "sse3_monitor64"
7798 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7799 (match_operand:SI 1 "register_operand" "c")
7800 (match_operand:SI 2 "register_operand" "d")]
7802 "TARGET_SSE3 && TARGET_64BIT"
7803 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7804 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7805 ;; zero extended to 64bit, we only need to set up 32bit registers.
7807 [(set_attr "length" "3")])
7809 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7811 ;; SSSE3 instructions
7813 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX V8HI pattern whose template is three-operand vphaddw.  The visible
;; RTL selects HI elements 0..7 of operand 1 (register) and then elements
;; 0..7 of operand 2 (register or memory), listed as adjacent pairs
;; (0,1), (2,3), (4,5), (6,7).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
7815 (define_insn "*avx_phaddwv8hi3"
7816 [(set (match_operand:V8HI 0 "register_operand" "=x")
7822 (match_operand:V8HI 1 "register_operand" "x")
7823 (parallel [(const_int 0)]))
7824 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7826 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7827 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7830 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7831 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7833 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7834 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7839 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7840 (parallel [(const_int 0)]))
7841 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7843 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7844 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7847 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7848 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7850 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7851 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7853 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7854 [(set_attr "type" "sseiadd")
7855 (set_attr "prefix" "vex")
7856 (set_attr "mode" "TI")])
;; V8HI pattern whose template is two-operand phaddw.  Operand 1 is tied to
;; the destination (constraint "0"); operand 2 may be a register or memory.
;; The visible RTL selects HI elements 0..7 of operand 1 and then of
;; operand 2, listed as adjacent pairs (0,1), (2,3), (4,5), (6,7).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
7858 (define_insn "ssse3_phaddwv8hi3"
7859 [(set (match_operand:V8HI 0 "register_operand" "=x")
7865 (match_operand:V8HI 1 "register_operand" "0")
7866 (parallel [(const_int 0)]))
7867 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7869 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7870 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7873 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7874 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7876 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7877 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7882 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7883 (parallel [(const_int 0)]))
7884 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7886 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7887 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7890 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7891 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7893 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7894 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7896 "phaddw\t{%2, %0|%0, %2}"
7897 [(set_attr "type" "sseiadd")
7898 (set_attr "prefix_data16" "1")
7899 (set_attr "prefix_extra" "1")
7900 (set_attr "mode" "TI")])
;; V4HI pattern (constraint "y" operands, "mode" attribute "DI") whose
;; template is two-operand phaddw.  Operand 1 is tied to the destination
;; ("0"); operand 2 allows "ym".  The visible RTL selects HI elements 0..3
;; of operand 1 and then of operand 2, as adjacent pairs (0,1), (2,3).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
7902 (define_insn "ssse3_phaddwv4hi3"
7903 [(set (match_operand:V4HI 0 "register_operand" "=y")
7908 (match_operand:V4HI 1 "register_operand" "0")
7909 (parallel [(const_int 0)]))
7910 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7912 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7913 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7917 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7918 (parallel [(const_int 0)]))
7919 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7921 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7922 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7924 "phaddw\t{%2, %0|%0, %2}"
7925 [(set_attr "type" "sseiadd")
7926 (set_attr "prefix_extra" "1")
7927 (set_attr "mode" "DI")])
;; AVX V4SI pattern whose template is three-operand vphaddd.  The visible
;; RTL selects SI elements 0..3 of operand 1 (register) and then of
;; operand 2 (register or memory), as adjacent pairs (0,1), (2,3).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
7929 (define_insn "*avx_phadddv4si3"
7930 [(set (match_operand:V4SI 0 "register_operand" "=x")
7935 (match_operand:V4SI 1 "register_operand" "x")
7936 (parallel [(const_int 0)]))
7937 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7939 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7940 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7944 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7945 (parallel [(const_int 0)]))
7946 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7948 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7949 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7951 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
7952 [(set_attr "type" "sseiadd")
7953 (set_attr "prefix" "vex")
7954 (set_attr "mode" "TI")])
;; V4SI pattern whose template is two-operand phaddd.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.  The
;; visible RTL selects SI elements 0..3 of operand 1 and then of operand 2,
;; as adjacent pairs (0,1), (2,3).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
7956 (define_insn "ssse3_phadddv4si3"
7957 [(set (match_operand:V4SI 0 "register_operand" "=x")
7962 (match_operand:V4SI 1 "register_operand" "0")
7963 (parallel [(const_int 0)]))
7964 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7966 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7967 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7971 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7972 (parallel [(const_int 0)]))
7973 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7975 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7976 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7978 "phaddd\t{%2, %0|%0, %2}"
7979 [(set_attr "type" "sseiadd")
7980 (set_attr "prefix_data16" "1")
7981 (set_attr "prefix_extra" "1")
7982 (set_attr "mode" "TI")])
;; V2SI pattern (constraint "y" operands, "mode" attribute "DI") whose
;; template is two-operand phaddd.  Operand 1 is tied to the destination
;; ("0"); operand 2 allows "ym".  The visible RTL selects SI elements 0 and
;; 1 of operand 1 and then of operand 2.
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrapper and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
7984 (define_insn "ssse3_phadddv2si3"
7985 [(set (match_operand:V2SI 0 "register_operand" "=y")
7989 (match_operand:V2SI 1 "register_operand" "0")
7990 (parallel [(const_int 0)]))
7991 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7994 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7995 (parallel [(const_int 0)]))
7996 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7998 "phaddd\t{%2, %0|%0, %2}"
7999 [(set_attr "type" "sseiadd")
8000 (set_attr "prefix_extra" "1")
8001 (set_attr "mode" "DI")])
;; AVX V8HI pattern whose template is three-operand vphaddsw.  The visible
;; RTL selects HI elements 0..7 of operand 1 (register) and then elements
;; 0..7 of operand 2 (register or memory), listed as adjacent pairs
;; (0,1), (2,3), (4,5), (6,7).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8003 (define_insn "*avx_phaddswv8hi3"
8004 [(set (match_operand:V8HI 0 "register_operand" "=x")
8010 (match_operand:V8HI 1 "register_operand" "x")
8011 (parallel [(const_int 0)]))
8012 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8014 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8015 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8018 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8019 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8021 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8022 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8027 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8028 (parallel [(const_int 0)]))
8029 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8031 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8032 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8035 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8036 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8038 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8039 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8041 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8042 [(set_attr "type" "sseiadd")
8043 (set_attr "prefix" "vex")
8044 (set_attr "mode" "TI")])
;; V8HI pattern whose template is two-operand phaddsw.  Operand 1 is tied
;; to the destination (constraint "0"); operand 2 may be a register or
;; memory.  The visible RTL selects HI elements 0..7 of operand 1 and then
;; of operand 2, listed as adjacent pairs (0,1), (2,3), (4,5), (6,7).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8046 (define_insn "ssse3_phaddswv8hi3"
8047 [(set (match_operand:V8HI 0 "register_operand" "=x")
8053 (match_operand:V8HI 1 "register_operand" "0")
8054 (parallel [(const_int 0)]))
8055 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8057 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8058 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8061 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8062 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8064 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8065 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8070 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8071 (parallel [(const_int 0)]))
8072 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8074 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8075 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8078 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8079 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8081 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8082 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8084 "phaddsw\t{%2, %0|%0, %2}"
8085 [(set_attr "type" "sseiadd")
8086 (set_attr "prefix_data16" "1")
8087 (set_attr "prefix_extra" "1")
8088 (set_attr "mode" "TI")])
;; V4HI pattern (constraint "y" operands, "mode" attribute "DI") whose
;; template is two-operand phaddsw.  Operand 1 is tied to the destination
;; ("0"); operand 2 allows "ym".  The visible RTL selects HI elements 0..3
;; of operand 1 and then of operand 2, as adjacent pairs (0,1), (2,3).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8090 (define_insn "ssse3_phaddswv4hi3"
8091 [(set (match_operand:V4HI 0 "register_operand" "=y")
8096 (match_operand:V4HI 1 "register_operand" "0")
8097 (parallel [(const_int 0)]))
8098 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8100 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8101 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8105 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8106 (parallel [(const_int 0)]))
8107 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8109 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8110 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8112 "phaddsw\t{%2, %0|%0, %2}"
8113 [(set_attr "type" "sseiadd")
8114 (set_attr "prefix_extra" "1")
8115 (set_attr "mode" "DI")])
;; AVX V8HI pattern whose template is three-operand vphsubw.  The visible
;; RTL selects HI elements 0..7 of operand 1 (register) and then elements
;; 0..7 of operand 2 (register or memory), listed as adjacent pairs
;; (0,1), (2,3), (4,5), (6,7).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8117 (define_insn "*avx_phsubwv8hi3"
8118 [(set (match_operand:V8HI 0 "register_operand" "=x")
8124 (match_operand:V8HI 1 "register_operand" "x")
8125 (parallel [(const_int 0)]))
8126 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8128 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8129 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8132 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8133 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8135 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8136 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8141 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8142 (parallel [(const_int 0)]))
8143 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8145 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8146 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8149 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8150 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8152 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8153 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8155 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8156 [(set_attr "type" "sseiadd")
8157 (set_attr "prefix" "vex")
8158 (set_attr "mode" "TI")])
;; V8HI pattern whose template is two-operand phsubw.  Operand 1 is tied to
;; the destination (constraint "0"); operand 2 may be a register or memory.
;; The visible RTL selects HI elements 0..7 of operand 1 and then of
;; operand 2, listed as adjacent pairs (0,1), (2,3), (4,5), (6,7).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8160 (define_insn "ssse3_phsubwv8hi3"
8161 [(set (match_operand:V8HI 0 "register_operand" "=x")
8167 (match_operand:V8HI 1 "register_operand" "0")
8168 (parallel [(const_int 0)]))
8169 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8171 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8172 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8175 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8176 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8178 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8179 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8184 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8185 (parallel [(const_int 0)]))
8186 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8188 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8189 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8192 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8193 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8195 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8196 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8198 "phsubw\t{%2, %0|%0, %2}"
8199 [(set_attr "type" "sseiadd")
8200 (set_attr "prefix_data16" "1")
8201 (set_attr "prefix_extra" "1")
8202 (set_attr "mode" "TI")])
;; V4HI pattern (constraint "y" operands, "mode" attribute "DI") whose
;; template is two-operand phsubw.  Operand 1 is tied to the destination
;; ("0"); operand 2 allows "ym".  The visible RTL selects HI elements 0..3
;; of operand 1 and then of operand 2, as adjacent pairs (0,1), (2,3).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8204 (define_insn "ssse3_phsubwv4hi3"
8205 [(set (match_operand:V4HI 0 "register_operand" "=y")
8210 (match_operand:V4HI 1 "register_operand" "0")
8211 (parallel [(const_int 0)]))
8212 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8214 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8215 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8219 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8220 (parallel [(const_int 0)]))
8221 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8223 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8224 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8226 "phsubw\t{%2, %0|%0, %2}"
8227 [(set_attr "type" "sseiadd")
8228 (set_attr "prefix_extra" "1")
8229 (set_attr "mode" "DI")])
;; AVX V4SI pattern whose template is three-operand vphsubd.  The visible
;; RTL selects SI elements 0..3 of operand 1 (register) and then of
;; operand 2 (register or memory), as adjacent pairs (0,1), (2,3).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8231 (define_insn "*avx_phsubdv4si3"
8232 [(set (match_operand:V4SI 0 "register_operand" "=x")
8237 (match_operand:V4SI 1 "register_operand" "x")
8238 (parallel [(const_int 0)]))
8239 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8241 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8242 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8246 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8247 (parallel [(const_int 0)]))
8248 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8250 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8251 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8253 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8254 [(set_attr "type" "sseiadd")
8255 (set_attr "prefix" "vex")
8256 (set_attr "mode" "TI")])
;; V4SI pattern whose template is two-operand phsubd.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.  The
;; visible RTL selects SI elements 0..3 of operand 1 and then of operand 2,
;; as adjacent pairs (0,1), (2,3).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8258 (define_insn "ssse3_phsubdv4si3"
8259 [(set (match_operand:V4SI 0 "register_operand" "=x")
8264 (match_operand:V4SI 1 "register_operand" "0")
8265 (parallel [(const_int 0)]))
8266 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8268 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8269 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8273 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8274 (parallel [(const_int 0)]))
8275 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8277 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8278 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8280 "phsubd\t{%2, %0|%0, %2}"
8281 [(set_attr "type" "sseiadd")
8282 (set_attr "prefix_data16" "1")
8283 (set_attr "prefix_extra" "1")
8284 (set_attr "mode" "TI")])
;; V2SI pattern (constraint "y" operands, "mode" attribute "DI") whose
;; template is two-operand phsubd.  Operand 1 is tied to the destination
;; ("0"); operand 2 allows "ym".  The visible RTL selects SI elements 0 and
;; 1 of operand 1 and then of operand 2.
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrapper and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8286 (define_insn "ssse3_phsubdv2si3"
8287 [(set (match_operand:V2SI 0 "register_operand" "=y")
8291 (match_operand:V2SI 1 "register_operand" "0")
8292 (parallel [(const_int 0)]))
8293 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8296 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8297 (parallel [(const_int 0)]))
8298 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8300 "phsubd\t{%2, %0|%0, %2}"
8301 [(set_attr "type" "sseiadd")
8302 (set_attr "prefix_extra" "1")
8303 (set_attr "mode" "DI")])
;; AVX V8HI pattern whose template is three-operand vphsubsw.  The visible
;; RTL selects HI elements 0..7 of operand 1 (register) and then elements
;; 0..7 of operand 2 (register or memory), listed as adjacent pairs
;; (0,1), (2,3), (4,5), (6,7).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8305 (define_insn "*avx_phsubswv8hi3"
8306 [(set (match_operand:V8HI 0 "register_operand" "=x")
8312 (match_operand:V8HI 1 "register_operand" "x")
8313 (parallel [(const_int 0)]))
8314 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8316 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8317 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8320 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8321 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8323 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8324 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8329 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8330 (parallel [(const_int 0)]))
8331 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8333 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8334 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8337 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8338 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8340 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8341 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8343 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8344 [(set_attr "type" "sseiadd")
8345 (set_attr "prefix" "vex")
8346 (set_attr "mode" "TI")])
;; V8HI pattern whose template is two-operand phsubsw.  Operand 1 is tied
;; to the destination (constraint "0"); operand 2 may be a register or
;; memory.  The visible RTL selects HI elements 0..7 of operand 1 and then
;; of operand 2, listed as adjacent pairs (0,1), (2,3), (4,5), (6,7).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8348 (define_insn "ssse3_phsubswv8hi3"
8349 [(set (match_operand:V8HI 0 "register_operand" "=x")
8355 (match_operand:V8HI 1 "register_operand" "0")
8356 (parallel [(const_int 0)]))
8357 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8359 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8360 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8363 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8364 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8366 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8367 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8372 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8373 (parallel [(const_int 0)]))
8374 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8376 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8377 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8380 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8381 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8383 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8384 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8386 "phsubsw\t{%2, %0|%0, %2}"
8387 [(set_attr "type" "sseiadd")
8388 (set_attr "prefix_data16" "1")
8389 (set_attr "prefix_extra" "1")
8390 (set_attr "mode" "TI")])
;; V4HI pattern (constraint "y" operands, "mode" attribute "DI") whose
;; template is two-operand phsubsw.  Operand 1 is tied to the destination
;; ("0"); operand 2 allows "ym".  The visible RTL selects HI elements 0..3
;; of operand 1 and then of operand 2, as adjacent pairs (0,1), (2,3).
;; NOTE(review): the operators combining each pair, the vec_concat-style
;; wrappers and the insn condition are missing from this listing (numbering
;; gaps) -- confirm against the upstream file.
8392 (define_insn "ssse3_phsubswv4hi3"
8393 [(set (match_operand:V4HI 0 "register_operand" "=y")
8398 (match_operand:V4HI 1 "register_operand" "0")
8399 (parallel [(const_int 0)]))
8400 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8402 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8403 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8407 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8408 (parallel [(const_int 0)]))
8409 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8412 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8414 "phsubsw\t{%2, %0|%0, %2}"
8415 [(set_attr "type" "sseiadd")
8416 (set_attr "prefix_extra" "1")
8417 (set_attr "mode" "DI")])
;; AVX pattern producing V8HI (operand 0) from two V16QI inputs (operand 1
;; a register, operand 2 a register or memory); the template is
;; three-operand vpmaddubsw.  Each input is selected twice: once with a
;; parallel starting at index 0 and once with a parallel starting at
;; index 1 (the latter visibly ending at index 15).
;; NOTE(review): most of the index lists, the extension/multiply/add
;; operators and the insn condition are missing from this listing
;; (numbering gaps) -- presumably even/odd element selections; confirm
;; against the upstream file.
8419 (define_insn "*avx_pmaddubsw128"
8420 [(set (match_operand:V8HI 0 "register_operand" "=x")
8425 (match_operand:V16QI 1 "register_operand" "x")
8426 (parallel [(const_int 0)
8436 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8437 (parallel [(const_int 0)
8447 (vec_select:V16QI (match_dup 1)
8448 (parallel [(const_int 1)
8457 (vec_select:V16QI (match_dup 2)
8458 (parallel [(const_int 1)
8465 (const_int 15)]))))))]
8467 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8468 [(set_attr "type" "sseiadd")
8469 (set_attr "prefix" "vex")
8470 (set_attr "mode" "TI")])
;; Pattern producing V8HI (operand 0) from two V16QI inputs; the template is
;; two-operand pmaddubsw.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.  Each input is selected twice:
;; once with a parallel starting at index 0 and once with a parallel
;; starting at index 1 (the latter visibly ending at index 15).
;; NOTE(review): most of the index lists, the extension/multiply/add
;; operators and the insn condition are missing from this listing
;; (numbering gaps) -- presumably even/odd element selections; confirm
;; against the upstream file.
8472 (define_insn "ssse3_pmaddubsw128"
8473 [(set (match_operand:V8HI 0 "register_operand" "=x")
8478 (match_operand:V16QI 1 "register_operand" "0")
8479 (parallel [(const_int 0)
8489 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8490 (parallel [(const_int 0)
8500 (vec_select:V16QI (match_dup 1)
8501 (parallel [(const_int 1)
8510 (vec_select:V16QI (match_dup 2)
8511 (parallel [(const_int 1)
8518 (const_int 15)]))))))]
8520 "pmaddubsw\t{%2, %0|%0, %2}"
8521 [(set_attr "type" "sseiadd")
8522 (set_attr "prefix_data16" "1")
8523 (set_attr "prefix_extra" "1")
8524 (set_attr "mode" "TI")])
;; 64-bit form of pmaddubsw: V8QI sources in "y"-constraint (MMX)
;; registers, V4HI result.  Operand 1 is tied to the destination ("0").
;; The visible element selections start at index 0 and at index 1 (the
;; latter ends at 7).  No prefix_data16 attribute is set on this variant.
8526 (define_insn "ssse3_pmaddubsw"
8527 [(set (match_operand:V4HI 0 "register_operand" "=y")
8532 (match_operand:V8QI 1 "register_operand" "0")
8533 (parallel [(const_int 0)
8539 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8540 (parallel [(const_int 0)
8546 (vec_select:V8QI (match_dup 1)
8547 (parallel [(const_int 1)
8552 (vec_select:V8QI (match_dup 2)
8553 (parallel [(const_int 1)
8556 (const_int 7)]))))))]
8558 "pmaddubsw\t{%2, %0|%0, %2}"
8559 [(set_attr "type" "sseiadd")
8560 (set_attr "prefix_extra" "1")
8561 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate_operand; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy (with code MULT) before the insn
;; patterns are matched.  An all-ones V8HI const_vector is part of the
;; pattern (presumably the rounding increment -- TODO confirm).
8563 (define_expand "ssse3_pmulhrswv8hi3"
8564 [(set (match_operand:V8HI 0 "register_operand" "")
8571 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8573 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8575 (const_vector:V8HI [(const_int 1) (const_int 1)
8576 (const_int 1) (const_int 1)
8577 (const_int 1) (const_int 1)
8578 (const_int 1) (const_int 1)]))
8581 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VEX-encoded three-operand vpmulhrsw on V8HI.  Enabled only when
;; TARGET_AVX holds and ix86_binary_operator_ok accepts the operands.
;; The "%" on operand 1 marks operands 1 and 2 as commutative for the
;; register allocator; operand 1 is not tied to the destination.
8583 (define_insn "*avx_pmulhrswv8hi3"
8584 [(set (match_operand:V8HI 0 "register_operand" "=x")
8591 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8593 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8595 (const_vector:V8HI [(const_int 1) (const_int 1)
8596 (const_int 1) (const_int 1)
8597 (const_int 1) (const_int 1)
8598 (const_int 1) (const_int 1)]))
8600 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8601 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8602 [(set_attr "type" "sseimul")
8603 (set_attr "prefix" "vex")
8604 (set_attr "mode" "TI")])
;; Legacy-encoded two-operand pmulhrsw on V8HI.  Enabled when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; commutative with operand 2 ("%") and tied to the destination ("0"), so
;; the template prints only operands 2 and 0.
8606 (define_insn "*ssse3_pmulhrswv8hi3"
8607 [(set (match_operand:V8HI 0 "register_operand" "=x")
8614 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8616 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8618 (const_vector:V8HI [(const_int 1) (const_int 1)
8619 (const_int 1) (const_int 1)
8620 (const_int 1) (const_int 1)
8621 (const_int 1) (const_int 1)]))
8623 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8624 "pmulhrsw\t{%2, %0|%0, %2}"
8625 [(set_attr "type" "sseimul")
8626 (set_attr "prefix_data16" "1")
8627 (set_attr "prefix_extra" "1")
8628 (set_attr "mode" "TI")])
;; Expander for the V4HI (64-bit) pmulhrsw pattern.  Mirrors the V8HI
;; expander: nonimmediate inputs, a four-element all-ones const_vector in
;; the pattern, and a preparation statement that calls
;; ix86_fixup_binary_operands_no_copy with code MULT and V4HImode.
8630 (define_expand "ssse3_pmulhrswv4hi3"
8631 [(set (match_operand:V4HI 0 "register_operand" "")
8638 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8640 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8642 (const_vector:V4HI [(const_int 1) (const_int 1)
8643 (const_int 1) (const_int 1)]))
8646 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Two-operand pmulhrsw on V4HI in "y"-constraint (MMX) registers.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is commutative ("%") and tied to operand 0 ("0").
8648 (define_insn "*ssse3_pmulhrswv4hi3"
8649 [(set (match_operand:V4HI 0 "register_operand" "=y")
8656 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8658 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8660 (const_vector:V4HI [(const_int 1) (const_int 1)
8661 (const_int 1) (const_int 1)]))
8663 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8664 "pmulhrsw\t{%2, %0|%0, %2}"
8665 [(set_attr "type" "sseimul")
8666 (set_attr "prefix_extra" "1")
8667 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpshufb on V16QI, modelled as an unspec of
;; operands 1 and 2.  Operand 1 is an independent "x" register; operand 2
;; may be a register or memory.
8669 (define_insn "*avx_pshufbv16qi3"
8670 [(set (match_operand:V16QI 0 "register_operand" "=x")
8671 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8672 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8675 "vpshufb\t{%2, %1, %0|%0, %1, %2}"
8676 [(set_attr "type" "sselog1")
8677 (set_attr "prefix" "vex")
8678 (set_attr "mode" "TI")])
;; Legacy-encoded two-operand pshufb on V16QI, modelled as an unspec of
;; operands 1 and 2.  Operand 1 is tied to the destination ("0"), so the
;; template prints only operands 2 and 0.
8680 (define_insn "ssse3_pshufbv16qi3"
8681 [(set (match_operand:V16QI 0 "register_operand" "=x")
8682 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8683 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8686 "pshufb\t{%2, %0|%0, %2}"
8687 [(set_attr "type" "sselog1")
8688 (set_attr "prefix_data16" "1")
8689 (set_attr "prefix_extra" "1")
8690 (set_attr "mode" "TI")])
;; 64-bit pshufb on V8QI in "y"-constraint (MMX) registers, modelled as an
;; unspec of operands 1 and 2.  Operand 1 is tied to the destination ("0").
8692 (define_insn "ssse3_pshufbv8qi3"
8693 [(set (match_operand:V8QI 0 "register_operand" "=y")
8694 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8695 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8698 "pshufb\t{%2, %0|%0, %2}"
8699 [(set_attr "type" "sselog1")
8700 (set_attr "prefix_extra" "1")
8701 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpsign{b,w,d}, instantiated for each mode of
;; the SSEMODE124 iterator (V16QI, V8HI, V4SI); the element-size suffix is
;; supplied by <ssevecsize>.  Operand 1 is an independent "x" register.
8703 (define_insn "*avx_psign<mode>3"
8704 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8706 [(match_operand:SSEMODE124 1 "register_operand" "x")
8707 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8710 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8711 [(set_attr "type" "sselog1")
8712 (set_attr "prefix" "vex")
8713 (set_attr "mode" "TI")])
;; Legacy-encoded two-operand psign{b,w,d} for each SSEMODE124 mode
;; (V16QI, V8HI, V4SI); the suffix comes from <ssevecsize>.  Operand 1 is
;; tied to the destination ("0").
8715 (define_insn "ssse3_psign<mode>3"
8716 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8718 [(match_operand:SSEMODE124 1 "register_operand" "0")
8719 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8722 "psign<ssevecsize>\t{%2, %0|%0, %2}"
8723 [(set_attr "type" "sselog1")
8724 (set_attr "prefix_data16" "1")
8725 (set_attr "prefix_extra" "1")
8726 (set_attr "mode" "TI")])
;; 64-bit psign for each MMXMODEI mode, using "y"-constraint (MMX)
;; registers; the suffix comes from <mmxvecsize>.  It shares the name
;; template of the SSE variant but iterates over a different mode set.
;; Operand 1 is tied to the destination ("0").
8728 (define_insn "ssse3_psign<mode>3"
8729 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8731 [(match_operand:MMXMODEI 1 "register_operand" "0")
8732 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8735 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
8736 [(set_attr "type" "sselog1")
8737 (set_attr "prefix_extra" "1")
8738 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpalignr on TImode.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably converting a bit count into the byte count the
;; instruction's immediate expects -- TODO confirm).
8740 (define_insn "*avx_palignrti"
8741 [(set (match_operand:TI 0 "register_operand" "=x")
8742 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8743 (match_operand:TI 2 "nonimmediate_operand" "xm")
8744 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8748 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8749 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8751 [(set_attr "type" "sseishft")
8752 (set_attr "prefix" "vex")
8753 (set_attr "mode" "TI")])
;; Legacy-encoded two-operand palignr on TImode; operand 1 is tied to the
;; destination ("0").  Operand 3 must satisfy const_0_to_255_mul_8_operand
;; and is divided by 8 in the output code before being printed as the
;; immediate.
8755 (define_insn "ssse3_palignrti"
8756 [(set (match_operand:TI 0 "register_operand" "=x")
8757 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
8758 (match_operand:TI 2 "nonimmediate_operand" "xm")
8759 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8763 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8764 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8766 [(set_attr "type" "sseishft")
8767 (set_attr "prefix_data16" "1")
8768 (set_attr "prefix_extra" "1")
8769 (set_attr "mode" "TI")])
;; 64-bit palignr on DImode in "y"-constraint (MMX) registers; operand 1 is
;; tied to the destination ("0").  As in the TImode variants, operand 3 is
;; divided by 8 in the output code before being printed as the immediate.
8771 (define_insn "ssse3_palignrdi"
8772 [(set (match_operand:DI 0 "register_operand" "=y")
8773 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8774 (match_operand:DI 2 "nonimmediate_operand" "ym")
8775 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8779 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8780 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8782 [(set_attr "type" "sseishft")
8783 (set_attr "prefix_extra" "1")
8784 (set_attr "mode" "DI")])
;; Standard-named abs<mode>2 pattern for each SSEMODE124 mode (V16QI, V8HI,
;; V4SI), expressed with the generic abs RTX and emitted as pabs{b,w,d}.
;; The "%v" template prefix pairs with prefix "maybe_vex", so one pattern
;; serves both encodings (presumably "v" is printed when AVX is enabled).
8786 (define_insn "abs<mode>2"
8787 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8788 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
8790 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
8791 [(set_attr "type" "sselog1")
8792 (set_attr "prefix_data16" "1")
8793 (set_attr "prefix_extra" "1")
8794 (set_attr "prefix" "maybe_vex")
8795 (set_attr "mode" "TI")])
;; abs<mode>2 for each MMXMODEI mode in "y"-constraint (MMX) registers,
;; expressed with the generic abs RTX and emitted as pabs with the
;; <mmxvecsize> suffix.  Unlike the SSE variant there is no "%v" prefix.
8797 (define_insn "abs<mode>2"
8798 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8799 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8801 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
8802 [(set_attr "type" "sselog1")
8803 (set_attr "prefix_extra" "1")
8804 (set_attr "mode" "DI")])
8806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8808 ;; AMD SSE4A instructions
8810 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movntss/movntsd: stores a scalar MODEF value from an "x" register
;; to a memory destination.  The instruction suffix comes from
;; <ssemodefsuffix> and the "mode" attribute follows the iterator mode.
8812 (define_insn "sse4a_movnt<mode>"
8813 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8815 [(match_operand:MODEF 1 "register_operand" "x")]
8818 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
8819 [(set_attr "type" "ssemov")
8820 (set_attr "mode" "<MODE>")])
;; Vector-source variant of movntss/movntsd: operand 1 is a V4SF/V2DF
;; register (SSEMODEF2P) and the pattern stores element 0 of it
;; (vec_select with index 0) to a scalar-mode memory destination.
8822 (define_insn "sse4a_vmmovnt<mode>"
8823 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8824 (unspec:<ssescalarmode>
8825 [(vec_select:<ssescalarmode>
8826 (match_operand:SSEMODEF2P 1 "register_operand" "x")
8827 (parallel [(const_int 0)]))]
8830 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8831 [(set_attr "type" "ssemov")
8832 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of SSE4A extrq: operand 1 is tied to the destination
;; ("0"); operands 2 and 3 are modeless const_int operands with empty
;; constraints, printed as the two immediates (%3 first in AT&T syntax).
8834 (define_insn "sse4a_extrqi"
8835 [(set (match_operand:V2DI 0 "register_operand" "=x")
8836 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8837 (match_operand 2 "const_int_operand" "")
8838 (match_operand 3 "const_int_operand" "")]
8841 "extrq\t{%3, %2, %0|%0, %2, %3}"
8842 [(set_attr "type" "sse")
8843 (set_attr "prefix_data16" "1")
8844 (set_attr "mode" "TI")])
;; Register form of SSE4A extrq: operand 1 (V2DI) is tied to the
;; destination ("0") and operand 2 is a V16QI register that replaces the
;; two immediates of the "extrqi" form.
8846 (define_insn "sse4a_extrq"
8847 [(set (match_operand:V2DI 0 "register_operand" "=x")
8848 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8849 (match_operand:V16QI 2 "register_operand" "x")]
8852 "extrq\t{%2, %0|%0, %2}"
8853 [(set_attr "type" "sse")
8854 (set_attr "prefix_data16" "1")
8855 (set_attr "mode" "TI")])
;; Immediate form of SSE4A insertq: operand 1 is tied to the destination
;; ("0"), operand 2 is a second V2DI register, and operands 3 and 4 are
;; modeless const_int operands printed as the two immediates.
8857 (define_insn "sse4a_insertqi"
8858 [(set (match_operand:V2DI 0 "register_operand" "=x")
8859 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8860 (match_operand:V2DI 2 "register_operand" "x")
8861 (match_operand 3 "const_int_operand" "")
8862 (match_operand 4 "const_int_operand" "")]
8865 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8866 [(set_attr "type" "sseins")
8867 (set_attr "prefix_rep" "1")
8868 (set_attr "mode" "TI")])
;; Register form of SSE4A insertq: operand 1 is tied to the destination
;; ("0") and operand 2 is a V2DI register; no immediates are printed.
8870 (define_insn "sse4a_insertq"
8871 [(set (match_operand:V2DI 0 "register_operand" "=x")
8872 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8873 (match_operand:V2DI 2 "register_operand" "x")]
8876 "insertq\t{%2, %0|%0, %2}"
8877 [(set_attr "type" "sseins")
8878 (set_attr "prefix_rep" "1")
8879 (set_attr "mode" "TI")])
8881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8883 ;; Intel SSE4.1 instructions
8885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded vblendps/vblendpd for each AVXMODEF2P mode, expressed as a
;; vec_merge.  Note the operand order inside the vec_merge: operand 2 is
;; listed first and operand 1 second, with immediate operand 3 as the
;; selection mask (its predicate range depends on <blendbits>).
8887 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
8888 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8889 (vec_merge:AVXMODEF2P
8890 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8891 (match_operand:AVXMODEF2P 1 "register_operand" "x")
8892 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8894 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8895 [(set_attr "type" "ssemov")
8896 (set_attr "prefix" "vex")
8897 (set_attr "mode" "<avxvecmode>")])
;; VEX-encoded variable blend vblendvps/vblendvpd for each AVXMODEF2P mode.
;; The selector (operand 3) is an ordinary "x" register here, in contrast
;; to the "Yz"-constrained selector of the sse4_1_blendvp pattern.
8899 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
8900 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8902 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
8903 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8904 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
8907 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8908 [(set_attr "type" "ssemov")
8909 (set_attr "prefix" "vex")
8910 (set_attr "mode" "<avxvecmode>")])
;; Legacy-encoded blendps/blendpd for V4SF/V2DF (SSEMODEF2P), expressed as
;; a vec_merge with operand 2 listed first and operand 1 second.  Operand 1
;; is tied to the destination ("0"); operand 3 is the immediate mask.
8912 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
8913 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8914 (vec_merge:SSEMODEF2P
8915 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8916 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8917 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8919 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8920 [(set_attr "type" "ssemov")
8921 (set_attr "prefix_extra" "1")
8922 (set_attr "mode" "<MODE>")])
;; Legacy-encoded variable blend blendvps/blendvpd.  The selector
;; (operand 3) uses constraint "Yz", while operands 0, 1 and 2 use the
;; *_not_xmm0_operand predicates -- i.e. the selector presumably lives in
;; xmm0 and the other operands must avoid it (TODO confirm against the
;; constraint definitions).  Operand 1 is tied to the destination.
8924 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
8925 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
8927 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
8928 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
8929 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
8932 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8933 [(set_attr "type" "ssemov")
8934 (set_attr "prefix_extra" "1")
8935 (set_attr "mode" "<MODE>")])
;; VEX-encoded vdpps/vdppd for each AVXMODEF2P mode.  Operands 1 and 2 are
;; marked commutative ("%"); operand 3 is an immediate in the range
;; accepted by const_0_to_255_operand.
8937 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
8938 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8940 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
8941 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8942 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8945 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8946 [(set_attr "type" "ssemul")
8947 (set_attr "prefix" "vex")
8948 (set_attr "mode" "<avxvecmode>")])
;; Legacy-encoded dpps/dppd for V4SF/V2DF.  Operand 1 is commutative with
;; operand 2 ("%") and tied to the destination ("0"); operand 3 is the
;; immediate control byte.
8950 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
8951 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8953 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
8954 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8955 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8958 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8959 [(set_attr "type" "ssemul")
8960 (set_attr "prefix_extra" "1")
8961 (set_attr "mode" "<MODE>")])
;; movntdqa load: the source (operand 1) must be memory and the V2DI
;; destination a register; modelled as an unspec.  "%v" in the template
;; pairs with prefix "maybe_vex" so the same pattern covers both encodings.
8963 (define_insn "sse4_1_movntdqa"
8964 [(set (match_operand:V2DI 0 "register_operand" "=x")
8965 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
8968 "%vmovntdqa\t{%1, %0|%0, %1}"
8969 [(set_attr "type" "ssecvt")
8970 (set_attr "prefix_extra" "1")
8971 (set_attr "prefix" "maybe_vex")
8972 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vmpsadbw on V16QI, modelled as an unspec of
;; two vector operands plus an immediate (operand 3, range 0..255 per its
;; predicate name).  Operand 1 is not tied to the destination.
8974 (define_insn "*avx_mpsadbw"
8975 [(set (match_operand:V16QI 0 "register_operand" "=x")
8976 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8977 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8978 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8981 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8982 [(set_attr "type" "sselog1")
8983 (set_attr "prefix" "vex")
8984 (set_attr "mode" "TI")])
;; Legacy-encoded two-operand mpsadbw on V16QI; operand 1 is tied to the
;; destination ("0") and operand 3 is the immediate control byte.
8986 (define_insn "sse4_1_mpsadbw"
8987 [(set (match_operand:V16QI 0 "register_operand" "=x")
8988 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8989 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8990 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8993 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
8994 [(set_attr "type" "sselog1")
8995 (set_attr "prefix_extra" "1")
8996 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpackusdw: two V4SI sources produce one V8HI
;; result.  Operand 1 is an independent "x" register; operand 2 may be a
;; register or memory.
8998 (define_insn "*avx_packusdw"
8999 [(set (match_operand:V8HI 0 "register_operand" "=x")
9002 (match_operand:V4SI 1 "register_operand" "x"))
9004 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9006 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9007 [(set_attr "type" "sselog")
9008 (set_attr "prefix" "vex")
9009 (set_attr "mode" "TI")])
;; Legacy-encoded two-operand packusdw: two V4SI sources produce one V8HI
;; result.  Operand 1 is tied to the destination ("0").
9011 (define_insn "sse4_1_packusdw"
9012 [(set (match_operand:V8HI 0 "register_operand" "=x")
9015 (match_operand:V4SI 1 "register_operand" "0"))
9017 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9019 "packusdw\t{%2, %0|%0, %2}"
9020 [(set_attr "type" "sselog")
9021 (set_attr "prefix_extra" "1")
9022 (set_attr "mode" "TI")])
;; VEX-encoded vpblendvb on V16QI with four explicit operands.  The
;; selector (operand 3) is an ordinary "x" register, unlike the
;; "Yz"-constrained selector of the sse4_1_pblendvb pattern.
9024 (define_insn "*avx_pblendvb"
9025 [(set (match_operand:V16QI 0 "register_operand" "=x")
9026 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9027 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9028 (match_operand:V16QI 3 "register_operand" "x")]
9031 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9032 [(set_attr "type" "ssemov")
9033 (set_attr "prefix" "vex")
9034 (set_attr "mode" "TI")])
;; Legacy-encoded pblendvb on V16QI.  The selector (operand 3) uses
;; constraint "Yz" and operands 0-2 use the *_not_xmm0_operand predicates
;; (presumably the selector is fixed to xmm0 -- TODO confirm).  Operand 1
;; is tied to the destination ("0").
9036 (define_insn "sse4_1_pblendvb"
9037 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9038 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9039 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9040 (match_operand:V16QI 3 "register_operand" "Yz")]
9043 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9044 [(set_attr "type" "ssemov")
9045 (set_attr "prefix_extra" "1")
9046 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpblendw on V8HI.  In the pattern operand 2 is
;; listed before operand 1, followed by the immediate mask (operand 3,
;; range 0..255 per its predicate name).
9048 (define_insn "*avx_pblendw"
9049 [(set (match_operand:V8HI 0 "register_operand" "=x")
9051 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9052 (match_operand:V8HI 1 "register_operand" "x")
9053 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9055 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9056 [(set_attr "type" "ssemov")
9057 (set_attr "prefix" "vex")
9058 (set_attr "mode" "TI")])
;; Legacy-encoded two-operand pblendw on V8HI.  Operand 2 is listed before
;; operand 1 in the pattern; operand 1 is tied to the destination ("0")
;; and operand 3 is the immediate mask.
9060 (define_insn "sse4_1_pblendw"
9061 [(set (match_operand:V8HI 0 "register_operand" "=x")
9063 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9064 (match_operand:V8HI 1 "register_operand" "0")
9065 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9067 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9068 [(set_attr "type" "ssemov")
9069 (set_attr "prefix_extra" "1")
9070 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as unspec UNSPEC_PHMINPOSUW of a single
;; register-or-memory source.  "%v" in the template pairs with prefix
;; "maybe_vex" so the pattern covers both encodings.
9072 (define_insn "sse4_1_phminposuw"
9073 [(set (match_operand:V8HI 0 "register_operand" "=x")
9074 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9075 UNSPEC_PHMINPOSUW))]
9077 "%vphminposuw\t{%1, %0|%0, %1}"
9078 [(set_attr "type" "sselog1")
9079 (set_attr "prefix_extra" "1")
9080 (set_attr "prefix" "maybe_vex")
9081 (set_attr "mode" "TI")])
;; pmovsxbw: V16QI source (register or memory), V8HI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9083 (define_insn "sse4_1_extendv8qiv8hi2"
9084 [(set (match_operand:V8HI 0 "register_operand" "=x")
9087 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9088 (parallel [(const_int 0)
9097 "%vpmovsxbw\t{%1, %0|%0, %1}"
9098 [(set_attr "type" "ssemov")
9099 (set_attr "prefix_extra" "1")
9100 (set_attr "prefix" "maybe_vex")
9101 (set_attr "mode" "TI")])
;; pmovsxbd: V16QI source (register or memory), V4SI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9103 (define_insn "sse4_1_extendv4qiv4si2"
9104 [(set (match_operand:V4SI 0 "register_operand" "=x")
9107 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9108 (parallel [(const_int 0)
9113 "%vpmovsxbd\t{%1, %0|%0, %1}"
9114 [(set_attr "type" "ssemov")
9115 (set_attr "prefix_extra" "1")
9116 (set_attr "prefix" "maybe_vex")
9117 (set_attr "mode" "TI")])
;; pmovsxbq: V16QI source (register or memory), V2DI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9119 (define_insn "sse4_1_extendv2qiv2di2"
9120 [(set (match_operand:V2DI 0 "register_operand" "=x")
9123 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9124 (parallel [(const_int 0)
9127 "%vpmovsxbq\t{%1, %0|%0, %1}"
9128 [(set_attr "type" "ssemov")
9129 (set_attr "prefix_extra" "1")
9130 (set_attr "prefix" "maybe_vex")
9131 (set_attr "mode" "TI")])
;; pmovsxwd: V8HI source (register or memory), V4SI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9133 (define_insn "sse4_1_extendv4hiv4si2"
9134 [(set (match_operand:V4SI 0 "register_operand" "=x")
9137 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9138 (parallel [(const_int 0)
9143 "%vpmovsxwd\t{%1, %0|%0, %1}"
9144 [(set_attr "type" "ssemov")
9145 (set_attr "prefix_extra" "1")
9146 (set_attr "prefix" "maybe_vex")
9147 (set_attr "mode" "TI")])
;; pmovsxwq: V8HI source (register or memory), V2DI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9149 (define_insn "sse4_1_extendv2hiv2di2"
9150 [(set (match_operand:V2DI 0 "register_operand" "=x")
9153 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9154 (parallel [(const_int 0)
9157 "%vpmovsxwq\t{%1, %0|%0, %1}"
9158 [(set_attr "type" "ssemov")
9159 (set_attr "prefix_extra" "1")
9160 (set_attr "prefix" "maybe_vex")
9161 (set_attr "mode" "TI")])
;; pmovsxdq: V4SI source (register or memory), V2DI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9163 (define_insn "sse4_1_extendv2siv2di2"
9164 [(set (match_operand:V2DI 0 "register_operand" "=x")
9167 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9168 (parallel [(const_int 0)
9171 "%vpmovsxdq\t{%1, %0|%0, %1}"
9172 [(set_attr "type" "ssemov")
9173 (set_attr "prefix_extra" "1")
9174 (set_attr "prefix" "maybe_vex")
9175 (set_attr "mode" "TI")])
;; pmovzxbw: V16QI source (register or memory), V8HI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9177 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9178 [(set (match_operand:V8HI 0 "register_operand" "=x")
9181 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9182 (parallel [(const_int 0)
9191 "%vpmovzxbw\t{%1, %0|%0, %1}"
9192 [(set_attr "type" "ssemov")
9193 (set_attr "prefix_extra" "1")
9194 (set_attr "prefix" "maybe_vex")
9195 (set_attr "mode" "TI")])
;; pmovzxbd: V16QI source (register or memory), V4SI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9197 (define_insn "sse4_1_zero_extendv4qiv4si2"
9198 [(set (match_operand:V4SI 0 "register_operand" "=x")
9201 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9202 (parallel [(const_int 0)
9207 "%vpmovzxbd\t{%1, %0|%0, %1}"
9208 [(set_attr "type" "ssemov")
9209 (set_attr "prefix_extra" "1")
9210 (set_attr "prefix" "maybe_vex")
9211 (set_attr "mode" "TI")])
;; pmovzxbq: V16QI source (register or memory), V2DI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9213 (define_insn "sse4_1_zero_extendv2qiv2di2"
9214 [(set (match_operand:V2DI 0 "register_operand" "=x")
9217 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9218 (parallel [(const_int 0)
9221 "%vpmovzxbq\t{%1, %0|%0, %1}"
9222 [(set_attr "type" "ssemov")
9223 (set_attr "prefix_extra" "1")
9224 (set_attr "prefix" "maybe_vex")
9225 (set_attr "mode" "TI")])
;; pmovzxwd: V8HI source (register or memory), V4SI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9227 (define_insn "sse4_1_zero_extendv4hiv4si2"
9228 [(set (match_operand:V4SI 0 "register_operand" "=x")
9231 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9232 (parallel [(const_int 0)
9237 "%vpmovzxwd\t{%1, %0|%0, %1}"
9238 [(set_attr "type" "ssemov")
9239 (set_attr "prefix_extra" "1")
9240 (set_attr "prefix" "maybe_vex")
9241 (set_attr "mode" "TI")])
;; pmovzxwq: V8HI source (register or memory), V2DI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9243 (define_insn "sse4_1_zero_extendv2hiv2di2"
9244 [(set (match_operand:V2DI 0 "register_operand" "=x")
9247 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9248 (parallel [(const_int 0)
9251 "%vpmovzxwq\t{%1, %0|%0, %1}"
9252 [(set_attr "type" "ssemov")
9253 (set_attr "prefix_extra" "1")
9254 (set_attr "prefix" "maybe_vex")
9255 (set_attr "mode" "TI")])
;; pmovzxdq: V4SI source (register or memory), V2DI result.  The visible
;; element selection on operand 1 starts at index 0.  "%v" pairs with
;; prefix "maybe_vex".
9257 (define_insn "sse4_1_zero_extendv2siv2di2"
9258 [(set (match_operand:V2DI 0 "register_operand" "=x")
9261 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9262 (parallel [(const_int 0)
9265 "%vpmovzxdq\t{%1, %0|%0, %1}"
9266 [(set_attr "type" "ssemov")
9267 (set_attr "prefix_extra" "1")
9268 (set_attr "prefix" "maybe_vex")
9269 (set_attr "mode" "TI")])
9271 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9272 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The only result of this pattern is (reg:CC FLAGS_REG); both vector
;; operands are inputs (operand 0 carries no "=" modifier).  The result is
;; modelled as an unspec of the two operands rather than as a compare RTX.
9273 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9274 [(set (reg:CC FLAGS_REG)
9275 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9276 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9279 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9280 [(set_attr "type" "ssecomi")
9281 (set_attr "prefix" "vex")
9282 (set_attr "mode" "<MODE>")])
9284 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9285 ;; but it is not really a compare instruction.
;; 256-bit vptest on V4DI operands ("mode" attribute OI).  Only
;; (reg:CC FLAGS_REG) is set; both vector operands are inputs.
9286 (define_insn "avx_ptest256"
9287 [(set (reg:CC FLAGS_REG)
9288 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9289 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9292 "vptest\t{%1, %0|%0, %1}"
9293 [(set_attr "type" "ssecomi")
9294 (set_attr "prefix" "vex")
9295 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands.  Only (reg:CC FLAGS_REG) is set; both
;; vector operands are inputs.  "%v" pairs with prefix "maybe_vex" so the
;; pattern covers both encodings.
9297 (define_insn "sse4_1_ptest"
9298 [(set (reg:CC FLAGS_REG)
9299 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9300 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9303 "%vptest\t{%1, %0|%0, %1}"
9304 [(set_attr "type" "ssecomi")
9305 (set_attr "prefix_extra" "1")
9306 (set_attr "prefix" "maybe_vex")
9307 (set_attr "mode" "TI")])
;; 256-bit vroundps/vroundpd for each AVX256MODEF2P mode: one vector
;; source (register or memory) plus an immediate (operand 2) restricted to
;; 0..15 by its predicate name.
9309 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9310 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9311 (unspec:AVX256MODEF2P
9312 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9313 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9316 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9317 [(set_attr "type" "ssecvt")
9318 (set_attr "prefix" "vex")
9319 (set_attr "mode" "<MODE>")])
;; 128-bit roundps/roundpd for V4SF/V2DF: one vector source (register or
;; memory) plus an immediate (operand 2, 0..15).  The destination is not
;; tied to the source.  "%v" pairs with prefix "maybe_vex".
9321 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9322 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9324 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9325 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9328 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9329 [(set_attr "type" "ssecvt")
9330 (set_attr "prefix_extra" "1")
9331 (set_attr "prefix" "maybe_vex")
9332 (set_attr "mode" "<MODE>")])
;; VEX-encoded scalar vroundss/vroundsd, expressed as a vec_merge of a
;; value computed from operand 2 and immediate operand 3 with the vector
;; in operand 1.  All three register operands are independent ("x").
9334 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9335 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9336 (vec_merge:SSEMODEF2P
9338 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9339 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9341 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9344 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9345 [(set_attr "type" "ssecvt")
9346 (set_attr "prefix" "vex")
9347 (set_attr "mode" "<MODE>")])
;; Legacy-encoded scalar roundss/roundsd, expressed as a vec_merge of a
;; value computed from operand 2 and immediate operand 3 with the vector
;; in operand 1.  Operand 1 is tied to the destination ("0"), so it is not
;; printed in the template.
9349 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9350 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9351 (vec_merge:SSEMODEF2P
9353 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9354 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9356 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9359 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9360 [(set_attr "type" "ssecvt")
9361 (set_attr "prefix_extra" "1")
9362 (set_attr "mode" "<MODE>")])
9364 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9366 ;; Intel SSE4.2 string/text processing instructions
9368 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI index (operand 0,
;; constraint "c"), a V16QI mask (operand 1, constraint "Yz") and
;; (reg:CC FLAGS_REG).  It is only valid before reload (the condition
;; rejects reload_in_progress / reload_completed).  The split code checks
;; REG_UNUSED notes on curr_insn to learn which results are live and emits
;; gen_sse4_2_pcmpestri and/or gen_sse4_2_pcmpestrm accordingly; when only
;; the flags are live it emits the _cconly variant instead.
9370 (define_insn_and_split "sse4_2_pcmpestr"
9371 [(set (match_operand:SI 0 "register_operand" "=c,c")
9373 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9374 (match_operand:SI 3 "register_operand" "a,a")
9375 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9376 (match_operand:SI 5 "register_operand" "d,d")
9377 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9379 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9387 (set (reg:CC FLAGS_REG)
9396 && !(reload_completed || reload_in_progress)"
9401 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9402 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9403 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9406 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9407 operands[3], operands[4],
9408 operands[5], operands[6]));
9410 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9411 operands[3], operands[4],
9412 operands[5], operands[6]));
9413 if (flags && !(ecx || xmm0))
9414 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9415 operands[2], operands[3],
9416 operands[4], operands[5],
9420 [(set_attr "type" "sselog")
9421 (set_attr "prefix_data16" "1")
9422 (set_attr "prefix_extra" "1")
9423 (set_attr "memory" "none,load")
9424 (set_attr "mode" "TI")])
;; pcmpestri: produces the SI result in operand 0 (constraint "c") and also
;; sets (reg:CC FLAGS_REG).  Operands 2 and 4 are the SI inputs constrained
;; to "a" and "d"; they are implicit in the instruction and therefore not
;; printed.  Two alternatives: register or memory for operand 3 (see the
;; "memory" attribute).
9426 (define_insn "sse4_2_pcmpestri"
9427 [(set (match_operand:SI 0 "register_operand" "=c,c")
9429 [(match_operand:V16QI 1 "register_operand" "x,x")
9430 (match_operand:SI 2 "register_operand" "a,a")
9431 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9432 (match_operand:SI 4 "register_operand" "d,d")
9433 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9435 (set (reg:CC FLAGS_REG)
9444 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9445 [(set_attr "type" "sselog")
9446 (set_attr "prefix_data16" "1")
9447 (set_attr "prefix_extra" "1")
9448 (set_attr "prefix" "maybe_vex")
9449 (set_attr "memory" "none,load")
9450 (set_attr "mode" "TI")])
;; pcmpestrm: produces the V16QI result in operand 0 (constraint "Yz") and
;; also sets (reg:CC FLAGS_REG).  The "a"/"d"-constrained SI inputs
;; (operands 2 and 4) are implicit and not printed.  Two alternatives:
;; register or memory for operand 3.
9452 (define_insn "sse4_2_pcmpestrm"
9453 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9455 [(match_operand:V16QI 1 "register_operand" "x,x")
9456 (match_operand:SI 2 "register_operand" "a,a")
9457 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9458 (match_operand:SI 4 "register_operand" "d,d")
9459 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9461 (set (reg:CC FLAGS_REG)
9470 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9471 [(set_attr "type" "sselog")
9472 (set_attr "prefix_data16" "1")
9473 (set_attr "prefix_extra" "1")
9474 (set_attr "prefix" "maybe_vex")
9475 (set_attr "memory" "none,load")
9476 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the sole set is (reg:CC FLAGS_REG); the data result
;; is discarded into a clobbered scratch.  Four alternatives: the first two
;; clobber a V16QI scratch ("Yz") and emit pcmpestrm, the last two clobber
;; an SI scratch ("c") and emit pcmpestri; within each pair operand 4 is a
;; register or memory (see the "memory" attribute).
9478 (define_insn "sse4_2_pcmpestr_cconly"
9479 [(set (reg:CC FLAGS_REG)
9481 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9482 (match_operand:SI 3 "register_operand" "a,a,a,a")
9483 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9484 (match_operand:SI 5 "register_operand" "d,d,d,d")
9485 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9487 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9488 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9491 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9492 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9493 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9494 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9495 [(set_attr "type" "sselog")
9496 (set_attr "prefix_data16" "1")
9497 (set_attr "prefix_extra" "1")
9498 (set_attr "memory" "none,load,none,load")
9499 (set_attr "prefix" "maybe_vex")
9500 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI index (operand 0,
;; constraint "c"), a V16QI mask (operand 1, constraint "Yz") and
;; (reg:CC FLAGS_REG).  Unlike pcmpestr there are no "a"/"d" length inputs.
;; Valid only before reload; the split code uses REG_UNUSED notes on
;; curr_insn to decide whether to emit gen_sse4_2_pcmpistri,
;; gen_sse4_2_pcmpistrm, or (when only the flags are live) the _cconly
;; variant.
9502 (define_insn_and_split "sse4_2_pcmpistr"
9503 [(set (match_operand:SI 0 "register_operand" "=c,c")
9505 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9506 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9507 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9509 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9515 (set (reg:CC FLAGS_REG)
9522 && !(reload_completed || reload_in_progress)"
9527 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9528 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9529 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9532 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9533 operands[3], operands[4]));
9535 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9536 operands[3], operands[4]));
9537 if (flags && !(ecx || xmm0))
9538 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9539 operands[2], operands[3],
9543 [(set_attr "type" "sselog")
9544 (set_attr "prefix_data16" "1")
9545 (set_attr "prefix_extra" "1")
9546 (set_attr "memory" "none,load")
9547 (set_attr "mode" "TI")])
;; pcmpistri: produces the SI result in operand 0 (constraint "c") and also
;; sets (reg:CC FLAGS_REG).  Two alternatives: register or memory for
;; operand 2 (see the "memory" attribute).
9549 (define_insn "sse4_2_pcmpistri"
9550 [(set (match_operand:SI 0 "register_operand" "=c,c")
9552 [(match_operand:V16QI 1 "register_operand" "x,x")
9553 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9554 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9556 (set (reg:CC FLAGS_REG)
9563 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9564 [(set_attr "type" "sselog")
9565 (set_attr "prefix_data16" "1")
9566 (set_attr "prefix_extra" "1")
9567 (set_attr "prefix" "maybe_vex")
9568 (set_attr "memory" "none,load")
9569 (set_attr "mode" "TI")])
;; pcmpistrm: produces the V16QI result in operand 0 (constraint "Yz") and
;; also sets (reg:CC FLAGS_REG).  Two alternatives: register or memory for
;; operand 2.
9571 (define_insn "sse4_2_pcmpistrm"
9572 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9574 [(match_operand:V16QI 1 "register_operand" "x,x")
9575 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9576 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9578 (set (reg:CC FLAGS_REG)
9585 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9586 [(set_attr "type" "sselog")
9587 (set_attr "prefix_data16" "1")
9588 (set_attr "prefix_extra" "1")
9589 (set_attr "prefix" "maybe_vex")
9590 (set_attr "memory" "none,load")
9591 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the sole set is (reg:CC FLAGS_REG); the data result
;; goes to a clobbered scratch.  Four alternatives: the first two clobber a
;; V16QI scratch ("Yz") and emit pcmpistrm, the last two clobber an SI
;; scratch ("c") and emit pcmpistri; within each pair operand 3 is a
;; register or memory.
9593 (define_insn "sse4_2_pcmpistr_cconly"
9594 [(set (reg:CC FLAGS_REG)
9596 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9597 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9598 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9600 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9601 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9604 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9605 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9606 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9607 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9608 [(set_attr "type" "sselog")
9609 (set_attr "prefix_data16" "1")
9610 (set_attr "prefix_extra" "1")
9611 (set_attr "memory" "none,load,none,load")
9612 (set_attr "prefix" "maybe_vex")
9613 (set_attr "mode" "TI")])
9615 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9617 ;; SSE5 instructions
9619 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9621 ;; SSE5 parallel integer multiply/add instructions.
9622 ;; Note the instruction does not allow the value being added to be a memory
9623 ;; operand.  However, pretending via the nonimmediate_operand predicate
9624 ;; that it does, and splitting it later, allows the following to be recognized:
9625 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI: operands 1 and 2 are multiplied and operand 3 is the
;; addend, which is a register tied to the destination ("0") in every
;; alternative.  Three alternatives differ in which multiplicand may be
;; memory; the third one prints operands 1 and 2 in swapped order.
;; Enabled under TARGET_SSE5 when ix86_sse5_valid_op_p accepts the operands.
9626 (define_insn "sse5_pmacsww"
9627 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9630 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
9631 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
9632 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9633 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9635 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9636 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9637 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9638 [(set_attr "type" "ssemuladd")
9639 (set_attr "mode" "TI")])
9641 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The pattern below accepts nonimmediate operands for all three inputs,
;; including the addend (operand 3).  The visible condition requires that
;; ix86_sse5_valid_op_p fails with argument 1 but succeeds with argument 2,
;; and that operand 0 is not mentioned in operands 1-3.  The split then
;; calls ix86_expand_sse5_multiple_memory and re-emits gen_sse5_pmacsww.
9643 [(set (match_operand:V8HI 0 "register_operand" "")
9645 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
9646 (match_operand:V8HI 2 "nonimmediate_operand" ""))
9647 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
9649 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9650 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9651 && !reg_mentioned_p (operands[0], operands[1])
9652 && !reg_mentioned_p (operands[0], operands[2])
9653 && !reg_mentioned_p (operands[0], operands[3])"
9656 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
9657 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww on V8HI.  Operand 3 is tied to the destination ("0").  Per the
;; constraints, at most one of operands 1 and 2 is in memory; alternative 2
;; (memory operand 1) swaps %1 and %2 in the output template.
9662 (define_insn "sse5_pmacssww"
9663 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9665 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
9666 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
9667 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9668 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9670 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9671 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9672 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9673 [(set_attr "type" "ssemuladd")
9674 (set_attr "mode" "TI")])
9676 ;; Note the instruction does not allow the value being added to be a memory
9677 ;; operation. However by pretending via the nonimmediate_operand predicate
9678 ;; that it does and splitting it later allows the following to be recognized:
9679 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI.  Operand 3 is tied to the destination ("0").  Per the
;; constraints, at most one of operands 1 and 2 is in memory; alternative 2
;; (memory operand 1) swaps %1 and %2 in the output template.
;; NOTE(review): the fifth argument of ix86_sse5_valid_op_p is 2 here, as in
;; sse5_pmacsww -- presumably to let the two-memory form survive until the
;; split; confirm in i386.c.
9680 (define_insn "sse5_pmacsdd"
9681 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9684 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9685 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9686 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9687 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9689 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9690 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9691 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9692 [(set_attr "type" "ssemuladd")
9693 (set_attr "mode" "TI")])
9695 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split fires when ix86_sse5_valid_op_p rejects the operands with 1 as
;; its fifth argument but accepts them with 2, and the destination register
;; is not mentioned in any of operands 1-3.  It then calls
;; ix86_expand_sse5_multiple_memory on the operands and emits sse5_pmacsdd.
9697 [(set (match_operand:V4SI 0 "register_operand" "")
9699 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
9700 (match_operand:V4SI 2 "nonimmediate_operand" ""))
9701 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
9703 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9704 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9705 && !reg_mentioned_p (operands[0], operands[1])
9706 && !reg_mentioned_p (operands[0], operands[2])
9707 && !reg_mentioned_p (operands[0], operands[3])"
9710 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
9711 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd on V4SI.  Operand 3 is tied to the destination ("0").  Per the
;; constraints, at most one of operands 1 and 2 is in memory; alternative 2
;; (memory operand 1) swaps %1 and %2 in the output template.
9716 (define_insn "sse5_pmacssdd"
9717 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9719 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9720 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9721 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9722 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9724 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9725 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9726 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9727 [(set_attr "type" "ssemuladd")
9728 (set_attr "mode" "TI")])
;; pmacssdql: V4SI sources, V2DI accumulator and destination.  The element
;; selectors applied to operands 1 and 2 both start at element 1.  Operand 3
;; is tied to the destination ("0"); alternative 2 (memory operand 1) swaps
;; %1 and %2 in the output template.
9730 (define_insn "sse5_pmacssdql"
9731 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9736 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9737 (parallel [(const_int 1)
9740 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9741 (parallel [(const_int 1)
9743 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9744 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9746 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9747 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9748 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9749 [(set_attr "type" "ssemuladd")
9750 (set_attr "mode" "TI")])
;; pmacssdqh: V4SI sources, V2DI accumulator and destination.  The element
;; selectors applied to operands 1 and 2 both start at element 0.  Operand 3
;; is tied to the destination ("0"); alternative 2 (memory operand 1) swaps
;; %1 and %2 in the output template.
9752 (define_insn "sse5_pmacssdqh"
9753 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9758 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9759 (parallel [(const_int 0)
9763 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9764 (parallel [(const_int 0)
9766 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9767 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9769 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9770 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9771 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9772 [(set_attr "type" "ssemuladd")
9773 (set_attr "mode" "TI")])
;; pmacsdql: V4SI sources, V2DI accumulator and destination.  The element
;; selectors applied to operands 1 and 2 both start at element 1.  Operand 3
;; is tied to the destination ("0"); alternative 2 (memory operand 1) swaps
;; %1 and %2 in the output template.
9775 (define_insn "sse5_pmacsdql"
9776 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9781 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9782 (parallel [(const_int 1)
9786 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9787 (parallel [(const_int 1)
9789 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9790 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9792 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9793 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9794 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9795 [(set_attr "type" "ssemuladd")
9796 (set_attr "mode" "TI")])
;; Variant of sse5_pmacsdql whose addend (operand 3) is a memory operand.
;; The destination is earlyclobber ("=&x") in every alternative.  The split
;; is enabled once reload has completed, or earlier when the destination
;; register is not mentioned in operand 1 or operand 2.
;; NOTE(review): the replacement pattern is only partly visible in this
;; listing; -1 as the fifth argument of ix86_sse5_valid_op_p is not explained
;; here -- confirm its meaning in i386.c.
9798 (define_insn_and_split "*sse5_pmacsdql_mem"
9799 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
9804 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9805 (parallel [(const_int 1)
9809 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9810 (parallel [(const_int 1)
9812 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
9813 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
9815 "&& (reload_completed
9816 || (!reg_mentioned_p (operands[0], operands[1])
9817 && !reg_mentioned_p (operands[0], operands[2])))"
9826 (parallel [(const_int 1)
9831 (parallel [(const_int 1)
9835 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
9836 ;; fake it with a multiply/add. In general, we expect the define_split to
9837 ;; occur before register allocation, so we have to handle the corner case where
9838 ;; the target is the same as either operands[1] or operands[2]
;; Two-source V4SI -> V2DI pattern with selectors (1 3) on the visible
;; operand; the destination is earlyclobber ("=&x").  The split is enabled
;; after reload, or earlier when the destination is not mentioned in operand
;; 1 or 2, and its preparation code sets operands[3] to the V2DI zero
;; constant so the replacement can use it as the addend.
9839 (define_insn_and_split "sse5_mulv2div2di3_low"
9840 [(set (match_operand:V2DI 0 "register_operand" "=&x")
9844 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9845 (parallel [(const_int 1)
9849 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9850 (parallel [(const_int 1)
9851 (const_int 3)])))))]
9854 "&& (reload_completed
9855 || (!reg_mentioned_p (operands[0], operands[1])
9856 && !reg_mentioned_p (operands[0], operands[2])))"
9865 (parallel [(const_int 1)
9870 (parallel [(const_int 1)
9874 operands[3] = CONST0_RTX (V2DImode);
9876 [(set_attr "type" "ssemuladd")
9877 (set_attr "mode" "TI")])
;; pmacsdqh: V4SI sources, V2DI accumulator and destination.  The element
;; selectors applied to operands 1 and 2 both start at element 0.  Operand 3
;; is tied to the destination ("0"); alternative 2 (memory operand 1) swaps
;; %1 and %2 in the output template.
9879 (define_insn "sse5_pmacsdqh"
9880 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9885 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9886 (parallel [(const_int 0)
9890 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9891 (parallel [(const_int 0)
9893 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9894 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9896 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9897 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9898 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9899 [(set_attr "type" "ssemuladd")
9900 (set_attr "mode" "TI")])
;; Variant of sse5_pmacsdqh whose addend (operand 3) is a memory operand.
;; The destination is earlyclobber ("=&x") in every alternative.  The split
;; is enabled once reload has completed, or earlier when the destination
;; register is not mentioned in operand 1 or operand 2.
;; NOTE(review): the replacement pattern is only partly visible in this
;; listing; -1 as the fifth argument of ix86_sse5_valid_op_p is not explained
;; here -- confirm its meaning in i386.c.
9902 (define_insn_and_split "*sse5_pmacsdqh_mem"
9903 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
9908 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9909 (parallel [(const_int 0)
9913 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9914 (parallel [(const_int 0)
9916 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
9917 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
9919 "&& (reload_completed
9920 || (!reg_mentioned_p (operands[0], operands[1])
9921 && !reg_mentioned_p (operands[0], operands[2])))"
9930 (parallel [(const_int 0)
9935 (parallel [(const_int 0)
9939 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
9940 ;; fake it with a multiply/add. In general, we expect the define_split to
9941 ;; occur before register allocation, so we have to handle the corner case where
9942 ;; the target is the same as either operands[1] or operands[2]
;; Two-source V4SI -> V2DI pattern with selectors (0 2) on the visible
;; operand; the destination is earlyclobber ("=&x").  The split is enabled
;; after reload, or earlier when the destination is not mentioned in operand
;; 1 or 2, and its preparation code sets operands[3] to the V2DI zero
;; constant so the replacement can use it as the addend.
9943 (define_insn_and_split "sse5_mulv2div2di3_high"
9944 [(set (match_operand:V2DI 0 "register_operand" "=&x")
9948 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9949 (parallel [(const_int 0)
9953 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9954 (parallel [(const_int 0)
9955 (const_int 2)])))))]
9958 "&& (reload_completed
9959 || (!reg_mentioned_p (operands[0], operands[1])
9960 && !reg_mentioned_p (operands[0], operands[2])))"
9969 (parallel [(const_int 0)
9974 (parallel [(const_int 0)
9978 operands[3] = CONST0_RTX (V2DImode);
9980 [(set_attr "type" "ssemuladd")
9981 (set_attr "mode" "TI")])
9983 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V8HI sources, V4SI accumulator and destination.  The element
;; selectors applied to operands 1 and 2 both start at element 1.  Operand 3
;; is tied to the destination ("0"); alternative 2 (memory operand 1) swaps
;; %1 and %2 in the output template.
9984 (define_insn "sse5_pmacsswd"
9985 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9990 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
9991 (parallel [(const_int 1)
9997 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
9998 (parallel [(const_int 1)
10002 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10003 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10005 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10006 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10007 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10008 [(set_attr "type" "ssemuladd")
10009 (set_attr "mode" "TI")])
;; pmacswd: V8HI sources, V4SI accumulator and destination.  The element
;; selectors applied to operands 1 and 2 both start at element 1.  Operand 3
;; is tied to the destination ("0"); alternative 2 (memory operand 1) swaps
;; %1 and %2 in the output template.
10011 (define_insn "sse5_pmacswd"
10012 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10017 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10018 (parallel [(const_int 1)
10024 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10025 (parallel [(const_int 1)
10029 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10030 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10032 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10033 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10034 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10035 [(set_attr "type" "ssemuladd")
10036 (set_attr "mode" "TI")])
;; pmadcsswd: V8HI sources, V4SI accumulator and destination.  The pattern
;; uses two pairs of element selectors, one pair starting at element 0 and
;; one pair starting at element 1 (ending at element 7).  Operand 3 is tied
;; to the destination ("0"); alternative 2 (memory operand 1) swaps %1 and %2
;; in the output template.
10038 (define_insn "sse5_pmadcsswd"
10039 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10045 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10046 (parallel [(const_int 0)
10052 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10053 (parallel [(const_int 0)
10061 (parallel [(const_int 1)
10068 (parallel [(const_int 1)
10071 (const_int 7)])))))
10072 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10073 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10075 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10076 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10077 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10078 [(set_attr "type" "ssemuladd")
10079 (set_attr "mode" "TI")])
;; pmadcswd: V8HI sources, V4SI accumulator and destination.  The pattern
;; uses two pairs of element selectors, one pair starting at element 0 and
;; one pair starting at element 1 (ending at element 7).  Operand 3 is tied
;; to the destination ("0"); alternative 2 (memory operand 1) swaps %1 and %2
;; in the output template.
10081 (define_insn "sse5_pmadcswd"
10082 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10088 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10089 (parallel [(const_int 0)
10095 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10096 (parallel [(const_int 0)
10104 (parallel [(const_int 1)
10111 (parallel [(const_int 1)
10114 (const_int 7)])))))
10115 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10116 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10118 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10119 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10120 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10121 [(set_attr "type" "ssemuladd")
10122 (set_attr "mode" "TI")])
10124 ;; SSE5 parallel XMM conditional moves
;; pcmov for every mode in SSEMODE: an if_then_else whose selector is operand
;; 3 and whose arms are operands 1 and 2.  In alternatives 0-1 the selector
;; is tied to the destination ("0"); in alternatives 2-3 operand 1 is.  Each
;; alternative has exactly one operand constrained "xm".  All four
;; alternatives share the same output template.
10125 (define_insn "sse5_pcmov_<mode>"
10126 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10127 (if_then_else:SSEMODE
10128 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10129 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10130 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10131 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10133 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10134 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10135 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10136 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10137 [(set_attr "type" "sse4arg")])
10139 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: one V16QI source (register or memory) to a V8HI destination.
;; The two element selectors start at elements 0 and 1; the second ends at
;; element 15.
10140 (define_insn "sse5_phaddbw"
10141 [(set (match_operand:V8HI 0 "register_operand" "=x")
10145 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10146 (parallel [(const_int 0)
10157 (parallel [(const_int 1)
10164 (const_int 15)])))))]
10166 "phaddbw\t{%1, %0|%0, %1}"
10167 [(set_attr "type" "sseiadd1")])
;; phaddbd: one V16QI source (register or memory) to a V4SI destination.
;; Four element selectors are used, starting at elements 0, 1, 2 and 3.
10169 (define_insn "sse5_phaddbd"
10170 [(set (match_operand:V4SI 0 "register_operand" "=x")
10175 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10176 (parallel [(const_int 0)
10183 (parallel [(const_int 1)
10186 (const_int 13)]))))
10191 (parallel [(const_int 2)
10198 (parallel [(const_int 3)
10201 (const_int 15)]))))))]
10203 "phaddbd\t{%1, %0|%0, %1}"
10204 [(set_attr "type" "sseiadd1")])
;; phaddbq: one V16QI source (register or memory) to a V2DI destination.
;; Eight element selectors are used, starting at elements 0-3 and 8-11.
10206 (define_insn "sse5_phaddbq"
10207 [(set (match_operand:V2DI 0 "register_operand" "=x")
10213 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10214 (parallel [(const_int 0)
10219 (parallel [(const_int 1)
10225 (parallel [(const_int 2)
10230 (parallel [(const_int 3)
10231 (const_int 7)])))))
10237 (parallel [(const_int 8)
10242 (parallel [(const_int 9)
10243 (const_int 13)]))))
10248 (parallel [(const_int 10)
10253 (parallel [(const_int 11)
10254 (const_int 15)])))))))]
10256 "phaddbq\t{%1, %0|%0, %1}"
10257 [(set_attr "type" "sseiadd1")])
;; phaddwd: one V8HI source (register or memory) to a V4SI destination.
;; The two element selectors start at elements 0 and 1; the second ends at
;; element 7.
10259 (define_insn "sse5_phaddwd"
10260 [(set (match_operand:V4SI 0 "register_operand" "=x")
10264 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10265 (parallel [(const_int 0)
10272 (parallel [(const_int 1)
10275 (const_int 7)])))))]
10277 "phaddwd\t{%1, %0|%0, %1}"
10278 [(set_attr "type" "sseiadd1")])
;; phaddwq: one V8HI source (register or memory) to a V2DI destination.
;; Four element selectors are used, starting at elements 0, 1, 2 and 3.
10280 (define_insn "sse5_phaddwq"
10281 [(set (match_operand:V2DI 0 "register_operand" "=x")
10286 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10287 (parallel [(const_int 0)
10292 (parallel [(const_int 1)
10298 (parallel [(const_int 2)
10303 (parallel [(const_int 3)
10304 (const_int 7)]))))))]
10306 "phaddwq\t{%1, %0|%0, %1}"
10307 [(set_attr "type" "sseiadd1")])
;; phadddq: one V4SI source (register or memory) to a V2DI destination.
;; The two element selectors start at elements 0 and 1; the second is (1 3).
10309 (define_insn "sse5_phadddq"
10310 [(set (match_operand:V2DI 0 "register_operand" "=x")
10314 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10315 (parallel [(const_int 0)
10320 (parallel [(const_int 1)
10321 (const_int 3)])))))]
10323 "phadddq\t{%1, %0|%0, %1}"
10324 [(set_attr "type" "sseiadd1")])
;; phaddubw: one V16QI source (register or memory) to a V8HI destination.
;; The two element selectors start at elements 0 and 1; the second ends at
;; element 15.
10326 (define_insn "sse5_phaddubw"
10327 [(set (match_operand:V8HI 0 "register_operand" "=x")
10331 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10332 (parallel [(const_int 0)
10343 (parallel [(const_int 1)
10350 (const_int 15)])))))]
10352 "phaddubw\t{%1, %0|%0, %1}"
10353 [(set_attr "type" "sseiadd1")])
;; phaddubd: one V16QI source (register or memory) to a V4SI destination.
;; Four element selectors are used, starting at elements 0, 1, 2 and 3.
10355 (define_insn "sse5_phaddubd"
10356 [(set (match_operand:V4SI 0 "register_operand" "=x")
10361 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10362 (parallel [(const_int 0)
10369 (parallel [(const_int 1)
10372 (const_int 13)]))))
10377 (parallel [(const_int 2)
10384 (parallel [(const_int 3)
10387 (const_int 15)]))))))]
10389 "phaddubd\t{%1, %0|%0, %1}"
10390 [(set_attr "type" "sseiadd1")])
;; phaddubq: one V16QI source (register or memory) to a V2DI destination.
;; Eight element selectors are used, starting at elements 0-3 and 8-11.
10392 (define_insn "sse5_phaddubq"
10393 [(set (match_operand:V2DI 0 "register_operand" "=x")
10399 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10400 (parallel [(const_int 0)
10405 (parallel [(const_int 1)
10411 (parallel [(const_int 2)
10416 (parallel [(const_int 3)
10417 (const_int 7)])))))
10423 (parallel [(const_int 8)
10428 (parallel [(const_int 9)
10429 (const_int 13)]))))
10434 (parallel [(const_int 10)
10439 (parallel [(const_int 11)
10440 (const_int 15)])))))))]
10442 "phaddubq\t{%1, %0|%0, %1}"
10443 [(set_attr "type" "sseiadd1")])
;; phadduwd: one V8HI source (register or memory) to a V4SI destination.
;; The two element selectors start at elements 0 and 1; the second ends at
;; element 7.
10445 (define_insn "sse5_phadduwd"
10446 [(set (match_operand:V4SI 0 "register_operand" "=x")
10450 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10451 (parallel [(const_int 0)
10458 (parallel [(const_int 1)
10461 (const_int 7)])))))]
10463 "phadduwd\t{%1, %0|%0, %1}"
10464 [(set_attr "type" "sseiadd1")])
;; phadduwq: one V8HI source (register or memory) to a V2DI destination.
;; Four element selectors are used, starting at elements 0, 1, 2 and 3.
10466 (define_insn "sse5_phadduwq"
10467 [(set (match_operand:V2DI 0 "register_operand" "=x")
10472 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10473 (parallel [(const_int 0)
10478 (parallel [(const_int 1)
10484 (parallel [(const_int 2)
10489 (parallel [(const_int 3)
10490 (const_int 7)]))))))]
10492 "phadduwq\t{%1, %0|%0, %1}"
10493 [(set_attr "type" "sseiadd1")])
;; phaddudq: one V4SI source (register or memory) to a V2DI destination.
;; The two element selectors start at elements 0 and 1; the second is (1 3).
10495 (define_insn "sse5_phaddudq"
10496 [(set (match_operand:V2DI 0 "register_operand" "=x")
10500 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10501 (parallel [(const_int 0)
10506 (parallel [(const_int 1)
10507 (const_int 3)])))))]
10509 "phaddudq\t{%1, %0|%0, %1}"
10510 [(set_attr "type" "sseiadd1")])
;; phsubbw: one V16QI source (register or memory) to a V8HI destination.
;; The two element selectors start at elements 0 and 1; the second ends at
;; element 15.
10512 (define_insn "sse5_phsubbw"
10513 [(set (match_operand:V8HI 0 "register_operand" "=x")
10517 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10518 (parallel [(const_int 0)
10529 (parallel [(const_int 1)
10536 (const_int 15)])))))]
10538 "phsubbw\t{%1, %0|%0, %1}"
10539 [(set_attr "type" "sseiadd1")])
;; phsubwd: one V8HI source (register or memory) to a V4SI destination.
;; The two element selectors start at elements 0 and 1; the second ends at
;; element 7.
10541 (define_insn "sse5_phsubwd"
10542 [(set (match_operand:V4SI 0 "register_operand" "=x")
10546 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10547 (parallel [(const_int 0)
10554 (parallel [(const_int 1)
10557 (const_int 7)])))))]
10559 "phsubwd\t{%1, %0|%0, %1}"
10560 [(set_attr "type" "sseiadd1")])
;; phsubdq: one V4SI source (register or memory) to a V2DI destination.
;; The two element selectors start at elements 0 and 1; the second is (1 3).
10562 (define_insn "sse5_phsubdq"
10563 [(set (match_operand:V2DI 0 "register_operand" "=x")
10567 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10568 (parallel [(const_int 0)
10573 (parallel [(const_int 1)
10574 (const_int 3)])))))]
10576 "phsubdq\t{%1, %0|%0, %1}"
10577 [(set_attr "type" "sseiadd1")])
10579 ;; SSE5 permute instructions
;; Generic pperm, modelled as UNSPEC_SSE5_PERMUTE over three V16QI inputs.
;; In alternatives 0-1 operand 1 is tied to the destination ("0"); in
;; alternatives 2-3 operand 3 is.  Each alternative has exactly one operand
;; constrained "xm".
10580 (define_insn "sse5_pperm"
10581 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10583 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
10584 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
10585 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10586 UNSPEC_SSE5_PERMUTE))]
10587 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10588 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10589 [(set_attr "type" "sse4arg")
10590 (set_attr "mode" "TI")])
10592 ;; The following are for the various unpack insns, which don't need the first
10593 ;; source operand, so we can just use the output operand for the first operand.
10594 ;; This allows either of the other two operands to be a memory operand. We
10595 ;; can't just use the first operand as an argument to the normal pperm because
10596 ;; then an output only argument, suddenly becomes an input operand.
;; Zero-extending V16QI -> V8HI unpack implemented with pperm.  Operand 1 is
;; the source vector, operand 2 is the selector (a parallel of const_ints,
;; never a register) and operand 3 is the V16QI permute control.  The
;; destination is printed twice in the template because the first pperm
;; source is not needed.
;; The condition requires that operand 1 or operand 3 be a register, which
;; matches the two alternatives ("xm,x" / "x,xm").  It must test operand 3,
;; not operand 2: register_operand is never true for the selector parallel,
;; so testing operand 2 made alternative 0 (memory operand 1) unreachable.
10597 (define_insn "sse5_pperm_zero_v16qi_v8hi"
10598 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10601 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10602 (match_operand 2 "" "")))) ;; parallel with const_int's
10603 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10605 && (register_operand (operands[1], V16QImode)
10606 || register_operand (operands[3], V16QImode))"
10607 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10608 [(set_attr "type" "sseadd")
10609 (set_attr "mode" "TI")])
;; Sign-extending V16QI -> V8HI unpack implemented with pperm.  Operand 1 is
;; the source vector, operand 2 is the selector (a parallel of const_ints,
;; never a register) and operand 3 is the V16QI permute control.
;; The condition requires that operand 1 or operand 3 be a register, which
;; matches the two alternatives ("xm,x" / "x,xm").  It must test operand 3,
;; not operand 2: register_operand is never true for the selector parallel,
;; so testing operand 2 made alternative 0 (memory operand 1) unreachable.
10611 (define_insn "sse5_pperm_sign_v16qi_v8hi"
10612 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10615 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10616 (match_operand 2 "" "")))) ;; parallel with const_int's
10617 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10619 && (register_operand (operands[1], V16QImode)
10620 || register_operand (operands[3], V16QImode))"
10621 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10622 [(set_attr "type" "sseadd")
10623 (set_attr "mode" "TI")])
;; Zero-extending V8HI -> V4SI unpack implemented with pperm.  Operand 1 is
;; the source vector, operand 2 is the selector (a parallel of const_ints,
;; never a register) and operand 3 is the V16QI permute control.
;; The condition requires that operand 1 or operand 3 be a register, which
;; matches the two alternatives ("xm,x" / "x,xm").  It must test operand 3,
;; not operand 2: register_operand is never true for the selector parallel,
;; so testing operand 2 made alternative 0 (memory operand 1) unreachable.
10625 (define_insn "sse5_pperm_zero_v8hi_v4si"
10626 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10629 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10630 (match_operand 2 "" "")))) ;; parallel with const_int's
10631 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10633 && (register_operand (operands[1], V8HImode)
10634 || register_operand (operands[3], V16QImode))"
10635 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10636 [(set_attr "type" "sseadd")
10637 (set_attr "mode" "TI")])
;; Sign-extending V8HI -> V4SI unpack implemented with pperm.  Operand 1 is
;; the source vector, operand 2 is the selector (a parallel of const_ints,
;; never a register) and operand 3 is the V16QI permute control.
;; The condition requires that operand 1 or operand 3 be a register, which
;; matches the two alternatives ("xm,x" / "x,xm").  It must test operand 3,
;; not operand 2: register_operand is never true for the selector parallel,
;; so testing operand 2 made alternative 0 (memory operand 1) unreachable.
10639 (define_insn "sse5_pperm_sign_v8hi_v4si"
10640 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10643 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10644 (match_operand 2 "" "")))) ;; parallel with const_int's
10645 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10647 && (register_operand (operands[1], V8HImode)
10648 || register_operand (operands[3], V16QImode))"
10649 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10650 [(set_attr "type" "sseadd")
10651 (set_attr "mode" "TI")])
;; Zero-extending V4SI -> V2DI unpack implemented with pperm.  Operand 1 is
;; the source vector, operand 2 is the selector (a parallel of const_ints,
;; never a register) and operand 3 is the V16QI permute control.
;; The condition requires that operand 1 or operand 3 be a register, which
;; matches the two alternatives ("xm,x" / "x,xm").  It must test operand 3,
;; not operand 2: register_operand is never true for the selector parallel,
;; so testing operand 2 made alternative 0 (memory operand 1) unreachable.
10653 (define_insn "sse5_pperm_zero_v4si_v2di"
10654 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10657 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10658 (match_operand 2 "" "")))) ;; parallel with const_int's
10659 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10661 && (register_operand (operands[1], V4SImode)
10662 || register_operand (operands[3], V16QImode))"
10663 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10664 [(set_attr "type" "sseadd")
10665 (set_attr "mode" "TI")])
;; Sign-extending V4SI -> V2DI unpack implemented with pperm.  Operand 1 is
;; the source vector, operand 2 is the selector (a parallel of const_ints,
;; never a register) and operand 3 is the V16QI permute control.
;; The condition requires that operand 1 or operand 3 be a register, which
;; matches the two alternatives ("xm,x" / "x,xm").  It must test operand 3,
;; not operand 2: register_operand is never true for the selector parallel,
;; so testing operand 2 made alternative 0 (memory operand 1) unreachable.
10667 (define_insn "sse5_pperm_sign_v4si_v2di"
10668 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10671 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10672 (match_operand 2 "" "")))) ;; parallel with const_int's
10673 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10675 && (register_operand (operands[1], V4SImode)
10676 || register_operand (operands[3], V16QImode))"
10677 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10678 [(set_attr "type" "sseadd")
10679 (set_attr "mode" "TI")])
10681 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs into one V4SI result with pperm; operand 3 (in the
;; `use') is the V16QI permute control.  In alternatives 0-1 operand 1 is
;; tied to the destination ("0"); in alternatives 2-3 operand 3 is.  Each
;; alternative has exactly one operand constrained "xm".
10682 (define_insn "sse5_pperm_pack_v2di_v4si"
10683 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
10686 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
10688 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10689 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10690 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10691 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10692 [(set_attr "type" "sse4arg")
10693 (set_attr "mode" "TI")])
;; Pack two V4SI inputs into one V8HI result with pperm; operand 3 (in the
;; `use') is the V16QI permute control.  In alternatives 0-1 operand 1 is
;; tied to the destination ("0"); in alternatives 2-3 operand 3 is.  Each
;; alternative has exactly one operand constrained "xm".
10695 (define_insn "sse5_pperm_pack_v4si_v8hi"
10696 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
10699 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
10701 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10702 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10703 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10704 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10705 [(set_attr "type" "sse4arg")
10706 (set_attr "mode" "TI")])
;; Pack two V8HI inputs into one V16QI result with pperm; operand 3 (in the
;; `use') is the V16QI permute control.  In alternatives 0-1 operand 1 is
;; tied to the destination ("0"); in alternatives 2-3 operand 3 is.  Each
;; alternative has exactly one operand constrained "xm".
10708 (define_insn "sse5_pperm_pack_v8hi_v16qi"
10709 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10712 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
10714 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10715 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10716 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10717 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10718 [(set_attr "type" "sse4arg")
10719 (set_attr "mode" "TI")])
10721 ;; Floating point permutation (permps, permpd)
;; Floating-point permute for the SSEMODEF2P modes (V4SF, V2DF), modelled as
;; UNSPEC_SSE5_PERMUTE with a V16QI control in operand 3.  In alternatives
;; 0-1 operand 1 is tied to the destination ("0"); in alternatives 2-3
;; operand 3 is.  The mnemonic suffix comes from <ssemodesuffixf4>.
10722 (define_insn "sse5_perm<mode>"
10723 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
10725 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
10726 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
10727 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10728 UNSPEC_SSE5_PERMUTE))]
10729 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10730 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10731 [(set_attr "type" "sse4arg")
10732 (set_attr "mode" "<MODE>")])
10734 ;; SSE5 packed rotate instructions
;; Rotate-left expander for the SSEMODE1248 modes.  When operand 2 is not an
;; in-range constant (const_0_to_<sserotatemax>_operand), the visible code
;; converts it to <ssescalarmode> if needed, replicates it into all
;; <ssescalarnum> elements of a fresh vector register via vec_init, and
;; emits the variable-count sse5_vrotl pattern.
10735 (define_expand "rotl<mode>3"
10736 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10737 (rotate:SSEMODE1248
10738 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10739 (match_operand:SI 2 "general_operand")))]
10742 /* If we were given a scalar, convert it to parallel */
10743 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10745 rtvec vs = rtvec_alloc (<ssescalarnum>);
10746 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10747 rtx reg = gen_reg_rtx (<MODE>mode);
10748 rtx op2 = operands[2];
10751 if (GET_MODE (op2) != <ssescalarmode>mode)
10753 op2 = gen_reg_rtx (<ssescalarmode>mode);
10754 convert_move (op2, operands[2], false);
10757 for (i = 0; i < <ssescalarnum>; i++)
10758 RTVEC_ELT (vs, i) = op2;
10760 emit_insn (gen_vec_init<mode> (reg, par));
10761 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the SSEMODE1248 modes.  When operand 2 is not
;; an in-range constant, the visible code converts it to <ssescalarmode> if
;; needed, replicates it into all <ssescalarnum> elements of a vector
;; register via vec_init, negates that vector, and emits sse5_vrotl with the
;; negated counts.
10766 (define_expand "rotr<mode>3"
10767 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10768 (rotatert:SSEMODE1248
10769 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10770 (match_operand:SI 2 "general_operand")))]
10773 /* If we were given a scalar, convert it to parallel */
10774 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10776 rtvec vs = rtvec_alloc (<ssescalarnum>);
10777 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10778 rtx neg = gen_reg_rtx (<MODE>mode);
10779 rtx reg = gen_reg_rtx (<MODE>mode);
10780 rtx op2 = operands[2];
10783 if (GET_MODE (op2) != <ssescalarmode>mode)
10785 op2 = gen_reg_rtx (<ssescalarmode>mode);
10786 convert_move (op2, operands[2], false);
10789 for (i = 0; i < <ssescalarnum>; i++)
10790 RTVEC_ELT (vs, i) = op2;
10792 emit_insn (gen_vec_init<mode> (reg, par));
10793 emit_insn (gen_neg<mode>2 (neg, reg));
10794 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate count: operand 1 is a register or memory
;; vector, operand 2 a constant accepted by
;; const_0_to_<sserotatemax>_operand.  Emitted as prot<ssevecsize>.
10799 (define_insn "sse5_rotl<mode>3"
10800 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10801 (rotate:SSEMODE1248
10802 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10803 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10805 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10806 [(set_attr "type" "sseishft")
10807 (set_attr "mode" "TI")])
;; Rotate right by an immediate count.  The template emits the same
;; prot<ssevecsize> mnemonic as the rotate-left pattern, so the count is
;; rewritten into operand 3 as (element width in bits - operand 2).
;; The width must come from the scalar element mode: <ssescalarnum> is the
;; number of elements (it sizes the rtvec in the rotl/rotr expanders), so
;; <ssescalarnum> * 8 gives 128/64/32/16 for V16QI/V8HI/V4SI/V2DI and is
;; correct only for V4SI.
10809 (define_insn "sse5_rotr<mode>3"
10810 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10811 (rotatert:SSEMODE1248
10812 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10813 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10816 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10817 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
10819 [(set_attr "type" "sseishft")
10820 (set_attr "mode" "TI")])
;; Per-element variable rotate right: negates the count vector (operand 2)
;; into a fresh register and emits sse5_vrotl with the negated counts.
10822 (define_expand "vrotr<mode>3"
10823 [(match_operand:SSEMODE1248 0 "register_operand" "")
10824 (match_operand:SSEMODE1248 1 "register_operand" "")
10825 (match_operand:SSEMODE1248 2 "register_operand" "")]
10828 rtx reg = gen_reg_rtx (<MODE>mode);
10829 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10830 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left: forwards its three operands unchanged
;; to sse5_vrotl.
10834 (define_expand "vrotl<mode>3"
10835 [(match_operand:SSEMODE1248 0 "register_operand" "")
10836 (match_operand:SSEMODE1248 1 "register_operand" "")
10837 (match_operand:SSEMODE1248 2 "register_operand" "")]
10840 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count rotate, emitted as prot<ssevecsize>.  The RTL is an
;; if_then_else keyed on operand 2 (the count vector): one arm is a rotate
;; of operand 1, the other a rotatert by the negation of operand 2.  Either
;; operand 1 or operand 2 may be in memory, but not both ("x,xm" / "xm,x").
;; NOTE(review): the comparison applied to operand 2 is not visible in this
;; listing -- confirm its direction in the full source.
10844 (define_insn "sse5_vrotl<mode>3"
10845 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10846 (if_then_else:SSEMODE1248
10848 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
10850 (rotate:SSEMODE1248
10851 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
10853 (rotatert:SSEMODE1248
10855 (neg:SSEMODE1248 (match_dup 2)))))]
10856 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
10857 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10858 [(set_attr "type" "sseishft")
10859 (set_attr "mode" "TI")])
10861 ;; SSE5 packed shift instructions.
10862 ;; FIXME: add V2DI back in
;; Per-element logical shift right for SSEMODE124: negates the count vector
;; (operand 2) and emits sse5_lshl with the negated counts.
10863 (define_expand "vlshr<mode>3"
10864 [(match_operand:SSEMODE124 0 "register_operand" "")
10865 (match_operand:SSEMODE124 1 "register_operand" "")
10866 (match_operand:SSEMODE124 2 "register_operand" "")]
10869 rtx neg = gen_reg_rtx (<MODE>mode);
10870 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10871 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for SSEMODE124: negates the count
;; vector (operand 2) and emits sse5_ashl with the negated counts.
10875 (define_expand "vashr<mode>3"
10876 [(match_operand:SSEMODE124 0 "register_operand" "")
10877 (match_operand:SSEMODE124 1 "register_operand" "")
10878 (match_operand:SSEMODE124 2 "register_operand" "")]
10881 rtx neg = gen_reg_rtx (<MODE>mode);
10882 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10883 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift left for SSEMODE124: forwards its three operands
;; unchanged to sse5_ashl.
10887 (define_expand "vashl<mode>3"
10888 [(match_operand:SSEMODE124 0 "register_operand" "")
10889 (match_operand:SSEMODE124 1 "register_operand" "")
10890 (match_operand:SSEMODE124 2 "register_operand" "")]
10893 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count arithmetic shift, emitted as psha<ssevecsize>.  The RTL is
;; an if_then_else keyed on operand 2 (the count vector): one arm is an
;; ashift of operand 1, the other an ashiftrt by the negation of operand 2.
;; Either operand 1 or operand 2 may be in memory, but not both.
;; NOTE(review): the comparison applied to operand 2 is not visible in this
;; listing -- confirm its direction in the full source.
10897 (define_insn "sse5_ashl<mode>3"
10898 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10899 (if_then_else:SSEMODE1248
10901 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
10903 (ashift:SSEMODE1248
10904 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
10906 (ashiftrt:SSEMODE1248
10908 (neg:SSEMODE1248 (match_dup 2)))))]
10909 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
10910 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10911 [(set_attr "type" "sseishft")
10912 (set_attr "mode" "TI")])
;; Variable-count logical shift, emitted as pshl<ssevecsize>.  The RTL is an
;; if_then_else keyed on operand 2 (the count vector): one arm is an ashift
;; of operand 1, the other an lshiftrt by the negation of operand 2.
;; Either operand 1 or operand 2 may be in memory, but not both.
;; NOTE(review): the comparison applied to operand 2 is not visible in this
;; listing -- confirm its direction in the full source.
10914 (define_insn "sse5_lshl<mode>3"
10915 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10916 (if_then_else:SSEMODE1248
10918 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
10920 (ashift:SSEMODE1248
10921 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
10923 (lshiftrt:SSEMODE1248
10925 (neg:SSEMODE1248 (match_dup 2)))))]
10926 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
10927 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10928 [(set_attr "type" "sseishft")
10929 (set_attr "mode" "TI")])
10931 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; V16QI shift left by a scalar SI count: the count (operand 2) is placed in
;; all 16 slots of a PARALLEL, materialised into a V16QI register with
;; vec_init, and passed to sse5_ashlv16qi3 as the per-element count vector.
10932 (define_expand "ashlv16qi3"
10933 [(match_operand:V16QI 0 "register_operand" "")
10934 (match_operand:V16QI 1 "register_operand" "")
10935 (match_operand:SI 2 "nonmemory_operand" "")]
10938 rtvec vs = rtvec_alloc (16);
10939 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10940 rtx reg = gen_reg_rtx (V16QImode);
10942 for (i = 0; i < 16; i++)
10943 RTVEC_ELT (vs, i) = operands[2];
10945 emit_insn (gen_vec_initv16qi (reg, par));
10946 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SI count: the count (operand 2) is placed
;; in all 16 slots of a PARALLEL, materialised into a V16QI register with
;; vec_init, and passed to sse5_lshlv16qi3 as the per-element count vector.
10950 (define_expand "lshlv16qi3"
10951 [(match_operand:V16QI 0 "register_operand" "")
10952 (match_operand:V16QI 1 "register_operand" "")
10953 (match_operand:SI 2 "nonmemory_operand" "")]
10956 rtvec vs = rtvec_alloc (16);
10957 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10958 rtx reg = gen_reg_rtx (V16QImode);
10960 for (i = 0; i < 16; i++)
10961 RTVEC_ELT (vs, i) = operands[2];
10963 emit_insn (gen_vec_initv16qi (reg, par));
10964 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SI count, implemented as a
;; shift left by the negated count.  A CONST_INT count is negated at expand
;; time before being replicated into the count vector; otherwise the count
;; is replicated first and the resulting vector is negated with negv16qi2.
;; Both paths finish by emitting sse5_ashlv16qi3.
10968 (define_expand "ashrv16qi3"
10969 [(match_operand:V16QI 0 "register_operand" "")
10970 (match_operand:V16QI 1 "register_operand" "")
10971 (match_operand:SI 2 "nonmemory_operand" "")]
10974 rtvec vs = rtvec_alloc (16);
10975 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
10976 rtx reg = gen_reg_rtx (V16QImode);
10978 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
10979 ? GEN_INT (- INTVAL (operands[2]))
10982 for (i = 0; i < 16; i++)
10983 RTVEC_ELT (vs, i) = ele;
10985 emit_insn (gen_vec_initv16qi (reg, par));
10987 if (GET_CODE (operands[2]) != CONST_INT)
10989 rtx neg = gen_reg_rtx (V16QImode);
10990 emit_insn (gen_negv16qi2 (neg, reg));
10991 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
10994 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; Expand a V2DI arithmetic shift-right by the scalar count in operand 2
;; (DImode), implemented with gen_sse5_ashlv2di3 and a negated count.
;; The negated count "ele" is produced in one of three ways:
;;   - CONST_INT: negated at expand time with GEN_INT;
;;   - operand whose mode is not DImode: converted into a DImode temporary
;;     with convert_move, then negated with gen_negdi2;
;;   - remaining case: negated directly with gen_negdi2.
;; Both vector elements are set to "ele" before gen_vec_initv2di.
10999 (define_expand "ashrv2di3"
11000 [(match_operand:V2DI 0 "register_operand" "")
11001 (match_operand:V2DI 1 "register_operand" "")
11002 (match_operand:DI 2 "nonmemory_operand" "")]
11005 rtvec vs = rtvec_alloc (2);
11006 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11007 rtx reg = gen_reg_rtx (V2DImode);
11010 if (GET_CODE (operands[2]) == CONST_INT)
11011 ele = GEN_INT (- INTVAL (operands[2]));
11012 else if (GET_MODE (operands[2]) != DImode)
11014 rtx move = gen_reg_rtx (DImode);
11015 ele = gen_reg_rtx (DImode);
11016 convert_move (move, operands[2], false);
11017 emit_insn (gen_negdi2 (ele, move));
11021 ele = gen_reg_rtx (DImode);
11022 emit_insn (gen_negdi2 (ele, operands[2]));
11025 RTVEC_ELT (vs, 0) = ele;
11026 RTVEC_ELT (vs, 1) = ele;
11027 emit_insn (gen_vec_initv2di (reg, par));
11028 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11032 ;; SSE5 FRCZ support
;; Packed frcz for the SSEMODEF2P modes (V4SF, V2DF).  Operand 0 is a
;; register destination; operand 1 is a register or memory source.
11034 (define_insn "sse5_frcz<mode>2"
11035 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11037 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11040 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11041 [(set_attr "type" "ssecvt1")
11042 (set_attr "prefix_extra" "1")
11043 (set_attr "mode" "<MODE>")])
;; vec_merge form of frcz for the SSEMODEF2P modes.  Operand 2 (register or
;; memory) is the source; operand 1 is tied to the destination through
;; constraint "0".  Only %2 and %0 appear in the output template.
11046 (define_insn "sse5_vmfrcz<mode>2"
11047 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11048 (vec_merge:SSEMODEF2P
11050 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11052 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11055 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11056 [(set_attr "type" "ssecvt1")
11057 (set_attr "prefix_extra" "1")
11058 (set_attr "mode" "<MODE>")])
;; cvtph2ps: V4HI source (register or memory) to a V4SF register
;; destination, modelled as an unspec.
11060 (define_insn "sse5_cvtph2ps"
11061 [(set (match_operand:V4SF 0 "register_operand" "=x")
11062 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11065 "cvtph2ps\t{%1, %0|%0, %1}"
11066 [(set_attr "type" "ssecvt")
11067 (set_attr "mode" "V4SF")])
;; cvtps2ph: V4SF register source to a V4HI destination that may be a
;; register or memory, modelled as an unspec.
11069 (define_insn "sse5_cvtps2ph"
11070 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11071 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11074 "cvtps2ph\t{%1, %0|%0, %1}"
11075 [(set_attr "type" "ssecvt")
11076 (set_attr "mode" "V4SF")])
11078 ;; Scalar versions of the com instructions that use vector types that are
11079 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11080 ;; com instructions fill in 0's in the upper bits instead of leaving them
11081 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the vec_merge form of the floating-point compare on the
;; SSEMODEF2P modes.  Its preparation code sets operands[4] to the all-zero
;; constant of the vector mode (CONST0_RTX).
11082 (define_expand "sse5_vmmaskcmp<mode>3"
11083 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11084 (vec_merge:SSEMODEF2P
11085 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11086 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11087 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11092 operands[4] = CONST0_RTX (<MODE>mode);
;; Insn for the vec_merge form of the floating-point compare.  Operand 1 is
;; the comparison operator (printed with %Y1), operand 2 must be a register,
;; operand 3 may be a register or memory, and operand 4 is the other
;; vec_merge arm, declared with an empty predicate.
11095 (define_insn "*sse5_vmmaskcmp<mode>3"
11096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11097 (vec_merge:SSEMODEF2P
11098 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11099 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11100 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11101 (match_operand:SSEMODEF2P 4 "")
11104 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11105 [(set_attr "type" "sse4arg")
11106 (set_attr "mode" "<ssescalarmode>")])
11108 ;; We don't have a comparison operator that always returns true/false, so
11109 ;; handle comfalse and comtrue specially.
;; comfalse/comtrue for the SSEMODEF2P modes.  Operands 1 and 2 are the
;; register and register-or-memory inputs; operand 3 is a constant selector
;; that picks the mnemonic in the switch below.
;; The four arms are, in order, false-scalar, false-packed, true-scalar and
;; true-packed (the case labels are not visible in this excerpt; the order
;; is inferred from the scalar/packed alternation of the first two arms).
;; The two "true" arms must emit comtrues/comtruep rather than repeating
;; the comfalse mnemonics.
11110 (define_insn "sse5_com_tf<mode>3"
11111 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11113 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11114 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11115 (match_operand:SI 3 "const_int_operand" "n")]
11116 UNSPEC_SSE5_TRUEFALSE))]
11119 const char *ret = NULL;
11121 switch (INTVAL (operands[3]))
11124 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11128 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11132 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11136 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11140 gcc_unreachable ();
11145 [(set_attr "type" "ssecmp")
11146 (set_attr "mode" "<MODE>")])
;; Packed floating-point compare for the SSEMODEF2P modes.  Operand 1 is
;; the comparison operator (sse5_comparison_float_operator, printed with
;; %Y1); operand 2 must be a register and operand 3 may be register or
;; memory.
11148 (define_insn "sse5_maskcmp<mode>3"
11149 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11150 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11151 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11152 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11154 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11155 [(set_attr "type" "ssecmp")
11156 (set_attr "mode" "<MODE>")])
;; Integer compare (pcom) for the SSEMODE1248 modes (V16QI, V8HI, V4SI,
;; V2DI).  Operand 1 is matched by ix86_comparison_int_operator and printed
;; with %Y1; operand 2 must be a register and operand 3 may be register or
;; memory.
11158 (define_insn "sse5_maskcmp<mode>3"
11159 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11160 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11161 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11162 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11164 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11165 [(set_attr "type" "sse4arg")
11166 (set_attr "mode" "TI")])
;; Unsigned integer compare for the SSEMODE1248 modes.  Operand 1 is
;; matched by ix86_comparison_uns_operator; the template adds a "u" between
;; the condition (%Y1) and the element-size suffix.
11168 (define_insn "sse5_maskcmp_uns<mode>3"
11169 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11170 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11171 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11172 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11174 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11175 [(set_attr "type" "ssecmp")
11176 (set_attr "mode" "TI")])
11178 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11179 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11180 ;; the exact instruction generated for the intrinsic.
;; Same operands and output template as the plain unsigned compare, but the
;; comparison is wrapped in an UNSPEC_SSE5_UNSIGNED_CMP unspec.
11181 (define_insn "sse5_maskcmp_uns2<mode>3"
11182 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11183 (unspec:SSEMODE1248
11184 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11185 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11186 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11187 UNSPEC_SSE5_UNSIGNED_CMP))]
11189 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11190 [(set_attr "type" "ssecmp")
11191 (set_attr "mode" "TI")])
11193 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11194 ;; being added here to be complete.
;; pcomtrue/pcomfalse for the SSEMODE1248 modes.  Operand 3 is a constant
;; selector: a non-zero value emits pcomtrue, zero emits pcomfalse.
11195 (define_insn "sse5_pcom_tf<mode>3"
11196 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11197 (unspec:SSEMODE1248
11198 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11199 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11200 (match_operand:SI 3 "const_int_operand" "n")]
11201 UNSPEC_SSE5_TRUEFALSE))]
11204 return ((INTVAL (operands[3]) != 0)
11205 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11206 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11208 [(set_attr "type" "ssecmp")
11209 (set_attr "mode" "TI")])
;; VEX three-operand vaesenc on V2DI, enabled under TARGET_AES && TARGET_AVX.
;; Operand 1 is a register that need not match the destination; operand 2
;; may be register or memory.
11211 (define_insn "*avx_aesenc"
11212 [(set (match_operand:V2DI 0 "register_operand" "=x")
11213 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11214 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11216 "TARGET_AES && TARGET_AVX"
11217 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11218 [(set_attr "type" "sselog1")
11219 (set_attr "prefix" "vex")
11220 (set_attr "mode" "TI")])
;; Two-operand aesenc on V2DI.  Operand 1 is tied to the destination
;; (constraint "0"), so only %2 and %0 are printed.
11222 (define_insn "aesenc"
11223 [(set (match_operand:V2DI 0 "register_operand" "=x")
11224 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11225 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11228 "aesenc\t{%2, %0|%0, %2}"
11229 [(set_attr "type" "sselog1")
11230 (set_attr "prefix_extra" "1")
11231 (set_attr "mode" "TI")])
;; VEX three-operand vaesenclast on V2DI (UNSPEC_AESENCLAST), enabled under
;; TARGET_AES && TARGET_AVX.  Operand 1 is a register that need not match
;; the destination; operand 2 may be register or memory.
11233 (define_insn "*avx_aesenclast"
11234 [(set (match_operand:V2DI 0 "register_operand" "=x")
11235 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11236 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11237 UNSPEC_AESENCLAST))]
11238 "TARGET_AES && TARGET_AVX"
11239 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11240 [(set_attr "type" "sselog1")
11241 (set_attr "prefix" "vex")
11242 (set_attr "mode" "TI")])
;; Two-operand aesenclast on V2DI (UNSPEC_AESENCLAST).  Operand 1 is tied
;; to the destination (constraint "0"), so only %2 and %0 are printed.
11244 (define_insn "aesenclast"
11245 [(set (match_operand:V2DI 0 "register_operand" "=x")
11246 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11247 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11248 UNSPEC_AESENCLAST))]
11250 "aesenclast\t{%2, %0|%0, %2}"
11251 [(set_attr "type" "sselog1")
11252 (set_attr "prefix_extra" "1")
11253 (set_attr "mode" "TI")])
;; VEX three-operand vaesdec on V2DI, enabled under TARGET_AES && TARGET_AVX.
;; Operand 1 is a register that need not match the destination; operand 2
;; may be register or memory.
11255 (define_insn "*avx_aesdec"
11256 [(set (match_operand:V2DI 0 "register_operand" "=x")
11257 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11258 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11260 "TARGET_AES && TARGET_AVX"
11261 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11262 [(set_attr "type" "sselog1")
11263 (set_attr "prefix" "vex")
11264 (set_attr "mode" "TI")])
;; Two-operand aesdec on V2DI.  Operand 1 is tied to the destination
;; (constraint "0"), so only %2 and %0 are printed.
11266 (define_insn "aesdec"
11267 [(set (match_operand:V2DI 0 "register_operand" "=x")
11268 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11269 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11272 "aesdec\t{%2, %0|%0, %2}"
11273 [(set_attr "type" "sselog1")
11274 (set_attr "prefix_extra" "1")
11275 (set_attr "mode" "TI")])
;; VEX three-operand vaesdeclast on V2DI (UNSPEC_AESDECLAST), enabled under
;; TARGET_AES && TARGET_AVX.  Operand 1 is a register that need not match
;; the destination; operand 2 may be register or memory.
11277 (define_insn "*avx_aesdeclast"
11278 [(set (match_operand:V2DI 0 "register_operand" "=x")
11279 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11280 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11281 UNSPEC_AESDECLAST))]
11282 "TARGET_AES && TARGET_AVX"
11283 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11284 [(set_attr "type" "sselog1")
11285 (set_attr "prefix" "vex")
11286 (set_attr "mode" "TI")])
;; Two-operand aesdeclast on V2DI (UNSPEC_AESDECLAST).  Operand 1 is tied
;; to the destination (constraint "0"), so only %2 and %0 are printed.
11288 (define_insn "aesdeclast"
11289 [(set (match_operand:V2DI 0 "register_operand" "=x")
11290 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11291 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11292 UNSPEC_AESDECLAST))]
11294 "aesdeclast\t{%2, %0|%0, %2}"
11295 [(set_attr "type" "sselog1")
11296 (set_attr "prefix_extra" "1")
11297 (set_attr "mode" "TI")])
;; aesimc on V2DI: one register-or-memory source, register destination.
;; The template starts with %v and the prefix attribute is maybe_vex
;; (presumably so this one pattern covers both the legacy and the VEX
;; encoding; confirm against the %v handling in the backend).
11299 (define_insn "aesimc"
11300 [(set (match_operand:V2DI 0 "register_operand" "=x")
11301 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11304 "%vaesimc\t{%1, %0|%0, %1}"
11305 [(set_attr "type" "sselog1")
11306 (set_attr "prefix_extra" "1")
11307 (set_attr "prefix" "maybe_vex")
11308 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI (UNSPEC_AESKEYGENASSIST).  Operand 1 is a
;; register-or-memory source and operand 2 an immediate restricted by
;; const_0_to_255_operand.  As with aesimc, the template starts with %v and
;; the prefix attribute is maybe_vex.
11310 (define_insn "aeskeygenassist"
11311 [(set (match_operand:V2DI 0 "register_operand" "=x")
11312 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11313 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11314 UNSPEC_AESKEYGENASSIST))]
11316 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11317 [(set_attr "type" "sselog1")
11318 (set_attr "prefix_extra" "1")
11319 (set_attr "prefix" "maybe_vex")
11320 (set_attr "mode" "TI")])
;; VEX vpclmulqdq on V2DI, enabled under TARGET_PCLMUL && TARGET_AVX.
;; Operand 1 is a register that need not match the destination, operand 2
;; may be register or memory, and operand 3 is an immediate restricted by
;; const_0_to_255_operand.
11322 (define_insn "*vpclmulqdq"
11323 [(set (match_operand:V2DI 0 "register_operand" "=x")
11324 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11325 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11326 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11328 "TARGET_PCLMUL && TARGET_AVX"
11329 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11330 [(set_attr "type" "sselog1")
11331 (set_attr "prefix" "vex")
11332 (set_attr "mode" "TI")])
;; pclmulqdq on V2DI with operand 1 tied to the destination (constraint
;; "0").  Operand 2 may be register or memory; operand 3 is an immediate
;; restricted by const_0_to_255_operand.
11334 (define_insn "pclmulqdq"
11335 [(set (match_operand:V2DI 0 "register_operand" "=x")
11336 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11337 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11338 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11341 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11342 [(set_attr "type" "sselog1")
11343 (set_attr "prefix_extra" "1")
11344 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.  nregs is 16
;; when TARGET_64BIT and 8 otherwise.  Element 0 of the PARALLEL is an
;; UNSPEC_VOLATILE; elements 1..nregs are SETs of each SSE register, in
;; V8SImode, to the V8SI zero constant.
11346 (define_expand "avx_vzeroall"
11347 [(match_par_dup 0 [(const_int 0)])]
11350 int nregs = TARGET_64BIT ? 16 : 8;
11353 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11355 XVECEXP (operands[0], 0, 0)
11356 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11359 for (regno = 0; regno < nregs; regno++)
11360 XVECEXP (operands[0], 0, regno + 1)
11361 = gen_rtx_SET (VOIDmode,
11362 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11363 CONST0_RTX (V8SImode));
;; Insn matching a vzeroall PARALLEL.  The whole PARALLEL is checked by the
;; vzeroall_operation predicate; the pattern spells out the leading
;; UNSPECV_VZEROALL element and one register-set-to-zero element.
11366 (define_insn "*avx_vzeroall"
11367 [(match_parallel 0 "vzeroall_operation"
11368 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11369 (set (match_operand 1 "register_operand" "=x")
11370 (match_operand 2 "const0_operand" "X"))])]
11373 [(set_attr "type" "sse")
11374 (set_attr "memory" "none")
11375 (set_attr "prefix" "vex")
11376 (set_attr "mode" "OI")])
11378 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; 32-bit variant (TARGET_AVX && !TARGET_64BIT): an UNSPECV_VZEROUPPER
;; unspec_volatile plus V8SI clobbers of XMM0 through XMM7.
11379 (define_insn "avx_vzeroupper"
11380 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11381 (clobber (reg:V8SI XMM0_REG))
11382 (clobber (reg:V8SI XMM1_REG))
11383 (clobber (reg:V8SI XMM2_REG))
11384 (clobber (reg:V8SI XMM3_REG))
11385 (clobber (reg:V8SI XMM4_REG))
11386 (clobber (reg:V8SI XMM5_REG))
11387 (clobber (reg:V8SI XMM6_REG))
11388 (clobber (reg:V8SI XMM7_REG))]
11389 "TARGET_AVX && !TARGET_64BIT"
11391 [(set_attr "type" "sse")
11392 (set_attr "memory" "none")
11393 (set_attr "prefix" "vex")
11394 (set_attr "mode" "OI")])
;; 64-bit variant (TARGET_AVX && TARGET_64BIT): an UNSPECV_VZEROUPPER
;; unspec_volatile plus V8SI clobbers of XMM0 through XMM15.
11396 (define_insn "avx_vzeroupper_rex64"
11397 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11398 (clobber (reg:V8SI XMM0_REG))
11399 (clobber (reg:V8SI XMM1_REG))
11400 (clobber (reg:V8SI XMM2_REG))
11401 (clobber (reg:V8SI XMM3_REG))
11402 (clobber (reg:V8SI XMM4_REG))
11403 (clobber (reg:V8SI XMM5_REG))
11404 (clobber (reg:V8SI XMM6_REG))
11405 (clobber (reg:V8SI XMM7_REG))
11406 (clobber (reg:V8SI XMM8_REG))
11407 (clobber (reg:V8SI XMM9_REG))
11408 (clobber (reg:V8SI XMM10_REG))
11409 (clobber (reg:V8SI XMM11_REG))
11410 (clobber (reg:V8SI XMM12_REG))
11411 (clobber (reg:V8SI XMM13_REG))
11412 (clobber (reg:V8SI XMM14_REG))
11413 (clobber (reg:V8SI XMM15_REG))]
11414 "TARGET_AVX && TARGET_64BIT"
11416 [(set_attr "type" "sse")
11417 (set_attr "memory" "none")
11418 (set_attr "prefix" "vex")
11419 (set_attr "mode" "OI")])
;; Immediate form of vpermilps/vpermilpd for the AVXMODEF2P modes.
;; Operand 1 is the source and operand 2 the immediate selector, whose
;; range is set by the per-mode const_0_to_<vpermilbits>_operand predicate.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint: the immediate form accepts a memory source, and a
;; register-only predicate would force every memory source into a register
;; first.
11421 (define_insn "avx_vpermil<mode>"
11422 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11424 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11425 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
11428 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11429 [(set_attr "type" "sselog")
11430 (set_attr "prefix" "vex")
11431 (set_attr "mode" "<MODE>")])
;; Variable form of vpermilps/vpermilpd for the AVXMODEF2P modes.
;; Operand 1 must be a register; operand 2, in the <avxpermvecmode> vector
;; mode, may be register or memory.
11433 (define_insn "avx_vpermilvar<mode>3"
11434 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11436 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11437 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11440 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11441 [(set_attr "type" "sselog")
11442 (set_attr "prefix" "vex")
11443 (set_attr "mode" "<MODE>")])
;; vperm2f128 for the AVX256MODE2P modes (UNSPEC_VPERMIL2F128).  Operand 1
;; must be a register, operand 2 may be register or memory, and operand 3
;; is an immediate restricted by const_0_to_255_operand.  The mode
;; attribute is V8SF for every mode of the iterator.
11445 (define_insn "avx_vperm2f128<mode>3"
11446 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11447 (unspec:AVX256MODE2P
11448 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11449 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11450 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11451 UNSPEC_VPERMIL2F128))]
11453 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11454 [(set_attr "type" "sselog")
11455 (set_attr "prefix" "vex")
11456 (set_attr "mode" "V8SF")])
;; vbroadcasts{s,d} for the AVXMODEF4P modes.  The only input is operand 1,
;; a scalar (<avxscalarmode>) that must be in memory; the result is
;; described as nested vec_concat expressions.
11458 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
11459 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
11460 (vec_concat:AVXMODEF4P
11461 (vec_concat:<avxhalfvecmode>
11462 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
11464 (vec_concat:<avxhalfvecmode>
11468 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
11469 [(set_attr "type" "ssemov")
11470 (set_attr "prefix" "vex")
11471 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss into a V8SF register.  The only input is operand 1, an SF
;; value that must be in memory.
11473 (define_insn "avx_vbroadcastss256"
11474 [(set (match_operand:V8SF 0 "register_operand" "=x")
11478 (match_operand:SF 1 "memory_operand" "m")
11491 "vbroadcastss\t{%1, %0|%0, %1}"
11492 [(set_attr "type" "ssemov")
11493 (set_attr "prefix" "vex")
11494 (set_attr "mode" "SF")])
;; vbroadcastf128 for the AVX256MODEF2P modes.  Operand 1 is a half-width
;; vector (<avxhalfvecmode>) that must be in memory; the result is
;; described as a vec_concat.
11496 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
11497 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
11498 (vec_concat:AVX256MODEF2P
11499 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
11502 "vbroadcastf128\t{%1, %0|%0, %1}"
11503 [(set_attr "type" "ssemov")
11504 (set_attr "prefix" "vex")
11505 (set_attr "mode" "V4SF")])
;; Expander for vinsertf128 on the AVX256MODE modes.  It switches on the
;; constant in operand 3 (restricted by const_0_to_1_operand) and emits
;; either gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>; any other value
;; reaches gcc_unreachable.
;; NOTE(review): the case labels are not visible in this excerpt;
;; presumably 0 selects the "lo" pattern and 1 the "hi" pattern, matching
;; the $0x0/$0x1 immediates those patterns print.
11507 (define_expand "avx_vinsertf128<mode>"
11508 [(match_operand:AVX256MODE 0 "register_operand" "")
11509 (match_operand:AVX256MODE 1 "register_operand" "")
11510 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11511 (match_operand:SI 3 "const_0_to_1_operand" "")]
11514 switch (INTVAL (operands[3]))
11517 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11521 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11525 gcc_unreachable ();
;; Replace the low half of a 4-element 256-bit vector (AVX256MODE4P).  The
;; result is operand 2 (half vector, register or memory) concatenated with
;; elements 2 and 3 of operand 1; it is emitted as vinsertf128 with
;; immediate 0.
11530 (define_insn "vec_set_lo_<mode>"
11531 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11532 (vec_concat:AVX256MODE4P
11533 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11534 (vec_select:<avxhalfvecmode>
11535 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11536 (parallel [(const_int 2) (const_int 3)]))))]
11538 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11539 [(set_attr "type" "sselog")
11540 (set_attr "prefix" "vex")
11541 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector (AVX256MODE4P).  The
;; result is elements 0 and 1 of operand 1 concatenated with operand 2
;; (half vector, register or memory); it is emitted as vinsertf128 with
;; immediate 1.
11543 (define_insn "vec_set_hi_<mode>"
11544 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11545 (vec_concat:AVX256MODE4P
11546 (vec_select:<avxhalfvecmode>
11547 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11548 (parallel [(const_int 0) (const_int 1)]))
11549 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11551 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11552 [(set_attr "type" "sselog")
11553 (set_attr "prefix" "vex")
11554 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector (AVX256MODE8P).  The
;; result is operand 2 concatenated with elements 4..7 of operand 1; it is
;; emitted as vinsertf128 with immediate 0.
11556 (define_insn "vec_set_lo_<mode>"
11557 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11558 (vec_concat:AVX256MODE8P
11559 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11560 (vec_select:<avxhalfvecmode>
11561 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11562 (parallel [(const_int 4) (const_int 5)
11563 (const_int 6) (const_int 7)]))))]
11565 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11566 [(set_attr "type" "sselog")
11567 (set_attr "prefix" "vex")
11568 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector (AVX256MODE8P).
;; The result is elements 0..3 of operand 1 concatenated with operand 2; it
;; is emitted as vinsertf128 with immediate 1.
11570 (define_insn "vec_set_hi_<mode>"
11571 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11572 (vec_concat:AVX256MODE8P
11573 (vec_select:<avxhalfvecmode>
11574 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11575 (parallel [(const_int 0) (const_int 1)
11576 (const_int 2) (const_int 3)]))
11577 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11579 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11580 [(set_attr "type" "sselog")
11581 (set_attr "prefix" "vex")
11582 (set_attr "mode" "V8SF")])
;; V16HI counterpart of vec_set_lo: the V8HI operand 2 (register or memory)
;; is paired with elements 8..15 of operand 1; it is emitted as vinsertf128
;; with immediate 0.
11584 (define_insn "vec_set_lo_v16hi"
11585 [(set (match_operand:V16HI 0 "register_operand" "=x")
11587 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11589 (match_operand:V16HI 1 "register_operand" "x")
11590 (parallel [(const_int 8) (const_int 9)
11591 (const_int 10) (const_int 11)
11592 (const_int 12) (const_int 13)
11593 (const_int 14) (const_int 15)]))))]
11595 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11596 [(set_attr "type" "sselog")
11597 (set_attr "prefix" "vex")
11598 (set_attr "mode" "V8SF")])
;; V16HI counterpart of vec_set_hi: elements 0..7 of operand 1 are paired
;; with the V8HI operand 2 (register or memory); it is emitted as
;; vinsertf128 with immediate 1.
11600 (define_insn "vec_set_hi_v16hi"
11601 [(set (match_operand:V16HI 0 "register_operand" "=x")
11604 (match_operand:V16HI 1 "register_operand" "x")
11605 (parallel [(const_int 0) (const_int 1)
11606 (const_int 2) (const_int 3)
11607 (const_int 4) (const_int 5)
11608 (const_int 6) (const_int 7)]))
11609 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11611 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11612 [(set_attr "type" "sselog")
11613 (set_attr "prefix" "vex")
11614 (set_attr "mode" "V8SF")])
;; V32QI counterpart of vec_set_lo: the V16QI operand 2 (register or
;; memory) is paired with elements 16..31 of operand 1; it is emitted as
;; vinsertf128 with immediate 0.
11616 (define_insn "vec_set_lo_v32qi"
11617 [(set (match_operand:V32QI 0 "register_operand" "=x")
11619 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11621 (match_operand:V32QI 1 "register_operand" "x")
11622 (parallel [(const_int 16) (const_int 17)
11623 (const_int 18) (const_int 19)
11624 (const_int 20) (const_int 21)
11625 (const_int 22) (const_int 23)
11626 (const_int 24) (const_int 25)
11627 (const_int 26) (const_int 27)
11628 (const_int 28) (const_int 29)
11629 (const_int 30) (const_int 31)]))))]
11631 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11632 [(set_attr "type" "sselog")
11633 (set_attr "prefix" "vex")
11634 (set_attr "mode" "V8SF")])
;; V32QI counterpart of vec_set_hi: elements 0..15 of operand 1 are paired
;; with the V16QI operand 2 (register or memory); it is emitted as
;; vinsertf128 with immediate 1.
11636 (define_insn "vec_set_hi_v32qi"
11637 [(set (match_operand:V32QI 0 "register_operand" "=x")
11640 (match_operand:V32QI 1 "register_operand" "x")
11641 (parallel [(const_int 0) (const_int 1)
11642 (const_int 2) (const_int 3)
11643 (const_int 4) (const_int 5)
11644 (const_int 6) (const_int 7)
11645 (const_int 8) (const_int 9)
11646 (const_int 10) (const_int 11)
11647 (const_int 12) (const_int 13)
11648 (const_int 14) (const_int 15)]))
11649 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11651 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11652 [(set_attr "type" "sselog")
11653 (set_attr "prefix" "vex")
11654 (set_attr "mode" "V8SF")])
;; vmaskmovp{s,d} load form for the AVXMODEF2P modes.  Operand 0 is the
;; register destination, operand 1 the memory source and operand 2 a
;; register operand (presumably the mask; confirm against the intrinsic).
11656 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
11657 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11659 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11660 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11664 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
11665 [(set_attr "type" "sselog1")
11666 (set_attr "prefix" "vex")
11667 (set_attr "mode" "<MODE>")])
;; vmaskmovp{s,d} store form for the AVXMODEF2P modes (UNSPEC_MASKSTORE).
;; Operand 0 is the memory destination; operands 1 and 2 are both
;; registers.
11669 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
11670 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11672 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11673 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11675 UNSPEC_MASKSTORE))]
11677 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11678 [(set_attr "type" "sselog1")
11679 (set_attr "prefix" "vex")
11680 (set_attr "mode" "<MODE>")])
;; Half-width vector (<avxhalfvecmode>) to 256-bit vector (AVX256MODE2P),
;; modelled as an unspec.  There are two alternatives: in alternative 0 the
;; source is tied to the destination ("0"); in alternative 1 it may be
;; register or memory.  The visible output code picks vmovaps, vmovapd or
;; vmovdqa from the insn's mode attribute and writes the low (%x0) part of
;; the destination.  The length attribute is special-cased for alternative
;; 0 (its value is not visible in this excerpt).
11682 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11683 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
11684 (unspec:AVX256MODE2P
11685 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
11689 switch (which_alternative)
11694 switch (get_attr_mode (insn))
11697 return "vmovaps\t{%1, %x0|%x0, %1}";
11699 return "vmovapd\t{%1, %x0|%x0, %1}";
11701 return "vmovdqa\t{%1, %x0|%x0, %1}";
11708 gcc_unreachable ();
11710 [(set_attr "type" "ssemov")
11711 (set_attr "prefix" "vex")
11712 (set_attr "mode" "<avxvecmode>")
11713 (set (attr "length")
11714 (if_then_else (eq_attr "alternative" "0")
11716 (const_string "*")))])
;; 256-bit vector (AVX256MODE2P) to half-width vector (<avxhalfvecmode>),
;; modelled as an unspec.  There are two alternatives: in alternative 0 the
;; source is tied to the destination ("0"); in alternative 1 it may be
;; register or memory.  The visible output code picks vmovaps, vmovapd or
;; vmovdqa from the insn's mode attribute and reads the low (%x1) part of
;; the source.  The length attribute is special-cased for alternative 0
;; (its value is not visible in this excerpt).
11718 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
11719 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
11720 (unspec:<avxhalfvecmode>
11721 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
11725 switch (which_alternative)
11730 switch (get_attr_mode (insn))
11733 return "vmovaps\t{%x1, %0|%0, %x1}";
11735 return "vmovapd\t{%x1, %0|%0, %x1}";
11737 return "vmovdqa\t{%x1, %0|%0, %x1}";
11744 gcc_unreachable ();
11746 [(set_attr "type" "ssemov")
11747 (set_attr "prefix" "vex")
11748 (set_attr "mode" "<avxvecmode>")
11749 (set (attr "length")
11750 (if_then_else (eq_attr "alternative" "0")
11752 (const_string "*")))])
;; vec_init expander for the AVX256MODE modes (V32QI, V16HI, V8SI, V4DI,
;; V8SF, V4DF).  All work is delegated to ix86_expand_vector_init, called
;; with false as its first argument.
11754 (define_expand "vec_init<mode>"
11755 [(match_operand:AVX256MODE 0 "register_operand" "")
11756 (match_operand 1 "" "")]
11759 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into an AVX256MODE register.
;; Operand 1 is always a register.  Operand 2 has two alternatives: register
;; or memory ("xm"), or constraint "C".  The output code switches on
;; which_alternative: one arm emits vinsertf128 with immediate 1, using the
;; %t1 form of operand 1; the other picks vmovaps, vmovapd or vmovdqa from
;; the insn's mode attribute and copies operand 1 into the low (%x0) part
;; of the destination.
;; NOTE(review): the case labels are not visible in this excerpt; the type
;; attribute "sselog,ssemov" suggests alternative 0 is the vinsertf128 arm.
11763 (define_insn "*vec_concat<mode>_avx"
11764 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11765 (vec_concat:AVX256MODE
11766 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11767 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11770 switch (which_alternative)
11773 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11775 switch (get_attr_mode (insn))
11778 return "vmovaps\t{%1, %x0|%x0, %1}";
11780 return "vmovapd\t{%1, %x0|%x0, %1}";
11782 return "vmovdqa\t{%1, %x0|%x0, %1}";
11785 gcc_unreachable ();
11788 [(set_attr "type" "sselog,ssemov")
11789 (set_attr "prefix" "vex")
11790 (set_attr "mode" "<avxvecmode>")])