1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option) any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
;; (the four SSEMODEI integer modes plus the float modes V4SF and V2DF)
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
;; (the four AVX256MODEI integer modes plus the float modes V8SF and V4DF)
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
;; (the SSEMODE list followed by the AVX256MODE list)
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte modes used by individual patterns.  The digits in
;; the integer iterator names appear to give the element widths in bytes
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI) -- TODO confirm.  SSEMODEF4 holds the
;; scalar and vector float modes, SSEMODEF2P only the two vector float modes.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Subsets of the AVX modes used by individual patterns.
;; AVX256MODEF2P: the two 32-byte float vector modes.
;; AVXMODEF2P: the 16-byte and 32-byte float vector modes together.
;; AVXMODEDCVTDQ2PS / AVXMODEDCVTPS2DQ: the SF-vector and SI-vector mode
;; pairs; judging by the names they serve the cvtdq2ps/cvtps2dq patterns,
;; which are not visible in this part of the file.
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
63 ;; Int-float size matches
;; Each iterator pairs a float vector mode with the integer vector mode that
;; has the same element count (4 x SF/SI, 2 x DF/DI).
64 (define_mode_iterator SSEMODE4S [V4SF V4SI])
65 (define_mode_iterator SSEMODE2D [V2DF V2DI])
67 ;; Modes handled by integer vcond pattern
;; V16QI, V8HI and V4SI are always part of the iterator; V2DI is included
;; only when its condition string "TARGET_SSE4_2 || TARGET_SSE5" holds.
68 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
69 (V2DI "TARGET_SSE4_2 || TARGET_SSE5")])
71 ;; Mapping from float mode to required SSE level
;; SF-based modes map to "sse", DF-based modes to "sse2"; the attribute is
;; spliced into pattern names such as "<sse>_movup<ssemodesuffixf2c>".
72 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
74 ;; Mapping from integer vector mode to mnemonic suffix
75 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
77 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4: scalar modes get "ss"/"sd", vector modes "ps"/"pd".
78 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
79 (V4SF "ps") (V2DF "pd")])
;; ssemodesuffixf2s: scalar and vector modes alike get the scalar "ss"/"sd".
80 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
81 (V4SF "ss") (V2DF "sd")])
;; ssemodesuffixf2c: single letter "s"/"d", appended after a "p" or "s" in
;; the output templates (e.g. "movup<ssemodesuffixf2c>").
82 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
84 ;; Mapping of the max integer size for sse5 rotate immediate constraint
;; (each value is the element width in bits minus one)
85 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
87 ;; Mapping of vector modes back to the scalar modes
88 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
89 (V16QI "QI") (V8HI "HI")
90 (V4SI "SI") (V2DI "DI")])
92 ;; Mapping of vector modes to a vector mode of double size
;; (same element mode, twice the element count)
93 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
94 (V4SF "V8SF") (V4SI "V8SI")])
96 ;; Number of scalar elements in each vector type
97 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
98 (V16QI "16") (V8HI "8")
99 (V4SI "4") (V2DI "2")])
;; Mode attributes used by the AVX patterns.
;; avxvecmode: value for the insn "mode" attribute -- 16-byte integer vectors
;; map to TI, 32-byte integer vectors to OI, float vectors to themselves.
102 (define_mode_attr avxvecmode
103 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
104 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
105 (V8SF "V8SF") (V4DF "V4DF")])
;; avxvecpsmode: integer vector mode -> SF vector mode of the same total
;; size (V4SF for the 16-byte modes, V8SF for the 32-byte modes).
106 (define_mode_attr avxvecpsmode
107 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
108 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: same element mode with half the element count.
109 (define_mode_attr avxhalfvecmode
110 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
111 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; avxscalarmode: element (scalar) mode of the vector mode.
112 (define_mode_attr avxscalarmode
113 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
114 (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: SF vector <-> SI vector with the same element count.
115 (define_mode_attr avxcvtvecmode
116 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: float vector mode -> integer vector mode with the same
;; element count and element size.
117 (define_mode_attr avxpermvecmode
118 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixf2c: single letter "s"/"d" appended to float mnemonics
;; (e.g. "vmovup<avxmodesuffixf2c>").
119 (define_mode_attr avxmodesuffixf2c
120 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
;; avxmodesuffixp: two-letter suffix "pd"/"ps"/"si" per mode.
121 (define_mode_attr avxmodesuffixp
122 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; avxmodesuffixs: integer element-size letter "b"/"w"/"d".
124 (define_mode_attr avxmodesuffixs
125 [(V16QI "b") (V8HI "w") (V4SI "d")])
;; avxmodesuffix: pattern-name suffix -- empty for the 16-byte modes listed,
;; "256" for the 32-byte modes (e.g. "avx_movdqu<avxmodesuffix>").
126 (define_mode_attr avxmodesuffix
127 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
128 (V8SI "256") (V8SF "256") (V4DF "256")])
130 ;; Mapping of immediate bits for blend instructions
;; Each blendbits value is 2^N - 1 for a vector of N elements
;; (8 -> 255, 4 -> 15, 2 -> 3).
131 (define_mode_attr blendbits
132 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
134 ;; Mapping of immediate bits for vpermil instructions
135 (define_mode_attr vpermilbits
136 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
138 ;; Mapping of immediate bits for pinsr instructions
;; (32768 = 1 << 15, 128 = 1 << 7, 8 = 1 << 3: the single-bit value for the
;; highest element index of a 16-, 8- and 4-element vector respectively)
139 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
141 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for every AVX256MODE mode.  Both operands accept any
;; nonimmediate operand; the expansion is delegated to
;; ix86_expand_vector_move.
149 (define_expand "mov<mode>"
150 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
151 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
154 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX move insn for every AVXMODE mode.  Three alternatives:
;;   0: constant (constraint C) -> xmm register
;;   1: register or memory      -> xmm register
;;   2: xmm register            -> memory
;; The visible condition requires at least one operand to be a register.
;; The output code switches on which_alternative: one path returns
;; standard_sse_constant_opcode (insn, operands[1]); the other switches on
;; the insn's "mode" attribute and returns vmovaps, vmovapd or vmovdqa.
158 (define_insn "*avx_mov<mode>_internal"
159 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
160 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
162 && (register_operand (operands[0], <MODE>mode)
163 || register_operand (operands[1], <MODE>mode))"
165 switch (which_alternative)
168 return standard_sse_constant_opcode (insn, operands[1]);
171 switch (get_attr_mode (insn))
175 return "vmovaps\t{%1, %0|%0, %1}";
178 return "vmovapd\t{%1, %0|%0, %1}";
180 return "vmovdqa\t{%1, %0|%0, %1}";
186 [(set_attr "type" "sselog1,ssemov,ssemov")
187 (set_attr "prefix" "vex")
188 (set_attr "mode" "<avxvecmode>")])
190 ;; All of these patterns are enabled for SSE1 as well as SSE2.
191 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every SSEMODE (16-byte) mode; same shape as the
;; AVX256MODE expander and likewise delegated to ix86_expand_vector_move.
193 (define_expand "mov<mode>"
194 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
195 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
198 ix86_expand_vector_move (<MODE>mode, operands);
;; SSE move insn for every SSEMODE mode, with the same three alternatives as
;; the AVX variant (constant -> reg, reg/mem -> reg, reg -> mem) and the same
;; "at least one register operand" condition.  The output code returns
;; standard_sse_constant_opcode on one path and movaps, movapd or movdqa on
;; the other, chosen by the insn's "mode" attribute.
;; The "mode" attribute is computed by a cond: V4SF when optimizing the
;; function for size, when TARGET_SSE2 is off, or for alternative 2 combined
;; with the TARGET_SSE_TYPELESS_STORES test; otherwise V4SF for V4SFmode,
;; V2DF for V2DFmode and TI as the default.
202 (define_insn "*mov<mode>_internal"
203 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
204 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
206 && (register_operand (operands[0], <MODE>mode)
207 || register_operand (operands[1], <MODE>mode))"
209 switch (which_alternative)
212 return standard_sse_constant_opcode (insn, operands[1]);
215 switch (get_attr_mode (insn))
218 return "movaps\t{%1, %0|%0, %1}";
220 return "movapd\t{%1, %0|%0, %1}";
222 return "movdqa\t{%1, %0|%0, %1}";
228 [(set_attr "type" "sselog1,ssemov,ssemov")
230 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
231 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
232 (and (eq_attr "alternative" "2")
233 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
235 (const_string "V4SF")
236 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
237 (const_string "V4SF")
238 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
239 (const_string "V2DF")
241 (const_string "TI")))])
243 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
244 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
245 ;; from memory, we'd prefer to load the memory directly into the %xmm
246 ;; register. To facilitate this happy circumstance, this pattern won't
247 ;; split until after register allocation. If the 64-bit value didn't
248 ;; come from memory, this is the best we can do. This is much better
249 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from it.
;; Operand 0 is a V4SI register; operand 1 is a DI value in a general
;; register pair ("r") or in memory ("m"), viewed through a V4SI subreg;
;; operand 2 is a V4SI scratch that is an early-clobber xmm register in the
;; register alternative and unused ("X") in the memory alternative.
;; Only enabled for !TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES,
;; and only split once reload_completed is set.
;; Split code, register source: load the SImode subregs at byte offsets 0
;; and 4 into operands[0] and operands[2] with sse2_loadld, then combine
;; them with sse2_punpckldq.  Memory source: a single vec_concatv2di of the
;; DI memory operand with const0_rtx into the V2DI lowpart of operands[0].
252 (define_insn_and_split "movdi_to_sse"
254 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
255 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
256 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
257 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
259 "&& reload_completed"
262 if (register_operand (operands[1], DImode))
264 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
265 Assemble the 64-bit DImode value in an xmm register. */
266 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
267 gen_rtx_SUBREG (SImode, operands[1], 0)));
268 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
269 gen_rtx_SUBREG (SImode, operands[1], 4)));
270 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
272 else if (memory_operand (operands[1], DImode))
273 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; NOTE(review): presumably the body of a define_split; its opening line is
;; not visible here -- confirm.
;; Matches a V4SF register set from a zero_extended_scalar_load_operand once
;; reload has completed (TARGET_SSE).  The preparation code narrows
;; operands[1] to its SFmode subreg at offset 0 and sets operands[2] to the
;; V4SF zero constant; the replacement RTL uses a vec_duplicate of operand 1.
279 [(set (match_operand:V4SF 0 "register_operand" "")
280 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
281 "TARGET_SSE && reload_completed"
284 (vec_duplicate:V4SF (match_dup 1))
288 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
289 operands[2] = CONST0_RTX (V4SFmode);
;; NOTE(review): presumably the body of a define_split; its opening line is
;; not visible here -- confirm.
;; Matches a V2DF register set from a zero_extended_scalar_load_operand once
;; reload has completed (TARGET_SSE2) and rewrites it as a vec_concat of the
;; DFmode subreg of operand 1 (offset 0) with a DFmode zero constant.
293 [(set (match_operand:V2DF 0 "register_operand" "")
294 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
295 "TARGET_SSE2 && reload_completed"
296 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
298 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
299 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register operand in any AVX256MODE mode; the work is
;; done by ix86_expand_push.
302 (define_expand "push<mode>1"
303 [(match_operand:AVX256MODE 0 "register_operand" "")]
306 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a register operand in any SSEMODE mode; the work is
;; done by ix86_expand_push.
310 (define_expand "push<mode>1"
311 [(match_operand:SSEMODE 0 "register_operand" "")]
314 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every AVX256MODE mode; delegated to
;; ix86_expand_vector_move_misalign.
318 (define_expand "movmisalign<mode>"
319 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
320 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
323 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for every SSEMODE mode; delegated to
;; ix86_expand_vector_move_misalign.
327 (define_expand "movmisalign<mode>"
328 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
329 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
332 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; vmovups / vmovupd for the AVXMODEF2P float vector modes.  Two
;; alternatives: reg-or-mem -> reg and reg -> mem; the condition also rejects
;; the case where both operands are memory.
336 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
337 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
339 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
341 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
342 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
343 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
344 [(set_attr "type" "ssemov")
345 (set_attr "prefix" "vex")
346 (set_attr "mode" "<MODE>")])
;; Emits "%vmovq" from a V2DI register or memory operand into a V2DI
;; register; the visible RTL selects element 0 of operand 1.  The "prefix"
;; attribute is maybe_vex and the mode attribute is TI.
348 (define_insn "sse2_movq128"
349 [(set (match_operand:V2DI 0 "register_operand" "=x")
352 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
353 (parallel [(const_int 0)]))
356 "%vmovq\t{%1, %0|%0, %1}"
357 [(set_attr "type" "ssemov")
358 (set_attr "prefix" "maybe_vex")
359 (set_attr "mode" "TI")])
;; movups / movupd for the SSEMODEF2P float vector modes (named sse_movups
;; and sse2_movupd through the <sse> attribute).  Two alternatives:
;; reg-or-mem -> reg and reg -> mem; both-memory operands are rejected.
361 (define_insn "<sse>_movup<ssemodesuffixf2c>"
362 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
364 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
366 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
367 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
368 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
369 [(set_attr "type" "ssemov")
370 (set_attr "mode" "<MODE>")])
;; vmovdqu for the QI vector modes V32QI and V16QI (AVXMODEQI).  Requires
;; TARGET_AVX and rejects the case where both operands are memory.
372 (define_insn "avx_movdqu<avxmodesuffix>"
373 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
375 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
377 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
378 "vmovdqu\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "prefix" "vex")
381 (set_attr "mode" "<avxvecmode>")])
;; movdqu on V16QI, wrapped in an unspec.  Requires TARGET_SSE2 and rejects
;; the case where both operands are memory; sets prefix_data16 to 1.
383 (define_insn "sse2_movdqu"
384 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
385 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
387 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
388 "movdqu\t{%1, %0|%0, %1}"
389 [(set_attr "type" "ssemov")
390 (set_attr "prefix_data16" "1")
391 (set_attr "mode" "TI")])
;; vmovntps / vmovntpd: store an xmm/ymm register (operand 1) to memory
;; (operand 0) for the AVXMODEF2P float vector modes.
393 (define_insn "avx_movnt<mode>"
394 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
396 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
398 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
399 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
400 [(set_attr "type" "ssemov")
401 (set_attr "prefix" "vex")
402 (set_attr "mode" "<MODE>")])
;; movntps / movntpd: store a register (operand 1) to memory (operand 0) for
;; the SSEMODEF2P float vector modes.
404 (define_insn "<sse>_movnt<mode>"
405 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
407 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
409 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
410 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
411 [(set_attr "type" "ssemov")
412 (set_attr "mode" "<MODE>")])
;; vmovntdq: store a register (operand 1) to memory (operand 0) for the DI
;; vector modes V4DI and V2DI (AVXMODEDI).  Note the "type" attribute here is
;; ssecvt, unlike the ssemov used by the float movnt patterns.
414 (define_insn "avx_movnt<mode>"
415 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
417 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
420 "vmovntdq\t{%1, %0|%0, %1}"
421 [(set_attr "type" "ssecvt")
422 (set_attr "prefix" "vex")
423 (set_attr "mode" "<avxvecmode>")])
;; movntdq: store a V2DI register (operand 1) to memory (operand 0), wrapped
;; in an unspec; sets prefix_data16 to 1 and mode TI.
425 (define_insn "sse2_movntv2di"
426 [(set (match_operand:V2DI 0 "memory_operand" "=m")
427 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
430 "movntdq\t{%1, %0|%0, %1}"
431 [(set_attr "type" "ssecvt")
432 (set_attr "prefix_data16" "1")
433 (set_attr "mode" "TI")])
;; movnti: store an SImode general register (constraint "r") to memory,
;; wrapped in an unspec.
;; NOTE(review): the "mode" attribute is V2DF although the operands are
;; SImode -- confirm this is intentional.
435 (define_insn "sse2_movntsi"
436 [(set (match_operand:SI 0 "memory_operand" "=m")
437 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
440 "movnti\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssecvt")
442 (set_attr "mode" "V2DF")])
;; vlddqu: load a V32QI or V16QI value (AVXMODEQI) from memory (operand 1)
;; into a register (operand 0).
444 (define_insn "avx_lddqu<avxmodesuffix>"
445 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
447 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
450 "vlddqu\t{%1, %0|%0, %1}"
451 [(set_attr "type" "ssecvt")
452 (set_attr "prefix" "vex")
453 (set_attr "mode" "<avxvecmode>")])
;; lddqu: load a V16QI value from memory (operand 1) into a register
;; (operand 0), wrapped in an unspec; sets prefix_rep to 1 and mode TI.
455 (define_insn "sse3_lddqu"
456 [(set (match_operand:V16QI 0 "register_operand" "=x")
457 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
460 "lddqu\t{%1, %0|%0, %1}"
461 [(set_attr "type" "ssecvt")
462 (set_attr "prefix_rep" "1")
463 (set_attr "mode" "TI")])
465 ; Expand patterns for non-temporal stores. At the moment, only those
466 ; that directly map to insns are defined; it would be possible to
467 ; define patterns for other modes that would expand to several insns.
;; storent expander for the SSEMODEF2P float vector modes: register source,
;; memory destination, enabled under SSE_VEC_FLOAT_MODE_P.
469 (define_expand "storent<mode>"
470 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
472 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
474 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent expander for the MODEF modes (iterator defined outside this
;; section): register source, memory destination.
477 (define_expand "storent<mode>"
478 [(set (match_operand:MODEF 0 "memory_operand" "")
480 [(match_operand:MODEF 1 "register_operand" "")]
;; storent expander for V2DI: register source, memory destination, with the
;; same unspec-wrapped shape as the sse2_movntv2di insn.
485 (define_expand "storentv2di"
486 [(set (match_operand:V2DI 0 "memory_operand" "")
487 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent expander for SImode: register source, memory destination, with
;; the same unspec-wrapped shape as the sse2_movntsi insn.
492 (define_expand "storentsi"
493 [(set (match_operand:SI 0 "memory_operand" "")
494 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 ;; Parallel floating point arithmetic
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander for the SSEMODEF2P float vector modes.  The pattern is
;; never emitted as written: the preparation code calls
;; ix86_expand_fp_absneg_operator with the operation code and finishes with
;; DONE.
505 (define_expand "<code><mode>2"
506 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
508 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
509 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
510 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander (plusminus code iterator) for the 32-byte float
;; vector modes.  Both sources may be nonimmediate operands; they are
;; legitimized in place by ix86_fixup_binary_operands_no_copy.
512 (define_expand "<plusminus_insn><mode>3"
513 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
514 (plusminus:AVX256MODEF2P
515 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
516 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
517 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
518 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Three-operand AVX packed add/subtract for the AVXMODEF2P modes:
;; destination and first source are xmm/ymm registers (the <comm> code
;; attribute is prepended to operand 1's constraint), second source may be
;; memory.  Also gated on ix86_binary_operator_ok.
520 (define_insn "*avx_<plusminus_insn><mode>3"
521 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
522 (plusminus:AVXMODEF2P
523 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
524 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
525 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
526 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
527 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
528 [(set_attr "type" "sseadd")
529 (set_attr "prefix" "vex")
530 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 16-byte float vector modes (SSEMODEF2P);
;; operands are legitimized by ix86_fixup_binary_operands_no_copy.
532 (define_expand "<plusminus_insn><mode>3"
533 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
534 (plusminus:SSEMODEF2P
535 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
536 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
537 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
538 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Two-operand SSE packed add/subtract: operand 1 is tied to the destination
;; (constraint "0", prefixed by the <comm> code attribute), operand 2 may be
;; memory.  The template therefore prints only %2 and %0.
540 (define_insn "*<plusminus_insn><mode>3"
541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
542 (plusminus:SSEMODEF2P
543 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
544 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
545 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
546 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
547 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
548 [(set_attr "type" "sseadd")
549 (set_attr "mode" "<MODE>")])
;; AVX scalar add/subtract (v...ss / v...sd) expressed as a vec_merge over a
;; 16-byte float vector mode; three-operand form with register operand 1
;; and register-or-memory operand 2.  The insn "mode" attribute is the
;; scalar element mode.
551 (define_insn "*avx_vm<plusminus_insn><mode>3"
552 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
553 (vec_merge:SSEMODEF2P
554 (plusminus:SSEMODEF2P
555 (match_operand:SSEMODEF2P 1 "register_operand" "x")
556 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
559 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
560 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
561 [(set_attr "type" "sseadd")
562 (set_attr "prefix" "vex")
563 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar add/subtract (...ss / ...sd) expressed as a vec_merge;
;; operand 1 is tied to the destination (constraint "0"), operand 2 may be
;; memory.  The insn "mode" attribute is the scalar element mode.
565 (define_insn "<sse>_vm<plusminus_insn><mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
567 (vec_merge:SSEMODEF2P
568 (plusminus:SSEMODEF2P
569 (match_operand:SSEMODEF2P 1 "register_operand" "0")
570 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
573 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
574 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
575 [(set_attr "type" "sseadd")
576 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 32-byte float vector modes; operands are
;; legitimized by ix86_fixup_binary_operands_no_copy with code MULT.
578 (define_expand "mul<mode>3"
579 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
581 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
582 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
583 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
584 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Three-operand AVX packed multiply (vmulps / vmulpd) for the AVXMODEF2P
;; modes.  Operand 1 carries the "%" (commutative) constraint modifier;
;; operand 2 may be memory.  Also gated on ix86_binary_operator_ok.
586 (define_insn "*avx_mul<mode>3"
587 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
589 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
590 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
591 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
592 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
593 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
594 [(set_attr "type" "ssemul")
595 (set_attr "prefix" "vex")
596 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 16-byte float vector modes; operands are
;; legitimized by ix86_fixup_binary_operands_no_copy with code MULT.
598 (define_expand "mul<mode>3"
599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
601 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
602 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
603 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
604 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Two-operand SSE packed multiply (mulps / mulpd).  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be memory.
606 (define_insn "*mul<mode>3"
607 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
609 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
610 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
611 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
612 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
613 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
614 [(set_attr "type" "ssemul")
615 (set_attr "mode" "<MODE>")])
;; AVX scalar multiply (vmulss / vmulsd) expressed as a vec_merge over a
;; 16-byte float vector mode; three-operand form with register operand 1
;; and register-or-memory operand 2.  The insn "mode" attribute is the
;; scalar element mode.
;; The condition uses AVX128_VEC_FLOAT_MODE_P, matching the other AVX scalar
;; ("vm") add/sub, div and max/min patterns over SSEMODEF2P; the iterator
;; only yields V4SF and V2DF, so the 128-bit predicate is the precise one.
617 (define_insn "*avx_vmmul<mode>3"
618 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
619 (vec_merge:SSEMODEF2P
621 (match_operand:SSEMODEF2P 1 "register_operand" "x")
622 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
625 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
626 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "type" "ssemul")
628 (set_attr "prefix" "vex")
629 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar multiply (mulss / mulsd) expressed as a vec_merge; operand 1
;; is tied to the destination, operand 2 may be memory.  The insn "mode"
;; attribute is the scalar element mode.
631 (define_insn "<sse>_vmmul<mode>3"
632 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
633 (vec_merge:SSEMODEF2P
635 (match_operand:SSEMODEF2P 1 "register_operand" "0")
636 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
639 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
640 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
641 [(set_attr "type" "ssemul")
642 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first legitimized with
;; ix86_fixup_binary_operands_no_copy.  When SSE math and TARGET_RECIP are
;; enabled, the insn is being optimized for speed, and the finite-math,
;; non-trapping and unsafe-math flags are all set, the division is instead
;; emitted by ix86_emit_swdivsf.
;; The speed test is optimize_insn_for_speed_p (), the same predicate the
;; V4SF division expander uses, rather than the global optimize_size flag.
644 (define_expand "divv8sf3"
645 [(set (match_operand:V8SF 0 "register_operand" "")
646 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
647 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
650 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
652 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
653 && flag_finite_math_only && !flag_trapping_math
654 && flag_unsafe_math_optimizations)
656 ix86_emit_swdivsf (operands[0], operands[1],
657 operands[2], V8SFmode);
;; V4DF division expander; only legitimizes the operands with
;; ix86_fixup_binary_operands_no_copy (no reciprocal path, unlike V8SF).
662 (define_expand "divv4df3"
663 [(set (match_operand:V4DF 0 "register_operand" "")
664 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
665 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
667 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; Three-operand AVX packed divide (vdivps / vdivpd) for the AVXMODEF2P
;; modes: register dividend (operand 1), register-or-memory divisor
;; (operand 2).
669 (define_insn "avx_div<mode>3"
670 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
672 (match_operand:AVXMODEF2P 1 "register_operand" "x")
673 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
674 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
675 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
676 [(set_attr "type" "ssediv")
677 (set_attr "prefix" "vex")
678 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  When SSE math and TARGET_RECIP are enabled, the
;; insn is optimized for speed, and the finite-math, non-trapping and
;; unsafe-math flags are all set, the division is emitted by
;; ix86_emit_swdivsf instead of the div pattern.
680 (define_expand "divv4sf3"
681 [(set (match_operand:V4SF 0 "register_operand" "")
682 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
683 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
686 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
687 && flag_finite_math_only && !flag_trapping_math
688 && flag_unsafe_math_optimizations)
690 ix86_emit_swdivsf (operands[0], operands[1],
691 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
696 (define_expand "divv2df3"
697 [(set (match_operand:V2DF 0 "register_operand" "")
698 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
699 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Three-operand AVX packed divide restricted to the 16-byte float vector
;; modes (SSEMODEF2P under AVX128_VEC_FLOAT_MODE_P).
703 (define_insn "*avx_div<mode>3"
704 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
706 (match_operand:SSEMODEF2P 1 "register_operand" "x")
707 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
708 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
709 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
710 [(set_attr "type" "ssediv")
711 (set_attr "prefix" "vex")
712 (set_attr "mode" "<MODE>")])
;; Two-operand SSE packed divide (divps / divpd): the dividend is tied to
;; the destination register, the divisor may be memory.
714 (define_insn "<sse>_div<mode>3"
715 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
717 (match_operand:SSEMODEF2P 1 "register_operand" "0")
718 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
719 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
720 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
721 [(set_attr "type" "ssediv")
722 (set_attr "mode" "<MODE>")])
;; AVX scalar divide (vdivss / vdivsd) expressed as a vec_merge over a
;; 16-byte float vector mode; three-operand form.  The insn "mode"
;; attribute is the scalar element mode.
724 (define_insn "*avx_vmdiv<mode>3"
725 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
726 (vec_merge:SSEMODEF2P
728 (match_operand:SSEMODEF2P 1 "register_operand" "x")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
732 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
733 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
734 [(set_attr "type" "ssediv")
735 (set_attr "prefix" "vex")
736 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar divide (divss / divsd) expressed as a vec_merge; the dividend
;; is tied to the destination register, the divisor may be memory.
738 (define_insn "<sse>_vmdiv<mode>3"
739 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
740 (vec_merge:SSEMODEF2P
742 (match_operand:SSEMODEF2P 1 "register_operand" "0")
743 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
746 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
747 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
748 [(set_attr "type" "ssediv")
749 (set_attr "mode" "<ssescalarmode>")])
;; vrcpps on V8SF, modelled as UNSPEC_RCP of a register-or-memory operand.
751 (define_insn "avx_rcpv8sf2"
752 [(set (match_operand:V8SF 0 "register_operand" "=x")
754 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
756 "vrcpps\t{%1, %0|%0, %1}"
757 [(set_attr "type" "sse")
758 (set_attr "prefix" "vex")
759 (set_attr "mode" "V8SF")])
;; rcpps on V4SF, modelled as UNSPEC_RCP; the "%v" template prefix pairs
;; with the maybe_vex "prefix" attribute.
761 (define_insn "sse_rcpv4sf2"
762 [(set (match_operand:V4SF 0 "register_operand" "=x")
764 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
766 "%vrcpps\t{%1, %0|%0, %1}"
767 [(set_attr "type" "sse")
768 (set_attr "prefix" "maybe_vex")
769 (set_attr "mode" "V4SF")])
;; vrcpss: three-operand AVX scalar form.  Operand 1 (register or memory)
;; is the unspec input; operand 2 is a separate V4SF register that the
;; template prints as the middle operand.  The insn "mode" attribute is SF.
771 (define_insn "*avx_vmrcpv4sf2"
772 [(set (match_operand:V4SF 0 "register_operand" "=x")
774 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
776 (match_operand:V4SF 2 "register_operand" "x")
779 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
780 [(set_attr "type" "sse")
781 (set_attr "prefix" "vex")
782 (set_attr "mode" "SF")])
;; rcpss: two-operand SSE scalar form.  Operand 1 (register or memory) is
;; the unspec input; operand 2 is tied to the destination (constraint "0")
;; and so does not appear in the template.
784 (define_insn "sse_vmrcpv4sf2"
785 [(set (match_operand:V4SF 0 "register_operand" "=x")
787 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
789 (match_operand:V4SF 2 "register_operand" "0")
792 "rcpss\t{%1, %0|%0, %1}"
793 [(set_attr "type" "sse")
794 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is being optimized for speed, and the finite-math, non-trapping
;; and unsafe-math flags are all set, the result is produced by
;; ix86_emit_swsqrtsf (last argument 0) instead of the sqrt pattern.
;; The speed test is optimize_insn_for_speed_p (), the same predicate the
;; V4SF square-root expander uses, rather than the global optimize_size
;; flag.
796 (define_expand "sqrtv8sf2"
797 [(set (match_operand:V8SF 0 "register_operand" "")
798 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
801 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
802 && flag_finite_math_only && !flag_trapping_math
803 && flag_unsafe_math_optimizations)
805 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; vsqrtps on V8SF: register destination, register-or-memory source.
810 (define_insn "avx_sqrtv8sf2"
811 [(set (match_operand:V8SF 0 "register_operand" "=x")
812 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
814 "vsqrtps\t{%1, %0|%0, %1}"
815 [(set_attr "type" "sse")
816 (set_attr "prefix" "vex")
817 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is optimized for speed, and the finite-math, non-trapping and
;; unsafe-math flags are all set, the result is produced by
;; ix86_emit_swsqrtsf (last argument 0) instead of the sqrt pattern.
819 (define_expand "sqrtv4sf2"
820 [(set (match_operand:V4SF 0 "register_operand" "")
821 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
824 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
825 && flag_finite_math_only && !flag_trapping_math
826 && flag_unsafe_math_optimizations)
828 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps on V4SF; the "%v" template prefix pairs with the maybe_vex
;; "prefix" attribute.
833 (define_insn "sse_sqrtv4sf2"
834 [(set (match_operand:V4SF 0 "register_operand" "=x")
835 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
837 "%vsqrtps\t{%1, %0|%0, %1}"
838 [(set_attr "type" "sse")
839 (set_attr "prefix" "maybe_vex")
840 (set_attr "mode" "V4SF")])
;; vsqrtpd on V4DF: register destination, register-or-memory source.
842 (define_insn "sqrtv4df2"
843 [(set (match_operand:V4DF 0 "register_operand" "=x")
844 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
846 "vsqrtpd\t{%1, %0|%0, %1}"
847 [(set_attr "type" "sse")
848 (set_attr "prefix" "vex")
849 (set_attr "mode" "V4DF")])
;; sqrtpd on V2DF; the "%v" template prefix pairs with the maybe_vex
;; "prefix" attribute.
851 (define_insn "sqrtv2df2"
852 [(set (match_operand:V2DF 0 "register_operand" "=x")
853 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
855 "%vsqrtpd\t{%1, %0|%0, %1}"
856 [(set_attr "type" "sse")
857 (set_attr "prefix" "maybe_vex")
858 (set_attr "mode" "V2DF")])
;; AVX scalar square root (vsqrtss / vsqrtsd) expressed as a vec_merge over
;; a 16-byte float vector mode.  Operand 1 (register or memory) is the
;; square-root input; operand 2 is a separate register that the template
;; prints as the middle operand.  The insn "mode" attribute is the scalar
;; element mode.
;; The condition uses AVX128_VEC_FLOAT_MODE_P, matching the other AVX scalar
;; ("vm") add/sub, div and max/min patterns over SSEMODEF2P; the iterator
;; only yields V4SF and V2DF, so the 128-bit predicate is the precise one.
860 (define_insn "*avx_vmsqrt<mode>2"
861 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
862 (vec_merge:SSEMODEF2P
864 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
865 (match_operand:SSEMODEF2P 2 "register_operand" "x")
867 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
868 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "prefix" "vex")
871 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar square root (sqrtss / sqrtsd) expressed as a vec_merge.
;; Operand 1 (register or memory) is the square-root input; operand 2 is
;; tied to the destination and so does not appear in the template.
873 (define_insn "<sse>_vmsqrt<mode>2"
874 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
875 (vec_merge:SSEMODEF2P
877 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
878 (match_operand:SSEMODEF2P 2 "register_operand" "0")
880 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
881 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
882 [(set_attr "type" "sse")
883 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (UNSPEC_RSQRT), enabled for
;; TARGET_AVX && TARGET_SSE_MATH.  The expansion calls ix86_emit_swsqrtsf
;; with last argument 1 (the sqrt expanders pass 0).
885 (define_expand "rsqrtv8sf2"
886 [(set (match_operand:V8SF 0 "register_operand" "")
888 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
889 "TARGET_AVX && TARGET_SSE_MATH"
891 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; vrsqrtps on V8SF, modelled as UNSPEC_RSQRT of a register-or-memory
;; operand.
895 (define_insn "avx_rsqrtv8sf2"
896 [(set (match_operand:V8SF 0 "register_operand" "=x")
898 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
900 "vrsqrtps\t{%1, %0|%0, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  The expansion calls
;; ix86_emit_swsqrtsf with last argument 1 (the sqrt expanders pass 0).
905 (define_expand "rsqrtv4sf2"
906 [(set (match_operand:V4SF 0 "register_operand" "")
908 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
911 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on V4SF, modelled as UNSPEC_RSQRT; the "%v" template prefix pairs
;; with the maybe_vex "prefix" attribute.
915 (define_insn "sse_rsqrtv4sf2"
916 [(set (match_operand:V4SF 0 "register_operand" "=x")
918 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
920 "%vrsqrtps\t{%1, %0|%0, %1}"
921 [(set_attr "type" "sse")
922 (set_attr "prefix" "maybe_vex")
923 (set_attr "mode" "V4SF")])
;; vrsqrtss: three-operand AVX scalar form.  Operand 1 (register or memory)
;; is the unspec input; operand 2 is a separate V4SF register that the
;; template prints as the middle operand.  The insn "mode" attribute is SF.
925 (define_insn "*avx_vmrsqrtv4sf2"
926 [(set (match_operand:V4SF 0 "register_operand" "=x")
928 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
930 (match_operand:V4SF 2 "register_operand" "x")
933 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "vex")
936 (set_attr "mode" "SF")])
;; rsqrtss: two-operand SSE scalar form.  Operand 1 (register or memory) is
;; the unspec input; operand 2 is tied to the destination (constraint "0")
;; and so does not appear in the template.
938 (define_insn "sse_vmrsqrtv4sf2"
939 [(set (match_operand:V4SF 0 "register_operand" "=x")
941 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
943 (match_operand:V4SF 2 "register_operand" "0")
946 "rsqrtss\t{%1, %0|%0, %1}"
947 [(set_attr "type" "sse")
948 (set_attr "mode" "SF")])
950 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
951 ;; isn't really correct, as those rtl operators aren't defined when
952 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Max/min expander (smaxmin code iterator) for the 32-byte float vector
;; modes.  Unless flag_finite_math_only is set, operand 1 is forced into a
;; register before the operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
954 (define_expand "<code><mode>3"
955 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
956 (smaxmin:AVX256MODEF2P
957 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
958 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
959 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
961 if (!flag_finite_math_only)
962 operands[1] = force_reg (<MODE>mode, operands[1]);
963 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Max/min expander for the 16-byte float vector modes.  Unless
;; flag_finite_math_only is set, operand 1 is forced into a register before
;; the operands are legitimized by ix86_fixup_binary_operands_no_copy.
966 (define_expand "<code><mode>3"
967 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
969 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
970 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
971 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
973 if (!flag_finite_math_only)
974 operands[1] = force_reg (<MODE>mode, operands[1]);
975 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed max/min, finite-math variant: only enabled when
;; flag_finite_math_only is set, which is why operand 1 may carry the "%"
;; (commutative) constraint modifier.  Also gated on
;; ix86_binary_operator_ok.
978 (define_insn "*avx_<code><mode>3_finite"
979 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
981 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
982 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
983 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
984 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
985 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
986 [(set_attr "type" "sseadd")
987 (set_attr "prefix" "vex")
988 (set_attr "mode" "<MODE>")])
;; SSE packed max/min, finite-math variant: only enabled when
;; flag_finite_math_only is set; operand 1 is tied to the destination and
;; marked commutative ("%0").  Also gated on ix86_binary_operator_ok.
990 (define_insn "*<code><mode>3_finite"
991 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
993 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
994 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
995 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
996 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
997 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
998 [(set_attr "type" "sseadd")
999 (set_attr "mode" "<MODE>")])
;; General AVX vector min/max form; its condition does not require
;; flag_finite_math_only.  Operand 1 is a plain register operand without
;; the "%" commutative marker: as this file's comments on SMIN/SMAX and on
;; the ieee_smin/ieee_smax patterns note, the operands are not
;; interchangeable in the presence of NaN and -0.0, so the register
;; allocator must not be allowed to swap them.  This mirrors the SSE
;; sibling "*<code><mode>3", which uses register_operand "0".
1001 (define_insn "*avx_<code><mode>3"
1002 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1004 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1005 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1006 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1007 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1008 [(set_attr "type" "sseadd")
1009 (set_attr "prefix" "vex")
1010 (set_attr "mode" "<avxvecmode>")])
;; General SSE vector min/max form (no flag_finite_math_only requirement).
;; Operand 1 must be a register tied to the destination ("0") and carries
;; no commutative marker.
1012 (define_insn "*<code><mode>3"
1013 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1015 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1016 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1017 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1018 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1019 [(set_attr "type" "sseadd")
1020 (set_attr "mode" "<MODE>")])
;; AVX scalar ("s" suffix) min/max expressed as a vec_merge; emits the
;; three-operand VEX form.  The "mode" attribute is the scalar mode.
1022 (define_insn "*avx_vm<code><mode>3"
1023 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1024 (vec_merge:SSEMODEF2P
1026 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1027 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1030 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1031 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1032 [(set_attr "type" "sse")
1033 (set_attr "prefix" "vex")
1034 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar ("s" suffix) min/max expressed as a vec_merge.  Operand 1 is
;; tied to the destination ("0"); the two-operand template is used.
1036 (define_insn "<sse>_vm<code><mode>3"
1037 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1038 (vec_merge:SSEMODEF2P
1040 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1041 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1044 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1045 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1046 [(set_attr "type" "sse")
1047 (set_attr "mode" "<ssescalarmode>")])
1049 ;; These versions of the min/max patterns implement exactly the operations
1050 ;; min = (op1 < op2 ? op1 : op2)
1051 ;; max = (!(op1 < op2) ? op1 : op2)
1052 ;; Their operands are not commutative, and thus they may be used in the
1053 ;; presence of -0.0 and NaN.
;; AVX form of the non-commutative minimum: emits vminp<suffix> with
;; operand 1 in a register and operand 2 in a register or memory.
1055 (define_insn "*avx_ieee_smin<mode>3"
1056 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1058 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1059 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1061 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1062 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1063 [(set_attr "type" "sseadd")
1064 (set_attr "prefix" "vex")
1065 (set_attr "mode" "<avxvecmode>")])
;; AVX form of the non-commutative maximum: emits vmaxp<suffix> with
;; operand 1 in a register and operand 2 in a register or memory.
1067 (define_insn "*avx_ieee_smax<mode>3"
1068 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1070 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1071 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1073 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1074 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1075 [(set_attr "type" "sseadd")
1076 (set_attr "prefix" "vex")
1077 (set_attr "mode" "<avxvecmode>")])
;; SSE form of the non-commutative minimum: emits minp<suffix>; operand 1
;; is tied to the destination register ("0").
1079 (define_insn "*ieee_smin<mode>3"
1080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1082 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1083 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1085 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1086 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1087 [(set_attr "type" "sseadd")
1088 (set_attr "mode" "<MODE>")])
;; SSE form of the non-commutative maximum: emits maxp<suffix>; operand 1
;; is tied to the destination register ("0").
1090 (define_insn "*ieee_smax<mode>3"
1091 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1093 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1094 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1096 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1097 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1098 [(set_attr "type" "sseadd")
1099 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract: emits the three-operand vaddsubps.  One arm of the
;; RTL is (minus op1 op2), reusing both operands through match_dup.
1101 (define_insn "avx_addsubv8sf3"
1102 [(set (match_operand:V8SF 0 "register_operand" "=x")
1105 (match_operand:V8SF 1 "register_operand" "x")
1106 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1107 (minus:V8SF (match_dup 1) (match_dup 2))
1110 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1111 [(set_attr "type" "sseadd")
1112 (set_attr "prefix" "vex")
1113 (set_attr "mode" "V8SF")])
;; V4DF add/subtract: emits the three-operand vaddsubpd.  One arm of the
;; RTL is (minus op1 op2), reusing both operands through match_dup.
1115 (define_insn "avx_addsubv4df3"
1116 [(set (match_operand:V4DF 0 "register_operand" "=x")
1119 (match_operand:V4DF 1 "register_operand" "x")
1120 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1121 (minus:V4DF (match_dup 1) (match_dup 2))
1124 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1125 [(set_attr "type" "sseadd")
1126 (set_attr "prefix" "vex")
1127 (set_attr "mode" "V4DF")])
;; VEX-encoded V4SF add/subtract: emits the three-operand vaddsubps, so
;; operand 1 need not be tied to the destination.
1129 (define_insn "*avx_addsubv4sf3"
1130 [(set (match_operand:V4SF 0 "register_operand" "=x")
1133 (match_operand:V4SF 1 "register_operand" "x")
1134 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1135 (minus:V4SF (match_dup 1) (match_dup 2))
1138 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1139 [(set_attr "type" "sseadd")
1140 (set_attr "prefix" "vex")
1141 (set_attr "mode" "V4SF")])
;; V4SF add/subtract: emits the two-operand addsubps with operand 1 tied
;; to the destination ("0").  Sets the prefix_rep attribute to "1".
1143 (define_insn "sse3_addsubv4sf3"
1144 [(set (match_operand:V4SF 0 "register_operand" "=x")
1147 (match_operand:V4SF 1 "register_operand" "0")
1148 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1149 (minus:V4SF (match_dup 1) (match_dup 2))
1152 "addsubps\t{%2, %0|%0, %2}"
1153 [(set_attr "type" "sseadd")
1154 (set_attr "prefix_rep" "1")
1155 (set_attr "mode" "V4SF")])
;; VEX-encoded V2DF add/subtract: emits the three-operand vaddsubpd, so
;; operand 1 need not be tied to the destination.
1157 (define_insn "*avx_addsubv2df3"
1158 [(set (match_operand:V2DF 0 "register_operand" "=x")
1161 (match_operand:V2DF 1 "register_operand" "x")
1162 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1163 (minus:V2DF (match_dup 1) (match_dup 2))
1166 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1167 [(set_attr "type" "sseadd")
1168 (set_attr "prefix" "vex")
1169 (set_attr "mode" "V2DF")])
;; V2DF add/subtract: emits the two-operand addsubpd with operand 1 tied
;; to the destination ("0").
1171 (define_insn "sse3_addsubv2df3"
1172 [(set (match_operand:V2DF 0 "register_operand" "=x")
1175 (match_operand:V2DF 1 "register_operand" "0")
1176 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1177 (minus:V2DF (match_dup 1) (match_dup 2))
1180 "addsubpd\t{%2, %0|%0, %2}"
1181 [(set_attr "type" "sseadd")
1182 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/subtract (vh<plusminus_mnemonic>pd).  The RTL pairs
;; elements 0/1 and 2/3 selected from operand 1, and likewise elements
;; 0/1 and 2/3 selected from operand 2.
1184 (define_insn "avx_h<plusminus_insn>v4df3"
1185 [(set (match_operand:V4DF 0 "register_operand" "=x")
1190 (match_operand:V4DF 1 "register_operand" "x")
1191 (parallel [(const_int 0)]))
1192 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1194 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1195 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1199 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1200 (parallel [(const_int 0)]))
1201 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1203 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1204 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1206 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1207 [(set_attr "type" "sseadd")
1208 (set_attr "prefix" "vex")
1209 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/subtract (vh<plusminus_mnemonic>ps).  The RTL lists
;; element pairs in this order: elements 0-3 of operand 1, elements 0-3 of
;; operand 2, elements 4-7 of operand 1, then elements 4-7 of operand 2.
1211 (define_insn "avx_h<plusminus_insn>v8sf3"
1212 [(set (match_operand:V8SF 0 "register_operand" "=x")
1218 (match_operand:V8SF 1 "register_operand" "x")
1219 (parallel [(const_int 0)]))
1220 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1222 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1223 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1227 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1228 (parallel [(const_int 0)]))
1229 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1231 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1232 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1236 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1237 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1239 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1240 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1243 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1244 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1246 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1247 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1249 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1250 [(set_attr "type" "sseadd")
1251 (set_attr "prefix" "vex")
1252 (set_attr "mode" "V8SF")])
;; VEX-encoded V4SF horizontal add/subtract (vh<plusminus_mnemonic>ps).
;; Pairs elements 0/1 and 2/3 of operand 1, then 0/1 and 2/3 of operand 2.
1254 (define_insn "*avx_h<plusminus_insn>v4sf3"
1255 [(set (match_operand:V4SF 0 "register_operand" "=x")
1260 (match_operand:V4SF 1 "register_operand" "x")
1261 (parallel [(const_int 0)]))
1262 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1264 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1265 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1269 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1270 (parallel [(const_int 0)]))
1271 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1273 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1274 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1276 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1277 [(set_attr "type" "sseadd")
1278 (set_attr "prefix" "vex")
1279 (set_attr "mode" "V4SF")])
;; V4SF horizontal add/subtract (h<plusminus_mnemonic>ps), two-operand
;; form with operand 1 tied to the destination ("0").  Sets prefix_rep.
1281 (define_insn "sse3_h<plusminus_insn>v4sf3"
1282 [(set (match_operand:V4SF 0 "register_operand" "=x")
1287 (match_operand:V4SF 1 "register_operand" "0")
1288 (parallel [(const_int 0)]))
1289 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1291 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1292 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1296 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1297 (parallel [(const_int 0)]))
1298 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1300 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1301 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1303 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1304 [(set_attr "type" "sseadd")
1305 (set_attr "prefix_rep" "1")
1306 (set_attr "mode" "V4SF")])
;; VEX-encoded V2DF horizontal add/subtract (vh<plusminus_mnemonic>pd).
;; Pairs elements 0/1 of operand 1 and elements 0/1 of operand 2.
1308 (define_insn "*avx_h<plusminus_insn>v2df3"
1309 [(set (match_operand:V2DF 0 "register_operand" "=x")
1313 (match_operand:V2DF 1 "register_operand" "x")
1314 (parallel [(const_int 0)]))
1315 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1318 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1319 (parallel [(const_int 0)]))
1320 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1322 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1323 [(set_attr "type" "sseadd")
1324 (set_attr "prefix" "vex")
1325 (set_attr "mode" "V2DF")])
;; V2DF horizontal add/subtract (h<plusminus_mnemonic>pd), two-operand
;; form with operand 1 tied to the destination ("0").
1327 (define_insn "sse3_h<plusminus_insn>v2df3"
1328 [(set (match_operand:V2DF 0 "register_operand" "=x")
1332 (match_operand:V2DF 1 "register_operand" "0")
1333 (parallel [(const_int 0)]))
1334 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1337 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1338 (parallel [(const_int 0)]))
1339 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1341 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1342 [(set_attr "type" "sseadd")
1343 (set_attr "mode" "V2DF")])
;; V4SF sum reduction of operand 1 into operand 0.  Two strategies appear
;; in the body: two chained gen_sse3_haddv4sf3 calls through a fresh V4SF
;; temporary, or ix86_expand_reduc_v4sf driven by gen_addv4sf3.
1345 (define_expand "reduc_splus_v4sf"
1346 [(match_operand:V4SF 0 "register_operand" "")
1347 (match_operand:V4SF 1 "register_operand" "")]
1352 rtx tmp = gen_reg_rtx (V4SFmode);
1353 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1354 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1357 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; V2DF sum reduction: a single gen_sse3_haddv2df3 with operand 1 supplied
;; as both sources, writing operand 0.
1361 (define_expand "reduc_splus_v2df"
1362 [(match_operand:V2DF 0 "register_operand" "")
1363 (match_operand:V2DF 1 "register_operand" "")]
1366 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V4SF maximum reduction: delegates to ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining generator.
1370 (define_expand "reduc_smax_v4sf"
1371 [(match_operand:V4SF 0 "register_operand" "")
1372 (match_operand:V4SF 1 "register_operand" "")]
1375 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; V4SF minimum reduction: delegates to ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining generator.
1379 (define_expand "reduc_smin_v4sf"
1380 [(match_operand:V4SF 0 "register_operand" "")
1381 (match_operand:V4SF 1 "register_operand" "")]
1384 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1390 ;; Parallel floating point comparisons
1392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare: emits vcmpp<suffix> with operand 3 as an immediate
;; accepted by the const_0_to_31_operand predicate.
1394 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1395 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1397 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1398 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1399 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1402 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1403 [(set_attr "type" "ssecmp")
1404 (set_attr "prefix" "vex")
1405 (set_attr "mode" "<MODE>")])
;; AVX scalar compare: emits vcmps<suffix> with operand 3 as an immediate
;; accepted by the const_0_to_31_operand predicate; the result is
;; expressed as a vec_merge.  Operand 0 is the output and therefore needs
;; the "=x" constraint so that register allocation treats it as a written
;; SSE register, as every other define_insn in this file does.
1407 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1408 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1409 (vec_merge:SSEMODEF2P
1411 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1412 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1413 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1418 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1419 [(set_attr "type" "ssecmp")
1420 (set_attr "prefix" "vex")
1421 (set_attr "mode" "<ssescalarmode>")])
1423 ;; We don't promote 128bit vector compare intrinsics.  But the vectorizer
1424 ;; may generate 256bit vector compare instructions.
;; Operand 3 is the comparison operator itself (matched by the
;; avx_comparison_float_operator predicate); it is printed into the
;; mnemonic via %D3.
1425 (define_insn "*avx_maskcmp<mode>3"
1426 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1427 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1428 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1429 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1430 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1431 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1432 [(set_attr "type" "ssecmp")
1433 (set_attr "prefix" "vex")
1434 (set_attr "mode" "<avxvecmode>")])
;; SSE mask compare over the SSEMODEF4 modes (SF, DF, V4SF, V2DF).  The
;; comparison operator is operand 3 (sse_comparison_operator) and is
;; printed via %D3; operand 1 is tied to the destination ("0").
1436 (define_insn "<sse>_maskcmp<mode>3"
1437 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1438 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1439 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1440 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1441 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1443 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1444 [(set_attr "type" "ssecmp")
1445 (set_attr "mode" "<MODE>")])
;; Scalar ("s" suffix) SSE mask compare expressed as a vec_merge.  Only
;; enabled when TARGET_SSE5 is off; operand 1 is tied to the destination.
1447 (define_insn "<sse>_vmmaskcmp<mode>3"
1448 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1449 (vec_merge:SSEMODEF2P
1450 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1451 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1452 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1455 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1456 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1457 [(set_attr "type" "ssecmp")
1458 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operands 0 and 1,
;; emitting comis<suffix>.  The %v template prefix and the "maybe_vex"
;; prefix attribute cover both the legacy and the VEX encoding.
1460 (define_insn "<sse>_comi"
1461 [(set (reg:CCFP FLAGS_REG)
1464 (match_operand:<ssevecmode> 0 "register_operand" "x")
1465 (parallel [(const_int 0)]))
1467 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1468 (parallel [(const_int 0)]))))]
1469 "SSE_FLOAT_MODE_P (<MODE>mode)"
1470 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1471 [(set_attr "type" "ssecomi")
1472 (set_attr "prefix" "maybe_vex")
1473 (set_attr "mode" "<MODE>")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of operands 0 and 1,
;; emitting ucomis<suffix>.  The %v template prefix and the "maybe_vex"
;; prefix attribute cover both the legacy and the VEX encoding.
1475 (define_insn "<sse>_ucomi"
1476 [(set (reg:CCFPU FLAGS_REG)
1479 (match_operand:<ssevecmode> 0 "register_operand" "x")
1480 (parallel [(const_int 0)]))
1482 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1483 (parallel [(const_int 0)]))))]
1484 "SSE_FLOAT_MODE_P (<MODE>mode)"
1485 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1486 [(set_attr "type" "ssecomi")
1487 (set_attr "prefix" "maybe_vex")
1488 (set_attr "mode" "<MODE>")])
;; Vector conditional select for SSEMODEF2P: operand 3 is the comparison
;; of operands 4 and 5, and operands 1 and 2 are the if_then_else arms.
;; Expansion is delegated to ix86_expand_fp_vcond, whose boolean result
;; is captured in `ok'.
1490 (define_expand "vcond<mode>"
1491 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1492 (if_then_else:SSEMODEF2P
1493 (match_operator 3 ""
1494 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1495 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1496 (match_operand:SSEMODEF2P 1 "general_operand" "")
1497 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1498 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1500 bool ok = ix86_expand_fp_vcond (operands);
1505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1507 ;; Parallel floating point logical operations
1509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vector and-not: emits the three-operand vandnp<suffix> with
;; operand 1 in a register and operand 2 in a register or memory.
1511 (define_insn "avx_andnot<mode>3"
1512 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1515 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1516 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1517 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1518 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1519 [(set_attr "type" "sselog")
1520 (set_attr "prefix" "vex")
1521 (set_attr "mode" "<avxvecmode>")])
;; SSE vector and-not: emits the two-operand andnp<suffix> with operand 1
;; tied to the destination ("0").
1523 (define_insn "<sse>_andnot<mode>3"
1524 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1527 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1528 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1529 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1530 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1531 [(set_attr "type" "sselog")
1532 (set_attr "mode" "<MODE>")])
;; Expander for the plogic codes on AVX256MODEF2P modes; its only
;; preparation step is ix86_fixup_binary_operands_no_copy.
1534 (define_expand "<code><mode>3"
1535 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1536 (plogic:AVX256MODEF2P
1537 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1538 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1539 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1540 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX vector logical operation: emits v<plogicprefix>p<suffix> in the
;; three-operand VEX form.  Operand 1 is commutative ("%"), and the
;; operands must pass ix86_binary_operator_ok.
1542 (define_insn "*avx_<code><mode>3"
1543 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1545 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1546 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1547 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1548 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1549 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1550 [(set_attr "type" "sselog")
1551 (set_attr "prefix" "vex")
1552 (set_attr "mode" "<avxvecmode>")])
;; Expander for the SSEMODEF2P modes (V4SF, V2DF) in the logical
;; operations section; its only preparation step is
;; ix86_fixup_binary_operands_no_copy.
1554 (define_expand "<code><mode>3"
1555 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1557 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1559 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1560 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE vector logical operation: emits <plogicprefix>p<suffix> in the
;; two-operand form.  Operand 1 is commutative and tied to the
;; destination ("%0"); the operands must pass ix86_binary_operator_ok.
1562 (define_insn "*<code><mode>3"
1563 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1565 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1566 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1567 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1568 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1569 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1570 [(set_attr "type" "sselog")
1571 (set_attr "mode" "<MODE>")])
1573 ;; Also define scalar versions.  These are used for abs, neg, and
1574 ;; conditional move.  Using subregs into vector modes causes register
1575 ;; allocation lossage.  These patterns do not allow memory operands
1576 ;; because the native instructions read the full 128 bits.
;; AVX scalar (MODEF) and-not: all three operands are registers, and the
;; "mode" attribute is the containing vector mode.
1578 (define_insn "*avx_andnot<mode>3"
1579 [(set (match_operand:MODEF 0 "register_operand" "=x")
1582 (match_operand:MODEF 1 "register_operand" "x"))
1583 (match_operand:MODEF 2 "register_operand" "x")))]
1584 "AVX_FLOAT_MODE_P (<MODE>mode)"
1585 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1586 [(set_attr "type" "sselog")
1587 (set_attr "prefix" "vex")
1588 (set_attr "mode" "<ssevecmode>")])
;; SSE scalar (MODEF) and-not: register-only operands, with operand 1
;; tied to the destination ("0").
1590 (define_insn "*andnot<mode>3"
1591 [(set (match_operand:MODEF 0 "register_operand" "=x")
1594 (match_operand:MODEF 1 "register_operand" "0"))
1595 (match_operand:MODEF 2 "register_operand" "x")))]
1596 "SSE_FLOAT_MODE_P (<MODE>mode)"
1597 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1598 [(set_attr "type" "sselog")
1599 (set_attr "mode" "<ssevecmode>")])
;; AVX scalar (MODEF) logical operation: register-only operands, emitted
;; as the packed v<plogicprefix>p<suffix> instruction.
1601 (define_insn "*avx_<code><mode>3"
1602 [(set (match_operand:MODEF 0 "register_operand" "=x")
1604 (match_operand:MODEF 1 "register_operand" "x")
1605 (match_operand:MODEF 2 "register_operand" "x")))]
1606 "AVX_FLOAT_MODE_P (<MODE>mode)"
1607 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1608 [(set_attr "type" "sselog")
1609 (set_attr "prefix" "vex")
1610 (set_attr "mode" "<ssevecmode>")])
;; SSE scalar (MODEF) logical operation: register-only operands, with
;; operand 1 tied to the destination ("0"), emitted as the packed
;; <plogicprefix>p<suffix> instruction.
1612 (define_insn "*<code><mode>3"
1613 [(set (match_operand:MODEF 0 "register_operand" "=x")
1615 (match_operand:MODEF 1 "register_operand" "0")
1616 (match_operand:MODEF 2 "register_operand" "x")))]
1617 "SSE_FLOAT_MODE_P (<MODE>mode)"
1618 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1619 [(set_attr "type" "sselog")
1620 (set_attr "mode" "<ssevecmode>")])
1622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1624 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1625 ;; scalar versions of the instructions as well as the vector versions.
1627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1629 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1630 ;; combine to generate a multiply/add with two memory references.  We then
1631 ;; split this insn into loading up the destination register with one of the
1632 ;; memory operations.  If we don't manage to split the insn, reload will
1633 ;; generate the appropriate moves.  The reason this is needed is that combine
1634 ;; has already folded one of the memory references into both the multiply and
1635 ;; add insns, and it can't generate a new pseudo.  I.e.:
1636 ;; (set (reg1) (mem (addr1)))
1637 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1638 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; Four constraint alternatives are provided; each ties the destination
;; to either operand 1 or operand 3.  ix86_sse5_valid_op_p is called with
;; 2 as its fifth argument here (the splitter below tests both 1 and 2).
1640 (define_insn "sse5_fmadd<mode>4"
1641 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1644 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1645 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1646 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1647 "TARGET_SSE5 && TARGET_FUSED_MADD
1648 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1649 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1650 [(set_attr "type" "ssemuladd")
1651 (set_attr "mode" "<MODE>")])
1653 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with 1 as the fifth
;; argument but pass with 2, and operand 0 is not mentioned in any source
;; operand.  ix86_expand_sse5_multiple_memory rewrites the operands
;; before the fmadd is re-emitted.
1655 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1658 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1659 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1660 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1662 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1663 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1664 && !reg_mentioned_p (operands[0], operands[1])
1665 && !reg_mentioned_p (operands[0], operands[2])
1666 && !reg_mentioned_p (operands[0], operands[3])"
1669 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1670 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1671 operands[2], operands[3]));
1675 ;; For the scalar operations, use operand1 for the upper words that aren't
1676 ;; modified, so restrict the forms that are generated.
1677 ;; Scalar version of fmadd
;; Both alternatives tie operand 1 to the destination ("0,0"), and
;; ix86_sse5_valid_op_p is called with 1 as its fifth argument.
1678 (define_insn "sse5_vmfmadd<mode>4"
1679 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1680 (vec_merge:SSEMODEF2P
1683 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1684 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1685 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1688 "TARGET_SSE5 && TARGET_FUSED_MADD
1689 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1690 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1691 [(set_attr "type" "ssemuladd")
1692 (set_attr "mode" "<MODE>")])
1694 ;; Floating multiply and subtract
1695 ;; Allow two memory operands the same as fmadd
;; Uses the same four constraint alternatives as sse5_fmadd<mode>4 and
;; emits fmsub<suffix>.
1696 (define_insn "sse5_fmsub<mode>4"
1697 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1700 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1701 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1702 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1703 "TARGET_SSE5 && TARGET_FUSED_MADD
1704 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1705 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1706 [(set_attr "type" "ssemuladd")
1707 (set_attr "mode" "<MODE>")])
1709 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with 1 as the fifth
;; argument but pass with 2, and operand 0 is not mentioned in any source
;; operand.  ix86_expand_sse5_multiple_memory rewrites the operands
;; before the fmsub is re-emitted.
1711 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1714 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1715 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1716 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1718 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1719 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1720 && !reg_mentioned_p (operands[0], operands[1])
1721 && !reg_mentioned_p (operands[0], operands[2])
1722 && !reg_mentioned_p (operands[0], operands[3])"
1725 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1726 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1727 operands[2], operands[3]));
1731 ;; For the scalar operations, use operand1 for the upper words that aren't
1732 ;; modified, so restrict the forms that are generated.
1733 ;; Scalar version of fmsub
;; Both alternatives tie operand 1 to the destination ("0,0").
;; NOTE(review): the last argument to ix86_sse5_valid_op_p is false here
;; but true in sse5_vmfmadd<mode>4 -- confirm this is intended.
1734 (define_insn "sse5_vmfmsub<mode>4"
1735 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1736 (vec_merge:SSEMODEF2P
1739 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1740 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1741 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1744 "TARGET_SSE5 && TARGET_FUSED_MADD
1745 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1746 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1747 [(set_attr "type" "ssemuladd")
1748 (set_attr "mode" "<MODE>")])
1750 ;; Floating point negative multiply and add
1751 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1752 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1753 ;; Allow two memory operands to help in optimizing.
;; Operand 3 (the addend c) appears first in the RTL, followed by the
;; multiplicands, operands 1 and 2.
1754 (define_insn "sse5_fnmadd<mode>4"
1755 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1757 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1759 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1760 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1761 "TARGET_SSE5 && TARGET_FUSED_MADD
1762 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1763 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1764 [(set_attr "type" "ssemuladd")
1765 (set_attr "mode" "<MODE>")])
1767 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Applies when the operands fail ix86_sse5_valid_op_p with 1 as the fifth
;; argument but pass with 2, and operand 0 is not mentioned in any source
;; operand.  ix86_expand_sse5_multiple_memory rewrites the operands
;; before the fnmadd is re-emitted.
1769 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1771 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1773 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1774 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1776 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1777 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1778 && !reg_mentioned_p (operands[0], operands[1])
1779 && !reg_mentioned_p (operands[0], operands[2])
1780 && !reg_mentioned_p (operands[0], operands[3])"
1783 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1784 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1785 operands[2], operands[3]));
1789 ;; For the scalar operations, use operand1 for the upper words that aren't
1790 ;; modified, so restrict the forms that are generated.
1791 ;; Scalar version of fnmadd
;; Operand 3 appears first in the RTL; both alternatives tie operand 1 to
;; the destination ("0,0").
1792 (define_insn "sse5_vmfnmadd<mode>4"
1793 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1794 (vec_merge:SSEMODEF2P
1796 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1798 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1799 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1802 "TARGET_SSE5 && TARGET_FUSED_MADD
1803 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1804 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1805 [(set_attr "type" "ssemuladd")
1806 (set_attr "mode" "<MODE>")])
1808 ;; Floating point negative multiply and subtract
1809 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1810 ;; Allow 2 memory operands to help with optimization
;; Only two constraint alternatives are provided, both tying operand 1 to
;; the destination; the last argument to ix86_sse5_valid_op_p is false.
1811 (define_insn "sse5_fnmsub<mode>4"
1812 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1816 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1817 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1818 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1819 "TARGET_SSE5 && TARGET_FUSED_MADD
1820 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1821 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1822 [(set_attr "type" "ssemuladd")
1823 (set_attr "mode" "<MODE>")])
1825 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies when the operands fail ix86_sse5_valid_op_p with 1 as the fifth
;; argument but pass with 2, and operand 0 is not mentioned in any source
;; operand.  ix86_expand_sse5_multiple_memory rewrites the operands
;; before gen_sse5_fnmsub<mode>4 is re-emitted.
1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1833 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1835 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1836 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1837 && !reg_mentioned_p (operands[0], operands[1])
1838 && !reg_mentioned_p (operands[0], operands[2])
1839 && !reg_mentioned_p (operands[0], operands[3])"
1842 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1843 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1844 operands[2], operands[3]));
1848 ;; For the scalar operations, use operand1 for the upper words that aren't
1849 ;; modified, so restrict the forms that are generated.
1850 ;; Scalar version of fnmsub
;; NOTE(review): the scalar siblings sse5_vmfmadd, sse5_vmfmsub and
;; sse5_vmfnmadd pass 1 as the fifth argument to ix86_sse5_valid_op_p,
;; whereas this pattern passes 2 -- confirm this is intended.
1851 (define_insn "sse5_vmfnmsub<mode>4"
1852 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1853 (vec_merge:SSEMODEF2P
1857 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1858 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1859 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1862 "TARGET_SSE5 && TARGET_FUSED_MADD
1863 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1864 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1865 [(set_attr "type" "ssemuladd")
1866 (set_attr "mode" "<MODE>")])
1868 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1869 ;; even if the user used -mno-fused-madd
1870 ;; Parallel instructions.  During instruction generation, just default
1871 ;; to registers, and let combine later build the appropriate instruction.
;; All operands are register operands here.  When TARGET_FUSED_MADD is
;; set, the body emits gen_sse5_fmadd<mode>4 with the same operands.
1872 (define_expand "sse5i_fmadd<mode>4"
1873 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1877 (match_operand:SSEMODEF2P 1 "register_operand" "")
1878 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1879 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1880 UNSPEC_SSE5_INTRINSIC))]
1883 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1884 if (TARGET_FUSED_MADD)
1886 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1887 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmadd.  Its condition requires
;; TARGET_SSE5 but not TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is
;; called with 1 as its fifth argument.
1892 (define_insn "*sse5i_fmadd<mode>4"
1893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1897 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1898 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1899 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1900 UNSPEC_SSE5_INTRINSIC))]
1901 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1902 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1903 [(set_attr "type" "ssemuladd")
1904 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for fmsub, wrapped in UNSPEC_SSE5_INTRINSIC.  All
;; operands are register operands.  When TARGET_FUSED_MADD is set, the
;; body emits gen_sse5_fmsub<mode>4 with the same operands.
1906 (define_expand "sse5i_fmsub<mode>4"
1907 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1911 (match_operand:SSEMODEF2P 1 "register_operand" "")
1912 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1913 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1914 UNSPEC_SSE5_INTRINSIC))]
1917 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1918 if (TARGET_FUSED_MADD)
1920 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1921 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of packed fmsub; emits
;; fmsub<ssemodesuffixf4>.  Four alternatives with the destination tied to
;; operand 1 (first two) or operand 3 (last two); operand 1 is marked "%".
;; Enabled by TARGET_SSE5 when ix86_sse5_valid_op_p accepts the operands.
1926 (define_insn "*sse5i_fmsub<mode>4"
1927 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1931 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1932 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1933 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1934 UNSPEC_SSE5_INTRINSIC))]
1935 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1936 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1937 [(set_attr "type" "ssemuladd")
1938 (set_attr "mode" "<MODE>")])
1940 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1941 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the packed fnmadd intrinsic.  Operand 3 appears first in the
;; pattern, ahead of operands 1 and 2.  With TARGET_FUSED_MADD the
;; preparation code emits gen_sse5_fnmadd<mode>4 with the operands in
;; numeric order (0, 1, 2, 3).
1942 (define_expand "sse5i_fnmadd<mode>4"
1943 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1946 (match_operand:SSEMODEF2P 3 "register_operand" "")
1948 (match_operand:SSEMODEF2P 1 "register_operand" "")
1949 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1950 UNSPEC_SSE5_INTRINSIC))]
1953 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1954 if (TARGET_FUSED_MADD)
1956 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1957 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of packed fnmadd; emits
;; fnmadd<ssemodesuffixf4>.  Operand 3 is listed first in the pattern, but
;; the constraint columns line up with the fmadd/fmsub matchers (destination
;; tied to operand 1 in alternatives 0-1 and to operand 3 in alternatives 2-3).
1962 (define_insn "*sse5i_fnmadd<mode>4"
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1966 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1968 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1969 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1970 UNSPEC_SSE5_INTRINSIC))]
1971 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1972 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
1976 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the packed fnmsub intrinsic: register operands 1-3 inside
;; UNSPEC_SSE5_INTRINSIC, with a TARGET_FUSED_MADD path that emits
;; gen_sse5_fnmsub<mode>4 instead.
1977 (define_expand "sse5i_fnmsub<mode>4"
1978 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1983 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1984 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1985 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1986 UNSPEC_SSE5_INTRINSIC))]
1989 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1990 if (TARGET_FUSED_MADD)
1992 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1993 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC form of packed fnmsub; emits
;; fnmsub<ssemodesuffixf4>.  Unlike the fmadd/fmsub/fnmadd matchers, operand 1
;; has no "%" modifier and the last argument to ix86_sse5_valid_op_p is false.
1998 (define_insn "*sse5i_fnmsub<mode>4"
1999 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2007 UNSPEC_SSE5_INTRINSIC))]
2008 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2009 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010 [(set_attr "type" "ssemuladd")
2011 (set_attr "mode" "<MODE>")])
2013 ;; Scalar instructions
;; Expander for the scalar (vec_merge) fmadd intrinsic.  The pattern is a
;; vec_merge inside UNSPEC_SSE5_INTRINSIC over register operands 1-3; with
;; TARGET_FUSED_MADD it emits gen_sse5_vmfmadd<mode>4 instead.
2014 (define_expand "sse5i_vmfmadd<mode>4"
2015 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2017 [(vec_merge:SSEMODEF2P
2020 (match_operand:SSEMODEF2P 1 "register_operand" "")
2021 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2022 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2025 UNSPEC_SSE5_INTRINSIC))]
2028 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2029 if (TARGET_FUSED_MADD)
2031 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2032 operands[2], operands[3]));
2037 ;; For the scalar operations, use operand1 for the upper words that aren't
2038 ;; modified, so restrict the forms that are accepted.
;; Matcher for the scalar UNSPEC fmadd; emits fmadd<ssemodesuffixf2s>.
;; Only two alternatives: operand 1 must be a register tied to the
;; destination ("0,0"), and the memory operand may be operand 3 or operand 2.
;; The "mode" attribute is the scalar mode (<ssescalarmode>).
2039 (define_insn "*sse5i_vmfmadd<mode>4"
2040 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2042 [(vec_merge:SSEMODEF2P
2045 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2046 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2047 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2050 UNSPEC_SSE5_INTRINSIC))]
2051 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2052 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2053 [(set_attr "type" "ssemuladd")
2054 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) fmsub intrinsic: vec_merge inside
;; UNSPEC_SSE5_INTRINSIC over register operands 1-3, with a
;; TARGET_FUSED_MADD path that emits gen_sse5_vmfmsub<mode>4 instead.
2056 (define_expand "sse5i_vmfmsub<mode>4"
2057 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2059 [(vec_merge:SSEMODEF2P
2062 (match_operand:SSEMODEF2P 1 "register_operand" "")
2063 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2064 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2067 UNSPEC_SSE5_INTRINSIC))]
2070 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2071 if (TARGET_FUSED_MADD)
2073 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2074 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC fmsub; emits fmsub<ssemodesuffixf2s>.
;; Two alternatives with operand 1 a register tied to the destination;
;; the "mode" attribute is <ssescalarmode>.
2079 (define_insn "*sse5i_vmfmsub<mode>4"
2080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2082 [(vec_merge:SSEMODEF2P
2085 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2087 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2090 UNSPEC_SSE5_INTRINSIC))]
2091 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2092 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2093 [(set_attr "type" "ssemuladd")
2094 (set_attr "mode" "<ssescalarmode>")])
2096 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the scalar (vec_merge) fnmadd intrinsic.  Operand 3 is
;; listed first in the pattern, ahead of operands 1 and 2.  With
;; TARGET_FUSED_MADD it emits gen_sse5_vmfnmadd<mode>4 with operands in
;; numeric order.
2097 (define_expand "sse5i_vmfnmadd<mode>4"
2098 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2100 [(vec_merge:SSEMODEF2P
2102 (match_operand:SSEMODEF2P 3 "register_operand" "")
2104 (match_operand:SSEMODEF2P 1 "register_operand" "")
2105 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2108 UNSPEC_SSE5_INTRINSIC))]
2111 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2112 if (TARGET_FUSED_MADD)
2114 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2115 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC fnmadd; emits fnmadd<ssemodesuffixf2s>.
;; NOTE(review): operand 1 uses predicate nonimmediate_operand with "%0,0"
;; and the validity call passes true as its last argument, whereas the other
;; scalar matchers (vmfmadd/vmfmsub/vmfnmsub) use register_operand "0,0" and
;; pass false -- confirm the difference is intended.
2120 (define_insn "*sse5i_vmfnmadd<mode>4"
2121 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2123 [(vec_merge:SSEMODEF2P
2125 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2127 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2131 UNSPEC_SSE5_INTRINSIC))]
2132 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2133 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2134 [(set_attr "type" "ssemuladd")
2135 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the scalar (vec_merge) fnmsub intrinsic: vec_merge inside
;; UNSPEC_SSE5_INTRINSIC over register operands 1-3, with a
;; TARGET_FUSED_MADD path that emits gen_sse5_vmfnmsub<mode>4 instead.
2137 (define_expand "sse5i_vmfnmsub<mode>4"
2138 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2140 [(vec_merge:SSEMODEF2P
2144 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2145 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2146 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2149 UNSPEC_SSE5_INTRINSIC))]
2152 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2153 if (TARGET_FUSED_MADD)
2155 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2156 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC fnmsub; emits fnmsub<ssemodesuffixf2s>.
;; Two alternatives with operand 1 a register tied to the destination;
;; the "mode" attribute is <ssescalarmode>.
2161 (define_insn "*sse5i_vmfnmsub<mode>4"
2162 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2164 [(vec_merge:SSEMODEF2P
2168 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2169 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2170 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2173 UNSPEC_SSE5_INTRINSIC))]
2174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2175 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2176 [(set_attr "type" "ssemuladd")
2177 (set_attr "mode" "<ssescalarmode>")])
2179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2181 ;; Parallel single-precision floating point conversion operations
2183 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: float-converts the V2SI operand 2 (constraint "ym") to V2SF.
;; Operand 1 is a V4SF register tied to the destination ("0"), so the
;; template only names %2 and %0.
2185 (define_insn "sse_cvtpi2ps"
2186 [(set (match_operand:V4SF 0 "register_operand" "=x")
2189 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2190 (match_operand:V4SF 1 "register_operand" "0")
2193 "cvtpi2ps\t{%2, %0|%0, %2}"
2194 [(set_attr "type" "ssecvt")
2195 (set_attr "mode" "V4SF")])
;; cvtps2pi: applies a V4SI unspec to the V4SF source and selects elements
;; 0 and 1 into the V2SI destination (constraint "y").  Tagged unit "mmx".
2197 (define_insn "sse_cvtps2pi"
2198 [(set (match_operand:V2SI 0 "register_operand" "=y")
2200 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2202 (parallel [(const_int 0) (const_int 1)])))]
2204 "cvtps2pi\t{%1, %0|%0, %1}"
2205 [(set_attr "type" "ssecvt")
2206 (set_attr "unit" "mmx")
2207 (set_attr "mode" "DI")])
;; cvttps2pi: (fix:V4SI ...) of the V4SF source, with elements 0 and 1
;; selected into the V2SI destination (constraint "y").  Tagged unit "mmx".
;; NOTE(review): the "mode" attribute here is "SF", while sse_cvtps2pi uses
;; "DI" -- confirm which is intended.
2209 (define_insn "sse_cvttps2pi"
2210 [(set (match_operand:V2SI 0 "register_operand" "=y")
2212 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2213 (parallel [(const_int 0) (const_int 1)])))]
2215 "cvttps2pi\t{%1, %0|%0, %1}"
2216 [(set_attr "type" "ssecvt")
2217 (set_attr "unit" "mmx")
2218 (set_attr "mode" "SF")])
;; vcvtsi2ss (VEX, three-operand form): float-converts SI operand 2 ("rm")
;; to SF.  Operand 1 is an independent V4SF register ("x"), not tied to the
;; destination.
2220 (define_insn "*avx_cvtsi2ss"
2221 [(set (match_operand:V4SF 0 "register_operand" "=x")
2224 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2225 (match_operand:V4SF 1 "register_operand" "x")
2228 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2229 [(set_attr "type" "sseicvt")
2230 (set_attr "prefix" "vex")
2231 (set_attr "mode" "SF")])
;; cvtsi2ss: float-converts SI operand 2 to SF.  Two alternatives (source in
;; a register "r" or in memory "m") so the Athlon/AMDFAM10 decode attributes
;; can differ per alternative.  Operand 1 is tied to the destination.
2233 (define_insn "sse_cvtsi2ss"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2237 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2238 (match_operand:V4SF 1 "register_operand" "0,0")
2241 "cvtsi2ss\t{%2, %0|%0, %2}"
2242 [(set_attr "type" "sseicvt")
2243 (set_attr "athlon_decode" "vector,double")
2244 (set_attr "amdfam10_decode" "vector,double")
2245 (set_attr "mode" "SF")])
;; vcvtsi2ssq (VEX, three-operand form): float-converts DI operand 2 to SF.
;; Requires TARGET_AVX && TARGET_64BIT.
2247 (define_insn "*avx_cvtsi2ssq"
2248 [(set (match_operand:V4SF 0 "register_operand" "=x")
2251 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2252 (match_operand:V4SF 1 "register_operand" "x")
2254 "TARGET_AVX && TARGET_64BIT"
2255 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2256 [(set_attr "type" "sseicvt")
2257 (set_attr "prefix" "vex")
2258 (set_attr "mode" "SF")])
;; cvtsi2ssq: float-converts DI operand 2 to SF; operand 1 is tied to the
;; destination.  Requires TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative of operand 2 is "rm", whereas
;; sse_cvtsi2ss uses "r,m"; the per-alternative decode attributes suggest the
;; split is register vs. memory -- confirm whether "rm" is intended.
2260 (define_insn "sse_cvtsi2ssq"
2261 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2264 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2265 (match_operand:V4SF 1 "register_operand" "0,0")
2267 "TARGET_SSE && TARGET_64BIT"
2268 "cvtsi2ssq\t{%2, %0|%0, %2}"
2269 [(set_attr "type" "sseicvt")
2270 (set_attr "athlon_decode" "vector,double")
2271 (set_attr "amdfam10_decode" "vector,double")
2272 (set_attr "mode" "SF")])
;; cvtss2si on element 0 of a V4SF operand, wrapped in UNSPEC_FIX_NOTRUNC,
;; producing an SI register.  The template carries the "%v" prefix and the
;; insn is tagged prefix "maybe_vex".
;; NOTE(review): no "amdfam10_decode" attribute is set here, although
;; sse_cvtss2si_2 sets one -- confirm whether the omission is deliberate.
2274 (define_insn "sse_cvtss2si"
2275 [(set (match_operand:SI 0 "register_operand" "=r,r")
2278 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2279 (parallel [(const_int 0)]))]
2280 UNSPEC_FIX_NOTRUNC))]
2282 "%vcvtss2si\t{%1, %0|%0, %1}"
2283 [(set_attr "type" "sseicvt")
2284 (set_attr "athlon_decode" "double,vector")
2285 (set_attr "prefix_rep" "1")
2286 (set_attr "prefix" "maybe_vex")
2287 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source is a plain SF operand (register or
;; memory) rather than an element of a V4SF vector; same
;; UNSPEC_FIX_NOTRUNC wrapper and same output template.
2289 (define_insn "sse_cvtss2si_2"
2290 [(set (match_operand:SI 0 "register_operand" "=r,r")
2291 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2292 UNSPEC_FIX_NOTRUNC))]
2294 "%vcvtss2si\t{%1, %0|%0, %1}"
2295 [(set_attr "type" "sseicvt")
2296 (set_attr "athlon_decode" "double,vector")
2297 (set_attr "amdfam10_decode" "double,double")
2298 (set_attr "prefix_rep" "1")
2299 (set_attr "prefix" "maybe_vex")
2300 (set_attr "mode" "SI")])
;; 64-bit form: cvtss2siq on element 0 of a V4SF operand, wrapped in
;; UNSPEC_FIX_NOTRUNC, producing a DI register.  Requires
;; TARGET_SSE && TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute here, unlike
;; sse_cvtss2siq_2 -- confirm.
2302 (define_insn "sse_cvtss2siq"
2303 [(set (match_operand:DI 0 "register_operand" "=r,r")
2306 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2307 (parallel [(const_int 0)]))]
2308 UNSPEC_FIX_NOTRUNC))]
2309 "TARGET_SSE && TARGET_64BIT"
2310 "%vcvtss2siq\t{%1, %0|%0, %1}"
2311 [(set_attr "type" "sseicvt")
2312 (set_attr "athlon_decode" "double,vector")
2313 (set_attr "prefix_rep" "1")
2314 (set_attr "prefix" "maybe_vex")
2315 (set_attr "mode" "DI")])
;; Variant of cvtss2siq whose source is a plain SF operand; DI result via
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE && TARGET_64BIT.
2317 (define_insn "sse_cvtss2siq_2"
2318 [(set (match_operand:DI 0 "register_operand" "=r,r")
2319 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2320 UNSPEC_FIX_NOTRUNC))]
2321 "TARGET_SSE && TARGET_64BIT"
2322 "%vcvtss2siq\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "sseicvt")
2324 (set_attr "athlon_decode" "double,vector")
2325 (set_attr "amdfam10_decode" "double,double")
2326 (set_attr "prefix_rep" "1")
2327 (set_attr "prefix" "maybe_vex")
2328 (set_attr "mode" "DI")])
;; cvttss2si on element 0 of a V4SF operand (register or memory), producing
;; an SI register.  Template uses the "%v" prefix; prefix attribute is
;; "maybe_vex".
2330 (define_insn "sse_cvttss2si"
2331 [(set (match_operand:SI 0 "register_operand" "=r,r")
2334 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2335 (parallel [(const_int 0)]))))]
2337 "%vcvttss2si\t{%1, %0|%0, %1}"
2338 [(set_attr "type" "sseicvt")
2339 (set_attr "athlon_decode" "double,vector")
2340 (set_attr "amdfam10_decode" "double,double")
2341 (set_attr "prefix_rep" "1")
2342 (set_attr "prefix" "maybe_vex")
2343 (set_attr "mode" "SI")])
;; 64-bit form: cvttss2siq on element 0 of a V4SF operand, producing a DI
;; register.  Requires TARGET_SSE && TARGET_64BIT.
2345 (define_insn "sse_cvttss2siq"
2346 [(set (match_operand:DI 0 "register_operand" "=r,r")
2349 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2350 (parallel [(const_int 0)]))))]
2351 "TARGET_SSE && TARGET_64BIT"
2352 "%vcvttss2siq\t{%1, %0|%0, %1}"
2353 [(set_attr "type" "sseicvt")
2354 (set_attr "athlon_decode" "double,vector")
2355 (set_attr "amdfam10_decode" "double,double")
2356 (set_attr "prefix_rep" "1")
2357 (set_attr "prefix" "maybe_vex")
2358 (set_attr "mode" "DI")])
;; vcvtdq2ps, iterated over AVXMODEDCVTDQ2PS: (float ...) of an operand in
;; the corresponding <avxcvtvecmode>.  VEX-encoded.
2360 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2361 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2362 (float:AVXMODEDCVTDQ2PS
2363 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2365 "vcvtdq2ps\t{%1, %0|%0, %1}"
2366 [(set_attr "type" "ssecvt")
2367 (set_attr "prefix" "vex")
2368 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: (float:V4SF ...) of a V4SI register or memory operand.
2370 (define_insn "sse2_cvtdq2ps"
2371 [(set (match_operand:V4SF 0 "register_operand" "=x")
2372 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2374 "cvtdq2ps\t{%1, %0|%0, %1}"
2375 [(set_attr "type" "ssecvt")
2376 (set_attr "mode" "V4SF")])
;; vcvtps2dq, iterated over AVXMODEDCVTPS2DQ: the source (in
;; <avxcvtvecmode>) is wrapped in UNSPEC_FIX_NOTRUNC.  VEX-encoded.
2378 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2379 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2380 (unspec:AVXMODEDCVTPS2DQ
2381 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2382 UNSPEC_FIX_NOTRUNC))]
2384 "vcvtps2dq\t{%1, %0|%0, %1}"
2385 [(set_attr "type" "ssecvt")
2386 (set_attr "prefix" "vex")
2387 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF source wrapped in UNSPEC_FIX_NOTRUNC, V4SI result.
;; Tagged with prefix_data16.
2389 (define_insn "sse2_cvtps2dq"
2390 [(set (match_operand:V4SI 0 "register_operand" "=x")
2391 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2392 UNSPEC_FIX_NOTRUNC))]
2394 "cvtps2dq\t{%1, %0|%0, %1}"
2395 [(set_attr "type" "ssecvt")
2396 (set_attr "prefix_data16" "1")
2397 (set_attr "mode" "TI")])
;; vcvttps2dq, iterated over AVXMODEDCVTPS2DQ: (fix ...) of the source in
;; <avxcvtvecmode>.  VEX-encoded.
2399 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2400 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2401 (fix:AVXMODEDCVTPS2DQ
2402 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2404 "vcvttps2dq\t{%1, %0|%0, %1}"
2405 [(set_attr "type" "ssecvt")
2406 (set_attr "prefix" "vex")
2407 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: (fix:V4SI ...) of a V4SF register or memory operand.
;; Tagged with prefix_rep.
2409 (define_insn "sse2_cvttps2dq"
2410 [(set (match_operand:V4SI 0 "register_operand" "=x")
2411 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2413 "cvttps2dq\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "ssecvt")
2415 (set_attr "prefix_rep" "1")
2416 (set_attr "mode" "TI")])
2418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420 ;; Parallel double-precision floating point conversion operations
2422 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF ...) of a V2SI operand.  Two alternatives ("y" or
;; "m" source); only the first is tagged unit "mmx".
2424 (define_insn "sse2_cvtpi2pd"
2425 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2426 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2428 "cvtpi2pd\t{%1, %0|%0, %1}"
2429 [(set_attr "type" "ssecvt")
2430 (set_attr "unit" "mmx,*")
2431 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF source wrapped in UNSPEC_FIX_NOTRUNC, V2SI result in a
;; "y"-constrained register.  Tagged unit "mmx" and prefix_data16.
2433 (define_insn "sse2_cvtpd2pi"
2434 [(set (match_operand:V2SI 0 "register_operand" "=y")
2435 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2436 UNSPEC_FIX_NOTRUNC))]
2438 "cvtpd2pi\t{%1, %0|%0, %1}"
2439 [(set_attr "type" "ssecvt")
2440 (set_attr "unit" "mmx")
2441 (set_attr "prefix_data16" "1")
2442 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI ...) of a V2DF operand, result in a "y"-constrained
;; register.  Tagged unit "mmx" and prefix_data16.
;; NOTE(review): "mode" is "TI" here but "DI" in sse2_cvtpd2pi -- confirm.
2444 (define_insn "sse2_cvttpd2pi"
2445 [(set (match_operand:V2SI 0 "register_operand" "=y")
2446 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2448 "cvttpd2pi\t{%1, %0|%0, %1}"
2449 [(set_attr "type" "ssecvt")
2450 (set_attr "unit" "mmx")
2451 (set_attr "prefix_data16" "1")
2452 (set_attr "mode" "TI")])
;; vcvtsi2sd (VEX, three-operand form): float-converts SI operand 2 ("rm")
;; to DF; operand 1 is an independent V2DF register.
2454 (define_insn "*avx_cvtsi2sd"
2455 [(set (match_operand:V2DF 0 "register_operand" "=x")
2458 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2459 (match_operand:V2DF 1 "register_operand" "x")
2462 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2463 [(set_attr "type" "sseicvt")
2464 (set_attr "prefix" "vex")
2465 (set_attr "mode" "DF")])
;; cvtsi2sd: float-converts SI operand 2 (register or memory alternative)
;; to DF; operand 1 is tied to the destination.  Decode attributes are
;; given per alternative.
2467 (define_insn "sse2_cvtsi2sd"
2468 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2471 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2472 (match_operand:V2DF 1 "register_operand" "0,0")
2475 "cvtsi2sd\t{%2, %0|%0, %2}"
2476 [(set_attr "type" "sseicvt")
2477 (set_attr "mode" "DF")
2478 (set_attr "athlon_decode" "double,direct")
2479 (set_attr "amdfam10_decode" "vector,double")])
;; vcvtsi2sdq (VEX, three-operand form): float-converts DI operand 2 to DF.
;; Requires TARGET_AVX && TARGET_64BIT.
2481 (define_insn "*avx_cvtsi2sdq"
2482 [(set (match_operand:V2DF 0 "register_operand" "=x")
2485 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2486 (match_operand:V2DF 1 "register_operand" "x")
2488 "TARGET_AVX && TARGET_64BIT"
2489 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2490 [(set_attr "type" "sseicvt")
2491 (set_attr "prefix" "vex")
2492 (set_attr "mode" "DF")])
;; cvtsi2sdq: float-converts DI operand 2 (register or memory alternative)
;; to DF; operand 1 is tied to the destination.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2494 (define_insn "sse2_cvtsi2sdq"
2495 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2498 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2499 (match_operand:V2DF 1 "register_operand" "0,0")
2501 "TARGET_SSE2 && TARGET_64BIT"
2502 "cvtsi2sdq\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "mode" "DF")
2505 (set_attr "athlon_decode" "double,direct")
2506 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on element 0 of a V2DF operand, wrapped in UNSPEC_FIX_NOTRUNC,
;; producing an SI register.  Template uses the "%v" prefix; prefix
;; attribute is "maybe_vex".
;; NOTE(review): no "amdfam10_decode" attribute here, unlike
;; sse2_cvtsd2si_2 -- confirm.
2508 (define_insn "sse2_cvtsd2si"
2509 [(set (match_operand:SI 0 "register_operand" "=r,r")
2512 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2513 (parallel [(const_int 0)]))]
2514 UNSPEC_FIX_NOTRUNC))]
2516 "%vcvtsd2si\t{%1, %0|%0, %1}"
2517 [(set_attr "type" "sseicvt")
2518 (set_attr "athlon_decode" "double,vector")
2519 (set_attr "prefix_rep" "1")
2520 (set_attr "prefix" "maybe_vex")
2521 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a plain DF operand (register or
;; memory); SI result via UNSPEC_FIX_NOTRUNC.
2523 (define_insn "sse2_cvtsd2si_2"
2524 [(set (match_operand:SI 0 "register_operand" "=r,r")
2525 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2526 UNSPEC_FIX_NOTRUNC))]
2528 "%vcvtsd2si\t{%1, %0|%0, %1}"
2529 [(set_attr "type" "sseicvt")
2530 (set_attr "athlon_decode" "double,vector")
2531 (set_attr "amdfam10_decode" "double,double")
2532 (set_attr "prefix_rep" "1")
2533 (set_attr "prefix" "maybe_vex")
2534 (set_attr "mode" "SI")])
;; 64-bit form: cvtsd2siq on element 0 of a V2DF operand, wrapped in
;; UNSPEC_FIX_NOTRUNC, producing a DI register.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute here, unlike
;; sse2_cvtsd2siq_2 -- confirm.
2536 (define_insn "sse2_cvtsd2siq"
2537 [(set (match_operand:DI 0 "register_operand" "=r,r")
2540 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2541 (parallel [(const_int 0)]))]
2542 UNSPEC_FIX_NOTRUNC))]
2543 "TARGET_SSE2 && TARGET_64BIT"
2544 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2545 [(set_attr "type" "sseicvt")
2546 (set_attr "athlon_decode" "double,vector")
2547 (set_attr "prefix_rep" "1")
2548 (set_attr "prefix" "maybe_vex")
2549 (set_attr "mode" "DI")])
;; Variant of cvtsd2siq whose source is a plain DF operand; DI result via
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT.
2551 (define_insn "sse2_cvtsd2siq_2"
2552 [(set (match_operand:DI 0 "register_operand" "=r,r")
2553 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2554 UNSPEC_FIX_NOTRUNC))]
2555 "TARGET_SSE2 && TARGET_64BIT"
2556 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2557 [(set_attr "type" "sseicvt")
2558 (set_attr "athlon_decode" "double,vector")
2559 (set_attr "amdfam10_decode" "double,double")
2560 (set_attr "prefix_rep" "1")
2561 (set_attr "prefix" "maybe_vex")
2562 (set_attr "mode" "DI")])
;; cvttsd2si on element 0 of a V2DF operand (register or memory), producing
;; an SI register.  Template uses the "%v" prefix; prefix attribute is
;; "maybe_vex".
2564 (define_insn "sse2_cvttsd2si"
2565 [(set (match_operand:SI 0 "register_operand" "=r,r")
2568 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2569 (parallel [(const_int 0)]))))]
2571 "%vcvttsd2si\t{%1, %0|%0, %1}"
2572 [(set_attr "type" "sseicvt")
2573 (set_attr "prefix_rep" "1")
2574 (set_attr "prefix" "maybe_vex")
2575 (set_attr "mode" "SI")
2576 (set_attr "athlon_decode" "double,vector")
2577 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit form: cvttsd2siq on element 0 of a V2DF operand, producing a DI
;; register.  Requires TARGET_SSE2 && TARGET_64BIT.
2579 (define_insn "sse2_cvttsd2siq"
2580 [(set (match_operand:DI 0 "register_operand" "=r,r")
2583 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2584 (parallel [(const_int 0)]))))]
2585 "TARGET_SSE2 && TARGET_64BIT"
2586 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2587 [(set_attr "type" "sseicvt")
2588 (set_attr "prefix_rep" "1")
2589 (set_attr "prefix" "maybe_vex")
2590 (set_attr "mode" "DI")
2591 (set_attr "athlon_decode" "double,vector")
2592 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd (256-bit result): (float:V4DF ...) of a V4SI register or
;; memory operand.  VEX-encoded.
2594 (define_insn "avx_cvtdq2pd256"
2595 [(set (match_operand:V4DF 0 "register_operand" "=x")
2596 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2598 "vcvtdq2pd\t{%1, %0|%0, %1}"
2599 [(set_attr "type" "ssecvt")
2600 (set_attr "prefix" "vex")
2601 (set_attr "mode" "V4DF")])
;; cvtdq2pd: uses elements 0 and 1 of the V4SI source to produce a V2DF
;; result.  Template uses the "%v" prefix; prefix attribute is "maybe_vex".
2603 (define_insn "sse2_cvtdq2pd"
2604 [(set (match_operand:V2DF 0 "register_operand" "=x")
2607 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2608 (parallel [(const_int 0) (const_int 1)]))))]
2610 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2611 [(set_attr "type" "ssecvt")
2612 (set_attr "prefix" "maybe_vex")
2613 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: V4DF operand wrapped in
;; UNSPEC_FIX_NOTRUNC, V4SI result.  The AT&T mnemonic carries a "{y}"
;; suffix.  VEX-encoded.
2615 (define_insn "avx_cvtpd2dq256"
2616 [(set (match_operand:V4SI 0 "register_operand" "=x")
2617 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2618 UNSPEC_FIX_NOTRUNC))]
2620 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2621 [(set_attr "type" "ssecvt")
2622 (set_attr "prefix" "vex")
2623 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq with a V4SI destination.  The preparation
;; statement sets operands[2] to the V2SI zero constant; the matching insn
;; (*sse2_cvtpd2dq) takes that as its const0_operand.
2625 (define_expand "sse2_cvtpd2dq"
2626 [(set (match_operand:V4SI 0 "register_operand" "")
2628 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2632 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvtpd2dq: V2SI unspec of the V2DF source combined with a
;; V2SI zero (operand 2) into a V4SI destination.  The output is chosen in
;; C: "vcvtpd2dq{x}" when TARGET_AVX, otherwise "cvtpd2dq".
2634 (define_insn "*sse2_cvtpd2dq"
2635 [(set (match_operand:V4SI 0 "register_operand" "=x")
2637 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2639 (match_operand:V2SI 2 "const0_operand" "")))]
2641 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2642 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2643 [(set_attr "type" "ssecvt")
2644 (set_attr "prefix_rep" "1")
2645 (set_attr "prefix" "maybe_vex")
2646 (set_attr "mode" "TI")
2647 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq with a 256-bit source: (fix:V4SI ...) of a V4DF operand.
;; The AT&T mnemonic carries a "{y}" suffix.  VEX-encoded.
2649 (define_insn "avx_cvttpd2dq256"
2650 [(set (match_operand:V4SI 0 "register_operand" "=x")
2651 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2653 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2654 [(set_attr "type" "ssecvt")
2655 (set_attr "prefix" "vex")
2656 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq with a V4SI destination.  The preparation
;; statement sets operands[2] to the V2SI zero constant; the matching insn
;; (*sse2_cvttpd2dq) takes that as its const0_operand.
2658 (define_expand "sse2_cvttpd2dq"
2659 [(set (match_operand:V4SI 0 "register_operand" "")
2661 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2664 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvttpd2dq: (fix:V2SI ...) of the V2DF source combined with a
;; V2SI zero (operand 2) into a V4SI destination.  The output is chosen in
;; C: "vcvttpd2dq{x}" when TARGET_AVX, otherwise "cvttpd2dq".
2666 (define_insn "*sse2_cvttpd2dq"
2667 [(set (match_operand:V4SI 0 "register_operand" "=x")
2669 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2670 (match_operand:V2SI 2 "const0_operand" "")))]
2672 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2673 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2674 [(set_attr "type" "ssecvt")
2675 (set_attr "prefix_rep" "1")
2676 (set_attr "prefix" "maybe_vex")
2677 (set_attr "mode" "TI")
2678 (set_attr "amdfam10_decode" "double")])
;; vcvtsd2ss (VEX, three-operand form): float_truncate of V2DF operand 2 to
;; V2SF; operand 1 is an independent V4SF register.
2680 (define_insn "*avx_cvtsd2ss"
2681 [(set (match_operand:V4SF 0 "register_operand" "=x")
2684 (float_truncate:V2SF
2685 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2686 (match_operand:V4SF 1 "register_operand" "x")
2689 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2690 [(set_attr "type" "ssecvt")
2691 (set_attr "prefix" "vex")
2692 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (register or memory
;; alternative) to V2SF; operand 1 is tied to the destination.  Decode
;; attributes are given per alternative.
2694 (define_insn "sse2_cvtsd2ss"
2695 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2698 (float_truncate:V2SF
2699 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2700 (match_operand:V4SF 1 "register_operand" "0,0")
2703 "cvtsd2ss\t{%2, %0|%0, %2}"
2704 [(set_attr "type" "ssecvt")
2705 (set_attr "athlon_decode" "vector,double")
2706 (set_attr "amdfam10_decode" "vector,double")
2707 (set_attr "mode" "SF")])
;; vcvtss2sd (VEX, three-operand form): uses elements 0 and 1 of V4SF
;; operand 2; operand 1 is an independent V2DF register.
2709 (define_insn "*avx_cvtss2sd"
2710 [(set (match_operand:V2DF 0 "register_operand" "=x")
2714 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2715 (parallel [(const_int 0) (const_int 1)])))
2716 (match_operand:V2DF 1 "register_operand" "x")
2719 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2720 [(set_attr "type" "ssecvt")
2721 (set_attr "prefix" "vex")
2722 (set_attr "mode" "DF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 (register or memory
;; alternative); operand 1 is tied to the destination.
2724 (define_insn "sse2_cvtss2sd"
2725 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2729 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2730 (parallel [(const_int 0) (const_int 1)])))
2731 (match_operand:V2DF 1 "register_operand" "0,0")
2734 "cvtss2sd\t{%2, %0|%0, %2}"
2735 [(set_attr "type" "ssecvt")
2736 (set_attr "amdfam10_decode" "vector,double")
2737 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate of a V4DF operand to
;; V4SF.  The AT&T mnemonic carries a "{y}" suffix.  VEX-encoded.
2739 (define_insn "avx_cvtpd2ps256"
2740 [(set (match_operand:V4SF 0 "register_operand" "=x")
2741 (float_truncate:V4SF
2742 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2744 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2745 [(set_attr "type" "ssecvt")
2746 (set_attr "prefix" "vex")
2747 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps with a V4SF destination.  The preparation
;; statement sets operands[2] to the V2SF zero constant; the matching insn
;; (*sse2_cvtpd2ps) takes that as its const0_operand.
2749 (define_expand "sse2_cvtpd2ps"
2750 [(set (match_operand:V4SF 0 "register_operand" "")
2752 (float_truncate:V2SF
2753 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2756 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for cvtpd2ps: float_truncate of the V2DF source to V2SF,
;; combined with a V2SF zero (operand 2) into a V4SF destination.  The
;; output is chosen in C: "vcvtpd2ps{x}" when TARGET_AVX, else "cvtpd2ps".
2758 (define_insn "*sse2_cvtpd2ps"
2759 [(set (match_operand:V4SF 0 "register_operand" "=x")
2761 (float_truncate:V2SF
2762 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2763 (match_operand:V2SF 2 "const0_operand" "")))]
2765 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2766 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2767 [(set_attr "type" "ssecvt")
2768 (set_attr "prefix_data16" "1")
2769 (set_attr "prefix" "maybe_vex")
2770 (set_attr "mode" "V4SF")
2771 (set_attr "amdfam10_decode" "double")])
;; vcvtps2pd (256-bit result): V4SF register or memory source, V4DF
;; destination.  VEX-encoded.
2773 (define_insn "avx_cvtps2pd256"
2774 [(set (match_operand:V4DF 0 "register_operand" "=x")
2776 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2778 "vcvtps2pd\t{%1, %0|%0, %1}"
2779 [(set_attr "type" "ssecvt")
2780 (set_attr "prefix" "vex")
2781 (set_attr "mode" "V4DF")])
;; cvtps2pd: uses elements 0 and 1 of the V4SF source to produce a V2DF
;; result.  Template uses the "%v" prefix; prefix attribute is "maybe_vex".
2783 (define_insn "sse2_cvtps2pd"
2784 [(set (match_operand:V2DF 0 "register_operand" "=x")
2787 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2788 (parallel [(const_int 0) (const_int 1)]))))]
2790 "%vcvtps2pd\t{%1, %0|%0, %1}"
2791 [(set_attr "type" "ssecvt")
2792 (set_attr "prefix" "maybe_vex")
2793 (set_attr "mode" "V2DF")
2794 (set_attr "amdfam10_decode" "direct")])
;; Expander for unpacking the high half of a V4SF into V2DF.  Two sets are
;; generated: a selection (starting at index 6) involving operand 1, then a
;; conversion of elements 0 and 1 into operand 0.  operands[2] is a fresh
;; V4SF pseudo -- presumably the intermediate between the two sets; the
;; lines that reference it are not visible here.
2796 (define_expand "vec_unpacks_hi_v4sf"
2801 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2802 (parallel [(const_int 6)
2806 (set (match_operand:V2DF 0 "register_operand" "")
2810 (parallel [(const_int 0) (const_int 1)]))))]
2813 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander for unpacking the low half of a V4SF into V2DF: elements 0 and
;; 1 of operand 1 feed the V2DF destination directly.
2816 (define_expand "vec_unpacks_lo_v4sf"
2817 [(set (match_operand:V2DF 0 "register_operand" "")
2820 (match_operand:V4SF 1 "nonimmediate_operand" "")
2821 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF, high half, signed: unpack into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
2824 (define_expand "vec_unpacks_float_hi_v8hi"
2825 [(match_operand:V4SF 0 "register_operand" "")
2826 (match_operand:V8HI 1 "register_operand" "")]
2829 rtx tmp = gen_reg_rtx (V4SImode);
2831 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2832 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, low half, signed: unpack into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
2836 (define_expand "vec_unpacks_float_lo_v8hi"
2837 [(match_operand:V4SF 0 "register_operand" "")
2838 (match_operand:V8HI 1 "register_operand" "")]
2841 rtx tmp = gen_reg_rtx (V4SImode);
2843 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2844 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, high half, unsigned unpack: gen_vec_unpacku_hi_v8hi into a
;; V4SI temporary, then gen_sse2_cvtdq2ps.
2848 (define_expand "vec_unpacku_float_hi_v8hi"
2849 [(match_operand:V4SF 0 "register_operand" "")
2850 (match_operand:V8HI 1 "register_operand" "")]
2853 rtx tmp = gen_reg_rtx (V4SImode);
2855 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2856 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, low half, unsigned unpack: gen_vec_unpacku_lo_v8hi into a
;; V4SI temporary, then gen_sse2_cvtdq2ps.
2860 (define_expand "vec_unpacku_float_lo_v8hi"
2861 [(match_operand:V4SF 0 "register_operand" "")
2862 (match_operand:V8HI 1 "register_operand" "")]
2865 rtx tmp = gen_reg_rtx (V4SImode);
2867 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2868 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander for converting the high half of a V4SI into V2DF.  Two sets are
;; generated: a selection (starting at index 2) of operand 1, then a
;; conversion of elements 0 and 1 into operand 0.  operands[2] is a fresh
;; V4SI pseudo -- presumably the intermediate between the two sets; the
;; lines that reference it are not visible here.
2872 (define_expand "vec_unpacks_float_hi_v4si"
2875 (match_operand:V4SI 1 "nonimmediate_operand" "")
2876 (parallel [(const_int 2)
2880 (set (match_operand:V2DF 0 "register_operand" "")
2884 (parallel [(const_int 0) (const_int 1)]))))]
2887 operands[2] = gen_reg_rtx (V4SImode);
;; Expander for converting the low half of a V4SI into V2DF: elements 0 and
;; 1 of operand 1 feed the V2DF destination directly.
2890 (define_expand "vec_unpacks_float_lo_v4si"
2891 [(set (match_operand:V2DF 0 "register_operand" "")
2894 (match_operand:V4SI 1 "nonimmediate_operand" "")
2895 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF operands into one V4SF: each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are combined into operand 0 with gen_sse_movlhps.
2898 (define_expand "vec_pack_trunc_v2df"
2899 [(match_operand:V4SF 0 "register_operand" "")
2900 (match_operand:V2DF 1 "nonimmediate_operand" "")
2901 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2906 r1 = gen_reg_rtx (V4SFmode);
2907 r2 = gen_reg_rtx (V4SFmode);
2909 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2910 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2911 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI using the truncating conversion:
;; each input goes through gen_sse2_cvttpd2dq into a V4SI temporary, and
;; the temporaries are merged with gen_sse2_punpcklqdq, all three operands
;; being viewed as V2DI via gen_lowpart.
2915 (define_expand "vec_pack_sfix_trunc_v2df"
2916 [(match_operand:V4SI 0 "register_operand" "")
2917 (match_operand:V2DF 1 "nonimmediate_operand" "")
2918 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2923 r1 = gen_reg_rtx (V4SImode);
2924 r2 = gen_reg_rtx (V4SImode);
2926 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2927 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2928 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2929 gen_lowpart (V2DImode, r1),
2930 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF operands into one V4SI using gen_sse2_cvtpd2dq (the
;; non-truncating conversion pattern): each input is converted into a V4SI
;; temporary, and the temporaries are merged with gen_sse2_punpcklqdq on
;; their V2DI views.
2934 (define_expand "vec_pack_sfix_v2df"
2935 [(match_operand:V4SI 0 "register_operand" "")
2936 (match_operand:V2DF 1 "nonimmediate_operand" "")
2937 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2942 r1 = gen_reg_rtx (V4SImode);
2943 r2 = gen_reg_rtx (V4SImode);
2945 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2946 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2947 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2948 gen_lowpart (V2DImode, r1),
2949 gen_lowpart (V2DImode, r2)));
2953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2955 ;; Parallel single-precision floating point element swizzling
2957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for movhlps: all three V4SF operands may be memory
;; at expand time; the preparation statement hands them to
;; ix86_fixup_binary_operands (with code UNKNOWN) before the pattern is
;; generated.
2959 (define_expand "sse_movhlps_exp"
2960 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2963 (match_operand:V4SF 1 "nonimmediate_operand" "")
2964 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2965 (parallel [(const_int 6)
2970 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps pattern with three alternatives:
;;   0: register/register      -> vmovhlps
;;   1: operand 2 in memory    -> vmovlps using %H2
;;   2: destination in memory  -> vmovhps store (operand 1 tied to dest)
;; Rejected when operands 1 and 2 are both memory.
2972 (define_insn "*avx_movhlps"
2973 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2976 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
2977 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2978 (parallel [(const_int 6)
2982 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2984 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2985 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2986 vmovhps\t{%2, %0|%0, %2}"
2987 [(set_attr "type" "ssemov")
2988 (set_attr "prefix" "vex")
2989 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE movhlps pattern; operand 1 is tied to the destination in every
;; alternative.  Alternatives: register form (movhlps), operand 2 in memory
;; (movlps using %H2), destination in memory (movhps store).
;; Rejected when operands 1 and 2 are both memory.
2991 (define_insn "sse_movhlps"
2992 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2995 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2996 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2997 (parallel [(const_int 6)
3001 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3003 movhlps\t{%2, %0|%0, %2}
3004 movlps\t{%H2, %0|%0, %H2}
3005 movhps\t{%2, %0|%0, %2}"
3006 [(set_attr "type" "ssemov")
3007 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the movlhps patterns.  Its preparation statement runs
;; ix86_fixup_binary_operands on the three V4SF operands.
3009 (define_expand "sse_movlhps_exp"
3010 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3013 (match_operand:V4SF 1 "nonimmediate_operand" "")
3014 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3015 (parallel [(const_int 0)
3020 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps.  Alternatives: all registers -> vmovlhps; operand 2 in
;; memory -> vmovhps load; offsettable memory destination (tied to
;; operand 1) -> vmovlps store to %H0.  Operand validity is checked with
;; ix86_binary_operator_ok.
3022 (define_insn "*avx_movlhps"
3023 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3026 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3027 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3028 (parallel [(const_int 0)
3032 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3034 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3035 vmovhps\t{%2, %1, %0|%0, %1, %2}
3036 vmovlps\t{%2, %H0|%H0, %2}"
3037 [(set_attr "type" "ssemov")
3038 (set_attr "prefix" "vex")
3039 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movlhps: operand 1 is tied to the destination in every
;; alternative.  Emits movlhps (reg), movhps (memory source) or movlps to
;; %H0 (memory destination).
3041 (define_insn "sse_movlhps"
3042 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3045 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3046 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3047 (parallel [(const_int 0)
3051 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3053 movlhps\t{%2, %0|%0, %2}
3054 movhps\t{%2, %0|%0, %2}
3055 movlps\t{%2, %H0|%H0, %2}"
3056 [(set_attr "type" "ssemov")
3057 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps on V8SF operands with element selector
;; (2 10 3 11 6 14 7 15).  Operand 2 may be a register or memory.
3059 (define_insn "avx_unpckhps256"
3060 [(set (match_operand:V8SF 0 "register_operand" "=x")
3063 (match_operand:V8SF 1 "register_operand" "x")
3064 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3065 (parallel [(const_int 2) (const_int 10)
3066 (const_int 3) (const_int 11)
3067 (const_int 6) (const_int 14)
3068 (const_int 7) (const_int 15)])))]
3070 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3071 [(set_attr "type" "sselog")
3072 (set_attr "prefix" "vex")
3073 (set_attr "mode" "V8SF")])
;; 128-bit three-operand vunpckhps on V4SF with element selector
;; (2 6 3 7).  Operand 2 may be a register or memory.
3075 (define_insn "*avx_unpckhps"
3076 [(set (match_operand:V4SF 0 "register_operand" "=x")
3079 (match_operand:V4SF 1 "register_operand" "x")
3080 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3081 (parallel [(const_int 2) (const_int 6)
3082 (const_int 3) (const_int 7)])))]
3084 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3085 [(set_attr "type" "sselog")
3086 (set_attr "prefix" "vex")
3087 (set_attr "mode" "V4SF")])
;; Legacy two-operand unpckhps on V4SF with element selector (2 6 3 7);
;; operand 1 is tied to the destination.
3089 (define_insn "sse_unpckhps"
3090 [(set (match_operand:V4SF 0 "register_operand" "=x")
3093 (match_operand:V4SF 1 "register_operand" "0")
3094 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3095 (parallel [(const_int 2) (const_int 6)
3096 (const_int 3) (const_int 7)])))]
3098 "unpckhps\t{%2, %0|%0, %2}"
3099 [(set_attr "type" "sselog")
3100 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps on V8SF operands with element selector
;; (0 8 1 9 4 12 5 13).  Operand 2 may be a register or memory.
3102 (define_insn "avx_unpcklps256"
3103 [(set (match_operand:V8SF 0 "register_operand" "=x")
3106 (match_operand:V8SF 1 "register_operand" "x")
3107 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3108 (parallel [(const_int 0) (const_int 8)
3109 (const_int 1) (const_int 9)
3110 (const_int 4) (const_int 12)
3111 (const_int 5) (const_int 13)])))]
3113 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3114 [(set_attr "type" "sselog")
3115 (set_attr "prefix" "vex")
3116 (set_attr "mode" "V8SF")])
;; 128-bit three-operand vunpcklps on V4SF with element selector
;; (0 4 1 5).  Operand 2 may be a register or memory.
3118 (define_insn "*avx_unpcklps"
3119 [(set (match_operand:V4SF 0 "register_operand" "=x")
3122 (match_operand:V4SF 1 "register_operand" "x")
3123 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3124 (parallel [(const_int 0) (const_int 4)
3125 (const_int 1) (const_int 5)])))]
3127 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3128 [(set_attr "type" "sselog")
3129 (set_attr "prefix" "vex")
3130 (set_attr "mode" "V4SF")])
;; Legacy two-operand unpcklps on V4SF with element selector (0 4 1 5);
;; operand 1 is tied to the destination.
3132 (define_insn "sse_unpcklps"
3133 [(set (match_operand:V4SF 0 "register_operand" "=x")
3136 (match_operand:V4SF 1 "register_operand" "0")
3137 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3138 (parallel [(const_int 0) (const_int 4)
3139 (const_int 1) (const_int 5)])))]
3141 "unpcklps\t{%2, %0|%0, %2}"
3142 [(set_attr "type" "sselog")
3143 (set_attr "mode" "V4SF")])
3145 ;; These are modeled with the same vec_concat as the others so that we
3146 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: selector (1 1 3 3 5 5 7 7) duplicates each
;; odd-indexed element of V8SF operand 1 (register or memory).
3147 (define_insn "avx_movshdup256"
3148 [(set (match_operand:V8SF 0 "register_operand" "=x")
3151 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3153 (parallel [(const_int 1) (const_int 1)
3154 (const_int 3) (const_int 3)
3155 (const_int 5) (const_int 5)
3156 (const_int 7) (const_int 7)])))]
3158 "vmovshdup\t{%1, %0|%0, %1}"
3159 [(set_attr "type" "sse")
3160 (set_attr "prefix" "vex")
3161 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF.  The %v template prefix together with the
;; maybe_vex attribute lets one pattern serve both the legacy and the
;; VEX encoding.
3163 (define_insn "sse3_movshdup"
3164 [(set (match_operand:V4SF 0 "register_operand" "=x")
3167 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3169 (parallel [(const_int 1)
3174 "%vmovshdup\t{%1, %0|%0, %1}"
3175 [(set_attr "type" "sse")
3176 (set_attr "prefix_rep" "1")
3177 (set_attr "prefix" "maybe_vex")
3178 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: selector (0 0 2 2 4 4 6 6) duplicates each
;; even-indexed element of V8SF operand 1 (register or memory).
3180 (define_insn "avx_movsldup256"
3181 [(set (match_operand:V8SF 0 "register_operand" "=x")
3184 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3186 (parallel [(const_int 0) (const_int 0)
3187 (const_int 2) (const_int 2)
3188 (const_int 4) (const_int 4)
3189 (const_int 6) (const_int 6)])))]
3191 "vmovsldup\t{%1, %0|%0, %1}"
3192 [(set_attr "type" "sse")
3193 (set_attr "prefix" "vex")
3194 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF.  The %v template prefix together with the
;; maybe_vex attribute lets one pattern serve both the legacy and the
;; VEX encoding.
3196 (define_insn "sse3_movsldup"
3197 [(set (match_operand:V4SF 0 "register_operand" "=x")
3200 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3202 (parallel [(const_int 0)
3207 "%vmovsldup\t{%1, %0|%0, %1}"
3208 [(set_attr "type" "sse")
3209 (set_attr "prefix_rep" "1")
3210 (set_attr "prefix" "maybe_vex")
3211 (set_attr "mode" "V4SF")])
;; Expander taking the 8-bit vshufps immediate (operand 3) and turning
;; its four 2-bit fields into eight explicit element selectors for
;; avx_shufps256_1.  Each field is used twice: once as-is (offset 0 or 8)
;; and once shifted by 4 (offset 4 or 12).
3213 (define_expand "avx_shufps256"
3214 [(match_operand:V8SF 0 "register_operand" "")
3215 (match_operand:V8SF 1 "register_operand" "")
3216 (match_operand:V8SF 2 "nonimmediate_operand" "")
3217 (match_operand:SI 3 "const_int_operand" "")]
3220 int mask = INTVAL (operands[3]);
3221 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3222 GEN_INT ((mask >> 0) & 3),
3223 GEN_INT ((mask >> 2) & 3),
3224 GEN_INT (((mask >> 4) & 3) + 8),
3225 GEN_INT (((mask >> 6) & 3) + 8),
3226 GEN_INT (((mask >> 0) & 3) + 4),
3227 GEN_INT (((mask >> 2) & 3) + 4),
3228 GEN_INT (((mask >> 4) & 3) + 12),
3229 GEN_INT (((mask >> 6) & 3) + 12)));
3233 ;; Each 2-bit mask field selects 2 elements (index N and index N + 4).
;; 256-bit vshufps with explicit element selectors (operands 3..10).
;; The insn condition requires selectors 7..10 to equal selectors 3..6
;; plus 4.  The output code rebuilds the 8-bit immediate from operands
;; 3..6 and stores it back into operands[3] for the template.
3234 (define_insn "avx_shufps256_1"
3235 [(set (match_operand:V8SF 0 "register_operand" "=x")
3238 (match_operand:V8SF 1 "register_operand" "x")
3239 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3240 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3241 (match_operand 4 "const_0_to_3_operand" "")
3242 (match_operand 5 "const_8_to_11_operand" "")
3243 (match_operand 6 "const_8_to_11_operand" "")
3244 (match_operand 7 "const_4_to_7_operand" "")
3245 (match_operand 8 "const_4_to_7_operand" "")
3246 (match_operand 9 "const_12_to_15_operand" "")
3247 (match_operand 10 "const_12_to_15_operand" "")])))]
3249 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3250 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3251 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3252 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3255 mask = INTVAL (operands[3]);
3256 mask |= INTVAL (operands[4]) << 2;
3257 mask |= (INTVAL (operands[5]) - 8) << 4;
3258 mask |= (INTVAL (operands[6]) - 8) << 6;
3259 operands[3] = GEN_INT (mask);
3261 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3263 [(set_attr "type" "sselog")
3264 (set_attr "prefix" "vex")
3265 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shufps immediate (operand 3) and turning its
;; four 2-bit fields into explicit selectors for sse_shufps_v4sf: the low
;; two fields are used as-is, the high two are offset by 4.
3267 (define_expand "sse_shufps"
3268 [(match_operand:V4SF 0 "register_operand" "")
3269 (match_operand:V4SF 1 "register_operand" "")
3270 (match_operand:V4SF 2 "nonimmediate_operand" "")
3271 (match_operand:SI 3 "const_int_operand" "")]
3274 int mask = INTVAL (operands[3]);
3275 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3276 GEN_INT ((mask >> 0) & 3),
3277 GEN_INT ((mask >> 2) & 3),
3278 GEN_INT (((mask >> 4) & 3) + 4),
3279 GEN_INT (((mask >> 6) & 3) + 4)));
;; Three-operand vshufps over the SSEMODE4S modes.  Selectors 3 and 4
;; index 0..3, selectors 5 and 6 index 4..7 of the operand 1 / operand 2
;; concatenation.  The output code packs them back into the 8-bit
;; immediate and stores it in operands[3].
3283 (define_insn "*avx_shufps_<mode>"
3284 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3285 (vec_select:SSEMODE4S
3286 (vec_concat:<ssedoublesizemode>
3287 (match_operand:SSEMODE4S 1 "register_operand" "x")
3288 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3289 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3290 (match_operand 4 "const_0_to_3_operand" "")
3291 (match_operand 5 "const_4_to_7_operand" "")
3292 (match_operand 6 "const_4_to_7_operand" "")])))]
3296 mask |= INTVAL (operands[3]) << 0;
3297 mask |= INTVAL (operands[4]) << 2;
3298 mask |= (INTVAL (operands[5]) - 4) << 4;
3299 mask |= (INTVAL (operands[6]) - 4) << 6;
3300 operands[3] = GEN_INT (mask);
3302 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3304 [(set_attr "type" "sselog")
3305 (set_attr "prefix" "vex")
3306 (set_attr "mode" "V4SF")])
;; Legacy two-operand shufps over the SSEMODE4S modes; operand 1 is tied
;; to the destination.  The output code packs selectors 3..6 back into
;; the 8-bit immediate and stores it in operands[3].
3308 (define_insn "sse_shufps_<mode>"
3309 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3310 (vec_select:SSEMODE4S
3311 (vec_concat:<ssedoublesizemode>
3312 (match_operand:SSEMODE4S 1 "register_operand" "0")
3313 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3314 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3315 (match_operand 4 "const_0_to_3_operand" "")
3316 (match_operand 5 "const_4_to_7_operand" "")
3317 (match_operand 6 "const_4_to_7_operand" "")])))]
3321 mask |= INTVAL (operands[3]) << 0;
3322 mask |= INTVAL (operands[4]) << 2;
3323 mask |= (INTVAL (operands[5]) - 4) << 4;
3324 mask |= (INTVAL (operands[6]) - 4) << 6;
3325 operands[3] = GEN_INT (mask);
3327 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3329 [(set_attr "type" "sselog")
3330 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 as a V2SF.  Alternatives:
;; register to memory -> movhps; register to register -> movhlps;
;; offsettable memory to register -> movlps from %H1.  All use the
;; %v / maybe_vex dual encoding.
3332 (define_insn "sse_storehps"
3333 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3335 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3336 (parallel [(const_int 2) (const_int 3)])))]
3339 %vmovhps\t{%1, %0|%0, %1}
3340 %vmovhlps\t{%1, %d0|%d0, %1}
3341 %vmovlps\t{%H1, %d0|%d0, %H1}"
3342 [(set_attr "type" "ssemov")
3343 (set_attr "prefix" "maybe_vex")
3344 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for the loadhps patterns: combines elements 0 and 1 of V4SF
;; operand 1 with V2SF operand 2.  Its preparation statement runs
;; ix86_fixup_binary_operands.
3346 (define_expand "sse_loadhps_exp"
3347 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3350 (match_operand:V4SF 1 "nonimmediate_operand" "")
3351 (parallel [(const_int 0) (const_int 1)]))
3352 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3354 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps.  Alternatives: operand 2 in memory -> vmovhps; operand 2
;; in a register -> vmovlhps; offsettable memory destination (tied to
;; operand 1) -> vmovlps store to %H0.
3356 (define_insn "*avx_loadhps"
3357 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3360 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3361 (parallel [(const_int 0) (const_int 1)]))
3362 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3365 vmovhps\t{%2, %1, %0|%0, %1, %2}
3366 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3367 vmovlps\t{%2, %H0|%H0, %2}"
3368 [(set_attr "type" "ssemov")
3369 (set_attr "prefix" "vex")
3370 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE loadhps; operand 1 is tied to the destination in every
;; alternative.  Emits movhps (memory source), movlhps (register source)
;; or movlps to %H0 (memory destination).
3372 (define_insn "sse_loadhps"
3373 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3376 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3377 (parallel [(const_int 0) (const_int 1)]))
3378 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3381 movhps\t{%2, %0|%0, %2}
3382 movlhps\t{%2, %0|%0, %2}
3383 movlps\t{%2, %H0|%H0, %2}"
3384 [(set_attr "type" "ssemov")
3385 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX extraction of elements 0 and 1 of V4SF operand 1 as a V2SF.
;; Alternatives: store to memory -> vmovlps; register copy -> vmovaps;
;; load from memory -> three-operand vmovlps.
;; NOTE(review): the "mode" of the vmovaps alternative is V2DF here but
;; V4SF in sse_storelps -- confirm whether the difference is intended.
3387 (define_insn "*avx_storelps"
3388 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3390 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3391 (parallel [(const_int 0) (const_int 1)])))]
3394 vmovlps\t{%1, %0|%0, %1}
3395 vmovaps\t{%1, %0|%0, %1}
3396 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3397 [(set_attr "type" "ssemov")
3398 (set_attr "prefix" "vex")
3399 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy SSE extraction of elements 0 and 1 of V4SF operand 1 as a V2SF.
;; Alternatives: store to memory -> movlps; register copy -> movaps;
;; load from memory -> movlps.
3401 (define_insn "sse_storelps"
3402 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3404 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3405 (parallel [(const_int 0) (const_int 1)])))]
3408 movlps\t{%1, %0|%0, %1}
3409 movaps\t{%1, %0|%0, %1}
3410 movlps\t{%1, %0|%0, %1}"
3411 [(set_attr "type" "ssemov")
3412 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for the loadlps patterns: combines V2SF operand 2 with
;; elements 2 and 3 of V4SF operand 1.  Its preparation statement runs
;; ix86_fixup_binary_operands.
3414 (define_expand "sse_loadlps_exp"
3415 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3417 (match_operand:V2SF 2 "nonimmediate_operand" "")
3419 (match_operand:V4SF 1 "nonimmediate_operand" "")
3420 (parallel [(const_int 2) (const_int 3)]))))]
3422 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: combines V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1.  Alternatives: both sources in registers -> vshufps with
;; immediate 0xe4; operand 2 in memory -> vmovlps load; memory destination
;; (tied to operand 1) -> vmovlps store.
;; The register alternative must use the VEX mnemonic vshufps: the legacy
;; shufps has no three-register form, and this pattern's prefix is "vex".
3424 (define_insn "*avx_loadlps"
3425 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3427 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3429 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3430 (parallel [(const_int 2) (const_int 3)]))))]
3433 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3434 vmovlps\t{%2, %1, %0|%0, %1, %2}
3435 vmovlps\t{%2, %0|%0, %2}"
3436 [(set_attr "type" "sselog,ssemov,ssemov")
3437 (set_attr "prefix" "vex")
3438 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE loadlps.  Alternatives: operand 2 tied to the destination
;; -> shufps with immediate 0xe4 taking operand 1 as source; operand 2 in
;; memory -> movlps load; memory destination -> movlps store.
3440 (define_insn "sse_loadlps"
3441 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3443 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3445 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3446 (parallel [(const_int 2) (const_int 3)]))))]
3449 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3450 movlps\t{%2, %0|%0, %2}
3451 movlps\t{%2, %0|%0, %2}"
3452 [(set_attr "type" "sselog,ssemov,ssemov")
3453 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three-operand vmovss between V4SF registers (scalar mode SF).
3455 (define_insn "*avx_movss"
3456 [(set (match_operand:V4SF 0 "register_operand" "=x")
3458 (match_operand:V4SF 2 "register_operand" "x")
3459 (match_operand:V4SF 1 "register_operand" "x")
3462 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3463 [(set_attr "type" "ssemov")
3464 (set_attr "prefix" "vex")
3465 (set_attr "mode" "SF")])
;; Legacy two-operand movss between V4SF registers; operand 1 is tied to
;; the destination.
3467 (define_insn "sse_movss"
3468 [(set (match_operand:V4SF 0 "register_operand" "=x")
3470 (match_operand:V4SF 2 "register_operand" "x")
3471 (match_operand:V4SF 1 "register_operand" "0")
3474 "movss\t{%2, %0|%0, %2}"
3475 [(set_attr "type" "ssemov")
3476 (set_attr "mode" "SF")])
;; AVX broadcast of SF register operand 1 into a V4SF, implemented as
;; vshufps with immediate 0 using operand 1 for both sources.
3478 (define_insn "*vec_dupv4sf_avx"
3479 [(set (match_operand:V4SF 0 "register_operand" "=x")
3481 (match_operand:SF 1 "register_operand" "x")))]
3483 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3484 [(set_attr "type" "sselog1")
3485 (set_attr "prefix" "vex")
3486 (set_attr "mode" "V4SF")])
;; Legacy SSE broadcast of SF operand 1 (tied to the destination) into a
;; V4SF, implemented as shufps with immediate 0 on the register itself.
3488 (define_insn "*vec_dupv4sf"
3489 [(set (match_operand:V4SF 0 "register_operand" "=x")
3491 (match_operand:SF 1 "register_operand" "0")))]
3493 "shufps\t{$0, %0, %0|%0, %0, 0}"
3494 [(set_attr "type" "sselog1")
3495 (set_attr "mode" "V4SF")])
;; AVX build of a V2SF from two SF values.  The three SSE-register
;; alternatives emit vunpcklps, vinsertps (operand 2 in memory) or vmovss
;; (operand 2 matching constraint C).  The two MMX-register alternatives
;; emit punpckldq / movd; their "prefix" attribute is "orig" instead of
;; "vex".
3497 (define_insn "*vec_concatv2sf_avx"
3498 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3500 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3501 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3504 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3505 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3506 vmovss\t{%1, %0|%0, %1}
3507 punpckldq\t{%2, %0|%0, %2}
3508 movd\t{%1, %0|%0, %1}"
3509 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3510 (set (attr "prefix")
3511 (if_then_else (eq_attr "alternative" "3,4")
3512 (const_string "orig")
3513 (const_string "vex")))
3514 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3516 ;; Although insertps takes register source, we prefer
3517 ;; unpcklps with register source since it is shorter.
;; Legacy-encoded build of a V2SF from two SF values with insertps
;; available.  SSE-register alternatives emit unpcklps, insertps (operand
;; 2 in memory; the only alternative with prefix_extra set) or movss;
;; MMX-register alternatives emit punpckldq / movd.
3518 (define_insn "*vec_concatv2sf_sse4_1"
3519 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3521 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3522 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3525 unpcklps\t{%2, %0|%0, %2}
3526 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3527 movss\t{%1, %0|%0, %1}
3528 punpckldq\t{%2, %0|%0, %2}
3529 movd\t{%1, %0|%0, %1}"
3530 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3531 (set_attr "prefix_extra" "*,1,*,*,*")
3532 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3534 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3535 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3536 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build of a V2SF from two SF values without insertps.  Operand 2 is
;; restricted to a register or zero (reg_or_0_operand).  SSE-register
;; alternatives emit unpcklps or movss; MMX-register alternatives emit
;; punpckldq or movd.
3537 (define_insn "*vec_concatv2sf_sse"
3538 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3540 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3541 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3544 unpcklps\t{%2, %0|%0, %2}
3545 movss\t{%1, %0|%0, %1}
3546 punpckldq\t{%2, %0|%0, %2}
3547 movd\t{%1, %0|%0, %1}"
3548 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3549 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX build of a V4SF from two V2SF halves: vmovlhps when operand 2 is a
;; register, vmovhps when it is in memory.
3551 (define_insn "*vec_concatv4sf_avx"
3552 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3554 (match_operand:V2SF 1 "register_operand" " x,x")
3555 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3558 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3559 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3560 [(set_attr "type" "ssemov")
3561 (set_attr "prefix" "vex")
3562 (set_attr "mode" "V4SF,V2SF")])
;; Legacy SSE build of a V4SF from two V2SF halves; operand 1 is tied to
;; the destination.  Emits movlhps (register operand 2) or movhps (memory
;; operand 2).
3564 (define_insn "*vec_concatv4sf_sse"
3565 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3567 (match_operand:V2SF 1 "register_operand" " 0,0")
3568 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3571 movlhps\t{%2, %0|%0, %2}
3572 movhps\t{%2, %0|%0, %2}"
3573 [(set_attr "type" "ssemov")
3574 (set_attr "mode" "V4SF,V2SF")])
;; vec_init expander for every SSEMODE vector mode; all work is delegated
;; to ix86_expand_vector_init, called with false as its first argument.
3576 (define_expand "vec_init<mode>"
3577 [(match_operand:SSEMODE 0 "register_operand" "")
3578 (match_operand 1 "" "")]
3581 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX pattern combining SF operand 2 with V4SF operand 1.  The first
;; three alternatives emit vmovss (register source), vmovss (memory
;; source) and vmovd (general-register source); the template for the
;; memory-destination alternative is not visible in this excerpt.
3585 (define_insn "*vec_setv4sf_0_avx"
3586 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3589 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3590 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3594 vmovss\t{%2, %1, %0|%0, %1, %2}
3595 vmovss\t{%2, %0|%0, %2}
3596 vmovd\t{%2, %0|%0, %2}
3598 [(set_attr "type" "ssemov")
3599 (set_attr "prefix" "vex")
3600 (set_attr "mode" "SF")])
;; Legacy SSE pattern combining SF operand 2 with V4SF operand 1.  The
;; first three alternatives emit movss, movss and movd (the movd
;; alternative uses the Y2 destination constraint); the template for the
;; memory-destination alternative is not visible in this excerpt.
3602 (define_insn "vec_setv4sf_0"
3603 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3606 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3607 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3611 movss\t{%2, %0|%0, %2}
3612 movss\t{%2, %0|%0, %2}
3613 movd\t{%2, %0|%0, %2}
3615 [(set_attr "type" "ssemov")
3616 (set_attr "mode" "SF")])
3618 ;; A subset is vec_setv4sf.
;; AVX element insert via vinsertps.  Operand 3 is a power of two in
;; 1..8; the output code converts it to the instruction immediate as
;; log2 (operand 3) << 4.
3619 (define_insn "*vec_setv4sf_avx"
3620 [(set (match_operand:V4SF 0 "register_operand" "=x")
3623 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3624 (match_operand:V4SF 1 "register_operand" "x")
3625 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3628 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3629 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3631 [(set_attr "type" "sselog")
3632 (set_attr "prefix" "vex")
3633 (set_attr "mode" "V4SF")])
;; Legacy-encoded element insert via insertps; operand 1 is tied to the
;; destination.  Operand 3 is a power of two in 1..8; the output code
;; converts it to the instruction immediate as log2 (operand 3) << 4.
3635 (define_insn "*vec_setv4sf_sse4_1"
3636 [(set (match_operand:V4SF 0 "register_operand" "=x")
3639 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3640 (match_operand:V4SF 1 "register_operand" "0")
3641 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3644 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3645 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3647 [(set_attr "type" "sselog")
3648 (set_attr "prefix_extra" "1")
3649 (set_attr "mode" "V4SF")])
;; AVX vinsertps modeled as an unspec, passing the full 8-bit immediate
;; (operand 3, 0..255) through unchanged.  Operand 2 may be a register or
;; memory.
3651 (define_insn "*avx_insertps"
3652 [(set (match_operand:V4SF 0 "register_operand" "=x")
3653 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3654 (match_operand:V4SF 1 "register_operand" "x")
3655 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3658 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3659 [(set_attr "type" "sselog")
3660 (set_attr "prefix" "vex")
3661 (set_attr "mode" "V4SF")])
;; Legacy insertps modeled as an unspec, passing the full 8-bit immediate
;; (operand 3, 0..255) through unchanged.  Operand 1 is tied to the
;; destination and operand 2 must be a register.
3663 (define_insn "sse4_1_insertps"
3664 [(set (match_operand:V4SF 0 "register_operand" "=x")
3665 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
3666 (match_operand:V4SF 1 "register_operand" "0")
3667 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3670 "insertps\t{%3, %2, %0|%0, %2, %3}";
3671 [(set_attr "type" "sselog")
3672 (set_attr "prefix_extra" "1")
3673 (set_attr "mode" "V4SF")])
;; Post-reload rewrite for a V4SF memory destination: operand 1 (SF, not
;; in memory) is stored with a plain SF move to offset 0 of operand 0.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt.
3676 [(set (match_operand:V4SF 0 "memory_operand" "")
3679 (match_operand:SF 1 "nonmemory_operand" ""))
3682 "TARGET_SSE && reload_completed"
3685 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_set expander for every SSEMODE vector mode: inserts scalar operand
;; 1 into vector operand 0 at the constant index in operand 2 by calling
;; ix86_expand_vector_set (first argument false).
3689 (define_expand "vec_set<mode>"
3690 [(match_operand:SSEMODE 0 "register_operand" "")
3691 (match_operand:<ssescalarmode> 1 "register_operand" "")
3692 (match_operand 2 "const_int_operand" "")]
3695 ix86_expand_vector_set (false, operands[0], operands[1],
3696 INTVAL (operands[2]));
;; Extracts element 0 of V4SF operand 1 as an SF.  Source and destination
;; may not both be memory.  After reload the insn is split into a plain
;; SF move: operand 1 is re-expressed in SFmode, either as the same hard
;; register or via gen_lowpart.
3700 (define_insn_and_split "*vec_extractv4sf_0"
3701 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3703 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3704 (parallel [(const_int 0)])))]
3705 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3707 "&& reload_completed"
3710 rtx op1 = operands[1];
3712 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3714 op1 = gen_lowpart (SFmode, op1);
3715 emit_move_insn (operands[0], op1);
;; Expander for vextractf128 over every AVX256MODE mode.  It switches on
;; the 0/1 immediate in operand 2 and emits either the
;; vec_extract_lo_<mode> or the vec_extract_hi_<mode> pattern.
3719 (define_expand "avx_vextractf128<mode>"
3720 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3721 (match_operand:AVX256MODE 1 "register_operand" "")
3722 (match_operand:SI 2 "const_0_to_1_operand" "")]
3725 switch (INTVAL (operands[2]))
3728 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
3731 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low-half extraction for the 4-element 256-bit modes (AVX256MODE4P):
;; selects elements 0 and 1 and emits vextractf128 with immediate 0.  The
;; destination may be a register or memory.
3739 (define_insn "vec_extract_lo_<mode>"
3740 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3741 (vec_select:<avxhalfvecmode>
3742 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3743 (parallel [(const_int 0) (const_int 1)])))]
3745 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3746 [(set_attr "type" "sselog")
3747 (set_attr "memory" "none,store")
3748 (set_attr "prefix" "vex")
3749 (set_attr "mode" "V8SF")])
;; High-half extraction for the 4-element 256-bit modes (AVX256MODE4P):
;; selects elements 2 and 3 and emits vextractf128 with immediate 1.  The
;; destination may be a register or memory.
3751 (define_insn "vec_extract_hi_<mode>"
3752 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3753 (vec_select:<avxhalfvecmode>
3754 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3755 (parallel [(const_int 2) (const_int 3)])))]
3757 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3758 [(set_attr "type" "sselog")
3759 (set_attr "memory" "none,store")
3760 (set_attr "prefix" "vex")
3761 (set_attr "mode" "V8SF")])
;; Low-half extraction for the 8-element 256-bit modes (AVX256MODE8P):
;; selects elements 0..3.  The vextractf128 immediate must be 0 so that
;; the emitted instruction reads the low 128 bits, as the 4-element
;; vec_extract_lo_<mode> pattern does; immediate 1 would return the high
;; half (elements 4..7) instead.
3763 (define_insn "vec_extract_lo_<mode>"
3764 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3765 (vec_select:<avxhalfvecmode>
3766 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3767 (parallel [(const_int 0) (const_int 1)
3768 (const_int 2) (const_int 3)])))]
3770 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3771 [(set_attr "type" "sselog")
3772 (set_attr "memory" "none,store")
3773 (set_attr "prefix" "vex")
3774 (set_attr "mode" "V8SF")])
;; High-half extraction for the 8-element 256-bit modes (AVX256MODE8P):
;; selects elements 4..7 and emits vextractf128 with immediate 1.  The
;; destination may be a register or memory.
3776 (define_insn "vec_extract_hi_<mode>"
3777 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3778 (vec_select:<avxhalfvecmode>
3779 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3780 (parallel [(const_int 4) (const_int 5)
3781 (const_int 6) (const_int 7)])))]
3783 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3784 [(set_attr "type" "sselog")
3785 (set_attr "memory" "none,store")
3786 (set_attr "prefix" "vex")
3787 (set_attr "mode" "V8SF")])
;; Low-half extraction of a V16HI: selects elements 0..7 into a V8HI.
;; The vextractf128 immediate must be 0 so that the emitted instruction
;; reads the low 128 bits; immediate 1 would return elements 8..15, which
;; is what vec_extract_hi_v16hi is for.
3789 (define_insn "vec_extract_lo_v16hi"
3790 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3792 (match_operand:V16HI 1 "register_operand" "x,x")
3793 (parallel [(const_int 0) (const_int 1)
3794 (const_int 2) (const_int 3)
3795 (const_int 4) (const_int 5)
3796 (const_int 6) (const_int 7)])))]
3798 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3799 [(set_attr "type" "sselog")
3800 (set_attr "memory" "none,store")
3801 (set_attr "prefix" "vex")
3802 (set_attr "mode" "V8SF")])
;; High-half extraction of a V16HI: selects elements 8..15 into a V8HI
;; and emits vextractf128 with immediate 1.
3804 (define_insn "vec_extract_hi_v16hi"
3805 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3807 (match_operand:V16HI 1 "register_operand" "x,x")
3808 (parallel [(const_int 8) (const_int 9)
3809 (const_int 10) (const_int 11)
3810 (const_int 12) (const_int 13)
3811 (const_int 14) (const_int 15)])))]
3813 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3814 [(set_attr "type" "sselog")
3815 (set_attr "memory" "none,store")
3816 (set_attr "prefix" "vex")
3817 (set_attr "mode" "V8SF")])
;; Low-half extraction of a V32QI: selects elements 0..15 into a V16QI.
;; The vextractf128 immediate must be 0 so that the emitted instruction
;; reads the low 128 bits; immediate 1 would return elements 16..31,
;; which is what vec_extract_hi_v32qi is for.
3819 (define_insn "vec_extract_lo_v32qi"
3820 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3822 (match_operand:V32QI 1 "register_operand" "x,x")
3823 (parallel [(const_int 0) (const_int 1)
3824 (const_int 2) (const_int 3)
3825 (const_int 4) (const_int 5)
3826 (const_int 6) (const_int 7)
3827 (const_int 8) (const_int 9)
3828 (const_int 10) (const_int 11)
3829 (const_int 12) (const_int 13)
3830 (const_int 14) (const_int 15)])))]
3832 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3833 [(set_attr "type" "sselog")
3834 (set_attr "memory" "none,store")
3835 (set_attr "prefix" "vex")
3836 (set_attr "mode" "V8SF")])
;; High-half extraction of a V32QI: selects elements 16..31 into a V16QI
;; and emits vextractf128 with immediate 1.
3838 (define_insn "vec_extract_hi_v32qi"
3839 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3841 (match_operand:V32QI 1 "register_operand" "x,x")
3842 (parallel [(const_int 16) (const_int 17)
3843 (const_int 18) (const_int 19)
3844 (const_int 20) (const_int 21)
3845 (const_int 22) (const_int 23)
3846 (const_int 24) (const_int 25)
3847 (const_int 26) (const_int 27)
3848 (const_int 28) (const_int 29)
3849 (const_int 30) (const_int 31)])))]
3851 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3852 [(set_attr "type" "sselog")
3853 (set_attr "memory" "none,store")
3854 (set_attr "prefix" "vex")
3855 (set_attr "mode" "V8SF")])
;; extractps: copies element operand 2 (0..3) of V4SF register operand 1
;; to an SF destination in a general register or memory.  Uses the
;; %v / maybe_vex dual encoding.
3857 (define_insn "*sse4_1_extractps"
3858 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3860 (match_operand:V4SF 1 "register_operand" "x")
3861 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3863 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3864 [(set_attr "type" "sselog")
3865 (set_attr "prefix_extra" "1")
3866 (set_attr "prefix" "maybe_vex")
3867 (set_attr "mode" "V4SF")])
;; Extraction of element operand 2 (0..3) from a V4SF in offsettable
;; memory.  The split replaces it with a plain SF load from byte offset
;; i * 4 of operand 1.
3869 (define_insn_and_split "*vec_extract_v4sf_mem"
3870 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3872 (match_operand:V4SF 1 "memory_operand" "o")
3873 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3879 int i = INTVAL (operands[2]);
3881 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; vec_extract expander for every SSEMODE vector mode: extracts the
;; element at constant index operand 2 of vector operand 1 into scalar
;; operand 0 by calling ix86_expand_vector_extract (first argument false).
3885 (define_expand "vec_extract<mode>"
3886 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3887 (match_operand:SSEMODE 1 "register_operand" "")
3888 (match_operand 2 "const_int_operand" "")]
3891 ix86_expand_vector_extract (false, operands[0], operands[1],
3892 INTVAL (operands[2]));
3896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3898 ;; Parallel double-precision floating point element swizzling
3900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vunpckhpd on V4DF operands with element selector (1 5 3 7).
;; Operand 2 may be a register or memory.
3902 (define_insn "avx_unpckhpd256"
3903 [(set (match_operand:V4DF 0 "register_operand" "=x")
3906 (match_operand:V4DF 1 "register_operand" "x")
3907 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3908 (parallel [(const_int 1) (const_int 5)
3909 (const_int 3) (const_int 7)])))]
3911 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3912 [(set_attr "type" "sselog")
3913 (set_attr "prefix" "vex")
3914 (set_attr "mode" "V4DF")])
;; Expander for the unpckhpd patterns.  Its preparation statement runs
;; ix86_fixup_binary_operands on the three V2DF operands.
3916 (define_expand "sse2_unpckhpd_exp"
3917 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
3920 (match_operand:V2DF 1 "nonimmediate_operand" "")
3921 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3922 (parallel [(const_int 1)
3925 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX unpckhpd.  Alternatives: all registers -> vunpckhpd; operand 1 in
;; offsettable memory -> vmovlpd reading %H1; memory destination (tied to
;; operand 2) -> vmovhpd store of operand 1.  Operands 1 and 2 may not
;; both be memory.
3927 (define_insn "*avx_unpckhpd"
3928 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3931 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
3932 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
3933 (parallel [(const_int 1)
3935 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3937 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3938 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3939 vmovhpd\t{%1, %0|%0, %1}"
3940 [(set_attr "type" "sselog,ssemov,ssemov")
3941 (set_attr "prefix" "vex")
3942 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Legacy SSE2 unpckhpd.  Alternatives: operand 1 tied to the destination
;; -> unpckhpd; operand 1 in offsettable memory with operand 2 tied ->
;; movlpd reading %H1; memory destination with operand 2 tied -> movhpd
;; store of operand 1.  Operands 1 and 2 may not both be memory.
3944 (define_insn "sse2_unpckhpd"
3945 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3948 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3949 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3950 (parallel [(const_int 1)
3952 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3954 unpckhpd\t{%2, %0|%0, %2}
3955 movlpd\t{%H1, %0|%0, %H1}
3956 movhpd\t{%1, %0|%0, %1}"
3957 [(set_attr "type" "sselog,ssemov,ssemov")
3958 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup: duplicates element 0 into result elements 0-1 and
;; element 2 into result elements 2-3 of V4DF operand 1.
;; The selector is (0 4 2 6), the same shape as avx_unpcklpd256.  The
;; earlier (0 2 4 6) described the result (s0 s2 s0 s2), which is not
;; what the instruction computes.
;; NOTE(review): this relies on the wrapper lines not visible in this
;; excerpt being a vec_concat of operand 1 with itself -- confirm.
3960 (define_insn "avx_movddup256"
3961 [(set (match_operand:V4DF 0 "register_operand" "=x")
3964 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
3966 (parallel [(const_int 0) (const_int 4)
3967 (const_int 2) (const_int 6)])))]
3969 "vmovddup\t{%1, %0|%0, %1}"
3970 [(set_attr "type" "sselog1")
3971 (set_attr "prefix" "vex")
3972 (set_attr "mode" "V4DF")])
;; 128-bit AVX movddup on V2DF.  The register-destination alternative
;; emits vmovddup; the template for the memory-destination alternative is
;; not visible in this excerpt.  Operands 0 and 1 may not both be memory.
3974 (define_insn "*avx_movddup"
3975 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3978 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3980 (parallel [(const_int 0)
3982 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3984 vmovddup\t{%1, %0|%0, %1}
3986 [(set_attr "type" "sselog1,ssemov")
3987 (set_attr "prefix" "vex")
3988 (set_attr "mode" "V2DF")])
;; Legacy SSE3 movddup on V2DF.  The register-destination alternative
;; emits movddup; the template for the memory-destination alternative is
;; not visible in this excerpt.  Operands 0 and 1 may not both be memory.
3990 (define_insn "*sse3_movddup"
3991 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3994 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3996 (parallel [(const_int 0)
3998 "TARGET_SSE3 && reload_completed"
4000 movddup\t{%1, %0|%0, %1}
4002 [(set_attr "type" "sselog1,ssemov")
4003 (set_attr "mode" "V2DF")])
;; Post-reload rewrite of a movddup to memory: the low DF of register
;; operand 1 is stored twice, at byte offsets 0 and 8 of memory operand 0.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt.
4006 [(set (match_operand:V2DF 0 "memory_operand" "")
4009 (match_operand:V2DF 1 "register_operand" "")
4011 (parallel [(const_int 0)
4013 "TARGET_SSE3 && reload_completed"
4016 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4017 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4018 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; 256-bit vunpcklpd on V4DF operands with element selector (0 4 2 6).
;; Operand 2 may be a register or memory.
4022 (define_insn "avx_unpcklpd256"
4023 [(set (match_operand:V4DF 0 "register_operand" "=x")
4026 (match_operand:V4DF 1 "register_operand" "x")
4027 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4028 (parallel [(const_int 0) (const_int 4)
4029 (const_int 2) (const_int 6)])))]
4031 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4032 [(set_attr "type" "sselog")
4033 (set_attr "prefix" "vex")
4034 (set_attr "mode" "V4DF")])
;; Expander for the unpcklpd patterns.  Its preparation statement runs
;; ix86_fixup_binary_operands on the three V2DF operands.
4036 (define_expand "sse2_unpcklpd_exp"
4037 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4040 (match_operand:V2DF 1 "nonimmediate_operand" "")
4041 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4042 (parallel [(const_int 0)
4045 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX unpcklpd.  Alternatives: all registers -> vunpcklpd; operand 2 in
;; memory -> vmovhpd load; offsettable memory destination (tied to
;; operand 1) -> vmovlpd store to %H0.  Operands 1 and 2 may not both be
;; memory.
4047 (define_insn "*avx_unpcklpd"
4048 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4051 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4052 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4053 (parallel [(const_int 0)
4055 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4057 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4058 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4059 vmovlpd\t{%2, %H0|%H0, %2}"
4060 [(set_attr "type" "sselog,ssemov,ssemov")
4061 (set_attr "prefix" "vex")
4062 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Legacy SSE2 unpcklpd; operand 1 is tied to the destination in every
;; alternative.  Emits unpcklpd (register), movhpd (memory source) or
;; movlpd to %H0 (memory destination).  Operands 1 and 2 may not both be
;; memory.
4064 (define_insn "sse2_unpcklpd"
4065 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4068 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4069 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4070 (parallel [(const_int 0)
4072 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4074 unpcklpd\t{%2, %0|%0, %2}
4075 movhpd\t{%2, %0|%0, %2}
4076 movlpd\t{%2, %H0|%H0, %2}"
4077 [(set_attr "type" "sselog,ssemov,ssemov")
4078 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the vshufpd immediate (operand 3) and turning its bits
;; into explicit element selectors for avx_shufpd256_1.  In the visible
;; arguments, bit 1 chooses 5 or 4, bit 2 chooses 3 or 2 and bit 3
;; chooses 7 or 6; the argument built from bit 0 is not visible in this
;; excerpt.
4080 (define_expand "avx_shufpd256"
4081 [(match_operand:V4DF 0 "register_operand" "")
4082 (match_operand:V4DF 1 "register_operand" "")
4083 (match_operand:V4DF 2 "nonimmediate_operand" "")
4084 (match_operand:SI 3 "const_int_operand" "")]
4087 int mask = INTVAL (operands[3]);
4088 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4090 GEN_INT (mask & 2 ? 5 : 4),
4091 GEN_INT (mask & 4 ? 3 : 2),
4092 GEN_INT (mask & 8 ? 7 : 6)));
;; V4DF shuffle with explicit element selectors.  Operands 3-6 are constants
;; restricted to 0-1, 4-5, 2-3 and 6-7 respectively; the output code folds
;; them back into a 4-bit immediate, stores it in operands[3], and returns
;; the vshufpd template.
4096 (define_insn "avx_shufpd256_1"
4097 [(set (match_operand:V4DF 0 "register_operand" "=x")
4100 (match_operand:V4DF 1 "register_operand" "x")
4101 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4102 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4103 (match_operand 4 "const_4_to_5_operand" "")
4104 (match_operand 5 "const_2_to_3_operand" "")
4105 (match_operand 6 "const_6_to_7_operand" "")])))]
4109 mask = INTVAL (operands[3]);
4110 mask |= (INTVAL (operands[4]) - 4) << 1;
4111 mask |= (INTVAL (operands[5]) - 2) << 2;
4112 mask |= (INTVAL (operands[6]) - 6) << 3;
4113 operands[3] = GEN_INT (mask);
4115 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4117 [(set_attr "type" "sselog")
4118 (set_attr "prefix" "vex")
4119 (set_attr "mode" "V4DF")])
;; Expander for V2DF shufpd with an immediate mask in operand 3 (a
;; const_int).  Emits gen_sse2_shufpd_v2df; the last index passed is 3 when
;; mask & 2 is set and 2 otherwise.
4121 (define_expand "sse2_shufpd"
4122 [(match_operand:V2DF 0 "register_operand" "")
4123 (match_operand:V2DF 1 "register_operand" "")
4124 (match_operand:V2DF 2 "nonimmediate_operand" "")
4125 (match_operand:SI 3 "const_int_operand" "")]
4128 int mask = INTVAL (operands[3]);
4129 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4131 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander over the SSEMODE4S modes: a vec_select from the double-width
;; vec_concat of operands 1 and 2, with a selector that starts at element 0.
4135 (define_expand "vec_extract_even<mode>"
4136 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4137 (vec_select:SSEMODE4S
4138 (vec_concat:<ssedoublesizemode>
4139 (match_operand:SSEMODE4S 1 "register_operand" "")
4140 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4141 (parallel [(const_int 0)
;; Expander over the SSEMODE4S modes: a vec_select from the double-width
;; vec_concat of operands 1 and 2, with a selector that starts at element 1.
4147 (define_expand "vec_extract_odd<mode>"
4148 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4149 (vec_select:SSEMODE4S
4150 (vec_concat:<ssedoublesizemode>
4151 (match_operand:SSEMODE4S 1 "register_operand" "")
4152 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4153 (parallel [(const_int 1)
;; Expander over the SSEMODE2D modes: a vec_select from the double-width
;; vec_concat of operands 1 and 2, with a selector that starts at element 0.
4159 (define_expand "vec_extract_even<mode>"
4160 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4161 (vec_select:SSEMODE2D
4162 (vec_concat:<ssedoublesizemode>
4163 (match_operand:SSEMODE2D 1 "register_operand" "")
4164 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4165 (parallel [(const_int 0)
;; Expander over the SSEMODE2D modes: a vec_select from the double-width
;; vec_concat of operands 1 and 2, with a selector that starts at element 1.
4169 (define_expand "vec_extract_odd<mode>"
4170 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4171 (vec_select:SSEMODE2D
4172 (vec_concat:<ssedoublesizemode>
4173 (match_operand:SSEMODE2D 1 "register_operand" "")
4174 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4175 (parallel [(const_int 1)
4179 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX V2DI unpack-high quadword: three-operand vpunpckhqdq, operand 2 may be
;; in memory.  The selector starts at element 1.
4180 (define_insn "*avx_punpckhqdq"
4181 [(set (match_operand:V2DI 0 "register_operand" "=x")
4184 (match_operand:V2DI 1 "register_operand" "x")
4185 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4186 (parallel [(const_int 1)
4189 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4190 [(set_attr "type" "sselog")
4191 (set_attr "prefix" "vex")
4192 (set_attr "mode" "TI")])
;; SSE2 V2DI unpack-high quadword: two-operand punpckhqdq with operand 1 tied
;; to the destination; operand 2 may be in memory.  The selector starts at
;; element 1.
4194 (define_insn "sse2_punpckhqdq"
4195 [(set (match_operand:V2DI 0 "register_operand" "=x")
4198 (match_operand:V2DI 1 "register_operand" "0")
4199 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4200 (parallel [(const_int 1)
4203 "punpckhqdq\t{%2, %0|%0, %2}"
4204 [(set_attr "type" "sselog")
4205 (set_attr "prefix_data16" "1")
4206 (set_attr "mode" "TI")])
;; AVX V2DI unpack-low quadword: three-operand vpunpcklqdq, operand 2 may be
;; in memory.  The selector starts at element 0.
4208 (define_insn "*avx_punpcklqdq"
4209 [(set (match_operand:V2DI 0 "register_operand" "=x")
4212 (match_operand:V2DI 1 "register_operand" "x")
4213 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4214 (parallel [(const_int 0)
4217 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4218 [(set_attr "type" "sselog")
4219 (set_attr "prefix" "vex")
4220 (set_attr "mode" "TI")])
;; SSE2 V2DI unpack-low quadword: two-operand punpcklqdq with operand 1 tied
;; to the destination; operand 2 may be in memory.  The selector starts at
;; element 0.
4222 (define_insn "sse2_punpcklqdq"
4223 [(set (match_operand:V2DI 0 "register_operand" "=x")
4226 (match_operand:V2DI 1 "register_operand" "0")
4227 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4228 (parallel [(const_int 0)
4231 "punpcklqdq\t{%2, %0|%0, %2}"
4232 [(set_attr "type" "sselog")
4233 (set_attr "prefix_data16" "1")
4234 (set_attr "mode" "TI")])
;; AVX shufpd over the SSEMODE2D modes.  Selector operands 3 and 4 are
;; constants restricted to 0-1 and 2-3; the output code packs them into a
;; 2-bit immediate in operands[3] and returns the three-operand vshufpd
;; template.
4236 (define_insn "*avx_shufpd_<mode>"
4237 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4238 (vec_select:SSEMODE2D
4239 (vec_concat:<ssedoublesizemode>
4240 (match_operand:SSEMODE2D 1 "register_operand" "x")
4241 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4242 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4243 (match_operand 4 "const_2_to_3_operand" "")])))]
4247 mask = INTVAL (operands[3]);
4248 mask |= (INTVAL (operands[4]) - 2) << 1;
4249 operands[3] = GEN_INT (mask);
4251 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4253 [(set_attr "type" "sselog")
4254 (set_attr "prefix" "vex")
4255 (set_attr "mode" "V2DF")])
;; SSE2 shufpd over the SSEMODE2D modes, with operand 1 tied to the
;; destination.  Selector operands 3 and 4 are constants restricted to 0-1
;; and 2-3; the output code packs them into a 2-bit immediate in operands[3]
;; and returns the two-operand shufpd template.
4257 (define_insn "sse2_shufpd_<mode>"
4258 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4259 (vec_select:SSEMODE2D
4260 (vec_concat:<ssedoublesizemode>
4261 (match_operand:SSEMODE2D 1 "register_operand" "0")
4262 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4263 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4264 (match_operand 4 "const_2_to_3_operand" "")])))]
4268 mask = INTVAL (operands[3]);
4269 mask |= (INTVAL (operands[4]) - 2) << 1;
4270 operands[3] = GEN_INT (mask);
4272 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4274 [(set_attr "type" "sselog")
4275 (set_attr "mode" "V2DF")])
4277 ;; Avoid combining registers from different units in a single alternative,
4278 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of a V2DF into a DF destination.  Alternative 0 stores
;; to memory with vmovhpd and alternative 1 uses vunpckhpd on the source
;; register.  Source and destination may not both be memory.
4279 (define_insn "*avx_storehpd"
4280 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4282 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4283 (parallel [(const_int 1)])))]
4284 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4286 vmovhpd\t{%1, %0|%0, %1}
4287 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4291 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4292 (set_attr "prefix" "vex")
4293 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF into a DF destination.
;; Alternative 0 stores to memory with movhpd; alternative 1 ties the source
;; register to the destination.  Source and destination may not both be
;; memory.
4295 (define_insn "sse2_storehpd"
4296 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4298 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4299 (parallel [(const_int 1)])))]
4300 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4302 movhpd\t{%1, %0|%0, %1}
4307 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4308 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, loading element 1 of a V2DF memory operand into a DF
;; register becomes a plain DFmode move from that memory at byte offset 8.
4311 [(set (match_operand:DF 0 "register_operand" "")
4313 (match_operand:V2DF 1 "memory_operand" "")
4314 (parallel [(const_int 1)])))]
4315 "TARGET_SSE2 && reload_completed"
4316 [(set (match_dup 0) (match_dup 1))]
4318 operands[1] = adjust_address (operands[1], DFmode, 8);
4321 ;; Avoid combining registers from different units in a single alternative,
4322 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of a V2DF into a DF destination.  Alternative 0 stores
;; to memory with %vmovlpd (the "prefix" attribute is maybe_vex).  Source and
;; destination may not both be memory.
4323 (define_insn "sse2_storelpd"
4324 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4326 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4327 (parallel [(const_int 0)])))]
4328 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4330 %vmovlpd\t{%1, %0|%0, %1}
4335 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4336 (set_attr "prefix" "maybe_vex")
4337 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, extracting element 0 into a DF register becomes an ordinary
;; move: operand 1 is re-expressed in DFmode (gen_rtx_REG on its register
;; number, or gen_lowpart) and copied with emit_move_insn.
4340 [(set (match_operand:DF 0 "register_operand" "")
4342 (match_operand:V2DF 1 "nonimmediate_operand" "")
4343 (parallel [(const_int 0)])))]
4344 "TARGET_SSE2 && reload_completed"
4347 rtx op1 = operands[1];
4349 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4351 op1 = gen_lowpart (DFmode, op1);
4352 emit_move_insn (operands[0], op1);
;; Expander that combines element 0 of V2DF operand 1 with DF operand 2.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands (code UNKNOWN, V2DFmode).
4356 (define_expand "sse2_loadhpd_exp"
4357 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4360 (match_operand:V2DF 1 "nonimmediate_operand" "")
4361 (parallel [(const_int 0)]))
4362 (match_operand:DF 2 "nonimmediate_operand" "")))]
4364 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4366 ;; Avoid combining registers from different units in a single alternative,
4367 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form: element 0 of operand 1 combined with DF operand 2.  Alternative
;; 0 (operand 2 in memory) emits vmovhpd and alternative 1 (operand 2 in an
;; SSE register) emits vunpcklpd; the remaining alternatives have an
;; offsettable memory destination tied to operand 1.  Operands 1 and 2 may
;; not both be memory.
4368 (define_insn "*avx_loadhpd"
4369 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4372 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4373 (parallel [(const_int 0)]))
4374 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4375 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4377 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4378 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4382 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4383 (set_attr "prefix" "vex")
4384 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 form: element 0 of operand 1 combined with DF operand 2.  The
;; register-destination alternatives emit movhpd, unpcklpd and shufpd $1;
;; the remaining alternatives have an offsettable memory destination tied to
;; operand 1.  Operands 1 and 2 may not both be memory.
;; NOTE(review): in the shufpd alternative operand 2 is tied to the
;; destination, and shufpd takes its low result element from the destination
;; register -- confirm that this alternative really leaves element 0 of
;; operand 1 in the low half.
4386 (define_insn "sse2_loadhpd"
4387 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4390 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4391 (parallel [(const_int 0)]))
4392 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4393 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4395 movhpd\t{%2, %0|%0, %2}
4396 unpcklpd\t{%2, %0|%0, %2}
4397 shufpd\t{$1, %1, %0|%0, %1, 1}
4401 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4402 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload, when the destination memory is also the source of the
;; preserved element 0, only the new high element has to be written: store
;; DF register operand 1 at byte offset 8 of that memory.
4405 [(set (match_operand:V2DF 0 "memory_operand" "")
4407 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4408 (match_operand:DF 1 "register_operand" "")))]
4409 "TARGET_SSE2 && reload_completed"
4410 [(set (match_dup 0) (match_dup 1))]
4412 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander that combines DF operand 2 with element 1 of V2DF operand 1.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands (code UNKNOWN, V2DFmode).
4415 (define_expand "sse2_loadlpd_exp"
4416 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4418 (match_operand:DF 2 "nonimmediate_operand" "")
4420 (match_operand:V2DF 1 "nonimmediate_operand" "")
4421 (parallel [(const_int 1)]))))]
4423 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4425 ;; Avoid combining registers from different units in a single alternative,
4426 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form: DF operand 2 combined with element 1 of operand 1.  The
;; register-destination alternatives emit two-operand vmovsd (operand 1
;; matches the "C" constraint), vmovlpd, three-operand vmovsd, and vmovhpd
;; from %H1; the remaining alternatives have a memory destination tied to
;; operand 1.  Operands 1 and 2 may not both be memory.
4427 (define_insn "*avx_loadlpd"
4428 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4430 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4432 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4433 (parallel [(const_int 1)]))))]
4434 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4436 vmovsd\t{%2, %0|%0, %2}
4437 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4438 vmovsd\t{%2, %1, %0|%0, %1, %2}
4439 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4443 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4444 (set_attr "prefix" "vex")
4445 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 form: DF operand 2 combined with element 1 of operand 1.  The
;; register-destination alternatives emit movsd (operand 1 matches the "C"
;; constraint), movlpd, movsd, shufpd $2 and movhpd from %H1; the remaining
;; alternatives have a memory destination tied to operand 1.  Operands 1 and
;; 2 may not both be memory.
;; In the shufpd alternative operand 2 is tied to the destination and
;; operand 1 is the other register, so operand 1 must be the shufpd source:
;; immediate 2 keeps the destination's low element and takes the high
;; element from the source.
4447 (define_insn "sse2_loadlpd"
4448 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4450 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4452 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4453 (parallel [(const_int 1)]))))]
4454 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4456 movsd\t{%2, %0|%0, %2}
4457 movlpd\t{%2, %0|%0, %2}
4458 movsd\t{%2, %0|%0, %2}
4459 shufpd\t{$2, %1, %0|%0, %1, 2}
4460 movhpd\t{%H1, %0|%0, %H1}
4464 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4465 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; After reload, when the destination memory is also the source of the
;; preserved element 1, only the new low element has to be written: store DF
;; register operand 1 at byte offset 0 of that memory.  (Offset 8 would
;; overwrite element 1, which is the element this pattern keeps.)
4468 [(set (match_operand:V2DF 0 "memory_operand" "")
4470 (match_operand:DF 1 "register_operand" "")
4471 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4472 "TARGET_SSE2 && reload_completed"
4473 [(set (match_dup 0) (match_dup 1))]
4475 operands[0] = adjust_address (operands[0], DFmode, 0);
4478 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extraction of element 1 of a V2DF into DF: movhps to
;; memory, movhlps between registers, or movlps from %H1.  Source and
;; destination may not both be memory.
4480 (define_insn "*vec_extractv2df_1_sse"
4481 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4483 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4484 (parallel [(const_int 1)])))]
4485 "!TARGET_SSE2 && TARGET_SSE
4486 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4488 movhps\t{%1, %0|%0, %1}
4489 movhlps\t{%1, %0|%0, %1}
4490 movlps\t{%H1, %0|%0, %H1}"
4491 [(set_attr "type" "ssemov")
4492 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF into DF: movlps to
;; memory, movaps between registers, or movlps from memory.  Source and
;; destination may not both be memory.
4494 (define_insn "*vec_extractv2df_0_sse"
4495 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4497 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4498 (parallel [(const_int 0)])))]
4499 "!TARGET_SSE2 && TARGET_SSE
4500 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4502 movlps\t{%1, %0|%0, %1}
4503 movaps\t{%1, %0|%0, %1}
4504 movlps\t{%1, %0|%0, %1}"
4505 [(set_attr "type" "ssemov")
4506 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX V2DF merge of operand 2 and operand 1.  The alternatives emit
;; three-operand vmovsd (register, register), vmovlpd with operand 2 in
;; memory, vmovlpd storing operand 2 to a memory destination tied to operand
;; 1, vmovhps loading from %H1, and vmovhps storing operand 1 to %H0.
4508 (define_insn "*avx_movsd"
4509 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4511 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4512 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4516 vmovsd\t{%2, %1, %0|%0, %1, %2}
4517 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4518 vmovlpd\t{%2, %0|%0, %2}
4519 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4520 vmovhps\t{%1, %H0|%H0, %1}"
4521 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4522 (set_attr "prefix" "vex")
4523 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 V2DF merge of operand 2 and operand 1, in two-operand syntax.  The
;; alternatives emit movsd, movlpd (operand 2 in memory), movlpd (store to a
;; memory destination), shufpd $2, movhps from %H1 and movhps to %H0.
;; In the shufpd alternative operand 2 is tied to the destination and
;; operand 1 is the other register, so operand 1 must be the shufpd source:
;; immediate 2 keeps the destination's low element and takes the high
;; element from the source.
4525 (define_insn "sse2_movsd"
4526 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4528 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4529 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4533 movsd\t{%2, %0|%0, %2}
4534 movlpd\t{%2, %0|%0, %2}
4535 movlpd\t{%2, %0|%0, %2}
4536 shufpd\t{$2, %1, %0|%0, %1, 2}
4537 movhps\t{%H1, %0|%0, %H1}
4538 movhps\t{%1, %H0|%H0, %1}"
4539 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4540 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result built from a single DF operand 1 (register or memory);
;; emitted as %vmovddup, with the "prefix" attribute set to maybe_vex.
4542 (define_insn "*vec_dupv2df_sse3"
4543 [(set (match_operand:V2DF 0 "register_operand" "=x")
4545 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4547 "%vmovddup\t{%1, %0|%0, %1}"
4548 [(set_attr "type" "sselog1")
4549 (set_attr "prefix" "maybe_vex")
4550 (set_attr "mode" "DF")])
;; V2DF result built from DF register operand 1, which is tied to the
;; destination register.
4552 (define_insn "vec_dupv2df"
4553 [(set (match_operand:V2DF 0 "register_operand" "=x")
4555 (match_operand:DF 1 "register_operand" "0")))]
4558 [(set_attr "type" "sselog1")
4559 (set_attr "mode" "V2DF")])
;; V2DF concatenation whose visible input is DF operand 1 (register or
;; memory); emitted as %vmovddup, with the "prefix" attribute set to
;; maybe_vex.
4561 (define_insn "*vec_concatv2df_sse3"
4562 [(set (match_operand:V2DF 0 "register_operand" "=x")
4564 (match_operand:DF 1 "nonimmediate_operand" "xm")
4567 "%vmovddup\t{%1, %0|%0, %1}"
4568 [(set_attr "type" "sselog1")
4569 (set_attr "prefix" "maybe_vex")
4570 (set_attr "mode" "DF")])
;; AVX concatenation of DF operands 1 and 2 into a V2DF register: vunpcklpd
;; when both are registers, vmovhpd when operand 2 is in memory, and vmovsd
;; of operand 1 from memory when operand 2 matches the "C" constraint.
4572 (define_insn "*vec_concatv2df_avx"
4573 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4575 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4576 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4579 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4580 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4581 vmovsd\t{%1, %0|%0, %1}"
4582 [(set_attr "type" "ssemov")
4583 (set_attr "prefix" "vex")
4584 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX concatenation of DF operands 1 and 2 into a V2DF register.  The
;; three "Y2" alternatives emit unpcklpd, movhpd and movsd; the two "x"
;; alternatives emit movlhps and movhps.  Except for the movsd alternative
;; (operand 1 in memory, operand 2 matching "C"), operand 1 is tied to the
;; destination.
4586 (define_insn "*vec_concatv2df"
4587 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4589 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4590 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4593 unpcklpd\t{%2, %0|%0, %2}
4594 movhpd\t{%2, %0|%0, %2}
4595 movsd\t{%1, %0|%0, %1}
4596 movlhps\t{%2, %0|%0, %2}
4597 movhps\t{%2, %0|%0, %2}"
4598 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4599 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4603 ;; Parallel integral arithmetic
4605 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for integer vector negation over the SSEMODEI modes.  The
;; preparation statement sets operands[2] to a register holding the zero
;; vector of the mode.
4607 (define_expand "neg<mode>2"
4608 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4611 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4613 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the plus/minus operations over the SSEMODEI modes.  The
;; operands are normalized with ix86_fixup_binary_operands_no_copy.
4615 (define_expand "<plusminus_insn><mode>3"
4616 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4618 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4619 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4621 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX plus/minus over the SSEMODEI modes: three-operand
;; vp<plusminus_mnemonic><ssevecsize>, operand 2 may be in memory.  Requires
;; ix86_binary_operator_ok.
4623 (define_insn "*avx_<plusminus_insn><mode>3"
4624 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4626 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4627 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4628 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4629 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4630 [(set_attr "type" "sseiadd")
4631 (set_attr "prefix" "vex")
4632 (set_attr "mode" "TI")])
;; SSE2 plus/minus over the SSEMODEI modes: two-operand
;; p<plusminus_mnemonic><ssevecsize> with operand 1 tied to the destination;
;; operand 2 may be in memory.  Requires ix86_binary_operator_ok.
4634 (define_insn "*<plusminus_insn><mode>3"
4635 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4637 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4638 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4639 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4640 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4641 [(set_attr "type" "sseiadd")
4642 (set_attr "prefix_data16" "1")
4643 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations over the SSEMODE12 modes
;; (V16QI and V8HI).  The operands are normalized with
;; ix86_fixup_binary_operands_no_copy.
4645 (define_expand "sse2_<plusminus_insn><mode>3"
4646 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4647 (sat_plusminus:SSEMODE12
4648 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4649 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4651 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX sat_plusminus over the SSEMODE12 modes: three-operand
;; vp<plusminus_mnemonic><ssevecsize>, operand 2 may be in memory.  Requires
;; ix86_binary_operator_ok.
4653 (define_insn "*avx_<plusminus_insn><mode>3"
4654 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4655 (sat_plusminus:SSEMODE12
4656 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4657 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4658 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4659 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4660 [(set_attr "type" "sseiadd")
4661 (set_attr "prefix" "vex")
4662 (set_attr "mode" "TI")])
;; SSE2 sat_plusminus over the SSEMODE12 modes: two-operand
;; p<plusminus_mnemonic><ssevecsize> with operand 1 tied to the destination;
;; operand 2 may be in memory.  Requires ix86_binary_operator_ok.
4664 (define_insn "*sse2_<plusminus_insn><mode>3"
4665 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4666 (sat_plusminus:SSEMODE12
4667 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4668 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4669 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4670 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4671 [(set_attr "type" "sseiadd")
4672 (set_attr "prefix_data16" "1")
4673 (set_attr "mode" "TI")])
;; V16QI multiply on register operands, split before reload (the split
;; condition requires that reload is neither completed nor in progress).
;; Two strategies appear in the split code: an SSE5 path that unpacks the
;; bytes to V8HI with ix86_expand_sse5_unpack, multiplies with gen_mulv8hi3
;; and repacks with ix86_expand_sse5_pack; and a path that widens with
;; punpckhbw/punpcklbw, multiplies as V8HI, and merges the low result bytes
;; back together with a sequence of punpckhbw/punpcklbw steps.
4675 (define_insn_and_split "mulv16qi3"
4676 [(set (match_operand:V16QI 0 "register_operand" "")
4677 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4678 (match_operand:V16QI 2 "register_operand" "")))]
4680 && !(reload_completed || reload_in_progress)"
4685 rtx t[12], op0, op[3];
4690 /* On SSE5, we can take advantage of the pperm instruction to pack and
4691 unpack the bytes. Unpack data such that we've got a source byte in
4692 each low byte of each word. We don't care what goes into the high
4693 byte, so put 0 there. */
4694 for (i = 0; i < 6; ++i)
4695 t[i] = gen_reg_rtx (V8HImode);
4697 for (i = 0; i < 2; i++)
4700 op[1] = operands[i+1];
4701 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4704 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4707 /* Multiply words. */
4708 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4709 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4711 /* Pack the low byte of each word back into a single xmm */
4712 op[0] = operands[0];
4715 ix86_expand_sse5_pack (op);
4719 for (i = 0; i < 12; ++i)
4720 t[i] = gen_reg_rtx (V16QImode);
4722 /* Unpack data such that we've got a source byte in each low byte of
4723 each word. We don't care what goes into the high byte of each word.
4724 Rather than trying to get zero in there, most convenient is to let
4725 it be a copy of the low byte. */
4726 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4727 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4728 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4729 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4731 /* Multiply words. The end-of-line annotations here give a picture of what
4732 the output of that instruction looks like. Dot means don't care; the
4733 letters are the bytes of the result with A being the most significant. */
4734 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4735 gen_lowpart (V8HImode, t[0]),
4736 gen_lowpart (V8HImode, t[1])));
4737 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4738 gen_lowpart (V8HImode, t[2]),
4739 gen_lowpart (V8HImode, t[3])));
4741 /* Extract the relevant bytes and merge them back together. */
4742 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4743 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4744 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4745 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4746 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4747 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4750 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Expander for the V8HI multiply.  The operands are normalized with
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode).
4754 (define_expand "mulv8hi3"
4755 [(set (match_operand:V8HI 0 "register_operand" "")
4756 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4757 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4759 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI multiply: three-operand vpmullw.  Operand 1 is marked
;; commutative ("%") and operand 2 may be in memory.
4761 (define_insn "*avx_mulv8hi3"
4762 [(set (match_operand:V8HI 0 "register_operand" "=x")
4763 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4764 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4765 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4766 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4767 [(set_attr "type" "sseimul")
4768 (set_attr "prefix" "vex")
4769 (set_attr "mode" "TI")])
;; SSE2 V8HI multiply: two-operand pmullw with operand 1 tied to the
;; destination (and marked commutative); operand 2 may be in memory.
4771 (define_insn "*mulv8hi3"
4772 [(set (match_operand:V8HI 0 "register_operand" "=x")
4773 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4774 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4775 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4776 "pmullw\t{%2, %0|%0, %2}"
4777 [(set_attr "type" "sseimul")
4778 (set_attr "prefix_data16" "1")
4779 (set_attr "mode" "TI")])
;; Expander for the V8HI pattern named smulv8hi3_highpart.  The operands are
;; normalized with ix86_fixup_binary_operands_no_copy (MULT, V8HImode).
4781 (define_expand "smulv8hi3_highpart"
4782 [(set (match_operand:V8HI 0 "register_operand" "")
4787 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4789 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4792 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI highpart multiply that emits three-operand vpmulhw.
;; Operand 1 is marked commutative ("%") and operand 2 may be in memory.
;; The pattern name carries the "smul" stem so that it lines up with the
;; SSE2 insn *smulv8hi3_highpart and the sibling *avx_umulv8hi3_highpart; the
;; leading "*" means no gen_ function is produced from the name.
4794 (define_insn "*avx_smulv8hi3_highpart"
4795 [(set (match_operand:V8HI 0 "register_operand" "=x")
4800 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4802 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4804 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4805 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4806 [(set_attr "type" "sseimul")
4807 (set_attr "prefix" "vex")
4808 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI highpart multiply that emits two-operand pmulhw.
;; Operand 1 is tied to the destination (and marked commutative); operand 2
;; may be in memory.
4810 (define_insn "*smulv8hi3_highpart"
4811 [(set (match_operand:V8HI 0 "register_operand" "=x")
4816 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4818 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4820 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4821 "pmulhw\t{%2, %0|%0, %2}"
4822 [(set_attr "type" "sseimul")
4823 (set_attr "prefix_data16" "1")
4824 (set_attr "mode" "TI")])
;; Expander for the V8HI pattern named umulv8hi3_highpart.  The operands are
;; normalized with ix86_fixup_binary_operands_no_copy (MULT, V8HImode).
4826 (define_expand "umulv8hi3_highpart"
4827 [(set (match_operand:V8HI 0 "register_operand" "")
4832 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4834 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4837 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI highpart multiply that emits three-operand vpmulhuw.
;; Operand 1 is marked commutative ("%") and operand 2 may be in memory.
4839 (define_insn "*avx_umulv8hi3_highpart"
4840 [(set (match_operand:V8HI 0 "register_operand" "=x")
4845 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4847 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4849 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4850 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
4851 [(set_attr "type" "sseimul")
4852 (set_attr "prefix" "vex")
4853 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI highpart multiply that emits two-operand pmulhuw.
;; Operand 1 is tied to the destination (and marked commutative); operand 2
;; may be in memory.
4855 (define_insn "*umulv8hi3_highpart"
4856 [(set (match_operand:V8HI 0 "register_operand" "=x")
4861 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4863 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4865 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4866 "pmulhuw\t{%2, %0|%0, %2}"
4867 [(set_attr "type" "sseimul")
4868 (set_attr "prefix_data16" "1")
4869 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of V4SI operands 1
;; and 2.  The operands are normalized with
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode).
4871 (define_expand "sse2_umulv2siv2di3"
4872 [(set (match_operand:V2DI 0 "register_operand" "")
4876 (match_operand:V4SI 1 "nonimmediate_operand" "")
4877 (parallel [(const_int 0) (const_int 2)])))
4880 (match_operand:V4SI 2 "nonimmediate_operand" "")
4881 (parallel [(const_int 0) (const_int 2)])))))]
4883 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn producing V2DI from elements 0 and 2 of V4SI operands 1 and 2:
;; three-operand vpmuludq.  Operand 1 is marked commutative ("%") and
;; operand 2 may be in memory.
4885 (define_insn "*avx_umulv2siv2di3"
4886 [(set (match_operand:V2DI 0 "register_operand" "=x")
4890 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4891 (parallel [(const_int 0) (const_int 2)])))
4894 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4895 (parallel [(const_int 0) (const_int 2)])))))]
4896 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4897 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4898 [(set_attr "type" "sseimul")
4899 (set_attr "prefix" "vex")
4900 (set_attr "mode" "TI")])
;; SSE2 insn producing V2DI from elements 0 and 2 of V4SI operands 1 and 2:
;; two-operand pmuludq with operand 1 tied to the destination (and marked
;; commutative); operand 2 may be in memory.
4902 (define_insn "*sse2_umulv2siv2di3"
4903 [(set (match_operand:V2DI 0 "register_operand" "=x")
4907 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4908 (parallel [(const_int 0) (const_int 2)])))
4911 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4912 (parallel [(const_int 0) (const_int 2)])))))]
4913 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4914 "pmuludq\t{%2, %0|%0, %2}"
4915 [(set_attr "type" "sseimul")
4916 (set_attr "prefix_data16" "1")
4917 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of V4SI operands 1
;; and 2.  The operands are normalized with
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode).
4919 (define_expand "sse4_1_mulv2siv2di3"
4920 [(set (match_operand:V2DI 0 "register_operand" "")
4924 (match_operand:V4SI 1 "nonimmediate_operand" "")
4925 (parallel [(const_int 0) (const_int 2)])))
4928 (match_operand:V4SI 2 "nonimmediate_operand" "")
4929 (parallel [(const_int 0) (const_int 2)])))))]
4931 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn producing V2DI from elements 0 and 2 of V4SI operands 1 and 2:
;; three-operand vpmuldq.  Operand 1 is marked commutative ("%") and operand
;; 2 may be in memory.
4933 (define_insn "*avx_mulv2siv2di3"
4934 [(set (match_operand:V2DI 0 "register_operand" "=x")
4938 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4939 (parallel [(const_int 0) (const_int 2)])))
4942 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4943 (parallel [(const_int 0) (const_int 2)])))))]
4944 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4945 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4946 [(set_attr "type" "sseimul")
4947 (set_attr "prefix" "vex")
4948 (set_attr "mode" "TI")])
;; SSE4.1 insn producing V2DI from elements 0 and 2 of V4SI operands 1 and
;; 2: two-operand pmuldq with operand 1 tied to the destination (and marked
;; commutative); operand 2 may be in memory.
4950 (define_insn "*sse4_1_mulv2siv2di3"
4951 [(set (match_operand:V2DI 0 "register_operand" "=x")
4955 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4956 (parallel [(const_int 0) (const_int 2)])))
4959 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4960 (parallel [(const_int 0) (const_int 2)])))))]
4961 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4962 "pmuldq\t{%2, %0|%0, %2}"
4963 [(set_attr "type" "sseimul")
4964 (set_attr "prefix_extra" "1")
4965 (set_attr "mode" "TI")])
;; Expander producing a V4SI result from V8HI operands 1 and 2.  The pattern
;; uses two selections from each operand, one beginning at element 0 and one
;; (a V4HI vec_select) beginning at element 1 and ending at element 7.  The
;; operands are normalized with ix86_fixup_binary_operands_no_copy.
4967 (define_expand "sse2_pmaddwd"
4968 [(set (match_operand:V4SI 0 "register_operand" "")
4973 (match_operand:V8HI 1 "nonimmediate_operand" "")
4974 (parallel [(const_int 0)
4980 (match_operand:V8HI 2 "nonimmediate_operand" "")
4981 (parallel [(const_int 0)
4987 (vec_select:V4HI (match_dup 1)
4988 (parallel [(const_int 1)
4993 (vec_select:V4HI (match_dup 2)
4994 (parallel [(const_int 1)
4997 (const_int 7)]))))))]
4999 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn producing V4SI from V8HI operands 1 and 2: three-operand
;; vpmaddwd.  Operand 1 is marked commutative ("%") and operand 2 may be in
;; memory.
5001 (define_insn "*avx_pmaddwd"
5002 [(set (match_operand:V4SI 0 "register_operand" "=x")
5007 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5008 (parallel [(const_int 0)
5014 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5015 (parallel [(const_int 0)
5021 (vec_select:V4HI (match_dup 1)
5022 (parallel [(const_int 1)
5027 (vec_select:V4HI (match_dup 2)
5028 (parallel [(const_int 1)
5031 (const_int 7)]))))))]
5032 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5033 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5034 [(set_attr "type" "sseiadd")
5035 (set_attr "prefix" "vex")
5036 (set_attr "mode" "TI")])
;; SSE2 insn producing V4SI from V8HI operands 1 and 2: two-operand pmaddwd
;; with operand 1 tied to the destination (and marked commutative); operand
;; 2 may be in memory.
5038 (define_insn "*sse2_pmaddwd"
5039 [(set (match_operand:V4SI 0 "register_operand" "=x")
5044 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5045 (parallel [(const_int 0)
5051 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5052 (parallel [(const_int 0)
5058 (vec_select:V4HI (match_dup 1)
5059 (parallel [(const_int 1)
5064 (vec_select:V4HI (match_dup 2)
5065 (parallel [(const_int 1)
5068 (const_int 7)]))))))]
5069 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5070 "pmaddwd\t{%2, %0|%0, %2}"
5071 [(set_attr "type" "sseiadd")
5072 (set_attr "prefix_data16" "1")
5073 (set_attr "mode" "TI")])
;; Expander for the V4SI multiply; all operands must be registers.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy only when
;; TARGET_SSE4_1 or TARGET_SSE5 is set.
5075 (define_expand "mulv4si3"
5076 [(set (match_operand:V4SI 0 "register_operand" "")
5077 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5078 (match_operand:V4SI 2 "register_operand" "")))]
5081 if (TARGET_SSE4_1 || TARGET_SSE5)
5082 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX V4SI multiply: three-operand vpmulld.  Operand 1 is marked
;; commutative ("%") and operand 2 may be in memory.
5085 (define_insn "*avx_mulv4si3"
5086 [(set (match_operand:V4SI 0 "register_operand" "=x")
5087 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5088 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5089 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5090 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5091 [(set_attr "type" "sseimul")
5092 (set_attr "prefix" "vex")
5093 (set_attr "mode" "TI")])
;; SSE4.1 V4SI multiply: two-operand pmulld with operand 1 tied to the
;; destination (and marked commutative); operand 2 may be in memory.
5095 (define_insn "*sse4_1_mulv4si3"
5096 [(set (match_operand:V4SI 0 "register_operand" "=x")
5097 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5098 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5099 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5100 "pmulld\t{%2, %0|%0, %2}"
5101 [(set_attr "type" "sseimul")
5102 (set_attr "prefix_extra" "1")
5103 (set_attr "mode" "TI")])
5105 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5106 ;; multiply/add. In general, we expect the define_split to occur before
5107 ;; register allocation, so we have to handle the corner case where the target
5108 ;; is the same as one of the inputs.
;; The destination uses an earlyclobber constraint ("=&x").  The split is
;; allowed once reload has completed, or earlier when operand 0 is not
;; mentioned in operand 1 or operand 2; the replacement is a plus of the
;; mult, with operands[3] set to the V4SI zero vector.
5109 (define_insn_and_split "*sse5_mulv4si3"
5110 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5111 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5112 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5115 "&& (reload_completed
5116 || (!reg_mentioned_p (operands[0], operands[1])
5117 && !reg_mentioned_p (operands[0], operands[2])))"
5121 (plus:V4SI (mult:V4SI (match_dup 1)
5125 operands[3] = CONST0_RTX (V4SImode);
5127 [(set_attr "type" "ssemuladd")
5128 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or SSE5, on register
;; operands, available only before reload.  The split code multiplies
;; elements 0 and 2 with gen_sse2_umulv2siv2di3, shifts both inputs with
;; gen_sse2_lshrti3 so elements 1 and 3 land in those slots, multiplies
;; again, then uses gen_sse2_pshufd_1 and gen_sse2_punpckldq to gather the
;; results into operand 0.
5130 (define_insn_and_split "*sse2_mulv4si3"
5131 [(set (match_operand:V4SI 0 "register_operand" "")
5132 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5133 (match_operand:V4SI 2 "register_operand" "")))]
5134 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5135 && !(reload_completed || reload_in_progress)"
5140 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5146 t1 = gen_reg_rtx (V4SImode);
5147 t2 = gen_reg_rtx (V4SImode);
5148 t3 = gen_reg_rtx (V4SImode);
5149 t4 = gen_reg_rtx (V4SImode);
5150 t5 = gen_reg_rtx (V4SImode);
5151 t6 = gen_reg_rtx (V4SImode);
5152 thirtytwo = GEN_INT (32);
5154 /* Multiply elements 2 and 0. */
5155 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5158 /* Shift both input vectors down one element, so that elements 3
5159 and 1 are now in the slots for elements 2 and 0. For K8, at
5160 least, this is faster than using a shuffle. */
5161 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5162 gen_lowpart (TImode, op1),
5164 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5165 gen_lowpart (TImode, op2),
5167 /* Multiply elements 3 and 1. */
5168 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5171 /* Move the results in element 2 down to element 1; we don't care
5172 what goes in elements 2 and 3. */
5173 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5174 const0_rtx, const0_rtx));
5175 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5176 const0_rtx, const0_rtx));
5178 /* Merge the parts back together. */
5179 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, split before reload (the condition rejects
;; reload_completed and reload_in_progress).  Two expansions appear below:
;; one built from the SSE5 pmacsdd / phadddq / pmacsdql patterns, and one
;; built from three unsigned 32x32->64 multiplies
;; (gen_sse2_umulv2siv2di3) combined with 64-bit shifts and adds.
5183 (define_insn_and_split "mulv2di3"
5184 [(set (match_operand:V2DI 0 "register_operand" "")
5185 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5186 (match_operand:V2DI 2 "register_operand" "")))]
5188 && !(reload_completed || reload_in_progress)"
5193 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5198 /* op1: A,B,C,D, op2: E,F,G,H */
5200 op1 = gen_lowpart (V4SImode, operands[1]);
5201 op2 = gen_lowpart (V4SImode, operands[2]);
5202 t1 = gen_reg_rtx (V4SImode);
5203 t2 = gen_reg_rtx (V4SImode);
5204 t3 = gen_reg_rtx (V4SImode);
5205 t4 = gen_reg_rtx (V2DImode);
5206 t5 = gen_reg_rtx (V2DImode);
5209 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5216 emit_move_insn (t2, CONST0_RTX (V4SImode));
5218 /* t3: (B*E),(A*F),(D*G),(C*H) */
5219 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5221 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5222 emit_insn (gen_sse5_phadddq (t4, t3));
5224 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5225 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5227 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5228 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5235 t1 = gen_reg_rtx (V2DImode);
5236 t2 = gen_reg_rtx (V2DImode);
5237 t3 = gen_reg_rtx (V2DImode);
5238 t4 = gen_reg_rtx (V2DImode);
5239 t5 = gen_reg_rtx (V2DImode);
5240 t6 = gen_reg_rtx (V2DImode);
5241 thirtytwo = GEN_INT (32);
5243 /* Multiply low parts. */
5244 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5245 gen_lowpart (V4SImode, op2)));
5247 /* Shift input vectors right 32 bits so we can multiply high parts. */
5248 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5249 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5251 /* Multiply high parts by low parts. */
5252 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5253 gen_lowpart (V4SImode, t3)));
5254 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5255 gen_lowpart (V4SImode, t2)));
5257 /* Shift them back. */
5258 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5259 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5261 /* Add the three parts together. */
5262 emit_insn (gen_addv2di3 (t6, t1, t4));
5263 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, V8HI x V8HI -> V4SI, high half.  t1 receives
;; the gen_mulv8hi3 result and t2 the gen_smulv8hi3_highpart result for the
;; same inputs; interleaving their high elements pairs the two 16-bit
;; halves of each product in the destination (accessed as V8HI).
5267 (define_expand "vec_widen_smult_hi_v8hi"
5268 [(match_operand:V4SI 0 "register_operand" "")
5269 (match_operand:V8HI 1 "register_operand" "")
5270 (match_operand:V8HI 2 "register_operand" "")]
5273 rtx op1, op2, t1, t2, dest;
5277 t1 = gen_reg_rtx (V8HImode);
5278 t2 = gen_reg_rtx (V8HImode);
5279 dest = gen_lowpart (V8HImode, operands[0]);
5281 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5282 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5283 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, V8HI x V8HI -> V4SI, low half.  Same scheme
;; as the _hi_ variant, but the gen_mulv8hi3 and gen_smulv8hi3_highpart
;; results are merged with vec_interleave_lowv8hi.
5287 (define_expand "vec_widen_smult_lo_v8hi"
5288 [(match_operand:V4SI 0 "register_operand" "")
5289 (match_operand:V8HI 1 "register_operand" "")
5290 (match_operand:V8HI 2 "register_operand" "")]
5293 rtx op1, op2, t1, t2, dest;
5297 t1 = gen_reg_rtx (V8HImode);
5298 t2 = gen_reg_rtx (V8HImode);
5299 dest = gen_lowpart (V8HImode, operands[0]);
5301 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5302 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5303 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, V8HI x V8HI -> V4SI, high half.  t1 receives
;; the gen_mulv8hi3 result and t2 the gen_umulv8hi3_highpart result; the
;; two are merged with vec_interleave_highv8hi into the destination
;; (accessed as V8HI).
5307 (define_expand "vec_widen_umult_hi_v8hi"
5308 [(match_operand:V4SI 0 "register_operand" "")
5309 (match_operand:V8HI 1 "register_operand" "")
5310 (match_operand:V8HI 2 "register_operand" "")]
5313 rtx op1, op2, t1, t2, dest;
5317 t1 = gen_reg_rtx (V8HImode);
5318 t2 = gen_reg_rtx (V8HImode);
5319 dest = gen_lowpart (V8HImode, operands[0]);
5321 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5322 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5323 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, V8HI x V8HI -> V4SI, low half.  Same scheme
;; as the _hi_ variant, but the gen_mulv8hi3 and gen_umulv8hi3_highpart
;; results are merged with vec_interleave_lowv8hi.
5327 (define_expand "vec_widen_umult_lo_v8hi"
5328 [(match_operand:V4SI 0 "register_operand" "")
5329 (match_operand:V8HI 1 "register_operand" "")
5330 (match_operand:V8HI 2 "register_operand" "")]
5333 rtx op1, op2, t1, t2, dest;
5337 t1 = gen_reg_rtx (V8HImode);
5338 t2 = gen_reg_rtx (V8HImode);
5339 dest = gen_lowpart (V8HImode, operands[0]);
5341 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5342 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5343 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, V4SI x V4SI -> V2DI, high half.  Both inputs
;; are first permuted into fresh V4SI temporaries with gen_sse2_pshufd_1,
;; then multiplied by the SSE5 gen_sse5_mulv2div2di3_high pattern.
5347 (define_expand "vec_widen_smult_hi_v4si"
5348 [(match_operand:V2DI 0 "register_operand" "")
5349 (match_operand:V4SI 1 "register_operand" "")
5350 (match_operand:V4SI 2 "register_operand" "")]
5355 t1 = gen_reg_rtx (V4SImode);
5356 t2 = gen_reg_rtx (V4SImode);
5358 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5363 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5368 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply, V4SI x V4SI -> V2DI, low half.  Both inputs
;; are first permuted into fresh V4SI temporaries with gen_sse2_pshufd_1,
;; then multiplied by the SSE5 gen_sse5_mulv2div2di3_low pattern.
5372 (define_expand "vec_widen_smult_lo_v4si"
5373 [(match_operand:V2DI 0 "register_operand" "")
5374 (match_operand:V4SI 1 "register_operand" "")
5375 (match_operand:V4SI 2 "register_operand" "")]
5380 t1 = gen_reg_rtx (V4SImode);
5381 t2 = gen_reg_rtx (V4SImode);
5383 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5388 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5393 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply, V4SI x V4SI -> V2DI, high half.  Each input
;; is interleaved with itself (vec_interleave_highv4si), which leaves the
;; two high source elements in element slots 0 and 2 of the temporary;
;; gen_sse2_umulv2siv2di3 then forms the two 64-bit products.
5398 (define_expand "vec_widen_umult_hi_v4si"
5399 [(match_operand:V2DI 0 "register_operand" "")
5400 (match_operand:V4SI 1 "register_operand" "")
5401 (match_operand:V4SI 2 "register_operand" "")]
5404 rtx op1, op2, t1, t2;
5408 t1 = gen_reg_rtx (V4SImode);
5409 t2 = gen_reg_rtx (V4SImode);
5411 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5412 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5413 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, V4SI x V4SI -> V2DI, low half.  Each input
;; is interleaved with itself (vec_interleave_lowv4si), which leaves the
;; two low source elements in element slots 0 and 2 of the temporary;
;; gen_sse2_umulv2siv2di3 then forms the two 64-bit products.
5417 (define_expand "vec_widen_umult_lo_v4si"
5418 [(match_operand:V2DI 0 "register_operand" "")
5419 (match_operand:V4SI 1 "register_operand" "")
5420 (match_operand:V4SI 2 "register_operand" "")]
5423 rtx op1, op2, t1, t2;
5427 t1 = gen_reg_rtx (V4SImode);
5428 t2 = gen_reg_rtx (V4SImode);
5430 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5431 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5432 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes through a fresh V4SI temporary
;; before being added to the V4SI accumulator in operand 3.
5436 (define_expand "sdot_prodv8hi"
5437 [(match_operand:V4SI 0 "register_operand" "")
5438 (match_operand:V8HI 1 "register_operand" "")
5439 (match_operand:V8HI 2 "register_operand" "")
5440 (match_operand:V4SI 3 "register_operand" "")]
5443 rtx t = gen_reg_rtx (V4SImode);
5444 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5445 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step, V4SI inputs with a V2DI accumulator in
;; operand 3.  t1 = umul (op1, op2) + op3; t2 and t3 are the inputs shifted
;; right as whole TImode values so a second gen_sse2_umulv2siv2di3 (t4)
;; multiplies the remaining elements; operand 0 = t1 + t4.
5449 (define_expand "udot_prodv4si"
5450 [(match_operand:V2DI 0 "register_operand" "")
5451 (match_operand:V4SI 1 "register_operand" "")
5452 (match_operand:V4SI 2 "register_operand" "")
5453 (match_operand:V2DI 3 "register_operand" "")]
5458 t1 = gen_reg_rtx (V2DImode);
5459 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5460 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5462 t2 = gen_reg_rtx (V4SImode);
5463 t3 = gen_reg_rtx (V4SImode);
5464 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5465 gen_lowpart (TImode, operands[1]),
5467 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5468 gen_lowpart (TImode, operands[2]),
5471 t4 = gen_reg_rtx (V2DImode);
5472 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5474 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; VEX-encoded three-operand vpsra for the SSEMODE24 modes (V8HI, V4SI).
;; The SImode shift count in operand 2 is either an SSE register ("x") or
;; a constant accepted by constraint "N".
5478 (define_insn "*avx_ashr<mode>3"
5479 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5481 (match_operand:SSEMODE24 1 "register_operand" "x")
5482 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5484 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5485 [(set_attr "type" "sseishft")
5486 (set_attr "prefix" "vex")
5487 (set_attr "mode" "TI")])
;; Two-operand psra for the SSEMODE24 modes (V8HI, V4SI); the shifted
;; input is tied to the destination ("0").  The SImode count is an SSE
;; register or a constant accepted by constraint "N".
5489 (define_insn "ashr<mode>3"
5490 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5492 (match_operand:SSEMODE24 1 "register_operand" "0")
5493 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5495 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5496 [(set_attr "type" "sseishft")
5497 (set_attr "prefix_data16" "1")
5498 (set_attr "mode" "TI")])
;; VEX-encoded three-operand logical right shift (vpsrl) for the
;; SSEMODE248 modes (V8HI, V4SI, V2DI).  The SImode count is an SSE
;; register or a constant accepted by constraint "N".
5500 (define_insn "*avx_lshr<mode>3"
5501 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5502 (lshiftrt:SSEMODE248
5503 (match_operand:SSEMODE248 1 "register_operand" "x")
5504 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5506 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5507 [(set_attr "type" "sseishft")
5508 (set_attr "prefix" "vex")
5509 (set_attr "mode" "TI")])
;; Two-operand logical right shift (psrl) for the SSEMODE248 modes; the
;; shifted input is tied to the destination ("0").  The SImode count is an
;; SSE register or a constant accepted by constraint "N".
5511 (define_insn "lshr<mode>3"
5512 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5513 (lshiftrt:SSEMODE248
5514 (match_operand:SSEMODE248 1 "register_operand" "0")
5515 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5517 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5518 [(set_attr "type" "sseishft")
5519 (set_attr "prefix_data16" "1")
5520 (set_attr "mode" "TI")])
;; VEX-encoded three-operand left shift (vpsll) for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI).  The SImode count is an SSE register or a constant
;; accepted by constraint "N".
5522 (define_insn "*avx_ashl<mode>3"
5523 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5525 (match_operand:SSEMODE248 1 "register_operand" "x")
5526 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5528 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5529 [(set_attr "type" "sseishft")
5530 (set_attr "prefix" "vex")
5531 (set_attr "mode" "TI")])
;; Two-operand left shift (psll) for the SSEMODE248 modes; the shifted
;; input is tied to the destination ("0").  The SImode count is an SSE
;; register or a constant accepted by constraint "N".
5533 (define_insn "ashl<mode>3"
5534 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5536 (match_operand:SSEMODE248 1 "register_operand" "0")
5537 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5539 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5540 [(set_attr "type" "sseishft")
5541 (set_attr "prefix_data16" "1")
5542 (set_attr "mode" "TI")])
;; Whole-vector left shift for the 16-byte integer modes.  Operands 0 and
;; 1 are re-expressed as TImode with gen_lowpart so the shift is a single
;; TImode ashift; the count must satisfy const_0_to_255_mul_8_operand.
5544 (define_expand "vec_shl_<mode>"
5545 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5546 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5547 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5550 operands[0] = gen_lowpart (TImode, operands[0]);
5551 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the 16-byte integer modes.
;; Operands 0 and 1 are re-expressed as TImode with gen_lowpart so the
;; shift is a single TImode lshiftrt; the count must satisfy
;; const_0_to_255_mul_8_operand.
5554 (define_expand "vec_shr_<mode>"
5555 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5556 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5557 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5560 operands[0] = gen_lowpart (TImode, operands[0]);
5561 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX integer max/min for V16QI, V8HI and V4SI.  Three-operand VEX form;
;; the mnemonic is assembled from <maxminiprefix> and <ssevecsize>, and
;; operands 1 and 2 are commutative ("%").
5564 (define_insn "*avx_<code><mode>3"
5565 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5567 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5568 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5569 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5570 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5571 [(set_attr "type" "sseiadd")
5572 (set_attr "prefix" "vex")
5573 (set_attr "mode" "TI")])
;; Expander for the V16QI <code> operation.  Its only preparation step is
;; to canonicalize the operands with ix86_fixup_binary_operands_no_copy.
5575 (define_expand "<code>v16qi3"
5576 [(set (match_operand:V16QI 0 "register_operand" "")
5578 (match_operand:V16QI 1 "nonimmediate_operand" "")
5579 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5581 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 byte max/min instruction (p<maxminiprefix>b).  Two-operand form:
;; operand 1 is tied to the destination and is commutative with operand 2.
5583 (define_insn "*<code>v16qi3"
5584 [(set (match_operand:V16QI 0 "register_operand" "=x")
5586 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5587 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5588 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5589 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5590 [(set_attr "type" "sseiadd")
5591 (set_attr "prefix_data16" "1")
5592 (set_attr "mode" "TI")])
;; Expander for the V8HI <code> operation.  Its only preparation step is
;; to canonicalize the operands with ix86_fixup_binary_operands_no_copy.
5594 (define_expand "<code>v8hi3"
5595 [(set (match_operand:V8HI 0 "register_operand" "")
5597 (match_operand:V8HI 1 "nonimmediate_operand" "")
5598 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5600 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 word max/min instruction (p<maxminiprefix>w).  Two-operand form:
;; operand 1 is tied to the destination and is commutative with operand 2.
5602 (define_insn "*<code>v8hi3"
5603 [(set (match_operand:V8HI 0 "register_operand" "=x")
5605 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5606 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5607 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5608 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5609 [(set_attr "type" "sseiadd")
5610 (set_attr "prefix_data16" "1")
5611 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  Two strategies appear below: handing the
;; operands to ix86_fixup_binary_operands_no_copy for a direct UMAX
;; pattern, or computing umax (a, b) as ussub (a, b) + b with
;; gen_sse2_ussubv8hi3 followed by gen_addv8hi3.  In the latter, a fresh
;; temporary replaces operand 0 when it is the same rtx as operand 2, so
;; the subtract cannot overwrite b before the add reads it.
5613 (define_expand "umaxv8hi3"
5614 [(set (match_operand:V8HI 0 "register_operand" "")
5615 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5616 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5620 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5623 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5624 if (rtx_equal_p (op3, op2))
5625 op3 = gen_reg_rtx (V8HImode);
5626 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5627 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for V16QI and V4SI.  Either the operands are fixed up
;; for a direct SMAX pattern, or the result is built as a vector
;; conditional through ix86_expand_int_vcond: xops follows the vcond
;; operand layout (0 = dest, 1 = value if true, 2 = value if false,
;; 3 = comparison, 4/5 = compared operands), selecting operand 1 when
;; operand 1 > operand 2 and operand 2 otherwise.
5632 (define_expand "smax<mode>3"
5633 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5634 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5635 (match_operand:SSEMODE14 2 "register_operand" "")))]
5639 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5645 xops[0] = operands[0];
5646 xops[1] = operands[1];
5647 xops[2] = operands[2];
5648 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5649 xops[4] = operands[1];
5650 xops[5] = operands[2];
5651 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min instruction for V16QI and V4SI.  Two-operand form:
;; operand 1 is tied to the destination and is commutative with operand 2;
;; the mnemonic is assembled from <maxminiprefix> and <ssevecsize>.
5657 (define_insn "*sse4_1_<code><mode>3"
5658 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5660 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5661 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5662 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5663 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5664 [(set_attr "type" "sseiadd")
5665 (set_attr "prefix_extra" "1")
5666 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  Either the operands are fixed up for a direct
;; UMAX pattern, or the result is built as a vector conditional through
;; ix86_expand_int_vcond using an unsigned GTU comparison: operand 1 is
;; selected when operand 1 > operand 2, operand 2 otherwise.
5668 (define_expand "umaxv4si3"
5669 [(set (match_operand:V4SI 0 "register_operand" "")
5670 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5671 (match_operand:V4SI 2 "register_operand" "")))]
5675 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5681 xops[0] = operands[0];
5682 xops[1] = operands[1];
5683 xops[2] = operands[2];
5684 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5685 xops[4] = operands[1];
5686 xops[5] = operands[2];
5687 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min instruction for V8HI and V4SI.  Two-operand form:
;; operand 1 is tied to the destination and is commutative with operand 2;
;; the mnemonic is assembled from <maxminiprefix> and <ssevecsize>.
5693 (define_insn "*sse4_1_<code><mode>3"
5694 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5696 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5697 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5698 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5699 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5700 [(set_attr "type" "sseiadd")
5701 (set_attr "prefix_extra" "1")
5702 (set_attr "mode" "TI")])
;; Signed minimum for V16QI and V4SI.  Either the operands are fixed up
;; for a direct SMIN pattern, or the result is built through
;; ix86_expand_int_vcond.  The true/false values are swapped relative to
;; smax: when operand 1 > operand 2 the result is operand 2, otherwise
;; operand 1.
5704 (define_expand "smin<mode>3"
5705 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5706 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5707 (match_operand:SSEMODE14 2 "register_operand" "")))]
5711 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5717 xops[0] = operands[0];
5718 xops[1] = operands[2];
5719 xops[2] = operands[1];
5720 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5721 xops[4] = operands[1];
5722 xops[5] = operands[2];
5723 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for V8HI and V4SI.  Either the operands are fixed up
;; for a direct UMIN pattern, or the result is built through
;; ix86_expand_int_vcond with an unsigned GTU comparison: when
;; operand 1 > operand 2 the result is operand 2, otherwise operand 1.
5729 (define_expand "umin<mode>3"
5730 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5731 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5732 (match_operand:SSEMODE24 2 "register_operand" "")))]
5736 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5742 xops[0] = operands[0];
5743 xops[1] = operands[2];
5744 xops[2] = operands[1];
5745 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5746 xops[4] = operands[1];
5747 xops[5] = operands[2];
5748 ok = ix86_expand_int_vcond (xops);
5754 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5756 ;; Parallel integral comparisons
5758 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on V16QI, V8HI and V4SI.  Enabled
;; for SSE2 when SSE5 is not in use; the preparation step only
;; canonicalizes the operands with ix86_fixup_binary_operands_no_copy.
5760 (define_expand "sse2_eq<mode>3"
5761 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5763 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5764 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5765 "TARGET_SSE2 && !TARGET_SSE5"
5766 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX element-wise equality (vpcmpeq) for V16QI, V8HI, V4SI and V2DI.
;; Three-operand VEX form; operands 1 and 2 are commutative ("%").
5768 (define_insn "*avx_eq<mode>3"
5769 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5771 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5772 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5773 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5774 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5775 [(set_attr "type" "ssecmp")
5776 (set_attr "prefix" "vex")
5777 (set_attr "mode" "TI")])
;; SSE2 element-wise equality (pcmpeq) for V16QI, V8HI and V4SI, disabled
;; when SSE5 is in use.  Two-operand form: operand 1 is tied to the
;; destination and is commutative with operand 2.
5779 (define_insn "*sse2_eq<mode>3"
5780 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5782 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
5783 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5784 "TARGET_SSE2 && !TARGET_SSE5
5785 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5786 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
5787 [(set_attr "type" "ssecmp")
5788 (set_attr "prefix_data16" "1")
5789 (set_attr "mode" "TI")])
;; Expander for V2DI element-wise equality.  The preparation step only
;; canonicalizes the operands with ix86_fixup_binary_operands_no_copy.
5791 (define_expand "sse4_1_eqv2di3"
5792 [(set (match_operand:V2DI 0 "register_operand" "")
5794 (match_operand:V2DI 1 "nonimmediate_operand" "")
5795 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5797 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI equality (pcmpeqq).  Two-operand form: operand 1 is tied to
;; the destination and is commutative with operand 2.
5799 (define_insn "*sse4_1_eqv2di3"
5800 [(set (match_operand:V2DI 0 "register_operand" "=x")
5802 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
5803 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5804 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5805 "pcmpeqq\t{%2, %0|%0, %2}"
5806 [(set_attr "type" "ssecmp")
5807 (set_attr "prefix_extra" "1")
5808 (set_attr "mode" "TI")])
;; AVX element-wise greater-than (vpcmpgt) for V16QI, V8HI, V4SI and V2DI.
;; Three-operand VEX form; operand 1 must be a register and the operands
;; are not marked commutative.
5810 (define_insn "*avx_gt<mode>3"
5811 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5813 (match_operand:SSEMODE1248 1 "register_operand" "x")
5814 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5816 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5817 [(set_attr "type" "ssecmp")
5818 (set_attr "prefix" "vex")
5819 (set_attr "mode" "TI")])
;; SSE2 element-wise greater-than (pcmpgt) for V16QI, V8HI and V4SI,
;; disabled when SSE5 is in use.  Two-operand form with operand 1 tied to
;; the destination; the operands are not commutative.
5821 (define_insn "sse2_gt<mode>3"
5822 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5824 (match_operand:SSEMODE124 1 "register_operand" "0")
5825 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5826 "TARGET_SSE2 && !TARGET_SSE5"
5827 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
5828 [(set_attr "type" "ssecmp")
5829 (set_attr "prefix_data16" "1")
5830 (set_attr "mode" "TI")])
;; V2DI element-wise greater-than (pcmpgtq).  Two-operand form with
;; operand 1 tied to the destination; the operands are not commutative.
5832 (define_insn "sse4_2_gtv2di3"
5833 [(set (match_operand:V2DI 0 "register_operand" "=x")
5835 (match_operand:V2DI 1 "register_operand" "0")
5836 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5838 "pcmpgtq\t{%2, %0|%0, %2}"
5839 [(set_attr "type" "ssecmp")
5840 (set_attr "mode" "TI")])
;; Integer vector conditional: operand 0 = (operand 4 <op3> operand 5)
;; ? operand 1 : operand 2, element-wise.  The operator in operand 3 is
;; unrestricted by predicate; all work is delegated to
;; ix86_expand_int_vcond, whose success flag is captured in `ok'.
5842 (define_expand "vcond<mode>"
5843 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
5844 (if_then_else:SSEMODE124C8
5845 (match_operator 3 ""
5846 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
5847 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
5848 (match_operand:SSEMODE124C8 1 "general_operand" "")
5849 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
5852 bool ok = ix86_expand_int_vcond (operands);
;; Integer vector conditional under the vcondu name, with the same
;; operand layout as vcond<mode>: operand 0 = (operand 4 <op3> operand 5)
;; ? operand 1 : operand 2.  All work is delegated to
;; ix86_expand_int_vcond, whose success flag is captured in `ok'.
5857 (define_expand "vcondu<mode>"
5858 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
5859 (if_then_else:SSEMODE124C8
5860 (match_operator 3 ""
5861 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
5862 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
5863 (match_operand:SSEMODE124C8 1 "general_operand" "")
5864 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
5867 bool ok = ix86_expand_int_vcond (operands);
5872 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5874 ;; Parallel bitwise logical operations
5876 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of a 16-byte integer vector, expressed as an XOR with
;; an all-ones constant.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx and forces it into a
;; register as operand 2.
5878 (define_expand "one_cmpl<mode>2"
5879 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5880 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5884 int i, n = GET_MODE_NUNITS (<MODE>mode);
5885 rtvec v = rtvec_alloc (n);
5887 for (i = 0; i < n; ++i)
5888 RTVEC_ELT (v, i) = constm1_rtx;
5890 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not for the 32-byte integer vector modes: (~operand 1) combined
;; with operand 2.  Emitted as the packed-single instruction vandnps, with
;; the insn mode taken from <avxvecpsmode>.
5893 (define_insn "*avx_andnot<mode>3"
5894 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5896 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
5897 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5899 "vandnps\t{%2, %1, %0|%0, %1, %2}"
5900 [(set_attr "type" "sselog")
5901 (set_attr "prefix" "vex")
5902 (set_attr "mode" "<avxvecpsmode>")])
;; And-not on 16-byte integer vectors for targets with SSE but not SSE2.
;; Emitted as andnps (insn mode V4SF); the complemented operand 1 is tied
;; to the destination.
5904 (define_insn "*sse_andnot<mode>3"
5905 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5907 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5908 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5909 "(TARGET_SSE && !TARGET_SSE2)"
5910 "andnps\t{%2, %0|%0, %2}"
5911 [(set_attr "type" "sselog")
5912 (set_attr "mode" "V4SF")])
;; VEX-encoded and-not (vpandn) on 16-byte integer vectors.  Three-operand
;; form: the complemented operand 1 is a separate register from the
;; destination.
5914 (define_insn "*avx_andnot<mode>3"
5915 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5917 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
5918 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5920 "vpandn\t{%2, %1, %0|%0, %1, %2}"
5921 [(set_attr "type" "sselog")
5922 (set_attr "prefix" "vex")
5923 (set_attr "mode" "TI")])
;; And-not (pandn) on 16-byte integer vectors.  Two-operand form: the
;; complemented operand 1 is tied to the destination.
5925 (define_insn "sse2_andnot<mode>3"
5926 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5928 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5929 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5931 "pandn\t{%2, %0|%0, %2}"
5932 [(set_attr "type" "sselog")
5933 (set_attr "prefix_data16" "1")
5934 (set_attr "mode" "TI")])
;; And-not on a TFmode value held in an SSE register, emitted as pandn.
;; The complemented operand 1 is tied to the destination.
5936 (define_insn "*andnottf3"
5937 [(set (match_operand:TF 0 "register_operand" "=x")
5939 (not:TF (match_operand:TF 1 "register_operand" "0"))
5940 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5942 "pandn\t{%2, %0|%0, %2}"
5943 [(set_attr "type" "sselog")
5944 (set_attr "prefix_data16" "1")
5945 (set_attr "mode" "TI")])
;; Expander for the <code> binary operation on 16-byte integer vectors.
;; The preparation step only canonicalizes the operands with
;; ix86_fixup_binary_operands_no_copy.
5947 (define_expand "<code><mode>3"
5948 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5950 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5951 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5953 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX logical operation on the 32-byte integer vector modes, emitted as
;; the packed-single form v<plogicprefix>ps with the insn mode taken from
;; <avxvecpsmode>.  Operands 1 and 2 are commutative ("%").
5955 (define_insn "*avx_<code><mode>3"
5956 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5958 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
5959 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5961 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5962 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
5963 [(set_attr "type" "sselog")
5964 (set_attr "prefix" "vex")
5965 (set_attr "mode" "<avxvecpsmode>")])
;; Logical operation on 16-byte integer vectors for targets with SSE but
;; not SSE2, emitted as <plogicprefix>ps (insn mode V4SF).  Operand 1 is
;; tied to the destination and is commutative with operand 2.
5967 (define_insn "*sse_<code><mode>3"
5968 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5970 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5971 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5972 "(TARGET_SSE && !TARGET_SSE2)
5973 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5974 "<plogicprefix>ps\t{%2, %0|%0, %2}"
5975 [(set_attr "type" "sselog")
5976 (set_attr "mode" "V4SF")])
;; VEX-encoded logical operation (vp<plogicprefix>) on 16-byte integer
;; vectors.  Three-operand form; operands 1 and 2 are commutative ("%").
5978 (define_insn "*avx_<code><mode>3"
5979 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5981 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
5982 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5984 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5985 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
5986 [(set_attr "type" "sselog")
5987 (set_attr "prefix" "vex")
5988 (set_attr "mode" "TI")])
;; SSE2 logical operation (p<plogicprefix>) on 16-byte integer vectors.
;; Two-operand form: operand 1 is tied to the destination and is
;; commutative with operand 2.
5990 (define_insn "*sse2_<code><mode>3"
5991 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5993 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5994 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5995 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5996 "p<plogicprefix>\t{%2, %0|%0, %2}"
5997 [(set_attr "type" "sselog")
5998 (set_attr "prefix_data16" "1")
5999 (set_attr "mode" "TI")])
;; Expander for the <code> binary operation on TFmode values.  The
;; preparation step only canonicalizes the operands with
;; ix86_fixup_binary_operands_no_copy.
6001 (define_expand "<code>tf3"
6002 [(set (match_operand:TF 0 "register_operand" "")
6004 (match_operand:TF 1 "nonimmediate_operand" "")
6005 (match_operand:TF 2 "nonimmediate_operand" "")))]
6007 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 logical operation on a TFmode value held in an SSE register,
;; emitted as p<plogicprefix>.  Operand 1 is tied to the destination and
;; is commutative with operand 2.
6009 (define_insn "*<code>tf3"
6010 [(set (match_operand:TF 0 "register_operand" "=x")
6012 (match_operand:TF 1 "nonimmediate_operand" "%0")
6013 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6014 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6015 "p<plogicprefix>\t{%2, %0|%0, %2}"
6016 [(set_attr "type" "sselog")
6017 (set_attr "prefix_data16" "1")
6018 (set_attr "mode" "TI")])
6020 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6022 ;; Parallel integral element swizzling
6024 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6027 ;; op1 = abcdefghijklmnop
6028 ;; op2 = qrstuvwxyz012345
6029 ;; h1 = aqbrcsdteufvgwhx
6030 ;; l1 = iyjzk0l1m2n3o4p5
6031 ;; h2 = aiqybjrzcks0dlt1
6032 ;; l2 = emu2fnv3gow4hpx5
6033 ;; h3 = aeimquy2bfjnrvz3
6034 ;; l3 = cgkosw04dhlptx15
6035 ;; result = bdfhjlnprtvxz135
;; Truncating pack of two V8HI vectors into one V16QI.  Two strategies
;; appear below: a call to ix86_expand_sse5_pack, and a generic sequence
;; that views both inputs as V16QI and applies three rounds of high/low
;; byte interleaves followed by a final low interleave into operand 0.
6036 (define_expand "vec_pack_trunc_v8hi"
6037 [(match_operand:V16QI 0 "register_operand" "")
6038 (match_operand:V8HI 1 "register_operand" "")
6039 (match_operand:V8HI 2 "register_operand" "")]
6042 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6046 ix86_expand_sse5_pack (operands);
6050 op1 = gen_lowpart (V16QImode, operands[1]);
6051 op2 = gen_lowpart (V16QImode, operands[2]);
6052 h1 = gen_reg_rtx (V16QImode);
6053 l1 = gen_reg_rtx (V16QImode);
6054 h2 = gen_reg_rtx (V16QImode);
6055 l2 = gen_reg_rtx (V16QImode);
6056 h3 = gen_reg_rtx (V16QImode);
6057 l3 = gen_reg_rtx (V16QImode);
6059 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6060 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6061 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6062 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6063 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6064 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6065 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6076 ;; result = bdfhjlnp
;; Truncating pack of two V4SI vectors into one V8HI.  Two strategies
;; appear below: a call to ix86_expand_sse5_pack, and a generic sequence
;; that views both inputs as V8HI and applies two rounds of high/low word
;; interleaves followed by a final low interleave into operand 0.
6077 (define_expand "vec_pack_trunc_v4si"
6078 [(match_operand:V8HI 0 "register_operand" "")
6079 (match_operand:V4SI 1 "register_operand" "")
6080 (match_operand:V4SI 2 "register_operand" "")]
6083 rtx op1, op2, h1, l1, h2, l2;
6087 ix86_expand_sse5_pack (operands);
6091 op1 = gen_lowpart (V8HImode, operands[1]);
6092 op2 = gen_lowpart (V8HImode, operands[2]);
6093 h1 = gen_reg_rtx (V8HImode);
6094 l1 = gen_reg_rtx (V8HImode);
6095 h2 = gen_reg_rtx (V8HImode);
6096 l2 = gen_reg_rtx (V8HImode);
6098 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6099 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6100 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6101 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6102 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Truncating pack of two V2DI vectors into one V4SI.  Two strategies
;; appear below: a call to ix86_expand_sse5_pack, and a generic sequence
;; that views both inputs as V4SI and applies one round of high/low dword
;; interleaves followed by a final low interleave into operand 0.
6112 (define_expand "vec_pack_trunc_v2di"
6113 [(match_operand:V4SI 0 "register_operand" "")
6114 (match_operand:V2DI 1 "register_operand" "")
6115 (match_operand:V2DI 2 "register_operand" "")]
6118 rtx op1, op2, h1, l1;
6122 ix86_expand_sse5_pack (operands);
6126 op1 = gen_lowpart (V4SImode, operands[1]);
6127 op2 = gen_lowpart (V4SImode, operands[2]);
6128 h1 = gen_reg_rtx (V4SImode);
6129 l1 = gen_reg_rtx (V4SImode);
6131 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6132 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6133 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Interleave the high eight bytes of operands 1 and 2 (selector indices
;; 8,24, 9,25, ... 15,31); expands to gen_sse2_punpckhbw.
6137 (define_expand "vec_interleave_highv16qi"
6138 [(set (match_operand:V16QI 0 "register_operand" "")
6141 (match_operand:V16QI 1 "register_operand" "")
6142 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6143 (parallel [(const_int 8) (const_int 24)
6144 (const_int 9) (const_int 25)
6145 (const_int 10) (const_int 26)
6146 (const_int 11) (const_int 27)
6147 (const_int 12) (const_int 28)
6148 (const_int 13) (const_int 29)
6149 (const_int 14) (const_int 30)
6150 (const_int 15) (const_int 31)])))]
6153 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Interleave the low eight bytes of operands 1 and 2 (selector indices
;; 0,16, 1,17, ... 7,23); expands to gen_sse2_punpcklbw.
6157 (define_expand "vec_interleave_lowv16qi"
6158 [(set (match_operand:V16QI 0 "register_operand" "")
6161 (match_operand:V16QI 1 "register_operand" "")
6162 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6163 (parallel [(const_int 0) (const_int 16)
6164 (const_int 1) (const_int 17)
6165 (const_int 2) (const_int 18)
6166 (const_int 3) (const_int 19)
6167 (const_int 4) (const_int 20)
6168 (const_int 5) (const_int 21)
6169 (const_int 6) (const_int 22)
6170 (const_int 7) (const_int 23)])))]
6173 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Interleave the high four words of operands 1 and 2 (selector indices
;; 4,12, 5,13, 6,14, 7,15); expands to gen_sse2_punpckhwd.  Operand 0
;; uses an empty constraint string, as the other vec_interleave expanders
;; do; constraints carry no meaning in a define_expand.
6177 (define_expand "vec_interleave_highv8hi"
6178 [(set (match_operand:V8HI 0 "register_operand" "")
6181 (match_operand:V8HI 1 "register_operand" "")
6182 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6183 (parallel [(const_int 4) (const_int 12)
6184 (const_int 5) (const_int 13)
6185 (const_int 6) (const_int 14)
6186 (const_int 7) (const_int 15)])))]
6189 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Interleave the low four words of operands 1 and 2 (selector indices
;; 0,8, 1,9, 2,10, 3,11); expands to gen_sse2_punpcklwd.
6193 (define_expand "vec_interleave_lowv8hi"
6194 [(set (match_operand:V8HI 0 "register_operand" "")
6197 (match_operand:V8HI 1 "register_operand" "")
6198 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6199 (parallel [(const_int 0) (const_int 8)
6200 (const_int 1) (const_int 9)
6201 (const_int 2) (const_int 10)
6202 (const_int 3) (const_int 11)])))]
6205 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Interleave the high two dwords of operands 1 and 2 (selector indices
;; 2,6, 3,7); expands to gen_sse2_punpckhdq.
6209 (define_expand "vec_interleave_highv4si"
6210 [(set (match_operand:V4SI 0 "register_operand" "")
6213 (match_operand:V4SI 1 "register_operand" "")
6214 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6215 (parallel [(const_int 2) (const_int 6)
6216 (const_int 3) (const_int 7)])))]
6219 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Interleave the low two dwords of operands 1 and 2 (selector indices
;; 0,4, 1,5); expands to gen_sse2_punpckldq.
6223 (define_expand "vec_interleave_lowv4si"
6224 [(set (match_operand:V4SI 0 "register_operand" "")
6227 (match_operand:V4SI 1 "register_operand" "")
6228 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6229 (parallel [(const_int 0) (const_int 4)
6230 (const_int 1) (const_int 5)])))]
6233 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Interleave the high quadwords of operands 1 and 2 (the selector starts
;; at index 1); expands to gen_sse2_punpckhqdq.
6237 (define_expand "vec_interleave_highv2di"
6238 [(set (match_operand:V2DI 0 "register_operand" "")
6241 (match_operand:V2DI 1 "register_operand" "")
6242 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6243 (parallel [(const_int 1)
6247 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Interleave the low quadwords of operands 1 and 2 (the selector starts
;; at index 0); expands to gen_sse2_punpcklqdq.
6251 (define_expand "vec_interleave_lowv2di"
6252 [(set (match_operand:V2DI 0 "register_operand" "")
6255 (match_operand:V2DI 1 "register_operand" "")
6256 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6257 (parallel [(const_int 0)
6261 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Interleave the high two single-precision elements of operands 1 and 2
;; (selector indices 2,6, 3,7).
6265 (define_expand "vec_interleave_highv4sf"
6266 [(set (match_operand:V4SF 0 "register_operand" "")
6269 (match_operand:V4SF 1 "register_operand" "")
6270 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6271 (parallel [(const_int 2) (const_int 6)
6272 (const_int 3) (const_int 7)])))]
;; Expander "vec_interleave_lowv4sf": V4SF operands, element selection
;; 0,4,1,5 (same index list as vec_interleave_lowv4si).
;; NOTE(review): no emit_insn call accompanies this pattern, so the RTL
;; template is presumably emitted as-is -- confirm.
6275 (define_expand "vec_interleave_lowv4sf"
6276 [(set (match_operand:V4SF 0 "register_operand" "")
6279 (match_operand:V4SF 1 "register_operand" "")
6280 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6281 (parallel [(const_int 0) (const_int 4)
6282 (const_int 1) (const_int 5)])))]
;; Expander "vec_interleave_highv2df": V2DF operands; the selection list
;; starts with element 1.
;; NOTE(review): no emit_insn call accompanies this pattern, so the RTL
;; template is presumably emitted as-is -- confirm.
6285 (define_expand "vec_interleave_highv2df"
6286 [(set (match_operand:V2DF 0 "register_operand" "")
6289 (match_operand:V2DF 1 "register_operand" "")
6290 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6291 (parallel [(const_int 1)
;; Expander "vec_interleave_lowv2df": V2DF operands; the selection list
;; starts with element 0.
;; NOTE(review): no emit_insn call accompanies this pattern, so the RTL
;; template is presumably emitted as-is -- confirm.
6295 (define_expand "vec_interleave_lowv2df"
6296 [(set (match_operand:V2DF 0 "register_operand" "")
6299 (match_operand:V2DF 1 "register_operand" "")
6300 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6301 (parallel [(const_int 0)
;; AVX form of the V8HI x V8HI -> V16QI pack: emits three-operand
;; vpacksswb.  Operand 1 may be any SSE register ("x"), i.e. it is not
;; tied to the destination; operand 2 may be a register or memory.
6305 (define_insn "*avx_packsswb"
6306 [(set (match_operand:V16QI 0 "register_operand" "=x")
6309 (match_operand:V8HI 1 "register_operand" "x"))
6311 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6313 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6314 [(set_attr "type" "sselog")
6315 (set_attr "prefix" "vex")
6316 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI x V8HI -> V16QI pack: emits two-operand
;; packsswb.  Operand 1 is tied to the destination (constraint "0"),
;; which is why the template only prints %0 and %2.
6318 (define_insn "sse2_packsswb"
6319 [(set (match_operand:V16QI 0 "register_operand" "=x")
6322 (match_operand:V8HI 1 "register_operand" "0"))
6324 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6326 "packsswb\t{%2, %0|%0, %2}"
6327 [(set_attr "type" "sselog")
6328 (set_attr "prefix_data16" "1")
6329 (set_attr "mode" "TI")])
;; AVX form of the V4SI x V4SI -> V8HI pack: emits three-operand
;; vpackssdw; operand 1 is an independent SSE register, operand 2 a
;; register or memory.
6331 (define_insn "*avx_packssdw"
6332 [(set (match_operand:V8HI 0 "register_operand" "=x")
6335 (match_operand:V4SI 1 "register_operand" "x"))
6337 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6339 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6340 [(set_attr "type" "sselog")
6341 (set_attr "prefix" "vex")
6342 (set_attr "mode" "TI")])
;; SSE2 form of the V4SI x V4SI -> V8HI pack: emits two-operand
;; packssdw with operand 1 tied to the destination (constraint "0").
6344 (define_insn "sse2_packssdw"
6345 [(set (match_operand:V8HI 0 "register_operand" "=x")
6348 (match_operand:V4SI 1 "register_operand" "0"))
6350 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6352 "packssdw\t{%2, %0|%0, %2}"
6353 [(set_attr "type" "sselog")
6354 (set_attr "prefix_data16" "1")
6355 (set_attr "mode" "TI")])
;; AVX form of the V8HI x V8HI -> V16QI pack emitted as three-operand
;; vpackuswb; operand 1 is an independent SSE register, operand 2 a
;; register or memory.
6357 (define_insn "*avx_packuswb"
6358 [(set (match_operand:V16QI 0 "register_operand" "=x")
6361 (match_operand:V8HI 1 "register_operand" "x"))
6363 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6365 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6366 [(set_attr "type" "sselog")
6367 (set_attr "prefix" "vex")
6368 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI x V8HI -> V16QI pack emitted as two-operand
;; packuswb with operand 1 tied to the destination (constraint "0").
6370 (define_insn "sse2_packuswb"
6371 [(set (match_operand:V16QI 0 "register_operand" "=x")
6374 (match_operand:V8HI 1 "register_operand" "0"))
6376 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6378 "packuswb\t{%2, %0|%0, %2}"
6379 [(set_attr "type" "sselog")
6380 (set_attr "prefix_data16" "1")
6381 (set_attr "mode" "TI")])
;; AVX byte interleave (high half): three-operand vpunpckhbw on V16QI.
;; The selection alternates indices 8..15 with 24..31; indices >= 16
;; presumably address operand 2 -- confirm against the full pattern.
;; Operand 1 is not tied to the destination.
6383 (define_insn "*avx_punpckhbw"
6384 [(set (match_operand:V16QI 0 "register_operand" "=x")
6387 (match_operand:V16QI 1 "register_operand" "x")
6388 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6389 (parallel [(const_int 8) (const_int 24)
6390 (const_int 9) (const_int 25)
6391 (const_int 10) (const_int 26)
6392 (const_int 11) (const_int 27)
6393 (const_int 12) (const_int 28)
6394 (const_int 13) (const_int 29)
6395 (const_int 14) (const_int 30)
6396 (const_int 15) (const_int 31)])))]
6398 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6399 [(set_attr "type" "sselog")
6400 (set_attr "prefix" "vex")
6401 (set_attr "mode" "TI")])
;; SSE2 byte interleave (high half): two-operand punpckhbw on V16QI.
;; Same selection list as *avx_punpckhbw (8..15 alternating with 24..31),
;; but operand 1 is tied to the destination (constraint "0").
6403 (define_insn "sse2_punpckhbw"
6404 [(set (match_operand:V16QI 0 "register_operand" "=x")
6407 (match_operand:V16QI 1 "register_operand" "0")
6408 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6409 (parallel [(const_int 8) (const_int 24)
6410 (const_int 9) (const_int 25)
6411 (const_int 10) (const_int 26)
6412 (const_int 11) (const_int 27)
6413 (const_int 12) (const_int 28)
6414 (const_int 13) (const_int 29)
6415 (const_int 14) (const_int 30)
6416 (const_int 15) (const_int 31)])))]
6418 "punpckhbw\t{%2, %0|%0, %2}"
6419 [(set_attr "type" "sselog")
6420 (set_attr "prefix_data16" "1")
6421 (set_attr "mode" "TI")])
;; AVX byte interleave (low half): three-operand vpunpcklbw on V16QI.
;; The selection alternates indices 0..7 with 16..23; indices >= 16
;; presumably address operand 2 -- confirm against the full pattern.
6423 (define_insn "*avx_punpcklbw"
6424 [(set (match_operand:V16QI 0 "register_operand" "=x")
6427 (match_operand:V16QI 1 "register_operand" "x")
6428 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6429 (parallel [(const_int 0) (const_int 16)
6430 (const_int 1) (const_int 17)
6431 (const_int 2) (const_int 18)
6432 (const_int 3) (const_int 19)
6433 (const_int 4) (const_int 20)
6434 (const_int 5) (const_int 21)
6435 (const_int 6) (const_int 22)
6436 (const_int 7) (const_int 23)])))]
6438 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6439 [(set_attr "type" "sselog")
6440 (set_attr "prefix" "vex")
6441 (set_attr "mode" "TI")])
;; SSE2 byte interleave (low half): two-operand punpcklbw on V16QI.
;; Same selection list as *avx_punpcklbw (0..7 alternating with 16..23),
;; with operand 1 tied to the destination (constraint "0").
6443 (define_insn "sse2_punpcklbw"
6444 [(set (match_operand:V16QI 0 "register_operand" "=x")
6447 (match_operand:V16QI 1 "register_operand" "0")
6448 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6449 (parallel [(const_int 0) (const_int 16)
6450 (const_int 1) (const_int 17)
6451 (const_int 2) (const_int 18)
6452 (const_int 3) (const_int 19)
6453 (const_int 4) (const_int 20)
6454 (const_int 5) (const_int 21)
6455 (const_int 6) (const_int 22)
6456 (const_int 7) (const_int 23)])))]
6458 "punpcklbw\t{%2, %0|%0, %2}"
6459 [(set_attr "type" "sselog")
6460 (set_attr "prefix_data16" "1")
6461 (set_attr "mode" "TI")])
;; AVX word interleave (high half): three-operand vpunpckhwd on V8HI,
;; selection 4,12,5,13,6,14,7,15.  Operand 1 is not tied to the
;; destination.
6463 (define_insn "*avx_punpckhwd"
6464 [(set (match_operand:V8HI 0 "register_operand" "=x")
6467 (match_operand:V8HI 1 "register_operand" "x")
6468 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6469 (parallel [(const_int 4) (const_int 12)
6470 (const_int 5) (const_int 13)
6471 (const_int 6) (const_int 14)
6472 (const_int 7) (const_int 15)])))]
6474 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6475 [(set_attr "type" "sselog")
6476 (set_attr "prefix" "vex")
6477 (set_attr "mode" "TI")])
;; SSE2 word interleave (high half): two-operand punpckhwd on V8HI,
;; selection 4,12,5,13,6,14,7,15, operand 1 tied to the destination.
6479 (define_insn "sse2_punpckhwd"
6480 [(set (match_operand:V8HI 0 "register_operand" "=x")
6483 (match_operand:V8HI 1 "register_operand" "0")
6484 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6485 (parallel [(const_int 4) (const_int 12)
6486 (const_int 5) (const_int 13)
6487 (const_int 6) (const_int 14)
6488 (const_int 7) (const_int 15)])))]
6490 "punpckhwd\t{%2, %0|%0, %2}"
6491 [(set_attr "type" "sselog")
6492 (set_attr "prefix_data16" "1")
6493 (set_attr "mode" "TI")])
;; AVX word interleave (low half): three-operand vpunpcklwd on V8HI,
;; selection 0,8,1,9,2,10,3,11.  Operand 1 is not tied to the
;; destination.
6495 (define_insn "*avx_punpcklwd"
6496 [(set (match_operand:V8HI 0 "register_operand" "=x")
6499 (match_operand:V8HI 1 "register_operand" "x")
6500 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6501 (parallel [(const_int 0) (const_int 8)
6502 (const_int 1) (const_int 9)
6503 (const_int 2) (const_int 10)
6504 (const_int 3) (const_int 11)])))]
6506 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6507 [(set_attr "type" "sselog")
6508 (set_attr "prefix" "vex")
6509 (set_attr "mode" "TI")])
;; SSE2 word interleave (low half): two-operand punpcklwd on V8HI,
;; selection 0,8,1,9,2,10,3,11, operand 1 tied to the destination.
;; The vec_interleave_lowv8hi expander emits this pattern.
6511 (define_insn "sse2_punpcklwd"
6512 [(set (match_operand:V8HI 0 "register_operand" "=x")
6515 (match_operand:V8HI 1 "register_operand" "0")
6516 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6517 (parallel [(const_int 0) (const_int 8)
6518 (const_int 1) (const_int 9)
6519 (const_int 2) (const_int 10)
6520 (const_int 3) (const_int 11)])))]
6522 "punpcklwd\t{%2, %0|%0, %2}"
6523 [(set_attr "type" "sselog")
6524 (set_attr "prefix_data16" "1")
6525 (set_attr "mode" "TI")])
;; AVX dword interleave (high half): three-operand vpunpckhdq on V4SI,
;; selection 2,6,3,7.  Operand 1 is not tied to the destination.
6527 (define_insn "*avx_punpckhdq"
6528 [(set (match_operand:V4SI 0 "register_operand" "=x")
6531 (match_operand:V4SI 1 "register_operand" "x")
6532 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6533 (parallel [(const_int 2) (const_int 6)
6534 (const_int 3) (const_int 7)])))]
6536 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6537 [(set_attr "type" "sselog")
6538 (set_attr "prefix" "vex")
6539 (set_attr "mode" "TI")])
;; SSE2 dword interleave (high half): two-operand punpckhdq on V4SI,
;; selection 2,6,3,7, operand 1 tied to the destination.
;; The vec_interleave_highv4si expander emits this pattern.
6541 (define_insn "sse2_punpckhdq"
6542 [(set (match_operand:V4SI 0 "register_operand" "=x")
6545 (match_operand:V4SI 1 "register_operand" "0")
6546 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6547 (parallel [(const_int 2) (const_int 6)
6548 (const_int 3) (const_int 7)])))]
6550 "punpckhdq\t{%2, %0|%0, %2}"
6551 [(set_attr "type" "sselog")
6552 (set_attr "prefix_data16" "1")
6553 (set_attr "mode" "TI")])
;; AVX dword interleave (low half): three-operand vpunpckldq on V4SI,
;; selection 0,4,1,5.  Operand 1 is not tied to the destination.
6555 (define_insn "*avx_punpckldq"
6556 [(set (match_operand:V4SI 0 "register_operand" "=x")
6559 (match_operand:V4SI 1 "register_operand" "x")
6560 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6561 (parallel [(const_int 0) (const_int 4)
6562 (const_int 1) (const_int 5)])))]
6564 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6565 [(set_attr "type" "sselog")
6566 (set_attr "prefix" "vex")
6567 (set_attr "mode" "TI")])
;; SSE2 dword interleave (low half): two-operand punpckldq on V4SI,
;; selection 0,4,1,5, operand 1 tied to the destination.
;; The vec_interleave_lowv4si expander emits this pattern.
6569 (define_insn "sse2_punpckldq"
6570 [(set (match_operand:V4SI 0 "register_operand" "=x")
6573 (match_operand:V4SI 1 "register_operand" "0")
6574 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6575 (parallel [(const_int 0) (const_int 4)
6576 (const_int 1) (const_int 5)])))]
6578 "punpckldq\t{%2, %0|%0, %2}"
6579 [(set_attr "type" "sselog")
6580 (set_attr "prefix_data16" "1")
6581 (set_attr "mode" "TI")])
;; AVX element insert, iterated over SSEMODE124 (V16QI, V8HI, V4SI).
;; The RTL is a vec_merge of a duplicated scalar (operand 2, register or
;; memory) into vector operand 1 under the one-hot mask operand 3.  The
;; output code converts that mask to an element index with exact_log2
;; before printing the four-operand vpinsr<suffix> instruction.
6583 (define_insn "*avx_pinsr<avxmodesuffixs>"
6584 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6585 (vec_merge:SSEMODE124
6586 (vec_duplicate:SSEMODE124
6587 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6588 (match_operand:SSEMODE124 1 "register_operand" "x")
6589 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6592 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6593 return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6595 [(set_attr "type" "sselog")
6596 (set_attr "prefix" "vex")
6597 (set_attr "mode" "TI")])
;; Byte insert into V16QI via pinsrb.  Operand 3 is a one-hot mask
;; (predicate const_pow2_1_to_32768_operand); the output code turns it
;; into an element index with exact_log2.  Operand 1 is tied to the
;; destination, and the scalar source is printed with the %k modifier.
6599 (define_insn "*sse4_1_pinsrb"
6600 [(set (match_operand:V16QI 0 "register_operand" "=x")
6602 (vec_duplicate:V16QI
6603 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6604 (match_operand:V16QI 1 "register_operand" "0")
6605 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6608 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6609 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6611 [(set_attr "type" "sselog")
6612 (set_attr "prefix_extra" "1")
6613 (set_attr "mode" "TI")])
;; Word insert into V8HI via pinsrw.  Operand 3 is a one-hot mask
;; (predicate const_pow2_1_to_128_operand) converted to an element index
;; with exact_log2 in the output code; operand 1 is tied to the
;; destination.
6615 (define_insn "*sse2_pinsrw"
6616 [(set (match_operand:V8HI 0 "register_operand" "=x")
6619 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6620 (match_operand:V8HI 1 "register_operand" "0")
6621 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6624 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6625 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6627 [(set_attr "type" "sselog")
6628 (set_attr "prefix_data16" "1")
6629 (set_attr "mode" "TI")])
6631 ;; It must come before sse2_loadld since it is preferred.
;; Dword insert into V4SI via pinsrd.  Operand 3 is a one-hot mask
;; (predicate const_pow2_1_to_8_operand) converted to an element index
;; with exact_log2 in the output code; operand 1 is tied to the
;; destination.
6632 (define_insn "*sse4_1_pinsrd"
6633 [(set (match_operand:V4SI 0 "register_operand" "=x")
6636 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6637 (match_operand:V4SI 1 "register_operand" "0")
6638 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6641 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6642 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6644 [(set_attr "type" "sselog")
6645 (set_attr "prefix_extra" "1")
6646 (set_attr "mode" "TI")])
;; AVX qword insert into V2DI via four-operand vpinsrq; only enabled for
;; TARGET_AVX && TARGET_64BIT.  Operand 3 is a one-hot mask (1 or 2)
;; converted to an element index with exact_log2; operand 1 is not tied
;; to the destination.
6648 (define_insn "*avx_pinsrq"
6649 [(set (match_operand:V2DI 0 "register_operand" "=x")
6652 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6653 (match_operand:V2DI 1 "register_operand" "x")
6654 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6655 "TARGET_AVX && TARGET_64BIT"
6657 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6658 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6660 [(set_attr "type" "sselog")
6661 (set_attr "prefix" "vex")
6662 (set_attr "mode" "TI")])
;; SSE4.1 qword insert into V2DI via pinsrq; only enabled for
;; TARGET_SSE4_1 && TARGET_64BIT.  Operand 3 is a one-hot mask (1 or 2)
;; converted to an element index with exact_log2; operand 1 is tied to
;; the destination.
6664 (define_insn "*sse4_1_pinsrq"
6665 [(set (match_operand:V2DI 0 "register_operand" "=x")
6668 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6669 (match_operand:V2DI 1 "register_operand" "0")
6670 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6671 "TARGET_SSE4_1 && TARGET_64BIT"
6673 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6674 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6676 [(set_attr "type" "sselog")
6677 (set_attr "prefix_extra" "1")
6678 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into an SImode general register.
;; Operand 2 is the element index (0..15).  The template carries the
;; "%v" prefix and the "prefix" attribute is "maybe_vex".
6680 (define_insn "*sse4_1_pextrb"
6681 [(set (match_operand:SI 0 "register_operand" "=r")
6684 (match_operand:V16QI 1 "register_operand" "x")
6685 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6687 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6688 [(set_attr "type" "sselog")
6689 (set_attr "prefix_extra" "1")
6690 (set_attr "prefix" "maybe_vex")
6691 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register stored directly to a QImode memory
;; destination.  Operand 2 is the element index (0..15).
6693 (define_insn "*sse4_1_pextrb_memory"
6694 [(set (match_operand:QI 0 "memory_operand" "=m")
6696 (match_operand:V16QI 1 "register_operand" "x")
6697 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6699 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6700 [(set_attr "type" "sselog")
6701 (set_attr "prefix_extra" "1")
6702 (set_attr "prefix" "maybe_vex")
6703 (set_attr "mode" "TI")])
;; Word extract from a V8HI register into an SImode general register.
;; Operand 2 is the element index (0..7).  Uses the data16 prefix
;; attribute rather than prefix_extra, unlike the memory variant.
6705 (define_insn "*sse2_pextrw"
6706 [(set (match_operand:SI 0 "register_operand" "=r")
6709 (match_operand:V8HI 1 "register_operand" "x")
6710 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6712 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6713 [(set_attr "type" "sselog")
6714 (set_attr "prefix_data16" "1")
6715 (set_attr "prefix" "maybe_vex")
6716 (set_attr "mode" "TI")])
;; Word extract from a V8HI register stored directly to an HImode memory
;; destination.  Operand 2 is the element index (0..7).
6718 (define_insn "*sse4_1_pextrw_memory"
6719 [(set (match_operand:HI 0 "memory_operand" "=m")
6721 (match_operand:V8HI 1 "register_operand" "x")
6722 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6724 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6725 [(set_attr "type" "sselog")
6726 (set_attr "prefix_extra" "1")
6727 (set_attr "prefix" "maybe_vex")
6728 (set_attr "mode" "TI")])
;; Dword extract from a V4SI register; the SImode destination may be a
;; general register or memory ("=rm").  Operand 2 is the element index
;; (0..3).
6730 (define_insn "*sse4_1_pextrd"
6731 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6733 (match_operand:V4SI 1 "register_operand" "x")
6734 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6736 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6737 [(set_attr "type" "sselog")
6738 (set_attr "prefix_extra" "1")
6739 (set_attr "prefix" "maybe_vex")
6740 (set_attr "mode" "TI")])
6742 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Qword extract from a V2DI register into a DImode general register or
;; memory; enabled only for TARGET_SSE4_1 && TARGET_64BIT.  Operand 2 is
;; the element index (0 or 1).
6743 (define_insn "*sse4_1_pextrq"
6744 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6746 (match_operand:V2DI 1 "register_operand" "x")
6747 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6748 "TARGET_SSE4_1 && TARGET_64BIT"
6749 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6750 [(set_attr "type" "sselog")
6751 (set_attr "prefix_extra" "1")
6752 (set_attr "prefix" "maybe_vex")
6753 (set_attr "mode" "TI")])
;; Expander taking the packed 8-bit shuffle immediate (operand 2).  It
;; splits the immediate into four 2-bit element selectors (bits 1:0,
;; 3:2, 5:4, 7:6) and hands them to sse2_pshufd_1 as separate operands.
6755 (define_expand "sse2_pshufd"
6756 [(match_operand:V4SI 0 "register_operand" "")
6757 (match_operand:V4SI 1 "nonimmediate_operand" "")
6758 (match_operand:SI 2 "const_int_operand" "")]
6761 int mask = INTVAL (operands[2]);
6762 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6763 GEN_INT ((mask >> 0) & 3),
6764 GEN_INT ((mask >> 2) & 3),
6765 GEN_INT ((mask >> 4) & 3),
6766 GEN_INT ((mask >> 6) & 3)));
;; PSHUFD with the four dword selectors as separate operands 2..5, each
;; in the range 0..3.  The output code re-packs them into the 8-bit
;; immediate (selector k occupies bits 2k+1:2k) and stores it back in
;; operands[2] for printing.
;;
;; The template uses the conditional "%v" mnemonic prefix, so the
;; "prefix" attribute is "maybe_vex" -- matching sse2_pshuflw_1 and
;; sse2_pshufhw_1 -- rather than an unconditional "vex", which would
;; describe the non-AVX encoding incorrectly.
6770 (define_insn "sse2_pshufd_1"
6771 [(set (match_operand:V4SI 0 "register_operand" "=x")
6773 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6774 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6775 (match_operand 3 "const_0_to_3_operand" "")
6776 (match_operand 4 "const_0_to_3_operand" "")
6777 (match_operand 5 "const_0_to_3_operand" "")])))]
6781 mask |= INTVAL (operands[2]) << 0;
6782 mask |= INTVAL (operands[3]) << 2;
6783 mask |= INTVAL (operands[4]) << 4;
6784 mask |= INTVAL (operands[5]) << 6;
6785 operands[2] = GEN_INT (mask);
6787 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6789 [(set_attr "type" "sselog1")
6790 (set_attr "prefix_data16" "1")
6791 (set_attr "prefix" "maybe_vex")
6792 (set_attr "mode" "TI")])
;; Expander taking the packed 8-bit shuffle immediate (operand 2).  It
;; splits the immediate into four 2-bit selectors (values 0..3) and
;; hands them to sse2_pshuflw_1 as separate operands.
6794 (define_expand "sse2_pshuflw"
6795 [(match_operand:V8HI 0 "register_operand" "")
6796 (match_operand:V8HI 1 "nonimmediate_operand" "")
6797 (match_operand:SI 2 "const_int_operand" "")]
6800 int mask = INTVAL (operands[2]);
6801 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6802 GEN_INT ((mask >> 0) & 3),
6803 GEN_INT ((mask >> 2) & 3),
6804 GEN_INT ((mask >> 4) & 3),
6805 GEN_INT ((mask >> 6) & 3)));
;; PSHUFLW with four word selectors as operands 2..5, each 0..3.  The
;; output code re-packs them into the 8-bit immediate (selector k in
;; bits 2k+1:2k) and stores it back in operands[2] for printing.
6809 (define_insn "sse2_pshuflw_1"
6810 [(set (match_operand:V8HI 0 "register_operand" "=x")
6812 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6813 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6814 (match_operand 3 "const_0_to_3_operand" "")
6815 (match_operand 4 "const_0_to_3_operand" "")
6816 (match_operand 5 "const_0_to_3_operand" "")
6824 mask |= INTVAL (operands[2]) << 0;
6825 mask |= INTVAL (operands[3]) << 2;
6826 mask |= INTVAL (operands[4]) << 4;
6827 mask |= INTVAL (operands[5]) << 6;
6828 operands[2] = GEN_INT (mask);
6830 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6832 [(set_attr "type" "sselog")
6833 (set_attr "prefix_rep" "1")
6834 (set_attr "prefix" "maybe_vex")
6835 (set_attr "mode" "TI")])
;; Expander taking the packed 8-bit shuffle immediate (operand 2).  Each
;; 2-bit selector is biased by +4, giving values 4..7, before being
;; passed to sse2_pshufhw_1, whose selector operands use the
;; const_4_to_7_operand predicate.
6837 (define_expand "sse2_pshufhw"
6838 [(match_operand:V8HI 0 "register_operand" "")
6839 (match_operand:V8HI 1 "nonimmediate_operand" "")
6840 (match_operand:SI 2 "const_int_operand" "")]
6843 int mask = INTVAL (operands[2]);
6844 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6845 GEN_INT (((mask >> 0) & 3) + 4),
6846 GEN_INT (((mask >> 2) & 3) + 4),
6847 GEN_INT (((mask >> 4) & 3) + 4),
6848 GEN_INT (((mask >> 6) & 3) + 4)));
;; PSHUFHW with four word selectors as operands 2..5, each 4..7.  The
;; selection list begins with the constant element 0 ahead of the
;; variable selectors.  The output code removes the +4 bias and re-packs
;; the selectors into the 8-bit immediate, stored back in operands[2].
6852 (define_insn "sse2_pshufhw_1"
6853 [(set (match_operand:V8HI 0 "register_operand" "=x")
6855 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6856 (parallel [(const_int 0)
6860 (match_operand 2 "const_4_to_7_operand" "")
6861 (match_operand 3 "const_4_to_7_operand" "")
6862 (match_operand 4 "const_4_to_7_operand" "")
6863 (match_operand 5 "const_4_to_7_operand" "")])))]
6867 mask |= (INTVAL (operands[2]) - 4) << 0;
6868 mask |= (INTVAL (operands[3]) - 4) << 2;
6869 mask |= (INTVAL (operands[4]) - 4) << 4;
6870 mask |= (INTVAL (operands[5]) - 4) << 6;
6871 operands[2] = GEN_INT (mask);
6873 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6875 [(set_attr "type" "sselog")
6876 (set_attr "prefix_rep" "1")
6877 (set_attr "prefix" "maybe_vex")
6878 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1, register or memory)
;; into a V4SI register.  The preparation statement supplies operand 2
;; as the all-zero V4SI constant.
6880 (define_expand "sse2_loadd"
6881 [(set (match_operand:V4SI 0 "register_operand" "")
6884 (match_operand:SI 1 "nonimmediate_operand" ""))
6888 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX load of an SImode scalar (operand 2) into a V4SI register.
;; Alternatives:
;;   0: scalar in memory, operand 1 constant zero ("C") -> vmovd
;;   1: scalar in a general register, destination class "Yi",
;;      operand 1 constant zero -> vmovd
;;   2: scalar in an SSE register, operand 1 an SSE register -> vmovss
;;      with operand 1 as the additional source (mode V4SF)
6890 (define_insn "*avx_loadld"
6891 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
6894 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
6895 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
6899 vmovd\t{%2, %0|%0, %2}
6900 vmovd\t{%2, %0|%0, %2}
6901 vmovss\t{%2, %1, %0|%0, %1, %2}"
6902 [(set_attr "type" "ssemov")
6903 (set_attr "prefix" "vex")
6904 (set_attr "mode" "TI,TI,V4SF")])
;; Non-AVX load of an SImode scalar (operand 2) into a V4SI register.
;; Alternatives:
;;   0: memory source, destination class "Y2", operand 1 zero -> movd
;;   1: general-register source, destination class "Yi", operand 1
;;      zero -> movd
;;   2: memory source, any SSE destination, operand 1 zero -> movss
;;   3: SSE-register source, operand 1 tied to the destination -> movss
6906 (define_insn "sse2_loadld"
6907 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
6910 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
6911 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
6915 movd\t{%2, %0|%0, %2}
6916 movd\t{%2, %0|%0, %2}
6917 movss\t{%2, %0|%0, %2}
6918 movss\t{%2, %0|%0, %2}"
6919 [(set_attr "type" "ssemov")
6920 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract element 0 of a V4SI register into an SImode destination
;; (memory, SSE register, or general register).  After reload the insn
;; is split into a plain SImode move whenever inter-unit moves are
;; enabled, the destination is memory, or the destination is not a
;; general register; the split re-types operand 1's hard register as
;; SImode.
6922 (define_insn_and_split "sse2_stored"
6923 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
6925 (match_operand:V4SI 1 "register_operand" "x,Yi")
6926 (parallel [(const_int 0)])))]
6929 "&& reload_completed
6930 && (TARGET_INTER_UNIT_MOVES
6931 || MEM_P (operands [0])
6932 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6933 [(set (match_dup 0) (match_dup 1))]
6935 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element operand 2 (0..3) of a V4SI value held in offsettable
;; memory ("o") into an SImode general register.  The split replaces the
;; extract with a scalar SImode move from the memory operand re-addressed
;; at byte offset 4 * index.
6938 (define_insn_and_split "*vec_ext_v4si_mem"
6939 [(set (match_operand:SI 0 "register_operand" "=r")
6941 (match_operand:V4SI 1 "memory_operand" "o")
6942 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6948 int i = INTVAL (operands[2]);
6950 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for storing element 0 of a V2DI register into a DImode
;; register-or-memory destination.
6954 (define_expand "sse_storeq"
6955 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6957 (match_operand:V2DI 1 "register_operand" "")
6958 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 V2DI -> DImode store.  Three
;; alternatives: SSE register to memory/SSE register, "Yi" register to a
;; general register, and offsettable memory to a general register.  Only
;; the last alternative has a real type/prefix/mode ("imov",
;; "maybe_vex", "DI"); the first two are marked "*".  The condition
;; rejects memory-to-memory.
6962 (define_insn "*sse2_storeq_rex64"
6963 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
6965 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6966 (parallel [(const_int 0)])))]
6967 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6971 %vmov{q}\t{%1, %0|%0, %1}"
6972 [(set_attr "type" "*,*,imov")
6973 (set_attr "prefix" "*,*,maybe_vex")
6974 (set_attr "mode" "*,*,DI")])
;; Element-0 V2DI -> DImode store from an SSE register to memory or
;; another SSE register ("=mx").
6976 (define_insn "*sse2_storeq"
6977 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
6979 (match_operand:V2DI 1 "register_operand" "x")
6980 (parallel [(const_int 0)])))]
;; Split for the element-0 V2DI -> DImode extract: when inter-unit moves
;; are enabled, the destination is memory, or the destination is not a
;; general register, the extract becomes a plain DImode move with
;; operand 1's hard register re-typed as DImode.
6985 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6987 (match_operand:V2DI 1 "register_operand" "")
6988 (parallel [(const_int 0)])))]
6991 && (TARGET_INTER_UNIT_MOVES
6992 || MEM_P (operands [0])
6993 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6994 [(set (match_dup 0) (match_dup 1))]
6996 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; AVX extract of element 1 of a V2DI value into DImode.  Alternatives:
;;   0: SSE register -> memory, vmovhps
;;   1: SSE register -> SSE register, vpsrldq by 8 bytes
;;   2: offsettable memory -> SSE register, vmovq using %H1
;;   3: offsettable memory -> general register, vmov{q} using %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the
;; memory operand -- confirm against the operand-printing code.
;; Memory-to-memory is rejected by the condition.
6999 (define_insn "*vec_extractv2di_1_rex64_avx"
7000 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7002 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7003 (parallel [(const_int 1)])))]
7006 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7008 vmovhps\t{%1, %0|%0, %1}
7009 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7010 vmovq\t{%H1, %0|%0, %H1}
7011 vmov{q}\t{%H1, %0|%0, %H1}"
7012 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7013 (set_attr "memory" "*,none,*,*")
7014 (set_attr "prefix" "vex")
7015 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit non-AVX extract of element 1 of a V2DI value into DImode.
;; Same four alternatives as the AVX variant, but the register-to-
;; register shift (psrldq by 8) works in place, so operand 1 is tied to
;; the destination in that alternative.  Memory-to-memory is rejected.
7017 (define_insn "*vec_extractv2di_1_rex64"
7018 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7020 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7021 (parallel [(const_int 1)])))]
7022 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7024 movhps\t{%1, %0|%0, %1}
7025 psrldq\t{$8, %0|%0, 8}
7026 movq\t{%H1, %0|%0, %H1}
7027 mov{q}\t{%H1, %0|%0, %H1}"
7028 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7029 (set_attr "memory" "*,none,*,*")
7030 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX extract of element 1 of a V2DI value into DImode, without a
;; general-register alternative.  Alternatives: vmovhps to memory,
;; vpsrldq by 8 between SSE registers, vmovq from offsettable memory via
;; %H1.  Memory-to-memory is rejected by the condition.
7032 (define_insn "*vec_extractv2di_1_avx"
7033 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7035 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7036 (parallel [(const_int 1)])))]
7039 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7041 vmovhps\t{%1, %0|%0, %1}
7042 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7043 vmovq\t{%H1, %0|%0, %H1}"
7044 [(set_attr "type" "ssemov,sseishft,ssemov")
7045 (set_attr "memory" "*,none,*")
7046 (set_attr "prefix" "vex")
7047 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 extract of element 1 of a V2DI value into DImode, without a
;; general-register alternative.  Alternatives: movhps to memory,
;; in-place psrldq by 8 (operand 1 tied to the destination), movq from
;; offsettable memory via %H1.  Requires TARGET_SSE2 and rejects
;; memory-to-memory.
7049 (define_insn "*vec_extractv2di_1_sse2"
7050 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7052 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7053 (parallel [(const_int 1)])))]
7055 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7057 movhps\t{%1, %0|%0, %1}
7058 psrldq\t{$8, %0|%0, 8}
7059 movq\t{%H1, %0|%0, %H1}"
7060 [(set_attr "type" "ssemov,sseishft,ssemov")
7061 (set_attr "memory" "*,none,*")
7062 (set_attr "mode" "V2SF,TI,TI")])
7064 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extract of element 1 of a
;; V2DI value into DImode, using only single-precision move
;; instructions: movhps to memory, movhlps between SSE registers, movlps
;; from offsettable memory via %H1.  Memory-to-memory is rejected.
7065 (define_insn "*vec_extractv2di_1_sse"
7066 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7068 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7069 (parallel [(const_int 1)])))]
7070 "!TARGET_SSE2 && TARGET_SSE
7071 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7073 movhps\t{%1, %0|%0, %1}
7074 movhlps\t{%1, %0|%0, %1}
7075 movlps\t{%H1, %0|%0, %H1}"
7076 [(set_attr "type" "ssemov")
7077 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcast of an SImode register value into a V4SI register.
;; Alternatives:
;;   0: "Y2" registers, pshufd with immediate 0 ("%v" prefix, maybe_vex)
;;   1: operand 1 tied to the destination, in-place shufps with
;;      immediate 0 (prefix "orig", mode V4SF)
7079 (define_insn "*vec_dupv4si"
7080 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7082 (match_operand:SI 1 "register_operand" " Y2,0")))]
7085 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7086 shufps\t{$0, %0, %0|%0, %0, 0}"
7087 [(set_attr "type" "sselog1")
7088 (set_attr "prefix" "maybe_vex,orig")
7089 (set_attr "mode" "TI,V4SF")])
;; AVX broadcast of a DImode value held in an SSE register into a V2DI
;; register: vpunpcklqdq with operand 1 used as both sources.
7091 (define_insn "*vec_dupv2di_avx"
7092 [(set (match_operand:V2DI 0 "register_operand" "=x")
7094 (match_operand:DI 1 "register_operand" "x")))]
7096 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7097 [(set_attr "type" "sselog1")
7098 (set_attr "prefix" "vex")
7099 (set_attr "mode" "TI")])
;; Non-AVX broadcast of a DImode value into a V2DI register.  Operand 1
;; is tied to the destination in both alternatives; the first uses the
;; "Y2" class (type sselog1, mode TI), the second any SSE register (type
;; ssemov, mode V4SF).
7101 (define_insn "*vec_dupv2di"
7102 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7104 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7109 [(set_attr "type" "sselog1,ssemov")
7110 (set_attr "mode" "TI,V4SF")])
;; AVX concatenation of two SImode values into V2SI.  Alternatives:
;;   0: vpinsrd $1 (operand 2 in a general register or memory)
;;   1: vpunpckldq (both operands in SSE registers)
;;   2: vmovd (operand 2 is zero, "C")
;;   3: MMX punpckldq (operand 1 tied to the MMX destination)
;;   4: MMX movd (operand 2 is zero)
;; The MMX alternatives 3 and 4 get prefix "orig"; the rest get "vex".
7112 (define_insn "*vec_concatv2si_avx"
7113 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7115 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7116 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7119 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7120 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7121 vmovd\t{%1, %0|%0, %1}
7122 punpckldq\t{%2, %0|%0, %2}
7123 movd\t{%1, %0|%0, %1}"
7124 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7125 (set (attr "prefix")
7126 (if_then_else (eq_attr "alternative" "3,4")
7127 (const_string "orig")
7128 (const_string "vex")))
7129 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE4.1 concatenation of two SImode values into V2SI.  Same five
;; alternatives as the AVX variant, but in two-operand form: operand 1
;; is tied to the destination for pinsrd and punpckldq.  Only the pinsrd
;; alternative sets prefix_extra.
7131 (define_insn "*vec_concatv2si_sse4_1"
7132 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7134 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7135 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7138 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7139 punpckldq\t{%2, %0|%0, %2}
7140 movd\t{%1, %0|%0, %1}
7141 punpckldq\t{%2, %0|%0, %2}
7142 movd\t{%1, %0|%0, %1}"
7143 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7144 (set_attr "prefix_extra" "1,*,*,*,*")
7145 (set_attr "mode" "TI,TI,TI,DI,DI")])
7147 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7148 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7149 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 concatenation of two SImode values into V2SI.  Operand 2 is a
;; register or zero (reg_or_0_operand).  Alternatives: SSE punpckldq
;; (operand 1 tied), SSE movd (operand 2 zero), MMX punpckldq
;; (operand 1 tied), MMX movd (operand 2 zero).
7150 (define_insn "*vec_concatv2si_sse2"
7151 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7153 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7154 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7157 punpckldq\t{%2, %0|%0, %2}
7158 movd\t{%1, %0|%0, %1}
7159 punpckldq\t{%2, %0|%0, %2}
7160 movd\t{%1, %0|%0, %1}"
7161 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7162 (set_attr "mode" "TI,TI,DI,DI")])
;; SSE1 concatenation of two SImode values into V2SI.  The SSE
;; alternatives use single-precision instructions (unpcklps with
;; operand 1 tied, movss from memory with operand 2 zero); the MMX
;; alternatives use punpckldq and movd.
7164 (define_insn "*vec_concatv2si_sse"
7165 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7167 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7168 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7171 unpcklps\t{%2, %0|%0, %2}
7172 movss\t{%1, %0|%0, %1}
7173 punpckldq\t{%2, %0|%0, %2}
7174 movd\t{%1, %0|%0, %1}"
7175 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7176 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX concatenation of two V2SI values into V4SI.  Alternatives:
;; vpunpcklqdq when operand 2 is an SSE register, vmovhps when operand 2
;; is in memory.  Operand 1 is an independent SSE register in both.
7178 (define_insn "*vec_concatv4si_1_avx"
7179 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7181 (match_operand:V2SI 1 "register_operand" " x,x")
7182 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7185 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7186 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7187 [(set_attr "type" "sselog,ssemov")
7188 (set_attr "prefix" "vex")
7189 (set_attr "mode" "TI,V2SF")])
;; Non-AVX concatenation of two V2SI values into V4SI; operand 1 is tied
;; to the destination in every alternative.  Alternatives: punpcklqdq
;; ("Y2" registers), movlhps (SSE register source), movhps (memory
;; source).
7191 (define_insn "*vec_concatv4si_1"
7192 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7194 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7195 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7198 punpcklqdq\t{%2, %0|%0, %2}
7199 movlhps\t{%2, %0|%0, %2}
7200 movhps\t{%2, %0|%0, %2}"
7201 [(set_attr "type" "sselog,ssemov,ssemov")
7202 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX concatenation of two DImode values into V2DI
;; (!TARGET_64BIT && TARGET_AVX).  Alternatives:
;;   0: memory operand 1, operand 2 zero -> vmovq
;;   1: MMX register operand 1, operand 2 zero -> movq2dq (prefix "orig")
;;   2: both operands in SSE registers -> vpunpcklqdq
;;   3: operand 2 in memory -> vmovhps
7204 (define_insn "*vec_concatv2di_avx"
7205 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7207 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7208 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7209 "!TARGET_64BIT && TARGET_AVX"
7211 vmovq\t{%1, %0|%0, %1}
7212 movq2dq\t{%1, %0|%0, %1}
7213 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7214 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7215 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7216 (set (attr "prefix")
7217 (if_then_else (eq_attr "alternative" "1")
7218 (const_string "orig")
7219 (const_string "vex")))
7220 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit non-AVX concatenation of two DImode values into V2DI
;; (!TARGET_64BIT && TARGET_SSE).  Alternatives:
;;   0: memory or "Y2" operand 1, operand 2 zero -> movq
;;   1: MMX register operand 1, operand 2 zero -> movq2dq
;;   2: operand 1 tied, "Y2" operand 2 -> punpcklqdq
;;   3: operand 1 tied, SSE register operand 2 -> movlhps
;;   4: operand 1 tied, memory operand 2 -> movhps
7222 (define_insn "vec_concatv2di"
7223 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7225 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7226 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7227 "!TARGET_64BIT && TARGET_SSE"
7229 movq\t{%1, %0|%0, %1}
7230 movq2dq\t{%1, %0|%0, %1}
7231 punpcklqdq\t{%2, %0|%0, %2}
7232 movlhps\t{%2, %0|%0, %2}
7233 movhps\t{%2, %0|%0, %2}"
7234 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7235 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX concatenation of two DImode values into V2DI
;; (TARGET_64BIT && TARGET_AVX).  Alternatives:
;;   0: operand 2 in a general register or memory -> vpinsrq $1
;;   1: memory operand 1, operand 2 zero -> vmovq
;;   2: general-register operand 1 ("Yi" destination), operand 2 zero
;;      -> vmovq
;;   3: MMX register operand 1, operand 2 zero -> movq2dq (prefix "orig")
;;   4: both operands in SSE registers -> vpunpcklqdq
;;   5: operand 2 in memory -> vmovhps
7237 (define_insn "*vec_concatv2di_rex64_avx"
7238 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7240 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7241 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7242 "TARGET_64BIT && TARGET_AVX"
7244 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7245 vmovq\t{%1, %0|%0, %1}
7246 vmovq\t{%1, %0|%0, %1}
7247 movq2dq\t{%1, %0|%0, %1}
7248 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7249 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7250 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7251 (set (attr "prefix")
7252 (if_then_else (eq_attr "alternative" "3")
7253 (const_string "orig")
7254 (const_string "vex")))
7255 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 concatenation of two DImode values into V2DI
;; (TARGET_64BIT && TARGET_SSE4_1).  Seven alternatives: pinsrq $1
;; (operand 1 tied), movq from memory/SSE register, movq from a general
;; register, movq2dq from an MMX register, then punpcklqdq / movlhps /
;; movhps with operand 1 tied to the destination.  Only the pinsrq
;; alternative sets prefix_extra.
7257 (define_insn "*vec_concatv2di_rex64_sse4_1"
7258 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7260 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7261 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7262 "TARGET_64BIT && TARGET_SSE4_1"
7264 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7265 movq\t{%1, %0|%0, %1}
7266 movq\t{%1, %0|%0, %1}
7267 movq2dq\t{%1, %0|%0, %1}
7268 punpcklqdq\t{%2, %0|%0, %2}
7269 movlhps\t{%2, %0|%0, %2}
7270 movhps\t{%2, %0|%0, %2}"
7271 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7272 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7273 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit generic SSE concatenation of two DImode values into V2DI
;; (TARGET_64BIT && TARGET_SSE).  Six alternatives: movq from memory or
;; a "Y2" register, movq from a general register, movq2dq from an MMX
;; register (all with operand 2 zero), then punpcklqdq / movlhps /
;; movhps with operand 1 tied to the destination.
7275 (define_insn "*vec_concatv2di_rex64_sse"
7276 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7278 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7279 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7280 "TARGET_64BIT && TARGET_SSE"
7282 movq\t{%1, %0|%0, %1}
7283 movq\t{%1, %0|%0, %1}
7284 movq2dq\t{%1, %0|%0, %1}
7285 punpcklqdq\t{%2, %0|%0, %2}
7286 movlhps\t{%2, %0|%0, %2}
7287 movhps\t{%2, %0|%0, %2}"
7288 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7289 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander for V16QI -> V8HI unpacking.  Dispatches to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; or the generic ix86_expand_sse_unpack, always passing (true, true).
;; NOTE(review): judging by how the flags vary across the sibling
;; vec_unpack{u,s}_{hi,lo} expanders they appear to mean
;; (unsigned, high half) -- confirm against the helper definitions.
7291 (define_expand "vec_unpacku_hi_v16qi"
7292 [(match_operand:V8HI 0 "register_operand" "")
7293 (match_operand:V16QI 1 "register_operand" "")]
7297 ix86_expand_sse4_unpack (operands, true, true);
7298 else if (TARGET_SSE5)
7299 ix86_expand_sse5_unpack (operands, true, true);
7301 ix86_expand_sse_unpack (operands, true, true);
;; Expander for V16QI -> V8HI unpacking.  Dispatches to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; or the generic ix86_expand_sse_unpack, always passing (false, true).
;; NOTE(review): the flags appear to mean (unsigned, high half), so this
;; would be the signed/high variant -- confirm.
7305 (define_expand "vec_unpacks_hi_v16qi"
7306 [(match_operand:V8HI 0 "register_operand" "")
7307 (match_operand:V16QI 1 "register_operand" "")]
7311 ix86_expand_sse4_unpack (operands, false, true);
7312 else if (TARGET_SSE5)
7313 ix86_expand_sse5_unpack (operands, false, true);
7315 ix86_expand_sse_unpack (operands, false, true);
;; Expander for V16QI -> V8HI unpacking.  Dispatches to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; or the generic ix86_expand_sse_unpack, always passing (true, false).
;; NOTE(review): the flags appear to mean (unsigned, high half), so this
;; would be the unsigned/low variant -- confirm.
7319 (define_expand "vec_unpacku_lo_v16qi"
7320 [(match_operand:V8HI 0 "register_operand" "")
7321 (match_operand:V16QI 1 "register_operand" "")]
7325 ix86_expand_sse4_unpack (operands, true, false);
7326 else if (TARGET_SSE5)
7327 ix86_expand_sse5_unpack (operands, true, false);
7329 ix86_expand_sse_unpack (operands, true, false);
;; Expander for V16QI -> V8HI unpacking.  Dispatches to one of
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5)
;; or the generic ix86_expand_sse_unpack, always passing (false, false).
;; NOTE(review): the flags appear to mean (unsigned, high half), so this
;; would be the signed/low variant -- confirm.
7333 (define_expand "vec_unpacks_lo_v16qi"
7334 [(match_operand:V8HI 0 "register_operand" "")
7335 (match_operand:V16QI 1 "register_operand" "")]
7339 ix86_expand_sse4_unpack (operands, false, false);
7340 else if (TARGET_SSE5)
7341 ix86_expand_sse5_unpack (operands, false, false);
7343 ix86_expand_sse_unpack (operands, false, false);
;; Expander: operand 1 is a V8HI register, operand 0 a V4SI register.
;; Every visible helper call (sse4 / sse5 under TARGET_SSE5 / plain sse)
;; receives the flag pair (true, true).
;; NOTE(review): by the pattern name this presumably means unsigned extension
;; of the high half -- confirm against the helpers.
7347 (define_expand "vec_unpacku_hi_v8hi"
7348 [(match_operand:V4SI 0 "register_operand" "")
7349 (match_operand:V8HI 1 "register_operand" "")]
7353 ix86_expand_sse4_unpack (operands, true, true);
7354 else if (TARGET_SSE5)
7355 ix86_expand_sse5_unpack (operands, true, true);
7357 ix86_expand_sse_unpack (operands, true, true);
;; Expander: operand 1 is a V8HI register, operand 0 a V4SI register.
;; Every visible helper call (sse4 / sse5 under TARGET_SSE5 / plain sse)
;; receives the flag pair (false, true).
;; NOTE(review): by the pattern name this presumably means signed extension
;; of the high half -- confirm against the helpers.
7361 (define_expand "vec_unpacks_hi_v8hi"
7362 [(match_operand:V4SI 0 "register_operand" "")
7363 (match_operand:V8HI 1 "register_operand" "")]
7367 ix86_expand_sse4_unpack (operands, false, true);
7368 else if (TARGET_SSE5)
7369 ix86_expand_sse5_unpack (operands, false, true);
7371 ix86_expand_sse_unpack (operands, false, true);
;; Expander: operand 1 is a V8HI register, operand 0 a V4SI register.
;; Every visible helper call (sse4 / sse5 under TARGET_SSE5 / plain sse)
;; receives the flag pair (true, false).
;; NOTE(review): by the pattern name this presumably means unsigned extension
;; of the low half -- confirm against the helpers.
7375 (define_expand "vec_unpacku_lo_v8hi"
7376 [(match_operand:V4SI 0 "register_operand" "")
7377 (match_operand:V8HI 1 "register_operand" "")]
7381 ix86_expand_sse4_unpack (operands, true, false);
7382 else if (TARGET_SSE5)
7383 ix86_expand_sse5_unpack (operands, true, false);
7385 ix86_expand_sse_unpack (operands, true, false);
;; Expander: operand 1 is a V8HI register, operand 0 a V4SI register.
;; Every visible helper call (sse4 / sse5 under TARGET_SSE5 / plain sse)
;; receives the flag pair (false, false).
;; NOTE(review): by the pattern name this presumably means signed extension
;; of the low half -- confirm against the helpers.
7389 (define_expand "vec_unpacks_lo_v8hi"
7390 [(match_operand:V4SI 0 "register_operand" "")
7391 (match_operand:V8HI 1 "register_operand" "")]
7395 ix86_expand_sse4_unpack (operands, false, false);
7396 else if (TARGET_SSE5)
7397 ix86_expand_sse5_unpack (operands, false, false);
7399 ix86_expand_sse_unpack (operands, false, false);
;; Expander: operand 1 is a V4SI register, operand 0 a V2DI register.
;; Every visible helper call (sse4 / sse5 under TARGET_SSE5 / plain sse)
;; receives the flag pair (true, true).
;; NOTE(review): by the pattern name this presumably means unsigned extension
;; of the high half -- confirm against the helpers.
7403 (define_expand "vec_unpacku_hi_v4si"
7404 [(match_operand:V2DI 0 "register_operand" "")
7405 (match_operand:V4SI 1 "register_operand" "")]
7409 ix86_expand_sse4_unpack (operands, true, true);
7410 else if (TARGET_SSE5)
7411 ix86_expand_sse5_unpack (operands, true, true);
7413 ix86_expand_sse_unpack (operands, true, true);
;; Expander: operand 1 is a V4SI register, operand 0 a V2DI register.
;; Every visible helper call (sse4 / sse5 under TARGET_SSE5 / plain sse)
;; receives the flag pair (false, true).
;; NOTE(review): by the pattern name this presumably means signed extension
;; of the high half -- confirm against the helpers.
7417 (define_expand "vec_unpacks_hi_v4si"
7418 [(match_operand:V2DI 0 "register_operand" "")
7419 (match_operand:V4SI 1 "register_operand" "")]
7423 ix86_expand_sse4_unpack (operands, false, true);
7424 else if (TARGET_SSE5)
7425 ix86_expand_sse5_unpack (operands, false, true);
7427 ix86_expand_sse_unpack (operands, false, true);
;; Expander: operand 1 is a V4SI register, operand 0 a V2DI register.
;; Every visible helper call (sse4 / sse5 under TARGET_SSE5 / plain sse)
;; receives the flag pair (true, false).
;; NOTE(review): by the pattern name this presumably means unsigned extension
;; of the low half -- confirm against the helpers.
7431 (define_expand "vec_unpacku_lo_v4si"
7432 [(match_operand:V2DI 0 "register_operand" "")
7433 (match_operand:V4SI 1 "register_operand" "")]
7437 ix86_expand_sse4_unpack (operands, true, false);
7438 else if (TARGET_SSE5)
7439 ix86_expand_sse5_unpack (operands, true, false);
7441 ix86_expand_sse_unpack (operands, true, false);
;; Expander: operand 1 is a V4SI register, operand 0 a V2DI register.
;; Every visible helper call (sse4 / sse5 under TARGET_SSE5 / plain sse)
;; receives the flag pair (false, false).
;; NOTE(review): by the pattern name this presumably means signed extension
;; of the low half -- confirm against the helpers.
7445 (define_expand "vec_unpacks_lo_v4si"
7446 [(match_operand:V2DI 0 "register_operand" "")
7447 (match_operand:V4SI 1 "register_operand" "")]
7451 ix86_expand_sse4_unpack (operands, false, false);
7452 else if (TARGET_SSE5)
7453 ix86_expand_sse5_unpack (operands, false, false);
7455 ix86_expand_sse_unpack (operands, false, false);
7459 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7463 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for the V16QI pattern whose RTL combines two nonimmediate
;; V16QI inputs with a constant vector of sixteen ones.  Its preparation
;; statement runs ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, ...)
;; on the operands before the pattern is generated.
7465 (define_expand "sse2_uavgv16qi3"
7466 [(set (match_operand:V16QI 0 "register_operand" "")
7472 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7474 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7475 (const_vector:V16QI [(const_int 1) (const_int 1)
7476 (const_int 1) (const_int 1)
7477 (const_int 1) (const_int 1)
7478 (const_int 1) (const_int 1)
7479 (const_int 1) (const_int 1)
7480 (const_int 1) (const_int 1)
7481 (const_int 1) (const_int 1)
7482 (const_int 1) (const_int 1)]))
7485 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX encoding of the V16QI pattern: emits three-operand vpavgb.  Operand 1
;; may be any x register (the % marks operands 1 and 2 as commutative) and
;; operand 2 an x register or memory.  Requires TARGET_AVX and operands
;; accepted by ix86_binary_operator_ok (PLUS, V16QImode, ...).
7487 (define_insn "*avx_uavgv16qi3"
7488 [(set (match_operand:V16QI 0 "register_operand" "=x")
7494 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7496 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7497 (const_vector:V16QI [(const_int 1) (const_int 1)
7498 (const_int 1) (const_int 1)
7499 (const_int 1) (const_int 1)
7500 (const_int 1) (const_int 1)
7501 (const_int 1) (const_int 1)
7502 (const_int 1) (const_int 1)
7503 (const_int 1) (const_int 1)
7504 (const_int 1) (const_int 1)]))
7506 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7507 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7508 [(set_attr "type" "sseiadd")
7509 (set_attr "prefix" "vex")
7510 (set_attr "mode" "TI")])
;; SSE2 encoding of the V16QI pattern: emits two-operand pavgb, so operand 1
;; is tied to the destination (constraint %0, commutative with operand 2).
;; Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok (PLUS, V16QImode, ...).
7512 (define_insn "*sse2_uavgv16qi3"
7513 [(set (match_operand:V16QI 0 "register_operand" "=x")
7519 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7521 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7522 (const_vector:V16QI [(const_int 1) (const_int 1)
7523 (const_int 1) (const_int 1)
7524 (const_int 1) (const_int 1)
7525 (const_int 1) (const_int 1)
7526 (const_int 1) (const_int 1)
7527 (const_int 1) (const_int 1)
7528 (const_int 1) (const_int 1)
7529 (const_int 1) (const_int 1)]))
7531 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7532 "pavgb\t{%2, %0|%0, %2}"
7533 [(set_attr "type" "sseiadd")
7534 (set_attr "prefix_data16" "1")
7535 (set_attr "mode" "TI")])
;; Named expander for the V8HI pattern whose RTL combines two nonimmediate
;; V8HI inputs with a constant vector of eight ones.  Its preparation
;; statement runs ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, ...)
;; on the operands before the pattern is generated.
7537 (define_expand "sse2_uavgv8hi3"
7538 [(set (match_operand:V8HI 0 "register_operand" "")
7544 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7546 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7547 (const_vector:V8HI [(const_int 1) (const_int 1)
7548 (const_int 1) (const_int 1)
7549 (const_int 1) (const_int 1)
7550 (const_int 1) (const_int 1)]))
7553 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX encoding of the V8HI pattern: emits three-operand vpavgw.  Operand 1
;; may be any x register (the % marks operands 1 and 2 as commutative) and
;; operand 2 an x register or memory.  Requires TARGET_AVX and operands
;; accepted by ix86_binary_operator_ok (PLUS, V8HImode, ...).
7555 (define_insn "*avx_uavgv8hi3"
7556 [(set (match_operand:V8HI 0 "register_operand" "=x")
7562 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7564 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7565 (const_vector:V8HI [(const_int 1) (const_int 1)
7566 (const_int 1) (const_int 1)
7567 (const_int 1) (const_int 1)
7568 (const_int 1) (const_int 1)]))
7570 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7571 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7572 [(set_attr "type" "sseiadd")
7573 (set_attr "prefix" "vex")
7574 (set_attr "mode" "TI")])
;; SSE2 encoding of the V8HI pattern: emits two-operand pavgw, so operand 1
;; is tied to the destination (constraint %0, commutative with operand 2).
;; Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok (PLUS, V8HImode, ...).
7576 (define_insn "*sse2_uavgv8hi3"
7577 [(set (match_operand:V8HI 0 "register_operand" "=x")
7583 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7585 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7586 (const_vector:V8HI [(const_int 1) (const_int 1)
7587 (const_int 1) (const_int 1)
7588 (const_int 1) (const_int 1)
7589 (const_int 1) (const_int 1)]))
7591 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7592 "pavgw\t{%2, %0|%0, %2}"
7593 [(set_attr "type" "sseiadd")
7594 (set_attr "prefix_data16" "1")
7595 (set_attr "mode" "TI")])
7597 ;; The correct representation for this is absolutely enormous, and
7598 ;; surely not generally useful.
;; The operation is therefore modelled as an opaque V2DI unspec over the two
;; V16QI inputs.  This is the three-operand form: it emits vpsadbw with
;; operand 1 in any x register and operand 2 in an x register or memory.
7599 (define_insn "*avx_psadbw"
7600 [(set (match_operand:V2DI 0 "register_operand" "=x")
7601 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7602 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7605 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7606 [(set_attr "type" "sseiadd")
7607 (set_attr "prefix" "vex")
7608 (set_attr "mode" "TI")])
;; Two-operand psadbw, modelled as a V2DI unspec over two V16QI inputs.
;; Operand 1 must occupy the destination register (constraint 0); operand 2
;; is an x register or memory.
7610 (define_insn "sse2_psadbw"
7611 [(set (match_operand:V2DI 0 "register_operand" "=x")
7612 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7613 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7616 "psadbw\t{%2, %0|%0, %2}"
7617 [(set_attr "type" "sseiadd")
7618 (set_attr "prefix_data16" "1")
7619 (set_attr "mode" "TI")])
;; Emits vmovmskp<suffix>: reads a vector register whose mode comes from the
;; AVX256MODEF2P iterator and writes an SI general register.  Enabled when
;; AVX256_VEC_FLOAT_MODE_P holds for the instantiated mode.
7621 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7622 [(set (match_operand:SI 0 "register_operand" "=r")
7624 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7626 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7627 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7628 [(set_attr "type" "ssecvt")
7629 (set_attr "prefix" "vex")
7630 (set_attr "mode" "<MODE>")])
;; Emits movmskp<suffix> for the SSEMODEF2P modes (V4SF and V2DF): reads an x
;; register and writes an SI general register.  The %v in the template and
;; the maybe_vex prefix attribute let the same pattern serve both the legacy
;; and the VEX-prefixed spelling.  Enabled when SSE_VEC_FLOAT_MODE_P holds.
7632 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7633 [(set (match_operand:SI 0 "register_operand" "=r")
7635 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7637 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7638 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7639 [(set_attr "type" "ssecvt")
7640 (set_attr "prefix" "maybe_vex")
7641 (set_attr "mode" "<MODE>")])
;; Emits pmovmskb (optionally VEX-prefixed via %v / maybe_vex): an SI unspec
;; of a V16QI x register, written to an SI general register.
7643 (define_insn "sse2_pmovmskb"
7644 [(set (match_operand:SI 0 "register_operand" "=r")
7645 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7648 "%vpmovmskb\t{%1, %0|%0, %1}"
7649 [(set_attr "type" "ssecvt")
7650 (set_attr "prefix_data16" "1")
7651 (set_attr "prefix" "maybe_vex")
7652 (set_attr "mode" "SI")])
;; Named expander: the destination (operand 0) is a V16QI memory operand set
;; from a V16QI unspec whose visible inputs are the two V16QI register
;; operands 1 and 2.
7654 (define_expand "sse2_maskmovdqu"
7655 [(set (match_operand:V16QI 0 "memory_operand" "")
7656 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7657 (match_operand:V16QI 2 "register_operand" "")
;; maskmovdqu for 32-bit targets (TARGET_SSE2 && !TARGET_64BIT).  The store
;; address is an SI register restricted to constraint D; the unspec also
;; reads the old contents of that same memory (match_dup 0) along with the
;; two V16QI x-register operands, which are the only operands printed.
7663 (define_insn "*sse2_maskmovdqu"
7664 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7665 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7666 (match_operand:V16QI 2 "register_operand" "x")
7667 (mem:V16QI (match_dup 0))]
7669 "TARGET_SSE2 && !TARGET_64BIT"
7670 ;; @@@ check ordering of operands in intel/nonintel syntax
7671 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7672 [(set_attr "type" "ssecvt")
7673 (set_attr "prefix_data16" "1")
7674 (set_attr "prefix" "maybe_vex")
7675 (set_attr "mode" "TI")])
;; maskmovdqu for 64-bit targets (TARGET_SSE2 && TARGET_64BIT).  Identical in
;; shape to the 32-bit pattern except that the address register (constraint
;; D) is DImode.  The unspec reads the old memory contents (match_dup 0) and
;; the two V16QI x-register operands, which are the only operands printed.
7677 (define_insn "*sse2_maskmovdqu_rex64"
7678 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7679 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7680 (match_operand:V16QI 2 "register_operand" "x")
7681 (mem:V16QI (match_dup 0))]
7683 "TARGET_SSE2 && TARGET_64BIT"
7684 ;; @@@ check ordering of operands in intel/nonintel syntax
7685 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7686 [(set_attr "type" "ssecvt")
7687 (set_attr "prefix_data16" "1")
7688 (set_attr "prefix" "maybe_vex")
7689 (set_attr "mode" "TI")])
;; Volatile unspec taking one SI memory operand (constraint m); the memory
;; attribute marks the instruction as a load.
7691 (define_insn "sse_ldmxcsr"
7692 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7696 [(set_attr "type" "sse")
7697 (set_attr "prefix" "maybe_vex")
7698 (set_attr "memory" "load")])
;; Writes an SI memory operand from the volatile unspec UNSPECV_STMXCSR (the
;; unspec has no real inputs, only a dummy const_int 0); the memory attribute
;; marks the instruction as a store.
7700 (define_insn "sse_stmxcsr"
7701 [(set (match_operand:SI 0 "memory_operand" "=m")
7702 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7705 [(set_attr "type" "sse")
7706 (set_attr "prefix" "maybe_vex")
7707 (set_attr "memory" "store")])
;; Named expander for the store fence, available with TARGET_SSE or
;; TARGET_3DNOW_A.  It takes no caller operands: the preparation code builds
;; operands[0] itself as a BLKmode MEM at a scratch Pmode address and marks
;; it volatile, and the pattern wraps it in UNSPEC_SFENCE.
7709 (define_expand "sse_sfence"
7711 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7712 "TARGET_SSE || TARGET_3DNOW_A"
7714 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7715 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the store fence: a BLK operand (no predicate, no
;; constraint) set to UNSPEC_SFENCE of itself.  Same enabling condition as
;; the expander; the memory attribute is unknown.
7718 (define_insn "*sse_sfence"
7719 [(set (match_operand:BLK 0 "" "")
7720 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7721 "TARGET_SSE || TARGET_3DNOW_A"
7723 [(set_attr "type" "sse")
7724 (set_attr "memory" "unknown")])
;; Volatile unspec over a single address operand (predicate address_operand,
;; constraint p); the memory attribute is unknown.
7726 (define_insn "sse2_clflush"
7727 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7731 [(set_attr "type" "sse")
7732 (set_attr "memory" "unknown")])
;; Named expander for the full memory fence.  It takes no caller operands:
;; the preparation code builds operands[0] as a volatile BLKmode MEM at a
;; scratch Pmode address, and the pattern wraps it in UNSPEC_MFENCE.
7734 (define_expand "sse2_mfence"
7736 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7739 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7740 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the full fence: a BLK operand set to UNSPEC_MFENCE of
;; itself.  Enabled when TARGET_64BIT or TARGET_SSE2 holds; the memory
;; attribute is unknown.
7743 (define_insn "*sse2_mfence"
7744 [(set (match_operand:BLK 0 "" "")
7745 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7746 "TARGET_64BIT || TARGET_SSE2"
7748 [(set_attr "type" "sse")
7749 (set_attr "memory" "unknown")])
;; Named expander for the load fence.  It takes no caller operands: the
;; preparation code builds operands[0] as a volatile BLKmode MEM at a scratch
;; Pmode address, and the pattern wraps it in UNSPEC_LFENCE.
7751 (define_expand "sse2_lfence"
7753 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7756 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7757 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the load fence: a BLK operand set to UNSPEC_LFENCE of
;; itself; the memory attribute is unknown.
7760 (define_insn "*sse2_lfence"
7761 [(set (match_operand:BLK 0 "" "")
7762 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7765 [(set_attr "type" "sse")
7766 (set_attr "memory" "unknown")])
;; Volatile unspec with two SI register inputs pinned by constraint to the a
;; and c registers.  One pattern serves 32-bit and 64-bit code for the reason
;; given in the comment below; the encoded length is fixed at 3 bytes.
7768 (define_insn "sse3_mwait"
7769 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7770 (match_operand:SI 1 "register_operand" "c")]
7773 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7774 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7775 ;; we only need to set up 32bit registers.
7777 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): a volatile unspec with three
;; SI register inputs pinned to the a, c and d registers, printed in that
;; order.  The encoded length is fixed at 3 bytes.
7779 (define_insn "sse3_monitor"
7780 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7781 (match_operand:SI 1 "register_operand" "c")
7782 (match_operand:SI 2 "register_operand" "d")]
7784 "TARGET_SSE3 && !TARGET_64BIT"
7785 "monitor\t%0, %1, %2"
7786 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT): operand 0 in the a register
;; is DImode, while the c and d register operands stay SImode for the reason
;; given in the comment below.  The encoded length is fixed at 3 bytes.
7788 (define_insn "sse3_monitor64"
7789 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7790 (match_operand:SI 1 "register_operand" "c")
7791 (match_operand:SI 2 "register_operand" "d")]
7793 "TARGET_SSE3 && TARGET_64BIT"
7794 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7795 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7796 ;; zero extended to 64bit, we only need to set up 32bit registers.
7798 [(set_attr "length" "3")])
7800 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7802 ;; SSSE3 instructions
7804 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX encoding: emits three-operand vphaddw on V8HI.  Operand 1 is any x
;; register and operand 2 an x register or memory.  The RTL spells the
;; operation out element by element: the vec_selects pick HI elements 0..7
;; of operand 1 first, then HI elements 0..7 of operand 2.
7806 (define_insn "*avx_phaddwv8hi3"
7807 [(set (match_operand:V8HI 0 "register_operand" "=x")
7813 (match_operand:V8HI 1 "register_operand" "x")
7814 (parallel [(const_int 0)]))
7815 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7817 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7818 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7821 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7822 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7824 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7825 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7830 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7831 (parallel [(const_int 0)]))
7832 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7834 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7835 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7838 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7839 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7841 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7842 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7844 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7845 [(set_attr "type" "sseiadd")
7846 (set_attr "prefix" "vex")
7847 (set_attr "mode" "TI")])
;; Two-operand phaddw on V8HI in x registers.  Operand 1 must occupy the
;; destination register (constraint 0); operand 2 is an x register or
;; memory.  The vec_selects pick HI elements 0..7 of operand 1, then of
;; operand 2.
7849 (define_insn "ssse3_phaddwv8hi3"
7850 [(set (match_operand:V8HI 0 "register_operand" "=x")
7856 (match_operand:V8HI 1 "register_operand" "0")
7857 (parallel [(const_int 0)]))
7858 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7860 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7861 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7864 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7865 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7867 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7868 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7873 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7874 (parallel [(const_int 0)]))
7875 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7877 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7878 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7881 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7882 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7884 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7885 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7887 "phaddw\t{%2, %0|%0, %2}"
7888 [(set_attr "type" "sseiadd")
7889 (set_attr "prefix_data16" "1")
7890 (set_attr "prefix_extra" "1")
7891 (set_attr "mode" "TI")])
;; Two-operand phaddw on V4HI using the y register class (MMX registers in
;; the i386 constraint table).  Operand 1 is tied to the destination;
;; operand 2 is a y register or memory.  The vec_selects pick HI elements
;; 0..3 of each input.  The mode attribute is DI and there is no data16
;; prefix attribute.
7893 (define_insn "ssse3_phaddwv4hi3"
7894 [(set (match_operand:V4HI 0 "register_operand" "=y")
7899 (match_operand:V4HI 1 "register_operand" "0")
7900 (parallel [(const_int 0)]))
7901 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7903 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7904 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7908 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7909 (parallel [(const_int 0)]))
7910 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7912 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7913 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7915 "phaddw\t{%2, %0|%0, %2}"
7916 [(set_attr "type" "sseiadd")
7917 (set_attr "prefix_extra" "1")
7918 (set_attr "mode" "DI")])
;; AVX encoding: emits three-operand vphaddd on V4SI.  Operand 1 is any x
;; register and operand 2 an x register or memory.  The vec_selects pick SI
;; elements 0..3 of operand 1, then SI elements 0..3 of operand 2.
7920 (define_insn "*avx_phadddv4si3"
7921 [(set (match_operand:V4SI 0 "register_operand" "=x")
7926 (match_operand:V4SI 1 "register_operand" "x")
7927 (parallel [(const_int 0)]))
7928 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7930 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7931 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7935 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7936 (parallel [(const_int 0)]))
7937 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7939 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7940 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7942 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
7943 [(set_attr "type" "sseiadd")
7944 (set_attr "prefix" "vex")
7945 (set_attr "mode" "TI")])
;; Two-operand phaddd on V4SI in x registers.  Operand 1 must occupy the
;; destination register (constraint 0); operand 2 is an x register or
;; memory.  The vec_selects pick SI elements 0..3 of each input.
7947 (define_insn "ssse3_phadddv4si3"
7948 [(set (match_operand:V4SI 0 "register_operand" "=x")
7953 (match_operand:V4SI 1 "register_operand" "0")
7954 (parallel [(const_int 0)]))
7955 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7957 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7958 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7962 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7963 (parallel [(const_int 0)]))
7964 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7966 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7967 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7969 "phaddd\t{%2, %0|%0, %2}"
7970 [(set_attr "type" "sseiadd")
7971 (set_attr "prefix_data16" "1")
7972 (set_attr "prefix_extra" "1")
7973 (set_attr "mode" "TI")])
;; Two-operand phaddd on V2SI using the y register class (MMX registers in
;; the i386 constraint table).  Operand 1 is tied to the destination;
;; operand 2 is a y register or memory.  The vec_selects pick SI elements 0
;; and 1 of each input.  The mode attribute is DI.
7975 (define_insn "ssse3_phadddv2si3"
7976 [(set (match_operand:V2SI 0 "register_operand" "=y")
7980 (match_operand:V2SI 1 "register_operand" "0")
7981 (parallel [(const_int 0)]))
7982 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7985 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7986 (parallel [(const_int 0)]))
7987 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7989 "phaddd\t{%2, %0|%0, %2}"
7990 [(set_attr "type" "sseiadd")
7991 (set_attr "prefix_extra" "1")
7992 (set_attr "mode" "DI")])
;; AVX encoding: emits three-operand vphaddsw on V8HI.  Operand 1 is any x
;; register and operand 2 an x register or memory.  The vec_selects pick HI
;; elements 0..7 of operand 1, then HI elements 0..7 of operand 2.
7994 (define_insn "*avx_phaddswv8hi3"
7995 [(set (match_operand:V8HI 0 "register_operand" "=x")
8001 (match_operand:V8HI 1 "register_operand" "x")
8002 (parallel [(const_int 0)]))
8003 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8005 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8006 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8009 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8010 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8012 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8013 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8018 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8019 (parallel [(const_int 0)]))
8020 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8022 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8023 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8026 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8027 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8029 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8030 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8032 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8033 [(set_attr "type" "sseiadd")
8034 (set_attr "prefix" "vex")
8035 (set_attr "mode" "TI")])
;; Two-operand phaddsw on V8HI in x registers.  Operand 1 must occupy the
;; destination register (constraint 0); operand 2 is an x register or
;; memory.  The vec_selects pick HI elements 0..7 of each input.
8037 (define_insn "ssse3_phaddswv8hi3"
8038 [(set (match_operand:V8HI 0 "register_operand" "=x")
8044 (match_operand:V8HI 1 "register_operand" "0")
8045 (parallel [(const_int 0)]))
8046 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8048 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8049 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8052 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8053 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8055 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8056 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8061 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8062 (parallel [(const_int 0)]))
8063 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8065 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8066 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8069 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8070 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8072 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8073 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8075 "phaddsw\t{%2, %0|%0, %2}"
8076 [(set_attr "type" "sseiadd")
8077 (set_attr "prefix_data16" "1")
8078 (set_attr "prefix_extra" "1")
8079 (set_attr "mode" "TI")])
;; Two-operand phaddsw on V4HI using the y register class (MMX registers in
;; the i386 constraint table).  Operand 1 is tied to the destination;
;; operand 2 is a y register or memory.  The vec_selects pick HI elements
;; 0..3 of each input.  The mode attribute is DI.
8081 (define_insn "ssse3_phaddswv4hi3"
8082 [(set (match_operand:V4HI 0 "register_operand" "=y")
8087 (match_operand:V4HI 1 "register_operand" "0")
8088 (parallel [(const_int 0)]))
8089 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8091 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8092 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8096 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8097 (parallel [(const_int 0)]))
8098 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8100 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8101 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8103 "phaddsw\t{%2, %0|%0, %2}"
8104 [(set_attr "type" "sseiadd")
8105 (set_attr "prefix_extra" "1")
8106 (set_attr "mode" "DI")])
;; AVX encoding: emits three-operand vphsubw on V8HI.  Operand 1 is any x
;; register and operand 2 an x register or memory.  The vec_selects pick HI
;; elements 0..7 of operand 1, then HI elements 0..7 of operand 2.
8108 (define_insn "*avx_phsubwv8hi3"
8109 [(set (match_operand:V8HI 0 "register_operand" "=x")
8115 (match_operand:V8HI 1 "register_operand" "x")
8116 (parallel [(const_int 0)]))
8117 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8119 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8120 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8123 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8124 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8126 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8127 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8132 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8133 (parallel [(const_int 0)]))
8134 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8136 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8137 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8140 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8141 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8143 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8144 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8146 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8147 [(set_attr "type" "sseiadd")
8148 (set_attr "prefix" "vex")
8149 (set_attr "mode" "TI")])
;; Two-operand phsubw on V8HI in x registers.  Operand 1 must occupy the
;; destination register (constraint 0); operand 2 is an x register or
;; memory.  The vec_selects pick HI elements 0..7 of each input.
8151 (define_insn "ssse3_phsubwv8hi3"
8152 [(set (match_operand:V8HI 0 "register_operand" "=x")
8158 (match_operand:V8HI 1 "register_operand" "0")
8159 (parallel [(const_int 0)]))
8160 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8162 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8163 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8166 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8167 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8169 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8170 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8175 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8176 (parallel [(const_int 0)]))
8177 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8179 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8180 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8183 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8184 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8186 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8187 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8189 "phsubw\t{%2, %0|%0, %2}"
8190 [(set_attr "type" "sseiadd")
8191 (set_attr "prefix_data16" "1")
8192 (set_attr "prefix_extra" "1")
8193 (set_attr "mode" "TI")])
;; Two-operand phsubw on V4HI using the y register class (MMX registers in
;; the i386 constraint table).  Operand 1 is tied to the destination;
;; operand 2 is a y register or memory.  The vec_selects pick HI elements
;; 0..3 of each input.  The mode attribute is DI.
8195 (define_insn "ssse3_phsubwv4hi3"
8196 [(set (match_operand:V4HI 0 "register_operand" "=y")
8201 (match_operand:V4HI 1 "register_operand" "0")
8202 (parallel [(const_int 0)]))
8203 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8205 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8206 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8210 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8211 (parallel [(const_int 0)]))
8212 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8215 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8217 "phsubw\t{%2, %0|%0, %2}"
8218 [(set_attr "type" "sseiadd")
8219 (set_attr "prefix_extra" "1")
8220 (set_attr "mode" "DI")])
;; AVX encoding: emits three-operand vphsubd on V4SI.  Operand 1 is any x
;; register and operand 2 an x register or memory.  The vec_selects pick SI
;; elements 0..3 of operand 1, then SI elements 0..3 of operand 2.
8222 (define_insn "*avx_phsubdv4si3"
8223 [(set (match_operand:V4SI 0 "register_operand" "=x")
8228 (match_operand:V4SI 1 "register_operand" "x")
8229 (parallel [(const_int 0)]))
8230 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8232 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8233 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8237 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8238 (parallel [(const_int 0)]))
8239 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8241 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8242 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8244 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8245 [(set_attr "type" "sseiadd")
8246 (set_attr "prefix" "vex")
8247 (set_attr "mode" "TI")])
;; Two-operand phsubd on V4SI in x registers.  Operand 1 must occupy the
;; destination register (constraint 0); operand 2 is an x register or
;; memory.  The vec_selects pick SI elements 0..3 of each input.
8249 (define_insn "ssse3_phsubdv4si3"
8250 [(set (match_operand:V4SI 0 "register_operand" "=x")
8255 (match_operand:V4SI 1 "register_operand" "0")
8256 (parallel [(const_int 0)]))
8257 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8259 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8260 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8264 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8265 (parallel [(const_int 0)]))
8266 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8268 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8269 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8271 "phsubd\t{%2, %0|%0, %2}"
8272 [(set_attr "type" "sseiadd")
8273 (set_attr "prefix_data16" "1")
8274 (set_attr "prefix_extra" "1")
8275 (set_attr "mode" "TI")])
;; Two-operand phsubd on V2SI using the y register class (MMX registers in
;; the i386 constraint table).  Operand 1 is tied to the destination;
;; operand 2 is a y register or memory.  The vec_selects pick SI elements 0
;; and 1 of each input.  The mode attribute is DI.
8277 (define_insn "ssse3_phsubdv2si3"
8278 [(set (match_operand:V2SI 0 "register_operand" "=y")
8282 (match_operand:V2SI 1 "register_operand" "0")
8283 (parallel [(const_int 0)]))
8284 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8287 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8288 (parallel [(const_int 0)]))
8289 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8291 "phsubd\t{%2, %0|%0, %2}"
8292 [(set_attr "type" "sseiadd")
8293 (set_attr "prefix_extra" "1")
8294 (set_attr "mode" "DI")])
;; AVX encoding: emits three-operand vphsubsw on V8HI.  Operand 1 is any x
;; register and operand 2 an x register or memory.  The vec_selects pick HI
;; elements 0..7 of operand 1, then HI elements 0..7 of operand 2.
8296 (define_insn "*avx_phsubswv8hi3"
8297 [(set (match_operand:V8HI 0 "register_operand" "=x")
8303 (match_operand:V8HI 1 "register_operand" "x")
8304 (parallel [(const_int 0)]))
8305 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8307 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8308 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8311 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8312 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8314 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8315 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8320 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8321 (parallel [(const_int 0)]))
8322 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8324 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8325 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8328 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8329 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8331 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8332 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8334 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8335 [(set_attr "type" "sseiadd")
8336 (set_attr "prefix" "vex")
8337 (set_attr "mode" "TI")])
;; Two-operand phsubsw on V8HI in x registers.  Operand 1 must occupy the
;; destination register (constraint 0); operand 2 is an x register or
;; memory.  The vec_selects pick HI elements 0..7 of each input.
8339 (define_insn "ssse3_phsubswv8hi3"
8340 [(set (match_operand:V8HI 0 "register_operand" "=x")
8346 (match_operand:V8HI 1 "register_operand" "0")
8347 (parallel [(const_int 0)]))
8348 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8350 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8351 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8354 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8355 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8357 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8358 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8363 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8364 (parallel [(const_int 0)]))
8365 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8367 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8368 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8371 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8372 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8374 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8375 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8377 "phsubsw\t{%2, %0|%0, %2}"
8378 [(set_attr "type" "sseiadd")
8379 (set_attr "prefix_data16" "1")
8380 (set_attr "prefix_extra" "1")
8381 (set_attr "mode" "TI")])
;; Two-operand phsubsw on V4HI using the y register class (MMX registers in
;; the i386 constraint table).  Operand 1 is tied to the destination;
;; operand 2 is a y register or memory.  The vec_selects pick HI elements
;; 0..3 of each input.  The mode attribute is DI.
8383 (define_insn "ssse3_phsubswv4hi3"
8384 [(set (match_operand:V4HI 0 "register_operand" "=y")
8389 (match_operand:V4HI 1 "register_operand" "0")
8390 (parallel [(const_int 0)]))
8391 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8393 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8394 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8398 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8399 (parallel [(const_int 0)]))
8400 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8402 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8403 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8405 "phsubsw\t{%2, %0|%0, %2}"
8406 [(set_attr "type" "sseiadd")
8407 (set_attr "prefix_extra" "1")
8408 (set_attr "mode" "DI")])
;; AVX encoding: emits three-operand vpmaddubsw, taking two V16QI inputs
;; (operand 1 an x register, operand 2 an x register or memory) and producing
;; a V8HI register.  Each input is referenced twice: once through a selection
;; whose index list starts at 0 and once through a vec_select whose index
;; list starts at 1 (and, for operand 2, ends at 15).
8410 (define_insn "*avx_pmaddubsw128"
8411 [(set (match_operand:V8HI 0 "register_operand" "=x")
8416 (match_operand:V16QI 1 "register_operand" "x")
8417 (parallel [(const_int 0)
8427 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8428 (parallel [(const_int 0)
8438 (vec_select:V16QI (match_dup 1)
8439 (parallel [(const_int 1)
8448 (vec_select:V16QI (match_dup 2)
8449 (parallel [(const_int 1)
8456 (const_int 15)]))))))]
8458 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8459 [(set_attr "type" "sseiadd")
8460 (set_attr "prefix" "vex")
8461 (set_attr "mode" "TI")])
;; Two-operand pmaddubsw: two V16QI inputs producing a V8HI x register.
;; Operand 1 must occupy the destination register (constraint 0); operand 2
;; is an x register or memory.  Each input is referenced twice: once through
;; a selection whose index list starts at 0 and once through a vec_select
;; whose index list starts at 1 (and, for operand 2, ends at 15).
8463 (define_insn "ssse3_pmaddubsw128"
8464 [(set (match_operand:V8HI 0 "register_operand" "=x")
8469 (match_operand:V16QI 1 "register_operand" "0")
8470 (parallel [(const_int 0)
8480 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8481 (parallel [(const_int 0)
8491 (vec_select:V16QI (match_dup 1)
8492 (parallel [(const_int 1)
8501 (vec_select:V16QI (match_dup 2)
8502 (parallel [(const_int 1)
8509 (const_int 15)]))))))]
8511 "pmaddubsw\t{%2, %0|%0, %2}"
8512 [(set_attr "type" "sseiadd")
8513 (set_attr "prefix_data16" "1")
8514 (set_attr "prefix_extra" "1")
8515 (set_attr "mode" "TI")])
;; Two-operand pmaddubsw on the y register class (MMX registers in the i386
;; constraint table): two V8QI inputs producing a V4HI register.  Operand 1
;; is tied to the destination; operand 2 is a y register or memory.  Each
;; input is referenced twice: once through a selection whose index list
;; starts at 0 and once through a vec_select whose index list starts at 1
;; (and, for operand 2, ends at 7).  The mode attribute is DI.
8517 (define_insn "ssse3_pmaddubsw"
8518 [(set (match_operand:V4HI 0 "register_operand" "=y")
8523 (match_operand:V8QI 1 "register_operand" "0")
8524 (parallel [(const_int 0)
8530 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8531 (parallel [(const_int 0)
8537 (vec_select:V8QI (match_dup 1)
8538 (parallel [(const_int 1)
8543 (vec_select:V8QI (match_dup 2)
8544 (parallel [(const_int 1)
8547 (const_int 7)]))))))]
8549 "pmaddubsw\t{%2, %0|%0, %2}"
8550 [(set_attr "type" "sseiadd")
8551 (set_attr "prefix_extra" "1")
8552 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands) to
;; legitimize them before the insn patterns below are matched.
8554 (define_expand "ssse3_pmulhrswv8hi3"
8555 [(set (match_operand:V8HI 0 "register_operand" "")
8562 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8564 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8566 (const_vector:V8HI [(const_int 1) (const_int 1)
8567 (const_int 1) (const_int 1)
8568 (const_int 1) (const_int 1)
8569 (const_int 1) (const_int 1)]))
8572 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX (VEX, three-operand) form of V8HI pmulhrsw.  Enabled under
;; TARGET_AVX when ix86_binary_operator_ok accepts the operands.
;; The "%" on operand 1 marks operands 1 and 2 as commutative.
8574 (define_insn "*avx_pmulhrswv8hi3"
8575 [(set (match_operand:V8HI 0 "register_operand" "=x")
8582 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8584 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8586 (const_vector:V8HI [(const_int 1) (const_int 1)
8587 (const_int 1) (const_int 1)
8588 (const_int 1) (const_int 1)
8589 (const_int 1) (const_int 1)]))
8591 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8592 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8593 [(set_attr "type" "sseimul")
8594 (set_attr "prefix" "vex")
8595 (set_attr "mode" "TI")])
;; Non-VEX V8HI pmulhrsw.  Enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is commutative
;; with operand 2 ("%") and tied to the destination ("0").
8597 (define_insn "*ssse3_pmulhrswv8hi3"
8598 [(set (match_operand:V8HI 0 "register_operand" "=x")
8605 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8607 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8609 (const_vector:V8HI [(const_int 1) (const_int 1)
8610 (const_int 1) (const_int 1)
8611 (const_int 1) (const_int 1)
8612 (const_int 1) (const_int 1)]))
8614 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8615 "pmulhrsw\t{%2, %0|%0, %2}"
8616 [(set_attr "type" "sseimul")
8617 (set_attr "prefix_data16" "1")
8618 (set_attr "prefix_extra" "1")
8619 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands) to
;; legitimize the two nonimmediate inputs.
8621 (define_expand "ssse3_pmulhrswv4hi3"
8622 [(set (match_operand:V4HI 0 "register_operand" "")
8629 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8631 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8633 (const_vector:V4HI [(const_int 1) (const_int 1)
8634 (const_int 1) (const_int 1)]))
8637 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; V4HI pmulhrsw using the "y" register class.  Enabled under TARGET_SSSE3
;; when ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; commutative with operand 2 and tied to the destination.
8639 (define_insn "*ssse3_pmulhrswv4hi3"
8640 [(set (match_operand:V4HI 0 "register_operand" "=y")
8647 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8649 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8651 (const_vector:V4HI [(const_int 1) (const_int 1)
8652 (const_int 1) (const_int 1)]))
8654 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8655 "pmulhrsw\t{%2, %0|%0, %2}"
8656 [(set_attr "type" "sseimul")
8657 (set_attr "prefix_extra" "1")
8658 (set_attr "mode" "DI")])
;; AVX (VEX, three-operand) pshufb on V16QI, modelled as an unspec of
;; operands 1 and 2.  Operand 2 may be a register or memory.
;; NOTE(review): the ";" after the output template starts an empty comment.
8660 (define_insn "*avx_pshufbv16qi3"
8661 [(set (match_operand:V16QI 0 "register_operand" "=x")
8662 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8663 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8666 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8667 [(set_attr "type" "sselog1")
8668 (set_attr "prefix" "vex")
8669 (set_attr "mode" "TI")])
;; Non-VEX pshufb on V16QI, modelled as an unspec.  Two-operand form:
;; operand 1 is tied to the destination, operand 2 may be register or memory.
8671 (define_insn "ssse3_pshufbv16qi3"
8672 [(set (match_operand:V16QI 0 "register_operand" "=x")
8673 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8674 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8677 "pshufb\t{%2, %0|%0, %2}";
8678 [(set_attr "type" "sselog1")
8679 (set_attr "prefix_data16" "1")
8680 (set_attr "prefix_extra" "1")
8681 (set_attr "mode" "TI")])
;; pshufb on V8QI using the "y" register class, modelled as an unspec.
;; Operand 1 is tied to the destination; mode attribute is DI.
8683 (define_insn "ssse3_pshufbv8qi3"
8684 [(set (match_operand:V8QI 0 "register_operand" "=y")
8685 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8686 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8689 "pshufb\t{%2, %0|%0, %2}";
8690 [(set_attr "type" "sselog1")
8691 (set_attr "prefix_extra" "1")
8692 (set_attr "mode" "DI")])
;; AVX (VEX, three-operand) psign for each mode in SSEMODE124
;; (V16QI, V8HI, V4SI).  The mnemonic suffix comes from <ssevecsize>.
8694 (define_insn "*avx_psign<mode>3"
8695 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8697 [(match_operand:SSEMODE124 1 "register_operand" "x")
8698 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8701 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
8702 [(set_attr "type" "sselog1")
8703 (set_attr "prefix" "vex")
8704 (set_attr "mode" "TI")])
;; Non-VEX psign for each mode in SSEMODE124 (V16QI, V8HI, V4SI).
;; Two-operand form: operand 1 is tied to the destination.
8706 (define_insn "ssse3_psign<mode>3"
8707 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8709 [(match_operand:SSEMODE124 1 "register_operand" "0")
8710 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8713 "psign<ssevecsize>\t{%2, %0|%0, %2}";
8714 [(set_attr "type" "sselog1")
8715 (set_attr "prefix_data16" "1")
8716 (set_attr "prefix_extra" "1")
8717 (set_attr "mode" "TI")])
;; psign over the MMXMODEI iterator using the "y" register class.  Shares
;; its name template with the SSEMODE124 pattern; the <mode> substitution
;; keeps the generated names distinct as long as the iterators do not
;; overlap.  Mnemonic suffix comes from <mmxvecsize>.
8719 (define_insn "ssse3_psign<mode>3"
8720 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8722 [(match_operand:MMXMODEI 1 "register_operand" "0")
8723 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8726 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8727 [(set_attr "type" "sselog1")
8728 (set_attr "prefix_extra" "1")
8729 (set_attr "mode" "DI")])
;; AVX (VEX, three-operand) palignr on TImode, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing (presumably converting a bit count in
;; the RTL to the byte count the instruction takes -- confirm).
8731 (define_insn "*avx_palignrti"
8732 [(set (match_operand:TI 0 "register_operand" "=x")
8733 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8734 (match_operand:TI 2 "nonimmediate_operand" "xm")
8735 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8739 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8740 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8742 [(set_attr "type" "sseishft")
8743 (set_attr "prefix" "vex")
8744 (set_attr "mode" "TI")])
;; Non-VEX palignr on TImode, modelled as an unspec.  Operand 1 is tied to
;; the destination.  Operand 3 must satisfy const_0_to_255_mul_8_operand
;; and is divided by 8 in the output code before being printed.
8746 (define_insn "ssse3_palignrti"
8747 [(set (match_operand:TI 0 "register_operand" "=x")
8748 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
8749 (match_operand:TI 2 "nonimmediate_operand" "xm")
8750 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8754 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8755 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8757 [(set_attr "type" "sseishft")
8758 (set_attr "prefix_data16" "1")
8759 (set_attr "prefix_extra" "1")
8760 (set_attr "mode" "TI")])
;; palignr on DImode using the "y" register class, modelled as an unspec.
;; Operand 1 is tied to the destination.  Operand 3 is divided by 8 in the
;; output code before being printed.
8762 (define_insn "ssse3_palignrdi"
8763 [(set (match_operand:DI 0 "register_operand" "=y")
8764 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8765 (match_operand:DI 2 "nonimmediate_operand" "ym")
8766 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8770 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8771 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8773 [(set_attr "type" "sseishft")
8774 (set_attr "prefix_extra" "1")
8775 (set_attr "mode" "DI")])
;; Vector absolute value (RTL "abs") for each mode in SSEMODE124.
;; Emits pabs<ssevecsize>; the "%v" template prefix and the "maybe_vex"
;; prefix attribute let one pattern cover both the legacy and VEX encodings.
;; The source may be a register or memory.
8777 (define_insn "abs<mode>2"
8778 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8779 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
8781 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
8782 [(set_attr "type" "sselog1")
8783 (set_attr "prefix_data16" "1")
8784 (set_attr "prefix_extra" "1")
8785 (set_attr "prefix" "maybe_vex")
8786 (set_attr "mode" "TI")])
;; Vector absolute value (RTL "abs") over the MMXMODEI iterator using the
;; "y" register class.  Emits pabs<mmxvecsize>; mode attribute is DI.
8788 (define_insn "abs<mode>2"
8789 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8790 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8792 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8793 [(set_attr "type" "sselog1")
8794 (set_attr "prefix_extra" "1")
8795 (set_attr "mode" "DI")])
8797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8799 ;; AMD SSE4A instructions
8801 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store movnts<suffix> for each mode in MODEF: stores the
;; register operand 1 to the memory operand 0.  Modelled as an unspec.
8803 (define_insn "sse4a_movnt<mode>"
8804 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8806 [(match_operand:MODEF 1 "register_operand" "x")]
8809 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
8810 [(set_attr "type" "ssemov")
8811 (set_attr "mode" "<MODE>")])
;; SSE4A movnts<suffix> taking a vector register (SSEMODEF2P: V4SF, V2DF):
;; element 0 of operand 1 is selected and stored to the scalar-mode memory
;; operand 0.
8813 (define_insn "sse4a_vmmovnt<mode>"
8814 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8815 (unspec:<ssescalarmode>
8816 [(vec_select:<ssescalarmode>
8817 (match_operand:SSEMODEF2P 1 "register_operand" "x")
8818 (parallel [(const_int 0)]))]
8821 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8822 [(set_attr "type" "ssemov")
8823 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form: operand 1 (tied to the destination) plus two
;; constant-integer operands 2 and 3, which are printed as immediates.
8825 (define_insn "sse4a_extrqi"
8826 [(set (match_operand:V2DI 0 "register_operand" "=x")
8827 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8828 (match_operand 2 "const_int_operand" "")
8829 (match_operand 3 "const_int_operand" "")]
8832 "extrq\t{%3, %2, %0|%0, %2, %3}"
8833 [(set_attr "type" "sse")
8834 (set_attr "prefix_data16" "1")
8835 (set_attr "mode" "TI")])
;; SSE4A extrq, register form: operand 1 is tied to the destination and the
;; control operand 2 is a V16QI register.
8837 (define_insn "sse4a_extrq"
8838 [(set (match_operand:V2DI 0 "register_operand" "=x")
8839 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8840 (match_operand:V16QI 2 "register_operand" "x")]
8843 "extrq\t{%2, %0|%0, %2}"
8844 [(set_attr "type" "sse")
8845 (set_attr "prefix_data16" "1")
8846 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form: operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are constant integers
;; printed as immediates.
8848 (define_insn "sse4a_insertqi"
8849 [(set (match_operand:V2DI 0 "register_operand" "=x")
8850 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8851 (match_operand:V2DI 2 "register_operand" "x")
8852 (match_operand 3 "const_int_operand" "")
8853 (match_operand 4 "const_int_operand" "")]
8856 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8857 [(set_attr "type" "sseins")
8858 (set_attr "prefix_rep" "1")
8859 (set_attr "mode" "TI")])
;; SSE4A insertq, register form: operand 1 is tied to the destination and
;; operand 2 is a V2DI register.
8861 (define_insn "sse4a_insertq"
8862 [(set (match_operand:V2DI 0 "register_operand" "=x")
8863 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8864 (match_operand:V2DI 2 "register_operand" "x")]
8867 "insertq\t{%2, %0|%0, %2}"
8868 [(set_attr "type" "sseins")
8869 (set_attr "prefix_rep" "1")
8870 (set_attr "mode" "TI")])
8872 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8874 ;; Intel SSE4.1 instructions
8876 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vblendp<suffix> with an immediate mask, expressed as a vec_merge of
;; operand 2 and operand 1 under the constant operand 3.  Note the RTL lists
;; operand 2 first; the immediate range is bounded by <blendbits>.
8878 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
8879 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8880 (vec_merge:AVXMODEF2P
8881 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8882 (match_operand:AVXMODEF2P 1 "register_operand" "x")
8883 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8885 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8886 [(set_attr "type" "ssemov")
8887 (set_attr "prefix" "vex")
8888 (set_attr "mode" "<avxvecmode>")])
;; AVX vblendvp<suffix> with a register mask: operands 1 and 3 are
;; registers, operand 2 may be a register or memory.  Four-operand VEX form.
8890 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
8891 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8893 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
8894 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8895 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
8898 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8899 [(set_attr "type" "ssemov")
8900 (set_attr "prefix" "vex")
8901 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 blendp<suffix> with an immediate mask, expressed as a vec_merge of
;; operand 2 and operand 1 under the constant operand 3.  Operand 1 is tied
;; to the destination.
8903 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
8904 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8905 (vec_merge:SSEMODEF2P
8906 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8907 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8908 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8910 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8911 [(set_attr "type" "ssemov")
8912 (set_attr "prefix_extra" "1")
8913 (set_attr "mode" "<MODE>")])
;; SSE4.1 blendvp<suffix> with a register mask.  The mask (operand 3) uses
;; constraint "Yz", while operands 0, 1 and 2 use the *_not_xmm0 predicates
;; so they are kept out of the mask register.  Operand 1 is tied to the
;; destination.
8915 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
8916 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
8918 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
8919 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
8920 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
8923 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8924 [(set_attr "type" "ssemov")
8925 (set_attr "prefix_extra" "1")
8926 (set_attr "mode" "<MODE>")])
;; AVX vdpp<suffix> with an 8-bit immediate (operand 3, 0..255).
;; Operands 1 and 2 are marked commutative by the "%" on operand 1.
8928 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
8929 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8931 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
8932 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8933 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8936 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8937 [(set_attr "type" "ssemul")
8938 (set_attr "prefix" "vex")
8939 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 dpp<suffix> with an 8-bit immediate (operand 3, 0..255).
;; Operand 1 is commutative with operand 2 and tied to the destination.
8941 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
8942 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8944 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
8945 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8946 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8949 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8950 [(set_attr "type" "ssemul")
8951 (set_attr "prefix_extra" "1")
8952 (set_attr "mode" "<MODE>")])
;; movntdqa: loads a V2DI value from a memory operand into a register.
;; Modelled as an unspec; "%v" / "maybe_vex" cover the VEX encoding too.
8954 (define_insn "sse4_1_movntdqa"
8955 [(set (match_operand:V2DI 0 "register_operand" "=x")
8956 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
8959 "%vmovntdqa\t{%1, %0|%0, %1}"
8960 [(set_attr "type" "ssecvt")
8961 (set_attr "prefix_extra" "1")
8962 (set_attr "prefix" "maybe_vex")
8963 (set_attr "mode" "TI")])
;; AVX (VEX) mpsadbw on V16QI with an 8-bit immediate (operand 3).
;; Modelled as an unspec; operand 2 may be a register or memory.
8965 (define_insn "*avx_mpsadbw"
8966 [(set (match_operand:V16QI 0 "register_operand" "=x")
8967 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8968 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8969 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8972 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8973 [(set_attr "type" "sselog1")
8974 (set_attr "prefix" "vex")
8975 (set_attr "mode" "TI")])
;; SSE4.1 mpsadbw on V16QI with an 8-bit immediate (operand 3).
;; Operand 1 is tied to the destination.
8977 (define_insn "sse4_1_mpsadbw"
8978 [(set (match_operand:V16QI 0 "register_operand" "=x")
8979 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8980 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8981 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8984 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
8985 [(set_attr "type" "sselog1")
8986 (set_attr "prefix_extra" "1")
8987 (set_attr "mode" "TI")])
;; AVX (VEX, three-operand) packusdw: two V4SI inputs produce one V8HI
;; result.  Operand 2 may be a register or memory.
8989 (define_insn "*avx_packusdw"
8990 [(set (match_operand:V8HI 0 "register_operand" "=x")
8993 (match_operand:V4SI 1 "register_operand" "x"))
8995 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
8997 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8998 [(set_attr "type" "sselog")
8999 (set_attr "prefix" "vex")
9000 (set_attr "mode" "TI")])
;; SSE4.1 packusdw: two V4SI inputs produce one V8HI result.
;; Operand 1 is tied to the destination.
9002 (define_insn "sse4_1_packusdw"
9003 [(set (match_operand:V8HI 0 "register_operand" "=x")
9006 (match_operand:V4SI 1 "register_operand" "0"))
9008 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9010 "packusdw\t{%2, %0|%0, %2}"
9011 [(set_attr "type" "sselog")
9012 (set_attr "prefix_extra" "1")
9013 (set_attr "mode" "TI")])
;; AVX (VEX) pblendvb on V16QI with a register mask (operand 3, any "x"
;; register).  Operand 2 may be a register or memory.
9015 (define_insn "*avx_pblendvb"
9016 [(set (match_operand:V16QI 0 "register_operand" "=x")
9017 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9018 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9019 (match_operand:V16QI 3 "register_operand" "x")]
9022 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9023 [(set_attr "type" "ssemov")
9024 (set_attr "prefix" "vex")
9025 (set_attr "mode" "TI")])
;; SSE4.1 pblendvb on V16QI.  The mask (operand 3) uses constraint "Yz";
;; operands 0, 1 and 2 use the *_not_xmm0 predicates so they are kept out of
;; the mask register.  Operand 1 is tied to the destination.
9027 (define_insn "sse4_1_pblendvb"
9028 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9029 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9030 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9031 (match_operand:V16QI 3 "register_operand" "Yz")]
9034 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9035 [(set_attr "type" "ssemov")
9036 (set_attr "prefix_extra" "1")
9037 (set_attr "mode" "TI")])
;; AVX (VEX) pblendw on V8HI with an 8-bit immediate mask (operand 3).
;; The RTL lists operand 2 before operand 1.
9039 (define_insn "*avx_pblendw"
9040 [(set (match_operand:V8HI 0 "register_operand" "=x")
9042 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9043 (match_operand:V8HI 1 "register_operand" "x")
9044 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9046 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9047 [(set_attr "type" "ssemov")
9048 (set_attr "prefix" "vex")
9049 (set_attr "mode" "TI")])
;; SSE4.1 pblendw on V8HI with an 8-bit immediate mask (operand 3).
;; Operand 1 is tied to the destination; the RTL lists operand 2 first.
9051 (define_insn "sse4_1_pblendw"
9052 [(set (match_operand:V8HI 0 "register_operand" "=x")
9054 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9055 (match_operand:V8HI 1 "register_operand" "0")
9056 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9058 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9059 [(set_attr "type" "ssemov")
9060 (set_attr "prefix_extra" "1")
9061 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single
;; register-or-memory operand.  "%v" / "maybe_vex" cover the VEX encoding.
9063 (define_insn "sse4_1_phminposuw"
9064 [(set (match_operand:V8HI 0 "register_operand" "=x")
9065 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9066 UNSPEC_PHMINPOSUW))]
9068 "%vphminposuw\t{%1, %0|%0, %1}"
9069 [(set_attr "type" "sselog1")
9070 (set_attr "prefix_extra" "1")
9071 (set_attr "prefix" "maybe_vex")
9072 (set_attr "mode" "TI")])
;; pmovsxbw, register-source form: takes a full V16QI register and produces
;; V8HI.  "%v" / "maybe_vex" cover the VEX encoding.
9074 (define_insn "sse4_1_extendv8qiv8hi2"
9075 [(set (match_operand:V8HI 0 "register_operand" "=x")
9078 (match_operand:V16QI 1 "register_operand" "x")
9079 (parallel [(const_int 0)
9088 "%vpmovsxbw\t{%1, %0|%0, %1}"
9089 [(set_attr "type" "ssemov")
9090 (set_attr "prefix_extra" "1")
9091 (set_attr "prefix" "maybe_vex")
9092 (set_attr "mode" "TI")])
;; pmovsxbw, narrow-source form: the source is a V8QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI, which allows a memory source.
9094 (define_insn "*sse4_1_extendv8qiv8hi2"
9095 [(set (match_operand:V8HI 0 "register_operand" "=x")
9098 (vec_duplicate:V16QI
9099 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9100 (parallel [(const_int 0)
9109 "%vpmovsxbw\t{%1, %0|%0, %1}"
9110 [(set_attr "type" "ssemov")
9111 (set_attr "prefix_extra" "1")
9112 (set_attr "prefix" "maybe_vex")
9113 (set_attr "mode" "TI")])
;; pmovsxbd, register-source form: takes a full V16QI register and produces
;; V4SI.
9115 (define_insn "sse4_1_extendv4qiv4si2"
9116 [(set (match_operand:V4SI 0 "register_operand" "=x")
9119 (match_operand:V16QI 1 "register_operand" "x")
9120 (parallel [(const_int 0)
9125 "%vpmovsxbd\t{%1, %0|%0, %1}"
9126 [(set_attr "type" "ssemov")
9127 (set_attr "prefix_extra" "1")
9128 (set_attr "prefix" "maybe_vex")
9129 (set_attr "mode" "TI")])
;; pmovsxbd, narrow-source form: the source is a V4QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
9131 (define_insn "*sse4_1_extendv4qiv4si2"
9132 [(set (match_operand:V4SI 0 "register_operand" "=x")
9135 (vec_duplicate:V16QI
9136 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9137 (parallel [(const_int 0)
9142 "%vpmovsxbd\t{%1, %0|%0, %1}"
9143 [(set_attr "type" "ssemov")
9144 (set_attr "prefix_extra" "1")
9145 (set_attr "prefix" "maybe_vex")
9146 (set_attr "mode" "TI")])
;; pmovsxbq, register-source form: takes a full V16QI register and produces
;; V2DI.
9148 (define_insn "sse4_1_extendv2qiv2di2"
9149 [(set (match_operand:V2DI 0 "register_operand" "=x")
9152 (match_operand:V16QI 1 "register_operand" "x")
9153 (parallel [(const_int 0)
9156 "%vpmovsxbq\t{%1, %0|%0, %1}"
9157 [(set_attr "type" "ssemov")
9158 (set_attr "prefix_extra" "1")
9159 (set_attr "prefix" "maybe_vex")
9160 (set_attr "mode" "TI")])
;; pmovsxbq, narrow-source form: the source is a V2QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
9162 (define_insn "*sse4_1_extendv2qiv2di2"
9163 [(set (match_operand:V2DI 0 "register_operand" "=x")
9166 (vec_duplicate:V16QI
9167 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9168 (parallel [(const_int 0)
9171 "%vpmovsxbq\t{%1, %0|%0, %1}"
9172 [(set_attr "type" "ssemov")
9173 (set_attr "prefix_extra" "1")
9174 (set_attr "prefix" "maybe_vex")
9175 (set_attr "mode" "TI")])
;; pmovsxwd, register-source form: takes a full V8HI register and produces
;; V4SI.
9177 (define_insn "sse4_1_extendv4hiv4si2"
9178 [(set (match_operand:V4SI 0 "register_operand" "=x")
9181 (match_operand:V8HI 1 "register_operand" "x")
9182 (parallel [(const_int 0)
9187 "%vpmovsxwd\t{%1, %0|%0, %1}"
9188 [(set_attr "type" "ssemov")
9189 (set_attr "prefix_extra" "1")
9190 (set_attr "prefix" "maybe_vex")
9191 (set_attr "mode" "TI")])
;; pmovsxwd, narrow-source form: the source is a register-or-memory operand
;; holding the four HImode elements that are sign-extended to V4SI.
;; The source mode is V4HI (four 16-bit elements, 64 bits), matching the
;; pattern name and the zero-extending sibling *sse4_1_zero_extendv4hiv4si2;
;; a V2HI source would describe only half of the data the instruction reads.
9193 (define_insn "*sse4_1_extendv4hiv4si2"
9194 [(set (match_operand:V4SI 0 "register_operand" "=x")
9198 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9199 (parallel [(const_int 0)
9204 "%vpmovsxwd\t{%1, %0|%0, %1}"
9205 [(set_attr "type" "ssemov")
9206 (set_attr "prefix_extra" "1")
9207 (set_attr "prefix" "maybe_vex")
9208 (set_attr "mode" "TI")])
;; pmovsxwq, register-source form: takes a full V8HI register and produces
;; V2DI.
9210 (define_insn "sse4_1_extendv2hiv2di2"
9211 [(set (match_operand:V2DI 0 "register_operand" "=x")
9214 (match_operand:V8HI 1 "register_operand" "x")
9215 (parallel [(const_int 0)
9218 "%vpmovsxwq\t{%1, %0|%0, %1}"
9219 [(set_attr "type" "ssemov")
9220 (set_attr "prefix_extra" "1")
9221 (set_attr "prefix" "maybe_vex")
9222 (set_attr "mode" "TI")])
;; pmovsxwq, narrow-source form: the source is a register-or-memory operand
;; holding the two HImode elements that are sign-extended to V2DI.
;; The source mode is V2HI (two 16-bit elements, 32 bits), matching the
;; pattern name and the zero-extending sibling *sse4_1_zero_extendv2hiv2di2;
;; a full V8HI source would claim a 128-bit memory read the instruction does
;; not perform.
9224 (define_insn "*sse4_1_extendv2hiv2di2"
9225 [(set (match_operand:V2DI 0 "register_operand" "=x")
9229 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9230 (parallel [(const_int 0)
9233 "%vpmovsxwq\t{%1, %0|%0, %1}"
9234 [(set_attr "type" "ssemov")
9235 (set_attr "prefix_extra" "1")
9236 (set_attr "prefix" "maybe_vex")
9237 (set_attr "mode" "TI")])
;; pmovsxdq, register-source form: takes a full V4SI register and produces
;; V2DI.
9239 (define_insn "sse4_1_extendv2siv2di2"
9240 [(set (match_operand:V2DI 0 "register_operand" "=x")
9243 (match_operand:V4SI 1 "register_operand" "x")
9244 (parallel [(const_int 0)
9247 "%vpmovsxdq\t{%1, %0|%0, %1}"
9248 [(set_attr "type" "ssemov")
9249 (set_attr "prefix_extra" "1")
9250 (set_attr "prefix" "maybe_vex")
9251 (set_attr "mode" "TI")])
;; pmovsxdq, narrow-source form: the source is a V2SI register-or-memory
;; operand.
9253 (define_insn "*sse4_1_extendv2siv2di2"
9254 [(set (match_operand:V2DI 0 "register_operand" "=x")
9258 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9259 (parallel [(const_int 0)
9262 "%vpmovsxdq\t{%1, %0|%0, %1}"
9263 [(set_attr "type" "ssemov")
9264 (set_attr "prefix_extra" "1")
9265 (set_attr "prefix" "maybe_vex")
9266 (set_attr "mode" "TI")])
;; pmovzxbw, register-source form: takes a full V16QI register and produces
;; V8HI.  "%v" / "maybe_vex" cover the VEX encoding.
9268 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9269 [(set (match_operand:V8HI 0 "register_operand" "=x")
9272 (match_operand:V16QI 1 "register_operand" "x")
9273 (parallel [(const_int 0)
9282 "%vpmovzxbw\t{%1, %0|%0, %1}"
9283 [(set_attr "type" "ssemov")
9284 (set_attr "prefix_extra" "1")
9285 (set_attr "prefix" "maybe_vex")
9286 (set_attr "mode" "TI")])
;; pmovzxbw, narrow-source form: the source is a V8QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
9288 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9289 [(set (match_operand:V8HI 0 "register_operand" "=x")
9292 (vec_duplicate:V16QI
9293 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9294 (parallel [(const_int 0)
9303 "%vpmovzxbw\t{%1, %0|%0, %1}"
9304 [(set_attr "type" "ssemov")
9305 (set_attr "prefix_extra" "1")
9306 (set_attr "prefix" "maybe_vex")
9307 (set_attr "mode" "TI")])
;; pmovzxbd, register-source form: takes a full V16QI register and produces
;; V4SI.
9309 (define_insn "sse4_1_zero_extendv4qiv4si2"
9310 [(set (match_operand:V4SI 0 "register_operand" "=x")
9313 (match_operand:V16QI 1 "register_operand" "x")
9314 (parallel [(const_int 0)
9319 "%vpmovzxbd\t{%1, %0|%0, %1}"
9320 [(set_attr "type" "ssemov")
9321 (set_attr "prefix_extra" "1")
9322 (set_attr "prefix" "maybe_vex")
9323 (set_attr "mode" "TI")])
;; pmovzxbd, narrow-source form: the source is a V4QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
9325 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9326 [(set (match_operand:V4SI 0 "register_operand" "=x")
9329 (vec_duplicate:V16QI
9330 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9331 (parallel [(const_int 0)
9336 "%vpmovzxbd\t{%1, %0|%0, %1}"
9337 [(set_attr "type" "ssemov")
9338 (set_attr "prefix_extra" "1")
9339 (set_attr "prefix" "maybe_vex")
9340 (set_attr "mode" "TI")])
;; pmovzxbq, register-source form: takes a full V16QI register and produces
;; V2DI.
9342 (define_insn "sse4_1_zero_extendv2qiv2di2"
9343 [(set (match_operand:V2DI 0 "register_operand" "=x")
9346 (match_operand:V16QI 1 "register_operand" "x")
9347 (parallel [(const_int 0)
9350 "%vpmovzxbq\t{%1, %0|%0, %1}"
9351 [(set_attr "type" "ssemov")
9352 (set_attr "prefix_extra" "1")
9353 (set_attr "prefix" "maybe_vex")
9354 (set_attr "mode" "TI")])
;; pmovzxbq, narrow-source form: the source is a V2QI register-or-memory
;; operand wrapped in vec_duplicate:V16QI.
9356 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9357 [(set (match_operand:V2DI 0 "register_operand" "=x")
9360 (vec_duplicate:V16QI
9361 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9362 (parallel [(const_int 0)
9365 "%vpmovzxbq\t{%1, %0|%0, %1}"
9366 [(set_attr "type" "ssemov")
9367 (set_attr "prefix_extra" "1")
9368 (set_attr "prefix" "maybe_vex")
9369 (set_attr "mode" "TI")])
;; pmovzxwd, register-source form: takes a full V8HI register and produces
;; V4SI.
9371 (define_insn "sse4_1_zero_extendv4hiv4si2"
9372 [(set (match_operand:V4SI 0 "register_operand" "=x")
9375 (match_operand:V8HI 1 "register_operand" "x")
9376 (parallel [(const_int 0)
9381 "%vpmovzxwd\t{%1, %0|%0, %1}"
9382 [(set_attr "type" "ssemov")
9383 (set_attr "prefix_extra" "1")
9384 (set_attr "prefix" "maybe_vex")
9385 (set_attr "mode" "TI")])
;; pmovzxwd, narrow-source form: the source is a V4HI register-or-memory
;; operand.
9387 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9388 [(set (match_operand:V4SI 0 "register_operand" "=x")
9392 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9393 (parallel [(const_int 0)
9398 "%vpmovzxwd\t{%1, %0|%0, %1}"
9399 [(set_attr "type" "ssemov")
9400 (set_attr "prefix_extra" "1")
9401 (set_attr "prefix" "maybe_vex")
9402 (set_attr "mode" "TI")])
;; pmovzxwq, register-source form: takes a full V8HI register and produces
;; V2DI.
9404 (define_insn "sse4_1_zero_extendv2hiv2di2"
9405 [(set (match_operand:V2DI 0 "register_operand" "=x")
9408 (match_operand:V8HI 1 "register_operand" "x")
9409 (parallel [(const_int 0)
9412 "%vpmovzxwq\t{%1, %0|%0, %1}"
9413 [(set_attr "type" "ssemov")
9414 (set_attr "prefix_extra" "1")
9415 (set_attr "prefix" "maybe_vex")
9416 (set_attr "mode" "TI")])
;; pmovzxwq, narrow-source form: the source is a V2HI register-or-memory
;; operand.
9418 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9419 [(set (match_operand:V2DI 0 "register_operand" "=x")
9423 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9424 (parallel [(const_int 0)
9427 "%vpmovzxwq\t{%1, %0|%0, %1}"
9428 [(set_attr "type" "ssemov")
9429 (set_attr "prefix_extra" "1")
9430 (set_attr "prefix" "maybe_vex")
9431 (set_attr "mode" "TI")])
;; pmovzxdq, register-source form: takes a full V4SI register and produces
;; V2DI.
9433 (define_insn "sse4_1_zero_extendv2siv2di2"
9434 [(set (match_operand:V2DI 0 "register_operand" "=x")
9437 (match_operand:V4SI 1 "register_operand" "x")
9438 (parallel [(const_int 0)
9441 "%vpmovzxdq\t{%1, %0|%0, %1}"
9442 [(set_attr "type" "ssemov")
9443 (set_attr "prefix_extra" "1")
9444 (set_attr "prefix" "maybe_vex")
9445 (set_attr "mode" "TI")])
;; pmovzxdq, narrow-source form: the source is a V2SI register-or-memory
;; operand.
9447 (define_insn "*sse4_1_zero_extendv2siv2di2"
9448 [(set (match_operand:V2DI 0 "register_operand" "=x")
9452 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9453 (parallel [(const_int 0)
9456 "%vpmovzxdq\t{%1, %0|%0, %1}"
9457 [(set_attr "type" "ssemov")
9458 (set_attr "prefix_extra" "1")
9459 (set_attr "prefix" "maybe_vex")
9460 (set_attr "mode" "TI")])
9462 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9463 ;; setting FLAGS_REG, but they are not really compare instructions.
;; AVX vtestp<suffix>: sets FLAGS_REG (CC mode) from an unspec of operand 0
;; (register) and operand 1 (register or memory).  No register result.
9464 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9465 [(set (reg:CC FLAGS_REG)
9466 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9467 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9470 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9471 [(set_attr "type" "ssecomi")
9472 (set_attr "prefix" "vex")
9473 (set_attr "mode" "<MODE>")])
9475 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9476 ;; but it is not really a compare instruction.
;; 256-bit vptest on V4DI: sets FLAGS_REG (CC mode) from an unspec of
;; operand 0 (register) and operand 1 (register or memory).  Mode is OI.
9477 (define_insn "avx_ptest256"
9478 [(set (reg:CC FLAGS_REG)
9479 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9480 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9483 "vptest\t{%1, %0|%0, %1}"
9484 [(set_attr "type" "ssecomi")
9485 (set_attr "prefix" "vex")
9486 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI: sets FLAGS_REG (CC mode) from an unspec of
;; operand 0 (register) and operand 1 (register or memory).
;; "%v" / "maybe_vex" cover the VEX encoding.
9488 (define_insn "sse4_1_ptest"
9489 [(set (reg:CC FLAGS_REG)
9490 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9491 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9494 "%vptest\t{%1, %0|%0, %1}"
9495 [(set_attr "type" "ssecomi")
9496 (set_attr "prefix_extra" "1")
9497 (set_attr "prefix" "maybe_vex")
9498 (set_attr "mode" "TI")])
;; 256-bit vroundp<suffix> over AVX256MODEF2P: operand 1 may be a register
;; or memory, operand 2 is an immediate in 0..15.  Modelled as an unspec.
9500 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9501 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9502 (unspec:AVX256MODEF2P
9503 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9504 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9507 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9508 [(set_attr "type" "ssecvt")
9509 (set_attr "prefix" "vex")
9510 (set_attr "mode" "<MODE>")])
;; 128-bit roundp<suffix> over SSEMODEF2P (V4SF, V2DF): operand 1 may be a
;; register or memory, operand 2 is an immediate in 0..15.
;; "%v" / "maybe_vex" cover the VEX encoding.
9512 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9513 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9515 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9516 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9519 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9520 [(set_attr "type" "ssecvt")
9521 (set_attr "prefix_extra" "1")
9522 (set_attr "prefix" "maybe_vex")
9523 (set_attr "mode" "<MODE>")])
;; AVX (VEX) scalar vrounds<suffix>, expressed as a vec_merge of the rounded
;; operand 2 (immediate operand 3 in 0..15) with operand 1.
;; All vector operands must be registers.
9525 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9526 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9527 (vec_merge:SSEMODEF2P
9529 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9530 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9532 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9535 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9536 [(set_attr "type" "ssecvt")
9537 (set_attr "prefix" "vex")
9538 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar rounds<suffix>, expressed as a vec_merge of the rounded
;; operand 2 (immediate operand 3 in 0..15) with operand 1, which is tied
;; to the destination.
9540 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9542 (vec_merge:SSEMODEF2P
9544 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9545 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9547 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9550 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9551 [(set_attr "type" "ssecvt")
9552 (set_attr "prefix_extra" "1")
9553 (set_attr "mode" "<MODE>")])
9555 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9557 ;; Intel SSE4.2 string/text processing instructions
9559 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three outputs: an SI result in operand 0
;; (constraint "c"), a V16QI result in operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is only valid before reload (see the condition) and is
;; split into the specific instructions below.
;; The split code checks the insn's REG_UNUSED notes to learn which of the
;; three outputs are live, emits sse4_2_pcmpestri and/or sse4_2_pcmpestrm
;; accordingly, and falls back to sse4_2_pcmpestr_cconly when only the
;; flags are used.
9561 (define_insn_and_split "sse4_2_pcmpestr"
9562 [(set (match_operand:SI 0 "register_operand" "=c,c")
9564 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9565 (match_operand:SI 3 "register_operand" "a,a")
9566 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9567 (match_operand:SI 5 "register_operand" "d,d")
9568 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9570 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9578 (set (reg:CC FLAGS_REG)
9587 && !(reload_completed || reload_in_progress)"
9592 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9593 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9594 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9597 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9598 operands[3], operands[4],
9599 operands[5], operands[6]));
9601 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9602 operands[3], operands[4],
9603 operands[5], operands[6]));
9604 if (flags && !(ecx || xmm0))
9605 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9606 operands[2], operands[3],
9607 operands[4], operands[5],
9611 [(set_attr "type" "sselog")
9612 (set_attr "prefix_data16" "1")
9613 (set_attr "prefix_extra" "1")
9614 (set_attr "memory" "none,load")
9615 (set_attr "mode" "TI")])
;; pcmpestri: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; The SI inputs are pinned by constraints "a" (operand 2) and "d"
;; (operand 4) and do not appear in the output template.  The two
;; alternatives differ in whether operand 3 is a register or memory, as
;; reflected by the "memory" attribute.
9617 (define_insn "sse4_2_pcmpestri"
9618 [(set (match_operand:SI 0 "register_operand" "=c,c")
9620 [(match_operand:V16QI 1 "register_operand" "x,x")
9621 (match_operand:SI 2 "register_operand" "a,a")
9622 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9623 (match_operand:SI 4 "register_operand" "d,d")
9624 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9626 (set (reg:CC FLAGS_REG)
9635 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9636 [(set_attr "type" "sselog")
9637 (set_attr "prefix_data16" "1")
9638 (set_attr "prefix_extra" "1")
9639 (set_attr "prefix" "maybe_vex")
9640 (set_attr "memory" "none,load")
9641 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; The SI inputs are pinned by constraints "a" (operand 2) and "d"
;; (operand 4) and do not appear in the output template.  The two
;; alternatives differ in whether operand 3 is a register or memory.
9643 (define_insn "sse4_2_pcmpestrm"
9644 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9646 [(match_operand:V16QI 1 "register_operand" "x,x")
9647 (match_operand:SI 2 "register_operand" "a,a")
9648 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9649 (match_operand:SI 4 "register_operand" "d,d")
9650 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9652 (set (reg:CC FLAGS_REG)
9661 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9662 [(set_attr "type" "sselog")
9663 (set_attr "prefix_data16" "1")
9664 (set_attr "prefix_extra" "1")
9665 (set_attr "prefix" "maybe_vex")
9666 (set_attr "memory" "none,load")
9667 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: only FLAGS_REG is set; the other result is a
;; clobbered scratch.  Four alternatives:
;;   0,1: scratch operand 0 in "Yz", emit pcmpestrm (register / memory src)
;;   2,3: scratch operand 1 in "c",  emit pcmpestri (register / memory src)
;; "X" marks the scratch that the chosen alternative does not need.
9669 (define_insn "sse4_2_pcmpestr_cconly"
9670 [(set (reg:CC FLAGS_REG)
9672 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9673 (match_operand:SI 3 "register_operand" "a,a,a,a")
9674 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9675 (match_operand:SI 5 "register_operand" "d,d,d,d")
9676 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9678 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9679 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9682 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9683 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9684 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9685 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9686 [(set_attr "type" "sselog")
9687 (set_attr "prefix_data16" "1")
9688 (set_attr "prefix_extra" "1")
9689 (set_attr "memory" "none,load,none,load")
9690 (set_attr "prefix" "maybe_vex")
9691 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three outputs: an SI result in operand 0
;; (constraint "c"), a V16QI result in operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is only valid before reload (see the condition) and is
;; split into the specific instructions below.
;; The split code checks the insn's REG_UNUSED notes to learn which of the
;; three outputs are live, emits sse4_2_pcmpistri and/or sse4_2_pcmpistrm
;; accordingly, and falls back to sse4_2_pcmpistr_cconly when only the
;; flags are used.
9693 (define_insn_and_split "sse4_2_pcmpistr"
9694 [(set (match_operand:SI 0 "register_operand" "=c,c")
9696 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9697 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9698 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9700 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9706 (set (reg:CC FLAGS_REG)
9713 && !(reload_completed || reload_in_progress)"
9718 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9719 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9720 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9723 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9724 operands[3], operands[4]));
9726 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9727 operands[3], operands[4]));
9728 if (flags && !(ecx || xmm0))
9729 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9730 operands[2], operands[3],
9734 [(set_attr "type" "sselog")
9735 (set_attr "prefix_data16" "1")
9736 (set_attr "prefix_extra" "1")
9737 (set_attr "memory" "none,load")
9738 (set_attr "mode" "TI")])
;; pcmpistri insn.  Operand 0 is the SImode result (constraint c); operands
;; 1 and 2 are the V16QI sources, of which only operand 2 may be in memory;
;; operand 3 is an immediate in the range 0..255.  FLAGS_REG is set as well.
9740 (define_insn "sse4_2_pcmpistri"
9741 [(set (match_operand:SI 0 "register_operand" "=c,c")
9743 [(match_operand:V16QI 1 "register_operand" "x,x")
9744 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9745 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9747 (set (reg:CC FLAGS_REG)
9754 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9755 [(set_attr "type" "sselog")
9756 (set_attr "prefix_data16" "1")
9757 (set_attr "prefix_extra" "1")
9758 (set_attr "prefix" "maybe_vex")
9759 (set_attr "memory" "none,load")
9760 (set_attr "mode" "TI")])
;; pcmpistrm insn.  Operand 0 is the V16QI result (constraint Yz); operands
;; 1 and 2 are the V16QI sources, of which only operand 2 may be in memory;
;; operand 3 is an immediate in the range 0..255.  FLAGS_REG is set as well.
9762 (define_insn "sse4_2_pcmpistrm"
9763 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9765 [(match_operand:V16QI 1 "register_operand" "x,x")
9766 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9767 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9769 (set (reg:CC FLAGS_REG)
9776 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9777 [(set_attr "type" "sselog")
9778 (set_attr "prefix_data16" "1")
9779 (set_attr "prefix_extra" "1")
9780 (set_attr "prefix" "maybe_vex")
9781 (set_attr "memory" "none,load")
9782 (set_attr "mode" "TI")])
;; Flags-only form of the implicit-length string compare: only FLAGS_REG is
;; set, the other results are clobbered scratches.  Alternatives 0 and 1
;; clobber a V16QI scratch (constraint Yz) and emit pcmpistrm; alternatives
;; 2 and 3 clobber an SImode scratch (constraint c) and emit pcmpistri.
;; Within each pair the second alternative takes operand 3 from memory.
9784 (define_insn "sse4_2_pcmpistr_cconly"
9785 [(set (reg:CC FLAGS_REG)
9787 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9788 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9789 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9791 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9792 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9795 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9796 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9797 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9798 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9799 [(set_attr "type" "sselog")
9800 (set_attr "prefix_data16" "1")
9801 (set_attr "prefix_extra" "1")
9802 (set_attr "memory" "none,load,none,load")
9803 (set_attr "prefix" "maybe_vex")
9804 (set_attr "mode" "TI")])
9806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9808 ;; SSE5 instructions
9810 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9812 ;; SSE5 parallel integer multiply/add instructions.
9813 ;; Note that the instruction does not allow the value being added to be a
9814 ;; memory operand.  However, pretending via the nonimmediate_operand predicate
9815 ;; that it does, and splitting it later, allows the following to be recognized:
9816 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww insn on V8HI.  Operands 1 and 2 are marked commutative (%) and
;; each alternative lets at most one of them be a memory operand; operand 3
;; is tied to the destination (constraint 0).  The third alternative prints
;; operands 1 and 2 in swapped order.
;; NOTE(review): the last integer passed to ix86_sse5_valid_op_p is 2 here
;; but 1 in sse5_pmacssww -- presumably so that the two-memory-operand form
;; is accepted and later handled by the split below; confirm against the
;; helper's definition.
9817 (define_insn "sse5_pmacsww"
9818 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9821 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
9822 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
9823 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9824 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9826 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9827 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9828 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9829 [(set_attr "type" "ssemuladd")
9830 (set_attr "mode" "TI")])
9832 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The pattern matches a V8HI multiply of operands 1 and 2 combined with
;; operand 3, where all three sources may be memory.  The visible condition
;; requires that the operands fail ix86_sse5_valid_op_p with argument 1 but
;; pass it with argument 2, and that the destination is not mentioned in any
;; source operand.  The expansion calls ix86_expand_sse5_multiple_memory on
;; the operands and then emits sse5_pmacsww.
9834 [(set (match_operand:V8HI 0 "register_operand" "")
9836 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
9837 (match_operand:V8HI 2 "nonimmediate_operand" ""))
9838 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
9840 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9841 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9842 && !reg_mentioned_p (operands[0], operands[1])
9843 && !reg_mentioned_p (operands[0], operands[2])
9844 && !reg_mentioned_p (operands[0], operands[3])"
9847 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
9848 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww insn on V8HI: a V8HI multiply of operands 1 and 2 (commutative,
;; at most one of them in memory per alternative) combined with operand 3,
;; which is tied to the destination.  The third alternative prints operands
;; 1 and 2 in swapped order.
9853 (define_insn "sse5_pmacssww"
9854 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9856 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
9857 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
9858 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9859 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9861 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9862 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9863 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9864 [(set_attr "type" "ssemuladd")
9865 (set_attr "mode" "TI")])
9867 ;; Note that the instruction does not allow the value being added to be a
9868 ;; memory operand.  However, pretending via the nonimmediate_operand predicate
9869 ;; that it does, and splitting it later, allows the following to be recognized:
9870 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd insn on V4SI.  Operands 1 and 2 are marked commutative (%) and
;; at most one of them is a memory operand per alternative; operand 3 is
;; tied to the destination.  The third alternative prints operands 1 and 2
;; in swapped order.
;; NOTE(review): ix86_sse5_valid_op_p is called with 2 here (1 in
;; sse5_pmacssdd), presumably to admit the form handled by the split that
;; follows; confirm against the helper's definition.
9871 (define_insn "sse5_pmacsdd"
9872 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9875 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9876 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9877 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9878 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9880 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9881 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9882 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9883 [(set_attr "type" "ssemuladd")
9884 (set_attr "mode" "TI")])
9886 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The pattern matches a V4SI multiply of operands 1 and 2 combined with
;; operand 3, where all three sources may be memory.  The visible condition
;; requires that the operands fail ix86_sse5_valid_op_p with argument 1 but
;; pass it with argument 2, and that the destination is not mentioned in any
;; source operand.  The expansion calls ix86_expand_sse5_multiple_memory on
;; the operands and then emits sse5_pmacsdd.
9888 [(set (match_operand:V4SI 0 "register_operand" "")
9890 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
9891 (match_operand:V4SI 2 "nonimmediate_operand" ""))
9892 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
9894 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9895 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9896 && !reg_mentioned_p (operands[0], operands[1])
9897 && !reg_mentioned_p (operands[0], operands[2])
9898 && !reg_mentioned_p (operands[0], operands[3])"
9901 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
9902 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd insn on V4SI: a V4SI multiply of operands 1 and 2 (commutative,
;; at most one of them in memory per alternative) combined with operand 3,
;; which is tied to the destination.  The third alternative prints operands
;; 1 and 2 in swapped order.
9907 (define_insn "sse5_pmacssdd"
9908 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9910 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9911 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9912 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9913 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9915 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9916 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9917 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9918 [(set_attr "type" "ssemuladd")
9919 (set_attr "mode" "TI")])
;; pmacssdql insn: V4SI sources (operands 1 and 2, commutative, at most one
;; in memory per alternative), V2DI destination, and a V2DI operand 3 tied
;; to the destination.  The element selections on both sources begin with
;; index 1.  The third alternative prints operands 1 and 2 swapped.
9921 (define_insn "sse5_pmacssdql"
9922 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9927 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9928 (parallel [(const_int 1)
9931 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9932 (parallel [(const_int 1)
9934 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9935 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9937 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9938 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9939 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9940 [(set_attr "type" "ssemuladd")
9941 (set_attr "mode" "TI")])
;; pmacssdqh insn: V4SI sources (operands 1 and 2, commutative, at most one
;; in memory per alternative), V2DI destination, and a V2DI operand 3 tied
;; to the destination.  The element selections on both sources begin with
;; index 0.  The third alternative prints operands 1 and 2 swapped.
9943 (define_insn "sse5_pmacssdqh"
9944 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9949 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9950 (parallel [(const_int 0)
9954 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9955 (parallel [(const_int 0)
9957 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9958 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9960 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9961 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9962 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9963 [(set_attr "type" "ssemuladd")
9964 (set_attr "mode" "TI")])
;; pmacsdql insn: V4SI sources (operands 1 and 2, commutative, at most one
;; in memory per alternative), V2DI destination, and a V2DI register operand
;; 3 tied to the destination.  The element selections on both sources begin
;; with index 1.  The third alternative prints operands 1 and 2 swapped.
9966 (define_insn "sse5_pmacsdql"
9967 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9972 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9973 (parallel [(const_int 1)
9977 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9978 (parallel [(const_int 1)
9980 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9981 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9983 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9984 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9985 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9986 [(set_attr "type" "ssemuladd")
9987 (set_attr "mode" "TI")])
;; Variant of sse5_pmacsdql whose operand 3 is a memory operand rather than
;; a register tied to the destination.  The destination is an earlyclobber
;; register (=&x).  The split condition holds once reload has completed, or
;; earlier when the destination is not mentioned in operand 1 or operand 2;
;; the replacement sequence starts by setting operand 0.
9989 (define_insn_and_split "*sse5_pmacsdql_mem"
9990 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
9995 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9996 (parallel [(const_int 1)
10000 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10001 (parallel [(const_int 1)
10003 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10004 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10006 "&& (reload_completed
10007 || (!reg_mentioned_p (operands[0], operands[1])
10008 && !reg_mentioned_p (operands[0], operands[2])))"
10009 [(set (match_dup 0)
10017 (parallel [(const_int 1)
10022 (parallel [(const_int 1)
10026 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10027 ;; fake it with a multiply/add. In general, we expect the define_split to
10028 ;; occur before register allocation, so we have to handle the corner case where
10029 ;; the target is the same as operands 1/2
;; V4SI x V4SI -> V2DI multiply using source elements 1 and 3.  The
;; destination is an earlyclobber register; operand 2 may be in memory.
;; The split condition holds once reload has completed, or earlier when the
;; destination is not mentioned in operand 1 or operand 2.  The preparation
;; code sets operands[3] to the V2DI zero constant for use by the
;; replacement sequence.
10030 (define_insn_and_split "sse5_mulv2div2di3_low"
10031 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10035 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10036 (parallel [(const_int 1)
10040 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10041 (parallel [(const_int 1)
10042 (const_int 3)])))))]
10045 "&& (reload_completed
10046 || (!reg_mentioned_p (operands[0], operands[1])
10047 && !reg_mentioned_p (operands[0], operands[2])))"
10048 [(set (match_dup 0)
10056 (parallel [(const_int 1)
10061 (parallel [(const_int 1)
10065 operands[3] = CONST0_RTX (V2DImode);
10067 [(set_attr "type" "ssemuladd")
10068 (set_attr "mode" "TI")])
;; pmacsdqh insn: V4SI sources (operands 1 and 2, commutative, at most one
;; in memory per alternative), V2DI destination, and a V2DI register operand
;; 3 tied to the destination.  The element selections on both sources begin
;; with index 0.  The third alternative prints operands 1 and 2 swapped.
10070 (define_insn "sse5_pmacsdqh"
10071 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10076 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10077 (parallel [(const_int 0)
10081 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10082 (parallel [(const_int 0)
10084 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10085 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10087 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10088 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10089 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10090 [(set_attr "type" "ssemuladd")
10091 (set_attr "mode" "TI")])
;; Variant of sse5_pmacsdqh whose operand 3 is a memory operand rather than
;; a register tied to the destination.  The destination is an earlyclobber
;; register (=&x).  The split condition holds once reload has completed, or
;; earlier when the destination is not mentioned in operand 1 or operand 2;
;; the replacement sequence starts by setting operand 0.
10093 (define_insn_and_split "*sse5_pmacsdqh_mem"
10094 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10099 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10100 (parallel [(const_int 0)
10104 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10105 (parallel [(const_int 0)
10107 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10108 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10110 "&& (reload_completed
10111 || (!reg_mentioned_p (operands[0], operands[1])
10112 && !reg_mentioned_p (operands[0], operands[2])))"
10113 [(set (match_dup 0)
10121 (parallel [(const_int 0)
10126 (parallel [(const_int 0)
10130 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10131 ;; fake it with a multiply/add. In general, we expect the define_split to
10132 ;; occur before register allocation, so we have to handle the corner case where
10133 ;; the target is the same as either operands[1] or operands[2]
;; V4SI x V4SI -> V2DI multiply using source elements 0 and 2.  The
;; destination is an earlyclobber register; operand 2 may be in memory.
;; The split condition holds once reload has completed, or earlier when the
;; destination is not mentioned in operand 1 or operand 2.  The preparation
;; code sets operands[3] to the V2DI zero constant for use by the
;; replacement sequence.
10134 (define_insn_and_split "sse5_mulv2div2di3_high"
10135 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10139 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10140 (parallel [(const_int 0)
10144 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10145 (parallel [(const_int 0)
10146 (const_int 2)])))))]
10149 "&& (reload_completed
10150 || (!reg_mentioned_p (operands[0], operands[1])
10151 && !reg_mentioned_p (operands[0], operands[2])))"
10152 [(set (match_dup 0)
10160 (parallel [(const_int 0)
10165 (parallel [(const_int 0)
10169 operands[3] = CONST0_RTX (V2DImode);
10171 [(set_attr "type" "ssemuladd")
10172 (set_attr "mode" "TI")])
10174 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd insn: V8HI sources (operands 1 and 2, commutative, at most one
;; in memory per alternative), V4SI destination, and a V4SI operand 3 tied
;; to the destination.  The element selections on both sources begin with
;; index 1.  The third alternative prints operands 1 and 2 swapped.
10175 (define_insn "sse5_pmacsswd"
10176 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10181 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10182 (parallel [(const_int 1)
10188 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10189 (parallel [(const_int 1)
10193 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10194 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10196 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10197 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10198 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10199 [(set_attr "type" "ssemuladd")
10200 (set_attr "mode" "TI")])
;; pmacswd insn: V8HI sources (operands 1 and 2, commutative, at most one
;; in memory per alternative), V4SI destination, and a V4SI operand 3 tied
;; to the destination.  The element selections on both sources begin with
;; index 1.  The third alternative prints operands 1 and 2 swapped.
10202 (define_insn "sse5_pmacswd"
10203 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10208 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10209 (parallel [(const_int 1)
10215 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10216 (parallel [(const_int 1)
10220 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10221 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10223 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10224 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10225 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10226 [(set_attr "type" "ssemuladd")
10227 (set_attr "mode" "TI")])
;; pmadcsswd insn: V8HI sources (operands 1 and 2, commutative, at most one
;; in memory per alternative), V4SI destination, and a V4SI operand 3 tied
;; to the destination.  The template uses two groups of element selections,
;; one beginning with index 0 and one beginning with index 1 (ending at 7).
;; The third alternative prints operands 1 and 2 swapped.
10229 (define_insn "sse5_pmadcsswd"
10230 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10236 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10237 (parallel [(const_int 0)
10243 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10244 (parallel [(const_int 0)
10252 (parallel [(const_int 1)
10259 (parallel [(const_int 1)
10262 (const_int 7)])))))
10263 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10264 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10266 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10267 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10268 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10269 [(set_attr "type" "ssemuladd")
10270 (set_attr "mode" "TI")])
;; pmadcswd insn: V8HI sources (operands 1 and 2, commutative, at most one
;; in memory per alternative), V4SI destination, and a V4SI operand 3 tied
;; to the destination.  The template uses two groups of element selections,
;; one beginning with index 0 and one beginning with index 1 (ending at 7).
;; The third alternative prints operands 1 and 2 swapped.
10272 (define_insn "sse5_pmadcswd"
10273 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10279 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10280 (parallel [(const_int 0)
10286 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10287 (parallel [(const_int 0)
10295 (parallel [(const_int 1)
10302 (parallel [(const_int 1)
10305 (const_int 7)])))))
10306 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10307 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10309 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10310 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10311 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10312 [(set_attr "type" "ssemuladd")
10313 (set_attr "mode" "TI")])
10315 ;; SSE5 parallel XMM conditional moves
;; pcmov insn for every mode in SSEMODE, expressed as an if_then_else with
;; operand 3 as the selector and operands 1 and 2 as the two arms.  In each
;; of the four alternatives exactly one source is tied to the destination
;; (operand 3 in the first two, operand 1 in the last two) and at most one
;; other source may be in memory.  All alternatives share the same output
;; template.  Only the type attribute is set; there is no mode attribute.
10316 (define_insn "sse5_pcmov_<mode>"
10317 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10318 (if_then_else:SSEMODE
10319 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10320 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10321 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10322 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10324 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10325 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10326 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10327 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10328 [(set_attr "type" "sse4arg")])
10330 ;; SSE5 horizontal add/subtract instructions
;; phaddbw insn: single V16QI source (register or memory), V8HI destination
;; register.  The template selects two element groups from the source, one
;; starting at index 0 and one starting at index 1 (ending at 15).
10331 (define_insn "sse5_phaddbw"
10332 [(set (match_operand:V8HI 0 "register_operand" "=x")
10336 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10337 (parallel [(const_int 0)
10348 (parallel [(const_int 1)
10355 (const_int 15)])))))]
10357 "phaddbw\t{%1, %0|%0, %1}"
10358 [(set_attr "type" "sseiadd1")])
;; phaddbd insn: single V16QI source (register or memory), V4SI destination
;; register.  The template selects four element groups from the source,
;; starting at indices 0, 1, 2 and 3 respectively.
10360 (define_insn "sse5_phaddbd"
10361 [(set (match_operand:V4SI 0 "register_operand" "=x")
10366 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10367 (parallel [(const_int 0)
10374 (parallel [(const_int 1)
10377 (const_int 13)]))))
10382 (parallel [(const_int 2)
10389 (parallel [(const_int 3)
10392 (const_int 15)]))))))]
10394 "phaddbd\t{%1, %0|%0, %1}"
10395 [(set_attr "type" "sseiadd1")])
;; phaddbq insn: single V16QI source (register or memory), V2DI destination
;; register.  The template selects eight element groups from the source,
;; starting at indices 0, 1, 2, 3 and 8, 9, 10, 11.
10397 (define_insn "sse5_phaddbq"
10398 [(set (match_operand:V2DI 0 "register_operand" "=x")
10404 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10405 (parallel [(const_int 0)
10410 (parallel [(const_int 1)
10416 (parallel [(const_int 2)
10421 (parallel [(const_int 3)
10422 (const_int 7)])))))
10428 (parallel [(const_int 8)
10433 (parallel [(const_int 9)
10434 (const_int 13)]))))
10439 (parallel [(const_int 10)
10444 (parallel [(const_int 11)
10445 (const_int 15)])))))))]
10447 "phaddbq\t{%1, %0|%0, %1}"
10448 [(set_attr "type" "sseiadd1")])
;; phaddwd insn: single V8HI source (register or memory), V4SI destination
;; register.  The template selects two element groups from the source, one
;; starting at index 0 and one starting at index 1 (ending at 7).
10450 (define_insn "sse5_phaddwd"
10451 [(set (match_operand:V4SI 0 "register_operand" "=x")
10455 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10456 (parallel [(const_int 0)
10463 (parallel [(const_int 1)
10466 (const_int 7)])))))]
10468 "phaddwd\t{%1, %0|%0, %1}"
10469 [(set_attr "type" "sseiadd1")])
;; phaddwq insn: single V8HI source (register or memory), V2DI destination
;; register.  The template selects four element groups from the source,
;; starting at indices 0, 1, 2 and 3 respectively.
10471 (define_insn "sse5_phaddwq"
10472 [(set (match_operand:V2DI 0 "register_operand" "=x")
10477 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10478 (parallel [(const_int 0)
10483 (parallel [(const_int 1)
10489 (parallel [(const_int 2)
10494 (parallel [(const_int 3)
10495 (const_int 7)]))))))]
10497 "phaddwq\t{%1, %0|%0, %1}"
10498 [(set_attr "type" "sseiadd1")])
;; phadddq insn: single V4SI source (register or memory), V2DI destination
;; register.  The template selects two element groups from the source, one
;; starting at index 0 and one consisting of indices 1 and 3.
10500 (define_insn "sse5_phadddq"
10501 [(set (match_operand:V2DI 0 "register_operand" "=x")
10505 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10506 (parallel [(const_int 0)
10511 (parallel [(const_int 1)
10512 (const_int 3)])))))]
10514 "phadddq\t{%1, %0|%0, %1}"
10515 [(set_attr "type" "sseiadd1")])
;; phaddubw insn: single V16QI source (register or memory), V8HI
;; destination register.  Same operand layout and element selections as
;; sse5_phaddbw; only the emitted mnemonic is visibly different here.
10517 (define_insn "sse5_phaddubw"
10518 [(set (match_operand:V8HI 0 "register_operand" "=x")
10522 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10523 (parallel [(const_int 0)
10534 (parallel [(const_int 1)
10541 (const_int 15)])))))]
10543 "phaddubw\t{%1, %0|%0, %1}"
10544 [(set_attr "type" "sseiadd1")])
;; phaddubd insn: single V16QI source (register or memory), V4SI
;; destination register.  The template selects four element groups from
;; the source, starting at indices 0, 1, 2 and 3 respectively.
10546 (define_insn "sse5_phaddubd"
10547 [(set (match_operand:V4SI 0 "register_operand" "=x")
10552 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10553 (parallel [(const_int 0)
10560 (parallel [(const_int 1)
10563 (const_int 13)]))))
10568 (parallel [(const_int 2)
10575 (parallel [(const_int 3)
10578 (const_int 15)]))))))]
10580 "phaddubd\t{%1, %0|%0, %1}"
10581 [(set_attr "type" "sseiadd1")])
;; phaddubq insn: single V16QI source (register or memory), V2DI
;; destination register.  The template selects eight element groups from
;; the source, starting at indices 0, 1, 2, 3 and 8, 9, 10, 11.
10583 (define_insn "sse5_phaddubq"
10584 [(set (match_operand:V2DI 0 "register_operand" "=x")
10590 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10591 (parallel [(const_int 0)
10596 (parallel [(const_int 1)
10602 (parallel [(const_int 2)
10607 (parallel [(const_int 3)
10608 (const_int 7)])))))
10614 (parallel [(const_int 8)
10619 (parallel [(const_int 9)
10620 (const_int 13)]))))
10625 (parallel [(const_int 10)
10630 (parallel [(const_int 11)
10631 (const_int 15)])))))))]
10633 "phaddubq\t{%1, %0|%0, %1}"
10634 [(set_attr "type" "sseiadd1")])
;; phadduwd insn: single V8HI source (register or memory), V4SI destination
;; register.  The template selects two element groups from the source, one
;; starting at index 0 and one starting at index 1 (ending at 7).
10636 (define_insn "sse5_phadduwd"
10637 [(set (match_operand:V4SI 0 "register_operand" "=x")
10641 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10642 (parallel [(const_int 0)
10649 (parallel [(const_int 1)
10652 (const_int 7)])))))]
10654 "phadduwd\t{%1, %0|%0, %1}"
10655 [(set_attr "type" "sseiadd1")])
;; phadduwq insn: single V8HI source (register or memory), V2DI destination
;; register.  The template selects four element groups from the source,
;; starting at indices 0, 1, 2 and 3 respectively.
10657 (define_insn "sse5_phadduwq"
10658 [(set (match_operand:V2DI 0 "register_operand" "=x")
10663 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10664 (parallel [(const_int 0)
10669 (parallel [(const_int 1)
10675 (parallel [(const_int 2)
10680 (parallel [(const_int 3)
10681 (const_int 7)]))))))]
10683 "phadduwq\t{%1, %0|%0, %1}"
10684 [(set_attr "type" "sseiadd1")])
;; phaddudq insn: single V4SI source (register or memory), V2DI destination
;; register.  The template selects two element groups from the source, one
;; starting at index 0 and one consisting of indices 1 and 3.
10686 (define_insn "sse5_phaddudq"
10687 [(set (match_operand:V2DI 0 "register_operand" "=x")
10691 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10692 (parallel [(const_int 0)
10697 (parallel [(const_int 1)
10698 (const_int 3)])))))]
10700 "phaddudq\t{%1, %0|%0, %1}"
10701 [(set_attr "type" "sseiadd1")])
;; phsubbw insn: single V16QI source (register or memory), V8HI destination
;; register.  The template selects two element groups from the source, one
;; starting at index 0 and one starting at index 1 (ending at 15).
10703 (define_insn "sse5_phsubbw"
10704 [(set (match_operand:V8HI 0 "register_operand" "=x")
10708 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10709 (parallel [(const_int 0)
10720 (parallel [(const_int 1)
10727 (const_int 15)])))))]
10729 "phsubbw\t{%1, %0|%0, %1}"
10730 [(set_attr "type" "sseiadd1")])
;; phsubwd insn: single V8HI source (register or memory), V4SI destination
;; register.  The template selects two element groups from the source, one
;; starting at index 0 and one starting at index 1 (ending at 7).
10732 (define_insn "sse5_phsubwd"
10733 [(set (match_operand:V4SI 0 "register_operand" "=x")
10737 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10738 (parallel [(const_int 0)
10745 (parallel [(const_int 1)
10748 (const_int 7)])))))]
10750 "phsubwd\t{%1, %0|%0, %1}"
10751 [(set_attr "type" "sseiadd1")])
;; phsubdq insn: single V4SI source (register or memory), V2DI destination
;; register.  The template selects two element groups from the source, one
;; starting at index 0 and one consisting of indices 1 and 3.
10753 (define_insn "sse5_phsubdq"
10754 [(set (match_operand:V2DI 0 "register_operand" "=x")
10758 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10759 (parallel [(const_int 0)
10764 (parallel [(const_int 1)
10765 (const_int 3)])))))]
10767 "phsubdq\t{%1, %0|%0, %1}"
10768 [(set_attr "type" "sseiadd1")])
10770 ;; SSE5 permute instructions
;; Generic pperm insn on V16QI, modelled as UNSPEC_SSE5_PERMUTE of operands
;; 1, 2 and 3.  In every alternative one source is tied to the destination
;; (operand 1 in the first two, operand 3 in the last two) and at most one
;; of the remaining sources may be in memory.
10771 (define_insn "sse5_pperm"
10772 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10774 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
10775 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
10776 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10777 UNSPEC_SSE5_PERMUTE))]
10778 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10779 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10780 [(set_attr "type" "sse4arg")
10781 (set_attr "mode" "TI")])
10783 ;; The following are for the various unpack insns which don't need the first
10784 ;; source operand, so we can just use the output operand for the first operand.
10785 ;; This allows either of the other two operands to be a memory operand. We
10786 ;; can't just use the first operand as an argument to the normal pperm because
10787 ;; then an output-only argument suddenly becomes an input operand.
;; The zero variant of the V16QI -> V8HI unpack, emitted as a pperm whose
;; first source is the destination itself.  Operand 1 is the V16QI source,
;; operand 2 is the element-selection parallel, and operand 3 is the V16QI
;; pperm control vector.  The constraints let either operand 1 or operand 3
;; be in memory, so the condition requires one of the two to be a register.
;; Operand 2 is a parallel of const_ints and can never satisfy
;; register_operand, so the second test must be on operand 3; testing
;; operand 2 silently disabled the alternative with operand 1 in memory.
10788 (define_insn "sse5_pperm_zero_v16qi_v8hi"
10789 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10792 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10793 (match_operand 2 "" "")))) ;; parallel with const_int's
10794 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10796 && (register_operand (operands[1], V16QImode)
10797 || register_operand (operands[3], V16QImode))"
10798 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10799 [(set_attr "type" "sseadd")
10800 (set_attr "mode" "TI")])
;; The sign variant of the V16QI -> V8HI unpack, emitted as a pperm whose
;; first source is the destination itself.  Operand 1 is the V16QI source,
;; operand 2 is the element-selection parallel, and operand 3 is the V16QI
;; pperm control vector.  The constraints let either operand 1 or operand 3
;; be in memory, so the condition requires one of the two to be a register.
;; Operand 2 is a parallel of const_ints and can never satisfy
;; register_operand, so the second test must be on operand 3; testing
;; operand 2 silently disabled the alternative with operand 1 in memory.
10802 (define_insn "sse5_pperm_sign_v16qi_v8hi"
10803 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10806 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10807 (match_operand 2 "" "")))) ;; parallel with const_int's
10808 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10810 && (register_operand (operands[1], V16QImode)
10811 || register_operand (operands[3], V16QImode))"
10812 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10813 [(set_attr "type" "sseadd")
10814 (set_attr "mode" "TI")])
;; The zero variant of the V8HI -> V4SI unpack, emitted as a pperm whose
;; first source is the destination itself.  Operand 1 is the V8HI source,
;; operand 2 is the element-selection parallel, and operand 3 is the V16QI
;; pperm control vector.  The constraints let either operand 1 or operand 3
;; be in memory, so the condition requires one of the two to be a register.
;; Operand 2 is a parallel of const_ints and can never satisfy
;; register_operand, so the V16QImode test must be on operand 3; testing
;; operand 2 silently disabled the alternative with operand 1 in memory.
10816 (define_insn "sse5_pperm_zero_v8hi_v4si"
10817 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10820 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10821 (match_operand 2 "" "")))) ;; parallel with const_int's
10822 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10824 && (register_operand (operands[1], V8HImode)
10825 || register_operand (operands[3], V16QImode))"
10826 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10827 [(set_attr "type" "sseadd")
10828 (set_attr "mode" "TI")])
;; The sign variant of the V8HI -> V4SI unpack, emitted as a pperm whose
;; first source is the destination itself.  Operand 1 is the V8HI source,
;; operand 2 is the element-selection parallel, and operand 3 is the V16QI
;; pperm control vector.  The constraints let either operand 1 or operand 3
;; be in memory, so the condition requires one of the two to be a register.
;; Operand 2 is a parallel of const_ints and can never satisfy
;; register_operand, so the V16QImode test must be on operand 3; testing
;; operand 2 silently disabled the alternative with operand 1 in memory.
10830 (define_insn "sse5_pperm_sign_v8hi_v4si"
10831 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10834 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10835 (match_operand 2 "" "")))) ;; parallel with const_int's
10836 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10838 && (register_operand (operands[1], V8HImode)
10839 || register_operand (operands[3], V16QImode))"
10840 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10841 [(set_attr "type" "sseadd")
10842 (set_attr "mode" "TI")])
;; The zero variant of the V4SI -> V2DI unpack, emitted as a pperm whose
;; first source is the destination itself.  Operand 1 is the V4SI source,
;; operand 2 is the element-selection parallel, and operand 3 is the V16QI
;; pperm control vector.  The constraints let either operand 1 or operand 3
;; be in memory, so the condition requires one of the two to be a register.
;; Operand 2 is a parallel of const_ints and can never satisfy
;; register_operand, so the V16QImode test must be on operand 3; testing
;; operand 2 silently disabled the alternative with operand 1 in memory.
10844 (define_insn "sse5_pperm_zero_v4si_v2di"
10845 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10848 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10849 (match_operand 2 "" "")))) ;; parallel with const_int's
10850 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10852 && (register_operand (operands[1], V4SImode)
10853 || register_operand (operands[3], V16QImode))"
10854 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10855 [(set_attr "type" "sseadd")
10856 (set_attr "mode" "TI")])
;; The sign variant of the V4SI -> V2DI unpack, emitted as a pperm whose
;; first source is the destination itself.  Operand 1 is the V4SI source,
;; operand 2 is the element-selection parallel, and operand 3 is the V16QI
;; pperm control vector.  The constraints let either operand 1 or operand 3
;; be in memory, so the condition requires one of the two to be a register.
;; Operand 2 is a parallel of const_ints and can never satisfy
;; register_operand, so the V16QImode test must be on operand 3; testing
;; operand 2 silently disabled the alternative with operand 1 in memory.
10858 (define_insn "sse5_pperm_sign_v4si_v2di"
10859 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10862 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10863 (match_operand 2 "" "")))) ;; parallel with const_int's
10864 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10866 && (register_operand (operands[1], V4SImode)
10867 || register_operand (operands[3], V16QImode))"
10868 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10869 [(set_attr "type" "sseadd")
10870 (set_attr "mode" "TI")])
10872 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI sources (operands 1 and 2) into one V4SI destination via
;; pperm; operand 3 is the V16QI control vector, referenced through a use.
;; One source is tied to the destination in every alternative (operand 1 in
;; the first two, operand 3 in the last two) and at most one of the others
;; may be in memory.
10873 (define_insn "sse5_pperm_pack_v2di_v4si"
10874 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
10877 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
10879 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10880 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10881 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10882 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10883 [(set_attr "type" "sse4arg")
10884 (set_attr "mode" "TI")])
;; Packs two V4SI sources (operands 1 and 2) into one V8HI destination via
;; pperm; operand 3 is the V16QI control vector, referenced through a use.
;; One source is tied to the destination in every alternative (operand 1 in
;; the first two, operand 3 in the last two) and at most one of the others
;; may be in memory.
10886 (define_insn "sse5_pperm_pack_v4si_v8hi"
10887 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
10890 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
10892 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10893 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10894 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10895 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10896 [(set_attr "type" "sse4arg")
10897 (set_attr "mode" "TI")])
;; Packs two V8HI sources (operands 1 and 2) into one V16QI destination via
;; pperm; operand 3 is the V16QI control vector, referenced through a use.
;; One source is tied to the destination in every alternative (operand 1 in
;; the first two, operand 3 in the last two) and at most one of the others
;; may be in memory.
10899 (define_insn "sse5_pperm_pack_v8hi_v16qi"
10900 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10903 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
10905 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10906 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10907 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10908 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10909 [(set_attr "type" "sse4arg")
10910 (set_attr "mode" "TI")])
10912 ;; Floating point permutation (permps, permpd)
;; Permute insn for the SSEMODEF2P modes (V4SF and V2DF), modelled as
;; UNSPEC_SSE5_PERMUTE.  Operands 1 and 2 are in the iterated mode while
;; the control operand 3 is always V16QI.  The mnemonic suffix comes from
;; <ssemodesuffixf4> and the mode attribute is the iterated mode itself.
10913 (define_insn "sse5_perm<mode>"
10914 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
10916 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
10917 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
10918 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10919 UNSPEC_SSE5_PERMUTE))]
10920 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10921 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10922 [(set_attr "type" "sse4arg")
10923 (set_attr "mode" "<MODE>")])
10925 ;; SSE5 packed rotate instructions
;; Expander for a vector rotate-left by an SImode count, for the modes in
;; SSEMODE1248.  When the count does not satisfy the
;; const_0_to_<sserotatemax>_operand predicate, the count is converted to
;; the element mode if necessary, replicated into all <ssescalarnum>
;; elements of a vector register via vec_init, and the rotate is emitted
;; through sse5_vrotl<mode>3 with that per-element count vector.
10926 (define_expand "rotl<mode>3"
10927 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10928 (rotate:SSEMODE1248
10929 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10930 (match_operand:SI 2 "general_operand")))]
10933 /* If we were given a scalar, convert it to parallel */
10934 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10936 rtvec vs = rtvec_alloc (<ssescalarnum>);
10937 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10938 rtx reg = gen_reg_rtx (<MODE>mode);
10939 rtx op2 = operands[2];
10942 if (GET_MODE (op2) != <ssescalarmode>mode)
10944 op2 = gen_reg_rtx (<ssescalarmode>mode);
10945 convert_move (op2, operands[2], false);
10948 for (i = 0; i < <ssescalarnum>; i++)
10949 RTVEC_ELT (vs, i) = op2;
10951 emit_insn (gen_vec_init<mode> (reg, par));
10952 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right by an SImode count, for the modes in
;; SSEMODE1248.  When the count does not satisfy the
;; const_0_to_<sserotatemax>_operand predicate, the count is converted to
;; the element mode if necessary and replicated into all <ssescalarnum>
;; elements of a vector register via vec_init.  That vector is then negated
;; and passed to sse5_vrotl<mode>3, so the right rotate is expressed as a
;; variable left rotate by the negated per-element counts.
10957 (define_expand "rotr<mode>3"
10958 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10959 (rotatert:SSEMODE1248
10960 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10961 (match_operand:SI 2 "general_operand")))]
10964 /* If we were given a scalar, convert it to parallel */
10965 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10967 rtvec vs = rtvec_alloc (<ssescalarnum>);
10968 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10969 rtx neg = gen_reg_rtx (<MODE>mode);
10970 rtx reg = gen_reg_rtx (<MODE>mode);
10971 rtx op2 = operands[2];
10974 if (GET_MODE (op2) != <ssescalarmode>mode)
10976 op2 = gen_reg_rtx (<ssescalarmode>mode);
10977 convert_move (op2, operands[2], false);
10980 for (i = 0; i < <ssescalarnum>; i++)
10981 RTVEC_ELT (vs, i) = op2;
10983 emit_insn (gen_vec_init<mode> (reg, par));
10984 emit_insn (gen_neg<mode>2 (neg, reg));
10985 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate count for the modes in SSEMODE1248.  Operand
;; 1 may be a register or memory; operand 2 must satisfy the
;; const_0_to_<sserotatemax>_operand predicate and is printed directly as
;; the count of prot<ssevecsize>.
10990 (define_insn "sse5_rotl<mode>3"
10991 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10992 (rotate:SSEMODE1248
10993 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10994 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10996 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10997 [(set_attr "type" "sseishft")
10998 (set_attr "mode" "TI")])
;; Rotate-right by an immediate count for the modes in SSEMODE1248, emitted
;; as a prot<ssevecsize> (rotate left) by the complementary count.  Operand
;; 1 may be a register or memory; operand 2 must satisfy the
;; const_0_to_<sserotatemax>_operand predicate.
;; The complementary count is the element width in bits minus operand 2.
;; <ssescalarnum> is the number of elements in the vector (the rotate
;; expanders use it as the vec_init element count), so <ssescalarnum> * 8
;; equals the element width only for V4SI; the width is therefore taken
;; from the element mode instead.
11000 (define_insn "sse5_rotr<mode>3"
11001 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11002 (rotatert:SSEMODE1248
11003 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11004 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11007 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11008 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11010 [(set_attr "type" "sseishft")
11011 (set_attr "mode" "TI")])
;; Expander for a vector rotate-right with a per-element count vector
;; (operand 2), for the modes in SSEMODE1248.  It negates the count vector
;; into a fresh register and emits sse5_vrotl<mode>3 with the negated
;; counts.
11013 (define_expand "vrotr<mode>3"
11014 [(match_operand:SSEMODE1248 0 "register_operand" "")
11015 (match_operand:SSEMODE1248 1 "register_operand" "")
11016 (match_operand:SSEMODE1248 2 "register_operand" "")]
11019 rtx reg = gen_reg_rtx (<MODE>mode);
11020 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11021 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-left with a per-element count vector
;; (operand 2), for the modes in SSEMODE1248.  It forwards its three
;; register operands unchanged to sse5_vrotl<mode>3.
11025 (define_expand "vrotl<mode>3"
11026 [(match_operand:SSEMODE1248 0 "register_operand" "")
11027 (match_operand:SSEMODE1248 1 "register_operand" "")
11028 (match_operand:SSEMODE1248 2 "register_operand" "")]
11031 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate insn (prot<ssevecsize>) for the modes in SSEMODE1248.
;; The RTL is an if_then_else on the count vector (operand 2) that chooses
;; between a rotate of operand 1 and a rotatert by the negated count.
;; Either operand 1 or operand 2, but not both, may be in memory.
11035 (define_insn "sse5_vrotl<mode>3"
11036 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11037 (if_then_else:SSEMODE1248
11039 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11041 (rotate:SSEMODE1248
11042 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11044 (rotatert:SSEMODE1248
11046 (neg:SSEMODE1248 (match_dup 2)))))]
11047 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11048 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11049 [(set_attr "type" "sseishft")
11050 (set_attr "mode" "TI")])
11052 ;; SSE5 packed shift instructions.
11053 ;; FIXME: add V2DI back in
;; Expander for a logical shift right with a vector of per-element counts
;; (SSEMODE124 only).  The count vector is negated into a fresh pseudo
;; and handed to sse5_lshl<mode>3.
11054 (define_expand "vlshr<mode>3"
11055 [(match_operand:SSEMODE124 0 "register_operand" "")
11056 (match_operand:SSEMODE124 1 "register_operand" "")
11057 (match_operand:SSEMODE124 2 "register_operand" "")]
11060 rtx neg = gen_reg_rtx (<MODE>mode);
11061 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11062 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for an arithmetic shift right with a vector of per-element
;; counts (SSEMODE124 only).  The count vector is negated into a fresh
;; pseudo and handed to sse5_ashl<mode>3.
11066 (define_expand "vashr<mode>3"
11067 [(match_operand:SSEMODE124 0 "register_operand" "")
11068 (match_operand:SSEMODE124 1 "register_operand" "")
11069 (match_operand:SSEMODE124 2 "register_operand" "")]
11072 rtx neg = gen_reg_rtx (<MODE>mode);
11073 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11074 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a shift left with a vector of per-element counts
;; (SSEMODE124 only).  Operands are forwarded unchanged to
;; sse5_ashl<mode>3.
11078 (define_expand "vashl<mode>3"
11079 [(match_operand:SSEMODE124 0 "register_operand" "")
11080 (match_operand:SSEMODE124 1 "register_operand" "")
11081 (match_operand:SSEMODE124 2 "register_operand" "")]
11084 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift with a vector count, emitted as
;; psha<ssevecsize>.  The RTL is an if_then_else whose arms are a left
;; shift (ashift) of operand 1 and an arithmetic right shift (ashiftrt)
;; by the negation of operand 2.
;; Either operand 2 or operand 1 may be in memory (alternatives "xm,x" /
;; "x,xm"); the insn condition additionally calls ix86_sse5_valid_op_p.
11088 (define_insn "sse5_ashl<mode>3"
11089 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11090 (if_then_else:SSEMODE1248
11092 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11094 (ashift:SSEMODE1248
11095 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11097 (ashiftrt:SSEMODE1248
11099 (neg:SSEMODE1248 (match_dup 2)))))]
11100 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11101 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11102 [(set_attr "type" "sseishft")
11103 (set_attr "mode" "TI")])
;; Variable logical shift with a vector count, emitted as
;; pshl<ssevecsize>.  The RTL is an if_then_else whose arms are a left
;; shift (ashift) of operand 1 and a logical right shift (lshiftrt) by
;; the negation of operand 2.
;; Either operand 2 or operand 1 may be in memory (alternatives "xm,x" /
;; "x,xm"); the insn condition additionally calls ix86_sse5_valid_op_p.
11105 (define_insn "sse5_lshl<mode>3"
11106 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11107 (if_then_else:SSEMODE1248
11109 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11111 (ashift:SSEMODE1248
11112 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11114 (lshiftrt:SSEMODE1248
11116 (neg:SSEMODE1248 (match_dup 2)))))]
11117 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11118 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11119 [(set_attr "type" "sseishft")
11120 (set_attr "mode" "TI")])
11122 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; V16QI shift left by a single scalar count (operand 2, SImode register
;; or constant).  The count is replicated into all 16 slots of a PARALLEL,
;; materialised as a V16QI register with vec_initv16qi, and used as the
;; count vector of sse5_ashlv16qi3.
11123 (define_expand "ashlv16qi3"
11124 [(match_operand:V16QI 0 "register_operand" "")
11125 (match_operand:V16QI 1 "register_operand" "")
11126 (match_operand:SI 2 "nonmemory_operand" "")]
11129 rtvec vs = rtvec_alloc (16);
11130 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11131 rtx reg = gen_reg_rtx (V16QImode);
11133 for (i = 0; i < 16; i++)
11134 RTVEC_ELT (vs, i) = operands[2];
11136 emit_insn (gen_vec_initv16qi (reg, par));
11137 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a single scalar count (operand 2, SImode
;; register or constant).  Same construction as ashlv16qi3: the count is
;; replicated 16 times, built into a V16QI register with vec_initv16qi,
;; and used as the count vector of sse5_lshlv16qi3.
11141 (define_expand "lshlv16qi3"
11142 [(match_operand:V16QI 0 "register_operand" "")
11143 (match_operand:V16QI 1 "register_operand" "")
11144 (match_operand:SI 2 "nonmemory_operand" "")]
11147 rtvec vs = rtvec_alloc (16);
11148 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11149 rtx reg = gen_reg_rtx (V16QImode);
11151 for (i = 0; i < 16; i++)
11152 RTVEC_ELT (vs, i) = operands[2];
11154 emit_insn (gen_vec_initv16qi (reg, par));
11155 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a single scalar count, implemented
;; through sse5_ashlv16qi3 with a negated count vector.
;; A CONST_INT count is negated at expand time before being replicated
;; into the 16-element vector.  For any other count the replicated vector
;; is built first and then negated with negv16qi2 into a second pseudo.
11159 (define_expand "ashrv16qi3"
11160 [(match_operand:V16QI 0 "register_operand" "")
11161 (match_operand:V16QI 1 "register_operand" "")
11162 (match_operand:SI 2 "nonmemory_operand" "")]
11165 rtvec vs = rtvec_alloc (16);
11166 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11167 rtx reg = gen_reg_rtx (V16QImode);
11169 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
11170 ? GEN_INT (- INTVAL (operands[2]))
11173 for (i = 0; i < 16; i++)
11174 RTVEC_ELT (vs, i) = ele;
11176 emit_insn (gen_vec_initv16qi (reg, par));
11178 if (GET_CODE (operands[2]) != CONST_INT)
11180 rtx neg = gen_reg_rtx (V16QImode);
11181 emit_insn (gen_negv16qi2 (neg, reg));
11182 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
11185 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a single scalar count, implemented
;; through sse5_ashlv2di3 with a negated count in both elements.
;; The negated scalar count is obtained in one of three ways:
;;   - CONST_INT: negated at expand time;
;;   - not already DImode: converted with convert_move, then negdi2;
;;   - otherwise: negdi2 applied directly.
;; It is then placed in both slots of the PARALLEL given to
;; vec_initv2di.
11190 (define_expand "ashrv2di3"
11191 [(match_operand:V2DI 0 "register_operand" "")
11192 (match_operand:V2DI 1 "register_operand" "")
11193 (match_operand:DI 2 "nonmemory_operand" "")]
11196 rtvec vs = rtvec_alloc (2);
11197 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11198 rtx reg = gen_reg_rtx (V2DImode);
11201 if (GET_CODE (operands[2]) == CONST_INT)
11202 ele = GEN_INT (- INTVAL (operands[2]));
11203 else if (GET_MODE (operands[2]) != DImode)
11205 rtx move = gen_reg_rtx (DImode);
11206 ele = gen_reg_rtx (DImode);
11207 convert_move (move, operands[2], false);
11208 emit_insn (gen_negdi2 (ele, move));
11212 ele = gen_reg_rtx (DImode);
11213 emit_insn (gen_negdi2 (ele, operands[2]));
11216 RTVEC_ELT (vs, 0) = ele;
11217 RTVEC_ELT (vs, 1) = ele;
11218 emit_insn (gen_vec_initv2di (reg, par));
11219 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11223 ;; SSE5 FRCZ support
;; Packed frcz on a V4SF/V2DF vector: operand 1 (register or memory) is
;; the source, operand 0 the destination register.  The mnemonic suffix
;; comes from <ssemodesuffixf4>.
11225 (define_insn "sse5_frcz<mode>2"
11226 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11228 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11231 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11232 [(set_attr "type" "ssecvt1")
11233 (set_attr "prefix_extra" "1")
11234 (set_attr "mode" "<MODE>")])
;; Scalar frcz expressed on vector modes.  The result is a vec_merge of
;; the frcz of operand 2 with operand 1, and operand 1 is tied to the
;; destination (constraint "0"), so only operands 2 and 0 appear in the
;; assembler template.
11237 (define_insn "sse5_vmfrcz<mode>2"
11238 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11239 (vec_merge:SSEMODEF2P
11241 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11243 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11246 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11247 [(set_attr "type" "ssecvt1")
11248 (set_attr "prefix_extra" "1")
11249 (set_attr "mode" "<MODE>")])
;; cvtph2ps: produces a V4SF register from a V4HI register-or-memory
;; source.  Modelled as an unspec rather than a generic conversion code.
11251 (define_insn "sse5_cvtph2ps"
11252 [(set (match_operand:V4SF 0 "register_operand" "=x")
11253 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11256 "cvtph2ps\t{%1, %0|%0, %1}"
11257 [(set_attr "type" "ssecvt")
11258 (set_attr "mode" "V4SF")])
;; cvtps2ph: produces a V4HI result from a V4SF register source.  Unlike
;; cvtph2ps, the destination (operand 0) may be a register or memory.
11260 (define_insn "sse5_cvtps2ph"
11261 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11262 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11265 "cvtps2ph\t{%1, %0|%0, %1}"
11266 [(set_attr "type" "ssecvt")
11267 (set_attr "mode" "V4SF")])
11269 ;; Scalar versions of the com instructions that use vector types that are
11270 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11271 ;; com instructions fill in 0's in the upper bits instead of leaving them
11272 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com comparison on vector modes.  The
;; preparation code supplies operand 4 as the all-zero constant of
;; <MODE>mode (CONST0_RTX) for use in the vec_merge.
11273 (define_expand "sse5_vmmaskcmp<mode>3"
11274 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11275 (vec_merge:SSEMODEF2P
11276 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11277 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11278 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11283 operands[4] = CONST0_RTX (<MODE>mode);
;; Insn matched by the sse5_vmmaskcmp<mode>3 expansion.  The comparison
;; operator (operand 1) is printed with the %Y modifier to select the com
;; mnemonic; operand 2 must be a register, operand 3 may be memory.
;; Operand 4 is matched with an empty predicate and is not printed.
11286 (define_insn "*sse5_vmmaskcmp<mode>3"
11287 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11288 (vec_merge:SSEMODEF2P
11289 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11290 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11291 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11292 (match_operand:SSEMODEF2P 4 "")
11295 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11296 [(set_attr "type" "sse4arg")
11297 (set_attr "mode" "<ssescalarmode>")])
11299 ;; We don't have a comparison operator that always returns true/false, so
11300 ;; handle comfalse and comtrue specially.
;; Floating-point comfalse/comtrue, modelled as UNSPEC_SSE5_TRUEFALSE.
;; Operand 3 is a constant selector; the switch on it picks the mnemonic.
;; The four arms are, in order: scalar comfalse, packed comfalse, scalar
;; comtrue, packed comtrue.  The comtrue arms must emit comtrue*
;; mnemonics, mirroring sse5_pcom_tf<mode>3 which emits pcomtrue /
;; pcomfalse for the integer case.  Any other selector value hits
;; gcc_unreachable.
11301 (define_insn "sse5_com_tf<mode>3"
11302 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11304 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11305 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11306 (match_operand:SI 3 "const_int_operand" "n")]
11307 UNSPEC_SSE5_TRUEFALSE))]
11310 const char *ret = NULL;
11312 switch (INTVAL (operands[3]))
11315 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11319 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11323 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11327 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11331 gcc_unreachable ();
11336 [(set_attr "type" "ssecmp")
11337 (set_attr "mode" "<MODE>")])
;; Packed floating-point com comparison on V4SF/V2DF.  The operator in
;; operand 1 (accepted by sse5_comparison_float_operator) is printed via
;; %Y1 to form the mnemonic; operand 3 may be a memory reference.
11339 (define_insn "sse5_maskcmp<mode>3"
11340 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11341 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11342 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11343 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11345 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11346 [(set_attr "type" "ssecmp")
11347 (set_attr "mode" "<MODE>")])
;; Integer pcom comparison on the SSEMODE1248 vector modes.  The operator
;; in operand 1 (accepted by ix86_comparison_int_operator) is printed via
;; %Y1; operand 3 may be a memory reference.
11349 (define_insn "sse5_maskcmp<mode>3"
11350 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11351 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11352 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11353 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11355 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11356 [(set_attr "type" "sse4arg")
11357 (set_attr "mode" "TI")])
;; Unsigned integer pcom comparison: same operand layout as the signed
;; pattern, but the operator must satisfy ix86_comparison_uns_operator
;; and the mnemonic carries a "u" before the element-size suffix.
11359 (define_insn "sse5_maskcmp_uns<mode>3"
11360 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11361 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11362 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11363 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11365 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11366 [(set_attr "type" "ssecmp")
11367 (set_attr "mode" "TI")])
11369 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11370 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11371 ;; the exact instruction generated for the intrinsic.
;; Same assembler output as sse5_maskcmp_uns<mode>3, but the comparison
;; is wrapped in UNSPEC_SSE5_UNSIGNED_CMP so the RTL is not a bare
;; comparison operator.
11372 (define_insn "sse5_maskcmp_uns2<mode>3"
11373 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11374 (unspec:SSEMODE1248
11375 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11376 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11377 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11378 UNSPEC_SSE5_UNSIGNED_CMP))]
11380 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11381 [(set_attr "type" "ssecmp")
11382 (set_attr "mode" "TI")])
11384 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11385 ;; being added here to be complete.
;; Integer pcomtrue/pcomfalse, modelled as UNSPEC_SSE5_TRUEFALSE.
;; Operand 3 is a constant selector: non-zero emits pcomtrue, zero emits
;; pcomfalse.
11386 (define_insn "sse5_pcom_tf<mode>3"
11387 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11388 (unspec:SSEMODE1248
11389 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11390 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11391 (match_operand:SI 3 "const_int_operand" "n")]
11392 UNSPEC_SSE5_TRUEFALSE))]
11395 return ((INTVAL (operands[3]) != 0)
11396 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11397 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11399 [(set_attr "type" "ssecmp")
11400 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesenc, enabled when both TARGET_AES and
;; TARGET_AVX hold.  Operand 1 is an independent register source (not
;; tied to the destination); operand 2 may be memory.
11402 (define_insn "*avx_aesenc"
11403 [(set (match_operand:V2DI 0 "register_operand" "=x")
11404 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11405 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11407 "TARGET_AES && TARGET_AVX"
11408 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11409 [(set_attr "type" "sselog1")
11410 (set_attr "prefix" "vex")
11411 (set_attr "mode" "TI")])
;; Two-operand aesenc.  Operand 1 is tied to the destination (constraint
;; "0"), so the template prints only operands 2 and 0.
11413 (define_insn "aesenc"
11414 [(set (match_operand:V2DI 0 "register_operand" "=x")
11415 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11416 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11419 "aesenc\t{%2, %0|%0, %2}"
11420 [(set_attr "type" "sselog1")
11421 (set_attr "prefix_extra" "1")
11422 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesenclast (UNSPEC_AESENCLAST), enabled
;; when both TARGET_AES and TARGET_AVX hold.  Operand 1 is not tied to
;; the destination; operand 2 may be memory.
11424 (define_insn "*avx_aesenclast"
11425 [(set (match_operand:V2DI 0 "register_operand" "=x")
11426 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11427 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11428 UNSPEC_AESENCLAST))]
11429 "TARGET_AES && TARGET_AVX"
11430 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11431 [(set_attr "type" "sselog1")
11432 (set_attr "prefix" "vex")
11433 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST).  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be memory.
11435 (define_insn "aesenclast"
11436 [(set (match_operand:V2DI 0 "register_operand" "=x")
11437 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11438 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11439 UNSPEC_AESENCLAST))]
11441 "aesenclast\t{%2, %0|%0, %2}"
11442 [(set_attr "type" "sselog1")
11443 (set_attr "prefix_extra" "1")
11444 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesdec, enabled when both TARGET_AES and
;; TARGET_AVX hold.  Operand 1 is not tied to the destination; operand 2
;; may be memory.
11446 (define_insn "*avx_aesdec"
11447 [(set (match_operand:V2DI 0 "register_operand" "=x")
11448 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11449 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11451 "TARGET_AES && TARGET_AVX"
11452 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11453 [(set_attr "type" "sselog1")
11454 (set_attr "prefix" "vex")
11455 (set_attr "mode" "TI")])
;; Two-operand aesdec.  Operand 1 is tied to the destination (constraint
;; "0"), so the template prints only operands 2 and 0.
11457 (define_insn "aesdec"
11458 [(set (match_operand:V2DI 0 "register_operand" "=x")
11459 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11460 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11463 "aesdec\t{%2, %0|%0, %2}"
11464 [(set_attr "type" "sselog1")
11465 (set_attr "prefix_extra" "1")
11466 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesdeclast (UNSPEC_AESDECLAST), enabled
;; when both TARGET_AES and TARGET_AVX hold.  Operand 1 is not tied to
;; the destination; operand 2 may be memory.
11468 (define_insn "*avx_aesdeclast"
11469 [(set (match_operand:V2DI 0 "register_operand" "=x")
11470 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11471 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11472 UNSPEC_AESDECLAST))]
11473 "TARGET_AES && TARGET_AVX"
11474 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11475 [(set_attr "type" "sselog1")
11476 (set_attr "prefix" "vex")
11477 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST).  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be memory.
11479 (define_insn "aesdeclast"
11480 [(set (match_operand:V2DI 0 "register_operand" "=x")
11481 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11482 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11483 UNSPEC_AESDECLAST))]
11485 "aesdeclast\t{%2, %0|%0, %2}"
11486 [(set_attr "type" "sselog1")
11487 (set_attr "prefix_extra" "1")
11488 (set_attr "mode" "TI")])
;; aesimc with a single register-or-memory source.  One pattern serves
;; both encodings: the template starts with %v and the "prefix"
;; attribute is maybe_vex.
11490 (define_insn "aesimc"
11491 [(set (match_operand:V2DI 0 "register_operand" "=x")
11492 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11495 "%vaesimc\t{%1, %0|%0, %1}"
11496 [(set_attr "type" "sselog1")
11497 (set_attr "prefix_extra" "1")
11498 (set_attr "prefix" "maybe_vex")
11499 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): operand 1 is a
;; register-or-memory source, operand 2 an immediate accepted by
;; const_0_to_255_operand.  The template starts with %v and the "prefix"
;; attribute is maybe_vex, so one pattern serves both encodings.
11501 (define_insn "aeskeygenassist"
11502 [(set (match_operand:V2DI 0 "register_operand" "=x")
11503 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11504 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11505 UNSPEC_AESKEYGENASSIST))]
11507 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11508 [(set_attr "type" "sselog1")
11509 (set_attr "prefix_extra" "1")
11510 (set_attr "prefix" "maybe_vex")
11511 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq, enabled when both TARGET_PCLMUL and TARGET_AVX
;; hold.  Operand 1 is not tied to the destination, operand 2 may be
;; memory, and operand 3 is an immediate accepted by
;; const_0_to_255_operand.
11513 (define_insn "*vpclmulqdq"
11514 [(set (match_operand:V2DI 0 "register_operand" "=x")
11515 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11516 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11517 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11519 "TARGET_PCLMUL && TARGET_AVX"
11520 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11521 [(set_attr "type" "sselog1")
11522 (set_attr "prefix" "vex")
11523 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 may be memory and operand 3 is an
;; immediate accepted by const_0_to_255_operand.
11525 (define_insn "pclmulqdq"
11526 [(set (match_operand:V2DI 0 "register_operand" "=x")
11527 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11528 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11529 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11532 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11533 [(set_attr "type" "sselog1")
11534 (set_attr "prefix_extra" "1")
11535 (set_attr "mode" "TI")])
;; Expander that builds the vzeroall PARALLEL by hand.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.  The PARALLEL has
;; nregs + 1 elements: element 0 is an UNSPEC_VOLATILE of (const_int 0),
;; and elements 1..nregs each SET one SSE register, viewed in V8SImode,
;; to the V8SImode zero constant.
11537 (define_expand "avx_vzeroall"
11538 [(match_par_dup 0 [(const_int 0)])]
11541 int nregs = TARGET_64BIT ? 16 : 8;
11544 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11546 XVECEXP (operands[0], 0, 0)
11547 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11550 for (regno = 0; regno < nregs; regno++)
11551 XVECEXP (operands[0], 0, regno + 1)
11552 = gen_rtx_SET (VOIDmode,
11553 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11554 CONST0_RTX (V8SImode));
;; Insn matching the PARALLEL built by the avx_vzeroall expander.  The
;; whole PARALLEL is validated by the vzeroall_operation predicate; the
;; template spells out only the leading UNSPECV_VZEROALL element and the
;; first register SET.
11557 (define_insn "*avx_vzeroall"
11558 [(match_parallel 0 "vzeroall_operation"
11559 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11560 (set (match_operand 1 "register_operand" "=x")
11561 (match_operand 2 "const0_operand" "X"))])]
11564 [(set_attr "type" "sse")
11565 (set_attr "memory" "none")
11566 (set_attr "prefix" "vex")
11567 (set_attr "mode" "OI")])
11569 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; This is the non-64-bit variant (TARGET_AVX && !TARGET_64BIT): it lists
;; a V8SImode clobber for each of XMM0..XMM7 next to the
;; UNSPECV_VZEROUPPER unspec_volatile.
11570 (define_insn "avx_vzeroupper"
11571 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11572 (clobber (reg:V8SI XMM0_REG))
11573 (clobber (reg:V8SI XMM1_REG))
11574 (clobber (reg:V8SI XMM2_REG))
11575 (clobber (reg:V8SI XMM3_REG))
11576 (clobber (reg:V8SI XMM4_REG))
11577 (clobber (reg:V8SI XMM5_REG))
11578 (clobber (reg:V8SI XMM6_REG))
11579 (clobber (reg:V8SI XMM7_REG))]
11580 "TARGET_AVX && !TARGET_64BIT"
11582 [(set_attr "type" "sse")
11583 (set_attr "memory" "none")
11584 (set_attr "prefix" "vex")
11585 (set_attr "mode" "OI")])
;; 64-bit variant of avx_vzeroupper (TARGET_AVX && TARGET_64BIT): same
;; UNSPECV_VZEROUPPER unspec_volatile, with V8SImode clobbers for all of
;; XMM0..XMM15.
11587 (define_insn "avx_vzeroupper_rex64"
11588 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11589 (clobber (reg:V8SI XMM0_REG))
11590 (clobber (reg:V8SI XMM1_REG))
11591 (clobber (reg:V8SI XMM2_REG))
11592 (clobber (reg:V8SI XMM3_REG))
11593 (clobber (reg:V8SI XMM4_REG))
11594 (clobber (reg:V8SI XMM5_REG))
11595 (clobber (reg:V8SI XMM6_REG))
11596 (clobber (reg:V8SI XMM7_REG))
11597 (clobber (reg:V8SI XMM8_REG))
11598 (clobber (reg:V8SI XMM9_REG))
11599 (clobber (reg:V8SI XMM10_REG))
11600 (clobber (reg:V8SI XMM11_REG))
11601 (clobber (reg:V8SI XMM12_REG))
11602 (clobber (reg:V8SI XMM13_REG))
11603 (clobber (reg:V8SI XMM14_REG))
11604 (clobber (reg:V8SI XMM15_REG))]
11605 "TARGET_AVX && TARGET_64BIT"
11607 [(set_attr "type" "sse")
11608 (set_attr "memory" "none")
11609 (set_attr "prefix" "vex")
11610 (set_attr "mode" "OI")])
;; vpermilp{s,d} with an immediate selector.  Operand 1 is the source
;; vector and operand 2 the immediate, accepted by the
;; const_0_to_<vpermilbits>_operand predicate.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint: with register_operand the memory alternative
;; could only be reached through reload, never by direct matching.
11612 (define_insn "avx_vpermil<mode>"
11613 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11615 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11616 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
11619 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11620 [(set_attr "type" "sselog")
11621 (set_attr "prefix" "vex")
11622 (set_attr "mode" "<MODE>")])
;; vpermilp{s,d} with a vector selector.  Operand 1 is a register source;
;; operand 2 is the selector, a register or memory operand in
;; <avxpermvecmode>.
11624 (define_insn "avx_vpermilvar<mode>3"
11625 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11627 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11628 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11631 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11632 [(set_attr "type" "sselog")
11633 (set_attr "prefix" "vex")
11634 (set_attr "mode" "<MODE>")])
;; vperm2f128 on the AVX256MODE2P modes (UNSPEC_VPERMIL2F128).  Operand 1
;; is a register, operand 2 a register or memory, and operand 3 an
;; immediate accepted by const_0_to_255_operand.  The "mode" attribute is
;; V8SF regardless of the iterator mode.
11636 (define_insn "avx_vperm2f128<mode>3"
11637 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11638 (unspec:AVX256MODE2P
11639 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11640 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11641 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11642 UNSPEC_VPERMIL2F128))]
11644 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11645 [(set_attr "type" "sselog")
11646 (set_attr "prefix" "vex")
11647 (set_attr "mode" "V8SF")])
;; vbroadcasts{s,d} from a scalar in memory (operand 1, memory_operand
;; only).  The RTL describes the result as a vec_concat of two
;; <avxhalfvecmode> vec_concats.
11649 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
11650 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
11651 (vec_concat:AVXMODEF4P
11652 (vec_concat:<avxhalfvecmode>
11653 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
11655 (vec_concat:<avxhalfvecmode>
11659 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
11660 [(set_attr "type" "ssemov")
11661 (set_attr "prefix" "vex")
11662 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss into a V8SF register from an SFmode memory operand.
11664 (define_insn "avx_vbroadcastss256"
11665 [(set (match_operand:V8SF 0 "register_operand" "=x")
11669 (match_operand:SF 1 "memory_operand" "m")
11682 "vbroadcastss\t{%1, %0|%0, %1}"
11683 [(set_attr "type" "ssemov")
11684 (set_attr "prefix" "vex")
11685 (set_attr "mode" "SF")])
;; vbroadcastf128: the 256-bit result is a vec_concat whose first half is
;; operand 1, an <avxhalfvecmode> value that must be in memory.
11687 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
11688 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
11689 (vec_concat:AVX256MODEF2P
11690 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
11693 "vbroadcastf128\t{%1, %0|%0, %1}"
11694 [(set_attr "type" "ssemov")
11695 (set_attr "prefix" "vex")
11696 (set_attr "mode" "V4SF")])
;; Expander for vinsertf128 on any AVX256MODE.  Operand 3 is a constant
;; accepted by const_0_to_1_operand; the switch on its value dispatches
;; to vec_set_lo_<mode> or vec_set_hi_<mode>, and any other value hits
;; gcc_unreachable.
11698 (define_expand "avx_vinsertf128<mode>"
11699 [(match_operand:AVX256MODE 0 "register_operand" "")
11700 (match_operand:AVX256MODE 1 "register_operand" "")
11701 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11702 (match_operand:SI 3 "const_0_to_1_operand" "")]
11705 switch (INTVAL (operands[3]))
11708 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11712 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11716 gcc_unreachable ();
;; Replace the low half of a 4-element 256-bit vector (AVX256MODE4P).
;; Result = vec_concat (operand 2, elements 2..3 of operand 1); emitted
;; as vinsertf128 with immediate 0.
11721 (define_insn "vec_set_lo_<mode>"
11722 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11723 (vec_concat:AVX256MODE4P
11724 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11725 (vec_select:<avxhalfvecmode>
11726 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11727 (parallel [(const_int 2) (const_int 3)]))))]
11729 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11730 [(set_attr "type" "sselog")
11731 (set_attr "prefix" "vex")
11732 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector (AVX256MODE4P).
;; Result = vec_concat (elements 0..1 of operand 1, operand 2); emitted
;; as vinsertf128 with immediate 1.
11734 (define_insn "vec_set_hi_<mode>"
11735 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11736 (vec_concat:AVX256MODE4P
11737 (vec_select:<avxhalfvecmode>
11738 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11739 (parallel [(const_int 0) (const_int 1)]))
11740 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11742 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11743 [(set_attr "type" "sselog")
11744 (set_attr "prefix" "vex")
11745 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector (AVX256MODE8P).
;; Result = vec_concat (operand 2, elements 4..7 of operand 1); emitted
;; as vinsertf128 with immediate 0.
11747 (define_insn "vec_set_lo_<mode>"
11748 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11749 (vec_concat:AVX256MODE8P
11750 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11751 (vec_select:<avxhalfvecmode>
11752 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11753 (parallel [(const_int 4) (const_int 5)
11754 (const_int 6) (const_int 7)]))))]
11756 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11757 [(set_attr "type" "sselog")
11758 (set_attr "prefix" "vex")
11759 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector (AVX256MODE8P).
;; Result = vec_concat (elements 0..3 of operand 1, operand 2); emitted
;; as vinsertf128 with immediate 1.
11761 (define_insn "vec_set_hi_<mode>"
11762 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11763 (vec_concat:AVX256MODE8P
11764 (vec_select:<avxhalfvecmode>
11765 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11766 (parallel [(const_int 0) (const_int 1)
11767 (const_int 2) (const_int 3)]))
11768 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11770 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11771 [(set_attr "type" "sselog")
11772 (set_attr "prefix" "vex")
11773 (set_attr "mode" "V8SF")])
;; Replace the low half of a V16HI vector: operand 2 (V8HI, register or
;; memory) is combined with elements 8..15 selected from operand 1.
;; Emitted as vinsertf128 with immediate 0.
11775 (define_insn "vec_set_lo_v16hi"
11776 [(set (match_operand:V16HI 0 "register_operand" "=x")
11778 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11780 (match_operand:V16HI 1 "register_operand" "x")
11781 (parallel [(const_int 8) (const_int 9)
11782 (const_int 10) (const_int 11)
11783 (const_int 12) (const_int 13)
11784 (const_int 14) (const_int 15)]))))]
11786 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11787 [(set_attr "type" "sselog")
11788 (set_attr "prefix" "vex")
11789 (set_attr "mode" "V8SF")])
;; Replace the high half of a V16HI vector: elements 0..7 selected from
;; operand 1 are combined with operand 2 (V8HI, register or memory).
;; Emitted as vinsertf128 with immediate 1.
11791 (define_insn "vec_set_hi_v16hi"
11792 [(set (match_operand:V16HI 0 "register_operand" "=x")
11795 (match_operand:V16HI 1 "register_operand" "x")
11796 (parallel [(const_int 0) (const_int 1)
11797 (const_int 2) (const_int 3)
11798 (const_int 4) (const_int 5)
11799 (const_int 6) (const_int 7)]))
11800 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11802 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11803 [(set_attr "type" "sselog")
11804 (set_attr "prefix" "vex")
11805 (set_attr "mode" "V8SF")])
;; Replace the low half of a V32QI vector: operand 2 (V16QI, register or
;; memory) is combined with elements 16..31 selected from operand 1.
;; Emitted as vinsertf128 with immediate 0.
11807 (define_insn "vec_set_lo_v32qi"
11808 [(set (match_operand:V32QI 0 "register_operand" "=x")
11810 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11812 (match_operand:V32QI 1 "register_operand" "x")
11813 (parallel [(const_int 16) (const_int 17)
11814 (const_int 18) (const_int 19)
11815 (const_int 20) (const_int 21)
11816 (const_int 22) (const_int 23)
11817 (const_int 24) (const_int 25)
11818 (const_int 26) (const_int 27)
11819 (const_int 28) (const_int 29)
11820 (const_int 30) (const_int 31)]))))]
11822 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11823 [(set_attr "type" "sselog")
11824 (set_attr "prefix" "vex")
11825 (set_attr "mode" "V8SF")])
;; Replace the high half of a V32QI vector: elements 0..15 selected from
;; operand 1 are combined with operand 2 (V16QI, register or memory).
;; Emitted as vinsertf128 with immediate 1.
11827 (define_insn "vec_set_hi_v32qi"
11828 [(set (match_operand:V32QI 0 "register_operand" "=x")
11831 (match_operand:V32QI 1 "register_operand" "x")
11832 (parallel [(const_int 0) (const_int 1)
11833 (const_int 2) (const_int 3)
11834 (const_int 4) (const_int 5)
11835 (const_int 6) (const_int 7)
11836 (const_int 8) (const_int 9)
11837 (const_int 10) (const_int 11)
11838 (const_int 12) (const_int 13)
11839 (const_int 14) (const_int 15)]))
11840 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11842 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11843 [(set_attr "type" "sselog")
11844 (set_attr "prefix" "vex")
11845 (set_attr "mode" "V8SF")])
;; vmaskmovp{s,d} load form: operand 1 must be memory, operand 2 a
;; register, and the destination (operand 0) a register.
;; NOTE(review): operand 2 is presumably the mask register, judging by
;; its position in the template -- confirm against the builtin expander.
11847 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
11848 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11850 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11851 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11855 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
11856 [(set_attr "type" "sselog1")
11857 (set_attr "prefix" "vex")
11858 (set_attr "mode" "<MODE>")])
;; vmaskmovp{s,d} store form (UNSPEC_MASKSTORE): the destination
;; (operand 0) must be memory; operands 1 and 2 are both registers.
;; NOTE(review): operand 1 is presumably the mask and operand 2 the data,
;; judging by their positions in the template -- confirm against the
;; builtin expander.
11860 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
11861 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11863 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11864 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11866 UNSPEC_MASKSTORE))]
11868 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11869 [(set_attr "type" "sselog1")
11870 (set_attr "prefix" "vex")
11871 (set_attr "mode" "<MODE>")])
;; Unspec conversion from an <avxhalfvecmode> value (operand 1) to a
;; 256-bit AVX256MODE2P register (operand 0).
;; Alternative 0 ties the source to the destination ("0").  Alternative 1
;; takes a register or memory source; for it the output code picks
;; vmovaps, vmovapd or vmovdqa from the insn's mode attribute and writes
;; the %x0 view of the destination.
;; The "length" attribute is given explicitly for alternative 0 and left
;; at the default ("*") otherwise.
11873 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11874 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
11875 (unspec:AVX256MODE2P
11876 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
11880 switch (which_alternative)
11885 switch (get_attr_mode (insn))
11888 return "vmovaps\t{%1, %x0|%x0, %1}";
11890 return "vmovapd\t{%1, %x0|%x0, %1}";
11892 return "vmovdqa\t{%1, %x0|%x0, %1}";
11899 gcc_unreachable ();
11901 [(set_attr "type" "ssemov")
11902 (set_attr "prefix" "vex")
11903 (set_attr "mode" "<avxvecmode>")
11904 (set (attr "length")
11905 (if_then_else (eq_attr "alternative" "0")
11907 (const_string "*")))])
;; Unspec conversion in the opposite direction: from a 256-bit
;; AVX256MODE2P value (operand 1) to an <avxhalfvecmode> register
;; (operand 0).
;; Alternative 0 ties the source to the destination ("0").  Alternative 1
;; takes a register or memory source; for it the output code picks
;; vmovaps, vmovapd or vmovdqa from the insn's mode attribute and reads
;; the %x1 view of the source.
;; The "length" attribute is given explicitly for alternative 0 and left
;; at the default ("*") otherwise.
11909 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
11910 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
11911 (unspec:<avxhalfvecmode>
11912 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
11916 switch (which_alternative)
11921 switch (get_attr_mode (insn))
11924 return "vmovaps\t{%x1, %0|%0, %x1}";
11926 return "vmovapd\t{%x1, %0|%0, %x1}";
11928 return "vmovdqa\t{%x1, %0|%0, %x1}";
11935 gcc_unreachable ();
11937 [(set_attr "type" "ssemov")
11938 (set_attr "prefix" "vex")
11939 (set_attr "mode" "<avxvecmode>")
11940 (set (attr "length")
11941 (if_then_else (eq_attr "alternative" "0")
11943 (const_string "*")))])
;; vec_init expander for the 256-bit AVX modes.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument, the
;; destination register (operand 0) and the initializer (operand 1).
11945 (define_expand "vec_init<mode>"
11946 [(match_operand:AVX256MODE 0 "register_operand" "")
11947 (match_operand 1 "" "")]
11950 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two <avxhalfvecmode> values into a 256-bit register.
;; Alternative 0 (operand 2 in a register or memory) emits vinsertf128
;; with immediate 1, using the %t1 view of operand 1.
;; Alternative 1 (operand 2 matches constraint "C") emits only a move of
;; operand 1 into the %x0 view of the destination, choosing vmovaps,
;; vmovapd or vmovdqa from the insn's mode attribute.
11954 (define_insn "*vec_concat<mode>_avx"
11955 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11956 (vec_concat:AVX256MODE
11957 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11958 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11961 switch (which_alternative)
11964 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11966 switch (get_attr_mode (insn))
11969 return "vmovaps\t{%1, %x0|%x0, %1}";
11971 return "vmovapd\t{%1, %x0|%x0, %1}";
11973 return "vmovdqa\t{%1, %x0|%x0, %1}";
11976 gcc_unreachable ();
11979 [(set_attr "type" "sselog,ssemov")
11980 (set_attr "prefix" "vex")
11981 (set_attr "mode" "<avxvecmode>")])