1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
90 (define_c_enum "unspecv" [
100 ;; All vector modes including V?TImode, used in move patterns.
;; Each line pairs a 256-bit mode, enabled only under TARGET_AVX, with its
;; 128-bit counterpart, which carries no condition.  The V2TI/V1TI pair is
;; part of this list.
101 (define_mode_iterator V16
102 [(V32QI "TARGET_AVX") V16QI
103 (V16HI "TARGET_AVX") V8HI
104 (V8SI "TARGET_AVX") V4SI
105 (V4DI "TARGET_AVX") V2DI
106 (V2TI "TARGET_AVX") V1TI
107 (V8SF "TARGET_AVX") V4SF
108 (V4DF "TARGET_AVX") V2DF])
;; Integer and float vector modes with QI/HI/SI/DI/SF/DF elements (no
;; TImode-element entries).  The 256-bit modes require TARGET_AVX, V2DF
;; requires TARGET_SSE2, and the other 128-bit modes are unconditional.
111 (define_mode_iterator V
112 [(V32QI "TARGET_AVX") V16QI
113 (V16HI "TARGET_AVX") V8HI
114 (V8SI "TARGET_AVX") V4SI
115 (V4DI "TARGET_AVX") V2DI
116 (V8SF "TARGET_AVX") V4SF
117 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
119 ;; All 128bit vector modes
;; Only V2DF is conditional (TARGET_SSE2); every other entry is enabled
;; unconditionally.
120 (define_mode_iterator V_128
121 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
123 ;; All 256bit vector modes
;; No per-mode condition is attached to any entry of this iterator.
124 (define_mode_iterator V_256
125 [V32QI V16HI V8SI V4DI V8SF V4DF])
127 ;; All vector float modes
;; V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF is
;; unconditional.
128 (define_mode_iterator VF
129 [(V8SF "TARGET_AVX") V4SF
130 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
132 ;; All SFmode vector float modes
;; V8SF is gated on TARGET_AVX; V4SF has no condition.
133 (define_mode_iterator VF1
134 [(V8SF "TARGET_AVX") V4SF])
136 ;; All DFmode vector float modes
;; V4DF is gated on TARGET_AVX.  V2DF carries no TARGET_SSE2 condition in
;; this iterator, so patterns using it must supply that check themselves.
137 (define_mode_iterator VF2
138 [(V4DF "TARGET_AVX") V2DF])
140 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF requires TARGET_SSE2.
141 (define_mode_iterator VF_128
142 [V4SF (V2DF "TARGET_SSE2")])
144 ;; All 256bit vector float modes
145 (define_mode_iterator VF_256
148 ;; All vector integer modes
;; The 256-bit integer modes are gated on TARGET_AVX (not TARGET_AVX2); the
;; 128-bit modes are unconditional.
149 (define_mode_iterator VI
150 [(V32QI "TARGET_AVX") V16QI
151 (V16HI "TARGET_AVX") V8HI
152 (V8SI "TARGET_AVX") V4SI
153 (V4DI "TARGET_AVX") V2DI])
;; Same mode list as VI, but the 256-bit entries are gated on TARGET_AVX2
;; instead of TARGET_AVX.
155 (define_mode_iterator VI_AVX2
156 [(V32QI "TARGET_AVX2") V16QI
157 (V16HI "TARGET_AVX2") V8HI
158 (V8SI "TARGET_AVX2") V4SI
159 (V4DI "TARGET_AVX2") V2DI])
161 ;; All QImode vector integer modes
;; V32QI is gated on TARGET_AVX; V16QI is unconditional.
162 (define_mode_iterator VI1
163 [(V32QI "TARGET_AVX") V16QI])
165 ;; All DImode vector integer modes
;; V4DI is gated on TARGET_AVX; V2DI is unconditional.
166 (define_mode_iterator VI8
167 [(V4DI "TARGET_AVX") V2DI])
;; Single-element-size integer iterators, one each for QI, HI, SI and DI
;; elements.  In every one the 256-bit mode requires TARGET_AVX2 and the
;; 128-bit mode is unconditional.
169 (define_mode_iterator VI1_AVX2
170 [(V32QI "TARGET_AVX2") V16QI])
172 (define_mode_iterator VI2_AVX2
173 [(V16HI "TARGET_AVX2") V8HI])
175 (define_mode_iterator VI4_AVX2
176 [(V8SI "TARGET_AVX2") V4SI])
178 (define_mode_iterator VI8_AVX2
179 [(V4DI "TARGET_AVX2") V2DI])
181 ;; ??? We should probably use TImode instead.
;; TImode-element vectors: V2TI requires TARGET_AVX2, V1TI is unconditional.
182 (define_mode_iterator VIMAX_AVX2
183 [(V2TI "TARGET_AVX2") V1TI])
185 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in using scalar TI where that iterator uses
;; V1TI; V2TI is again gated on TARGET_AVX2.
186 (define_mode_iterator SSESCALARMODE
187 [(V2TI "TARGET_AVX2") TI])
;; Combined-element-size integer iterators.  The digits in each name are the
;; element sizes in bytes that the iterator covers (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI).  In all of them the 256-bit mode requires TARGET_AVX2 and the
;; 128-bit mode is unconditional.

;; QI and HI elements.
189 (define_mode_iterator VI12_AVX2
190 [(V32QI "TARGET_AVX2") V16QI
191 (V16HI "TARGET_AVX2") V8HI])
;; HI and SI elements.
193 (define_mode_iterator VI24_AVX2
194 [(V16HI "TARGET_AVX2") V8HI
195 (V8SI "TARGET_AVX2") V4SI])
;; QI, HI and SI elements.
197 (define_mode_iterator VI124_AVX2
198 [(V32QI "TARGET_AVX2") V16QI
199 (V16HI "TARGET_AVX2") V8HI
200 (V8SI "TARGET_AVX2") V4SI])
;; HI, SI and DI elements.
202 (define_mode_iterator VI248_AVX2
203 [(V16HI "TARGET_AVX2") V8HI
204 (V8SI "TARGET_AVX2") V4SI
205 (V4DI "TARGET_AVX2") V2DI])
;; SI and DI elements.
207 (define_mode_iterator VI48_AVX2
208 [(V8SI "TARGET_AVX2") V4SI
209 (V4DI "TARGET_AVX2") V2DI])
;; Unlike the VI*_AVX2 iterators, the integer entries listed here are gated
;; on TARGET_AVX2 for the 128-bit modes (V4SI, V2DI) as well as the 256-bit
;; ones (V8SI, V4DI).
211 (define_mode_iterator V48_AVX2
214 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
215 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Maps each integer vector mode to the string "sse2" (128-bit modes,
;; including V1TI) or "avx2" (256-bit modes, including V2TI).
217 (define_mode_attr sse2_avx2
218 [(V16QI "sse2") (V32QI "avx2")
219 (V8HI "sse2") (V16HI "avx2")
220 (V4SI "sse2") (V8SI "avx2")
221 (V2DI "sse2") (V4DI "avx2")
222 (V1TI "sse2") (V2TI "avx2")])
;; Maps each integer vector mode to "ssse3" (128-bit) or "avx2" (256-bit).
;; The 128-bit TImode entry is scalar TI here, whereas sse2_avx2 uses V1TI.
224 (define_mode_attr ssse3_avx2
225 [(V16QI "ssse3") (V32QI "avx2")
226 (V8HI "ssse3") (V16HI "avx2")
227 (V4SI "ssse3") (V8SI "avx2")
228 (V2DI "ssse3") (V4DI "avx2")
229 (TI "ssse3") (V2TI "avx2")])
;; Maps each integer vector mode to "sse4_1" (128-bit) or "avx2" (256-bit).
;; No TImode-element entries are provided.
231 (define_mode_attr sse4_1_avx2
232 [(V16QI "sse4_1") (V32QI "avx2")
233 (V8HI "sse4_1") (V16HI "avx2")
234 (V4SI "sse4_1") (V8SI "avx2")
235 (V2DI "sse4_1") (V4DI "avx2")])
;; Float vector modes (128- and 256-bit) map to "avx"; SI- and DI-element
;; integer vector modes (128- and 256-bit) map to "avx2".
237 (define_mode_attr avx_avx2
238 [(V4SF "avx") (V2DF "avx")
239 (V8SF "avx") (V4DF "avx")
240 (V4SI "avx2") (V2DI "avx2")
241 (V8SI "avx2") (V4DI "avx2")])
;; Maps 128-bit integer vector modes to "vec" and 256-bit ones to "avx2".
243 (define_mode_attr vec_avx2
244 [(V16QI "vec") (V32QI "avx2")
245 (V8HI "vec") (V16HI "avx2")
246 (V4SI "vec") (V8SI "avx2")
247 (V2DI "vec") (V4DI "avx2")])
;; HI-element vector -> SI-element vector with the same number of elements
;; (so the total size doubles).
249 (define_mode_attr ssedoublemode
250 [(V16HI "V16SI") (V8HI "V8SI")])
;; DI-element vector -> QI-element vector of the same total size.
252 (define_mode_attr ssebytemode
253 [(V4DI "V32QI") (V2DI "V16QI")])
255 ;; All 128bit vector integer modes
;; (QI, HI, SI and DI elements; no per-mode target condition).
256 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
258 ;; All 256bit vector integer modes
;; (QI, HI, SI and DI elements; no per-mode target condition).
259 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
261 ;; Random 128bit vector integer mode combinations
;; The digits before "_128" name the element sizes in bytes that each
;; iterator covers (1 = QI, 2 = HI, 4 = SI, 8 = DI); e.g. VI128_128 is
;; QI + HI + DI elements.  None of the entries carries a target condition.
262 (define_mode_iterator VI12_128 [V16QI V8HI])
263 (define_mode_iterator VI14_128 [V16QI V4SI])
264 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
265 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
266 (define_mode_iterator VI24_128 [V8HI V4SI])
267 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
268 (define_mode_iterator VI48_128 [V4SI V2DI])
270 ;; Random 256bit vector integer mode combinations
;; Same naming scheme as the 128-bit combinations: the digits are element
;; sizes in bytes.  No per-mode target condition is attached.
271 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
272 (define_mode_iterator VI48_256 [V8SI V4DI])
274 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode of
;; identical element size and element count (4-byte: SI/SF, 8-byte: DI/DF),
;; for 128-bit and 256-bit vectors respectively.
275 (define_mode_iterator VI4F_128 [V4SI V4SF])
276 (define_mode_iterator VI8F_128 [V2DI V2DF])
277 (define_mode_iterator VI4F_256 [V8SI V8SF])
278 (define_mode_iterator VI8F_256 [V4DI V4DF])
280 ;; Mapping from float mode to required SSE level
;; Covers scalar SF/DF as well as the vector modes: SF-based 128-bit modes
;; give "sse", DF-based ones give "sse2", and both 256-bit modes give "avx".
;; Substituted into pattern names such as "<sse>_movu...".
281 (define_mode_attr sse
282 [(SF "sse") (DF "sse2")
283 (V4SF "sse") (V2DF "sse2")
284 (V8SF "avx") (V4DF "avx")])
;; Defined for QI- and DI-element integer vectors only: 128-bit -> "sse2",
;; 256-bit -> "avx".  Substituted into pattern names such as
;; "<sse2>_movdqu<avxsizesuffix>" and "<sse2>_movnt<mode>".
286 (define_mode_attr sse2
287 [(V16QI "sse2") (V32QI "avx")
288 (V2DI "sse2") (V4DI "avx")])
;; Defined for QI-element vectors only: V16QI -> "sse3", V32QI -> "avx".
;; Substituted into the pattern name "<sse3>_lddqu<avxsizesuffix>".
290 (define_mode_attr sse3
291 [(V16QI "sse3") (V32QI "avx")])
;; Float vector modes only: 128-bit -> "sse4_1", 256-bit -> "avx".
293 (define_mode_attr sse4_1
294 [(V4SF "sse4_1") (V2DF "sse4_1")
295 (V8SF "avx") (V4DF "avx")])
;; "256" for every 256-bit mode and the empty string for every 128-bit mode.
;; Appended to pattern names, e.g. "<sse2>_movdqu<avxsizesuffix>".
297 (define_mode_attr avxsizesuffix
298 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
299 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
300 (V8SF "256") (V4DF "256")
301 (V4SF "") (V2DF "")])
303 ;; SSE instruction mode
;; Integer vector modes map to the integer mode of the same total size
;; (OI for 256-bit, TI for 128-bit); float vector modes map to themselves.
;; Used as the value of the "mode" insn attribute, e.g. in the movdqu and
;; lddqu patterns.
304 (define_mode_attr sseinsnmode
305 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
306 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
307 (V8SF "V8SF") (V4DF "V4DF")
308 (V4SF "V4SF") (V2DF "V2DF")
311 ;; Mapping of vector float modes to an integer mode of the same size
;; Float modes map to the integer vector mode with the same element count
;; and element size; integer vector modes map to themselves.
312 (define_mode_attr sseintvecmode
313 [(V8SF "V8SI") (V4DF "V4DI")
314 (V4SF "V4SI") (V2DF "V2DI")
315 (V8SI "V8SI") (V4DI "V4DI")
316 (V4SI "V4SI") (V2DI "V2DI")
317 (V16HI "V16HI") (V8HI "V8HI")
318 (V32QI "V32QI") (V16QI "V16QI")])
;; Same mapping as sseintvecmode, but yielding the mode name in lower case.
320 (define_mode_attr sseintvecmodelower
321 [(V8SF "v8si") (V4DF "v4di")
322 (V4SF "v4si") (V2DF "v2di")
323 (V8SI "v8si") (V4DI "v4di")
324 (V4SI "v4si") (V2DI "v2di")
325 (V16HI "v16hi") (V8HI "v8hi")
326 (V32QI "v32qi") (V16QI "v16qi")])
328 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count is doubled, so 128-bit
;; modes map to 256-bit modes and 256-bit modes map to 512-bit modes.
329 (define_mode_attr ssedoublevecmode
330 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
331 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
332 (V8SF "V16SF") (V4DF "V8DF")
333 (V4SF "V8SF") (V2DF "V4DF")])
335 ;; Mapping of vector modes to a vector mode of half size
;; The element type is kept and the element count is halved.  Among the
;; 128-bit integer modes listed here there is no entry for V2DI.
336 (define_mode_attr ssehalfvecmode
337 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
338 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
339 (V8SF "V4SF") (V4DF "V2DF")
342 ;; Mapping of vector modes back to the scalar modes
;; Yields the element mode of each vector mode (QI/HI/SI/DI/SF/DF).  Used as
;; the "mode" attribute of the scalar "vm" arithmetic patterns.
343 (define_mode_attr ssescalarmode
344 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
345 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
346 (V8SF "SF") (V4DF "DF")
347 (V4SF "SF") (V2DF "DF")])
349 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string, i.e. the number that
;; appears in the mode name (V32QI -> "32", V2DF -> "2").
350 (define_mode_attr ssescalarnum
351 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
352 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
353 (V8SF "8") (V4DF "4")
354 (V4SF "4") (V2DF "2")])
356 ;; SSE prefix for integer vector modes
;; In the entries listed here, integer vector modes yield "p" and float
;; vector modes yield the empty string.
357 (define_mode_attr sseintprefix
358 [(V2DI "p") (V2DF "")
361 (V8SI "p") (V8SF "")])
363 ;; SSE scalar suffix for vector modes
;; SF-element float vectors yield "ss" and DF-element float vectors yield
;; "sd".  The integer entries listed here follow the element width too:
;; V8SI -> "ss", V4DI -> "sd".
364 (define_mode_attr ssescalarmodesuffix
366 (V8SF "ss") (V4DF "sd")
367 (V4SF "ss") (V2DF "sd")
368 (V8SI "ss") (V4DI "sd")
371 ;; Pack/unpack vector modes
;; Unpack: the element width doubles and the element count halves, so the
;; total vector size is unchanged (e.g. V16QI -> V8HI).
372 (define_mode_attr sseunpackmode
373 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
374 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; Pack: the inverse of sseunpackmode.  The element width halves and the
;; element count doubles (e.g. V8HI -> V16QI).
376 (define_mode_attr ssepackmode
377 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
378 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
380 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value equals the element width in bits minus one (QI 7, HI 15,
;; SI 31, DI 63).  Only 128-bit modes are listed.
381 (define_mode_attr sserotatemax
382 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
384 ;; Mapping of mode to cast intrinsic name
;; Defined for the three 256-bit modes V8SI ("si"), V8SF ("ps") and
;; V4DF ("pd") only.
385 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
387 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute, not a mode attribute: it is keyed on the RTL
;; codes sign_extend ("sx") and zero_extend ("zx").
388 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
390 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float 256-bit modes yield the literal "f128".  Integer 256-bit modes yield
;; "%~128".  NOTE(review): "%~" is presumably an output-template modifier
;; that resolves to "i" or "f" when the instruction is printed -- confirm
;; against the operand-printing code in the back end.
391 (define_mode_attr i128
392 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
393 (V8SI "%~128") (V4DI "%~128")])
;; Three 256-bit modes: V8SI, V8SF and V4DF.  These are the same modes for
;; which the castmode attribute is defined.
396 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
398 ;; Mapping of immediate bits for blend instructions
;; Each value is an all-ones mask with one bit per vector element:
;; 8 elements -> 255, 4 elements -> 15, 2 elements -> 3.
399 (define_mode_attr blendbits
400 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
402 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
404 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
408 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
410 ;; All of these patterns are enabled for SSE1 as well as SSE2.
411 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in iterator V16.  Both operands are accepted
;; as nonimmediate_operand with no constraints; the work is delegated to
;; ix86_expand_vector_move, which receives the mode and the operand array.
413 (define_expand "mov<mode>"
414 [(set (match_operand:V16 0 "nonimmediate_operand" "")
415 (match_operand:V16 1 "nonimmediate_operand" ""))]
418 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in iterator V16.
;;
;; Alternatives (destination <- source):
;;   0: SSE register <- constant matching constraint "C"   (type sselog1)
;;   1: SSE register <- SSE register or memory             (type ssemov)
;;   2: memory       <- SSE register                       (type ssemov)
;; The insn condition requires at least one of the two operands to be a
;; register.
;;
;; Output: alternative 0 obtains its opcode from
;; standard_sse_constant_opcode.  The other alternatives switch on the insn's
;; "mode" attribute and pick movaps, movapd or movdqa.  In each case the
;; unaligned VEX form (vmovups / vmovupd / vmovdqu) is returned instead when
;; either operand satisfies misaligned_operand, and movaps is substituted in
;; the packed-double and integer cases when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;;
;; The "mode" attribute is a cond: TARGET_AVX gives <sseinsnmode>; optimizing
;; for size, lacking SSE2, or a store (alternative 2) under
;; TARGET_SSE_TYPELESS_STORES gives V4SF; V4SFmode and V2DFmode give V4SF and
;; V2DF respectively; the last listed value is TI.
422 (define_insn "*mov<mode>_internal"
423 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
424 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
426 && (register_operand (operands[0], <MODE>mode)
427 || register_operand (operands[1], <MODE>mode))"
429 switch (which_alternative)
432 return standard_sse_constant_opcode (insn, operands[1]);
435 switch (get_attr_mode (insn))
440 && (misaligned_operand (operands[0], <MODE>mode)
441 || misaligned_operand (operands[1], <MODE>mode)))
442 return "vmovups\t{%1, %0|%0, %1}";
444 return "%vmovaps\t{%1, %0|%0, %1}";
449 && (misaligned_operand (operands[0], <MODE>mode)
450 || misaligned_operand (operands[1], <MODE>mode)))
451 return "vmovupd\t{%1, %0|%0, %1}";
452 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
453 return "%vmovaps\t{%1, %0|%0, %1}";
455 return "%vmovapd\t{%1, %0|%0, %1}";
460 && (misaligned_operand (operands[0], <MODE>mode)
461 || misaligned_operand (operands[1], <MODE>mode)))
462 return "vmovdqu\t{%1, %0|%0, %1}";
463 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
464 return "%vmovaps\t{%1, %0|%0, %1}";
466 return "%vmovdqa\t{%1, %0|%0, %1}";
475 [(set_attr "type" "sselog1,ssemov,ssemov")
476 (set_attr "prefix" "maybe_vex")
478 (cond [(match_test "TARGET_AVX")
479 (const_string "<sseinsnmode>")
480 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
481 (not (match_test "TARGET_SSE2")))
482 (and (eq_attr "alternative" "2")
483 (match_test "TARGET_SSE_TYPELESS_STORES")))
484 (const_string "V4SF")
485 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
486 (const_string "V4SF")
487 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
488 (const_string "V2DF")
490 (const_string "TI")))])
;; Emits movq from a V2DI register-or-memory source into an SSE register.
;; The RTL selects element 0 of operand 1.  The insn is typed ssemov with
;; mode TI and a maybe_vex prefix.
492 (define_insn "sse2_movq128"
493 [(set (match_operand:V2DI 0 "register_operand" "=x")
496 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
497 (parallel [(const_int 0)]))
500 "%vmovq\t{%1, %0|%0, %1}"
501 [(set_attr "type" "ssemov")
502 (set_attr "prefix" "maybe_vex")
503 (set_attr "mode" "TI")])
505 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
506 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
507 ;; from memory, we'd prefer to load the memory directly into the %xmm
508 ;; register. To facilitate this happy circumstance, this pattern won't
509 ;; split until after register allocation. If the 64-bit value didn't
510 ;; come from memory, this is the best we can do. This is much better
511 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;; Enabled only for 32-bit targets with SSE2 and TARGET_INTER_UNIT_MOVES.
;; Operand 1 is a DImode value in a general register pair (alternative 0,
;; which also needs the early-clobbered V4SI scratch in operand 2) or in
;; memory (alternative 1, no scratch).  The insn is split once reload has
;; completed:
;;   * register source: the SImode halves at subreg byte offsets 0 and 4 are
;;     loaded with sse2_loadld into operand 0 and the scratch respectively,
;;     then combined with vec_interleave_lowv4si;
;;   * memory source: a single vec_concatv2di of the DImode memory with zero,
;;     written through the V2DI lowpart of operand 0.
514 (define_insn_and_split "movdi_to_sse"
516 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
517 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
518 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
519 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
521 "&& reload_completed"
524 if (register_operand (operands[1], DImode))
526 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
527 Assemble the 64-bit DImode value in an xmm register. */
528 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
529 gen_rtx_SUBREG (SImode, operands[1], 0)));
530 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
531 gen_rtx_SUBREG (SImode, operands[1], 4)));
532 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
535 else if (memory_operand (operands[1], DImode))
536 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
537 operands[1], const0_rtx));
543 [(set (match_operand:V4SF 0 "register_operand" "")
544 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
545 "TARGET_SSE && reload_completed"
548 (vec_duplicate:V4SF (match_dup 1))
552 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
553 operands[2] = CONST0_RTX (V4SFmode);
557 [(set (match_operand:V2DF 0 "register_operand" "")
558 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
559 "TARGET_SSE2 && reload_completed"
560 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
562 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
563 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in iterator V16.  The single operand must be
;; a register; the expansion is delegated to ix86_expand_push.
566 (define_expand "push<mode>1"
567 [(match_operand:V16 0 "register_operand" "")]
570 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in iterator V16.  The expansion is
;; delegated to ix86_expand_vector_move_misalign, which receives the mode and
;; the operand array.
574 (define_expand "movmisalign<mode>"
575 [(set (match_operand:V16 0 "nonimmediate_operand" "")
576 (match_operand:V16 1 "nonimmediate_operand" ""))]
579 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned float-vector move (movu<suffix>) for the modes in iterator VF.
;; Alternatives: register <- register/memory, and memory <- register.  The
;; condition additionally rejects a memory-to-memory pairing.  The insn is
;; marked with the "movu" attribute.
583 (define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
584 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
586 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
588 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
589 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
590 [(set_attr "type" "ssemov")
591 (set_attr "movu" "1")
592 (set_attr "prefix" "maybe_vex")
593 (set_attr "mode" "<MODE>")])
;; Unaligned integer-vector move (movdqu) for the QI-element modes in
;; iterator VI1, expressed as an unspec of operand 1.  Requires TARGET_SSE2
;; and rejects a memory-to-memory pairing.  The prefix_data16 attribute
;; depends on TARGET_AVX.
595 (define_insn "<sse2>_movdqu<avxsizesuffix>"
596 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
597 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
599 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
600 "%vmovdqu\t{%1, %0|%0, %1}"
601 [(set_attr "type" "ssemov")
602 (set_attr "movu" "1")
603 (set (attr "prefix_data16")
605 (match_test "TARGET_AVX")
608 (set_attr "prefix" "maybe_vex")
609 (set_attr "mode" "<sseinsnmode>")])
;; Load-only pattern (lddqu) for the QI-element modes in iterator VI1: the
;; source must be memory and the destination an SSE register.  Both the
;; prefix_data16 and prefix_rep attributes depend on TARGET_AVX.
611 (define_insn "<sse3>_lddqu<avxsizesuffix>"
612 [(set (match_operand:VI1 0 "register_operand" "=x")
613 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
616 "%vlddqu\t{%1, %0|%0, %1}"
617 [(set_attr "type" "ssemov")
618 (set_attr "movu" "1")
619 (set (attr "prefix_data16")
621 (match_test "TARGET_AVX")
624 (set (attr "prefix_rep")
626 (match_test "TARGET_AVX")
629 (set_attr "prefix" "maybe_vex")
630 (set_attr "mode" "<sseinsnmode>")])
;; movnti store of a general register (constraint "r") to memory, for the
;; modes in iterator SWI48.  There is no "%v" form of the mnemonic and
;; prefix_data16 is fixed at 0.
632 (define_insn "sse2_movnti<mode>"
633 [(set (match_operand:SWI48 0 "memory_operand" "=m")
634 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
637 "movnti\t{%1, %0|%0, %1}"
638 [(set_attr "type" "ssemov")
639 (set_attr "prefix_data16" "0")
640 (set_attr "mode" "<MODE>")])
;; movnt<suffix> store of an SSE register to memory, for the float vector
;; modes in iterator VF.  The destination must be memory and the source a
;; register.
642 (define_insn "<sse>_movnt<mode>"
643 [(set (match_operand:VF 0 "memory_operand" "=m")
644 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
647 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
648 [(set_attr "type" "ssemov")
649 (set_attr "prefix" "maybe_vex")
650 (set_attr "mode" "<MODE>")])
;; movntdq store of an SSE register to memory, for the DI-element modes in
;; iterator VI8.  The prefix_data16 attribute depends on TARGET_AVX.
;; NOTE(review): the "type" attribute is "ssecvt" here, whereas the movnti
;; and float movnt patterns use "ssemov" -- confirm that this is intentional
;; for scheduling purposes.
652 (define_insn "<sse2>_movnt<mode>"
653 [(set (match_operand:VI8 0 "memory_operand" "=m")
654 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
657 "%vmovntdq\t{%1, %0|%0, %1}"
658 [(set_attr "type" "ssecvt")
659 (set (attr "prefix_data16")
661 (match_test "TARGET_AVX")
664 (set_attr "prefix" "maybe_vex")
665 (set_attr "mode" "<sseinsnmode>")])
667 ; Expand patterns for non-temporal stores. At the moment, only those
668 ; that directly map to insns are defined; it would be possible to
669 ; define patterns for other modes that would expand to several insns.
671 ;; Modes handled by storent patterns.
;; Per-mode enabling conditions:
;;   DI          TARGET_SSE2 && TARGET_64BIT
;;   SI          TARGET_SSE2
;;   SF, DF      TARGET_SSE4A
;;   V4DI        TARGET_AVX        V2DI   TARGET_SSE2
;;   V8SF        TARGET_AVX        V4SF   (unconditional)
;;   V4DF        TARGET_AVX        V2DF   TARGET_SSE2
672 (define_mode_iterator STORENT_MODE
673 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
674 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
675 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
676 (V8SF "TARGET_AVX") V4SF
677 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Non-temporal store expander for every mode in STORENT_MODE.  The
;; destination must be memory and the source a register.
679 (define_expand "storent<mode>"
680 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
682 [(match_operand:STORENT_MODE 1 "register_operand" "")]
686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
688 ;; Parallel floating point arithmetic
690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander over the float vector modes in VF.  The whole expansion is
;; done by ix86_expand_fp_absneg_operator, after which DONE suppresses the
;; default RTL template.
692 (define_expand "<code><mode>2"
693 [(set (match_operand:VF 0 "register_operand" "")
695 (match_operand:VF 1 "register_operand" "")))]
697 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Abs/neg on float vectors, matched through absneg_operator and carrying an
;; extra "use" of operand 2.  Split once reload has completed into a single
;; logical operation: XOR when the matched operator is NEG, AND otherwise.
;; When operand 1 is in memory the two inputs are swapped, so that the memory
;; operand becomes the second input of the logical operation.
;; NOTE(review): operand 2 is presumably the sign-bit mask supplied by the
;; expander -- confirm against ix86_expand_fp_absneg_operator.
;; Alternatives 0-1 are non-AVX (one input tied to the destination);
;; alternatives 2-3 are AVX.
699 (define_insn_and_split "*absneg<mode>2"
700 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
701 (match_operator:VF 3 "absneg_operator"
702 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
703 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
706 "&& reload_completed"
709 enum rtx_code absneg_op;
715 if (MEM_P (operands[1]))
716 op1 = operands[2], op2 = operands[1];
718 op1 = operands[1], op2 = operands[2];
723 if (rtx_equal_p (operands[0], operands[1]))
729 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
730 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
731 t = gen_rtx_SET (VOIDmode, operands[0], t);
735 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Add/subtract expander over the float vector modes in VF.  Both inputs may
;; be registers or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands before the RTL
;; template is emitted.
737 (define_expand "<plusminus_insn><mode>3"
738 [(set (match_operand:VF 0 "register_operand" "")
740 (match_operand:VF 1 "nonimmediate_operand" "")
741 (match_operand:VF 2 "nonimmediate_operand" "")))]
743 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed add/subtract insn.  Alternative 0 (non-AVX) is the two-operand form
;; with operand 1 tied to the destination; alternative 1 (AVX) is the
;; three-operand "v" form.  The condition also requires
;; ix86_binary_operator_ok to accept the operands.
745 (define_insn "*<plusminus_insn><mode>3"
746 [(set (match_operand:VF 0 "register_operand" "=x,x")
748 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
749 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
750 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
752 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
753 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
754 [(set_attr "isa" "noavx,avx")
755 (set_attr "type" "sseadd")
756 (set_attr "prefix" "orig,vex")
757 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") add/subtract on the 128-bit float modes in VF_128.  The
;; mnemonic takes the scalar suffix and the insn's "mode" attribute is the
;; element mode.  Operand 1 must be a register, tied to the destination in
;; the non-AVX alternative.
759 (define_insn "<sse>_vm<plusminus_insn><mode>3"
760 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
763 (match_operand:VF_128 1 "register_operand" "0,x")
764 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
769 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
770 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
771 [(set_attr "isa" "noavx,avx")
772 (set_attr "type" "sseadd")
773 (set_attr "prefix" "orig,vex")
774 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander over the float vector modes in VF.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy with code MULT.
776 (define_expand "mul<mode>3"
777 [(set (match_operand:VF 0 "register_operand" "")
779 (match_operand:VF 1 "nonimmediate_operand" "")
780 (match_operand:VF 2 "nonimmediate_operand" "")))]
782 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed multiply insn.  The "%" in operand 1's first constraint marks
;; operands 1 and 2 as commutative.  Alternative 0 is the non-AVX two-operand
;; form; alternative 1 is the AVX three-operand form.
784 (define_insn "*mul<mode>3"
785 [(set (match_operand:VF 0 "register_operand" "=x,x")
787 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
788 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
789 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
791 mul<ssemodesuffix>\t{%2, %0|%0, %2}
792 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
793 [(set_attr "isa" "noavx,avx")
794 (set_attr "type" "ssemul")
795 (set_attr "prefix" "orig,vex")
796 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") multiply on the 128-bit float modes in VF_128.  Operand 1
;; must be a register (no commutative marker is used here) and the "mode"
;; attribute is the element mode.
798 (define_insn "<sse>_vmmul<mode>3"
799 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
802 (match_operand:VF_128 1 "register_operand" "0,x")
803 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
808 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
809 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
810 [(set_attr "isa" "noavx,avx")
811 (set_attr "type" "ssemul")
812 (set_attr "prefix" "orig,vex")
813 (set_attr "mode" "<ssescalarmode>")])
;; Divide expander for the DF-element modes in VF2.  The dividend must be a
;; register.  The only preparation is the call to
;; ix86_fixup_binary_operands_no_copy; no software-reciprocal path is used
;; for these modes.
815 (define_expand "div<mode>3"
816 [(set (match_operand:VF2 0 "register_operand" "")
817 (div:VF2 (match_operand:VF2 1 "register_operand" "")
818 (match_operand:VF2 2 "nonimmediate_operand" "")))]
820 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Divide expander for the SF-element modes in VF1.  After the usual operand
;; fix-up, the division is emitted through ix86_emit_swdivsf when the guard
;; holds.  The guard includes TARGET_RECIP_VEC_DIV, not optimizing the insn
;; for size, finite-math-only, no trapping math and unsafe math
;; optimizations.
822 (define_expand "div<mode>3"
823 [(set (match_operand:VF1 0 "register_operand" "")
824 (div:VF1 (match_operand:VF1 1 "register_operand" "")
825 (match_operand:VF1 2 "nonimmediate_operand" "")))]
828 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
831 && TARGET_RECIP_VEC_DIV
832 && !optimize_insn_for_size_p ()
833 && flag_finite_math_only && !flag_trapping_math
834 && flag_unsafe_math_optimizations)
836 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed divide insn over the float vector modes in VF.  The dividend
;; (operand 1) must be a register; the divisor may be a register or memory.
;; Alternative 0 is the non-AVX two-operand form; alternative 1 is the AVX
;; three-operand form.
841 (define_insn "<sse>_div<mode>3"
842 [(set (match_operand:VF 0 "register_operand" "=x,x")
844 (match_operand:VF 1 "register_operand" "0,x")
845 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
848 div<ssemodesuffix>\t{%2, %0|%0, %2}
849 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
850 [(set_attr "isa" "noavx,avx")
851 (set_attr "type" "ssediv")
852 (set_attr "prefix" "orig,vex")
853 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") divide on the 128-bit float modes in VF_128.  The mnemonic
;; takes the scalar suffix and the "mode" attribute is the element mode.
855 (define_insn "<sse>_vmdiv<mode>3"
856 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
859 (match_operand:VF_128 1 "register_operand" "0,x")
860 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
865 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
866 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
867 [(set_attr "isa" "noavx,avx")
868 (set_attr "type" "ssediv")
869 (set_attr "prefix" "orig,vex")
870 (set_attr "mode" "<ssescalarmode>")])
;; Packed reciprocal (rcpps) for the SF-element modes in VF1, represented as
;; UNSPEC_RCP of operand 1.  There is a single alternative; the source may be
;; a register or memory.
872 (define_insn "<sse>_rcp<mode>2"
873 [(set (match_operand:VF1 0 "register_operand" "=x")
875 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
877 "%vrcpps\t{%1, %0|%0, %1}"
878 [(set_attr "type" "sse")
879 (set_attr "atom_sse_attr" "rcp")
880 (set_attr "prefix" "maybe_vex")
881 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal (rcpss / vrcpss) on V4SF.  Operand 1 is the value fed to
;; the unspec.  Operand 2 is a second register input: it is tied to the
;; destination in the non-AVX alternative and appears as the extra source of
;; the three-operand VEX form.
883 (define_insn "sse_vmrcpv4sf2"
884 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
886 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
888 (match_operand:V4SF 2 "register_operand" "0,x")
892 rcpss\t{%1, %0|%0, %1}
893 vrcpss\t{%1, %2, %0|%0, %2, %1}"
894 [(set_attr "isa" "noavx,avx")
895 (set_attr "type" "sse")
896 (set_attr "atom_sse_attr" "rcp")
897 (set_attr "prefix" "orig,vex")
898 (set_attr "mode" "SF")])
;; Square-root expander for the DF-element modes in VF2.  The source may be a
;; register or memory.
900 (define_expand "sqrt<mode>2"
901 [(set (match_operand:VF2 0 "register_operand" "")
902 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SF-element modes in VF1.  When the guard
;; holds, the result is produced by ix86_emit_swsqrtsf with its last argument
;; false.  The guard includes TARGET_RECIP_VEC_SQRT, not optimizing the insn
;; for size, finite-math-only, no trapping math and unsafe math
;; optimizations.
905 (define_expand "sqrt<mode>2"
906 [(set (match_operand:VF1 0 "register_operand" "")
907 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
911 && TARGET_RECIP_VEC_SQRT
912 && !optimize_insn_for_size_p ()
913 && flag_finite_math_only && !flag_trapping_math
914 && flag_unsafe_math_optimizations)
916 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed square-root insn over the float vector modes in VF.  There is a
;; single alternative; the source may be a register or memory.
921 (define_insn "<sse>_sqrt<mode>2"
922 [(set (match_operand:VF 0 "register_operand" "=x")
923 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
925 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
926 [(set_attr "type" "sse")
927 (set_attr "atom_sse_attr" "sqrt")
928 (set_attr "prefix" "maybe_vex")
929 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") square root on the 128-bit float modes in VF_128.  Operand 1
;; is the value whose root is taken.  Operand 2 is a second register input:
;; it is tied to the destination in the non-AVX alternative and appears as
;; the extra source of the three-operand VEX form.
931 (define_insn "<sse>_vmsqrt<mode>2"
932 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
935 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
936 (match_operand:VF_128 2 "register_operand" "0,x")
940 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
941 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
942 [(set_attr "isa" "noavx,avx")
943 (set_attr "type" "sse")
944 (set_attr "atom_sse_attr" "sqrt")
945 (set_attr "prefix" "orig,vex")
946 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SF-element modes in VF1,
;; represented as UNSPEC_RSQRT.  The expansion calls ix86_emit_swsqrtsf with
;; its last argument true; the square-root expander passes false.
948 (define_expand "rsqrt<mode>2"
949 [(set (match_operand:VF1 0 "register_operand" "")
951 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
954 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root (rsqrtps) for the SF-element modes in VF1,
;; represented as UNSPEC_RSQRT of operand 1.
958 (define_insn "<sse>_rsqrt<mode>2"
959 [(set (match_operand:VF1 0 "register_operand" "=x")
961 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
963 "%vrsqrtps\t{%1, %0|%0, %1}"
964 [(set_attr "type" "sse")
965 (set_attr "prefix" "maybe_vex")
966 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root (rsqrtss / vrsqrtss) on V4SF.  The operand
;; layout matches sse_vmrcpv4sf2: operand 1 feeds the unspec, and operand 2
;; is a register tied to the destination in the non-AVX alternative.
968 (define_insn "sse_vmrsqrtv4sf2"
969 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
971 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
973 (match_operand:V4SF 2 "register_operand" "0,x")
977 rsqrtss\t{%1, %0|%0, %1}
978 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
979 [(set_attr "isa" "noavx,avx")
980 (set_attr "type" "sse")
981 (set_attr "prefix" "orig,vex")
982 (set_attr "mode" "SF")])
984 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
985 ;; isn't really correct, as those rtl operators aren't defined when
986 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander over the float vector modes in VF.  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a register,
;; which matches the register_operand predicate of the non-finite insn
;; pattern.  ix86_fixup_binary_operands_no_copy then fixes up the operands.
988 (define_expand "<code><mode>3"
989 [(set (match_operand:VF 0 "register_operand" "")
991 (match_operand:VF 1 "nonimmediate_operand" "")
992 (match_operand:VF 2 "nonimmediate_operand" "")))]
995 if (!flag_finite_math_only)
996 operands[1] = force_reg (<MODE>mode, operands[1]);
997 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed min/max insn used only under flag_finite_math_only.  Operands 1 and
;; 2 are marked commutative ("%") and operand 1 may be memory, subject to
;; ix86_binary_operator_ok.
1000 (define_insn "*<code><mode>3_finite"
1001 [(set (match_operand:VF 0 "register_operand" "=x,x")
1003 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1004 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1005 "TARGET_SSE && flag_finite_math_only
1006 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1008 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1009 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1010 [(set_attr "isa" "noavx,avx")
1011 (set_attr "type" "sseadd")
1012 (set_attr "prefix" "orig,vex")
1013 (set_attr "mode" "<MODE>")])
;; Packed min/max insn used when flag_finite_math_only is not set.  Unlike
;; the _finite variant, the operands are not marked commutative and operand 1
;; must be a register, so the operand order is preserved.
1015 (define_insn "*<code><mode>3"
1016 [(set (match_operand:VF 0 "register_operand" "=x,x")
1018 (match_operand:VF 1 "register_operand" "0,x")
1019 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1020 "TARGET_SSE && !flag_finite_math_only"
1022 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1023 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1024 [(set_attr "isa" "noavx,avx")
1025 (set_attr "type" "sseadd")
1026 (set_attr "prefix" "orig,vex")
1027 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") min/max on the 128-bit float modes in VF_128.  Operand 1
;; must be a register and no commutative marker is used.
;; NOTE(review): the "type" attribute is "sse" here, whereas the packed
;; min/max patterns use "sseadd" -- confirm that this is intentional.
1029 (define_insn "<sse>_vm<code><mode>3"
1030 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1033 (match_operand:VF_128 1 "register_operand" "0,x")
1034 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1039 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1040 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "isa" "noavx,avx")
1042 (set_attr "type" "sse")
1043 (set_attr "prefix" "orig,vex")
1044 (set_attr "mode" "<ssescalarmode>")])
1046 ;; These versions of the min/max patterns implement exactly the operations
1047 ;; min = (op1 < op2 ? op1 : op2)
1048 ;; max = (!(op1 < op2) ? op1 : op2)
1049 ;; Their operands are not commutative, and thus they may be used in the
1050 ;; presence of -0.0 and NaN.
;; Order-preserving packed minimum over the float vector modes in VF.  The
;; two inputs are wrapped together in one bracketed operand vector rather
;; than a commutative RTL operator, and operand 1 must be a register.
1052 (define_insn "*ieee_smin<mode>3"
1053 [(set (match_operand:VF 0 "register_operand" "=x,x")
1055 [(match_operand:VF 1 "register_operand" "0,x")
1056 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1060 min<ssemodesuffix>\t{%2, %0|%0, %2}
1061 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1062 [(set_attr "isa" "noavx,avx")
1063 (set_attr "type" "sseadd")
1064 (set_attr "prefix" "orig,vex")
1065 (set_attr "mode" "<MODE>")])
;; Order-preserving packed maximum over the float vector modes in VF; the
;; counterpart of *ieee_smin<mode>3, with an identical operand layout.
1067 (define_insn "*ieee_smax<mode>3"
1068 [(set (match_operand:VF 0 "register_operand" "=x,x")
1070 [(match_operand:VF 1 "register_operand" "0,x")
1071 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1075 max<ssemodesuffix>\t{%2, %0|%0, %2}
1076 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1077 [(set_attr "isa" "noavx,avx")
1078 (set_attr "type" "sseadd")
1079 (set_attr "prefix" "orig,vex")
1080 (set_attr "mode" "<MODE>")])
;; 256-bit vaddsubpd.  It is VEX-only: there is a single alternative and the
;; prefix attribute is "vex".  The RTL includes the difference of operands 1
;; and 2 over the same two inputs.
1082 (define_insn "avx_addsubv4df3"
1083 [(set (match_operand:V4DF 0 "register_operand" "=x")
1086 (match_operand:V4DF 1 "register_operand" "x")
1087 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1088 (minus:V4DF (match_dup 1) (match_dup 2))
1091 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1092 [(set_attr "type" "sseadd")
1093 (set_attr "prefix" "vex")
1094 (set_attr "mode" "V4DF")])
;; 128-bit addsubpd, with a non-AVX two-operand alternative (operand 1 tied
;; to the destination) and an AVX three-operand alternative.  The "atom_unit"
;; attribute is set to "complex".
1096 (define_insn "sse3_addsubv2df3"
1097 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1100 (match_operand:V2DF 1 "register_operand" "0,x")
1101 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1102 (minus:V2DF (match_dup 1) (match_dup 2))
1106 addsubpd\t{%2, %0|%0, %2}
1107 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "isa" "noavx,avx")
1109 (set_attr "type" "sseadd")
1110 (set_attr "atom_unit" "complex")
1111 (set_attr "prefix" "orig,vex")
1112 (set_attr "mode" "V2DF")])
;; 256-bit vaddsubps.  It is VEX-only: there is a single alternative and the
;; prefix attribute is "vex".  This is the single-precision counterpart of
;; avx_addsubv4df3.
1114 (define_insn "avx_addsubv8sf3"
1115 [(set (match_operand:V8SF 0 "register_operand" "=x")
1118 (match_operand:V8SF 1 "register_operand" "x")
1119 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1120 (minus:V8SF (match_dup 1) (match_dup 2))
1123 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1124 [(set_attr "type" "sseadd")
1125 (set_attr "prefix" "vex")
1126 (set_attr "mode" "V8SF")])
;; 128-bit addsubps, with a non-AVX two-operand alternative and an AVX
;; three-operand alternative.  The prefix_rep attribute is "1" for the legacy
;; encoding and left at its default ("*") for the VEX encoding.
1128 (define_insn "sse3_addsubv4sf3"
1129 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1132 (match_operand:V4SF 1 "register_operand" "0,x")
1133 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1134 (minus:V4SF (match_dup 1) (match_dup 2))
1138 addsubps\t{%2, %0|%0, %2}
1139 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1140 [(set_attr "isa" "noavx,avx")
1141 (set_attr "type" "sseadd")
1142 (set_attr "prefix" "orig,vex")
1143 (set_attr "prefix_rep" "1,*")
1144 (set_attr "mode" "V4SF")])
;; 256-bit horizontal add/subtract (vhaddpd / vhsubpd).  The RTL combines
;; adjacent element pairs, in this order: elements 0 and 1 of operand 1,
;; elements 0 and 1 of operand 2, elements 2 and 3 of operand 1, then
;; elements 2 and 3 of operand 2.  VEX-only, single alternative.
1146 (define_insn "avx_h<plusminus_insn>v4df3"
1147 [(set (match_operand:V4DF 0 "register_operand" "=x")
1152 (match_operand:V4DF 1 "register_operand" "x")
1153 (parallel [(const_int 0)]))
1154 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1157 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1158 (parallel [(const_int 0)]))
1159 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1162 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1163 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1165 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1166 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1168 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1169 [(set_attr "type" "sseadd")
1170 (set_attr "prefix" "vex")
1171 (set_attr "mode" "V4DF")])
;; SSE3 horizontal add/subtract on V2DF: elements 0 and 1 of operand 1 are
;; paired, and likewise elements 0 and 1 of operand 2.  Two alternatives:
;; legacy h<plusminus_mnemonic>pd with operand 1 tied to the destination,
;; and the three-operand VEX form; "isa" selects between them.
1173 (define_insn "sse3_h<plusminus_insn>v2df3"
1174 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1178 (match_operand:V2DF 1 "register_operand" "0,x")
1179 (parallel [(const_int 0)]))
1180 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1183 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1184 (parallel [(const_int 0)]))
1185 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1188 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1189 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1190 [(set_attr "isa" "noavx,avx")
1191 (set_attr "type" "sseadd")
1192 (set_attr "prefix" "orig,vex")
1193 (set_attr "mode" "V2DF")])
;; AVX horizontal add/subtract on V8SF.  SF vec_selects pair adjacent
;; elements: 0..3 of operand 1, then 0..3 of operand 2, then 4..7 of
;; operand 1, then 4..7 of operand 2.  Emits vh<plusminus_mnemonic>ps in
;; its three-operand VEX form.
1195 (define_insn "avx_h<plusminus_insn>v8sf3"
1196 [(set (match_operand:V8SF 0 "register_operand" "=x")
1202 (match_operand:V8SF 1 "register_operand" "x")
1203 (parallel [(const_int 0)]))
1204 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1206 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1207 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1211 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1212 (parallel [(const_int 0)]))
1213 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1215 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1216 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1220 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1221 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1223 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1224 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1227 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1228 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1230 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1231 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1233 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1234 [(set_attr "type" "sseadd")
1235 (set_attr "prefix" "vex")
1236 (set_attr "mode" "V8SF")])
;; SSE3 horizontal add/subtract on V4SF: adjacent elements 0..3 of
;; operand 1 are paired, then adjacent elements 0..3 of operand 2.
;; Two alternatives ("isa" noavx/avx): legacy h<plusminus_mnemonic>ps
;; with operand 1 tied to the destination, and the three-operand VEX
;; form.  prefix_rep is 1 only for the legacy alternative.
1238 (define_insn "sse3_h<plusminus_insn>v4sf3"
1239 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1244 (match_operand:V4SF 1 "register_operand" "0,x")
1245 (parallel [(const_int 0)]))
1246 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1248 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1249 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1253 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1254 (parallel [(const_int 0)]))
1255 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1257 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1258 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1261 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1262 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1263 [(set_attr "isa" "noavx,avx")
1264 (set_attr "type" "sseadd")
1265 (set_attr "atom_unit" "complex")
1266 (set_attr "prefix" "orig,vex")
1267 (set_attr "prefix_rep" "1,*")
1268 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4DF.  Emits three insns using two fresh
;; V4DF pseudos: a horizontal add of operand 1 with itself into tmp, a
;; vperm2f128 of tmp with itself (immediate 1) into tmp2, and a vector
;; add of tmp and tmp2 into operand 0.
1270 (define_expand "reduc_splus_v4df"
1271 [(match_operand:V4DF 0 "register_operand" "")
1272 (match_operand:V4DF 1 "register_operand" "")]
1275 rtx tmp = gen_reg_rtx (V4DFmode);
1276 rtx tmp2 = gen_reg_rtx (V4DFmode);
1277 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1278 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1279 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
1283 (define_expand "reduc_splus_v2df"
1284 [(match_operand:V2DF 0 "register_operand" "")
1285 (match_operand:V2DF 1 "register_operand" "")]
1288 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum-reduction expander for V8SF.  Two successive horizontal adds
;; (operand 1 with itself into tmp, then tmp with itself into tmp2), a
;; vperm2f128 of tmp2 with itself (immediate 1) back into tmp, and a
;; final vector add of tmp and tmp2 into operand 0.
1292 (define_expand "reduc_splus_v8sf"
1293 [(match_operand:V8SF 0 "register_operand" "")
1294 (match_operand:V8SF 1 "register_operand" "")]
1297 rtx tmp = gen_reg_rtx (V8SFmode);
1298 rtx tmp2 = gen_reg_rtx (V8SFmode);
1299 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1300 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1301 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1302 emit_insn (gen_addv4sf3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V4SF.  It contains two strategies: two
;; successive SSE3 horizontal adds through a fresh V4SF pseudo, and a
;; call to ix86_expand_reduc with the plain V4SF add generator.
;; NOTE(review): the two strategies look like alternatives of one
;; conditional (presumably on SSE3 availability) -- confirm against the
;; guarding test.
1306 (define_expand "reduc_splus_v4sf"
1307 [(match_operand:V4SF 0 "register_operand" "")
1308 (match_operand:V4SF 1 "register_operand" "")]
1313 rtx tmp = gen_reg_rtx (V4SFmode);
1314 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1315 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1318 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1322 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry carries its own enabling condition: AVX2 for the 256-bit
;; integer modes, AVX for the 256-bit float modes, SSE for V4SF.
1323 (define_mode_iterator REDUC_SMINMAX_MODE
1324 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1325 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1326 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1327 (V4SF "TARGET_SSE")])
;; Reduction expander instantiated for each smaxmin code and each mode
;; of REDUC_SMINMAX_MODE.  All work is delegated to ix86_expand_reduc,
;; which receives the generator of the matching element-wise
;; <code><mode>3 pattern plus the destination and source operands.
1329 (define_expand "reduc_<code>_<mode>"
1330 [(smaxmin:REDUC_SMINMAX_MODE
1331 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1332 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1335 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 integer modes.  As with the
;; REDUC_SMINMAX_MODE variant, everything is delegated to
;; ix86_expand_reduc with the generator of the element-wise
;; <code><mode>3 pattern.
1339 (define_expand "reduc_<code>_<mode>"
1341 (match_operand:VI_256 0 "register_operand" "")
1342 (match_operand:VI_256 1 "register_operand" ""))]
1345 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction expander for V8HI; delegates to
;; ix86_expand_reduc with the uminv8hi3 generator.
1349 (define_expand "reduc_umin_v8hi"
1351 (match_operand:V8HI 0 "register_operand" "")
1352 (match_operand:V8HI 1 "register_operand" ""))]
1355 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1361 ;; Parallel floating point comparisons
1363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare over the VF modes.  Operand 3 is an immediate
;; accepted by const_0_to_31_operand and is emitted as the first operand
;; of vcmp<ssemodesuffix> in AT&T syntax (last in Intel syntax).  The
;; instruction carries a one-byte immediate and a VEX prefix.
1365 (define_insn "avx_cmp<mode>3"
1366 [(set (match_operand:VF 0 "register_operand" "=x")
1368 [(match_operand:VF 1 "register_operand" "x")
1369 (match_operand:VF 2 "nonimmediate_operand" "xm")
1370 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1373 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1374 [(set_attr "type" "ssecmp")
1375 (set_attr "length_immediate" "1")
1376 (set_attr "prefix" "vex")
1377 (set_attr "mode" "<MODE>")])
;; Scalar counterpart of the AVX compare, over the 128-bit VF_128 modes.
;; Uses the scalar mnemonic suffix (<ssescalarmodesuffix>) and reports
;; the scalar mode in its "mode" attribute; the immediate operand 3 is
;; again restricted by const_0_to_31_operand.
1379 (define_insn "avx_vmcmp<mode>3"
1380 [(set (match_operand:VF_128 0 "register_operand" "=x")
1383 [(match_operand:VF_128 1 "register_operand" "x")
1384 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1385 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1390 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1391 [(set_attr "type" "ssecmp")
1392 (set_attr "length_immediate" "1")
1393 (set_attr "prefix" "vex")
1394 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing compare for commutative comparison codes.  The insn
;; condition requires operator 3 to be of class RTX_COMM_COMPARE, and the
;; "%" in operand 1's constraint marks operands 1 and 2 as commutable.
;; The comparison is encoded in the mnemonic via %D3.  Two alternatives
;; ("isa" noavx/avx): legacy two-operand cmp and three-operand vcmp.
1396 (define_insn "*<sse>_maskcmp<mode>3_comm"
1397 [(set (match_operand:VF 0 "register_operand" "=x,x")
1398 (match_operator:VF 3 "sse_comparison_operator"
1399 [(match_operand:VF 1 "register_operand" "%0,x")
1400 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1402 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1404 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1405 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1406 [(set_attr "isa" "noavx,avx")
1407 (set_attr "type" "ssecmp")
1408 (set_attr "length_immediate" "1")
1409 (set_attr "prefix" "orig,vex")
1410 (set_attr "mode" "<MODE>")])
;; General mask-producing compare over the VF modes for any operator
;; accepted by sse_comparison_operator.  Unlike the _comm variant,
;; operand 1 has no "%" marker, so the operand order is fixed.  The
;; comparison is encoded in the mnemonic via %D3; "isa" selects between
;; the legacy two-operand and the VEX three-operand form.
1412 (define_insn "<sse>_maskcmp<mode>3"
1413 [(set (match_operand:VF 0 "register_operand" "=x,x")
1414 (match_operator:VF 3 "sse_comparison_operator"
1415 [(match_operand:VF 1 "register_operand" "0,x")
1416 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1419 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1420 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1421 [(set_attr "isa" "noavx,avx")
1422 (set_attr "type" "ssecmp")
1423 (set_attr "length_immediate" "1")
1424 (set_attr "prefix" "orig,vex")
1425 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare over the VF_128 modes.  Uses the scalar
;; mnemonic suffix and reports the scalar mode in its "mode" attribute.
;; length_immediate is given explicitly (1) for the legacy alternative
;; and left at its default ("*") for the VEX alternative.
1427 (define_insn "<sse>_vmmaskcmp<mode>3"
1428 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1430 (match_operator:VF_128 3 "sse_comparison_operator"
1431 [(match_operand:VF_128 1 "register_operand" "0,x")
1432 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1437 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1438 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1439 [(set_attr "isa" "noavx,avx")
1440 (set_attr "type" "ssecmp")
1441 (set_attr "length_immediate" "1,*")
1442 (set_attr "prefix" "orig,vex")
1443 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets the flags register in CCFP mode from
;; element 0 of two vector operands (operand 1 may be memory).  Enabled
;; when SSE_FLOAT_MODE_P holds for the scalar mode.  Emits comi with an
;; optional "v" prefix (%v); prefix_data16 is computed from whether the
;; mode attribute is DF.
1445 (define_insn "<sse>_comi"
1446 [(set (reg:CCFP FLAGS_REG)
1449 (match_operand:<ssevecmode> 0 "register_operand" "x")
1450 (parallel [(const_int 0)]))
1452 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1453 (parallel [(const_int 0)]))))]
1454 "SSE_FLOAT_MODE_P (<MODE>mode)"
1455 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1456 [(set_attr "type" "ssecomi")
1457 (set_attr "prefix" "maybe_vex")
1458 (set_attr "prefix_rep" "0")
1459 (set (attr "prefix_data16")
1460 (if_then_else (eq_attr "mode" "DF")
1462 (const_string "0")))
1463 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but the flags register is set in
;; CCFPU mode and the ucomi mnemonic is emitted.  Element 0 of each
;; vector operand is compared; prefix_data16 again depends on whether
;; the mode attribute is DF.
1465 (define_insn "<sse>_ucomi"
1466 [(set (reg:CCFPU FLAGS_REG)
1469 (match_operand:<ssevecmode> 0 "register_operand" "x")
1470 (parallel [(const_int 0)]))
1472 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1473 (parallel [(const_int 0)]))))]
1474 "SSE_FLOAT_MODE_P (<MODE>mode)"
1475 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1476 [(set_attr "type" "ssecomi")
1477 (set_attr "prefix" "maybe_vex")
1478 (set_attr "prefix_rep" "0")
1479 (set (attr "prefix_data16")
1480 (if_then_else (eq_attr "mode" "DF")
1482 (const_string "0")))
1483 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for 256-bit data (V_256) driven by
;; a 256-bit floating-point comparison (VF_256, operands 4 and 5 under
;; operator 3).  The condition requires the data mode and the comparison
;; mode to have the same number of elements.  Expansion is performed by
;; ix86_expand_fp_vcond, whose success flag is captured in "ok".
1485 (define_expand "vcond<V_256:mode><VF_256:mode>"
1486 [(set (match_operand:V_256 0 "register_operand" "")
1488 (match_operator 3 ""
1489 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1490 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1491 (match_operand:V_256 1 "general_operand" "")
1492 (match_operand:V_256 2 "general_operand" "")))]
1494 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1495 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1497 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vcond expander: V_128 data selected by a
;; VF_128 floating-point comparison.  Data and comparison modes must
;; have the same number of elements; expansion is performed by
;; ix86_expand_fp_vcond, whose success flag is captured in "ok".
1502 (define_expand "vcond<V_128:mode><VF_128:mode>"
1503 [(set (match_operand:V_128 0 "register_operand" "")
1505 (match_operator 3 ""
1506 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1507 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1508 (match_operand:V_128 1 "general_operand" "")
1509 (match_operand:V_128 2 "general_operand" "")))]
1511 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1512 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1514 bool ok = ix86_expand_fp_vcond (operands);
1519 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1521 ;; Parallel floating point logical operations
1523 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not over the VF modes.  The output template is built at
;; output time: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode's own suffix
;; otherwise; which_alternative picks the legacy (andn) or VEX (vandn)
;; template, which snprintf formats into a static 32-byte buffer.
1525 (define_insn "<sse>_andnot<mode>3"
1526 [(set (match_operand:VF 0 "register_operand" "=x,x")
1529 (match_operand:VF 1 "register_operand" "0,x"))
1530 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1533 static char buf[32];
1536 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1538 switch (which_alternative)
1541 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1544 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1550 snprintf (buf, sizeof (buf), insn, suffix);
1553 [(set_attr "isa" "noavx,avx")
1554 (set_attr "type" "sselog")
1555 (set_attr "prefix" "orig,vex")
1556 (set_attr "mode" "<MODE>")])
;; Expander for the packed logical operations over the VF modes.  Both
;; inputs may be registers or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy to adjust them for the operation
;; code and mode before the insn is matched.
1558 (define_expand "<code><mode>3"
1559 [(set (match_operand:VF 0 "register_operand" "")
1561 (match_operand:VF 1 "nonimmediate_operand" "")
1562 (match_operand:VF 2 "nonimmediate_operand" "")))]
1564 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed logical operations over the VF modes.  Requires
;; TARGET_SSE and ix86_binary_operator_ok; the "%" on operand 1 marks the
;; inputs as commutable.  The mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else the mode's own
;; suffix, and the legacy or VEX template chosen by which_alternative is
;; formatted into a static 32-byte buffer with snprintf.
1566 (define_insn "*<code><mode>3"
1567 [(set (match_operand:VF 0 "register_operand" "=x,x")
1569 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1570 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1571 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1573 static char buf[32];
1576 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1578 switch (which_alternative)
1581 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1584 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1590 snprintf (buf, sizeof (buf), insn, suffix);
1593 [(set_attr "isa" "noavx,avx")
1594 (set_attr "type" "sselog")
1595 (set_attr "prefix" "orig,vex")
1596 (set_attr "mode" "<MODE>")])
;; Vector copysign expander over the VF modes, built from logical
;; operations on a sign-bit mask.  operands[3] is the mask produced by
;; ix86_build_signbit_mask; the pattern combines the complemented mask
;; with operand 1 and the mask with operand 2, then ORs the two
;; intermediate results (fresh pseudos operands[4] and operands[5]) into
;; operand 0.
1598 (define_expand "copysign<mode>3"
1601 (not:VF (match_dup 3))
1602 (match_operand:VF 1 "nonimmediate_operand" "")))
1604 (and:VF (match_dup 3)
1605 (match_operand:VF 2 "nonimmediate_operand" "")))
1606 (set (match_operand:VF 0 "register_operand" "")
1607 (ior:VF (match_dup 4) (match_dup 5)))]
1610 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1612 operands[4] = gen_reg_rtx (<MODE>mode);
1613 operands[5] = gen_reg_rtx (<MODE>mode);
1616 ;; Also define scalar versions. These are used for abs, neg, and
1617 ;; conditional move. Using subregs into vector modes causes register
1618 ;; allocation lossage. These patterns do not allow memory operands
1619 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not implemented with the packed vector
;; instruction.  All operands are registers only.  The mnemonic suffix
;; is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else the
;; suffix of the corresponding vector mode; the "mode" attribute reports
;; that vector mode rather than the scalar one.
1621 (define_insn "*andnot<mode>3"
1622 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1625 (match_operand:MODEF 1 "register_operand" "0,x"))
1626 (match_operand:MODEF 2 "register_operand" "x,x")))]
1627 "SSE_FLOAT_MODE_P (<MODE>mode)"
1629 static char buf[32];
1632 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1634 switch (which_alternative)
1637 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1640 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1646 snprintf (buf, sizeof (buf), insn, suffix);
1649 [(set_attr "isa" "noavx,avx")
1650 (set_attr "type" "sselog")
1651 (set_attr "prefix" "orig,vex")
1652 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operations implemented with the packed vector
;; instructions.  Register-only operands, with "%" marking the inputs as
;; commutable.  Suffix selection and template formatting follow the same
;; scheme as the scalar and-not pattern; the "mode" attribute reports
;; the corresponding vector mode.
1654 (define_insn "*<code><mode>3"
1655 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1657 (match_operand:MODEF 1 "register_operand" "%0,x")
1658 (match_operand:MODEF 2 "register_operand" "x,x")))]
1659 "SSE_FLOAT_MODE_P (<MODE>mode)"
1661 static char buf[32];
1664 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1666 switch (which_alternative)
1669 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1672 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1678 snprintf (buf, sizeof (buf), insn, suffix);
1681 [(set_attr "isa" "noavx,avx")
1682 (set_attr "type" "sselog")
1683 (set_attr "prefix" "orig,vex")
1684 (set_attr "mode" "<ssevecmode>")])
1686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1688 ;; FMA floating point multiply/accumulate instructions. These include
1689 ;; scalar versions of the instructions as well as vector versions.
1691 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Modes covered by the generic FMA patterns: scalar SF/DF plus the
;; 128-bit and 256-bit float vector modes.
1693 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
1695 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE: operands 1, 2
;; and 3 are used unmodified.  Enabled when either FMA or FMA4 is
;; available and SSE math is in use.
1696 (define_expand "fma<mode>4"
1697 [(set (match_operand:FMAMODE 0 "register_operand")
1699 (match_operand:FMAMODE 1 "nonimmediate_operand")
1700 (match_operand:FMAMODE 2 "nonimmediate_operand")
1701 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1702 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name fused multiply-subtract expander over FMAMODE: the
;; addend (operand 3) is negated.  Enabled when either FMA or FMA4 is
;; available and SSE math is in use.
1704 (define_expand "fms<mode>4"
1705 [(set (match_operand:FMAMODE 0 "register_operand")
1707 (match_operand:FMAMODE 1 "nonimmediate_operand")
1708 (match_operand:FMAMODE 2 "nonimmediate_operand")
1709 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1710 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-add expander over FMAMODE: the
;; first multiplicand (operand 1) is negated.  Enabled when either FMA
;; or FMA4 is available and SSE math is in use.
1712 (define_expand "fnma<mode>4"
1713 [(set (match_operand:FMAMODE 0 "register_operand")
1715 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1716 (match_operand:FMAMODE 2 "nonimmediate_operand")
1717 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1718 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-subtract expander over FMAMODE:
;; both the first multiplicand (operand 1) and the addend (operand 3)
;; are negated.  Enabled when either FMA or FMA4 is available and SSE
;; math is in use.
1720 (define_expand "fnms<mode>4"
1721 [(set (match_operand:FMAMODE 0 "register_operand")
1723 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1724 (match_operand:FMAMODE 2 "nonimmediate_operand")
1725 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1726 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1728 ;; The builtins for the intrinsics are not constrained by whether SSE math is enabled.
;; Named multiply-add expander over FMAMODE whose condition is only
;; TARGET_FMA || TARGET_FMA4 -- unlike the fma<mode>4 standard name, it
;; does not additionally require TARGET_SSE_MATH.
1729 (define_expand "fma4i_fmadd_<mode>"
1730 [(set (match_operand:FMAMODE 0 "register_operand")
1732 (match_operand:FMAMODE 1 "nonimmediate_operand")
1733 (match_operand:FMAMODE 2 "nonimmediate_operand")
1734 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1735 "TARGET_FMA || TARGET_FMA4")
;; Fused multiply-add insn over FMAMODE with five alternatives selected
;; by the "isa" attribute.  The first three are the FMA3 encodings, in
;; which one input is tied to the destination: 132 (operand 1 tied),
;; 213 (operand 1 tied, memory allowed in operand 3) and 231 (operand 3
;; tied).  The last two are the FMA4 four-operand form, differing only
;; in which input may be memory.
1737 (define_insn "*fma_fmadd_<mode>"
1738 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1740 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1741 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1742 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1743 "TARGET_FMA || TARGET_FMA4"
1745 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1746 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1747 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1748 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1749 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1750 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1751 (set_attr "type" "ssemuladd")
1752 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract insn over FMAMODE.  Same five-alternative
;; layout as the multiply-add insn: three FMA3 encodings (132, 213, 231)
;; with one input tied to the destination, then two FMA4 four-operand
;; alternatives; "isa" selects among them.
1754 (define_insn "*fma_fmsub_<mode>"
1755 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1757 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1758 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1760 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1761 "TARGET_FMA || TARGET_FMA4"
1763 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1764 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1765 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1766 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1767 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1768 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1769 (set_attr "type" "ssemuladd")
1770 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add insn over FMAMODE.  Same five-alternative
;; layout as the multiply-add insn: three FMA3 encodings (132, 213, 231)
;; followed by two FMA4 four-operand alternatives, selected by "isa".
1772 (define_insn "*fma_fnmadd_<mode>"
1773 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1776 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1777 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1778 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1779 "TARGET_FMA || TARGET_FMA4"
1781 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1782 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1783 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1784 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1785 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1786 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1787 (set_attr "type" "ssemuladd")
1788 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract insn over FMAMODE.  Same
;; five-alternative layout as the multiply-add insn: three FMA3
;; encodings (132, 213, 231) followed by two FMA4 four-operand
;; alternatives, selected by "isa".
1790 (define_insn "*fma_fnmsub_<mode>"
1791 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1794 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1795 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1797 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1798 "TARGET_FMA || TARGET_FMA4"
1800 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1801 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1802 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1803 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1804 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1805 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1806 (set_attr "type" "ssemuladd")
1807 (set_attr "mode" "<MODE>")])
1809 ;; FMA parallel floating point multiply addsub and subadd operations.
1811 ;; It would be possible to represent these without the UNSPEC as
1814 ;; (fma op1 op2 op3)
1815 ;; (fma op1 op2 (neg op3))
1818 ;; But this doesn't seem useful in practice.
;; Named expander for the packed multiply add/sub operation over the VF
;; vector modes; takes three register-or-memory inputs and is enabled
;; when either FMA or FMA4 is available.
1820 (define_expand "fmaddsub_<mode>"
1821 [(set (match_operand:VF 0 "register_operand")
1823 [(match_operand:VF 1 "nonimmediate_operand")
1824 (match_operand:VF 2 "nonimmediate_operand")
1825 (match_operand:VF 3 "nonimmediate_operand")]
1827 "TARGET_FMA || TARGET_FMA4")
;; Packed multiply add/sub insn over the VF modes.  Five alternatives
;; selected by "isa": the FMA3 encodings 132, 213 and 231 (one input
;; tied to the destination) and two FMA4 four-operand alternatives that
;; differ in which input may be memory.
1829 (define_insn "*fma_fmaddsub_<mode>"
1830 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1832 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1833 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1834 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
1836 "TARGET_FMA || TARGET_FMA4"
1838 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1839 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1840 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1841 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1842 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1843 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1844 (set_attr "type" "ssemuladd")
1845 (set_attr "mode" "<MODE>")])
;; Packed multiply sub/add insn over the VF modes.  Same
;; five-alternative layout as the add/sub insn: FMA3 encodings 132, 213
;; and 231 followed by two FMA4 four-operand alternatives, selected by
;; "isa".
1847 (define_insn "*fma_fmsubadd_<mode>"
1848 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1850 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1851 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1853 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
1855 "TARGET_FMA || TARGET_FMA4"
1857 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1858 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1859 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1860 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1861 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1862 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1863 (set_attr "type" "ssemuladd")
1864 (set_attr "mode" "<MODE>")])
1866 ;; FMA3 floating point scalar intrinsics. These merge result with
1867 ;; high-order elements from the destination register.
;; Named expander for the FMA3 scalar multiply-add intrinsic over the
;; 128-bit VF_128 modes, taking three register-or-memory inputs.
;; NOTE(review): the merge with the destination's upper elements and the
;; enabling condition are assumed to follow the section comment --
;; confirm against the full pattern.
1869 (define_expand "fmai_vmfmadd_<mode>"
1870 [(set (match_operand:VF_128 0 "register_operand")
1873 (match_operand:VF_128 1 "nonimmediate_operand")
1874 (match_operand:VF_128 2 "nonimmediate_operand")
1875 (match_operand:VF_128 3 "nonimmediate_operand"))
;; FMA3 scalar multiply-add insn over VF_128.  Three alternatives, one
;; per FMA3 encoding: 132 and 213 tie operand 1 to the destination, 231
;; ties operand 3.  The mnemonic uses the scalar suffix while the "mode"
;; attribute reports the full vector mode.
1880 (define_insn "*fmai_fmadd_<mode>"
1881 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1884 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1885 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1886 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1891 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1892 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1893 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1894 [(set_attr "type" "ssemuladd")
1895 (set_attr "mode" "<MODE>")])
;; FMA3 scalar multiply-subtract insn over VF_128.  Three alternatives
;; matching the 132, 213 and 231 encodings, each with one input tied to
;; the destination; the mnemonic uses the scalar suffix.
1897 (define_insn "*fmai_fmsub_<mode>"
1898 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1901 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1902 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1904 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1909 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1910 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1911 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1912 [(set_attr "type" "ssemuladd")
1913 (set_attr "mode" "<MODE>")])
;; FMA3 scalar negated multiply-add insn over VF_128.  Three
;; alternatives matching the 132, 213 and 231 encodings, each with one
;; input tied to the destination; the mnemonic uses the scalar suffix.
1915 (define_insn "*fmai_fnmadd_<mode>"
1916 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1920 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1921 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1922 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1927 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1928 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1929 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1930 [(set_attr "type" "ssemuladd")
1931 (set_attr "mode" "<MODE>")])
;; FMA3 scalar negated multiply-subtract insn over VF_128.  Three
;; alternatives matching the 132, 213 and 231 encodings, each with one
;; input tied to the destination; the mnemonic uses the scalar suffix.
1933 (define_insn "*fmai_fnmsub_<mode>"
1934 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1938 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1939 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1941 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1946 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1947 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1948 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1949 [(set_attr "type" "ssemuladd")
1950 (set_attr "mode" "<MODE>")])
1952 ;; FMA4 floating point scalar intrinsics. These write the
1953 ;; entire destination register, with the high-order elements zeroed.
;; Named expander for the FMA4 scalar multiply-add intrinsic over
;; VF_128.  The preparation statement supplies operands[4] as the
;; all-zero constant of the vector mode, which the matching insns
;; require through a const0_operand predicate.
1955 (define_expand "fma4i_vmfmadd_<mode>"
1956 [(set (match_operand:VF_128 0 "register_operand")
1959 (match_operand:VF_128 1 "nonimmediate_operand")
1960 (match_operand:VF_128 2 "nonimmediate_operand")
1961 (match_operand:VF_128 3 "nonimmediate_operand"))
1966 operands[4] = CONST0_RTX (<MODE>mode);
;; FMA4 scalar multiply-add insn over VF_128.  No input is tied to the
;; destination; the two alternatives differ only in whether operand 3 or
;; operand 2 may be memory.  Operand 4 must be a zero constant.  Emits
;; the four-operand vfmadd with the scalar suffix.
1969 (define_insn "*fma4i_vmfmadd_<mode>"
1970 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1973 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1974 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1975 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1976 (match_operand:VF_128 4 "const0_operand" "")
1979 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1980 [(set_attr "type" "ssemuladd")
1981 (set_attr "mode" "<MODE>")])
;; FMA4 scalar multiply-subtract insn over VF_128.  Two alternatives
;; differing in which input may be memory; operand 4 must be a zero
;; constant.  Emits the four-operand vfmsub with the scalar suffix.
1983 (define_insn "*fma4i_vmfmsub_<mode>"
1984 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1987 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1988 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1990 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1991 (match_operand:VF_128 4 "const0_operand" "")
1994 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1995 [(set_attr "type" "ssemuladd")
1996 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-add insn over VF_128.  Two alternatives
;; differing in which input may be memory; operand 4 must be a zero
;; constant.  Emits the four-operand vfnmadd with the scalar suffix.
1998 (define_insn "*fma4i_vmfnmadd_<mode>"
1999 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2003 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2004 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2005 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2006 (match_operand:VF_128 4 "const0_operand" "")
2009 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010 [(set_attr "type" "ssemuladd")
2011 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-subtract insn over VF_128.  Two
;; alternatives differing in which input may be memory; operand 4 must
;; be a zero constant.  Emits the four-operand vfnmsub with the scalar
;; suffix.
2013 (define_insn "*fma4i_vmfnmsub_<mode>"
2014 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2018 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2019 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2021 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2022 (match_operand:VF_128 4 "const0_operand" "")
2025 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2026 [(set_attr "type" "ssemuladd")
2027 (set_attr "mode" "<MODE>")])
2029 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2031 ;; Parallel single-precision floating point conversion operations
2033 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts a V2SI source (constraint "ym": MMX register or memory) to
;; V2SF and combines it with operand 1, which is tied to the V4SF
;; destination.  Emits the two-operand cvtpi2ps.
2035 (define_insn "sse_cvtpi2ps"
2036 [(set (match_operand:V4SF 0 "register_operand" "=x")
2039 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2040 (match_operand:V4SF 1 "register_operand" "0")
2043 "cvtpi2ps\t{%2, %0|%0, %2}"
2044 [(set_attr "type" "ssecvt")
2045 (set_attr "mode" "V4SF")])
;; Converts a V4SF source to integers through an unspec and keeps
;; elements 0 and 1, producing a V2SI result in a "y"-class register.
;; Emits cvtps2pi and is scheduled on the "mmx" unit.
2047 (define_insn "sse_cvtps2pi"
2048 [(set (match_operand:V2SI 0 "register_operand" "=y")
2050 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2052 (parallel [(const_int 0) (const_int 1)])))]
2054 "cvtps2pi\t{%1, %0|%0, %1}"
2055 [(set_attr "type" "ssecvt")
2056 (set_attr "unit" "mmx")
2057 (set_attr "mode" "DI")])
;; Truncating variant of the V4SF -> V2SI conversion: the source is
;; converted with the RTL "fix" operation and elements 0 and 1 are kept.
;; Emits cvttps2pi, is scheduled on the "mmx" unit, and explicitly sets
;; prefix_rep to 0.
2059 (define_insn "sse_cvttps2pi"
2060 [(set (match_operand:V2SI 0 "register_operand" "=y")
2062 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2063 (parallel [(const_int 0) (const_int 1)])))]
2065 "cvttps2pi\t{%1, %0|%0, %1}"
2066 [(set_attr "type" "ssecvt")
2067 (set_attr "unit" "mmx")
2068 (set_attr "prefix_rep" "0")
2069 (set_attr "mode" "SF")])
;; Converts an SImode integer (operand 2) to SF and combines it with the
;; V4SF operand 1.  Three alternatives: legacy with a register source,
;; legacy with a memory source (both tie operand 1 to the destination),
;; and the three-operand VEX form.  The two legacy alternatives are kept
;; separate so the per-CPU decode attributes can differ between register
;; and memory sources.
2071 (define_insn "sse_cvtsi2ss"
2072 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2075 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2076 (match_operand:V4SF 1 "register_operand" "0,0,x")
2080 cvtsi2ss\t{%2, %0|%0, %2}
2081 cvtsi2ss\t{%2, %0|%0, %2}
2082 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2083 [(set_attr "isa" "noavx,noavx,avx")
2084 (set_attr "type" "sseicvt")
2085 (set_attr "athlon_decode" "vector,double,*")
2086 (set_attr "amdfam10_decode" "vector,double,*")
2087 (set_attr "bdver1_decode" "double,direct,*")
2088 (set_attr "prefix" "orig,orig,vex")
2089 (set_attr "mode" "SF")])
;; 64-bit counterpart of the SI -> SF conversion: operand 2 is DImode
;; and the pattern additionally requires TARGET_64BIT.  Same
;; three-alternative layout (legacy register source, legacy memory
;; source, VEX).  The legacy alternatives set prefix_rex to 1 and the VEX
;; alternative sets length_vex to 4.
2091 (define_insn "sse_cvtsi2ssq"
2092 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2095 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2096 (match_operand:V4SF 1 "register_operand" "0,0,x")
2098 "TARGET_SSE && TARGET_64BIT"
2100 cvtsi2ssq\t{%2, %0|%0, %2}
2101 cvtsi2ssq\t{%2, %0|%0, %2}
2102 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2103 [(set_attr "isa" "noavx,noavx,avx")
2104 (set_attr "type" "sseicvt")
2105 (set_attr "athlon_decode" "vector,double,*")
2106 (set_attr "amdfam10_decode" "vector,double,*")
2107 (set_attr "bdver1_decode" "double,direct,*")
2108 (set_attr "length_vex" "*,*,4")
2109 (set_attr "prefix_rex" "1,1,*")
2110 (set_attr "prefix" "orig,orig,vex")
2111 (set_attr "mode" "SF")])
;; Converts element 0 of a V4SF operand to an SImode integer through
;; UNSPEC_FIX_NOTRUNC (i.e. not the truncating "fix").  The register and
;; memory sources are separate alternatives so the decode attributes can
;; differ.  Emits cvtss2si with an optional "v" prefix.
;; NOTE(review): unlike sse_cvtss2si_2, no amdfam10_decode attribute is
;; set here -- confirm that this is intentional.
2113 (define_insn "sse_cvtss2si"
2114 [(set (match_operand:SI 0 "register_operand" "=r,r")
2117 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2118 (parallel [(const_int 0)]))]
2119 UNSPEC_FIX_NOTRUNC))]
2121 "%vcvtss2si\t{%1, %0|%0, %1}"
2122 [(set_attr "type" "sseicvt")
2123 (set_attr "athlon_decode" "double,vector")
2124 (set_attr "bdver1_decode" "double,double")
2125 (set_attr "prefix_rep" "1")
2126 (set_attr "prefix" "maybe_vex")
2127 (set_attr "mode" "SI")])
;; Variant of the non-truncating SF -> SI conversion that takes a plain
;; SFmode operand rather than element 0 of a V4SF vector.  Emits the
;; same cvtss2si instruction with an optional "v" prefix.
2129 (define_insn "sse_cvtss2si_2"
2130 [(set (match_operand:SI 0 "register_operand" "=r,r")
2131 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2132 UNSPEC_FIX_NOTRUNC))]
2134 "%vcvtss2si\t{%1, %0|%0, %1}"
2135 [(set_attr "type" "sseicvt")
2136 (set_attr "athlon_decode" "double,vector")
2137 (set_attr "amdfam10_decode" "double,double")
2138 (set_attr "bdver1_decode" "double,double")
2139 (set_attr "prefix_rep" "1")
2140 (set_attr "prefix" "maybe_vex")
2141 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse_cvtss2si: converts element 0 of a V4SF
;; operand to a DImode integer through UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_64BIT.  The "{q}" in the template adds the q suffix in AT&T
;; syntax only.
;; NOTE(review): unlike sse_cvtss2siq_2, no amdfam10_decode attribute is
;; set here -- confirm that this is intentional.
2143 (define_insn "sse_cvtss2siq"
2144 [(set (match_operand:DI 0 "register_operand" "=r,r")
2147 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2148 (parallel [(const_int 0)]))]
2149 UNSPEC_FIX_NOTRUNC))]
2150 "TARGET_SSE && TARGET_64BIT"
2151 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2152 [(set_attr "type" "sseicvt")
2153 (set_attr "athlon_decode" "double,vector")
2154 (set_attr "bdver1_decode" "double,double")
2155 (set_attr "prefix_rep" "1")
2156 (set_attr "prefix" "maybe_vex")
2157 (set_attr "mode" "DI")])
;; Variant of the non-truncating SF -> DI conversion that takes a plain
;; SFmode operand rather than element 0 of a V4SF vector.  Requires
;; TARGET_64BIT; the "{q}" suffix is emitted in AT&T syntax only.
2159 (define_insn "sse_cvtss2siq_2"
2160 [(set (match_operand:DI 0 "register_operand" "=r,r")
2161 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2162 UNSPEC_FIX_NOTRUNC))]
2163 "TARGET_SSE && TARGET_64BIT"
2164 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2165 [(set_attr "type" "sseicvt")
2166 (set_attr "athlon_decode" "double,vector")
2167 (set_attr "amdfam10_decode" "double,double")
2168 (set_attr "bdver1_decode" "double,double")
2169 (set_attr "prefix_rep" "1")
2170 (set_attr "prefix" "maybe_vex")
2171 (set_attr "mode" "DI")])
;; Truncating conversion of element 0 of a V4SF operand to an SImode
;; integer.  Register and memory sources are separate alternatives so
;; the decode attributes can differ.  Emits cvttss2si with an optional
;; "v" prefix.
2173 (define_insn "sse_cvttss2si"
2174 [(set (match_operand:SI 0 "register_operand" "=r,r")
2177 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2178 (parallel [(const_int 0)]))))]
2180 "%vcvttss2si\t{%1, %0|%0, %1}"
2181 [(set_attr "type" "sseicvt")
2182 (set_attr "athlon_decode" "double,vector")
2183 (set_attr "amdfam10_decode" "double,double")
2184 (set_attr "bdver1_decode" "double,double")
2185 (set_attr "prefix_rep" "1")
2186 (set_attr "prefix" "maybe_vex")
2187 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse_cvttss2si: truncating conversion of
;; element 0 of a V4SF operand to a DImode integer.  Requires
;; TARGET_64BIT; the "{q}" suffix is emitted in AT&T syntax only.
2189 (define_insn "sse_cvttss2siq"
2190 [(set (match_operand:DI 0 "register_operand" "=r,r")
2193 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2194 (parallel [(const_int 0)]))))]
2195 "TARGET_SSE && TARGET_64BIT"
2196 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2197 [(set_attr "type" "sseicvt")
2198 (set_attr "athlon_decode" "double,vector")
2199 (set_attr "amdfam10_decode" "double,double")
2200 (set_attr "bdver1_decode" "double,double")
2201 (set_attr "prefix_rep" "1")
2202 (set_attr "prefix" "maybe_vex")
2203 (set_attr "mode" "DI")])
;; cvtdq2ps for every mode of the VF1 iterator: operand 1 is the matching
;; integer vector (<sseintvecmode>, register or memory), operand 0 the
;; VF1-mode destination register.
2205 (define_insn "float<sseintvecmodelower><mode>2"
2206 [(set (match_operand:VF1 0 "register_operand" "=x")
2208 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2210 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2211 [(set_attr "type" "ssecvt")
2212 (set_attr "prefix" "maybe_vex")
2213 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the integer-vector to VF1 "floatuns" conversion.  All work
;; is delegated to ix86_expand_vector_convert_uns_vsivsf.  Available with
;; TARGET_SSE2 for V4SFmode; any other VF1 mode additionally needs
;; TARGET_AVX2.
2215 (define_expand "floatuns<sseintvecmodelower><mode>2"
2216 [(match_operand:VF1 0 "register_operand" "")
2217 (match_operand:<sseintvecmode> 1 "register_operand" "")]
2218 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2220 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; 256-bit vcvtps2dq: V8SF operand 1 (register or memory) to V8SI register
;; operand 0, modelled with UNSPEC_FIX_NOTRUNC.  Always VEX-encoded.
2224 (define_insn "avx_cvtps2dq256"
2225 [(set (match_operand:V8SI 0 "register_operand" "=x")
2226 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2227 UNSPEC_FIX_NOTRUNC))]
2229 "vcvtps2dq\t{%1, %0|%0, %1}"
2230 [(set_attr "type" "ssecvt")
2231 (set_attr "prefix" "vex")
2232 (set_attr "mode" "OI")])
;; 128-bit cvtps2dq: V4SF operand 1 (register or memory) to V4SI register
;; operand 0, modelled with UNSPEC_FIX_NOTRUNC.  The prefix_data16
;; attribute is computed from a TARGET_AVX test.
2234 (define_insn "sse2_cvtps2dq"
2235 [(set (match_operand:V4SI 0 "register_operand" "=x")
2236 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2237 UNSPEC_FIX_NOTRUNC))]
2239 "%vcvtps2dq\t{%1, %0|%0, %1}"
2240 [(set_attr "type" "ssecvt")
2241 (set (attr "prefix_data16")
2243 (match_test "TARGET_AVX")
2245 (const_string "1")))
2246 (set_attr "prefix" "maybe_vex")
2247 (set_attr "mode" "TI")])
;; 256-bit vcvttps2dq: (fix:V8SI ...) of V8SF operand 1 (register or
;; memory) into V8SI register operand 0.  Always VEX-encoded.
2249 (define_insn "fix_truncv8sfv8si2"
2250 [(set (match_operand:V8SI 0 "register_operand" "=x")
2251 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2253 "vcvttps2dq\t{%1, %0|%0, %1}"
2254 [(set_attr "type" "ssecvt")
2255 (set_attr "prefix" "vex")
2256 (set_attr "mode" "OI")])
;; 128-bit cvttps2dq: (fix:V4SI ...) of V4SF operand 1 (register or
;; memory) into V4SI register operand 0.  prefix_rep and prefix_data16 are
;; computed from TARGET_AVX tests.
;; NOTE(review): prefix_data16 is specified twice below -- once through the
;; TARGET_AVX conditional and once unconditionally as "0".  Confirm which
;; definition is intended and drop the other.
2258 (define_insn "fix_truncv4sfv4si2"
2259 [(set (match_operand:V4SI 0 "register_operand" "=x")
2260 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2262 "%vcvttps2dq\t{%1, %0|%0, %1}"
2263 [(set_attr "type" "ssecvt")
2264 (set (attr "prefix_rep")
2266 (match_test "TARGET_AVX")
2268 (const_string "1")))
2269 (set (attr "prefix_data16")
2271 (match_test "TARGET_AVX")
2273 (const_string "0")))
2274 (set_attr "prefix_data16" "0")
2275 (set_attr "prefix" "maybe_vex")
2276 (set_attr "mode" "TI")])
;; Expander for the VF1 to integer-vector "fixuns_trunc" conversion.
;; ix86_expand_adjust_ufix_to_sfix_si produces an adjusted input (tmp[0])
;; and fills tmp[2]; the adjusted input goes through the signed fix_trunc
;; pattern into tmp[1], and the final result is tmp[1] XOR tmp[2].
2278 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2279 [(match_operand:<sseintvecmode> 0 "register_operand" "")
2280 (match_operand:VF1 1 "register_operand" "")]
2284 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2285 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2286 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2287 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2291 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2293 ;; Parallel double-precision floating point conversion operations
2295 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF ...) of V2SI operand 1 into V2DF register
;; operand 0.  Alternative 0 takes the source from a "y" register and is
;; attributed to the mmx unit; alternative 1 takes it from memory.
2297 (define_insn "sse2_cvtpi2pd"
2298 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2299 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2301 "cvtpi2pd\t{%1, %0|%0, %1}"
2302 [(set_attr "type" "ssecvt")
2303 (set_attr "unit" "mmx,*")
2304 (set_attr "prefix_data16" "1,*")
2305 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 (register or memory) to V2SI operand 0 in a
;; "y" register, modelled with UNSPEC_FIX_NOTRUNC.  Attributed to the mmx
;; unit.
2307 (define_insn "sse2_cvtpd2pi"
2308 [(set (match_operand:V2SI 0 "register_operand" "=y")
2309 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2310 UNSPEC_FIX_NOTRUNC))]
2312 "cvtpd2pi\t{%1, %0|%0, %1}"
2313 [(set_attr "type" "ssecvt")
2314 (set_attr "unit" "mmx")
2315 (set_attr "bdver1_decode" "double")
2316 (set_attr "prefix_data16" "1")
2317 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI ...) of V2DF operand 1 (register or memory) into
;; V2SI operand 0 in a "y" register.  Attributed to the mmx unit.
2319 (define_insn "sse2_cvttpd2pi"
2320 [(set (match_operand:V2SI 0 "register_operand" "=y")
2321 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2323 "cvttpd2pi\t{%1, %0|%0, %1}"
2324 [(set_attr "type" "ssecvt")
2325 (set_attr "unit" "mmx")
2326 (set_attr "bdver1_decode" "double")
2327 (set_attr "prefix_data16" "1")
2328 (set_attr "mode" "TI")])
;; cvtsi2sd: (float:DF ...) of SI operand 2 combined with V2DF operand 1
;; into V2DF operand 0.  Alternatives 0/1 are the two-operand SSE forms
;; (operand 1 tied to the destination, source in a register or in memory);
;; alternative 2 is the three-operand VEX form.
2330 (define_insn "sse2_cvtsi2sd"
2331 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2334 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2335 (match_operand:V2DF 1 "register_operand" "0,0,x")
2339 cvtsi2sd\t{%2, %0|%0, %2}
2340 cvtsi2sd\t{%2, %0|%0, %2}
2341 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2342 [(set_attr "isa" "noavx,noavx,avx")
2343 (set_attr "type" "sseicvt")
2344 (set_attr "athlon_decode" "double,direct,*")
2345 (set_attr "amdfam10_decode" "vector,double,*")
2346 (set_attr "bdver1_decode" "double,direct,*")
2347 (set_attr "prefix" "orig,orig,vex")
2348 (set_attr "mode" "DF")])
;; 64-bit-source form of cvtsi2sd: (float:DF ...) of DI operand 2 combined
;; with V2DF operand 1 into V2DF operand 0.  Alternatives 0/1 are the SSE
;; forms (marked prefix_rex "1"), alternative 2 the VEX form (length_vex
;; "4").  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2350 (define_insn "sse2_cvtsi2sdq"
2351 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2354 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2355 (match_operand:V2DF 1 "register_operand" "0,0,x")
2357 "TARGET_SSE2 && TARGET_64BIT"
2359 cvtsi2sdq\t{%2, %0|%0, %2}
2360 cvtsi2sdq\t{%2, %0|%0, %2}
2361 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2362 [(set_attr "isa" "noavx,noavx,avx")
2363 (set_attr "type" "sseicvt")
2364 (set_attr "athlon_decode" "double,direct,*")
2365 (set_attr "amdfam10_decode" "vector,double,*")
2366 (set_attr "bdver1_decode" "double,direct,*")
2367 (set_attr "length_vex" "*,*,4")
2368 (set_attr "prefix_rex" "1,1,*")
2369 (set_attr "prefix" "orig,orig,vex")
2370 (set_attr "mode" "DF")])
;; cvtsd2si: element 0 of V2DF operand 1 (register or memory) is converted
;; into SI register operand 0, modelled with UNSPEC_FIX_NOTRUNC.
2372 (define_insn "sse2_cvtsd2si"
2373 [(set (match_operand:SI 0 "register_operand" "=r,r")
2376 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2377 (parallel [(const_int 0)]))]
2378 UNSPEC_FIX_NOTRUNC))]
2380 "%vcvtsd2si\t{%1, %0|%0, %1}"
2381 [(set_attr "type" "sseicvt")
2382 (set_attr "athlon_decode" "double,vector")
2383 (set_attr "bdver1_decode" "double,double")
2384 (set_attr "prefix_rep" "1")
2385 (set_attr "prefix" "maybe_vex")
2386 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source (operand 1) is a scalar DF register or
;; memory operand instead of a V2DF vector; same UNSPEC_FIX_NOTRUNC
;; wrapper, SI register destination.
2388 (define_insn "sse2_cvtsd2si_2"
2389 [(set (match_operand:SI 0 "register_operand" "=r,r")
2390 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2391 UNSPEC_FIX_NOTRUNC))]
2393 "%vcvtsd2si\t{%1, %0|%0, %1}"
2394 [(set_attr "type" "sseicvt")
2395 (set_attr "athlon_decode" "double,vector")
2396 (set_attr "amdfam10_decode" "double,double")
2397 (set_attr "bdver1_decode" "double,double")
2398 (set_attr "prefix_rep" "1")
2399 (set_attr "prefix" "maybe_vex")
2400 (set_attr "mode" "SI")])
;; 64-bit form of cvtsd2si: element 0 of V2DF operand 1 is converted into
;; DI register operand 0, modelled with UNSPEC_FIX_NOTRUNC.  Enabled only
;; for TARGET_SSE2 && TARGET_64BIT.
2402 (define_insn "sse2_cvtsd2siq"
2403 [(set (match_operand:DI 0 "register_operand" "=r,r")
2406 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2407 (parallel [(const_int 0)]))]
2408 UNSPEC_FIX_NOTRUNC))]
2409 "TARGET_SSE2 && TARGET_64BIT"
2410 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2411 [(set_attr "type" "sseicvt")
2412 (set_attr "athlon_decode" "double,vector")
2413 (set_attr "bdver1_decode" "double,double")
2414 (set_attr "prefix_rep" "1")
2415 (set_attr "prefix" "maybe_vex")
2416 (set_attr "mode" "DI")])
;; Scalar-source variant of the 64-bit cvtsd2si pattern: DF operand 1
;; (register or memory) to DI register operand 0 via UNSPEC_FIX_NOTRUNC.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
2418 (define_insn "sse2_cvtsd2siq_2"
2419 [(set (match_operand:DI 0 "register_operand" "=r,r")
2420 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2421 UNSPEC_FIX_NOTRUNC))]
2422 "TARGET_SSE2 && TARGET_64BIT"
2423 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2424 [(set_attr "type" "sseicvt")
2425 (set_attr "athlon_decode" "double,vector")
2426 (set_attr "amdfam10_decode" "double,double")
2427 (set_attr "bdver1_decode" "double,double")
2428 (set_attr "prefix_rep" "1")
2429 (set_attr "prefix" "maybe_vex")
2430 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of V2DF operand 1 (register or memory) is converted
;; into SI register operand 0.
2432 (define_insn "sse2_cvttsd2si"
2433 [(set (match_operand:SI 0 "register_operand" "=r,r")
2436 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2437 (parallel [(const_int 0)]))))]
2439 "%vcvttsd2si\t{%1, %0|%0, %1}"
2440 [(set_attr "type" "sseicvt")
2441 (set_attr "athlon_decode" "double,vector")
2442 (set_attr "amdfam10_decode" "double,double")
2443 (set_attr "bdver1_decode" "double,double")
2444 (set_attr "prefix_rep" "1")
2445 (set_attr "prefix" "maybe_vex")
2446 (set_attr "mode" "SI")])
;; 64-bit form of cvttsd2si: element 0 of V2DF operand 1 is converted into
;; DI register operand 0.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2448 (define_insn "sse2_cvttsd2siq"
2449 [(set (match_operand:DI 0 "register_operand" "=r,r")
2452 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2453 (parallel [(const_int 0)]))))]
2454 "TARGET_SSE2 && TARGET_64BIT"
2455 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2456 [(set_attr "type" "sseicvt")
2457 (set_attr "athlon_decode" "double,vector")
2458 (set_attr "amdfam10_decode" "double,double")
2459 (set_attr "bdver1_decode" "double,double")
2460 (set_attr "prefix_rep" "1")
2461 (set_attr "prefix" "maybe_vex")
2462 (set_attr "mode" "DI")])
;; vcvtdq2pd, 256-bit result: (float:V4DF ...) of V4SI operand 1 (register
;; or memory) into V4DF register operand 0.  Always VEX-encoded.
2464 (define_insn "floatv4siv4df2"
2465 [(set (match_operand:V4DF 0 "register_operand" "=x")
2466 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2468 "vcvtdq2pd\t{%1, %0|%0, %1}"
2469 [(set_attr "type" "ssecvt")
2470 (set_attr "prefix" "vex")
2471 (set_attr "mode" "V4DF")])
;; vcvtdq2pd on elements 0..3 of a V8SI operand 1, producing V4DF operand 0.
;; The source is printed with the %x modifier in the output template.
2473 (define_insn "avx_cvtdq2pd256_2"
2474 [(set (match_operand:V4DF 0 "register_operand" "=x")
2477 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2478 (parallel [(const_int 0) (const_int 1)
2479 (const_int 2) (const_int 3)]))))]
2481 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2482 [(set_attr "type" "ssecvt")
2483 (set_attr "prefix" "vex")
2484 (set_attr "mode" "V4DF")])
;; cvtdq2pd on elements 0 and 1 of V4SI operand 1 (register or memory),
;; producing V2DF register operand 0.  The Intel-syntax template prints the
;; source with the %q modifier.
2486 (define_insn "sse2_cvtdq2pd"
2487 [(set (match_operand:V2DF 0 "register_operand" "=x")
2490 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2491 (parallel [(const_int 0) (const_int 1)]))))]
2493 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2494 [(set_attr "type" "ssecvt")
2495 (set_attr "prefix" "maybe_vex")
2496 (set_attr "mode" "V2DF")])
;; vcvtpd2dq{y}: V4DF operand 1 (register or memory) to V4SI register
;; operand 0, modelled with UNSPEC_FIX_NOTRUNC.  Always VEX-encoded.
2498 (define_insn "avx_cvtpd2dq256"
2499 [(set (match_operand:V4SI 0 "register_operand" "=x")
2500 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2501 UNSPEC_FIX_NOTRUNC))]
2503 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2504 [(set_attr "type" "ssecvt")
2505 (set_attr "prefix" "vex")
2506 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from the V4SI conversion of V4DF
;; operand 1.  The preparation statement supplies operands[2] as the
;; V4SImode zero constant.
2508 (define_expand "avx_cvtpd2dq256_2"
2509 [(set (match_operand:V8SI 0 "register_operand" "")
2511 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2515 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form with a V8SI destination: the V4SI conversion of V4DF operand 1
;; is paired with operand 2, which must satisfy const0_operand.  The
;; destination is printed with the %x modifier in the vcvtpd2dq{y} template.
2517 (define_insn "*avx_cvtpd2dq256_2"
2518 [(set (match_operand:V8SI 0 "register_operand" "=x")
2520 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2522 (match_operand:V4SI 2 "const0_operand" "")))]
2524 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2525 [(set_attr "type" "ssecvt")
2526 (set_attr "prefix" "vex")
2527 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from the V2SI conversion of V2DF
;; operand 1.  The preparation statement supplies operands[2] as the
;; V2SImode zero constant.
2529 (define_expand "sse2_cvtpd2dq"
2530 [(set (match_operand:V4SI 0 "register_operand" "")
2532 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2536 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn form with a V4SI destination: the V2SI conversion of V2DF operand 1
;; is paired with operand 2, which must satisfy const0_operand.  The output
;; code returns either the vcvtpd2dq{x} or the plain cvtpd2dq template.
2538 (define_insn "*sse2_cvtpd2dq"
2539 [(set (match_operand:V4SI 0 "register_operand" "=x")
2541 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2543 (match_operand:V2SI 2 "const0_operand" "")))]
2547 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2549 return "cvtpd2dq\t{%1, %0|%0, %1}";
2551 [(set_attr "type" "ssecvt")
2552 (set_attr "prefix_rep" "1")
2553 (set_attr "prefix_data16" "0")
2554 (set_attr "prefix" "maybe_vex")
2555 (set_attr "mode" "TI")
2556 (set_attr "amdfam10_decode" "double")
2557 (set_attr "athlon_decode" "vector")
2558 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: (fix:V4SI ...) of V4DF operand 1 (register or memory)
;; into V4SI register operand 0.  Always VEX-encoded.
2560 (define_insn "fix_truncv4dfv4si2"
2561 [(set (match_operand:V4SI 0 "register_operand" "=x")
2562 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2564 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2565 [(set_attr "type" "ssecvt")
2566 (set_attr "prefix" "vex")
2567 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from (fix:V4SI ...) of V4DF operand 1.
;; The preparation statement supplies operands[2] as the V4SImode zero
;; constant.
2569 (define_expand "avx_cvttpd2dq256_2"
2570 [(set (match_operand:V8SI 0 "register_operand" "")
2572 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2575 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form with a V8SI destination: (fix:V4SI ...) of V4DF operand 1 is
;; paired with operand 2, which must satisfy const0_operand.  The
;; destination is printed with the %x modifier in the vcvttpd2dq{y}
;; template.
2577 (define_insn "*avx_cvttpd2dq256_2"
2578 [(set (match_operand:V8SI 0 "register_operand" "=x")
2580 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2581 (match_operand:V4SI 2 "const0_operand" "")))]
2583 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2584 [(set_attr "type" "ssecvt")
2585 (set_attr "prefix" "vex")
2586 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from (fix:V2SI ...) of V2DF operand 1.
;; The preparation statement supplies operands[2] as the V2SImode zero
;; constant.
2588 (define_expand "sse2_cvttpd2dq"
2589 [(set (match_operand:V4SI 0 "register_operand" "")
2591 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2594 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn form with a V4SI destination: (fix:V2SI ...) of V2DF operand 1 is
;; paired with operand 2, which must satisfy const0_operand.  The output
;; code returns either the vcvttpd2dq{x} or the plain cvttpd2dq template.
2596 (define_insn "*sse2_cvttpd2dq"
2597 [(set (match_operand:V4SI 0 "register_operand" "=x")
2599 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2600 (match_operand:V2SI 2 "const0_operand" "")))]
2604 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2606 return "cvttpd2dq\t{%1, %0|%0, %1}";
2608 [(set_attr "type" "ssecvt")
2609 (set_attr "amdfam10_decode" "double")
2610 (set_attr "athlon_decode" "vector")
2611 (set_attr "bdver1_decode" "double")
2612 (set_attr "prefix" "maybe_vex")
2613 (set_attr "mode" "TI")])
;; cvtsd2ss: (float_truncate:V2SF ...) of V2DF operand 2 combined with V4SF
;; operand 1 into V4SF operand 0.  Alternatives 0/1 are the two-operand SSE
;; forms (operand 1 tied to the destination); alternative 2 is the
;; three-operand VEX form.
2615 (define_insn "sse2_cvtsd2ss"
2616 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2619 (float_truncate:V2SF
2620 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2621 (match_operand:V4SF 1 "register_operand" "0,0,x")
2625 cvtsd2ss\t{%2, %0|%0, %2}
2626 cvtsd2ss\t{%2, %0|%0, %2}
2627 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2628 [(set_attr "isa" "noavx,noavx,avx")
2629 (set_attr "type" "ssecvt")
2630 (set_attr "athlon_decode" "vector,double,*")
2631 (set_attr "amdfam10_decode" "vector,double,*")
2632 (set_attr "bdver1_decode" "direct,direct,*")
2633 (set_attr "prefix" "orig,orig,vex")
2634 (set_attr "mode" "SF")])
;; cvtss2sd: elements 0 and 1 of V4SF operand 2 feed the conversion, which
;; is combined with V2DF operand 1 into V2DF operand 0.  Alternatives 0/1
;; are the two-operand SSE forms (operand 1 tied to the destination);
;; alternative 2 is the three-operand VEX form.
2636 (define_insn "sse2_cvtss2sd"
2637 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2641 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2642 (parallel [(const_int 0) (const_int 1)])))
2643 (match_operand:V2DF 1 "register_operand" "0,0,x")
2647 cvtss2sd\t{%2, %0|%0, %2}
2648 cvtss2sd\t{%2, %0|%0, %2}
2649 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2650 [(set_attr "isa" "noavx,noavx,avx")
2651 (set_attr "type" "ssecvt")
2652 (set_attr "amdfam10_decode" "vector,double,*")
2653 (set_attr "athlon_decode" "direct,direct,*")
2654 (set_attr "bdver1_decode" "direct,direct,*")
2655 (set_attr "prefix" "orig,orig,vex")
2656 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: (float_truncate:V4SF ...) of V4DF operand 1 (register or
;; memory) into V4SF register operand 0.  Always VEX-encoded.
2658 (define_insn "avx_cvtpd2ps256"
2659 [(set (match_operand:V4SF 0 "register_operand" "=x")
2660 (float_truncate:V4SF
2661 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2663 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2664 [(set_attr "type" "ssecvt")
2665 (set_attr "prefix" "vex")
2666 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result from (float_truncate:V2SF ...) of V2DF
;; operand 1.  The preparation statement supplies operands[2] as the
;; V2SFmode zero constant.
2668 (define_expand "sse2_cvtpd2ps"
2669 [(set (match_operand:V4SF 0 "register_operand" "")
2671 (float_truncate:V2SF
2672 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2675 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn form with a V4SF destination: (float_truncate:V2SF ...) of V2DF
;; operand 1 is paired with operand 2, which must satisfy const0_operand.
;; The output code returns either the vcvtpd2ps{x} or the plain cvtpd2ps
;; template.
2677 (define_insn "*sse2_cvtpd2ps"
2678 [(set (match_operand:V4SF 0 "register_operand" "=x")
2680 (float_truncate:V2SF
2681 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2682 (match_operand:V2SF 2 "const0_operand" "")))]
2686 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2688 return "cvtpd2ps\t{%1, %0|%0, %1}";
2690 [(set_attr "type" "ssecvt")
2691 (set_attr "amdfam10_decode" "double")
2692 (set_attr "athlon_decode" "vector")
2693 (set_attr "bdver1_decode" "double")
2694 (set_attr "prefix_data16" "1")
2695 (set_attr "prefix" "maybe_vex")
2696 (set_attr "mode" "V4SF")])
;; vcvtps2pd, 256-bit result: V4SF operand 1 (register or memory) to V4DF
;; register operand 0.  Always VEX-encoded.
2698 (define_insn "avx_cvtps2pd256"
2699 [(set (match_operand:V4DF 0 "register_operand" "=x")
2701 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2703 "vcvtps2pd\t{%1, %0|%0, %1}"
2704 [(set_attr "type" "ssecvt")
2705 (set_attr "prefix" "vex")
2706 (set_attr "mode" "V4DF")])
;; vcvtps2pd on elements 0..3 of a V8SF operand 1, producing V4DF operand 0.
;; The source is printed with the %x modifier in the output template.
2708 (define_insn "*avx_cvtps2pd256_2"
2709 [(set (match_operand:V4DF 0 "register_operand" "=x")
2712 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2713 (parallel [(const_int 0) (const_int 1)
2714 (const_int 2) (const_int 3)]))))]
2716 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2717 [(set_attr "type" "ssecvt")
2718 (set_attr "prefix" "vex")
2719 (set_attr "mode" "V4DF")])
;; cvtps2pd on elements 0 and 1 of V4SF operand 1 (register or memory),
;; producing V2DF register operand 0.  The Intel-syntax template prints the
;; source with the %q modifier.
2721 (define_insn "sse2_cvtps2pd"
2722 [(set (match_operand:V2DF 0 "register_operand" "=x")
2725 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2726 (parallel [(const_int 0) (const_int 1)]))))]
2728 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2729 [(set_attr "type" "ssecvt")
2730 (set_attr "amdfam10_decode" "direct")
2731 (set_attr "athlon_decode" "double")
2732 (set_attr "bdver1_decode" "double")
2733 (set_attr "prefix_data16" "0")
2734 (set_attr "prefix" "maybe_vex")
2735 (set_attr "mode" "V2DF")])
;; Expander with V4SF operand 1 and V2DF result operand 0, in two steps.
;; The first visible step selects elements 6,7,2,3 of a combination that
;; includes operand 1; the second sets operand 0 from elements 0 and 1 of
;; an intermediate.  operands[2] is a fresh V4SFmode scratch register.
2737 (define_expand "vec_unpacks_hi_v4sf"
2742 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2743 (parallel [(const_int 6) (const_int 7)
2744 (const_int 2) (const_int 3)])))
2745 (set (match_operand:V2DF 0 "register_operand" "")
2749 (parallel [(const_int 0) (const_int 1)]))))]
2751 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander with V8SF operand 1 and V4DF result operand 0.  Elements 4..7
;; of operand 1 are selected first; operands[2] is a fresh V4SFmode scratch
;; register.
2753 (define_expand "vec_unpacks_hi_v8sf"
2756 (match_operand:V8SF 1 "nonimmediate_operand" "")
2757 (parallel [(const_int 4) (const_int 5)
2758 (const_int 6) (const_int 7)])))
2759 (set (match_operand:V4DF 0 "register_operand" "")
2763 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander setting V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
2765 (define_expand "vec_unpacks_lo_v4sf"
2766 [(set (match_operand:V2DF 0 "register_operand" "")
2769 (match_operand:V4SF 1 "nonimmediate_operand" "")
2770 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander setting V4DF operand 0 from elements 0..3 of V8SF operand 1.
2773 (define_expand "vec_unpacks_lo_v8sf"
2774 [(set (match_operand:V4DF 0 "register_operand" "")
2777 (match_operand:V8SF 1 "nonimmediate_operand" "")
2778 (parallel [(const_int 0) (const_int 1)
2779 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode used as the result
;; of the vec_unpack{s,u}_float_{hi,lo}_<mode> expanders (half as many
;; elements as the source mode).
2782 (define_mode_attr sseunpackfltmode
2783 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Expander over VI2_AVX2: operand 1 is first run through
;; vec_unpacks_hi_<mode> into a temporary of <sseunpackmode>, then operand
;; 0 is set to the FLOAT of that temporary in <sseunpackfltmode>.
2785 (define_expand "vec_unpacks_float_hi_<mode>"
2786 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2787 (match_operand:VI2_AVX2 1 "register_operand" "")]
2790 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2792 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2793 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2794 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2: operand 1 is first run through
;; vec_unpacks_lo_<mode> into a temporary of <sseunpackmode>, then operand
;; 0 is set to the FLOAT of that temporary in <sseunpackfltmode>.
2798 (define_expand "vec_unpacks_float_lo_<mode>"
2799 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2800 (match_operand:VI2_AVX2 1 "register_operand" "")]
2803 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2805 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2806 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2807 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2: operand 1 is first run through
;; vec_unpacku_hi_<mode> into a temporary of <sseunpackmode>, then operand
;; 0 is set to the FLOAT of that temporary in <sseunpackfltmode>.
2811 (define_expand "vec_unpacku_float_hi_<mode>"
2812 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2813 (match_operand:VI2_AVX2 1 "register_operand" "")]
2816 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2818 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2819 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2820 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2: operand 1 is first run through
;; vec_unpacku_lo_<mode> into a temporary of <sseunpackmode>, then operand
;; 0 is set to the FLOAT of that temporary in <sseunpackfltmode>.
2824 (define_expand "vec_unpacku_float_lo_<mode>"
2825 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2826 (match_operand:VI2_AVX2 1 "register_operand" "")]
2829 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2831 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2832 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2833 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander with V4SI operand 1 and V2DF result operand 0, in two steps.
;; Elements 2,3,2,3 of operand 1 are selected first; operand 0 is then set
;; from elements 0 and 1 of an intermediate.  operands[2] is a fresh
;; V4SImode scratch register.
2837 (define_expand "vec_unpacks_float_hi_v4si"
2840 (match_operand:V4SI 1 "nonimmediate_operand" "")
2841 (parallel [(const_int 2) (const_int 3)
2842 (const_int 2) (const_int 3)])))
2843 (set (match_operand:V2DF 0 "register_operand" "")
2847 (parallel [(const_int 0) (const_int 1)]))))]
2849 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
2851 (define_expand "vec_unpacks_float_lo_v4si"
2852 [(set (match_operand:V2DF 0 "register_operand" "")
2855 (match_operand:V4SI 1 "nonimmediate_operand" "")
2856 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander with V8SI operand 1 and V4DF result operand 0.  Elements 4..7
;; of operand 1 are selected first; operands[2] is a fresh V4SImode scratch
;; register.
2859 (define_expand "vec_unpacks_float_hi_v8si"
2862 (match_operand:V8SI 1 "nonimmediate_operand" "")
2863 (parallel [(const_int 4) (const_int 5)
2864 (const_int 6) (const_int 7)])))
2865 (set (match_operand:V4DF 0 "register_operand" "")
2869 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V4DF operand 0 from elements 0..3 of V8SI operand 1.
2871 (define_expand "vec_unpacks_float_lo_v8si"
2872 [(set (match_operand:V4DF 0 "register_operand" "")
2875 (match_operand:V8SI 1 "nonimmediate_operand" "")
2876 (parallel [(const_int 0) (const_int 1)]
2877 (const_int 2) (const_int 3)]))))]
;; Expander for the unsigned V4SI -> V2DF conversion of the high half.
;; Elements 2,3,2,3 of operand 1 are selected, then elements 0 and 1 of an
;; intermediate are converted.  The visible fix-up steps are: an LT
;; comparison of operands[6] against operands[3] (a zero vector), an AND
;; of operands[7] with operands[4] (a vector built from the constant 2^32,
;; computed with real_ldexp (dconst1, 32)), and a final operands[6] +
;; operands[8] stored in operand 0.  operands[5] is a V4SImode scratch;
;; operands[6..8] are V2DFmode scratches.
;; NOTE(review): presumably the LT and AND results land in operands[7] and
;; operands[8]; their destinations are not visible here -- confirm.
2880 (define_expand "vec_unpacku_float_hi_v4si"
2883 (match_operand:V4SI 1 "nonimmediate_operand" "")
2884 (parallel [(const_int 2) (const_int 3)
2885 (const_int 2) (const_int 3)])))
2890 (parallel [(const_int 0) (const_int 1)]))))
2892 (lt:V2DF (match_dup 6) (match_dup 3)))
2894 (and:V2DF (match_dup 7) (match_dup 4)))
2895 (set (match_operand:V2DF 0 "register_operand" "")
2896 (plus:V2DF (match_dup 6) (match_dup 8)))]
2899 REAL_VALUE_TYPE TWO32r;
2903 real_ldexp (&TWO32r, &dconst1, 32);
2904 x = const_double_from_real_value (TWO32r, DFmode);
2906 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2907 operands[4] = force_reg (V2DFmode,
2908 ix86_build_const_vector (V2DFmode, 1, x));
2910 operands[5] = gen_reg_rtx (V4SImode);
2912 for (i = 6; i < 9; i++)
2913 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander for the unsigned V4SI -> V2DF conversion of the low half.
;; Elements 0 and 1 of operand 1 are converted.  The visible fix-up steps
;; are: an LT comparison of operands[5] against operands[3] (a zero
;; vector), an AND of operands[6] with operands[4] (a vector built from the
;; constant 2^32, computed with real_ldexp (dconst1, 32)), and a final
;; operands[5] + operands[7] stored in operand 0.  operands[5..7] are
;; V2DFmode scratches.
;; NOTE(review): presumably the LT and AND results land in operands[6] and
;; operands[7]; their destinations are not visible here -- confirm.
2916 (define_expand "vec_unpacku_float_lo_v4si"
2920 (match_operand:V4SI 1 "nonimmediate_operand" "")
2921 (parallel [(const_int 0) (const_int 1)]))))
2923 (lt:V2DF (match_dup 5) (match_dup 3)))
2925 (and:V2DF (match_dup 6) (match_dup 4)))
2926 (set (match_operand:V2DF 0 "register_operand" "")
2927 (plus:V2DF (match_dup 5) (match_dup 7)))]
2930 REAL_VALUE_TYPE TWO32r;
2934 real_ldexp (&TWO32r, &dconst1, 32);
2935 x = const_double_from_real_value (TWO32r, DFmode);
2937 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2938 operands[4] = force_reg (V2DFmode,
2939 ix86_build_const_vector (V2DFmode, 1, x));
2941 for (i = 5; i < 8; i++)
2942 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander for the unsigned V8SI -> V4DF conversion of the high half,
;; written entirely in C.  Steps: extract the high V4SI half of operand 1
;; (gen_vec_extract_hi_v8si) into tmp[5]; convert it with floatv4siv4df2
;; into tmp[2]; tmp[3] = (tmp[2] LT zero vector tmp[0]); tmp[4] = tmp[3]
;; AND tmp[1], where tmp[1] is built from the constant 2^32; finally
;; operand 0 = tmp[2] + tmp[4].
2945 (define_expand "vec_unpacku_float_hi_v8si"
2946 [(match_operand:V4DF 0 "register_operand" "")
2947 (match_operand:V8SI 1 "register_operand" "")]
2950 REAL_VALUE_TYPE TWO32r;
2954 real_ldexp (&TWO32r, &dconst1, 32);
2955 x = const_double_from_real_value (TWO32r, DFmode);
2957 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2958 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2959 tmp[5] = gen_reg_rtx (V4SImode);
2961 for (i = 2; i < 5; i++)
2962 tmp[i] = gen_reg_rtx (V4DFmode);
2963 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2964 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
2965 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2966 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2967 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2968 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander for the unsigned V8SI -> V4DF conversion of the low half,
;; written entirely in C.  Steps: convert operand 1 with avx_cvtdq2pd256_2
;; into tmp[2]; tmp[3] = (tmp[2] LT zero vector tmp[0]); tmp[4] = tmp[3]
;; AND tmp[1], where tmp[1] is built from the constant 2^32; finally
;; operand 0 = tmp[2] + tmp[4].
2972 (define_expand "vec_unpacku_float_lo_v8si"
2973 [(match_operand:V4DF 0 "register_operand" "")
2974 (match_operand:V8SI 1 "nonimmediate_operand" "")]
2977 REAL_VALUE_TYPE TWO32r;
2981 real_ldexp (&TWO32r, &dconst1, 32);
2982 x = const_double_from_real_value (TWO32r, DFmode);
2984 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2985 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2987 for (i = 2; i < 5; i++)
2988 tmp[i] = gen_reg_rtx (V4DFmode);
2989 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
2990 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2991 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2992 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2993 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander packing two V4DF inputs (operands 1 and 2) into V8SF operand 0.
;; Each input goes through float_truncate:V4SF; operands[3] and operands[4]
;; are fresh V4SFmode scratch registers.
2997 (define_expand "vec_pack_trunc_v4df"
2999 (float_truncate:V4SF
3000 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3002 (float_truncate:V4SF
3003 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3004 (set (match_operand:V8SF 0 "register_operand" "")
3010 operands[3] = gen_reg_rtx (V4SFmode);
3011 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander packing two V2DF inputs (operands 1 and 2) into V4SF operand 0.
;; With TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated
;; into a V4DF temporary and converted by avx_cvtpd2ps256.  The other
;; visible sequence converts each input with sse2_cvtpd2ps and merges the
;; two results with sse_movlhps.
3014 (define_expand "vec_pack_trunc_v2df"
3015 [(match_operand:V4SF 0 "register_operand" "")
3016 (match_operand:V2DF 1 "nonimmediate_operand" "")
3017 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3022 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3024 tmp0 = gen_reg_rtx (V4DFmode);
3025 tmp1 = force_reg (V2DFmode, operands[1]);
3027 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3028 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3032 tmp0 = gen_reg_rtx (V4SFmode);
3033 tmp1 = gen_reg_rtx (V4SFmode);
3035 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3036 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3037 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Expander packing two V4DF inputs into V8SI operand 0: each input is
;; converted with fix_truncv4dfv4si2 into a V4SI temporary, and the two
;; temporaries are joined with avx_vec_concatv8si.
3042 (define_expand "vec_pack_sfix_trunc_v4df"
3043 [(match_operand:V8SI 0 "register_operand" "")
3044 (match_operand:V4DF 1 "nonimmediate_operand" "")
3045 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3050 r1 = gen_reg_rtx (V4SImode);
3051 r2 = gen_reg_rtx (V4SImode);
3053 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3054 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3055 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander packing two V2DF inputs into V4SI operand 0.  With
;; TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated into a
;; V4DF temporary and converted by fix_truncv4dfv4si2.  The other visible
;; sequence converts each input with sse2_cvttpd2dq and interleaves the low
;; V2DI halves of the two results (vec_interleave_lowv2di on V2DImode
;; lowparts).
3059 (define_expand "vec_pack_sfix_trunc_v2df"
3060 [(match_operand:V4SI 0 "register_operand" "")
3061 (match_operand:V2DF 1 "nonimmediate_operand" "")
3062 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3067 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3069 tmp0 = gen_reg_rtx (V4DFmode);
3070 tmp1 = force_reg (V2DFmode, operands[1]);
3072 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3073 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3077 tmp0 = gen_reg_rtx (V4SImode);
3078 tmp1 = gen_reg_rtx (V4SImode);
3080 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3081 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3083 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3084 gen_lowpart (V2DImode, tmp0),
3085 gen_lowpart (V2DImode, tmp1)));
;; Maps a double-precision vector mode to the integer vector mode that
;; vec_pack_ufix_trunc_<mode> produces from two such inputs.
3090 (define_mode_attr ssepackfltmode
3091 [(V4DF "V8SI") (V2DF "V4SI")])
;; Expander over VF2 packing two inputs into <ssepackfltmode> operand 0
;; with an unsigned fix.  Both inputs are adjusted with
;; ix86_expand_adjust_ufix_to_sfix_si (which also fills tmp[2] and tmp[3]),
;; then packed with the signed vec_pack_sfix_trunc_<mode> into tmp[4].
;; tmp[5] is built from tmp[2] and tmp[3] with
;; ix86_expand_vec_extract_even_odd: directly in <ssepackfltmode> when that
;; is V4SImode or TARGET_AVX2 is set, otherwise through V8SFmode lowparts.
;; The result is tmp[4] XOR tmp[5], moved into operand 0 if the binop did
;; not already place it there.
3093 (define_expand "vec_pack_ufix_trunc_<mode>"
3094 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
3095 (match_operand:VF2 1 "register_operand" "")
3096 (match_operand:VF2 2 "register_operand" "")]
3100 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3101 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3102 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3103 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3104 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3106 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3107 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3111 tmp[5] = gen_reg_rtx (V8SFmode);
3112 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3113 gen_lowpart (V8SFmode, tmp[3]), 0);
3114 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3116 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3117 operands[0], 0, OPTAB_DIRECT);
3118 if (tmp[6] != operands[0])
3119 emit_move_insn (operands[0], tmp[6]);
;; Expander packing two V4DF inputs into V8SI operand 0: each input is
;; converted with avx_cvtpd2dq256 into a V4SI temporary, and the two
;; temporaries are joined with avx_vec_concatv8si.
3123 (define_expand "vec_pack_sfix_v4df"
3124 [(match_operand:V8SI 0 "register_operand" "")
3125 (match_operand:V4DF 1 "nonimmediate_operand" "")
3126 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3131 r1 = gen_reg_rtx (V4SImode);
3132 r2 = gen_reg_rtx (V4SImode);
3134 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3135 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3136 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander packing two V2DF inputs into V4SI operand 0.  With
;; TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated into a
;; V4DF temporary and converted by avx_cvtpd2dq256.  The other visible
;; sequence converts each input with sse2_cvtpd2dq and interleaves the low
;; V2DI halves of the two results (vec_interleave_lowv2di on V2DImode
;; lowparts).
3140 (define_expand "vec_pack_sfix_v2df"
3141 [(match_operand:V4SI 0 "register_operand" "")
3142 (match_operand:V2DF 1 "nonimmediate_operand" "")
3143 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3148 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3150 tmp0 = gen_reg_rtx (V4DFmode);
3151 tmp1 = force_reg (V2DFmode, operands[1]);
3153 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3154 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3158 tmp0 = gen_reg_rtx (V4SImode);
3159 tmp1 = gen_reg_rtx (V4SImode);
3161 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3162 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3164 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3165 gen_lowpart (V2DImode, tmp0),
3166 gen_lowpart (V2DImode, tmp1)));
3171 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3173 ;; Parallel single-precision floating point element swizzling
3175 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps for arbitrary nonimmediate V4SF
;; operands.  ix86_fixup_binary_operands may substitute a different
;; destination; sse_movhlps is emitted into that destination, which is then
;; copied to operand 0 if it differs.
3177 (define_expand "sse_movhlps_exp"
3178 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3181 (match_operand:V4SF 1 "nonimmediate_operand" "")
3182 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3183 (parallel [(const_int 6)
3189 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3191 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3193 /* Fix up the destination if needed. */
3194 if (dst != operands[0])
3195 emit_move_insn (operands[0], dst);
;; movhlps and its memory forms, five alternatives:
;;   0/1: register-register movhlps / vmovhlps;
;;   2/3: operand 2 in offsettable memory, loaded with movlps / vmovlps
;;        from %H2;
;;   4:   destination in memory, stored with %vmovhps.
;; Rejected when operands 1 and 2 are both memory.
3200 (define_insn "sse_movhlps"
3201 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3204 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3205 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3206 (parallel [(const_int 6)
3210 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3212 movhlps\t{%2, %0|%0, %2}
3213 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3214 movlps\t{%H2, %0|%0, %H2}
3215 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3216 %vmovhps\t{%2, %0|%0, %2}"
3217 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3218 (set_attr "type" "ssemov")
3219 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3220 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps for arbitrary nonimmediate V4SF
;; operands.  ix86_fixup_binary_operands may substitute a different
;; destination; sse_movlhps is emitted into that destination, which is then
;; copied to operand 0 if it differs.
3222 (define_expand "sse_movlhps_exp"
3223 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3226 (match_operand:V4SF 1 "nonimmediate_operand" "")
3227 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3228 (parallel [(const_int 0)
3234 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3236 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3238 /* Fix up the destination if needed. */
3239 if (dst != operands[0])
3240 emit_move_insn (operands[0], dst);
;; movlhps and its memory forms, five alternatives:
;;   0/1: register-register movlhps / vmovlhps;
;;   2/3: operand 2 in memory, loaded with movhps / vmovhps;
;;   4:   destination in offsettable memory, stored with %vmovlps to %H0.
;; Alternative 3 must take operand 2 from memory ("m"), exactly like its
;; non-AVX twin in alternative 2: vmovhps loads its 64-bit source from
;; memory, and a register source is already covered by alternative 1.
3245 (define_insn "sse_movlhps"
3246 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3249 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3250 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3251 (parallel [(const_int 0)
3255 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3257 movlhps\t{%2, %0|%0, %2}
3258 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3259 movhps\t{%2, %0|%0, %2}
3260 vmovhps\t{%2, %1, %0|%0, %1, %2}
3261 %vmovlps\t{%2, %H0|%H0, %2}"
3262 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3263 (set_attr "type" "ssemov")
3264 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3265 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3267 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on 256-bit vectors: selects elements 2,10,3,11,6,14,7,15 from
;; the combination of V8SF operands 1 and 2 into V8SF operand 0.
;; Operand 2 may be in memory.
3268 (define_insn "avx_unpckhps256"
3269 [(set (match_operand:V8SF 0 "register_operand" "=x")
3272 (match_operand:V8SF 1 "register_operand" "x")
3273 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3274 (parallel [(const_int 2) (const_int 10)
3275 (const_int 3) (const_int 11)
3276 (const_int 6) (const_int 14)
3277 (const_int 7) (const_int 15)])))]
3279 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3280 [(set_attr "type" "sselog")
3281 (set_attr "prefix" "vex")
3282 (set_attr "mode" "V8SF")])
;; Three-step expander for V8SF operands.  The first two steps use the same
;; selections as avx_unpcklps256 (0,8,1,9,4,12,5,13) and avx_unpckhps256
;; (2,10,3,11,6,14,7,15); the final step sets operand 0 by selecting
;; elements 4..7 and 12..15.  operands[3] and operands[4] are fresh
;; V8SFmode scratch registers.
3284 (define_expand "vec_interleave_highv8sf"
3288 (match_operand:V8SF 1 "register_operand" "x")
3289 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3290 (parallel [(const_int 0) (const_int 8)
3291 (const_int 1) (const_int 9)
3292 (const_int 4) (const_int 12)
3293 (const_int 5) (const_int 13)])))
3299 (parallel [(const_int 2) (const_int 10)
3300 (const_int 3) (const_int 11)
3301 (const_int 6) (const_int 14)
3302 (const_int 7) (const_int 15)])))
3303 (set (match_operand:V8SF 0 "register_operand" "")
3308 (parallel [(const_int 4) (const_int 5)
3309 (const_int 6) (const_int 7)
3310 (const_int 12) (const_int 13)
3311 (const_int 14) (const_int 15)])))]
3314 operands[3] = gen_reg_rtx (V8SFmode);
3315 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps: selects elements 2,6,3,7 from the combination of
;; V4SF operands 1 and 2 into V4SF operand 0.  Alternative 0 is the
;; two-operand SSE form (operand 1 tied to the destination); alternative 1
;; is the three-operand VEX form.
3318 (define_insn "vec_interleave_highv4sf"
3319 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3322 (match_operand:V4SF 1 "register_operand" "0,x")
3323 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3324 (parallel [(const_int 2) (const_int 6)
3325 (const_int 3) (const_int 7)])))]
3328 unpckhps\t{%2, %0|%0, %2}
3329 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3330 [(set_attr "isa" "noavx,avx")
3331 (set_attr "type" "sselog")
3332 (set_attr "prefix" "orig,vex")
3333 (set_attr "mode" "V4SF")])
3335 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps: V8SF register destination, register operand 1 and
;; register-or-memory operand 2.  The visible selector lists indices
;; 0,8,1,9,4,12,5,13.
3336 (define_insn "avx_unpcklps256"
3337 [(set (match_operand:V8SF 0 "register_operand" "=x")
3340 (match_operand:V8SF 1 "register_operand" "x")
3341 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3342 (parallel [(const_int 0) (const_int 8)
3343 (const_int 1) (const_int 9)
3344 (const_int 4) (const_int 12)
3345 (const_int 5) (const_int 13)])))]
3347 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3348 [(set_attr "type" "sselog")
3349 (set_attr "prefix" "vex")
3350 (set_attr "mode" "V8SF")])
;; Expander made of three selections: one using the indices 0,8,1,9,4,12,5,13,
;; one using 2,10,3,11,6,14,7,15, and a final one that writes indices 0..3
;; and 8..11 into operand 0.  operands[3] and operands[4] are fresh V8SF
;; pseudos, presumably holding the two intermediate results (the lines that
;; reference them are not visible here).
3352 (define_expand "vec_interleave_lowv8sf"
3356 (match_operand:V8SF 1 "register_operand" "x")
3357 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3358 (parallel [(const_int 0) (const_int 8)
3359 (const_int 1) (const_int 9)
3360 (const_int 4) (const_int 12)
3361 (const_int 5) (const_int 13)])))
3367 (parallel [(const_int 2) (const_int 10)
3368 (const_int 3) (const_int 11)
3369 (const_int 6) (const_int 14)
3370 (const_int 7) (const_int 15)])))
3371 (set (match_operand:V8SF 0 "register_operand" "")
3376 (parallel [(const_int 0) (const_int 1)
3377 (const_int 2) (const_int 3)
3378 (const_int 8) (const_int 9)
3379 (const_int 10) (const_int 11)])))]
3382 operands[3] = gen_reg_rtx (V8SFmode);
3383 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF low interleave with selector indices 0,4,1,5.  Two alternatives:
;; the "noavx" one ties operand 1 to the destination and prints the
;; two-operand unpcklps; the "avx" one prints the three-operand vunpcklps.
3386 (define_insn "vec_interleave_lowv4sf"
3387 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3390 (match_operand:V4SF 1 "register_operand" "0,x")
3391 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3392 (parallel [(const_int 0) (const_int 4)
3393 (const_int 1) (const_int 5)])))]
3396 unpcklps\t{%2, %0|%0, %2}
3397 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3398 [(set_attr "isa" "noavx,avx")
3399 (set_attr "type" "sselog")
3400 (set_attr "prefix" "orig,vex")
3401 (set_attr "mode" "V4SF")])
3403 ;; These are modeled with the same vec_concat as the others so that we
3404 ;; capture users of shufps that can use the new instructions.
;; 256-bit vmovshdup with a single register-or-memory V8SF source.  The
;; selector repeats each odd index: 1,1,3,3,5,5,7,7.
3405 (define_insn "avx_movshdup256"
3406 [(set (match_operand:V8SF 0 "register_operand" "=x")
3409 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3411 (parallel [(const_int 1) (const_int 1)
3412 (const_int 3) (const_int 3)
3413 (const_int 5) (const_int 5)
3414 (const_int 7) (const_int 7)])))]
3416 "vmovshdup\t{%1, %0|%0, %1}"
3417 [(set_attr "type" "sse")
3418 (set_attr "prefix" "vex")
3419 (set_attr "mode" "V8SF")])
;; 128-bit movshdup with a register-or-memory V4SF source; printed with the
;; %v prefix and "maybe_vex", so one pattern serves both encodings.  Only the
;; first selector index (1) is visible here; the rest of the selector and the
;; insn condition are not shown.
3421 (define_insn "sse3_movshdup"
3422 [(set (match_operand:V4SF 0 "register_operand" "=x")
3425 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3427 (parallel [(const_int 1)
3432 "%vmovshdup\t{%1, %0|%0, %1}"
3433 [(set_attr "type" "sse")
3434 (set_attr "prefix_rep" "1")
3435 (set_attr "prefix" "maybe_vex")
3436 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup with a single register-or-memory V8SF source.  The
;; selector repeats each even index: 0,0,2,2,4,4,6,6.
3438 (define_insn "avx_movsldup256"
3439 [(set (match_operand:V8SF 0 "register_operand" "=x")
3442 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3444 (parallel [(const_int 0) (const_int 0)
3445 (const_int 2) (const_int 2)
3446 (const_int 4) (const_int 4)
3447 (const_int 6) (const_int 6)])))]
3449 "vmovsldup\t{%1, %0|%0, %1}"
3450 [(set_attr "type" "sse")
3451 (set_attr "prefix" "vex")
3452 (set_attr "mode" "V8SF")])
;; 128-bit movsldup with a register-or-memory V4SF source; printed with the
;; %v prefix and "maybe_vex", so one pattern serves both encodings.  Only the
;; first selector index (0) is visible here; the rest of the selector and the
;; insn condition are not shown.
3454 (define_insn "sse3_movsldup"
3455 [(set (match_operand:V4SF 0 "register_operand" "=x")
3458 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3460 (parallel [(const_int 0)
3465 "%vmovsldup\t{%1, %0|%0, %1}"
3466 [(set_attr "type" "sse")
3467 (set_attr "prefix_rep" "1")
3468 (set_attr "prefix" "maybe_vex")
3469 (set_attr "mode" "V4SF")])
;; Expander taking an immediate shuffle mask in operand 3.  It splits the
;; mask into four 2-bit fields and passes eight explicit selector indices to
;; gen_avx_shufps256_1: fields 0 and 1 as-is and again offset by 4, fields 2
;; and 3 offset by 8 and again by 12.
3471 (define_expand "avx_shufps256"
3472 [(match_operand:V8SF 0 "register_operand" "")
3473 (match_operand:V8SF 1 "register_operand" "")
3474 (match_operand:V8SF 2 "nonimmediate_operand" "")
3475 (match_operand:SI 3 "const_int_operand" "")]
3478 int mask = INTVAL (operands[3]);
3479 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3480 GEN_INT ((mask >> 0) & 3),
3481 GEN_INT ((mask >> 2) & 3),
3482 GEN_INT (((mask >> 4) & 3) + 8),
3483 GEN_INT (((mask >> 6) & 3) + 8),
3484 GEN_INT (((mask >> 0) & 3) + 4),
3485 GEN_INT (((mask >> 2) & 3) + 4),
3486 GEN_INT (((mask >> 4) & 3) + 12),
3487 GEN_INT (((mask >> 6) & 3) + 12)));
3491 ;; Each 2-bit field in the mask selects 2 elements, one per 128-bit lane.
;; vshufps on V8SF with the shuffle spelled out as eight selector operands
;; (3..10).  The condition requires operands 7..10 to equal operands 3..6
;; plus 4.  The output code rebuilds the immediate from operands 3..6
;; (operands 5 and 6 are rebased by 8) and stores it back in operands[3]
;; before printing.
3492 (define_insn "avx_shufps256_1"
3493 [(set (match_operand:V8SF 0 "register_operand" "=x")
3496 (match_operand:V8SF 1 "register_operand" "x")
3497 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3498 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3499 (match_operand 4 "const_0_to_3_operand" "")
3500 (match_operand 5 "const_8_to_11_operand" "")
3501 (match_operand 6 "const_8_to_11_operand" "")
3502 (match_operand 7 "const_4_to_7_operand" "")
3503 (match_operand 8 "const_4_to_7_operand" "")
3504 (match_operand 9 "const_12_to_15_operand" "")
3505 (match_operand 10 "const_12_to_15_operand" "")])))]
3507 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3508 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3509 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3510 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3513 mask = INTVAL (operands[3]);
3514 mask |= INTVAL (operands[4]) << 2;
3515 mask |= (INTVAL (operands[5]) - 8) << 4;
3516 mask |= (INTVAL (operands[6]) - 8) << 6;
3517 operands[3] = GEN_INT (mask);
3519 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3521 [(set_attr "type" "sselog")
3522 (set_attr "length_immediate" "1")
3523 (set_attr "prefix" "vex")
3524 (set_attr "mode" "V8SF")])
;; Expander taking an immediate shuffle mask in operand 3.  It splits the
;; mask into four 2-bit fields and passes them to gen_sse_shufps_v4sf as
;; explicit selector indices; the upper two fields are offset by 4.
3526 (define_expand "sse_shufps"
3527 [(match_operand:V4SF 0 "register_operand" "")
3528 (match_operand:V4SF 1 "register_operand" "")
3529 (match_operand:V4SF 2 "nonimmediate_operand" "")
3530 (match_operand:SI 3 "const_int_operand" "")]
3533 int mask = INTVAL (operands[3]);
3534 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3535 GEN_INT ((mask >> 0) & 3),
3536 GEN_INT ((mask >> 2) & 3),
3537 GEN_INT (((mask >> 4) & 3) + 4),
3538 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps/vshufps over the VI4F_128 modes, selecting four elements from the
;; concatenation of operands 1 and 2: two indices in 0..3 and two in 4..7.
;; The output code packs the four selectors back into an 8-bit immediate
;; (operands 5 and 6 are rebased by 4), stores it in operands[3], and picks
;; the two- or three-operand mnemonic by alternative.
3542 (define_insn "sse_shufps_<mode>"
3543 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3544 (vec_select:VI4F_128
3545 (vec_concat:<ssedoublevecmode>
3546 (match_operand:VI4F_128 1 "register_operand" "0,x")
3547 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3548 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3549 (match_operand 4 "const_0_to_3_operand" "")
3550 (match_operand 5 "const_4_to_7_operand" "")
3551 (match_operand 6 "const_4_to_7_operand" "")])))]
3555 mask |= INTVAL (operands[3]) << 0;
3556 mask |= INTVAL (operands[4]) << 2;
3557 mask |= (INTVAL (operands[5]) - 4) << 4;
3558 mask |= (INTVAL (operands[6]) - 4) << 6;
3559 operands[3] = GEN_INT (mask);
3561 switch (which_alternative)
3564 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3566 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3571 [(set_attr "isa" "noavx,avx")
3572 (set_attr "type" "sselog")
3573 (set_attr "length_immediate" "1")
3574 (set_attr "prefix" "orig,vex")
3575 (set_attr "mode" "V4SF")])
;; Moves elements 2 and 3 of V4SF operand 1 into V2SF operand 0.  Three
;; alternatives: register to memory (movhps), register to register (movhlps),
;; and offsettable memory to register (movlps using the %H1 operand form).
3577 (define_insn "sse_storehps"
3578 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3580 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3581 (parallel [(const_int 2) (const_int 3)])))]
3584 %vmovhps\t{%1, %0|%0, %1}
3585 %vmovhlps\t{%1, %d0|%d0, %1}
3586 %vmovlps\t{%H1, %d0|%d0, %H1}"
3587 [(set_attr "type" "ssemov")
3588 (set_attr "prefix" "maybe_vex")
3589 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander in front of sse_loadhps.  It lets ix86_fixup_binary_operands
;; choose a destination, emits gen_sse_loadhps into it, and copies the
;; result to operands[0] when the chosen destination differs.
3591 (define_expand "sse_loadhps_exp"
3592 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3595 (match_operand:V4SF 1 "nonimmediate_operand" "")
3596 (parallel [(const_int 0) (const_int 1)]))
3597 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3600 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3602 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3604 /* Fix up the destination if needed. */
3605 if (dst != operands[0])
3606 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.  Five
;; alternatives: operand 2 from memory (movhps/vmovhps), operand 2 from a
;; register (movlhps/vmovlhps), and a memory destination tied to operand 1
;; that is written with movlps through the %H0 operand form.
3611 (define_insn "sse_loadhps"
3612 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3615 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3616 (parallel [(const_int 0) (const_int 1)]))
3617 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3620 movhps\t{%2, %0|%0, %2}
3621 vmovhps\t{%2, %1, %0|%0, %1, %2}
3622 movlhps\t{%2, %0|%0, %2}
3623 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3624 %vmovlps\t{%2, %H0|%H0, %2}"
3625 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3626 (set_attr "type" "ssemov")
3627 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3628 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Moves elements 0 and 1 of V4SF operand 1 into V2SF operand 0.  Three
;; alternatives: register to memory (movlps), register to register (movaps),
;; and memory to register (movlps).
3630 (define_insn "sse_storelps"
3631 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3633 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3634 (parallel [(const_int 0) (const_int 1)])))]
3637 %vmovlps\t{%1, %0|%0, %1}
3638 %vmovaps\t{%1, %0|%0, %1}
3639 %vmovlps\t{%1, %d0|%d0, %1}"
3640 [(set_attr "type" "ssemov")
3641 (set_attr "prefix" "maybe_vex")
3642 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander in front of sse_loadlps.  It lets ix86_fixup_binary_operands
;; choose a destination, emits gen_sse_loadlps into it, and copies the
;; result to operands[0] when the chosen destination differs.
3644 (define_expand "sse_loadlps_exp"
3645 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3647 (match_operand:V2SF 2 "nonimmediate_operand" "")
3649 (match_operand:V4SF 1 "nonimmediate_operand" "")
3650 (parallel [(const_int 2) (const_int 3)]))))]
3653 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3655 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3657 /* Fix up the destination if needed. */
3658 if (dst != operands[0])
3659 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.  Five
;; alternatives: register operand 2 via shufps/vshufps with immediate 0xe4,
;; memory operand 2 via movlps/vmovlps, and a memory destination tied to
;; operand 1 that is written with movlps.
3664 (define_insn "sse_loadlps"
3665 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3667 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3669 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3670 (parallel [(const_int 2) (const_int 3)]))))]
3673 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3674 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3675 movlps\t{%2, %0|%0, %2}
3676 vmovlps\t{%2, %1, %0|%0, %1, %2}
3677 %vmovlps\t{%2, %0|%0, %2}"
3678 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3679 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3680 (set_attr "length_immediate" "1,1,*,*,*")
3681 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3682 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss/vmovss between V4SF registers.  The "noavx" alternative ties
;; operand 1 to the destination; the "avx" alternative is three-operand.
;; The RTL operator that combines operands 2 and 1 is not visible here.
3684 (define_insn "sse_movss"
3685 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3687 (match_operand:V4SF 2 "register_operand" " x,x")
3688 (match_operand:V4SF 1 "register_operand" " 0,x")
3692 movss\t{%2, %0|%0, %2}
3693 vmovss\t{%2, %1, %0|%0, %1, %2}"
3694 [(set_attr "isa" "noavx,avx")
3695 (set_attr "type" "ssemov")
3696 (set_attr "prefix" "orig,vex")
3697 (set_attr "mode" "SF")])
;; vbroadcastss from element 0 of a V4SF register into a VF1-mode register
;; (VF1 covers V8SF and V4SF).  The insn condition is not visible here.
3699 (define_insn "avx2_vec_dup<mode>"
3700 [(set (match_operand:VF1 0 "register_operand" "=x")
3703 (match_operand:V4SF 1 "register_operand" "x")
3704 (parallel [(const_int 0)]))))]
3706 "vbroadcastss\t{%1, %0|%0, %1}"
3707 [(set_attr "type" "sselog1")
3708 (set_attr "prefix" "vex")
3709 (set_attr "mode" "<MODE>")])
;; Builds a V4SF from scalar SF operand 1.  Three alternatives: AVX with a
;; register source (vshufps with immediate 0), AVX with a memory source
;; (vbroadcastss), and non-AVX with the source tied to the destination
;; (shufps with immediate 0).
3711 (define_insn "vec_dupv4sf"
3712 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3714 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3717 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3718 vbroadcastss\t{%1, %0|%0, %1}
3719 shufps\t{$0, %0, %0|%0, %0, 0}"
3720 [(set_attr "isa" "avx,avx,noavx")
3721 (set_attr "type" "sselog1,ssemov,sselog1")
3722 (set_attr "length_immediate" "1,0,1")
3723 (set_attr "prefix_extra" "0,1,*")
3724 (set_attr "prefix" "vex,vex,orig")
3725 (set_attr "mode" "V4SF")])
3727 ;; Although insertps takes register source, we prefer
3728 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from two SF operands.  Seven alternatives: register
;; operand 2 via unpcklps/vunpcklps, memory operand 2 via insertps/vinsertps
;; with immediate 0x10, a "C"-constrained operand 2 via a movss load of
;; operand 1, and two alternatives with a "*y" destination using punpckldq
;; and movd.  The insn condition is not visible here.
3729 (define_insn "*vec_concatv2sf_sse4_1"
3730 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3732 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3733 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3736 unpcklps\t{%2, %0|%0, %2}
3737 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3738 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3739 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3740 %vmovss\t{%1, %0|%0, %1}
3741 punpckldq\t{%2, %0|%0, %2}
3742 movd\t{%1, %0|%0, %1}"
3743 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3744 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3745 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3746 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3747 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3748 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3749 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3751 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3752 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3753 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands; operand 2 must satisfy
;; reg_or_0_operand.  Four alternatives: unpcklps with a register operand 2,
;; a movss load of operand 1 with a "C"-constrained operand 2, and two
;; alternatives with a "*y" destination using punpckldq and movd.
3754 (define_insn "*vec_concatv2sf_sse"
3755 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3757 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3758 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3761 unpcklps\t{%2, %0|%0, %2}
3762 movss\t{%1, %0|%0, %1}
3763 punpckldq\t{%2, %0|%0, %2}
3764 movd\t{%1, %0|%0, %1}"
3765 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3766 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands.  A register operand 2 uses
;; movlhps/vmovlhps; a memory operand 2 uses movhps/vmovhps.  The "noavx"
;; alternatives tie operand 1 to the destination.
3768 (define_insn "*vec_concatv4sf"
3769 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3771 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3772 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3775 movlhps\t{%2, %0|%0, %2}
3776 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3777 movhps\t{%2, %0|%0, %2}
3778 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3779 [(set_attr "isa" "noavx,avx,noavx,avx")
3780 (set_attr "type" "ssemov")
3781 (set_attr "prefix" "orig,vex,orig,vex")
3782 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector-initialization expander for the V_128 modes.  All of the work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, the destination (operand 0) and operand 1.
3784 (define_expand "vec_init<mode>"
3785 [(match_operand:V_128 0 "register_operand" "")
3786 (match_operand 1 "" "")]
3789 ix86_expand_vector_init (false, operands[0], operands[1]);
3793 ;; Avoid combining registers from different units in a single alternative;
3794 ;; see the comment above the inline_secondary_memory_needed function in i386.c.
;; Eleven-alternative insn over the VI4F_128 modes that combines a
;; duplicated scalar (operand 2) with vector operand 1.  Alternatives 0-3
;; pair the scalar with a "C"-constrained operand 1; alternatives 4-7 take
;; operand 1 from a register, tied to the destination in 4 and 6; the last
;; three have a memory destination tied to operand 1.  Only the first eight
;; output templates are visible here, and the combining operator and insn
;; condition are not shown.
3795 (define_insn "vec_set<mode>_0"
3796 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3797 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
3799 (vec_duplicate:VI4F_128
3800 (match_operand:<ssescalarmode> 2 "general_operand"
3801 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
3802 (match_operand:VI4F_128 1 "vector_move_operand"
3803 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
3807 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3808 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3809 %vmovd\t{%2, %0|%0, %2}
3810 movss\t{%2, %0|%0, %2}
3811 movss\t{%2, %0|%0, %2}
3812 vmovss\t{%2, %1, %0|%0, %1, %2}
3813 pinsrd\t{$0, %2, %0|%0, %2, 0}
3814 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3818 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3820 (cond [(eq_attr "alternative" "0,6,7")
3821 (const_string "sselog")
3822 (eq_attr "alternative" "9")
3823 (const_string "imov")
3824 (eq_attr "alternative" "10")
3825 (const_string "fmov")
3827 (const_string "ssemov")))
3828 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3829 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3830 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3831 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3833 ;; A subset is vec_setv4sf.
;; insertps/vinsertps of SF operand 2 into V4SF operand 1.  The condition
;; requires exact_log2 of operand 3 to be below the V4SF element count.  The
;; output code replaces operand 3 with that log2 shifted left by 4 to form
;; the instruction immediate.
3834 (define_insn "*vec_setv4sf_sse4_1"
3835 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3838 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3839 (match_operand:V4SF 1 "register_operand" "0,x")
3840 (match_operand:SI 3 "const_int_operand" "")))]
3842 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3843 < GET_MODE_NUNITS (V4SFmode))"
3845 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3846 switch (which_alternative)
3849 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3851 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3856 [(set_attr "isa" "noavx,avx")
3857 (set_attr "type" "sselog")
3858 (set_attr "prefix_data16" "1,*")
3859 (set_attr "prefix_extra" "1")
3860 (set_attr "length_immediate" "1")
3861 (set_attr "prefix" "orig,vex")
3862 (set_attr "mode" "V4SF")])
;; insertps/vinsertps expressed as an unspec with an 8-bit immediate in
;; operand 3.  When operand 2 is in memory, the output code takes the
;; immediate's bits 6 and up as an element count, readdresses operand 2 as
;; the SFmode value at count * 4 bytes, and keeps only the low six bits of
;; the immediate.
3864 (define_insn "sse4_1_insertps"
3865 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3866 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3867 (match_operand:V4SF 1 "register_operand" "0,x")
3868 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3872 if (MEM_P (operands[2]))
3874 unsigned count_s = INTVAL (operands[3]) >> 6;
3876 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3877 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3879 switch (which_alternative)
3882 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3884 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3889 [(set_attr "isa" "noavx,avx")
3890 (set_attr "type" "sselog")
3891 (set_attr "prefix_data16" "1,*")
3892 (set_attr "prefix_extra" "1")
3893 (set_attr "length_immediate" "1")
3894 (set_attr "prefix" "orig,vex")
3895 (set_attr "mode" "V4SF")])
;; Post-reload pattern (TARGET_SSE && reload_completed) whose destination is
;; a VI4F_128 memory operand and whose source involves a duplicated
;; non-memory scalar.  The replacement emits a scalar-mode move to offset 0
;; of that memory.  The definition's header line and the move's source
;; argument are not visible here.
3898 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3900 (vec_duplicate:VI4F_128
3901 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3904 "TARGET_SSE && reload_completed"
3907 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Element-set expander for the modes in iterator V.  All of the work is
;; delegated to ix86_expand_vector_set, called with false, the vector
;; (operand 0), the scalar (operand 1) and the constant index in operand 2.
3912 (define_expand "vec_set<mode>"
3913 [(match_operand:V 0 "register_operand" "")
3914 (match_operand:<ssescalarmode> 1 "register_operand" "")
3915 (match_operand 2 "const_int_operand" "")]
3918 ix86_expand_vector_set (false, operands[0], operands[1],
3919 INTVAL (operands[2]));
;; Extracts element 0 of a V4SF into an SF destination; the operands may not
;; both be memory.  After reload it is split into a plain SFmode move, with
;; operand 1 reinterpreted either as an SFmode REG with the same register
;; number or through gen_lowpart (the test choosing between them is not
;; visible here).
3923 (define_insn_and_split "*vec_extractv4sf_0"
3924 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3926 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3927 (parallel [(const_int 0)])))]
3928 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3930 "&& reload_completed"
3933 rtx op1 = operands[1];
3935 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3937 op1 = gen_lowpart (SFmode, op1);
3938 emit_move_insn (operands[0], op1);
;; Extracts element 0..3 (operand 2) of a V4SF register.  The first
;; alternative prints extractps to a general register or memory.  The other
;; two are split after reload when the destination is an SSE register: the
;; destination is viewed as V4SF and filled either by gen_sse_shufps_v4sf,
;; with all four selectors derived from operand 2, or by
;; gen_vec_interleave_highv4sf of operand 1 with itself.  The case labels
;; selecting between those are not visible here.
3942 (define_insn_and_split "*sse4_1_extractps"
3943 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3945 (match_operand:V4SF 1 "register_operand" "x,0,x")
3946 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3949 %vextractps\t{%2, %1, %0|%0, %1, %2}
3952 "&& reload_completed && SSE_REG_P (operands[0])"
3955 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3956 switch (INTVAL (operands[2]))
3960 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3961 operands[2], operands[2],
3962 GEN_INT (INTVAL (operands[2]) + 4),
3963 GEN_INT (INTVAL (operands[2]) + 4)));
3966 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3969 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
3974 [(set_attr "isa" "*,noavx,avx")
3975 (set_attr "type" "sselog,*,*")
3976 (set_attr "prefix_data16" "1,*,*")
3977 (set_attr "prefix_extra" "1,*,*")
3978 (set_attr "length_immediate" "1,*,*")
3979 (set_attr "prefix" "maybe_vex,*,*")
3980 (set_attr "mode" "V4SF,*,*")])
;; Extracts element 0..3 (operand 2) of a V4SF held in offsettable memory.
;; After reload it is split into an SFmode load from the operand's address
;; plus 4 * index.
3982 (define_insn_and_split "*vec_extract_v4sf_mem"
3983 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
3985 (match_operand:V4SF 1 "memory_operand" "o,o,o")
3986 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
3989 "&& reload_completed"
3992 int i = INTVAL (operands[2]);
3994 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for extracting one half of a V_256 vector; operand 2 is 0 or 1.
;; It switches on operand 2 to pick gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode> (the case labels are not visible here) and
;; emits the chosen insn with operands 0 and 1.
3998 (define_expand "avx_vextractf128<mode>"
3999 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
4000 (match_operand:V_256 1 "register_operand" "")
4001 (match_operand:SI 2 "const_0_to_1_operand" "")]
4004 rtx (*insn)(rtx, rtx);
4006 switch (INTVAL (operands[2]))
4009 insn = gen_vec_extract_lo_<mode>;
4012 insn = gen_vec_extract_hi_<mode>;
4018 emit_insn (insn (operands[0], operands[1]));
;; Selects elements 0 and 1 of a VI8F_256 vector into the half-width mode;
;; the operands may not both be memory.  After reload it is split into a
;; plain half-mode move, with operand 1 reinterpreted either as a half-mode
;; REG with the same register number or through gen_lowpart (the test
;; choosing between them is not visible here).
4022 (define_insn_and_split "vec_extract_lo_<mode>"
4023 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4024 (vec_select:<ssehalfvecmode>
4025 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4026 (parallel [(const_int 0) (const_int 1)])))]
4027 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4029 "&& reload_completed"
4032 rtx op1 = operands[1];
4034 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4036 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4037 emit_move_insn (operands[0], op1);
;; Selects elements 2 and 3 of a VI8F_256 register into a half-width
;; register or memory destination, printed as vextract<i128> with
;; immediate 1.  The insn condition is not visible here.
4041 (define_insn "vec_extract_hi_<mode>"
4042 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4043 (vec_select:<ssehalfvecmode>
4044 (match_operand:VI8F_256 1 "register_operand" "x,x")
4045 (parallel [(const_int 2) (const_int 3)])))]
4047 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4048 [(set_attr "type" "sselog")
4049 (set_attr "prefix_extra" "1")
4050 (set_attr "length_immediate" "1")
4051 (set_attr "memory" "none,store")
4052 (set_attr "prefix" "vex")
4053 (set_attr "mode" "<sseinsnmode>")])
;; Selects elements 0..3 of a VI4F_256 vector into the half-width mode; the
;; operands may not both be memory.  After reload it is split into a plain
;; half-mode move, with operand 1 reinterpreted either as a half-mode REG
;; with the same register number or through gen_lowpart (the test choosing
;; between them is not visible here).
4055 (define_insn_and_split "vec_extract_lo_<mode>"
4056 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4057 (vec_select:<ssehalfvecmode>
4058 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4059 (parallel [(const_int 0) (const_int 1)
4060 (const_int 2) (const_int 3)])))]
4061 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4063 "&& reload_completed"
4066 rtx op1 = operands[1];
4068 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4070 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4071 emit_move_insn (operands[0], op1);
;; Selects elements 4..7 of a VI4F_256 register into a half-width register
;; or memory destination, printed as vextract<i128> with immediate 1.  The
;; insn condition is not visible here.
4075 (define_insn "vec_extract_hi_<mode>"
4076 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4077 (vec_select:<ssehalfvecmode>
4078 (match_operand:VI4F_256 1 "register_operand" "x,x")
4079 (parallel [(const_int 4) (const_int 5)
4080 (const_int 6) (const_int 7)])))]
4082 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4083 [(set_attr "type" "sselog")
4084 (set_attr "prefix_extra" "1")
4085 (set_attr "length_immediate" "1")
4086 (set_attr "memory" "none,store")
4087 (set_attr "prefix" "vex")
4088 (set_attr "mode" "<sseinsnmode>")])
;; Selects elements 0..7 of a V16HI vector into a V8HI destination; the
;; operands may not both be memory.  After reload it is split into a plain
;; V8HImode move, with operand 1 reinterpreted either as a V8HImode REG with
;; the same register number or through gen_lowpart (the test choosing
;; between them is not visible here).
4090 (define_insn_and_split "vec_extract_lo_v16hi"
4091 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4093 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4094 (parallel [(const_int 0) (const_int 1)
4095 (const_int 2) (const_int 3)
4096 (const_int 4) (const_int 5)
4097 (const_int 6) (const_int 7)])))]
4098 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4100 "&& reload_completed"
4103 rtx op1 = operands[1];
4105 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4107 op1 = gen_lowpart (V8HImode, op1);
4108 emit_move_insn (operands[0], op1);
;; Selects elements 8..15 of a V16HI register into a V8HI register or
;; memory destination, printed as vextract%~128 with immediate 1.  The insn
;; condition is not visible here.
4112 (define_insn "vec_extract_hi_v16hi"
4113 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4115 (match_operand:V16HI 1 "register_operand" "x,x")
4116 (parallel [(const_int 8) (const_int 9)
4117 (const_int 10) (const_int 11)
4118 (const_int 12) (const_int 13)
4119 (const_int 14) (const_int 15)])))]
4121 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4122 [(set_attr "type" "sselog")
4123 (set_attr "prefix_extra" "1")
4124 (set_attr "length_immediate" "1")
4125 (set_attr "memory" "none,store")
4126 (set_attr "prefix" "vex")
4127 (set_attr "mode" "OI")])
;; Selects elements 0..15 of a V32QI vector into a V16QI destination; the
;; operands may not both be memory.  After reload it is split into a plain
;; V16QImode move, with operand 1 reinterpreted either as a V16QImode REG
;; with the same register number or through gen_lowpart (the test choosing
;; between them is not visible here).
4129 (define_insn_and_split "vec_extract_lo_v32qi"
4130 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4132 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4133 (parallel [(const_int 0) (const_int 1)
4134 (const_int 2) (const_int 3)
4135 (const_int 4) (const_int 5)
4136 (const_int 6) (const_int 7)
4137 (const_int 8) (const_int 9)
4138 (const_int 10) (const_int 11)
4139 (const_int 12) (const_int 13)
4140 (const_int 14) (const_int 15)])))]
4141 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4143 "&& reload_completed"
4146 rtx op1 = operands[1];
4148 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4150 op1 = gen_lowpart (V16QImode, op1);
4151 emit_move_insn (operands[0], op1);
;; Selects elements 16..31 of a V32QI register into a V16QI register or
;; memory destination, printed as vextract%~128 with immediate 1.  The insn
;; condition is not visible here.
4155 (define_insn "vec_extract_hi_v32qi"
4156 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4158 (match_operand:V32QI 1 "register_operand" "x,x")
4159 (parallel [(const_int 16) (const_int 17)
4160 (const_int 18) (const_int 19)
4161 (const_int 20) (const_int 21)
4162 (const_int 22) (const_int 23)
4163 (const_int 24) (const_int 25)
4164 (const_int 26) (const_int 27)
4165 (const_int 28) (const_int 29)
4166 (const_int 30) (const_int 31)])))]
4168 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4169 [(set_attr "type" "sselog")
4170 (set_attr "prefix_extra" "1")
4171 (set_attr "length_immediate" "1")
4172 (set_attr "memory" "none,store")
4173 (set_attr "prefix" "vex")
4174 (set_attr "mode" "OI")])
4176 ;; Modes handled by vec_extract patterns.
;; Each 256-bit mode is enabled only under TARGET_AVX; the 128-bit modes
;; carry no extra condition.
4177 (define_mode_iterator VEC_EXTRACT_MODE
4178 [(V32QI "TARGET_AVX") V16QI
4179 (V16HI "TARGET_AVX") V8HI
4180 (V8SI "TARGET_AVX") V4SI
4181 (V4DI "TARGET_AVX") V2DI
4182 (V8SF "TARGET_AVX") V4SF
4183 (V4DF "TARGET_AVX") V2DF])
;; Element-extract expander for the VEC_EXTRACT_MODE modes.  All of the work
;; is delegated to ix86_expand_vector_extract, called with false, the scalar
;; destination (operand 0), the vector (operand 1) and the constant index in
;; operand 2.
4185 (define_expand "vec_extract<mode>"
4186 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4187 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4188 (match_operand 2 "const_int_operand" "")]
4191 ix86_expand_vector_extract (false, operands[0], operands[1],
4192 INTVAL (operands[2]));
4196 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4198 ;; Parallel double-precision floating point element swizzling
4200 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4202 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: V4DF register destination, register operand 1 and
;; register-or-memory operand 2.  The visible selector lists indices
;; 1,5,3,7.
4203 (define_insn "avx_unpckhpd256"
4204 [(set (match_operand:V4DF 0 "register_operand" "=x")
4207 (match_operand:V4DF 1 "register_operand" "x")
4208 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4209 (parallel [(const_int 1) (const_int 5)
4210 (const_int 3) (const_int 7)])))]
4212 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4213 [(set_attr "type" "sselog")
4214 (set_attr "prefix" "vex")
4215 (set_attr "mode" "V4DF")])
;; Expander made of three selections: one using the indices 0,4,2,6, one
;; using 1,5,3,7, and a final one that writes indices 2,3,6,7 into
;; operand 0.  operands[3] and operands[4] are fresh V4DF pseudos,
;; presumably holding the two intermediate results (the lines that
;; reference them are not visible here).
4217 (define_expand "vec_interleave_highv4df"
4221 (match_operand:V4DF 1 "register_operand" "x")
4222 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4223 (parallel [(const_int 0) (const_int 4)
4224 (const_int 2) (const_int 6)])))
4230 (parallel [(const_int 1) (const_int 5)
4231 (const_int 3) (const_int 7)])))
4232 (set (match_operand:V4DF 0 "register_operand" "")
4237 (parallel [(const_int 2) (const_int 3)
4238 (const_int 6) (const_int 7)])))]
4241 operands[3] = gen_reg_rtx (V4DFmode);
4242 operands[4] = gen_reg_rtx (V4DFmode);
;; V2DF high-interleave expander.  If ix86_vec_interleave_v2df_operator_ok
;; (operands, 1) rejects the operands, operand 2 is forced into a register.
;; Only the first selector index (1) is visible here.
4246 (define_expand "vec_interleave_highv2df"
4247 [(set (match_operand:V2DF 0 "register_operand" "")
4250 (match_operand:V2DF 1 "nonimmediate_operand" "")
4251 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4252 (parallel [(const_int 1)
4256 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4257 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave, valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.  Six
;; alternatives: unpckhpd/vunpckhpd on registers, movddup from the %H1 form
;; of memory operand 1 when operand 2 matches operand 1, movlpd/vmovlpd
;; from the %H1 form of memory operand 1, and movhpd of register operand 1
;; to a memory destination tied to operand 2.
4260 (define_insn "*vec_interleave_highv2df"
4261 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4264 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4265 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4266 (parallel [(const_int 1)
4268 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4270 unpckhpd\t{%2, %0|%0, %2}
4271 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4272 %vmovddup\t{%H1, %0|%0, %H1}
4273 movlpd\t{%H1, %0|%0, %H1}
4274 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4275 %vmovhpd\t{%1, %0|%0, %1}"
4276 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4277 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4278 (set_attr "prefix_data16" "*,*,*,1,*,1")
4279 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4280 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4282 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single register-or-memory V4DF source and the selector
;; 0,4,2,6.  The vec_select/vec_concat wrapper and the condition are not
;; visible here.
4283 (define_expand "avx_movddup256"
4284 [(set (match_operand:V4DF 0 "register_operand" "")
4287 (match_operand:V4DF 1 "nonimmediate_operand" "")
4289 (parallel [(const_int 0) (const_int 4)
4290 (const_int 2) (const_int 6)])))]
;; Expander with register operand 1, register-or-memory operand 2 and the
;; selector 0,4,2,6.  The condition is not visible here.
4293 (define_expand "avx_unpcklpd256"
4294 [(set (match_operand:V4DF 0 "register_operand" "")
4297 (match_operand:V4DF 1 "register_operand" "")
4298 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4299 (parallel [(const_int 0) (const_int 4)
4300 (const_int 2) (const_int 6)])))]
;; Matching insn for the selector 0,4,2,6 on V4DF.  The first alternative
;; prints vunpcklpd with distinct operands; the second, where memory
;; operand 1 is also used as operand 2, prints vmovddup.
4303 (define_insn "*avx_unpcklpd256"
4304 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4307 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4308 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4309 (parallel [(const_int 0) (const_int 4)
4310 (const_int 2) (const_int 6)])))]
4313 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4314 vmovddup\t{%1, %0|%0, %1}"
4315 [(set_attr "type" "sselog")
4316 (set_attr "prefix" "vex")
4317 (set_attr "mode" "V4DF")])
;; Expander made of three selections: one using the indices 0,4,2,6, one
;; using 1,5,3,7, and a final one that writes indices 0,1,4,5 into
;; operand 0.  operands[3] and operands[4] are fresh V4DF pseudos,
;; presumably holding the two intermediate results (the lines that
;; reference them are not visible here).
4319 (define_expand "vec_interleave_lowv4df"
4323 (match_operand:V4DF 1 "register_operand" "x")
4324 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4325 (parallel [(const_int 0) (const_int 4)
4326 (const_int 2) (const_int 6)])))
4332 (parallel [(const_int 1) (const_int 5)
4333 (const_int 3) (const_int 7)])))
4334 (set (match_operand:V4DF 0 "register_operand" "")
4339 (parallel [(const_int 0) (const_int 1)
4340 (const_int 4) (const_int 5)])))]
4343 operands[3] = gen_reg_rtx (V4DFmode);
4344 operands[4] = gen_reg_rtx (V4DFmode);
;; V2DF low-interleave expander.  If ix86_vec_interleave_v2df_operator_ok
;; (operands, 0) rejects the operands, operand 1 is forced into a register.
;; Only the first selector index (0) is visible here.
4347 (define_expand "vec_interleave_lowv2df"
4348 [(set (match_operand:V2DF 0 "register_operand" "")
4351 (match_operand:V2DF 1 "nonimmediate_operand" "")
4352 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4353 (parallel [(const_int 0)
4357 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4358 operands[1] = force_reg (V2DFmode, operands[1]);
;; V2DF low interleave, valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.  Six
;; alternatives: unpcklpd/vunpcklpd on registers, movddup from memory
;; operand 1 when operand 2 matches operand 1, movhpd/vmovhpd with a memory
;; operand 2, and movlpd of register operand 2 to the %H0 form of an
;; offsettable memory destination tied to operand 1.
4361 (define_insn "*vec_interleave_lowv2df"
4362 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4365 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4366 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4367 (parallel [(const_int 0)
4369 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4371 unpcklpd\t{%2, %0|%0, %2}
4372 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4373 %vmovddup\t{%1, %0|%0, %1}
4374 movhpd\t{%2, %0|%0, %2}
4375 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4376 %vmovlpd\t{%2, %H0|%H0, %2}"
4377 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4378 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4379 (set_attr "prefix_data16" "*,*,*,1,*,1")
4380 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4381 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload pattern (TARGET_SSE3 && reload_completed) with a V2DF memory
;; destination and a V2DF register source.  The replacement stores the
;; DFmode view of register operand 1 to both byte offsets 0 and 8 of the
;; destination.  The definition's header line is not visible here.
4384 [(set (match_operand:V2DF 0 "memory_operand" "")
4387 (match_operand:V2DF 1 "register_operand" "")
4389 (parallel [(const_int 0)
4391 "TARGET_SSE3 && reload_completed"
4394 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4395 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4396 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Pattern with a V2DF register destination and a V2DF memory source, valid
;; under TARGET_SSE3 when selector operand 3 equals operand 2 plus 2.  It is
;; replaced by a vec_duplicate of the DFmode value at byte offset
;; operand 2 * 8 of the memory operand.  The definition's header line is not
;; visible here.
4401 [(set (match_operand:V2DF 0 "register_operand" "")
4404 (match_operand:V2DF 1 "memory_operand" "")
4406 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4407 (match_operand:SI 3 "const_int_operand" "")])))]
4408 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4409 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4411 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking an immediate shuffle mask in operand 3 and passing
;; explicit selector indices to gen_avx_shufpd256_1.  Mask bit 1 chooses
;; 5 or 4, bit 2 chooses 3 or 2, and bit 3 chooses 7 or 6.  The first
;; selector argument is not visible here.
4414 (define_expand "avx_shufpd256"
4415 [(match_operand:V4DF 0 "register_operand" "")
4416 (match_operand:V4DF 1 "register_operand" "")
4417 (match_operand:V4DF 2 "nonimmediate_operand" "")
4418 (match_operand:SI 3 "const_int_operand" "")]
4421 int mask = INTVAL (operands[3]);
4422 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4424 GEN_INT (mask & 2 ? 5 : 4),
4425 GEN_INT (mask & 4 ? 3 : 2),
4426 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the shuffle spelled out as four selector operands,
;; in the ranges 0..1, 4..5, 2..3 and 6..7.  The output code rebases each
;; selector to a single bit, packs the bits into a 4-bit immediate stored in
;; operands[3], and prints the instruction.
4430 (define_insn "avx_shufpd256_1"
4431 [(set (match_operand:V4DF 0 "register_operand" "=x")
4434 (match_operand:V4DF 1 "register_operand" "x")
4435 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4436 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4437 (match_operand 4 "const_4_to_5_operand" "")
4438 (match_operand 5 "const_2_to_3_operand" "")
4439 (match_operand 6 "const_6_to_7_operand" "")])))]
4443 mask = INTVAL (operands[3]);
4444 mask |= (INTVAL (operands[4]) - 4) << 1;
4445 mask |= (INTVAL (operands[5]) - 2) << 2;
4446 mask |= (INTVAL (operands[6]) - 6) << 3;
4447 operands[3] = GEN_INT (mask);
4449 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4451 [(set_attr "type" "sselog")
4452 (set_attr "length_immediate" "1")
4453 (set_attr "prefix" "vex")
4454 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle immediate as const_int operand 3 and
;; forwarding to sse2_shufpd_v2df with explicit selectors.  Visible
;; decoding: bit 1 of the immediate selects element 3 or 2.
4456 (define_expand "sse2_shufpd"
4457 [(match_operand:V2DF 0 "register_operand" "")
4458 (match_operand:V2DF 1 "register_operand" "")
4459 (match_operand:V2DF 2 "nonimmediate_operand" "")
4460 (match_operand:SI 3 "const_int_operand" "")]
4463 int mask = INTVAL (operands[3]);
4464 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4466 GEN_INT (mask & 2 ? 3 : 2)));
4470 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI interleave-high of operands 1 and 2, emitted as a three-operand
;; vpunpckhqdq.  VEX-only, single alternative, insn mode OI.
4471 (define_insn "avx2_interleave_highv4di"
4472 [(set (match_operand:V4DI 0 "register_operand" "=x")
4475 (match_operand:V4DI 1 "register_operand" "x")
4476 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4477 (parallel [(const_int 1)
4482 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4483 [(set_attr "type" "sselog")
4484 (set_attr "prefix" "vex")
4485 (set_attr "mode" "OI")])
;; V2DI interleave-high of operands 1 and 2.  Alternative 0 is the
;; two-operand punpckhqdq (operand 1 tied to the destination, non-AVX);
;; alternative 1 is the three-operand VEX form vpunpckhqdq.
4487 (define_insn "vec_interleave_highv2di"
4488 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4491 (match_operand:V2DI 1 "register_operand" "0,x")
4492 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4493 (parallel [(const_int 1)
4497 punpckhqdq\t{%2, %0|%0, %2}
4498 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4499 [(set_attr "isa" "noavx,avx")
4500 (set_attr "type" "sselog")
4501 (set_attr "prefix_data16" "1,*")
4502 (set_attr "prefix" "orig,vex")
4503 (set_attr "mode" "TI")])
;; V4DI interleave-low of operands 1 and 2, emitted as a three-operand
;; vpunpcklqdq.  VEX-only, single alternative, insn mode OI.
4505 (define_insn "avx2_interleave_lowv4di"
4506 [(set (match_operand:V4DI 0 "register_operand" "=x")
4509 (match_operand:V4DI 1 "register_operand" "x")
4510 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4511 (parallel [(const_int 0)
4516 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4517 [(set_attr "type" "sselog")
4518 (set_attr "prefix" "vex")
4519 (set_attr "mode" "OI")])
;; V2DI interleave-low of operands 1 and 2.  Alternative 0 is the
;; two-operand punpcklqdq (operand 1 tied to the destination, non-AVX);
;; alternative 1 is the three-operand VEX form vpunpcklqdq.
4521 (define_insn "vec_interleave_lowv2di"
4522 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4525 (match_operand:V2DI 1 "register_operand" "0,x")
4526 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4527 (parallel [(const_int 0)
4531 punpcklqdq\t{%2, %0|%0, %2}
4532 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4533 [(set_attr "isa" "noavx,avx")
4534 (set_attr "type" "sselog")
4535 (set_attr "prefix_data16" "1,*")
4536 (set_attr "prefix" "orig,vex")
4537 (set_attr "mode" "TI")])
;; shufpd over the VI8F_128 modes: picks one element (0-1) from operand 1
;; and one element (2-3) from operand 2 of their concatenation.  The output
;; code rebuilds the immediate (bit 0 = op3, bit 1 = op4 - 2), stores it in
;; operands[3], and prints shufpd or vshufpd depending on the alternative.
4539 (define_insn "sse2_shufpd_<mode>"
4540 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4541 (vec_select:VI8F_128
4542 (vec_concat:<ssedoublevecmode>
4543 (match_operand:VI8F_128 1 "register_operand" "0,x")
4544 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4545 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4546 (match_operand 4 "const_2_to_3_operand" "")])))]
4550 mask = INTVAL (operands[3]);
4551 mask |= (INTVAL (operands[4]) - 2) << 1;
4552 operands[3] = GEN_INT (mask);
4554 switch (which_alternative)
4557 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4559 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4564 [(set_attr "isa" "noavx,avx")
4565 (set_attr "type" "sselog")
4566 (set_attr "length_immediate" "1")
4567 (set_attr "prefix" "orig,vex")
4568 (set_attr "mode" "V2DF")])
4570 ;; Avoid combining registers from different units in a single alternative,
4571 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of V2DF operand 1 into DF operand 0 (SSE2).  Six
;; alternatives: store to memory, SSE register destinations (non-AVX and
;; AVX), and offsettable-memory sources feeding an SSE, x87 or integer
;; register.  At most one of the two operands may be memory.
;; prefix_data16 is computed from alternative 0 together with !TARGET_AVX.
4572 (define_insn "sse2_storehpd"
4573 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4575 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4576 (parallel [(const_int 1)])))]
4577 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4579 %vmovhpd\t{%1, %0|%0, %1}
4581 vunpckhpd\t{%d1, %0|%0, %d1}
4585 [(set_attr "isa" "*,noavx,avx,*,*,*")
4586 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4587 (set (attr "prefix_data16")
4589 (and (eq_attr "alternative" "0")
4590 (not (match_test "TARGET_AVX")))
4592 (const_string "*")))
4593 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4594 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (header line not visible here; presumably a
;; define_split -- confirm): extracting element 1 from a V2DF memory operand
;; into a register becomes a plain DFmode load from byte offset 8.
4597 [(set (match_operand:DF 0 "register_operand" "")
4599 (match_operand:V2DF 1 "memory_operand" "")
4600 (parallel [(const_int 1)])))]
4601 "TARGET_SSE2 && reload_completed"
4602 [(set (match_dup 0) (match_dup 1))]
4603 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of a
;; V2DF value, using single-precision move instructions: movhps to memory,
;; movhlps register to register, and movlps from the high half (%H1) of an
;; offsettable memory source.
4605 (define_insn "*vec_extractv2df_1_sse"
4606 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4608 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4609 (parallel [(const_int 1)])))]
4610 "!TARGET_SSE2 && TARGET_SSE
4611 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4613 movhps\t{%1, %0|%0, %1}
4614 movhlps\t{%1, %0|%0, %1}
4615 movlps\t{%H1, %0|%0, %H1}"
4616 [(set_attr "type" "ssemov")
4617 (set_attr "mode" "V2SF,V4SF,V2SF")])
4619 ;; Avoid combining registers from different units in a single alternative,
4620 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of V2DF operand 1 into DF operand 0 (SSE2).  Five
;; alternatives: store to memory, register-to-register, and memory sources
;; feeding an SSE, x87 or integer register.  Only the template of the first
;; alternative (%vmovlpd) is visible here.
4621 (define_insn "sse2_storelpd"
4622 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4624 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4625 (parallel [(const_int 0)])))]
4626 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4628 %vmovlpd\t{%1, %0|%0, %1}
4633 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4634 (set_attr "prefix_data16" "1,*,*,*,*")
4635 (set_attr "prefix" "maybe_vex")
4636 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (header line not visible here; presumably a
;; define_split -- confirm): extracting element 0 of V2DF operand 1 into a
;; register becomes a plain DFmode move.  The source is re-expressed in
;; DFmode either as a hard register with the same number or through
;; gen_lowpart; the test choosing between the two is not visible here.
4639 [(set (match_operand:DF 0 "register_operand" "")
4641 (match_operand:V2DF 1 "nonimmediate_operand" "")
4642 (parallel [(const_int 0)])))]
4643 "TARGET_SSE2 && reload_completed"
4646 rtx op1 = operands[1];
4648 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4650 op1 = gen_lowpart (DFmode, op1);
4651 emit_move_insn (operands[0], op1);
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of a
;; V2DF value: movlps for the memory-destination and memory-source
;; alternatives, movaps for register to register.
4655 (define_insn "*vec_extractv2df_0_sse"
4656 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4658 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4659 (parallel [(const_int 0)])))]
4660 "!TARGET_SSE2 && TARGET_SSE
4661 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4663 movlps\t{%1, %0|%0, %1}
4664 movaps\t{%1, %0|%0, %1}
4665 movlps\t{%1, %0|%0, %1}"
4666 [(set_attr "type" "ssemov")
4667 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd: combines element 0 of V2DF
;; operand 1 with DF operand 2.  Operands are first legitimized with
;; ix86_fixup_binary_operands; if that returns a destination other than
;; operands[0], the result is copied back afterwards.
4669 (define_expand "sse2_loadhpd_exp"
4670 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4673 (match_operand:V2DF 1 "nonimmediate_operand" "")
4674 (parallel [(const_int 0)]))
4675 (match_operand:DF 2 "nonimmediate_operand" "")))]
4678 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4680 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4682 /* Fix up the destination if needed. */
4683 if (dst != operands[0])
4684 emit_move_insn (operands[0], dst);
4689 ;; Avoid combining registers from different units in a single alternative,
4690 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combine element 0 of V2DF operand 1 with DF operand 2 (SSE2); operands 1
;; and 2 may not both be memory.  Seven alternatives: movhpd/vmovhpd when
;; operand 2 is in memory, unpcklpd/vunpcklpd when it is an SSE register,
;; plus three whose templates are not visible here (types ssemov, fmov,
;; imov).
4691 (define_insn "sse2_loadhpd"
4692 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4696 (match_operand:V2DF 1 "nonimmediate_operand"
4698 (parallel [(const_int 0)]))
4699 (match_operand:DF 2 "nonimmediate_operand"
4700 " m,m,x,x,x,*f,r")))]
4701 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4703 movhpd\t{%2, %0|%0, %2}
4704 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4705 unpcklpd\t{%2, %0|%0, %2}
4706 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4710 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4711 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4712 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4713 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4714 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (header line not visible here; presumably a
;; define_split -- confirm): when the V2DF destination is memory and its own
;; element 0 is kept, only DF register operand 1 has to be written, as a
;; DFmode store at byte offset 8 of the destination.
4717 [(set (match_operand:V2DF 0 "memory_operand" "")
4719 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4720 (match_operand:DF 1 "register_operand" "")))]
4721 "TARGET_SSE2 && reload_completed"
4722 [(set (match_dup 0) (match_dup 1))]
4723 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd: combines DF operand 2 with
;; element 1 of V2DF operand 1.  Operands are first legitimized with
;; ix86_fixup_binary_operands; if that returns a destination other than
;; operands[0], the result is copied back afterwards.
4725 (define_expand "sse2_loadlpd_exp"
4726 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4728 (match_operand:DF 2 "nonimmediate_operand" "")
4730 (match_operand:V2DF 1 "nonimmediate_operand" "")
4731 (parallel [(const_int 1)]))))]
4734 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4736 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4738 /* Fix up the destination if needed. */
4739 if (dst != operands[0])
4740 emit_move_insn (operands[0], dst);
4745 ;; Avoid combining registers from different units in a single alternative,
4746 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combine DF operand 2 with element 1 of V2DF operand 1 (SSE2); operands 1
;; and 2 may not both be memory.  Eleven alternatives.  Operand 1 uses
;; vector_move_operand, and alternative 0 gives it constraint "C" with a
;; %vmovsd template.  Alternative 5 is the only sselog one (shufpd with
;; immediate 2); alternatives 9 and 10 are typed fmov and imov.  The
;; templates for alternatives 8-10 are not visible here.
4747 (define_insn "sse2_loadlpd"
4748 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4749 "=x,x,x,x,x,x,x,x,m,m ,m")
4751 (match_operand:DF 2 "nonimmediate_operand"
4752 " m,m,m,x,x,0,0,x,x,*f,r")
4754 (match_operand:V2DF 1 "vector_move_operand"
4755 " C,0,x,0,x,x,o,o,0,0 ,0")
4756 (parallel [(const_int 1)]))))]
4757 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4759 %vmovsd\t{%2, %0|%0, %2}
4760 movlpd\t{%2, %0|%0, %2}
4761 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4762 movsd\t{%2, %0|%0, %2}
4763 vmovsd\t{%2, %1, %0|%0, %1, %2}
4764 shufpd\t{$2, %1, %0|%0, %1, 2}
4765 movhpd\t{%H1, %0|%0, %H1}
4766 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4770 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4772 (cond [(eq_attr "alternative" "5")
4773 (const_string "sselog")
4774 (eq_attr "alternative" "9")
4775 (const_string "fmov")
4776 (eq_attr "alternative" "10")
4777 (const_string "imov")
4779 (const_string "ssemov")))
4780 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4781 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4782 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4783 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite (header line not visible here; presumably a
;; define_split -- confirm): when the V2DF destination is memory and its own
;; element 1 is kept, only DF register operand 1 has to be written, as a
;; DFmode store at byte offset 0 of the destination.
4786 [(set (match_operand:V2DF 0 "memory_operand" "")
4788 (match_operand:DF 1 "register_operand" "")
4789 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4790 "TARGET_SSE2 && reload_completed"
4791 [(set (match_dup 0) (match_dup 1))]
4792 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Nine-alternative insn over V2DF operands 2 and 1; the combining RTL
;; operator and the insn condition are not visible here.  Templates cover
;; movsd/vmovsd (register source), movlpd/vmovlpd (memory source or
;; destination), shufpd with immediate 2, and movhps/vmovhps on the high
;; half (%H) of a memory operand.  prefix_data16 is computed from
;; alternatives 2 and 4 together with !TARGET_AVX.
4794 (define_insn "sse2_movsd"
4795 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4797 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4798 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4802 movsd\t{%2, %0|%0, %2}
4803 vmovsd\t{%2, %1, %0|%0, %1, %2}
4804 movlpd\t{%2, %0|%0, %2}
4805 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4806 %vmovlpd\t{%2, %0|%0, %2}
4807 shufpd\t{$2, %1, %0|%0, %1, 2}
4808 movhps\t{%H1, %0|%0, %H1}
4809 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4810 %vmovhps\t{%1, %H0|%H0, %1}"
4811 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4814 (eq_attr "alternative" "5")
4815 (const_string "sselog")
4816 (const_string "ssemov")))
4817 (set (attr "prefix_data16")
4819 (and (eq_attr "alternative" "2,4")
4820 (not (match_test "TARGET_AVX")))
4822 (const_string "*")))
4823 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4824 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4825 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Build a V2DF from DF operand 1 (the RTL operator line is not visible
;; here; presumably vec_duplicate, per the insn name).  Alternative 0 ties
;; operand 1 to the destination (non-AVX; its template is not visible);
;; alternative 1 uses SSE3 %vmovddup on a register or memory source.
4827 (define_insn "vec_dupv2df"
4828 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4830 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
4834 %vmovddup\t{%1, %0|%0, %1}"
4835 [(set_attr "isa" "noavx,sse3")
4836 (set_attr "type" "sselog1")
4837 (set_attr "prefix" "orig,maybe_vex")
4838 (set_attr "mode" "V2DF")])
;; Form a V2DF from DF operands 1 and 2 (the RTL operator line is not
;; visible here; presumably vec_concat, per the insn name).  Eight
;; alternatives: unpcklpd/vunpcklpd for two registers, %vmovddup when
;; operand 2 is constrained to match operand 1, movhpd/vmovhpd for a
;; memory operand 2, %vmovsd when operand 2 has constraint "C", and
;; movlhps/movhps as the two final non-AVX alternatives.
4840 (define_insn "*vec_concatv2df"
4841 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
4843 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
4844 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
4847 unpcklpd\t{%2, %0|%0, %2}
4848 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4849 %vmovddup\t{%1, %0|%0, %1}
4850 movhpd\t{%2, %0|%0, %2}
4851 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4852 %vmovsd\t{%1, %0|%0, %1}
4853 movlhps\t{%2, %0|%0, %2}
4854 movhps\t{%2, %0|%0, %2}"
4855 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
4858 (eq_attr "alternative" "0,1,2")
4859 (const_string "sselog")
4860 (const_string "ssemov")))
4861 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
4862 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
4863 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
4865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4867 ;; Parallel integral arithmetic
4869 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander over VI_AVX2.  The preparation
;; statement sets operands[2] to a register holding the all-zero vector
;; of the same mode, for use by the RTL template.
4871 (define_expand "neg<mode>2"
4872 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4875 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4877 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander over VI_AVX2.  It only legitimizes
;; the operands with ix86_fixup_binary_operands_no_copy; the insn that
;; matches the result is *<plusminus_insn><mode>3.
4879 (define_expand "<plusminus_insn><mode>3"
4880 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4882 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4883 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4885 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer vector add/subtract insn (padd*/psub* and the VEX forms).
;; Operand 1 carries the <comm> modifier.  Alternative 0 is the
;; two-operand legacy encoding, alternative 1 the three-operand VEX
;; encoding.  The insn mode follows the vector width via <sseinsnmode>.
4887 (define_insn "*<plusminus_insn><mode>3"
4888 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4890 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4891 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4892 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4894 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4895 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4896 [(set_attr "isa" "noavx,avx")
4897 (set_attr "type" "sseiadd")
4898 (set_attr "prefix_data16" "1,*")
4899 (set_attr "prefix" "orig,vex")
4900 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/subtract expander (sat_plusminus) over the byte and word
;; vector modes in VI12_AVX2.  It only legitimizes the operands with
;; ix86_fixup_binary_operands_no_copy.
4902 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4903 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4904 (sat_plusminus:VI12_AVX2
4905 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4906 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4908 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/subtract insn over VI12_AVX2.  Alternative 0 is the
;; two-operand legacy encoding, alternative 1 the three-operand VEX
;; encoding.  The "mode" attribute uses <sseinsnmode> so that it follows
;; the vector width, as the non-saturating *<plusminus_insn><mode>3 does,
;; instead of being fixed at TI for the 256-bit modes as well.
4910 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4911 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4912 (sat_plusminus:VI12_AVX2
4913 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4914 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4915 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4917 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4918 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4919 [(set_attr "isa" "noavx,avx")
4920 (set_attr "type" "sseiadd")
4921 (set_attr "prefix_data16" "1,*")
4922 (set_attr "prefix" "orig,vex")
4923 (set_attr "mode" "<sseinsnmode>")])
;; Byte-vector multiply over VI1_AVX2, split while pseudos can still be
;; created.  Each operand is interleaved with itself (high and low halves)
;; so every word holds a source byte in its low byte.  The words are then
;; multiplied in <sseunpackmode> and the even bytes of the two products are
;; merged back into the result.  For V16QImode that merge is a plain even
;; extraction; otherwise a constant permutation that also swaps the second
;; and third quarters is used.  A REG_EQUAL note with the original MULT is
;; attached to the last emitted insn.
4925 (define_insn_and_split "mul<mode>3"
4926 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4927 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4928 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4930 && can_create_pseudo_p ()"
4937 enum machine_mode mulmode = <sseunpackmode>mode;
4939 for (i = 0; i < 6; ++i)
4940 t[i] = gen_reg_rtx (<MODE>mode);
4942 /* Unpack data such that we've got a source byte in each low byte of
4943 each word. We don't care what goes into the high byte of each word.
4944 Rather than trying to get zero in there, most convenient is to let
4945 it be a copy of the low byte. */
4946 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4948 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4950 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4952 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4955 /* Multiply words. The end-of-line annotations here give a picture of what
4956 the output of that instruction looks like. Dot means don't care; the
4957 letters are the bytes of the result with A being the most significant. */
4958 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4959 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4960 gen_lowpart (mulmode, t[0]),
4961 gen_lowpart (mulmode, t[1]))));
4962 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4963 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4964 gen_lowpart (mulmode, t[2]),
4965 gen_lowpart (mulmode, t[3]))));
4967 /* Extract the even bytes and merge them back together. */
4968 if (<MODE>mode == V16QImode)
4969 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4972 /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
4973 this can't be normal even extraction, but one where additionally
4974 the second and third quarter are swapped. That is even one insn
4975 shorter than even extraction. */
4976 rtvec v = rtvec_alloc (32);
4977 for (i = 0; i < 32; ++i)
4979 = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
4983 t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
4984 ix86_expand_vec_perm_const (t);
4987 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4988 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Word-vector multiply expander over VI2_AVX2.  It only legitimizes the
;; operands with ix86_fixup_binary_operands_no_copy; the insn that matches
;; the result is *mul<mode>3 (pmullw).
4992 (define_expand "mul<mode>3"
4993 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4994 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4995 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4997 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Word-vector multiply insn: pmullw in the legacy two-operand form, or
;; vpmullw in the VEX three-operand form.  Operand 1 is marked commutative
;; with "%".
4999 (define_insn "*mul<mode>3"
5000 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5001 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5002 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5003 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5005 pmullw\t{%2, %0|%0, %2}
5006 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5007 [(set_attr "isa" "noavx,avx")
5008 (set_attr "type" "sseimul")
5009 (set_attr "prefix_data16" "1,*")
5010 (set_attr "prefix" "orig,vex")
5011 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander over VI2_AVX2.  Both operands are extended
;; (any_extend) to <ssedoublemode>, multiplied, and the product is shifted
;; right logically; the shift count is on a line not visible here.
;; The preparation statement only legitimizes the operands.
5013 (define_expand "<s>mul<mode>3_highpart"
5014 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5016 (lshiftrt:<ssedoublemode>
5017 (mult:<ssedoublemode>
5018 (any_extend:<ssedoublemode>
5019 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5020 (any_extend:<ssedoublemode>
5021 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5024 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part word multiply insn: pmulh<u>w in the legacy form, or
;; vpmulh<u>w in the VEX form.  Operand 1 is marked commutative with "%".
5026 (define_insn "*<s>mul<mode>3_highpart"
5027 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5029 (lshiftrt:<ssedoublemode>
5030 (mult:<ssedoublemode>
5031 (any_extend:<ssedoublemode>
5032 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5033 (any_extend:<ssedoublemode>
5034 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5036 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5038 pmulh<u>w\t{%2, %0|%0, %2}
5039 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5040 [(set_attr "isa" "noavx,avx")
5041 (set_attr "type" "sseimul")
5042 (set_attr "prefix_data16" "1,*")
5043 (set_attr "prefix" "orig,vex")
5044 (set_attr "mode" "<sseinsnmode>")])
;; Expander for a V4DI product of the even elements (0, 2, 4, 6) of two
;; V8SI operands; per the name this is the unsigned variant.  The
;; preparation statement only legitimizes the operands.
5046 (define_expand "avx2_umulv4siv4di3"
5047 [(set (match_operand:V4DI 0 "register_operand" "")
5051 (match_operand:V8SI 1 "nonimmediate_operand" "")
5052 (parallel [(const_int 0) (const_int 2)
5053 (const_int 4) (const_int 6)])))
5056 (match_operand:V8SI 2 "nonimmediate_operand" "")
5057 (parallel [(const_int 0) (const_int 2)
5058 (const_int 4) (const_int 6)])))))]
5060 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on the even elements (0, 2, 4, 6) of two V8SI operands, giving
;; a V4DI result.  Operand 1 is marked commutative with "%".
;; NOTE(review): the name says "avx" but the condition is TARGET_AVX2 and
;; the expander is avx2_umulv4siv4di3; the name only shows up in dumps.
5062 (define_insn "*avx_umulv4siv4di3"
5063 [(set (match_operand:V4DI 0 "register_operand" "=x")
5067 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5068 (parallel [(const_int 0) (const_int 2)
5069 (const_int 4) (const_int 6)])))
5072 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5073 (parallel [(const_int 0) (const_int 2)
5074 (const_int 4) (const_int 6)])))))]
5075 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5076 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5077 [(set_attr "type" "sseimul")
5078 (set_attr "prefix" "vex")
5079 (set_attr "mode" "OI")])
;; Expander for a V2DI product of the even elements (0, 2) of two V4SI
;; operands; per the name this is the unsigned variant.  The preparation
;; statement only legitimizes the operands.
5081 (define_expand "sse2_umulv2siv2di3"
5082 [(set (match_operand:V2DI 0 "register_operand" "")
5086 (match_operand:V4SI 1 "nonimmediate_operand" "")
5087 (parallel [(const_int 0) (const_int 2)])))
5090 (match_operand:V4SI 2 "nonimmediate_operand" "")
5091 (parallel [(const_int 0) (const_int 2)])))))]
5093 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq on the even elements (0, 2) of two V4SI operands,
;; giving a V2DI result.  Operand 1 is marked commutative with "%".
5095 (define_insn "*sse2_umulv2siv2di3"
5096 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5100 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5101 (parallel [(const_int 0) (const_int 2)])))
5104 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5105 (parallel [(const_int 0) (const_int 2)])))))]
5106 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5108 pmuludq\t{%2, %0|%0, %2}
5109 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5110 [(set_attr "isa" "noavx,avx")
5111 (set_attr "type" "sseimul")
5112 (set_attr "prefix_data16" "1,*")
5113 (set_attr "prefix" "orig,vex")
5114 (set_attr "mode" "TI")])
;; Expander for a V4DI product of the even elements (0, 2, 4, 6) of two
;; V8SI operands; counterpart of avx2_umulv4siv4di3 without the "u".
;; The preparation statement only legitimizes the operands.
5116 (define_expand "avx2_mulv4siv4di3"
5117 [(set (match_operand:V4DI 0 "register_operand" "")
5121 (match_operand:V8SI 1 "nonimmediate_operand" "")
5122 (parallel [(const_int 0) (const_int 2)
5123 (const_int 4) (const_int 6)])))
5126 (match_operand:V8SI 2 "nonimmediate_operand" "")
5127 (parallel [(const_int 0) (const_int 2)
5128 (const_int 4) (const_int 6)])))))]
5130 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on the even elements (0, 2, 4, 6) of two V8SI operands, giving a
;; V4DI result.  Operand 1 carries the "%" commutative modifier, matching
;; *avx_umulv4siv4di3 and *sse4_1_mulv2siv2di3.  The insn condition
;; (ix86_binary_operator_ok with MULT) already treats the operands as
;; interchangeable, so the modifier lets the register allocator swap them
;; to satisfy the "x" / "xm" constraints.
5132 (define_insn "*avx2_mulv4siv4di3"
5133 [(set (match_operand:V4DI 0 "register_operand" "=x")
5137 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5138 (parallel [(const_int 0) (const_int 2)
5139 (const_int 4) (const_int 6)])))
5142 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5143 (parallel [(const_int 0) (const_int 2)
5144 (const_int 4) (const_int 6)])))))]
5145 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5146 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5147 [(set_attr "isa" "avx")
5148 (set_attr "type" "sseimul")
5149 (set_attr "prefix_extra" "1")
5150 (set_attr "prefix" "vex")
5151 (set_attr "mode" "OI")])
;; Expander for a V2DI product of the even elements (0, 2) of two V4SI
;; operands; the insn that matches the result is *sse4_1_mulv2siv2di3
;; (pmuldq).  The preparation statement only legitimizes the operands.
5153 (define_expand "sse4_1_mulv2siv2di3"
5154 [(set (match_operand:V2DI 0 "register_operand" "")
5158 (match_operand:V4SI 1 "nonimmediate_operand" "")
5159 (parallel [(const_int 0) (const_int 2)])))
5162 (match_operand:V4SI 2 "nonimmediate_operand" "")
5163 (parallel [(const_int 0) (const_int 2)])))))]
5165 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq / vpmuldq (SSE4.1) on the even elements (0, 2) of two V4SI
;; operands, giving a V2DI result.  Operand 1 is marked commutative
;; with "%".
5167 (define_insn "*sse4_1_mulv2siv2di3"
5168 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5172 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5173 (parallel [(const_int 0) (const_int 2)])))
5176 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5177 (parallel [(const_int 0) (const_int 2)])))))]
5178 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5180 pmuldq\t{%2, %0|%0, %2}
5181 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5182 [(set_attr "isa" "noavx,avx")
5183 (set_attr "type" "sseimul")
5184 (set_attr "prefix_data16" "1,*")
5185 (set_attr "prefix_extra" "1")
5186 (set_attr "prefix" "orig,vex")
5187 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd form: V16HI operands 1 and 2 each
;; appear twice, once selected starting at element 0 and once (through
;; match_dup) starting at element 1, producing a V8SI result.  Most of the
;; template lines are not visible here.  The preparation statement only
;; legitimizes the operands.
5189 (define_expand "avx2_pmaddwd"
5190 [(set (match_operand:V8SI 0 "register_operand" "")
5195 (match_operand:V16HI 1 "nonimmediate_operand" "")
5196 (parallel [(const_int 0)
5206 (match_operand:V16HI 2 "nonimmediate_operand" "")
5207 (parallel [(const_int 0)
5217 (vec_select:V8HI (match_dup 1)
5218 (parallel [(const_int 1)
5227 (vec_select:V8HI (match_dup 2)
5228 (parallel [(const_int 1)
5235 (const_int 15)]))))))]
5237 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for the 128-bit pmaddwd form: V8HI operands 1 and 2 each
;; appear twice, once selected starting at element 0 and once (through
;; match_dup) starting at element 1, producing a V4SI result.  Most of the
;; template lines are not visible here.  The preparation statement only
;; legitimizes the operands.
5239 (define_expand "sse2_pmaddwd"
5240 [(set (match_operand:V4SI 0 "register_operand" "")
5245 (match_operand:V8HI 1 "nonimmediate_operand" "")
5246 (parallel [(const_int 0)
5252 (match_operand:V8HI 2 "nonimmediate_operand" "")
5253 (parallel [(const_int 0)
5259 (vec_select:V4HI (match_dup 1)
5260 (parallel [(const_int 1)
5265 (vec_select:V4HI (match_dup 2)
5266 (parallel [(const_int 1)
5269 (const_int 7)]))))))]
5271 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; vpmaddwd on V16HI operands with a V8SI result (AVX2).  Single VEX
;; alternative; operand 1 is marked commutative with "%".
5273 (define_insn "*avx2_pmaddwd"
5274 [(set (match_operand:V8SI 0 "register_operand" "=x")
5279 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5280 (parallel [(const_int 0)
5290 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5291 (parallel [(const_int 0)
5301 (vec_select:V8HI (match_dup 1)
5302 (parallel [(const_int 1)
5311 (vec_select:V8HI (match_dup 2)
5312 (parallel [(const_int 1)
5319 (const_int 15)]))))))]
5320 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5321 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5322 [(set_attr "type" "sseiadd")
5323 (set_attr "prefix" "vex")
5324 (set_attr "mode" "OI")])
;; pmaddwd / vpmaddwd on V8HI operands with a V4SI result (SSE2).
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.  Operand 1 is marked commutative with "%".
5326 (define_insn "*sse2_pmaddwd"
5327 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5332 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5333 (parallel [(const_int 0)
5339 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5340 (parallel [(const_int 0)
5346 (vec_select:V4HI (match_dup 1)
5347 (parallel [(const_int 1)
5352 (vec_select:V4HI (match_dup 2)
5353 (parallel [(const_int 1)
5356 (const_int 7)]))))))]
5357 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5359 pmaddwd\t{%2, %0|%0, %2}
5360 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5361 [(set_attr "isa" "noavx,avx")
5362 (set_attr "type" "sseiadd")
5363 (set_attr "atom_unit" "simul")
5364 (set_attr "prefix_data16" "1,*")
5365 (set_attr "prefix" "orig,vex")
5366 (set_attr "mode" "TI")])
;; Doubleword-vector multiply expander over VI4_AVX2.  The operands are
;; legitimized with ix86_fixup_binary_operands_no_copy only when
;; TARGET_SSE4_1 or TARGET_AVX is set.  Otherwise they are left as the
;; register operands that the *sse2_mulv4si3 fallback expects.
5368 (define_expand "mul<mode>3"
5369 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5370 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5371 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5374 if (TARGET_SSE4_1 || TARGET_AVX)
5375 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Doubleword-vector multiply insn: pmulld in the legacy two-operand form,
;; or vpmulld in the VEX three-operand form.  Requires SSE4.1; operand 1
;; is marked commutative with "%".
5378 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5379 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5380 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5381 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5382 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5384 pmulld\t{%2, %0|%0, %2}
5385 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5386 [(set_attr "isa" "noavx,avx")
5387 (set_attr "type" "sseimul")
5388 (set_attr "prefix_extra" "1")
5389 (set_attr "prefix" "orig,vex")
5390 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets without SSE4.1 or AVX, split while
;; pseudos can still be created.  Elements 0 and 2 are multiplied with
;; sse2_umulv2siv2di3.  Both inputs are then shifted right as V1TI values
;; so elements 1 and 3 land in the even slots, and those are multiplied the
;; same way.  pshufd moves the low halves of each product pair into
;; elements 0 and 1, and an interleave-low merges the two results.  A
;; REG_EQUAL note with the original MULT is attached to the last insn.
5392 (define_insn_and_split "*sse2_mulv4si3"
5393 [(set (match_operand:V4SI 0 "register_operand" "")
5394 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5395 (match_operand:V4SI 2 "register_operand" "")))]
5396 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5397 && can_create_pseudo_p ()"
5402 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5408 t1 = gen_reg_rtx (V4SImode);
5409 t2 = gen_reg_rtx (V4SImode);
5410 t3 = gen_reg_rtx (V4SImode);
5411 t4 = gen_reg_rtx (V4SImode);
5412 t5 = gen_reg_rtx (V4SImode);
5413 t6 = gen_reg_rtx (V4SImode);
5414 thirtytwo = GEN_INT (32);
5416 /* Multiply elements 2 and 0. */
5417 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5420 /* Shift both input vectors down one element, so that elements 3
5421 and 1 are now in the slots for elements 2 and 0. For K8, at
5422 least, this is faster than using a shuffle. */
5423 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5424 gen_lowpart (V1TImode, op1),
5426 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5427 gen_lowpart (V1TImode, op2),
5429 /* Multiply elements 3 and 1. */
5430 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5433 /* Move the results in element 2 down to element 1; we don't care
5434 what goes in elements 2 and 3. */
5435 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5436 const0_rtx, const0_rtx));
5437 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5438 const0_rtx, const0_rtx));
5440 /* Merge the parts back together. */
5441 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5443 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5444 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; Quadword-vector multiply over VI8_AVX2, split while pseudos can still be
;; created.  Two strategies:
;;  - TARGET_XOP with V2DImode: view the inputs as V4SI, multiply a
;;    pshufd-permuted copy of op1 by op2 to get the cross products, add
;;    them pairwise with phadddq, shift the sums left by 32, and let
;;    pmacsdql add the low-by-low product.
;;  - Otherwise: lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32), built from
;;    three unsigned widening multiplies, two right shifts by 32 to expose
;;    the high halves, two left shifts and two adds.
;; A REG_EQUAL note with the original MULT is attached to the last insn.
5448 (define_insn_and_split "mul<mode>3"
5449 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5450 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5451 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5453 && can_create_pseudo_p ()"
5458 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5465 if (TARGET_XOP && <MODE>mode == V2DImode)
5467 /* op1: A,B,C,D, op2: E,F,G,H */
5468 op1 = gen_lowpart (V4SImode, op1);
5469 op2 = gen_lowpart (V4SImode, op2);
5471 t1 = gen_reg_rtx (V4SImode);
5472 t2 = gen_reg_rtx (V4SImode);
5473 t3 = gen_reg_rtx (V2DImode);
5474 t4 = gen_reg_rtx (V2DImode);
5477 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5483 /* t2: (B*E),(A*F),(D*G),(C*H) */
5484 emit_insn (gen_mulv4si3 (t2, t1, op2));
5486 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5487 emit_insn (gen_xop_phadddq (t3, t2));
5489 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5490 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5492 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5493 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5497 t1 = gen_reg_rtx (<MODE>mode);
5498 t2 = gen_reg_rtx (<MODE>mode);
5499 t3 = gen_reg_rtx (<MODE>mode);
5500 t4 = gen_reg_rtx (<MODE>mode);
5501 t5 = gen_reg_rtx (<MODE>mode);
5502 t6 = gen_reg_rtx (<MODE>mode);
5503 thirtytwo = GEN_INT (32);
5505 /* Multiply low parts. */
5506 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5507 (t1, gen_lowpart (<ssepackmode>mode, op1),
5508 gen_lowpart (<ssepackmode>mode, op2)));
5510 /* Shift input vectors right 32 bits so we can multiply high parts. */
5511 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5512 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5514 /* Multiply high parts by low parts. */
5515 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5516 (t4, gen_lowpart (<ssepackmode>mode, op1),
5517 gen_lowpart (<ssepackmode>mode, t3)));
5518 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5519 (t5, gen_lowpart (<ssepackmode>mode, op2),
5520 gen_lowpart (<ssepackmode>mode, t2)));
5522 /* Shift them back. */
5523 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5524 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5526 /* Add the three parts together. */
5527 emit_insn (gen_add<mode>3 (t6, t1, t4));
5528 emit_insn (gen_add<mode>3 (op0, t6, t5));
5531 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5532 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Widening multiply, high half, for the word vector modes in VI2_AVX2.
;; Computes the low product words (mul<mode>3) and the high product words
;; (<s>mul<mode>3_highpart) separately, then interleaves their high halves
;; into the <MODE>mode view of the wide destination.
5536 (define_expand "vec_widen_<s>mult_hi_<mode>"
5537 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5538 (any_extend:<sseunpackmode>
5539 (match_operand:VI2_AVX2 1 "register_operand" ""))
5540 (match_operand:VI2_AVX2 2 "register_operand" "")]
5543 rtx op1, op2, t1, t2, dest;
5547 t1 = gen_reg_rtx (<MODE>mode);
5548 t2 = gen_reg_rtx (<MODE>mode);
5549 dest = gen_lowpart (<MODE>mode, operands[0]);
5551 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5552 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5553 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Widening multiply, low half, for the word vector modes in VI2_AVX2.
;; Computes the low product words (mul<mode>3) and the high product words
;; (<s>mul<mode>3_highpart) separately, then interleaves their low halves
;; into the <MODE>mode view of the wide destination.
5557 (define_expand "vec_widen_<s>mult_lo_<mode>"
5558 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5559 (any_extend:<sseunpackmode>
5560 (match_operand:VI2_AVX2 1 "register_operand" ""))
5561 (match_operand:VI2_AVX2 2 "register_operand" "")]
5564 rtx op1, op2, t1, t2, dest;
5568 t1 = gen_reg_rtx (<MODE>mode);
5569 t2 = gen_reg_rtx (<MODE>mode);
5570 dest = gen_lowpart (<MODE>mode, operands[0]);
5572 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5573 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5574 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; Widening V8SI -> V4DI multiply, high half.  Each input is first permuted
;; as V4DI with selector (0, 2, 1, 3), then shuffled with pshufd selector
;; (2, 2, 3, 3) so the wanted elements sit in the even positions.  The
;; even-element widening multiply avx2_<u>mulv4siv4di3 then produces the
;; result.
5578 (define_expand "vec_widen_<s>mult_hi_v8si"
5579 [(match_operand:V4DI 0 "register_operand" "")
5580 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5581 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5586 t1 = gen_reg_rtx (V4DImode);
5587 t2 = gen_reg_rtx (V4DImode);
5588 t3 = gen_reg_rtx (V8SImode);
5589 t4 = gen_reg_rtx (V8SImode);
5590 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5591 const0_rtx, const2_rtx,
5592 const1_rtx, GEN_INT (3)));
5593 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5594 const0_rtx, const2_rtx,
5595 const1_rtx, GEN_INT (3)));
5596 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5597 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5598 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5599 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5600 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Widening multiply V8SI x V8SI -> V4DI, "lo" variant.
;; Same shape as the "hi" expander: V4DI permute of each input with
;; selectors (0, 2, 1, 3), then pshufd -- here with the immediate
;; 0 + (0 << 2) + (1 << 4) + (1 << 6) -- and finally
;; avx2_<u>mulv4siv4di3 of the shuffled values into operand 0.
;; NOTE(review): the enabling condition and the declaration of t1..t4 are
;; not visible in this excerpt.
5604 (define_expand "vec_widen_<s>mult_lo_v8si"
5605 [(match_operand:V4DI 0 "register_operand" "")
5606 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5607 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5612 t1 = gen_reg_rtx (V4DImode);
5613 t2 = gen_reg_rtx (V4DImode);
5614 t3 = gen_reg_rtx (V8SImode);
5615 t4 = gen_reg_rtx (V8SImode);
5616 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5617 const0_rtx, const2_rtx,
5618 const1_rtx, GEN_INT (3)));
5619 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5620 const0_rtx, const2_rtx,
5621 const1_rtx, GEN_INT (3)));
5622 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5623 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5624 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5625 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5626 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Signed widening multiply V4SI x V4SI -> V2DI, "hi" variant.
;; Two code sequences are visible:
;;  * an XOP one: pshufd both inputs with selectors (0, 2, 1, 3), zero a
;;    V2DI accumulator t3, then emit xop_pmacsdqh (operands[0], t1, t2, t3);
;;  * a generic one: interleave the high elements of each input with itself
;;    and multiply the results with sse4_1_mulv2siv2di3.
;; NOTE(review): the test choosing between the two sequences, the enabling
;; condition and the op1/op2 assignments are not visible in this excerpt.
5630 (define_expand "vec_widen_smult_hi_v4si"
5631 [(match_operand:V2DI 0 "register_operand" "")
5632 (match_operand:V4SI 1 "register_operand" "")
5633 (match_operand:V4SI 2 "register_operand" "")]
5636 rtx op1, op2, t1, t2;
5640 t1 = gen_reg_rtx (V4SImode);
5641 t2 = gen_reg_rtx (V4SImode);
5645 rtx t3 = gen_reg_rtx (V2DImode);
5647 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5648 GEN_INT (1), GEN_INT (3)));
5649 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5650 GEN_INT (1), GEN_INT (3)));
5651 emit_move_insn (t3, CONST0_RTX (V2DImode));
5653 emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
5657 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5658 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5659 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Signed widening multiply V4SI x V4SI -> V2DI, "lo" variant.
;; Two code sequences are visible:
;;  * an XOP one: pshufd both inputs with selectors (0, 2, 1, 3), zero a
;;    V2DI accumulator t3, then emit xop_pmacsdql (operands[0], t1, t2, t3);
;;  * a generic one: interleave the low elements of each input with itself
;;    and multiply the results with sse4_1_mulv2siv2di3.
;; NOTE(review): the test choosing between the two sequences, the enabling
;; condition and the op1/op2 assignments are not visible in this excerpt.
5663 (define_expand "vec_widen_smult_lo_v4si"
5664 [(match_operand:V2DI 0 "register_operand" "")
5665 (match_operand:V4SI 1 "register_operand" "")
5666 (match_operand:V4SI 2 "register_operand" "")]
5669 rtx op1, op2, t1, t2;
5673 t1 = gen_reg_rtx (V4SImode);
5674 t2 = gen_reg_rtx (V4SImode);
5678 rtx t3 = gen_reg_rtx (V2DImode);
5680 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5681 GEN_INT (1), GEN_INT (3)));
5682 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5683 GEN_INT (1), GEN_INT (3)));
5684 emit_move_insn (t3, CONST0_RTX (V2DImode));
5686 emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
5690 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5691 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5692 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "hi" variant.
;; Each input is interleaved with itself (high elements) into t1/t2, and the
;; two temporaries are multiplied by sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): the enabling condition and the op1/op2 assignments are not
;; visible in this excerpt -- presumably op1/op2 are operands[1]/[2]; confirm.
5696 (define_expand "vec_widen_umult_hi_v4si"
5697 [(match_operand:V2DI 0 "register_operand" "")
5698 (match_operand:V4SI 1 "register_operand" "")
5699 (match_operand:V4SI 2 "register_operand" "")]
5702 rtx op1, op2, t1, t2;
5706 t1 = gen_reg_rtx (V4SImode);
5707 t2 = gen_reg_rtx (V4SImode);
5709 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5710 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5711 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "lo" variant.
;; Each input is interleaved with itself (low elements) into t1/t2, and the
;; two temporaries are multiplied by sse2_umulv2siv2di3 into operand 0.
;; NOTE(review): the enabling condition and the op1/op2 assignments are not
;; visible in this excerpt -- presumably op1/op2 are operands[1]/[2]; confirm.
5715 (define_expand "vec_widen_umult_lo_v4si"
5716 [(match_operand:V2DI 0 "register_operand" "")
5717 (match_operand:V4SI 1 "register_operand" "")
5718 (match_operand:V4SI 2 "register_operand" "")]
5721 rtx op1, op2, t1, t2;
5725 t1 = gen_reg_rtx (V4SImode);
5726 t2 = gen_reg_rtx (V4SImode);
5728 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5729 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5730 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product for the VI2_AVX2 modes: operands 1 and 2 are the
;; narrow inputs, operand 3 is a <sseunpackmode> value and operand 0 the
;; <sseunpackmode> result.  The body runs pmaddwd on operands 1 and 2 into a
;; fresh temporary t and then sets operand 0 to a PLUS in <sseunpackmode>.
;; NOTE(review): the arguments of that PLUS are cut off in this excerpt --
;; presumably operands[3] and t; confirm against the full source.
5734 (define_expand "sdot_prod<mode>"
5735 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5736 (match_operand:VI2_AVX2 1 "register_operand" "")
5737 (match_operand:VI2_AVX2 2 "register_operand" "")
5738 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5741 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5742 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5743 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5744 gen_rtx_PLUS (<sseunpackmode>mode,
;; Code attribute giving the name prefix used per extension code:
;; "sse2" for zero_extend and "sse4_1" for sign_extend.  It is substituted
;; into generator names such as gen_<sse2_sse4_1>_<u>mulv2siv2di3.
5749 (define_code_attr sse2_sse4_1
5750 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; Dot product V4SI x V4SI accumulated into V2DI.  Enabled under TARGET_SSE2
;; for the ZERO_EXTEND code and under TARGET_SSE4_1 otherwise.
;; Steps visible in the body:
;;   t1 = mulv2siv2di3 (operands[1], operands[2]);  t1 += operands[3];
;;   t2, t3 = operands[1], operands[2] logically shifted right as V1TI;
;;   t4 = mulv2siv2di3 (t2, t3);  operands[0] = t1 + t4.
;; NOTE(review): the shift count passed to sse2_lshrv1ti3 is not visible in
;; this excerpt.
5752 (define_expand "<s>dot_prodv4si"
5753 [(match_operand:V2DI 0 "register_operand" "")
5754 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5755 (match_operand:V4SI 2 "register_operand" "")
5756 (match_operand:V2DI 3 "register_operand" "")]
5757 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5761 t1 = gen_reg_rtx (V2DImode);
5762 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5763 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5765 t2 = gen_reg_rtx (V4SImode);
5766 t3 = gen_reg_rtx (V4SImode);
5767 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5768 gen_lowpart (V1TImode, operands[1]),
5770 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5771 gen_lowpart (V1TImode, operands[2]),
5774 t4 = gen_reg_rtx (V2DImode);
5775 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5777 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Dot product V8SI x V8SI accumulated into V4DI.
;; Steps visible in the body:
;;   t1 = avx2_<u>mulv4siv4di3 (operands[1], operands[2]);  t1 += operands[3];
;;   t2, t3 = operands[1], operands[2] logically shifted right as V2TI;
;;   t4 = avx2_<u>mulv4siv4di3 (t2, t3);  operands[0] = t1 + t4.
;; NOTE(review): the enabling condition and the shift count passed to
;; avx2_lshrv2ti3 are not visible in this excerpt.
5781 (define_expand "<s>dot_prodv8si"
5782 [(match_operand:V4DI 0 "register_operand" "")
5783 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5784 (match_operand:V8SI 2 "register_operand" "")
5785 (match_operand:V4DI 3 "register_operand" "")]
5790 t1 = gen_reg_rtx (V4DImode);
5791 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5792 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5794 t2 = gen_reg_rtx (V8SImode);
5795 t3 = gen_reg_rtx (V8SImode);
5796 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5797 gen_lowpart (V2TImode, operands[1]),
5799 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5800 gen_lowpart (V2TImode, operands[2]),
5803 t4 = gen_reg_rtx (V4DImode);
5804 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5806 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Vector shift insn for the VI24_AVX2 modes, emitted as psra / vpsra with
;; the mode suffix.  Operand 2 is an SImode count that may be an SSE register
;; or an immediate (constraint "xN").
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; length_immediate depends on whether operand 2 is a const_int.
;; NOTE(review): the rtx code wrapping the operands and the enabling
;; condition are not visible in this excerpt.
5810 (define_insn "ashr<mode>3"
5811 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5813 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5814 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5817 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5818 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5819 [(set_attr "isa" "noavx,avx")
5820 (set_attr "type" "sseishft")
5821 (set (attr "length_immediate")
5822 (if_then_else (match_operand 2 "const_int_operand" "")
5824 (const_string "0")))
5825 (set_attr "prefix_data16" "1,*")
5826 (set_attr "prefix" "orig,vex")
5827 (set_attr "mode" "<sseinsnmode>")])
;; Vector shift insn for every code of the any_lshift iterator over the
;; VI248_AVX2 modes, emitted as p<vshift> / vp<vshift> with the mode suffix.
;; Operand 2 is an SImode count that may be an SSE register or an immediate
;; (constraint "xN").
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; length_immediate depends on whether operand 2 is a const_int.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5829 (define_insn "<shift_insn><mode>3"
5830 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5831 (any_lshift:VI248_AVX2
5832 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5833 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5836 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5837 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5838 [(set_attr "isa" "noavx,avx")
5839 (set_attr "type" "sseishft")
5840 (set (attr "length_immediate")
5841 (if_then_else (match_operand 2 "const_int_operand" "")
5843 (const_string "0")))
5844 (set_attr "prefix_data16" "1,*")
5845 (set_attr "prefix" "orig,vex")
5846 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-views operands 0
;; and 1 as V1TImode so the pattern is matched on the full 128-bit value.
;; NOTE(review): the rtx code of the shift and the enabling condition are
;; not visible in this excerpt.
5848 (define_expand "vec_shl_<mode>"
5849 [(set (match_operand:VI_128 0 "register_operand" "")
5851 (match_operand:VI_128 1 "register_operand" "")
5852 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5855 operands[0] = gen_lowpart (V1TImode, operands[0]);
5856 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Full-width shift insn for the VIMAX_AVX2 modes, emitted as pslldq
;; (alternative 0, noavx, operand 1 tied to operand 0) or vpslldq
;; (alternative 1, avx, three operands).
;; Operand 2 is an immediate accepted by const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing -- presumably converting a
;; bit count into the byte count the instruction takes; confirm.
;; NOTE(review): the rtx code of the shift and the enabling condition are
;; not visible in this excerpt.
5859 (define_insn "<sse2_avx2>_ashl<mode>3"
5860 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5862 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5863 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5866 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5868 switch (which_alternative)
5871 return "pslldq\t{%2, %0|%0, %2}";
5873 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5878 [(set_attr "isa" "noavx,avx")
5879 (set_attr "type" "sseishft")
5880 (set_attr "length_immediate" "1")
5881 (set_attr "prefix_data16" "1,*")
5882 (set_attr "prefix" "orig,vex")
5883 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes (counterpart of
;; vec_shl_<mode>).  Operand 2 must satisfy const_0_to_255_mul_8_operand.
;; The preparation code re-views operands 0 and 1 as V1TImode so the pattern
;; is matched on the full 128-bit value.
;; NOTE(review): the rtx code of the shift and the enabling condition are
;; not visible in this excerpt.
5885 (define_expand "vec_shr_<mode>"
5886 [(set (match_operand:VI_128 0 "register_operand" "")
5888 (match_operand:VI_128 1 "register_operand" "")
5889 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5892 operands[0] = gen_lowpart (V1TImode, operands[0]);
5893 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Full-width logical right shift (lshiftrt) insn for the VIMAX_AVX2 modes,
;; emitted as psrldq (alternative 0, noavx, operand 1 tied to operand 0) or
;; vpsrldq (alternative 1, avx, three operands).
;; Operand 2 is an immediate accepted by const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing -- presumably converting a
;; bit count into the byte count the instruction takes; confirm.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5896 (define_insn "<sse2_avx2>_lshr<mode>3"
5897 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5898 (lshiftrt:VIMAX_AVX2
5899 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5900 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5903 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5905 switch (which_alternative)
5908 return "psrldq\t{%2, %0|%0, %2}";
5910 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5915 [(set_attr "isa" "noavx,avx")
5916 (set_attr "type" "sseishft")
5917 (set_attr "length_immediate" "1")
5918 (set_attr "atom_unit" "sishuf")
5919 (set_attr "prefix_data16" "1,*")
5920 (set_attr "prefix" "orig,vex")
5921 (set_attr "mode" "<sseinsnmode>")])
;; Binary-operation expander for the VI124_256 modes.  Both inputs may be
;; registers or memory; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the code iterator wrapping the operands and the enabling
;; condition are not visible in this excerpt.
5924 (define_expand "<code><mode>3"
5925 [(set (match_operand:VI124_256 0 "register_operand" "")
5927 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5928 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5930 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 insn for the VI124_256 modes, emitted as vp<maxmin_int> with the
;; mode suffix in three-operand form.  Operands 1 and 2 are commutative
;; ("%" constraint modifier); operand 2 may be memory.  Requires TARGET_AVX2
;; and ix86_binary_operator_ok for <CODE>.
;; NOTE(review): the code iterator wrapping the operands is not visible in
;; this excerpt.
5932 (define_insn "*avx2_<code><mode>3"
5933 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5935 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5936 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5937 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5938 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5939 [(set_attr "type" "sseiadd")
5940 (set_attr "prefix_extra" "1")
5941 (set_attr "prefix" "vex")
5942 (set_attr "mode" "OI")])
;; Max/min expander for the VI8_AVX2 modes, implemented as a vector
;; conditional move through ix86_expand_int_vcond.
;; xops[0] is the destination.  For SMAX/UMAX the selected values are
;; (operands[1], operands[2]); for the other codes they are swapped.  The
;; comparison is operands[1] > operands[2], using GTU for UMAX/UMIN and GT
;; otherwise.
;; NOTE(review): the code iterator, the enabling condition, the local
;; declarations and the handling of `ok' are not visible in this excerpt.
5944 (define_expand "<code><mode>3"
5945 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5947 (match_operand:VI8_AVX2 1 "register_operand" "")
5948 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5955 xops[0] = operands[0];
5957 if (<CODE> == SMAX || <CODE> == UMAX)
5959 xops[1] = operands[1];
5960 xops[2] = operands[2];
5964 xops[1] = operands[2];
5965 xops[2] = operands[1];
5968 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5970 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5971 xops[4] = operands[1];
5972 xops[5] = operands[2];
5974 ok = ix86_expand_int_vcond (xops);
;; Max/min expander for the VI124_128 modes that falls back to a GT-based
;; vector conditional move.
;; When TARGET_SSE4_1 holds or the mode is V8HImode the operands are only
;; fixed up with ix86_fixup_binary_operands_no_copy.  On the other path both
;; inputs are forced into registers and ix86_expand_int_vcond is called with
;; the comparison operands[1] > operands[2] (GT); the selected values are
;; (operands[1], operands[2]) in one arm and swapped in the other.
;; NOTE(review): the code iterator, the enabling condition and the test that
;; picks between the two xops[1]/xops[2] orders are not visible here.
5979 (define_expand "<code><mode>3"
5980 [(set (match_operand:VI124_128 0 "register_operand" "")
5982 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5983 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5986 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5987 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5993 xops[0] = operands[0];
5994 operands[1] = force_reg (<MODE>mode, operands[1]);
5995 operands[2] = force_reg (<MODE>mode, operands[2]);
5999 xops[1] = operands[1];
6000 xops[2] = operands[2];
6004 xops[1] = operands[2];
6005 xops[2] = operands[1];
6008 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6009 xops[4] = operands[1];
6010 xops[5] = operands[2];
6012 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the VI14_128 modes, emitted as p<maxmin_int> /
;; vp<maxmin_int> with the mode suffix.  Operands 1 and 2 are commutative
;; ("%" constraint modifier); operand 2 may be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the code iterator wrapping the operands is not visible in
;; this excerpt.
6018 (define_insn "*sse4_1_<code><mode>3"
6019 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6021 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6022 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6023 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6025 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6026 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6027 [(set_attr "isa" "noavx,avx")
6028 (set_attr "type" "sseiadd")
6029 (set_attr "prefix_extra" "1,*")
6030 (set_attr "prefix" "orig,vex")
6031 (set_attr "mode" "TI")])
;; SSE2 insn on V8HI, emitted as p<maxmin_int>w / vp<maxmin_int>w.
;; Operands 1 and 2 are commutative ("%" constraint modifier); operand 2 may
;; be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the code iterator wrapping the operands is not visible in
;; this excerpt.
6033 (define_insn "*<code>v8hi3"
6034 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6036 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6037 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6038 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6040 p<maxmin_int>w\t{%2, %0|%0, %2}
6041 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6042 [(set_attr "isa" "noavx,avx")
6043 (set_attr "type" "sseiadd")
6044 (set_attr "prefix_data16" "1,*")
6045 (set_attr "prefix_extra" "*,1")
6046 (set_attr "prefix" "orig,vex")
6047 (set_attr "mode" "TI")])
;; Max/min expander for the VI124_128 modes that falls back to a GTU-based
;; vector conditional move.  Three paths are visible:
;;  * TARGET_SSE4_1 or V16QImode: only fix up the operands with
;;    ix86_fixup_binary_operands_no_copy.
;;  * UMAX on V8HImode: compute ussub (operands[1], op2) and add op2 back,
;;    i.e. (a -us b) + b.  If the destination is rtx_equal_p to op2, the
;;    intermediate goes to a fresh register so op2 is still intact for the
;;    add.
;;  * otherwise: force both inputs into registers and call
;;    ix86_expand_int_vcond with the comparison operands[1] > operands[2]
;;    (GTU); the selected values are (operands[1], operands[2]) in one arm
;;    and swapped in the other.
;; NOTE(review): the code iterator, the enabling condition and the test that
;; picks between the two xops[1]/xops[2] orders are not visible here.
6049 (define_expand "<code><mode>3"
6050 [(set (match_operand:VI124_128 0 "register_operand" "")
6052 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6053 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6056 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6057 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6058 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6060 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6061 operands[1] = force_reg (<MODE>mode, operands[1]);
6062 if (rtx_equal_p (op3, op2))
6063 op3 = gen_reg_rtx (V8HImode);
6064 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6065 emit_insn (gen_addv8hi3 (op0, op3, op2));
6073 operands[1] = force_reg (<MODE>mode, operands[1]);
6074 operands[2] = force_reg (<MODE>mode, operands[2]);
6076 xops[0] = operands[0];
6080 xops[1] = operands[1];
6081 xops[2] = operands[2];
6085 xops[1] = operands[2];
6086 xops[2] = operands[1];
6089 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6090 xops[4] = operands[1];
6091 xops[5] = operands[2];
6093 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the VI24_128 modes, emitted as p<maxmin_int> /
;; vp<maxmin_int> with the mode suffix.  Operands 1 and 2 are commutative
;; ("%" constraint modifier); operand 2 may be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the code iterator wrapping the operands is not visible in
;; this excerpt.
6099 (define_insn "*sse4_1_<code><mode>3"
6100 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6102 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6103 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6104 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6106 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6107 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6108 [(set_attr "isa" "noavx,avx")
6109 (set_attr "type" "sseiadd")
6110 (set_attr "prefix_extra" "1,*")
6111 (set_attr "prefix" "orig,vex")
6112 (set_attr "mode" "TI")])
;; SSE2 insn on V16QI, emitted as p<maxmin_int>b / vp<maxmin_int>b.
;; Operands 1 and 2 are commutative ("%" constraint modifier); operand 2 may
;; be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the code iterator wrapping the operands is not visible in
;; this excerpt.
6114 (define_insn "*<code>v16qi3"
6115 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6117 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6118 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6119 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6121 p<maxmin_int>b\t{%2, %0|%0, %2}
6122 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6123 [(set_attr "isa" "noavx,avx")
6124 (set_attr "type" "sseiadd")
6125 (set_attr "prefix_data16" "1,*")
6126 (set_attr "prefix_extra" "*,1")
6127 (set_attr "prefix" "orig,vex")
6128 (set_attr "mode" "TI")])
6130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6132 ;; Parallel integral comparisons
6134 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for the element-wise equality comparison on the VI_256
;; modes.  It only fixes up the operands for EQ through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the rtx code line and the enabling condition are not
;; visible in this excerpt.
6136 (define_expand "avx2_eq<mode>3"
6137 [(set (match_operand:VI_256 0 "register_operand" "")
6139 (match_operand:VI_256 1 "nonimmediate_operand" "")
6140 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6142 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality-compare insn for the VI_256 modes, emitted as vpcmpeq with
;; the mode suffix in three-operand form.  Operands 1 and 2 are commutative
;; ("%" constraint modifier); operand 2 may be memory.  Requires TARGET_AVX2
;; and ix86_binary_operator_ok for EQ.
6144 (define_insn "*avx2_eq<mode>3"
6145 [(set (match_operand:VI_256 0 "register_operand" "=x")
6147 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6148 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6149 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6150 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6151 [(set_attr "type" "ssecmp")
6152 (set_attr "prefix_extra" "1")
6153 (set_attr "prefix" "vex")
6154 (set_attr "mode" "OI")])
;; SSE4.1 equality-compare insn on V2DI, emitted as pcmpeqq / vpcmpeqq.
;; Operands 1 and 2 are commutative ("%" constraint modifier); operand 2 may
;; be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
6156 (define_insn "*sse4_1_eqv2di3"
6157 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6159 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6160 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6161 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6163 pcmpeqq\t{%2, %0|%0, %2}
6164 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6165 [(set_attr "isa" "noavx,avx")
6166 (set_attr "type" "ssecmp")
6167 (set_attr "prefix_extra" "1")
6168 (set_attr "prefix" "orig,vex")
6169 (set_attr "mode" "TI")])
;; SSE2 equality-compare insn for the VI124_128 modes, emitted as pcmpeq /
;; vpcmpeq with the mode suffix.  Only enabled when TARGET_XOP is off.
;; Operands 1 and 2 are commutative ("%" constraint modifier); operand 2 may
;; be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
6171 (define_insn "*sse2_eq<mode>3"
6172 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6174 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6175 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6176 "TARGET_SSE2 && !TARGET_XOP
6177 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6179 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6180 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6181 [(set_attr "isa" "noavx,avx")
6182 (set_attr "type" "ssecmp")
6183 (set_attr "prefix_data16" "1,*")
6184 (set_attr "prefix" "orig,vex")
6185 (set_attr "mode" "TI")])
;; Named expander for the equality comparison on the VI124_128 modes,
;; available under TARGET_SSE2 when TARGET_XOP is off.  It only fixes up the
;; operands for EQ through ix86_fixup_binary_operands_no_copy.
6187 (define_expand "sse2_eq<mode>3"
6188 [(set (match_operand:VI124_128 0 "register_operand" "")
6190 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6191 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6192 "TARGET_SSE2 && !TARGET_XOP "
6193 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the equality comparison on V2DI.  It only fixes up the
;; operands for EQ through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the rtx code line and the enabling condition are not
;; visible in this excerpt.
6195 (define_expand "sse4_1_eqv2di3"
6196 [(set (match_operand:V2DI 0 "register_operand" "")
6198 (match_operand:V2DI 1 "nonimmediate_operand" "")
6199 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6201 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Greater-than compare insn on V2DI, emitted as pcmpgtq / vpcmpgtq.
;; The operands are not commutative: operand 1 must be a register, operand 2
;; may be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the rtx code line and the enabling condition are not
;; visible in this excerpt.
6203 (define_insn "sse4_2_gtv2di3"
6204 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6206 (match_operand:V2DI 1 "register_operand" "0,x")
6207 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6210 pcmpgtq\t{%2, %0|%0, %2}
6211 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6212 [(set_attr "isa" "noavx,avx")
6213 (set_attr "type" "ssecmp")
6214 (set_attr "prefix_extra" "1")
6215 (set_attr "prefix" "orig,vex")
6216 (set_attr "mode" "TI")])
;; Greater-than compare insn for the VI_256 modes, emitted as vpcmpgt with
;; the mode suffix in three-operand VEX form.  Operand 1 must be a register;
;; operand 2 may be memory.
;; NOTE(review): the rtx code line and the enabling condition are not
;; visible in this excerpt.
6218 (define_insn "avx2_gt<mode>3"
6219 [(set (match_operand:VI_256 0 "register_operand" "=x")
6221 (match_operand:VI_256 1 "register_operand" "x")
6222 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6224 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6225 [(set_attr "type" "ssecmp")
6226 (set_attr "prefix_extra" "1")
6227 (set_attr "prefix" "vex")
6228 (set_attr "mode" "OI")])
;; SSE2 greater-than compare insn for the VI124_128 modes, emitted as pcmpgt
;; / vpcmpgt with the mode suffix.  Only enabled when TARGET_XOP is off.
;; Operand 1 must be a register; operand 2 may be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
6230 (define_insn "sse2_gt<mode>3"
6231 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6233 (match_operand:VI124_128 1 "register_operand" "0,x")
6234 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6235 "TARGET_SSE2 && !TARGET_XOP"
6237 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6238 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6239 [(set_attr "isa" "noavx,avx")
6240 (set_attr "type" "ssecmp")
6241 (set_attr "prefix_data16" "1,*")
6242 (set_attr "prefix" "orig,vex")
6243 (set_attr "mode" "TI")])
;; Vector conditional select for 256-bit data modes (V_256) driven by a
;; comparison (operator 3) of the VI_256 operands 4 and 5.  Operands 1 and 2
;; are the two values selected between.  The pattern is only enabled when
;; the data mode and the comparison mode have the same number of elements.
;; All of the work is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of `ok'
;; are not visible in this excerpt.
6245 (define_expand "vcond<V_256:mode><VI_256:mode>"
6246 [(set (match_operand:V_256 0 "register_operand" "")
6248 (match_operator 3 ""
6249 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6250 (match_operand:VI_256 5 "general_operand" "")])
6251 (match_operand:V_256 1 "" "")
6252 (match_operand:V_256 2 "" "")))]
6254 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6255 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6257 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 128-bit data modes (V_128) driven by a
;; comparison (operator 3) of the VI124_128 operands 4 and 5.  Operands 1
;; and 2 are the two values selected between.  The pattern is only enabled
;; when the data mode and the comparison mode have the same number of
;; elements.  All of the work is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of `ok'
;; are not visible in this excerpt.
6262 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6263 [(set (match_operand:V_128 0 "register_operand" "")
6265 (match_operator 3 ""
6266 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6267 (match_operand:VI124_128 5 "general_operand" "")])
6268 (match_operand:V_128 1 "" "")
6269 (match_operand:V_128 2 "" "")))]
6271 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6272 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6274 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for the VI8F_128 data modes driven by a
;; comparison (operator 3) of the V2DI operands 4 and 5.  Operands 1 and 2
;; are the two values selected between.  All of the work is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the enabling condition and the handling of `ok' are not
;; visible in this excerpt.
6279 (define_expand "vcond<VI8F_128:mode>v2di"
6280 [(set (match_operand:VI8F_128 0 "register_operand" "")
6281 (if_then_else:VI8F_128
6282 (match_operator 3 ""
6283 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6284 (match_operand:V2DI 5 "general_operand" "")])
6285 (match_operand:VI8F_128 1 "" "")
6286 (match_operand:VI8F_128 2 "" "")))]
6289 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 256-bit vcond expander: conditional select on
;; V_256 data driven by a comparison (operator 3) of the VI_256 operands 4
;; and 5, both nonimmediate here.  Operands 1 and 2 are general operands.
;; Only enabled when the data mode and the comparison mode have the same
;; number of elements.  All of the work is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of `ok'
;; are not visible in this excerpt.
6294 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6295 [(set (match_operand:V_256 0 "register_operand" "")
6297 (match_operator 3 ""
6298 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6299 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6300 (match_operand:V_256 1 "general_operand" "")
6301 (match_operand:V_256 2 "general_operand" "")))]
6303 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6304 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6306 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 128-bit vcond expander: conditional select on
;; V_128 data driven by a comparison (operator 3) of the VI124_128 operands
;; 4 and 5, both nonimmediate here.  Operands 1 and 2 are general operands.
;; Only enabled when the data mode and the comparison mode have the same
;; number of elements.  All of the work is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of `ok'
;; are not visible in this excerpt.
6311 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6312 [(set (match_operand:V_128 0 "register_operand" "")
6314 (match_operator 3 ""
6315 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6316 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6317 (match_operand:V_128 1 "general_operand" "")
6318 (match_operand:V_128 2 "general_operand" "")))]
6320 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6321 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6323 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of vcond<VI8F_128:mode>v2di: conditional select on
;; VI8F_128 data driven by a comparison (operator 3) of the V2DI operands 4
;; and 5, both nonimmediate here.  Operands 1 and 2 are general operands.
;; All of the work is delegated to ix86_expand_int_vcond.
;; NOTE(review): the enabling condition and the handling of `ok' are not
;; visible in this excerpt.
6328 (define_expand "vcondu<VI8F_128:mode>v2di"
6329 [(set (match_operand:VI8F_128 0 "register_operand" "")
6330 (if_then_else:VI8F_128
6331 (match_operator 3 ""
6332 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6333 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6334 (match_operand:VI8F_128 1 "general_operand" "")
6335 (match_operand:VI8F_128 2 "general_operand" "")))]
6338 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the variable-selector vec_perm<mode> expander: the six
;; 128-bit modes unconditionally, and the six 256-bit modes only when
;; TARGET_AVX2 holds.
6343 (define_mode_iterator VEC_PERM_AVX2
6344 [V16QI V8HI V4SI V2DI V4SF V2DF
6345 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6346 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6347 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable permutation expander: operand 0 is the result, operands 1 and 2
;; the two input vectors, and operand 3 the selector held in a register of
;; the matching integer vector mode (<sseintvecmode>).  Enabled when any of
;; TARGET_SSSE3, TARGET_AVX or TARGET_XOP holds; the expansion itself is done
;; by ix86_expand_vec_perm.
6349 (define_expand "vec_perm<mode>"
6350 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6351 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6352 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6353 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6354 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6356 ix86_expand_vec_perm (operands);
;; Modes accepted by the vec_perm_const<mode> expander, each with its own
;; target requirement: TARGET_SSE for V4SF/V4SI/V2DF/V2DI, TARGET_SSE2 for
;; V16QI/V8HI, TARGET_AVX for V8SF/V4DF/V8SI/V4DI, and TARGET_AVX2 for
;; V32QI/V16HI.
6360 (define_mode_iterator VEC_PERM_CONST
6361 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6362 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6363 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6364 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6365 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6366 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Permutation expander for the VEC_PERM_CONST modes: operand 0 is the
;; result, operands 1 and 2 the two input vectors, and operand 3 the
;; selector in <sseintvecmode> with no predicate attached.  The expansion is
;; attempted through ix86_expand_vec_perm_const, whose return value is
;; tested.
;; NOTE(review): the enabling condition and what happens on each outcome of
;; that test are not visible in this excerpt.
6368 (define_expand "vec_perm_const<mode>"
6369 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6370 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6371 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6372 (match_operand:<sseintvecmode> 3 "" "")]
6375 if (ix86_expand_vec_perm_const (operands))
6381 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6383 ;; Parallel bitwise logical operations
6385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR of operand 1 with
;; an all-ones vector.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx, forces it into a register
;; and installs it as operands[2].
;; NOTE(review): the second XOR operand in the pattern and the enabling
;; condition are not visible in this excerpt.
6387 (define_expand "one_cmpl<mode>2"
6388 [(set (match_operand:VI 0 "register_operand" "")
6389 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6393 int i, n = GET_MODE_NUNITS (<MODE>mode);
6394 rtvec v = rtvec_alloc (n);
6396 for (i = 0; i < n; ++i)
6397 RTVEC_ELT (v, i) = constm1_rtx;
6399 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the and-not operation on the VI_AVX2 modes: operand 1
;; (a register) is complemented and combined with operand 2 (register or
;; memory) into operand 0.  No preparation code is visible.
;; NOTE(review): the outer rtx code line and the enabling condition are not
;; visible in this excerpt.
6402 (define_expand "<sse2_avx2>_andnot<mode>3"
6403 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6405 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6406 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; And-not insn for the VI modes: operand 1 is complemented and combined
;; with operand 2 (register or memory).
;; The output template is built at run time: a mnemonic string `tmp' is
;; chosen by a switch on get_attr_mode (with assertions on TARGET_AVX2,
;; TARGET_SSE2, TARGET_AVX and TARGET_SSE in the individual cases) and is
;; formatted into the static buffer `buf' using a two-operand format for one
;; alternative and a "v"-prefixed three-operand format for the other.
;; The "mode" attribute falls back to V8SF for modes wider than 16 bytes
;; without TARGET_AVX2, to V4SF without TARGET_SSE2, and is <sseinsnmode>
;; otherwise.
;; NOTE(review): the case labels, the mnemonic strings assigned to `tmp',
;; the outer rtx code line and the enabling condition are not visible here.
6409 (define_insn "*andnot<mode>3"
6410 [(set (match_operand:VI 0 "register_operand" "=x,x")
6412 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6413 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6416 static char buf[32];
6420 switch (get_attr_mode (insn))
6423 gcc_assert (TARGET_AVX2);
6425 gcc_assert (TARGET_SSE2);
6431 gcc_assert (TARGET_AVX);
6433 gcc_assert (TARGET_SSE);
6442 switch (which_alternative)
6445 ops = "%s\t{%%2, %%0|%%0, %%2}";
6448 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6454 snprintf (buf, sizeof (buf), ops, tmp);
6457 [(set_attr "isa" "noavx,avx")
6458 (set_attr "type" "sselog")
6459 (set (attr "prefix_data16")
6461 (and (eq_attr "alternative" "0")
6462 (eq_attr "mode" "TI"))
6464 (const_string "*")))
6465 (set_attr "prefix" "orig,vex")
6467 (cond [(and (not (match_test "TARGET_AVX2"))
6468 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6469 (const_string "V8SF")
6470 (not (match_test "TARGET_SSE2"))
6471 (const_string "V4SF")
6473 (const_string "<sseinsnmode>")))])
;; Binary-operation expander for the VI modes.  Both inputs may be registers
;; or memory; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the code iterator wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6475 (define_expand "<code><mode>3"
6476 [(set (match_operand:VI 0 "register_operand" "")
6478 (match_operand:VI 1 "nonimmediate_operand" "")
6479 (match_operand:VI 2 "nonimmediate_operand" "")))]
6481 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Binary-operation insn for the VI modes with commutative inputs ("%"
;; constraint modifier); operand 2 may be memory.  The condition includes
;; ix86_binary_operator_ok for <CODE>.
;; The output template is built at run time: a mnemonic string `tmp' is
;; chosen by a switch on get_attr_mode (with assertions on TARGET_AVX2,
;; TARGET_SSE2, TARGET_AVX and TARGET_SSE in the individual cases) and is
;; formatted into the static buffer `buf' using a two-operand format for one
;; alternative and a "v"-prefixed three-operand format for the other.
;; The "mode" attribute falls back to V8SF for modes wider than 16 bytes
;; without TARGET_AVX2, to V4SF without TARGET_SSE2, and is <sseinsnmode>
;; otherwise.
;; NOTE(review): the case labels, the mnemonic strings assigned to `tmp',
;; the code iterator line and the start of the condition are not visible.
6483 (define_insn "*<code><mode>3"
6484 [(set (match_operand:VI 0 "register_operand" "=x,x")
6486 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6487 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6489 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6491 static char buf[32];
6495 switch (get_attr_mode (insn))
6498 gcc_assert (TARGET_AVX2);
6500 gcc_assert (TARGET_SSE2);
6506 gcc_assert (TARGET_AVX);
6508 gcc_assert (TARGET_SSE);
6517 switch (which_alternative)
6520 ops = "%s\t{%%2, %%0|%%0, %%2}";
6523 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6529 snprintf (buf, sizeof (buf), ops, tmp);
6532 [(set_attr "isa" "noavx,avx")
6533 (set_attr "type" "sselog")
6534 (set (attr "prefix_data16")
6536 (and (eq_attr "alternative" "0")
6537 (eq_attr "mode" "TI"))
6539 (const_string "*")))
6540 (set_attr "prefix" "orig,vex")
6542 (cond [(and (not (match_test "TARGET_AVX2"))
6543 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6544 (const_string "V8SF")
6545 (not (match_test "TARGET_SSE2"))
6546 (const_string "V4SF")
6548 (const_string "<sseinsnmode>")))])
;; And-not insn on TFmode values held in SSE registers, emitted as pandn /
;; vpandn.  Operand 1 is complemented; operand 2 may be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the outer rtx code line and the enabling condition are not
;; visible in this excerpt.
6550 (define_insn "*andnottf3"
6551 [(set (match_operand:TF 0 "register_operand" "=x,x")
6553 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6554 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6557 pandn\t{%2, %0|%0, %2}
6558 vpandn\t{%2, %1, %0|%0, %1, %2}"
6559 [(set_attr "isa" "noavx,avx")
6560 (set_attr "type" "sselog")
6561 (set_attr "prefix_data16" "1,*")
6562 (set_attr "prefix" "orig,vex")
6563 (set_attr "mode" "TI")])
;; Binary-operation expander on TFmode.  Both inputs may be registers or
;; memory; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the code iterator wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6565 (define_expand "<code>tf3"
6566 [(set (match_operand:TF 0 "register_operand" "")
6568 (match_operand:TF 1 "nonimmediate_operand" "")
6569 (match_operand:TF 2 "nonimmediate_operand" "")))]
6571 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Binary-operation insn on TFmode values held in SSE registers, emitted as
;; p<logic> / vp<logic>.  Operands 1 and 2 are commutative ("%" constraint
;; modifier); operand 2 may be memory.  The condition includes
;; ix86_binary_operator_ok for <CODE>.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the code iterator line and the start of the condition are
;; not visible in this excerpt.
6573 (define_insn "*<code>tf3"
6574 [(set (match_operand:TF 0 "register_operand" "=x,x")
6576 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6577 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6579 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6581 p<logic>\t{%2, %0|%0, %2}
6582 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6583 [(set_attr "isa" "noavx,avx")
6584 (set_attr "type" "sselog")
6585 (set_attr "prefix_data16" "1,*")
6586 (set_attr "prefix" "orig,vex")
6587 (set_attr "mode" "TI")])
6589 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6591 ;; Parallel integral element swizzling
6593 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack for the VI248_AVX2 modes: operands 1 and 2 are re-viewed
;; in the narrower-element <ssepackmode> and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0, writing the
;; result to operand 0.
;; NOTE(review): presumably the 0 selects the even-indexed elements, i.e.
;; the low half of every wide element -- confirm against the helper.  The
;; enabling condition is not visible in this excerpt.
6595 (define_expand "vec_pack_trunc_<mode>"
6596 [(match_operand:<ssepackmode> 0 "register_operand" "")
6597 (match_operand:VI248_AVX2 1 "register_operand" "")
6598 (match_operand:VI248_AVX2 2 "register_operand" "")]
6601 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6602 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6603 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack insn for the VI1_AVX2 modes, emitted as packsswb / vpacksswb.  The
;; result is the concatenation of the signed-saturating truncations
;; (ss_truncate) of operand 1 and operand 2, both in <sseunpackmode>;
;; operand 2 may be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6607 (define_insn "<sse2_avx2>_packsswb"
6608 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6609 (vec_concat:VI1_AVX2
6610 (ss_truncate:<ssehalfvecmode>
6611 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6612 (ss_truncate:<ssehalfvecmode>
6613 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6616 packsswb\t{%2, %0|%0, %2}
6617 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6618 [(set_attr "isa" "noavx,avx")
6619 (set_attr "type" "sselog")
6620 (set_attr "prefix_data16" "1,*")
6621 (set_attr "prefix" "orig,vex")
6622 (set_attr "mode" "<sseinsnmode>")])
;; Pack insn for the VI2_AVX2 modes, emitted as packssdw / vpackssdw.  The
;; result is the concatenation of the signed-saturating truncations
;; (ss_truncate) of operand 1 and operand 2, both in <sseunpackmode>;
;; operand 2 may be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6624 (define_insn "<sse2_avx2>_packssdw"
6625 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6626 (vec_concat:VI2_AVX2
6627 (ss_truncate:<ssehalfvecmode>
6628 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6629 (ss_truncate:<ssehalfvecmode>
6630 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6633 packssdw\t{%2, %0|%0, %2}
6634 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6635 [(set_attr "isa" "noavx,avx")
6636 (set_attr "type" "sselog")
6637 (set_attr "prefix_data16" "1,*")
6638 (set_attr "prefix" "orig,vex")
6639 (set_attr "mode" "<sseinsnmode>")])
;; Pack insn for the VI1_AVX2 modes, emitted as packuswb / vpackuswb.  The
;; result is the concatenation of the unsigned-saturating truncations
;; (us_truncate) of operand 1 and operand 2, both in <sseunpackmode>;
;; operand 2 may be memory.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6641 (define_insn "<sse2_avx2>_packuswb"
6642 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6643 (vec_concat:VI1_AVX2
6644 (us_truncate:<ssehalfvecmode>
6645 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6646 (us_truncate:<ssehalfvecmode>
6647 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6650 packuswb\t{%2, %0|%0, %2}
6651 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6652 [(set_attr "isa" "noavx,avx")
6653 (set_attr "type" "sselog")
6654 (set_attr "prefix_data16" "1,*")
6655 (set_attr "prefix" "orig,vex")
6656 (set_attr "mode" "<sseinsnmode>")])
;; V32QI interleave insn emitted as vpunpckhbw (three-operand VEX form;
;; operand 2 may be memory).  The selection list alternates an index from
;; 8..15 or 24..31 with the same index plus 32.
;; NOTE(review): presumably the indices address the concatenation of
;; operands 1 and 2, so index + 32 is the matching byte of operand 2 -- the
;; vec_select/vec_concat lines and the enabling condition are not visible in
;; this excerpt; confirm.
6658 (define_insn "avx2_interleave_highv32qi"
6659 [(set (match_operand:V32QI 0 "register_operand" "=x")
6662 (match_operand:V32QI 1 "register_operand" "x")
6663 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6664 (parallel [(const_int 8) (const_int 40)
6665 (const_int 9) (const_int 41)
6666 (const_int 10) (const_int 42)
6667 (const_int 11) (const_int 43)
6668 (const_int 12) (const_int 44)
6669 (const_int 13) (const_int 45)
6670 (const_int 14) (const_int 46)
6671 (const_int 15) (const_int 47)
6672 (const_int 24) (const_int 56)
6673 (const_int 25) (const_int 57)
6674 (const_int 26) (const_int 58)
6675 (const_int 27) (const_int 59)
6676 (const_int 28) (const_int 60)
6677 (const_int 29) (const_int 61)
6678 (const_int 30) (const_int 62)
6679 (const_int 31) (const_int 63)])))]
6681 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6682 [(set_attr "type" "sselog")
6683 (set_attr "prefix" "vex")
6684 (set_attr "mode" "OI")])
;; V16QI interleave insn emitted as punpckhbw / vpunpckhbw.  The selection
;; list alternates an index from 8..15 with the same index plus 16.
;; Alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;; Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): presumably the indices address the concatenation of
;; operands 1 and 2, so index + 16 is the matching byte of operand 2 -- the
;; vec_select/vec_concat lines and the enabling condition are not visible in
;; this excerpt; confirm.
6686 (define_insn "vec_interleave_highv16qi"
6687 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6690 (match_operand:V16QI 1 "register_operand" "0,x")
6691 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6692 (parallel [(const_int 8) (const_int 24)
6693 (const_int 9) (const_int 25)
6694 (const_int 10) (const_int 26)
6695 (const_int 11) (const_int 27)
6696 (const_int 12) (const_int 28)
6697 (const_int 13) (const_int 29)
6698 (const_int 14) (const_int 30)
6699 (const_int 15) (const_int 31)])))]
6702 punpckhbw\t{%2, %0|%0, %2}
6703 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6704 [(set_attr "isa" "noavx,avx")
6705 (set_attr "type" "sselog")
6706 (set_attr "prefix_data16" "1,*")
6707 (set_attr "prefix" "orig,vex")
6708 (set_attr "mode" "TI")])
;; AVX2 low byte interleave on 256-bit V32QI operands; emits vpunpcklbw.
;; The selector pairs index i with index i+32 for i = 0..7 and i = 16..23,
;; i.e. bytes 0..7 of each 16-byte half of the 32-byte vector.
;; NOTE(review): indices >= 32 presumably address operand 2 through a
;; concatenation of operands 1 and 2 -- confirm against the full pattern.
6710 (define_insn "avx2_interleave_lowv32qi"
6711 [(set (match_operand:V32QI 0 "register_operand" "=x")
6714 (match_operand:V32QI 1 "register_operand" "x")
6715 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6716 (parallel [(const_int 0) (const_int 32)
6717 (const_int 1) (const_int 33)
6718 (const_int 2) (const_int 34)
6719 (const_int 3) (const_int 35)
6720 (const_int 4) (const_int 36)
6721 (const_int 5) (const_int 37)
6722 (const_int 6) (const_int 38)
6723 (const_int 7) (const_int 39)
6724 (const_int 16) (const_int 48)
6725 (const_int 17) (const_int 49)
6726 (const_int 18) (const_int 50)
6727 (const_int 19) (const_int 51)
6728 (const_int 20) (const_int 52)
6729 (const_int 21) (const_int 53)
6730 (const_int 22) (const_int 54)
6731 (const_int 23) (const_int 55)])))]
6733 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6734 [(set_attr "type" "sselog")
6735 (set_attr "prefix" "vex")
6736 (set_attr "mode" "OI")])
;; 128-bit low byte interleave on V16QI: the selector pairs index i with
;; index i+16 for i = 0..7.
;; Alternative 0 (isa "noavx") is the two-operand punpcklbw with operand 1
;; tied to the destination (constraint "0"); alternative 1 (isa "avx") is
;; the three-operand vpunpcklbw.
6738 (define_insn "vec_interleave_lowv16qi"
6739 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6742 (match_operand:V16QI 1 "register_operand" "0,x")
6743 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6744 (parallel [(const_int 0) (const_int 16)
6745 (const_int 1) (const_int 17)
6746 (const_int 2) (const_int 18)
6747 (const_int 3) (const_int 19)
6748 (const_int 4) (const_int 20)
6749 (const_int 5) (const_int 21)
6750 (const_int 6) (const_int 22)
6751 (const_int 7) (const_int 23)])))]
6754 punpcklbw\t{%2, %0|%0, %2}
6755 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6756 [(set_attr "isa" "noavx,avx")
6757 (set_attr "type" "sselog")
6758 (set_attr "prefix_data16" "1,*")
6759 (set_attr "prefix" "orig,vex")
6760 (set_attr "mode" "TI")])
;; AVX2 high word interleave on 256-bit V16HI operands; emits vpunpckhwd.
;; The selector pairs index i with index i+16 for i = 4..7 and i = 12..15,
;; i.e. the upper four words of each 8-word half of the vector.
6762 (define_insn "avx2_interleave_highv16hi"
6763 [(set (match_operand:V16HI 0 "register_operand" "=x")
6766 (match_operand:V16HI 1 "register_operand" "x")
6767 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6768 (parallel [(const_int 4) (const_int 20)
6769 (const_int 5) (const_int 21)
6770 (const_int 6) (const_int 22)
6771 (const_int 7) (const_int 23)
6772 (const_int 12) (const_int 28)
6773 (const_int 13) (const_int 29)
6774 (const_int 14) (const_int 30)
6775 (const_int 15) (const_int 31)])))]
6777 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6778 [(set_attr "type" "sselog")
6779 (set_attr "prefix" "vex")
6780 (set_attr "mode" "OI")])
;; 128-bit high word interleave on V8HI: the selector pairs index i with
;; index i+8 for i = 4..7.
;; Alternative 0 (isa "noavx") is two-operand punpckhwd with operand 1 tied
;; to the destination; alternative 1 (isa "avx") is three-operand vpunpckhwd.
6782 (define_insn "vec_interleave_highv8hi"
6783 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6786 (match_operand:V8HI 1 "register_operand" "0,x")
6787 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6788 (parallel [(const_int 4) (const_int 12)
6789 (const_int 5) (const_int 13)
6790 (const_int 6) (const_int 14)
6791 (const_int 7) (const_int 15)])))]
6794 punpckhwd\t{%2, %0|%0, %2}
6795 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6796 [(set_attr "isa" "noavx,avx")
6797 (set_attr "type" "sselog")
6798 (set_attr "prefix_data16" "1,*")
6799 (set_attr "prefix" "orig,vex")
6800 (set_attr "mode" "TI")])
;; AVX2 low word interleave on 256-bit V16HI operands; emits vpunpcklwd.
;; The selector pairs index i with index i+16 for i = 0..3 and i = 8..11,
;; i.e. the lower four words of each 8-word half of the vector.
6802 (define_insn "avx2_interleave_lowv16hi"
6803 [(set (match_operand:V16HI 0 "register_operand" "=x")
6806 (match_operand:V16HI 1 "register_operand" "x")
6807 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6808 (parallel [(const_int 0) (const_int 16)
6809 (const_int 1) (const_int 17)
6810 (const_int 2) (const_int 18)
6811 (const_int 3) (const_int 19)
6812 (const_int 8) (const_int 24)
6813 (const_int 9) (const_int 25)
6814 (const_int 10) (const_int 26)
6815 (const_int 11) (const_int 27)])))]
6817 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6818 [(set_attr "type" "sselog")
6819 (set_attr "prefix" "vex")
6820 (set_attr "mode" "OI")])
;; 128-bit low word interleave on V8HI: the selector pairs index i with
;; index i+8 for i = 0..3.
;; Alternative 0 (isa "noavx") is two-operand punpcklwd with operand 1 tied
;; to the destination; alternative 1 (isa "avx") is three-operand vpunpcklwd.
6822 (define_insn "vec_interleave_lowv8hi"
6823 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6826 (match_operand:V8HI 1 "register_operand" "0,x")
6827 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6828 (parallel [(const_int 0) (const_int 8)
6829 (const_int 1) (const_int 9)
6830 (const_int 2) (const_int 10)
6831 (const_int 3) (const_int 11)])))]
6834 punpcklwd\t{%2, %0|%0, %2}
6835 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6836 [(set_attr "isa" "noavx,avx")
6837 (set_attr "type" "sselog")
6838 (set_attr "prefix_data16" "1,*")
6839 (set_attr "prefix" "orig,vex")
6840 (set_attr "mode" "TI")])
;; AVX2 high doubleword interleave on 256-bit V8SI operands; emits
;; vpunpckhdq.  The selector pairs index i with index i+8 for i = 2, 3, 6, 7,
;; i.e. the upper two elements of each 4-element half of the vector.
6842 (define_insn "avx2_interleave_highv8si"
6843 [(set (match_operand:V8SI 0 "register_operand" "=x")
6846 (match_operand:V8SI 1 "register_operand" "x")
6847 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6848 (parallel [(const_int 2) (const_int 10)
6849 (const_int 3) (const_int 11)
6850 (const_int 6) (const_int 14)
6851 (const_int 7) (const_int 15)])))]
6853 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6854 [(set_attr "type" "sselog")
6855 (set_attr "prefix" "vex")
6856 (set_attr "mode" "OI")])
;; 128-bit high doubleword interleave on V4SI: the selector pairs index i
;; with index i+4 for i = 2, 3.
;; Alternative 0 (isa "noavx") is two-operand punpckhdq with operand 1 tied
;; to the destination; alternative 1 (isa "avx") is three-operand vpunpckhdq.
6858 (define_insn "vec_interleave_highv4si"
6859 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6862 (match_operand:V4SI 1 "register_operand" "0,x")
6863 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6864 (parallel [(const_int 2) (const_int 6)
6865 (const_int 3) (const_int 7)])))]
6868 punpckhdq\t{%2, %0|%0, %2}
6869 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6870 [(set_attr "isa" "noavx,avx")
6871 (set_attr "type" "sselog")
6872 (set_attr "prefix_data16" "1,*")
6873 (set_attr "prefix" "orig,vex")
6874 (set_attr "mode" "TI")])
;; AVX2 low doubleword interleave on 256-bit V8SI operands; emits
;; vpunpckldq.  The selector pairs index i with index i+8 for i = 0, 1, 4, 5,
;; i.e. the lower two elements of each 4-element half of the vector.
6876 (define_insn "avx2_interleave_lowv8si"
6877 [(set (match_operand:V8SI 0 "register_operand" "=x")
6880 (match_operand:V8SI 1 "register_operand" "x")
6881 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6882 (parallel [(const_int 0) (const_int 8)
6883 (const_int 1) (const_int 9)
6884 (const_int 4) (const_int 12)
6885 (const_int 5) (const_int 13)])))]
6887 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6888 [(set_attr "type" "sselog")
6889 (set_attr "prefix" "vex")
6890 (set_attr "mode" "OI")])
;; 128-bit low doubleword interleave on V4SI: the selector pairs index i
;; with index i+4 for i = 0, 1.
;; Alternative 0 (isa "noavx") is two-operand punpckldq with operand 1 tied
;; to the destination; alternative 1 (isa "avx") is three-operand vpunpckldq.
6892 (define_insn "vec_interleave_lowv4si"
6893 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6896 (match_operand:V4SI 1 "register_operand" "0,x")
6897 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6898 (parallel [(const_int 0) (const_int 4)
6899 (const_int 1) (const_int 5)])))]
6902 punpckldq\t{%2, %0|%0, %2}
6903 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6904 [(set_attr "isa" "noavx,avx")
6905 (set_attr "type" "sselog")
6906 (set_attr "prefix_data16" "1,*")
6907 (set_attr "prefix" "orig,vex")
6908 (set_attr "mode" "TI")])
;; Expander for the standard-named high interleave on VI_256 modes.
;; It emits both avx2_interleave_low<mode> (into t1) and
;; avx2_interleave_high<mode> (into t2), then combines the two temporaries,
;; viewed as V4DImode, with avx2_permv2ti and immediate 1 + (3 << 4) = 0x31.
;; NOTE(review): 0x31 presumably selects the upper 128-bit half of t1 and
;; of t2 -- confirm against the avx2_permv2ti pattern.
6910 (define_expand "vec_interleave_high<mode>"
6911 [(match_operand:VI_256 0 "register_operand" "=x")
6912 (match_operand:VI_256 1 "register_operand" "x")
6913 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6916 rtx t1 = gen_reg_rtx (<MODE>mode);
6917 rtx t2 = gen_reg_rtx (<MODE>mode);
6918 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6919 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6920 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6921 gen_lowpart (V4DImode, t1),
6922 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander for the standard-named low interleave on VI_256 modes.
;; It emits both avx2_interleave_low<mode> (into t1) and
;; avx2_interleave_high<mode> (into t2), then combines the two temporaries,
;; viewed as V4DImode, with avx2_permv2ti and immediate 0 + (2 << 4) = 0x20.
;; NOTE(review): 0x20 presumably selects the lower 128-bit half of t1 and
;; of t2 -- confirm against the avx2_permv2ti pattern.
6926 (define_expand "vec_interleave_low<mode>"
6927 [(match_operand:VI_256 0 "register_operand" "=x")
6928 (match_operand:VI_256 1 "register_operand" "x")
6929 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6932 rtx t1 = gen_reg_rtx (<MODE>mode);
6933 rtx t2 = gen_reg_rtx (<MODE>mode);
6934 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6935 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6936 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6937 gen_lowpart (V4DImode, t1),
6938 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6942 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI require TARGET_SSE4_1,
;; and V2DI requires both TARGET_SSE4_1 and TARGET_64BIT.
6943 (define_mode_iterator PINSR_MODE
6944 [(V16QI "TARGET_SSE4_1") V8HI
6945 (V4SI "TARGET_SSE4_1")
6946 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix substituted into pattern names per mode: "sse2" for V8HI
;; and "sse4_1" for V16QI, V4SI and V2DI.
6948 (define_mode_attr sse2p4_1
6949 [(V16QI "sse4_1") (V8HI "sse2")
6950 (V4SI "sse4_1") (V2DI "sse4_1")])
6952 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 (general register or memory) into vector
;; operand 1 using a vec_merge of a vec_duplicate.  Operand 3 is the merge
;; mask: the insn condition requires exact_log2 (operand 3) to be a valid
;; element index (below GET_MODE_NUNITS), and the output code rewrites
;; operand 3 to that index before printing the immediate.
;; The templates print the scalar either as %k2 (guarded by an
;; "element narrower than SImode" check) or as plain %2.
;; Alternatives 0-1 are the non-AVX forms with operand 1 tied to the
;; destination; alternatives 2-3 are the three-operand VEX forms.
;; prefix_rex, prefix_data16 and prefix_extra are computed from the mode
;; and are only special-cased when TARGET_AVX is off.
6953 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6954 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6955 (vec_merge:PINSR_MODE
6956 (vec_duplicate:PINSR_MODE
6957 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6958 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6959 (match_operand:SI 3 "const_int_operand" "")))]
6961 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6962 < GET_MODE_NUNITS (<MODE>mode))"
6964 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6966 switch (which_alternative)
6969 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6970 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6973 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6975 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6976 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6979 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6984 [(set_attr "isa" "noavx,noavx,avx,avx")
6985 (set_attr "type" "sselog")
6986 (set (attr "prefix_rex")
6988 (and (not (match_test "TARGET_AVX"))
6989 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6991 (const_string "*")))
6992 (set (attr "prefix_data16")
6994 (and (not (match_test "TARGET_AVX"))
6995 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6997 (const_string "*")))
6998 (set (attr "prefix_extra")
7000 (and (not (match_test "TARGET_AVX"))
7001 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7003 (const_string "1")))
7004 (set_attr "length_immediate" "1")
7005 (set_attr "prefix" "orig,orig,vex,vex")
7006 (set_attr "mode" "TI")])
;; Extract byte element operand 2 (0..15) of V16QI register operand 1 into
;; a general register of an SWI48 mode with (v)pextrb.  The destination is
;; always printed through %k0.
;; NOTE(review): the extension wrapping the element select is presumably a
;; zero_extend -- confirm against the full pattern.
7008 (define_insn "*sse4_1_pextrb_<mode>"
7009 [(set (match_operand:SWI48 0 "register_operand" "=r")
7012 (match_operand:V16QI 1 "register_operand" "x")
7013 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7015 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7016 [(set_attr "type" "sselog")
7017 (set_attr "prefix_extra" "1")
7018 (set_attr "length_immediate" "1")
7019 (set_attr "prefix" "maybe_vex")
7020 (set_attr "mode" "TI")])
;; Store byte element operand 2 (0..15) of V16QI register operand 1
;; directly to a QImode memory destination with (v)pextrb.
7022 (define_insn "*sse4_1_pextrb_memory"
7023 [(set (match_operand:QI 0 "memory_operand" "=m")
7025 (match_operand:V16QI 1 "register_operand" "x")
7026 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7028 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7029 [(set_attr "type" "sselog")
7030 (set_attr "prefix_extra" "1")
7031 (set_attr "length_immediate" "1")
7032 (set_attr "prefix" "maybe_vex")
7033 (set_attr "mode" "TI")])
;; Extract word element operand 2 (0..7) of V8HI register operand 1 into a
;; general register of an SWI48 mode with (v)pextrw; the destination is
;; printed through %k0.  Unlike the memory form, this pattern sets
;; prefix_data16 rather than prefix_extra.
;; NOTE(review): the extension wrapping the element select is presumably a
;; zero_extend -- confirm against the full pattern.
7035 (define_insn "*sse2_pextrw_<mode>"
7036 [(set (match_operand:SWI48 0 "register_operand" "=r")
7039 (match_operand:V8HI 1 "register_operand" "x")
7040 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7042 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7043 [(set_attr "type" "sselog")
7044 (set_attr "prefix_data16" "1")
7045 (set_attr "length_immediate" "1")
7046 (set_attr "prefix" "maybe_vex")
7047 (set_attr "mode" "TI")])
;; Store word element operand 2 (0..7) of V8HI register operand 1 directly
;; to an HImode memory destination with (v)pextrw.
7049 (define_insn "*sse4_1_pextrw_memory"
7050 [(set (match_operand:HI 0 "memory_operand" "=m")
7052 (match_operand:V8HI 1 "register_operand" "x")
7053 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7055 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7056 [(set_attr "type" "sselog")
7057 (set_attr "prefix_extra" "1")
7058 (set_attr "length_immediate" "1")
7059 (set_attr "prefix" "maybe_vex")
7060 (set_attr "mode" "TI")])
;; Extract SImode element operand 2 (0..3) of V4SI register operand 1 into
;; a general register or memory ("=rm") with (v)pextrd.
7062 (define_insn "*sse4_1_pextrd"
7063 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7065 (match_operand:V4SI 1 "register_operand" "x")
7066 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7068 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7069 [(set_attr "type" "sselog")
7070 (set_attr "prefix_extra" "1")
7071 (set_attr "length_immediate" "1")
7072 (set_attr "prefix" "maybe_vex")
7073 (set_attr "mode" "TI")])
;; 64-bit-only variant (TARGET_64BIT && TARGET_SSE4_1) of the pextrd
;; extract whose destination is a DImode general register; the instruction
;; is still printed with the %k0 form of the destination.
;; NOTE(review): per the pattern name the element is presumably wrapped in
;; a zero_extend to DImode -- confirm against the full pattern.
7075 (define_insn "*sse4_1_pextrd_zext"
7076 [(set (match_operand:DI 0 "register_operand" "=r")
7079 (match_operand:V4SI 1 "register_operand" "x")
7080 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7081 "TARGET_64BIT && TARGET_SSE4_1"
7082 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7083 [(set_attr "type" "sselog")
7084 (set_attr "prefix_extra" "1")
7085 (set_attr "length_immediate" "1")
7086 (set_attr "prefix" "maybe_vex")
7087 (set_attr "mode" "TI")])
7089 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract DImode element operand 2 (0..1) of V2DI register operand 1 into
;; a general register or memory ("=rm") with (v)pextrq.  Enabled only for
;; TARGET_SSE4_1 && TARGET_64BIT; prefix_rex is set to 1 unconditionally.
7090 (define_insn "*sse4_1_pextrq"
7091 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7093 (match_operand:V2DI 1 "register_operand" "x")
7094 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7095 "TARGET_SSE4_1 && TARGET_64BIT"
7096 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7097 [(set_attr "type" "sselog")
7098 (set_attr "prefix_rex" "1")
7099 (set_attr "prefix_extra" "1")
7100 (set_attr "length_immediate" "1")
7101 (set_attr "prefix" "maybe_vex")
7102 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).
;; It splits the immediate into four 2-bit selectors and passes them to
;; avx2_pshufd_1 twice: unchanged for the first four selector operands and
;; with 4 added for the second four.
7104 (define_expand "avx2_pshufdv3"
7105 [(match_operand:V8SI 0 "register_operand" "")
7106 (match_operand:V8SI 1 "nonimmediate_operand" "")
7107 (match_operand:SI 2 "const_0_to_255_operand" "")]
7110 int mask = INTVAL (operands[2]);
7111 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7112 GEN_INT ((mask >> 0) & 3),
7113 GEN_INT ((mask >> 2) & 3),
7114 GEN_INT ((mask >> 4) & 3),
7115 GEN_INT ((mask >> 6) & 3),
7116 GEN_INT (((mask >> 0) & 3) + 4),
7117 GEN_INT (((mask >> 2) & 3) + 4),
7118 GEN_INT (((mask >> 4) & 3) + 4),
7119 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit vpshufd with explicit per-element selectors.  Operands 2-5 are
;; restricted to 0..3 and operands 6-9 to 4..7; the insn condition further
;; requires each of operands 6-9 to equal the matching operand 2-5 plus 4,
;; so one 8-bit immediate describes both halves.  The output code packs
;; operands 2-5 into that immediate (two bits each, operand 2 lowest) and
;; stores it back into operands[2] before printing.
7123 (define_insn "avx2_pshufd_1"
7124 [(set (match_operand:V8SI 0 "register_operand" "=x")
7126 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7127 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7128 (match_operand 3 "const_0_to_3_operand" "")
7129 (match_operand 4 "const_0_to_3_operand" "")
7130 (match_operand 5 "const_0_to_3_operand" "")
7131 (match_operand 6 "const_4_to_7_operand" "")
7132 (match_operand 7 "const_4_to_7_operand" "")
7133 (match_operand 8 "const_4_to_7_operand" "")
7134 (match_operand 9 "const_4_to_7_operand" "")])))]
7136 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7137 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7138 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7139 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7142 mask |= INTVAL (operands[2]) << 0;
7143 mask |= INTVAL (operands[3]) << 2;
7144 mask |= INTVAL (operands[4]) << 4;
7145 mask |= INTVAL (operands[5]) << 6;
7146 operands[2] = GEN_INT (mask);
7148 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7150 [(set_attr "type" "sselog1")
7151 (set_attr "prefix" "vex")
7152 (set_attr "length_immediate" "1")
7153 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).
;; It splits the low eight bits into four 2-bit selectors and passes them
;; to sse2_pshufd_1.
7155 (define_expand "sse2_pshufd"
7156 [(match_operand:V4SI 0 "register_operand" "")
7157 (match_operand:V4SI 1 "nonimmediate_operand" "")
7158 (match_operand:SI 2 "const_int_operand" "")]
7161 int mask = INTVAL (operands[2]);
7162 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7163 GEN_INT ((mask >> 0) & 3),
7164 GEN_INT ((mask >> 2) & 3),
7165 GEN_INT ((mask >> 4) & 3),
7166 GEN_INT ((mask >> 6) & 3)));
;; 128-bit (v)pshufd with explicit per-element selectors (operands 2-5,
;; each 0..3).  The output code packs the four selectors into one
;; immediate (two bits each, operand 2 lowest) and stores it back into
;; operands[2] before printing.
7170 (define_insn "sse2_pshufd_1"
7171 [(set (match_operand:V4SI 0 "register_operand" "=x")
7173 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7174 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7175 (match_operand 3 "const_0_to_3_operand" "")
7176 (match_operand 4 "const_0_to_3_operand" "")
7177 (match_operand 5 "const_0_to_3_operand" "")])))]
7181 mask |= INTVAL (operands[2]) << 0;
7182 mask |= INTVAL (operands[3]) << 2;
7183 mask |= INTVAL (operands[4]) << 4;
7184 mask |= INTVAL (operands[5]) << 6;
7185 operands[2] = GEN_INT (mask);
7187 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7189 [(set_attr "type" "sselog1")
7190 (set_attr "prefix_data16" "1")
7191 (set_attr "prefix" "maybe_vex")
7192 (set_attr "length_immediate" "1")
7193 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).
;; It splits the immediate into four 2-bit selectors and passes them to
;; avx2_pshuflw_1 twice: unchanged for the first four selector operands and
;; with 8 added for the second four.
7195 (define_expand "avx2_pshuflwv3"
7196 [(match_operand:V16HI 0 "register_operand" "")
7197 (match_operand:V16HI 1 "nonimmediate_operand" "")
7198 (match_operand:SI 2 "const_0_to_255_operand" "")]
7201 int mask = INTVAL (operands[2]);
7202 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7203 GEN_INT ((mask >> 0) & 3),
7204 GEN_INT ((mask >> 2) & 3),
7205 GEN_INT ((mask >> 4) & 3),
7206 GEN_INT ((mask >> 6) & 3),
7207 GEN_INT (((mask >> 0) & 3) + 8),
7208 GEN_INT (((mask >> 2) & 3) + 8),
7209 GEN_INT (((mask >> 4) & 3) + 8),
7210 GEN_INT (((mask >> 6) & 3) + 8)));
;; 256-bit vpshuflw with explicit selectors.  Operands 2-5 are restricted
;; to 0..3 and operands 6-9 to 8..11; the insn condition requires each of
;; operands 6-9 to equal the matching operand 2-5 plus 8.  The output code
;; packs operands 2-5 into one immediate (two bits each, operand 2 lowest)
;; and stores it back into operands[2] before printing.
7214 (define_insn "avx2_pshuflw_1"
7215 [(set (match_operand:V16HI 0 "register_operand" "=x")
7217 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7218 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7219 (match_operand 3 "const_0_to_3_operand" "")
7220 (match_operand 4 "const_0_to_3_operand" "")
7221 (match_operand 5 "const_0_to_3_operand" "")
7226 (match_operand 6 "const_8_to_11_operand" "")
7227 (match_operand 7 "const_8_to_11_operand" "")
7228 (match_operand 8 "const_8_to_11_operand" "")
7229 (match_operand 9 "const_8_to_11_operand" "")
7235 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7236 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7237 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7238 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7241 mask |= INTVAL (operands[2]) << 0;
7242 mask |= INTVAL (operands[3]) << 2;
7243 mask |= INTVAL (operands[4]) << 4;
7244 mask |= INTVAL (operands[5]) << 6;
7245 operands[2] = GEN_INT (mask);
7247 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7249 [(set_attr "type" "sselog")
7250 (set_attr "prefix" "vex")
7251 (set_attr "length_immediate" "1")
7252 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).
;; It splits the low eight bits into four 2-bit selectors and passes them
;; to sse2_pshuflw_1.
7254 (define_expand "sse2_pshuflw"
7255 [(match_operand:V8HI 0 "register_operand" "")
7256 (match_operand:V8HI 1 "nonimmediate_operand" "")
7257 (match_operand:SI 2 "const_int_operand" "")]
7260 int mask = INTVAL (operands[2]);
7261 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7262 GEN_INT ((mask >> 0) & 3),
7263 GEN_INT ((mask >> 2) & 3),
7264 GEN_INT ((mask >> 4) & 3),
7265 GEN_INT ((mask >> 6) & 3)));
;; 128-bit (v)pshuflw with explicit selectors (operands 2-5, each 0..3).
;; The output code packs the four selectors into one immediate (two bits
;; each, operand 2 lowest) and stores it back into operands[2].
;; The attributes set prefix_data16 to 0 and prefix_rep to 1.
7269 (define_insn "sse2_pshuflw_1"
7270 [(set (match_operand:V8HI 0 "register_operand" "=x")
7272 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7273 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7274 (match_operand 3 "const_0_to_3_operand" "")
7275 (match_operand 4 "const_0_to_3_operand" "")
7276 (match_operand 5 "const_0_to_3_operand" "")
7284 mask |= INTVAL (operands[2]) << 0;
7285 mask |= INTVAL (operands[3]) << 2;
7286 mask |= INTVAL (operands[4]) << 4;
7287 mask |= INTVAL (operands[5]) << 6;
7288 operands[2] = GEN_INT (mask);
7290 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7292 [(set_attr "type" "sselog")
7293 (set_attr "prefix_data16" "0")
7294 (set_attr "prefix_rep" "1")
7295 (set_attr "prefix" "maybe_vex")
7296 (set_attr "length_immediate" "1")
7297 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).
;; It splits the immediate into four 2-bit selectors and passes them to
;; avx2_pshufhw_1 twice: with 4 added for the first four selector operands
;; and with 12 added for the second four.
7299 (define_expand "avx2_pshufhwv3"
7300 [(match_operand:V16HI 0 "register_operand" "")
7301 (match_operand:V16HI 1 "nonimmediate_operand" "")
7302 (match_operand:SI 2 "const_0_to_255_operand" "")]
7305 int mask = INTVAL (operands[2]);
7306 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7307 GEN_INT (((mask >> 0) & 3) + 4),
7308 GEN_INT (((mask >> 2) & 3) + 4),
7309 GEN_INT (((mask >> 4) & 3) + 4),
7310 GEN_INT (((mask >> 6) & 3) + 4),
7311 GEN_INT (((mask >> 0) & 3) + 12),
7312 GEN_INT (((mask >> 2) & 3) + 12),
7313 GEN_INT (((mask >> 4) & 3) + 12),
7314 GEN_INT (((mask >> 6) & 3) + 12)));
;; 256-bit vpshufhw with explicit selectors.  Operands 2-5 are restricted
;; to 4..7 and operands 6-9 to 12..15; the insn condition requires each of
;; operands 6-9 to equal the matching operand 2-5 plus 8.  The output code
;; subtracts 4 from each of operands 2-5, packs the results into one
;; immediate (two bits each, operand 2 lowest) and stores it back into
;; operands[2] before printing.
7318 (define_insn "avx2_pshufhw_1"
7319 [(set (match_operand:V16HI 0 "register_operand" "=x")
7321 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7322 (parallel [(const_int 0)
7326 (match_operand 2 "const_4_to_7_operand" "")
7327 (match_operand 3 "const_4_to_7_operand" "")
7328 (match_operand 4 "const_4_to_7_operand" "")
7329 (match_operand 5 "const_4_to_7_operand" "")
7334 (match_operand 6 "const_12_to_15_operand" "")
7335 (match_operand 7 "const_12_to_15_operand" "")
7336 (match_operand 8 "const_12_to_15_operand" "")
7337 (match_operand 9 "const_12_to_15_operand" "")])))]
7339 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7340 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7341 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7342 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7345 mask |= (INTVAL (operands[2]) - 4) << 0;
7346 mask |= (INTVAL (operands[3]) - 4) << 2;
7347 mask |= (INTVAL (operands[4]) - 4) << 4;
7348 mask |= (INTVAL (operands[5]) - 4) << 6;
7349 operands[2] = GEN_INT (mask);
7351 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7353 [(set_attr "type" "sselog")
7354 (set_attr "prefix" "vex")
7355 (set_attr "length_immediate" "1")
7356 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).
;; It splits the low eight bits into four 2-bit selectors, adds 4 to each,
;; and passes them to sse2_pshufhw_1.
7358 (define_expand "sse2_pshufhw"
7359 [(match_operand:V8HI 0 "register_operand" "")
7360 (match_operand:V8HI 1 "nonimmediate_operand" "")
7361 (match_operand:SI 2 "const_int_operand" "")]
7364 int mask = INTVAL (operands[2]);
7365 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7366 GEN_INT (((mask >> 0) & 3) + 4),
7367 GEN_INT (((mask >> 2) & 3) + 4),
7368 GEN_INT (((mask >> 4) & 3) + 4),
7369 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit (v)pshufhw with explicit selectors (operands 2-5, each 4..7).
;; The output code subtracts 4 from each selector, packs the results into
;; one immediate (two bits each, operand 2 lowest) and stores it back into
;; operands[2].  The attributes set prefix_rep to 1 and prefix_data16 to 0.
7373 (define_insn "sse2_pshufhw_1"
7374 [(set (match_operand:V8HI 0 "register_operand" "=x")
7376 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7377 (parallel [(const_int 0)
7381 (match_operand 2 "const_4_to_7_operand" "")
7382 (match_operand 3 "const_4_to_7_operand" "")
7383 (match_operand 4 "const_4_to_7_operand" "")
7384 (match_operand 5 "const_4_to_7_operand" "")])))]
7388 mask |= (INTVAL (operands[2]) - 4) << 0;
7389 mask |= (INTVAL (operands[3]) - 4) << 2;
7390 mask |= (INTVAL (operands[4]) - 4) << 4;
7391 mask |= (INTVAL (operands[5]) - 4) << 6;
7392 operands[2] = GEN_INT (mask);
7394 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7396 [(set_attr "type" "sselog")
7397 (set_attr "prefix_rep" "1")
7398 (set_attr "prefix_data16" "0")
7399 (set_attr "prefix" "maybe_vex")
7400 (set_attr "length_immediate" "1")
7401 (set_attr "mode" "TI")])
;; Expander producing a V4SI register from SImode operand 1.  The
;; preparation statement supplies operands[2] as the V4SImode zero vector.
;; NOTE(review): presumably operand 1 is merged into element 0 of that zero
;; vector -- confirm against the full pattern.
7403 (define_expand "sse2_loadd"
7404 [(set (match_operand:V4SI 0 "register_operand" "")
7407 (match_operand:SI 1 "nonimmediate_operand" ""))
7411 "operands[2] = CONST0_RTX (V4SImode);")
;; Load SImode operand 2 into a V4SI destination, with operand 1 being
;; either the "C" constant or a register (reg_or_0_operand).
;;   alt 0: %vmovd from memory, operand 1 is "C"            (isa sse2)
;;   alt 1: %vmovd from a general register into "Yi"        (isa *)
;;   alt 2: movss from memory, operand 1 is "C"             (isa noavx)
;;   alt 3: movss from an SSE register, operand 1 tied to 0 (isa noavx)
;;   alt 4: three-operand vmovss                            (isa avx)
7413 (define_insn "sse2_loadld"
7414 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7417 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7418 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7422 %vmovd\t{%2, %0|%0, %2}
7423 %vmovd\t{%2, %0|%0, %2}
7424 movss\t{%2, %0|%0, %2}
7425 movss\t{%2, %0|%0, %2}
7426 vmovss\t{%2, %1, %0|%0, %1, %2}"
7427 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7428 (set_attr "type" "ssemov")
7429 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7430 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of V4SI register operand 1 to an SImode destination.
;; After reload, when TARGET_INTER_UNIT_MOVES holds, or the destination is
;; memory, or the destination is not a general register, the insn is split
;; into a plain SImode move whose source is operand 1's hard register
;; re-created in SImode.
7432 (define_insn_and_split "sse2_stored"
7433 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7435 (match_operand:V4SI 1 "register_operand" "x,Yi")
7436 (parallel [(const_int 0)])))]
7439 "&& reload_completed
7440 && (TARGET_INTER_UNIT_MOVES
7441 || MEM_P (operands [0])
7442 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7443 [(set (match_dup 0) (match_dup 1))]
7444 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) from a V4SI held in offsettable memory
;; ("o") into a general register.  The split code emits an ordinary SImode
;; move from the memory operand adjusted by i*4 bytes, where i is the
;; element index.
7446 (define_insn_and_split "*vec_ext_v4si_mem"
7447 [(set (match_operand:SI 0 "register_operand" "=r")
7449 (match_operand:V4SI 1 "memory_operand" "o")
7450 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7456 int i = INTVAL (operands[2]);
7458 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander storing element 0 of V2DI register operand 1 into a DImode
;; register-or-memory destination.
7462 (define_expand "sse_storeq"
7463 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7465 (match_operand:V2DI 1 "register_operand" "")
7466 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 store from a V2DI source.  Enabled for
;; TARGET_64BIT when destination and source are not both memory.
;; Of the three alternatives, the last loads from offsettable memory ("o")
;; into a general register with mov{q} (type imov, mode DI); the first two
;; carry "*" for type and mode.
7469 (define_insn "*sse2_storeq_rex64"
7470 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7472 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7473 (parallel [(const_int 0)])))]
7474 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7478 mov{q}\t{%1, %0|%0, %1}"
7479 [(set_attr "type" "*,*,imov")
7480 (set_attr "mode" "*,*,DI")])
;; Element-0 store of a V2DI SSE register into an SSE register or memory
;; destination ("=xm").
7482 (define_insn "*sse2_storeq"
7483 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7485 (match_operand:V2DI 1 "register_operand" "x")
7486 (parallel [(const_int 0)])))]
;; Rewrite the element-0 extract from a V2DI register as a plain DImode
;; move: when TARGET_INTER_UNIT_MOVES holds, or the destination is memory,
;; or the destination is not a general register, operand 1 is re-created
;; as its hard register in DImode and moved directly.
7491 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7493 (match_operand:V2DI 1 "register_operand" "")
7494 (parallel [(const_int 0)])))]
7497 && (TARGET_INTER_UNIT_MOVES
7498 || MEM_P (operands [0])
7499 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7500 [(set (match_dup 0) (match_dup 1))]
7501 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extraction of element 1 of a V2DI (TARGET_64BIT, operands not
;; both memory).
;;   alt 0: %vmovhps, register -> memory
;;   alt 1: psrldq $8 in place, operand 1 tied to 0   (isa noavx)
;;   alt 2: three-operand vpsrldq $8                  (isa avx)
;;   alt 3: %vmovq from %H1 of a memory source into an SSE register
;;   alt 4: mov{q} from %H1 of a memory source into a general register
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
7503 (define_insn "*vec_extractv2di_1_rex64"
7504 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7506 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7507 (parallel [(const_int 1)])))]
7508 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7510 %vmovhps\t{%1, %0|%0, %1}
7511 psrldq\t{$8, %0|%0, 8}
7512 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7513 %vmovq\t{%H1, %0|%0, %H1}
7514 mov{q}\t{%H1, %0|%0, %H1}"
7515 [(set_attr "isa" "*,noavx,avx,*,*")
7516 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7517 (set_attr "length_immediate" "*,1,1,*,*")
7518 (set_attr "memory" "*,none,none,*,*")
7519 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7520 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit extraction of element 1 of a V2DI (!TARGET_64BIT && TARGET_SSE,
;; operands not both memory).
;;   alt 0: %vmovhps, register -> memory
;;   alt 1: psrldq $8 in place, operand 1 tied to 0   (isa sse2_noavx)
;;   alt 2: three-operand vpsrldq $8                  (isa avx)
;;   alt 3: %vmovq from %H1 of a memory source        (isa sse2)
;;   alt 4: movhlps register -> register              (isa noavx)
;;   alt 5: movlps from %H1 of a memory source        (isa noavx)
7522 (define_insn "*vec_extractv2di_1"
7523 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7525 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7526 (parallel [(const_int 1)])))]
7527 "!TARGET_64BIT && TARGET_SSE
7528 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7530 %vmovhps\t{%1, %0|%0, %1}
7531 psrldq\t{$8, %0|%0, 8}
7532 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7533 %vmovq\t{%H1, %0|%0, %H1}
7534 movhlps\t{%1, %0|%0, %1}
7535 movlps\t{%H1, %0|%0, %H1}"
7536 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7537 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7538 (set_attr "length_immediate" "*,1,1,*,*,*")
7539 (set_attr "memory" "*,none,none,*,*,*")
7540 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7541 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; Build a V4SI from SImode operand 1.
;;   alt 0: %vpshufd $0 from an SSE register            (isa sse2)
;;   alt 1: vbroadcastss from memory                    (isa avx)
;;   alt 2: shufps $0 in place, operand 1 tied to 0     (isa noavx)
;; NOTE(review): per the pattern name this is presumably a vec_duplicate
;; of operand 1 -- confirm against the full pattern.
7543 (define_insn "*vec_dupv4si"
7544 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7546 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7549 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7550 vbroadcastss\t{%1, %0|%0, %1}
7551 shufps\t{$0, %0, %0|%0, %0, 0}"
7552 [(set_attr "isa" "sse2,avx,noavx")
7553 (set_attr "type" "sselog1,ssemov,sselog1")
7554 (set_attr "length_immediate" "1,0,1")
7555 (set_attr "prefix_extra" "0,1,*")
7556 (set_attr "prefix" "maybe_vex,vex,orig")
7557 (set_attr "mode" "TI,V4SF,V4SF")])
;; Build a V2DI from DImode operand 1.  There are four alternatives, gated
;; by isa "sse2_noavx", "avx", "sse3" and "noavx" respectively; operand 1
;; is tied to the destination in the first and last, an SSE register in
;; the second and memory in the third.
;; NOTE(review): per the pattern name this is presumably a vec_duplicate
;; of operand 1 -- confirm against the full pattern.
7559 (define_insn "*vec_dupv2di"
7560 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7562 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7566 vpunpcklqdq\t{%d1, %0|%0, %d1}
7567 %vmovddup\t{%1, %0|%0, %1}
7569 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7570 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7571 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7572 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Combine SImode operands 1 and 2 into a V2SI register (SSE4.1 variant).
;;   alt 0/1: pinsrd / vpinsrd inserting operand 2 at index 1
;;   alt 2/3: punpckldq / vpunpckldq with an SSE register operand 2
;;   alt 4:   %vmovd of operand 1 when operand 2 is the "C" constant
;;   alt 5:   MMX punpckldq (destination and operand 1 in "*y")
;;   alt 6:   MMX movd of operand 1 when operand 2 is the "C" constant
7574 (define_insn "*vec_concatv2si_sse4_1"
7575 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7577 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7578 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7581 pinsrd\t{$1, %2, %0|%0, %2, 1}
7582 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7583 punpckldq\t{%2, %0|%0, %2}
7584 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7585 %vmovd\t{%1, %0|%0, %1}
7586 punpckldq\t{%2, %0|%0, %2}
7587 movd\t{%1, %0|%0, %1}"
7588 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7589 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7590 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7591 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7592 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7593 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7595 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7596 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7597 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Combine SImode operands 1 and 2 into a V2SI register.
;;   alt 0: SSE punpckldq, operand 1 tied to the destination
;;   alt 1: SSE movd of operand 1 when operand 2 is the "C" constant
;;   alt 2: MMX punpckldq (registers in "*y")
;;   alt 3: MMX movd of operand 1 when operand 2 is the "C" constant
7598 (define_insn "*vec_concatv2si_sse2"
7599 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7601 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7602 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7605 punpckldq\t{%2, %0|%0, %2}
7606 movd\t{%1, %0|%0, %1}
7607 punpckldq\t{%2, %0|%0, %2}
7608 movd\t{%1, %0|%0, %1}"
7609 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7610 (set_attr "mode" "TI,TI,DI,DI")])
;; Combine SImode operands 1 and 2 into a V2SI register using
;; single-precision SSE moves for the SSE-register alternatives.
;;   alt 0: unpcklps, operand 1 tied to the destination
;;   alt 1: movss from memory when operand 2 is the "C" constant
;;   alt 2: MMX punpckldq (registers in "*y")
;;   alt 3: MMX movd of operand 1 when operand 2 is the "C" constant
7612 (define_insn "*vec_concatv2si_sse"
7613 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7615 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7616 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7619 unpcklps\t{%2, %0|%0, %2}
7620 movss\t{%1, %0|%0, %1}
7621 punpckldq\t{%2, %0|%0, %2}
7622 movd\t{%1, %0|%0, %1}"
7623 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7624 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Combine two V2SI operands into a V4SI register.
;;   alt 0/1: punpcklqdq / vpunpcklqdq, register operand 2
;;            (isa sse2_noavx / avx)
;;   alt 2:   movlhps, register operand 2            (isa noavx)
;;   alt 3/4: movhps / vmovhps, memory operand 2     (isa noavx / avx)
;; Operand 1 is tied to the destination in the non-AVX alternatives.
7626 (define_insn "*vec_concatv4si"
7627 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7629 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7630 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7633 punpcklqdq\t{%2, %0|%0, %2}
7634 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7635 movlhps\t{%2, %0|%0, %2}
7636 movhps\t{%2, %0|%0, %2}
7637 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7638 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7639 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7640 (set_attr "prefix" "orig,vex,orig,orig,vex")
7641 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7643 ;; movd instead of movq is required to handle broken assemblers.
;; Combine DImode operands 1 and 2 into a V2DI register (nine alternatives).
;;   alt 0/1: pinsrq / vpinsrq inserting operand 2 at index 1
;;            (isa sse4_noavx / avx)
;;   alt 2:   %vmovq of operand 1 when operand 2 is the "C" constant
;;   alt 3:   %vmovd from a general register into "Yi", operand 2 is "C"
;;   alt 4:   movq2dq from an MMX register, operand 2 is "C"
;;   alt 5/6: punpcklqdq / vpunpcklqdq, register operand 2
;;   alt 7/8: movhps / vmovhps, memory operand 2
;; The type attribute is sselog for alternatives 0, 1, 5, 6 and ssemov
;; otherwise; prefix_rex is special-cased for alternatives 0 and 3 when
;; TARGET_AVX is off.
7644 (define_insn "*vec_concatv2di_rex64"
7645 [(set (match_operand:V2DI 0 "register_operand"
7646 "=x,x ,x ,Yi,!x,x,x,x,x")
7648 (match_operand:DI 1 "nonimmediate_operand"
7649 " 0,x ,xm,r ,*y,0,x,0,x")
7650 (match_operand:DI 2 "vector_move_operand"
7651 "rm,rm,C ,C ,C ,x,x,m,m")))]
7654 pinsrq\t{$1, %2, %0|%0, %2, 1}
7655 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7656 %vmovq\t{%1, %0|%0, %1}
7657 %vmovd\t{%1, %0|%0, %1}
7658 movq2dq\t{%1, %0|%0, %1}
7659 punpcklqdq\t{%2, %0|%0, %2}
7660 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7661 movhps\t{%2, %0|%0, %2}
7662 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7663 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7666 (eq_attr "alternative" "0,1,5,6")
7667 (const_string "sselog")
7668 (const_string "ssemov")))
7669 (set (attr "prefix_rex")
7671 (and (eq_attr "alternative" "0,3")
7672 (not (match_test "TARGET_AVX")))
7674 (const_string "*")))
7675 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7676 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7677 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7678 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit (!TARGET_64BIT && TARGET_SSE) combination of DImode operands 1
;; and 2 into a V2DI register.
;;   alt 0:   %vmovq of operand 1 when operand 2 is "C"        (isa sse2)
;;   alt 1:   movq2dq from an MMX register, operand 2 is "C"   (isa sse2)
;;   alt 2/3: punpcklqdq / vpunpcklqdq, register operand 2
;;   alt 4:   movlhps, register operand 2                      (isa noavx)
;;   alt 5/6: movhps / vmovhps, memory operand 2
7680 (define_insn "vec_concatv2di"
7681 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7683 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7684 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7685 "!TARGET_64BIT && TARGET_SSE"
7687 %vmovq\t{%1, %0|%0, %1}
7688 movq2dq\t{%1, %0|%0, %1}
7689 punpcklqdq\t{%2, %0|%0, %2}
7690 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7691 movlhps\t{%2, %0|%0, %2}
7692 movhps\t{%2, %0|%0, %2}
7693 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7694 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7695 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7696 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7697 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Standard-named expander for VI124_AVX2 modes, producing a result in
;; <sseunpackmode>.  Delegates to ix86_expand_sse_unpack with flags
;; (false, false); across the four sibling expanders the first flag is
;; true for the "unpacku" names and the second for the "hi" names.
7699 (define_expand "vec_unpacks_lo_<mode>"
7700 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7701 (match_operand:VI124_AVX2 1 "register_operand" "")]
7703 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Standard-named expander for VI124_AVX2 modes, producing a result in
;; <sseunpackmode>.  Delegates to ix86_expand_sse_unpack with flags
;; (false, true); across the four sibling expanders the first flag is
;; true for the "unpacku" names and the second for the "hi" names.
7705 (define_expand "vec_unpacks_hi_<mode>"
7706 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7707 (match_operand:VI124_AVX2 1 "register_operand" "")]
7709 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander vec_unpacku_lo_<mode>: operand 1 is a VI124_AVX2 register and
;; operand 0 a <sseunpackmode> register.  All the work is delegated to
;; ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): the flags presumably mean "unsigned" = true and
;; "high half" = false; confirm against the helper's definition.
7711 (define_expand "vec_unpacku_lo_<mode>"
7712 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7713 (match_operand:VI124_AVX2 1 "register_operand" "")]
7715 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander vec_unpacku_hi_<mode>: operand 1 is a VI124_AVX2 register and
;; operand 0 a <sseunpackmode> register.  All the work is delegated to
;; ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): the flags presumably mean "unsigned" = true and
;; "high half" = true; confirm against the helper's definition.
7717 (define_expand "vec_unpacku_hi_<mode>"
7718 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7719 (match_operand:VI124_AVX2 1 "register_operand" "")]
7721 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7723 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7727 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander avx2_uavgv32qi3.  Operand 0 is a V32QI register, operands 1
;; and 2 are nonimmediate V32QI values, and the pattern carries a V32QI
;; constant vector whose 32 elements are all 1.
;; NOTE(review): the all-ones vector is presumably the rounding term of
;; the unsigned average; confirm against the complete pattern.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with PLUS and V32QImode on the operand array.
7729 (define_expand "avx2_uavgv32qi3"
7730 [(set (match_operand:V32QI 0 "register_operand" "")
7736 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7738 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7739 (const_vector:V32QI [(const_int 1) (const_int 1)
7740 (const_int 1) (const_int 1)
7741 (const_int 1) (const_int 1)
7742 (const_int 1) (const_int 1)
7743 (const_int 1) (const_int 1)
7744 (const_int 1) (const_int 1)
7745 (const_int 1) (const_int 1)
7746 (const_int 1) (const_int 1)
7747 (const_int 1) (const_int 1)
7748 (const_int 1) (const_int 1)
7749 (const_int 1) (const_int 1)
7750 (const_int 1) (const_int 1)
7751 (const_int 1) (const_int 1)
7752 (const_int 1) (const_int 1)
7753 (const_int 1) (const_int 1)
7754 (const_int 1) (const_int 1)]))
7757 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander sse2_uavgv16qi3.  Operand 0 is a V16QI register, operands 1
;; and 2 are nonimmediate V16QI values, and the pattern carries a V16QI
;; constant vector whose 16 elements are all 1.
;; NOTE(review): the all-ones vector is presumably the rounding term of
;; the unsigned average; confirm against the complete pattern.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with PLUS and V16QImode on the operand array.
7759 (define_expand "sse2_uavgv16qi3"
7760 [(set (match_operand:V16QI 0 "register_operand" "")
7766 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7768 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7769 (const_vector:V16QI [(const_int 1) (const_int 1)
7770 (const_int 1) (const_int 1)
7771 (const_int 1) (const_int 1)
7772 (const_int 1) (const_int 1)
7773 (const_int 1) (const_int 1)
7774 (const_int 1) (const_int 1)
7775 (const_int 1) (const_int 1)
7776 (const_int 1) (const_int 1)]))
7779 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI average.  It is valid only when
;; TARGET_AVX2 holds and ix86_binary_operator_ok (PLUS, V32QImode,
;; operands) accepts the operands.  Operand 1 carries the "%" constraint
;; modifier, so operands 1 and 2 may be swapped; operand 2 may be in
;; memory ("xm").  Emits the three-operand vpavgb.
7781 (define_insn "*avx2_uavgv32qi3"
7782 [(set (match_operand:V32QI 0 "register_operand" "=x")
7788 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7790 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7791 (const_vector:V32QI [(const_int 1) (const_int 1)
7792 (const_int 1) (const_int 1)
7793 (const_int 1) (const_int 1)
7794 (const_int 1) (const_int 1)
7795 (const_int 1) (const_int 1)
7796 (const_int 1) (const_int 1)
7797 (const_int 1) (const_int 1)
7798 (const_int 1) (const_int 1)
7799 (const_int 1) (const_int 1)
7800 (const_int 1) (const_int 1)
7801 (const_int 1) (const_int 1)
7802 (const_int 1) (const_int 1)
7803 (const_int 1) (const_int 1)
7804 (const_int 1) (const_int 1)
7805 (const_int 1) (const_int 1)
7806 (const_int 1) (const_int 1)]))
7808 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7809 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7810 [(set_attr "type" "sseiadd")
7811 (set_attr "prefix" "vex")
7812 (set_attr "mode" "OI")])
;; Matching insn for the V16QI average.  It is valid only when
;; TARGET_SSE2 holds and ix86_binary_operator_ok (PLUS, V16QImode,
;; operands) accepts the operands.  Two alternatives:
;;   0 - legacy pavgb, operand 1 tied to the destination ("%0"), isa noavx;
;;   1 - VEX vpavgb with three operands, isa avx.
;; The "%" modifier on operand 1 allows operands 1 and 2 to be swapped.
7814 (define_insn "*sse2_uavgv16qi3"
7815 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7821 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7823 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7824 (const_vector:V16QI [(const_int 1) (const_int 1)
7825 (const_int 1) (const_int 1)
7826 (const_int 1) (const_int 1)
7827 (const_int 1) (const_int 1)
7828 (const_int 1) (const_int 1)
7829 (const_int 1) (const_int 1)
7830 (const_int 1) (const_int 1)
7831 (const_int 1) (const_int 1)]))
7833 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7835 pavgb\t{%2, %0|%0, %2}
7836 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7837 [(set_attr "isa" "noavx,avx")
7838 (set_attr "type" "sseiadd")
7839 (set_attr "prefix_data16" "1,*")
7840 (set_attr "prefix" "orig,vex")
7841 (set_attr "mode" "TI")])
;; Expander avx2_uavgv16hi3.  Operand 0 is a V16HI register, operands 1
;; and 2 are nonimmediate V16HI values, and the pattern carries a V16HI
;; constant vector whose 16 elements are all 1.
;; NOTE(review): the all-ones vector is presumably the rounding term of
;; the unsigned average; confirm against the complete pattern.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with PLUS and V16HImode on the operand array.
7843 (define_expand "avx2_uavgv16hi3"
7844 [(set (match_operand:V16HI 0 "register_operand" "")
7850 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7852 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7853 (const_vector:V16HI [(const_int 1) (const_int 1)
7854 (const_int 1) (const_int 1)
7855 (const_int 1) (const_int 1)
7856 (const_int 1) (const_int 1)
7857 (const_int 1) (const_int 1)
7858 (const_int 1) (const_int 1)
7859 (const_int 1) (const_int 1)
7860 (const_int 1) (const_int 1)]))
7863 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander sse2_uavgv8hi3.  Operand 0 is a V8HI register, operands 1
;; and 2 are nonimmediate V8HI values, and the pattern carries a V8HI
;; constant vector whose 8 elements are all 1.
;; NOTE(review): the all-ones vector is presumably the rounding term of
;; the unsigned average; confirm against the complete pattern.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with PLUS and V8HImode on the operand array.
7865 (define_expand "sse2_uavgv8hi3"
7866 [(set (match_operand:V8HI 0 "register_operand" "")
7872 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7874 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7875 (const_vector:V8HI [(const_int 1) (const_int 1)
7876 (const_int 1) (const_int 1)
7877 (const_int 1) (const_int 1)
7878 (const_int 1) (const_int 1)]))
7881 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI average.  It is valid only when
;; TARGET_AVX2 holds and ix86_binary_operator_ok (PLUS, V16HImode,
;; operands) accepts the operands.  Operand 1 carries the "%" constraint
;; modifier, so operands 1 and 2 may be swapped; operand 2 may be in
;; memory ("xm").  Emits the three-operand vpavgw.
7883 (define_insn "*avx2_uavgv16hi3"
7884 [(set (match_operand:V16HI 0 "register_operand" "=x")
7890 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7892 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7893 (const_vector:V16HI [(const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)
7895 (const_int 1) (const_int 1)
7896 (const_int 1) (const_int 1)
7897 (const_int 1) (const_int 1)
7898 (const_int 1) (const_int 1)
7899 (const_int 1) (const_int 1)
7900 (const_int 1) (const_int 1)]))
7902 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7903 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7904 [(set_attr "type" "sseiadd")
7905 (set_attr "prefix" "vex")
7906 (set_attr "mode" "OI")])
;; Matching insn for the V8HI average.  It is valid only when
;; TARGET_SSE2 holds and ix86_binary_operator_ok (PLUS, V8HImode,
;; operands) accepts the operands.  Two alternatives:
;;   0 - legacy pavgw, operand 1 tied to the destination ("%0"), isa noavx;
;;   1 - VEX vpavgw with three operands, isa avx.
;; The "%" modifier on operand 1 allows operands 1 and 2 to be swapped.
7908 (define_insn "*sse2_uavgv8hi3"
7909 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7915 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7917 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7918 (const_vector:V8HI [(const_int 1) (const_int 1)
7919 (const_int 1) (const_int 1)
7920 (const_int 1) (const_int 1)
7921 (const_int 1) (const_int 1)]))
7923 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7925 pavgw\t{%2, %0|%0, %2}
7926 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7927 [(set_attr "isa" "noavx,avx")
7928 (set_attr "type" "sseiadd")
7929 (set_attr "prefix_data16" "1,*")
7930 (set_attr "prefix" "orig,vex")
7931 (set_attr "mode" "TI")])
;; <sse2_avx2>_psadbw: modelled as an unspec rather than as explicit RTL
;; arithmetic (see the remark below).  The result has a VI8_AVX2 mode
;; while both inputs use the corresponding <ssebytemode>.
;; Alternative 0 is the legacy psadbw with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX vpsadbw.
7933 ;; The correct representation for this is absolutely enormous, and
7934 ;; surely not generally useful.
7935 (define_insn "<sse2_avx2>_psadbw"
7936 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7937 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7938 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7942 psadbw\t{%2, %0|%0, %2}
7943 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7944 [(set_attr "isa" "noavx,avx")
7945 (set_attr "type" "sseiadd")
7946 (set_attr "atom_unit" "simul")
7947 (set_attr "prefix_data16" "1,*")
7948 (set_attr "prefix" "orig,vex")
7949 (set_attr "mode" "<sseinsnmode>")])
;; <sse>_movmsk<ssemodesuffix><avxsizesuffix>: sets the SI general
;; register operand 0 from the VF-mode vector register operand 1.
;; The template uses the "%v" prefix together with prefix "maybe_vex",
;; so one pattern serves both the legacy and the VEX encoding.
7951 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7952 [(set (match_operand:SI 0 "register_operand" "=r")
7954 [(match_operand:VF 1 "register_operand" "x")]
7957 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7958 [(set_attr "type" "ssemov")
7959 (set_attr "prefix" "maybe_vex")
7960 (set_attr "mode" "<MODE>")])
;; avx2_pmovmskb: sets the SI general register operand 0 from an unspec
;; of the V32QI vector register operand 1 and emits vpmovmskb (VEX only).
;; The destination is SImode, so the "mode" attribute is SI, in line with
;; the sibling sse2_pmovmskb pattern (it used to say DI here).
7962 (define_insn "avx2_pmovmskb"
7963 [(set (match_operand:SI 0 "register_operand" "=r")
7964 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7967 "vpmovmskb\t{%1, %0|%0, %1}"
7968 [(set_attr "type" "ssemov")
7969 (set_attr "prefix" "vex")
7970 (set_attr "mode" "SI")])
;; sse2_pmovmskb: sets the SI general register operand 0 from an unspec
;; of the V16QI vector register operand 1.  The "%v" template prefix and
;; prefix "maybe_vex" let the same pattern emit pmovmskb or vpmovmskb.
7972 (define_insn "sse2_pmovmskb"
7973 [(set (match_operand:SI 0 "register_operand" "=r")
7974 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7977 "%vpmovmskb\t{%1, %0|%0, %1}"
7978 [(set_attr "type" "ssemov")
7979 (set_attr "prefix_data16" "1")
7980 (set_attr "prefix" "maybe_vex")
7981 (set_attr "mode" "SI")])
;; Expander sse2_maskmovdqu: the destination (operand 0) is a V16QI
;; memory operand; the value stored is an unspec whose inputs include the
;; V16QI register operands 1 and 2.
;; NOTE(review): operand 2 is presumably the byte mask, going by the
;; instruction name; confirm against the matching insn's template.
7983 (define_expand "sse2_maskmovdqu"
7984 [(set (match_operand:V16QI 0 "memory_operand" "")
7985 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7986 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn for maskmovdqu.  The store address is operand 0, a
;; P-mode register with constraint "D"; it is not printed in the
;; template, which only names operands 1 and 2.  The previous contents
;; of the destination, (mem:V16QI (match_dup 0)), are listed as an input
;; of the unspec.  Because the address register is implicit, length_vex
;; is computed explicitly below instead of relying on the default.
7991 (define_insn "*sse2_maskmovdqu"
7992 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7993 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7994 (match_operand:V16QI 2 "register_operand" "x")
7995 (mem:V16QI (match_dup 0))]
7998 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7999 [(set_attr "type" "ssemov")
8000 (set_attr "prefix_data16" "1")
8001 ;; The implicit %rdi operand confuses default length_vex computation.
8002 (set (attr "length_vex")
8003 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8004 (set_attr "prefix" "maybe_vex")
8005 (set_attr "mode" "TI")])
;; sse_ldmxcsr: an unspec_volatile whose only input is the SI memory
;; operand 0 (constraint "m").  The attributes mark it as an MXCSR
;; access for Atom scheduling and as a memory load.
8007 (define_insn "sse_ldmxcsr"
8008 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8012 [(set_attr "type" "sse")
8013 (set_attr "atom_sse_attr" "mxcsr")
8014 (set_attr "prefix" "maybe_vex")
8015 (set_attr "memory" "load")])
;; sse_stmxcsr: sets the SI memory operand 0 from an unspec_volatile
;; tagged UNSPECV_STMXCSR (the (const_int 0) is only a placeholder
;; input).  The attributes mark it as an MXCSR access for Atom
;; scheduling and as a memory store.
8017 (define_insn "sse_stmxcsr"
8018 [(set (match_operand:SI 0 "memory_operand" "=m")
8019 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8022 [(set_attr "type" "sse")
8023 (set_attr "atom_sse_attr" "mxcsr")
8024 (set_attr "prefix" "maybe_vex")
8025 (set_attr "memory" "store")])
;; sse2_clflush: an unspec_volatile over operand 0, which is an address
;; (predicate address_operand, constraint "p") rather than a memory
;; reference.  The "memory" attribute is "unknown" and the Atom
;; scheduling class is "fence".
8027 (define_insn "sse2_clflush"
8028 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8032 [(set_attr "type" "sse")
8033 (set_attr "atom_sse_attr" "fence")
8034 (set_attr "memory" "unknown")])
;; sse3_mwait: an unspec_volatile over two SI register operands pinned
;; by their constraints to the "a" and "c" registers.  The encoded
;; length is fixed at 3 bytes by the "length" attribute.
8037 (define_insn "sse3_mwait"
8038 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8039 (match_operand:SI 1 "register_operand" "c")]
8042 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8043 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8044 ;; we only need to set up 32bit registers.
8046 [(set_attr "length" "3")])
;; sse3_monitor (32-bit only: TARGET_SSE3 && !TARGET_64BIT): an
;; unspec_volatile over three SI register operands pinned by their
;; constraints to the "a", "c" and "d" registers.  The operands are
;; printed explicitly in the template; the length is fixed at 3 bytes.
8048 (define_insn "sse3_monitor"
8049 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8050 (match_operand:SI 1 "register_operand" "c")
8051 (match_operand:SI 2 "register_operand" "d")]
8053 "TARGET_SSE3 && !TARGET_64BIT"
8054 "monitor\t%0, %1, %2"
8055 [(set_attr "length" "3")])
;; sse3_monitor64 (64-bit only: TARGET_SSE3 && TARGET_64BIT): same shape
;; as sse3_monitor, except that operand 0 (constraint "a") is DImode.
;; Operands 1 and 2 stay SImode for the reason given in the comment
;; inside the pattern.  The length is fixed at 3 bytes.
8057 (define_insn "sse3_monitor64"
8058 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8059 (match_operand:SI 1 "register_operand" "c")
8060 (match_operand:SI 2 "register_operand" "d")]
8062 "TARGET_SSE3 && TARGET_64BIT"
8063 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8064 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8065 ;; zero extended to 64bit, we only need to set up 32bit registers.
8067 [(set_attr "length" "3")])
8069 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8071 ;; SSSE3 instructions
8073 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; avx2_phaddwv16hi3: V16HI pattern that emits the VEX-only vphaddw.
;; The RTL selects the HI elements of operand 1 in adjacent pairs
;; (0,1), (2,3), ..., (14,15), followed by the same pairs of operand 2.
;; Operand 1 must be a register; operand 2 may be in memory ("xm").
8075 (define_insn "avx2_phaddwv16hi3"
8076 [(set (match_operand:V16HI 0 "register_operand" "=x")
8083 (match_operand:V16HI 1 "register_operand" "x")
8084 (parallel [(const_int 0)]))
8085 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8087 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8088 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8091 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8092 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8094 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8095 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8099 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8100 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8102 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8103 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8106 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8107 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8109 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8110 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8116 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8117 (parallel [(const_int 0)]))
8118 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8120 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8121 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8124 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8125 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8127 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8128 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8132 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8133 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8135 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8136 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8139 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8140 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8142 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8143 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8145 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8146 [(set_attr "type" "sseiadd")
8147 (set_attr "prefix_extra" "1")
8148 (set_attr "prefix" "vex")
8149 (set_attr "mode" "OI")])
;; ssse3_phaddwv8hi3: V8HI pattern.  The RTL selects the HI elements of
;; operand 1 in adjacent pairs (0,1) ... (6,7), followed by the same
;; pairs of operand 2.  Two alternatives:
;;   0 - legacy phaddw, operand 1 tied to the destination ("0"), isa noavx;
;;   1 - VEX vphaddw with three operands, isa avx.
8151 (define_insn "ssse3_phaddwv8hi3"
8152 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8158 (match_operand:V8HI 1 "register_operand" "0,x")
8159 (parallel [(const_int 0)]))
8160 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8162 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8163 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8166 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8167 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8169 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8170 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8175 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8176 (parallel [(const_int 0)]))
8177 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8179 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8180 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8183 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8184 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8186 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8187 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8190 phaddw\t{%2, %0|%0, %2}
8191 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8192 [(set_attr "isa" "noavx,avx")
8193 (set_attr "type" "sseiadd")
8194 (set_attr "atom_unit" "complex")
8195 (set_attr "prefix_data16" "1,*")
8196 (set_attr "prefix_extra" "1")
8197 (set_attr "prefix" "orig,vex")
8198 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: V4HI pattern using "y"-constrained registers.
;; The RTL selects the HI elements of operand 1 in adjacent pairs (0,1),
;; (2,3), followed by the same pairs of operand 2.  Single alternative:
;; phaddw with operand 1 tied to the destination ("0").  The REX prefix
;; contribution is computed by x86_extended_reg_mentioned_p (insn).
8200 (define_insn "ssse3_phaddwv4hi3"
8201 [(set (match_operand:V4HI 0 "register_operand" "=y")
8206 (match_operand:V4HI 1 "register_operand" "0")
8207 (parallel [(const_int 0)]))
8208 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8210 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8211 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8215 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8216 (parallel [(const_int 0)]))
8217 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8219 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8220 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8222 "phaddw\t{%2, %0|%0, %2}"
8223 [(set_attr "type" "sseiadd")
8224 (set_attr "atom_unit" "complex")
8225 (set_attr "prefix_extra" "1")
8226 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8227 (set_attr "mode" "DI")])
;; avx2_phadddv8si3: V8SI pattern that emits the VEX-only vphaddd.
;; The RTL selects the SI elements of operand 1 in adjacent pairs
;; (0,1) ... (6,7), followed by the same pairs of operand 2.
;; Operand 1 must be a register; operand 2 may be in memory ("xm").
8229 (define_insn "avx2_phadddv8si3"
8230 [(set (match_operand:V8SI 0 "register_operand" "=x")
8236 (match_operand:V8SI 1 "register_operand" "x")
8237 (parallel [(const_int 0)]))
8238 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8240 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8241 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8244 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8245 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8247 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8248 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8253 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8254 (parallel [(const_int 0)]))
8255 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8257 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8258 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8261 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8262 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8264 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8265 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8267 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8268 [(set_attr "type" "sseiadd")
8269 (set_attr "prefix_extra" "1")
8270 (set_attr "prefix" "vex")
8271 (set_attr "mode" "OI")])
;; ssse3_phadddv4si3: V4SI pattern.  The RTL selects the SI elements of
;; operand 1 in adjacent pairs (0,1), (2,3), followed by the same pairs
;; of operand 2.  Two alternatives:
;;   0 - legacy phaddd, operand 1 tied to the destination ("0"), isa noavx;
;;   1 - VEX vphaddd with three operands, isa avx.
8273 (define_insn "ssse3_phadddv4si3"
8274 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8279 (match_operand:V4SI 1 "register_operand" "0,x")
8280 (parallel [(const_int 0)]))
8281 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8283 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8284 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8288 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8289 (parallel [(const_int 0)]))
8290 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8292 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8293 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8296 phaddd\t{%2, %0|%0, %2}
8297 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8298 [(set_attr "isa" "noavx,avx")
8299 (set_attr "type" "sseiadd")
8300 (set_attr "atom_unit" "complex")
8301 (set_attr "prefix_data16" "1,*")
8302 (set_attr "prefix_extra" "1")
8303 (set_attr "prefix" "orig,vex")
8304 (set_attr "mode" "TI")])
;; ssse3_phadddv2si3: V2SI pattern using "y"-constrained registers.
;; The RTL selects SI elements 0 and 1 of operand 1, then elements 0 and
;; 1 of operand 2.  Single alternative: phaddd with operand 1 tied to the
;; destination ("0").  The REX prefix contribution is computed by
;; x86_extended_reg_mentioned_p (insn).
8306 (define_insn "ssse3_phadddv2si3"
8307 [(set (match_operand:V2SI 0 "register_operand" "=y")
8311 (match_operand:V2SI 1 "register_operand" "0")
8312 (parallel [(const_int 0)]))
8313 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8316 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8317 (parallel [(const_int 0)]))
8318 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8320 "phaddd\t{%2, %0|%0, %2}"
8321 [(set_attr "type" "sseiadd")
8322 (set_attr "atom_unit" "complex")
8323 (set_attr "prefix_extra" "1")
8324 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8325 (set_attr "mode" "DI")])
;; avx2_phaddswv16hi3: V16HI pattern that emits the VEX-only vphaddsw.
;; The RTL selects the HI elements of operand 1 in adjacent pairs
;; (0,1), (2,3), ..., (14,15), followed by the same pairs of operand 2.
;; Operand 1 must be a register; operand 2 may be in memory ("xm").
8327 (define_insn "avx2_phaddswv16hi3"
8328 [(set (match_operand:V16HI 0 "register_operand" "=x")
8335 (match_operand:V16HI 1 "register_operand" "x")
8336 (parallel [(const_int 0)]))
8337 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8339 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8340 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8343 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8344 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8346 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8347 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8351 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8352 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8354 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8355 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8358 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8359 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8361 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8362 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8368 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8369 (parallel [(const_int 0)]))
8370 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8372 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8373 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8376 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8377 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8379 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8380 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8384 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8385 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8387 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8388 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8391 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8392 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8394 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8395 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8397 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8398 [(set_attr "type" "sseiadd")
8399 (set_attr "prefix_extra" "1")
8400 (set_attr "prefix" "vex")
8401 (set_attr "mode" "OI")])
;; ssse3_phaddswv8hi3: V8HI pattern.  The RTL selects the HI elements of
;; operand 1 in adjacent pairs (0,1) ... (6,7), followed by the same
;; pairs of operand 2.  Two alternatives:
;;   0 - legacy phaddsw, operand 1 tied to the destination ("0"), isa noavx;
;;   1 - VEX vphaddsw with three operands, isa avx.
8403 (define_insn "ssse3_phaddswv8hi3"
8404 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8410 (match_operand:V8HI 1 "register_operand" "0,x")
8411 (parallel [(const_int 0)]))
8412 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8414 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8415 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8418 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8419 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8421 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8422 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8427 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8428 (parallel [(const_int 0)]))
8429 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8431 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8432 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8435 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8436 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8438 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8439 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8442 phaddsw\t{%2, %0|%0, %2}
8443 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8444 [(set_attr "isa" "noavx,avx")
8445 (set_attr "type" "sseiadd")
8446 (set_attr "atom_unit" "complex")
8447 (set_attr "prefix_data16" "1,*")
8448 (set_attr "prefix_extra" "1")
8449 (set_attr "prefix" "orig,vex")
8450 (set_attr "mode" "TI")])
;; ssse3_phaddswv4hi3: V4HI pattern using "y"-constrained registers.
;; The RTL selects the HI elements of operand 1 in adjacent pairs (0,1),
;; (2,3), followed by the same pairs of operand 2.  Single alternative:
;; phaddsw with operand 1 tied to the destination ("0").  The REX prefix
;; contribution is computed by x86_extended_reg_mentioned_p (insn).
8452 (define_insn "ssse3_phaddswv4hi3"
8453 [(set (match_operand:V4HI 0 "register_operand" "=y")
8458 (match_operand:V4HI 1 "register_operand" "0")
8459 (parallel [(const_int 0)]))
8460 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8462 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8463 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8467 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8468 (parallel [(const_int 0)]))
8469 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8471 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8472 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8474 "phaddsw\t{%2, %0|%0, %2}"
8475 [(set_attr "type" "sseiadd")
8476 (set_attr "atom_unit" "complex")
8477 (set_attr "prefix_extra" "1")
8478 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8479 (set_attr "mode" "DI")])
;; avx2_phsubwv16hi3: V16HI pattern that emits the VEX-only vphsubw.
;; The RTL selects the HI elements of operand 1 in adjacent pairs
;; (0,1), (2,3), ..., (14,15), followed by the same pairs of operand 2.
;; Operand 1 must be a register; operand 2 may be in memory ("xm").
8481 (define_insn "avx2_phsubwv16hi3"
8482 [(set (match_operand:V16HI 0 "register_operand" "=x")
8489 (match_operand:V16HI 1 "register_operand" "x")
8490 (parallel [(const_int 0)]))
8491 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8493 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8494 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8497 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8498 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8500 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8501 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8505 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8506 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8509 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8512 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8513 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8516 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8522 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8523 (parallel [(const_int 0)]))
8524 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8526 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8527 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8530 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8531 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8533 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8534 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8538 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8539 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8541 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8542 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8545 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8546 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8548 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8549 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8551 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8552 [(set_attr "type" "sseiadd")
8553 (set_attr "prefix_extra" "1")
8554 (set_attr "prefix" "vex")
8555 (set_attr "mode" "OI")])
;; ssse3_phsubwv8hi3: V8HI pattern.  The RTL selects the HI elements of
;; operand 1 in adjacent pairs (0,1) ... (6,7), followed by the same
;; pairs of operand 2.  Two alternatives:
;;   0 - legacy phsubw, operand 1 tied to the destination ("0"), isa noavx;
;;   1 - VEX vphsubw with three operands, isa avx.
8557 (define_insn "ssse3_phsubwv8hi3"
8558 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8564 (match_operand:V8HI 1 "register_operand" "0,x")
8565 (parallel [(const_int 0)]))
8566 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8568 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8569 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8572 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8573 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8575 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8576 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8581 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8582 (parallel [(const_int 0)]))
8583 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8585 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8586 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8589 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8590 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8592 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8593 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8596 phsubw\t{%2, %0|%0, %2}
8597 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8598 [(set_attr "isa" "noavx,avx")
8599 (set_attr "type" "sseiadd")
8600 (set_attr "atom_unit" "complex")
8601 (set_attr "prefix_data16" "1,*")
8602 (set_attr "prefix_extra" "1")
8603 (set_attr "prefix" "orig,vex")
8604 (set_attr "mode" "TI")])
;; ssse3_phsubwv4hi3: V4HI pattern using "y"-constrained registers.
;; The RTL selects the HI elements of operand 1 in adjacent pairs (0,1),
;; (2,3), followed by the same pairs of operand 2.  Single alternative:
;; phsubw with operand 1 tied to the destination ("0").  The REX prefix
;; contribution is computed by x86_extended_reg_mentioned_p (insn).
8606 (define_insn "ssse3_phsubwv4hi3"
8607 [(set (match_operand:V4HI 0 "register_operand" "=y")
8612 (match_operand:V4HI 1 "register_operand" "0")
8613 (parallel [(const_int 0)]))
8614 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8616 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8617 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8621 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8622 (parallel [(const_int 0)]))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8625 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8626 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8628 "phsubw\t{%2, %0|%0, %2}"
8629 [(set_attr "type" "sseiadd")
8630 (set_attr "atom_unit" "complex")
8631 (set_attr "prefix_extra" "1")
8632 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8633 (set_attr "mode" "DI")])
;; avx2_phsubdv8si3: V8SI pattern that emits the VEX-only vphsubd.
;; The RTL selects the SI elements of operand 1 in adjacent pairs
;; (0,1) ... (6,7), followed by the same pairs of operand 2.
;; Operand 1 must be a register; operand 2 may be in memory ("xm").
8635 (define_insn "avx2_phsubdv8si3"
8636 [(set (match_operand:V8SI 0 "register_operand" "=x")
8642 (match_operand:V8SI 1 "register_operand" "x")
8643 (parallel [(const_int 0)]))
8644 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8646 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8647 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8650 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8651 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8653 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8654 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8659 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8660 (parallel [(const_int 0)]))
8661 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8663 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8664 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8667 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8668 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8670 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8671 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8673 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8674 [(set_attr "type" "sseiadd")
8675 (set_attr "prefix_extra" "1")
8676 (set_attr "prefix" "vex")
8677 (set_attr "mode" "OI")])
;; ssse3_phsubdv4si3: V4SI pattern.  The RTL selects the SI elements of
;; operand 1 in adjacent pairs (0,1), (2,3), followed by the same pairs
;; of operand 2.  Two alternatives:
;;   0 - legacy phsubd, operand 1 tied to the destination ("0"), isa noavx;
;;   1 - VEX vphsubd with three operands, isa avx.
8679 (define_insn "ssse3_phsubdv4si3"
8680 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8685 (match_operand:V4SI 1 "register_operand" "0,x")
8686 (parallel [(const_int 0)]))
8687 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8689 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8690 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8694 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8695 (parallel [(const_int 0)]))
8696 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8698 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8699 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8702 phsubd\t{%2, %0|%0, %2}
8703 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8705 [(set_attr "isa" "noavx,avx")
8706 (set_attr "type" "sseiadd")
8707 (set_attr "atom_unit" "complex")
8708 (set_attr "prefix_data16" "1,*")
8709 (set_attr "prefix_extra" "1")
8710 (set_attr "prefix" "orig,vex")
8711 (set_attr "mode" "TI")])
;; ssse3_phsubdv2si3: V2SI pattern using "y"-constrained registers.
;; The RTL selects SI elements 0 and 1 of operand 1, then elements 0 and
;; 1 of operand 2.  Single alternative: phsubd with operand 1 tied to the
;; destination ("0").  The REX prefix contribution is computed by
;; x86_extended_reg_mentioned_p (insn).
8713 (define_insn "ssse3_phsubdv2si3"
8714 [(set (match_operand:V2SI 0 "register_operand" "=y")
8718 (match_operand:V2SI 1 "register_operand" "0")
8719 (parallel [(const_int 0)]))
8720 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8723 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8724 (parallel [(const_int 0)]))
8725 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8727 "phsubd\t{%2, %0|%0, %2}"
8728 [(set_attr "type" "sseiadd")
8729 (set_attr "atom_unit" "complex")
8730 (set_attr "prefix_extra" "1")
8731 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8732 (set_attr "mode" "DI")])
;; avx2_phsubswv16hi3: V16HI pattern that emits the VEX-only vphsubsw.
;; The RTL selects the HI elements of operand 1 in adjacent pairs
;; (0,1), (2,3), ..., (14,15), followed by the same pairs of operand 2.
;; Operand 1 must be a register; operand 2 may be in memory ("xm").
8734 (define_insn "avx2_phsubswv16hi3"
8735 [(set (match_operand:V16HI 0 "register_operand" "=x")
8742 (match_operand:V16HI 1 "register_operand" "x")
8743 (parallel [(const_int 0)]))
8744 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8746 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8747 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8750 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8751 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8753 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8754 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8758 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8759 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8761 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8762 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8765 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8766 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8768 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8769 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8775 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8776 (parallel [(const_int 0)]))
8777 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8779 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8780 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8783 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8784 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8786 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8787 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8791 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8792 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8794 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8795 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8798 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8799 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8801 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8802 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8804 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8805 [(set_attr "type" "sseiadd")
8806 (set_attr "prefix_extra" "1")
8807 (set_attr "prefix" "vex")
8808 (set_attr "mode" "OI")])
;; ssse3_phsubswv8hi3: V8HI pattern with two alternatives, keyed by the
;; "isa" attribute "noavx,avx":
;;   alt 0 - `phsubsw', operand 1 tied to the destination ("0");
;;   alt 1 - three-operand `vphsubsw', operand 1 in any "x" register.
;; HI elements 0..7 of each source operand are selected individually
;; with vec_select, in adjacent pairs.
;; NOTE(review): the combining rtx codes, the "@" alternative marker
;; and the insn condition are elided in this excerpt -- confirm them
;; against the full pattern.
;; prefix_data16 is "1" only for the non-AVX alternative.
8810 (define_insn "ssse3_phsubswv8hi3"
8811 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8817 (match_operand:V8HI 1 "register_operand" "0,x")
8818 (parallel [(const_int 0)]))
8819 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8821 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8822 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8825 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8826 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8828 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8829 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8834 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8835 (parallel [(const_int 0)]))
8836 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8838 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8839 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8842 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8843 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8845 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8846 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8849 phsubsw\t{%2, %0|%0, %2}
8850 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8851 [(set_attr "isa" "noavx,avx")
8852 (set_attr "type" "sseiadd")
8853 (set_attr "atom_unit" "complex")
8854 (set_attr "prefix_data16" "1,*")
8855 (set_attr "prefix_extra" "1")
8856 (set_attr "prefix" "orig,vex")
8857 (set_attr "mode" "TI")])
;; ssse3_phsubswv4hi3: V4HI pattern whose output template is
;; `phsubsw'.  HI elements 0..3 of operand 1 (tied to the destination
;; via "0") and of operand 2 ("ym", register or memory) are selected
;; individually with vec_select, in adjacent pairs.
;; NOTE(review): the combining rtx codes and the insn condition are
;; elided in this excerpt -- confirm them against the full pattern.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn);
;; the insn mode attribute is DI.
8859 (define_insn "ssse3_phsubswv4hi3"
8860 [(set (match_operand:V4HI 0 "register_operand" "=y")
8865 (match_operand:V4HI 1 "register_operand" "0")
8866 (parallel [(const_int 0)]))
8867 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8869 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8870 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8874 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8875 (parallel [(const_int 0)]))
8876 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8878 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8879 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8881 "phsubsw\t{%2, %0|%0, %2}"
8882 [(set_attr "type" "sseiadd")
8883 (set_attr "atom_unit" "complex")
8884 (set_attr "prefix_extra" "1")
8885 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8886 (set_attr "mode" "DI")])
;; avx2_pmaddubsw256: produces a V16HI result from two V32QI operands;
;; the output template is the three-operand `vpmaddubsw'.
;; Each source operand is sampled twice: once with an index list that
;; starts at element 0 and once (as a V16QI vec_select) with a list
;; that starts at element 1 and ends at element 31.
;; NOTE(review): the remaining indices, the extension/multiply/add rtx
;; codes and the insn condition are elided in this excerpt -- confirm
;; them against the full pattern.
;; Attributes: type sseiadd, prefix vex, mode OI.
8888 (define_insn "avx2_pmaddubsw256"
8889 [(set (match_operand:V16HI 0 "register_operand" "=x")
8894 (match_operand:V32QI 1 "register_operand" "x")
8895 (parallel [(const_int 0)
8913 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8914 (parallel [(const_int 0)
8932 (vec_select:V16QI (match_dup 1)
8933 (parallel [(const_int 1)
8950 (vec_select:V16QI (match_dup 2)
8951 (parallel [(const_int 1)
8966 (const_int 31)]))))))]
8968 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8969 [(set_attr "type" "sseiadd")
8970 (set_attr "prefix_extra" "1")
8971 (set_attr "prefix" "vex")
8972 (set_attr "mode" "OI")])
;; ssse3_pmaddubsw128: produces a V8HI result from two V16QI operands.
;; Two alternatives, keyed by "isa" "noavx,avx":
;;   alt 0 - `pmaddubsw', operand 1 tied to the destination ("0");
;;   alt 1 - three-operand `vpmaddubsw'.
;; Each source operand is sampled with an index list starting at
;; element 0 and again (as a V8QI vec_select) with a list starting at
;; element 1 and ending at element 15.
;; NOTE(review): the remaining indices, the arithmetic rtx codes, the
;; "@" marker and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
8974 (define_insn "ssse3_pmaddubsw128"
8975 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8980 (match_operand:V16QI 1 "register_operand" "0,x")
8981 (parallel [(const_int 0)
8991 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8992 (parallel [(const_int 0)
9002 (vec_select:V8QI (match_dup 1)
9003 (parallel [(const_int 1)
9012 (vec_select:V8QI (match_dup 2)
9013 (parallel [(const_int 1)
9020 (const_int 15)]))))))]
9023 pmaddubsw\t{%2, %0|%0, %2}
9024 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9025 [(set_attr "isa" "noavx,avx")
9026 (set_attr "type" "sseiadd")
9027 (set_attr "atom_unit" "simul")
9028 (set_attr "prefix_data16" "1,*")
9029 (set_attr "prefix_extra" "1")
9030 (set_attr "prefix" "orig,vex")
9031 (set_attr "mode" "TI")])
;; ssse3_pmaddubsw: produces a V4HI result from two V8QI operands; the
;; output template is `pmaddubsw'.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory ("ym").
;; Each source operand is sampled with an index list starting at
;; element 0 and again (as a V4QI vec_select) with a list starting at
;; element 1 and ending at element 7.
;; NOTE(review): the remaining indices, the arithmetic rtx codes and
;; the insn condition are elided in this excerpt -- confirm them
;; against the full pattern.
9033 (define_insn "ssse3_pmaddubsw"
9034 [(set (match_operand:V4HI 0 "register_operand" "=y")
9039 (match_operand:V8QI 1 "register_operand" "0")
9040 (parallel [(const_int 0)
9046 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9047 (parallel [(const_int 0)
9053 (vec_select:V4QI (match_dup 1)
9054 (parallel [(const_int 1)
9059 (vec_select:V4QI (match_dup 2)
9060 (parallel [(const_int 1)
9063 (const_int 7)]))))))]
9065 "pmaddubsw\t{%2, %0|%0, %2}"
9066 [(set_attr "type" "sseiadd")
9067 (set_attr "atom_unit" "simul")
9068 (set_attr "prefix_extra" "1")
9069 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9070 (set_attr "mode" "DI")])
;; avx2_umulhrswv16hi3 (expander): named entry point for the V16HI
;; form.  Both inputs are accepted as nonimmediate operands; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy
;; (MULT, V16HImode, operands) before the RTL template is emitted.
;; The template contains a V16HI const_vector of sixteen 1s.
;; NOTE(review): the surrounding rtx codes and the expander condition
;; are elided in this excerpt -- confirm them against the full
;; pattern.
9072 (define_expand "avx2_umulhrswv16hi3"
9073 [(set (match_operand:V16HI 0 "register_operand" "")
9080 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9082 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9084 (const_vector:V16HI [(const_int 1) (const_int 1)
9085 (const_int 1) (const_int 1)
9086 (const_int 1) (const_int 1)
9087 (const_int 1) (const_int 1)
9088 (const_int 1) (const_int 1)
9089 (const_int 1) (const_int 1)
9090 (const_int 1) (const_int 1)
9091 (const_int 1) (const_int 1)]))
9094 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; *avx2_umulhrswv16hi3: anonymous matcher for the V16HI form; the
;; output template is the three-operand `vpmulhrsw'.
;; Enabled when TARGET_AVX2 holds and ix86_binary_operator_ok (MULT,
;; V16HImode, operands) accepts the operands.  Operand 1 carries the
;; "%" constraint modifier, marking it commutative with operand 2.
;; The template contains a V16HI const_vector of sixteen 1s.
;; NOTE(review): the surrounding rtx codes are elided in this excerpt
;; -- confirm them against the full pattern.
9096 (define_insn "*avx2_umulhrswv16hi3"
9097 [(set (match_operand:V16HI 0 "register_operand" "=x")
9104 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9106 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9108 (const_vector:V16HI [(const_int 1) (const_int 1)
9109 (const_int 1) (const_int 1)
9110 (const_int 1) (const_int 1)
9111 (const_int 1) (const_int 1)
9112 (const_int 1) (const_int 1)
9113 (const_int 1) (const_int 1)
9114 (const_int 1) (const_int 1)
9115 (const_int 1) (const_int 1)]))
9117 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9118 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9119 [(set_attr "type" "sseimul")
9120 (set_attr "prefix_extra" "1")
9121 (set_attr "prefix" "vex")
9122 (set_attr "mode" "OI")])
;; ssse3_pmulhrswv8hi3 (expander): named entry point for the V8HI
;; form.  Both inputs are accepted as nonimmediate operands; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy
;; (MULT, V8HImode, operands).  The template contains a V8HI
;; const_vector of eight 1s.
;; NOTE(review): the surrounding rtx codes and the expander condition
;; are elided in this excerpt -- confirm them against the full
;; pattern.
9124 (define_expand "ssse3_pmulhrswv8hi3"
9125 [(set (match_operand:V8HI 0 "register_operand" "")
9132 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9134 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9136 (const_vector:V8HI [(const_int 1) (const_int 1)
9137 (const_int 1) (const_int 1)
9138 (const_int 1) (const_int 1)
9139 (const_int 1) (const_int 1)]))
9142 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *ssse3_pmulhrswv8hi3: anonymous matcher for the V8HI form.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok (MULT,
;; V8HImode, operands) accepts the operands.  Two alternatives, keyed
;; by "isa" "noavx,avx":
;;   alt 0 - `pmulhrsw', operand 1 tied to the destination ("%0");
;;   alt 1 - three-operand `vpmulhrsw'.
;; The "%" modifier on operand 1 marks it commutative with operand 2.
;; NOTE(review): the surrounding rtx codes and the "@" marker are
;; elided in this excerpt -- confirm them against the full pattern.
9144 (define_insn "*ssse3_pmulhrswv8hi3"
9145 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9152 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9154 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9156 (const_vector:V8HI [(const_int 1) (const_int 1)
9157 (const_int 1) (const_int 1)
9158 (const_int 1) (const_int 1)
9159 (const_int 1) (const_int 1)]))
9161 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9163 pmulhrsw\t{%2, %0|%0, %2}
9164 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9165 [(set_attr "isa" "noavx,avx")
9166 (set_attr "type" "sseimul")
9167 (set_attr "prefix_data16" "1,*")
9168 (set_attr "prefix_extra" "1")
9169 (set_attr "prefix" "orig,vex")
9170 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3 (expander): named entry point for the V4HI
;; form.  Both inputs are accepted as nonimmediate operands; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy
;; (MULT, V4HImode, operands).  The template contains a V4HI
;; const_vector of four 1s.
;; NOTE(review): the surrounding rtx codes and the expander condition
;; are elided in this excerpt -- confirm them against the full
;; pattern.
9172 (define_expand "ssse3_pmulhrswv4hi3"
9173 [(set (match_operand:V4HI 0 "register_operand" "")
9180 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9182 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9184 (const_vector:V4HI [(const_int 1) (const_int 1)
9185 (const_int 1) (const_int 1)]))
9188 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; *ssse3_pmulhrswv4hi3: anonymous matcher for the V4HI form; the
;; output template is `pmulhrsw'.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok (MULT,
;; V4HImode, operands) accepts the operands.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 is "ym".
;; NOTE(review): the surrounding rtx codes are elided in this excerpt
;; -- confirm them against the full pattern.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn).
9190 (define_insn "*ssse3_pmulhrswv4hi3"
9191 [(set (match_operand:V4HI 0 "register_operand" "=y")
9198 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9200 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9202 (const_vector:V4HI [(const_int 1) (const_int 1)
9203 (const_int 1) (const_int 1)]))
9205 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9206 "pmulhrsw\t{%2, %0|%0, %2}"
9207 [(set_attr "type" "sseimul")
9208 (set_attr "prefix_extra" "1")
9209 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9210 (set_attr "mode" "DI")])
;; <ssse3_avx2>_pshufb<mode>3: unspec-based pattern over the VI1_AVX2
;; mode iterator taking operands 1 and 2.  Two alternatives, keyed by
;; "isa" "noavx,avx":
;;   alt 0 - `pshufb', operand 1 tied to the destination ("0");
;;   alt 1 - three-operand `vpshufb'.
;; NOTE(review): the unspec code, the "@" marker and the insn
;; condition are elided in this excerpt -- confirm them against the
;; full pattern.
;; The insn mode attribute follows the iterator via <sseinsnmode>.
9212 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9213 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9214 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9215 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9219 pshufb\t{%2, %0|%0, %2}
9220 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9221 [(set_attr "isa" "noavx,avx")
9222 (set_attr "type" "sselog1")
9223 (set_attr "prefix_data16" "1,*")
9224 (set_attr "prefix_extra" "1")
9225 (set_attr "prefix" "orig,vex")
9226 (set_attr "mode" "<sseinsnmode>")])
;; ssse3_pshufbv8qi3: unspec-based V8QI pattern taking operands 1 and
;; 2; the output template is `pshufb'.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory ("ym").
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn).
9228 (define_insn "ssse3_pshufbv8qi3"
9229 [(set (match_operand:V8QI 0 "register_operand" "=y")
9230 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9231 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9234 "pshufb\t{%2, %0|%0, %2}"
9235 [(set_attr "type" "sselog1")
9236 (set_attr "prefix_extra" "1")
9237 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9238 (set_attr "mode" "DI")])
;; <ssse3_avx2>_psign<mode>3: pattern over the VI124_AVX2 mode
;; iterator taking operands 1 and 2.  Two alternatives, keyed by "isa"
;; "noavx,avx":
;;   alt 0 - `psign<ssemodesuffix>', operand 1 tied to the destination;
;;   alt 1 - three-operand `vpsign<ssemodesuffix>'.
;; NOTE(review): the enclosing rtx (presumably an unspec), the "@"
;; marker and the insn condition are elided in this excerpt -- confirm
;; them against the full pattern.
9240 (define_insn "<ssse3_avx2>_psign<mode>3"
9241 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9243 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9244 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9248 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9249 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9250 [(set_attr "isa" "noavx,avx")
9251 (set_attr "type" "sselog1")
9252 (set_attr "prefix_data16" "1,*")
9253 (set_attr "prefix_extra" "1")
9254 (set_attr "prefix" "orig,vex")
9255 (set_attr "mode" "<sseinsnmode>")])
;; ssse3_psign<mode>3: pattern over the MMXMODEI mode iterator taking
;; operands 1 and 2; the output template is `psign<mmxvecsize>'.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory ("ym").
;; NOTE(review): the enclosing rtx (presumably an unspec) and the insn
;; condition are elided in this excerpt -- confirm them against the
;; full pattern.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn).
9257 (define_insn "ssse3_psign<mode>3"
9258 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9260 [(match_operand:MMXMODEI 1 "register_operand" "0")
9261 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9264 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9265 [(set_attr "type" "sselog1")
9266 (set_attr "prefix_extra" "1")
9267 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9268 (set_attr "mode" "DI")])
;; <ssse3_avx2>_palignr<mode>: unspec-based pattern over the
;; SSESCALARMODE iterator.  Operand 3 is an SImode constant accepted
;; by const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, then returns `palignr' (operand 1 tied to the
;; destination) or the three-source `vpalignr' depending on
;; which_alternative.
;; NOTE(review): the unspec code, the case labels of the switch and
;; the insn condition are elided in this excerpt -- confirm them
;; against the full pattern.
9270 (define_insn "<ssse3_avx2>_palignr<mode>"
9271 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9272 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9273 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9274 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9278 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9280 switch (which_alternative)
9283 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9285 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9290 [(set_attr "isa" "noavx,avx")
9291 (set_attr "type" "sseishft")
9292 (set_attr "atom_unit" "sishuf")
9293 (set_attr "prefix_data16" "1,*")
9294 (set_attr "prefix_extra" "1")
9295 (set_attr "length_immediate" "1")
9296 (set_attr "prefix" "orig,vex")
9297 (set_attr "mode" "<sseinsnmode>")])
;; ssse3_palignrdi: unspec-based DImode pattern.  Operand 1 is tied to
;; the destination ("0"); operand 2 is "ym"; operand 3 is an SImode
;; constant accepted by const_0_to_255_mul_8_operand.  The output code
;; divides operand 3 by 8 and then returns the `palignr' template.
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
;; prefix_rex is computed from x86_extended_reg_mentioned_p (insn).
9299 (define_insn "ssse3_palignrdi"
9300 [(set (match_operand:DI 0 "register_operand" "=y")
9301 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9302 (match_operand:DI 2 "nonimmediate_operand" "ym")
9303 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9307 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9308 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9310 [(set_attr "type" "sseishft")
9311 (set_attr "atom_unit" "sishuf")
9312 (set_attr "prefix_extra" "1")
9313 (set_attr "length_immediate" "1")
9314 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9315 (set_attr "mode" "DI")])
;; abs<mode>2 over the VI124_AVX2 mode iterator: single-source pattern
;; (operand 1 is "xm", register or memory) whose output template is
;; `%vpabs<ssemodesuffix>'.  The prefix attribute is maybe_vex.
;; NOTE(review): the rtx code applied to operand 1 (presumably abs)
;; and the insn condition are elided in this excerpt -- confirm them
;; against the full pattern.
9317 (define_insn "abs<mode>2"
9318 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9320 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9322 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9323 [(set_attr "type" "sselog1")
9324 (set_attr "prefix_data16" "1")
9325 (set_attr "prefix_extra" "1")
9326 (set_attr "prefix" "maybe_vex")
9327 (set_attr "mode" "<sseinsnmode>")])
;; abs<mode>2 over the MMXMODEI mode iterator: single-source pattern
;; (operand 1 is "ym", register or memory) whose output template is
;; `pabs<mmxvecsize>'.  prefix_rep is forced to "0" and prefix_rex is
;; computed from x86_extended_reg_mentioned_p (insn).
;; NOTE(review): the rtx code applied to operand 1 (presumably abs)
;; and the insn condition are elided in this excerpt -- confirm them
;; against the full pattern.
9329 (define_insn "abs<mode>2"
9330 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9332 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9334 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9335 [(set_attr "type" "sselog1")
9336 (set_attr "prefix_rep" "0")
9337 (set_attr "prefix_extra" "1")
9338 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9339 (set_attr "mode" "DI")])
9341 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9343 ;; AMD SSE4A instructions
9345 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_movnt<mode>: store of a MODEF register operand ("x") to a
;; memory destination ("=m"); the output template is
;; `movnt<ssemodesuffix>'.
;; NOTE(review): the enclosing unspec code and the insn condition are
;; elided in this excerpt -- confirm them against the full pattern.
9347 (define_insn "sse4a_movnt<mode>"
9348 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9350 [(match_operand:MODEF 1 "register_operand" "x")]
9353 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9354 [(set_attr "type" "ssemov")
9355 (set_attr "mode" "<MODE>")])
;; sse4a_vmmovnt<mode>: stores element 0 of a VF_128 register operand
;; (selected with vec_select) to a <ssescalarmode> memory destination,
;; wrapped in an unspec; the output template is
;; `movnt<ssescalarmodesuffix>'.
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9357 (define_insn "sse4a_vmmovnt<mode>"
9358 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9359 (unspec:<ssescalarmode>
9360 [(vec_select:<ssescalarmode>
9361 (match_operand:VF_128 1 "register_operand" "x")
9362 (parallel [(const_int 0)]))]
9365 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9366 [(set_attr "type" "ssemov")
9367 (set_attr "mode" "<ssescalarmode>")])
;; sse4a_extrqi: immediate form of `extrq' on V2DI.  Operand 1 is tied
;; to the destination ("0"); operands 2 and 3 are constants accepted
;; by const_0_to_255_operand and are printed as the two immediates
;; (length_immediate is 2).
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9369 (define_insn "sse4a_extrqi"
9370 [(set (match_operand:V2DI 0 "register_operand" "=x")
9371 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9372 (match_operand 2 "const_0_to_255_operand" "")
9373 (match_operand 3 "const_0_to_255_operand" "")]
9376 "extrq\t{%3, %2, %0|%0, %2, %3}"
9377 [(set_attr "type" "sse")
9378 (set_attr "prefix_data16" "1")
9379 (set_attr "length_immediate" "2")
9380 (set_attr "mode" "TI")])
;; sse4a_extrq: register form of `extrq' on V2DI.  Operand 1 is tied
;; to the destination ("0"); operand 2 is a V16QI register ("x").
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9382 (define_insn "sse4a_extrq"
9383 [(set (match_operand:V2DI 0 "register_operand" "=x")
9384 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9385 (match_operand:V16QI 2 "register_operand" "x")]
9388 "extrq\t{%2, %0|%0, %2}"
9389 [(set_attr "type" "sse")
9390 (set_attr "prefix_data16" "1")
9391 (set_attr "mode" "TI")])
;; sse4a_insertqi: immediate form of `insertq' on V2DI.  Operand 1 is
;; tied to the destination ("0"); operand 2 is a V2DI register;
;; operands 3 and 4 are constants accepted by const_0_to_255_operand
;; and are printed as the two immediates (length_immediate is 2).
;; prefix_data16 is forced to "0" and prefix_rep to "1".
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9393 (define_insn "sse4a_insertqi"
9394 [(set (match_operand:V2DI 0 "register_operand" "=x")
9395 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9396 (match_operand:V2DI 2 "register_operand" "x")
9397 (match_operand 3 "const_0_to_255_operand" "")
9398 (match_operand 4 "const_0_to_255_operand" "")]
9401 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9402 [(set_attr "type" "sseins")
9403 (set_attr "prefix_data16" "0")
9404 (set_attr "prefix_rep" "1")
9405 (set_attr "length_immediate" "2")
9406 (set_attr "mode" "TI")])
;; sse4a_insertq: register form of `insertq' on V2DI.  Operand 1 is
;; tied to the destination ("0"); operand 2 is a V2DI register ("x").
;; prefix_data16 is forced to "0" and prefix_rep to "1".
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9408 (define_insn "sse4a_insertq"
9409 [(set (match_operand:V2DI 0 "register_operand" "=x")
9410 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9411 (match_operand:V2DI 2 "register_operand" "x")]
9414 "insertq\t{%2, %0|%0, %2}"
9415 [(set_attr "type" "sseins")
9416 (set_attr "prefix_data16" "0")
9417 (set_attr "prefix_rep" "1")
9418 (set_attr "mode" "TI")])
9420 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9422 ;; Intel SSE4.1 instructions
9424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; <sse4_1>_blend<ssemodesuffix><avxsizesuffix>: immediate-controlled
;; blend over the VF mode iterator.  Operand 2 ("xm") is listed before
;; operand 1 in the RTL; operand 3 is an SImode constant accepted by
;; const_0_to_<blendbits>_operand.  Two alternatives, keyed by "isa"
;; "noavx,avx":
;;   alt 0 - `blend<ssemodesuffix>', operand 1 tied to the destination;
;;   alt 1 - `vblend<ssemodesuffix>' with a separate destination.
;; NOTE(review): the enclosing rtx code (presumably vec_merge), the
;; "@" marker and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9426 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9427 [(set (match_operand:VF 0 "register_operand" "=x,x")
9429 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9430 (match_operand:VF 1 "register_operand" "0,x")
9431 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9434 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9435 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9436 [(set_attr "isa" "noavx,avx")
9437 (set_attr "type" "ssemov")
9438 (set_attr "length_immediate" "1")
9439 (set_attr "prefix_data16" "1,*")
9440 (set_attr "prefix_extra" "1")
9441 (set_attr "prefix" "orig,vex")
9442 (set_attr "mode" "<MODE>")])
;; <sse4_1>_blendv<ssemodesuffix><avxsizesuffix>: variable blend over
;; the VF mode iterator; operand 3 is a vector register rather than an
;; immediate.  Two alternatives, keyed by "isa" "noavx,avx":
;;   alt 0 - `blendv<ssemodesuffix>', operand 1 tied to the
;;           destination and operand 3 constrained to "Yz";
;;   alt 1 - `vblendv<ssemodesuffix>', operand 3 in any "x" register.
;; The destination and operands 1/2 use the *_not_xmm0_*_maybe_avx
;; predicates.
;; NOTE(review): the unspec code, the "@" marker and the insn
;; condition are elided in this excerpt; "Yz" is presumably the xmm0
;; constraint -- confirm against the i386 constraint definitions.
9444 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9445 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9447 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9448 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9449 (match_operand:VF 3 "register_operand" "Yz,x")]
9453 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9454 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9455 [(set_attr "isa" "noavx,avx")
9456 (set_attr "type" "ssemov")
9457 (set_attr "length_immediate" "1")
9458 (set_attr "prefix_data16" "1,*")
9459 (set_attr "prefix_extra" "1")
9460 (set_attr "prefix" "orig,vex")
9461 (set_attr "mode" "<MODE>")])
;; <sse4_1>_dp<ssemodesuffix><avxsizesuffix>: pattern over the VF mode
;; iterator with two vector sources and an SImode immediate accepted
;; by const_0_to_255_operand.  Operand 1 carries the "%" modifier
;; (commutative with operand 2).  Two alternatives, keyed by "isa"
;; "noavx,avx":
;;   alt 0 - `dp<ssemodesuffix>', operand 1 tied to the destination;
;;   alt 1 - `vdp<ssemodesuffix>' with a separate destination.
;; NOTE(review): the unspec code, the "@" marker and the insn
;; condition are elided in this excerpt -- confirm them against the
;; full pattern.
9463 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9464 [(set (match_operand:VF 0 "register_operand" "=x,x")
9466 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9467 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9468 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9472 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9473 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9474 [(set_attr "isa" "noavx,avx")
9475 (set_attr "type" "ssemul")
9476 (set_attr "length_immediate" "1")
9477 (set_attr "prefix_data16" "1,*")
9478 (set_attr "prefix_extra" "1")
9479 (set_attr "prefix" "orig,vex")
9480 (set_attr "mode" "<MODE>")])
;; <sse4_1_avx2>_movntdqa: unspec-based load over the VI8_AVX2 mode
;; iterator from a memory operand ("m") into a register; the output
;; template is `%vmovntdqa' and the prefix attribute is maybe_vex.
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9482 (define_insn "<sse4_1_avx2>_movntdqa"
9483 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9484 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9487 "%vmovntdqa\t{%1, %0|%0, %1}"
9488 [(set_attr "type" "ssemov")
9489 (set_attr "prefix_extra" "1")
9490 (set_attr "prefix" "maybe_vex")
9491 (set_attr "mode" "<sseinsnmode>")])
;; <sse4_1_avx2>_mpsadbw: unspec-based pattern over the VI1_AVX2 mode
;; iterator with two vector sources and an SImode immediate accepted
;; by const_0_to_255_operand.  Two alternatives, keyed by "isa"
;; "noavx,avx":
;;   alt 0 - `mpsadbw', operand 1 tied to the destination;
;;   alt 1 - `vmpsadbw' with a separate destination.
;; NOTE(review): the unspec code, the "@" marker and the insn
;; condition are elided in this excerpt -- confirm them against the
;; full pattern.
9493 (define_insn "<sse4_1_avx2>_mpsadbw"
9494 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9495 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9496 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9497 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9501 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9502 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9503 [(set_attr "isa" "noavx,avx")
9504 (set_attr "type" "sselog1")
9505 (set_attr "length_immediate" "1")
9506 (set_attr "prefix_extra" "1")
9507 (set_attr "prefix" "orig,vex")
9508 (set_attr "mode" "<sseinsnmode>")])
;; avx2_packusdw: produces a V16HI result from two V8SI operands; the
;; output template is the three-operand `vpackusdw'.  Operand 1 is a
;; register ("x"); operand 2 may be a register or memory ("xm").
;; NOTE(review): the rtx codes wrapped around the operands and the
;; insn condition are elided in this excerpt -- confirm them against
;; the full pattern.
9510 (define_insn "avx2_packusdw"
9511 [(set (match_operand:V16HI 0 "register_operand" "=x")
9514 (match_operand:V8SI 1 "register_operand" "x"))
9516 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9518 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9519 [(set_attr "type" "sselog")
9520 (set_attr "prefix_extra" "1")
9521 (set_attr "prefix" "vex")
9522 (set_attr "mode" "OI")])
;; sse4_1_packusdw: produces a V8HI result from two V4SI operands.
;; Two alternatives, keyed by "isa" "noavx,avx":
;;   alt 0 - `packusdw', operand 1 tied to the destination ("0");
;;   alt 1 - three-operand `vpackusdw'.
;; NOTE(review): the rtx codes wrapped around the operands, the "@"
;; marker and the insn condition are elided in this excerpt -- confirm
;; them against the full pattern.
9524 (define_insn "sse4_1_packusdw"
9525 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9528 (match_operand:V4SI 1 "register_operand" "0,x"))
9530 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9533 packusdw\t{%2, %0|%0, %2}
9534 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9535 [(set_attr "isa" "noavx,avx")
9536 (set_attr "type" "sselog")
9537 (set_attr "prefix_extra" "1")
9538 (set_attr "prefix" "orig,vex")
9539 (set_attr "mode" "TI")])
;; <sse4_1_avx2>_pblendvb: variable byte blend over the VI1_AVX2 mode
;; iterator; operand 3 is a vector register.  Two alternatives, keyed
;; by "isa" "noavx,avx":
;;   alt 0 - `pblendvb', operand 1 tied to the destination and
;;           operand 3 constrained to "Yz";
;;   alt 1 - `vpblendvb', operand 3 in any "x" register.
;; length_immediate is "*,1": only the AVX alternative sets it to 1.
;; NOTE(review): the unspec code, the "@" marker and the insn
;; condition are elided in this excerpt; "Yz" is presumably the xmm0
;; constraint -- confirm against the i386 constraint definitions.
9541 (define_insn "<sse4_1_avx2>_pblendvb"
9542 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9544 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9545 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9546 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9550 pblendvb\t{%3, %2, %0|%0, %2, %3}
9551 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9552 [(set_attr "isa" "noavx,avx")
9553 (set_attr "type" "ssemov")
9554 (set_attr "prefix_extra" "1")
9555 (set_attr "length_immediate" "*,1")
9556 (set_attr "prefix" "orig,vex")
9557 (set_attr "mode" "<sseinsnmode>")])
;; sse4_1_pblendw: immediate-controlled V8HI blend.  Operand 2 ("xm")
;; is listed before operand 1 in the RTL; operand 3 is an SImode
;; constant accepted by const_0_to_255_operand.  Two alternatives,
;; keyed by "isa" "noavx,avx":
;;   alt 0 - `pblendw', operand 1 tied to the destination;
;;   alt 1 - `vpblendw' with a separate destination.
;; NOTE(review): the enclosing rtx code (presumably vec_merge), the
;; "@" marker and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9559 (define_insn "sse4_1_pblendw"
9560 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9562 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9563 (match_operand:V8HI 1 "register_operand" "0,x")
9564 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9567 pblendw\t{%3, %2, %0|%0, %2, %3}
9568 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9569 [(set_attr "isa" "noavx,avx")
9570 (set_attr "type" "ssemov")
9571 (set_attr "prefix_extra" "1")
9572 (set_attr "length_immediate" "1")
9573 (set_attr "prefix" "orig,vex")
9574 (set_attr "mode" "TI")])
9576 ;; The builtin uses an 8-bit immediate. Expand that.
;; avx2_pblendw (expander): V16HI blend taking an 8-bit immediate in
;; operand 3 (const_0_to_255_operand).  The preparation code masks the
;; immediate to its low 8 bits and replaces operand 3 with that byte
;; duplicated into bits 8..15 (val << 8 | val), giving a 16-bit value.
;; NOTE(review): the enclosing rtx code and the expander condition are
;; elided in this excerpt -- confirm them against the full pattern.
9577 (define_expand "avx2_pblendw"
9578 [(set (match_operand:V16HI 0 "register_operand" "")
9580 (match_operand:V16HI 2 "nonimmediate_operand" "")
9581 (match_operand:V16HI 1 "register_operand" "")
9582 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9585 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9586 operands[3] = GEN_INT (val << 8 | val);
;; *avx2_pblendw: anonymous matcher for the V16HI blend.  Operand 3
;; must satisfy avx2_pblendw_operand; the output code reduces it to
;; its low 8 bits (& 0xff) before returning the `vpblendw' template,
;; so only one byte is printed as the immediate.
;; NOTE(review): the enclosing rtx code and the insn condition are
;; elided in this excerpt -- confirm them against the full pattern.
9589 (define_insn "*avx2_pblendw"
9590 [(set (match_operand:V16HI 0 "register_operand" "=x")
9592 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9593 (match_operand:V16HI 1 "register_operand" "x")
9594 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9597 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9598 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9600 [(set_attr "type" "ssemov")
9601 (set_attr "prefix_extra" "1")
9602 (set_attr "length_immediate" "1")
9603 (set_attr "prefix" "vex")
9604 (set_attr "mode" "OI")])
;; avx2_pblendd<mode>: immediate-controlled blend over the VI4_AVX2
;; mode iterator; the output template is `vpblendd'.  Operand 2 ("xm")
;; is listed before operand 1 in the RTL; operand 3 is an SImode
;; constant accepted by const_0_to_255_operand.
;; NOTE(review): the enclosing rtx code (presumably vec_merge) and the
;; insn condition are elided in this excerpt -- confirm them against
;; the full pattern.
9606 (define_insn "avx2_pblendd<mode>"
9607 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9609 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9610 (match_operand:VI4_AVX2 1 "register_operand" "x")
9611 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9613 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9614 [(set_attr "type" "ssemov")
9615 (set_attr "prefix_extra" "1")
9616 (set_attr "length_immediate" "1")
9617 (set_attr "prefix" "vex")
9618 (set_attr "mode" "<sseinsnmode>")])
;; sse4_1_phminposuw: V8HI pattern modelled as UNSPEC_PHMINPOSUW of a
;; single source operand ("xm", register or memory); the output
;; template is `%vphminposuw' and the prefix attribute is maybe_vex.
;; NOTE(review): the insn condition is elided in this excerpt --
;; confirm it against the full pattern.
9620 (define_insn "sse4_1_phminposuw"
9621 [(set (match_operand:V8HI 0 "register_operand" "=x")
9622 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9623 UNSPEC_PHMINPOSUW))]
9625 "%vphminposuw\t{%1, %0|%0, %1}"
9626 [(set_attr "type" "sselog1")
9627 (set_attr "prefix_extra" "1")
9628 (set_attr "prefix" "maybe_vex")
9629 (set_attr "mode" "TI")])
;; avx2_<code>v16qiv16hi2: widens a whole V16QI operand ("xm") to
;; V16HI; the output template is `vpmov<extsuffix>bw'.
;; NOTE(review): the extension rtx (selected by the <code> iterator)
;; and the insn condition are elided in this excerpt -- confirm them
;; against the full pattern.
9631 (define_insn "avx2_<code>v16qiv16hi2"
9632 [(set (match_operand:V16HI 0 "register_operand" "=x")
9634 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9636 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9637 [(set_attr "type" "ssemov")
9638 (set_attr "prefix_extra" "1")
9639 (set_attr "prefix" "vex")
9640 (set_attr "mode" "OI")])
;; sse4_1_<code>v8qiv8hi2: widens selected elements of a V16QI operand
;; ("xm") to V8HI; the output template is `%vpmov<extsuffix>bw'.  The
;; Intel-syntax half of the template prints operand 1 with the %q
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9642 (define_insn "sse4_1_<code>v8qiv8hi2"
9643 [(set (match_operand:V8HI 0 "register_operand" "=x")
9646 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9647 (parallel [(const_int 0)
9656 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9657 [(set_attr "type" "ssemov")
9658 (set_attr "prefix_extra" "1")
9659 (set_attr "prefix" "maybe_vex")
9660 (set_attr "mode" "TI")])
;; avx2_<code>v8qiv8si2: widens selected elements of a V16QI operand
;; ("xm") to V8SI; the output template is `vpmov<extsuffix>bd'.  The
;; Intel-syntax half of the template prints operand 1 with the %q
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9662 (define_insn "avx2_<code>v8qiv8si2"
9663 [(set (match_operand:V8SI 0 "register_operand" "=x")
9666 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9667 (parallel [(const_int 0)
9676 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9677 [(set_attr "type" "ssemov")
9678 (set_attr "prefix_extra" "1")
9679 (set_attr "prefix" "vex")
9680 (set_attr "mode" "OI")])
;; sse4_1_<code>v4qiv4si2: widens selected elements of a V16QI operand
;; ("xm") to V4SI; the output template is `%vpmov<extsuffix>bd'.  The
;; Intel-syntax half of the template prints operand 1 with the %k
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9682 (define_insn "sse4_1_<code>v4qiv4si2"
9683 [(set (match_operand:V4SI 0 "register_operand" "=x")
9686 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9687 (parallel [(const_int 0)
9692 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9693 [(set_attr "type" "ssemov")
9694 (set_attr "prefix_extra" "1")
9695 (set_attr "prefix" "maybe_vex")
9696 (set_attr "mode" "TI")])
;; avx2_<code>v8hiv8si2: widens a whole V8HI operand ("xm") to V8SI;
;; the output template is `vpmov<extsuffix>wd'.
;; NOTE(review): the extension rtx (selected by the <code> iterator)
;; and the insn condition are elided in this excerpt -- confirm them
;; against the full pattern.
9698 (define_insn "avx2_<code>v8hiv8si2"
9699 [(set (match_operand:V8SI 0 "register_operand" "=x")
9701 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9703 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9704 [(set_attr "type" "ssemov")
9705 (set_attr "prefix_extra" "1")
9706 (set_attr "prefix" "vex")
9707 (set_attr "mode" "OI")])
;; sse4_1_<code>v4hiv4si2: widens selected elements of a V8HI operand
;; ("xm") to V4SI; the output template is `%vpmov<extsuffix>wd'.  The
;; Intel-syntax half of the template prints operand 1 with the %q
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9709 (define_insn "sse4_1_<code>v4hiv4si2"
9710 [(set (match_operand:V4SI 0 "register_operand" "=x")
9713 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9714 (parallel [(const_int 0)
9719 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9720 [(set_attr "type" "ssemov")
9721 (set_attr "prefix_extra" "1")
9722 (set_attr "prefix" "maybe_vex")
9723 (set_attr "mode" "TI")])
;; avx2_<code>v4qiv4di2: widens selected elements of a V16QI operand
;; ("xm") to V4DI; the output template is `vpmov<extsuffix>bq'.  The
;; Intel-syntax half of the template prints operand 1 with the %k
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9725 (define_insn "avx2_<code>v4qiv4di2"
9726 [(set (match_operand:V4DI 0 "register_operand" "=x")
9729 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9730 (parallel [(const_int 0)
9735 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9736 [(set_attr "type" "ssemov")
9737 (set_attr "prefix_extra" "1")
9738 (set_attr "prefix" "vex")
9739 (set_attr "mode" "OI")])
;; sse4_1_<code>v2qiv2di2: widens selected elements of a V16QI operand
;; ("xm") to V2DI; the output template is `%vpmov<extsuffix>bq'.  The
;; Intel-syntax half of the template prints operand 1 with the %w
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9741 (define_insn "sse4_1_<code>v2qiv2di2"
9742 [(set (match_operand:V2DI 0 "register_operand" "=x")
9745 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9746 (parallel [(const_int 0)
9749 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9750 [(set_attr "type" "ssemov")
9751 (set_attr "prefix_extra" "1")
9752 (set_attr "prefix" "maybe_vex")
9753 (set_attr "mode" "TI")])
;; avx2_<code>v4hiv4di2: widens selected elements of a V8HI operand
;; ("xm") to V4DI; the output template is `vpmov<extsuffix>wq'.  The
;; Intel-syntax half of the template prints operand 1 with the %q
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9755 (define_insn "avx2_<code>v4hiv4di2"
9756 [(set (match_operand:V4DI 0 "register_operand" "=x")
9759 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9760 (parallel [(const_int 0)
9765 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9766 [(set_attr "type" "ssemov")
9767 (set_attr "prefix_extra" "1")
9768 (set_attr "prefix" "vex")
9769 (set_attr "mode" "OI")])
;; sse4_1_<code>v2hiv2di2: widens selected elements of a V8HI operand
;; ("xm") to V2DI; the output template is `%vpmov<extsuffix>wq'.  The
;; Intel-syntax half of the template prints operand 1 with the %k
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9771 (define_insn "sse4_1_<code>v2hiv2di2"
9772 [(set (match_operand:V2DI 0 "register_operand" "=x")
9775 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9776 (parallel [(const_int 0)
9779 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9780 [(set_attr "type" "ssemov")
9781 (set_attr "prefix_extra" "1")
9782 (set_attr "prefix" "maybe_vex")
9783 (set_attr "mode" "TI")])
;; avx2_<code>v4siv4di2: widens a whole V4SI operand ("xm") to V4DI;
;; the output template is `vpmov<extsuffix>dq'.
;; The template always prints the `v'-prefixed mnemonic, so the
;; "prefix" attribute is set to "vex" explicitly, as in the other
;; avx2_<code>... extension patterns, instead of being left at its
;; default value.
;; NOTE(review): the extension rtx (selected by the <code> iterator)
;; and the insn condition are elided in this excerpt -- confirm them
;; against the full pattern.
9785 (define_insn "avx2_<code>v4siv4di2"
9786 [(set (match_operand:V4DI 0 "register_operand" "=x")
9788 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9790 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9791 [(set_attr "type" "ssemov")
9792 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9793 (set_attr "mode" "OI")])
;; sse4_1_<code>v2siv2di2: widens selected elements of a V4SI operand
;; ("xm") to V2DI; the output template is `%vpmov<extsuffix>dq'.  The
;; Intel-syntax half of the template prints operand 1 with the %q
;; modifier.
;; NOTE(review): the extension rtx, the selection indices after
;; element 0 and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9795 (define_insn "sse4_1_<code>v2siv2di2"
9796 [(set (match_operand:V2DI 0 "register_operand" "=x")
9799 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9800 (parallel [(const_int 0)
9803 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9804 [(set_attr "type" "ssemov")
9805 (set_attr "prefix_extra" "1")
9806 (set_attr "prefix" "maybe_vex")
9807 (set_attr "mode" "TI")])
9809 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9810 ;; setting FLAGS_REG, but they are not really compare instructions.
;; avx_vtest<ssemodesuffix><avxsizesuffix>: sets (reg:CC FLAGS_REG)
;; from an unspec of two VF operands; there is no register
;; destination.  Operand 0 is a register ("x"), operand 1 may be a
;; register or memory ("xm").  The output template is
;; `vtest<ssemodesuffix>'.
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9811 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9812 [(set (reg:CC FLAGS_REG)
9813 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9814 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9817 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9818 [(set_attr "type" "ssecomi")
9819 (set_attr "prefix_extra" "1")
9820 (set_attr "prefix" "vex")
9821 (set_attr "mode" "<MODE>")])
9823 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9824 ;; but it is not really a compare instruction.
;; avx_ptest256: sets (reg:CC FLAGS_REG) from an unspec of two V4DI
;; operands; there is no register destination.  Operand 0 is a
;; register ("x"), operand 1 may be a register or memory ("xm").  The
;; output template is `vptest'.
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9825 (define_insn "avx_ptest256"
9826 [(set (reg:CC FLAGS_REG)
9827 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9828 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9831 "vptest\t{%1, %0|%0, %1}"
9832 [(set_attr "type" "ssecomi")
9833 (set_attr "prefix_extra" "1")
9834 (set_attr "prefix" "vex")
9835 (set_attr "mode" "OI")])
;; sse4_1_ptest: sets (reg:CC FLAGS_REG) from an unspec of two V2DI
;; operands; there is no register destination.  Operand 0 is a
;; register ("x"), operand 1 may be a register or memory ("xm").  The
;; output template is `%vptest' and the prefix attribute is maybe_vex.
;; NOTE(review): the unspec code and the insn condition are elided in
;; this excerpt -- confirm them against the full pattern.
9837 (define_insn "sse4_1_ptest"
9838 [(set (reg:CC FLAGS_REG)
9839 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9840 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9843 "%vptest\t{%1, %0|%0, %1}"
9844 [(set_attr "type" "ssecomi")
9845 (set_attr "prefix_extra" "1")
9846 (set_attr "prefix" "maybe_vex")
9847 (set_attr "mode" "TI")])
;; <sse4_1>_round<ssemodesuffix><avxsizesuffix>: pattern over the VF
;; mode iterator taking a vector source ("xm") and an SImode immediate
;; accepted by const_0_to_15_operand; the output template is
;; `%vround<ssemodesuffix>'.
;; prefix_data16 is computed with a (match_test "TARGET_AVX")
;; conditional; one of its arms is the constant "1".
;; NOTE(review): the unspec code, the other arm of the prefix_data16
;; conditional and the insn condition are elided in this excerpt --
;; confirm them against the full pattern.
9849 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9850 [(set (match_operand:VF 0 "register_operand" "=x")
9852 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9853 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9856 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9857 [(set_attr "type" "ssecvt")
9858 (set (attr "prefix_data16")
9860 (match_test "TARGET_AVX")
9862 (const_string "1")))
9863 (set_attr "prefix_extra" "1")
9864 (set_attr "length_immediate" "1")
9865 (set_attr "prefix" "maybe_vex")
9866 (set_attr "mode" "<MODE>")])
;; <sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix> (expander over
;; VF1): rounds operand 1 with the immediate in operand 2 and converts
;; the result to the integer vector mode <sseintvecmode>.
;; The C body allocates a fresh <MODE>mode pseudo `tmp', emits the
;; matching gen_<sse4_1>_round... insn into it, then emits
;; gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp).
;; NOTE(review): the expander condition and parts of the C body are
;; elided in this excerpt -- confirm them against the full pattern.
9868 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
9869 [(match_operand:<sseintvecmode> 0 "register_operand" "")
9870 (match_operand:VF1 1 "nonimmediate_operand" "")
9871 (match_operand:SI 2 "const_0_to_15_operand" "")]
9874 rtx tmp = gen_reg_rtx (<MODE>mode);
9877 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
9880 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; <sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>
;; (expander over VF2): rounds operands 1 and 2 with the immediate in
;; operand 3 and packs the converted results into operand 0
;; (<ssepackfltmode>).
;; Two strategies are visible in the C body:
;;  - when <MODE>mode is V2DFmode and TARGET_AVX && !TARGET_PREFER_AVX128,
;;    the two V2DF inputs are concatenated into one V4DF register
;;    (gen_avx_vec_concatv4df), rounded once with gen_avx_roundpd256
;;    and converted with gen_fix_truncv4dfv4si2;
;;  - otherwise each input is rounded separately into tmp0/tmp1 and
;;    the pair is combined with gen_vec_pack_sfix_trunc_<mode>.
;; NOTE(review): the expander condition, braces and some statements
;; are elided in this excerpt -- confirm them against the full
;; pattern.
9884 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
9885 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
9886 (match_operand:VF2 1 "nonimmediate_operand" "")
9887 (match_operand:VF2 2 "nonimmediate_operand" "")
9888 (match_operand:SI 3 "const_0_to_15_operand" "")]
9893 if (<MODE>mode == V2DFmode
9894 && TARGET_AVX && !TARGET_PREFER_AVX128)
9896 rtx tmp2 = gen_reg_rtx (V4DFmode);
9898 tmp0 = gen_reg_rtx (V4DFmode);
9899 tmp1 = force_reg (V2DFmode, operands[1]);
9901 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9902 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9903 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9907 tmp0 = gen_reg_rtx (<MODE>mode);
9908 tmp1 = gen_reg_rtx (<MODE>mode);
9911 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
9914 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
9917 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; sse4_1_round<ssescalarmodesuffix>: scalar-suffix round pattern over
;; the VF_128 mode iterator.  Operand 2 and the immediate in operand 3
;; (const_0_to_15_operand) form the inner operation; operand 1 is a
;; second vector input.  Two alternatives, keyed by "isa" "noavx,avx":
;;   alt 0 - `round<ssescalarmodesuffix>', operand 1 tied to the
;;           destination;
;;   alt 1 - `vround<ssescalarmodesuffix>' with a separate destination.
;; NOTE(review): the rtx codes combining the inner result with operand
;; 1 (presumably a vec_merge), the "@" marker and the insn condition
;; are elided in this excerpt -- confirm them against the full
;; pattern.
9922 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9923 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9926 [(match_operand:VF_128 2 "register_operand" "x,x")
9927 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9929 (match_operand:VF_128 1 "register_operand" "0,x")
9933 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9934 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9935 [(set_attr "isa" "noavx,avx")
9936 (set_attr "type" "ssecvt")
9937 (set_attr "length_immediate" "1")
9938 (set_attr "prefix_data16" "1,*")
9939 (set_attr "prefix_extra" "1")
9940 (set_attr "prefix" "orig,vex")
9941 (set_attr "mode" "<MODE>")])
;; round<mode>2 (expander over VF): enabled when
;; "TARGET_ROUND && !flag_trapping_math".
;; The C body builds the constant 0.5 - 2**(-p-1) in the element mode
;; (p taken from the mode's real_format; the in-code comment calls it
;; nextafter (0.5, 0.0)), broadcasts it into a vector register, and
;; emits gen_copysign<mode>3 so that operands[3] holds that constant
;; with the sign of operands[1].  operands[4] is a fresh pseudo and
;; operands[5] is the ROUND_TRUNC immediate; the second set in the
;; template applies an operation to (match_dup 4) and (match_dup 5).
;; NOTE(review): the rtx codes of the template (presumably an addition
;; into operand 4 followed by a round unspec) and some declarations
;; are elided in this excerpt -- confirm them against the full
;; pattern.
9943 (define_expand "round<mode>2"
9946 (match_operand:VF 1 "register_operand" "")
9948 (set (match_operand:VF 0 "register_operand" "")
9950 [(match_dup 4) (match_dup 5)]
9952 "TARGET_ROUND && !flag_trapping_math"
9954 enum machine_mode scalar_mode;
9955 const struct real_format *fmt;
9956 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9959 scalar_mode = GET_MODE_INNER (<MODE>mode);
9961 /* load nextafter (0.5, 0.0) */
9962 fmt = REAL_MODE_FORMAT (scalar_mode);
9963 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9964 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9965 half = const_double_from_real_value (pred_half, scalar_mode);
9967 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9968 vec_half = force_reg (<MODE>mode, vec_half);
9970 operands[3] = gen_reg_rtx (<MODE>mode);
9971 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9973 operands[4] = gen_reg_rtx (<MODE>mode);
9974 operands[5] = GEN_INT (ROUND_TRUNC);
;; round<mode>2_sfix (expander over VF1): enabled when
;; "TARGET_ROUND && !flag_trapping_math".  Rounds operand 1 and
;; converts the result to the integer vector mode <sseintvecmode>.
;; The C body allocates a fresh <MODE>mode pseudo `tmp', emits
;; gen_round<mode>2 (tmp, operands[1]), then emits
;; gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp).
;; NOTE(review): braces and some statements of the C body are elided
;; in this excerpt -- confirm them against the full pattern.
9977 (define_expand "round<mode>2_sfix"
9978 [(match_operand:<sseintvecmode> 0 "register_operand" "")
9979 (match_operand:VF1 1 "register_operand" "")]
9980 "TARGET_ROUND && !flag_trapping_math"
9982 rtx tmp = gen_reg_rtx (<MODE>mode);
9984 emit_insn (gen_round<mode>2 (tmp, operands[1]));
9987 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 vectors and pack the truncated integer results into
;; operand 0.  For V2DF with AVX (and not TARGET_PREFER_AVX128) the two
;; inputs are concatenated into one V4DF, rounded once and converted
;; with fix_truncv4dfv4si2.  The remaining code rounds each input into
;; its own temporary and packs them with vec_pack_sfix_trunc_<mode>.
9991 (define_expand "round<mode>2_vec_pack_sfix"
9992 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
9993 (match_operand:VF2 1 "register_operand" "")
9994 (match_operand:VF2 2 "register_operand" "")]
9995 "TARGET_ROUND && !flag_trapping_math"
9999 if (<MODE>mode == V2DFmode
10000 && TARGET_AVX && !TARGET_PREFER_AVX128)
10002 rtx tmp2 = gen_reg_rtx (V4DFmode);
10004 tmp0 = gen_reg_rtx (V4DFmode);
10005 tmp1 = force_reg (V2DFmode, operands[1]);
10007 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
10008 emit_insn (gen_roundv4df2 (tmp2, tmp0));
10009 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10013 tmp0 = gen_reg_rtx (<MODE>mode);
10014 tmp1 = gen_reg_rtx (<MODE>mode);
10016 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
10017 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
10020 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
10025 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10027 ;; Intel SSE4.2 string/text processing instructions
10029 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare with three results: an SI
;; result in operand 0 (constraint "c"), a V16QI result in operand 1
;; (constraint "Yz") and FLAGS_REG.  It is only split while pseudos can
;; still be created.  The split code reads the REG_UNUSED notes on
;; curr_insn to see which of the three results are live (locals ecx,
;; xmm0, flags) and emits the pcmpestri, pcmpestrm and/or cconly forms;
;; the cconly form is used when only the flags are live, and a
;; NOTE_INSN_DELETED is emitted when nothing is live.
10031 (define_insn_and_split "sse4_2_pcmpestr"
10032 [(set (match_operand:SI 0 "register_operand" "=c,c")
10034 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10035 (match_operand:SI 3 "register_operand" "a,a")
10036 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10037 (match_operand:SI 5 "register_operand" "d,d")
10038 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10040 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10048 (set (reg:CC FLAGS_REG)
10057 && can_create_pseudo_p ()"
10062 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10063 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10064 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10067 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10068 operands[3], operands[4],
10069 operands[5], operands[6]));
10071 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10072 operands[3], operands[4],
10073 operands[5], operands[6]));
10074 if (flags && !(ecx || xmm0))
10075 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10076 operands[2], operands[3],
10077 operands[4], operands[5],
10079 if (!(flags || ecx || xmm0))
10080 emit_note (NOTE_INSN_DELETED);
10084 [(set_attr "type" "sselog")
10085 (set_attr "prefix_data16" "1")
10086 (set_attr "prefix_extra" "1")
10087 (set_attr "length_immediate" "1")
10088 (set_attr "memory" "none,load")
10089 (set_attr "mode" "TI")])
;; pcmpestri: SI result in operand 0 (constraint "c"), also sets
;; FLAGS_REG.  The SI inputs use constraints "a" (operand 2) and "d"
;; (operand 4); operand 3 may be a register or memory; operand 5 is an
;; 8-bit immediate.
10091 (define_insn "sse4_2_pcmpestri"
10092 [(set (match_operand:SI 0 "register_operand" "=c,c")
10094 [(match_operand:V16QI 1 "register_operand" "x,x")
10095 (match_operand:SI 2 "register_operand" "a,a")
10096 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10097 (match_operand:SI 4 "register_operand" "d,d")
10098 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10100 (set (reg:CC FLAGS_REG)
10109 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10110 [(set_attr "type" "sselog")
10111 (set_attr "prefix_data16" "1")
10112 (set_attr "prefix_extra" "1")
10113 (set_attr "prefix" "maybe_vex")
10114 (set_attr "length_immediate" "1")
10115 (set_attr "memory" "none,load")
10116 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz"), also sets
;; FLAGS_REG.  Input operands and constraints mirror sse4_2_pcmpestri.
10118 (define_insn "sse4_2_pcmpestrm"
10119 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10121 [(match_operand:V16QI 1 "register_operand" "x,x")
10122 (match_operand:SI 2 "register_operand" "a,a")
10123 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10124 (match_operand:SI 4 "register_operand" "d,d")
10125 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10127 (set (reg:CC FLAGS_REG)
10136 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10137 [(set_attr "type" "sselog")
10138 (set_attr "prefix_data16" "1")
10139 (set_attr "prefix_extra" "1")
10140 (set_attr "length_immediate" "1")
10141 (set_attr "prefix" "maybe_vex")
10142 (set_attr "memory" "none,load")
10143 (set_attr "mode" "TI")])
;; Flags-only form of pcmpestr: operands 0 and 1 are scratch clobbers.
;; Alternatives 0-1 take the "Yz" scratch and emit pcmpestrm;
;; alternatives 2-3 take the "c" scratch and emit pcmpestri.  Within
;; each pair the second alternative reads operand 4 from memory.
10145 (define_insn "sse4_2_pcmpestr_cconly"
10146 [(set (reg:CC FLAGS_REG)
10148 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10149 (match_operand:SI 3 "register_operand" "a,a,a,a")
10150 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10151 (match_operand:SI 5 "register_operand" "d,d,d,d")
10152 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10154 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10155 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10158 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10159 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10160 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10161 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10162 [(set_attr "type" "sselog")
10163 (set_attr "prefix_data16" "1")
10164 (set_attr "prefix_extra" "1")
10165 (set_attr "length_immediate" "1")
10166 (set_attr "memory" "none,load,none,load")
10167 (set_attr "prefix" "maybe_vex")
10168 (set_attr "mode" "TI")])
;; Combined string compare without explicit length operands: an SI
;; result in operand 0 (constraint "c"), a V16QI result in operand 1
;; (constraint "Yz") and FLAGS_REG.  The split code uses the REG_UNUSED
;; notes on curr_insn to decide which of pcmpistri, pcmpistrm and the
;; cconly form to emit; the cconly form is used when only the flags are
;; live, and a NOTE_INSN_DELETED is emitted when nothing is live.
10170 (define_insn_and_split "sse4_2_pcmpistr"
10171 [(set (match_operand:SI 0 "register_operand" "=c,c")
10173 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10174 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10175 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10177 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10183 (set (reg:CC FLAGS_REG)
10190 && can_create_pseudo_p ()"
10195 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10196 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10197 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10200 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10201 operands[3], operands[4]));
10203 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10204 operands[3], operands[4]));
10205 if (flags && !(ecx || xmm0))
10206 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10207 operands[2], operands[3],
10209 if (!(flags || ecx || xmm0))
10210 emit_note (NOTE_INSN_DELETED);
10214 [(set_attr "type" "sselog")
10215 (set_attr "prefix_data16" "1")
10216 (set_attr "prefix_extra" "1")
10217 (set_attr "length_immediate" "1")
10218 (set_attr "memory" "none,load")
10219 (set_attr "mode" "TI")])
;; pcmpistri: SI result in operand 0 (constraint "c"), also sets
;; FLAGS_REG.  Operand 2 may be a register or memory; operand 3 is an
;; 8-bit immediate.
10221 (define_insn "sse4_2_pcmpistri"
10222 [(set (match_operand:SI 0 "register_operand" "=c,c")
10224 [(match_operand:V16QI 1 "register_operand" "x,x")
10225 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10226 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10228 (set (reg:CC FLAGS_REG)
10235 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10236 [(set_attr "type" "sselog")
10237 (set_attr "prefix_data16" "1")
10238 (set_attr "prefix_extra" "1")
10239 (set_attr "length_immediate" "1")
10240 (set_attr "prefix" "maybe_vex")
10241 (set_attr "memory" "none,load")
10242 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 (constraint "Yz"), also sets
;; FLAGS_REG.  Input operands and constraints mirror sse4_2_pcmpistri.
10244 (define_insn "sse4_2_pcmpistrm"
10245 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10247 [(match_operand:V16QI 1 "register_operand" "x,x")
10248 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10249 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10251 (set (reg:CC FLAGS_REG)
10258 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10259 [(set_attr "type" "sselog")
10260 (set_attr "prefix_data16" "1")
10261 (set_attr "prefix_extra" "1")
10262 (set_attr "length_immediate" "1")
10263 (set_attr "prefix" "maybe_vex")
10264 (set_attr "memory" "none,load")
10265 (set_attr "mode" "TI")])
;; Flags-only form of pcmpistr: operands 0 and 1 are scratch clobbers.
;; Alternatives 0-1 take the "Yz" scratch and emit pcmpistrm;
;; alternatives 2-3 take the "c" scratch and emit pcmpistri.  Within
;; each pair the second alternative reads operand 3 from memory.
10267 (define_insn "sse4_2_pcmpistr_cconly"
10268 [(set (reg:CC FLAGS_REG)
10270 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10271 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10272 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10274 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10275 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10278 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10279 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10280 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10281 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10282 [(set_attr "type" "sselog")
10283 (set_attr "prefix_data16" "1")
10284 (set_attr "prefix_extra" "1")
10285 (set_attr "length_immediate" "1")
10286 (set_attr "memory" "none,load,none,load")
10287 (set_attr "prefix" "maybe_vex")
10288 (set_attr "mode" "TI")])
10290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10292 ;; XOP instructions
10294 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10296 ;; XOP parallel integer multiply/add instructions.
10297 ;; Note the XOP multiply/add instructions
10298 ;; a[i] = b[i] * c[i] + d[i];
10299 ;; do not allow the value being added to be a memory operation.
;; vpmacsww: V8HI multiply/add.  Operands 1 and 2 are commutative ("%")
;; and operand 2 may be memory; operand 3 is constrained to a register.
10300 (define_insn "xop_pmacsww"
10301 [(set (match_operand:V8HI 0 "register_operand" "=x")
10304 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10305 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10306 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10308 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10309 [(set_attr "type" "ssemuladd")
10310 (set_attr "mode" "TI")])
;; vpmacssww: V8HI multiply (operands 1 and 2, commutative) combined
;; with operand 3, which is constrained to a register.
10312 (define_insn "xop_pmacssww"
10313 [(set (match_operand:V8HI 0 "register_operand" "=x")
10315 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10316 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10317 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10319 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10320 [(set_attr "type" "ssemuladd")
10321 (set_attr "mode" "TI")])
;; vpmacsdd: V4SI multiply/add.  Operands 1 and 2 are commutative and
;; operand 2 may be memory; operand 3 is constrained to a register.
10323 (define_insn "xop_pmacsdd"
10324 [(set (match_operand:V4SI 0 "register_operand" "=x")
10327 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10328 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10329 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10331 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10332 [(set_attr "type" "ssemuladd")
10333 (set_attr "mode" "TI")])
;; vpmacssdd: V4SI multiply (operands 1 and 2, commutative) combined
;; with operand 3, which is constrained to a register.
10335 (define_insn "xop_pmacssdd"
10336 [(set (match_operand:V4SI 0 "register_operand" "=x")
10338 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10339 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10340 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10342 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10343 [(set_attr "type" "ssemuladd")
10344 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI sources (operands 1 and 2) with a V2DI result and a
;; V2DI register operand 3.  The element selections visible here start
;; at index 0.
10346 (define_insn "xop_pmacssdql"
10347 [(set (match_operand:V2DI 0 "register_operand" "=x")
10352 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10353 (parallel [(const_int 0)
10356 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10357 (parallel [(const_int 0)
10359 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10361 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10362 [(set_attr "type" "ssemuladd")
10363 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI sources (operands 1 and 2) with a V2DI result and a
;; V2DI register operand 3.  The element selections visible here start
;; at index 1.
10365 (define_insn "xop_pmacssdqh"
10366 [(set (match_operand:V2DI 0 "register_operand" "=x")
10371 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10372 (parallel [(const_int 1)
10376 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10377 (parallel [(const_int 1)
10379 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10381 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10382 [(set_attr "type" "ssemuladd")
10383 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI sources (operands 1 and 2) with a V2DI result and a
;; V2DI register operand 3.  The element selections visible here start
;; at index 0.
10385 (define_insn "xop_pmacsdql"
10386 [(set (match_operand:V2DI 0 "register_operand" "=x")
10391 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10392 (parallel [(const_int 0)
10396 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10397 (parallel [(const_int 0)
10399 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10401 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10402 [(set_attr "type" "ssemuladd")
10403 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI sources (operands 1 and 2) with a V2DI result and a
;; V2DI register operand 3.  The element selections visible here start
;; at index 1.
10405 (define_insn "xop_pmacsdqh"
10406 [(set (match_operand:V2DI 0 "register_operand" "=x")
10411 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10412 (parallel [(const_int 1)
10416 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10417 (parallel [(const_int 1)
10419 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10421 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10422 [(set_attr "type" "ssemuladd")
10423 (set_attr "mode" "TI")])
10425 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI sources (operands 1 and 2) with a V4SI result and a
;; V4SI register operand 3.  The element selections visible here start
;; at index 1.
10426 (define_insn "xop_pmacsswd"
10427 [(set (match_operand:V4SI 0 "register_operand" "=x")
10432 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10433 (parallel [(const_int 1)
10439 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10440 (parallel [(const_int 1)
10444 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10446 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10447 [(set_attr "type" "ssemuladd")
10448 (set_attr "mode" "TI")])
;; vpmacswd: V8HI sources (operands 1 and 2) with a V4SI result and a
;; V4SI register operand 3.  The element selections visible here start
;; at index 1.
10450 (define_insn "xop_pmacswd"
10451 [(set (match_operand:V4SI 0 "register_operand" "=x")
10456 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10457 (parallel [(const_int 1)
10463 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10464 (parallel [(const_int 1)
10468 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10470 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10471 [(set_attr "type" "ssemuladd")
10472 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI sources (operands 1 and 2) with a V4SI result and a
;; V4SI register operand 3.  The pattern has element selections
;; starting at index 0 and selections starting at index 1 (ending at 7).
10474 (define_insn "xop_pmadcsswd"
10475 [(set (match_operand:V4SI 0 "register_operand" "=x")
10481 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10482 (parallel [(const_int 0)
10488 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10489 (parallel [(const_int 0)
10497 (parallel [(const_int 1)
10504 (parallel [(const_int 1)
10507 (const_int 7)])))))
10508 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10510 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10511 [(set_attr "type" "ssemuladd")
10512 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI sources (operands 1 and 2) with a V4SI result and a
;; V4SI register operand 3.  The pattern has element selections
;; starting at index 0 and selections starting at index 1 (ending at 7).
10514 (define_insn "xop_pmadcswd"
10515 [(set (match_operand:V4SI 0 "register_operand" "=x")
10521 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10522 (parallel [(const_int 0)
10528 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10529 (parallel [(const_int 0)
10537 (parallel [(const_int 1)
10544 (parallel [(const_int 1)
10547 (const_int 7)])))))
10548 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10550 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10551 [(set_attr "type" "ssemuladd")
10552 (set_attr "mode" "TI")])
10554 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode of iterator V.  Operand 1 is always a
;; register; the two alternatives allow a memory operand in either
;; operand 3 (alternative 1) or operand 2 (alternative 0), not both.
10555 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10556 [(set (match_operand:V 0 "register_operand" "=x,x")
10558 (match_operand:V 3 "nonimmediate_operand" "x,m")
10559 (match_operand:V 1 "register_operand" "x,x")
10560 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10562 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10563 [(set_attr "type" "sse4arg")])
10565 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V16QI source (register or memory) to V8HI destination.
;; The two visible selections start at elements 0 and 1.
10566 (define_insn "xop_phaddbw"
10567 [(set (match_operand:V8HI 0 "register_operand" "=x")
10571 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10572 (parallel [(const_int 0)
10583 (parallel [(const_int 1)
10590 (const_int 15)])))))]
10592 "vphaddbw\t{%1, %0|%0, %1}"
10593 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V16QI source (register or memory) to V4SI destination.
;; Four selections are visible, starting at elements 0, 1, 2 and 3.
10595 (define_insn "xop_phaddbd"
10596 [(set (match_operand:V4SI 0 "register_operand" "=x")
10601 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10602 (parallel [(const_int 0)
10609 (parallel [(const_int 1)
10612 (const_int 13)]))))
10617 (parallel [(const_int 2)
10624 (parallel [(const_int 3)
10627 (const_int 15)]))))))]
10629 "vphaddbd\t{%1, %0|%0, %1}"
10630 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V16QI source (register or memory) to V2DI destination.
;; Eight selections are visible, starting at elements 0 through 7.
10632 (define_insn "xop_phaddbq"
10633 [(set (match_operand:V2DI 0 "register_operand" "=x")
10639 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10640 (parallel [(const_int 0)
10645 (parallel [(const_int 1)
10651 (parallel [(const_int 2)
10656 (parallel [(const_int 3)
10657 (const_int 11)])))))
10663 (parallel [(const_int 4)
10668 (parallel [(const_int 5)
10669 (const_int 13)]))))
10674 (parallel [(const_int 6)
10679 (parallel [(const_int 7)
10680 (const_int 15)])))))))]
10682 "vphaddbq\t{%1, %0|%0, %1}"
10683 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V8HI source (register or memory) to V4SI destination.
;; The two visible selections start at elements 0 and 1.
10685 (define_insn "xop_phaddwd"
10686 [(set (match_operand:V4SI 0 "register_operand" "=x")
10690 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10691 (parallel [(const_int 0)
10698 (parallel [(const_int 1)
10701 (const_int 7)])))))]
10703 "vphaddwd\t{%1, %0|%0, %1}"
10704 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V8HI source (register or memory) to V2DI destination.
;; Four selections are visible, starting at elements 0, 1, 2 and 3.
10706 (define_insn "xop_phaddwq"
10707 [(set (match_operand:V2DI 0 "register_operand" "=x")
10712 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10713 (parallel [(const_int 0)
10718 (parallel [(const_int 1)
10724 (parallel [(const_int 2)
10729 (parallel [(const_int 3)
10730 (const_int 7)]))))))]
10732 "vphaddwq\t{%1, %0|%0, %1}"
10733 [(set_attr "type" "sseiadd1")])
;; vphadddq: V4SI source (register or memory) to V2DI destination.
;; The two visible selections start at elements 0 and 1.
10735 (define_insn "xop_phadddq"
10736 [(set (match_operand:V2DI 0 "register_operand" "=x")
10740 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10741 (parallel [(const_int 0)
10746 (parallel [(const_int 1)
10747 (const_int 3)])))))]
10749 "vphadddq\t{%1, %0|%0, %1}"
10750 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V16QI source (register or memory) to V8HI destination;
;; same operand shape as xop_phaddbw with the "u" mnemonic.
10752 (define_insn "xop_phaddubw"
10753 [(set (match_operand:V8HI 0 "register_operand" "=x")
10757 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10758 (parallel [(const_int 0)
10769 (parallel [(const_int 1)
10776 (const_int 15)])))))]
10778 "vphaddubw\t{%1, %0|%0, %1}"
10779 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V16QI source (register or memory) to V4SI destination;
;; same operand shape as xop_phaddbd with the "u" mnemonic.
10781 (define_insn "xop_phaddubd"
10782 [(set (match_operand:V4SI 0 "register_operand" "=x")
10787 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10788 (parallel [(const_int 0)
10795 (parallel [(const_int 1)
10798 (const_int 13)]))))
10803 (parallel [(const_int 2)
10810 (parallel [(const_int 3)
10813 (const_int 15)]))))))]
10815 "vphaddubd\t{%1, %0|%0, %1}"
10816 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V16QI source (register or memory) to V2DI destination;
;; same operand shape as xop_phaddbq with the "u" mnemonic.
10818 (define_insn "xop_phaddubq"
10819 [(set (match_operand:V2DI 0 "register_operand" "=x")
10825 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10826 (parallel [(const_int 0)
10831 (parallel [(const_int 1)
10837 (parallel [(const_int 2)
10842 (parallel [(const_int 3)
10843 (const_int 11)])))))
10849 (parallel [(const_int 4)
10854 (parallel [(const_int 5)
10855 (const_int 13)]))))
10860 (parallel [(const_int 6)
10865 (parallel [(const_int 7)
10866 (const_int 15)])))))))]
10868 "vphaddubq\t{%1, %0|%0, %1}"
10869 [(set_attr "type" "sseiadd1")])
;; vphadduwd: V8HI source (register or memory) to V4SI destination;
;; same operand shape as xop_phaddwd with the "u" mnemonic.
10871 (define_insn "xop_phadduwd"
10872 [(set (match_operand:V4SI 0 "register_operand" "=x")
10876 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10877 (parallel [(const_int 0)
10884 (parallel [(const_int 1)
10887 (const_int 7)])))))]
10889 "vphadduwd\t{%1, %0|%0, %1}"
10890 [(set_attr "type" "sseiadd1")])
;; vphadduwq: V8HI source (register or memory) to V2DI destination;
;; same operand shape as xop_phaddwq with the "u" mnemonic.
10892 (define_insn "xop_phadduwq"
10893 [(set (match_operand:V2DI 0 "register_operand" "=x")
10898 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10899 (parallel [(const_int 0)
10904 (parallel [(const_int 1)
10910 (parallel [(const_int 2)
10915 (parallel [(const_int 3)
10916 (const_int 7)]))))))]
10918 "vphadduwq\t{%1, %0|%0, %1}"
10919 [(set_attr "type" "sseiadd1")])
;; vphaddudq: V4SI source (register or memory) to V2DI destination;
;; same operand shape as xop_phadddq with the "u" mnemonic.
10921 (define_insn "xop_phaddudq"
10922 [(set (match_operand:V2DI 0 "register_operand" "=x")
10926 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10927 (parallel [(const_int 0)
10932 (parallel [(const_int 1)
10933 (const_int 3)])))))]
10935 "vphaddudq\t{%1, %0|%0, %1}"
10936 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI source (register or memory) to V8HI destination.
;; The two visible selections start at elements 0 and 1.
10938 (define_insn "xop_phsubbw"
10939 [(set (match_operand:V8HI 0 "register_operand" "=x")
10943 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10944 (parallel [(const_int 0)
10955 (parallel [(const_int 1)
10962 (const_int 15)])))))]
10964 "vphsubbw\t{%1, %0|%0, %1}"
10965 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI source (register or memory) to V4SI destination.
;; The two visible selections start at elements 0 and 1.
10967 (define_insn "xop_phsubwd"
10968 [(set (match_operand:V4SI 0 "register_operand" "=x")
10972 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10973 (parallel [(const_int 0)
10980 (parallel [(const_int 1)
10983 (const_int 7)])))))]
10985 "vphsubwd\t{%1, %0|%0, %1}"
10986 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI source (register or memory) to V2DI destination.
;; The two visible selections start at elements 0 and 1.
10988 (define_insn "xop_phsubdq"
10989 [(set (match_operand:V2DI 0 "register_operand" "=x")
10993 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10994 (parallel [(const_int 0)
10999 (parallel [(const_int 1)
11000 (const_int 3)])))))]
11002 "vphsubdq\t{%1, %0|%0, %1}"
11003 [(set_attr "type" "sseiadd1")])
11005 ;; XOP permute instructions
;; vpperm as an UNSPEC_XOP_PERMUTE of three V16QI operands.  Operand 1
;; must be a register; the insn condition rejects the case where both
;; operand 2 and operand 3 are in memory.
11006 (define_insn "xop_pperm"
11007 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11009 [(match_operand:V16QI 1 "register_operand" "x,x")
11010 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11011 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11012 UNSPEC_XOP_PERMUTE))]
11013 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11014 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11015 [(set_attr "type" "sse4arg")
11016 (set_attr "mode" "TI")])
11018 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used to pack two V2DI inputs (operands 1 and 2) into a V4SI
;; result; the V16QI operand 3 is attached with (use ...).  Operands 2
;; and 3 may not both be in memory.
11019 (define_insn "xop_pperm_pack_v2di_v4si"
11020 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11023 (match_operand:V2DI 1 "register_operand" "x,x"))
11025 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11026 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11027 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11028 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11029 [(set_attr "type" "sse4arg")
11030 (set_attr "mode" "TI")])
;; vpperm used to pack two V4SI inputs (operands 1 and 2) into a V8HI
;; result; the V16QI operand 3 is attached with (use ...).  Operands 2
;; and 3 may not both be in memory.
11032 (define_insn "xop_pperm_pack_v4si_v8hi"
11033 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11036 (match_operand:V4SI 1 "register_operand" "x,x"))
11038 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11039 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11040 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11041 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11042 [(set_attr "type" "sse4arg")
11043 (set_attr "mode" "TI")])
;; vpperm used to pack two V8HI inputs (operands 1 and 2) into a V16QI
;; result; the V16QI operand 3 is attached with (use ...).  Operands 2
;; and 3 may not both be in memory.
11045 (define_insn "xop_pperm_pack_v8hi_v16qi"
11046 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11049 (match_operand:V8HI 1 "register_operand" "x,x"))
11051 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11052 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11053 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11054 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11055 [(set_attr "type" "sse4arg")
11056 (set_attr "mode" "TI")])
11058 ;; XOP packed rotate instructions
;; Rotate-left expander for VI_128 with an SI count.  When the count is
;; not an in-range immediate (const_0_to_<sserotatemax>_operand), it is
;; converted to the element mode if needed, broadcast into a vector
;; register with vec_init, and xop_vrotl<mode>3 is emitted with that
;; vector count.
11059 (define_expand "rotl<mode>3"
11060 [(set (match_operand:VI_128 0 "register_operand" "")
11062 (match_operand:VI_128 1 "nonimmediate_operand" "")
11063 (match_operand:SI 2 "general_operand")))]
11066 /* If we were given a scalar, convert it to parallel */
11067 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11069 rtvec vs = rtvec_alloc (<ssescalarnum>);
11070 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11071 rtx reg = gen_reg_rtx (<MODE>mode);
11072 rtx op2 = operands[2];
11075 if (GET_MODE (op2) != <ssescalarmode>mode)
11077 op2 = gen_reg_rtx (<ssescalarmode>mode);
11078 convert_move (op2, operands[2], false);
11081 for (i = 0; i < <ssescalarnum>; i++)
11082 RTVEC_ELT (vs, i) = op2;
11084 emit_insn (gen_vec_init<mode> (reg, par));
11085 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for VI_128 with an SI count.  When the count is
;; not an in-range immediate, it is converted to the element mode if
;; needed, broadcast into a vector register, negated, and passed to
;; xop_vrotl<mode>3 as the count vector.
11090 (define_expand "rotr<mode>3"
11091 [(set (match_operand:VI_128 0 "register_operand" "")
11093 (match_operand:VI_128 1 "nonimmediate_operand" "")
11094 (match_operand:SI 2 "general_operand")))]
11097 /* If we were given a scalar, convert it to parallel */
11098 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11100 rtvec vs = rtvec_alloc (<ssescalarnum>);
11101 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11102 rtx neg = gen_reg_rtx (<MODE>mode);
11103 rtx reg = gen_reg_rtx (<MODE>mode);
11104 rtx op2 = operands[2];
11107 if (GET_MODE (op2) != <ssescalarmode>mode)
11109 op2 = gen_reg_rtx (<ssescalarmode>mode);
11110 convert_move (op2, operands[2], false);
11113 for (i = 0; i < <ssescalarnum>; i++)
11114 RTVEC_ELT (vs, i) = op2;
11116 emit_insn (gen_vec_init<mode> (reg, par));
11117 emit_insn (gen_neg<mode>2 (neg, reg));
11118 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate by an immediate count: vprot<ssemodesuffix> with operand 2
;; restricted to const_0_to_<sserotatemax>_operand.  Operand 1 may be a
;; register or memory.
11123 (define_insn "xop_rotl<mode>3"
11124 [(set (match_operand:VI_128 0 "register_operand" "=x")
11126 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11127 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11129 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11130 [(set_attr "type" "sseishft")
11131 (set_attr "length_immediate" "1")
11132 (set_attr "mode" "TI")])
;; Rotate right by an immediate count, emitted as vprot with the count
;; (element width in bits - operand 2) stored in operands[3].  The
;; element width is taken from the scalar mode: the element count times
;; 8 coincides with it only for V4SI, and for V2DI it gives 16 rather
;; than 64, which produces a wrong rotate count.
11134 (define_insn "xop_rotr<mode>3"
11135 [(set (match_operand:VI_128 0 "register_operand" "=x")
11137 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11138 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11141 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11142 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11144 [(set_attr "type" "sseishft")
11145 (set_attr "length_immediate" "1")
11146 (set_attr "mode" "TI")])
;; Variable rotate right with a vector count: negates operand 2 into a
;; fresh register and emits xop_vrotl<mode>3 with the negated count.
11148 (define_expand "vrotr<mode>3"
11149 [(match_operand:VI_128 0 "register_operand" "")
11150 (match_operand:VI_128 1 "register_operand" "")
11151 (match_operand:VI_128 2 "register_operand" "")]
11154 rtx reg = gen_reg_rtx (<MODE>mode);
11155 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11156 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Variable rotate left with a vector count: forwards all three operands
;; unchanged to xop_vrotl<mode>3.
11160 (define_expand "vrotl<mode>3"
11161 [(match_operand:VI_128 0 "register_operand" "")
11162 (match_operand:VI_128 1 "register_operand" "")
11163 (match_operand:VI_128 2 "register_operand" "")]
11166 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssemodesuffix> with a per-element count vector in operand 2.
;; The RTL is an if_then_else whose visible arm uses the negated count.
;; Either operand 1 or operand 2 may be in memory, but not both.
11170 (define_insn "xop_vrotl<mode>3"
11171 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11172 (if_then_else:VI_128
11174 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11177 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11181 (neg:VI_128 (match_dup 2)))))]
11182 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11183 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11184 [(set_attr "type" "sseishft")
11185 (set_attr "prefix_data16" "0")
11186 (set_attr "prefix_extra" "2")
11187 (set_attr "mode" "TI")])
11189 ;; XOP packed shift instructions.
;; Variable shift expander (vlshr) for VI12_128: negates the count
;; vector into a fresh register and emits xop_shl<mode>3 with it.
11190 (define_expand "vlshr<mode>3"
11191 [(set (match_operand:VI12_128 0 "register_operand" "")
11193 (match_operand:VI12_128 1 "register_operand" "")
11194 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11197 rtx neg = gen_reg_rtx (<MODE>mode);
11198 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11199 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr expander for VI48_128, enabled for TARGET_AVX2 or TARGET_XOP.
;; The preparation code negates the count vector and emits
;; xop_shl<mode>3 with it.
;; NOTE(review): presumably that path is taken only without AVX2 --
;; confirm against the full definition.
11203 (define_expand "vlshr<mode>3"
11204 [(set (match_operand:VI48_128 0 "register_operand" "")
11206 (match_operand:VI48_128 1 "register_operand" "")
11207 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11208 "TARGET_AVX2 || TARGET_XOP"
11212 rtx neg = gen_reg_rtx (<MODE>mode);
11213 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11214 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr expander for the 256-bit VI48_256 modes; it has no preparation
;; code in this block, so the pattern is used as written.
11219 (define_expand "vlshr<mode>3"
11220 [(set (match_operand:VI48_256 0 "register_operand" "")
11222 (match_operand:VI48_256 1 "register_operand" "")
11223 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; Variable arithmetic right shift (ashiftrt) for VI128_128: negates the
;; count vector into a fresh register and emits xop_sha<mode>3 with it.
11226 (define_expand "vashr<mode>3"
11227 [(set (match_operand:VI128_128 0 "register_operand" "")
11228 (ashiftrt:VI128_128
11229 (match_operand:VI128_128 1 "register_operand" "")
11230 (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
11233 rtx neg = gen_reg_rtx (<MODE>mode);
11234 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11235 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift for V4SI, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The preparation code negates the count vector and emits
;; xop_shav4si3 with it.
;; NOTE(review): presumably that path is taken only without AVX2 --
;; confirm against the full definition.
11239 (define_expand "vashrv4si3"
11240 [(set (match_operand:V4SI 0 "register_operand" "")
11241 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
11242 (match_operand:V4SI 2 "nonimmediate_operand" "")))]
11243 "TARGET_AVX2 || TARGET_XOP"
11247 rtx neg = gen_reg_rtx (V4SImode);
11248 emit_insn (gen_negv4si2 (neg, operands[2]));
11249 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift for V8SI; it has no preparation code
;; in this block, so the ashiftrt pattern is used as written.
11254 (define_expand "vashrv8si3"
11255 [(set (match_operand:V8SI 0 "register_operand" "")
11256 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
11257 (match_operand:V8SI 2 "nonimmediate_operand" "")))]
;; vashl expander for VI12_128: forwards the operands unchanged to
;; xop_sha<mode>3.
11260 (define_expand "vashl<mode>3"
11261 [(set (match_operand:VI12_128 0 "register_operand" "")
11263 (match_operand:VI12_128 1 "register_operand" "")
11264 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11267 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl expander for VI48_128, enabled for TARGET_AVX2 or TARGET_XOP.
;; The preparation code forces the count into a register and emits
;; xop_sha<mode>3.
;; NOTE(review): presumably that path is taken only without AVX2 --
;; confirm against the full definition.
11271 (define_expand "vashl<mode>3"
11272 [(set (match_operand:VI48_128 0 "register_operand" "")
11274 (match_operand:VI48_128 1 "register_operand" "")
11275 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11276 "TARGET_AVX2 || TARGET_XOP"
11280 operands[2] = force_reg (<MODE>mode, operands[2]);
11281 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl expander for the 256-bit VI48_256 modes; it has no preparation
;; code in this block, so the pattern is used as written.
11286 (define_expand "vashl<mode>3"
11287 [(set (match_operand:VI48_256 0 "register_operand" "")
11289 (match_operand:VI48_256 1 "register_operand" "")
11290 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; vpsha<ssemodesuffix> with a per-element count vector in operand 2.
;; The RTL is an if_then_else whose visible arm uses the negated count.
;; Either operand 1 or operand 2 may be in memory, but not both.
11293 (define_insn "xop_sha<mode>3"
11294 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11295 (if_then_else:VI_128
11297 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11300 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11304 (neg:VI_128 (match_dup 2)))))]
11305 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11306 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11307 [(set_attr "type" "sseishft")
11308 (set_attr "prefix_data16" "0")
11309 (set_attr "prefix_extra" "2")
11310 (set_attr "mode" "TI")])
;; vpshl<ssemodesuffix> with a per-element count vector in operand 2.
;; The RTL is an if_then_else whose visible arm uses the negated count.
;; Either operand 1 or operand 2 may be in memory, but not both.
11312 (define_insn "xop_shl<mode>3"
11313 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11314 (if_then_else:VI_128
11316 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11319 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11323 (neg:VI_128 (match_dup 2)))))]
11324 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11325 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11326 [(set_attr "type" "sseishft")
11327 (set_attr "prefix_data16" "0")
11328 (set_attr "prefix_extra" "2")
11329 (set_attr "mode" "TI")])
11331 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift-left by a scalar SI count: the count is replicated into
;; all 16 elements of a PARALLEL, loaded into a vector register with
;; vec_initv16qi, and xop_shav16qi3 is emitted with that count vector.
11332 (define_expand "ashlv16qi3"
11333 [(set (match_operand:V16QI 0 "register_operand" "")
11335 (match_operand:V16QI 1 "register_operand" "")
11336 (match_operand:SI 2 "nonmemory_operand" "")))]
11339 rtx reg = gen_reg_rtx (V16QImode);
11343 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11344 for (i = 0; i < 16; i++)
11345 XVECEXP (par, 0, i) = operands[2];
11347 emit_insn (gen_vec_initv16qi (reg, par));
11348 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
;; V16QI shifts by a scalar SI count, one expander per <shift_insn>
;; code.  A constant count is negated at expand time; the count is then
;; replicated into a V16QI vector register.  A gen_negv16qi2 call and a
;; `negate' flag are also present, presumably for the non-constant
;; case (NOTE(review): the guarding condition is not visible here).
;; LSHIFTRT uses xop_shlv16qi3; the other code uses xop_shav16qi3.
11352 (define_expand "<shift_insn>v16qi3"
11353 [(set (match_operand:V16QI 0 "register_operand" "")
11355 (match_operand:V16QI 1 "register_operand" "")
11356 (match_operand:SI 2 "nonmemory_operand" "")))]
11359 rtx reg = gen_reg_rtx (V16QImode);
11361 bool negate = false;
11362 rtx (*shift_insn)(rtx, rtx, rtx);
11365 if (CONST_INT_P (operands[2]))
11366 operands[2] = GEN_INT (-INTVAL (operands[2]));
11370 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11371 for (i = 0; i < 16; i++)
11372 XVECEXP (par, 0, i) = operands[2];
11374 emit_insn (gen_vec_initv16qi (reg, par));
11377 emit_insn (gen_negv16qi2 (reg, reg));
11379 if (<CODE> == LSHIFTRT)
11380 shift_insn = gen_xop_shlv16qi3;
11382 shift_insn = gen_xop_shav16qi3;
11384 emit_insn (shift_insn (operands[0], operands[1], reg));
;; V2DI shift by a scalar DI count, implemented with xop_shav2di3.  A
;; constant count is negated at expand time; the count is replicated
;; into both elements of a V2DI vector register.  A gen_negv2di2 call
;; and a `negate' flag are also present, presumably for the
;; non-constant case (NOTE(review): the guard is not visible here).
11388 (define_expand "ashrv2di3"
11389 [(set (match_operand:V2DI 0 "register_operand" "")
11391 (match_operand:V2DI 1 "register_operand" "")
11392 (match_operand:DI 2 "nonmemory_operand" "")))]
11395 rtx reg = gen_reg_rtx (V2DImode);
11397 bool negate = false;
11400 if (CONST_INT_P (operands[2]))
11401 operands[2] = GEN_INT (-INTVAL (operands[2]));
11405 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
11406 for (i = 0; i < 2; i++)
11407 XVECEXP (par, 0, i) = operands[2];
11409 emit_insn (gen_vec_initv2di (reg, par));
11412 emit_insn (gen_negv2di2 (reg, reg));
11414 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
11418 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> over the FMAMODE iterator: one source operand
;; (register or memory) and a register destination.
11419 (define_insn "xop_frcz<mode>2"
11420 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11422 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11425 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11426 [(set_attr "type" "ssecvt1")
11427 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 "vm" form of FRCZ.  The preparation statement
;; supplies operands[3] as the all-zero vector of the mode (CONST0_RTX).
;; NOTE(review): the RTL that consumes operand 3 is elided here; presumably
;; it is the merge operand matched by *xop_vmfrcz_<mode> -- confirm.
11430 (define_expand "xop_vmfrcz<mode>2"
11431 [(set (match_operand:VF_128 0 "register_operand")
11434 [(match_operand:VF_128 1 "nonimmediate_operand")]
11440 operands[3] = CONST0_RTX (<MODE>mode);
;; Matcher for the VF_128 "vm" FRCZ form: operand 1 is the register or
;; memory source, operand 2 must be a zero constant (const0_operand), and
;; the instruction printed is vfrcz<ssescalarmodesuffix>.
11443 (define_insn "*xop_vmfrcz_<mode>"
11444 [(set (match_operand:VF_128 0 "register_operand" "=x")
11447 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11449 (match_operand:VF_128 2 "const0_operand")
11452 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11453 [(set_attr "type" "ssecvt1")
11454 (set_attr "mode" "<MODE>")])
;; Integer vector comparison over VI_128.  Operator 1 must satisfy
;; ix86_comparison_int_operator; operand 2 is a register and operand 3 a
;; register or memory.  Printed as vpcom%Y1<ssemodesuffix>, where %Y1 is
;; an output modifier applied to the comparison operator (presumably it
;; prints the condition name -- confirm in the i386 print_operand code).
11456 (define_insn "xop_maskcmp<mode>3"
11457 [(set (match_operand:VI_128 0 "register_operand" "=x")
11458 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11459 [(match_operand:VI_128 2 "register_operand" "x")
11460 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11462 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11463 [(set_attr "type" "sse4arg")
11464 (set_attr "prefix_data16" "0")
11465 (set_attr "prefix_rep" "0")
11466 (set_attr "prefix_extra" "2")
11467 (set_attr "length_immediate" "1")
11468 (set_attr "mode" "TI")])
;; Unsigned counterpart of the VI_128 integer comparison: operator 1 must
;; satisfy ix86_comparison_uns_operator and the mnemonic gains a `u'
;; (vpcom%Y1u<ssemodesuffix>).  Operand 2 is a register, operand 3 a
;; register or memory.
11470 (define_insn "xop_maskcmp_uns<mode>3"
11471 [(set (match_operand:VI_128 0 "register_operand" "=x")
11472 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11473 [(match_operand:VI_128 2 "register_operand" "x")
11474 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11476 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11477 [(set_attr "type" "ssecmp")
11478 (set_attr "prefix_data16" "0")
11479 (set_attr "prefix_rep" "0")
11480 (set_attr "prefix_extra" "2")
11481 (set_attr "length_immediate" "1")
11482 (set_attr "mode" "TI")])
11484 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11485 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11486 ;; the exact instruction generated for the intrinsic.
;; The unsigned comparison operator is wrapped in an
;; UNSPEC_XOP_UNSIGNED_CMP unspec; the output template is the same
;; vpcom%Y1u<ssemodesuffix> string used by xop_maskcmp_uns<mode>3.
11487 (define_insn "xop_maskcmp_uns2<mode>3"
11488 [(set (match_operand:VI_128 0 "register_operand" "=x")
11490 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11491 [(match_operand:VI_128 2 "register_operand" "x")
11492 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11493 UNSPEC_XOP_UNSIGNED_CMP))]
11495 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11496 [(set_attr "type" "ssecmp")
11497 (set_attr "prefix_data16" "0")
11498 (set_attr "prefix_extra" "2")
11499 (set_attr "length_immediate" "1")
11500 (set_attr "mode" "TI")])
11502 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11503 ;; being added here to be complete.
;; Operand 3 is a constant integer selector: a nonzero value prints
;; vpcomtrue<ssemodesuffix>, zero prints vpcomfalse<ssemodesuffix>.
;; Operands 1 (register) and 2 (register or memory) are the two inputs
;; of the UNSPEC_XOP_TRUEFALSE unspec.
11504 (define_insn "xop_pcom_tf<mode>3"
11505 [(set (match_operand:VI_128 0 "register_operand" "=x")
11507 [(match_operand:VI_128 1 "register_operand" "x")
11508 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11509 (match_operand:SI 3 "const_int_operand" "n")]
11510 UNSPEC_XOP_TRUEFALSE))]
11513 return ((INTVAL (operands[3]) != 0)
11514 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11515 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11517 [(set_attr "type" "ssecmp")
11518 (set_attr "prefix_data16" "0")
11519 (set_attr "prefix_extra" "2")
11520 (set_attr "length_immediate" "1")
11521 (set_attr "mode" "TI")])
;; Two-source permute over the VF modes, printed as
;; vpermil2<ssemodesuffix>.  Operands 1 and 2 are the data inputs,
;; operand 3 is an integer vector of mode <sseintvecmode> (register or
;; memory), and operand 4 is an immediate in the range 0..3.
;; NOTE(review): operand 2 has predicate nonimmediate_operand but its
;; constraint "%x" only allows a register, and the `%' modifier marks it
;; commutative with operand 3, which has a different mode -- confirm this
;; is intended.
11523 (define_insn "xop_vpermil2<mode>3"
11524 [(set (match_operand:VF 0 "register_operand" "=x")
11526 [(match_operand:VF 1 "register_operand" "x")
11527 (match_operand:VF 2 "nonimmediate_operand" "%x")
11528 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11529 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11532 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11533 [(set_attr "type" "sse4arg")
11534 (set_attr "length_immediate" "1")
11535 (set_attr "mode" "<MODE>")])
11537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AESENC on V2DI.  Two alternatives, selected by the "isa" attribute:
;;   noavx - operand 1 is tied to the output ("0"); prints two-operand
;;           aesenc.
;;   avx   - operand 1 is any SSE register; prints three-operand vaesenc.
;; Operand 2 may be a register or memory in both alternatives.
11539 (define_insn "aesenc"
11540 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11541 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11542 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11546 aesenc\t{%2, %0|%0, %2}
11547 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11548 [(set_attr "isa" "noavx,avx")
11549 (set_attr "type" "sselog1")
11550 (set_attr "prefix_extra" "1")
11551 (set_attr "prefix" "orig,vex")
11552 (set_attr "mode" "TI")])
;; AESENCLAST on V2DI (UNSPEC_AESENCLAST).  Two alternatives, selected by
;; the "isa" attribute:
;;   noavx - operand 1 is tied to the output ("0"); prints aesenclast.
;;   avx   - operand 1 is any SSE register; prints three-operand
;;           vaesenclast.
;; Operand 2 may be a register or memory in both alternatives.
11554 (define_insn "aesenclast"
11555 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11556 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11557 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11558 UNSPEC_AESENCLAST))]
11561 aesenclast\t{%2, %0|%0, %2}
11562 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11563 [(set_attr "isa" "noavx,avx")
11564 (set_attr "type" "sselog1")
11565 (set_attr "prefix_extra" "1")
11566 (set_attr "prefix" "orig,vex")
11567 (set_attr "mode" "TI")])
;; AESDEC on V2DI.  Two alternatives, selected by the "isa" attribute:
;;   noavx - operand 1 is tied to the output ("0"); prints two-operand
;;           aesdec.
;;   avx   - operand 1 is any SSE register; prints three-operand vaesdec.
;; Operand 2 may be a register or memory in both alternatives.
11569 (define_insn "aesdec"
11570 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11571 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11572 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11576 aesdec\t{%2, %0|%0, %2}
11577 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11578 [(set_attr "isa" "noavx,avx")
11579 (set_attr "type" "sselog1")
11580 (set_attr "prefix_extra" "1")
11581 (set_attr "prefix" "orig,vex")
11582 (set_attr "mode" "TI")])
;; AESDECLAST on V2DI (UNSPEC_AESDECLAST).  Two alternatives, selected by
;; the "isa" attribute:
;;   noavx - operand 1 is tied to the output ("0"); prints aesdeclast.
;;   avx   - operand 1 is any SSE register; prints three-operand
;;           vaesdeclast.
;; Operand 2 may be a register or memory in both alternatives.
11584 (define_insn "aesdeclast"
11585 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11586 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11587 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11588 UNSPEC_AESDECLAST))]
11591 aesdeclast\t{%2, %0|%0, %2}
11592 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11593 [(set_attr "isa" "noavx,avx")
11594 (set_attr "type" "sselog1")
11595 (set_attr "prefix_extra" "1")
11596 (set_attr "prefix" "orig,vex")
11597 (set_attr "mode" "TI")])
;; AESIMC on V2DI: one source (register or memory) and a register
;; destination.  A single alternative with prefix "maybe_vex"; the
;; template starts with %v (presumably this prints the `v' prefix when
;; the VEX encoding is used -- confirm in the i386 print_operand code).
11599 (define_insn "aesimc"
11600 [(set (match_operand:V2DI 0 "register_operand" "=x")
11601 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11604 "%vaesimc\t{%1, %0|%0, %1}"
11605 [(set_attr "type" "sselog1")
11606 (set_attr "prefix_extra" "1")
11607 (set_attr "prefix" "maybe_vex")
11608 (set_attr "mode" "TI")])
;; AESKEYGENASSIST on V2DI (UNSPEC_AESKEYGENASSIST): operand 1 is the
;; register or memory source and operand 2 an immediate in 0..255.
;; A single alternative with prefix "maybe_vex" and a %v-prefixed
;; template.
11610 (define_insn "aeskeygenassist"
11611 [(set (match_operand:V2DI 0 "register_operand" "=x")
11612 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11613 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11614 UNSPEC_AESKEYGENASSIST))]
11616 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11617 [(set_attr "type" "sselog1")
11618 (set_attr "prefix_extra" "1")
11619 (set_attr "length_immediate" "1")
11620 (set_attr "prefix" "maybe_vex")
11621 (set_attr "mode" "TI")])
;; PCLMULQDQ on V2DI with an immediate selector (operand 3, 0..255).
;; Two alternatives, selected by the "isa" attribute:
;;   noavx - operand 1 is tied to the output ("0"); prints pclmulqdq.
;;   avx   - operand 1 is any SSE register; prints four-operand
;;           vpclmulqdq.
;; Operand 2 may be a register or memory in both alternatives.
11623 (define_insn "pclmulqdq"
11624 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11625 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11626 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11627 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11631 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11632 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11633 [(set_attr "isa" "noavx,avx")
11634 (set_attr "type" "sselog1")
11635 (set_attr "prefix_extra" "1")
11636 (set_attr "length_immediate" "1")
11637 (set_attr "prefix" "orig,vex")
11638 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.  nregs is 16
;; when TARGET_64BIT and 8 otherwise; the PARALLEL has nregs + 1
;; elements.  Element 0 is an UNSPEC_VOLATILE wrapping const0_rtx (its
;; unspec code is on a line elided from this excerpt) and each remaining
;; element is a SET of one V8SImode SSE register (SSE_REGNO (regno)) to
;; the zero vector.
11640 (define_expand "avx_vzeroall"
11641 [(match_par_dup 0 [(const_int 0)])]
11644 int nregs = TARGET_64BIT ? 16 : 8;
11647 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11649 XVECEXP (operands[0], 0, 0)
11650 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11653 for (regno = 0; regno < nregs; regno++)
11654 XVECEXP (operands[0], 0, regno + 1)
11655 = gen_rtx_SET (VOIDmode,
11656 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11657 CONST0_RTX (V8SImode));
;; Matcher for a vzeroall PARALLEL: the whole PARALLEL must satisfy the
;; vzeroall_operation predicate and its first element must be the
;; UNSPECV_VZEROALL unspec_volatile.  The enabling condition and output
;; template are elided from this excerpt.
11660 (define_insn "*avx_vzeroall"
11661 [(match_parallel 0 "vzeroall_operation"
11662 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11665 [(set_attr "type" "sse")
11666 (set_attr "modrm" "0")
11667 (set_attr "memory" "none")
11668 (set_attr "prefix" "vex")
11669 (set_attr "mode" "OI")])
11671 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11672 ;; if the upper 128bits are unused.
;; The pattern is a bare UNSPECV_VZEROUPPER unspec_volatile carrying one
;; const_int operand; what that operand encodes is not visible in this
;; excerpt, and neither are the enabling condition and output template.
11673 (define_insn "avx_vzeroupper"
11674 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11675 UNSPECV_VZEROUPPER)]
11678 [(set_attr "type" "sse")
11679 (set_attr "modrm" "0")
11680 (set_attr "memory" "none")
11681 (set_attr "prefix" "vex")
11682 (set_attr "mode" "OI")])
;; Maps each integer vector mode to the 128-bit mode with the same
;; element type: the 256-bit modes map to their half-size counterpart and
;; the 128-bit modes map to themselves.
11684 (define_mode_attr AVXTOSSEMODE
11685 [(V4DI "V2DI") (V2DI "V2DI")
11686 (V8SI "V4SI") (V4SI "V4SI")
11687 (V16HI "V8HI") (V8HI "V8HI")
11688 (V32QI "V16QI") (V16QI "V16QI")])
;; Integer broadcast over the VI modes, printed as
;; vpbroadcast<ssemodesuffix>.  The source (operand 1, register or
;; memory) has mode <AVXTOSSEMODE> and the pattern selects its element 0.
;; NOTE(review): the RTL line wrapping the vec_select (presumably a
;; vec_duplicate) and the enabling condition are elided here.
11690 (define_insn "avx2_pbroadcast<mode>"
11691 [(set (match_operand:VI 0 "register_operand" "=x")
11693 (vec_select:<ssescalarmode>
11694 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11695 (parallel [(const_int 0)]))))]
11697 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11698 [(set_attr "type" "ssemov")
11699 (set_attr "prefix_extra" "1")
11700 (set_attr "prefix" "vex")
11701 (set_attr "mode" "<sseinsnmode>")])
;; V8SI permute printed as vpermd.  Operand 1 may be a register or
;; memory and operand 2 must be a register; note the template prints
;; operand 1 before operand 2 in AT&T syntax ("%1, %2, %0").  The unspec
;; name and enabling condition are elided from this excerpt.
11703 (define_insn "avx2_permvarv8si"
11704 [(set (match_operand:V8SI 0 "register_operand" "=x")
11706 [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
11707 (match_operand:V8SI 2 "register_operand" "x")]
11710 "vpermd\t{%1, %2, %0|%0, %2, %1}"
11711 [(set_attr "type" "sselog")
11712 (set_attr "prefix" "vex")
11713 (set_attr "mode" "OI")])
;; V4DF permute by immediate, printed as vpermpd.  Operand 1 is the
;; source (register or memory) and operand 2 the 8-bit immediate
;; (0..255).  The predicate of operand 1 is nonimmediate_operand so that
;; it agrees with the "xm" constraint: with register_operand the memory
;; alternative could never be matched directly and a memory source would
;; first be forced into a register.  The unspec name and enabling
;; condition are elided from this excerpt.
11715 (define_insn "avx2_permv4df"
11716 [(set (match_operand:V4DF 0 "register_operand" "=x")
11718 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11719 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11722 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11723 [(set_attr "type" "sselog")
11724 (set_attr "prefix_extra" "1")
11725 (set_attr "prefix" "vex")
11726 (set_attr "mode" "OI")])
;; V8SF permute printed as vpermps.  Operand 1 (V8SF) may be a register
;; or memory; operand 2 is a V8SI register.  The template prints
;; operand 1 before operand 2 in AT&T syntax ("%1, %2, %0").  The unspec
;; name and enabling condition are elided from this excerpt.
11728 (define_insn "avx2_permvarv8sf"
11729 [(set (match_operand:V8SF 0 "register_operand" "=x")
11731 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")
11732 (match_operand:V8SI 2 "register_operand" "x")]
11735 "vpermps\t{%1, %2, %0|%0, %2, %1}"
11736 [(set_attr "type" "sselog")
11737 (set_attr "prefix" "vex")
11738 (set_attr "mode" "OI")])
;; Expander for a V4DI permute by 8-bit immediate (operand 2).  The
;; immediate is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7)
;; which are passed as separate constants to gen_avx2_permv4di_1.
11740 (define_expand "avx2_permv4di"
11741 [(match_operand:V4DI 0 "register_operand" "")
11742 (match_operand:V4DI 1 "nonimmediate_operand" "")
11743 (match_operand:SI 2 "const_0_to_255_operand" "")]
11746 int mask = INTVAL (operands[2]);
11747 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11748 GEN_INT ((mask >> 0) & 3),
11749 GEN_INT ((mask >> 2) & 3),
11750 GEN_INT ((mask >> 4) & 3),
11751 GEN_INT ((mask >> 6) & 3)));
;; V4DI permute whose selection is a parallel of four constants, each in
;; 0..3 (operands 2-5).  The output code ORs the four selectors back
;; into one immediate (shifted left by 0, 2, 4 and 6 bits), stores it in
;; operands[2] and prints vpermq.  The declaration of `mask' is on a line
;; elided from this excerpt.
11755 (define_insn "avx2_permv4di_1"
11756 [(set (match_operand:V4DI 0 "register_operand" "=x")
11758 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11759 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11760 (match_operand 3 "const_0_to_3_operand" "")
11761 (match_operand 4 "const_0_to_3_operand" "")
11762 (match_operand 5 "const_0_to_3_operand" "")])))]
11766 mask |= INTVAL (operands[2]) << 0;
11767 mask |= INTVAL (operands[3]) << 2;
11768 mask |= INTVAL (operands[4]) << 4;
11769 mask |= INTVAL (operands[5]) << 6;
11770 operands[2] = GEN_INT (mask);
11771 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11773 [(set_attr "type" "sselog")
11774 (set_attr "prefix" "vex")
11775 (set_attr "mode" "OI")])
;; Two-source V4DI permute printed as vperm2i128.  Operand 1 is a
;; register, operand 2 a register or memory, and operand 3 an immediate
;; in 0..255.  The unspec name and enabling condition are elided from
;; this excerpt.
11777 (define_insn "avx2_permv2ti"
11778 [(set (match_operand:V4DI 0 "register_operand" "=x")
11780 [(match_operand:V4DI 1 "register_operand" "x")
11781 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11782 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11785 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11786 [(set_attr "type" "sselog")
11787 (set_attr "prefix" "vex")
11788 (set_attr "mode" "OI")])
;; Duplicates element 0 of a V2DF register (operand 1) into a V4DF
;; register, printed as vbroadcastsd.  The vec_select head line and the
;; enabling condition are elided from this excerpt.
11790 (define_insn "avx2_vec_dupv4df"
11791 [(set (match_operand:V4DF 0 "register_operand" "=x")
11792 (vec_duplicate:V4DF
11794 (match_operand:V2DF 1 "register_operand" "x")
11795 (parallel [(const_int 0)]))))]
11797 "vbroadcastsd\t{%1, %0|%0, %1}"
11798 [(set_attr "type" "sselog1")
11799 (set_attr "prefix" "vex")
11800 (set_attr "mode" "V4DF")])
11802 ;; Modes handled by AVX vec_dup patterns.
;; These are the four 256-bit vector modes with 32-bit or 64-bit
;; elements, integer and floating point.
11803 (define_mode_iterator AVX_VEC_DUP_MODE
11804 [V8SI V8SF V4DI V4DF])
;; Scalar broadcast into the AVX_VEC_DUP_MODE modes.  Two alternatives:
;; a memory source, printed as vbroadcast<ssescalarmodesuffix>, and a
;; disparaged register source ("?x").
;; NOTE(review): the enabling condition and the output line of the
;; register alternative are elided from this excerpt; presumably the
;; register case is split after reload -- confirm.
11806 (define_insn "vec_dup<mode>"
11807 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11808 (vec_duplicate:AVX_VEC_DUP_MODE
11809 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11812 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11814 [(set_attr "type" "ssemov")
11815 (set_attr "prefix_extra" "1")
11816 (set_attr "prefix" "vex")
11817 (set_attr "mode" "V8SF")])
;; Broadcast of a half-width vector (<ssehalfvecmode>) into the VI_256
;; modes, printed as vbroadcasti128.  The source must be in memory.
;; The enclosing RTL lines and the enabling condition are elided from
;; this excerpt.
11819 (define_insn "avx2_vbroadcasti128_<mode>"
11820 [(set (match_operand:VI_256 0 "register_operand" "=x")
11822 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11825 "vbroadcasti128\t{%1, %0|%0, %1}"
11826 [(set_attr "type" "ssemov")
11827 (set_attr "prefix_extra" "1")
11828 (set_attr "prefix" "vex")
11829 (set_attr "mode" "OI")])
;; NOTE(review): the line that opens this definition is elided from this
;; excerpt; the visible shape (pattern, condition, replacement,
;; preparation statement) looks like a split -- confirm.
;; Under "TARGET_AVX && reload_completed" a vec_duplicate of a scalar
;; register into an AVX_VEC_DUP_MODE register is replaced by a
;; vec_duplicate into operand 2 followed by a vec_concat of operand 2
;; with itself.  Operand 2 is the same hard register as operand 0,
;; viewed in <ssehalfvecmode>.
11832 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11833 (vec_duplicate:AVX_VEC_DUP_MODE
11834 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11835 "TARGET_AVX && reload_completed"
11836 [(set (match_dup 2)
11837 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11839 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11840 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width vector into the V_256 modes.  Three
;; alternatives, in order:
;;   m  - memory source: vbroadcast<i128>.
;;   0  - source tied to the destination register: vinsert<i128> with
;;        immediate 1.
;;   ?x - other register (disparaged): vperm2<i128> with immediate 0,
;;        using the %t1 form of operand 1 for both inputs.
;; The enclosing RTL lines and the enabling condition are elided from
;; this excerpt.
11842 (define_insn "avx_vbroadcastf128_<mode>"
11843 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11845 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11849 vbroadcast<i128>\t{%1, %0|%0, %1}
11850 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11851 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11852 [(set_attr "type" "ssemov,sselog1,sselog1")
11853 (set_attr "prefix_extra" "1")
11854 (set_attr "length_immediate" "0,1,1")
11855 (set_attr "prefix" "vex")
11856 (set_attr "mode" "<sseinsnmode>")])
11858 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11859 ;; If it so happens that the input is in memory, use vbroadcast.
11860 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  operands[3] is the first element index of the selection
;; parallel, which must satisfy avx_vbroadcast_operand.  The visible
;; output code has two paths: re-address operand 1 as SFmode at byte
;; offset elt * 4 and print vbroadcastss, or print vpermilps with
;; immediate elt * 0x55.
;; NOTE(review): the case labels are elided from this excerpt; the
;; "type" attribute ("ssemov,ssemov,sselog1") suggests the first two
;; (memory) alternatives take the vbroadcastss path -- confirm.
11861 (define_insn "*avx_vperm_broadcast_v4sf"
11862 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11864 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11865 (match_parallel 2 "avx_vbroadcast_operand"
11866 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11869 int elt = INTVAL (operands[3]);
11870 switch (which_alternative)
11874 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11875 return "vbroadcastss\t{%1, %0|%0, %1}";
11877 operands[2] = GEN_INT (elt * 0x55);
11878 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11880 gcc_unreachable ();
11883 [(set_attr "type" "ssemov,ssemov,sselog1")
11884 (set_attr "prefix_extra" "1")
11885 (set_attr "length_immediate" "0,0,1")
11886 (set_attr "prefix" "vex")
11887 (set_attr "mode" "SF,SF,V4SF")])
;; VF_256 counterpart of the broadcast-as-vec_select pattern, split after
;; reload ("&& reload_completed").  The replacement is a vec_duplicate of
;; operand 1.  The visible preparation code has two paths:
;;  - emit gen_avx_vpermil<mode> (V4DF mask: 15 if elt is odd, else 0;
;;    otherwise (elt & 3) * 0x55), then gen_avx_vperm2f128<mode>3 with
;;    mask (elt / (<ssescalarnum> / 2)) * 0x11;
;;  - re-address operand 1 as <ssescalarmode> at byte offset
;;    elt * GET_MODE_SIZE (<ssescalarmode>mode).
;; NOTE(review): the conditions choosing between the two paths are elided
;; from this excerpt; presumably the first is for a register source and
;; the second for a memory source -- confirm.
11889 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11890 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11892 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11893 (match_parallel 2 "avx_vbroadcast_operand"
11894 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11897 "&& reload_completed"
11898 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11900 rtx op0 = operands[0], op1 = operands[1];
11901 int elt = INTVAL (operands[3]);
11907 /* Shuffle element we care about into all elements of the 128-bit lane.
11908 The other lane gets shuffled too, but we don't care. */
11909 if (<MODE>mode == V4DFmode)
11910 mask = (elt & 1 ? 15 : 0);
11912 mask = (elt & 3) * 0x55;
11913 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11915 /* Shuffle the lane we care about into both lanes of the dest. */
11916 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11917 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11921 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11922 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate permute on the VF2 (DFmode element) modes.
;; The immediate (operand 2) is decoded one bit per element into an array
;; of element indices: bits 0 and 1 select index 0 or 1 for elements 0
;; and 1; for V4DF, bits 2 and 3 select index 2 or 3 for elements 2 and
;; 3.  The indices are wrapped in a PARALLEL of <ssescalarnum> entries.
;; The left-hand side of that final assignment is elided from this
;; excerpt.
11925 (define_expand "avx_vpermil<mode>"
11926 [(set (match_operand:VF2 0 "register_operand" "")
11928 (match_operand:VF2 1 "nonimmediate_operand" "")
11929 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11932 int mask = INTVAL (operands[2]);
11933 rtx perm[<ssescalarnum>];
11935 perm[0] = GEN_INT (mask & 1);
11936 perm[1] = GEN_INT ((mask >> 1) & 1);
11937 if (<MODE>mode == V4DFmode)
11939 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11940 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11944 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate permute on the VF1 (SFmode element) modes.
;; The immediate (operand 2) is decoded two bits per element into indices
;; 0..3 for elements 0-3.  For V8SF the same four 2-bit fields are reused
;; for elements 4-7 with 4 added, so both halves get the same in-lane
;; pattern.  The indices are wrapped in a PARALLEL of <ssescalarnum>
;; entries; the left-hand side of that final assignment is elided from
;; this excerpt.
11947 (define_expand "avx_vpermil<mode>"
11948 [(set (match_operand:VF1 0 "register_operand" "")
11950 (match_operand:VF1 1 "nonimmediate_operand" "")
11951 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11954 int mask = INTVAL (operands[2]);
11955 rtx perm[<ssescalarnum>];
11957 perm[0] = GEN_INT (mask & 3);
11958 perm[1] = GEN_INT ((mask >> 2) & 3);
11959 perm[2] = GEN_INT ((mask >> 4) & 3);
11960 perm[3] = GEN_INT ((mask >> 6) & 3);
11961 if (<MODE>mode == V8SFmode)
11963 perm[4] = GEN_INT ((mask & 3) + 4);
11964 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11965 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11966 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11970 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for an immediate permute written as a selection parallel.
;; The condition requires avx_vpermilp_parallel (operands[2], mode) to be
;; nonzero; the output code takes that same return value minus 1 as the
;; immediate, stores it in operands[2] and prints
;; vpermil<ssemodesuffix>.  The first part of the condition is elided
;; from this excerpt.
11973 (define_insn "*avx_vpermilp<mode>"
11974 [(set (match_operand:VF 0 "register_operand" "=x")
11976 (match_operand:VF 1 "nonimmediate_operand" "xm")
11977 (match_parallel 2 ""
11978 [(match_operand 3 "const_int_operand" "")])))]
11980 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11982 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11983 operands[2] = GEN_INT (mask);
11984 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11986 [(set_attr "type" "sselog")
11987 (set_attr "prefix_extra" "1")
11988 (set_attr "length_immediate" "1")
11989 (set_attr "prefix" "vex")
11990 (set_attr "mode" "<MODE>")])
;; Variable permute over the VF modes, printed as
;; vpermil<ssemodesuffix>.  Operand 1 is the data register and operand 2
;; an integer vector of mode <sseintvecmode> (register or memory).  The
;; unspec name and enabling condition are elided from this excerpt.
11992 (define_insn "avx_vpermilvar<mode>3"
11993 [(set (match_operand:VF 0 "register_operand" "=x")
11995 [(match_operand:VF 1 "register_operand" "x")
11996 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11999 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12000 [(set_attr "type" "sselog")
12001 (set_attr "prefix_extra" "1")
12002 (set_attr "prefix" "vex")
12003 (set_attr "mode" "<MODE>")])
;; Expander for the two-source 128-bit-lane permute.  The default RTL is
;; the UNSPEC_VPERMIL2F128 form.  When the immediate has neither bit 3
;; nor bit 7 set ((mask & 0x88) == 0), the body instead builds an
;; explicit vec_select over the vec_concat of operands 1 and 2: the low
;; half of the result takes nelt2 consecutive elements starting at
;; (mask & 3) * nelt2 and the high half starts at
;; ((mask >> 4) & 3) * nelt2, where nelt2 is half of <ssescalarnum>.
;; NOTE(review): the emit and DONE lines that follow the final
;; gen_rtx_SET are elided from this excerpt.
12005 (define_expand "avx_vperm2f128<mode>3"
12006 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12007 (unspec:AVX256MODE2P
12008 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12009 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12010 (match_operand:SI 3 "const_0_to_255_operand" "")]
12011 UNSPEC_VPERMIL2F128))]
12014 int mask = INTVAL (operands[3]);
12015 if ((mask & 0x88) == 0)
12017 rtx perm[<ssescalarnum>], t1, t2;
12018 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12020 base = (mask & 3) * nelt2;
12021 for (i = 0; i < nelt2; ++i)
12022 perm[i] = GEN_INT (base + i);
12024 base = ((mask >> 4) & 3) * nelt2;
12025 for (i = 0; i < nelt2; ++i)
12026 perm[i + nelt2] = GEN_INT (base + i);
12028 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12029 operands[1], operands[2]);
12030 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12031 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12032 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12038 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12039 ;; means that in order to represent this properly in rtl we'd have to
12040 ;; nest *another* vec_concat with a zero operand and do the select from
12041 ;; a 4x wide vector. That doesn't seem very nice.
;; This is the UNSPEC_VPERMIL2F128 form, which accepts any immediate in
;; 0..255 and prints vperm2<i128> with the immediate unchanged.
12042 (define_insn "*avx_vperm2f128<mode>_full"
12043 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12044 (unspec:AVX256MODE2P
12045 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12046 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12047 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12048 UNSPEC_VPERMIL2F128))]
12050 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12051 [(set_attr "type" "sselog")
12052 (set_attr "prefix_extra" "1")
12053 (set_attr "length_immediate" "1")
12054 (set_attr "prefix" "vex")
12055 (set_attr "mode" "<sseinsnmode>")])
;; Matcher for the vec_select-over-vec_concat form of the 128-bit-lane
;; permute.  The condition requires
;; avx_vperm2f128_parallel (operands[3], mode) to be nonzero; the output
;; code takes that return value minus 1 as the mask.  Two mask values are
;; printed as vinsert<i128> (immediate 0 or 1, with operand 2 in its %x2
;; form); any other mask is stored in operands[3] and printed as
;; vperm2<i128>.
;; NOTE(review): the `if' tests that pick the two vinsert cases are
;; elided from this excerpt.
12057 (define_insn "*avx_vperm2f128<mode>_nozero"
12058 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12059 (vec_select:AVX256MODE2P
12060 (vec_concat:<ssedoublevecmode>
12061 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12062 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12063 (match_parallel 3 ""
12064 [(match_operand 4 "const_int_operand" "")])))]
12066 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12068 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12070 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
12072 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
12073 operands[3] = GEN_INT (mask);
12074 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12076 [(set_attr "type" "sselog")
12077 (set_attr "prefix_extra" "1")
12078 (set_attr "length_immediate" "1")
12079 (set_attr "prefix" "vex")
12080 (set_attr "mode" "<sseinsnmode>")])
;; Expander that inserts a half-width vector (operand 2) into a V_256
;; value (operand 1).  Operand 3 is an immediate 0 or 1; the switch on it
;; assigns gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode> to `insn',
;; which is then emitted with operands 0, 1 and 2.
;; NOTE(review): the case labels are elided from this excerpt; presumably
;; 0 selects the lo generator and 1 the hi generator -- confirm.
12082 (define_expand "avx_vinsertf128<mode>"
12083 [(match_operand:V_256 0 "register_operand" "")
12084 (match_operand:V_256 1 "register_operand" "")
12085 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12086 (match_operand:SI 3 "const_0_to_1_operand" "")]
12089 rtx (*insn)(rtx, rtx, rtx);
12091 switch (INTVAL (operands[3]))
12094 insn = gen_vec_set_lo_<mode>;
12097 insn = gen_vec_set_hi_<mode>;
12100 gcc_unreachable ();
12103 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replaces the low half of a V4DI register: the result combines
;; operand 2 (V2DI, register or memory) with elements 2 and 3 of
;; operand 1.  Printed as vinserti128 with immediate 0x0.  The
;; vec_concat/vec_select head lines and the enabling condition are elided
;; from this excerpt.
12107 (define_insn "avx2_vec_set_lo_v4di"
12108 [(set (match_operand:V4DI 0 "register_operand" "=x")
12110 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12112 (match_operand:V4DI 1 "register_operand" "x")
12113 (parallel [(const_int 2) (const_int 3)]))))]
12115 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12116 [(set_attr "type" "sselog")
12117 (set_attr "prefix_extra" "1")
12118 (set_attr "length_immediate" "1")
12119 (set_attr "prefix" "vex")
12120 (set_attr "mode" "OI")])
;; Replaces the high half of a V4DI register: the result combines
;; elements 0 and 1 of operand 1 with operand 2 (V2DI, register or
;; memory).  Printed as vinserti128 with immediate 0x1.  The
;; vec_concat/vec_select head lines and the enabling condition are elided
;; from this excerpt.
12122 (define_insn "avx2_vec_set_hi_v4di"
12123 [(set (match_operand:V4DI 0 "register_operand" "=x")
12126 (match_operand:V4DI 1 "register_operand" "x")
12127 (parallel [(const_int 0) (const_int 1)]))
12128 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12130 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12131 [(set_attr "type" "sselog")
12132 (set_attr "prefix_extra" "1")
12133 (set_attr "length_immediate" "1")
12134 (set_attr "prefix" "vex")
12135 (set_attr "mode" "OI")])
;; Replaces the low half of a VI8F_256 register: vec_concat of operand 2
;; (half-width, register or memory) with elements 2 and 3 of operand 1.
;; Printed as vinsert<i128> with immediate 0x0.
12137 (define_insn "vec_set_lo_<mode>"
12138 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12139 (vec_concat:VI8F_256
12140 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12141 (vec_select:<ssehalfvecmode>
12142 (match_operand:VI8F_256 1 "register_operand" "x")
12143 (parallel [(const_int 2) (const_int 3)]))))]
12145 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12146 [(set_attr "type" "sselog")
12147 (set_attr "prefix_extra" "1")
12148 (set_attr "length_immediate" "1")
12149 (set_attr "prefix" "vex")
12150 (set_attr "mode" "<sseinsnmode>")])
;; Replaces the high half of a VI8F_256 register: vec_concat of elements
;; 0 and 1 of operand 1 with operand 2 (half-width, register or memory).
;; Printed as vinsert<i128> with immediate 0x1.
12152 (define_insn "vec_set_hi_<mode>"
12153 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12154 (vec_concat:VI8F_256
12155 (vec_select:<ssehalfvecmode>
12156 (match_operand:VI8F_256 1 "register_operand" "x")
12157 (parallel [(const_int 0) (const_int 1)]))
12158 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12160 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12161 [(set_attr "type" "sselog")
12162 (set_attr "prefix_extra" "1")
12163 (set_attr "length_immediate" "1")
12164 (set_attr "prefix" "vex")
12165 (set_attr "mode" "<sseinsnmode>")])
;; Replaces the low half of a VI4F_256 register: vec_concat of operand 2
;; (half-width, register or memory) with elements 4-7 of operand 1.
;; Printed as vinsert<i128> with immediate 0x0.
12167 (define_insn "vec_set_lo_<mode>"
12168 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12169 (vec_concat:VI4F_256
12170 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12171 (vec_select:<ssehalfvecmode>
12172 (match_operand:VI4F_256 1 "register_operand" "x")
12173 (parallel [(const_int 4) (const_int 5)
12174 (const_int 6) (const_int 7)]))))]
12176 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12177 [(set_attr "type" "sselog")
12178 (set_attr "prefix_extra" "1")
12179 (set_attr "length_immediate" "1")
12180 (set_attr "prefix" "vex")
12181 (set_attr "mode" "<sseinsnmode>")])
;; Replaces the high half of a VI4F_256 register: vec_concat of elements
;; 0-3 of operand 1 with operand 2 (half-width, register or memory).
;; Printed as vinsert<i128> with immediate 0x1.
12183 (define_insn "vec_set_hi_<mode>"
12184 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12185 (vec_concat:VI4F_256
12186 (vec_select:<ssehalfvecmode>
12187 (match_operand:VI4F_256 1 "register_operand" "x")
12188 (parallel [(const_int 0) (const_int 1)
12189 (const_int 2) (const_int 3)]))
12190 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12192 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12193 [(set_attr "type" "sselog")
12194 (set_attr "prefix_extra" "1")
12195 (set_attr "length_immediate" "1")
12196 (set_attr "prefix" "vex")
12197 (set_attr "mode" "<sseinsnmode>")])
;; Replaces the low half of a V16HI register: operand 2 (V8HI, register
;; or memory) is combined with elements 8-15 of operand 1.  Printed as
;; vinsert%~128 with immediate 0x0 (%~ is an output modifier; presumably
;; it picks between the `i' and `f' instruction forms -- confirm).  The
;; vec_concat/vec_select head lines and the enabling condition are elided
;; from this excerpt.
12199 (define_insn "vec_set_lo_v16hi"
12200 [(set (match_operand:V16HI 0 "register_operand" "=x")
12202 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12204 (match_operand:V16HI 1 "register_operand" "x")
12205 (parallel [(const_int 8) (const_int 9)
12206 (const_int 10) (const_int 11)
12207 (const_int 12) (const_int 13)
12208 (const_int 14) (const_int 15)]))))]
12210 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12211 [(set_attr "type" "sselog")
12212 (set_attr "prefix_extra" "1")
12213 (set_attr "length_immediate" "1")
12214 (set_attr "prefix" "vex")
12215 (set_attr "mode" "OI")])
;; Replaces the high half of a V16HI register: elements 0-7 of operand 1
;; are combined with operand 2 (V8HI, register or memory).  Printed as
;; vinsert%~128 with immediate 0x1.  The vec_concat/vec_select head lines
;; and the enabling condition are elided from this excerpt.
12217 (define_insn "vec_set_hi_v16hi"
12218 [(set (match_operand:V16HI 0 "register_operand" "=x")
12221 (match_operand:V16HI 1 "register_operand" "x")
12222 (parallel [(const_int 0) (const_int 1)
12223 (const_int 2) (const_int 3)
12224 (const_int 4) (const_int 5)
12225 (const_int 6) (const_int 7)]))
12226 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12228 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12229 [(set_attr "type" "sselog")
12230 (set_attr "prefix_extra" "1")
12231 (set_attr "length_immediate" "1")
12232 (set_attr "prefix" "vex")
12233 (set_attr "mode" "OI")])
;; Replaces the low half of a V32QI register: operand 2 (V16QI, register
;; or memory) is combined with elements 16-31 of operand 1.  Printed as
;; vinsert%~128 with immediate 0x0.  The vec_concat/vec_select head lines
;; and the enabling condition are elided from this excerpt.
12235 (define_insn "vec_set_lo_v32qi"
12236 [(set (match_operand:V32QI 0 "register_operand" "=x")
12238 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12240 (match_operand:V32QI 1 "register_operand" "x")
12241 (parallel [(const_int 16) (const_int 17)
12242 (const_int 18) (const_int 19)
12243 (const_int 20) (const_int 21)
12244 (const_int 22) (const_int 23)
12245 (const_int 24) (const_int 25)
12246 (const_int 26) (const_int 27)
12247 (const_int 28) (const_int 29)
12248 (const_int 30) (const_int 31)]))))]
12250 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12251 [(set_attr "type" "sselog")
12252 (set_attr "prefix_extra" "1")
12253 (set_attr "length_immediate" "1")
12254 (set_attr "prefix" "vex")
12255 (set_attr "mode" "OI")])
;; Replaces the high half of a V32QI register: elements 0-15 of operand 1
;; are combined with operand 2 (V16QI, register or memory).  Printed as
;; vinsert%~128 with immediate 0x1.  The vec_concat/vec_select head lines
;; and the enabling condition are elided from this excerpt.
12257 (define_insn "vec_set_hi_v32qi"
12258 [(set (match_operand:V32QI 0 "register_operand" "=x")
12261 (match_operand:V32QI 1 "register_operand" "x")
12262 (parallel [(const_int 0) (const_int 1)
12263 (const_int 2) (const_int 3)
12264 (const_int 4) (const_int 5)
12265 (const_int 6) (const_int 7)
12266 (const_int 8) (const_int 9)
12267 (const_int 10) (const_int 11)
12268 (const_int 12) (const_int 13)
12269 (const_int 14) (const_int 15)]))
12270 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12272 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12273 [(set_attr "type" "sselog")
12274 (set_attr "prefix_extra" "1")
12275 (set_attr "length_immediate" "1")
12276 (set_attr "prefix" "vex")
12277 (set_attr "mode" "OI")])
;; Masked load over the V48_AVX2 modes, printed as
;; v<sseintprefix>maskmov<ssemodesuffix>.  Operand 1 is the memory
;; source and operand 2 a register of mode <sseintvecmode>.  The unspec
;; name and enabling condition are elided from this excerpt.
12279 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12280 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12282 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12283 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12286 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12287 [(set_attr "type" "sselog1")
12288 (set_attr "prefix_extra" "1")
12289 (set_attr "prefix" "vex")
12290 (set_attr "mode" "<sseinsnmode>")])
;; Masked store over the V48_AVX2 modes, printed as
;; v<sseintprefix>maskmov<ssemodesuffix>.  The destination (operand 0)
;; is memory; operand 1 is a register of mode <sseintvecmode> and
;; operand 2 the data register.
;; NOTE(review): the rest of the operand vector, the unspec name and the
;; enabling condition are elided from this excerpt.
12292 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12293 [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12295 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12296 (match_operand:V48_AVX2 2 "register_operand" "x")
12300 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12301 [(set_attr "type" "sselog1")
12302 (set_attr "prefix_extra" "1")
12303 (set_attr "prefix" "vex")
12304 (set_attr "mode" "<sseinsnmode>")])
;; Cast from a half-width vector (operand 1, <ssehalfvecmode>) to an
;; AVX256MODE2P value, expressed as an unspec and split after reload
;; ("&& reload_completed").  The visible split code re-types op0 as a
;; <ssehalfvecmode> register with the same register number, re-types op1
;; as a <MODE> register with the same register number, and emits a plain
;; move.
;; NOTE(review): the conditions guarding the two re-typings are elided
;; from this excerpt; presumably only one of them runs for a given
;; alternative -- confirm against the full file.
12306 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12307 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12308 (unspec:AVX256MODE2P
12309 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12313 "&& reload_completed"
12316 rtx op0 = operands[0];
12317 rtx op1 = operands[1];
12319 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12321 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12322 emit_move_insn (op0, op1);
;; Vector initialisation for the V_256 modes: delegates to
;; ix86_expand_vector_init, passing false as its first argument together
;; with the destination (operand 0) and operand 1.
12326 (define_expand "vec_init<mode>"
12327 [(match_operand:V_256 0 "register_operand" "")
12328 (match_operand 1 "" "")]
12331 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that extracts a V2DI half from a V4DI register.  Operand 2 is
;; an immediate 0 or 1; the switch on it assigns gen_vec_extract_lo_v4di
;; or gen_vec_extract_hi_v4di to `insn', which is then emitted with
;; operands 0 and 1.
;; NOTE(review): the case labels are elided from this excerpt; presumably
;; 0 selects lo and 1 selects hi -- confirm.
12335 (define_expand "avx2_extracti128"
12336 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12337 (match_operand:V4DI 1 "register_operand" "")
12338 (match_operand:SI 2 "const_0_to_1_operand" "")]
12341 rtx (*insn)(rtx, rtx);
12343 switch (INTVAL (operands[2]))
12346 insn = gen_vec_extract_lo_v4di;
12349 insn = gen_vec_extract_hi_v4di;
12352 gcc_unreachable ();
12355 emit_insn (insn (operands[0], operands[1]));
;; Expander that inserts a V2DI value (operand 2) into a V4DI register
;; (operand 1).  Operand 3 is an immediate 0 or 1; the switch on it
;; assigns gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di to
;; `insn', which is then emitted with operands 0, 1 and 2.
;; NOTE(review): the case labels are elided from this excerpt; presumably
;; 0 selects lo and 1 selects hi -- confirm.
12359 (define_expand "avx2_inserti128"
12360 [(match_operand:V4DI 0 "register_operand" "")
12361 (match_operand:V4DI 1 "register_operand" "")
12362 (match_operand:V2DI 2 "nonimmediate_operand" "")
12363 (match_operand:SI 3 "const_0_to_1_operand" "")]
12366 rtx (*insn)(rtx, rtx, rtx);
12368 switch (INTVAL (operands[3]))
12371 insn = gen_avx2_vec_set_lo_v4di;
12374 insn = gen_avx2_vec_set_hi_v4di;
12377 gcc_unreachable ();
12380 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Shift over the VI4_AVX2 modes where the count (operand 2, register or
;; memory) is itself a vector of the same mode.  Printed as vpsravd.  The
;; RTL code line and enabling condition are elided from this excerpt.
12384 (define_insn "avx2_ashrv<mode>"
12385 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12387 (match_operand:VI4_AVX2 1 "register_operand" "x")
12388 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12390 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12391 [(set_attr "type" "sseishft")
12392 (set_attr "prefix" "vex")
12393 (set_attr "mode" "<sseinsnmode>")])
;; Shifts from the any_lshift code iterator over the VI48_AVX2 modes,
;; where the count (operand 2, register or memory) is a vector of the
;; same mode.  Printed as vp<vshift>v<ssemodesuffix>.  The enabling
;; condition is elided from this excerpt.
12395 (define_insn "avx2_<shift_insn>v<mode>"
12396 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12397 (any_lshift:VI48_AVX2
12398 (match_operand:VI48_AVX2 1 "register_operand" "x")
12399 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12401 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12402 [(set_attr "type" "sseishft")
12403 (set_attr "prefix" "vex")
12404 (set_attr "mode" "<sseinsnmode>")])
;; Concatenation of two half-width vectors into a V_256 register.  Two
;; alternatives, chosen on operand 2:
;;   xm - register or memory: vinsert<i128> with immediate 0x1, using the
;;        %t1 form of operand 1.
;;   C  - constant operand: a move of operand 1 into the %x0 form of the
;;        destination, printed as vmovaps, vmovapd or vmovdqa depending
;;        on get_attr_mode (insn).
;; NOTE(review): the case labels of both switches and the
;; vec_concat head line are elided from this excerpt.
12406 (define_insn "avx_vec_concat<mode>"
12407 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12409 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12410 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12413 switch (which_alternative)
12416 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12418 switch (get_attr_mode (insn))
12421 return "vmovaps\t{%1, %x0|%x0, %1}";
12423 return "vmovapd\t{%1, %x0|%x0, %1}";
12425 return "vmovdqa\t{%1, %x0|%x0, %1}";
12428 gcc_unreachable ();
12431 [(set_attr "type" "sselog,ssemov")
12432 (set_attr "prefix_extra" "1,*")
12433 (set_attr "length_immediate" "1,*")
12434 (set_attr "prefix" "vex")
12435 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps producing V4SF: the pattern takes elements
;; 0-3 of a V8SF unspec whose input is a V8HI register (operand 1).  The
;; vec_select head line, the unspec name and the enabling condition are
;; elided from this excerpt.
12437 (define_insn "vcvtph2ps"
12438 [(set (match_operand:V4SF 0 "register_operand" "=x")
12440 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12442 (parallel [(const_int 0) (const_int 1)
12443 (const_int 2) (const_int 3)])))]
12445 "vcvtph2ps\t{%1, %0|%0, %1}"
12446 [(set_attr "type" "ssecvt")
12447 (set_attr "prefix" "vex")
12448 (set_attr "mode" "V4SF")])
;; Memory-source form of vcvtph2ps: operand 1 is a V4HI memory operand and
;; the V4SF result is expressed directly as an UNSPEC_VCVTPH2PS.  The
;; leading "*" makes this an unnamed pattern, matched by the recognizer
;; only.
;; NOTE(review): the "mode" attribute is V8SF although the result is V4SF
;; (the register form "vcvtph2ps" uses V4SF) -- confirm this is intended.
12450 (define_insn "*vcvtph2ps_load"
12451 [(set (match_operand:V4SF 0 "register_operand" "=x")
12452 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12453 UNSPEC_VCVTPH2PS))]
12455 "vcvtph2ps\t{%1, %0|%0, %1}"
12456 [(set_attr "type" "ssecvt")
12457 (set_attr "prefix" "vex")
12458 (set_attr "mode" "V8SF")])
;; 256-bit form of vcvtph2ps: converts the V8HI operand 1 (register or
;; memory) into the V8SF register operand 0, expressed as an
;; UNSPEC_VCVTPH2PS.
;; NOTE(review): the insn condition is on a line not shown in this excerpt.
12460 (define_insn "vcvtph2ps256"
12461 [(set (match_operand:V8SF 0 "register_operand" "=x")
12462 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12463 UNSPEC_VCVTPH2PS))]
12465 "vcvtph2ps\t{%1, %0|%0, %1}"
12466 [(set_attr "type" "ssecvt")
12467 (set_attr "prefix" "vex")
12468 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit vcvtps2ph.  Operand 0 is the V8HI destination
;; register, operand 1 the V4SF source register and operand 2 an SImode
;; immediate in the range 0..255.  The conversion itself is a V4HI unspec.
;; The preparation statement sets operands[3] to the V4HI zero constant,
;; which is what the "*vcvtps2ph" insn requires for its operand 3
;; (const0_operand).
;; NOTE(review): the operator that combines the V4HI unspec with
;; operands[3], the unspec tag and the expander condition are on lines not
;; shown in this excerpt.
12470 (define_expand "vcvtps2ph"
12471 [(set (match_operand:V8HI 0 "register_operand" "")
12473 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12474 (match_operand:SI 2 "const_0_to_255_operand" "")]
12478 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination vcvtps2ph.  Operand 1 is the V4SF source register,
;; operand 2 the 8-bit immediate ("N" constraint) and operand 3 must be a
;; V4HI zero constant (const0_operand).  Operand 3 does not appear in the
;; output template.
;; NOTE(review): the operator combining the V4HI unspec with operand 3, the
;; unspec tag and the insn condition are on lines not shown in this
;; excerpt.
12480 (define_insn "*vcvtps2ph"
12481 [(set (match_operand:V8HI 0 "register_operand" "=x")
12483 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12484 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12486 (match_operand:V4HI 3 "const0_operand" "")))]
12488 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12489 [(set_attr "type" "ssecvt")
12490 (set_attr "prefix" "vex")
12491 (set_attr "mode" "V4SF")])
;; Memory-destination vcvtps2ph: stores the V4HI result of
;; UNSPEC_VCVTPS2PH applied to the V4SF register operand 1 and the 8-bit
;; immediate operand 2 directly into the V4HI memory operand 0.
;; NOTE(review): the insn condition is on a line not shown in this excerpt.
12493 (define_insn "*vcvtps2ph_store"
12494 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12495 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12496 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12497 UNSPEC_VCVTPS2PH))]
12499 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12500 [(set_attr "type" "ssecvt")
12501 (set_attr "prefix" "vex")
12502 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: converts the V8SF register operand 1 into a V8HI
;; result using the 8-bit immediate operand 2.  The destination operand 0
;; may be a register or memory ("=xm").
;; NOTE(review): the insn condition is on a line not shown in this excerpt.
12504 (define_insn "vcvtps2ph256"
12505 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12506 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12507 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12508 UNSPEC_VCVTPS2PH))]
12510 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12511 [(set_attr "type" "ssecvt")
12512 (set_attr "prefix" "vex")
12513 (set_attr "mode" "V8SF")])
12515 ;; For gather* insn patterns
;; VEC_GATHER_MODE lists the eight data vector modes the gather patterns
;; are instantiated for: 128-bit and 256-bit vectors with 32-bit or 64-bit
;; integer or floating-point elements.
12516 (define_mode_iterator VEC_GATHER_MODE
12517 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector when the indices are SImode elements, as used
;; by the avx2_gathersi patterns.  Every data mode maps to V4SI except the
;; eight-element modes V8SI/V8SF, which map to V8SI.
12518 (define_mode_attr VEC_GATHER_IDXSI
12519 [(V2DI "V4SI") (V2DF "V4SI")
12520 (V4DI "V4SI") (V4DF "V4SI")
12521 (V4SI "V4SI") (V4SF "V4SI")
12522 (V8SI "V8SI") (V8SF "V8SI")])
;; Mode of the index vector when the indices are DImode elements, as used
;; by the avx2_gatherdi patterns.  The 128-bit data modes V2DI/V2DF and
;; V4SI/V4SF map to V2DI; the 256-bit data modes map to V4DI.
12523 (define_mode_attr VEC_GATHER_IDXDI
12524 [(V2DI "V2DI") (V2DF "V2DI")
12525 (V4DI "V4DI") (V4DF "V4DI")
12526 (V4SI "V2DI") (V4SF "V2DI")
12527 (V8SI "V4DI") (V8SF "V4DI")])
;; Mode used by the avx2_gatherdi patterns for the register operands that
;; accompany the gather (the ones tied to the destination and to the
;; scratch).  It equals the data mode except for V8SI/V8SF, which map to
;; the half-width V4SI/V4SF.
12528 (define_mode_attr VEC_GATHER_SRCDI
12529 [(V2DI "V2DI") (V2DF "V2DF")
12530 (V4DI "V4DI") (V4DF "V4DF")
12531 (V4SI "V4SI") (V4SF "V4SF")
12532 (V8SI "V4SI") (V8SF "V4SF")])
;; Expander for gathers indexed by SImode elements.
;;   operand 0: destination register (VEC_GATHER_MODE)
;;   operand 1: register operand of the same mode, first unspec element
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector register, mode <VEC_GATHER_IDXSI>
;;   operand 4: register operand of the data mode, last unspec element
;;              (presumably the gather mask -- confirm with the callers)
;;   operand 5: scale, restricted to 1, 2, 4 or 8 by const1248_operand
;;   operand 6: scratch register clobbered by the pattern
;; The preparation statement wraps operands 2, 3 and 5 in a Pmode
;; UNSPEC_VSIBADDR, which becomes the address of the gathered memory
;; reference.  The (mem:BLK (scratch)) element stands for a memory read at
;; an unspecified address.
;; NOTE(review): the line assigning the UNSPEC_VSIBADDR to its operand
;; slot, the unspec tag and the expander condition are on lines not shown
;; in this excerpt.
12534 (define_expand "avx2_gathersi<mode>"
12535 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12536 (unspec:VEC_GATHER_MODE
12537 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12538 (mem:<ssescalarmode>
12540 [(match_operand 2 "vsib_address_operand" "")
12541 (match_operand:<VEC_GATHER_IDXSI>
12542 3 "register_operand" "")
12543 (match_operand:SI 5 "const1248_operand" "")]))
12544 (mem:BLK (scratch))
12545 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12547 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12551 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12552 operands[5]), UNSPEC_VSIBADDR);
;; Insn for gathers indexed by SImode elements, emitted as
;; v<sseintprefix>gatherd<ssemodesuffix>.
;;   operand 0: destination, earlyclobber ("=&x")
;;   operand 1: scratch register, earlyclobber, clobbered by the pattern
;;   operand 2: register input tied to operand 0 by the "0" constraint
;;   operands 3, 4, 6: base address, index vector (<VEC_GATHER_IDXSI>) and
;;              scale (1, 2, 4 or 8) inside the vsib_mem_operator operand 7
;;   operand 5: register input tied to the scratch by the "1" constraint
;; The template prints only %0, %7 and %1; operands 2 and 5 reach the
;; instruction through the matching constraints.
;; NOTE(review): operand 5 is presumably the mask and the clobber of
;; operand 1 presumably models the instruction overwriting it.  The unspec
;; tags and the insn condition are on lines not shown in this excerpt.
12555 (define_insn "*avx2_gathersi<mode>"
12556 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12557 (unspec:VEC_GATHER_MODE
12558 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12559 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12561 [(match_operand:P 3 "vsib_address_operand" "p")
12562 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
12563 (match_operand:SI 6 "const1248_operand" "n")]
12565 (mem:BLK (scratch))
12566 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12568 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12570 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12571 [(set_attr "type" "ssemov")
12572 (set_attr "prefix" "vex")
12573 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the SImode-indexed gather insn.  It has no register input
;; tied to operand 0, so the operand numbers are one lower than in
;; "*avx2_gathersi<mode>": operands 2, 3 and 5 are the base address, index
;; vector and scale inside the vsib_mem_operator operand 6, and operand 4
;; is tied to the scratch operand 1 by the "1" constraint.
;; NOTE(review): the first element of the unspec vector, the unspec tags
;; and the insn condition are on lines not shown in this excerpt -- confirm
;; what replaces the tied input here.
12575 (define_insn "*avx2_gathersi<mode>_2"
12576 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12577 (unspec:VEC_GATHER_MODE
12579 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12581 [(match_operand:P 2 "vsib_address_operand" "p")
12582 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
12583 (match_operand:SI 5 "const1248_operand" "n")]
12585 (mem:BLK (scratch))
12586 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
12588 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12590 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
12591 [(set_attr "type" "ssemov")
12592 (set_attr "prefix" "vex")
12593 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers indexed by DImode elements.
;;   operand 0: destination register (VEC_GATHER_MODE)
;;   operand 1: register operand of mode <VEC_GATHER_SRCDI>, first unspec
;;              element
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector register, mode <VEC_GATHER_IDXDI>
;;   operand 4: register operand of mode <VEC_GATHER_SRCDI>, last unspec
;;              element (presumably the gather mask -- confirm with the
;;              callers)
;;   operand 5: scale, restricted to 1, 2, 4 or 8 by const1248_operand
;;   operand 6: scratch register clobbered by the pattern
;; Operands 1 and 4 use <VEC_GATHER_SRCDI>, which is the half-width mode
;; for V8SI/V8SF and the data mode otherwise.  The preparation statement
;; wraps operands 2, 3 and 5 in a Pmode UNSPEC_VSIBADDR, which becomes the
;; address of the gathered memory reference.
;; NOTE(review): the line assigning the UNSPEC_VSIBADDR to its operand
;; slot, the unspec tag and the expander condition are on lines not shown
;; in this excerpt.
12595 (define_expand "avx2_gatherdi<mode>"
12596 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12597 (unspec:VEC_GATHER_MODE
12598 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
12599 (mem:<ssescalarmode>
12601 [(match_operand 2 "vsib_address_operand" "")
12602 (match_operand:<VEC_GATHER_IDXDI>
12603 3 "register_operand" "")
12604 (match_operand:SI 5 "const1248_operand" "")]))
12605 (mem:BLK (scratch))
12606 (match_operand:<VEC_GATHER_SRCDI>
12607 4 "register_operand" "")]
12609 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12613 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12614 operands[5]), UNSPEC_VSIBADDR);
;; Insn for gathers indexed by DImode elements, emitted as
;; v<sseintprefix>gatherq<ssemodesuffix>.
;;   operand 0: destination, earlyclobber ("=&x")
;;   operand 1: scratch register, earlyclobber, clobbered by the pattern
;;   operand 2: <VEC_GATHER_SRCDI> register input tied to operand 0
;;   operands 3, 4, 6: base address, index vector (<VEC_GATHER_IDXDI>) and
;;              scale (1, 2, 4 or 8) inside the vsib_mem_operator operand 7
;;   operand 5: <VEC_GATHER_SRCDI> register input tied to the scratch
;; Unlike the SImode-indexed insn, the template prints operands 2 and 5
;; rather than 0 and 1.  They occupy the same registers through the "0" and
;; "1" constraints but carry mode <VEC_GATHER_SRCDI>, which is the
;; half-width mode for V8SI/V8SF.
;; NOTE(review): the unspec tags and the insn condition are on lines not
;; shown in this excerpt.
12617 (define_insn "*avx2_gatherdi<mode>"
12618 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12619 (unspec:VEC_GATHER_MODE
12620 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12621 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12623 [(match_operand:P 3 "vsib_address_operand" "p")
12624 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12625 (match_operand:SI 6 "const1248_operand" "n")]
12627 (mem:BLK (scratch))
12628 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12630 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12632 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
12633 [(set_attr "type" "ssemov")
12634 (set_attr "prefix" "vex")
12635 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the DImode-indexed gather insn.  It has no register input
;; tied to operand 0: operands 2, 3 and 5 are the base address, index
;; vector and scale inside the vsib_mem_operator operand 6, and operand 4
;; (<VEC_GATHER_SRCDI>) is tied to the scratch operand 1.
;; The output code picks between two templates.  When the data mode
;; differs from <VEC_GATHER_SRCDI> (V8SI and V8SF, per the
;; VEC_GATHER_SRCDI table) the destination is printed as %x0; otherwise it
;; is printed as %0.
;; NOTE(review): %x0 presumably prints the 128-bit register name -- confirm
;; with the i386 operand-modifier documentation.  The first element of the
;; unspec vector, the unspec tags and the insn condition are on lines not
;; shown in this excerpt.
12637 (define_insn "*avx2_gatherdi<mode>_2"
12638 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12639 (unspec:VEC_GATHER_MODE
12641 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12643 [(match_operand:P 2 "vsib_address_operand" "p")
12644 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12645 (match_operand:SI 5 "const1248_operand" "n")]
12647 (mem:BLK (scratch))
12648 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12650 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12653 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
12654 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
12655 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
12657 [(set_attr "type" "ssemov")
12658 (set_attr "prefix" "vex")
12659 (set_attr "mode" "<sseinsnmode>")])
;; DImode-indexed gather whose destination is the <VEC_GATHER_SRCDI>
;; register operand 0.  The result is written as a vec_select with
;; (parallel [0 1 2 3]), i.e. element indices 0..3 of the inner expression.
;; Operand 2 is tied to operand 0, operand 5 is tied to the scratch
;; operand 1, and operands 3, 4 and 6 are the base address, index vector
;; and scale inside the vsib_mem_operator operand 7.  The scratch has mode
;; VI4F_256.
;; NOTE(review): the header of the inner expression the vec_select applies
;; to, the unspec tags and the insn condition are on lines not shown in
;; this excerpt.  <mode> presumably iterates over VI4F_256, the mode of the
;; scratch -- confirm.
12661 (define_insn "*avx2_gatherdi<mode>_3"
12662 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
12663 (vec_select:<VEC_GATHER_SRCDI>
12665 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12666 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12668 [(match_operand:P 3 "vsib_address_operand" "p")
12669 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12670 (match_operand:SI 6 "const1248_operand" "n")]
12672 (mem:BLK (scratch))
12673 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12675 (parallel [(const_int 0) (const_int 1)
12676 (const_int 2) (const_int 3)])))
12677 (clobber (match_scratch:VI4F_256 1 "=&x"))]
12679 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
12680 [(set_attr "type" "ssemov")
12681 (set_attr "prefix" "vex")
12682 (set_attr "mode" "<sseinsnmode>")])
;; Form of "*avx2_gatherdi<mode>_3" without a register input tied to
;; operand 0.  The destination is the <VEC_GATHER_SRCDI> register
;; operand 0, written as a vec_select with (parallel [0 1 2 3]).
;; Operands 2, 3 and 5 are the base address, index vector and scale inside
;; the vsib_mem_operator operand 6, and operand 4 is tied to the VI4F_256
;; scratch operand 1.
;; NOTE(review): the header of the inner expression the vec_select applies
;; to, its first element, the unspec tags and the insn condition are on
;; lines not shown in this excerpt.
12684 (define_insn "*avx2_gatherdi<mode>_4"
12685 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
12686 (vec_select:<VEC_GATHER_SRCDI>
12689 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12691 [(match_operand:P 2 "vsib_address_operand" "p")
12692 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12693 (match_operand:SI 5 "const1248_operand" "n")]
12695 (mem:BLK (scratch))
12696 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12698 (parallel [(const_int 0) (const_int 1)
12699 (const_int 2) (const_int 3)])))
12700 (clobber (match_scratch:VI4F_256 1 "=&x"))]
12702 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
12703 [(set_attr "type" "ssemov")
12704 (set_attr "prefix" "vex")
12705 (set_attr "mode" "<sseinsnmode>")])