1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Enum "unspec": UNSPEC_* codes, used below as the tag operand of
;; (unspec ...) expressions.  Only part of the member list is visible here.
21 (define_c_enum "unspec" [
56 UNSPEC_XOP_UNSIGNED_CMP
67 UNSPEC_AESKEYGENASSIST
;; Enum "unspecv"; its members are not visible in this excerpt.
91 (define_c_enum "unspecv" [
101 ;; All vector modes including V?TImode, used in move patterns.
;; V16 is the iterator used by the mov, push and movmisalign expanders
;; below.  The first mode on each line is enabled only under TARGET_AVX;
;; the second mode (including V1TI) carries no condition.
102 (define_mode_iterator V16
103 [(V32QI "TARGET_AVX") V16QI
104 (V16HI "TARGET_AVX") V8HI
105 (V8SI "TARGET_AVX") V4SI
106 (V4DI "TARGET_AVX") V2DI
107 (V2TI "TARGET_AVX") V1TI
108 (V8SF "TARGET_AVX") V4SF
109 (V4DF "TARGET_AVX") V2DF])
;; V: integer and float vector modes without the V?TI entries.  The first
;; mode on each line requires TARGET_AVX; V2DF additionally requires
;; TARGET_SSE2, the remaining modes are unconditional.
112 (define_mode_iterator V
113 [(V32QI "TARGET_AVX") V16QI
114 (V16HI "TARGET_AVX") V8HI
115 (V8SI "TARGET_AVX") V4SI
116 (V4DI "TARGET_AVX") V2DI
117 (V8SF "TARGET_AVX") V4SF
118 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
120 ;; All 128bit vector modes
;; V_128: every entry is unconditional except V2DF, which needs TARGET_SSE2.
121 (define_mode_iterator V_128
122 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
124 ;; All 256bit vector modes
;; V_256: four integer and two float modes; no per-mode enabling condition
;; is attached in this iterator.
125 (define_mode_iterator V_256
126 [V32QI V16HI V8SI V4DI V8SF V4DF])
128 ;; All vector float modes
;; VF: V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and
;; V4SF is unconditional.  Used by most float arithmetic patterns below.
129 (define_mode_iterator VF
130 [(V8SF "TARGET_AVX") V4SF
131 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
133 ;; All SFmode vector float modes
;; VF1: V8SF (TARGET_AVX only) and V4SF (unconditional).
134 (define_mode_iterator VF1
135 [(V8SF "TARGET_AVX") V4SF])
137 ;; All DFmode vector float modes
;; VF2: V4DF (TARGET_AVX only) and V2DF.  Unlike in VF, V2DF carries no
;; TARGET_SSE2 condition here, so users must test for it themselves.
138 (define_mode_iterator VF2
139 [(V4DF "TARGET_AVX") V2DF])
141 ;; All 128bit vector float modes
;; VF_128: V4SF unconditionally, V2DF only with TARGET_SSE2.  Used by the
;; "vm" (scalar-in-vector) patterns below.
142 (define_mode_iterator VF_128
143 [V4SF (V2DF "TARGET_SSE2")])
145 ;; All 256bit vector float modes
;; VF_256: the mode list of this iterator is not visible in this excerpt.
146 (define_mode_iterator VF_256
149 ;; All vector integer modes
;; VI: the first mode on each line requires TARGET_AVX; the second is
;; unconditional.
150 (define_mode_iterator VI
151 [(V32QI "TARGET_AVX") V16QI
152 (V16HI "TARGET_AVX") V8HI
153 (V8SI "TARGET_AVX") V4SI
154 (V4DI "TARGET_AVX") V2DI])
;; VI_AVX2: same mode list as VI, but the first mode on each line is gated
;; on TARGET_AVX2 instead of TARGET_AVX.
156 (define_mode_iterator VI_AVX2
157 [(V32QI "TARGET_AVX2") V16QI
158 (V16HI "TARGET_AVX2") V8HI
159 (V8SI "TARGET_AVX2") V4SI
160 (V4DI "TARGET_AVX2") V2DI])
162 ;; All QImode vector integer modes
;; VI1: V32QI (TARGET_AVX only) and V16QI.  Used by the dqu load/store
;; patterns below.
163 (define_mode_iterator VI1
164 [(V32QI "TARGET_AVX") V16QI])
166 ;; All DImode vector integer modes
;; VI8: V4DI (TARGET_AVX only) and V2DI.  Used by the integer movnt pattern.
167 (define_mode_iterator VI8
168 [(V4DI "TARGET_AVX") V2DI])
;; Single-element-width integer iterators.  In each, the first mode is
;; enabled only under TARGET_AVX2 and the second is unconditional.
170 (define_mode_iterator VI1_AVX2
171 [(V32QI "TARGET_AVX2") V16QI])
173 (define_mode_iterator VI2_AVX2
174 [(V16HI "TARGET_AVX2") V8HI])
176 (define_mode_iterator VI4_AVX2
177 [(V8SI "TARGET_AVX2") V4SI])
179 (define_mode_iterator VI8_AVX2
180 [(V4DI "TARGET_AVX2") V2DI])
182 ;; ??? We should probably use TImode instead.
;; VIMAX_AVX2: V2TI (TARGET_AVX2 only) and V1TI.
183 (define_mode_iterator VIMAX_AVX2
184 [(V2TI "TARGET_AVX2") V1TI])
186 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; SSESCALARMODE: V2TI (TARGET_AVX2 only) and the scalar TI mode; note it
;; uses TI where VIMAX_AVX2 uses V1TI.
187 (define_mode_iterator SSESCALARMODE
188 [(V2TI "TARGET_AVX2") TI])
;; Multi-width integer iterators.  The digits in each name list the element
;; sizes in bytes that the iterator covers (1=QI, 2=HI, 4=SI, 8=DI).  On
;; every line the first mode requires TARGET_AVX2 and the second is
;; unconditional.
190 (define_mode_iterator VI12_AVX2
191 [(V32QI "TARGET_AVX2") V16QI
192 (V16HI "TARGET_AVX2") V8HI])
194 (define_mode_iterator VI24_AVX2
195 [(V16HI "TARGET_AVX2") V8HI
196 (V8SI "TARGET_AVX2") V4SI])
198 (define_mode_iterator VI124_AVX2
199 [(V32QI "TARGET_AVX2") V16QI
200 (V16HI "TARGET_AVX2") V8HI
201 (V8SI "TARGET_AVX2") V4SI])
203 (define_mode_iterator VI248_AVX2
204 [(V16HI "TARGET_AVX2") V8HI
205 (V8SI "TARGET_AVX2") V4SI
206 (V4DI "TARGET_AVX2") V2DI])
208 (define_mode_iterator VI48_AVX2
209 [(V8SI "TARGET_AVX2") V4SI
210 (V4DI "TARGET_AVX2") V2DI])
;; V48_AVX2: in the visible entries all four integer modes, 128-bit ones
;; included, require TARGET_AVX2.
;; NOTE(review): the opening of the mode list is not visible in this
;; excerpt; confirm which further modes precede these.
212 (define_mode_iterator V48_AVX2
215 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
216 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; <sse2_avx2>: pattern-name prefix string, "sse2" for the first mode on
;; each line and "avx2" for the second.
218 (define_mode_attr sse2_avx2
219 [(V16QI "sse2") (V32QI "avx2")
220 (V8HI "sse2") (V16HI "avx2")
221 (V4SI "sse2") (V8SI "avx2")
222 (V2DI "sse2") (V4DI "avx2")
223 (V1TI "sse2") (V2TI "avx2")])
;; <ssse3_avx2>: "ssse3" for the first mode on each line, "avx2" for the
;; second.  The last row is keyed on scalar TI (not V1TI), which matches
;; the SSESCALARMODE iterator.
225 (define_mode_attr ssse3_avx2
226 [(V16QI "ssse3") (V32QI "avx2")
227 (V8HI "ssse3") (V16HI "avx2")
228 (V4SI "ssse3") (V8SI "avx2")
229 (V2DI "ssse3") (V4DI "avx2")
230 (TI "ssse3") (V2TI "avx2")])
;; <sse4_1_avx2>: "sse4_1" for the first mode on each line, "avx2" for the
;; second.
232 (define_mode_attr sse4_1_avx2
233 [(V16QI "sse4_1") (V32QI "avx2")
234 (V8HI "sse4_1") (V16HI "avx2")
235 (V4SI "sse4_1") (V8SI "avx2")
236 (V2DI "sse4_1") (V4DI "avx2")])
;; <avx_avx2>: "avx" for every float vector mode listed, "avx2" for every
;; integer vector mode listed, regardless of vector width.
238 (define_mode_attr avx_avx2
239 [(V4SF "avx") (V2DF "avx")
240 (V8SF "avx") (V4DF "avx")
241 (V4SI "avx2") (V2DI "avx2")
242 (V8SI "avx2") (V4DI "avx2")])
;; <vec_avx2>: "vec" for the first mode on each line, "avx2" for the second.
244 (define_mode_attr vec_avx2
245 [(V16QI "vec") (V32QI "avx2")
246 (V8HI "vec") (V16HI "avx2")
247 (V4SI "vec") (V8SI "avx2")
248 (V2DI "vec") (V4DI "avx2")])
;; <ssedoublemode>: for the two HImode vectors, the SImode vector with the
;; same number of elements (V16HI -> V16SI, V8HI -> V8SI).
250 (define_mode_attr ssedoublemode
251 [(V16HI "V16SI") (V8HI "V8SI")])
;; <ssebytemode>: the QImode vector name associated with each DImode vector
;; (V4DI -> V32QI, V2DI -> V16QI).
253 (define_mode_attr ssebytemode
254 [(V4DI "V32QI") (V2DI "V16QI")])
256 ;; All 128bit vector integer modes
;; VI_128: no per-mode enabling conditions.
257 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
259 ;; All 256bit vector integer modes
;; VI_256: no per-mode enabling conditions.
260 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
262 ;; Random 128bit vector integer mode combinations
;; Unconditional subsets of the 128-bit integer modes.  Except for
;; VI128_128, the digits before "_128" list the element sizes in bytes
;; (1=QI, 2=HI, 4=SI, 8=DI).  VI128_128 does not follow that scheme: it
;; holds V16QI, V8HI and V2DI, i.e. sizes 1, 2 and 8.
263 (define_mode_iterator VI12_128 [V16QI V8HI])
264 (define_mode_iterator VI14_128 [V16QI V4SI])
265 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
266 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
267 (define_mode_iterator VI24_128 [V8HI V4SI])
268 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
269 (define_mode_iterator VI48_128 [V4SI V2DI])
271 ;; Random 256bit vector integer mode combinations
;; Unconditional subsets of the 256-bit integer modes, named the same way
;; as their 128-bit counterparts.
272 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
273 (define_mode_iterator VI48_256 [V8SI V4DI])
275 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode
;; of the same element count (e.g. V4SI with V4SF).  No enabling
;; conditions are attached.
276 (define_mode_iterator VI4F_128 [V4SI V4SF])
277 (define_mode_iterator VI8F_128 [V2DI V2DF])
278 (define_mode_iterator VI4F_256 [V8SI V8SF])
279 (define_mode_iterator VI8F_256 [V4DI V4DF])
281 ;; Mapping from float mode to required SSE level
;; <sse>: used as the leading component of pattern names such as
;; "<sse>_loadu..." below.  Covers scalar SF/DF as well as the vector modes.
282 (define_mode_attr sse
283 [(SF "sse") (DF "sse2")
284 (V4SF "sse") (V2DF "sse2")
285 (V8SF "avx") (V4DF "avx")])
;; <sse2>: "sse2" for the first mode on each line, "avx" for the second.
;; Used in the names of the dqu and integer movnt patterns.
287 (define_mode_attr sse2
288 [(V16QI "sse2") (V32QI "avx")
289 (V2DI "sse2") (V4DI "avx")])
;; <sse3>: "sse3" for V16QI, "avx" for V32QI.  Used in the name of the
;; lddqu pattern.
291 (define_mode_attr sse3
292 [(V16QI "sse3") (V32QI "avx")])
;; <sse4_1>: "sse4_1" for the first mode on each line, "avx" for the second.
294 (define_mode_attr sse4_1
295 [(V4SF "sse4_1") (V2DF "sse4_1")
296 (V8SF "avx") (V4DF "avx")])
;; <avxsizesuffix>: pattern-name suffix, "256" on the first and third rows
;; of the table and the empty string on the second and fourth.
298 (define_mode_attr avxsizesuffix
299 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
300 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
301 (V8SF "256") (V4DF "256")
302 (V4SF "") (V2DF "")])
304 ;; SSE instruction mode
;; <sseinsnmode>: value used for an insn's "mode" attribute.  The integer
;; vector modes on the first row map to OI and those on the second row to
;; TI; float vector modes map to themselves.
;; NOTE(review): the end of this table is not visible in this excerpt.
305 (define_mode_attr sseinsnmode
306 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
307 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
308 (V8SF "V8SF") (V4DF "V4DF")
309 (V4SF "V4SF") (V2DF "V2DF")
312 ;; Mapping of vector float and integer modes to the integer vector mode of the same size
;; <sseintvecmode>: float modes map to the integer mode with the same
;; element count and width; integer modes map to themselves.
313 (define_mode_attr sseintvecmode
314 [(V8SF "V8SI") (V4DF "V4DI")
315 (V4SF "V4SI") (V2DF "V2DI")
316 (V8SI "V8SI") (V4DI "V4DI")
317 (V4SI "V4SI") (V2DI "V2DI")
318 (V16HI "V16HI") (V8HI "V8HI")
319 (V32QI "V32QI") (V16QI "V16QI")])
;; <sseintvecmodelower>: same mapping as <sseintvecmode>, with the result
;; spelled in lower case.
321 (define_mode_attr sseintvecmodelower
322 [(V8SF "v8si") (V4DF "v4di")
323 (V4SF "v4si") (V2DF "v2di")
324 (V8SI "v8si") (V4DI "v4di")
325 (V4SI "v4si") (V2DI "v2di")
326 (V16HI "v16hi") (V8HI "v8hi")
327 (V32QI "v32qi") (V16QI "v16qi")])
329 ;; Mapping of vector modes to a vector mode of double size
;; <ssedoublevecmode>: same element type, twice the element count
;; (e.g. V4SI -> V8SI, V8SF -> V16SF).
330 (define_mode_attr ssedoublevecmode
331 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
332 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
333 (V8SF "V16SF") (V4DF "V8DF")
334 (V4SF "V8SF") (V2DF "V4DF")])
336 ;; Mapping of vector modes to a vector mode of half size
;; <ssehalfvecmode>: same element type, half the element count
;; (e.g. V8SI -> V4SI).  No V2DI entry appears in the visible rows.
;; NOTE(review): the end of this table is not visible in this excerpt.
337 (define_mode_attr ssehalfvecmode
338 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
339 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
340 (V8SF "V4SF") (V4DF "V2DF")
343 ;; Mapping of vector modes back to the scalar modes
;; <ssescalarmode>: element mode of each vector mode.  Used as the "mode"
;; attribute of the "vm" scalar patterns below.
344 (define_mode_attr ssescalarmode
345 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
346 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
347 (V8SF "SF") (V4DF "DF")
348 (V4SF "SF") (V2DF "DF")])
350 ;; Number of scalar elements in each vector type
;; <ssescalarnum>: element count of each vector mode, as a decimal string.
351 (define_mode_attr ssescalarnum
352 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
353 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
354 (V8SF "8") (V4DF "4")
355 (V4SF "4") (V2DF "2")])
357 ;; Mnemonic prefix per vector mode: "p" for integer modes, empty for float modes
;; <sseintprefix>: "p" for the integer modes shown, the empty string for
;; the float modes shown.
;; NOTE(review): some rows of this table are not visible in this excerpt.
358 (define_mode_attr sseintprefix
359 [(V2DI "p") (V2DF "")
362 (V8SI "p") (V8SF "")])
364 ;; SSE scalar suffix for vector modes
;; <ssescalarmodesuffix>: "ss" or "sd" appended to scalar mnemonics in the
;; "vm" patterns below.  Note V8SI and V4DI also map to "ss" and "sd".
;; NOTE(review): the first and last rows are not visible in this excerpt.
365 (define_mode_attr ssescalarmodesuffix
367 (V8SF "ss") (V4DF "sd")
368 (V4SF "ss") (V2DF "sd")
369 (V8SI "ss") (V4DI "sd")
372 ;; Pack/unpack vector modes
;; <sseunpackmode>: vector of the same total size with elements twice as
;; wide and half as many of them (e.g. V16QI -> V8HI).
373 (define_mode_attr sseunpackmode
374 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
375 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; <ssepackmode>: the inverse of <sseunpackmode> -- elements half as wide
;; and twice as many of them (e.g. V8HI -> V16QI).
377 (define_mode_attr ssepackmode
378 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
379 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
381 ;; Mapping of the max integer size for xop rotate immediate constraint
;; <sserotatemax>: for each 128-bit integer mode, the element width in
;; bits minus one (7, 15, 31, 63).
382 (define_mode_attr sserotatemax
383 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
385 ;; Mapping of mode to cast intrinsic name
;; <castmode>: "si", "ps" or "pd" for the three modes of AVX256MODE2P.
386 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
388 ;; Instruction suffix for sign and zero extensions.
;; <extsuffix>: a code attribute (keyed on the RTL code, not the mode):
;; "sx" for sign_extend, "zx" for zero_extend.
389 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
391 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; <i128>: "f128" for the float modes; for the integer modes the string
;; "%~128", in which "%~" is an output-template escape.
;; NOTE(review): presumably "%~" prints "i" or "f" depending on
;; TARGET_AVX2, per the comment above -- confirm against the operand
;; printer.
392 (define_mode_attr i128
393 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
394 (V8SI "%~128") (V4DI "%~128")])
;; AVX256MODE2P: the three 256-bit modes that <castmode> is defined for.
397 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
399 ;; Mapping of immediate bits for blend instructions
;; <blendbits>: decimal immediate per float mode.  For V8SF, V4SF and V2DF
;; the value is 2^nelts - 1 (255, 15, 3); V4DF is also given 15.
400 (define_mode_attr blendbits
401 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
403 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
405 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
409 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
411 ;; All of these patterns are enabled for SSE1 as well as SSE2.
412 ;; This is essential for maintaining stable calling conventions.
;; Vector move expander for every V16 mode.  Both operands are
;; nonimmediate_operand; the work is delegated to ix86_expand_vector_move,
;; which receives the mode and the operand array.
414 (define_expand "mov<mode>"
415 [(set (match_operand:V16 0 "nonimmediate_operand" "")
416 (match_operand:V16 1 "nonimmediate_operand" ""))]
419 ix86_expand_vector_move (<MODE>mode, operands);
;; The move insn behind the mov<mode> expander.
;; Alternatives (from the constraints): 0 = "C" constant into an xmm
;; register, 1 = register-or-memory into register, 2 = register into
;; memory.  The condition requires at least one operand to be a register.
;; Output: alternative 0 uses standard_sse_constant_opcode; the others
;; switch on the insn's "mode" attribute.  Within each group the unaligned
;; AVX mnemonic (vmovups / vmovupd / vmovdqu) is emitted when either
;; operand is misaligned_operand, and the non-float groups fall back to
;; movaps under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
;; "mode" attribute: <sseinsnmode> under TARGET_AVX; otherwise V4SF when
;; optimizing for size, when SSE2 is unavailable, or for the store
;; alternative with TARGET_SSE_TYPELESS_STORES; otherwise V4SF/V2DF for
;; those two modes and TI for everything else.
;; NOTE(review): several lines of this pattern (case labels and part of
;; the unaligned tests) are not visible in this excerpt.
423 (define_insn "*mov<mode>_internal"
424 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
425 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
427 && (register_operand (operands[0], <MODE>mode)
428 || register_operand (operands[1], <MODE>mode))"
430 switch (which_alternative)
433 return standard_sse_constant_opcode (insn, operands[1]);
436 switch (get_attr_mode (insn))
441 && (misaligned_operand (operands[0], <MODE>mode)
442 || misaligned_operand (operands[1], <MODE>mode)))
443 return "vmovups\t{%1, %0|%0, %1}";
445 return "%vmovaps\t{%1, %0|%0, %1}";
450 && (misaligned_operand (operands[0], <MODE>mode)
451 || misaligned_operand (operands[1], <MODE>mode)))
452 return "vmovupd\t{%1, %0|%0, %1}";
453 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
454 return "%vmovaps\t{%1, %0|%0, %1}";
456 return "%vmovapd\t{%1, %0|%0, %1}";
461 && (misaligned_operand (operands[0], <MODE>mode)
462 || misaligned_operand (operands[1], <MODE>mode)))
463 return "vmovdqu\t{%1, %0|%0, %1}";
464 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
465 return "%vmovaps\t{%1, %0|%0, %1}";
467 return "%vmovdqa\t{%1, %0|%0, %1}";
476 [(set_attr "type" "sselog1,ssemov,ssemov")
477 (set_attr "prefix" "maybe_vex")
479 (cond [(match_test "TARGET_AVX")
480 (const_string "<sseinsnmode>")
481 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
482 (not (match_test "TARGET_SSE2")))
483 (and (eq_attr "alternative" "2")
484 (match_test "TARGET_SSE_TYPELESS_STORES")))
485 (const_string "V4SF")
486 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
487 (const_string "V4SF")
488 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
489 (const_string "V2DF")
491 (const_string "TI")))])
;; Emits %vmovq from operand 1 (xmm register or memory) into an xmm
;; register.  The visible RTL selects element 0 of the V2DI source.
;; NOTE(review): the RTL wrapped around that selection and the insn
;; condition are not visible in this excerpt.
493 (define_insn "sse2_movq128"
494 [(set (match_operand:V2DI 0 "register_operand" "=x")
497 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
498 (parallel [(const_int 0)]))
501 "%vmovq\t{%1, %0|%0, %1}"
502 [(set_attr "type" "ssemov")
503 (set_attr "prefix" "maybe_vex")
504 (set_attr "mode" "TI")])
506 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
507 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
508 ;; from memory, we'd prefer to load the memory directly into the %xmm
509 ;; register. To facilitate this happy circumstance, this pattern won't
510 ;; split until after register allocation. If the 64-bit value didn't
511 ;; come from memory, this is the best we can do. This is much better
512 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from it.
;; Enabled only for 32-bit targets with SSE2 and TARGET_INTER_UNIT_MOVES.
;; Operand 1 is a DImode value in an integer register pair (alternative 0)
;; or in memory (alternative 1); operand 2 is a V4SI scratch that is only
;; a real xmm register in the register-pair alternative.
;; The split runs once reload_completed is set:
;;  - register source: load the SImode subregs at byte offsets 0 and 4
;;    into operand 0 and the scratch with sse2_loadld, then combine them
;;    with vec_interleave_lowv4si;
;;  - memory source: a single vec_concatv2di of the DImode memory with
;;    zero, written through the V2DImode lowpart of operand 0.
515 (define_insn_and_split "movdi_to_sse"
517 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
518 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
519 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
520 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
522 "&& reload_completed"
525 if (register_operand (operands[1], DImode))
527 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
528 Assemble the 64-bit DImode value in an xmm register. */
529 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
530 gen_rtx_SUBREG (SImode, operands[1], 0)));
531 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
532 gen_rtx_SUBREG (SImode, operands[1], 4)));
533 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
536 else if (memory_operand (operands[1], DImode))
537 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
538 operands[1], const0_rtx));
;; Rewrites, after reload and under TARGET_SSE, a V4SF register set whose
;; source satisfies zero_extended_scalar_load_operand.  Operand 1 is
;; narrowed to SFmode with simplify_gen_subreg and operand 2 is set to the
;; V4SF zero vector; the replacement RTL uses vec_duplicate of operand 1.
;; NOTE(review): the header line of this definition (presumably
;; define_split) and part of the replacement pattern are not visible.
544 [(set (match_operand:V4SF 0 "register_operand" "")
545 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
546 "TARGET_SSE && reload_completed"
549 (vec_duplicate:V4SF (match_dup 1))
553 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
554 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF counterpart of the V4SF rewrite, active after reload under
;; TARGET_SSE2.  The source is narrowed to DFmode and the result is
;; rebuilt as vec_concat of that scalar with a DFmode zero (operand 2).
;; NOTE(review): the header line of this definition (presumably
;; define_split) is not visible in this excerpt.
558 [(set (match_operand:V2DF 0 "register_operand" "")
559 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
560 "TARGET_SSE2 && reload_completed"
561 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
563 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
564 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every V16 mode: hands the register operand to
;; ix86_expand_push together with its mode.
567 (define_expand "push<mode>1"
568 [(match_operand:V16 0 "register_operand" "")]
571 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every V16 mode: delegates to
;; ix86_expand_vector_move_misalign with the mode and the operand array.
575 (define_expand "movmisalign<mode>"
576 [(set (match_operand:V16 0 "nonimmediate_operand" "")
577 (match_operand:V16 1 "nonimmediate_operand" ""))]
580 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Float vector load from memory into a register, emitted as
;; %vmovu<ssemodesuffix> and flagged with the "movu" attribute.
;; NOTE(review): the unspec tag and the insn condition are not visible in
;; this excerpt.
584 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
585 [(set (match_operand:VF 0 "register_operand" "=x")
587 [(match_operand:VF 1 "memory_operand" "m")]
590 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
591 [(set_attr "type" "ssemov")
592 (set_attr "movu" "1")
593 (set_attr "prefix" "maybe_vex")
594 (set_attr "mode" "<MODE>")])
;; Store counterpart of the loadu pattern: register source, memory
;; destination, same %vmovu<ssemodesuffix> mnemonic and attributes.
596 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
597 [(set (match_operand:VF 0 "memory_operand" "=m")
599 [(match_operand:VF 1 "register_operand" "x")]
602 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
603 [(set_attr "type" "ssemov")
604 (set_attr "movu" "1")
605 (set_attr "prefix" "maybe_vex")
606 (set_attr "mode" "<MODE>")])
;; Byte-vector (VI1) load from memory into a register, emitted as
;; %vmovdqu.  The "prefix_data16" attribute is computed from a TARGET_AVX
;; test; its two values are not visible in this excerpt.
608 (define_insn "<sse2>_loaddqu<avxsizesuffix>"
609 [(set (match_operand:VI1 0 "register_operand" "=x")
610 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
613 "%vmovdqu\t{%1, %0|%0, %1}"
614 [(set_attr "type" "ssemov")
615 (set_attr "movu" "1")
616 (set (attr "prefix_data16")
618 (match_test "TARGET_AVX")
621 (set_attr "prefix" "maybe_vex")
622 (set_attr "mode" "<sseinsnmode>")])
;; Store counterpart of the loaddqu pattern: register source, memory
;; destination, same %vmovdqu mnemonic and attribute set.
624 (define_insn "<sse2>_storedqu<avxsizesuffix>"
625 [(set (match_operand:VI1 0 "memory_operand" "=m")
626 (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
629 "%vmovdqu\t{%1, %0|%0, %1}"
630 [(set_attr "type" "ssemov")
631 (set_attr "movu" "1")
632 (set (attr "prefix_data16")
634 (match_test "TARGET_AVX")
637 (set_attr "prefix" "maybe_vex")
638 (set_attr "mode" "<sseinsnmode>")])
;; Byte-vector (VI1) load from memory emitted as %vlddqu.  Both
;; "prefix_data16" and "prefix_rep" are computed from TARGET_AVX tests
;; whose result values are not visible in this excerpt.
640 (define_insn "<sse3>_lddqu<avxsizesuffix>"
641 [(set (match_operand:VI1 0 "register_operand" "=x")
642 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
645 "%vlddqu\t{%1, %0|%0, %1}"
646 [(set_attr "type" "ssemov")
647 (set_attr "movu" "1")
648 (set (attr "prefix_data16")
650 (match_test "TARGET_AVX")
653 (set (attr "prefix_rep")
655 (match_test "TARGET_AVX")
658 (set_attr "prefix" "maybe_vex")
659 (set_attr "mode" "<sseinsnmode>")])
;; Scalar integer store (SWI48 modes) from a general register ("r") to
;; memory, emitted as plain "movnti" with no VEX form; prefix_data16 is
;; fixed at 0.
661 (define_insn "sse2_movnti<mode>"
662 [(set (match_operand:SWI48 0 "memory_operand" "=m")
663 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
666 "movnti\t{%1, %0|%0, %1}"
667 [(set_attr "type" "ssemov")
668 (set_attr "prefix_data16" "0")
669 (set_attr "mode" "<MODE>")])
;; Float vector store from an xmm register to memory, emitted as
;; %vmovnt<ssemodesuffix>.
671 (define_insn "<sse>_movnt<mode>"
672 [(set (match_operand:VF 0 "memory_operand" "=m")
673 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
676 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
677 [(set_attr "type" "ssemov")
678 (set_attr "prefix" "maybe_vex")
679 (set_attr "mode" "<MODE>")])
;; DImode-element vector (VI8) store from an xmm register to memory,
;; emitted as %vmovntdq.  Its "type" is "ssecvt", whereas the float movnt
;; pattern uses "ssemov".
681 (define_insn "<sse2>_movnt<mode>"
682 [(set (match_operand:VI8 0 "memory_operand" "=m")
683 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
686 "%vmovntdq\t{%1, %0|%0, %1}"
687 [(set_attr "type" "ssecvt")
688 (set (attr "prefix_data16")
690 (match_test "TARGET_AVX")
693 (set_attr "prefix" "maybe_vex")
694 (set_attr "mode" "<sseinsnmode>")])
696 ; Expand patterns for non-temporal stores. At the moment, only those
697 ; that directly map to insns are defined; it would be possible to
698 ; define patterns for other modes that would expand to several insns.
700 ;; Modes handled by storent patterns.
;; Per-mode enabling conditions: DI needs SSE2 and a 64-bit target, SI
;; needs SSE2, scalar SF/DF need SSE4A, the 256-bit vector modes need AVX,
;; V2DI and V2DF need SSE2, and V4SF is unconditional.
701 (define_mode_iterator STORENT_MODE
702 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
703 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
704 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
705 (V8SF "TARGET_AVX") V4SF
706 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander over STORENT_MODE: register operand 1 is stored to memory
;; operand 0.
;; NOTE(review): the RTL wrapping operand 1 and the condition are not
;; visible in this excerpt.
708 (define_expand "storent<mode>"
709 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
711 [(match_operand:STORENT_MODE 1 "register_operand" "")]
715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
717 ;; Parallel floating point arithmetic
719 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float-vector expander parameterised on <code>.  Expansion is done
;; entirely by ix86_expand_fp_absneg_operator, followed by DONE, so the
;; RTL template here is never emitted as written.
721 (define_expand "<code><mode>2"
722 [(set (match_operand:VF 0 "register_operand" "")
724 (match_operand:VF 1 "register_operand" "")))]
726 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg on float vectors, kept as one insn until after reload.
;; Operand 3 is the abs-or-neg operator; operand 2 is carried in a `use`.
;; The split combines operand 1 and operand 2 with XOR when operand 3 is
;; NEG and with AND otherwise.  If operand 1 is in memory the two sources
;; are swapped so that the memory operand comes second.
;; Alternatives 0-1 are for non-AVX targets and 2-3 for AVX (see "isa").
;; NOTE(review): some lines of the split body are not visible in this
;; excerpt.
728 (define_insn_and_split "*absneg<mode>2"
729 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
730 (match_operator:VF 3 "absneg_operator"
731 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
732 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
735 "&& reload_completed"
738 enum rtx_code absneg_op;
744 if (MEM_P (operands[1]))
745 op1 = operands[2], op2 = operands[1];
747 op1 = operands[1], op2 = operands[2];
752 if (rtx_equal_p (operands[0], operands[1]))
758 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
759 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
760 t = gen_rtx_SET (VOIDmode, operands[0], t);
764 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Add/subtract expander for float vectors.  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy on the operands, after which
;; the RTL template is emitted.
766 (define_expand "<plusminus_insn><mode>3"
767 [(set (match_operand:VF 0 "register_operand" "")
769 (match_operand:VF 1 "nonimmediate_operand" "")
770 (match_operand:VF 2 "nonimmediate_operand" "")))]
772 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float add/subtract.  Alternative 0 (noavx) is the two-operand
;; form with the destination tied to operand 1; alternative 1 (avx) is the
;; three-operand "v"-prefixed form.  Valid only when
;; ix86_binary_operator_ok accepts the operands.
774 (define_insn "*<plusminus_insn><mode>3"
775 [(set (match_operand:VF 0 "register_operand" "=x,x")
777 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
778 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
779 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
781 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
782 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
783 [(set_attr "isa" "noavx,avx")
784 (set_attr "type" "sseadd")
785 (set_attr "prefix" "orig,vex")
786 (set_attr "mode" "<MODE>")])
;; Add/subtract on 128-bit float vectors using the scalar-suffixed
;; mnemonic (<ssescalarmodesuffix>); the "mode" attribute is the element
;; mode.  Operand 1 must be a register in both alternatives.
;; NOTE(review): the RTL that wraps the arithmetic is not visible in this
;; excerpt.
788 (define_insn "<sse>_vm<plusminus_insn><mode>3"
789 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
792 (match_operand:VF_128 1 "register_operand" "0,x")
793 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
798 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
799 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
800 [(set_attr "isa" "noavx,avx")
801 (set_attr "type" "sseadd")
802 (set_attr "prefix" "orig,vex")
803 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for float vectors; the operands are first passed
;; through ix86_fixup_binary_operands_no_copy with code MULT.
805 (define_expand "mul<mode>3"
806 [(set (match_operand:VF 0 "register_operand" "")
808 (match_operand:VF 1 "nonimmediate_operand" "")
809 (match_operand:VF 2 "nonimmediate_operand" "")))]
811 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed float multiply.  The "%" in operand 1's constraint marks
;; operands 1 and 2 as commutative.  Alternative 0 is the two-operand
;; non-AVX form and alternative 1 the three-operand AVX form.
813 (define_insn "*mul<mode>3"
814 [(set (match_operand:VF 0 "register_operand" "=x,x")
816 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
817 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
818 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
820 mul<ssemodesuffix>\t{%2, %0|%0, %2}
821 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
822 [(set_attr "isa" "noavx,avx")
823 (set_attr "type" "ssemul")
824 (set_attr "prefix" "orig,vex")
825 (set_attr "mode" "<MODE>")])
;; Multiply on 128-bit float vectors using the scalar-suffixed mnemonic;
;; the "mode" attribute is the element mode.
;; NOTE(review): the RTL that wraps the multiply is not visible in this
;; excerpt.
827 (define_insn "<sse>_vmmul<mode>3"
828 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
831 (match_operand:VF_128 1 "register_operand" "0,x")
832 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
837 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
838 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
839 [(set_attr "isa" "noavx,avx")
840 (set_attr "type" "ssemul")
841 (set_attr "prefix" "orig,vex")
842 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the DFmode vectors (VF2).  Only the operand fixup
;; is performed; the dividend must already be in a register.
844 (define_expand "div<mode>3"
845 [(set (match_operand:VF2 0 "register_operand" "")
846 (div:VF2 (match_operand:VF2 1 "register_operand" "")
847 (match_operand:VF2 2 "nonimmediate_operand" "")))]
849 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the SFmode vectors (VF1).  After the operand
;; fixup, ix86_emit_swdivsf is called instead of emitting the div pattern
;; when all of the visible conditions hold: TARGET_RECIP_VEC_DIV, not
;; optimizing for size, finite-math-only, no trapping math, and unsafe
;; math optimizations.
;; NOTE(review): the first term of that condition is not visible in this
;; excerpt.
851 (define_expand "div<mode>3"
852 [(set (match_operand:VF1 0 "register_operand" "")
853 (div:VF1 (match_operand:VF1 1 "register_operand" "")
854 (match_operand:VF1 2 "nonimmediate_operand" "")))]
857 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
860 && TARGET_RECIP_VEC_DIV
861 && !optimize_insn_for_size_p ()
862 && flag_finite_math_only && !flag_trapping_math
863 && flag_unsafe_math_optimizations)
865 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed float divide.  Operand 1 is a register in both alternatives and
;; carries no commutative marker.  Alternative 0 is the non-AVX form and
;; alternative 1 the AVX form.
870 (define_insn "<sse>_div<mode>3"
871 [(set (match_operand:VF 0 "register_operand" "=x,x")
873 (match_operand:VF 1 "register_operand" "0,x")
874 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
877 div<ssemodesuffix>\t{%2, %0|%0, %2}
878 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
879 [(set_attr "isa" "noavx,avx")
880 (set_attr "type" "ssediv")
881 (set_attr "prefix" "orig,vex")
882 (set_attr "mode" "<MODE>")])
;; Divide on 128-bit float vectors using the scalar-suffixed mnemonic;
;; the "mode" attribute is the element mode.
;; NOTE(review): the RTL that wraps the divide is not visible in this
;; excerpt.
884 (define_insn "<sse>_vmdiv<mode>3"
885 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
888 (match_operand:VF_128 1 "register_operand" "0,x")
889 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
894 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
895 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
896 [(set_attr "isa" "noavx,avx")
897 (set_attr "type" "ssediv")
898 (set_attr "prefix" "orig,vex")
899 (set_attr "mode" "<ssescalarmode>")])
;; Packed reciprocal on SFmode vectors, modelled as UNSPEC_RCP and emitted
;; as %vrcpps with a single alternative.
901 (define_insn "<sse>_rcp<mode>2"
902 [(set (match_operand:VF1 0 "register_operand" "=x")
904 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
906 "%vrcpps\t{%1, %0|%0, %1}"
907 [(set_attr "type" "sse")
908 (set_attr "atom_sse_attr" "rcp")
909 (set_attr "prefix" "maybe_vex")
910 (set_attr "mode" "<MODE>")])
;; rcpss / vrcpss on V4SF, with "mode" SF.  Operand 1 is the unspec input;
;; operand 2 is a register that is tied to the destination in the non-AVX
;; alternative and appears as the middle operand of the AVX template.
;; NOTE(review): the RTL combining the two operands is not visible in this
;; excerpt.
912 (define_insn "sse_vmrcpv4sf2"
913 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
915 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
917 (match_operand:V4SF 2 "register_operand" "0,x")
921 rcpss\t{%1, %0|%0, %1}
922 vrcpss\t{%1, %2, %0|%0, %2, %1}"
923 [(set_attr "isa" "noavx,avx")
924 (set_attr "type" "sse")
925 (set_attr "atom_sse_attr" "rcp")
926 (set_attr "prefix" "orig,vex")
927 (set_attr "mode" "SF")])
;; Square-root expander for the DFmode vectors (VF2); no preparation code
;; is visible, so the template is emitted as written.
929 (define_expand "sqrt<mode>2"
930 [(set (match_operand:VF2 0 "register_operand" "")
931 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SFmode vectors (VF1).  When the visible
;; conditions hold (TARGET_RECIP_VEC_SQRT, not optimizing for size,
;; finite-math-only, no trapping math, unsafe math optimizations) it calls
;; ix86_emit_swsqrtsf with a final argument of false.
;; NOTE(review): the first term of that condition is not visible in this
;; excerpt.
934 (define_expand "sqrt<mode>2"
935 [(set (match_operand:VF1 0 "register_operand" "")
936 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
940 && TARGET_RECIP_VEC_SQRT
941 && !optimize_insn_for_size_p ()
942 && flag_finite_math_only && !flag_trapping_math
943 && flag_unsafe_math_optimizations)
945 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed float square root with a single alternative, emitted as
;; %vsqrt<ssemodesuffix>.
950 (define_insn "<sse>_sqrt<mode>2"
951 [(set (match_operand:VF 0 "register_operand" "=x")
952 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
954 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
955 [(set_attr "type" "sse")
956 (set_attr "atom_sse_attr" "sqrt")
957 (set_attr "prefix" "maybe_vex")
958 (set_attr "mode" "<MODE>")])
;; Square root on 128-bit float vectors using the scalar-suffixed
;; mnemonic.  Operand 1 is the input; operand 2 is a register tied to the
;; destination in the non-AVX alternative and used as the middle operand
;; of the AVX template.
;; NOTE(review): the RTL combining the two operands is not visible in this
;; excerpt.
960 (define_insn "<sse>_vmsqrt<mode>2"
961 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
964 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
965 (match_operand:VF_128 2 "register_operand" "0,x")
969 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
970 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
971 [(set_attr "isa" "noavx,avx")
972 (set_attr "type" "sse")
973 (set_attr "atom_sse_attr" "sqrt")
974 (set_attr "prefix" "orig,vex")
975 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for SFmode vectors, modelled as
;; UNSPEC_RSQRT.  The preparation code calls ix86_emit_swsqrtsf with a
;; final argument of true (the sqrt expander passes false).
977 (define_expand "rsqrt<mode>2"
978 [(set (match_operand:VF1 0 "register_operand" "")
980 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
983 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root on SFmode vectors, modelled as
;; UNSPEC_RSQRT and emitted as %vrsqrtps with a single alternative.
987 (define_insn "<sse>_rsqrt<mode>2"
988 [(set (match_operand:VF1 0 "register_operand" "=x")
990 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
992 "%vrsqrtps\t{%1, %0|%0, %1}"
993 [(set_attr "type" "sse")
994 (set_attr "prefix" "maybe_vex")
995 (set_attr "mode" "<MODE>")])
;; rsqrtss / vrsqrtss on V4SF, with "mode" SF.  Same operand layout as
;; sse_vmrcpv4sf2: operand 1 is the unspec input and operand 2 is a
;; register tied to the destination in the non-AVX alternative.
;; NOTE(review): the RTL combining the two operands is not visible in this
;; excerpt.
997 (define_insn "sse_vmrsqrtv4sf2"
998 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1000 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1002 (match_operand:V4SF 2 "register_operand" "0,x")
1006 rsqrtss\t{%1, %0|%0, %1}
1007 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1008 [(set_attr "isa" "noavx,avx")
1009 (set_attr "type" "sse")
1010 (set_attr "prefix" "orig,vex")
1011 (set_attr "mode" "SF")])
1013 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1014 ;; isn't really correct, as those rtl operators aren't defined when
1015 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander for float vectors.  Without -ffinite-math-only,
;; operand 1 is forced into a register first, which matches the
;; register_operand predicate of the non-finite insn pattern below.  The
;; operands are then passed through ix86_fixup_binary_operands_no_copy.
1017 (define_expand "<code><mode>3"
1018 [(set (match_operand:VF 0 "register_operand" "")
1020 (match_operand:VF 1 "nonimmediate_operand" "")
1021 (match_operand:VF 2 "nonimmediate_operand" "")))]
1024 if (!flag_finite_math_only)
1025 operands[1] = force_reg (<MODE>mode, operands[1]);
1026 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used only when flag_finite_math_only is set.  The operands
;; are marked commutative ("%") and operand 1 may be in memory, subject to
;; ix86_binary_operator_ok.
1029 (define_insn "*<code><mode>3_finite"
1030 [(set (match_operand:VF 0 "register_operand" "=x,x")
1032 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1033 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1034 "TARGET_SSE && flag_finite_math_only
1035 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1037 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1038 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1039 [(set_attr "isa" "noavx,avx")
1040 (set_attr "type" "sseadd")
1041 (set_attr "prefix" "orig,vex")
1042 (set_attr "mode" "<MODE>")])
;; Min/max insn used when flag_finite_math_only is clear.  There is no
;; commutative marker and operand 1 must be a register, so the operand
;; order is preserved.
1044 (define_insn "*<code><mode>3"
1045 [(set (match_operand:VF 0 "register_operand" "=x,x")
1047 (match_operand:VF 1 "register_operand" "0,x")
1048 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1049 "TARGET_SSE && !flag_finite_math_only"
1051 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1052 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1053 [(set_attr "isa" "noavx,avx")
1054 (set_attr "type" "sseadd")
1055 (set_attr "prefix" "orig,vex")
1056 (set_attr "mode" "<MODE>")])
;; Min/max on 128-bit float vectors using the scalar-suffixed mnemonic.
;; Its "type" is "sse", whereas the packed min/max patterns use "sseadd".
;; NOTE(review): the RTL that wraps the operation is not visible in this
;; excerpt.
1058 (define_insn "<sse>_vm<code><mode>3"
1059 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1062 (match_operand:VF_128 1 "register_operand" "0,x")
1063 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1068 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1069 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1070 [(set_attr "isa" "noavx,avx")
1071 (set_attr "type" "sse")
1072 (set_attr "prefix" "orig,vex")
1073 (set_attr "mode" "<ssescalarmode>")])
1075 ;; These versions of the min/max patterns implement exactly the operations
1076 ;; min = (op1 < op2 ? op1 : op2)
1077 ;; max = (!(op1 < op2) ? op1 : op2)
1078 ;; Their operands are not commutative, and thus they may be used in the
1079 ;; presence of -0.0 and NaN.
;; Order-preserving packed min: the operands are in a fixed-order operand
;; vector, operand 1 must be a register, and there is no commutative
;; marker.  Emits min<ssemodesuffix> / vmin<ssemodesuffix>.
;; NOTE(review): the unspec tag and the insn condition are not visible in
;; this excerpt.
1081 (define_insn "*ieee_smin<mode>3"
1082 [(set (match_operand:VF 0 "register_operand" "=x,x")
1084 [(match_operand:VF 1 "register_operand" "0,x")
1085 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1089 min<ssemodesuffix>\t{%2, %0|%0, %2}
1090 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1091 [(set_attr "isa" "noavx,avx")
1092 (set_attr "type" "sseadd")
1093 (set_attr "prefix" "orig,vex")
1094 (set_attr "mode" "<MODE>")])
;; Order-preserving packed max; same structure as *ieee_smin<mode>3 but
;; emitting max<ssemodesuffix> / vmax<ssemodesuffix>.
;; NOTE(review): the unspec tag and the insn condition are not visible in
;; this excerpt.
1096 (define_insn "*ieee_smax<mode>3"
1097 [(set (match_operand:VF 0 "register_operand" "=x,x")
1099 [(match_operand:VF 1 "register_operand" "0,x")
1100 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1104 max<ssemodesuffix>\t{%2, %0|%0, %2}
1105 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1106 [(set_attr "isa" "noavx,avx")
1107 (set_attr "type" "sseadd")
1108 (set_attr "prefix" "orig,vex")
1109 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF (AVX only, single three-operand alternative).  The
;; visible RTL includes (minus op1 op2) built from match_dup of both
;; inputs.
;; NOTE(review): the surrounding RTL, presumably a vec_merge with a plus,
;; and the lane mask are not visible in this excerpt.
1111 (define_insn "avx_addsubv4df3"
1112 [(set (match_operand:V4DF 0 "register_operand" "=x")
1115 (match_operand:V4DF 1 "register_operand" "x")
1116 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1117 (minus:V4DF (match_dup 1) (match_dup 2))
1120 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1121 [(set_attr "type" "sseadd")
1122 (set_attr "prefix" "vex")
1123 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF, with a non-AVX two-operand alternative
;; and an AVX three-operand alternative.  Also sets "atom_unit" to
;; "complex".
;; NOTE(review): the surrounding RTL and the lane mask are not visible in
;; this excerpt.
1125 (define_insn "sse3_addsubv2df3"
1126 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1129 (match_operand:V2DF 1 "register_operand" "0,x")
1130 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1131 (minus:V2DF (match_dup 1) (match_dup 2))
1135 addsubpd\t{%2, %0|%0, %2}
1136 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1137 [(set_attr "isa" "noavx,avx")
1138 (set_attr "type" "sseadd")
1139 (set_attr "atom_unit" "complex")
1140 (set_attr "prefix" "orig,vex")
1141 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF (AVX only, single three-operand alternative).
;; NOTE(review): the surrounding RTL and the lane mask are not visible in
;; this excerpt.
1143 (define_insn "avx_addsubv8sf3"
1144 [(set (match_operand:V8SF 0 "register_operand" "=x")
1147 (match_operand:V8SF 1 "register_operand" "x")
1148 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1149 (minus:V8SF (match_dup 1) (match_dup 2))
1152 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1153 [(set_attr "type" "sseadd")
1154 (set_attr "prefix" "vex")
1155 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF.  "prefix_rep" is 1 for the non-AVX
;; alternative and left at its default ("*") for the AVX alternative.
;; NOTE(review): the surrounding RTL and the lane mask are not visible in
;; this excerpt.
1157 (define_insn "sse3_addsubv4sf3"
1158 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1161 (match_operand:V4SF 1 "register_operand" "0,x")
1162 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1163 (minus:V4SF (match_dup 1) (match_dup 2))
1167 addsubps\t{%2, %0|%0, %2}
1168 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1169 [(set_attr "isa" "noavx,avx")
1170 (set_attr "type" "sseadd")
1171 (set_attr "prefix" "orig,vex")
1172 (set_attr "prefix_rep" "1,*")
1173 (set_attr "mode" "V4SF")])
;; vhaddpd / vhsubpd on V4DF (AVX only).  The visible RTL pairs adjacent
;; elements within each source: elements 0 and 1, then 2 and 3, of
;; operand 1 and likewise of operand 2.
;; NOTE(review): the operator and vec_concat lines that combine those
;; pairs are not visible in this excerpt.
1175 (define_insn "avx_h<plusminus_insn>v4df3"
1176 [(set (match_operand:V4DF 0 "register_operand" "=x")
1181 (match_operand:V4DF 1 "register_operand" "x")
1182 (parallel [(const_int 0)]))
1183 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1186 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1187 (parallel [(const_int 0)]))
1188 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1191 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1192 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1194 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1195 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1197 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1198 [(set_attr "type" "sseadd")
1199 (set_attr "prefix" "vex")
1200 (set_attr "mode" "V4DF")])
;; 128-bit packed-double horizontal operation on elements (0,1) of
;; operand 1 and of operand 2.  Alternative 0 (noavx) is the two-operand
;; h<plusminus_mnemonic>pd with operand 1 tied to the destination;
;; alternative 1 (avx) is the three-operand VEX form.
1202 (define_insn "sse3_h<plusminus_insn>v2df3"
1203 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1207 (match_operand:V2DF 1 "register_operand" "0,x")
1208 (parallel [(const_int 0)]))
1209 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1212 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1213 (parallel [(const_int 0)]))
1214 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1217 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1218 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1219 [(set_attr "isa" "noavx,avx")
1220 (set_attr "type" "sseadd")
1221 (set_attr "prefix" "orig,vex")
1222 (set_attr "mode" "V2DF")])
;; 256-bit packed-single horizontal operation.  The template walks the
;; element pairs (0,1) (2,3) of operand 1, then of operand 2, and then
;; the pairs (4,5) (6,7) of operand 1 and of operand 2, and emits
;; vh<plusminus_mnemonic>ps.  VEX encoding only.
1224 (define_insn "avx_h<plusminus_insn>v8sf3"
1225 [(set (match_operand:V8SF 0 "register_operand" "=x")
1231 (match_operand:V8SF 1 "register_operand" "x")
1232 (parallel [(const_int 0)]))
1233 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1235 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1236 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1240 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1241 (parallel [(const_int 0)]))
1242 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1244 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1245 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1249 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1250 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1252 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1253 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1256 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1257 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1259 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1260 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1262 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1263 [(set_attr "type" "sseadd")
1264 (set_attr "prefix" "vex")
1265 (set_attr "mode" "V8SF")])
;; 128-bit packed-single horizontal operation on element pairs (0,1) and
;; (2,3) of operand 1 and of operand 2.  Alternative 0 (noavx) is the
;; two-operand h<plusminus_mnemonic>ps with operand 1 tied to the
;; destination; alternative 1 (avx) is the three-operand VEX form.
1267 (define_insn "sse3_h<plusminus_insn>v4sf3"
1268 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1273 (match_operand:V4SF 1 "register_operand" "0,x")
1274 (parallel [(const_int 0)]))
1275 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1277 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1278 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1282 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1283 (parallel [(const_int 0)]))
1284 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1286 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1287 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1290 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1291 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1292 [(set_attr "isa" "noavx,avx")
1293 (set_attr "type" "sseadd")
1294 (set_attr "atom_unit" "complex")
1295 (set_attr "prefix" "orig,vex")
1296 (set_attr "prefix_rep" "1,*")
1297 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4DF vector into operand 0, in three steps:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = vperm2f128 (tmp, tmp, 1)   (presumably swaps the 128-bit halves)
;;   op0  = tmp + tmp2
1299 (define_expand "reduc_splus_v4df"
1300 [(match_operand:V4DF 0 "register_operand" "")
1301 (match_operand:V4DF 1 "register_operand" "")]
1304 rtx tmp = gen_reg_rtx (V4DFmode);
1305 rtx tmp2 = gen_reg_rtx (V4DFmode);
1306 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1307 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1308 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
1312 (define_expand "reduc_splus_v2df"
1313 [(match_operand:V2DF 0 "register_operand" "")
1314 (match_operand:V2DF 1 "register_operand" "")]
1317 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V8SF vector into operand 0:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = hadd (tmp, tmp)
;;   tmp  = vperm2f128 (tmp2, tmp2, 1)  (presumably swaps the 128-bit halves)
;;   op0  = tmp + tmp2
;; Note that tmp is reused as the destination of the permute.
1321 (define_expand "reduc_splus_v8sf"
1322 [(match_operand:V8SF 0 "register_operand" "")
1323 (match_operand:V8SF 1 "register_operand" "")]
1326 rtx tmp = gen_reg_rtx (V8SFmode);
1327 rtx tmp2 = gen_reg_rtx (V8SFmode);
1328 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1329 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1330 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1331 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  Two expansions appear in the body:
;; two chained horizontal adds (sse3_haddv4sf3), or the generic
;; ix86_expand_reduc helper driven by gen_addv4sf3.
;; NOTE(review): the test that selects between them is not on the lines
;; shown here -- presumably SSE3 availability; confirm.
1335 (define_expand "reduc_splus_v4sf"
1336 [(match_operand:V4SF 0 "register_operand" "")
1337 (match_operand:V4SF 1 "register_operand" "")]
1342 rtx tmp = gen_reg_rtx (V4SFmode);
1343 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1344 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1347 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1351 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The 256-bit integer modes are enabled by TARGET_AVX2, the 256-bit
;; float modes by TARGET_AVX, and V4SF by TARGET_SSE.
1352 (define_mode_iterator REDUC_SMINMAX_MODE
1353 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1354 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1355 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1356 (V4SF "TARGET_SSE")])
;; Signed max/min reduction (smaxmin code iterator) for every mode in
;; REDUC_SMINMAX_MODE.  All the work is delegated to ix86_expand_reduc,
;; which receives the generator for the matching element-wise
;; <code><mode>3 pattern.
1358 (define_expand "reduc_<code>_<mode>"
1359 [(smaxmin:REDUC_SMINMAX_MODE
1360 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1361 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1364 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction for the VI_256 modes, delegated to ix86_expand_reduc with
;; the generator for the matching <code><mode>3 pattern.
;; NOTE(review): the code iterator wrapping the operands is not on the
;; lines shown here -- presumably the unsigned max/min codes; confirm.
1368 (define_expand "reduc_<code>_<mode>"
1370 (match_operand:VI_256 0 "register_operand" "")
1371 (match_operand:VI_256 1 "register_operand" ""))]
1374 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction of a V8HI vector, delegated to
;; ix86_expand_reduc with gen_uminv8hi3 as the element-wise operation.
1378 (define_expand "reduc_umin_v8hi"
1380 (match_operand:V8HI 0 "register_operand" "")
1381 (match_operand:V8HI 1 "register_operand" ""))]
1384 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1390 ;; Parallel floating point comparisons
1392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate immediate.  Operand 3
;; must satisfy const_0_to_31_operand and is emitted as the immediate of
;; vcmp<ssemodesuffix>; one immediate byte is accounted for in the
;; length.
1394 (define_insn "avx_cmp<mode>3"
1395 [(set (match_operand:VF 0 "register_operand" "=x")
1397 [(match_operand:VF 1 "register_operand" "x")
1398 (match_operand:VF 2 "nonimmediate_operand" "xm")
1399 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1402 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1403 [(set_attr "type" "ssecmp")
1404 (set_attr "length_immediate" "1")
1405 (set_attr "prefix" "vex")
1406 (set_attr "mode" "<MODE>")])
;; Scalar form of the AVX compare with predicate immediate: the operands
;; are 128-bit vectors (VF_128), the mnemonic uses the scalar suffix
;; <ssescalarmodesuffix>, and the insn mode is <ssescalarmode>.
1408 (define_insn "avx_vmcmp<mode>3"
1409 [(set (match_operand:VF_128 0 "register_operand" "=x")
1412 [(match_operand:VF_128 1 "register_operand" "x")
1413 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1414 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1419 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1420 [(set_attr "type" "ssecmp")
1421 (set_attr "length_immediate" "1")
1422 (set_attr "prefix" "vex")
1423 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare for commutative comparison codes only:
;; the condition requires operator 3 to be of class RTX_COMM_COMPARE, so
;; operand 1 carries the '%' commutative marker.  The predicate name is
;; printed from the operator via %D3.
1425 (define_insn "*<sse>_maskcmp<mode>3_comm"
1426 [(set (match_operand:VF 0 "register_operand" "=x,x")
1427 (match_operator:VF 3 "sse_comparison_operator"
1428 [(match_operand:VF 1 "register_operand" "%0,x")
1429 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1431 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1433 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1434 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1435 [(set_attr "isa" "noavx,avx")
1436 (set_attr "type" "ssecmp")
1437 (set_attr "length_immediate" "1")
1438 (set_attr "prefix" "orig,vex")
1439 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare for any sse_comparison_operator.
;; Unlike the _comm variant, operand 1 is not marked commutative.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative
;; 1 (avx) is the three-operand VEX form.
1441 (define_insn "<sse>_maskcmp<mode>3"
1442 [(set (match_operand:VF 0 "register_operand" "=x,x")
1443 (match_operator:VF 3 "sse_comparison_operator"
1444 [(match_operand:VF 1 "register_operand" "0,x")
1445 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1448 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1449 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1450 [(set_attr "isa" "noavx,avx")
1451 (set_attr "type" "ssecmp")
1452 (set_attr "length_immediate" "1")
1453 (set_attr "prefix" "orig,vex")
1454 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare on 128-bit vector operands (VF_128).
;; It prints cmp%D3 / vcmp%D3 with the scalar suffix; the insn mode is
;; <ssescalarmode>.  length_immediate is given only for alternative 0.
1456 (define_insn "<sse>_vmmaskcmp<mode>3"
1457 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1459 (match_operator:VF_128 3 "sse_comparison_operator"
1460 [(match_operand:VF_128 1 "register_operand" "0,x")
1461 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1466 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1467 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1468 [(set_attr "isa" "noavx,avx")
1469 (set_attr "type" "ssecmp")
1470 (set_attr "length_immediate" "1,*")
1471 (set_attr "prefix" "orig,vex")
1472 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode.  Both operands are
;; full vectors of mode <ssevecmode> with an element-0 selector; operand
;; 1 may be memory.  Emits comi<suffix>, with a v prefix under AVX (%v).
;; prefix_data16 is chosen according to whether the insn mode is DF.
1474 (define_insn "<sse>_comi"
1475 [(set (reg:CCFP FLAGS_REG)
1478 (match_operand:<ssevecmode> 0 "register_operand" "x")
1479 (parallel [(const_int 0)]))
1481 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1482 (parallel [(const_int 0)]))))]
1483 "SSE_FLOAT_MODE_P (<MODE>mode)"
1484 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1485 [(set_attr "type" "ssecomi")
1486 (set_attr "prefix" "maybe_vex")
1487 (set_attr "prefix_rep" "0")
1488 (set (attr "prefix_data16")
1489 (if_then_else (eq_attr "mode" "DF")
1491 (const_string "0")))
1492 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but the flags are set in CCFPU mode and the
;; emitted mnemonic is ucomi<suffix> (with a v prefix under AVX).
1494 (define_insn "<sse>_ucomi"
1495 [(set (reg:CCFPU FLAGS_REG)
1498 (match_operand:<ssevecmode> 0 "register_operand" "x")
1499 (parallel [(const_int 0)]))
1501 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1502 (parallel [(const_int 0)]))))]
1503 "SSE_FLOAT_MODE_P (<MODE>mode)"
1504 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1505 [(set_attr "type" "ssecomi")
1506 (set_attr "prefix" "maybe_vex")
1507 (set_attr "prefix_rep" "0")
1508 (set (attr "prefix_data16")
1509 (if_then_else (eq_attr "mode" "DF")
1511 (const_string "0")))
1512 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 256-bit modes: operands 4 and 5 are the
;; floating-point values compared by operator 3, operands 1 and 2 are
;; the two data inputs, and operand 0 is the result.  The data mode and
;; the compare mode must have the same number of elements.  Expansion is
;; done by ix86_expand_fp_vcond.
1514 (define_expand "vcond<V_256:mode><VF_256:mode>"
1515 [(set (match_operand:V_256 0 "register_operand" "")
1517 (match_operator 3 ""
1518 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1519 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1520 (match_operand:V_256 1 "general_operand" "")
1521 (match_operand:V_256 2 "general_operand" "")))]
1523 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1524 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1526 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the 256-bit vcond expander: same operand
;; layout, same element-count requirement between the data mode and the
;; compare mode, and the same delegation to ix86_expand_fp_vcond.
1531 (define_expand "vcond<V_128:mode><VF_128:mode>"
1532 [(set (match_operand:V_128 0 "register_operand" "")
1534 (match_operator 3 ""
1535 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1536 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1537 (match_operand:V_128 1 "general_operand" "")
1538 (match_operand:V_128 2 "general_operand" "")))]
1540 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1541 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1543 bool ok = ix86_expand_fp_vcond (operands);
1548 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1550 ;; Parallel floating point logical operations
1552 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not.  The assembler template is built at output time in a
;; static buffer: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the mode's own
;; suffix otherwise; which_alternative picks the legacy two-operand or
;; the VEX three-operand form.
1554 (define_insn "<sse>_andnot<mode>3"
1555 [(set (match_operand:VF 0 "register_operand" "=x,x")
1558 (match_operand:VF 1 "register_operand" "0,x"))
1559 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1562 static char buf[32];
1565 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1567 switch (which_alternative)
1570 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1573 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1579 snprintf (buf, sizeof (buf), insn, suffix);
1582 [(set_attr "isa" "noavx,avx")
1583 (set_attr "type" "sselog")
1584 (set_attr "prefix" "orig,vex")
1585 (set_attr "mode" "<MODE>")])
;; Expander for the packed logical operations on VF modes.  Both inputs
;; may be memory at this point; ix86_fixup_binary_operands_no_copy is
;; called to adjust the operands before the insn pattern is matched.
1587 (define_expand "<code><mode>3"
1588 [(set (match_operand:VF 0 "register_operand" "")
1590 (match_operand:VF 1 "nonimmediate_operand" "")
1591 (match_operand:VF 2 "nonimmediate_operand" "")))]
1593 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed logical operation on VF modes, valid when TARGET_SSE holds and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is marked
;; commutative ('%').  The template is built at output time in a static
;; buffer: suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, the
;; mode's own suffix otherwise; which_alternative picks the legacy or
;; the VEX form.
1595 (define_insn "*<code><mode>3"
1596 [(set (match_operand:VF 0 "register_operand" "=x,x")
1598 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1599 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1600 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1602 static char buf[32];
1605 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1607 switch (which_alternative)
1610 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1613 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1619 snprintf (buf, sizeof (buf), insn, suffix);
1622 [(set_attr "isa" "noavx,avx")
1623 (set_attr "type" "sselog")
1624 (set_attr "prefix" "orig,vex")
1625 (set_attr "mode" "<MODE>")])
;; Vector copysign built from logical operations.  Operand 3 is the mask
;; returned by ix86_build_signbit_mask; operands 4 and 5 are fresh
;; temporaries.  The template combines (not mask) with operand 1 and
;; (and mask operand 2), then sets operand 0 to the ior of the two
;; temporaries.
1627 (define_expand "copysign<mode>3"
1630 (not:VF (match_dup 3))
1631 (match_operand:VF 1 "nonimmediate_operand" "")))
1633 (and:VF (match_dup 3)
1634 (match_operand:VF 2 "nonimmediate_operand" "")))
1635 (set (match_operand:VF 0 "register_operand" "")
1636 (ior:VF (match_dup 4) (match_dup 5)))]
1639 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1641 operands[4] = gen_reg_rtx (<MODE>mode);
1642 operands[5] = gen_reg_rtx (<MODE>mode);
1645 ;; Also define scalar versions. These are used for abs, neg, and
1646 ;; conditional move. Using subregs into vector modes causes register
1647 ;; allocation lossage. These patterns do not allow memory operands
1648 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not on registers only, emitted with the packed
;; mnemonic: the suffix is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
;; and <ssevecmodesuffix> otherwise.  The insn mode attribute is the
;; corresponding vector mode.
1650 (define_insn "*andnot<mode>3"
1651 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1654 (match_operand:MODEF 1 "register_operand" "0,x"))
1655 (match_operand:MODEF 2 "register_operand" "x,x")))]
1656 "SSE_FLOAT_MODE_P (<MODE>mode)"
1658 static char buf[32];
1661 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1663 switch (which_alternative)
1666 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1669 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1675 snprintf (buf, sizeof (buf), insn, suffix);
1678 [(set_attr "isa" "noavx,avx")
1679 (set_attr "type" "sselog")
1680 (set_attr "prefix" "orig,vex")
1681 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation on registers only, emitted with the
;; packed mnemonic.  Operand 1 is marked commutative ('%').  The suffix
;; is chosen the same way as in the scalar and-not pattern, and the insn
;; mode attribute is the corresponding vector mode.
1683 (define_insn "*<code><mode>3"
1684 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1686 (match_operand:MODEF 1 "register_operand" "%0,x")
1687 (match_operand:MODEF 2 "register_operand" "x,x")))]
1688 "SSE_FLOAT_MODE_P (<MODE>mode)"
1690 static char buf[32];
1693 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1695 switch (which_alternative)
1698 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1701 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1707 snprintf (buf, sizeof (buf), insn, suffix);
1710 [(set_attr "isa" "noavx,avx")
1711 (set_attr "type" "sselog")
1712 (set_attr "prefix" "orig,vex")
1713 (set_attr "mode" "<ssevecmode>")])
1715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1717 ;; FMA floating point multiply/accumulate instructions. These include
1718 ;; scalar versions of the instructions as well as vector versions.
1720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1722 ;; The standard names for scalar FMA are only available with SSE math enabled.
;; SF and DF are gated on TARGET_SSE_MATH; the four vector modes carry
;; no per-mode condition.
1723 (define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
1724 (DF "TARGET_SSE_MATH")
1725 V4SF V2DF V8SF V4DF])
;; Standard-named fused multiply-add expander over FMAMODEM, available
;; with either FMA or FMA4.  All three inputs may be memory here and
;; none of them is negated.
1727 (define_expand "fma<mode>4"
1728 [(set (match_operand:FMAMODEM 0 "register_operand")
1730 (match_operand:FMAMODEM 1 "nonimmediate_operand")
1731 (match_operand:FMAMODEM 2 "nonimmediate_operand")
1732 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
1733 "TARGET_FMA || TARGET_FMA4")
;; Standard-named fused multiply-subtract expander over FMAMODEM:
;; operand 3 (the addend) is wrapped in neg.
1735 (define_expand "fms<mode>4"
1736 [(set (match_operand:FMAMODEM 0 "register_operand")
1738 (match_operand:FMAMODEM 1 "nonimmediate_operand")
1739 (match_operand:FMAMODEM 2 "nonimmediate_operand")
1740 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
1741 "TARGET_FMA || TARGET_FMA4")
;; Standard-named negated multiply-add expander over FMAMODEM:
;; operand 1 (a multiplicand) is wrapped in neg.
1743 (define_expand "fnma<mode>4"
1744 [(set (match_operand:FMAMODEM 0 "register_operand")
1746 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
1747 (match_operand:FMAMODEM 2 "nonimmediate_operand")
1748 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
1749 "TARGET_FMA || TARGET_FMA4")
;; Standard-named negated multiply-subtract expander over FMAMODEM:
;; both operand 1 and operand 3 are wrapped in neg.
1751 (define_expand "fnms<mode>4"
1752 [(set (match_operand:FMAMODEM 0 "register_operand")
1754 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
1755 (match_operand:FMAMODEM 2 "nonimmediate_operand")
1756 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
1757 "TARGET_FMA || TARGET_FMA4")
1759 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same six modes as FMAMODEM, but SF and DF carry no TARGET_SSE_MATH
;; condition here.
1760 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Named expander for the multiply-add builtins over FMAMODE.  It has no
;; preparation code, so matching is left to the insn patterns in this
;; file.  Enabled with either FMA or FMA4.
1762 (define_expand "fma4i_fmadd_<mode>"
1763 [(set (match_operand:FMAMODE 0 "register_operand")
1765 (match_operand:FMAMODE 1 "nonimmediate_operand")
1766 (match_operand:FMAMODE 2 "nonimmediate_operand")
1767 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1768 "TARGET_FMA || TARGET_FMA4")
;; Fused multiply-add with five alternatives.  Alternatives 0-2 are the
;; FMA forms 132, 213 and 231, where one input is tied to the
;; destination; alternatives 3-4 are the FMA4 four-operand form, with
;; the memory operand allowed in position 3 or position 2.
1770 (define_insn "*fma_fmadd_<mode>"
1771 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1773 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1774 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1775 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1776 "TARGET_FMA || TARGET_FMA4"
1778 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1779 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1780 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1781 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1782 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1783 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1784 (set_attr "type" "ssemuladd")
1785 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract.  Same alternative layout as *fma_fmadd_:
;; FMA forms 132/213/231 in alternatives 0-2 and the FMA4 four-operand
;; form in alternatives 3-4.  Emits vfmsub*.
1787 (define_insn "*fma_fmsub_<mode>"
1788 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1790 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1791 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1793 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1794 "TARGET_FMA || TARGET_FMA4"
1796 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1797 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1798 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1799 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1800 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1801 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1802 (set_attr "type" "ssemuladd")
1803 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add.  Same alternative layout as *fma_fmadd_:
;; FMA forms 132/213/231 in alternatives 0-2 and the FMA4 four-operand
;; form in alternatives 3-4.  Emits vfnmadd*.
1805 (define_insn "*fma_fnmadd_<mode>"
1806 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1809 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1810 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1811 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1812 "TARGET_FMA || TARGET_FMA4"
1814 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1815 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1816 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1817 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1818 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1819 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1820 (set_attr "type" "ssemuladd")
1821 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract.  Same alternative layout as
;; *fma_fmadd_: FMA forms 132/213/231 in alternatives 0-2 and the FMA4
;; four-operand form in alternatives 3-4.  Emits vfnmsub*.
1823 (define_insn "*fma_fnmsub_<mode>"
1824 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1827 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1828 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1830 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1831 "TARGET_FMA || TARGET_FMA4"
1833 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1834 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1835 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1836 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1837 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1838 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1839 (set_attr "type" "ssemuladd")
1840 (set_attr "mode" "<MODE>")])
1842 ;; FMA parallel floating point multiply addsub and subadd operations.
1844 ;; It would be possible to represent these without the UNSPEC as
1847 ;; (fma op1 op2 op3)
1848 ;; (fma op1 op2 (neg op3))
1851 ;; But this doesn't seem useful in practice.
;; Named expander for the packed multiply add/sub operation on VF modes.
;; The three inputs are collected in an operand vector (an unspec, per
;; the comment above) and may be memory.  Enabled with FMA or FMA4.
1853 (define_expand "fmaddsub_<mode>"
1854 [(set (match_operand:VF 0 "register_operand")
1856 [(match_operand:VF 1 "nonimmediate_operand")
1857 (match_operand:VF 2 "nonimmediate_operand")
1858 (match_operand:VF 3 "nonimmediate_operand")]
1860 "TARGET_FMA || TARGET_FMA4")
;; Packed multiply add/sub.  Alternatives 0-2 are the FMA forms 132, 213
;; and 231; alternatives 3-4 are the FMA4 four-operand form.  Emits
;; vfmaddsub*.
1862 (define_insn "*fma_fmaddsub_<mode>"
1863 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1865 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1866 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1867 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
1869 "TARGET_FMA || TARGET_FMA4"
1871 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1872 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1873 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1874 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1875 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1876 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1877 (set_attr "type" "ssemuladd")
1878 (set_attr "mode" "<MODE>")])
;; Packed multiply sub/add.  Same alternative layout as
;; *fma_fmaddsub_: FMA forms 132/213/231, then the FMA4 four-operand
;; form.  Emits vfmsubadd*.
1880 (define_insn "*fma_fmsubadd_<mode>"
1881 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1883 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1884 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1886 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
1888 "TARGET_FMA || TARGET_FMA4"
1890 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1891 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1892 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1893 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1894 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1895 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1896 (set_attr "type" "ssemuladd")
1897 (set_attr "mode" "<MODE>")])
1899 ;; FMA3 floating point scalar intrinsics. These merge result with
1900 ;; high-order elements from the destination register.
;; Named expander for the FMA3 scalar multiply-add intrinsic on 128-bit
;; vector modes (VF_128).  All three inputs may be memory at expand
;; time.
1902 (define_expand "fmai_vmfmadd_<mode>"
1903 [(set (match_operand:VF_128 0 "register_operand")
1906 (match_operand:VF_128 1 "nonimmediate_operand")
1907 (match_operand:VF_128 2 "nonimmediate_operand")
1908 (match_operand:VF_128 3 "nonimmediate_operand"))
;; FMA3 scalar multiply-add.  Operand 1 is tied to the destination in
;; both alternatives; the memory operand may be operand 2 (132 form) or
;; operand 3 (213 form).
1913 (define_insn "*fmai_fmadd_<mode>"
1914 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1917 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1918 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
1919 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
1924 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1925 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1926 [(set_attr "type" "ssemuladd")
1927 (set_attr "mode" "<MODE>")])
;; FMA3 scalar multiply-subtract.  Operand 1 is tied to the destination;
;; the alternatives are the 132 and 213 forms of vfmsub.
1929 (define_insn "*fmai_fmsub_<mode>"
1930 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1933 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1934 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
1936 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
1941 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1942 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1943 [(set_attr "type" "ssemuladd")
1944 (set_attr "mode" "<MODE>")])
;; FMA3 scalar negated multiply-add.  Operand 2 is listed before
;; operand 1 in the template; operand 1 remains the one tied to the
;; destination.  The alternatives are the 132 and 213 forms of vfnmadd.
1946 (define_insn "*fmai_fnmadd_<mode>"
1947 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1951 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
1952 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1953 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
1958 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1959 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1960 [(set_attr "type" "ssemuladd")
1961 (set_attr "mode" "<MODE>")])
;; FMA3 scalar negated multiply-subtract.  Operand 2 is listed before
;; operand 1 in the template; operand 1 remains the one tied to the
;; destination.  The alternatives are the 132 and 213 forms of vfnmsub.
1963 (define_insn "*fmai_fnmsub_<mode>"
1964 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1968 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
1969 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1971 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
1976 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1977 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1978 [(set_attr "type" "ssemuladd")
1979 (set_attr "mode" "<MODE>")])
1981 ;; FMA4 floating point scalar intrinsics. These write the
1982 ;; entire destination register, with the high-order elements zeroed.
;; Named expander for the FMA4 scalar multiply-add intrinsic on VF_128
;; modes.  The preparation code sets operand 4 to the all-zero constant
;; of the vector mode.
1984 (define_expand "fma4i_vmfmadd_<mode>"
1985 [(set (match_operand:VF_128 0 "register_operand")
1988 (match_operand:VF_128 1 "nonimmediate_operand")
1989 (match_operand:VF_128 2 "nonimmediate_operand")
1990 (match_operand:VF_128 3 "nonimmediate_operand"))
1995 operands[4] = CONST0_RTX (<MODE>mode);
;; FMA4 scalar multiply-add in four-operand form.  The destination is
;; not tied to any input, operand 4 must be a zero constant, and the
;; memory operand may be operand 3 (alternative 0) or operand 2
;; (alternative 1).
1998 (define_insn "*fma4i_vmfmadd_<mode>"
1999 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2002 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2003 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2004 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2005 (match_operand:VF_128 4 "const0_operand" "")
2008 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2009 [(set_attr "type" "ssemuladd")
2010 (set_attr "mode" "<MODE>")])
;; FMA4 scalar multiply-subtract in four-operand form, with the same
;; constraints as *fma4i_vmfmadd_.  Emits vfmsub with the scalar suffix.
2012 (define_insn "*fma4i_vmfmsub_<mode>"
2013 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2016 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2017 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2019 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2020 (match_operand:VF_128 4 "const0_operand" "")
2023 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2024 [(set_attr "type" "ssemuladd")
2025 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-add in four-operand form, with the same
;; constraints as *fma4i_vmfmadd_.  Emits vfnmadd with the scalar
;; suffix.
2027 (define_insn "*fma4i_vmfnmadd_<mode>"
2028 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2032 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2033 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2034 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2035 (match_operand:VF_128 4 "const0_operand" "")
2038 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2039 [(set_attr "type" "ssemuladd")
2040 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-subtract in four-operand form, with the
;; same constraints as *fma4i_vmfmadd_.  Emits vfnmsub with the scalar
;; suffix.
2042 (define_insn "*fma4i_vmfnmsub_<mode>"
2043 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2047 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2048 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2050 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2051 (match_operand:VF_128 4 "const0_operand" "")
2054 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2055 [(set_attr "type" "ssemuladd")
2056 (set_attr "mode" "<MODE>")])
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2060 ;; Parallel single-precision floating point conversion operations
2062 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts a V2SI value in an MMX register or memory (operand 2) to
;; V2SF.  Operand 1 is a V4SF register tied to the destination.  Emits
;; cvtpi2ps; there is no VEX alternative.
2064 (define_insn "sse_cvtpi2ps"
2065 [(set (match_operand:V4SF 0 "register_operand" "=x")
2068 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2069 (match_operand:V4SF 1 "register_operand" "0")
2072 "cvtpi2ps\t{%2, %0|%0, %2}"
2073 [(set_attr "type" "ssecvt")
2074 (set_attr "mode" "V4SF")])
;; Converts a V4SF source (register or memory) through a V4SI unspec and
;; keeps elements 0 and 1, writing a V2SI result to an MMX register.
;; Emits cvtps2pi.
2076 (define_insn "sse_cvtps2pi"
2077 [(set (match_operand:V2SI 0 "register_operand" "=y")
2079 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2081 (parallel [(const_int 0) (const_int 1)])))]
2083 "cvtps2pi\t{%1, %0|%0, %1}"
2084 [(set_attr "type" "ssecvt")
2085 (set_attr "unit" "mmx")
2086 (set_attr "mode" "DI")])
;; Truncating variant: applies fix:V4SI to the V4SF source and keeps
;; elements 0 and 1, writing a V2SI result to an MMX register.  Emits
;; cvttps2pi.
2088 (define_insn "sse_cvttps2pi"
2089 [(set (match_operand:V2SI 0 "register_operand" "=y")
2091 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2092 (parallel [(const_int 0) (const_int 1)])))]
2094 "cvttps2pi\t{%1, %0|%0, %1}"
2095 [(set_attr "type" "ssecvt")
2096 (set_attr "unit" "mmx")
2097 (set_attr "prefix_rep" "0")
2098 (set_attr "mode" "SF")])
;; Converts a 32-bit integer (operand 2) to SF, with operand 1 as the
;; V4SF input vector.  Alternatives 0-1 (noavx) take the integer from a
;; register or from memory and tie operand 1 to the destination;
;; alternative 2 (avx) is the three-operand form.  The register and
;; memory cases are kept apart so the decode attributes can differ.
2100 (define_insn "sse_cvtsi2ss"
2101 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2104 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2105 (match_operand:V4SF 1 "register_operand" "0,0,x")
2109 cvtsi2ss\t{%2, %0|%0, %2}
2110 cvtsi2ss\t{%2, %0|%0, %2}
2111 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2112 [(set_attr "isa" "noavx,noavx,avx")
2113 (set_attr "type" "sseicvt")
2114 (set_attr "athlon_decode" "vector,double,*")
2115 (set_attr "amdfam10_decode" "vector,double,*")
2116 (set_attr "bdver1_decode" "double,direct,*")
2117 (set_attr "prefix" "orig,orig,vex")
2118 (set_attr "mode" "SF")])
;; 64-bit integer variant of sse_cvtsi2ss: operand 2 is DImode and the
;; pattern requires TARGET_64BIT.  The legacy alternatives set
;; prefix_rex to 1; the AVX alternative sets length_vex to 4.
2120 (define_insn "sse_cvtsi2ssq"
2121 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2124 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2125 (match_operand:V4SF 1 "register_operand" "0,0,x")
2127 "TARGET_SSE && TARGET_64BIT"
2129 cvtsi2ssq\t{%2, %0|%0, %2}
2130 cvtsi2ssq\t{%2, %0|%0, %2}
2131 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2132 [(set_attr "isa" "noavx,noavx,avx")
2133 (set_attr "type" "sseicvt")
2134 (set_attr "athlon_decode" "vector,double,*")
2135 (set_attr "amdfam10_decode" "vector,double,*")
2136 (set_attr "bdver1_decode" "double,direct,*")
2137 (set_attr "length_vex" "*,*,4")
2138 (set_attr "prefix_rex" "1,1,*")
2139 (set_attr "prefix" "orig,orig,vex")
2140 (set_attr "mode" "SF")])
;; Converts element 0 of a V4SF operand (register or memory) to a 32-bit
;; integer via UNSPEC_FIX_NOTRUNC.  Emits cvtss2si, with a v prefix
;; under AVX (%v).
2142 (define_insn "sse_cvtss2si"
2143 [(set (match_operand:SI 0 "register_operand" "=r,r")
2146 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2147 (parallel [(const_int 0)]))]
2148 UNSPEC_FIX_NOTRUNC))]
2150 "%vcvtss2si\t{%1, %0|%0, %1}"
2151 [(set_attr "type" "sseicvt")
2152 (set_attr "athlon_decode" "double,vector")
2153 (set_attr "bdver1_decode" "double,double")
2154 (set_attr "prefix_rep" "1")
2155 (set_attr "prefix" "maybe_vex")
2156 (set_attr "mode" "SI")])
;; Same conversion as sse_cvtss2si, but the source is a plain SF operand
;; rather than element 0 of a V4SF vector.
2158 (define_insn "sse_cvtss2si_2"
2159 [(set (match_operand:SI 0 "register_operand" "=r,r")
2160 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2161 UNSPEC_FIX_NOTRUNC))]
2163 "%vcvtss2si\t{%1, %0|%0, %1}"
2164 [(set_attr "type" "sseicvt")
2165 (set_attr "athlon_decode" "double,vector")
2166 (set_attr "amdfam10_decode" "double,double")
2167 (set_attr "bdver1_decode" "double,double")
2168 (set_attr "prefix_rep" "1")
2169 (set_attr "prefix" "maybe_vex")
2170 (set_attr "mode" "SI")])
;; 64-bit result variant of sse_cvtss2si: the destination is DImode and
;; the pattern requires TARGET_64BIT.  The {q} suffix is printed only in
;; the AT&T dialect.
2172 (define_insn "sse_cvtss2siq"
2173 [(set (match_operand:DI 0 "register_operand" "=r,r")
2176 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2177 (parallel [(const_int 0)]))]
2178 UNSPEC_FIX_NOTRUNC))]
2179 "TARGET_SSE && TARGET_64BIT"
2180 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2181 [(set_attr "type" "sseicvt")
2182 (set_attr "athlon_decode" "double,vector")
2183 (set_attr "bdver1_decode" "double,double")
2184 (set_attr "prefix_rep" "1")
2185 (set_attr "prefix" "maybe_vex")
2186 (set_attr "mode" "DI")])
;; 64-bit result variant of sse_cvtss2si_2: plain SF source, DImode
;; destination, and TARGET_64BIT required.
2188 (define_insn "sse_cvtss2siq_2"
2189 [(set (match_operand:DI 0 "register_operand" "=r,r")
2190 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2191 UNSPEC_FIX_NOTRUNC))]
2192 "TARGET_SSE && TARGET_64BIT"
2193 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2194 [(set_attr "type" "sseicvt")
2195 (set_attr "athlon_decode" "double,vector")
2196 (set_attr "amdfam10_decode" "double,double")
2197 (set_attr "bdver1_decode" "double,double")
2198 (set_attr "prefix_rep" "1")
2199 (set_attr "prefix" "maybe_vex")
2200 (set_attr "mode" "DI")])
;; Truncating conversion of element 0 of a V4SF operand to a 32-bit
;; integer.  Emits cvttss2si, with a v prefix under AVX (%v).
2202 (define_insn "sse_cvttss2si"
2203 [(set (match_operand:SI 0 "register_operand" "=r,r")
2206 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2207 (parallel [(const_int 0)]))))]
2209 "%vcvttss2si\t{%1, %0|%0, %1}"
2210 [(set_attr "type" "sseicvt")
2211 (set_attr "athlon_decode" "double,vector")
2212 (set_attr "amdfam10_decode" "double,double")
2213 (set_attr "bdver1_decode" "double,double")
2214 (set_attr "prefix_rep" "1")
2215 (set_attr "prefix" "maybe_vex")
2216 (set_attr "mode" "SI")])
;; 64-bit result variant of sse_cvttss2si: DImode destination and
;; TARGET_64BIT required.  The {q} suffix is printed only in the AT&T
;; dialect.
2218 (define_insn "sse_cvttss2siq"
2219 [(set (match_operand:DI 0 "register_operand" "=r,r")
2222 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2223 (parallel [(const_int 0)]))))]
2224 "TARGET_SSE && TARGET_64BIT"
2225 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2226 [(set_attr "type" "sseicvt")
2227 (set_attr "athlon_decode" "double,vector")
2228 (set_attr "amdfam10_decode" "double,double")
2229 (set_attr "bdver1_decode" "double,double")
2230 (set_attr "prefix_rep" "1")
2231 (set_attr "prefix" "maybe_vex")
2232 (set_attr "mode" "DI")])
;; cvtdq2ps: operand 0 is a VF1-mode register (VF1 iterates over V8SF under
;; TARGET_AVX and V4SF); operand 1 is the matching <sseintvecmode> value in a
;; register or memory ("xm").  The insn mode attribute is <sseinsnmode>.
;; NOTE(review): embedded numbers 2236 and 2238 are absent from this listing;
;; the conversion rtx and the enable condition are not visible here.
2234 (define_insn "float<sseintvecmodelower><mode>2"
2235 [(set (match_operand:VF1 0 "register_operand" "=x")
2237 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2239 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2240 [(set_attr "type" "ssecvt")
2241 (set_attr "prefix" "maybe_vex")
2242 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the floatuns pattern on VF1 modes.  Available when TARGET_SSE2
;; and either the mode is V4SFmode or TARGET_AVX2 is set.  All of the work is
;; delegated to ix86_expand_vector_convert_uns_vsivsf (dest, src).
;; NOTE(review): the lines after the helper call (embedded 2250-2251) are not
;; visible in this listing.
2244 (define_expand "floatuns<sseintvecmodelower><mode>2"
2245 [(match_operand:VF1 0 "register_operand" "")
2246 (match_operand:<sseintvecmode> 1 "register_operand" "")]
2247 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2249 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; vcvtps2dq: V8SF source (register or memory) to a V8SI register, expressed
;; as (unspec:V8SI [...] UNSPEC_FIX_NOTRUNC).  Always VEX-encoded
;; (prefix "vex"), insn mode OI.
;; NOTE(review): the enable condition (embedded 2257) is not visible here.
2253 (define_insn "avx_cvtps2dq256"
2254 [(set (match_operand:V8SI 0 "register_operand" "=x")
2255 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2256 UNSPEC_FIX_NOTRUNC))]
2258 "vcvtps2dq\t{%1, %0|%0, %1}"
2259 [(set_attr "type" "ssecvt")
2260 (set_attr "prefix" "vex")
2261 (set_attr "mode" "OI")])
;; cvtps2dq: V4SF source (register or memory) to a V4SI register, expressed
;; as (unspec:V4SI [...] UNSPEC_FIX_NOTRUNC).  The prefix_data16 attribute is
;; computed from a TARGET_AVX test; "1" is one of the two outcomes.
;; NOTE(review): embedded numbers 2267, 2271 and 2273 are absent from this
;; listing, so the enable condition and the other prefix_data16 arm are not
;; visible here.
2263 (define_insn "sse2_cvtps2dq"
2264 [(set (match_operand:V4SI 0 "register_operand" "=x")
2265 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2266 UNSPEC_FIX_NOTRUNC))]
2268 "%vcvtps2dq\t{%1, %0|%0, %1}"
2269 [(set_attr "type" "ssecvt")
2270 (set (attr "prefix_data16")
2272 (match_test "TARGET_AVX")
2274 (const_string "1")))
2275 (set_attr "prefix" "maybe_vex")
2276 (set_attr "mode" "TI")])
;; vcvttps2dq: (fix:V8SI ...) of a V8SF register-or-memory operand into a V8SI
;; register.  Always VEX-encoded (prefix "vex"), insn mode OI.
;; NOTE(review): the enable condition (embedded 2281) is not visible here.
2278 (define_insn "fix_truncv8sfv8si2"
2279 [(set (match_operand:V8SI 0 "register_operand" "=x")
2280 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2282 "vcvttps2dq\t{%1, %0|%0, %1}"
2283 [(set_attr "type" "ssecvt")
2284 (set_attr "prefix" "vex")
2285 (set_attr "mode" "OI")])
;; cvttps2dq: (fix:V4SI ...) of a V4SF register-or-memory operand into a V4SI
;; register.  prefix_rep and prefix_data16 are both computed from a
;; TARGET_AVX test.
;; NOTE(review): prefix_data16 is given twice in this attribute list -- once
;; as the TARGET_AVX-dependent (set (attr ...)) form and once as a plain
;; (set_attr "prefix_data16" "0").  Confirm which one is intended to apply.
;; NOTE(review): embedded numbers 2290, 2294, 2296, 2299 and 2301 are absent
;; from this listing (enable condition and parts of the conditionals).
2287 (define_insn "fix_truncv4sfv4si2"
2288 [(set (match_operand:V4SI 0 "register_operand" "=x")
2289 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2291 "%vcvttps2dq\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "ssecvt")
2293 (set (attr "prefix_rep")
2295 (match_test "TARGET_AVX")
2297 (const_string "1")))
2298 (set (attr "prefix_data16")
2300 (match_test "TARGET_AVX")
2302 (const_string "0")))
2303 (set_attr "prefix_data16" "0")
2304 (set_attr "prefix" "maybe_vex")
2305 (set_attr "mode" "TI")])
;; Expander for unsigned float->int truncation on VF1 modes, built from the
;; signed conversion:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts operand 1 and also fills
;;      tmp[2] through its second argument;
;;   2. the signed fix_trunc pattern converts the adjusted value into tmp[1];
;;   3. tmp[1] is XORed with tmp[2] to produce operand 0.
;; NOTE(review): the enable condition and the declaration of tmp (embedded
;; 2310-2312) are not visible in this listing.
2307 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2308 [(match_operand:<sseintvecmode> 0 "register_operand" "")
2309 (match_operand:VF1 1 "register_operand" "")]
2313 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2314 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2315 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2316 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2320 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2322 ;; Parallel double-precision floating point conversion operations
2324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF ...) of a V2SI operand (constraints "y,m") into a
;; V2DF register.  Only the first alternative is marked unit "mmx" and
;; prefix_data16 "1"; the memory alternative leaves both as "*".
;; NOTE(review): the enable condition (embedded 2329) is not visible here.
2326 (define_insn "sse2_cvtpi2pd"
2327 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2328 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2330 "cvtpi2pd\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "ssecvt")
2332 (set_attr "unit" "mmx,*")
2333 (set_attr "prefix_data16" "1,*")
2334 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF register-or-memory source to a V2SI destination with
;; constraint "y", expressed as (unspec:V2SI [...] UNSPEC_FIX_NOTRUNC).
;; Marked unit "mmx" with a data16 prefix; insn mode DI.
;; NOTE(review): the enable condition (embedded 2340) is not visible here.
2336 (define_insn "sse2_cvtpd2pi"
2337 [(set (match_operand:V2SI 0 "register_operand" "=y")
2338 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2339 UNSPEC_FIX_NOTRUNC))]
2341 "cvtpd2pi\t{%1, %0|%0, %1}"
2342 [(set_attr "type" "ssecvt")
2343 (set_attr "unit" "mmx")
2344 (set_attr "bdver1_decode" "double")
2345 (set_attr "prefix_data16" "1")
2346 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI ...) of a V2DF register-or-memory source into a V2SI
;; destination with constraint "y".  Marked unit "mmx" with a data16 prefix;
;; insn mode TI.
;; NOTE(review): the enable condition (embedded 2351) is not visible here.
2348 (define_insn "sse2_cvttpd2pi"
2349 [(set (match_operand:V2SI 0 "register_operand" "=y")
2350 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2352 "cvttpd2pi\t{%1, %0|%0, %1}"
2353 [(set_attr "type" "ssecvt")
2354 (set_attr "unit" "mmx")
2355 (set_attr "bdver1_decode" "double")
2356 (set_attr "prefix_data16" "1")
2357 (set_attr "mode" "TI")])
;; cvtsi2sd: (float:DF ...) of SImode operand 2 combined with V2DF operand 1
;; into V2DF operand 0.  Three alternatives:
;;   0, 1 (isa noavx): operand 1 is tied to operand 0, two-operand cvtsi2sd;
;;   2    (isa avx):   separate operand 1, three-operand vcvtsi2sd.
;; NOTE(review): embedded numbers 2361-2362 and 2365-2367 are absent from this
;; listing; the combining RTL and the enable condition are not visible here.
2359 (define_insn "sse2_cvtsi2sd"
2360 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2363 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2364 (match_operand:V2DF 1 "register_operand" "0,0,x")
2368 cvtsi2sd\t{%2, %0|%0, %2}
2369 cvtsi2sd\t{%2, %0|%0, %2}
2370 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2371 [(set_attr "isa" "noavx,noavx,avx")
2372 (set_attr "type" "sseicvt")
2373 (set_attr "athlon_decode" "double,direct,*")
2374 (set_attr "amdfam10_decode" "vector,double,*")
2375 (set_attr "bdver1_decode" "double,direct,*")
2376 (set_attr "prefix" "orig,orig,vex")
2377 (set_attr "mode" "DF")])
;; cvtsi2sdq: (float:DF ...) of DImode operand 2 combined with V2DF operand 1
;; into V2DF operand 0.  Enabled only when TARGET_SSE2 && TARGET_64BIT.
;; Alternatives 0 and 1 (isa noavx) tie operand 1 to operand 0 and set
;; prefix_rex "1"; alternative 2 (isa avx) is the three-operand vcvtsi2sdq
;; with length_vex "4".
;; NOTE(review): embedded numbers 2381-2382, 2385 and 2387 are absent from
;; this listing; the combining RTL is not visible here.
2379 (define_insn "sse2_cvtsi2sdq"
2380 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2383 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2384 (match_operand:V2DF 1 "register_operand" "0,0,x")
2386 "TARGET_SSE2 && TARGET_64BIT"
2388 cvtsi2sdq\t{%2, %0|%0, %2}
2389 cvtsi2sdq\t{%2, %0|%0, %2}
2390 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2391 [(set_attr "isa" "noavx,noavx,avx")
2392 (set_attr "type" "sseicvt")
2393 (set_attr "athlon_decode" "double,direct,*")
2394 (set_attr "amdfam10_decode" "vector,double,*")
2395 (set_attr "bdver1_decode" "double,direct,*")
2396 (set_attr "length_vex" "*,*,4")
2397 (set_attr "prefix_rex" "1,1,*")
2398 (set_attr "prefix" "orig,orig,vex")
2399 (set_attr "mode" "DF")])
;; cvtsd2si: writes an SImode general register from a V2DF operand
;; (constraints "x,m") using element selector 0, tagged UNSPEC_FIX_NOTRUNC.
;; NOTE(review): embedded numbers 2403-2404 and 2408 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2401 (define_insn "sse2_cvtsd2si"
2402 [(set (match_operand:SI 0 "register_operand" "=r,r")
2405 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2406 (parallel [(const_int 0)]))]
2407 UNSPEC_FIX_NOTRUNC))]
2409 "%vcvtsd2si\t{%1, %0|%0, %1}"
2410 [(set_attr "type" "sseicvt")
2411 (set_attr "athlon_decode" "double,vector")
2412 (set_attr "bdver1_decode" "double,double")
2413 (set_attr "prefix_rep" "1")
2414 (set_attr "prefix" "maybe_vex")
2415 (set_attr "mode" "SI")])
;; cvtsd2si with a scalar DFmode source (constraints "x,m"): the SImode
;; result is expressed as (unspec:SI [...] UNSPEC_FIX_NOTRUNC).
;; NOTE(review): the enable condition (embedded 2421) is not visible here.
2417 (define_insn "sse2_cvtsd2si_2"
2418 [(set (match_operand:SI 0 "register_operand" "=r,r")
2419 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2420 UNSPEC_FIX_NOTRUNC))]
2422 "%vcvtsd2si\t{%1, %0|%0, %1}"
2423 [(set_attr "type" "sseicvt")
2424 (set_attr "athlon_decode" "double,vector")
2425 (set_attr "amdfam10_decode" "double,double")
2426 (set_attr "bdver1_decode" "double,double")
2427 (set_attr "prefix_rep" "1")
2428 (set_attr "prefix" "maybe_vex")
2429 (set_attr "mode" "SI")])
;; cvtsd2si{q}: writes a DImode general register from a V2DF operand
;; (constraints "x,m") using element selector 0, tagged UNSPEC_FIX_NOTRUNC.
;; Enabled only when TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): embedded numbers 2433-2434 are absent from this listing, so
;; the RTL that wraps operand 1 is not visible here.
2431 (define_insn "sse2_cvtsd2siq"
2432 [(set (match_operand:DI 0 "register_operand" "=r,r")
2435 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2436 (parallel [(const_int 0)]))]
2437 UNSPEC_FIX_NOTRUNC))]
2438 "TARGET_SSE2 && TARGET_64BIT"
2439 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2440 [(set_attr "type" "sseicvt")
2441 (set_attr "athlon_decode" "double,vector")
2442 (set_attr "bdver1_decode" "double,double")
2443 (set_attr "prefix_rep" "1")
2444 (set_attr "prefix" "maybe_vex")
2445 (set_attr "mode" "DI")])
;; cvtsd2si{q} with a scalar DFmode source (constraints "x,m"): the DImode
;; result is expressed as (unspec:DI [...] UNSPEC_FIX_NOTRUNC).
;; Enabled only when TARGET_SSE2 && TARGET_64BIT.
2447 (define_insn "sse2_cvtsd2siq_2"
2448 [(set (match_operand:DI 0 "register_operand" "=r,r")
2449 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2450 UNSPEC_FIX_NOTRUNC))]
2451 "TARGET_SSE2 && TARGET_64BIT"
2452 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2453 [(set_attr "type" "sseicvt")
2454 (set_attr "athlon_decode" "double,vector")
2455 (set_attr "amdfam10_decode" "double,double")
2456 (set_attr "bdver1_decode" "double,double")
2457 (set_attr "prefix_rep" "1")
2458 (set_attr "prefix" "maybe_vex")
2459 (set_attr "mode" "DI")])
;; cvttsd2si: writes an SImode general register from a V2DF operand
;; (constraints "x,m") using element selector 0.
;; NOTE(review): embedded numbers 2463-2464 and 2467 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2461 (define_insn "sse2_cvttsd2si"
2462 [(set (match_operand:SI 0 "register_operand" "=r,r")
2465 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2466 (parallel [(const_int 0)]))))]
2468 "%vcvttsd2si\t{%1, %0|%0, %1}"
2469 [(set_attr "type" "sseicvt")
2470 (set_attr "athlon_decode" "double,vector")
2471 (set_attr "amdfam10_decode" "double,double")
2472 (set_attr "bdver1_decode" "double,double")
2473 (set_attr "prefix_rep" "1")
2474 (set_attr "prefix" "maybe_vex")
2475 (set_attr "mode" "SI")])
;; cvttsd2si{q}: writes a DImode general register from a V2DF operand
;; (constraints "x,m") using element selector 0.
;; Enabled only when TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): embedded numbers 2479-2480 are absent from this listing, so
;; the RTL that wraps operand 1 is not visible here.
2477 (define_insn "sse2_cvttsd2siq"
2478 [(set (match_operand:DI 0 "register_operand" "=r,r")
2481 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2482 (parallel [(const_int 0)]))))]
2483 "TARGET_SSE2 && TARGET_64BIT"
2484 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2485 [(set_attr "type" "sseicvt")
2486 (set_attr "athlon_decode" "double,vector")
2487 (set_attr "amdfam10_decode" "double,double")
2488 (set_attr "bdver1_decode" "double,double")
2489 (set_attr "prefix_rep" "1")
2490 (set_attr "prefix" "maybe_vex")
2491 (set_attr "mode" "DI")])
;; vcvtdq2pd: (float:V4DF ...) of a V4SI register-or-memory operand into a
;; V4DF register.  Always VEX-encoded (prefix "vex").
;; NOTE(review): the enable condition (embedded 2496) is not visible here.
2493 (define_insn "floatv4siv4df2"
2494 [(set (match_operand:V4DF 0 "register_operand" "=x")
2495 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2497 "vcvtdq2pd\t{%1, %0|%0, %1}"
2498 [(set_attr "type" "ssecvt")
2499 (set_attr "prefix" "vex")
2500 (set_attr "mode" "V4DF")])
;; vcvtdq2pd taking its input from a V8SI operand with element selector
;; 0,1,2,3; the result is a V4DF register.  Operand 1 is printed with the %x
;; modifier in both syntaxes.
;; NOTE(review): embedded numbers 2504-2505 and 2509 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2502 (define_insn "avx_cvtdq2pd256_2"
2503 [(set (match_operand:V4DF 0 "register_operand" "=x")
2506 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2507 (parallel [(const_int 0) (const_int 1)
2508 (const_int 2) (const_int 3)]))))]
2510 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2511 [(set_attr "type" "ssecvt")
2512 (set_attr "prefix" "vex")
2513 (set_attr "mode" "V4DF")])
;; cvtdq2pd taking its input from a V4SI operand with element selector 0,1;
;; the result is a V2DF register.  The Intel-syntax half of the template
;; prints operand 1 with the %q modifier.
;; NOTE(review): embedded numbers 2517-2518 and 2521 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2515 (define_insn "sse2_cvtdq2pd"
2516 [(set (match_operand:V2DF 0 "register_operand" "=x")
2519 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2520 (parallel [(const_int 0) (const_int 1)]))))]
2522 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2523 [(set_attr "type" "ssecvt")
2524 (set_attr "prefix" "maybe_vex")
2525 (set_attr "mode" "V2DF")])
;; vcvtpd2dq{y}: V4DF register-or-memory source to a V4SI register, expressed
;; as (unspec:V4SI [...] UNSPEC_FIX_NOTRUNC).  Always VEX-encoded; mode OI.
;; NOTE(review): the enable condition (embedded 2531) is not visible here.
2527 (define_insn "avx_cvtpd2dq256"
2528 [(set (match_operand:V4SI 0 "register_operand" "=x")
2529 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2530 UNSPEC_FIX_NOTRUNC))]
2532 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2533 [(set_attr "type" "ssecvt")
2534 (set_attr "prefix" "vex")
2535 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from an unspec:V4SI conversion of V4DF
;; operand 1.  The preparation statement sets operands[2] to the V4SImode
;; zero constant.
;; NOTE(review): embedded numbers 2539 and 2541-2543 are absent from this
;; listing; how operand 2 is combined with the conversion is not visible here.
2537 (define_expand "avx_cvtpd2dq256_2"
2538 [(set (match_operand:V8SI 0 "register_operand" "")
2540 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2544 "operands[2] = CONST0_RTX (V4SImode);")
;; vcvtpd2dq{y} matcher with a V8SI destination: an unspec:V4SI conversion of
;; V4DF operand 1 appears together with operand 2, which must satisfy
;; const0_operand in V4SI.  The destination is printed with the %x modifier.
;; NOTE(review): embedded numbers 2548, 2550 and 2552 are absent from this
;; listing; the combining RTL and the enable condition are not visible here.
2546 (define_insn "*avx_cvtpd2dq256_2"
2547 [(set (match_operand:V8SI 0 "register_operand" "=x")
2549 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2551 (match_operand:V4SI 2 "const0_operand" "")))]
2553 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2554 [(set_attr "type" "ssecvt")
2555 (set_attr "prefix" "vex")
2556 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from an unspec:V2SI conversion of V2DF
;; operand 1.  The preparation statement sets operands[2] to the V2SImode
;; zero constant.
;; NOTE(review): embedded numbers 2560 and 2562-2564 are absent from this
;; listing; how operand 2 is combined with the conversion is not visible here.
2558 (define_expand "sse2_cvtpd2dq"
2559 [(set (match_operand:V4SI 0 "register_operand" "")
2561 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2565 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq matcher with a V4SI destination: an unspec:V2SI conversion of
;; V2DF operand 1 appears together with operand 2, which must satisfy
;; const0_operand in V2SI.  The output code returns either the
;; "vcvtpd2dq{x}" or the plain "cvtpd2dq" template.
;; NOTE(review): embedded numbers 2569, 2571, 2573-2575, 2577 and 2579 are
;; absent from this listing; the enable condition and the test that chooses
;; between the two templates are not visible here.
2567 (define_insn "*sse2_cvtpd2dq"
2568 [(set (match_operand:V4SI 0 "register_operand" "=x")
2570 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2572 (match_operand:V2SI 2 "const0_operand" "")))]
2576 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2578 return "cvtpd2dq\t{%1, %0|%0, %1}";
2580 [(set_attr "type" "ssecvt")
2581 (set_attr "prefix_rep" "1")
2582 (set_attr "prefix_data16" "0")
2583 (set_attr "prefix" "maybe_vex")
2584 (set_attr "mode" "TI")
2585 (set_attr "amdfam10_decode" "double")
2586 (set_attr "athlon_decode" "vector")
2587 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: (fix:V4SI ...) of a V4DF register-or-memory operand into a
;; V4SI register.  Always VEX-encoded (prefix "vex"), insn mode OI.
;; NOTE(review): the enable condition (embedded 2592) is not visible here.
2589 (define_insn "fix_truncv4dfv4si2"
2590 [(set (match_operand:V4SI 0 "register_operand" "=x")
2591 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2593 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2594 [(set_attr "type" "ssecvt")
2595 (set_attr "prefix" "vex")
2596 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from (fix:V4SI ...) of V4DF operand 1.
;; The preparation statement sets operands[2] to the V4SImode zero constant.
;; NOTE(review): embedded numbers 2600 and 2602-2603 are absent from this
;; listing; how operand 2 is combined with the conversion is not visible here.
2598 (define_expand "avx_cvttpd2dq256_2"
2599 [(set (match_operand:V8SI 0 "register_operand" "")
2601 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2604 "operands[2] = CONST0_RTX (V4SImode);")
;; vcvttpd2dq{y} matcher with a V8SI destination: (fix:V4SI ...) of V4DF
;; operand 1 appears together with operand 2, which must satisfy
;; const0_operand in V4SI.  The destination is printed with the %x modifier.
;; NOTE(review): embedded numbers 2608 and 2611 are absent from this listing;
;; the combining RTL and the enable condition are not visible here.
2606 (define_insn "*avx_cvttpd2dq256_2"
2607 [(set (match_operand:V8SI 0 "register_operand" "=x")
2609 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2610 (match_operand:V4SI 2 "const0_operand" "")))]
2612 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2613 [(set_attr "type" "ssecvt")
2614 (set_attr "prefix" "vex")
2615 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from (fix:V2SI ...) of V2DF operand 1.
;; The preparation statement sets operands[2] to the V2SImode zero constant.
;; NOTE(review): embedded numbers 2619 and 2621-2622 are absent from this
;; listing; how operand 2 is combined with the conversion is not visible here.
2617 (define_expand "sse2_cvttpd2dq"
2618 [(set (match_operand:V4SI 0 "register_operand" "")
2620 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2623 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq matcher with a V4SI destination: (fix:V2SI ...) of V2DF operand 1
;; appears together with operand 2, which must satisfy const0_operand in V2SI.
;; The output code returns either the "vcvttpd2dq{x}" or the plain
;; "cvttpd2dq" template.
;; NOTE(review): embedded numbers 2627, 2630-2632, 2634 and 2636 are absent
;; from this listing; the enable condition and the test that chooses between
;; the two templates are not visible here.
2625 (define_insn "*sse2_cvttpd2dq"
2626 [(set (match_operand:V4SI 0 "register_operand" "=x")
2628 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2629 (match_operand:V2SI 2 "const0_operand" "")))]
2633 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2635 return "cvttpd2dq\t{%1, %0|%0, %1}";
2637 [(set_attr "type" "ssecvt")
2638 (set_attr "amdfam10_decode" "double")
2639 (set_attr "athlon_decode" "vector")
2640 (set_attr "bdver1_decode" "double")
2641 (set_attr "prefix" "maybe_vex")
2642 (set_attr "mode" "TI")])
;; cvtsd2ss: (float_truncate:V2SF ...) of V2DF operand 2 combined with V4SF
;; operand 1 into V4SF operand 0.  Three alternatives:
;;   0, 1 (isa noavx): operand 1 is tied to operand 0, two-operand cvtsd2ss;
;;   2    (isa avx):   separate operand 1, three-operand vcvtsd2ss.
;; NOTE(review): embedded numbers 2646-2647 and 2651-2653 are absent from this
;; listing; the combining RTL and the enable condition are not visible here.
2644 (define_insn "sse2_cvtsd2ss"
2645 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2648 (float_truncate:V2SF
2649 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2650 (match_operand:V4SF 1 "register_operand" "0,0,x")
2654 cvtsd2ss\t{%2, %0|%0, %2}
2655 cvtsd2ss\t{%2, %0|%0, %2}
2656 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2657 [(set_attr "isa" "noavx,noavx,avx")
2658 (set_attr "type" "ssecvt")
2659 (set_attr "athlon_decode" "vector,double,*")
2660 (set_attr "amdfam10_decode" "vector,double,*")
2661 (set_attr "bdver1_decode" "direct,direct,*")
2662 (set_attr "prefix" "orig,orig,vex")
2663 (set_attr "mode" "SF")])
;; cvtss2sd: takes V4SF operand 2 with element selector 0,1 and combines the
;; converted value with V2DF operand 1 into V2DF operand 0.  Three
;; alternatives:
;;   0, 1 (isa noavx): operand 1 is tied to operand 0, two-operand cvtss2sd;
;;   2    (isa avx):   separate operand 1, three-operand vcvtss2sd.
;; NOTE(review): embedded numbers 2667-2669 and 2673-2675 are absent from this
;; listing; the conversion/combining RTL and the enable condition are not
;; visible here.
2665 (define_insn "sse2_cvtss2sd"
2666 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2670 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2671 (parallel [(const_int 0) (const_int 1)])))
2672 (match_operand:V2DF 1 "register_operand" "0,0,x")
2676 cvtss2sd\t{%2, %0|%0, %2}
2677 cvtss2sd\t{%2, %0|%0, %2}
2678 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2679 [(set_attr "isa" "noavx,noavx,avx")
2680 (set_attr "type" "ssecvt")
2681 (set_attr "amdfam10_decode" "vector,double,*")
2682 (set_attr "athlon_decode" "direct,direct,*")
2683 (set_attr "bdver1_decode" "direct,direct,*")
2684 (set_attr "prefix" "orig,orig,vex")
2685 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: (float_truncate:V4SF ...) of a V4DF register-or-memory
;; operand into a V4SF register.  Always VEX-encoded (prefix "vex").
;; NOTE(review): the enable condition (embedded 2691) is not visible here.
2687 (define_insn "avx_cvtpd2ps256"
2688 [(set (match_operand:V4SF 0 "register_operand" "=x")
2689 (float_truncate:V4SF
2690 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2692 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2693 [(set_attr "type" "ssecvt")
2694 (set_attr "prefix" "vex")
2695 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result from (float_truncate:V2SF ...) of V2DF
;; operand 1.  The preparation statement sets operands[2] to the V2SFmode
;; zero constant.
;; NOTE(review): embedded numbers 2699 and 2702-2703 are absent from this
;; listing; how operand 2 is combined with the conversion is not visible here.
2697 (define_expand "sse2_cvtpd2ps"
2698 [(set (match_operand:V4SF 0 "register_operand" "")
2700 (float_truncate:V2SF
2701 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2704 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps matcher with a V4SF destination: (float_truncate:V2SF ...) of V2DF
;; operand 1 appears together with operand 2, which must satisfy
;; const0_operand in V2SF.  The output code returns either the
;; "vcvtpd2ps{x}" or the plain "cvtpd2ps" template.
;; NOTE(review): embedded numbers 2708, 2712-2714, 2716 and 2718 are absent
;; from this listing; the enable condition and the test that chooses between
;; the two templates are not visible here.
2706 (define_insn "*sse2_cvtpd2ps"
2707 [(set (match_operand:V4SF 0 "register_operand" "=x")
2709 (float_truncate:V2SF
2710 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2711 (match_operand:V2SF 2 "const0_operand" "")))]
2715 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2717 return "cvtpd2ps\t{%1, %0|%0, %1}";
2719 [(set_attr "type" "ssecvt")
2720 (set_attr "amdfam10_decode" "double")
2721 (set_attr "athlon_decode" "vector")
2722 (set_attr "bdver1_decode" "double")
2723 (set_attr "prefix_data16" "1")
2724 (set_attr "prefix" "maybe_vex")
2725 (set_attr "mode" "V4SF")])
;; vcvtps2pd: V4SF register-or-memory operand converted into a V4DF register.
;; Always VEX-encoded (prefix "vex").
;; NOTE(review): embedded numbers 2729 and 2731 are absent from this listing;
;; the conversion rtx and the enable condition are not visible here.
2727 (define_insn "avx_cvtps2pd256"
2728 [(set (match_operand:V4DF 0 "register_operand" "=x")
2730 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2732 "vcvtps2pd\t{%1, %0|%0, %1}"
2733 [(set_attr "type" "ssecvt")
2734 (set_attr "prefix" "vex")
2735 (set_attr "mode" "V4DF")])
;; vcvtps2pd taking its input from a V8SF operand with element selector
;; 0,1,2,3; the result is a V4DF register.  Operand 1 is printed with the %x
;; modifier in both syntaxes.
;; NOTE(review): embedded numbers 2739-2740 and 2744 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2737 (define_insn "*avx_cvtps2pd256_2"
2738 [(set (match_operand:V4DF 0 "register_operand" "=x")
2741 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2742 (parallel [(const_int 0) (const_int 1)
2743 (const_int 2) (const_int 3)]))))]
2745 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2746 [(set_attr "type" "ssecvt")
2747 (set_attr "prefix" "vex")
2748 (set_attr "mode" "V4DF")])
;; cvtps2pd taking its input from a V4SF operand with element selector 0,1;
;; the result is a V2DF register.  The Intel-syntax half of the template
;; prints operand 1 with the %q modifier.
;; NOTE(review): embedded numbers 2752-2753 and 2756 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2750 (define_insn "sse2_cvtps2pd"
2751 [(set (match_operand:V2DF 0 "register_operand" "=x")
2754 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2755 (parallel [(const_int 0) (const_int 1)]))))]
2757 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2758 [(set_attr "type" "ssecvt")
2759 (set_attr "amdfam10_decode" "direct")
2760 (set_attr "athlon_decode" "double")
2761 (set_attr "bdver1_decode" "double")
2762 (set_attr "prefix_data16" "0")
2763 (set_attr "prefix" "maybe_vex")
2764 (set_attr "mode" "V2DF")])
;; Expander in two steps.  The first step uses selector 6,7,2,3 on an
;; expression involving V4SF operand 1; the second sets V2DF operand 0 using
;; selector 0,1.  operands[2] is a fresh V4SFmode pseudo used as the
;; intermediate.
;; NOTE(review): embedded numbers 2767-2770, 2775-2777 and 2779 are absent
;; from this listing; most of the RTL template and the enable condition are
;; not visible here.
2766 (define_expand "vec_unpacks_hi_v4sf"
2771 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2772 (parallel [(const_int 6) (const_int 7)
2773 (const_int 2) (const_int 3)])))
2774 (set (match_operand:V2DF 0 "register_operand" "")
2778 (parallel [(const_int 0) (const_int 1)]))))]
2780 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander in two steps.  The first step applies selector 4,5,6,7 to V8SF
;; operand 1; the second sets V4DF operand 0.  operands[2] is a fresh
;; V4SFmode pseudo used as the intermediate.
;; NOTE(review): embedded numbers 2783-2784 and 2789-2791 are absent from this
;; listing; parts of the RTL template and the enable condition are not
;; visible here.
2782 (define_expand "vec_unpacks_hi_v8sf"
2785 (match_operand:V8SF 1 "nonimmediate_operand" "")
2786 (parallel [(const_int 4) (const_int 5)
2787 (const_int 6) (const_int 7)])))
2788 (set (match_operand:V4DF 0 "register_operand" "")
2792 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander setting V2DF operand 0 from V4SF operand 1 with element selector
;; 0,1.
;; NOTE(review): embedded numbers 2796-2797 and 2800 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2794 (define_expand "vec_unpacks_lo_v4sf"
2795 [(set (match_operand:V2DF 0 "register_operand" "")
2798 (match_operand:V4SF 1 "nonimmediate_operand" "")
2799 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander setting V4DF operand 0 from V8SF operand 1 with element selector
;; 0,1,2,3.
;; NOTE(review): embedded numbers 2804-2805 and 2809 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2802 (define_expand "vec_unpacks_lo_v8sf"
2803 [(set (match_operand:V4DF 0 "register_operand" "")
2806 (match_operand:V8SF 1 "nonimmediate_operand" "")
2807 (parallel [(const_int 0) (const_int 1)
2808 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to a float vector mode with half as many
;; elements: V8HI -> V4SF, V4SI -> V2DF, V16HI -> V8SF, V8SI -> V4DF.
2811 (define_mode_attr sseunpackfltmode
2812 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Expander over VI2_AVX2 modes: unpacks operand 1 with
;; gen_vec_unpacks_hi_<mode> into a temporary of <sseunpackmode>, then sets
;; operand 0 (<sseunpackfltmode>) to the FLOAT of that temporary.
;; NOTE(review): the enable condition and closing lines (embedded 2817-2818,
;; 2824-2825) are not visible in this listing.
2814 (define_expand "vec_unpacks_float_hi_<mode>"
2815 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2816 (match_operand:VI2_AVX2 1 "register_operand" "")]
2819 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2821 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2822 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2823 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2 modes: unpacks operand 1 with
;; gen_vec_unpacks_lo_<mode> into a temporary of <sseunpackmode>, then sets
;; operand 0 (<sseunpackfltmode>) to the FLOAT of that temporary.
;; NOTE(review): the enable condition and closing lines (embedded 2830-2831,
;; 2837-2838) are not visible in this listing.
2827 (define_expand "vec_unpacks_float_lo_<mode>"
2828 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2829 (match_operand:VI2_AVX2 1 "register_operand" "")]
2832 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2834 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2835 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2836 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2 modes: unpacks operand 1 with
;; gen_vec_unpacku_hi_<mode> into a temporary of <sseunpackmode>, then sets
;; operand 0 (<sseunpackfltmode>) to the FLOAT of that temporary.
;; NOTE(review): the enable condition and closing lines (embedded 2843-2844,
;; 2850-2851) are not visible in this listing.
2840 (define_expand "vec_unpacku_float_hi_<mode>"
2841 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2842 (match_operand:VI2_AVX2 1 "register_operand" "")]
2845 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2847 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2848 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2849 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX2 modes: unpacks operand 1 with
;; gen_vec_unpacku_lo_<mode> into a temporary of <sseunpackmode>, then sets
;; operand 0 (<sseunpackfltmode>) to the FLOAT of that temporary.
;; NOTE(review): the enable condition and closing lines (embedded 2856-2857,
;; 2863-2864) are not visible in this listing.
2853 (define_expand "vec_unpacku_float_lo_<mode>"
2854 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2855 (match_operand:VI2_AVX2 1 "register_operand" "")]
2858 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2860 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2861 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2862 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander in two steps.  The first step applies selector 2,3,2,3 to V4SI
;; operand 1; the second sets V2DF operand 0 using selector 0,1.
;; operands[2] is a fresh V4SImode pseudo used as the intermediate.
;; NOTE(review): embedded numbers 2867-2868, 2873-2875 and 2877 are absent
;; from this listing; parts of the RTL template and the enable condition are
;; not visible here.
2866 (define_expand "vec_unpacks_float_hi_v4si"
2869 (match_operand:V4SI 1 "nonimmediate_operand" "")
2870 (parallel [(const_int 2) (const_int 3)
2871 (const_int 2) (const_int 3)])))
2872 (set (match_operand:V2DF 0 "register_operand" "")
2876 (parallel [(const_int 0) (const_int 1)]))))]
2878 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V2DF operand 0 from V4SI operand 1 with element selector
;; 0,1.
;; NOTE(review): embedded numbers 2882-2883 and 2886 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2880 (define_expand "vec_unpacks_float_lo_v4si"
2881 [(set (match_operand:V2DF 0 "register_operand" "")
2884 (match_operand:V4SI 1 "nonimmediate_operand" "")
2885 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander in two steps.  The first step applies selector 4,5,6,7 to V8SI
;; operand 1; the second sets V4DF operand 0.  operands[2] is a fresh
;; V4SImode pseudo used as the intermediate.
;; NOTE(review): embedded numbers 2889-2890 and 2895-2897 are absent from this
;; listing; parts of the RTL template and the enable condition are not
;; visible here.
2888 (define_expand "vec_unpacks_float_hi_v8si"
2891 (match_operand:V8SI 1 "nonimmediate_operand" "")
2892 (parallel [(const_int 4) (const_int 5)
2893 (const_int 6) (const_int 7)])))
2894 (set (match_operand:V4DF 0 "register_operand" "")
2898 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V4DF operand 0 from V8SI operand 1 with element selector
;; 0,1,2,3.
;; NOTE(review): embedded numbers 2902-2903 and 2907 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
2900 (define_expand "vec_unpacks_float_lo_v8si"
2901 [(set (match_operand:V4DF 0 "register_operand" "")
2904 (match_operand:V8SI 1 "nonimmediate_operand" "")
2905 (parallel [(const_int 0) (const_int 1)
2906 (const_int 2) (const_int 3)]))))]
;; Expander converting the upper part of V4SI operand 1 (selector 2,3,2,3,
;; then 0,1) to V2DF operand 0 as an unsigned value.  Visible steps:
;;   - (lt:V2DF op6 op3): compare the converted value against operand 3,
;;     which is the V2DF zero vector;
;;   - (and:V2DF op7 op4): mask operand 4, a V2DF vector built from
;;     ldexp (1.0, 32);
;;   - (plus:V2DF op6 op8): add the masked correction to produce operand 0.
;; operands[5] is a V4SImode pseudo; operands[6..8] are V2DFmode pseudos.
;; NOTE(review): many embedded numbers are absent from this listing (e.g.
;; 2910-2911, 2915-2918, 2920, 2922, 2926-2927); the destinations of the
;; intermediate sets and the enable condition are not visible here.
2909 (define_expand "vec_unpacku_float_hi_v4si"
2912 (match_operand:V4SI 1 "nonimmediate_operand" "")
2913 (parallel [(const_int 2) (const_int 3)
2914 (const_int 2) (const_int 3)])))
2919 (parallel [(const_int 0) (const_int 1)]))))
2921 (lt:V2DF (match_dup 6) (match_dup 3)))
2923 (and:V2DF (match_dup 7) (match_dup 4)))
2924 (set (match_operand:V2DF 0 "register_operand" "")
2925 (plus:V2DF (match_dup 6) (match_dup 8)))]
2928 REAL_VALUE_TYPE TWO32r;
2932 real_ldexp (&TWO32r, &dconst1, 32);
2933 x = const_double_from_real_value (TWO32r, DFmode);
2935 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2936 operands[4] = force_reg (V2DFmode,
2937 ix86_build_const_vector (V2DFmode, 1, x));
2939 operands[5] = gen_reg_rtx (V4SImode);
2941 for (i = 6; i < 9; i++)
2942 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander converting the lower part of V4SI operand 1 (selector 0,1) to
;; V2DF operand 0 as an unsigned value.  Visible steps:
;;   - (lt:V2DF op5 op3): compare the converted value against operand 3,
;;     which is the V2DF zero vector;
;;   - (and:V2DF op6 op4): mask operand 4, a V2DF vector built from
;;     ldexp (1.0, 32);
;;   - (plus:V2DF op5 op7): add the masked correction to produce operand 0.
;; operands[5..7] are V2DFmode pseudos.
;; NOTE(review): several embedded numbers are absent from this listing (e.g.
;; 2946-2948, 2951, 2953, 2957-2958); the destinations of the intermediate
;; sets and the enable condition are not visible here.
2945 (define_expand "vec_unpacku_float_lo_v4si"
2949 (match_operand:V4SI 1 "nonimmediate_operand" "")
2950 (parallel [(const_int 0) (const_int 1)]))))
2952 (lt:V2DF (match_dup 5) (match_dup 3)))
2954 (and:V2DF (match_dup 6) (match_dup 4)))
2955 (set (match_operand:V2DF 0 "register_operand" "")
2956 (plus:V2DF (match_dup 5) (match_dup 7)))]
2959 REAL_VALUE_TYPE TWO32r;
2963 real_ldexp (&TWO32r, &dconst1, 32);
2964 x = const_double_from_real_value (TWO32r, DFmode);
2966 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2967 operands[4] = force_reg (V2DFmode,
2968 ix86_build_const_vector (V2DFmode, 1, x));
2970 for (i = 5; i < 8; i++)
2971 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander converting the high half of V8SI operand 1 to V4DF operand 0 as
;; an unsigned value.  Sequence emitted:
;;   tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = signed int->double conversion of tmp[5] (gen_floatv4siv4df2)
;;   tmp[3] = (tmp[2] < tmp[0]), where tmp[0] is the V4DF zero vector
;;   tmp[4] = tmp[3] & tmp[1], where tmp[1] is a V4DF vector built from
;;            ldexp (1.0, 32)
;;   operand 0 = tmp[2] + tmp[4]
;; NOTE(review): the enable condition and the declarations of x, tmp and i
;; (embedded 2977-2978, 2980-2982) are not visible in this listing.
2974 (define_expand "vec_unpacku_float_hi_v8si"
2975 [(match_operand:V4DF 0 "register_operand" "")
2976 (match_operand:V8SI 1 "register_operand" "")]
2979 REAL_VALUE_TYPE TWO32r;
2983 real_ldexp (&TWO32r, &dconst1, 32);
2984 x = const_double_from_real_value (TWO32r, DFmode);
2986 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2987 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2988 tmp[5] = gen_reg_rtx (V4SImode);
2990 for (i = 2; i < 5; i++)
2991 tmp[i] = gen_reg_rtx (V4DFmode);
2992 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2993 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
2994 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2995 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2996 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2997 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander converting the low part of V8SI operand 1 to V4DF operand 0 as an
;; unsigned value.  Sequence emitted:
;;   tmp[2] = gen_avx_cvtdq2pd256_2 (operand 1)
;;   tmp[3] = (tmp[2] < tmp[0]), where tmp[0] is the V4DF zero vector
;;   tmp[4] = tmp[3] & tmp[1], where tmp[1] is a V4DF vector built from
;;            ldexp (1.0, 32)
;;   operand 0 = tmp[2] + tmp[4]
;; NOTE(review): the enable condition and the declarations of x, tmp and i
;; (embedded 3004-3005, 3007-3009) are not visible in this listing.
3001 (define_expand "vec_unpacku_float_lo_v8si"
3002 [(match_operand:V4DF 0 "register_operand" "")
3003 (match_operand:V8SI 1 "nonimmediate_operand" "")]
3006 REAL_VALUE_TYPE TWO32r;
3010 real_ldexp (&TWO32r, &dconst1, 32);
3011 x = const_double_from_real_value (TWO32r, DFmode);
3013 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3014 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3016 for (i = 2; i < 5; i++)
3017 tmp[i] = gen_reg_rtx (V4DFmode);
3018 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3019 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3020 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3021 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3022 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander with three steps: (float_truncate:V4SF ...) of V4DF operand 1,
;; (float_truncate:V4SF ...) of V4DF operand 2, then a set of V8SF operand 0.
;; operands[3] and operands[4] are fresh V4SFmode pseudos.
;; NOTE(review): embedded numbers 3027, 3030 and 3034-3038 are absent from
;; this listing; the destinations of the truncations, the combining RTL and
;; the enable condition are not visible here.
3026 (define_expand "vec_pack_trunc_v4df"
3028 (float_truncate:V4SF
3029 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3031 (float_truncate:V4SF
3032 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3033 (set (match_operand:V8SF 0 "register_operand" "")
3039 operands[3] = gen_reg_rtx (V4SFmode);
3040 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander packing two V2DF operands into V4SF operand 0.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, concatenated with operand 2 into a V4DF temporary, and converted
;; with gen_avx_cvtpd2ps256.
;; The second visible sequence (presumably the else arm -- the branch keyword
;; is not visible) converts each input with gen_sse2_cvtpd2ps and merges the
;; two results with gen_sse_movlhps.
;; NOTE(review): the enable condition, declarations and braces (e.g. embedded
;; 3047-3050, 3058-3060) are not visible in this listing.
3043 (define_expand "vec_pack_trunc_v2df"
3044 [(match_operand:V4SF 0 "register_operand" "")
3045 (match_operand:V2DF 1 "nonimmediate_operand" "")
3046 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3051 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3053 tmp0 = gen_reg_rtx (V4DFmode);
3054 tmp1 = force_reg (V2DFmode, operands[1]);
3056 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3057 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3061 tmp0 = gen_reg_rtx (V4SFmode);
3062 tmp1 = gen_reg_rtx (V4SFmode);
3064 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3065 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3066 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Expander packing two V4DF operands into V8SI operand 0: each input is
;; converted with gen_fix_truncv4dfv4si2 into a V4SI temporary (r1, r2), and
;; the two halves are joined with gen_avx_vec_concatv8si.
;; NOTE(review): the enable condition and the declaration of r1/r2 (embedded
;; 3075-3078) are not visible in this listing.
3071 (define_expand "vec_pack_sfix_trunc_v4df"
3072 [(match_operand:V8SI 0 "register_operand" "")
3073 (match_operand:V4DF 1 "nonimmediate_operand" "")
3074 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3079 r1 = gen_reg_rtx (V4SImode);
3080 r2 = gen_reg_rtx (V4SImode);
3082 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3083 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3084 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander packing two V2DF operands into V4SI operand 0 via truncating
;; conversion.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, concatenated with operand 2 into a V4DF temporary, and converted
;; with gen_fix_truncv4dfv4si2.
;; The second visible sequence (presumably the else arm -- the branch keyword
;; is not visible) converts each input with gen_sse2_cvttpd2dq and combines
;; the results through gen_vec_interleave_lowv2di on V2DImode lowparts.
;; NOTE(review): the enable condition, declarations, braces and the call that
;; wraps the interleave (e.g. embedded 3092-3095, 3111) are not visible here.
3088 (define_expand "vec_pack_sfix_trunc_v2df"
3089 [(match_operand:V4SI 0 "register_operand" "")
3090 (match_operand:V2DF 1 "nonimmediate_operand" "")
3091 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3096 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3098 tmp0 = gen_reg_rtx (V4DFmode);
3099 tmp1 = force_reg (V2DFmode, operands[1]);
3101 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3102 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3106 tmp0 = gen_reg_rtx (V4SImode);
3107 tmp1 = gen_reg_rtx (V4SImode);
3109 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3110 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3112 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3113 gen_lowpart (V2DImode, tmp0),
3114 gen_lowpart (V2DImode, tmp1)));
;; Maps a double-precision vector mode to the integer vector mode produced by
;; packing two such vectors: V4DF -> V8SI, V2DF -> V4SI.
3119 (define_mode_attr ssepackfltmode
3120 [(V4DF "V8SI") (V2DF "V4SI")])
;; Expander over VF2 modes (V4DF under TARGET_AVX, V2DF) producing the
;; <ssepackfltmode> result in operand 0.  Visible steps:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts each input and fills
;;      tmp[2] / tmp[3] through its second argument;
;;   2. gen_vec_pack_sfix_trunc_<mode> packs the adjusted inputs into tmp[4];
;;   3. ix86_expand_vec_extract_even_odd (..., 0) builds tmp[5] from tmp[2]
;;      and tmp[3]; when the result mode is V4SImode or TARGET_AVX2 is set
;;      this is done directly in <ssepackfltmode>, and the other visible
;;      sequence does it through V8SFmode lowparts and views the result as
;;      V8SImode;
;;   4. tmp[4] XOR tmp[5] is computed with expand_simple_binop targeting
;;      operand 0, followed by a move if a different rtx was returned.
;; NOTE(review): the enable condition, the declaration of tmp and the
;; braces/else (e.g. embedded 3126-3128, 3137-3139) are not visible here.
3122 (define_expand "vec_pack_ufix_trunc_<mode>"
3123 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
3124 (match_operand:VF2 1 "register_operand" "")
3125 (match_operand:VF2 2 "register_operand" "")]
3129 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3130 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3131 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3132 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3133 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3135 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3136 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3140 tmp[5] = gen_reg_rtx (V8SFmode);
3141 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3142 gen_lowpart (V8SFmode, tmp[3]), 0);
3143 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3145 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3146 operands[0], 0, OPTAB_DIRECT);
3147 if (tmp[6] != operands[0])
3148 emit_move_insn (operands[0], tmp[6]);
;; Expander packing two V4DF operands into V8SI operand 0: each input is
;; converted with gen_avx_cvtpd2dq256 into a V4SI temporary (r1, r2), and the
;; two halves are joined with gen_avx_vec_concatv8si.
;; NOTE(review): the enable condition and the declaration of r1/r2 (embedded
;; 3156-3159) are not visible in this listing.
3152 (define_expand "vec_pack_sfix_v4df"
3153 [(match_operand:V8SI 0 "register_operand" "")
3154 (match_operand:V4DF 1 "nonimmediate_operand" "")
3155 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3160 r1 = gen_reg_rtx (V4SImode);
3161 r2 = gen_reg_rtx (V4SImode);
3163 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3164 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3165 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander packing two V2DF operands into V4SI operand 0.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, concatenated with operand 2 into a V4DF temporary, and converted
;; with gen_avx_cvtpd2dq256.
;; The second visible sequence (presumably the else arm -- the branch keyword
;; is not visible) converts each input with gen_sse2_cvtpd2dq and combines
;; the results through gen_vec_interleave_lowv2di on V2DImode lowparts.
;; NOTE(review): the enable condition, declarations, braces and the call that
;; wraps the interleave (e.g. embedded 3173-3176, 3192) are not visible here.
3169 (define_expand "vec_pack_sfix_v2df"
3170 [(match_operand:V4SI 0 "register_operand" "")
3171 (match_operand:V2DF 1 "nonimmediate_operand" "")
3172 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3177 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3179 tmp0 = gen_reg_rtx (V4DFmode);
3180 tmp1 = force_reg (V2DFmode, operands[1]);
3182 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3183 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3187 tmp0 = gen_reg_rtx (V4SImode);
3188 tmp1 = gen_reg_rtx (V4SImode);
3190 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3191 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3193 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3194 gen_lowpart (V2DImode, tmp0),
3195 gen_lowpart (V2DImode, tmp1)));
3200 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3202 ;; Parallel single-precision floating point element swizzling
3204 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around the sse_movhlps insn for V4SF operands that may be
;; in memory.  ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands)
;; returns the destination to use; sse_movhlps is emitted into it, and the
;; result is copied to operand 0 when the returned destination differs.
;; NOTE(review): most of the selector list and the enable condition (embedded
;; 3208-3209, 3213-3217) are not visible in this listing.
3206 (define_expand "sse_movhlps_exp"
3207 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3210 (match_operand:V4SF 1 "nonimmediate_operand" "")
3211 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3212 (parallel [(const_int 6)
3218 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3220 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3222 /* Fix up the destination if needed. */
3223 if (dst != operands[0])
3224 emit_move_insn (operands[0], dst);
;; movhlps family on V4SF operands; the insn is rejected when operands 1 and
;; 2 are both memory.  Five alternatives:
;;   0 (noavx): movhlps  reg, reg (operand 1 tied to operand 0)
;;   1 (avx):   vmovhlps three-operand form
;;   2 (noavx): movlps   from the %H2 part of an offsettable memory operand 2
;;   3 (avx):   vmovlps  from %H2, three-operand form
;;   4:         %vmovhps store of operand 2 into a memory destination
;; The mode attribute is V4SF for the register forms and V2SF for the rest.
;; NOTE(review): most of the selector list (embedded 3231-3232, 3236-3238) is
;; not visible in this listing.
3229 (define_insn "sse_movhlps"
3230 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3233 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3234 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3235 (parallel [(const_int 6)
3239 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3241 movhlps\t{%2, %0|%0, %2}
3242 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3243 movlps\t{%H2, %0|%0, %H2}
3244 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3245 %vmovhps\t{%2, %0|%0, %2}"
3246 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3247 (set_attr "type" "ssemov")
3248 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3249 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around the sse_movlhps insn for V4SF operands that may be
;; in memory.  ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands)
;; returns the destination to use; sse_movlhps is emitted into it, and the
;; result is copied to operand 0 when the returned destination differs.
;; NOTE(review): most of the selector list and the enable condition (embedded
;; 3253-3254, 3258-3262) are not visible in this listing.
3251 (define_expand "sse_movlhps_exp"
3252 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3255 (match_operand:V4SF 1 "nonimmediate_operand" "")
3256 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3257 (parallel [(const_int 0)
3263 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3265 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3267 /* Fix up the destination if needed. */
3268 if (dst != operands[0])
3269 emit_move_insn (operands[0], dst);
;; movlhps family on V4SF operands; enabled when TARGET_SSE and
;; ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands) holds.  Five
;; alternatives:
;;   0 (noavx): movlhps  reg, reg (operand 1 tied to operand 0)
;;   1 (avx):   vmovlhps three-operand form
;;   2 (noavx): movhps   from memory operand 2
;;   3 (avx):   vmovhps  from memory operand 2, three-operand form
;;   4:         %vmovlps store of operand 2 into the %H0 part of an
;;              offsettable memory destination
;; The mode attribute is V4SF for the register forms and V2SF for the rest.
;; NOTE(review): most of the selector list (embedded 3276-3277, 3281-3283) is
;; not visible in this listing.
3274 (define_insn "sse_movlhps"
3275 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3278 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3279 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3280 (parallel [(const_int 0)
3284 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3286 movlhps\t{%2, %0|%0, %2}
3287 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3288 movhps\t{%2, %0|%0, %2}
3289 vmovhps\t{%2, %1, %0|%0, %1, %2}
3290 %vmovlps\t{%2, %H0|%H0, %2}"
3291 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3292 (set_attr "type" "ssemov")
3293 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3294 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; vunpckhps on 256-bit vectors: V8SF register operand 1 and
;; register-or-memory operand 2 are combined and elements
;; 2,10,3,11,6,14,7,15 are selected into V8SF operand 0.  With indices 0-7
;; naming operand 1 and 8-15 naming operand 2 (presumably a vec_concat -- the
;; wrapping rtx is not visible), that is the upper pair of each group of four
;; from both inputs, interleaved.
;; NOTE(review): embedded numbers 3299-3300 and 3307 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
3296 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3297 (define_insn "avx_unpckhps256"
3298 [(set (match_operand:V8SF 0 "register_operand" "=x")
3301 (match_operand:V8SF 1 "register_operand" "x")
3302 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3303 (parallel [(const_int 2) (const_int 10)
3304 (const_int 3) (const_int 11)
3305 (const_int 6) (const_int 14)
3306 (const_int 7) (const_int 15)])))]
3308 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3309 [(set_attr "type" "sselog")
3310 (set_attr "prefix" "vex")
3311 (set_attr "mode" "V8SF")])
;; Expander for a full-width high interleave of two V8SF operands, built from
;; three selections:
;;   1. selector 0,8,1,9,4,12,5,13 on operands 1 and 2;
;;   2. selector 2,10,3,11,6,14,7,15 (inputs not visible, presumably the same
;;      operands);
;;   3. selector 4,5,6,7,12,13,14,15, whose result is V8SF operand 0.
;; operands[3] and operands[4] are fresh V8SFmode pseudos, presumably holding
;; the results of steps 1 and 2.
;; NOTE(review): many embedded numbers are absent from this listing (e.g.
;; 3314-3316, 3323-3327, 3333-3336, 3341-3342); the destinations of the
;; intermediate sets and the enable condition are not visible here.
3313 (define_expand "vec_interleave_highv8sf"
3317 (match_operand:V8SF 1 "register_operand" "x")
3318 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3319 (parallel [(const_int 0) (const_int 8)
3320 (const_int 1) (const_int 9)
3321 (const_int 4) (const_int 12)
3322 (const_int 5) (const_int 13)])))
3328 (parallel [(const_int 2) (const_int 10)
3329 (const_int 3) (const_int 11)
3330 (const_int 6) (const_int 14)
3331 (const_int 7) (const_int 15)])))
3332 (set (match_operand:V8SF 0 "register_operand" "")
3337 (parallel [(const_int 4) (const_int 5)
3338 (const_int 6) (const_int 7)
3339 (const_int 12) (const_int 13)
3340 (const_int 14) (const_int 15)])))]
3343 operands[3] = gen_reg_rtx (V8SFmode);
3344 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps on V4SF operands: elements 2,6,3,7 of the combined operands 1 and
;; 2 are selected into V4SF operand 0.  Two alternatives:
;;   0 (noavx): operand 1 tied to operand 0, two-operand unpckhps;
;;   1 (avx):   three-operand vunpckhps.
;; NOTE(review): embedded numbers 3349-3350 and 3355-3356 are absent from this
;; listing; the wrapping RTL and the enable condition are not visible here.
3347 (define_insn "vec_interleave_highv4sf"
3348 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3351 (match_operand:V4SF 1 "register_operand" "0,x")
3352 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3353 (parallel [(const_int 2) (const_int 6)
3354 (const_int 3) (const_int 7)])))]
3357 unpckhps\t{%2, %0|%0, %2}
3358 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3359 [(set_attr "isa" "noavx,avx")
3360 (set_attr "type" "sselog")
3361 (set_attr "prefix" "orig,vex")
3362 (set_attr "mode" "V4SF")])
3364 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; V8SF pattern that emits vunpcklps.  The element selector is
;; 0,8,1,9 followed by 4,12,5,13; operand 1 must be a register while
;; operand 2 may be a register or memory.
3365 (define_insn "avx_unpcklps256"
3366 [(set (match_operand:V8SF 0 "register_operand" "=x")
3369 (match_operand:V8SF 1 "register_operand" "x")
3370 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3371 (parallel [(const_int 0) (const_int 8)
3372 (const_int 1) (const_int 9)
3373 (const_int 4) (const_int 12)
3374 (const_int 5) (const_int 13)])))]
3376 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3377 [(set_attr "type" "sselog")
3378 (set_attr "prefix" "vex")
3379 (set_attr "mode" "V8SF")])
;; Expander for the low interleave of two V8SF vectors.  It allocates
;; two fresh V8SF pseudos (operands 3 and 4).  The selectors that appear
;; are 0,8,1,9,4,12,5,13 and 2,10,3,11,6,14,7,15, followed by a final
;; selection of indices 0-3 and 8-11 whose result is set into operand 0.
3381 (define_expand "vec_interleave_lowv8sf"
3385 (match_operand:V8SF 1 "register_operand" "x")
3386 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3387 (parallel [(const_int 0) (const_int 8)
3388 (const_int 1) (const_int 9)
3389 (const_int 4) (const_int 12)
3390 (const_int 5) (const_int 13)])))
3396 (parallel [(const_int 2) (const_int 10)
3397 (const_int 3) (const_int 11)
3398 (const_int 6) (const_int 14)
3399 (const_int 7) (const_int 15)])))
3400 (set (match_operand:V8SF 0 "register_operand" "")
3405 (parallel [(const_int 0) (const_int 1)
3406 (const_int 2) (const_int 3)
3407 (const_int 8) (const_int 9)
3408 (const_int 10) (const_int 11)])))]
3411 operands[3] = gen_reg_rtx (V8SFmode);
3412 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF low interleave (selector 0,4,1,5).  Alternative 0 (isa noavx)
;; ties operand 1 to the destination and emits two-operand unpcklps;
;; alternative 1 (isa avx) emits three-operand vunpcklps.
3415 (define_insn "vec_interleave_lowv4sf"
3416 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3419 (match_operand:V4SF 1 "register_operand" "0,x")
3420 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3421 (parallel [(const_int 0) (const_int 4)
3422 (const_int 1) (const_int 5)])))]
3425 unpcklps\t{%2, %0|%0, %2}
3426 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3427 [(set_attr "isa" "noavx,avx")
3428 (set_attr "type" "sselog")
3429 (set_attr "prefix" "orig,vex")
3430 (set_attr "mode" "V4SF")])
3432 ;; These are modeled with the same vec_concat as the others so that we
3433 ;; capture users of shufps that can use the new instructions
;; V8SF pattern that emits vmovshdup from a register or memory source.
;; The selector repeats each odd index: 1,1,3,3,5,5,7,7.
3434 (define_insn "avx_movshdup256"
3435 [(set (match_operand:V8SF 0 "register_operand" "=x")
3438 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3440 (parallel [(const_int 1) (const_int 1)
3441 (const_int 3) (const_int 3)
3442 (const_int 5) (const_int 5)
3443 (const_int 7) (const_int 7)])))]
3445 "vmovshdup\t{%1, %0|%0, %1}"
3446 [(set_attr "type" "sse")
3447 (set_attr "prefix" "vex")
3448 (set_attr "mode" "V8SF")])
;; V4SF counterpart of avx_movshdup256: emits movshdup through a %v
;; template (prefix attribute maybe_vex) and sets prefix_rep to 1.
3450 (define_insn "sse3_movshdup"
3451 [(set (match_operand:V4SF 0 "register_operand" "=x")
3454 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3456 (parallel [(const_int 1)
3461 "%vmovshdup\t{%1, %0|%0, %1}"
3462 [(set_attr "type" "sse")
3463 (set_attr "prefix_rep" "1")
3464 (set_attr "prefix" "maybe_vex")
3465 (set_attr "mode" "V4SF")])
;; V8SF pattern that emits vmovsldup from a register or memory source.
;; The selector repeats each even index: 0,0,2,2,4,4,6,6.
3467 (define_insn "avx_movsldup256"
3468 [(set (match_operand:V8SF 0 "register_operand" "=x")
3471 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3473 (parallel [(const_int 0) (const_int 0)
3474 (const_int 2) (const_int 2)
3475 (const_int 4) (const_int 4)
3476 (const_int 6) (const_int 6)])))]
3478 "vmovsldup\t{%1, %0|%0, %1}"
3479 [(set_attr "type" "sse")
3480 (set_attr "prefix" "vex")
3481 (set_attr "mode" "V8SF")])
;; V4SF counterpart of avx_movsldup256: emits movsldup through a %v
;; template (prefix attribute maybe_vex) and sets prefix_rep to 1.
3483 (define_insn "sse3_movsldup"
3484 [(set (match_operand:V4SF 0 "register_operand" "=x")
3487 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3489 (parallel [(const_int 0)
3494 "%vmovsldup\t{%1, %0|%0, %1}"
3495 [(set_attr "type" "sse")
3496 (set_attr "prefix_rep" "1")
3497 (set_attr "prefix" "maybe_vex")
3498 (set_attr "mode" "V4SF")])
;; Expander that turns the integer shuffle mask in operand 3 into the
;; eight explicit selectors taken by avx_shufps256_1.  Each 2-bit field
;; of the mask is used twice: fields at bits 0-3 yield n and n + 4,
;; fields at bits 4-7 yield n + 8 and n + 12.
3500 (define_expand "avx_shufps256"
3501 [(match_operand:V8SF 0 "register_operand" "")
3502 (match_operand:V8SF 1 "register_operand" "")
3503 (match_operand:V8SF 2 "nonimmediate_operand" "")
3504 (match_operand:SI 3 "const_int_operand" "")]
3507 int mask = INTVAL (operands[3]);
3508 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3509 GEN_INT ((mask >> 0) & 3),
3510 GEN_INT ((mask >> 2) & 3),
3511 GEN_INT (((mask >> 4) & 3) + 8),
3512 GEN_INT (((mask >> 6) & 3) + 8),
3513 GEN_INT (((mask >> 0) & 3) + 4),
3514 GEN_INT (((mask >> 2) & 3) + 4),
3515 GEN_INT (((mask >> 4) & 3) + 12),
3516 GEN_INT (((mask >> 6) & 3) + 12)));
3520 ;; One bit in mask selects 2 elements.
;; vshufps on V8SF with eight explicit selectors (operands 3-10).  The
;; insn condition requires operands 7-10 to equal operands 3-6 plus 4.
;; The output code rebuilds the immediate from operands 3-6 (removing
;; the bias of 8 from operands 5 and 6) and stores it in operands[3].
3521 (define_insn "avx_shufps256_1"
3522 [(set (match_operand:V8SF 0 "register_operand" "=x")
3525 (match_operand:V8SF 1 "register_operand" "x")
3526 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3527 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3528 (match_operand 4 "const_0_to_3_operand" "")
3529 (match_operand 5 "const_8_to_11_operand" "")
3530 (match_operand 6 "const_8_to_11_operand" "")
3531 (match_operand 7 "const_4_to_7_operand" "")
3532 (match_operand 8 "const_4_to_7_operand" "")
3533 (match_operand 9 "const_12_to_15_operand" "")
3534 (match_operand 10 "const_12_to_15_operand" "")])))]
3536 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3537 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3538 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3539 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3542 mask = INTVAL (operands[3]);
3543 mask |= INTVAL (operands[4]) << 2;
3544 mask |= (INTVAL (operands[5]) - 8) << 4;
3545 mask |= (INTVAL (operands[6]) - 8) << 6;
3546 operands[3] = GEN_INT (mask);
3548 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3550 [(set_attr "type" "sselog")
3551 (set_attr "length_immediate" "1")
3552 (set_attr "prefix" "vex")
3553 (set_attr "mode" "V8SF")])
;; Expander that splits the integer shuffle mask in operand 3 into four
;; 2-bit selectors and passes them to gen_sse_shufps_v4sf; the two
;; selectors taken from bits 4-7 are biased by 4.
3555 (define_expand "sse_shufps"
3556 [(match_operand:V4SF 0 "register_operand" "")
3557 (match_operand:V4SF 1 "register_operand" "")
3558 (match_operand:V4SF 2 "nonimmediate_operand" "")
3559 (match_operand:SI 3 "const_int_operand" "")]
3562 int mask = INTVAL (operands[3]);
3563 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3564 GEN_INT ((mask >> 0) & 3),
3565 GEN_INT ((mask >> 2) & 3),
3566 GEN_INT (((mask >> 4) & 3) + 4),
3567 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps / vshufps for the VI4F_128 modes, selecting from the
;; concatenation of operands 1 and 2.  Operands 3 and 4 are restricted
;; to 0-3, operands 5 and 6 to 4-7.  The output code packs the four
;; selectors into one immediate (subtracting 4 from operands 5 and 6)
;; and stores it in operands[3] before choosing the template.
3571 (define_insn "sse_shufps_<mode>"
3572 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3573 (vec_select:VI4F_128
3574 (vec_concat:<ssedoublevecmode>
3575 (match_operand:VI4F_128 1 "register_operand" "0,x")
3576 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3577 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3578 (match_operand 4 "const_0_to_3_operand" "")
3579 (match_operand 5 "const_4_to_7_operand" "")
3580 (match_operand 6 "const_4_to_7_operand" "")])))]
3584 mask |= INTVAL (operands[3]) << 0;
3585 mask |= INTVAL (operands[4]) << 2;
3586 mask |= (INTVAL (operands[5]) - 4) << 4;
3587 mask |= (INTVAL (operands[6]) - 4) << 6;
3588 operands[3] = GEN_INT (mask);
3590 switch (which_alternative)
3593 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3595 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3600 [(set_attr "isa" "noavx,avx")
3601 (set_attr "type" "sselog")
3602 (set_attr "length_immediate" "1")
3603 (set_attr "prefix" "orig,vex")
3604 (set_attr "mode" "V4SF")])
;; Produces a V2SF from elements 2 and 3 of a V4SF operand.
;; Alternatives: movhps to a memory destination, movhlps between
;; registers, and movlps from an offsettable memory source printed with
;; the %H modifier.
3606 (define_insn "sse_storehps"
3607 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3609 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3610 (parallel [(const_int 2) (const_int 3)])))]
3613 %vmovhps\t{%1, %0|%0, %1}
3614 %vmovhlps\t{%1, %d0|%d0, %1}
3615 %vmovlps\t{%H1, %d0|%d0, %H1}"
3616 [(set_attr "type" "ssemov")
3617 (set_attr "prefix" "maybe_vex")
3618 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadhps.  The operands are passed through
;; ix86_fixup_binary_operands, the insn is emitted on the destination it
;; returns, and the result is copied into operands[0] when that
;; destination differs from operands[0].
3620 (define_expand "sse_loadhps_exp"
3621 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3624 (match_operand:V4SF 1 "nonimmediate_operand" "")
3625 (parallel [(const_int 0) (const_int 1)]))
3626 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3629 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3631 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3633 /* Fix up the destination if needed. */
3634 if (dst != operands[0])
3635 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives 0/1 use movhps/vmovhps with operand 2 in memory,
;; alternatives 2/3 use movlhps/vmovlhps with operand 2 in a register,
;; and alternative 4 writes operand 2 to an offsettable memory
;; destination through movlps with the %H modifier.
3640 (define_insn "sse_loadhps"
3641 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3644 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3645 (parallel [(const_int 0) (const_int 1)]))
3646 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3649 movhps\t{%2, %0|%0, %2}
3650 vmovhps\t{%2, %1, %0|%0, %1, %2}
3651 movlhps\t{%2, %0|%0, %2}
3652 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3653 %vmovlps\t{%2, %H0|%H0, %2}"
3654 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3655 (set_attr "type" "ssemov")
3656 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3657 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Produces a V2SF from elements 0 and 1 of a V4SF operand.
;; Alternatives: movlps to a memory destination, movaps between
;; registers, and movlps from a memory source.
3659 (define_insn "sse_storelps"
3660 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3662 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3663 (parallel [(const_int 0) (const_int 1)])))]
3666 %vmovlps\t{%1, %0|%0, %1}
3667 %vmovaps\t{%1, %0|%0, %1}
3668 %vmovlps\t{%1, %d0|%d0, %1}"
3669 [(set_attr "type" "ssemov")
3670 (set_attr "prefix" "maybe_vex")
3671 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadlps.  The operands are passed through
;; ix86_fixup_binary_operands, the insn is emitted on the destination it
;; returns, and the result is copied into operands[0] when that
;; destination differs from operands[0].
3673 (define_expand "sse_loadlps_exp"
3674 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3676 (match_operand:V2SF 2 "nonimmediate_operand" "")
3678 (match_operand:V4SF 1 "nonimmediate_operand" "")
3679 (parallel [(const_int 2) (const_int 3)]))))]
3682 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3684 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3686 /* Fix up the destination if needed. */
3687 if (dst != operands[0])
3688 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives 0/1 handle a register operand 2 with shufps/vshufps and
;; immediate 0xe4, alternatives 2/3 load operand 2 from memory with
;; movlps/vmovlps, and alternative 4 stores operand 2 to a memory
;; destination with movlps.
3693 (define_insn "sse_loadlps"
3694 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3696 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3698 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3699 (parallel [(const_int 2) (const_int 3)]))))]
3702 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3703 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3704 movlps\t{%2, %0|%0, %2}
3705 vmovlps\t{%2, %1, %0|%0, %1, %2}
3706 %vmovlps\t{%2, %0|%0, %2}"
3707 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3708 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3709 (set_attr "length_immediate" "1,1,*,*,*")
3710 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3711 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss / vmovss on V4SF registers.  All three operands are registers;
;; the noavx alternative ties operand 1 to the destination, the avx
;; alternative uses the three-operand form.
3713 (define_insn "sse_movss"
3714 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3716 (match_operand:V4SF 2 "register_operand" " x,x")
3717 (match_operand:V4SF 1 "register_operand" " 0,x")
3721 movss\t{%2, %0|%0, %2}
3722 vmovss\t{%2, %1, %0|%0, %1, %2}"
3723 [(set_attr "isa" "noavx,avx")
3724 (set_attr "type" "ssemov")
3725 (set_attr "prefix" "orig,vex")
3726 (set_attr "mode" "SF")])
;; Emits vbroadcastss from element 0 of a V4SF register into a
;; destination of a VF1 mode (the SFmode vector float modes).
3728 (define_insn "avx2_vec_dup<mode>"
3729 [(set (match_operand:VF1 0 "register_operand" "=x")
3732 (match_operand:V4SF 1 "register_operand" "x")
3733 (parallel [(const_int 0)]))))]
3735 "vbroadcastss\t{%1, %0|%0, %1}"
3736 [(set_attr "type" "sselog1")
3737 (set_attr "prefix" "vex")
3738 (set_attr "mode" "<MODE>")])
;; Builds a V4SF from a single SF operand.  Alternatives: AVX vshufps
;; with immediate 0 on a register source, AVX vbroadcastss from memory,
;; and non-AVX shufps with immediate 0 where the source is tied to the
;; destination.
3740 (define_insn "vec_dupv4sf"
3741 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3743 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3746 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3747 vbroadcastss\t{%1, %0|%0, %1}
3748 shufps\t{$0, %0, %0|%0, %0, 0}"
3749 [(set_attr "isa" "avx,avx,noavx")
3750 (set_attr "type" "sselog1,ssemov,sselog1")
3751 (set_attr "length_immediate" "1,0,1")
3752 (set_attr "prefix_extra" "0,1,*")
3753 (set_attr "prefix" "vex,vex,orig")
3754 (set_attr "mode" "V4SF")])
3756 ;; Although insertps takes register source, we prefer
3757 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from two SF operands.  Alternatives 0-3 use
;; unpcklps/insertps (and their v-prefixed forms) for a register or
;; memory operand 2; alternative 4 loads operand 1 with movss when
;; operand 2 satisfies constraint C.  The last two alternatives use
;; "y" registers with punpckldq and movd.
3758 (define_insn "*vec_concatv2sf_sse4_1"
3759 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3761 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3762 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3765 unpcklps\t{%2, %0|%0, %2}
3766 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3767 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3768 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3769 %vmovss\t{%1, %0|%0, %1}
3770 punpckldq\t{%2, %0|%0, %2}
3771 movd\t{%1, %0|%0, %1}"
3772 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3773 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3774 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3775 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3776 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3777 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3778 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3780 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3781 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3782 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Variant of the V2SF concat without insertps: operand 2 is limited by
;; reg_or_0_operand.  Alternatives: unpcklps with a register operand 2,
;; movss load when operand 2 satisfies constraint C, and the two "y"
;; register forms using punpckldq and movd.
3783 (define_insn "*vec_concatv2sf_sse"
3784 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3786 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3787 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3790 unpcklps\t{%2, %0|%0, %2}
3791 movss\t{%1, %0|%0, %1}
3792 punpckldq\t{%2, %0|%0, %2}
3793 movd\t{%1, %0|%0, %1}"
3794 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3795 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands.  movlhps/vmovlhps is used when
;; operand 2 is a register, movhps/vmovhps when it is in memory; each
;; comes in a noavx (operand 1 tied to the destination) and an avx form.
3797 (define_insn "*vec_concatv4sf"
3798 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3800 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3801 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3804 movlhps\t{%2, %0|%0, %2}
3805 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3806 movhps\t{%2, %0|%0, %2}
3807 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3808 [(set_attr "isa" "noavx,avx,noavx,avx")
3809 (set_attr "type" "ssemov")
3810 (set_attr "prefix" "orig,vex,orig,vex")
3811 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialisation expander for the V_128 modes; all work is
;; delegated to ix86_expand_vector_init, called with its first argument
;; false, the destination register and the initialiser operand.
3813 (define_expand "vec_init<mode>"
3814 [(match_operand:V_128 0 "register_operand" "")
3815 (match_operand 1 "" "")]
3818 ix86_expand_vector_init (false, operands[0], operands[1]);
3822 ;; Avoid combining registers from different units in a single alternative,
3823 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Scalar-into-vector pattern for the VI4F_128 modes with eleven
;; alternatives.  Operand 2 is the scalar (duplicated to vector width);
;; operand 1 supplies the remaining vector contents and is either
;; constraint C or a register.  The "type" attribute is chosen per
;; alternative: 0, 6 and 7 are sselog, 9 is imov, 10 is fmov, and the
;; rest are ssemov.  The last three alternatives have a memory
;; destination.
3824 (define_insn "vec_set<mode>_0"
3825 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3826 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
3828 (vec_duplicate:VI4F_128
3829 (match_operand:<ssescalarmode> 2 "general_operand"
3830 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
3831 (match_operand:VI4F_128 1 "vector_move_operand"
3832 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
3836 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3837 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3838 %vmovd\t{%2, %0|%0, %2}
3839 movss\t{%2, %0|%0, %2}
3840 movss\t{%2, %0|%0, %2}
3841 vmovss\t{%2, %1, %0|%0, %1, %2}
3842 pinsrd\t{$0, %2, %0|%0, %2, 0}
3843 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3847 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3849 (cond [(eq_attr "alternative" "0,6,7")
3850 (const_string "sselog")
3851 (eq_attr "alternative" "9")
3852 (const_string "imov")
3853 (eq_attr "alternative" "10")
3854 (const_string "fmov")
3856 (const_string "ssemov")))
3857 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3858 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3859 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3860 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3862 ;; A subset is vec_setv4sf.
;; insertps / vinsertps of SF operand 2 into V4SF operand 1.  The insn
;; condition requires operand 3 to be an exact power of two whose log2
;; is below the number of V4SF elements.  The output code replaces
;; operands[3] with that log2 shifted left by 4, which is the immediate
;; printed in the template.
3863 (define_insn "*vec_setv4sf_sse4_1"
3864 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3867 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3868 (match_operand:V4SF 1 "register_operand" "0,x")
3869 (match_operand:SI 3 "const_int_operand" "")))]
3871 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3872 < GET_MODE_NUNITS (V4SFmode))"
3874 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3875 switch (which_alternative)
3878 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3880 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3885 [(set_attr "isa" "noavx,avx")
3886 (set_attr "type" "sselog")
3887 (set_attr "prefix_data16" "1,*")
3888 (set_attr "prefix_extra" "1")
3889 (set_attr "length_immediate" "1")
3890 (set_attr "prefix" "orig,vex")
3891 (set_attr "mode" "V4SF")])
;; insertps / vinsertps with an explicit immediate in the range 0-255,
;; modelled as an unspec.  When operand 2 is a MEM, the output code
;; takes bits 6-7 of the immediate (count_s), clears them from the
;; immediate, and instead re-addresses the memory operand as SFmode at
;; byte offset count_s * 4.
3893 (define_insn "sse4_1_insertps"
3894 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3895 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3896 (match_operand:V4SF 1 "register_operand" "0,x")
3897 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3901 if (MEM_P (operands[2]))
3903 unsigned count_s = INTVAL (operands[3]) >> 6;
3905 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3906 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3908 switch (which_alternative)
3911 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3913 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3918 [(set_attr "isa" "noavx,avx")
3919 (set_attr "type" "sselog")
3920 (set_attr "prefix_data16" "1,*")
3921 (set_attr "prefix_extra" "1")
3922 (set_attr "length_immediate" "1")
3923 (set_attr "prefix" "orig,vex")
3924 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE) for a VI4F_128 destination in memory
;; and a non-memory scalar operand 1: it emits a scalar move into the
;; destination re-addressed in the scalar mode at offset 0.
3927 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3929 (vec_duplicate:VI4F_128
3930 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3933 "TARGET_SSE && reload_completed"
3936 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Element-set expander for every mode in the V iterator.  Operand 0 is
;; the vector register, operand 1 the scalar register and operand 2 a
;; constant integer; all work is delegated to ix86_expand_vector_set,
;; which receives INTVAL (operands[2]) as its last argument.
3941 (define_expand "vec_set<mode>"
3942 [(match_operand:V 0 "register_operand" "")
3943 (match_operand:<ssescalarmode> 1 "register_operand" "")
3944 (match_operand 2 "const_int_operand" "")]
3947 ix86_expand_vector_set (false, operands[0], operands[1],
3948 INTVAL (operands[2]));
;; Extracts element 0 of a V4SF into an SF destination (SSE register,
;; memory, "f" or "r" register); memory-to-memory is rejected by the
;; insn condition.  After reload it is split into a plain SF move whose
;; source is operand 1 re-expressed in SFmode, either through
;; gen_rtx_REG on its register number or through gen_lowpart.
3952 (define_insn_and_split "*vec_extractv4sf_0"
3953 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3955 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3956 (parallel [(const_int 0)])))]
3957 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3959 "&& reload_completed"
3962 rtx op1 = operands[1];
3964 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3966 op1 = gen_lowpart (SFmode, op1);
3967 emit_move_insn (operands[0], op1);
;; Extracts element N (operand 2, 0-3) of a V4SF register.  Alternative
;; 0 targets an "rm" destination and emits extractps.  The split runs
;; after reload only when the destination is an SSE register; it views
;; that register as V4SF and emits either a shufps of operand 1 with
;; itself using operand 2 for every selector, or a high interleave of
;; operand 1 with itself, depending on the value of operand 2.
3971 (define_insn_and_split "*sse4_1_extractps"
3972 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3974 (match_operand:V4SF 1 "register_operand" "x,0,x")
3975 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3978 %vextractps\t{%2, %1, %0|%0, %1, %2}
3981 "&& reload_completed && SSE_REG_P (operands[0])"
3984 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3985 switch (INTVAL (operands[2]))
3989 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3990 operands[2], operands[2],
3991 GEN_INT (INTVAL (operands[2]) + 4),
3992 GEN_INT (INTVAL (operands[2]) + 4)));
3995 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3998 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4003 [(set_attr "isa" "*,noavx,avx")
4004 (set_attr "type" "sselog,*,*")
4005 (set_attr "prefix_data16" "1,*,*")
4006 (set_attr "prefix_extra" "1,*,*")
4007 (set_attr "length_immediate" "1,*,*")
4008 (set_attr "prefix" "maybe_vex,*,*")
4009 (set_attr "mode" "V4SF,*,*")])
;; Extracts element i (operand 2, 0-3) from a V4SF in offsettable
;; memory.  After reload it is split into a scalar SF move from the
;; source address advanced by i * 4 bytes.
4011 (define_insn_and_split "*vec_extract_v4sf_mem"
4012 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4014 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4015 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4018 "&& reload_completed"
4021 int i = INTVAL (operands[2]);
4023 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Half-extraction expander for the V_256 modes.  Operand 2 (0 or 1) is
;; switched on to pick gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode>, and the chosen generator is then called
;; with the destination and the 256-bit source register.
4027 (define_expand "avx_vextractf128<mode>"
4028 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
4029 (match_operand:V_256 1 "register_operand" "")
4030 (match_operand:SI 2 "const_0_to_1_operand" "")]
4033 rtx (*insn)(rtx, rtx);
4035 switch (INTVAL (operands[2]))
4038 insn = gen_vec_extract_lo_<mode>;
4041 insn = gen_vec_extract_hi_<mode>;
4047 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0 and 1) of a VI8F_256 vector.  Memory-to-memory
;; is rejected by the insn condition.  After reload it is split into a
;; plain move in the half-width mode, with the source re-expressed in
;; that mode through gen_rtx_REG or gen_lowpart.
4051 (define_insn_and_split "vec_extract_lo_<mode>"
4052 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4053 (vec_select:<ssehalfvecmode>
4054 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4055 (parallel [(const_int 0) (const_int 1)])))]
4056 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4058 "&& reload_completed"
4061 rtx op1 = operands[1];
4063 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4065 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4066 emit_move_insn (operands[0], op1);
;; High half (elements 2 and 3) of a VI8F_256 register, emitted as
;; vextract<i128> with immediate 1.  The destination is a register in
;; alternative 0 and memory in alternative 1 ("memory" attribute
;; none,store).
4070 (define_insn "vec_extract_hi_<mode>"
4071 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4072 (vec_select:<ssehalfvecmode>
4073 (match_operand:VI8F_256 1 "register_operand" "x,x")
4074 (parallel [(const_int 2) (const_int 3)])))]
4076 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4077 [(set_attr "type" "sselog")
4078 (set_attr "prefix_extra" "1")
4079 (set_attr "length_immediate" "1")
4080 (set_attr "memory" "none,store")
4081 (set_attr "prefix" "vex")
4082 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0-3) of a VI4F_256 vector.  Memory-to-memory is
;; rejected by the insn condition.  After reload it is split into a
;; plain move in the half-width mode, with the source re-expressed in
;; that mode through gen_rtx_REG or gen_lowpart.
4084 (define_insn_and_split "vec_extract_lo_<mode>"
4085 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4086 (vec_select:<ssehalfvecmode>
4087 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4088 (parallel [(const_int 0) (const_int 1)
4089 (const_int 2) (const_int 3)])))]
4090 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4092 "&& reload_completed"
4095 rtx op1 = operands[1];
4097 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4099 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4100 emit_move_insn (operands[0], op1);
;; High half (elements 4-7) of a VI4F_256 register, emitted as
;; vextract<i128> with immediate 1.  The destination is a register in
;; alternative 0 and memory in alternative 1 ("memory" attribute
;; none,store).
4104 (define_insn "vec_extract_hi_<mode>"
4105 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4106 (vec_select:<ssehalfvecmode>
4107 (match_operand:VI4F_256 1 "register_operand" "x,x")
4108 (parallel [(const_int 4) (const_int 5)
4109 (const_int 6) (const_int 7)])))]
4111 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4112 [(set_attr "type" "sselog")
4113 (set_attr "prefix_extra" "1")
4114 (set_attr "length_immediate" "1")
4115 (set_attr "memory" "none,store")
4116 (set_attr "prefix" "vex")
4117 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0-7) of a V16HI vector as V8HI.  Memory-to-memory
;; is rejected by the insn condition.  After reload it is split into a
;; plain V8HI move, with the source re-expressed in V8HImode through
;; gen_rtx_REG or gen_lowpart.
4119 (define_insn_and_split "vec_extract_lo_v16hi"
4120 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4122 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4123 (parallel [(const_int 0) (const_int 1)
4124 (const_int 2) (const_int 3)
4125 (const_int 4) (const_int 5)
4126 (const_int 6) (const_int 7)])))]
4127 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4129 "&& reload_completed"
4132 rtx op1 = operands[1];
4134 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4136 op1 = gen_lowpart (V8HImode, op1);
4137 emit_move_insn (operands[0], op1);
;; High half (elements 8-15) of a V16HI register as V8HI, emitted as
;; vextract%~128 with immediate 1.  The destination is a register in
;; alternative 0 and memory in alternative 1; the insn mode is OI.
4141 (define_insn "vec_extract_hi_v16hi"
4142 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4144 (match_operand:V16HI 1 "register_operand" "x,x")
4145 (parallel [(const_int 8) (const_int 9)
4146 (const_int 10) (const_int 11)
4147 (const_int 12) (const_int 13)
4148 (const_int 14) (const_int 15)])))]
4150 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4151 [(set_attr "type" "sselog")
4152 (set_attr "prefix_extra" "1")
4153 (set_attr "length_immediate" "1")
4154 (set_attr "memory" "none,store")
4155 (set_attr "prefix" "vex")
4156 (set_attr "mode" "OI")])
;; Low half (elements 0-15) of a V32QI vector as V16QI.
;; Memory-to-memory is rejected by the insn condition.  After reload it
;; is split into a plain V16QI move, with the source re-expressed in
;; V16QImode through gen_rtx_REG or gen_lowpart.
4158 (define_insn_and_split "vec_extract_lo_v32qi"
4159 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4161 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4162 (parallel [(const_int 0) (const_int 1)
4163 (const_int 2) (const_int 3)
4164 (const_int 4) (const_int 5)
4165 (const_int 6) (const_int 7)
4166 (const_int 8) (const_int 9)
4167 (const_int 10) (const_int 11)
4168 (const_int 12) (const_int 13)
4169 (const_int 14) (const_int 15)])))]
4170 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4172 "&& reload_completed"
4175 rtx op1 = operands[1];
4177 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4179 op1 = gen_lowpart (V16QImode, op1);
4180 emit_move_insn (operands[0], op1);
;; High half (elements 16-31) of a V32QI register as V16QI, emitted as
;; vextract%~128 with immediate 1.  The destination is a register in
;; alternative 0 and memory in alternative 1; the insn mode is OI.
4184 (define_insn "vec_extract_hi_v32qi"
4185 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4187 (match_operand:V32QI 1 "register_operand" "x,x")
4188 (parallel [(const_int 16) (const_int 17)
4189 (const_int 18) (const_int 19)
4190 (const_int 20) (const_int 21)
4191 (const_int 22) (const_int 23)
4192 (const_int 24) (const_int 25)
4193 (const_int 26) (const_int 27)
4194 (const_int 28) (const_int 29)
4195 (const_int 30) (const_int 31)])))]
4197 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4198 [(set_attr "type" "sselog")
4199 (set_attr "prefix_extra" "1")
4200 (set_attr "length_immediate" "1")
4201 (set_attr "memory" "none,store")
4202 (set_attr "prefix" "vex")
4203 (set_attr "mode" "OI")])
4205 ;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with
;; its unconditional 128-bit counterpart.
4206 (define_mode_iterator VEC_EXTRACT_MODE
4207 [(V32QI "TARGET_AVX") V16QI
4208 (V16HI "TARGET_AVX") V8HI
4209 (V8SI "TARGET_AVX") V4SI
4210 (V4DI "TARGET_AVX") V2DI
4211 (V8SF "TARGET_AVX") V4SF
4212 (V4DF "TARGET_AVX") V2DF])
;; Element-extract expander for every mode in VEC_EXTRACT_MODE.
;; Operand 0 is the scalar destination register, operand 1 the vector
;; register and operand 2 a constant integer; all work is delegated to
;; ix86_expand_vector_extract, which receives INTVAL (operands[2]) as
;; its last argument.
4214 (define_expand "vec_extract<mode>"
4215 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4216 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4217 (match_operand 2 "const_int_operand" "")]
4220 ix86_expand_vector_extract (false, operands[0], operands[1],
4221 INTVAL (operands[2]));
4225 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4227 ;; Parallel double-precision floating point element swizzling
4229 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4231 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; V4DF pattern that emits vunpckhpd.  The element selector is
;; 1,5,3,7; operand 1 must be a register while operand 2 may be a
;; register or memory.
4232 (define_insn "avx_unpckhpd256"
4233 [(set (match_operand:V4DF 0 "register_operand" "=x")
4236 (match_operand:V4DF 1 "register_operand" "x")
4237 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4238 (parallel [(const_int 1) (const_int 5)
4239 (const_int 3) (const_int 7)])))]
4241 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4242 [(set_attr "type" "sselog")
4243 (set_attr "prefix" "vex")
4244 (set_attr "mode" "V4DF")])
;; Expander for the high interleave of two V4DF vectors.  It allocates
;; two fresh V4DF pseudos (operands 3 and 4).  The selectors that appear
;; are 0,4,2,6 and 1,5,3,7, followed by a final selection of indices
;; 2,3,6,7 whose result is set into operand 0.
4246 (define_expand "vec_interleave_highv4df"
4250 (match_operand:V4DF 1 "register_operand" "x")
4251 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4252 (parallel [(const_int 0) (const_int 4)
4253 (const_int 2) (const_int 6)])))
4259 (parallel [(const_int 1) (const_int 5)
4260 (const_int 3) (const_int 7)])))
4261 (set (match_operand:V4DF 0 "register_operand" "")
4266 (parallel [(const_int 2) (const_int 3)
4267 (const_int 6) (const_int 7)])))]
4270 operands[3] = gen_reg_rtx (V4DFmode);
4271 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the
;; operands as given, operand 2 is forced into a V2DF register.
4275 (define_expand "vec_interleave_highv2df"
4276 [(set (match_operand:V2DF 0 "register_operand" "")
4279 (match_operand:V2DF 1 "nonimmediate_operand" "")
4280 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4281 (parallel [(const_int 1)
4285 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4286 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matcher for the V2DF high interleave, valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the
;; operands.  Alternatives: unpckhpd/vunpckhpd on registers; movddup
;; from %H1 when operand 2 matches operand 1 in memory (isa sse3);
;; movlpd/vmovlpd from %H1 with operand 1 in offsettable memory; and
;; movhpd storing register operand 1 to a memory destination.
4289 (define_insn "*vec_interleave_highv2df"
4290 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4293 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4294 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4295 (parallel [(const_int 1)
4297 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4299 unpckhpd\t{%2, %0|%0, %2}
4300 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4301 %vmovddup\t{%H1, %0|%0, %H1}
4302 movlpd\t{%H1, %0|%0, %H1}
4303 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4304 %vmovhpd\t{%1, %0|%0, %1}"
4305 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4306 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4307 (set_attr "prefix_data16" "*,*,*,1,*,1")
4308 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4309 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4311 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF source (register or memory) and the
;; selector 0,4,2,6, the same selector that *avx_unpcklpd256 matches.
4312 (define_expand "avx_movddup256"
4313 [(set (match_operand:V4DF 0 "register_operand" "")
4316 (match_operand:V4DF 1 "nonimmediate_operand" "")
4318 (parallel [(const_int 0) (const_int 4)
4319 (const_int 2) (const_int 6)])))]
;; Expander with two V4DF sources (operand 1 a register, operand 2 a
;; register or memory) and the selector 0,4,2,6; it has no preparation
;; code of its own in the lines shown.
4322 (define_expand "avx_unpcklpd256"
4323 [(set (match_operand:V4DF 0 "register_operand" "")
4326 (match_operand:V4DF 1 "register_operand" "")
4327 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4328 (parallel [(const_int 0) (const_int 4)
4329 (const_int 2) (const_int 6)])))]
;; Matcher for the V4DF selector 0,4,2,6.  Alternative 0 emits
;; vunpcklpd with operand 1 in a register.  Alternative 1 applies when
;; operand 1 is in memory and operand 2 matches operand 1 (constraint
;; "1"); it emits vmovddup from that single source.
4332 (define_insn "*avx_unpcklpd256"
4333 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4336 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4337 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4338 (parallel [(const_int 0) (const_int 4)
4339 (const_int 2) (const_int 6)])))]
4342 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4343 vmovddup\t{%1, %0|%0, %1}"
4344 [(set_attr "type" "sselog")
4345 (set_attr "prefix" "vex")
4346 (set_attr "mode" "V4DF")])
;; Expander for the low interleave of two V4DF vectors.  It allocates
;; two fresh V4DF pseudos (operands 3 and 4).  The selectors that appear
;; are 0,4,2,6 and 1,5,3,7, followed by a final selection of indices
;; 0,1,4,5 whose result is set into operand 0.
4348 (define_expand "vec_interleave_lowv4df"
4352 (match_operand:V4DF 1 "register_operand" "x")
4353 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4354 (parallel [(const_int 0) (const_int 4)
4355 (const_int 2) (const_int 6)])))
4361 (parallel [(const_int 1) (const_int 5)
4362 (const_int 3) (const_int 7)])))
4363 (set (match_operand:V4DF 0 "register_operand" "")
4368 (parallel [(const_int 0) (const_int 1)
4369 (const_int 4) (const_int 5)])))]
4372 operands[3] = gen_reg_rtx (V4DFmode);
4373 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the
;; operands as given, operand 1 is forced into a V2DF register.
4376 (define_expand "vec_interleave_lowv2df"
4377 [(set (match_operand:V2DF 0 "register_operand" "")
4380 (match_operand:V2DF 1 "nonimmediate_operand" "")
4381 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4382 (parallel [(const_int 0)
4386 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4387 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for the V2DF low interleave, valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) accepts the
;; operands.  Alternatives: unpcklpd/vunpcklpd on registers; movddup
;; when operand 2 matches operand 1 in memory (isa sse3);
;; movhpd/vmovhpd with operand 2 in memory; and movlpd storing
;; register operand 2 to %H0 of an offsettable memory destination.
4390 (define_insn "*vec_interleave_lowv2df"
4391 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4394 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4395 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4396 (parallel [(const_int 0)
4398 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4400 unpcklpd\t{%2, %0|%0, %2}
4401 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4402 %vmovddup\t{%1, %0|%0, %1}
4403 movhpd\t{%2, %0|%0, %2}
4404 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4405 %vmovlpd\t{%2, %H0|%H0, %2}"
4406 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4407 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4408 (set_attr "prefix_data16" "*,*,*,1,*,1")
4409 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4410 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination and a
;; register source: the source register is viewed as DFmode and stored
;; twice, at byte offsets 0 and 8 of the destination.
4413 [(set (match_operand:V2DF 0 "memory_operand" "")
4416 (match_operand:V2DF 1 "register_operand" "")
4418 (parallel [(const_int 0)
4420 "TARGET_SSE3 && reload_completed"
4423 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4424 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4425 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a V2DF register destination and a memory
;; source with two selectors, applicable when operand 3 equals operand 2
;; plus 2.  The result is a vec_duplicate of the DF found at byte offset
;; operand 2 * 8 of the source.
4430 [(set (match_operand:V2DF 0 "register_operand" "")
4433 (match_operand:V2DF 1 "memory_operand" "")
4435 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4436 (match_operand:SI 3 "const_int_operand" "")])))]
4437 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4438 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4440 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander that turns the integer shuffle mask in operand 3 into
;; explicit selectors for gen_avx_shufpd256_1: mask bit 1 chooses 5 or
;; 4, bit 2 chooses 3 or 2, and bit 3 chooses 7 or 6.
4443 (define_expand "avx_shufpd256"
4444 [(match_operand:V4DF 0 "register_operand" "")
4445 (match_operand:V4DF 1 "register_operand" "")
4446 (match_operand:V4DF 2 "nonimmediate_operand" "")
4447 (match_operand:SI 3 "const_int_operand" "")]
4450 int mask = INTVAL (operands[3]);
4451 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4453 GEN_INT (mask & 2 ? 5 : 4),
4454 GEN_INT (mask & 4 ? 3 : 2),
4455 GEN_INT (mask & 8 ? 7 : 6)));
;; V4DF shuffle with four explicit selectors (operands 3-6, restricted to
;; 0..1, 4..5, 2..3 and 6..7 respectively).  The output code packs them back
;; into a 4-bit immediate: bit 0 is operand 3, bits 1-3 are operands 4, 5, 6
;; minus their bases 4, 2, 6.  The result overwrites operands[3] and is
;; printed as the immediate of vshufpd.
4459 (define_insn "avx_shufpd256_1"
4460 [(set (match_operand:V4DF 0 "register_operand" "=x")
4463 (match_operand:V4DF 1 "register_operand" "x")
4464 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4465 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4466 (match_operand 4 "const_4_to_5_operand" "")
4467 (match_operand 5 "const_2_to_3_operand" "")
4468 (match_operand 6 "const_6_to_7_operand" "")])))]
4472 mask = INTVAL (operands[3]);
4473 mask |= (INTVAL (operands[4]) - 4) << 1;
4474 mask |= (INTVAL (operands[5]) - 2) << 2;
4475 mask |= (INTVAL (operands[6]) - 6) << 3;
4476 operands[3] = GEN_INT (mask);
4478 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4480 [(set_attr "type" "sselog")
4481 (set_attr "length_immediate" "1")
4482 (set_attr "prefix" "vex")
4483 (set_attr "mode" "V4DF")])
;; Expander taking V2DF operands 0-2 and an integer mask in operand 3.
;; The mask is turned into explicit selectors for gen_sse2_shufpd_v2df;
;; bit 1 picks element 3 or 2 (the bit-0 argument is elided from this
;; listing).
4485 (define_expand "sse2_shufpd"
4486 [(match_operand:V2DF 0 "register_operand" "")
4487 (match_operand:V2DF 1 "register_operand" "")
4488 (match_operand:V2DF 2 "nonimmediate_operand" "")
4489 (match_operand:SI 3 "const_int_operand" "")]
4492 int mask = INTVAL (operands[3]);
4493 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4495 GEN_INT (mask & 2 ? 3 : 2)));
4499 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI interleave-high: a single VEX-encoded alternative that emits
;; vpunpckhqdq with register operand 1 and register-or-memory operand 2.
;; The selector list begins at element 1.
4500 (define_insn "avx2_interleave_highv4di"
4501 [(set (match_operand:V4DI 0 "register_operand" "=x")
4504 (match_operand:V4DI 1 "register_operand" "x")
4505 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4506 (parallel [(const_int 1)
4511 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4512 [(set_attr "type" "sselog")
4513 (set_attr "prefix" "vex")
4514 (set_attr "mode" "OI")])
;; V2DI interleave-high.  Alternative 0 (isa noavx) is the two-operand
;; punpckhqdq with operand 1 tied to the destination; alternative 1 (isa avx)
;; is the three-operand vpunpckhqdq.
4516 (define_insn "vec_interleave_highv2di"
4517 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4520 (match_operand:V2DI 1 "register_operand" "0,x")
4521 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4522 (parallel [(const_int 1)
4526 punpckhqdq\t{%2, %0|%0, %2}
4527 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4528 [(set_attr "isa" "noavx,avx")
4529 (set_attr "type" "sselog")
4530 (set_attr "prefix_data16" "1,*")
4531 (set_attr "prefix" "orig,vex")
4532 (set_attr "mode" "TI")])
;; V4DI interleave-low: a single VEX-encoded alternative that emits
;; vpunpcklqdq with register operand 1 and register-or-memory operand 2.
;; The selector list begins at element 0.
4534 (define_insn "avx2_interleave_lowv4di"
4535 [(set (match_operand:V4DI 0 "register_operand" "=x")
4538 (match_operand:V4DI 1 "register_operand" "x")
4539 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4540 (parallel [(const_int 0)
4545 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4546 [(set_attr "type" "sselog")
4547 (set_attr "prefix" "vex")
4548 (set_attr "mode" "OI")])
;; V2DI interleave-low.  Alternative 0 (isa noavx) is the two-operand
;; punpcklqdq with operand 1 tied to the destination; alternative 1 (isa avx)
;; is the three-operand vpunpcklqdq.
4550 (define_insn "vec_interleave_lowv2di"
4551 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4554 (match_operand:V2DI 1 "register_operand" "0,x")
4555 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4556 (parallel [(const_int 0)
4560 punpcklqdq\t{%2, %0|%0, %2}
4561 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4562 [(set_attr "isa" "noavx,avx")
4563 (set_attr "type" "sselog")
4564 (set_attr "prefix_data16" "1,*")
4565 (set_attr "prefix" "orig,vex")
4566 (set_attr "mode" "TI")])
;; Two-element shuffle over the VI8F_128 modes: selects one element
;; (operand 3, 0..1) from operand 1 and one (operand 4, 2..3) from operand 2
;; out of their concatenation.  The output code packs both selectors into a
;; 2-bit immediate stored in operands[3], then returns the two-operand shufpd
;; or three-operand vshufpd template depending on which_alternative.
4568 (define_insn "sse2_shufpd_<mode>"
4569 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4570 (vec_select:VI8F_128
4571 (vec_concat:<ssedoublevecmode>
4572 (match_operand:VI8F_128 1 "register_operand" "0,x")
4573 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4574 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4575 (match_operand 4 "const_2_to_3_operand" "")])))]
4579 mask = INTVAL (operands[3]);
4580 mask |= (INTVAL (operands[4]) - 2) << 1;
4581 operands[3] = GEN_INT (mask);
4583 switch (which_alternative)
4586 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4588 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4593 [(set_attr "isa" "noavx,avx")
4594 (set_attr "type" "sselog")
4595 (set_attr "length_immediate" "1")
4596 (set_attr "prefix" "orig,vex")
4597 (set_attr "mode" "V2DF")])
4599 ;; Avoid combining registers from different units in a single alternative,
4600 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of V2DF operand 1 into DF operand 0 (SSE2).  The
;; condition rejects the memory-to-memory case.  There are six alternatives;
;; the first template is a %vmovhpd store and the AVX-only alternative uses
;; vunpckhpd.  The last two alternatives have types fmov and imov.
;; NOTE(review): several output templates are elided from this listing.
4601 (define_insn "sse2_storehpd"
4602 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4604 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4605 (parallel [(const_int 1)])))]
4606 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4608 %vmovhpd\t{%1, %0|%0, %1}
4610 vunpckhpd\t{%d1, %0|%0, %d1}
4614 [(set_attr "isa" "*,noavx,avx,*,*,*")
4615 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4616 (set (attr "prefix_data16")
4618 (and (eq_attr "alternative" "0")
4619 (not (match_test "TARGET_AVX")))
4621 (const_string "*")))
4622 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4623 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; After reload (SSE2), extracting element 1 from a V2DF memory operand is
;; rewritten as a plain move into register operand 0: operand 1 is
;; re-addressed as a DFmode reference at byte offset 8.
;; NOTE(review): the pattern header is not visible in this listing.
4626 [(set (match_operand:DF 0 "register_operand" "")
4628 (match_operand:V2DF 1 "memory_operand" "")
4629 (parallel [(const_int 1)])))]
4630 "TARGET_SSE2 && reload_completed"
4631 [(set (match_dup 0) (match_dup 1))]
4632 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction from V2DF for targets with SSE but without SSE2.
;; The three alternatives use movhps (store to memory), movhlps (register to
;; register) and movlps from the high half (%H1) of an offsettable memory
;; source.  Memory-to-memory is rejected by the condition.
4634 (define_insn "*vec_extractv2df_1_sse"
4635 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4637 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4638 (parallel [(const_int 1)])))]
4639 "!TARGET_SSE2 && TARGET_SSE
4640 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4642 movhps\t{%1, %0|%0, %1}
4643 movhlps\t{%1, %0|%0, %1}
4644 movlps\t{%H1, %0|%0, %H1}"
4645 [(set_attr "type" "ssemov")
4646 (set_attr "mode" "V2SF,V4SF,V2SF")])
4648 ;; Avoid combining registers from different units in a single alternative,
4649 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of V2DF operand 1 into DF operand 0 (SSE2).  The
;; condition rejects the memory-to-memory case.  The store-to-memory
;; alternative emits %vmovlpd; the last two alternatives have types fmov
;; and imov.
;; NOTE(review): the remaining output templates are elided from this listing.
4650 (define_insn "sse2_storelpd"
4651 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4653 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4654 (parallel [(const_int 0)])))]
4655 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4657 %vmovlpd\t{%1, %0|%0, %1}
4662 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4663 (set_attr "prefix_data16" "1,*,*,*,*")
4664 (set_attr "prefix" "maybe_vex")
4665 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload (SSE2), extracting element 0 into register operand 0 becomes
;; an ordinary DFmode move.  The source is re-expressed in DFmode either as a
;; fresh REG with the same register number or via gen_lowpart.
;; NOTE(review): the pattern header and the test that chooses between the
;; two forms are elided from this listing.
4668 [(set (match_operand:DF 0 "register_operand" "")
4670 (match_operand:V2DF 1 "nonimmediate_operand" "")
4671 (parallel [(const_int 0)])))]
4672 "TARGET_SSE2 && reload_completed"
4675 rtx op1 = operands[1];
4677 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4679 op1 = gen_lowpart (DFmode, op1);
4680 emit_move_insn (operands[0], op1);
;; Element-0 extraction from V2DF for targets with SSE but without SSE2.
;; The three alternatives use movlps (store), movaps (register copy) and
;; movlps (load).  Memory-to-memory is rejected by the condition.
4684 (define_insn "*vec_extractv2df_0_sse"
4685 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4687 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4688 (parallel [(const_int 0)])))]
4689 "!TARGET_SSE2 && TARGET_SSE
4690 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4692 movlps\t{%1, %0|%0, %1}
4693 movaps\t{%1, %0|%0, %1}
4694 movlps\t{%1, %0|%0, %1}"
4695 [(set_attr "type" "ssemov")
4696 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  ix86_fixup_binary_operands
;; returns the destination to use (dst); the insn is emitted into dst and,
;; if dst is not operands[0], the result is then copied to operands[0].
4698 (define_expand "sse2_loadhpd_exp"
4699 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4702 (match_operand:V2DF 1 "nonimmediate_operand" "")
4703 (parallel [(const_int 0)]))
4704 (match_operand:DF 2 "nonimmediate_operand" "")))]
4707 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4709 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4711 /* Fix up the destination if needed. */
4712 if (dst != operands[0])
4713 emit_move_insn (operands[0], dst);
4718 ;; Avoid combining registers from different units in a single alternative,
4719 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds V2DF operand 0 from element 0 of V2DF operand 1 together with DF
;; operand 2 (SSE2).  Operands 1 and 2 may not both be in memory.  The
;; alternatives with operand 2 in memory use movhpd/vmovhpd; those with it
;; in an SSE register use unpcklpd/vunpcklpd.  The last two alternatives
;; have types fmov and imov.
;; NOTE(review): the constraint strings of operands 0 and 1 and the last
;; three templates are elided from this listing.
4720 (define_insn "sse2_loadhpd"
4721 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4725 (match_operand:V2DF 1 "nonimmediate_operand"
4727 (parallel [(const_int 0)]))
4728 (match_operand:DF 2 "nonimmediate_operand"
4729 " m,m,x,x,x,*f,r")))]
4730 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4732 movhpd\t{%2, %0|%0, %2}
4733 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4734 unpcklpd\t{%2, %0|%0, %2}
4735 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4739 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4740 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4741 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4742 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4743 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload (SSE2): when the V2DF destination is memory and element 0 is
;; taken from that same memory (match_dup 0), only the other half changes.
;; The pattern becomes a store of DF register operand 1 to the destination
;; re-addressed in DFmode at byte offset 8.
;; NOTE(review): the pattern header is not visible in this listing.
4746 [(set (match_operand:V2DF 0 "memory_operand" "")
4748 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4749 (match_operand:DF 1 "register_operand" "")))]
4750 "TARGET_SSE2 && reload_completed"
4751 [(set (match_dup 0) (match_dup 1))]
4752 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  ix86_fixup_binary_operands
;; returns the destination to use (dst); the insn is emitted into dst and,
;; if dst is not operands[0], the result is then copied to operands[0].
4754 (define_expand "sse2_loadlpd_exp"
4755 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4757 (match_operand:DF 2 "nonimmediate_operand" "")
4759 (match_operand:V2DF 1 "nonimmediate_operand" "")
4760 (parallel [(const_int 1)]))))]
4763 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4765 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4767 /* Fix up the destination if needed. */
4768 if (dst != operands[0])
4769 emit_move_insn (operands[0], dst);
4774 ;; Avoid combining registers from different units in a single alternative,
4775 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds V2DF operand 0 from DF operand 2 together with element 1 of
;; operand 1 (SSE2).  Operands 1 and 2 may not both be in memory.  There are
;; eleven alternatives: alternative 5 (shufpd) has type sselog, alternatives
;; 9 and 10 have types fmov and imov, and all others are ssemov.
;; NOTE(review): the templates for the last three alternatives are elided
;; from this listing.
4776 (define_insn "sse2_loadlpd"
4777 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4778 "=x,x,x,x,x,x,x,x,m,m ,m")
4780 (match_operand:DF 2 "nonimmediate_operand"
4781 " m,m,m,x,x,0,0,x,x,*f,r")
4783 (match_operand:V2DF 1 "vector_move_operand"
4784 " C,0,x,0,x,x,o,o,0,0 ,0")
4785 (parallel [(const_int 1)]))))]
4786 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4788 %vmovsd\t{%2, %0|%0, %2}
4789 movlpd\t{%2, %0|%0, %2}
4790 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4791 movsd\t{%2, %0|%0, %2}
4792 vmovsd\t{%2, %1, %0|%0, %1, %2}
4793 shufpd\t{$2, %1, %0|%0, %1, 2}
4794 movhpd\t{%H1, %0|%0, %H1}
4795 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4799 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4801 (cond [(eq_attr "alternative" "5")
4802 (const_string "sselog")
4803 (eq_attr "alternative" "9")
4804 (const_string "fmov")
4805 (eq_attr "alternative" "10")
4806 (const_string "imov")
4808 (const_string "ssemov")))
4809 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4810 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4811 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4812 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; After reload (SSE2): when the V2DF destination is memory and element 1 is
;; taken from that same memory (match_dup 0), only the other half changes.
;; The pattern becomes a store of DF register operand 1 to the destination
;; re-addressed in DFmode at byte offset 0.
;; NOTE(review): the pattern header is not visible in this listing.
4815 [(set (match_operand:V2DF 0 "memory_operand" "")
4817 (match_operand:DF 1 "register_operand" "")
4818 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4819 "TARGET_SSE2 && reload_completed"
4820 [(set (match_dup 0) (match_dup 1))]
4821 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Nine-alternative pattern combining V2DF operands 2 and 1 into operand 0.
;; The templates cover movsd/vmovsd, movlpd/vmovlpd, shufpd and
;; movhps/vmovhps.  Alternative 5 (shufpd) has type sselog and the others
;; are ssemov.  The prefix_data16 cond tests alternatives 2 and 4 when not
;; TARGET_AVX.
;; NOTE(review): the RTL operator combining the two inputs and the insn
;; condition are elided from this listing.
4823 (define_insn "sse2_movsd"
4824 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4826 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4827 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4831 movsd\t{%2, %0|%0, %2}
4832 vmovsd\t{%2, %1, %0|%0, %1, %2}
4833 movlpd\t{%2, %0|%0, %2}
4834 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4835 %vmovlpd\t{%2, %0|%0, %2}
4836 shufpd\t{$2, %1, %0|%0, %1, 2}
4837 movhps\t{%H1, %0|%0, %H1}
4838 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4839 %vmovhps\t{%1, %H0|%H0, %1}"
4840 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4843 (eq_attr "alternative" "5")
4844 (const_string "sselog")
4845 (const_string "ssemov")))
4846 (set (attr "prefix_data16")
4848 (and (eq_attr "alternative" "2,4")
4849 (not (match_test "TARGET_AVX")))
4851 (const_string "*")))
4852 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4853 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4854 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Produces V2DF operand 0 from a single DF operand 1.  Alternative 0
;; (isa noavx) requires operand 1 tied to the destination; alternative 1
;; (isa sse3) accepts a register or memory source and emits %vmovddup.
;; NOTE(review): the first template and the insn condition are elided from
;; this listing.
4856 (define_insn "vec_dupv2df"
4857 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4859 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
4863 %vmovddup\t{%1, %0|%0, %1}"
4864 [(set_attr "isa" "noavx,sse3")
4865 (set_attr "type" "sselog1")
4866 (set_attr "prefix" "orig,maybe_vex")
4867 (set_attr "mode" "V2DF,DF")])
;; Builds V2DF operand 0 from DF operands 1 and 2, with eight alternatives.
;; Alternatives 0-2 (unpcklpd, vunpcklpd, %vmovddup) have type sselog and the
;; rest are ssemov.  Per the isa attribute, alternatives 0-5 require SSE2,
;; SSE3 or AVX, while alternatives 6 and 7 (movlhps, movhps) are tagged only
;; noavx.
;; NOTE(review): the insn condition is elided from this listing.
4869 (define_insn "*vec_concatv2df"
4870 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
4872 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
4873 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
4876 unpcklpd\t{%2, %0|%0, %2}
4877 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4878 %vmovddup\t{%1, %0|%0, %1}
4879 movhpd\t{%2, %0|%0, %2}
4880 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4881 %vmovsd\t{%1, %0|%0, %1}
4882 movlhps\t{%2, %0|%0, %2}
4883 movhps\t{%2, %0|%0, %2}"
4884 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
4887 (eq_attr "alternative" "0,1,2")
4888 (const_string "sselog")
4889 (const_string "ssemov")))
4890 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
4891 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
4892 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
4894 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4896 ;; Parallel integral arithmetic
4898 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for integer vector negation over the VI_AVX2 modes.  The
;; preparation statement loads the all-zero constant of the mode into a
;; fresh register and installs it as operands[2].
;; NOTE(review): the RTL operator that consumes operands[2] is elided from
;; this listing; presumably a subtraction from zero -- confirm.
4900 (define_expand "neg<mode>2"
4901 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4904 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4906 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the <plusminus_insn> operations on the VI_AVX2 modes.  The
;; only preparation is ix86_fixup_binary_operands_no_copy for <CODE>.
4908 (define_expand "<plusminus_insn><mode>3"
4909 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4911 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4912 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4914 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the <plusminus_insn> operations on the VI_AVX2 modes,
;; gated on SSE2 and ix86_binary_operator_ok.  Alternative 0 (noavx) is the
;; two-operand p<plusminus_mnemonic><ssemodesuffix>; alternative 1 (avx) is
;; the three-operand v-prefixed form.  <comm> supplies the commutativity
;; marker on operand 1.
4916 (define_insn "*<plusminus_insn><mode>3"
4917 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4919 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4920 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4921 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4923 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4924 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4925 [(set_attr "isa" "noavx,avx")
4926 (set_attr "type" "sseiadd")
4927 (set_attr "prefix_data16" "1,*")
4928 (set_attr "prefix" "orig,vex")
4929 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus operations on the VI12_AVX2 modes.  The
;; only preparation is ix86_fixup_binary_operands_no_copy for <CODE>.
4931 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4932 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4933 (sat_plusminus:VI12_AVX2
4934 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4935 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4937 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus operations on the VI12_AVX2 modes,
;; gated on SSE2 and ix86_binary_operator_ok.  Alternative 0 (noavx) is the
;; two-operand legacy form; alternative 1 (avx) is the three-operand
;; v-prefixed form.
4939 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4940 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4941 (sat_plusminus:VI12_AVX2
4942 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4943 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4944 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4946 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4947 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4948 [(set_attr "isa" "noavx,avx")
4949 (set_attr "type" "sseiadd")
4950 (set_attr "prefix_data16" "1,*")
4951 (set_attr "prefix" "orig,vex")
4952 (set_attr "mode" "TI")])
;; Byte-element vector multiply (VI1_AVX2), split while pseudos can still be
;; created.  The split code:
;;   1. interleaves the high and low halves of each input into t[0]..t[3];
;;   2. multiplies them in <sseunpackmode> into t[4] (high) and t[5] (low);
;;   3. for V16QI, extracts the even bytes of t[5] and t[4] into operands[0];
;;      otherwise builds a 32-entry constant permutation (even bytes, with
;;      the second and third quarters swapped) and expands it with
;;      ix86_expand_vec_perm_const;
;;   4. attaches a REG_EQUAL note describing the whole MULT to the last insn.
;; NOTE(review): several lines (second arguments of the interleave calls,
;; braces, the remaining permutation set-up) are elided from this listing.
4954 (define_insn_and_split "mul<mode>3"
4955 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4956 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4957 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4959 && can_create_pseudo_p ()"
4966 enum machine_mode mulmode = <sseunpackmode>mode;
4968 for (i = 0; i < 6; ++i)
4969 t[i] = gen_reg_rtx (<MODE>mode);
4971 /* Unpack data such that we've got a source byte in each low byte of
4972 each word. We don't care what goes into the high byte of each word.
4973 Rather than trying to get zero in there, most convenient is to let
4974 it be a copy of the low byte. */
4975 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4977 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4979 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4981 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4984 /* Multiply words. The end-of-line annotations here give a picture of what
4985 the output of that instruction looks like. Dot means don't care; the
4986 letters are the bytes of the result with A being the most significant. */
4987 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4988 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4989 gen_lowpart (mulmode, t[0]),
4990 gen_lowpart (mulmode, t[1]))));
4991 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4992 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4993 gen_lowpart (mulmode, t[2]),
4994 gen_lowpart (mulmode, t[3]))));
4996 /* Extract the even bytes and merge them back together. */
4997 if (<MODE>mode == V16QImode)
4998 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
5001 /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
5002 this can't be normal even extraction, but one where additionally
5003 the second and third quarter are swapped. That is even one insn
5004 shorter than even extraction. */
5005 rtvec v = rtvec_alloc (32);
5006 for (i = 0; i < 32; ++i)
5008 = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
5012 t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
5013 ix86_expand_vec_perm_const (t);
5016 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5017 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Expander for the VI2_AVX2 vector multiply.  The only preparation is
;; ix86_fixup_binary_operands_no_copy for MULT.
5021 (define_expand "mul<mode>3"
5022 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5023 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
5024 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
5026 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the VI2_AVX2 multiply: pmullw (noavx, two-operand) or
;; vpmullw (avx, three-operand).  Operand 1 is marked commutative with '%'.
5028 (define_insn "*mul<mode>3"
5029 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5030 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5031 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5032 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5034 pmullw\t{%2, %0|%0, %2}
5035 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5036 [(set_attr "isa" "noavx,avx")
5037 (set_attr "type" "sseimul")
5038 (set_attr "prefix_data16" "1,*")
5039 (set_attr "prefix" "orig,vex")
5040 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the high-part multiply on the VI2_AVX2 modes.  Both inputs
;; are widened with any_extend to <ssedoublemode>, multiplied, and the
;; product is shifted right logically.  The only preparation is
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the outer narrowing operator and the shift count are
;; elided from this listing.
5042 (define_expand "<s>mul<mode>3_highpart"
5043 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5045 (lshiftrt:<ssedoublemode>
5046 (mult:<ssedoublemode>
5047 (any_extend:<ssedoublemode>
5048 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5049 (any_extend:<ssedoublemode>
5050 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5053 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the high-part multiply on the VI2_AVX2 modes:
;; pmulh<u>w (noavx, two-operand) or vpmulh<u>w (avx, three-operand), where
;; <u> follows the any_extend code.  Operand 1 is marked commutative.
5055 (define_insn "*<s>mul<mode>3_highpart"
5056 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5058 (lshiftrt:<ssedoublemode>
5059 (mult:<ssedoublemode>
5060 (any_extend:<ssedoublemode>
5061 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5062 (any_extend:<ssedoublemode>
5063 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5065 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5067 pmulh<u>w\t{%2, %0|%0, %2}
5068 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5069 [(set_attr "isa" "noavx,avx")
5070 (set_attr "type" "sseimul")
5071 (set_attr "prefix_data16" "1,*")
5072 (set_attr "prefix" "orig,vex")
5073 (set_attr "mode" "<sseinsnmode>")])
;; Expander for a V4DI product of the even-indexed elements (0, 2, 4, 6) of
;; two V8SI operands.  The only preparation is
;; ix86_fixup_binary_operands_no_copy for MULT in V8SImode.
;; NOTE(review): the extension and mult operators are elided from this
;; listing; the name suggests the unsigned variant.
5075 (define_expand "avx2_umulv4siv4di3"
5076 [(set (match_operand:V4DI 0 "register_operand" "")
5080 (match_operand:V8SI 1 "nonimmediate_operand" "")
5081 (parallel [(const_int 0) (const_int 2)
5082 (const_int 4) (const_int 6)])))
5085 (match_operand:V8SI 2 "nonimmediate_operand" "")
5086 (parallel [(const_int 0) (const_int 2)
5087 (const_int 4) (const_int 6)])))))]
5089 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn (TARGET_AVX2) over the even-indexed elements (0, 2, 4, 6)
;; of two V8SI operands producing V4DI; emits vpmuludq.  Operand 1 is marked
;; commutative with '%'.
5091 (define_insn "*avx_umulv4siv4di3"
5092 [(set (match_operand:V4DI 0 "register_operand" "=x")
5096 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5097 (parallel [(const_int 0) (const_int 2)
5098 (const_int 4) (const_int 6)])))
5101 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5102 (parallel [(const_int 0) (const_int 2)
5103 (const_int 4) (const_int 6)])))))]
5104 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5105 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5106 [(set_attr "type" "sseimul")
5107 (set_attr "prefix" "vex")
5108 (set_attr "mode" "OI")])
;; Expander for a V2DI product of elements 0 and 2 of two V4SI operands.
;; The only preparation is ix86_fixup_binary_operands_no_copy for MULT in
;; V4SImode.
;; NOTE(review): the extension and mult operators are elided from this
;; listing; the name suggests the unsigned variant.
5110 (define_expand "sse2_umulv2siv2di3"
5111 [(set (match_operand:V2DI 0 "register_operand" "")
5115 (match_operand:V4SI 1 "nonimmediate_operand" "")
5116 (parallel [(const_int 0) (const_int 2)])))
5119 (match_operand:V4SI 2 "nonimmediate_operand" "")
5120 (parallel [(const_int 0) (const_int 2)])))))]
5122 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn (SSE2) over elements 0 and 2 of two V4SI operands producing
;; V2DI: pmuludq (noavx, two-operand) or vpmuludq (avx, three-operand).
;; Operand 1 is marked commutative with '%'.
5124 (define_insn "*sse2_umulv2siv2di3"
5125 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5129 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5130 (parallel [(const_int 0) (const_int 2)])))
5133 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5134 (parallel [(const_int 0) (const_int 2)])))))]
5135 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5137 pmuludq\t{%2, %0|%0, %2}
5138 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5139 [(set_attr "isa" "noavx,avx")
5140 (set_attr "type" "sseimul")
5141 (set_attr "prefix_data16" "1,*")
5142 (set_attr "prefix" "orig,vex")
5143 (set_attr "mode" "TI")])
;; Expander for a V4DI product of the even-indexed elements (0, 2, 4, 6) of
;; two V8SI operands.  The only preparation is
;; ix86_fixup_binary_operands_no_copy for MULT in V8SImode.
;; NOTE(review): the extension and mult operators are elided from this
;; listing; presumably the signed counterpart of avx2_umulv4siv4di3.
5145 (define_expand "avx2_mulv4siv4di3"
5146 [(set (match_operand:V4DI 0 "register_operand" "")
5150 (match_operand:V8SI 1 "nonimmediate_operand" "")
5151 (parallel [(const_int 0) (const_int 2)
5152 (const_int 4) (const_int 6)])))
5155 (match_operand:V8SI 2 "nonimmediate_operand" "")
5156 (parallel [(const_int 0) (const_int 2)
5157 (const_int 4) (const_int 6)])))))]
5159 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn (TARGET_AVX2) over the even-indexed elements (0, 2, 4, 6)
;; of two V8SI operands producing V4DI; emits vpmuldq.
;; Operand 1 carries the '%' commutativity marker, as the other widening
;; multiply insns in this file do.  The insn condition already treats MULT
;; as commutative via ix86_binary_operator_ok, so the pattern can be
;; recognized with operand 1 in memory; '%' lets the register allocator swap
;; it into the "xm" slot instead of forcing an extra load.
5161 (define_insn "*avx2_mulv4siv4di3"
5162 [(set (match_operand:V4DI 0 "register_operand" "=x")
5166 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5167 (parallel [(const_int 0) (const_int 2)
5168 (const_int 4) (const_int 6)])))
5171 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5172 (parallel [(const_int 0) (const_int 2)
5173 (const_int 4) (const_int 6)])))))]
5174 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5175 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5176 [(set_attr "isa" "avx")
5177 (set_attr "type" "sseimul")
5178 (set_attr "prefix_extra" "1")
5179 (set_attr "prefix" "vex")
5180 (set_attr "mode" "OI")])
;; Expander for a V2DI product of elements 0 and 2 of two V4SI operands.
;; The only preparation is ix86_fixup_binary_operands_no_copy for MULT in
;; V4SImode.
;; NOTE(review): the extension and mult operators are elided from this
;; listing; presumably the signed counterpart of sse2_umulv2siv2di3.
5182 (define_expand "sse4_1_mulv2siv2di3"
5183 [(set (match_operand:V2DI 0 "register_operand" "")
5187 (match_operand:V4SI 1 "nonimmediate_operand" "")
5188 (parallel [(const_int 0) (const_int 2)])))
5191 (match_operand:V4SI 2 "nonimmediate_operand" "")
5192 (parallel [(const_int 0) (const_int 2)])))))]
5194 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn (SSE4.1) over elements 0 and 2 of two V4SI operands
;; producing V2DI: pmuldq (noavx, two-operand) or vpmuldq (avx,
;; three-operand).  Operand 1 is marked commutative with '%'.
5196 (define_insn "*sse4_1_mulv2siv2di3"
5197 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5201 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5202 (parallel [(const_int 0) (const_int 2)])))
5205 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5206 (parallel [(const_int 0) (const_int 2)])))))]
5207 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5209 pmuldq\t{%2, %0|%0, %2}
5210 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5211 [(set_attr "isa" "noavx,avx")
5212 (set_attr "type" "sseimul")
5213 (set_attr "prefix_data16" "1,*")
5214 (set_attr "prefix_extra" "1")
5215 (set_attr "prefix" "orig,vex")
5216 (set_attr "mode" "TI")])
;; Expander producing V8SI from two V16HI operands.  The RTL takes two V8HI
;; selections from each input, one whose selector list starts at element 0
;; and one starting at element 1 and ending at element 15.  The only
;; preparation is ix86_fixup_binary_operands_no_copy for MULT in V16HImode.
;; NOTE(review): the arithmetic operators and most selector entries are
;; elided from this listing.
5218 (define_expand "avx2_pmaddwd"
5219 [(set (match_operand:V8SI 0 "register_operand" "")
5224 (match_operand:V16HI 1 "nonimmediate_operand" "")
5225 (parallel [(const_int 0)
5235 (match_operand:V16HI 2 "nonimmediate_operand" "")
5236 (parallel [(const_int 0)
5246 (vec_select:V8HI (match_dup 1)
5247 (parallel [(const_int 1)
5256 (vec_select:V8HI (match_dup 2)
5257 (parallel [(const_int 1)
5264 (const_int 15)]))))))]
5266 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander producing V4SI from two V8HI operands.  The RTL takes two V4HI
;; selections from each input, one whose selector list starts at element 0
;; and one starting at element 1 and ending at element 7.  The only
;; preparation is ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
;; NOTE(review): the arithmetic operators and most selector entries are
;; elided from this listing.
5268 (define_expand "sse2_pmaddwd"
5269 [(set (match_operand:V4SI 0 "register_operand" "")
5274 (match_operand:V8HI 1 "nonimmediate_operand" "")
5275 (parallel [(const_int 0)
5281 (match_operand:V8HI 2 "nonimmediate_operand" "")
5282 (parallel [(const_int 0)
5288 (vec_select:V4HI (match_dup 1)
5289 (parallel [(const_int 1)
5294 (vec_select:V4HI (match_dup 2)
5295 (parallel [(const_int 1)
5298 (const_int 7)]))))))]
5300 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn (TARGET_AVX2) for the V16HI -> V8SI pattern built by the
;; avx2_pmaddwd expander; emits vpmaddwd.  Operand 1 is marked commutative
;; with '%'.
5302 (define_insn "*avx2_pmaddwd"
5303 [(set (match_operand:V8SI 0 "register_operand" "=x")
5308 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5309 (parallel [(const_int 0)
5319 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5320 (parallel [(const_int 0)
5330 (vec_select:V8HI (match_dup 1)
5331 (parallel [(const_int 1)
5340 (vec_select:V8HI (match_dup 2)
5341 (parallel [(const_int 1)
5348 (const_int 15)]))))))]
5349 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5350 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5351 [(set_attr "type" "sseiadd")
5352 (set_attr "prefix" "vex")
5353 (set_attr "mode" "OI")])
;; Matching insn (SSE2) for the V8HI -> V4SI pattern built by the
;; sse2_pmaddwd expander: pmaddwd (noavx, two-operand) or vpmaddwd (avx,
;; three-operand).  Operand 1 is marked commutative with '%'.
5355 (define_insn "*sse2_pmaddwd"
5356 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5361 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5362 (parallel [(const_int 0)
5368 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5369 (parallel [(const_int 0)
5375 (vec_select:V4HI (match_dup 1)
5376 (parallel [(const_int 1)
5381 (vec_select:V4HI (match_dup 2)
5382 (parallel [(const_int 1)
5385 (const_int 7)]))))))]
5386 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5388 pmaddwd\t{%2, %0|%0, %2}
5389 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5390 [(set_attr "isa" "noavx,avx")
5391 (set_attr "type" "sseiadd")
5392 (set_attr "atom_unit" "simul")
5393 (set_attr "prefix_data16" "1,*")
5394 (set_attr "prefix" "orig,vex")
5395 (set_attr "mode" "TI")])
;; Expander for the VI4_AVX2 vector multiply.  Operand fix-up via
;; ix86_fixup_binary_operands_no_copy is performed only when SSE4.1 or AVX
;; is available; otherwise the operands are left as given.
5397 (define_expand "mul<mode>3"
5398 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5399 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5400 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5403 if (TARGET_SSE4_1 || TARGET_AVX)
5404 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn (SSE4.1) for the VI4_AVX2 multiply: pmulld (noavx,
;; two-operand) or vpmulld (avx, three-operand).  Operand 1 is marked
;; commutative with '%'.
5407 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5408 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5409 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5410 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5411 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5413 pmulld\t{%2, %0|%0, %2}
5414 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5415 [(set_attr "isa" "noavx,avx")
5416 (set_attr "type" "sseimul")
5417 (set_attr "prefix_extra" "1")
5418 (set_attr "prefix" "orig,vex")
5419 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX, split
;; while pseudos can still be created.  The split code:
;;   1. multiplies elements 2 and 0 with gen_sse2_umulv2siv2di3 into t1;
;;   2. shifts both inputs right as V1TI values so elements 3 and 1 land in
;;      the 2/0 slots (t2, t3), then multiplies again into t4;
;;   3. uses pshufd with selectors (0, 2, 0, 0) to move the wanted results
;;      into the low two elements (t5, t6);
;;   4. merges t5 and t6 with gen_vec_interleave_lowv4si into op0;
;;   5. attaches a REG_EQUAL note describing the whole MULT to the last insn.
;; NOTE(review): the assignments of op0/op1/op2 and the trailing arguments
;; of several calls are elided from this listing.
5421 (define_insn_and_split "*sse2_mulv4si3"
5422 [(set (match_operand:V4SI 0 "register_operand" "")
5423 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5424 (match_operand:V4SI 2 "register_operand" "")))]
5425 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5426 && can_create_pseudo_p ()"
5431 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5437 t1 = gen_reg_rtx (V4SImode);
5438 t2 = gen_reg_rtx (V4SImode);
5439 t3 = gen_reg_rtx (V4SImode);
5440 t4 = gen_reg_rtx (V4SImode);
5441 t5 = gen_reg_rtx (V4SImode);
5442 t6 = gen_reg_rtx (V4SImode);
5443 thirtytwo = GEN_INT (32);
5445 /* Multiply elements 2 and 0. */
5446 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5449 /* Shift both input vectors down one element, so that elements 3
5450 and 1 are now in the slots for elements 2 and 0. For K8, at
5451 least, this is faster than using a shuffle. */
5452 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5453 gen_lowpart (V1TImode, op1),
5455 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5456 gen_lowpart (V1TImode, op2),
5458 /* Multiply elements 3 and 1. */
5459 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5462 /* Move the results in element 2 down to element 1; we don't care
5463 what goes in elements 2 and 3. */
5464 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5465 const0_rtx, const0_rtx));
5466 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5467 const0_rtx, const0_rtx));
5469 /* Merge the parts back together. */
5470 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5472 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5473 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; 64-bit element vector multiply (VI8_AVX2), split while pseudos can still
;; be created.  Two strategies are visible:
;;   * TARGET_XOP with V2DImode: view the inputs as V4SI, shuffle op1 into
;;     t1, form the cross products with gen_mulv4si3, add pairs with
;;     gen_xop_phadddq, shift the sum left by 32, and finish with
;;     gen_xop_pmacsdql, which also adds the low product.
;;   * Otherwise: three unsigned 32x32->64 multiplies (low*low and the two
;;     high*low cross terms obtained via 32-bit logical right shifts); the
;;     cross terms are shifted left by 32 and all three are added.
;; A REG_EQUAL note describing the whole MULT is attached to the last insn.
;; NOTE(review): the assignments of op0/op1/op2, the pshufd selectors and
;; the if/else braces are elided from this listing.
5477 (define_insn_and_split "mul<mode>3"
5478 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5479 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5480 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5482 && can_create_pseudo_p ()"
5487 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5494 if (TARGET_XOP && <MODE>mode == V2DImode)
5496 /* op1: A,B,C,D, op2: E,F,G,H */
5497 op1 = gen_lowpart (V4SImode, op1);
5498 op2 = gen_lowpart (V4SImode, op2);
5500 t1 = gen_reg_rtx (V4SImode);
5501 t2 = gen_reg_rtx (V4SImode);
5502 t3 = gen_reg_rtx (V2DImode);
5503 t4 = gen_reg_rtx (V2DImode);
5506 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5512 /* t2: (B*E),(A*F),(D*G),(C*H) */
5513 emit_insn (gen_mulv4si3 (t2, t1, op2));
5515 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5516 emit_insn (gen_xop_phadddq (t3, t2));
5518 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5519 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5521 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5522 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5526 t1 = gen_reg_rtx (<MODE>mode);
5527 t2 = gen_reg_rtx (<MODE>mode);
5528 t3 = gen_reg_rtx (<MODE>mode);
5529 t4 = gen_reg_rtx (<MODE>mode);
5530 t5 = gen_reg_rtx (<MODE>mode);
5531 t6 = gen_reg_rtx (<MODE>mode);
5532 thirtytwo = GEN_INT (32);
5534 /* Multiply low parts. */
5535 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5536 (t1, gen_lowpart (<ssepackmode>mode, op1),
5537 gen_lowpart (<ssepackmode>mode, op2)));
5539 /* Shift input vectors right 32 bits so we can multiply high parts. */
5540 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5541 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5543 /* Multiply high parts by low parts. */
5544 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5545 (t4, gen_lowpart (<ssepackmode>mode, op1),
5546 gen_lowpart (<ssepackmode>mode, t3)));
5547 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5548 (t5, gen_lowpart (<ssepackmode>mode, op2),
5549 gen_lowpart (<ssepackmode>mode, t2)));
5551 /* Shift them back. */
5552 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5553 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5555 /* Add the three parts together. */
5556 emit_insn (gen_add<mode>3 (t6, t1, t4));
5557 emit_insn (gen_add<mode>3 (op0, t6, t5));
5560 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5561 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Widening multiply, high half, for the VI2_AVX2 modes.  It computes the
;; element-wise low product (gen_mul<mode>3) into t1 and the high part
;; (gen_<s>mul<mode>3_highpart) into t2, then interleaves the high halves of
;; t1 and t2 into operand 0 viewed in <MODE>mode.
;; NOTE(review): the assignments of op1/op2 and the insn condition are
;; elided from this listing.
5565 (define_expand "vec_widen_<s>mult_hi_<mode>"
5566 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5567 (any_extend:<sseunpackmode>
5568 (match_operand:VI2_AVX2 1 "register_operand" ""))
5569 (match_operand:VI2_AVX2 2 "register_operand" "")]
5572 rtx op1, op2, t1, t2, dest;
5576 t1 = gen_reg_rtx (<MODE>mode);
5577 t2 = gen_reg_rtx (<MODE>mode);
5578 dest = gen_lowpart (<MODE>mode, operands[0]);
5580 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5581 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5582 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Widening multiply, low half, for the VI2_AVX2 modes.  It computes the
;; element-wise low product (gen_mul<mode>3) into t1 and the high part
;; (gen_<s>mul<mode>3_highpart) into t2, then interleaves the low halves of
;; t1 and t2 into operand 0 viewed in <MODE>mode.
;; NOTE(review): the assignments of op1/op2 and the insn condition are
;; elided from this listing.
5586 (define_expand "vec_widen_<s>mult_lo_<mode>"
5587 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5588 (any_extend:<sseunpackmode>
5589 (match_operand:VI2_AVX2 1 "register_operand" ""))
5590 (match_operand:VI2_AVX2 2 "register_operand" "")]
5593 rtx op1, op2, t1, t2, dest;
5597 t1 = gen_reg_rtx (<MODE>mode);
5598 t2 = gen_reg_rtx (<MODE>mode);
5599 dest = gen_lowpart (<MODE>mode, operands[0]);
5601 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5602 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5603 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; Expander for the "hi" widening multiply V8SI x V8SI -> V4DI.
;; Each input is viewed as V4DI and permuted with element selectors
;; (0, 2, 1, 3), then shuffled with a pshufd immediate whose four 2-bit
;; fields are 2, 2, 3, 3.  The two shuffled V8SI vectors are finally
;; multiplied into operand 0 with avx2_<u>mulv4siv4di3.
;; NOTE(review): the declaration of t1..t4 and the enabling condition are
;; not visible in this listing.
5607 (define_expand "vec_widen_<s>mult_hi_v8si"
5608 [(match_operand:V4DI 0 "register_operand" "")
5609 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5610 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5615 t1 = gen_reg_rtx (V4DImode);
5616 t2 = gen_reg_rtx (V4DImode);
5617 t3 = gen_reg_rtx (V8SImode);
5618 t4 = gen_reg_rtx (V8SImode);
5619 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5620 const0_rtx, const2_rtx,
5621 const1_rtx, GEN_INT (3)));
5622 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5623 const0_rtx, const2_rtx,
5624 const1_rtx, GEN_INT (3)));
5625 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5626 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5627 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5628 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5629 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Expander for the "lo" widening multiply V8SI x V8SI -> V4DI.
;; Identical to the "hi" variant except for the pshufd immediate, whose
;; four 2-bit fields are 0, 0, 1, 1 here.  Inputs are first permuted as
;; V4DI with selectors (0, 2, 1, 3); the shuffled vectors are multiplied
;; into operand 0 with avx2_<u>mulv4siv4di3.
;; NOTE(review): the declaration of t1..t4 and the enabling condition are
;; not visible in this listing.
5633 (define_expand "vec_widen_<s>mult_lo_v8si"
5634 [(match_operand:V4DI 0 "register_operand" "")
5635 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5636 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5641 t1 = gen_reg_rtx (V4DImode);
5642 t2 = gen_reg_rtx (V4DImode);
5643 t3 = gen_reg_rtx (V8SImode);
5644 t4 = gen_reg_rtx (V8SImode);
5645 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5646 const0_rtx, const2_rtx,
5647 const1_rtx, GEN_INT (3)));
5648 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5649 const0_rtx, const2_rtx,
5650 const1_rtx, GEN_INT (3)));
5651 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5652 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5653 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5654 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5655 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Expander for the signed "hi" widening multiply V4SI x V4SI -> V2DI.
;; Two code sequences are visible:
;;  * pshufd both inputs with selectors (0, 2, 1, 3), zero a V2DI
;;    accumulator t3, and emit xop_pmacsdqh (operand 0, t1, t2, t3);
;;  * interleave the high half of each input with itself and emit
;;    sse4_1_mulv2siv2di3.
;; NOTE(review): the test choosing between the two sequences (presumably
;; TARGET_XOP) and the op1/op2 assignments are not visible here -- confirm.
5659 (define_expand "vec_widen_smult_hi_v4si"
5660 [(match_operand:V2DI 0 "register_operand" "")
5661 (match_operand:V4SI 1 "register_operand" "")
5662 (match_operand:V4SI 2 "register_operand" "")]
5665 rtx op1, op2, t1, t2;
5669 t1 = gen_reg_rtx (V4SImode);
5670 t2 = gen_reg_rtx (V4SImode);
5674 rtx t3 = gen_reg_rtx (V2DImode);
5676 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5677 GEN_INT (1), GEN_INT (3)));
5678 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5679 GEN_INT (1), GEN_INT (3)));
5680 emit_move_insn (t3, CONST0_RTX (V2DImode));
5682 emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
5686 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5687 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5688 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Expander for the signed "lo" widening multiply V4SI x V4SI -> V2DI.
;; Two code sequences are visible:
;;  * pshufd both inputs with selectors (0, 2, 1, 3), zero a V2DI
;;    accumulator t3, and emit xop_pmacsdql (operand 0, t1, t2, t3);
;;  * interleave the low half of each input with itself and emit
;;    sse4_1_mulv2siv2di3.
;; NOTE(review): the test choosing between the two sequences (presumably
;; TARGET_XOP) and the op1/op2 assignments are not visible here -- confirm.
5692 (define_expand "vec_widen_smult_lo_v4si"
5693 [(match_operand:V2DI 0 "register_operand" "")
5694 (match_operand:V4SI 1 "register_operand" "")
5695 (match_operand:V4SI 2 "register_operand" "")]
5698 rtx op1, op2, t1, t2;
5702 t1 = gen_reg_rtx (V4SImode);
5703 t2 = gen_reg_rtx (V4SImode);
5707 rtx t3 = gen_reg_rtx (V2DImode);
5709 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5710 GEN_INT (1), GEN_INT (3)));
5711 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5712 GEN_INT (1), GEN_INT (3)));
5713 emit_move_insn (t3, CONST0_RTX (V2DImode));
5715 emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
5719 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5720 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5721 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Expander for the unsigned "hi" widening multiply V4SI x V4SI -> V2DI.
;; Each input is interleaved (high half) with itself, and the two results
;; are multiplied into operand 0 with sse2_umulv2siv2di3.
;; NOTE(review): the op1/op2 assignments and the enabling condition are
;; not visible in this listing.
5725 (define_expand "vec_widen_umult_hi_v4si"
5726 [(match_operand:V2DI 0 "register_operand" "")
5727 (match_operand:V4SI 1 "register_operand" "")
5728 (match_operand:V4SI 2 "register_operand" "")]
5731 rtx op1, op2, t1, t2;
5735 t1 = gen_reg_rtx (V4SImode);
5736 t2 = gen_reg_rtx (V4SImode);
5738 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5739 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5740 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander for the unsigned "lo" widening multiply V4SI x V4SI -> V2DI.
;; Each input is interleaved (low half) with itself, and the two results
;; are multiplied into operand 0 with sse2_umulv2siv2di3.
;; NOTE(review): the op1/op2 assignments and the enabling condition are
;; not visible in this listing.
5744 (define_expand "vec_widen_umult_lo_v4si"
5745 [(match_operand:V2DI 0 "register_operand" "")
5746 (match_operand:V4SI 1 "register_operand" "")
5747 (match_operand:V4SI 2 "register_operand" "")]
5750 rtx op1, op2, t1, t2;
5754 t1 = gen_reg_rtx (V4SImode);
5755 t2 = gen_reg_rtx (V4SImode);
5757 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5758 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5759 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product expander for VI2_AVX2 inputs with an accumulator
;; (operand 3) and result (operand 0) in <sseunpackmode>.
;; Emits <sse2_avx2>_pmaddwd of operands 1 and 2 into a fresh temporary t,
;; then sets operand 0 to a PLUS expression in <sseunpackmode>.
;; NOTE(review): the PLUS operands (presumably operand 3 and t) are not
;; visible in this listing -- confirm.
5763 (define_expand "sdot_prod<mode>"
5764 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5765 (match_operand:VI2_AVX2 1 "register_operand" "")
5766 (match_operand:VI2_AVX2 2 "register_operand" "")
5767 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5770 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5771 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5772 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5773 gen_rtx_PLUS (<sseunpackmode>mode,
;; Code attribute mapping an extension code to a generator-name prefix:
;; zero_extend -> "sse2", sign_extend -> "sse4_1".  It is used to build
;; names of the form gen_<sse2_sse4_1>_<u>mulv2siv2di3.
5778 (define_code_attr sse2_sse4_1
5779 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; Dot-product expander for V4SI inputs accumulating into V2DI.
;; Enabled with TARGET_SSE2 for the zero_extend form and TARGET_SSE4_1
;; for the sign_extend form.  Sequence:
;;   t1 = widening multiply of operands 1 and 2, then t1 += operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as whole V1TI values;
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
;; NOTE(review): the V1TI shift count is not visible in this listing.
5781 (define_expand "<s>dot_prodv4si"
5782 [(match_operand:V2DI 0 "register_operand" "")
5783 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5784 (match_operand:V4SI 2 "register_operand" "")
5785 (match_operand:V2DI 3 "register_operand" "")]
5786 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5790 t1 = gen_reg_rtx (V2DImode);
5791 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5792 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5794 t2 = gen_reg_rtx (V4SImode);
5795 t3 = gen_reg_rtx (V4SImode);
5796 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5797 gen_lowpart (V1TImode, operands[1]),
5799 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5800 gen_lowpart (V1TImode, operands[2]),
5803 t4 = gen_reg_rtx (V2DImode);
5804 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5806 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Dot-product expander for V8SI inputs accumulating into V4DI.
;; Sequence:
;;   t1 = avx2_<u>mulv4siv4di3 of operands 1 and 2, then t1 += operand 3;
;;   t2, t3 = operands 1 and 2 shifted right as V2TI values;
;;   t4 = avx2_<u>mulv4siv4di3 of t2 and t3;
;;   operand 0 = t1 + t4.
;; NOTE(review): the V2TI shift count and the enabling condition are not
;; visible in this listing.
5810 (define_expand "<s>dot_prodv8si"
5811 [(match_operand:V4DI 0 "register_operand" "")
5812 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5813 (match_operand:V8SI 2 "register_operand" "")
5814 (match_operand:V4DI 3 "register_operand" "")]
5819 t1 = gen_reg_rtx (V4DImode);
5820 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5821 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5823 t2 = gen_reg_rtx (V8SImode);
5824 t3 = gen_reg_rtx (V8SImode);
5825 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5826 gen_lowpart (V2TImode, operands[1]),
5828 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5829 gen_lowpart (V2TImode, operands[2]),
5832 t4 = gen_reg_rtx (V4DImode);
5833 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5835 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Insn for the psra / vpsra family on VI24_AVX2 vectors.
;; Alternative 0 (isa noavx) is the two-operand form with the destination
;; tied to operand 1; alternative 1 (isa avx) is the three-operand
;; v-prefixed form.  Operand 2 is an SImode nonmemory shift count; the
;; length_immediate attribute depends on whether it is a const_int.
;; NOTE(review): the RTL operator line and the condition string are not
;; visible in this listing.
5839 (define_insn "ashr<mode>3"
5840 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5842 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5843 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5846 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5847 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5848 [(set_attr "isa" "noavx,avx")
5849 (set_attr "type" "sseishft")
5850 (set (attr "length_immediate")
5851 (if_then_else (match_operand 2 "const_int_operand" "")
5853 (const_string "0")))
5854 (set_attr "prefix_data16" "1,*")
5855 (set_attr "prefix" "orig,vex")
5856 (set_attr "mode" "<sseinsnmode>")])
;; Insn for the any_lshift codes on VI248_AVX2 vectors, emitted as
;; p<vshift><ssemodesuffix> (alternative 0, two-operand, destination tied
;; to operand 1) or vp<vshift><ssemodesuffix> (alternative 1,
;; three-operand).  Operand 2 is an SImode nonmemory shift count; the
;; length_immediate attribute depends on whether it is a const_int.
;; NOTE(review): the condition string is not visible in this listing.
5858 (define_insn "<shift_insn><mode>3"
5859 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5860 (any_lshift:VI248_AVX2
5861 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5862 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5865 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5866 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5867 [(set_attr "isa" "noavx,avx")
5868 (set_attr "type" "sseishft")
5869 (set (attr "length_immediate")
5870 (if_then_else (match_operand 2 "const_int_operand" "")
5872 (const_string "0")))
5873 (set_attr "prefix_data16" "1,*")
5874 (set_attr "prefix" "orig,vex")
5875 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift-left expander for VI_128 modes.  The preparation
;; code re-views operands 0 and 1 as V1TImode so the pattern is matched
;; on the full 128-bit value.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.
;; NOTE(review): the RTL operator line and the condition string are not
;; visible in this listing.
5877 (define_expand "vec_shl_<mode>"
5878 [(set (match_operand:VI_128 0 "register_operand" "")
5880 (match_operand:VI_128 1 "register_operand" "")
5881 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5884 operands[0] = gen_lowpart (V1TImode, operands[0]);
5885 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn emitting pslldq / vpslldq on VIMAX_AVX2 modes.  The RTL shift
;; count (operand 2) is divided by 8 before output, so the printed
;; immediate is count / 8.  Alternative 0 is the two-operand form,
;; alternative 1 the three-operand v-prefixed form.
;; NOTE(review): the RTL operator line, the condition string and the
;; switch case labels are not visible in this listing.
5888 (define_insn "<sse2_avx2>_ashl<mode>3"
5889 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5891 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5892 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5895 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5897 switch (which_alternative)
5900 return "pslldq\t{%2, %0|%0, %2}";
5902 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5907 [(set_attr "isa" "noavx,avx")
5908 (set_attr "type" "sseishft")
5909 (set_attr "length_immediate" "1")
5910 (set_attr "prefix_data16" "1,*")
5911 (set_attr "prefix" "orig,vex")
5912 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift-right expander for VI_128 modes.  The preparation
;; code re-views operands 0 and 1 as V1TImode so the pattern is matched
;; on the full 128-bit value.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand.
;; NOTE(review): the RTL operator line and the condition string are not
;; visible in this listing.
5914 (define_expand "vec_shr_<mode>"
5915 [(set (match_operand:VI_128 0 "register_operand" "")
5917 (match_operand:VI_128 1 "register_operand" "")
5918 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5921 operands[0] = gen_lowpart (V1TImode, operands[0]);
5922 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Insn emitting psrldq / vpsrldq for a logical right shift (lshiftrt) on
;; VIMAX_AVX2 modes.  The RTL shift count (operand 2) is divided by 8
;; before output, so the printed immediate is count / 8.  Alternative 0 is
;; the two-operand form, alternative 1 the three-operand v-prefixed form.
;; NOTE(review): the condition string and the switch case labels are not
;; visible in this listing.
5925 (define_insn "<sse2_avx2>_lshr<mode>3"
5926 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5927 (lshiftrt:VIMAX_AVX2
5928 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5929 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5932 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5934 switch (which_alternative)
5937 return "psrldq\t{%2, %0|%0, %2}";
5939 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5944 [(set_attr "isa" "noavx,avx")
5945 (set_attr "type" "sseishft")
5946 (set_attr "length_immediate" "1")
5947 (set_attr "atom_unit" "sishuf")
5948 (set_attr "prefix_data16" "1,*")
5949 (set_attr "prefix" "orig,vex")
5950 (set_attr "mode" "<sseinsnmode>")])
;; Binary-operation expander on VI124_256 vectors, named after the code
;; iterator (<code>).  The only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE> in <MODE>mode.
;; NOTE(review): the RTL operator line (which code iterator is used) and
;; the condition string are not visible in this listing.
5953 (define_expand "<code><mode>3"
5954 [(set (match_operand:VI124_256 0 "register_operand" "")
5956 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5957 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5959 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 insn emitting vp<maxmin_int><ssemodesuffix> on VI124_256 vectors.
;; Requires TARGET_AVX2 and ix86_binary_operator_ok; operand 1 carries the
;; `%' modifier, so operands 1 and 2 are commutative, and operand 2 may be
;; in memory.
5961 (define_insn "*avx2_<code><mode>3"
5962 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5964 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5965 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5966 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5967 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5968 [(set_attr "type" "sseiadd")
5969 (set_attr "prefix_extra" "1")
5970 (set_attr "prefix" "vex")
5971 (set_attr "mode" "OI")])
;; Min/max expander for VI8_AVX2 vectors implemented through
;; ix86_expand_int_vcond.  It builds a six-element xops array:
;;   xops[0]   destination;
;;   xops[1,2] the two values to select between: (op1, op2) for SMAX/UMAX,
;;             swapped otherwise;
;;   xops[3]   comparison op1 > op2, GTU for UMAX/UMIN and GT otherwise;
;;   xops[4,5] the compared operands.
;; NOTE(review): the declarations of xops/code/ok and the handling of the
;; `ok' result are not visible in this listing.
5973 (define_expand "<code><mode>3"
5974 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5976 (match_operand:VI8_AVX2 1 "register_operand" "")
5977 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5984 xops[0] = operands[0];
5986 if (<CODE> == SMAX || <CODE> == UMAX)
5988 xops[1] = operands[1];
5989 xops[2] = operands[2];
5993 xops[1] = operands[2];
5994 xops[2] = operands[1];
5997 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5999 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
6000 xops[4] = operands[1];
6001 xops[5] = operands[2];
6003 ok = ix86_expand_int_vcond (xops);
;; Min/max expander for VI124_128 vectors using a GT comparison.
;; With TARGET_SSE4_1, or for V8HImode, the operands are only fixed up
;; with ix86_fixup_binary_operands_no_copy.  Otherwise both inputs are
;; forced into registers and the operation is expanded through
;; ix86_expand_int_vcond with the comparison op1 > op2 (GT).
;; NOTE(review): the test that decides whether xops[1]/xops[2] are swapped
;; and the condition string are not visible in this listing.
6008 (define_expand "<code><mode>3"
6009 [(set (match_operand:VI124_128 0 "register_operand" "")
6011 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6012 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6015 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
6016 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6022 xops[0] = operands[0];
6023 operands[1] = force_reg (<MODE>mode, operands[1]);
6024 operands[2] = force_reg (<MODE>mode, operands[2]);
6028 xops[1] = operands[1];
6029 xops[2] = operands[2];
6033 xops[1] = operands[2];
6034 xops[2] = operands[1];
6037 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6038 xops[4] = operands[1];
6039 xops[5] = operands[2];
6041 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn emitting p<maxmin_int><ssemodesuffix> (alternative 0) or
;; its v-prefixed three-operand form (alternative 1) on VI14_128 vectors.
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok; operands 1 and 2
;; are commutative (`%') and operand 2 may be in memory.
6047 (define_insn "*sse4_1_<code><mode>3"
6048 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6050 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6051 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6052 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6054 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6055 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6056 [(set_attr "isa" "noavx,avx")
6057 (set_attr "type" "sseiadd")
6058 (set_attr "prefix_extra" "1,*")
6059 (set_attr "prefix" "orig,vex")
6060 (set_attr "mode" "TI")])
;; SSE2 insn emitting p<maxmin_int>w (alternative 0) or vp<maxmin_int>w
;; (alternative 1) on V8HI vectors.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok; operands 1 and 2 are commutative (`%') and
;; operand 2 may be in memory.
6062 (define_insn "*<code>v8hi3"
6063 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6065 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6066 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6067 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6069 p<maxmin_int>w\t{%2, %0|%0, %2}
6070 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6071 [(set_attr "isa" "noavx,avx")
6072 (set_attr "type" "sseiadd")
6073 (set_attr "prefix_data16" "1,*")
6074 (set_attr "prefix_extra" "*,1")
6075 (set_attr "prefix" "orig,vex")
6076 (set_attr "mode" "TI")])
;; Min/max expander for VI124_128 vectors using a GTU comparison.
;; Three strategies are visible:
;;  * TARGET_SSE4_1, or V16QImode: only fix up the operands with
;;    ix86_fixup_binary_operands_no_copy;
;;  * UMAX on V8HImode: compute op3 = sse2_ussubv8hi3 (op1, op2) and then
;;    op0 = op3 + op2; a fresh register is used for op3 when the
;;    destination equals op2, so op2 is not clobbered before the add;
;;  * otherwise: force both inputs into registers and expand through
;;    ix86_expand_int_vcond with the comparison op1 > op2 (GTU).
;; NOTE(review): the test that decides whether xops[1]/xops[2] are swapped
;; and the condition string are not visible in this listing.
6078 (define_expand "<code><mode>3"
6079 [(set (match_operand:VI124_128 0 "register_operand" "")
6081 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6082 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6085 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6086 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6087 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6089 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6090 operands[1] = force_reg (<MODE>mode, operands[1]);
6091 if (rtx_equal_p (op3, op2))
6092 op3 = gen_reg_rtx (V8HImode);
6093 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6094 emit_insn (gen_addv8hi3 (op0, op3, op2));
6102 operands[1] = force_reg (<MODE>mode, operands[1]);
6103 operands[2] = force_reg (<MODE>mode, operands[2]);
6105 xops[0] = operands[0];
6109 xops[1] = operands[1];
6110 xops[2] = operands[2];
6114 xops[1] = operands[2];
6115 xops[2] = operands[1];
6118 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6119 xops[4] = operands[1];
6120 xops[5] = operands[2];
6122 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn emitting p<maxmin_int><ssemodesuffix> (alternative 0) or
;; its v-prefixed three-operand form (alternative 1) on VI24_128 vectors.
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok; operands 1 and 2
;; are commutative (`%') and operand 2 may be in memory.
6128 (define_insn "*sse4_1_<code><mode>3"
6129 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6131 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6132 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6133 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6135 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6136 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6137 [(set_attr "isa" "noavx,avx")
6138 (set_attr "type" "sseiadd")
6139 (set_attr "prefix_extra" "1,*")
6140 (set_attr "prefix" "orig,vex")
6141 (set_attr "mode" "TI")])
;; SSE2 insn emitting p<maxmin_int>b (alternative 0) or vp<maxmin_int>b
;; (alternative 1) on V16QI vectors.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok; operands 1 and 2 are commutative (`%') and
;; operand 2 may be in memory.
6143 (define_insn "*<code>v16qi3"
6144 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6146 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6147 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6148 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6150 p<maxmin_int>b\t{%2, %0|%0, %2}
6151 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6152 [(set_attr "isa" "noavx,avx")
6153 (set_attr "type" "sseiadd")
6154 (set_attr "prefix_data16" "1,*")
6155 (set_attr "prefix_extra" "*,1")
6156 (set_attr "prefix" "orig,vex")
6157 (set_attr "mode" "TI")])
6159 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6161 ;; Parallel integral comparisons
6163 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for the equality comparison on VI_256 vectors.  Its
;; only preparation step is ix86_fixup_binary_operands_no_copy for EQ.
;; NOTE(review): the RTL operator line and the condition string are not
;; visible in this listing.
6165 (define_expand "avx2_eq<mode>3"
6166 [(set (match_operand:VI_256 0 "register_operand" "")
6168 (match_operand:VI_256 1 "nonimmediate_operand" "")
6169 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6171 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 insn emitting vpcmpeq<ssemodesuffix> on VI_256 vectors.  Requires
;; TARGET_AVX2 and ix86_binary_operator_ok for EQ; operands 1 and 2 are
;; commutative (`%') and operand 2 may be in memory.
6173 (define_insn "*avx2_eq<mode>3"
6174 [(set (match_operand:VI_256 0 "register_operand" "=x")
6176 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6177 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6178 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6179 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6180 [(set_attr "type" "ssecmp")
6181 (set_attr "prefix_extra" "1")
6182 (set_attr "prefix" "vex")
6183 (set_attr "mode" "OI")])
;; SSE4.1 insn emitting pcmpeqq (alternative 0) or vpcmpeqq
;; (alternative 1) on V2DI vectors.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok for EQ; operands 1 and 2 are commutative (`%').
6185 (define_insn "*sse4_1_eqv2di3"
6186 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6188 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6189 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6190 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6192 pcmpeqq\t{%2, %0|%0, %2}
6193 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6194 [(set_attr "isa" "noavx,avx")
6195 (set_attr "type" "ssecmp")
6196 (set_attr "prefix_extra" "1")
6197 (set_attr "prefix" "orig,vex")
6198 (set_attr "mode" "TI")])
;; SSE2 insn emitting pcmpeq<ssemodesuffix> (alternative 0) or its
;; v-prefixed form (alternative 1) on VI124_128 vectors.  Enabled only
;; with TARGET_SSE2 and without TARGET_XOP, and only when
;; ix86_binary_operator_ok accepts the operands for EQ.
6200 (define_insn "*sse2_eq<mode>3"
6201 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6203 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6204 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6205 "TARGET_SSE2 && !TARGET_XOP
6206 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6208 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6209 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6210 [(set_attr "isa" "noavx,avx")
6211 (set_attr "type" "ssecmp")
6212 (set_attr "prefix_data16" "1,*")
6213 (set_attr "prefix" "orig,vex")
6214 (set_attr "mode" "TI")])
;; Named expander for the equality comparison on VI124_128 vectors,
;; enabled with TARGET_SSE2 and without TARGET_XOP.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy for EQ.
6216 (define_expand "sse2_eq<mode>3"
6217 [(set (match_operand:VI124_128 0 "register_operand" "")
6219 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6220 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6221 "TARGET_SSE2 && !TARGET_XOP "
6222 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the equality comparison on V2DI vectors.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy for EQ in
;; V2DImode.
;; NOTE(review): the RTL operator line and the condition string are not
;; visible in this listing.
6224 (define_expand "sse4_1_eqv2di3"
6225 [(set (match_operand:V2DI 0 "register_operand" "")
6227 (match_operand:V2DI 1 "nonimmediate_operand" "")
6228 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6230 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Insn emitting pcmpgtq (alternative 0) or vpcmpgtq (alternative 1) on
;; V2DI vectors.  Operand 1 must be a register and has no `%' modifier,
;; so the operands are not interchangeable; operand 2 may be in memory.
;; NOTE(review): the RTL operator line and the condition string are not
;; visible in this listing.
6232 (define_insn "sse4_2_gtv2di3"
6233 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6235 (match_operand:V2DI 1 "register_operand" "0,x")
6236 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6239 pcmpgtq\t{%2, %0|%0, %2}
6240 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6241 [(set_attr "isa" "noavx,avx")
6242 (set_attr "type" "ssecmp")
6243 (set_attr "prefix_extra" "1")
6244 (set_attr "prefix" "orig,vex")
6245 (set_attr "mode" "TI")])
;; Insn emitting vpcmpgt<ssemodesuffix> on VI_256 vectors.  Operand 1 must
;; be a register and has no `%' modifier, so the operands are not
;; interchangeable; operand 2 may be in memory.
;; NOTE(review): the RTL operator line and the condition string are not
;; visible in this listing.
6247 (define_insn "avx2_gt<mode>3"
6248 [(set (match_operand:VI_256 0 "register_operand" "=x")
6250 (match_operand:VI_256 1 "register_operand" "x")
6251 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6253 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6254 [(set_attr "type" "ssecmp")
6255 (set_attr "prefix_extra" "1")
6256 (set_attr "prefix" "vex")
6257 (set_attr "mode" "OI")])
;; Insn emitting pcmpgt<ssemodesuffix> (alternative 0) or its v-prefixed
;; form (alternative 1) on VI124_128 vectors.  Enabled with TARGET_SSE2
;; and without TARGET_XOP.  Operand 1 must be a register and has no `%'
;; modifier; operand 2 may be in memory.
6259 (define_insn "sse2_gt<mode>3"
6260 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6262 (match_operand:VI124_128 1 "register_operand" "0,x")
6263 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6264 "TARGET_SSE2 && !TARGET_XOP"
6266 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6267 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6268 [(set_attr "isa" "noavx,avx")
6269 (set_attr "type" "ssecmp")
6270 (set_attr "prefix_data16" "1,*")
6271 (set_attr "prefix" "orig,vex")
6272 (set_attr "mode" "TI")])
;; Vector conditional-select expander: data operands (0, 1, 2) are in a
;; V_256 mode and the compared operands (4, 5) in a VI_256 mode.  The
;; condition requires both modes to have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first clause of the condition and the handling of
;; `ok' are not visible in this listing.
6274 (define_expand "vcond<V_256:mode><VI_256:mode>"
6275 [(set (match_operand:V_256 0 "register_operand" "")
6277 (match_operator 3 ""
6278 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6279 (match_operand:VI_256 5 "general_operand" "")])
6280 (match_operand:V_256 1 "" "")
6281 (match_operand:V_256 2 "" "")))]
6283 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6284 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6286 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional-select expander: data operands (0, 1, 2) are in a
;; V_128 mode and the compared operands (4, 5) in a VI124_128 mode.  The
;; condition requires both modes to have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first clause of the condition and the handling of
;; `ok' are not visible in this listing.
6291 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6292 [(set (match_operand:V_128 0 "register_operand" "")
6294 (match_operator 3 ""
6295 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6296 (match_operand:VI124_128 5 "general_operand" "")])
6297 (match_operand:V_128 1 "" "")
6298 (match_operand:V_128 2 "" "")))]
6300 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6301 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6303 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional-select expander with V2DI compared operands (4, 5)
;; and data operands (0, 1, 2) in a VI8F_128 mode.  Expansion is delegated
;; to ix86_expand_int_vcond.
;; NOTE(review): the condition string and the handling of `ok' are not
;; visible in this listing.
6308 (define_expand "vcond<VI8F_128:mode>v2di"
6309 [(set (match_operand:VI8F_128 0 "register_operand" "")
6310 (if_then_else:VI8F_128
6311 (match_operator 3 ""
6312 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6313 (match_operand:V2DI 5 "general_operand" "")])
6314 (match_operand:VI8F_128 1 "" "")
6315 (match_operand:VI8F_128 2 "" "")))]
6318 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the V_256 / VI_256 conditional-select expander.
;; Here both compared operands (4, 5) use nonimmediate_operand and the
;; selected values (1, 2) use general_operand.  The condition requires
;; equal element counts; expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first clause of the condition and the handling of
;; `ok' are not visible in this listing.
6323 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6324 [(set (match_operand:V_256 0 "register_operand" "")
6326 (match_operator 3 ""
6327 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6328 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6329 (match_operand:V_256 1 "general_operand" "")
6330 (match_operand:V_256 2 "general_operand" "")))]
6332 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6333 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6335 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the V_128 / VI124_128 conditional-select
;; expander.  Both compared operands (4, 5) use nonimmediate_operand and
;; the selected values (1, 2) use general_operand.  The condition requires
;; equal element counts; expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first clause of the condition and the handling of
;; `ok' are not visible in this listing.
6340 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6341 [(set (match_operand:V_128 0 "register_operand" "")
6343 (match_operator 3 ""
6344 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6345 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6346 (match_operand:V_128 1 "general_operand" "")
6347 (match_operand:V_128 2 "general_operand" "")))]
6349 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6350 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6352 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" conditional-select expander with V2DI compared operands
;; (4, 5, both nonimmediate_operand) and data operands (0, 1, 2) in a
;; VI8F_128 mode.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the condition string and the handling of `ok' are not
;; visible in this listing.
6357 (define_expand "vcondu<VI8F_128:mode>v2di"
6358 [(set (match_operand:VI8F_128 0 "register_operand" "")
6359 (if_then_else:VI8F_128
6360 (match_operator 3 ""
6361 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6362 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6363 (match_operand:VI8F_128 1 "general_operand" "")
6364 (match_operand:VI8F_128 2 "general_operand" "")))]
6367 bool ok = ix86_expand_int_vcond (operands);
;; Mode iterator for the variable vec_perm expander: the six 128-bit
;; modes are listed unconditionally, the six 256-bit modes only with
;; TARGET_AVX2.
6372 (define_mode_iterator VEC_PERM_AVX2
6373 [V16QI V8HI V4SI V2DI V4SF V2DF
6374 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6375 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6376 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable-selector permutation expander over VEC_PERM_AVX2 modes.
;; Operands 1 and 2 are the source vectors and operand 3 is the selector
;; in the matching integer vector mode (<sseintvecmode>).  Enabled with
;; TARGET_SSSE3, TARGET_AVX or TARGET_XOP; all work is done by
;; ix86_expand_vec_perm.
6378 (define_expand "vec_perm<mode>"
6379 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6380 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6381 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6382 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6383 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6385 ix86_expand_vec_perm (operands);
;; Mode iterator for the constant-selector permutation expander.  Each
;; mode carries its own enabling condition: TARGET_SSE for V4SF, V4SI,
;; V2DF and V2DI; TARGET_SSE2 for V16QI and V8HI; TARGET_AVX for V8SF,
;; V4DF, V8SI and V4DI; TARGET_AVX2 for V32QI and V16HI.
6389 (define_mode_iterator VEC_PERM_CONST
6390 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6391 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6392 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6393 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6394 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6395 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Constant-selector permutation expander over VEC_PERM_CONST modes.
;; Operand 3 (the selector, in <sseintvecmode>) has no predicate.  The
;; expansion is attempted with ix86_expand_vec_perm_const, whose result
;; is tested.
;; NOTE(review): the actions taken on success and on failure (presumably
;; DONE / FAIL) are not visible in this listing -- confirm.
6397 (define_expand "vec_perm_const<mode>"
6398 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6399 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6400 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6401 (match_operand:<sseintvecmode> 3 "" "")]
6404 if (ix86_expand_vec_perm_const (operands))
6410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6412 ;; Parallel bitwise logical operations
6414 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander for VI vectors, expressed as an XOR of
;; operand 1 with operand 2.  The preparation code builds operand 2 as a
;; constant vector whose GET_MODE_NUNITS elements are all constm1_rtx
;; (i.e. all bits set) and forces it into a register.
;; NOTE(review): the second XOR operand in the RTL template and the
;; condition string are not visible in this listing.
6416 (define_expand "one_cmpl<mode>2"
6417 [(set (match_operand:VI 0 "register_operand" "")
6418 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6422 int i, n = GET_MODE_NUNITS (<MODE>mode);
6423 rtvec v = rtvec_alloc (n);
6425 for (i = 0; i < n; ++i)
6426 RTVEC_ELT (v, i) = constm1_rtx;
6428 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the and-not operation on VI_AVX2 vectors: operand 1
;; (a register) is complemented with `not' and combined with operand 2,
;; which may be in memory.
;; NOTE(review): the outer RTL operator line (presumably `and') and the
;; condition string are not visible in this listing.
6431 (define_expand "<sse2_avx2>_andnot<mode>3"
6432 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6434 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6435 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; Insn for the and-not operation on VI vectors.  The output code picks a
;; mnemonic according to get_attr_mode (insn), asserting that the
;; corresponding target feature is enabled, then formats it with
;; snprintf into a static 32-byte buffer using a two-operand template
;; (alternative 0) or a "v"-prefixed three-operand template
;; (alternative 1).
;; The "mode" attribute falls back to V8SF when the mode is wider than
;; 16 bytes and TARGET_AVX2 is off, to V4SF when TARGET_SSE2 is off, and
;; is <sseinsnmode> otherwise.
;; NOTE(review): the mnemonic assignments to `tmp', the case labels and
;; the condition string are not visible in this listing.
6438 (define_insn "*andnot<mode>3"
6439 [(set (match_operand:VI 0 "register_operand" "=x,x")
6441 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6442 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6445 static char buf[32];
6449 switch (get_attr_mode (insn))
6452 gcc_assert (TARGET_AVX2);
6454 gcc_assert (TARGET_SSE2);
6460 gcc_assert (TARGET_AVX);
6462 gcc_assert (TARGET_SSE);
6471 switch (which_alternative)
6474 ops = "%s\t{%%2, %%0|%%0, %%2}";
6477 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6483 snprintf (buf, sizeof (buf), ops, tmp);
6486 [(set_attr "isa" "noavx,avx")
6487 (set_attr "type" "sselog")
6488 (set (attr "prefix_data16")
6490 (and (eq_attr "alternative" "0")
6491 (eq_attr "mode" "TI"))
6493 (const_string "*")))
6494 (set_attr "prefix" "orig,vex")
6496 (cond [(and (not (match_test "TARGET_AVX2"))
6497 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6498 (const_string "V8SF")
6499 (not (match_test "TARGET_SSE2"))
6500 (const_string "V4SF")
6502 (const_string "<sseinsnmode>")))])
;; Binary-operation expander on VI vectors, named after the code iterator
;; (<code>).  The only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE> in <MODE>mode.
;; NOTE(review): the RTL operator line (which code iterator is used) and
;; the condition string are not visible in this listing.
6504 (define_expand "<code><mode>3"
6505 [(set (match_operand:VI 0 "register_operand" "")
6507 (match_operand:VI 1 "nonimmediate_operand" "")
6508 (match_operand:VI 2 "nonimmediate_operand" "")))]
6510 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for a commutative (`%') binary operation on VI vectors, guarded by
;; ix86_binary_operator_ok.  The output code picks a mnemonic according
;; to get_attr_mode (insn), asserting that the corresponding target
;; feature is enabled, then formats it with snprintf into a static
;; 32-byte buffer using a two-operand template (alternative 0) or a
;; "v"-prefixed three-operand template (alternative 1).
;; The "mode" attribute falls back to V8SF when the mode is wider than
;; 16 bytes and TARGET_AVX2 is off, to V4SF when TARGET_SSE2 is off, and
;; is <sseinsnmode> otherwise.
;; NOTE(review): the RTL operator line, the mnemonic assignments to
;; `tmp', the case labels and the first clause of the condition are not
;; visible in this listing.
6512 (define_insn "*<code><mode>3"
6513 [(set (match_operand:VI 0 "register_operand" "=x,x")
6515 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6516 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6518 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6520 static char buf[32];
6524 switch (get_attr_mode (insn))
6527 gcc_assert (TARGET_AVX2);
6529 gcc_assert (TARGET_SSE2);
6535 gcc_assert (TARGET_AVX);
6537 gcc_assert (TARGET_SSE);
6546 switch (which_alternative)
6549 ops = "%s\t{%%2, %%0|%%0, %%2}";
6552 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6558 snprintf (buf, sizeof (buf), ops, tmp);
6561 [(set_attr "isa" "noavx,avx")
6562 (set_attr "type" "sselog")
6563 (set (attr "prefix_data16")
6565 (and (eq_attr "alternative" "0")
6566 (eq_attr "mode" "TI"))
6568 (const_string "*")))
6569 (set_attr "prefix" "orig,vex")
6571 (cond [(and (not (match_test "TARGET_AVX2"))
6572 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6573 (const_string "V8SF")
6574 (not (match_test "TARGET_SSE2"))
6575 (const_string "V4SF")
6577 (const_string "<sseinsnmode>")))])
;; Insn for the and-not operation on TFmode values, emitted as pandn
;; (alternative 0, destination tied to operand 1) or vpandn
;; (alternative 1).  Operand 1 is complemented with `not'; operand 2 may
;; be in memory.
;; NOTE(review): the outer RTL operator line and the condition string are
;; not visible in this listing.
6579 (define_insn "*andnottf3"
6580 [(set (match_operand:TF 0 "register_operand" "=x,x")
6582 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6583 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6586 pandn\t{%2, %0|%0, %2}
6587 vpandn\t{%2, %1, %0|%0, %1, %2}"
6588 [(set_attr "isa" "noavx,avx")
6589 (set_attr "type" "sselog")
6590 (set_attr "prefix_data16" "1,*")
6591 (set_attr "prefix" "orig,vex")
6592 (set_attr "mode" "TI")])
;; Binary-operation expander on TFmode values, named after the code
;; iterator (<code>).  The only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE> in TFmode.
;; NOTE(review): the RTL operator line and the condition string are not
;; visible in this listing.
6594 (define_expand "<code>tf3"
6595 [(set (match_operand:TF 0 "register_operand" "")
6597 (match_operand:TF 1 "nonimmediate_operand" "")
6598 (match_operand:TF 2 "nonimmediate_operand" "")))]
6600 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn for a commutative (`%') binary operation on TFmode values,
;; emitted as p<logic> (alternative 0) or vp<logic> (alternative 1) and
;; guarded by ix86_binary_operator_ok for <CODE> in TFmode.
;; NOTE(review): the RTL operator line and the first clause of the
;; condition are not visible in this listing.
6602 (define_insn "*<code>tf3"
6603 [(set (match_operand:TF 0 "register_operand" "=x,x")
6605 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6606 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6608 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6610 p<logic>\t{%2, %0|%0, %2}
6611 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6612 [(set_attr "isa" "noavx,avx")
6613 (set_attr "type" "sselog")
6614 (set_attr "prefix_data16" "1,*")
6615 (set_attr "prefix" "orig,vex")
6616 (set_attr "mode" "TI")])
6618 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6620 ;; Parallel integral element swizzling
6622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack expander for VI248_AVX2 inputs producing a
;; <ssepackmode> result.  Both inputs are re-viewed in <ssepackmode> and
;; handed to ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the final argument 0 presumably selects the even
;; elements -- confirm against ix86_expand_vec_extract_even_odd.  The
;; condition string is not visible in this listing.
6624 (define_expand "vec_pack_trunc_<mode>"
6625 [(match_operand:<ssepackmode> 0 "register_operand" "")
6626 (match_operand:VI248_AVX2 1 "register_operand" "")
6627 (match_operand:VI248_AVX2 2 "register_operand" "")]
6630 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6631 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6632 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Insn emitting packsswb (alternative 0) or vpacksswb (alternative 1).
;; The result (VI1_AVX2) is the concatenation of the signed-saturating
;; truncations (ss_truncate) of operands 1 and 2, each given in
;; <sseunpackmode>; operand 2 may be in memory.
;; NOTE(review): the condition string is not visible in this listing.
6636 (define_insn "<sse2_avx2>_packsswb"
6637 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6638 (vec_concat:VI1_AVX2
6639 (ss_truncate:<ssehalfvecmode>
6640 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6641 (ss_truncate:<ssehalfvecmode>
6642 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6645 packsswb\t{%2, %0|%0, %2}
6646 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6647 [(set_attr "isa" "noavx,avx")
6648 (set_attr "type" "sselog")
6649 (set_attr "prefix_data16" "1,*")
6650 (set_attr "prefix" "orig,vex")
6651 (set_attr "mode" "<sseinsnmode>")])
;; Insn emitting packssdw (alternative 0) or vpackssdw (alternative 1).
;; The result (VI2_AVX2) is the concatenation of the signed-saturating
;; truncations (ss_truncate) of operands 1 and 2, each given in
;; <sseunpackmode>; operand 2 may be in memory.
;; NOTE(review): the condition string is not visible in this listing.
6653 (define_insn "<sse2_avx2>_packssdw"
6654 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6655 (vec_concat:VI2_AVX2
6656 (ss_truncate:<ssehalfvecmode>
6657 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6658 (ss_truncate:<ssehalfvecmode>
6659 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6662 packssdw\t{%2, %0|%0, %2}
6663 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6664 [(set_attr "isa" "noavx,avx")
6665 (set_attr "type" "sselog")
6666 (set_attr "prefix_data16" "1,*")
6667 (set_attr "prefix" "orig,vex")
6668 (set_attr "mode" "<sseinsnmode>")])
;; Insn emitting packuswb (alternative 0) or vpackuswb (alternative 1).
;; The result (VI1_AVX2) is the concatenation of the unsigned-saturating
;; truncations (us_truncate) of operands 1 and 2, each given in
;; <sseunpackmode>; operand 2 may be in memory.
;; NOTE(review): the condition string is not visible in this listing.
6670 (define_insn "<sse2_avx2>_packuswb"
6671 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6672 (vec_concat:VI1_AVX2
6673 (us_truncate:<ssehalfvecmode>
6674 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6675 (us_truncate:<ssehalfvecmode>
6676 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6679 packuswb\t{%2, %0|%0, %2}
6680 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6681 [(set_attr "isa" "noavx,avx")
6682 (set_attr "type" "sselog")
6683 (set_attr "prefix_data16" "1,*")
6684 (set_attr "prefix" "orig,vex")
6685 (set_attr "mode" "<sseinsnmode>")])
;; Insn emitting vpunpckhbw on V32QI vectors.  The selector list pairs
;; index i with index i + 32 for i = 8..15 and then for i = 24..31, i.e.
;; it alternates bytes 8-15 and bytes 24-31 of operand 1 with the bytes
;; at the same positions in operand 2.
;; NOTE(review): the vec_select / vec_concat wrapper lines and the
;; condition string are not visible in this listing; indices >= 32 are
;; taken to address operand 2 -- confirm.
6687 (define_insn "avx2_interleave_highv32qi"
6688 [(set (match_operand:V32QI 0 "register_operand" "=x")
6691 (match_operand:V32QI 1 "register_operand" "x")
6692 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6693 (parallel [(const_int 8) (const_int 40)
6694 (const_int 9) (const_int 41)
6695 (const_int 10) (const_int 42)
6696 (const_int 11) (const_int 43)
6697 (const_int 12) (const_int 44)
6698 (const_int 13) (const_int 45)
6699 (const_int 14) (const_int 46)
6700 (const_int 15) (const_int 47)
6701 (const_int 24) (const_int 56)
6702 (const_int 25) (const_int 57)
6703 (const_int 26) (const_int 58)
6704 (const_int 27) (const_int 59)
6705 (const_int 28) (const_int 60)
6706 (const_int 29) (const_int 61)
6707 (const_int 30) (const_int 62)
6708 (const_int 31) (const_int 63)])))]
6710 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6711 [(set_attr "type" "sselog")
6712 (set_attr "prefix" "vex")
6713 (set_attr "mode" "OI")])
;; Insn emitting punpckhbw (alternative 0) or vpunpckhbw (alternative 1)
;; on V16QI vectors.  The selector list pairs index i with index i + 16
;; for i = 8..15, i.e. it alternates bytes 8-15 of operand 1 with bytes
;; 8-15 of operand 2.
;; NOTE(review): the vec_select / vec_concat wrapper lines and the
;; condition string are not visible in this listing; indices >= 16 are
;; taken to address operand 2 -- confirm.
6715 (define_insn "vec_interleave_highv16qi"
6716 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6719 (match_operand:V16QI 1 "register_operand" "0,x")
6720 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6721 (parallel [(const_int 8) (const_int 24)
6722 (const_int 9) (const_int 25)
6723 (const_int 10) (const_int 26)
6724 (const_int 11) (const_int 27)
6725 (const_int 12) (const_int 28)
6726 (const_int 13) (const_int 29)
6727 (const_int 14) (const_int 30)
6728 (const_int 15) (const_int 31)])))]
6731 punpckhbw\t{%2, %0|%0, %2}
6732 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6733 [(set_attr "isa" "noavx,avx")
6734 (set_attr "type" "sselog")
6735 (set_attr "prefix_data16" "1,*")
6736 (set_attr "prefix" "orig,vex")
6737 (set_attr "mode" "TI")])
;; 256-bit byte interleave emitting vpunpcklbw.  The selection list pairs
;; elements 0..7 with 32..39 and 16..23 with 48..55, i.e. the lower eight
;; bytes of each 16-byte half of both inputs.
;; NOTE(review): the enclosing vec_select/vec_concat lines and the insn
;; condition are not visible here; indices 32..63 presumably address
;; operand 2 -- confirm against the full file.
6739 (define_insn "avx2_interleave_lowv32qi"
6740 [(set (match_operand:V32QI 0 "register_operand" "=x")
6743 (match_operand:V32QI 1 "register_operand" "x")
6744 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6745 (parallel [(const_int 0) (const_int 32)
6746 (const_int 1) (const_int 33)
6747 (const_int 2) (const_int 34)
6748 (const_int 3) (const_int 35)
6749 (const_int 4) (const_int 36)
6750 (const_int 5) (const_int 37)
6751 (const_int 6) (const_int 38)
6752 (const_int 7) (const_int 39)
6753 (const_int 16) (const_int 48)
6754 (const_int 17) (const_int 49)
6755 (const_int 18) (const_int 50)
6756 (const_int 19) (const_int 51)
6757 (const_int 20) (const_int 52)
6758 (const_int 21) (const_int 53)
6759 (const_int 22) (const_int 54)
6760 (const_int 23) (const_int 55)])))]
6762 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6763 [(set_attr "type" "sselog")
6764 (set_attr "prefix" "vex")
6765 (set_attr "mode" "OI")])
;; 128-bit low byte interleave.  The selection list pairs elements 0..7 with
;; 16..23.  Alternative 0 (noavx) emits two-operand punpcklbw with operand 1
;; tied to the destination; alternative 1 (avx) emits three-operand
;; vpunpcklbw.
;; NOTE(review): the vec_select/vec_concat wrapper, insn condition and "@"
;; template opener are not visible in this excerpt.
6767 (define_insn "vec_interleave_lowv16qi"
6768 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6771 (match_operand:V16QI 1 "register_operand" "0,x")
6772 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6773 (parallel [(const_int 0) (const_int 16)
6774 (const_int 1) (const_int 17)
6775 (const_int 2) (const_int 18)
6776 (const_int 3) (const_int 19)
6777 (const_int 4) (const_int 20)
6778 (const_int 5) (const_int 21)
6779 (const_int 6) (const_int 22)
6780 (const_int 7) (const_int 23)])))]
6783 punpcklbw\t{%2, %0|%0, %2}
6784 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6785 [(set_attr "isa" "noavx,avx")
6786 (set_attr "type" "sselog")
6787 (set_attr "prefix_data16" "1,*")
6788 (set_attr "prefix" "orig,vex")
6789 (set_attr "mode" "TI")])
;; 256-bit word interleave emitting vpunpckhwd.  The selection list pairs
;; elements 4..7 with 20..23 and 12..15 with 28..31, i.e. the upper four
;; words of each 8-word half of both inputs.
;; NOTE(review): the enclosing vec_select/vec_concat lines and the insn
;; condition are not visible in this excerpt.
6791 (define_insn "avx2_interleave_highv16hi"
6792 [(set (match_operand:V16HI 0 "register_operand" "=x")
6795 (match_operand:V16HI 1 "register_operand" "x")
6796 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6797 (parallel [(const_int 4) (const_int 20)
6798 (const_int 5) (const_int 21)
6799 (const_int 6) (const_int 22)
6800 (const_int 7) (const_int 23)
6801 (const_int 12) (const_int 28)
6802 (const_int 13) (const_int 29)
6803 (const_int 14) (const_int 30)
6804 (const_int 15) (const_int 31)])))]
6806 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6807 [(set_attr "type" "sselog")
6808 (set_attr "prefix" "vex")
6809 (set_attr "mode" "OI")])
;; 128-bit high word interleave.  The selection list pairs elements 4..7
;; with 12..15.  Alternative 0 (noavx) emits two-operand punpckhwd with
;; operand 1 tied to the destination; alternative 1 (avx) emits
;; three-operand vpunpckhwd.
;; NOTE(review): the vec_select/vec_concat wrapper, insn condition and "@"
;; template opener are not visible in this excerpt.
6811 (define_insn "vec_interleave_highv8hi"
6812 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6815 (match_operand:V8HI 1 "register_operand" "0,x")
6816 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6817 (parallel [(const_int 4) (const_int 12)
6818 (const_int 5) (const_int 13)
6819 (const_int 6) (const_int 14)
6820 (const_int 7) (const_int 15)])))]
6823 punpckhwd\t{%2, %0|%0, %2}
6824 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6825 [(set_attr "isa" "noavx,avx")
6826 (set_attr "type" "sselog")
6827 (set_attr "prefix_data16" "1,*")
6828 (set_attr "prefix" "orig,vex")
6829 (set_attr "mode" "TI")])
;; 256-bit word interleave emitting vpunpcklwd.  The selection list pairs
;; elements 0..3 with 16..19 and 8..11 with 24..27, i.e. the lower four
;; words of each 8-word half of both inputs.
;; NOTE(review): the enclosing vec_select/vec_concat lines and the insn
;; condition are not visible in this excerpt.
6831 (define_insn "avx2_interleave_lowv16hi"
6832 [(set (match_operand:V16HI 0 "register_operand" "=x")
6835 (match_operand:V16HI 1 "register_operand" "x")
6836 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6837 (parallel [(const_int 0) (const_int 16)
6838 (const_int 1) (const_int 17)
6839 (const_int 2) (const_int 18)
6840 (const_int 3) (const_int 19)
6841 (const_int 8) (const_int 24)
6842 (const_int 9) (const_int 25)
6843 (const_int 10) (const_int 26)
6844 (const_int 11) (const_int 27)])))]
6846 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6847 [(set_attr "type" "sselog")
6848 (set_attr "prefix" "vex")
6849 (set_attr "mode" "OI")])
;; 128-bit low word interleave.  The selection list pairs elements 0..3 with
;; 8..11.  Alternative 0 (noavx) emits two-operand punpcklwd with operand 1
;; tied to the destination; alternative 1 (avx) emits three-operand
;; vpunpcklwd.
;; NOTE(review): the vec_select/vec_concat wrapper, insn condition and "@"
;; template opener are not visible in this excerpt.
6851 (define_insn "vec_interleave_lowv8hi"
6852 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6855 (match_operand:V8HI 1 "register_operand" "0,x")
6856 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6857 (parallel [(const_int 0) (const_int 8)
6858 (const_int 1) (const_int 9)
6859 (const_int 2) (const_int 10)
6860 (const_int 3) (const_int 11)])))]
6863 punpcklwd\t{%2, %0|%0, %2}
6864 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6865 [(set_attr "isa" "noavx,avx")
6866 (set_attr "type" "sselog")
6867 (set_attr "prefix_data16" "1,*")
6868 (set_attr "prefix" "orig,vex")
6869 (set_attr "mode" "TI")])
;; 256-bit dword interleave emitting vpunpckhdq.  The selection list pairs
;; elements 2,3 with 10,11 and 6,7 with 14,15, i.e. the upper two dwords of
;; each 4-dword half of both inputs.
;; NOTE(review): the enclosing vec_select/vec_concat lines and the insn
;; condition are not visible in this excerpt.
6871 (define_insn "avx2_interleave_highv8si"
6872 [(set (match_operand:V8SI 0 "register_operand" "=x")
6875 (match_operand:V8SI 1 "register_operand" "x")
6876 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6877 (parallel [(const_int 2) (const_int 10)
6878 (const_int 3) (const_int 11)
6879 (const_int 6) (const_int 14)
6880 (const_int 7) (const_int 15)])))]
6882 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6883 [(set_attr "type" "sselog")
6884 (set_attr "prefix" "vex")
6885 (set_attr "mode" "OI")])
;; 128-bit high dword interleave.  The selection list pairs elements 2,3
;; with 6,7.  Alternative 0 (noavx) emits two-operand punpckhdq with
;; operand 1 tied to the destination; alternative 1 (avx) emits
;; three-operand vpunpckhdq.
;; NOTE(review): the vec_select/vec_concat wrapper, insn condition and "@"
;; template opener are not visible in this excerpt.
6887 (define_insn "vec_interleave_highv4si"
6888 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6891 (match_operand:V4SI 1 "register_operand" "0,x")
6892 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6893 (parallel [(const_int 2) (const_int 6)
6894 (const_int 3) (const_int 7)])))]
6897 punpckhdq\t{%2, %0|%0, %2}
6898 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6899 [(set_attr "isa" "noavx,avx")
6900 (set_attr "type" "sselog")
6901 (set_attr "prefix_data16" "1,*")
6902 (set_attr "prefix" "orig,vex")
6903 (set_attr "mode" "TI")])
;; 256-bit dword interleave emitting vpunpckldq.  The selection list pairs
;; elements 0,1 with 8,9 and 4,5 with 12,13, i.e. the lower two dwords of
;; each 4-dword half of both inputs.
;; NOTE(review): the enclosing vec_select/vec_concat lines and the insn
;; condition are not visible in this excerpt.
6905 (define_insn "avx2_interleave_lowv8si"
6906 [(set (match_operand:V8SI 0 "register_operand" "=x")
6909 (match_operand:V8SI 1 "register_operand" "x")
6910 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6911 (parallel [(const_int 0) (const_int 8)
6912 (const_int 1) (const_int 9)
6913 (const_int 4) (const_int 12)
6914 (const_int 5) (const_int 13)])))]
6916 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6917 [(set_attr "type" "sselog")
6918 (set_attr "prefix" "vex")
6919 (set_attr "mode" "OI")])
;; 128-bit low dword interleave.  The selection list pairs elements 0,1 with
;; 4,5.  Alternative 0 (noavx) emits two-operand punpckldq with operand 1
;; tied to the destination; alternative 1 (avx) emits three-operand
;; vpunpckldq.
;; NOTE(review): the vec_select/vec_concat wrapper, insn condition and "@"
;; template opener are not visible in this excerpt.
6921 (define_insn "vec_interleave_lowv4si"
6922 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6925 (match_operand:V4SI 1 "register_operand" "0,x")
6926 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6927 (parallel [(const_int 0) (const_int 4)
6928 (const_int 1) (const_int 5)])))]
6931 punpckldq\t{%2, %0|%0, %2}
6932 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6933 [(set_attr "isa" "noavx,avx")
6934 (set_attr "type" "sselog")
6935 (set_attr "prefix_data16" "1,*")
6936 (set_attr "prefix" "orig,vex")
6937 (set_attr "mode" "TI")])
;; Expander for vec_interleave_high on the VI_256 modes.  It emits the avx2
;; low and high interleaves of operands 1 and 2 into fresh temporaries t1 and
;; t2, then combines them with gen_avx2_permv2ti on V4DImode lowparts using
;; the immediate 1 + (3 << 4) (0x31).
;; NOTE(review): 0x31 presumably selects the upper 128-bit half of t1 and of
;; t2 -- confirm against the avx2_permv2ti / VPERM2I128 selector encoding.
;; The expander condition and the braces / DONE of the C body are not visible
;; in this excerpt.
6939 (define_expand "vec_interleave_high<mode>"
6940 [(match_operand:VI_256 0 "register_operand" "=x")
6941 (match_operand:VI_256 1 "register_operand" "x")
6942 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6945 rtx t1 = gen_reg_rtx (<MODE>mode);
6946 rtx t2 = gen_reg_rtx (<MODE>mode);
6947 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6948 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6949 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6950 gen_lowpart (V4DImode, t1),
6951 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander for vec_interleave_low on the VI_256 modes.  It emits the avx2
;; low and high interleaves of operands 1 and 2 into fresh temporaries t1 and
;; t2, then combines them with gen_avx2_permv2ti on V4DImode lowparts using
;; the immediate 0 + (2 << 4) (0x20).
;; NOTE(review): 0x20 presumably selects the lower 128-bit half of t1 and of
;; t2 -- confirm against the avx2_permv2ti / VPERM2I128 selector encoding.
;; The expander condition and the braces / DONE of the C body are not visible
;; in this excerpt.
6955 (define_expand "vec_interleave_low<mode>"
6956 [(match_operand:VI_256 0 "register_operand" "=x")
6957 (match_operand:VI_256 1 "register_operand" "x")
6958 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6961 rtx t1 = gen_reg_rtx (<MODE>mode);
6962 rtx t2 = gen_reg_rtx (<MODE>mode);
6963 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6964 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6965 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6966 gen_lowpart (V4DImode, t1),
6967 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
;; Per-mode enabling conditions: V16QI and V4SI require TARGET_SSE4_1, V2DI
;; additionally requires TARGET_64BIT, and V8HI carries no extra condition
;; within the iterator itself.
6971 ;; Modes handled by pinsr patterns.
6972 (define_mode_iterator PINSR_MODE
6973 [(V16QI "TARGET_SSE4_1") V8HI
6974 (V4SI "TARGET_SSE4_1")
6975 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix for the pinsr insns: "sse2" for V8HI and "sse4_1" for
;; V16QI, V4SI and V2DI.  Used as <sse2p4_1> in the pinsr pattern name.
6977 (define_mode_attr sse2p4_1
6978 [(V16QI "sse4_1") (V8HI "sse2")
6979 (V4SI "sse4_1") (V2DI "sse4_1")])
;; Insert scalar operand 2 into one element of vector operand 1, expressed as
;; a vec_merge of (vec_duplicate operand 2) with operand 1.  Operand 3 is the
;; vec_merge mask; the visible part of the condition requires
;; exact_log2 (operand 3) to be a valid element index, so the mask must have
;; exactly one bit set.  The output code replaces operands[3] with that
;; element index before printing it as the immediate.
;; When the scalar mode is narrower than SImode the template prints operand 2
;; with the %k modifier; otherwise it is printed plainly.  The first two
;; templates are the two-operand pinsr forms, the last two the three-operand
;; vpinsr forms (matching isa "noavx,noavx,avx,avx").
;; The prefix_rex, prefix_data16 and prefix_extra attributes are computed
;; from TARGET_AVX and a test of <MODE>mode against V2DImode / V8HImode.
;; NOTE(review): the first line of the condition, the case labels of the
;; switch and parts of the attribute if_then_else expressions are not visible
;; in this excerpt.
6981 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
6982 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6983 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6984 (vec_merge:PINSR_MODE
6985 (vec_duplicate:PINSR_MODE
6986 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6987 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6988 (match_operand:SI 3 "const_int_operand" "")))]
6990 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6991 < GET_MODE_NUNITS (<MODE>mode))"
6993 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6995 switch (which_alternative)
6998 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6999 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
7002 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
7004 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7005 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7008 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7013 [(set_attr "isa" "noavx,noavx,avx,avx")
7014 (set_attr "type" "sselog")
7015 (set (attr "prefix_rex")
7017 (and (not (match_test "TARGET_AVX"))
7018 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
7020 (const_string "*")))
7021 (set (attr "prefix_data16")
7023 (and (not (match_test "TARGET_AVX"))
7024 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7026 (const_string "*")))
7027 (set (attr "prefix_extra")
7029 (and (not (match_test "TARGET_AVX"))
7030 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7032 (const_string "1")))
7033 (set_attr "length_immediate" "1")
7034 (set_attr "prefix" "orig,orig,vex,vex")
7035 (set_attr "mode" "TI")])
;; Extract byte element operand 2 (0..15) of V16QI operand 1 into a general
;; register of an SWI48 mode; emits [v]pextrb and prints the destination with
;; the %k modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the insn condition
;; are not visible here; the extra closing parenthesis suggests the selected
;; byte is wrapped in an extension (presumably zero_extend) -- confirm.
7037 (define_insn "*sse4_1_pextrb_<mode>"
7038 [(set (match_operand:SWI48 0 "register_operand" "=r")
7041 (match_operand:V16QI 1 "register_operand" "x")
7042 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7044 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7045 [(set_attr "type" "sselog")
7046 (set_attr "prefix_extra" "1")
7047 (set_attr "length_immediate" "1")
7048 (set_attr "prefix" "maybe_vex")
7049 (set_attr "mode" "TI")])
;; Store byte element operand 2 (0..15) of V16QI operand 1 directly to a QI
;; memory operand; emits [v]pextrb with a memory destination.
;; NOTE(review): the vec_select line and the insn condition are not visible
;; in this excerpt.
7051 (define_insn "*sse4_1_pextrb_memory"
7052 [(set (match_operand:QI 0 "memory_operand" "=m")
7054 (match_operand:V16QI 1 "register_operand" "x")
7055 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7057 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7058 [(set_attr "type" "sselog")
7059 (set_attr "prefix_extra" "1")
7060 (set_attr "length_immediate" "1")
7061 (set_attr "prefix" "maybe_vex")
7062 (set_attr "mode" "TI")])
;; Extract word element operand 2 (0..7) of V8HI operand 1 into a general
;; register of an SWI48 mode; emits [v]pextrw and prints the destination with
;; the %k modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the insn condition
;; are not visible here; the extra closing parenthesis suggests the selected
;; word is wrapped in an extension (presumably zero_extend) -- confirm.
7064 (define_insn "*sse2_pextrw_<mode>"
7065 [(set (match_operand:SWI48 0 "register_operand" "=r")
7068 (match_operand:V8HI 1 "register_operand" "x")
7069 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7071 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7072 [(set_attr "type" "sselog")
7073 (set_attr "prefix_data16" "1")
7074 (set_attr "length_immediate" "1")
7075 (set_attr "prefix" "maybe_vex")
7076 (set_attr "mode" "TI")])
;; Store word element operand 2 (0..7) of V8HI operand 1 directly to an HI
;; memory operand; emits [v]pextrw with a memory destination.
;; NOTE(review): the vec_select line and the insn condition are not visible
;; in this excerpt.
7078 (define_insn "*sse4_1_pextrw_memory"
7079 [(set (match_operand:HI 0 "memory_operand" "=m")
7081 (match_operand:V8HI 1 "register_operand" "x")
7082 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7084 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7085 [(set_attr "type" "sselog")
7086 (set_attr "prefix_extra" "1")
7087 (set_attr "length_immediate" "1")
7088 (set_attr "prefix" "maybe_vex")
7089 (set_attr "mode" "TI")])
;; Extract dword element operand 2 (0..3) of V4SI operand 1 into an SI
;; register or memory destination ("=rm"); emits [v]pextrd.
;; NOTE(review): the vec_select line and the insn condition are not visible
;; in this excerpt.
7091 (define_insn "*sse4_1_pextrd"
7092 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7094 (match_operand:V4SI 1 "register_operand" "x")
7095 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7097 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7098 [(set_attr "type" "sselog")
7099 (set_attr "prefix_extra" "1")
7100 (set_attr "length_immediate" "1")
7101 (set_attr "prefix" "maybe_vex")
7102 (set_attr "mode" "TI")])
;; 64-bit-only variant of pextrd whose destination is a DI register: requires
;; TARGET_64BIT && TARGET_SSE4_1 and prints the destination with the %k
;; modifier.
;; NOTE(review): the wrapper lines around the element selection are not
;; visible here; the pattern name indicates a zero_extend of the selected
;; dword -- confirm.
7104 (define_insn "*sse4_1_pextrd_zext"
7105 [(set (match_operand:DI 0 "register_operand" "=r")
7108 (match_operand:V4SI 1 "register_operand" "x")
7109 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7110 "TARGET_64BIT && TARGET_SSE4_1"
7111 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7112 [(set_attr "type" "sselog")
7113 (set_attr "prefix_extra" "1")
7114 (set_attr "length_immediate" "1")
7115 (set_attr "prefix" "maybe_vex")
7116 (set_attr "mode" "TI")])
;; Extract qword element operand 2 (0..1) of V2DI operand 1 into a DI
;; register or memory destination ("=rm"); emits [v]pextrq.  Enabled only for
;; TARGET_SSE4_1 && TARGET_64BIT, and always marked as needing a REX prefix
;; (prefix_rex "1").
;; NOTE(review): the vec_select line is not visible in this excerpt.
7118 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
7119 (define_insn "*sse4_1_pextrq"
7120 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7122 (match_operand:V2DI 1 "register_operand" "x")
7123 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7124 "TARGET_SSE4_1 && TARGET_64BIT"
7125 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7126 [(set_attr "type" "sselog")
7127 (set_attr "prefix_rex" "1")
7128 (set_attr "prefix_extra" "1")
7129 (set_attr "length_immediate" "1")
7130 (set_attr "prefix" "maybe_vex")
7131 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).  It splits
;; the immediate into four 2-bit selectors and passes them to avx2_pshufd_1
;; twice: once as-is (values 0..3) and once offset by 4 (values 4..7), so
;; both groups of four dwords use the same selection.
;; NOTE(review): the expander condition and the braces / DONE of the C body
;; are not visible in this excerpt.
7133 (define_expand "avx2_pshufdv3"
7134 [(match_operand:V8SI 0 "register_operand" "")
7135 (match_operand:V8SI 1 "nonimmediate_operand" "")
7136 (match_operand:SI 2 "const_0_to_255_operand" "")]
7139 int mask = INTVAL (operands[2]);
7140 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7141 GEN_INT ((mask >> 0) & 3),
7142 GEN_INT ((mask >> 2) & 3),
7143 GEN_INT ((mask >> 4) & 3),
7144 GEN_INT ((mask >> 6) & 3),
7145 GEN_INT (((mask >> 0) & 3) + 4),
7146 GEN_INT (((mask >> 2) & 3) + 4),
7147 GEN_INT (((mask >> 4) & 3) + 4),
7148 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit vpshufd with the selection spelled out as eight element indices:
;; operands 2..5 in 0..3 and operands 6..9 in 4..7.  The visible part of the
;; condition requires operands 6..9 to equal operands 2..5 plus 4, so both
;; groups of four dwords use the same selection.  The output code packs
;; operands 2..5 (two bits each) into one immediate, stores it in
;; operands[2], and prints vpshufd.
;; NOTE(review): the vec_select line, the first line of the condition and the
;; declaration of `mask' are not visible in this excerpt.
7152 (define_insn "avx2_pshufd_1"
7153 [(set (match_operand:V8SI 0 "register_operand" "=x")
7155 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7156 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7157 (match_operand 3 "const_0_to_3_operand" "")
7158 (match_operand 4 "const_0_to_3_operand" "")
7159 (match_operand 5 "const_0_to_3_operand" "")
7160 (match_operand 6 "const_4_to_7_operand" "")
7161 (match_operand 7 "const_4_to_7_operand" "")
7162 (match_operand 8 "const_4_to_7_operand" "")
7163 (match_operand 9 "const_4_to_7_operand" "")])))]
7165 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7166 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7167 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7168 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7171 mask |= INTVAL (operands[2]) << 0;
7172 mask |= INTVAL (operands[3]) << 2;
7173 mask |= INTVAL (operands[4]) << 4;
7174 mask |= INTVAL (operands[5]) << 6;
7175 operands[2] = GEN_INT (mask);
7177 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7179 [(set_attr "type" "sselog1")
7180 (set_attr "prefix" "vex")
7181 (set_attr "length_immediate" "1")
7182 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It splits
;; the low eight bits into four 2-bit selectors and passes them to
;; sse2_pshufd_1.
;; NOTE(review): the expander condition and the braces / DONE of the C body
;; are not visible in this excerpt.
7184 (define_expand "sse2_pshufd"
7185 [(match_operand:V4SI 0 "register_operand" "")
7186 (match_operand:V4SI 1 "nonimmediate_operand" "")
7187 (match_operand:SI 2 "const_int_operand" "")]
7190 int mask = INTVAL (operands[2]);
7191 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7192 GEN_INT ((mask >> 0) & 3),
7193 GEN_INT ((mask >> 2) & 3),
7194 GEN_INT ((mask >> 4) & 3),
7195 GEN_INT ((mask >> 6) & 3)));
;; 128-bit [v]pshufd with the selection spelled out as four element indices
;; (operands 2..5, each 0..3).  The output code packs them (two bits each)
;; into one immediate, stores it in operands[2], and prints [v]pshufd.
;; NOTE(review): the vec_select line, the insn condition and the declaration
;; of `mask' are not visible in this excerpt.
7199 (define_insn "sse2_pshufd_1"
7200 [(set (match_operand:V4SI 0 "register_operand" "=x")
7202 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7203 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7204 (match_operand 3 "const_0_to_3_operand" "")
7205 (match_operand 4 "const_0_to_3_operand" "")
7206 (match_operand 5 "const_0_to_3_operand" "")])))]
7210 mask |= INTVAL (operands[2]) << 0;
7211 mask |= INTVAL (operands[3]) << 2;
7212 mask |= INTVAL (operands[4]) << 4;
7213 mask |= INTVAL (operands[5]) << 6;
7214 operands[2] = GEN_INT (mask);
7216 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7218 [(set_attr "type" "sselog1")
7219 (set_attr "prefix_data16" "1")
7220 (set_attr "prefix" "maybe_vex")
7221 (set_attr "length_immediate" "1")
7222 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).  It splits
;; the immediate into four 2-bit selectors and passes them to avx2_pshuflw_1
;; twice: once as-is (values 0..3) and once offset by 8 (values 8..11).
;; NOTE(review): the expander condition and the braces / DONE of the C body
;; are not visible in this excerpt.
7224 (define_expand "avx2_pshuflwv3"
7225 [(match_operand:V16HI 0 "register_operand" "")
7226 (match_operand:V16HI 1 "nonimmediate_operand" "")
7227 (match_operand:SI 2 "const_0_to_255_operand" "")]
7230 int mask = INTVAL (operands[2]);
7231 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7232 GEN_INT ((mask >> 0) & 3),
7233 GEN_INT ((mask >> 2) & 3),
7234 GEN_INT ((mask >> 4) & 3),
7235 GEN_INT ((mask >> 6) & 3),
7236 GEN_INT (((mask >> 0) & 3) + 8),
7237 GEN_INT (((mask >> 2) & 3) + 8),
7238 GEN_INT (((mask >> 4) & 3) + 8),
7239 GEN_INT (((mask >> 6) & 3) + 8)));
;; 256-bit vpshuflw.  Operands 2..5 (each 0..3) and operands 6..9 (each
;; 8..11) are variable element indices; the visible part of the condition
;; requires operands 6..9 to equal operands 2..5 plus 8.  The output code
;; packs operands 2..5 (two bits each) into one immediate, stores it in
;; operands[2], and prints vpshuflw.
;; NOTE(review): several lines of the selection vector (presumably the fixed
;; const_int entries for the unshuffled words), the vec_select line, the
;; first line of the condition and the declaration of `mask' are not visible
;; in this excerpt.
7243 (define_insn "avx2_pshuflw_1"
7244 [(set (match_operand:V16HI 0 "register_operand" "=x")
7246 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7247 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7248 (match_operand 3 "const_0_to_3_operand" "")
7249 (match_operand 4 "const_0_to_3_operand" "")
7250 (match_operand 5 "const_0_to_3_operand" "")
7255 (match_operand 6 "const_8_to_11_operand" "")
7256 (match_operand 7 "const_8_to_11_operand" "")
7257 (match_operand 8 "const_8_to_11_operand" "")
7258 (match_operand 9 "const_8_to_11_operand" "")
7264 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7265 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7266 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7267 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7270 mask |= INTVAL (operands[2]) << 0;
7271 mask |= INTVAL (operands[3]) << 2;
7272 mask |= INTVAL (operands[4]) << 4;
7273 mask |= INTVAL (operands[5]) << 6;
7274 operands[2] = GEN_INT (mask);
7276 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7278 [(set_attr "type" "sselog")
7279 (set_attr "prefix" "vex")
7280 (set_attr "length_immediate" "1")
7281 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It splits
;; the low eight bits into four 2-bit selectors and passes them to
;; sse2_pshuflw_1.
;; NOTE(review): the expander condition and the braces / DONE of the C body
;; are not visible in this excerpt.
7283 (define_expand "sse2_pshuflw"
7284 [(match_operand:V8HI 0 "register_operand" "")
7285 (match_operand:V8HI 1 "nonimmediate_operand" "")
7286 (match_operand:SI 2 "const_int_operand" "")]
7289 int mask = INTVAL (operands[2]);
7290 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7291 GEN_INT ((mask >> 0) & 3),
7292 GEN_INT ((mask >> 2) & 3),
7293 GEN_INT ((mask >> 4) & 3),
7294 GEN_INT ((mask >> 6) & 3)));
;; 128-bit [v]pshuflw.  Operands 2..5 (each 0..3) are the variable element
;; indices; the output code packs them (two bits each) into one immediate,
;; stores it in operands[2], and prints [v]pshuflw.  The encoding attributes
;; mark a REP prefix and no data16 prefix.
;; NOTE(review): the remainder of the selection vector (presumably the fixed
;; const_int entries for the unshuffled words), the vec_select line, the insn
;; condition and the declaration of `mask' are not visible in this excerpt.
7298 (define_insn "sse2_pshuflw_1"
7299 [(set (match_operand:V8HI 0 "register_operand" "=x")
7301 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7302 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7303 (match_operand 3 "const_0_to_3_operand" "")
7304 (match_operand 4 "const_0_to_3_operand" "")
7305 (match_operand 5 "const_0_to_3_operand" "")
7313 mask |= INTVAL (operands[2]) << 0;
7314 mask |= INTVAL (operands[3]) << 2;
7315 mask |= INTVAL (operands[4]) << 4;
7316 mask |= INTVAL (operands[5]) << 6;
7317 operands[2] = GEN_INT (mask);
7319 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7321 [(set_attr "type" "sselog")
7322 (set_attr "prefix_data16" "0")
7323 (set_attr "prefix_rep" "1")
7324 (set_attr "prefix" "maybe_vex")
7325 (set_attr "length_immediate" "1")
7326 (set_attr "mode" "TI")])
;; Expander taking an 8-bit shuffle immediate (operand 2, 0..255).  It splits
;; the immediate into four 2-bit selectors and passes them to avx2_pshufhw_1
;; twice: once offset by 4 (values 4..7) and once offset by 12 (values
;; 12..15).
;; NOTE(review): the expander condition and the braces / DONE of the C body
;; are not visible in this excerpt.
7328 (define_expand "avx2_pshufhwv3"
7329 [(match_operand:V16HI 0 "register_operand" "")
7330 (match_operand:V16HI 1 "nonimmediate_operand" "")
7331 (match_operand:SI 2 "const_0_to_255_operand" "")]
7334 int mask = INTVAL (operands[2]);
7335 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7336 GEN_INT (((mask >> 0) & 3) + 4),
7337 GEN_INT (((mask >> 2) & 3) + 4),
7338 GEN_INT (((mask >> 4) & 3) + 4),
7339 GEN_INT (((mask >> 6) & 3) + 4),
7340 GEN_INT (((mask >> 0) & 3) + 12),
7341 GEN_INT (((mask >> 2) & 3) + 12),
7342 GEN_INT (((mask >> 4) & 3) + 12),
7343 GEN_INT (((mask >> 6) & 3) + 12)));
;; 256-bit vpshufhw.  Operands 2..5 (each 4..7) and operands 6..9 (each
;; 12..15) are variable element indices; the visible part of the condition
;; requires operands 6..9 to equal operands 2..5 plus 8.  The output code
;; subtracts 4 from operands 2..5, packs the results (two bits each) into one
;; immediate, stores it in operands[2], and prints vpshufhw.
;; NOTE(review): several fixed const_int entries of the selection vector, the
;; vec_select line, the first line of the condition and the declaration of
;; `mask' are not visible in this excerpt.
7347 (define_insn "avx2_pshufhw_1"
7348 [(set (match_operand:V16HI 0 "register_operand" "=x")
7350 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7351 (parallel [(const_int 0)
7355 (match_operand 2 "const_4_to_7_operand" "")
7356 (match_operand 3 "const_4_to_7_operand" "")
7357 (match_operand 4 "const_4_to_7_operand" "")
7358 (match_operand 5 "const_4_to_7_operand" "")
7363 (match_operand 6 "const_12_to_15_operand" "")
7364 (match_operand 7 "const_12_to_15_operand" "")
7365 (match_operand 8 "const_12_to_15_operand" "")
7366 (match_operand 9 "const_12_to_15_operand" "")])))]
7368 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7369 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7370 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7371 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7374 mask |= (INTVAL (operands[2]) - 4) << 0;
7375 mask |= (INTVAL (operands[3]) - 4) << 2;
7376 mask |= (INTVAL (operands[4]) - 4) << 4;
7377 mask |= (INTVAL (operands[5]) - 4) << 6;
7378 operands[2] = GEN_INT (mask);
7380 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7382 [(set_attr "type" "sselog")
7383 (set_attr "prefix" "vex")
7384 (set_attr "length_immediate" "1")
7385 (set_attr "mode" "OI")])
;; Expander taking a shuffle immediate (operand 2, any const_int).  It splits
;; the low eight bits into four 2-bit selectors, offsets each by 4 (values
;; 4..7) and passes them to sse2_pshufhw_1.
;; NOTE(review): the expander condition and the braces / DONE of the C body
;; are not visible in this excerpt.
7387 (define_expand "sse2_pshufhw"
7388 [(match_operand:V8HI 0 "register_operand" "")
7389 (match_operand:V8HI 1 "nonimmediate_operand" "")
7390 (match_operand:SI 2 "const_int_operand" "")]
7393 int mask = INTVAL (operands[2]);
7394 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7395 GEN_INT (((mask >> 0) & 3) + 4),
7396 GEN_INT (((mask >> 2) & 3) + 4),
7397 GEN_INT (((mask >> 4) & 3) + 4),
7398 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit [v]pshufhw.  Operands 2..5 (each 4..7) are the variable element
;; indices; the output code subtracts 4 from each, packs the results (two
;; bits each) into one immediate, stores it in operands[2], and prints
;; [v]pshufhw.  The encoding attributes mark a REP prefix and no data16
;; prefix.
;; NOTE(review): some fixed const_int entries of the selection vector, the
;; vec_select line, the insn condition and the declaration of `mask' are not
;; visible in this excerpt.
7402 (define_insn "sse2_pshufhw_1"
7403 [(set (match_operand:V8HI 0 "register_operand" "=x")
7405 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7406 (parallel [(const_int 0)
7410 (match_operand 2 "const_4_to_7_operand" "")
7411 (match_operand 3 "const_4_to_7_operand" "")
7412 (match_operand 4 "const_4_to_7_operand" "")
7413 (match_operand 5 "const_4_to_7_operand" "")])))]
7417 mask |= (INTVAL (operands[2]) - 4) << 0;
7418 mask |= (INTVAL (operands[3]) - 4) << 2;
7419 mask |= (INTVAL (operands[4]) - 4) << 4;
7420 mask |= (INTVAL (operands[5]) - 4) << 6;
7421 operands[2] = GEN_INT (mask);
7423 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7425 [(set_attr "type" "sselog")
7426 (set_attr "prefix_rep" "1")
7427 (set_attr "prefix_data16" "0")
7428 (set_attr "prefix" "maybe_vex")
7429 (set_attr "length_immediate" "1")
7430 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from SI operand 1.  Its preparation
;; statement sets operands[2] to the V4SI zero constant.
;; NOTE(review): most of the RTL template and the expander condition are not
;; visible in this excerpt; presumably operands[2] supplies the zeroed
;; remaining elements via match_dup -- confirm against the full pattern.
7432 (define_expand "sse2_loadd"
7433 [(set (match_operand:V4SI 0 "register_operand" "")
7436 (match_operand:SI 1 "nonimmediate_operand" ""))
7440 "operands[2] = CONST0_RTX (V4SImode);")
;; Load SI operand 2 into a V4SI register, combined with operand 1.
;; Five alternatives, in order:
;;   0: [v]movd from memory, operand 1 constrained to "C"     (isa sse2)
;;   1: [v]movd from a general register into a "Yi" register, operand 1 "C"
;;   2: movss from memory, operand 1 "C"                      (isa noavx)
;;   3: movss from an SSE register, operand 1 tied to dest    (isa noavx)
;;   4: three-operand vmovss from SSE registers               (isa avx)
;; NOTE(review): "C" is presumably the i386 constant-zero constraint; the
;; vec_merge wrapper, merge mask, insn condition and "@" template opener are
;; not visible in this excerpt.
7442 (define_insn "sse2_loadld"
7443 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7446 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7447 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7451 %vmovd\t{%2, %0|%0, %2}
7452 %vmovd\t{%2, %0|%0, %2}
7453 movss\t{%2, %0|%0, %2}
7454 movss\t{%2, %0|%0, %2}
7455 vmovss\t{%2, %1, %0|%0, %1, %2}"
7456 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7457 (set_attr "type" "ssemov")
7458 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7459 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of V4SI operand 1 into an SI destination.  After reload,
;; when inter-unit moves are allowed, or the destination is memory, or the
;; destination is not a general register, the insn is split into a plain SI
;; move: operand 1 is rewritten as an SImode REG with the same register
;; number.
;; NOTE(review): the vec_select line, the insn condition and the output
;; template are not visible in this excerpt.
7461 (define_insn_and_split "sse2_stored"
7462 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7464 (match_operand:V4SI 1 "register_operand" "x,Yi")
7465 (parallel [(const_int 0)])))]
7468 "&& reload_completed
7469 && (TARGET_INTER_UNIT_MOVES
7470 || MEM_P (operands [0])
7471 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7472 [(set (match_dup 0) (match_dup 1))]
7473 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) of a V4SI value in offsettable memory
;; ("o") into an SI general register.  The split code replaces it with a
;; plain SI load from the source address adjusted by i*4 bytes.
;; NOTE(review): the vec_select line, the conditions, the output template and
;; the braces / DONE of the C body are not visible in this excerpt.
7475 (define_insn_and_split "*vec_ext_v4si_mem"
7476 [(set (match_operand:SI 0 "register_operand" "=r")
7478 (match_operand:V4SI 1 "memory_operand" "o")
7479 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7485 int i = INTVAL (operands[2]);
7487 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander that stores element 0 of V2DI operand 1 into a DI
;; destination.
;; NOTE(review): the vec_select line and the expander condition are not
;; visible in this excerpt.
7491 (define_expand "sse_storeq"
7492 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7494 (match_operand:V2DI 1 "register_operand" "")
7495 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DI store from a V2DI source.  Requires
;; TARGET_64BIT and rejects memory-to-memory operand pairs.  The third
;; alternative loads from offsettable memory into a general register with
;; mov{q} (type imov, mode DI).
;; NOTE(review): the first two template lines and the vec_select line are not
;; visible in this excerpt.
7498 (define_insn "*sse2_storeq_rex64"
7499 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7501 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7502 (parallel [(const_int 0)])))]
7503 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7507 mov{q}\t{%1, %0|%0, %1}"
7508 [(set_attr "type" "*,*,imov")
7509 (set_attr "mode" "*,*,DI")])
;; Element-0 DI store from a V2DI SSE register into an SSE register or memory
;; ("=xm").
;; NOTE(review): the vec_select line, insn condition, output template and
;; closing of this definition are not visible in this excerpt.
7511 (define_insn "*sse2_storeq"
7512 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7514 (match_operand:V2DI 1 "register_operand" "x")
7515 (parallel [(const_int 0)])))]
;; Splitter for the element-0 DI store from a V2DI register: when inter-unit
;; moves are allowed, or the destination is memory, or the destination is not
;; a general register, it becomes a plain DI move with operand 1 rewritten as
;; a DImode REG with the same register number.
;; NOTE(review): the opening line of this definition (presumably
;; "(define_split"), the vec_select line and the first line of the condition
;; are not visible in this excerpt.
7520 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7522 (match_operand:V2DI 1 "register_operand" "")
7523 (parallel [(const_int 0)])))]
7526 && (TARGET_INTER_UNIT_MOVES
7527 || MEM_P (operands [0])
7528 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7529 [(set (match_dup 0) (match_dup 1))]
7530 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extraction of element 1 (the upper qword) of V2DI operand 1.
;; Requires TARGET_64BIT and rejects memory-to-memory operand pairs.
;; Alternatives, in order:
;;   0: [v]movhps store to memory
;;   1: psrldq by 8 in place (source tied to destination, isa noavx)
;;   2: vpsrldq by 8, three-operand (isa avx)
;;   3: [v]movq load from the %H1 address of an offsettable memory source
;;   4: mov{q} load from the %H1 address into a general register
;; NOTE(review): %H1 presumably addresses the source memory plus 8 bytes;
;; the vec_select line and "@" template opener are not visible here.
7532 (define_insn "*vec_extractv2di_1_rex64"
7533 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7535 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7536 (parallel [(const_int 1)])))]
7537 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7539 %vmovhps\t{%1, %0|%0, %1}
7540 psrldq\t{$8, %0|%0, 8}
7541 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7542 %vmovq\t{%H1, %0|%0, %H1}
7543 mov{q}\t{%H1, %0|%0, %H1}"
7544 [(set_attr "isa" "*,noavx,avx,*,*")
7545 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7546 (set_attr "length_immediate" "*,1,1,*,*")
7547 (set_attr "memory" "*,none,none,*,*")
7548 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7549 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit extraction of element 1 (the upper qword) of V2DI operand 1.
;; Requires !TARGET_64BIT && TARGET_SSE and rejects memory-to-memory operand
;; pairs.  Alternatives, in order:
;;   0: [v]movhps store to memory
;;   1: psrldq by 8 in place (isa sse2_noavx)
;;   2: vpsrldq by 8, three-operand (isa avx)
;;   3: [v]movq load from the %H1 address (isa sse2)
;;   4: movhlps register to register (isa noavx)
;;   5: movlps load from the %H1 address (isa noavx)
;; NOTE(review): %H1 presumably addresses the source memory plus 8 bytes;
;; the vec_select line and "@" template opener are not visible here.
7551 (define_insn "*vec_extractv2di_1"
7552 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7554 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7555 (parallel [(const_int 1)])))]
7556 "!TARGET_64BIT && TARGET_SSE
7557 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7559 %vmovhps\t{%1, %0|%0, %1}
7560 psrldq\t{$8, %0|%0, 8}
7561 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7562 %vmovq\t{%H1, %0|%0, %H1}
7563 movhlps\t{%1, %0|%0, %1}
7564 movlps\t{%H1, %0|%0, %H1}"
7565 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7566 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7567 (set_attr "length_immediate" "*,1,1,*,*,*")
7568 (set_attr "memory" "*,none,none,*,*,*")
7569 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7570 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; Broadcast SI operand 1 into all four elements of a V4SI register.
;; Alternatives, in order:
;;   0: [v]pshufd with immediate 0 from an SSE register (isa sse2)
;;   1: vbroadcastss from memory (isa avx)
;;   2: shufps with immediate 0 in place, source tied to dest (isa noavx)
;; NOTE(review): the vec_duplicate line, the insn condition and the "@"
;; template opener are not visible in this excerpt.
7572 (define_insn "*vec_dupv4si"
7573 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7575 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7578 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7579 vbroadcastss\t{%1, %0|%0, %1}
7580 shufps\t{$0, %0, %0|%0, %0, 0}"
7581 [(set_attr "isa" "sse2,avx,noavx")
7582 (set_attr "type" "sselog1,ssemov,sselog1")
7583 (set_attr "length_immediate" "1,0,1")
7584 (set_attr "prefix_extra" "0,1,*")
7585 (set_attr "prefix" "maybe_vex,vex,orig")
7586 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast DI operand 1 into both elements of a V2DI register.  Four
;; alternatives (isa sse2_noavx, avx, sse3, noavx); the two visible templates
;; are vpunpcklqdq (avx, operand printed with %d1) and [v]movddup from memory
;; (sse3).
;; NOTE(review): the first and last template lines, the vec_duplicate line,
;; the insn condition and the "@" template opener are not visible in this
;; excerpt.
7588 (define_insn "*vec_dupv2di"
7589 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7591 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7595 vpunpcklqdq\t{%d1, %0|%0, %d1}
7596 %vmovddup\t{%1, %0|%0, %1}
7598 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7599 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7600 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7601 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Build a V2SI from SI operands 1 (low element) and 2 (high element), with
;; SSE4.1 insertion available.  Alternatives, in order:
;;   0: pinsrd $1 of operand 2 into the destination (noavx)
;;   1: vpinsrd $1, three-operand (avx)
;;   2: punpckldq with an SSE register (noavx)
;;   3: vpunpckldq, three-operand (avx)
;;   4: [v]movd of operand 1 when operand 2 is the "C" constant
;;   5: MMX punpckldq (*y registers)
;;   6: MMX movd of operand 1 when operand 2 is the "C" constant
;; NOTE(review): "C" is presumably the constant-zero constraint; the
;; vec_concat line, insn condition and "@" opener are not visible here.
7603 (define_insn "*vec_concatv2si_sse4_1"
7604 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7606 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7607 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7610 pinsrd\t{$1, %2, %0|%0, %2, 1}
7611 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7612 punpckldq\t{%2, %0|%0, %2}
7613 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7614 %vmovd\t{%1, %0|%0, %1}
7615 punpckldq\t{%2, %0|%0, %2}
7616 movd\t{%1, %0|%0, %1}"
7617 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7618 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7619 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7620 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7621 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7622 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
;; Build a V2SI from SI operands 1 and 2.  Alternatives, in order:
;;   0: SSE punpckldq, operand 1 tied to the destination
;;   1: SSE movd of operand 1 when operand 2 is the "C" constant
;;   2: MMX punpckldq (*y registers)
;;   3: MMX movd of operand 1 when operand 2 is the "C" constant
;; NOTE(review): the vec_concat line, insn condition and "@" template opener
;; are not visible in this excerpt.
7624 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7625 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7626 ;; alternatives pretty much forces the MMX alternative to be chosen.
7627 (define_insn "*vec_concatv2si_sse2"
7628 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7630 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7631 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7634 punpckldq\t{%2, %0|%0, %2}
7635 movd\t{%1, %0|%0, %1}
7636 punpckldq\t{%2, %0|%0, %2}
7637 movd\t{%1, %0|%0, %1}"
7638 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7639 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI from SI operands 1 and 2 using float-domain SSE moves.
;; Alternatives, in order:
;;   0: unpcklps, operand 1 tied to the destination
;;   1: movss load of operand 1 from memory when operand 2 is the "C" constant
;;   2: MMX punpckldq (*y registers)
;;   3: MMX movd of operand 1 when operand 2 is the "C" constant
;; NOTE(review): the vec_concat line, insn condition and "@" template opener
;; are not visible in this excerpt.
7641 (define_insn "*vec_concatv2si_sse"
7642 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7644 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7645 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7648 unpcklps\t{%2, %0|%0, %2}
7649 movss\t{%1, %0|%0, %1}
7650 punpckldq\t{%2, %0|%0, %2}
7651 movd\t{%1, %0|%0, %1}"
7652 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7653 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI from two V2SI halves (operand 1 low, operand 2 high).
;; Alternatives, in order:
;;   0: punpcklqdq in place (isa sse2_noavx)
;;   1: vpunpcklqdq, three-operand (isa avx)
;;   2: movlhps in place (isa noavx)
;;   3: movhps load of operand 2 from memory (isa noavx)
;;   4: vmovhps load of operand 2 from memory, three-operand (isa avx)
;; NOTE(review): the vec_concat line, insn condition and "@" template opener
;; are not visible in this excerpt.
7655 (define_insn "*vec_concatv4si"
7656 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7658 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7659 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7662 punpcklqdq\t{%2, %0|%0, %2}
7663 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7664 movlhps\t{%2, %0|%0, %2}
7665 movhps\t{%2, %0|%0, %2}
7666 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7667 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7668 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7669 (set_attr "prefix" "orig,vex,orig,orig,vex")
7670 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; Build a V2DI from DI operands 1 (low) and 2 (high).  Nine alternatives,
;; in order:
;;   0: pinsrq $1 of operand 2 (isa sse4_noavx)
;;   1: vpinsrq $1, three-operand (isa avx)
;;   2: [v]movq of operand 1, operand 2 is the "C" constant
;;   3: [v]movd of a general register into a "Yi" register, operand 2 "C"
;;   4: movq2dq from an MMX register, operand 2 "C"
;;   5: punpcklqdq in place (noavx)
;;   6: vpunpcklqdq, three-operand (avx)
;;   7: movhps load of operand 2 from memory (noavx)
;;   8: vmovhps load of operand 2 from memory, three-operand (avx)
;; The "type" attribute is sselog for alternatives 0,1,5,6 and ssemov
;; otherwise; prefix_rex is conditional on alternatives 0,3 without AVX.
;; NOTE(review): the vec_concat line, the insn condition, the "@" opener and
;; parts of the attribute if_then_else expressions are not visible here.
7672 ;; movd instead of movq is required to handle broken assemblers.
7673 (define_insn "*vec_concatv2di_rex64"
7674 [(set (match_operand:V2DI 0 "register_operand"
7675 "=x,x ,x ,Yi,!x,x,x,x,x")
7677 (match_operand:DI 1 "nonimmediate_operand"
7678 " 0,x ,xm,r ,*y,0,x,0,x")
7679 (match_operand:DI 2 "vector_move_operand"
7680 "rm,rm,C ,C ,C ,x,x,m,m")))]
7683 pinsrq\t{$1, %2, %0|%0, %2, 1}
7684 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7685 %vmovq\t{%1, %0|%0, %1}
7686 %vmovd\t{%1, %0|%0, %1}
7687 movq2dq\t{%1, %0|%0, %1}
7688 punpcklqdq\t{%2, %0|%0, %2}
7689 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7690 movhps\t{%2, %0|%0, %2}
7691 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7692 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7695 (eq_attr "alternative" "0,1,5,6")
7696 (const_string "sselog")
7697 (const_string "ssemov")))
7698 (set (attr "prefix_rex")
7700 (and (eq_attr "alternative" "0,3")
7701 (not (match_test "TARGET_AVX")))
7703 (const_string "*")))
7704 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7705 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7706 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7707 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit counterpart (!TARGET_64BIT && TARGET_SSE): build a V2DI from DI
;; operands 1 (low) and 2 (high).  Seven alternatives, in order:
;;   0: [v]movq of operand 1, operand 2 is the "C" constant (isa sse2)
;;   1: movq2dq from an MMX register, operand 2 "C" (isa sse2)
;;   2: punpcklqdq in place (isa sse2_noavx)
;;   3: vpunpcklqdq, three-operand (isa avx)
;;   4: movlhps in place (isa noavx)
;;   5: movhps load of operand 2 from memory (isa noavx)
;;   6: vmovhps load of operand 2 from memory, three-operand (isa avx)
;; NOTE(review): the vec_concat line and "@" template opener are not visible
;; in this excerpt.
7709 (define_insn "vec_concatv2di"
7710 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7712 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7713 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7714 "!TARGET_64BIT && TARGET_SSE"
7716 %vmovq\t{%1, %0|%0, %1}
7717 movq2dq\t{%1, %0|%0, %1}
7718 punpcklqdq\t{%2, %0|%0, %2}
7719 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7720 movlhps\t{%2, %0|%0, %2}
7721 movhps\t{%2, %0|%0, %2}
7722 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7723 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7724 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7725 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7726 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; vec_unpacks_lo expander for the VI124_AVX2 modes; the result has the
;; corresponding <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): from the four sibling expanders the flags are presumably
;; (unsigned_p, high_p) -- confirm against i386.c.  The expander condition is
;; not visible in this excerpt.
7728 (define_expand "vec_unpacks_lo_<mode>"
7729 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7730 (match_operand:VI124_AVX2 1 "register_operand" "")]
7732 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for the VI124_AVX2 modes: operand 1 is the source vector register,
;; operand 0 the <sseunpackmode> result register.  All work is delegated to
;; ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): the two boolean arguments presumably mean "unsigned" and
;; "high half" -- confirm against ix86_expand_sse_unpack.  The expander
;; condition is not visible in this excerpt.
7734 (define_expand "vec_unpacks_hi_<mode>"
7735 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7736 (match_operand:VI124_AVX2 1 "register_operand" "")]
7738 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for the VI124_AVX2 modes: operand 1 is the source vector register,
;; operand 0 the <sseunpackmode> result register.  All work is delegated to
;; ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): the two boolean arguments presumably mean "unsigned" and
;; "high half" -- confirm against ix86_expand_sse_unpack.  The expander
;; condition is not visible in this excerpt.
7740 (define_expand "vec_unpacku_lo_<mode>"
7741 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7742 (match_operand:VI124_AVX2 1 "register_operand" "")]
7744 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for the VI124_AVX2 modes: operand 1 is the source vector register,
;; operand 0 the <sseunpackmode> result register.  All work is delegated to
;; ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): the two boolean arguments presumably mean "unsigned" and
;; "high half" -- confirm against ix86_expand_sse_unpack.  The expander
;; condition is not visible in this excerpt.
7746 (define_expand "vec_unpacku_hi_<mode>"
7747 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7748 (match_operand:VI124_AVX2 1 "register_operand" "")]
7750 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7752 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7756 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI "uavg" pattern.  Operands 1 and 2 are
;; nonimmediate V32QI inputs, operand 0 the V32QI register result; the RTL
;; also contains a V32QI constant vector of thirty-two 1s.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy with PLUS and
;; V32QImode on the operands.
;; NOTE(review): the arithmetic/extension operators around the operands and
;; the expander condition are not visible in this excerpt.
7758 (define_expand "avx2_uavgv32qi3"
7759 [(set (match_operand:V32QI 0 "register_operand" "")
7765 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7767 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7768 (const_vector:V32QI [(const_int 1) (const_int 1)
7769 (const_int 1) (const_int 1)
7770 (const_int 1) (const_int 1)
7771 (const_int 1) (const_int 1)
7772 (const_int 1) (const_int 1)
7773 (const_int 1) (const_int 1)
7774 (const_int 1) (const_int 1)
7775 (const_int 1) (const_int 1)
7776 (const_int 1) (const_int 1)
7777 (const_int 1) (const_int 1)
7778 (const_int 1) (const_int 1)
7779 (const_int 1) (const_int 1)
7780 (const_int 1) (const_int 1)
7781 (const_int 1) (const_int 1)
7782 (const_int 1) (const_int 1)
7783 (const_int 1) (const_int 1)]))
7786 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI "uavg" pattern.  Operands 1 and 2 are
;; nonimmediate V16QI inputs, operand 0 the V16QI register result; the RTL
;; also contains a V16QI constant vector of sixteen 1s.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy with PLUS and
;; V16QImode on the operands.
;; NOTE(review): the arithmetic/extension operators around the operands and
;; the expander condition are not visible in this excerpt.
7788 (define_expand "sse2_uavgv16qi3"
7789 [(set (match_operand:V16QI 0 "register_operand" "")
7795 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7797 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7798 (const_vector:V16QI [(const_int 1) (const_int 1)
7799 (const_int 1) (const_int 1)
7800 (const_int 1) (const_int 1)
7801 (const_int 1) (const_int 1)
7802 (const_int 1) (const_int 1)
7803 (const_int 1) (const_int 1)
7804 (const_int 1) (const_int 1)
7805 (const_int 1) (const_int 1)]))
7808 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI "uavg" pattern; emits vpavgb in its
;; three-operand form.  Enabled when TARGET_AVX2 holds and
;; ix86_binary_operator_ok (PLUS, V32QImode, operands) accepts the operands.
;; Operand 1 carries the "%" constraint modifier (commutative with operand
;; 2); operand 2 may be a register or memory ("xm").
;; NOTE(review): the operators around the operands and the all-ones constant
;; vector are not visible in this excerpt.
7810 (define_insn "*avx2_uavgv32qi3"
7811 [(set (match_operand:V32QI 0 "register_operand" "=x")
7817 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7819 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7820 (const_vector:V32QI [(const_int 1) (const_int 1)
7821 (const_int 1) (const_int 1)
7822 (const_int 1) (const_int 1)
7823 (const_int 1) (const_int 1)
7824 (const_int 1) (const_int 1)
7825 (const_int 1) (const_int 1)
7826 (const_int 1) (const_int 1)
7827 (const_int 1) (const_int 1)
7828 (const_int 1) (const_int 1)
7829 (const_int 1) (const_int 1)
7830 (const_int 1) (const_int 1)
7831 (const_int 1) (const_int 1)
7832 (const_int 1) (const_int 1)
7833 (const_int 1) (const_int 1)
7834 (const_int 1) (const_int 1)
7835 (const_int 1) (const_int 1)]))
7837 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7838 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7839 [(set_attr "type" "sseiadd")
7840 (set_attr "prefix" "vex")
7841 (set_attr "mode" "OI")])
;; Matching insn for the V16QI "uavg" pattern.  Two alternatives: pavgb
;; with the destination tied to operand 1 (isa "noavx"), and the
;; three-operand vpavgb (isa "avx").  Enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok (PLUS, V16QImode, operands) accepts the operands.
;; Operand 1 carries the "%" constraint modifier (commutative with operand 2).
;; NOTE(review): the operators around the operands and the opening of the
;; multi-alternative template are not visible in this excerpt.
7843 (define_insn "*sse2_uavgv16qi3"
7844 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7850 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7852 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7853 (const_vector:V16QI [(const_int 1) (const_int 1)
7854 (const_int 1) (const_int 1)
7855 (const_int 1) (const_int 1)
7856 (const_int 1) (const_int 1)
7857 (const_int 1) (const_int 1)
7858 (const_int 1) (const_int 1)
7859 (const_int 1) (const_int 1)
7860 (const_int 1) (const_int 1)]))
7862 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7864 pavgb\t{%2, %0|%0, %2}
7865 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7866 [(set_attr "isa" "noavx,avx")
7867 (set_attr "type" "sseiadd")
7868 (set_attr "prefix_data16" "1,*")
7869 (set_attr "prefix" "orig,vex")
7870 (set_attr "mode" "TI")])
;; Expander for the V16HI "uavg" pattern.  Operands 1 and 2 are
;; nonimmediate V16HI inputs, operand 0 the V16HI register result; the RTL
;; also contains a V16HI constant vector of sixteen 1s.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy with PLUS and
;; V16HImode on the operands.
;; NOTE(review): the arithmetic/extension operators around the operands and
;; the expander condition are not visible in this excerpt.
7872 (define_expand "avx2_uavgv16hi3"
7873 [(set (match_operand:V16HI 0 "register_operand" "")
7879 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7881 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7882 (const_vector:V16HI [(const_int 1) (const_int 1)
7883 (const_int 1) (const_int 1)
7884 (const_int 1) (const_int 1)
7885 (const_int 1) (const_int 1)
7886 (const_int 1) (const_int 1)
7887 (const_int 1) (const_int 1)
7888 (const_int 1) (const_int 1)
7889 (const_int 1) (const_int 1)]))
7892 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI "uavg" pattern.  Operands 1 and 2 are
;; nonimmediate V8HI inputs, operand 0 the V8HI register result; the RTL
;; also contains a V8HI constant vector of eight 1s.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy with PLUS and
;; V8HImode on the operands.
;; NOTE(review): the arithmetic/extension operators around the operands and
;; the expander condition are not visible in this excerpt.
7894 (define_expand "sse2_uavgv8hi3"
7895 [(set (match_operand:V8HI 0 "register_operand" "")
7901 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7903 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7904 (const_vector:V8HI [(const_int 1) (const_int 1)
7905 (const_int 1) (const_int 1)
7906 (const_int 1) (const_int 1)
7907 (const_int 1) (const_int 1)]))
7910 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI "uavg" pattern; emits vpavgw in its
;; three-operand form.  Enabled when TARGET_AVX2 holds and
;; ix86_binary_operator_ok (PLUS, V16HImode, operands) accepts the operands.
;; Operand 1 carries the "%" constraint modifier (commutative with operand
;; 2); operand 2 may be a register or memory ("xm").
;; NOTE(review): the operators around the operands and the all-ones constant
;; vector are not visible in this excerpt.
7912 (define_insn "*avx2_uavgv16hi3"
7913 [(set (match_operand:V16HI 0 "register_operand" "=x")
7919 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7921 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7922 (const_vector:V16HI [(const_int 1) (const_int 1)
7923 (const_int 1) (const_int 1)
7924 (const_int 1) (const_int 1)
7925 (const_int 1) (const_int 1)
7926 (const_int 1) (const_int 1)
7927 (const_int 1) (const_int 1)
7928 (const_int 1) (const_int 1)
7929 (const_int 1) (const_int 1)]))
7931 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7932 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7933 [(set_attr "type" "sseiadd")
7934 (set_attr "prefix" "vex")
7935 (set_attr "mode" "OI")])
;; Matching insn for the V8HI "uavg" pattern.  Two alternatives: pavgw with
;; the destination tied to operand 1 (isa "noavx"), and the three-operand
;; vpavgw (isa "avx").  Enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok (PLUS, V8HImode, operands) accepts the operands.
;; Operand 1 carries the "%" constraint modifier (commutative with operand 2).
;; NOTE(review): the operators around the operands and the opening of the
;; multi-alternative template are not visible in this excerpt.
7937 (define_insn "*sse2_uavgv8hi3"
7938 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7944 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7946 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7947 (const_vector:V8HI [(const_int 1) (const_int 1)
7948 (const_int 1) (const_int 1)
7949 (const_int 1) (const_int 1)
7950 (const_int 1) (const_int 1)]))
7952 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7954 pavgw\t{%2, %0|%0, %2}
7955 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7956 [(set_attr "isa" "noavx,avx")
7957 (set_attr "type" "sseiadd")
7958 (set_attr "prefix_data16" "1,*")
7959 (set_attr "prefix" "orig,vex")
7960 (set_attr "mode" "TI")])
7962 ;; The correct representation for this is absolutely enormous, and
7963 ;; surely not generally useful.
;; The operation is therefore modelled as an opaque unspec over two
;; <ssebytemode> inputs producing a VI8_AVX2 result.  Two alternatives:
;; psadbw with the destination tied to operand 1 (isa "noavx"), and the
;; three-operand vpsadbw (isa "avx").  Operand 2 may be a register or memory.
;; NOTE(review): the unspec code, the insn condition and the opening of the
;; template are not visible in this excerpt.
7964 (define_insn "<sse2_avx2>_psadbw"
7965 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7966 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7967 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7971 psadbw\t{%2, %0|%0, %2}
7972 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7973 [(set_attr "isa" "noavx,avx")
7974 (set_attr "type" "sseiadd")
7975 (set_attr "atom_unit" "simul")
7976 (set_attr "prefix_data16" "1,*")
7977 (set_attr "prefix" "orig,vex")
7978 (set_attr "mode" "<sseinsnmode>")])
;; Produce an SImode general-register result ("=r") from a vector-float
;; register operand (mode iterator VF, constraint "x").  Emits
;; movmsk<ssemodesuffix>, with the %v template prefix and "maybe_vex"
;; prefix attribute.
;; NOTE(review): the RTL operator/unspec code and the insn condition are not
;; visible in this excerpt.
7980 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7981 [(set (match_operand:SI 0 "register_operand" "=r")
7983 [(match_operand:VF 1 "register_operand" "x")]
7986 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7987 [(set_attr "type" "ssemov")
7988 (set_attr "prefix" "maybe_vex")
7989 (set_attr "mode" "<MODE>")])
;; Produce an SImode general-register result from a V32QI register operand
;; via an unspec; emits vpmovmskb (VEX-only).
;; NOTE(review): the "mode" attribute is DI although the destination operand
;; is SImode -- confirm this is intentional.  The unspec code and the insn
;; condition are not visible in this excerpt.
7991 (define_insn "avx2_pmovmskb"
7992 [(set (match_operand:SI 0 "register_operand" "=r")
7993 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7996 "vpmovmskb\t{%1, %0|%0, %1}"
7997 [(set_attr "type" "ssemov")
7998 (set_attr "prefix" "vex")
7999 (set_attr "mode" "DI")])
;; Produce an SImode general-register result from a V16QI register operand
;; via an unspec; emits pmovmskb, with the %v template prefix and
;; "maybe_vex" prefix attribute.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
8001 (define_insn "sse2_pmovmskb"
8002 [(set (match_operand:SI 0 "register_operand" "=r")
8003 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8006 "%vpmovmskb\t{%1, %0|%0, %1}"
8007 [(set_attr "type" "ssemov")
8008 (set_attr "prefix_data16" "1")
8009 (set_attr "prefix" "maybe_vex")
8010 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: operand 0 is the V16QI memory destination,
;; operands 1 and 2 are V16QI registers fed to a V16QI unspec.
;; NOTE(review): the remaining unspec operand(s), the unspec code and the
;; expander condition are not visible in this excerpt.
8012 (define_expand "sse2_maskmovdqu"
8013 [(set (match_operand:V16QI 0 "memory_operand" "")
8014 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8015 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn for maskmovdqu.  The destination is V16QI memory addressed
;; by operand 0, a pointer-mode (P) register with constraint "D"; the same
;; memory is also read as the third unspec input via match_dup 0.  The output
;; template prints only operands 1 and 2, so the address register is implicit
;; in the emitted instruction, which is why "length_vex" is computed by hand
;; below instead of using the default.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
8020 (define_insn "*sse2_maskmovdqu"
8021 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
8022 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8023 (match_operand:V16QI 2 "register_operand" "x")
8024 (mem:V16QI (match_dup 0))]
8027 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8028 [(set_attr "type" "ssemov")
8029 (set_attr "prefix_data16" "1")
8030 ;; The implicit %rdi operand confuses default length_vex computation.
8031 (set (attr "length_vex")
8032 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8033 (set_attr "prefix" "maybe_vex")
8034 (set_attr "mode" "TI")])
;; Volatile unspec taking a single SImode memory operand ("m"); attributes
;; mark it as type "sse", atom_sse_attr "mxcsr" and a memory "load".
;; NOTE(review): the unspec code, insn condition and output template are not
;; visible in this excerpt -- presumably the ldmxcsr instruction, going by
;; the pattern name; confirm.
8036 (define_insn "sse_ldmxcsr"
8037 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8041 [(set_attr "type" "sse")
8042 (set_attr "atom_sse_attr" "mxcsr")
8043 (set_attr "prefix" "maybe_vex")
8044 (set_attr "memory" "load")])
;; Set an SImode memory operand ("=m") from the volatile unspec
;; UNSPECV_STMXCSR; attributes mark it as type "sse", atom_sse_attr "mxcsr"
;; and a memory "store".
;; NOTE(review): the insn condition and output template are not visible in
;; this excerpt -- presumably the stmxcsr instruction, going by the pattern
;; name; confirm.
8046 (define_insn "sse_stmxcsr"
8047 [(set (match_operand:SI 0 "memory_operand" "=m")
8048 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8051 [(set_attr "type" "sse")
8052 (set_attr "atom_sse_attr" "mxcsr")
8053 (set_attr "prefix" "maybe_vex")
8054 (set_attr "memory" "store")])
;; Volatile unspec taking a single address operand (predicate
;; address_operand, constraint "p"); attributes mark it as type "sse",
;; atom_sse_attr "fence", with memory effect "unknown".
;; NOTE(review): the unspec code, insn condition and output template are not
;; visible in this excerpt -- presumably the clflush instruction, going by
;; the pattern name; confirm.
8056 (define_insn "sse2_clflush"
8057 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8061 [(set_attr "type" "sse")
8062 (set_attr "atom_sse_attr" "fence")
8063 (set_attr "memory" "unknown")])
;; Volatile unspec taking two SImode register operands pinned by the "a"
;; and "c" constraints; the "length" attribute is fixed at 3.
;; NOTE(review): the unspec code, insn condition and output template are not
;; visible in this excerpt.
8066 (define_insn "sse3_mwait"
8067 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8068 (match_operand:SI 1 "register_operand" "c")]
8071 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8072 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8073 ;; we only need to set up 32bit registers.
8075 [(set_attr "length" "3")])
;; Non-64-bit form of monitor (condition: TARGET_SSE3 && !TARGET_64BIT).
;; Volatile unspec taking three SImode register operands pinned by the "a",
;; "c" and "d" constraints; emits "monitor %0, %1, %2" with a fixed
;; "length" of 3.
;; NOTE(review): the unspec code is not visible in this excerpt.
8077 (define_insn "sse3_monitor"
8078 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8079 (match_operand:SI 1 "register_operand" "c")
8080 (match_operand:SI 2 "register_operand" "d")]
8082 "TARGET_SSE3 && !TARGET_64BIT"
8083 "monitor\t%0, %1, %2"
8084 [(set_attr "length" "3")])
;; 64-bit form of monitor (condition: TARGET_SSE3 && TARGET_64BIT).
;; Volatile unspec whose operand 0 is a DImode register ("a") while
;; operands 1 and 2 remain SImode registers ("c" and "d"); the in-pattern
;; comment below explains why 32-bit registers suffice for the latter two.
;; The "length" attribute is fixed at 3.
;; NOTE(review): the unspec code and the output template are not visible in
;; this excerpt.
8086 (define_insn "sse3_monitor64"
8087 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8088 (match_operand:SI 1 "register_operand" "c")
8089 (match_operand:SI 2 "register_operand" "d")]
8091 "TARGET_SSE3 && TARGET_64BIT"
8092 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8093 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8094 ;; zero extended to 64bit, we only need to set up 32bit registers.
8096 [(set_attr "length" "3")])
8098 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8100 ;; SSSE3 instructions
8102 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V16HI pattern emitting the three-operand vphaddw.  The visible RTL selects
;; HImode elements 0-15 of operand 1 (a register) and then elements 0-15 of
;; operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8104 (define_insn "avx2_phaddwv16hi3"
8105 [(set (match_operand:V16HI 0 "register_operand" "=x")
8112 (match_operand:V16HI 1 "register_operand" "x")
8113 (parallel [(const_int 0)]))
8114 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8116 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8117 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8120 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8121 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8123 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8124 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8128 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8129 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8131 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8132 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8135 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8136 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8138 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8139 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8145 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8146 (parallel [(const_int 0)]))
8147 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8149 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8150 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8153 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8154 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8156 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8157 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8161 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8162 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8164 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8165 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8168 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8169 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8171 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8172 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8174 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8175 [(set_attr "type" "sseiadd")
8176 (set_attr "prefix_extra" "1")
8177 (set_attr "prefix" "vex")
8178 (set_attr "mode" "OI")])
;; V8HI pattern with two alternatives: phaddw with the destination tied to
;; operand 1 (isa "noavx"), and the three-operand vphaddw (isa "avx").  The
;; visible RTL selects HImode elements 0-7 of operand 1 and then elements
;; 0-7 of operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements, the insn
;; condition and the opening of the template are not visible in this excerpt.
8180 (define_insn "ssse3_phaddwv8hi3"
8181 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8187 (match_operand:V8HI 1 "register_operand" "0,x")
8188 (parallel [(const_int 0)]))
8189 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8191 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8192 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8195 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8196 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8198 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8199 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8204 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8205 (parallel [(const_int 0)]))
8206 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8208 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8209 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8212 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8213 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8215 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8216 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8219 phaddw\t{%2, %0|%0, %2}
8220 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8221 [(set_attr "isa" "noavx,avx")
8222 (set_attr "type" "sseiadd")
8223 (set_attr "atom_unit" "complex")
8224 (set_attr "prefix_data16" "1,*")
8225 (set_attr "prefix_extra" "1")
8226 (set_attr "prefix" "orig,vex")
8227 (set_attr "mode" "TI")])
;; V4HI pattern using the "y" register constraint, with the destination tied
;; to operand 1; emits the two-operand phaddw.  The visible RTL selects
;; HImode elements 0-3 of operand 1 and then elements 0-3 of operand 2
;; (register or memory).  "prefix_rex" is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8229 (define_insn "ssse3_phaddwv4hi3"
8230 [(set (match_operand:V4HI 0 "register_operand" "=y")
8235 (match_operand:V4HI 1 "register_operand" "0")
8236 (parallel [(const_int 0)]))
8237 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8239 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8240 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8244 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8245 (parallel [(const_int 0)]))
8246 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8248 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8249 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8251 "phaddw\t{%2, %0|%0, %2}"
8252 [(set_attr "type" "sseiadd")
8253 (set_attr "atom_unit" "complex")
8254 (set_attr "prefix_extra" "1")
8255 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8256 (set_attr "mode" "DI")])
;; V8SI pattern emitting the three-operand vphaddd.  The visible RTL selects
;; SImode elements 0-7 of operand 1 (a register) and then elements 0-7 of
;; operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8258 (define_insn "avx2_phadddv8si3"
8259 [(set (match_operand:V8SI 0 "register_operand" "=x")
8265 (match_operand:V8SI 1 "register_operand" "x")
8266 (parallel [(const_int 0)]))
8267 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8269 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8270 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8273 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8274 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8276 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8277 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8282 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8283 (parallel [(const_int 0)]))
8284 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8286 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8287 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8290 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8291 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8293 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8294 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8296 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8297 [(set_attr "type" "sseiadd")
8298 (set_attr "prefix_extra" "1")
8299 (set_attr "prefix" "vex")
8300 (set_attr "mode" "OI")])
;; V4SI pattern with two alternatives: phaddd with the destination tied to
;; operand 1 (isa "noavx"), and the three-operand vphaddd (isa "avx").  The
;; visible RTL selects SImode elements 0-3 of operand 1 and then elements
;; 0-3 of operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements, the insn
;; condition and the opening of the template are not visible in this excerpt.
8302 (define_insn "ssse3_phadddv4si3"
8303 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8308 (match_operand:V4SI 1 "register_operand" "0,x")
8309 (parallel [(const_int 0)]))
8310 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8312 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8313 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8317 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8318 (parallel [(const_int 0)]))
8319 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8321 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8322 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8325 phaddd\t{%2, %0|%0, %2}
8326 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8327 [(set_attr "isa" "noavx,avx")
8328 (set_attr "type" "sseiadd")
8329 (set_attr "atom_unit" "complex")
8330 (set_attr "prefix_data16" "1,*")
8331 (set_attr "prefix_extra" "1")
8332 (set_attr "prefix" "orig,vex")
8333 (set_attr "mode" "TI")])
;; V2SI pattern using the "y" register constraint, with the destination tied
;; to operand 1; emits the two-operand phaddd.  The visible RTL selects
;; SImode elements 0-1 of operand 1 and then elements 0-1 of operand 2
;; (register or memory).  "prefix_rex" is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8335 (define_insn "ssse3_phadddv2si3"
8336 [(set (match_operand:V2SI 0 "register_operand" "=y")
8340 (match_operand:V2SI 1 "register_operand" "0")
8341 (parallel [(const_int 0)]))
8342 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8345 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8346 (parallel [(const_int 0)]))
8347 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8349 "phaddd\t{%2, %0|%0, %2}"
8350 [(set_attr "type" "sseiadd")
8351 (set_attr "atom_unit" "complex")
8352 (set_attr "prefix_extra" "1")
8353 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8354 (set_attr "mode" "DI")])
;; V16HI pattern emitting the three-operand vphaddsw.  The visible RTL
;; selects HImode elements 0-15 of operand 1 (a register) and then elements
;; 0-15 of operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8356 (define_insn "avx2_phaddswv16hi3"
8357 [(set (match_operand:V16HI 0 "register_operand" "=x")
8364 (match_operand:V16HI 1 "register_operand" "x")
8365 (parallel [(const_int 0)]))
8366 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8368 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8369 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8372 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8373 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8375 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8376 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8380 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8381 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8383 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8384 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8387 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8388 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8390 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8391 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8397 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8398 (parallel [(const_int 0)]))
8399 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8401 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8402 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8405 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8406 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8408 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8409 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8413 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8414 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8416 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8417 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8420 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8421 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8423 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8424 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8426 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8427 [(set_attr "type" "sseiadd")
8428 (set_attr "prefix_extra" "1")
8429 (set_attr "prefix" "vex")
8430 (set_attr "mode" "OI")])
;; V8HI pattern with two alternatives: phaddsw with the destination tied to
;; operand 1 (isa "noavx"), and the three-operand vphaddsw (isa "avx").  The
;; visible RTL selects HImode elements 0-7 of operand 1 and then elements
;; 0-7 of operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements, the insn
;; condition and the opening of the template are not visible in this excerpt.
8432 (define_insn "ssse3_phaddswv8hi3"
8433 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8439 (match_operand:V8HI 1 "register_operand" "0,x")
8440 (parallel [(const_int 0)]))
8441 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8443 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8444 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8447 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8448 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8450 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8451 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8456 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8457 (parallel [(const_int 0)]))
8458 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8460 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8461 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8464 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8465 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8467 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8468 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8471 phaddsw\t{%2, %0|%0, %2}
8472 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8473 [(set_attr "isa" "noavx,avx")
8474 (set_attr "type" "sseiadd")
8475 (set_attr "atom_unit" "complex")
8476 (set_attr "prefix_data16" "1,*")
8477 (set_attr "prefix_extra" "1")
8478 (set_attr "prefix" "orig,vex")
8479 (set_attr "mode" "TI")])
;; V4HI pattern using the "y" register constraint, with the destination tied
;; to operand 1; emits the two-operand phaddsw.  The visible RTL selects
;; HImode elements 0-3 of operand 1 and then elements 0-3 of operand 2
;; (register or memory).  "prefix_rex" is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8481 (define_insn "ssse3_phaddswv4hi3"
8482 [(set (match_operand:V4HI 0 "register_operand" "=y")
8487 (match_operand:V4HI 1 "register_operand" "0")
8488 (parallel [(const_int 0)]))
8489 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8491 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8492 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8496 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8497 (parallel [(const_int 0)]))
8498 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8500 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8501 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8503 "phaddsw\t{%2, %0|%0, %2}"
8504 [(set_attr "type" "sseiadd")
8505 (set_attr "atom_unit" "complex")
8506 (set_attr "prefix_extra" "1")
8507 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8508 (set_attr "mode" "DI")])
;; V16HI pattern emitting the three-operand vphsubw.  The visible RTL selects
;; HImode elements 0-15 of operand 1 (a register) and then elements 0-15 of
;; operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8510 (define_insn "avx2_phsubwv16hi3"
8511 [(set (match_operand:V16HI 0 "register_operand" "=x")
8518 (match_operand:V16HI 1 "register_operand" "x")
8519 (parallel [(const_int 0)]))
8520 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8522 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8523 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8526 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8527 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8529 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8530 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8534 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8535 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8537 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8538 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8541 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8542 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8544 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8545 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8551 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8552 (parallel [(const_int 0)]))
8553 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8555 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8556 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8559 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8560 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8562 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8563 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8567 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8568 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8570 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8571 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8574 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8575 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8577 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8578 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8580 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8581 [(set_attr "type" "sseiadd")
8582 (set_attr "prefix_extra" "1")
8583 (set_attr "prefix" "vex")
8584 (set_attr "mode" "OI")])
;; V8HI pattern with two alternatives: phsubw with the destination tied to
;; operand 1 (isa "noavx"), and the three-operand vphsubw (isa "avx").  The
;; visible RTL selects HImode elements 0-7 of operand 1 and then elements
;; 0-7 of operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements, the insn
;; condition and the opening of the template are not visible in this excerpt.
8586 (define_insn "ssse3_phsubwv8hi3"
8587 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8593 (match_operand:V8HI 1 "register_operand" "0,x")
8594 (parallel [(const_int 0)]))
8595 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8597 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8598 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8601 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8602 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8604 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8605 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8610 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8611 (parallel [(const_int 0)]))
8612 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8614 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8615 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8618 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8619 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8621 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8622 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8625 phsubw\t{%2, %0|%0, %2}
8626 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8627 [(set_attr "isa" "noavx,avx")
8628 (set_attr "type" "sseiadd")
8629 (set_attr "atom_unit" "complex")
8630 (set_attr "prefix_data16" "1,*")
8631 (set_attr "prefix_extra" "1")
8632 (set_attr "prefix" "orig,vex")
8633 (set_attr "mode" "TI")])
;; V4HI pattern using the "y" register constraint, with the destination tied
;; to operand 1; emits the two-operand phsubw.  The visible RTL selects
;; HImode elements 0-3 of operand 1 and then elements 0-3 of operand 2
;; (register or memory).  "prefix_rex" is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8635 (define_insn "ssse3_phsubwv4hi3"
8636 [(set (match_operand:V4HI 0 "register_operand" "=y")
8641 (match_operand:V4HI 1 "register_operand" "0")
8642 (parallel [(const_int 0)]))
8643 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8645 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8646 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8650 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8651 (parallel [(const_int 0)]))
8652 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8654 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8655 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8657 "phsubw\t{%2, %0|%0, %2}"
8658 [(set_attr "type" "sseiadd")
8659 (set_attr "atom_unit" "complex")
8660 (set_attr "prefix_extra" "1")
8661 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8662 (set_attr "mode" "DI")])
;; V8SI pattern emitting the three-operand vphsubd.  The visible RTL selects
;; SImode elements 0-7 of operand 1 (a register) and then elements 0-7 of
;; operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8664 (define_insn "avx2_phsubdv8si3"
8665 [(set (match_operand:V8SI 0 "register_operand" "=x")
8671 (match_operand:V8SI 1 "register_operand" "x")
8672 (parallel [(const_int 0)]))
8673 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8675 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8676 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8679 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8680 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8682 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8683 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8688 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8689 (parallel [(const_int 0)]))
8690 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8692 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8693 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8696 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8697 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8699 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8700 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8702 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8703 [(set_attr "type" "sseiadd")
8704 (set_attr "prefix_extra" "1")
8705 (set_attr "prefix" "vex")
8706 (set_attr "mode" "OI")])
;; V4SI pattern with two alternatives: phsubd with the destination tied to
;; operand 1 (isa "noavx"), and the three-operand vphsubd (isa "avx").  The
;; visible RTL selects SImode elements 0-3 of operand 1 and then elements
;; 0-3 of operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements, the insn
;; condition and the opening of the template are not visible in this excerpt.
8708 (define_insn "ssse3_phsubdv4si3"
8709 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8714 (match_operand:V4SI 1 "register_operand" "0,x")
8715 (parallel [(const_int 0)]))
8716 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8718 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8719 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8723 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8724 (parallel [(const_int 0)]))
8725 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8727 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8728 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8731 phsubd\t{%2, %0|%0, %2}
8732 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8734 [(set_attr "isa" "noavx,avx")
8735 (set_attr "type" "sseiadd")
8736 (set_attr "atom_unit" "complex")
8737 (set_attr "prefix_data16" "1,*")
8738 (set_attr "prefix_extra" "1")
8739 (set_attr "prefix" "orig,vex")
8740 (set_attr "mode" "TI")])
;; V2SI pattern using the "y" register constraint, with the destination tied
;; to operand 1; emits the two-operand phsubd.  The visible RTL selects
;; SImode elements 0-1 of operand 1 and then elements 0-1 of operand 2
;; (register or memory).  "prefix_rex" is computed per insn via
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8742 (define_insn "ssse3_phsubdv2si3"
8743 [(set (match_operand:V2SI 0 "register_operand" "=y")
8747 (match_operand:V2SI 1 "register_operand" "0")
8748 (parallel [(const_int 0)]))
8749 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8752 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8753 (parallel [(const_int 0)]))
8754 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8756 "phsubd\t{%2, %0|%0, %2}"
8757 [(set_attr "type" "sseiadd")
8758 (set_attr "atom_unit" "complex")
8759 (set_attr "prefix_extra" "1")
8760 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8761 (set_attr "mode" "DI")])
;; V16HI pattern emitting the three-operand vphsubsw.  The visible RTL
;; selects HImode elements 0-15 of operand 1 (a register) and then elements
;; 0-15 of operand 2 (register or memory), in ascending index order.
;; NOTE(review): the operators that combine the selected elements and the
;; insn condition are not visible in this excerpt.
8763 (define_insn "avx2_phsubswv16hi3"
8764 [(set (match_operand:V16HI 0 "register_operand" "=x")
8771 (match_operand:V16HI 1 "register_operand" "x")
8772 (parallel [(const_int 0)]))
8773 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8775 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8776 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8779 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8780 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8782 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8783 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8787 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8788 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8790 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8791 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8794 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8795 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8797 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8798 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8804 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8805 (parallel [(const_int 0)]))
8806 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8808 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8809 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8812 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8813 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8815 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8816 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8820 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8821 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8823 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8824 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8827 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8828 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8830 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8831 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8833 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8834 [(set_attr "type" "sseiadd")
8835 (set_attr "prefix_extra" "1")
8836 (set_attr "prefix" "vex")
8837 (set_attr "mode" "OI")])
;; V8HI pattern with two alternatives (isa "noavx,avx"):
;;   alt 0: operand 1 tied to the destination, two-operand phsubsw;
;;   alt 1: independent operand 1, three-operand vphsubsw.
;; The template pairs adjacent HImode elements 0/1 ... 6/7 of operand 1 and
;; then of operand 2.  prefix_data16 is "1" only for the non-AVX alternative.
8839 (define_insn "ssse3_phsubswv8hi3"
8840 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8846 (match_operand:V8HI 1 "register_operand" "0,x")
8847 (parallel [(const_int 0)]))
8848 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8850 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8851 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8854 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8855 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8857 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8858 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8863 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8864 (parallel [(const_int 0)]))
8865 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8867 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8868 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8871 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8872 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8874 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8875 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8878 phsubsw\t{%2, %0|%0, %2}
8879 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8880 [(set_attr "isa" "noavx,avx")
8881 (set_attr "type" "sseiadd")
8882 (set_attr "atom_unit" "complex")
8883 (set_attr "prefix_data16" "1,*")
8884 (set_attr "prefix_extra" "1")
8885 (set_attr "prefix" "orig,vex")
8886 (set_attr "mode" "TI")])
;; V4HI form of phsubsw, using register class "y".  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory ("ym").  The template
;; pairs HImode elements 0/1 and 2/3 of each source operand.
8888 (define_insn "ssse3_phsubswv4hi3"
8889 [(set (match_operand:V4HI 0 "register_operand" "=y")
8894 (match_operand:V4HI 1 "register_operand" "0")
8895 (parallel [(const_int 0)]))
8896 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8898 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8899 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8903 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8904 (parallel [(const_int 0)]))
8905 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8907 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8908 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8910 "phsubsw\t{%2, %0|%0, %2}"
8911 [(set_attr "type" "sseiadd")
8912 (set_attr "atom_unit" "complex")
8913 (set_attr "prefix_extra" "1")
8914 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8915 (set_attr "mode" "DI")])
;; V16HI result computed from two V32QI operands; emits three-operand
;; vpmaddubsw.  The visible RTL takes two V16QI selections from each operand,
;; one parallel starting at element 0 and one starting at element 1 (the last
;; listed index is 31).
;; NOTE(review): the index lists and arithmetic RTL codes are not fully
;; visible in this excerpt.
8917 (define_insn "avx2_pmaddubsw256"
8918 [(set (match_operand:V16HI 0 "register_operand" "=x")
8923 (match_operand:V32QI 1 "register_operand" "x")
8924 (parallel [(const_int 0)
8942 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8943 (parallel [(const_int 0)
8961 (vec_select:V16QI (match_dup 1)
8962 (parallel [(const_int 1)
8979 (vec_select:V16QI (match_dup 2)
8980 (parallel [(const_int 1)
8995 (const_int 31)]))))))]
8997 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8998 [(set_attr "type" "sseiadd")
8999 (set_attr "prefix_extra" "1")
9000 (set_attr "prefix" "vex")
9001 (set_attr "mode" "OI")])
;; V8HI result computed from two V16QI operands, with two alternatives
;; (isa "noavx,avx"): two-operand pmaddubsw with operand 1 tied to the
;; destination, or three-operand vpmaddubsw.  The visible RTL takes V8QI
;; selections starting at element 0 and at element 1 of each operand (last
;; listed index 15).
9003 (define_insn "ssse3_pmaddubsw128"
9004 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9009 (match_operand:V16QI 1 "register_operand" "0,x")
9010 (parallel [(const_int 0)
9020 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
9021 (parallel [(const_int 0)
9031 (vec_select:V8QI (match_dup 1)
9032 (parallel [(const_int 1)
9041 (vec_select:V8QI (match_dup 2)
9042 (parallel [(const_int 1)
9049 (const_int 15)]))))))]
9052 pmaddubsw\t{%2, %0|%0, %2}
9053 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9054 [(set_attr "isa" "noavx,avx")
9055 (set_attr "type" "sseiadd")
9056 (set_attr "atom_unit" "simul")
9057 (set_attr "prefix_data16" "1,*")
9058 (set_attr "prefix_extra" "1")
9059 (set_attr "prefix" "orig,vex")
9060 (set_attr "mode" "TI")])
;; V4HI result computed from two V8QI operands in register class "y"; emits
;; two-operand pmaddubsw with operand 1 tied to the destination.  The visible
;; RTL takes V4QI selections starting at element 0 and at element 1 of each
;; operand (last listed index 7).
9062 (define_insn "ssse3_pmaddubsw"
9063 [(set (match_operand:V4HI 0 "register_operand" "=y")
9068 (match_operand:V8QI 1 "register_operand" "0")
9069 (parallel [(const_int 0)
9075 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9076 (parallel [(const_int 0)
9082 (vec_select:V4QI (match_dup 1)
9083 (parallel [(const_int 1)
9088 (vec_select:V4QI (match_dup 2)
9089 (parallel [(const_int 1)
9092 (const_int 7)]))))))]
9094 "pmaddubsw\t{%2, %0|%0, %2}"
9095 [(set_attr "type" "sseiadd")
9096 (set_attr "atom_unit" "simul")
9097 (set_attr "prefix_extra" "1")
9098 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9099 (set_attr "mode" "DI")])
;; Expander for the V16HI pattern matched by "*avx2_umulhrswv16hi3".  Both
;; inputs are accepted as nonimmediate operands; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy (MULT, ...) on them.  The
;; template includes a V16HI constant vector whose sixteen elements are 1.
9101 (define_expand "avx2_umulhrswv16hi3"
9102 [(set (match_operand:V16HI 0 "register_operand" "")
9109 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9111 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9113 (const_vector:V16HI [(const_int 1) (const_int 1)
9114 (const_int 1) (const_int 1)
9115 (const_int 1) (const_int 1)
9116 (const_int 1) (const_int 1)
9117 (const_int 1) (const_int 1)
9118 (const_int 1) (const_int 1)
9119 (const_int 1) (const_int 1)
9120 (const_int 1) (const_int 1)]))
9123 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matching insn for the V16HI expander; emits three-operand vpmulhrsw.
;; Operand 1 carries the "%" constraint modifier and the insn condition
;; requires TARGET_AVX2 plus ix86_binary_operator_ok (MULT, ...).
;; The template includes a V16HI constant vector whose elements are all 1.
9125 (define_insn "*avx2_umulhrswv16hi3"
9126 [(set (match_operand:V16HI 0 "register_operand" "=x")
9133 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9135 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9137 (const_vector:V16HI [(const_int 1) (const_int 1)
9138 (const_int 1) (const_int 1)
9139 (const_int 1) (const_int 1)
9140 (const_int 1) (const_int 1)
9141 (const_int 1) (const_int 1)
9142 (const_int 1) (const_int 1)
9143 (const_int 1) (const_int 1)
9144 (const_int 1) (const_int 1)]))
9146 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9147 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9148 [(set_attr "type" "sseimul")
9149 (set_attr "prefix_extra" "1")
9150 (set_attr "prefix" "vex")
9151 (set_attr "mode" "OI")])
;; Expander for the V8HI pattern matched by "*ssse3_pmulhrswv8hi3".  The
;; preparation statement calls ix86_fixup_binary_operands_no_copy (MULT, ...)
;; on the operands.  The template includes a V8HI constant vector of ones.
9153 (define_expand "ssse3_pmulhrswv8hi3"
9154 [(set (match_operand:V8HI 0 "register_operand" "")
9161 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9163 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9165 (const_vector:V8HI [(const_int 1) (const_int 1)
9166 (const_int 1) (const_int 1)
9167 (const_int 1) (const_int 1)
9168 (const_int 1) (const_int 1)]))
9171 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the V8HI expander, with two alternatives
;; (isa "noavx,avx"): two-operand pmulhrsw with operand 1 tied to the
;; destination, or three-operand vpmulhrsw.  Operand 1 carries the "%"
;; constraint modifier; the condition requires TARGET_SSSE3 plus
;; ix86_binary_operator_ok (MULT, ...).
9173 (define_insn "*ssse3_pmulhrswv8hi3"
9174 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9181 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9183 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9185 (const_vector:V8HI [(const_int 1) (const_int 1)
9186 (const_int 1) (const_int 1)
9187 (const_int 1) (const_int 1)
9188 (const_int 1) (const_int 1)]))
9190 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9192 pmulhrsw\t{%2, %0|%0, %2}
9193 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9194 [(set_attr "isa" "noavx,avx")
9195 (set_attr "type" "sseimul")
9196 (set_attr "prefix_data16" "1,*")
9197 (set_attr "prefix_extra" "1")
9198 (set_attr "prefix" "orig,vex")
9199 (set_attr "mode" "TI")])
;; Expander for the V4HI pattern matched by "*ssse3_pmulhrswv4hi3".  The
;; preparation statement calls ix86_fixup_binary_operands_no_copy (MULT, ...)
;; on the operands.  The template includes a V4HI constant vector of ones.
9201 (define_expand "ssse3_pmulhrswv4hi3"
9202 [(set (match_operand:V4HI 0 "register_operand" "")
9209 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9211 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9213 (const_vector:V4HI [(const_int 1) (const_int 1)
9214 (const_int 1) (const_int 1)]))
9217 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matching insn for the V4HI expander, using register class "y"; emits
;; two-operand pmulhrsw with operand 1 tied to the destination ("%0").
;; The condition requires TARGET_SSSE3 plus ix86_binary_operator_ok (MULT, ...).
9219 (define_insn "*ssse3_pmulhrswv4hi3"
9220 [(set (match_operand:V4HI 0 "register_operand" "=y")
9227 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9229 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9231 (const_vector:V4HI [(const_int 1) (const_int 1)
9232 (const_int 1) (const_int 1)]))
9234 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9235 "pmulhrsw\t{%2, %0|%0, %2}"
9236 [(set_attr "type" "sseimul")
9237 (set_attr "prefix_extra" "1")
9238 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9239 (set_attr "mode" "DI")])
;; pshufb over the VI1_AVX2 mode iterator, expressed as an unspec of operands
;; 1 and 2.  Two alternatives (isa "noavx,avx"): two-operand pshufb with
;; operand 1 tied to the destination, or three-operand vpshufb.
;; NOTE(review): the unspec name and insn condition are not visible here.
9241 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9242 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9243 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9244 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9248 pshufb\t{%2, %0|%0, %2}
9249 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9250 [(set_attr "isa" "noavx,avx")
9251 (set_attr "type" "sselog1")
9252 (set_attr "prefix_data16" "1,*")
9253 (set_attr "prefix_extra" "1")
9254 (set_attr "prefix" "orig,vex")
9255 (set_attr "mode" "<sseinsnmode>")])
;; V8QI form of pshufb in register class "y", expressed as an unspec of
;; operands 1 and 2; operand 1 is tied to the destination.
;; NOTE(review): the ';' following the output template string looks like a
;; stray (empty) comment rather than intended syntax -- confirm and remove.
9257 (define_insn "ssse3_pshufbv8qi3"
9258 [(set (match_operand:V8QI 0 "register_operand" "=y")
9259 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9260 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9263 "pshufb\t{%2, %0|%0, %2}";
9264 [(set_attr "type" "sselog1")
9265 (set_attr "prefix_extra" "1")
9266 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9267 (set_attr "mode" "DI")])
;; psign<ssemodesuffix> over the VI124_AVX2 mode iterator.  Two alternatives
;; (isa "noavx,avx"): two-operand form with operand 1 tied to the
;; destination, or three-operand "v"-prefixed form.
;; NOTE(review): the enclosing RTL code and insn condition are not visible.
9269 (define_insn "<ssse3_avx2>_psign<mode>3"
9270 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9272 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9273 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9277 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9278 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9279 [(set_attr "isa" "noavx,avx")
9280 (set_attr "type" "sselog1")
9281 (set_attr "prefix_data16" "1,*")
9282 (set_attr "prefix_extra" "1")
9283 (set_attr "prefix" "orig,vex")
9284 (set_attr "mode" "<sseinsnmode>")])
;; psign<mmxvecsize> over the MMXMODEI mode iterator in register class "y";
;; operand 1 is tied to the destination.
;; NOTE(review): the ';' following the output template string looks like a
;; stray (empty) comment rather than intended syntax -- confirm and remove.
9286 (define_insn "ssse3_psign<mode>3"
9287 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9289 [(match_operand:MMXMODEI 1 "register_operand" "0")
9290 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9293 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9294 [(set_attr "type" "sselog1")
9295 (set_attr "prefix_extra" "1")
9296 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9297 (set_attr "mode" "DI")])
;; palignr over the SSESCALARMODE iterator.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, then picks the two-operand palignr or the three-operand
;; vpalignr template according to which_alternative.
9299 (define_insn "<ssse3_avx2>_palignr<mode>"
9300 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9301 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9302 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9303 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9307 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9309 switch (which_alternative)
9312 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9314 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9319 [(set_attr "isa" "noavx,avx")
9320 (set_attr "type" "sseishft")
9321 (set_attr "atom_unit" "sishuf")
9322 (set_attr "prefix_data16" "1,*")
9323 (set_attr "prefix_extra" "1")
9324 (set_attr "length_immediate" "1")
9325 (set_attr "prefix" "orig,vex")
9326 (set_attr "mode" "<sseinsnmode>")])
;; DImode form of palignr in register class "y"; operand 1 is tied to the
;; destination.  Operand 3 must satisfy const_0_to_255_mul_8_operand and is
;; divided by 8 by the output code before the instruction is printed.
9328 (define_insn "ssse3_palignrdi"
9329 [(set (match_operand:DI 0 "register_operand" "=y")
9330 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9331 (match_operand:DI 2 "nonimmediate_operand" "ym")
9332 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9336 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9337 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9339 [(set_attr "type" "sseishft")
9340 (set_attr "atom_unit" "sishuf")
9341 (set_attr "prefix_extra" "1")
9342 (set_attr "length_immediate" "1")
9343 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9344 (set_attr "mode" "DI")])
;; abs<mode>2 over the VI124_AVX2 mode iterator: a one-input pattern whose
;; source may be a register or memory ("xm").  The template uses the "%v"
;; prefix with pabs<ssemodesuffix>, and the prefix attribute is "maybe_vex".
9346 (define_insn "abs<mode>2"
9347 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9349 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9351 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9352 [(set_attr "type" "sselog1")
9353 (set_attr "prefix_data16" "1")
9354 (set_attr "prefix_extra" "1")
9355 (set_attr "prefix" "maybe_vex")
9356 (set_attr "mode" "<sseinsnmode>")])
;; abs<mode>2 over the MMXMODEI mode iterator in register class "y"; emits
;; pabs<mmxvecsize>.  prefix_rep is forced to "0".
;; NOTE(review): the ';' following the output template string looks like a
;; stray (empty) comment rather than intended syntax -- confirm and remove.
9358 (define_insn "abs<mode>2"
9359 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9361 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9363 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9364 [(set_attr "type" "sselog1")
9365 (set_attr "prefix_rep" "0")
9366 (set_attr "prefix_extra" "1")
9367 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9368 (set_attr "mode" "DI")])
9370 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9372 ;; AMD SSE4A instructions
9374 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix> over the MODEF iterator: the destination is a memory
;; operand ("=m") and the source is a register in class "x".
;; NOTE(review): the unspec name and insn condition are not visible here.
9376 (define_insn "sse4a_movnt<mode>"
9377 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9379 [(match_operand:MODEF 1 "register_operand" "x")]
9382 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9383 [(set_attr "type" "ssemov")
9384 (set_attr "mode" "<MODE>")])
;; Vector-register variant of the movnt store: the source is element 0 of a
;; VF_128 register operand (vec_select with const_int 0) and the destination
;; is a <ssescalarmode> memory operand.  Emits movnt<ssescalarmodesuffix>.
9386 (define_insn "sse4a_vmmovnt<mode>"
9387 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9388 (unspec:<ssescalarmode>
9389 [(vec_select:<ssescalarmode>
9390 (match_operand:VF_128 1 "register_operand" "x")
9391 (parallel [(const_int 0)]))]
9394 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9395 [(set_attr "type" "ssemov")
9396 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq: operand 1 (V2DI) is tied to the destination and
;; operands 2 and 3 are constants accepted by const_0_to_255_operand, which
;; is why length_immediate is "2".
9398 (define_insn "sse4a_extrqi"
9399 [(set (match_operand:V2DI 0 "register_operand" "=x")
9400 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9401 (match_operand 2 "const_0_to_255_operand" "")
9402 (match_operand 3 "const_0_to_255_operand" "")]
9405 "extrq\t{%3, %2, %0|%0, %2, %3}"
9406 [(set_attr "type" "sse")
9407 (set_attr "prefix_data16" "1")
9408 (set_attr "length_immediate" "2")
9409 (set_attr "mode" "TI")])
;; Register form of extrq: operand 1 (V2DI) is tied to the destination and
;; operand 2 is a V16QI register.
9411 (define_insn "sse4a_extrq"
9412 [(set (match_operand:V2DI 0 "register_operand" "=x")
9413 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9414 (match_operand:V16QI 2 "register_operand" "x")]
9417 "extrq\t{%2, %0|%0, %2}"
9418 [(set_attr "type" "sse")
9419 (set_attr "prefix_data16" "1")
9420 (set_attr "mode" "TI")])
;; Immediate form of insertq: operand 1 is tied to the destination, operand 2
;; is a V2DI register, and operands 3 and 4 are constants accepted by
;; const_0_to_255_operand (length_immediate "2").  The attributes set
;; prefix_data16 to "0" and prefix_rep to "1".
9422 (define_insn "sse4a_insertqi"
9423 [(set (match_operand:V2DI 0 "register_operand" "=x")
9424 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9425 (match_operand:V2DI 2 "register_operand" "x")
9426 (match_operand 3 "const_0_to_255_operand" "")
9427 (match_operand 4 "const_0_to_255_operand" "")]
9430 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9431 [(set_attr "type" "sseins")
9432 (set_attr "prefix_data16" "0")
9433 (set_attr "prefix_rep" "1")
9434 (set_attr "length_immediate" "2")
9435 (set_attr "mode" "TI")])
;; Register form of insertq: operand 1 is tied to the destination and
;; operand 2 is a V2DI register.  The attributes set prefix_data16 to "0"
;; and prefix_rep to "1".
9437 (define_insn "sse4a_insertq"
9438 [(set (match_operand:V2DI 0 "register_operand" "=x")
9439 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9440 (match_operand:V2DI 2 "register_operand" "x")]
9443 "insertq\t{%2, %0|%0, %2}"
9444 [(set_attr "type" "sseins")
9445 (set_attr "prefix_data16" "0")
9446 (set_attr "prefix_rep" "1")
9447 (set_attr "mode" "TI")])
9449 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9451 ;; Intel SSE4.1 instructions
9453 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate blend over the VF mode iterator.  Note that operand 2 appears
;; before operand 1 in the RTL template; operand 1 is the one tied to the
;; destination in the non-AVX alternative.  Operand 3 is an immediate checked
;; by the per-mode predicate const_0_to_<blendbits>_operand.
9455 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9456 [(set (match_operand:VF 0 "register_operand" "=x,x")
9458 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9459 (match_operand:VF 1 "register_operand" "0,x")
9460 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9463 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9464 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9465 [(set_attr "isa" "noavx,avx")
9466 (set_attr "type" "ssemov")
9467 (set_attr "length_immediate" "1")
9468 (set_attr "prefix_data16" "1,*")
9469 (set_attr "prefix_extra" "1")
9470 (set_attr "prefix" "orig,vex")
9471 (set_attr "mode" "<MODE>")])
;; Variable blend over the VF mode iterator; the selector is register
;; operand 3.  In the non-AVX alternative operand 1 is tied to the
;; destination and operand 3 is constrained to "Yz"; in the AVX alternative
;; operand 3 may be any "x" register.  Operands 0-2 use the *_not_xmm0_*
;; predicates.
9473 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9474 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9476 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9477 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9478 (match_operand:VF 3 "register_operand" "Yz,x")]
9482 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9483 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9484 [(set_attr "isa" "noavx,avx")
9485 (set_attr "type" "ssemov")
9486 (set_attr "length_immediate" "1")
9487 (set_attr "prefix_data16" "1,*")
9488 (set_attr "prefix_extra" "1")
9489 (set_attr "prefix" "orig,vex")
9490 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> over the VF mode iterator with an 8-bit immediate
;; (operand 3, const_0_to_255_operand).  Operand 1 carries the "%"
;; constraint modifier and is tied to the destination in the non-AVX
;; alternative.
9492 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9493 [(set (match_operand:VF 0 "register_operand" "=x,x")
9495 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9496 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9497 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9501 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9502 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9503 [(set_attr "isa" "noavx,avx")
9504 (set_attr "type" "ssemul")
9505 (set_attr "length_immediate" "1")
9506 (set_attr "prefix_data16" "1,*")
9507 (set_attr "prefix_extra" "1")
9508 (set_attr "prefix" "orig,vex")
9509 (set_attr "mode" "<MODE>")])
;; movntdqa over the VI8_AVX2 mode iterator: the source must be a memory
;; operand ("m") and the destination a register.  The template uses the
;; "%v" prefix and the prefix attribute is "maybe_vex".
9511 (define_insn "<sse4_1_avx2>_movntdqa"
9512 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9513 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9516 "%vmovntdqa\t{%1, %0|%0, %1}"
9517 [(set_attr "type" "ssemov")
9518 (set_attr "prefix_extra" "1")
9519 (set_attr "prefix" "maybe_vex")
9520 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw over the VI1_AVX2 mode iterator with an 8-bit immediate
;; (operand 3).  Two alternatives (isa "noavx,avx"): two-operand mpsadbw with
;; operand 1 tied to the destination, or three-operand vmpsadbw.
9522 (define_insn "<sse4_1_avx2>_mpsadbw"
9523 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9524 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9525 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9526 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9530 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9531 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9532 [(set_attr "isa" "noavx,avx")
9533 (set_attr "type" "sselog1")
9534 (set_attr "length_immediate" "1")
9535 (set_attr "prefix_extra" "1")
9536 (set_attr "prefix" "orig,vex")
9537 (set_attr "mode" "<sseinsnmode>")])
;; V16HI result built from two V8SI operands; emits three-operand vpackusdw.
;; NOTE(review): the RTL codes that narrow and combine the operands are not
;; visible in this excerpt.
9539 (define_insn "avx2_packusdw"
9540 [(set (match_operand:V16HI 0 "register_operand" "=x")
9543 (match_operand:V8SI 1 "register_operand" "x"))
9545 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9547 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9548 [(set_attr "type" "sselog")
9549 (set_attr "prefix_extra" "1")
9550 (set_attr "prefix" "vex")
9551 (set_attr "mode" "OI")])
;; V8HI result built from two V4SI operands.  Two alternatives
;; (isa "noavx,avx"): two-operand packusdw with operand 1 tied to the
;; destination, or three-operand vpackusdw.
9553 (define_insn "sse4_1_packusdw"
9554 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9557 (match_operand:V4SI 1 "register_operand" "0,x"))
9559 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9562 packusdw\t{%2, %0|%0, %2}
9563 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9564 [(set_attr "isa" "noavx,avx")
9565 (set_attr "type" "sselog")
9566 (set_attr "prefix_extra" "1")
9567 (set_attr "prefix" "orig,vex")
9568 (set_attr "mode" "TI")])
;; Variable byte blend over the VI1_AVX2 mode iterator; the selector is
;; register operand 3 ("Yz" in the non-AVX alternative, "x" in the AVX one).
;; length_immediate is "*,1": only the AVX alternative is given an explicit
;; immediate length.
9570 (define_insn "<sse4_1_avx2>_pblendvb"
9571 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9573 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9574 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9575 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9579 pblendvb\t{%3, %2, %0|%0, %2, %3}
9580 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9581 [(set_attr "isa" "noavx,avx")
9582 (set_attr "type" "ssemov")
9583 (set_attr "prefix_extra" "1")
9584 (set_attr "length_immediate" "*,1")
9585 (set_attr "prefix" "orig,vex")
9586 (set_attr "mode" "<sseinsnmode>")])
;; V8HI immediate blend.  Operand 2 appears before operand 1 in the RTL
;; template; operand 1 is tied to the destination in the non-AVX
;; alternative.  Operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9588 (define_insn "sse4_1_pblendw"
9589 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9591 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9592 (match_operand:V8HI 1 "register_operand" "0,x")
9593 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9596 pblendw\t{%3, %2, %0|%0, %2, %3}
9597 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9598 [(set_attr "isa" "noavx,avx")
9599 (set_attr "type" "ssemov")
9600 (set_attr "prefix_extra" "1")
9601 (set_attr "length_immediate" "1")
9602 (set_attr "prefix" "orig,vex")
9603 (set_attr "mode" "TI")])
9605 ;; The builtin uses an 8-bit immediate. Expand that.
;; Expander for the V16HI blend.  The preparation code takes the low 8 bits
;; of operand 3 and replicates them into bits 8-15 (val << 8 | val), so the
;; operand handed to the matching insn is a 16-bit mask.
9606 (define_expand "avx2_pblendw"
9607 [(set (match_operand:V16HI 0 "register_operand" "")
9609 (match_operand:V16HI 2 "nonimmediate_operand" "")
9610 (match_operand:V16HI 1 "register_operand" "")
9611 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9614 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9615 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the V16HI blend.  Operand 3 is checked by
;; avx2_pblendw_operand; the output code masks it back to its low 8 bits
;; before printing the vpblendw instruction.
9618 (define_insn "*avx2_pblendw"
9619 [(set (match_operand:V16HI 0 "register_operand" "=x")
9621 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9622 (match_operand:V16HI 1 "register_operand" "x")
9623 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9626 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9627 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9629 [(set_attr "type" "ssemov")
9630 (set_attr "prefix_extra" "1")
9631 (set_attr "length_immediate" "1")
9632 (set_attr "prefix" "vex")
9633 (set_attr "mode" "OI")])
;; Immediate blend over the VI4_AVX2 mode iterator; emits three-operand
;; vpblendd.  Operand 2 appears before operand 1 in the RTL template and
;; operand 3 is an immediate accepted by const_0_to_255_operand.
9635 (define_insn "avx2_pblendd<mode>"
9636 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9638 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9639 (match_operand:VI4_AVX2 1 "register_operand" "x")
9640 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9642 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9643 [(set_attr "type" "ssemov")
9644 (set_attr "prefix_extra" "1")
9645 (set_attr "length_immediate" "1")
9646 (set_attr "prefix" "vex")
9647 (set_attr "mode" "<sseinsnmode>")])
;; V8HI one-input pattern expressed as UNSPEC_PHMINPOSUW; the source may be
;; a register or memory ("xm").  The template uses the "%v" prefix and the
;; prefix attribute is "maybe_vex".
9649 (define_insn "sse4_1_phminposuw"
9650 [(set (match_operand:V8HI 0 "register_operand" "=x")
9651 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9652 UNSPEC_PHMINPOSUW))]
9654 "%vphminposuw\t{%1, %0|%0, %1}"
9655 [(set_attr "type" "sselog1")
9656 (set_attr "prefix_extra" "1")
9657 (set_attr "prefix" "maybe_vex")
9658 (set_attr "mode" "TI")])
;; Extension of a whole V16QI operand to V16HI; emits vpmov<extsuffix>bw.
;; <code>/<extsuffix> come from a code iterator that is not visible here.
9660 (define_insn "avx2_<code>v16qiv16hi2"
9661 [(set (match_operand:V16HI 0 "register_operand" "=x")
9663 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9665 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9666 [(set_attr "type" "ssemov")
9667 (set_attr "prefix_extra" "1")
9668 (set_attr "prefix" "vex")
9669 (set_attr "mode" "OI")])
;; Extension of a subset of a V16QI operand (selection starting at element
;; 0) to V8HI; emits pmov<extsuffix>bw with the "%v" prefix.  The
;; Intel-syntax side prints operand 1 with the %q modifier.
9671 (define_insn "sse4_1_<code>v8qiv8hi2"
9672 [(set (match_operand:V8HI 0 "register_operand" "=x")
9675 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9676 (parallel [(const_int 0)
9685 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9686 [(set_attr "type" "ssemov")
9687 (set_attr "prefix_extra" "1")
9688 (set_attr "prefix" "maybe_vex")
9689 (set_attr "mode" "TI")])
;; Extension of a subset of a V16QI operand (selection starting at element
;; 0) to V8SI; emits vpmov<extsuffix>bd.  The Intel-syntax side prints
;; operand 1 with the %q modifier.
9691 (define_insn "avx2_<code>v8qiv8si2"
9692 [(set (match_operand:V8SI 0 "register_operand" "=x")
9695 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9696 (parallel [(const_int 0)
9705 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9706 [(set_attr "type" "ssemov")
9707 (set_attr "prefix_extra" "1")
9708 (set_attr "prefix" "vex")
9709 (set_attr "mode" "OI")])
;; Extension of a subset of a V16QI operand (selection starting at element
;; 0) to V4SI; emits pmov<extsuffix>bd with the "%v" prefix.  The
;; Intel-syntax side prints operand 1 with the %k modifier.
9711 (define_insn "sse4_1_<code>v4qiv4si2"
9712 [(set (match_operand:V4SI 0 "register_operand" "=x")
9715 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9716 (parallel [(const_int 0)
9721 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9722 [(set_attr "type" "ssemov")
9723 (set_attr "prefix_extra" "1")
9724 (set_attr "prefix" "maybe_vex")
9725 (set_attr "mode" "TI")])
;; Extension of a whole V8HI operand to V8SI; emits vpmov<extsuffix>wd.
9727 (define_insn "avx2_<code>v8hiv8si2"
9728 [(set (match_operand:V8SI 0 "register_operand" "=x")
9730 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9732 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9733 [(set_attr "type" "ssemov")
9734 (set_attr "prefix_extra" "1")
9735 (set_attr "prefix" "vex")
9736 (set_attr "mode" "OI")])
;; Extension of a subset of a V8HI operand (selection starting at element 0)
;; to V4SI; emits pmov<extsuffix>wd with the "%v" prefix.  The Intel-syntax
;; side prints operand 1 with the %q modifier.
9738 (define_insn "sse4_1_<code>v4hiv4si2"
9739 [(set (match_operand:V4SI 0 "register_operand" "=x")
9742 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9743 (parallel [(const_int 0)
9748 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9749 [(set_attr "type" "ssemov")
9750 (set_attr "prefix_extra" "1")
9751 (set_attr "prefix" "maybe_vex")
9752 (set_attr "mode" "TI")])
;; Extension of a subset of a V16QI operand (selection starting at element
;; 0) to V4DI; emits vpmov<extsuffix>bq.  The Intel-syntax side prints
;; operand 1 with the %k modifier.
9754 (define_insn "avx2_<code>v4qiv4di2"
9755 [(set (match_operand:V4DI 0 "register_operand" "=x")
9758 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9759 (parallel [(const_int 0)
9764 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9765 [(set_attr "type" "ssemov")
9766 (set_attr "prefix_extra" "1")
9767 (set_attr "prefix" "vex")
9768 (set_attr "mode" "OI")])
;; Extension of a subset of a V16QI operand (selection starting at element
;; 0) to V2DI; emits pmov<extsuffix>bq with the "%v" prefix.  The
;; Intel-syntax side prints operand 1 with the %w modifier.
9770 (define_insn "sse4_1_<code>v2qiv2di2"
9771 [(set (match_operand:V2DI 0 "register_operand" "=x")
9774 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9775 (parallel [(const_int 0)
9778 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9779 [(set_attr "type" "ssemov")
9780 (set_attr "prefix_extra" "1")
9781 (set_attr "prefix" "maybe_vex")
9782 (set_attr "mode" "TI")])
;; Extension of a subset of a V8HI operand (selection starting at element 0)
;; to V4DI; emits vpmov<extsuffix>wq.  The Intel-syntax side prints
;; operand 1 with the %q modifier.
9784 (define_insn "avx2_<code>v4hiv4di2"
9785 [(set (match_operand:V4DI 0 "register_operand" "=x")
9788 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9789 (parallel [(const_int 0)
9794 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9795 [(set_attr "type" "ssemov")
9796 (set_attr "prefix_extra" "1")
9797 (set_attr "prefix" "vex")
9798 (set_attr "mode" "OI")])
;; Extension of a subset of a V8HI operand (selection starting at element 0)
;; to V2DI; emits pmov<extsuffix>wq with the "%v" prefix.  The Intel-syntax
;; side prints operand 1 with the %k modifier.
9800 (define_insn "sse4_1_<code>v2hiv2di2"
9801 [(set (match_operand:V2DI 0 "register_operand" "=x")
9804 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9805 (parallel [(const_int 0)
9808 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9809 [(set_attr "type" "ssemov")
9810 (set_attr "prefix_extra" "1")
9811 (set_attr "prefix" "maybe_vex")
9812 (set_attr "mode" "TI")])
;; Extension of a whole V4SI operand to V4DI; emits vpmov<extsuffix>dq.
;; The "prefix" attribute is stated explicitly as "vex", matching the other
;; avx2_<code>* extension patterns, instead of relying on the attribute's
;; default value.
9814 (define_insn "avx2_<code>v4siv4di2"
9815 [(set (match_operand:V4DI 0 "register_operand" "=x")
9817 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9819 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9820 [(set_attr "type" "ssemov")
9821 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9822 (set_attr "mode" "OI")])
;; Extension of a subset of a V4SI operand (selection starting at element 0)
;; to V2DI; emits pmov<extsuffix>dq with the "%v" prefix.  The Intel-syntax
;; side prints operand 1 with the %q modifier.
9824 (define_insn "sse4_1_<code>v2siv2di2"
9825 [(set (match_operand:V2DI 0 "register_operand" "=x")
9828 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9829 (parallel [(const_int 0)
9832 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9833 [(set_attr "type" "ssemov")
9834 (set_attr "prefix_extra" "1")
9835 (set_attr "prefix" "maybe_vex")
9836 (set_attr "mode" "TI")])
9838 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9839 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Sets (reg:CC FLAGS_REG) from an unspec of two VF operands; there is no
;; register destination.  Emits vtest<ssemodesuffix>; operand 1 may be in
;; memory ("xm").
9840 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9841 [(set (reg:CC FLAGS_REG)
9842 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9843 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9846 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9847 [(set_attr "type" "ssecomi")
9848 (set_attr "prefix_extra" "1")
9849 (set_attr "prefix" "vex")
9850 (set_attr "mode" "<MODE>")])
9852 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9853 ;; But it is not really a compare instruction.
;; V4DI form: sets (reg:CC FLAGS_REG) from an unspec of operands 0 and 1 and
;; emits vptest.  Operand 1 may be in memory ("xm").
9854 (define_insn "avx_ptest256"
9855 [(set (reg:CC FLAGS_REG)
9856 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9857 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9860 "vptest\t{%1, %0|%0, %1}"
9861 [(set_attr "type" "ssecomi")
9862 (set_attr "prefix_extra" "1")
9863 (set_attr "prefix" "vex")
9864 (set_attr "mode" "OI")])
;; V2DI form: sets (reg:CC FLAGS_REG) from an unspec of operands 0 and 1 and
;; emits ptest with the "%v" prefix (prefix attribute "maybe_vex").
9866 (define_insn "sse4_1_ptest"
9867 [(set (reg:CC FLAGS_REG)
9868 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9869 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9872 "%vptest\t{%1, %0|%0, %1}"
9873 [(set_attr "type" "ssecomi")
9874 (set_attr "prefix_extra" "1")
9875 (set_attr "prefix" "maybe_vex")
9876 (set_attr "mode" "TI")])
;; round<ssemodesuffix> over the VF mode iterator: operand 1 is the source
;; (register or memory) and operand 2 an immediate accepted by
;; const_0_to_15_operand.  prefix_data16 is computed conditionally from a
;; TARGET_AVX test.
;; NOTE(review): only part of that conditional is visible in this excerpt.
9878 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9879 [(set (match_operand:VF 0 "register_operand" "=x")
9881 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9882 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9885 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9886 [(set_attr "type" "ssecvt")
9887 (set (attr "prefix_data16")
9889 (match_test "TARGET_AVX")
9891 (const_string "1")))
9892 (set_attr "prefix_extra" "1")
9893 (set_attr "length_immediate" "1")
9894 (set_attr "prefix" "maybe_vex")
9895 (set_attr "mode" "<MODE>")])
;; Expander over VF1: rounds operand 1 (with the mode in operand 2) into a
;; fresh <MODE>mode temporary via the round pattern, then converts that
;; temporary into the <sseintvecmode> destination via
;; gen_fix_trunc<mode><sseintvecmodelower>2.
9897 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
9898 [(match_operand:<sseintvecmode> 0 "register_operand" "")
9899 (match_operand:VF1 1 "nonimmediate_operand" "")
9900 (match_operand:SI 2 "const_0_to_15_operand" "")]
9903 rtx tmp = gen_reg_rtx (<MODE>mode);
9906 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
9909 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander over VF2 that rounds two inputs and packs the converted results
;; into one <ssepackfltmode> destination.
;; For V2DFmode with TARGET_AVX && !TARGET_PREFER_AVX128 the two inputs are
;; concatenated into a single V4DF, rounded once with gen_avx_roundpd256 and
;; converted with gen_fix_truncv4dfv4si2.  The remaining visible code
;; (presumably the else branch -- the braces are not visible here) rounds
;; each input separately and packs them with gen_vec_pack_sfix_trunc_<mode>.
9913 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
9914 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
9915 (match_operand:VF2 1 "nonimmediate_operand" "")
9916 (match_operand:VF2 2 "nonimmediate_operand" "")
9917 (match_operand:SI 3 "const_0_to_15_operand" "")]
9922 if (<MODE>mode == V2DFmode
9923 && TARGET_AVX && !TARGET_PREFER_AVX128)
9925 rtx tmp2 = gen_reg_rtx (V4DFmode);
9927 tmp0 = gen_reg_rtx (V4DFmode);
9928 tmp1 = force_reg (V2DFmode, operands[1]);
9930 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9931 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9932 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9936 tmp0 = gen_reg_rtx (<MODE>mode);
9937 tmp1 = gen_reg_rtx (<MODE>mode);
9940 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
9943 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
9946 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round over the VF_128 mode iterator.  Operand 2 is the value being
;; rounded and operand 3 the rounding immediate (const_0_to_15_operand);
;; operand 1 is a second vector input that is tied to the destination in the
;; non-AVX alternative.
;; NOTE(review): the RTL code combining operand 1 with the rounded value is
;; not visible in this excerpt.
9951 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9952 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9955 [(match_operand:VF_128 2 "register_operand" "x,x")
9956 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9958 (match_operand:VF_128 1 "register_operand" "0,x")
9962 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9963 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9964 [(set_attr "isa" "noavx,avx")
9965 (set_attr "type" "ssecvt")
9966 (set_attr "length_immediate" "1")
9967 (set_attr "prefix_data16" "1,*")
9968 (set_attr "prefix_extra" "1")
9969 (set_attr "prefix" "orig,vex")
9970 (set_attr "mode" "<MODE>")])
;; round<mode>2 expander over VF, enabled only when
;; TARGET_ROUND && !flag_trapping_math.
;; The preparation code builds the constant 0.5 - 2^(-p-1), where p is the
;; precision of the scalar format (the predecessor of 0.5), broadcasts it to
;; a vector, and gives it the sign of operand 1 via copysign (operands[3]).
;; operands[4] is a fresh temporary and operands[5] is ROUND_TRUNC; the final
;; set in the template consumes operands 4 and 5.
;; NOTE(review): the RTL that produces operand 4 from operands 1 and 3 is not
;; visible in this excerpt.
9972 (define_expand "round<mode>2"
9975 (match_operand:VF 1 "register_operand" "")
9977 (set (match_operand:VF 0 "register_operand" "")
9979 [(match_dup 4) (match_dup 5)]
9981 "TARGET_ROUND && !flag_trapping_math"
9983 enum machine_mode scalar_mode;
9984 const struct real_format *fmt;
9985 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9988 scalar_mode = GET_MODE_INNER (<MODE>mode);
9990 /* load nextafter (0.5, 0.0) */
9991 fmt = REAL_MODE_FORMAT (scalar_mode);
9992 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9993 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9994 half = const_double_from_real_value (pred_half, scalar_mode);
9996 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9997 vec_half = force_reg (<MODE>mode, vec_half);
9999 operands[3] = gen_reg_rtx (<MODE>mode);
10000 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
10002 operands[4] = gen_reg_rtx (<MODE>mode);
10003 operands[5] = GEN_INT (ROUND_TRUNC);
;; Expander over the VF1 modes: rounds operand 1 into a temporary with
;; round<mode>2, then converts the temporary to the integer vector mode
;; <sseintvecmode> with fix_trunc, storing into operand 0.
10006 (define_expand "round<mode>2_sfix"
10007 [(match_operand:<sseintvecmode> 0 "register_operand" "")
10008 (match_operand:VF1 1 "register_operand" "")]
10009 "TARGET_ROUND && !flag_trapping_math"
10011 rtx tmp = gen_reg_rtx (<MODE>mode);
10013 emit_insn (gen_round<mode>2 (tmp, operands[1]));
10016 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander over the VF2 modes taking two inputs (operands 1 and 2) and
;; producing one <ssepackfltmode> result.  Two sequences are present:
;;  - for V2DFmode with TARGET_AVX and !TARGET_PREFER_AVX128, the inputs
;;    are concatenated into one V4DF value, rounded once with roundv4df2
;;    and converted with fix_truncv4dfv4si2;
;;  - the other sequence rounds each input separately with round<mode>2
;;    and combines them with vec_pack_sfix_trunc_<mode>.
10020 (define_expand "round<mode>2_vec_pack_sfix"
10021 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
10022 (match_operand:VF2 1 "register_operand" "")
10023 (match_operand:VF2 2 "register_operand" "")]
10024 "TARGET_ROUND && !flag_trapping_math"
10028 if (<MODE>mode == V2DFmode
10029 && TARGET_AVX && !TARGET_PREFER_AVX128)
10031 rtx tmp2 = gen_reg_rtx (V4DFmode);
10033 tmp0 = gen_reg_rtx (V4DFmode);
10034 tmp1 = force_reg (V2DFmode, operands[1]);
10036 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
10037 emit_insn (gen_roundv4df2 (tmp2, tmp0));
10038 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10042 tmp0 = gen_reg_rtx (<MODE>mode);
10043 tmp1 = gen_reg_rtx (<MODE>mode);
10045 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
10046 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
10049 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
10054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10056 ;; Intel SSE4.2 string/text processing instructions
10058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern setting an SI result (operand 0, constraint
;; "c"), a V16QI result (operand 1, constraint "Yz") and FLAGS_REG.  It is
;; only split while pseudos can still be created.  The split code records
;; in the locals ecx, xmm0 and flags whether operand 0, operand 1 and
;; FLAGS_REG lack a REG_UNUSED note on curr_insn, and uses them to choose
;; among the pcmpestri, pcmpestrm and flags-only (cconly) patterns.  When
;; none of the three results is used, a NOTE_INSN_DELETED is emitted.
10060 (define_insn_and_split "sse4_2_pcmpestr"
10061 [(set (match_operand:SI 0 "register_operand" "=c,c")
10063 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10064 (match_operand:SI 3 "register_operand" "a,a")
10065 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10066 (match_operand:SI 5 "register_operand" "d,d")
10067 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10069 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10077 (set (reg:CC FLAGS_REG)
10086 && can_create_pseudo_p ()"
10091 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10092 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10093 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10096 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10097 operands[3], operands[4],
10098 operands[5], operands[6]));
10100 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10101 operands[3], operands[4],
10102 operands[5], operands[6]));
10103 if (flags && !(ecx || xmm0))
10104 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10105 operands[2], operands[3],
10106 operands[4], operands[5],
10108 if (!(flags || ecx || xmm0))
10109 emit_note (NOTE_INSN_DELETED);
10113 [(set_attr "type" "sselog")
10114 (set_attr "prefix_data16" "1")
10115 (set_attr "prefix_extra" "1")
10116 (set_attr "length_immediate" "1")
10117 (set_attr "memory" "none,load")
10118 (set_attr "mode" "TI")])
;; Emits (v)pcmpestri.  The SI result is constrained to "c", the two SI
;; inputs to "a" and "d"; operand 3 may be a register or memory, and
;; operand 5 is an immediate in 0..255.  FLAGS_REG is also set.
10120 (define_insn "sse4_2_pcmpestri"
10121 [(set (match_operand:SI 0 "register_operand" "=c,c")
10123 [(match_operand:V16QI 1 "register_operand" "x,x")
10124 (match_operand:SI 2 "register_operand" "a,a")
10125 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10126 (match_operand:SI 4 "register_operand" "d,d")
10127 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10129 (set (reg:CC FLAGS_REG)
10138 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10139 [(set_attr "type" "sselog")
10140 (set_attr "prefix_data16" "1")
10141 (set_attr "prefix_extra" "1")
10142 (set_attr "prefix" "maybe_vex")
10143 (set_attr "length_immediate" "1")
10144 (set_attr "memory" "none,load")
10145 (set_attr "mode" "TI")])
;; Emits (v)pcmpestrm.  The V16QI result is constrained to "Yz", the two
;; SI inputs to "a" and "d"; operand 3 may be a register or memory, and
;; operand 5 is an immediate in 0..255.  FLAGS_REG is also set.
10147 (define_insn "sse4_2_pcmpestrm"
10148 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10150 [(match_operand:V16QI 1 "register_operand" "x,x")
10151 (match_operand:SI 2 "register_operand" "a,a")
10152 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10153 (match_operand:SI 4 "register_operand" "d,d")
10154 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10156 (set (reg:CC FLAGS_REG)
10165 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10166 [(set_attr "type" "sselog")
10167 (set_attr "prefix_data16" "1")
10168 (set_attr "prefix_extra" "1")
10169 (set_attr "length_immediate" "1")
10170 (set_attr "prefix" "maybe_vex")
10171 (set_attr "memory" "none,load")
10172 (set_attr "mode" "TI")])
;; Flags-only pcmpestr variant: sets FLAGS_REG and clobbers two scratches.
;; Alternatives 0-1 clobber the "Yz" V16QI scratch and print
;; (v)pcmpestrm; alternatives 2-3 clobber the "c" SI scratch and print
;; (v)pcmpestri.  Within each pair, operand 4 is a register or memory.
10174 (define_insn "sse4_2_pcmpestr_cconly"
10175 [(set (reg:CC FLAGS_REG)
10177 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10178 (match_operand:SI 3 "register_operand" "a,a,a,a")
10179 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10180 (match_operand:SI 5 "register_operand" "d,d,d,d")
10181 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10183 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10184 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10187 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10188 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10189 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10190 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10191 [(set_attr "type" "sselog")
10192 (set_attr "prefix_data16" "1")
10193 (set_attr "prefix_extra" "1")
10194 (set_attr "length_immediate" "1")
10195 (set_attr "memory" "none,load,none,load")
10196 (set_attr "prefix" "maybe_vex")
10197 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern setting an SI result (operand 0, constraint
;; "c"), a V16QI result (operand 1, constraint "Yz") and FLAGS_REG.  It is
;; only split while pseudos can still be created.  The split code records
;; in the locals ecx, xmm0 and flags whether operand 0, operand 1 and
;; FLAGS_REG lack a REG_UNUSED note on curr_insn, and uses them to choose
;; among the pcmpistri, pcmpistrm and flags-only (cconly) patterns.  When
;; none of the three results is used, a NOTE_INSN_DELETED is emitted.
10199 (define_insn_and_split "sse4_2_pcmpistr"
10200 [(set (match_operand:SI 0 "register_operand" "=c,c")
10202 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10203 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10204 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10206 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10212 (set (reg:CC FLAGS_REG)
10219 && can_create_pseudo_p ()"
10224 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10225 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10226 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10229 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10230 operands[3], operands[4]));
10232 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10233 operands[3], operands[4]));
10234 if (flags && !(ecx || xmm0))
10235 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10236 operands[2], operands[3],
10238 if (!(flags || ecx || xmm0))
10239 emit_note (NOTE_INSN_DELETED);
10243 [(set_attr "type" "sselog")
10244 (set_attr "prefix_data16" "1")
10245 (set_attr "prefix_extra" "1")
10246 (set_attr "length_immediate" "1")
10247 (set_attr "memory" "none,load")
10248 (set_attr "mode" "TI")])
;; Emits (v)pcmpistri.  The SI result is constrained to "c"; operand 2 may
;; be a register or memory and operand 3 is an immediate in 0..255.
;; FLAGS_REG is also set.
10250 (define_insn "sse4_2_pcmpistri"
10251 [(set (match_operand:SI 0 "register_operand" "=c,c")
10253 [(match_operand:V16QI 1 "register_operand" "x,x")
10254 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10255 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10257 (set (reg:CC FLAGS_REG)
10264 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10265 [(set_attr "type" "sselog")
10266 (set_attr "prefix_data16" "1")
10267 (set_attr "prefix_extra" "1")
10268 (set_attr "length_immediate" "1")
10269 (set_attr "prefix" "maybe_vex")
10270 (set_attr "memory" "none,load")
10271 (set_attr "mode" "TI")])
;; Emits (v)pcmpistrm.  The V16QI result is constrained to "Yz"; operand 2
;; may be a register or memory and operand 3 is an immediate in 0..255.
;; FLAGS_REG is also set.
10273 (define_insn "sse4_2_pcmpistrm"
10274 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10276 [(match_operand:V16QI 1 "register_operand" "x,x")
10277 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10278 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10280 (set (reg:CC FLAGS_REG)
10287 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10288 [(set_attr "type" "sselog")
10289 (set_attr "prefix_data16" "1")
10290 (set_attr "prefix_extra" "1")
10291 (set_attr "length_immediate" "1")
10292 (set_attr "prefix" "maybe_vex")
10293 (set_attr "memory" "none,load")
10294 (set_attr "mode" "TI")])
;; Flags-only pcmpistr variant: sets FLAGS_REG and clobbers two scratches.
;; Alternatives 0-1 clobber the "Yz" V16QI scratch and print
;; (v)pcmpistrm; alternatives 2-3 clobber the "c" SI scratch and print
;; (v)pcmpistri.  Within each pair, operand 3 is a register or memory.
10296 (define_insn "sse4_2_pcmpistr_cconly"
10297 [(set (reg:CC FLAGS_REG)
10299 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10300 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10301 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10303 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10304 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10307 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10308 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10309 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10310 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10311 [(set_attr "type" "sselog")
10312 (set_attr "prefix_data16" "1")
10313 (set_attr "prefix_extra" "1")
10314 (set_attr "length_immediate" "1")
10315 (set_attr "memory" "none,load,none,load")
10316 (set_attr "prefix" "maybe_vex")
10317 (set_attr "mode" "TI")])
10319 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10321 ;; XOP instructions
10323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10325 ;; XOP parallel integer multiply/add instructions.
10326 ;; Note the XOP multiply/add instructions
10327 ;; a[i] = b[i] * c[i] + d[i];
10328 ;; do not allow the value being added to be a memory operation.
;; Emits vpmacsww on V8HI operands.  Operands 1 and 2 are marked
;; commutative ("%"); only operand 2 may be memory ("xm"), and operand 3
;; is constrained to a register ("x").
10329 (define_insn "xop_pmacsww"
10330 [(set (match_operand:V8HI 0 "register_operand" "=x")
10333 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10334 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10335 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10337 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10338 [(set_attr "type" "ssemuladd")
10339 (set_attr "mode" "TI")])
;; Emits vpmacssww: a V8HI multiply of operands 1 and 2 (commutative, only
;; operand 2 may be memory) combined with register operand 3.
10341 (define_insn "xop_pmacssww"
10342 [(set (match_operand:V8HI 0 "register_operand" "=x")
10344 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10345 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10346 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10348 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10349 [(set_attr "type" "ssemuladd")
10350 (set_attr "mode" "TI")])
;; Emits vpmacsdd on V4SI operands.  Operands 1 and 2 are marked
;; commutative ("%"); only operand 2 may be memory, and operand 3 is
;; constrained to a register.
10352 (define_insn "xop_pmacsdd"
10353 [(set (match_operand:V4SI 0 "register_operand" "=x")
10356 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10357 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10358 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10360 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10361 [(set_attr "type" "ssemuladd")
10362 (set_attr "mode" "TI")])
;; Emits vpmacssdd: a V4SI multiply of operands 1 and 2 (commutative, only
;; operand 2 may be memory) combined with register operand 3.
10364 (define_insn "xop_pmacssdd"
10365 [(set (match_operand:V4SI 0 "register_operand" "=x")
10367 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10368 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10369 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10371 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10372 [(set_attr "type" "ssemuladd")
10373 (set_attr "mode" "TI")])
;; Emits vpmacssdql: V4SI inputs (operands 1 and 2, element selection
;; starting at index 0), V2DI addend (operand 3) and V2DI destination.
10375 (define_insn "xop_pmacssdql"
10376 [(set (match_operand:V2DI 0 "register_operand" "=x")
10381 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10382 (parallel [(const_int 0)
10385 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10386 (parallel [(const_int 0)
10388 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10390 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10391 [(set_attr "type" "ssemuladd")
10392 (set_attr "mode" "TI")])
;; Emits vpmacssdqh: V4SI inputs (operands 1 and 2, element selection
;; starting at index 1), V2DI addend (operand 3) and V2DI destination.
10394 (define_insn "xop_pmacssdqh"
10395 [(set (match_operand:V2DI 0 "register_operand" "=x")
10400 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10401 (parallel [(const_int 1)
10405 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10406 (parallel [(const_int 1)
10408 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10410 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10411 [(set_attr "type" "ssemuladd")
10412 (set_attr "mode" "TI")])
;; Emits vpmacsdql: V4SI inputs (operands 1 and 2, element selection
;; starting at index 0), V2DI addend (operand 3) and V2DI destination.
10414 (define_insn "xop_pmacsdql"
10415 [(set (match_operand:V2DI 0 "register_operand" "=x")
10420 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10421 (parallel [(const_int 0)
10425 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10426 (parallel [(const_int 0)
10428 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10430 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10431 [(set_attr "type" "ssemuladd")
10432 (set_attr "mode" "TI")])
;; Emits vpmacsdqh: V4SI inputs (operands 1 and 2, element selection
;; starting at index 1), V2DI addend (operand 3) and V2DI destination.
10434 (define_insn "xop_pmacsdqh"
10435 [(set (match_operand:V2DI 0 "register_operand" "=x")
10440 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10441 (parallel [(const_int 1)
10445 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10446 (parallel [(const_int 1)
10448 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10450 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10451 [(set_attr "type" "ssemuladd")
10452 (set_attr "mode" "TI")])
10454 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Emits vpmacsswd: V8HI inputs (operands 1 and 2, element selection
;; starting at index 1), V4SI addend (operand 3) and V4SI destination.
10455 (define_insn "xop_pmacsswd"
10456 [(set (match_operand:V4SI 0 "register_operand" "=x")
10461 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10462 (parallel [(const_int 1)
10468 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10469 (parallel [(const_int 1)
10473 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10475 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10476 [(set_attr "type" "ssemuladd")
10477 (set_attr "mode" "TI")])
;; Emits vpmacswd: V8HI inputs (operands 1 and 2, element selection
;; starting at index 1), V4SI addend (operand 3) and V4SI destination.
10479 (define_insn "xop_pmacswd"
10480 [(set (match_operand:V4SI 0 "register_operand" "=x")
10485 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10486 (parallel [(const_int 1)
10492 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10493 (parallel [(const_int 1)
10497 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10499 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10500 [(set_attr "type" "ssemuladd")
10501 (set_attr "mode" "TI")])
;; Emits vpmadcsswd: V8HI inputs (operands 1 and 2), with element
;; selections starting at index 0 and at index 1 (the latter ending at
;; index 7), plus a V4SI addend (operand 3) and V4SI destination.
10503 (define_insn "xop_pmadcsswd"
10504 [(set (match_operand:V4SI 0 "register_operand" "=x")
10510 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10511 (parallel [(const_int 0)
10517 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10518 (parallel [(const_int 0)
10526 (parallel [(const_int 1)
10533 (parallel [(const_int 1)
10536 (const_int 7)])))))
10537 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10539 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10540 [(set_attr "type" "ssemuladd")
10541 (set_attr "mode" "TI")])
;; Emits vpmadcswd: V8HI inputs (operands 1 and 2), with element
;; selections starting at index 0 and at index 1 (the latter ending at
;; index 7), plus a V4SI addend (operand 3) and V4SI destination.
10543 (define_insn "xop_pmadcswd"
10544 [(set (match_operand:V4SI 0 "register_operand" "=x")
10550 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10551 (parallel [(const_int 0)
10557 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10558 (parallel [(const_int 0)
10566 (parallel [(const_int 1)
10573 (parallel [(const_int 1)
10576 (const_int 7)])))))
10577 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10579 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10580 [(set_attr "type" "ssemuladd")
10581 (set_attr "mode" "TI")])
10583 ;; XOP parallel XMM conditional moves
;; Emits vpcmov for every mode in the V iterator.  Operand 1 is always a
;; register; the two alternatives allow either operand 3 or operand 2
;; (but not both) to be memory.
10584 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10585 [(set (match_operand:V 0 "register_operand" "=x,x")
10587 (match_operand:V 3 "nonimmediate_operand" "x,m")
10588 (match_operand:V 1 "register_operand" "x,x")
10589 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10591 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10592 [(set_attr "type" "sse4arg")])
10594 ;; XOP horizontal add/subtract instructions
;; Emits vphaddbw: single V16QI source (register or memory), V8HI
;; register destination.
10595 (define_insn "xop_phaddbw"
10596 [(set (match_operand:V8HI 0 "register_operand" "=x")
10600 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10601 (parallel [(const_int 0)
10612 (parallel [(const_int 1)
10619 (const_int 15)])))))]
10621 "vphaddbw\t{%1, %0|%0, %1}"
10622 [(set_attr "type" "sseiadd1")])
;; Emits vphaddbd: single V16QI source (register or memory), V4SI
;; register destination.
10624 (define_insn "xop_phaddbd"
10625 [(set (match_operand:V4SI 0 "register_operand" "=x")
10630 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10631 (parallel [(const_int 0)
10638 (parallel [(const_int 1)
10641 (const_int 13)]))))
10646 (parallel [(const_int 2)
10653 (parallel [(const_int 3)
10656 (const_int 15)]))))))]
10658 "vphaddbd\t{%1, %0|%0, %1}"
10659 [(set_attr "type" "sseiadd1")])
;; Emits vphaddbq: single V16QI source (register or memory), V2DI
;; register destination.
10661 (define_insn "xop_phaddbq"
10662 [(set (match_operand:V2DI 0 "register_operand" "=x")
10668 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10669 (parallel [(const_int 0)
10674 (parallel [(const_int 1)
10680 (parallel [(const_int 2)
10685 (parallel [(const_int 3)
10686 (const_int 11)])))))
10692 (parallel [(const_int 4)
10697 (parallel [(const_int 5)
10698 (const_int 13)]))))
10703 (parallel [(const_int 6)
10708 (parallel [(const_int 7)
10709 (const_int 15)])))))))]
10711 "vphaddbq\t{%1, %0|%0, %1}"
10712 [(set_attr "type" "sseiadd1")])
;; Emits vphaddwd: single V8HI source (register or memory), V4SI
;; register destination.
10714 (define_insn "xop_phaddwd"
10715 [(set (match_operand:V4SI 0 "register_operand" "=x")
10719 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10720 (parallel [(const_int 0)
10727 (parallel [(const_int 1)
10730 (const_int 7)])))))]
10732 "vphaddwd\t{%1, %0|%0, %1}"
10733 [(set_attr "type" "sseiadd1")])
;; Emits vphaddwq: single V8HI source (register or memory), V2DI
;; register destination.
10735 (define_insn "xop_phaddwq"
10736 [(set (match_operand:V2DI 0 "register_operand" "=x")
10741 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10742 (parallel [(const_int 0)
10747 (parallel [(const_int 1)
10753 (parallel [(const_int 2)
10758 (parallel [(const_int 3)
10759 (const_int 7)]))))))]
10761 "vphaddwq\t{%1, %0|%0, %1}"
10762 [(set_attr "type" "sseiadd1")])
;; Emits vphadddq: single V4SI source (register or memory), V2DI
;; register destination.
10764 (define_insn "xop_phadddq"
10765 [(set (match_operand:V2DI 0 "register_operand" "=x")
10769 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10770 (parallel [(const_int 0)
10775 (parallel [(const_int 1)
10776 (const_int 3)])))))]
10778 "vphadddq\t{%1, %0|%0, %1}"
10779 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubw: single V16QI source (register or memory), V8HI
;; register destination.
10781 (define_insn "xop_phaddubw"
10782 [(set (match_operand:V8HI 0 "register_operand" "=x")
10786 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10787 (parallel [(const_int 0)
10798 (parallel [(const_int 1)
10805 (const_int 15)])))))]
10807 "vphaddubw\t{%1, %0|%0, %1}"
10808 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubd: single V16QI source (register or memory), V4SI
;; register destination.
10810 (define_insn "xop_phaddubd"
10811 [(set (match_operand:V4SI 0 "register_operand" "=x")
10816 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10817 (parallel [(const_int 0)
10824 (parallel [(const_int 1)
10827 (const_int 13)]))))
10832 (parallel [(const_int 2)
10839 (parallel [(const_int 3)
10842 (const_int 15)]))))))]
10844 "vphaddubd\t{%1, %0|%0, %1}"
10845 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubq: single V16QI source (register or memory), V2DI
;; register destination.
10847 (define_insn "xop_phaddubq"
10848 [(set (match_operand:V2DI 0 "register_operand" "=x")
10854 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10855 (parallel [(const_int 0)
10860 (parallel [(const_int 1)
10866 (parallel [(const_int 2)
10871 (parallel [(const_int 3)
10872 (const_int 11)])))))
10878 (parallel [(const_int 4)
10883 (parallel [(const_int 5)
10884 (const_int 13)]))))
10889 (parallel [(const_int 6)
10894 (parallel [(const_int 7)
10895 (const_int 15)])))))))]
10897 "vphaddubq\t{%1, %0|%0, %1}"
10898 [(set_attr "type" "sseiadd1")])
;; Emits vphadduwd: single V8HI source (register or memory), V4SI
;; register destination.
10900 (define_insn "xop_phadduwd"
10901 [(set (match_operand:V4SI 0 "register_operand" "=x")
10905 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10906 (parallel [(const_int 0)
10913 (parallel [(const_int 1)
10916 (const_int 7)])))))]
10918 "vphadduwd\t{%1, %0|%0, %1}"
10919 [(set_attr "type" "sseiadd1")])
;; Emits vphadduwq: single V8HI source (register or memory), V2DI
;; register destination.
10921 (define_insn "xop_phadduwq"
10922 [(set (match_operand:V2DI 0 "register_operand" "=x")
10927 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10928 (parallel [(const_int 0)
10933 (parallel [(const_int 1)
10939 (parallel [(const_int 2)
10944 (parallel [(const_int 3)
10945 (const_int 7)]))))))]
10947 "vphadduwq\t{%1, %0|%0, %1}"
10948 [(set_attr "type" "sseiadd1")])
;; Emits vphaddudq: single V4SI source (register or memory), V2DI
;; register destination.
10950 (define_insn "xop_phaddudq"
10951 [(set (match_operand:V2DI 0 "register_operand" "=x")
10955 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10956 (parallel [(const_int 0)
10961 (parallel [(const_int 1)
10962 (const_int 3)])))))]
10964 "vphaddudq\t{%1, %0|%0, %1}"
10965 [(set_attr "type" "sseiadd1")])
;; Emits vphsubbw: single V16QI source (register or memory), V8HI
;; register destination.
10967 (define_insn "xop_phsubbw"
10968 [(set (match_operand:V8HI 0 "register_operand" "=x")
10972 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10973 (parallel [(const_int 0)
10984 (parallel [(const_int 1)
10991 (const_int 15)])))))]
10993 "vphsubbw\t{%1, %0|%0, %1}"
10994 [(set_attr "type" "sseiadd1")])
;; Emits vphsubwd: single V8HI source (register or memory), V4SI
;; register destination.
10996 (define_insn "xop_phsubwd"
10997 [(set (match_operand:V4SI 0 "register_operand" "=x")
11001 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11002 (parallel [(const_int 0)
11009 (parallel [(const_int 1)
11012 (const_int 7)])))))]
11014 "vphsubwd\t{%1, %0|%0, %1}"
11015 [(set_attr "type" "sseiadd1")])
;; Emits vphsubdq: single V4SI source (register or memory), V2DI
;; register destination.
11017 (define_insn "xop_phsubdq"
11018 [(set (match_operand:V2DI 0 "register_operand" "=x")
11022 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11023 (parallel [(const_int 0)
11028 (parallel [(const_int 1)
11029 (const_int 3)])))))]
11031 "vphsubdq\t{%1, %0|%0, %1}"
11032 [(set_attr "type" "sseiadd1")])
11034 ;; XOP permute instructions
;; Emits vpperm as an UNSPEC_XOP_PERMUTE over three V16QI inputs.  Operand
;; 1 is always a register; the insn condition and the paired constraint
;; alternatives both ensure operands 2 and 3 are not both memory.
11035 (define_insn "xop_pperm"
11036 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11038 [(match_operand:V16QI 1 "register_operand" "x,x")
11039 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11040 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11041 UNSPEC_XOP_PERMUTE))]
11042 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11043 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11044 [(set_attr "type" "sse4arg")
11045 (set_attr "mode" "TI")])
11047 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Emits vpperm to combine two V2DI inputs (operands 1 and 2) into one
;; V4SI result; operand 3 is a V16QI value recorded with (use ...).
;; Operands 2 and 3 may not both be memory.
11048 (define_insn "xop_pperm_pack_v2di_v4si"
11049 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11052 (match_operand:V2DI 1 "register_operand" "x,x"))
11054 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11055 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11056 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11057 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11058 [(set_attr "type" "sse4arg")
11059 (set_attr "mode" "TI")])
;; Emits vpperm to combine two V4SI inputs (operands 1 and 2) into one
;; V8HI result; operand 3 is a V16QI value recorded with (use ...).
;; Operands 2 and 3 may not both be memory.
11061 (define_insn "xop_pperm_pack_v4si_v8hi"
11062 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11065 (match_operand:V4SI 1 "register_operand" "x,x"))
11067 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11068 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11069 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11070 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11071 [(set_attr "type" "sse4arg")
11072 (set_attr "mode" "TI")])
;; Emits vpperm to combine two V8HI inputs (operands 1 and 2) into one
;; V16QI result; operand 3 is a V16QI value recorded with (use ...).
;; Operands 2 and 3 may not both be memory.
11074 (define_insn "xop_pperm_pack_v8hi_v16qi"
11075 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11078 (match_operand:V8HI 1 "register_operand" "x,x"))
11080 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11081 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11082 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11083 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11084 [(set_attr "type" "sse4arg")
11085 (set_attr "mode" "TI")])
11087 ;; XOP packed rotate instructions
;; Rotate-left expander over the VI_128 modes with an SI count.  When the
;; count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, it is converted to the element mode
;; if its mode differs, replicated into every slot of a PARALLEL,
;; materialised with vec_init<mode>, and passed to xop_vrotl<mode>3 as a
;; per-element count vector.
11088 (define_expand "rotl<mode>3"
11089 [(set (match_operand:VI_128 0 "register_operand" "")
11091 (match_operand:VI_128 1 "nonimmediate_operand" "")
11092 (match_operand:SI 2 "general_operand")))]
11095 /* If we were given a scalar, convert it to parallel */
11096 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11098 rtvec vs = rtvec_alloc (<ssescalarnum>);
11099 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11100 rtx reg = gen_reg_rtx (<MODE>mode);
11101 rtx op2 = operands[2];
11104 if (GET_MODE (op2) != <ssescalarmode>mode)
11106 op2 = gen_reg_rtx (<ssescalarmode>mode);
11107 convert_move (op2, operands[2], false);
11110 for (i = 0; i < <ssescalarnum>; i++)
11111 RTVEC_ELT (vs, i) = op2;
11113 emit_insn (gen_vec_init<mode> (reg, par));
11114 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander over the VI_128 modes with an SI count.  When the
;; count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, it is converted to the element mode
;; if its mode differs, replicated into a vector with vec_init<mode>,
;; negated with neg<mode>2, and passed to xop_vrotl<mode>3, i.e. the right
;; rotate is issued as a left rotate by the negated count vector.
11119 (define_expand "rotr<mode>3"
11120 [(set (match_operand:VI_128 0 "register_operand" "")
11122 (match_operand:VI_128 1 "nonimmediate_operand" "")
11123 (match_operand:SI 2 "general_operand")))]
11126 /* If we were given a scalar, convert it to parallel */
11127 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11129 rtvec vs = rtvec_alloc (<ssescalarnum>);
11130 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11131 rtx neg = gen_reg_rtx (<MODE>mode);
11132 rtx reg = gen_reg_rtx (<MODE>mode);
11133 rtx op2 = operands[2];
11136 if (GET_MODE (op2) != <ssescalarmode>mode)
11138 op2 = gen_reg_rtx (<ssescalarmode>mode);
11139 convert_move (op2, operands[2], false);
11142 for (i = 0; i < <ssescalarnum>; i++)
11143 RTVEC_ELT (vs, i) = op2;
11145 emit_insn (gen_vec_init<mode> (reg, par));
11146 emit_insn (gen_neg<mode>2 (neg, reg));
11147 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate by immediate: emits vprot<ssemodesuffix> with operand 2 as an
;; immediate accepted by const_0_to_<sserotatemax>_operand.  Operand 1 may
;; be a register or memory.
11152 (define_insn "xop_rotl<mode>3"
11153 [(set (match_operand:VI_128 0 "register_operand" "=x")
11155 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11156 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11158 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11159 [(set_attr "type" "sseishft")
11160 (set_attr "length_immediate" "1")
11161 (set_attr "mode" "TI")])
;; Rotate by immediate using vprot<ssemodesuffix>.  The output code
;; computes (element bit size - operand 2) as a new constant, and the
;; returned template prints that value as %3 instead of operand 2.
11163 (define_insn "xop_rotr<mode>3"
11164 [(set (match_operand:VI_128 0 "register_operand" "=x")
11166 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11167 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11171 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11172 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11174 [(set_attr "type" "sseishft")
11175 (set_attr "length_immediate" "1")
11176 (set_attr "mode" "TI")])
;; Variable rotate-right expander over VI_128: negates the count vector
;; (operand 2) into a fresh register and emits xop_vrotl<mode>3 with it.
11178 (define_expand "vrotr<mode>3"
11179 [(match_operand:VI_128 0 "register_operand" "")
11180 (match_operand:VI_128 1 "register_operand" "")
11181 (match_operand:VI_128 2 "register_operand" "")]
11184 rtx reg = gen_reg_rtx (<MODE>mode);
11185 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11186 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Variable rotate-left expander over VI_128: forwards its three operands
;; unchanged to xop_vrotl<mode>3.
11190 (define_expand "vrotl<mode>3"
11191 [(match_operand:VI_128 0 "register_operand" "")
11192 (match_operand:VI_128 1 "register_operand" "")
11193 (match_operand:VI_128 2 "register_operand" "")]
11196 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable rotate: emits vprot<ssemodesuffix> with a vector
;; count in operand 2.  The RTL is an if_then_else whose last arm uses
;; (neg (match_dup 2)).  Operands 1 and 2 may not both be memory.
11200 (define_insn "xop_vrotl<mode>3"
11201 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11202 (if_then_else:VI_128
11204 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11207 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11211 (neg:VI_128 (match_dup 2)))))]
11212 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11213 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11214 [(set_attr "type" "sseishft")
11215 (set_attr "prefix_data16" "0")
11216 (set_attr "prefix_extra" "2")
11217 (set_attr "mode" "TI")])
11219 ;; XOP packed shift instructions.
;; vlshr<mode>3 for the VI12_128 modes: negates the count vector (operand
;; 2) into a fresh register and emits xop_shl<mode>3 with the negated
;; count.
11220 (define_expand "vlshr<mode>3"
11221 [(set (match_operand:VI12_128 0 "register_operand" "")
11223 (match_operand:VI12_128 1 "register_operand" "")
11224 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11227 rtx neg = gen_reg_rtx (<MODE>mode);
11228 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11229 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_128 modes, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The preparation code contains a sequence that negates the
;; count vector (operand 2) and emits xop_shl<mode>3 with the negated
;; count.
11233 (define_expand "vlshr<mode>3"
11234 [(set (match_operand:VI48_128 0 "register_operand" "")
11236 (match_operand:VI48_128 1 "register_operand" "")
11237 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11238 "TARGET_AVX2 || TARGET_XOP"
11242 rtx neg = gen_reg_rtx (<MODE>mode);
11243 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11244 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_256 modes: operand 1 must be a register,
;; operand 2 (the count vector) may be a register or memory.
11249 (define_expand "vlshr<mode>3"
11250 [(set (match_operand:VI48_256 0 "register_operand" "")
11252 (match_operand:VI48_256 1 "register_operand" "")
11253 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; vashr<mode>3 (ashiftrt) for the VI128_128 modes: negates the count
;; vector (operand 2) into a fresh register and emits xop_sha<mode>3 with
;; the negated count.
11256 (define_expand "vashr<mode>3"
11257 [(set (match_operand:VI128_128 0 "register_operand" "")
11258 (ashiftrt:VI128_128
11259 (match_operand:VI128_128 1 "register_operand" "")
11260 (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
11263 rtx neg = gen_reg_rtx (<MODE>mode);
11264 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11265 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; vashrv4si3 (ashiftrt on V4SI), enabled for TARGET_AVX2 or TARGET_XOP.
;; The preparation code contains a sequence that negates the count vector
;; with negv4si2 and emits xop_shav4si3 with the negated count.
11269 (define_expand "vashrv4si3"
11270 [(set (match_operand:V4SI 0 "register_operand" "")
11271 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
11272 (match_operand:V4SI 2 "nonimmediate_operand" "")))]
11273 "TARGET_AVX2 || TARGET_XOP"
11277 rtx neg = gen_reg_rtx (V4SImode);
11278 emit_insn (gen_negv4si2 (neg, operands[2]));
11279 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; vashrv8si3: ashiftrt on V8SI with a V8SI count vector (operand 2) that
;; may be a register or memory.
11284 (define_expand "vashrv8si3"
11285 [(set (match_operand:V8SI 0 "register_operand" "")
11286 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
11287 (match_operand:V8SI 2 "nonimmediate_operand" "")))]
;; vashl<mode>3 for the VI12_128 modes: forwards its three operands
;; unchanged to xop_sha<mode>3.
11290 (define_expand "vashl<mode>3"
11291 [(set (match_operand:VI12_128 0 "register_operand" "")
11293 (match_operand:VI12_128 1 "register_operand" "")
11294 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11297 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_128 modes, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The preparation code contains a sequence that forces the
;; count vector (operand 2) into a register and emits xop_sha<mode>3.
11301 (define_expand "vashl<mode>3"
11302 [(set (match_operand:VI48_128 0 "register_operand" "")
11304 (match_operand:VI48_128 1 "register_operand" "")
11305 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11306 "TARGET_AVX2 || TARGET_XOP"
11310 operands[2] = force_reg (<MODE>mode, operands[2]);
11311 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_256 modes: operand 1 must be a register,
;; operand 2 (the count vector) may be a register or memory.
11316 (define_expand "vashl<mode>3"
11317 [(set (match_operand:VI48_256 0 "register_operand" "")
11319 (match_operand:VI48_256 1 "register_operand" "")
11320 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; Emits vpsha<ssemodesuffix> with a per-element count vector in operand
;; 2.  The RTL is an if_then_else whose last arm uses (neg (match_dup 2)).
;; Operands 1 and 2 may not both be memory.
11323 (define_insn "xop_sha<mode>3"
11324 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11325 (if_then_else:VI_128
11327 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11330 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11334 (neg:VI_128 (match_dup 2)))))]
11335 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11336 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11337 [(set_attr "type" "sseishft")
11338 (set_attr "prefix_data16" "0")
11339 (set_attr "prefix_extra" "2")
11340 (set_attr "mode" "TI")])
;; Emits vpshl<ssemodesuffix> with a per-element count vector in operand
;; 2.  The RTL is an if_then_else whose last arm uses (neg (match_dup 2)).
;; Operands 1 and 2 may not both be memory.
11342 (define_insn "xop_shl<mode>3"
11343 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11344 (if_then_else:VI_128
11346 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11349 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11353 (neg:VI_128 (match_dup 2)))))]
11354 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11355 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11356 [(set_attr "type" "sseishft")
11357 (set_attr "prefix_data16" "0")
11358 (set_attr "prefix_extra" "2")
11359 (set_attr "mode" "TI")])
11361 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; ashlv16qi3 with a scalar SI count (register or constant): builds a
;; 16-element PARALLEL with every slot set to operand 2, materialises it
;; with vec_initv16qi, and emits xop_shav16qi3 with that count vector.
11362 (define_expand "ashlv16qi3"
11363 [(set (match_operand:V16QI 0 "register_operand" "")
11365 (match_operand:V16QI 1 "register_operand" "")
11366 (match_operand:SI 2 "nonmemory_operand" "")))]
11369 rtx reg = gen_reg_rtx (V16QImode);
11373 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11374 for (i = 0; i < 16; i++)
11375 XVECEXP (par, 0, i) = operands[2];
11377 emit_insn (gen_vec_initv16qi (reg, par));
11378 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
;; <shift_insn>v16qi3 with a scalar SI count.  A CONST_INT count is
;; negated at expand time; the count is then replicated into a V16QI
;; vector with vec_initv16qi.  The code also contains a negv16qi2 of that
;; vector and a `negate' flag (initially false).  gen_xop_shlv16qi3 is
;; selected when <CODE> is LSHIFTRT; gen_xop_shav16qi3 is the other
;; candidate generator.
11382 (define_expand "<shift_insn>v16qi3"
11383 [(set (match_operand:V16QI 0 "register_operand" "")
11385 (match_operand:V16QI 1 "register_operand" "")
11386 (match_operand:SI 2 "nonmemory_operand" "")))]
11389 rtx reg = gen_reg_rtx (V16QImode);
11391 bool negate = false;
11392 rtx (*shift_insn)(rtx, rtx, rtx);
11395 if (CONST_INT_P (operands[2]))
11396 operands[2] = GEN_INT (-INTVAL (operands[2]));
11400 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11401 for (i = 0; i < 16; i++)
11402 XVECEXP (par, 0, i) = operands[2];
11404 emit_insn (gen_vec_initv16qi (reg, par));
11407 emit_insn (gen_negv16qi2 (reg, reg));
11409 if (<CODE> == LSHIFTRT)
11410 shift_insn = gen_xop_shlv16qi3;
11412 shift_insn = gen_xop_shav16qi3;
11414 emit_insn (shift_insn (operands[0], operands[1], reg));
;; ashrv2di3 with a scalar DI count.  A CONST_INT count is negated at
;; expand time; the count is then replicated into a V2DI vector with
;; vec_initv2di.  The code also contains a negv2di2 of that vector and a
;; `negate' flag (initially false).  The shift itself is emitted through
;; xop_shav2di3.
11418 (define_expand "ashrv2di3"
11419 [(set (match_operand:V2DI 0 "register_operand" "")
11421 (match_operand:V2DI 1 "register_operand" "")
11422 (match_operand:DI 2 "nonmemory_operand" "")))]
11425 rtx reg = gen_reg_rtx (V2DImode);
11427 bool negate = false;
11430 if (CONST_INT_P (operands[2]))
11431 operands[2] = GEN_INT (-INTVAL (operands[2]));
11435 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
11436 for (i = 0; i < 2; i++)
11437 XVECEXP (par, 0, i) = operands[2];
11439 emit_insn (gen_vec_initv2di (reg, par));
11442 emit_insn (gen_negv2di2 (reg, reg));
11444 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
11448 ;; XOP FRCZ support
;; vfrcz over the FMAMODE iterator: register destination, register or
;; memory source.  Emits vfrcz<ssemodesuffix>.
11449 (define_insn "xop_frcz<mode>2"
11450 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11452 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11455 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11456 [(set_attr "type" "ssecvt1")
11457 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 "vm" form of vfrcz.  The preparation statement
;; supplies operands[2] as the all-zero constant of <MODE>mode.
11459 (define_expand "xop_vmfrcz<mode>2"
11460 [(set (match_operand:VF_128 0 "register_operand")
11463 [(match_operand:VF_128 1 "nonimmediate_operand")]
11468 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Insn for the VF_128 "vm" form of vfrcz.  Operand 2 must satisfy
;; const0_operand; only operands 0 and 1 appear in the output template,
;; which uses the scalar suffix (<ssescalarmodesuffix>).
11470 (define_insn "*xop_vmfrcz<mode>2"
11471 [(set (match_operand:VF_128 0 "register_operand" "=x")
11474 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11476 (match_operand:VF_128 2 "const0_operand")
11479 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11480 [(set_attr "type" "ssecvt1")
11481 (set_attr "mode" "<MODE>")])
;; Integer vector compare (VI_128) whose comparison code is operand 1,
;; accepted by ix86_comparison_int_operator.  %Y1 prints the condition
;; into the vpcom mnemonic; operand 3 may be a register or memory.
11483 (define_insn "xop_maskcmp<mode>3"
11484 [(set (match_operand:VI_128 0 "register_operand" "=x")
11485 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11486 [(match_operand:VI_128 2 "register_operand" "x")
11487 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11489 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11490 [(set_attr "type" "sse4arg")
11491 (set_attr "prefix_data16" "0")
11492 (set_attr "prefix_rep" "0")
11493 (set_attr "prefix_extra" "2")
11494 (set_attr "length_immediate" "1")
11495 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: operand 1 is accepted by
;; ix86_comparison_uns_operator and the mnemonic carries a "u" after the
;; condition (vpcom%Y1u...).
11497 (define_insn "xop_maskcmp_uns<mode>3"
11498 [(set (match_operand:VI_128 0 "register_operand" "=x")
11499 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11500 [(match_operand:VI_128 2 "register_operand" "x")
11501 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11503 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11504 [(set_attr "type" "ssecmp")
11505 (set_attr "prefix_data16" "0")
11506 (set_attr "prefix_rep" "0")
11507 (set_attr "prefix_extra" "2")
11508 (set_attr "length_immediate" "1")
11509 (set_attr "mode" "TI")])
11511 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11512 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11513 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP.
11514 (define_insn "xop_maskcmp_uns2<mode>3"
11515 [(set (match_operand:VI_128 0 "register_operand" "=x")
11517 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11518 [(match_operand:VI_128 2 "register_operand" "x")
11519 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11520 UNSPEC_XOP_UNSIGNED_CMP))]
11522 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11523 [(set_attr "type" "ssecmp")
11524 (set_attr "prefix_data16" "0")
11525 (set_attr "prefix_extra" "2")
11526 (set_attr "length_immediate" "1")
11527 (set_attr "mode" "TI")])
11529 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11530 ;; being added here to be complete.
;; Operand 3 is a const_int selector: non-zero emits vpcomtrue, zero
;; emits vpcomfalse.  Operands 1 and 2 are passed through to the
;; instruction unchanged.
11531 (define_insn "xop_pcom_tf<mode>3"
11532 [(set (match_operand:VI_128 0 "register_operand" "=x")
11534 [(match_operand:VI_128 1 "register_operand" "x")
11535 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11536 (match_operand:SI 3 "const_int_operand" "n")]
11537 UNSPEC_XOP_TRUEFALSE))]
11540 return ((INTVAL (operands[3]) != 0)
11541 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11542 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11544 [(set_attr "type" "ssecmp")
11545 (set_attr "prefix_data16" "0")
11546 (set_attr "prefix_extra" "2")
11547 (set_attr "length_immediate" "1")
11548 (set_attr "mode" "TI")])
;; vpermil2 over VF: two float vector sources (operands 1 and 2), an
;; integer selector vector of mode <sseintvecmode> (operand 3, register
;; or memory) and an immediate restricted to 0..3 (operand 4).
;; NOTE(review): operand 2 has predicate nonimmediate_operand but its
;; constraint "%x" admits registers only -- confirm this is intended.
11550 (define_insn "xop_vpermil2<mode>3"
11551 [(set (match_operand:VF 0 "register_operand" "=x")
11553 [(match_operand:VF 1 "register_operand" "x")
11554 (match_operand:VF 2 "nonimmediate_operand" "%x")
11555 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11556 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11559 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11560 [(set_attr "type" "sse4arg")
11561 (set_attr "length_immediate" "1")
11562 (set_attr "mode" "<MODE>")])
11564 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc on V2DI.  Two alternatives keyed by the "isa" attribute:
;; noavx -- two-operand form, operand 1 tied to the destination ("0");
;; avx   -- three-operand vaesenc with an independent operand 1.
11566 (define_insn "aesenc"
11567 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11568 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11569 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11573 aesenc\t{%2, %0|%0, %2}
11574 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11575 [(set_attr "isa" "noavx,avx")
11576 (set_attr "type" "sselog1")
11577 (set_attr "prefix_extra" "1")
11578 (set_attr "prefix" "orig,vex")
11579 (set_attr "mode" "TI")])
;; aesenclast on V2DI (UNSPEC_AESENCLAST).  Alternative 0 is the legacy
;; two-operand form with operand 1 tied to the destination; alternative 1
;; is the three-operand VEX form.
11581 (define_insn "aesenclast"
11582 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11583 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11584 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11585 UNSPEC_AESENCLAST))]
11588 aesenclast\t{%2, %0|%0, %2}
11589 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11590 [(set_attr "isa" "noavx,avx")
11591 (set_attr "type" "sselog1")
11592 (set_attr "prefix_extra" "1")
11593 (set_attr "prefix" "orig,vex")
11594 (set_attr "mode" "TI")])
;; aesdec on V2DI.  Alternative 0 is the legacy two-operand form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; VEX form (vaesdec).
11596 (define_insn "aesdec"
11597 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11598 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11599 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11603 aesdec\t{%2, %0|%0, %2}
11604 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11605 [(set_attr "isa" "noavx,avx")
11606 (set_attr "type" "sselog1")
11607 (set_attr "prefix_extra" "1")
11608 (set_attr "prefix" "orig,vex")
11609 (set_attr "mode" "TI")])
;; aesdeclast on V2DI (UNSPEC_AESDECLAST).  Alternative 0 is the legacy
;; two-operand form with operand 1 tied to the destination; alternative 1
;; is the three-operand VEX form.
11611 (define_insn "aesdeclast"
11612 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11613 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11614 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11615 UNSPEC_AESDECLAST))]
11618 aesdeclast\t{%2, %0|%0, %2}
11619 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11620 [(set_attr "isa" "noavx,avx")
11621 (set_attr "type" "sselog1")
11622 (set_attr "prefix_extra" "1")
11623 (set_attr "prefix" "orig,vex")
11624 (set_attr "mode" "TI")])
;; aesimc on V2DI: single register-or-memory source.  One alternative;
;; the %v in the template together with prefix "maybe_vex" covers both
;; the legacy and the VEX spelling.
11626 (define_insn "aesimc"
11627 [(set (match_operand:V2DI 0 "register_operand" "=x")
11628 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11631 "%vaesimc\t{%1, %0|%0, %1}"
11632 [(set_attr "type" "sselog1")
11633 (set_attr "prefix_extra" "1")
11634 (set_attr "prefix" "maybe_vex")
11635 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI (UNSPEC_AESKEYGENASSIST): register-or-memory
;; source plus an immediate restricted to 0..255 (operand 2).
11637 (define_insn "aeskeygenassist"
11638 [(set (match_operand:V2DI 0 "register_operand" "=x")
11639 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11640 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11641 UNSPEC_AESKEYGENASSIST))]
11643 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11644 [(set_attr "type" "sselog1")
11645 (set_attr "prefix_extra" "1")
11646 (set_attr "length_immediate" "1")
11647 (set_attr "prefix" "maybe_vex")
11648 (set_attr "mode" "TI")])
;; pclmulqdq on V2DI with an immediate restricted to 0..255 (operand 3).
;; Alternative 0 is the legacy form with operand 1 tied to the
;; destination; alternative 1 is the three-source-operand VEX form.
11650 (define_insn "pclmulqdq"
11651 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11652 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11653 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11654 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11658 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11659 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11660 [(set_attr "isa" "noavx,avx")
11661 (set_attr "type" "sselog1")
11662 (set_attr "prefix_extra" "1")
11663 (set_attr "length_immediate" "1")
11664 (set_attr "prefix" "orig,vex")
11665 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL describing vzeroall.  The vector has
;; nregs + 1 elements (nregs is 16 when TARGET_64BIT, otherwise 8):
;; element 0 is an unspec_volatile over const0_rtx, and elements 1..nregs
;; are SETs of each SSE register, viewed as V8SImode, to the zero vector.
11667 (define_expand "avx_vzeroall"
11668 [(match_par_dup 0 [(const_int 0)])]
11671 int nregs = TARGET_64BIT ? 16 : 8;
11674 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11676 XVECEXP (operands[0], 0, 0)
11677 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11680 for (regno = 0; regno < nregs; regno++)
11681 XVECEXP (operands[0], 0, regno + 1)
11682 = gen_rtx_SET (VOIDmode,
11683 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11684 CONST0_RTX (V8SImode));
;; Insn matching a PARALLEL accepted by the vzeroall_operation predicate
;; whose first element is the UNSPECV_VZEROALL unspec_volatile.  The
;; attributes mark it as having no modrm byte and no memory access.
11687 (define_insn "*avx_vzeroall"
11688 [(match_parallel 0 "vzeroall_operation"
11689 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11692 [(set_attr "type" "sse")
11693 (set_attr "modrm" "0")
11694 (set_attr "memory" "none")
11695 (set_attr "prefix" "vex")
11696 (set_attr "mode" "OI")])
11698 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11699 ;; if the upper 128bits are unused.
;; Modelled as an unspec_volatile (UNSPECV_VZEROUPPER) over a single
;; const_int operand; the attributes mark it as having no modrm byte and
;; no memory access.
11700 (define_insn "avx_vzeroupper"
11701 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11702 UNSPECV_VZEROUPPER)]
11705 [(set_attr "type" "sse")
11706 (set_attr "modrm" "0")
11707 (set_attr "memory" "none")
11708 (set_attr "prefix" "vex")
11709 (set_attr "mode" "OI")])
;; Maps each listed integer vector mode to a 128-bit mode with the same
;; element type: 256-bit modes map to their 128-bit counterpart, 128-bit
;; modes map to themselves.  Used as the source mode in
;; avx2_pbroadcast<mode>.
11711 (define_mode_attr AVXTOSSEMODE
11712 [(V4DI "V2DI") (V2DI "V2DI")
11713 (V8SI "V4SI") (V4SI "V4SI")
11714 (V16HI "V8HI") (V8HI "V8HI")
11715 (V32QI "V16QI") (V16QI "V16QI")])
;; vpbroadcast over the VI iterator.  The source (operand 1) is a
;; register or memory of mode <AVXTOSSEMODE>, from which element 0 is
;; selected (vec_select with parallel [0]).
11717 (define_insn "avx2_pbroadcast<mode>"
11718 [(set (match_operand:VI 0 "register_operand" "=x")
11720 (vec_select:<ssescalarmode>
11721 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11722 (parallel [(const_int 0)]))))]
11724 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11725 [(set_attr "type" "ssemov")
11726 (set_attr "prefix_extra" "1")
11727 (set_attr "prefix" "vex")
11728 (set_attr "mode" "<sseinsnmode>")])
;; vpermd on V8SI.  Note the operand order in the template: operand 2
;; (register only) is printed before operand 1 (register or memory), so
;; operand 1 occupies the instruction's final, memory-capable source slot.
11730 (define_insn "avx2_permvarv8si"
11731 [(set (match_operand:V8SI 0 "register_operand" "=x")
11733 [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
11734 (match_operand:V8SI 2 "register_operand" "x")]
11737 "vpermd\t{%1, %2, %0|%0, %2, %1}"
11738 [(set_attr "type" "sselog")
11739 (set_attr "prefix" "vex")
11740 (set_attr "mode" "OI")])
;; vpermpd on V4DF with an immediate restricted to 0..255 (operand 2).
;; Operand 1 may be a register or memory: its predicate is
;; nonimmediate_operand so that it agrees with the "xm" constraint (a
;; register_operand predicate would reject memory before the constraint
;; was ever consulted).  This matches the operand 1 spelling used by
;; avx2_permv4di_1.
11742 (define_insn "avx2_permv4df"
11743 [(set (match_operand:V4DF 0 "register_operand" "=x")
11745 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11746 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11749 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11750 [(set_attr "type" "sselog")
11751 (set_attr "prefix_extra" "1")
11752 (set_attr "prefix" "vex")
11753 (set_attr "mode" "OI")])
;; vpermps on V8SF with a V8SI operand 2 (register only).  As in
;; avx2_permvarv8si, the template prints operand 2 before operand 1, so
;; the register-or-memory operand 1 is the instruction's last source.
11755 (define_insn "avx2_permvarv8sf"
11756 [(set (match_operand:V8SF 0 "register_operand" "=x")
11758 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")
11759 (match_operand:V8SI 2 "register_operand" "x")]
11762 "vpermps\t{%1, %2, %0|%0, %2, %1}"
11763 [(set_attr "type" "sselog")
11764 (set_attr "prefix" "vex")
11765 (set_attr "mode" "OI")])
;; Expander taking an 8-bit immediate (operand 2).  The immediate is
;; split into four 2-bit fields (bits 1:0, 3:2, 5:4, 7:6) which are
;; passed as separate const_int selectors to avx2_permv4di_1.
11767 (define_expand "avx2_permv4di"
11768 [(match_operand:V4DI 0 "register_operand" "")
11769 (match_operand:V4DI 1 "nonimmediate_operand" "")
11770 (match_operand:SI 2 "const_0_to_255_operand" "")]
11773 int mask = INTVAL (operands[2]);
11774 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11775 GEN_INT ((mask >> 0) & 3),
11776 GEN_INT ((mask >> 2) & 3),
11777 GEN_INT ((mask >> 4) & 3),
11778 GEN_INT ((mask >> 6) & 3)));
;; Insn form of the V4DI permute with four explicit selectors, each in
;; 0..3 (operands 2-5).  The output code packs them back into one
;; immediate (operand 2 in bits 1:0 up to operand 5 in bits 7:6),
;; overwrites operands[2] with it and emits vpermq.
11782 (define_insn "avx2_permv4di_1"
11783 [(set (match_operand:V4DI 0 "register_operand" "=x")
11785 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11786 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11787 (match_operand 3 "const_0_to_3_operand" "")
11788 (match_operand 4 "const_0_to_3_operand" "")
11789 (match_operand 5 "const_0_to_3_operand" "")])))]
11793 mask |= INTVAL (operands[2]) << 0;
11794 mask |= INTVAL (operands[3]) << 2;
11795 mask |= INTVAL (operands[4]) << 4;
11796 mask |= INTVAL (operands[5]) << 6;
11797 operands[2] = GEN_INT (mask);
11798 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11800 [(set_attr "type" "sselog")
11801 (set_attr "prefix" "vex")
11802 (set_attr "mode" "OI")])
;; vperm2i128 on V4DI: register source (operand 1), register-or-memory
;; source (operand 2) and an immediate restricted to 0..255 (operand 3).
11804 (define_insn "avx2_permv2ti"
11805 [(set (match_operand:V4DI 0 "register_operand" "=x")
11807 [(match_operand:V4DI 1 "register_operand" "x")
11808 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11809 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11812 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11813 [(set_attr "type" "sselog")
11814 (set_attr "prefix" "vex")
11815 (set_attr "mode" "OI")])
;; vbroadcastsd from a register: duplicates element 0 of the V2DF
;; operand 1 (parallel [0]) into a V4DF destination.
11817 (define_insn "avx2_vec_dupv4df"
11818 [(set (match_operand:V4DF 0 "register_operand" "=x")
11819 (vec_duplicate:V4DF
11821 (match_operand:V2DF 1 "register_operand" "x")
11822 (parallel [(const_int 0)]))))]
11824 "vbroadcastsd\t{%1, %0|%0, %1}"
11825 [(set_attr "type" "sselog1")
11826 (set_attr "prefix" "vex")
11827 (set_attr "mode" "V4DF")])
11829 ;; Modes handled by AVX vec_dup patterns.
;; All four are 256-bit modes with 32-bit or 64-bit elements, integer
;; and floating point.
11830 (define_mode_iterator AVX_VEC_DUP_MODE
11831 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into an AVX_VEC_DUP_MODE register.  Two
;; alternatives: a memory source, which emits
;; vbroadcast<ssescalarmodesuffix>, and a discouraged ("?") register
;; source whose output template is not visible in this excerpt.
11833 (define_insn "vec_dup<mode>"
11834 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11835 (vec_duplicate:AVX_VEC_DUP_MODE
11836 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11839 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11841 [(set_attr "type" "ssemov")
11842 (set_attr "prefix_extra" "1")
11843 (set_attr "prefix" "vex")
11844 (set_attr "mode" "V8SF")])
;; vbroadcasti128 over VI_256: the source (operand 1) must be a memory
;; operand of the half-width vector mode.
11846 (define_insn "avx2_vbroadcasti128_<mode>"
11847 [(set (match_operand:VI_256 0 "register_operand" "=x")
11849 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11852 "vbroadcasti128\t{%1, %0|%0, %1}"
11853 [(set_attr "type" "ssemov")
11854 (set_attr "prefix_extra" "1")
11855 (set_attr "prefix" "vex")
11856 (set_attr "mode" "OI")])
;; Post-reload rewrite (condition: TARGET_AVX && reload_completed) of an
;; AVX_VEC_DUP_MODE vec_duplicate whose source is a register.  It first
;; duplicates the scalar into operands[2], which is the destination's
;; hard register viewed in <ssehalfvecmode>, and then concatenates that
;; half with itself to fill the full-width destination.
11859 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11860 (vec_duplicate:AVX_VEC_DUP_MODE
11861 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11862 "TARGET_AVX && reload_completed"
11863 [(set (match_dup 2)
11864 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11866 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11867 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width vector into a V_256 register.  Three
;; alternatives, by source:
;;   memory                  -> vbroadcast<i128>
;;   tied to the destination -> vinsert<i128> with immediate 1
;;   other register ("?x")   -> vperm2<i128> with immediate 0
11869 (define_insn "avx_vbroadcastf128_<mode>"
11870 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11872 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11876 vbroadcast<i128>\t{%1, %0|%0, %1}
11877 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11878 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11879 [(set_attr "type" "ssemov,sselog1,sselog1")
11880 (set_attr "prefix_extra" "1")
11881 (set_attr "length_immediate" "0,1,1")
11882 (set_attr "prefix" "vex")
11883 (set_attr "mode" "<sseinsnmode>")])
11885 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11886 ;; If it so happens that the input is in memory, use vbroadcast.
11887 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  elt is the selected element index (operand 3).  One path
;; re-addresses the memory source as SFmode at byte offset elt * 4 and
;; emits vbroadcastss; the other emits vpermilps with immediate
;; elt * 0x55, i.e. the 2-bit index repeated in all four fields.  The
;; case labels are not visible in this excerpt; the attribute lists
;; suggest alternatives 0/1 (memory) take the first path and
;; alternative 2 (register) the second -- confirm.
11888 (define_insn "*avx_vperm_broadcast_v4sf"
11889 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11891 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11892 (match_parallel 2 "avx_vbroadcast_operand"
11893 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11896 int elt = INTVAL (operands[3]);
11897 switch (which_alternative)
11901 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11902 return "vbroadcastss\t{%1, %0|%0, %1}";
11904 operands[2] = GEN_INT (elt * 0x55);
11905 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11907 gcc_unreachable ();
11910 [(set_attr "type" "ssemov,ssemov,sselog1")
11911 (set_attr "prefix_extra" "1")
11912 (set_attr "length_immediate" "0,0,1")
11913 (set_attr "prefix" "vex")
11914 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) broadcast-by-select, split after reload.  Two
;; strategies are visible:
;;  - emit avx_vpermil<mode> to replicate the wanted element within its
;;    128-bit lane, then avx_vperm2f128<mode>3 with mask
;;    (elt / (<ssescalarnum> / 2)) * 0x11 to copy that lane to both lanes;
;;  - re-address operand 1 as a scalar at byte offset
;;    elt * GET_MODE_SIZE (<ssescalarmode>mode), leaving the vec_duplicate
;;    replacement pattern to do the broadcast.
;; The condition choosing between them is not visible in this excerpt;
;; presumably register vs. memory source -- confirm.
11916 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11917 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11919 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11920 (match_parallel 2 "avx_vbroadcast_operand"
11921 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11924 "&& reload_completed"
11925 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11927 rtx op0 = operands[0], op1 = operands[1];
11928 int elt = INTVAL (operands[3]);
11934 /* Shuffle element we care about into all elements of the 128-bit lane.
11935 The other lane gets shuffled too, but we don't care. */
11936 if (<MODE>mode == V4DFmode)
11937 mask = (elt & 1 ? 15 : 0);
11939 mask = (elt & 3) * 0x55;
11940 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11942 /* Shuffle the lane we care about into both lanes of the dest. */
11943 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11944 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11948 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11949 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the DFmode vector modes (VF2).  It decodes the immediate
;; into an explicit selector PARALLEL with one mask bit per element:
;; perm[0] and perm[1] take bits 0 and 1, and for V4DF perm[2] and
;; perm[3] take bits 2 and 3 offset by 2.
11952 (define_expand "avx_vpermil<mode>"
11953 [(set (match_operand:VF2 0 "register_operand" "")
11955 (match_operand:VF2 1 "nonimmediate_operand" "")
11956 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11959 int mask = INTVAL (operands[2]);
11960 rtx perm[<ssescalarnum>];
11962 perm[0] = GEN_INT (mask & 1);
11963 perm[1] = GEN_INT ((mask >> 1) & 1);
11964 if (<MODE>mode == V4DFmode)
11966 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11967 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11971 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the SFmode vector modes (VF1).  It decodes the immediate
;; into an explicit selector PARALLEL with two mask bits per element:
;; perm[0..3] take the four 2-bit fields, and for V8SF perm[4..7] reuse
;; the same four fields offset by 4.
11974 (define_expand "avx_vpermil<mode>"
11975 [(set (match_operand:VF1 0 "register_operand" "")
11977 (match_operand:VF1 1 "nonimmediate_operand" "")
11978 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11981 int mask = INTVAL (operands[2]);
11982 rtx perm[<ssescalarnum>];
11984 perm[0] = GEN_INT (mask & 3);
11985 perm[1] = GEN_INT ((mask >> 2) & 3);
11986 perm[2] = GEN_INT ((mask >> 4) & 3);
11987 perm[3] = GEN_INT ((mask >> 6) & 3);
11988 if (<MODE>mode == V8SFmode)
11990 perm[4] = GEN_INT ((mask & 3) + 4);
11991 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11992 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11993 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11997 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form vpermil insn over VF.  The selector PARALLEL (operand 2)
;; is validated in the insn condition by avx_vpermilp_parallel; at output
;; time the same helper's result minus one becomes the immediate that
;; replaces operands[2].
12000 (define_insn "*avx_vpermilp<mode>"
12001 [(set (match_operand:VF 0 "register_operand" "=x")
12003 (match_operand:VF 1 "nonimmediate_operand" "xm")
12004 (match_parallel 2 ""
12005 [(match_operand 3 "const_int_operand" "")])))]
12007 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
12009 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
12010 operands[2] = GEN_INT (mask);
12011 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
12013 [(set_attr "type" "sselog")
12014 (set_attr "prefix_extra" "1")
12015 (set_attr "length_immediate" "1")
12016 (set_attr "prefix" "vex")
12017 (set_attr "mode" "<MODE>")])
;; Variable-control vpermil over VF: operand 1 is a register and
;; operand 2 is an integer vector of mode <sseintvecmode> (register or
;; memory).  There is no immediate (no length_immediate attribute).
12019 (define_insn "avx_vpermilvar<mode>3"
12020 [(set (match_operand:VF 0 "register_operand" "=x")
12022 [(match_operand:VF 1 "register_operand" "x")
12023 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
12026 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12027 [(set_attr "type" "sselog")
12028 (set_attr "prefix_extra" "1")
12029 (set_attr "prefix" "vex")
12030 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 over AVX256MODE2P.  The default pattern is the
;; UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor bit 7 of the
;; immediate is set ((mask & 0x88) == 0) the operation is instead
;; expressed as a vec_select from the vec_concat of operands 1 and 2:
;; the low half of the selector starts at (mask & 3) * nelt2 and the
;; high half at ((mask >> 4) & 3) * nelt2, each covering nelt2
;; consecutive elements.
12032 (define_expand "avx_vperm2f128<mode>3"
12033 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12034 (unspec:AVX256MODE2P
12035 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12036 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12037 (match_operand:SI 3 "const_0_to_255_operand" "")]
12038 UNSPEC_VPERMIL2F128))]
12041 int mask = INTVAL (operands[3]);
12042 if ((mask & 0x88) == 0)
12044 rtx perm[<ssescalarnum>], t1, t2;
12045 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12047 base = (mask & 3) * nelt2;
12048 for (i = 0; i < nelt2; ++i)
12049 perm[i] = GEN_INT (base + i);
12051 base = ((mask >> 4) & 3) * nelt2;
12052 for (i = 0; i < nelt2; ++i)
12053 perm[i + nelt2] = GEN_INT (base + i);
12055 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12056 operands[1], operands[2]);
12057 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12058 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12059 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12065 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12066 ;; means that in order to represent this properly in rtl we'd have to
12067 ;; nest *another* vec_concat with a zero operand and do the select from
12068 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form (UNSPEC_VPERMIL2F128): any immediate in 0..255 is
;; accepted and passed through to vperm2<i128> unchanged.
12069 (define_insn "*avx_vperm2f128<mode>_full"
12070 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12071 (unspec:AVX256MODE2P
12072 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12073 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12074 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12075 UNSPEC_VPERMIL2F128))]
12077 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12078 [(set_attr "type" "sselog")
12079 (set_attr "prefix_extra" "1")
12080 (set_attr "length_immediate" "1")
12081 (set_attr "prefix" "vex")
12082 (set_attr "mode" "<sseinsnmode>")])
;; vec_select-of-vec_concat form.  The selector PARALLEL (operand 3) is
;; validated in the insn condition by avx_vperm2f128_parallel, whose
;; result minus one is the immediate.  Two special cases return a
;; vinsert<i128> with immediate 0 or 1 instead (the tests choosing them
;; are not visible in this excerpt); otherwise operands[3] is replaced by
;; the immediate and vperm2<i128> is emitted.
12084 (define_insn "*avx_vperm2f128<mode>_nozero"
12085 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12086 (vec_select:AVX256MODE2P
12087 (vec_concat:<ssedoublevecmode>
12088 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12089 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12090 (match_parallel 3 ""
12091 [(match_operand 4 "const_int_operand" "")])))]
12093 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12095 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12097 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
12099 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
12100 operands[3] = GEN_INT (mask);
12101 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12103 [(set_attr "type" "sselog")
12104 (set_attr "prefix_extra" "1")
12105 (set_attr "length_immediate" "1")
12106 (set_attr "prefix" "vex")
12107 (set_attr "mode" "<sseinsnmode>")])
;; Expander over V_256 that dispatches on the 0/1 immediate (operand 3)
;; to gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>; any other value is
;; unreachable.  The chosen generator receives operands 0, 1 and 2.
12109 (define_expand "avx_vinsertf128<mode>"
12110 [(match_operand:V_256 0 "register_operand" "")
12111 (match_operand:V_256 1 "register_operand" "")
12112 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12113 (match_operand:SI 3 "const_0_to_1_operand" "")]
12116 rtx (*insn)(rtx, rtx, rtx);
12118 switch (INTVAL (operands[3]))
12121 insn = gen_vec_set_lo_<mode>;
12124 insn = gen_vec_set_hi_<mode>;
12127 gcc_unreachable ();
12130 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a V4DI: the result is operand 2 (V2DI) as the
;; first half followed by elements 2 and 3 of operand 1.  Emits
;; vinserti128 with immediate 0.
12134 (define_insn "avx2_vec_set_lo_v4di"
12135 [(set (match_operand:V4DI 0 "register_operand" "=x")
12137 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12139 (match_operand:V4DI 1 "register_operand" "x")
12140 (parallel [(const_int 2) (const_int 3)]))))]
12142 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12143 [(set_attr "type" "sselog")
12144 (set_attr "prefix_extra" "1")
12145 (set_attr "length_immediate" "1")
12146 (set_attr "prefix" "vex")
12147 (set_attr "mode" "OI")])
;; Replace the high half of a V4DI: the result is elements 0 and 1 of
;; operand 1 followed by operand 2 (V2DI).  Emits vinserti128 with
;; immediate 1.
12149 (define_insn "avx2_vec_set_hi_v4di"
12150 [(set (match_operand:V4DI 0 "register_operand" "=x")
12153 (match_operand:V4DI 1 "register_operand" "x")
12154 (parallel [(const_int 0) (const_int 1)]))
12155 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12157 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12158 [(set_attr "type" "sselog")
12159 (set_attr "prefix_extra" "1")
12160 (set_attr "length_immediate" "1")
12161 (set_attr "prefix" "vex")
12162 (set_attr "mode" "OI")])
;; Replace the low half for the four-element 256-bit modes (VI8F_256):
;; vec_concat of operand 2 with elements 2 and 3 of operand 1.  Emits
;; vinsert<i128> with immediate 0.
12164 (define_insn "vec_set_lo_<mode>"
12165 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12166 (vec_concat:VI8F_256
12167 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12168 (vec_select:<ssehalfvecmode>
12169 (match_operand:VI8F_256 1 "register_operand" "x")
12170 (parallel [(const_int 2) (const_int 3)]))))]
12172 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12173 [(set_attr "type" "sselog")
12174 (set_attr "prefix_extra" "1")
12175 (set_attr "length_immediate" "1")
12176 (set_attr "prefix" "vex")
12177 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half for the four-element 256-bit modes (VI8F_256):
;; vec_concat of elements 0 and 1 of operand 1 with operand 2.  Emits
;; vinsert<i128> with immediate 1.
12179 (define_insn "vec_set_hi_<mode>"
12180 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12181 (vec_concat:VI8F_256
12182 (vec_select:<ssehalfvecmode>
12183 (match_operand:VI8F_256 1 "register_operand" "x")
12184 (parallel [(const_int 0) (const_int 1)]))
12185 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12187 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12188 [(set_attr "type" "sselog")
12189 (set_attr "prefix_extra" "1")
12190 (set_attr "length_immediate" "1")
12191 (set_attr "prefix" "vex")
12192 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half for the eight-element 256-bit modes (VI4F_256):
;; vec_concat of operand 2 with elements 4..7 of operand 1.  Emits
;; vinsert<i128> with immediate 0.
12194 (define_insn "vec_set_lo_<mode>"
12195 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12196 (vec_concat:VI4F_256
12197 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12198 (vec_select:<ssehalfvecmode>
12199 (match_operand:VI4F_256 1 "register_operand" "x")
12200 (parallel [(const_int 4) (const_int 5)
12201 (const_int 6) (const_int 7)]))))]
12203 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12204 [(set_attr "type" "sselog")
12205 (set_attr "prefix_extra" "1")
12206 (set_attr "length_immediate" "1")
12207 (set_attr "prefix" "vex")
12208 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half for the eight-element 256-bit modes (VI4F_256):
;; vec_concat of elements 0..3 of operand 1 with operand 2.  Emits
;; vinsert<i128> with immediate 1.
12210 (define_insn "vec_set_hi_<mode>"
12211 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12212 (vec_concat:VI4F_256
12213 (vec_select:<ssehalfvecmode>
12214 (match_operand:VI4F_256 1 "register_operand" "x")
12215 (parallel [(const_int 0) (const_int 1)
12216 (const_int 2) (const_int 3)]))
12217 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12219 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12220 [(set_attr "type" "sselog")
12221 (set_attr "prefix_extra" "1")
12222 (set_attr "length_immediate" "1")
12223 (set_attr "prefix" "vex")
12224 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a V16HI: operand 2 (V8HI) is combined with
;; elements 8..15 of operand 1.  The mnemonic is spelled with the %~
;; output modifier (vinsert%~128) and immediate 0.
12226 (define_insn "vec_set_lo_v16hi"
12227 [(set (match_operand:V16HI 0 "register_operand" "=x")
12229 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12231 (match_operand:V16HI 1 "register_operand" "x")
12232 (parallel [(const_int 8) (const_int 9)
12233 (const_int 10) (const_int 11)
12234 (const_int 12) (const_int 13)
12235 (const_int 14) (const_int 15)]))))]
12237 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12238 [(set_attr "type" "sselog")
12239 (set_attr "prefix_extra" "1")
12240 (set_attr "length_immediate" "1")
12241 (set_attr "prefix" "vex")
12242 (set_attr "mode" "OI")])
;; Replace the high half of a V16HI: elements 0..7 of operand 1 are
;; combined with operand 2 (V8HI).  Emits vinsert%~128 with immediate 1.
12244 (define_insn "vec_set_hi_v16hi"
12245 [(set (match_operand:V16HI 0 "register_operand" "=x")
12248 (match_operand:V16HI 1 "register_operand" "x")
12249 (parallel [(const_int 0) (const_int 1)
12250 (const_int 2) (const_int 3)
12251 (const_int 4) (const_int 5)
12252 (const_int 6) (const_int 7)]))
12253 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12255 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12256 [(set_attr "type" "sselog")
12257 (set_attr "prefix_extra" "1")
12258 (set_attr "length_immediate" "1")
12259 (set_attr "prefix" "vex")
12260 (set_attr "mode" "OI")])
;; Replace the low half of a V32QI: operand 2 (V16QI) is combined with
;; elements 16..31 of operand 1.  Emits vinsert%~128 with immediate 0.
12262 (define_insn "vec_set_lo_v32qi"
12263 [(set (match_operand:V32QI 0 "register_operand" "=x")
12265 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12267 (match_operand:V32QI 1 "register_operand" "x")
12268 (parallel [(const_int 16) (const_int 17)
12269 (const_int 18) (const_int 19)
12270 (const_int 20) (const_int 21)
12271 (const_int 22) (const_int 23)
12272 (const_int 24) (const_int 25)
12273 (const_int 26) (const_int 27)
12274 (const_int 28) (const_int 29)
12275 (const_int 30) (const_int 31)]))))]
12277 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12278 [(set_attr "type" "sselog")
12279 (set_attr "prefix_extra" "1")
12280 (set_attr "length_immediate" "1")
12281 (set_attr "prefix" "vex")
12282 (set_attr "mode" "OI")])
;; Replace the high half of a V32QI: elements 0..15 of operand 1 are
;; combined with operand 2 (V16QI).  Emits vinsert%~128 with immediate 1.
12284 (define_insn "vec_set_hi_v32qi"
12285 [(set (match_operand:V32QI 0 "register_operand" "=x")
12288 (match_operand:V32QI 1 "register_operand" "x")
12289 (parallel [(const_int 0) (const_int 1)
12290 (const_int 2) (const_int 3)
12291 (const_int 4) (const_int 5)
12292 (const_int 6) (const_int 7)
12293 (const_int 8) (const_int 9)
12294 (const_int 10) (const_int 11)
12295 (const_int 12) (const_int 13)
12296 (const_int 14) (const_int 15)]))
12297 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12299 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12300 [(set_attr "type" "sselog")
12301 (set_attr "prefix_extra" "1")
12302 (set_attr "length_immediate" "1")
12303 (set_attr "prefix" "vex")
12304 (set_attr "mode" "OI")])
;; Masked load over V48_AVX2: operand 2 is the mask register (integer
;; vector mode <sseintvecmode>) and operand 1 is the memory source.  Note
;; the operand numbering: the mask is operand 2 here but operand 1 in the
;; maskstore pattern.
12306 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12307 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12309 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12310 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12313 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12314 [(set_attr "type" "sselog1")
12315 (set_attr "prefix_extra" "1")
12316 (set_attr "prefix" "vex")
12317 (set_attr "mode" "<sseinsnmode>")])
;; Masked store over V48_AVX2: the destination is a memory operand with
;; a "+m" constraint (read as well as written), operand 1 is the mask
;; register and operand 2 is the data register.
12319 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12320 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
12322 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12323 (match_operand:V48_AVX2 2 "register_operand" "x")
12327 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12328 [(set_attr "type" "sselog1")
12329 (set_attr "prefix_extra" "1")
12330 (set_attr "prefix" "vex")
12331 (set_attr "mode" "<sseinsnmode>")])
;; Cast of a half-width vector to the full AVX256MODE2P mode, split after
;; reload into a plain move.  Before the move one side is re-viewed so
;; both have the same mode: either op0 becomes a <ssehalfvecmode> REG
;; with op0's register number, or op1 becomes a <MODE>mode REG with op1's
;; register number.  The test choosing between the two is not visible in
;; this excerpt -- presumably whether op0 is a register; confirm.
12333 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12334 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12335 (unspec:AVX256MODE2P
12336 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12340 "&& reload_completed"
12343 rtx op0 = operands[0];
12344 rtx op1 = operands[1];
12346 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12348 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12349 emit_move_insn (op0, op1);
;; vec_init expander for the 256-bit modes (V_256); all the work is
;; delegated to ix86_expand_vector_init, called with `false' as its
;; first argument.
12353 (define_expand "vec_init<mode>"
12354 [(match_operand:V_256 0 "register_operand" "")
12355 (match_operand 1 "" "")]
12358 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that dispatches on the 0/1 immediate (operand 2) to
;; gen_vec_extract_lo_v4di or gen_vec_extract_hi_v4di; any other value is
;; unreachable.  The chosen generator receives operands 0 and 1.
12362 (define_expand "avx2_extracti128"
12363 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12364 (match_operand:V4DI 1 "register_operand" "")
12365 (match_operand:SI 2 "const_0_to_1_operand" "")]
12368 rtx (*insn)(rtx, rtx);
12370 switch (INTVAL (operands[2]))
12373 insn = gen_vec_extract_lo_v4di;
12376 insn = gen_vec_extract_hi_v4di;
12379 gcc_unreachable ();
12382 emit_insn (insn (operands[0], operands[1]));
;; Expander that dispatches on the 0/1 immediate (operand 3) to
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di; any other value
;; is unreachable.  The chosen generator receives operands 0, 1 and 2.
12386 (define_expand "avx2_inserti128"
12387 [(match_operand:V4DI 0 "register_operand" "")
12388 (match_operand:V4DI 1 "register_operand" "")
12389 (match_operand:V2DI 2 "nonimmediate_operand" "")
12390 (match_operand:SI 3 "const_0_to_1_operand" "")]
12393 rtx (*insn)(rtx, rtx, rtx);
12395 switch (INTVAL (operands[3]))
12398 insn = gen_avx2_vec_set_lo_v4di;
12401 insn = gen_avx2_vec_set_hi_v4di;
12404 gcc_unreachable ();
12407 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vpsravd over VI4_AVX2: the shift-count operand (operand 2) is itself a
;; vector of the same mode, in a register or memory.
12411 (define_insn "avx2_ashrv<mode>"
12412 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12414 (match_operand:VI4_AVX2 1 "register_operand" "x")
12415 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12417 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12418 [(set_attr "type" "sseishft")
12419 (set_attr "prefix" "vex")
12420 (set_attr "mode" "<sseinsnmode>")])
;; Shifts from the any_lshift code iterator over VI48_AVX2 with a vector
;; shift count (operand 2, register or memory).  The mnemonic is built
;; as vp<vshift>v<ssemodesuffix>.
12422 (define_insn "avx2_<shift_insn>v<mode>"
12423 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12424 (any_lshift:VI48_AVX2
12425 (match_operand:VI48_AVX2 1 "register_operand" "x")
12426 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12428 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12429 [(set_attr "type" "sseishft")
12430 (set_attr "prefix" "vex")
12431 (set_attr "mode" "<sseinsnmode>")])
;; Concatenation of two half-width vectors into a V_256 register.  Two
;; alternatives for operand 2:
;;   "xm" -- emits vinsert<i128> with immediate 1 (type sselog);
;;   "C"  -- a constant accepted by vector_move_operand; emits a 128-bit
;;           vmovaps / vmovapd / vmovdqa of operand 1 into %x0, chosen by
;;           the insn's mode attribute (type ssemov).
12433 (define_insn "avx_vec_concat<mode>"
12434 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12436 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12437 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12440 switch (which_alternative)
12443 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12445 switch (get_attr_mode (insn))
12448 return "vmovaps\t{%1, %x0|%x0, %1}";
12450 return "vmovapd\t{%1, %x0|%x0, %1}";
12452 return "vmovdqa\t{%1, %x0|%x0, %1}";
12455 gcc_unreachable ();
12458 [(set_attr "type" "sselog,ssemov")
12459 (set_attr "prefix_extra" "1,*")
12460 (set_attr "length_immediate" "1,*")
12461 (set_attr "prefix" "vex")
12462 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps (register form): operand 0 (V4SF register) receives elements
;; 0-3 of a V8SF unspec whose only input is operand 1 (V8HI register).
;; Emits vcvtph2ps; the mode attribute is V4SF.
12464 (define_insn "vcvtph2ps"
12465 [(set (match_operand:V4SF 0 "register_operand" "=x")
12467 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12469 (parallel [(const_int 0) (const_int 1)
12470 (const_int 2) (const_int 3)])))]
12472 "vcvtph2ps\t{%1, %0|%0, %1}"
12473 [(set_attr "type" "ssecvt")
12474 (set_attr "prefix" "vex")
12475 (set_attr "mode" "V4SF")])
;; *vcvtph2ps_load (memory form): operand 0 (V4SF register) is set to an
;; UNSPEC_VCVTPH2PS unspec of operand 1, a V4HI memory operand.  Emits the
;; same vcvtph2ps template as the register form.
;; NOTE(review): the mode attribute here is V8SF although the destination is
;; V4SF and the register-form vcvtph2ps pattern uses V4SF -- confirm whether
;; this difference is intended.
12477 (define_insn "*vcvtph2ps_load"
12478 [(set (match_operand:V4SF 0 "register_operand" "=x")
12479 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12480 UNSPEC_VCVTPH2PS))]
12482 "vcvtph2ps\t{%1, %0|%0, %1}"
12483 [(set_attr "type" "ssecvt")
12484 (set_attr "prefix" "vex")
12485 (set_attr "mode" "V8SF")])
;; vcvtph2ps256: operand 0 (V8SF register) is set to an UNSPEC_VCVTPH2PS
;; unspec of operand 1, a V8HI register or memory operand ("xm").  Emits
;; vcvtph2ps with mode attribute V8SF.
12487 (define_insn "vcvtph2ps256"
12488 [(set (match_operand:V8SF 0 "register_operand" "=x")
12489 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12490 UNSPEC_VCVTPH2PS))]
12492 "vcvtph2ps\t{%1, %0|%0, %1}"
12493 [(set_attr "type" "ssecvt")
12494 (set_attr "prefix" "vex")
12495 (set_attr "mode" "V8SF")])
;; Expander vcvtps2ph.  Operand 0 is a V8HI register; the visible unspec
;; yields V4HI from operand 1 (V4SF register) and operand 2 (SImode constant
;; accepted by const_0_to_255_operand).  The preparation statement sets
;; operands[3] to CONST0_RTX (V4HImode), the all-zero V4HI constant that the
;; *vcvtps2ph insn matches as its operand 3.
;; NOTE(review): the RTL lines that combine the unspec with operand 3 are not
;; visible in this excerpt.
12497 (define_expand "vcvtps2ph"
12498 [(set (match_operand:V8HI 0 "register_operand" "")
12500 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12501 (match_operand:SI 2 "const_0_to_255_operand" "")]
12505 "operands[3] = CONST0_RTX (V4HImode);")
;; *vcvtps2ph (register destination): operand 0 is a V8HI register.  The
;; source combines a V4HI unspec of operand 1 (V4SF register) and operand 2
;; (SImode immediate, constraint "N") with operand 3, which must satisfy
;; const0_operand in V4HImode.  Emits vcvtps2ph with operand 2 as the
;; immediate.
12507 (define_insn "*vcvtps2ph"
12508 [(set (match_operand:V8HI 0 "register_operand" "=x")
12510 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12511 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12513 (match_operand:V4HI 3 "const0_operand" "")))]
12515 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12516 [(set_attr "type" "ssecvt")
12517 (set_attr "prefix" "vex")
12518 (set_attr "mode" "V4SF")])
;; *vcvtps2ph_store (memory destination): operand 0 is a V4HI memory operand
;; set to an UNSPEC_VCVTPS2PH unspec of operand 1 (V4SF register) and
;; operand 2 (SImode immediate, constraint "N").  Emits vcvtps2ph.
12520 (define_insn "*vcvtps2ph_store"
12521 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12522 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12523 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12524 UNSPEC_VCVTPS2PH))]
12526 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12527 [(set_attr "type" "ssecvt")
12528 (set_attr "prefix" "vex")
12529 (set_attr "mode" "V4SF")])
;; vcvtps2ph256: operand 0 is a V8HI register or memory destination ("=xm")
;; set to an UNSPEC_VCVTPS2PH unspec of operand 1 (V8SF register) and
;; operand 2 (SImode immediate, constraint "N").  Emits vcvtps2ph with mode
;; attribute V8SF.
12531 (define_insn "vcvtps2ph256"
12532 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12533 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12534 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12535 UNSPEC_VCVTPS2PH))]
12537 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12538 [(set_attr "type" "ssecvt")
12539 (set_attr "prefix" "vex")
12540 (set_attr "mode" "V8SF")])
12542 ;; For gather* insn patterns
;; VEC_GATHER_MODE: the eight result modes the gather patterns are
;; instantiated for (DI/DF/SI/SF elements in two vector sizes each).  No
;; per-mode enabling condition is attached in this iterator.
12543 (define_mode_iterator VEC_GATHER_MODE
12544 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; VEC_GATHER_IDXSI: for each gather mode, the mode of the index-vector
;; operand in the avx2_gathersi<mode> patterns.  Every mode maps to V4SI
;; except V8SI/V8SF, which map to V8SI.
12545 (define_mode_attr VEC_GATHER_IDXSI
12546 [(V2DI "V4SI") (V2DF "V4SI")
12547 (V4DI "V4SI") (V4DF "V4SI")
12548 (V4SI "V4SI") (V4SF "V4SI")
12549 (V8SI "V8SI") (V8SF "V8SI")])
;; VEC_GATHER_IDXDI: for each gather mode, the mode of the index-vector
;; operand in the avx2_gatherdi<mode> patterns.  The two-element modes and
;; V4SI/V4SF map to V2DI; V4DI/V4DF and V8SI/V8SF map to V4DI.
12550 (define_mode_attr VEC_GATHER_IDXDI
12551 [(V2DI "V2DI") (V2DF "V2DI")
12552 (V4DI "V4DI") (V4DF "V4DI")
12553 (V4SI "V2DI") (V4SF "V2DI")
12554 (V8SI "V4DI") (V8SF "V4DI")])
;; VEC_GATHER_SRCDI: mode used for the register input operands of the
;; avx2_gatherdi<mode> patterns.  It equals the gather mode itself, except
;; that V8SI and V8SF map to the half-width V4SI and V4SF.
12555 (define_mode_attr VEC_GATHER_SRCDI
12556 [(V2DI "V2DI") (V2DF "V2DF")
12557 (V4DI "V4DI") (V4DF "V4DF")
12558 (V4SI "V4SI") (V4SF "V4SF")
12559 (V8SI "V4SI") (V8SF "V4SF")])
;; Expander avx2_gathersi<mode> (index vector in mode <VEC_GATHER_IDXSI>).
;; Operands:
;;   0     destination register, VEC_GATHER_MODE
;;   1, 4  VEC_GATHER_MODE register inputs of the gather unspec
;;   2     address accepted by vsib_address_operand
;;   3     index-vector register, <VEC_GATHER_IDXSI>
;;   5     SImode constant accepted by const1248_operand
;;   6     VEC_GATHER_MODE scratch that the pattern clobbers
;; The preparation code builds a Pmode UNSPEC_VSIBADDR unspec from operands
;; 2, 3 and 5 (the assignment target is not visible in this excerpt).
;; The operand 5 predicate is spelled exactly "const1248_operand", with no
;; trailing blank, so it agrees with the *avx2_gathersi<mode> insn patterns.
12561 (define_expand "avx2_gathersi<mode>"
12562 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12563 (unspec:VEC_GATHER_MODE
12564 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12565 (mem:<ssescalarmode>
12567 [(match_operand 2 "vsib_address_operand" "")
12568 (match_operand:<VEC_GATHER_IDXSI>
12569 3 "register_operand" "")
12570 (match_operand:SI 5 "const1248_operand" "")]))
12571 (mem:BLK (scratch))
12572 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12574 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12578 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12579 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gathersi<mode>: insn matched for the gather with a
;; <VEC_GATHER_IDXSI> index vector.
;;   0  destination register, early-clobber ("=&x")
;;   1  scratch register clobbered by the insn, early-clobber ("=&x")
;;   2  input tied to operand 0 by the "0" constraint
;;   3  address operand ("p"), 4 index-vector register, 6 SImode constant
;;      accepted by const1248_operand -- all inside the memory reference
;;      matched as operator 7 (vsib_mem_operator)
;;   5  input tied to the scratch operand 1 by the "1" constraint
;; The template prints the scratch (%1), the memory reference (%7) and the
;; destination (%0); the mnemonic is v<sseintprefix>gatherd<ssemodesuffix>.
;; NOTE(review): operand 5 is presumably the gather mask and operand 6 the
;; scale factor -- confirm against the instruction reference.
12582 (define_insn "*avx2_gathersi<mode>"
12583 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12584 (unspec:VEC_GATHER_MODE
12585 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12586 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12588 [(match_operand:P 3 "vsib_address_operand" "p")
12589 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
12590 (match_operand:SI 6 "const1248_operand" "n")]
12592 (mem:BLK (scratch))
12593 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12595 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12597 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12598 [(set_attr "type" "ssemov")
12599 (set_attr "prefix" "vex")
12600 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gathersi<mode>_2: variant of *avx2_gathersi<mode> with one fewer
;; match_operand -- no input is tied to operand 0 here, so the remaining
;; operands are renumbered: 2 address, 3 index vector, 5 const1248 constant,
;; 6 the vsib_mem_operator memory reference, 4 the input tied to scratch
;; operand 1.  The template is the same apart from the operand numbers.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this excerpt.
12602 (define_insn "*avx2_gathersi<mode>_2"
12603 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12604 (unspec:VEC_GATHER_MODE
12606 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12608 [(match_operand:P 2 "vsib_address_operand" "p")
12609 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
12610 (match_operand:SI 5 "const1248_operand" "n")]
12612 (mem:BLK (scratch))
12613 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
12615 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12617 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
12618 [(set_attr "type" "ssemov")
12619 (set_attr "prefix" "vex")
12620 (set_attr "mode" "<sseinsnmode>")])
;; Expander avx2_gatherdi<mode> (index vector in mode <VEC_GATHER_IDXDI>).
;; Operands:
;;   0     destination register, VEC_GATHER_MODE
;;   1, 4  <VEC_GATHER_SRCDI> register inputs of the gather unspec
;;   2     address accepted by vsib_address_operand
;;   3     index-vector register, <VEC_GATHER_IDXDI>
;;   5     SImode constant accepted by const1248_operand
;;   6     VEC_GATHER_MODE scratch that the pattern clobbers
;; The preparation code builds a Pmode UNSPEC_VSIBADDR unspec from operands
;; 2, 3 and 5 (the assignment target is not visible in this excerpt).
;; The operand 5 predicate is spelled exactly "const1248_operand", with no
;; trailing blank, so it agrees with the *avx2_gatherdi<mode> insn patterns.
12622 (define_expand "avx2_gatherdi<mode>"
12623 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12624 (unspec:VEC_GATHER_MODE
12625 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
12626 (mem:<ssescalarmode>
12628 [(match_operand 2 "vsib_address_operand" "")
12629 (match_operand:<VEC_GATHER_IDXDI>
12630 3 "register_operand" "")
12631 (match_operand:SI 5 "const1248_operand" "")]))
12632 (mem:BLK (scratch))
12633 (match_operand:<VEC_GATHER_SRCDI>
12634 4 "register_operand" "")]
12636 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12640 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12641 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gatherdi<mode>: insn matched for the gather with a
;; <VEC_GATHER_IDXDI> index vector.
;;   0  destination register, VEC_GATHER_MODE, early-clobber ("=&x")
;;   1  VEC_GATHER_MODE scratch clobbered by the insn ("=&x")
;;   2  <VEC_GATHER_SRCDI> input tied to operand 0 by the "0" constraint
;;   3  address ("p"), 4 index-vector register, 6 const1248 constant -- all
;;      inside the memory reference matched as operator 7
;;   5  <VEC_GATHER_SRCDI> input tied to scratch operand 1 by "1"
;; Unlike the gathersi insn, the template prints the tied inputs %2 and %5
;; rather than %0 and %1.  They share registers with operands 0 and 1 but
;; carry <VEC_GATHER_SRCDI> mode, which is narrower than the gather mode for
;; V8SI/V8SF.
;; NOTE(review): presumably this is what makes the register names print at
;; the narrower width for those modes -- confirm.
12644 (define_insn "*avx2_gatherdi<mode>"
12645 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12646 (unspec:VEC_GATHER_MODE
12647 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12648 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12650 [(match_operand:P 3 "vsib_address_operand" "p")
12651 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12652 (match_operand:SI 6 "const1248_operand" "n")]
12654 (mem:BLK (scratch))
12655 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12657 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12659 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
12660 [(set_attr "type" "ssemov")
12661 (set_attr "prefix" "vex")
12662 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gatherdi<mode>_2: variant of *avx2_gatherdi<mode> with no input tied
;; to operand 0.  Operands: 2 address, 3 index vector, 5 const1248 constant,
;; 6 the vsib_mem_operator memory reference, 4 the <VEC_GATHER_SRCDI> input
;; tied to scratch operand 1.
;; With no tied narrow-mode operand to print, the output code picks the
;; destination spelling itself: when <MODE>mode differs from
;; <VEC_GATHER_SRCDI>mode (per that attribute, only V8SI and V8SF) it prints
;; operand 0 through the %x0 modifier, otherwise as plain %0.
;; NOTE(review): %x presumably selects the 128-bit register name -- confirm.
;; The first element of the unspec vector is not visible in this excerpt.
12664 (define_insn "*avx2_gatherdi<mode>_2"
12665 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12666 (unspec:VEC_GATHER_MODE
12668 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12670 [(match_operand:P 2 "vsib_address_operand" "p")
12671 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12672 (match_operand:SI 5 "const1248_operand" "n")]
12674 (mem:BLK (scratch))
12675 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12677 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12680 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
12681 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
12682 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
12684 [(set_attr "type" "ssemov")
12685 (set_attr "prefix" "vex")
12686 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gatherdi<mode>_3: gatherq form whose destination (operand 0) has
;; mode <VEC_GATHER_SRCDI> and is set to elements 0-3 selected from the
;; gather result.  Operands: 2 input tied to operand 0, 3 address, 4
;; <VEC_GATHER_IDXDI> index vector, 6 const1248 constant, 7 the
;; vsib_mem_operator memory reference, 5 input tied to scratch operand 1.
;; The clobbered scratch (operand 1) is in a VI4F_256 mode.
;; NOTE(review): the inner unspec line is not visible in this excerpt;
;; presumably the pattern iterates over VI4F_256 and the unspec has that
;; mode -- confirm.
12688 (define_insn "*avx2_gatherdi<mode>_3"
12689 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
12690 (vec_select:<VEC_GATHER_SRCDI>
12692 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12693 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12695 [(match_operand:P 3 "vsib_address_operand" "p")
12696 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12697 (match_operand:SI 6 "const1248_operand" "n")]
12699 (mem:BLK (scratch))
12700 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12702 (parallel [(const_int 0) (const_int 1)
12703 (const_int 2) (const_int 3)])))
12704 (clobber (match_scratch:VI4F_256 1 "=&x"))]
12706 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
12707 [(set_attr "type" "ssemov")
12708 (set_attr "prefix" "vex")
12709 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gatherdi<mode>_4: variant of *avx2_gatherdi<mode>_3 with no input
;; tied to operand 0.  The <VEC_GATHER_SRCDI> destination is again set to
;; elements 0-3 of the gather result.  Operands: 2 address, 3
;; <VEC_GATHER_IDXDI> index vector, 5 const1248 constant, 6 the
;; vsib_mem_operator memory reference, 4 input tied to scratch operand 1
;; (a VI4F_256 scratch).
;; NOTE(review): the inner unspec line and its first element are not visible
;; in this excerpt.
12711 (define_insn "*avx2_gatherdi<mode>_4"
12712 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
12713 (vec_select:<VEC_GATHER_SRCDI>
12716 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12718 [(match_operand:P 2 "vsib_address_operand" "p")
12719 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12720 (match_operand:SI 5 "const1248_operand" "n")]
12722 (mem:BLK (scratch))
12723 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12725 (parallel [(const_int 0) (const_int 1)
12726 (const_int 2) (const_int 3)])))
12727 (clobber (match_scratch:VI4F_256 1 "=&x"))]
12729 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
12730 [(set_attr "type" "ssemov")
12731 (set_attr "prefix" "vex")
12732 (set_attr "mode" "<sseinsnmode>")])