;; Extracted from the DragonFly BSD source tree (dragonfly.git),
;; merge of vendor branch GCC47.
;; Path: contrib/gcc-4.7/gcc/config/i386/sse.md
;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; Unspec codes used by the SSE/AVX patterns in this file.  Each entry
;; tags an operation that has no direct RTL representation.
(define_c_enum "unspec" [
  ;; SSE
  UNSPEC_MOVNT
  UNSPEC_LOADU
  UNSPEC_STOREU

  ;; SSE3
  UNSPEC_LDDQU

  ;; SSSE3
  UNSPEC_PSHUFB
  UNSPEC_PSIGN
  UNSPEC_PALIGNR

  ;; For SSE4A support
  UNSPEC_EXTRQI
  UNSPEC_EXTRQ
  UNSPEC_INSERTQI
  UNSPEC_INSERTQ

  ;; For SSE4.1 support
  UNSPEC_BLENDV
  UNSPEC_INSERTPS
  UNSPEC_DP
  UNSPEC_MOVNTDQA
  UNSPEC_MPSADBW
  UNSPEC_PHMINPOSUW
  UNSPEC_PTEST

  ;; For SSE4.2 support
  UNSPEC_PCMPESTR
  UNSPEC_PCMPISTR

  ;; For FMA4 support
  UNSPEC_FMADDSUB
  UNSPEC_XOP_UNSIGNED_CMP
  UNSPEC_XOP_TRUEFALSE
  UNSPEC_XOP_PERMUTE
  UNSPEC_FRCZ

  ;; For AES support
  UNSPEC_AESENC
  UNSPEC_AESENCLAST
  UNSPEC_AESDEC
  UNSPEC_AESDECLAST
  UNSPEC_AESIMC
  UNSPEC_AESKEYGENASSIST

  ;; For PCLMUL support
  UNSPEC_PCLMUL

  ;; For AVX support
  UNSPEC_PCMP
  UNSPEC_VPERMIL
  UNSPEC_VPERMIL2
  UNSPEC_VPERMIL2F128
  UNSPEC_CAST
  UNSPEC_VTESTP
  UNSPEC_VCVTPH2PS
  UNSPEC_VCVTPS2PH

  ;; For AVX2 support
  UNSPEC_VPERMSI
  UNSPEC_VPERMDF
  UNSPEC_VPERMSF
  UNSPEC_VPERMTI
  UNSPEC_GATHER
  UNSPEC_VSIBADDR
])
90
;; Volatile unspec codes: operations with side effects that must not be
;; moved, combined, or deleted by the optimizers.
(define_c_enum "unspecv" [
  UNSPECV_LDMXCSR
  UNSPECV_STMXCSR
  UNSPECV_CLFLUSH
  UNSPECV_MONITOR
  UNSPECV_MWAIT
  UNSPECV_VZEROALL
  UNSPECV_VZEROUPPER
])
100
;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator V16
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI
   (V2TI "TARGET_AVX") V1TI
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") V2DF])

;; All vector modes
(define_mode_iterator V
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All 128bit vector modes
(define_mode_iterator V_128
  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])

;; All 256bit vector modes
(define_mode_iterator V_256
  [V32QI V16HI V8SI V4DI V8SF V4DF])

;; All vector float modes
(define_mode_iterator VF
  [(V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All SFmode vector float modes
(define_mode_iterator VF1
  [(V8SF "TARGET_AVX") V4SF])

;; All DFmode vector float modes
(define_mode_iterator VF2
  [(V4DF "TARGET_AVX") V2DF])

;; All 128bit vector float modes
(define_mode_iterator VF_128
  [V4SF (V2DF "TARGET_SSE2")])

;; All 256bit vector float modes
(define_mode_iterator VF_256
  [V8SF V4DF])
148
;; All vector integer modes
(define_mode_iterator VI
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI])

(define_mode_iterator VI_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

;; All QImode vector integer modes
(define_mode_iterator VI1
  [(V32QI "TARGET_AVX") V16QI])

;; All DImode vector integer modes
(define_mode_iterator VI8
  [(V4DI "TARGET_AVX") V2DI])

(define_mode_iterator VI1_AVX2
  [(V32QI "TARGET_AVX2") V16QI])

(define_mode_iterator VI2_AVX2
  [(V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI4_AVX2
  [(V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI8_AVX2
  [(V4DI "TARGET_AVX2") V2DI])

;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
  [(V2TI "TARGET_AVX2") V1TI])

;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
  [(V2TI "TARGET_AVX2") TI])

(define_mode_iterator VI12_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI24_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI124_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI248_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI48_AVX2
  [(V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator V48_AVX2
  [V4SF V2DF
   V8SF V4DF
   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
217
;; Instruction-name prefixes selecting the SSE2 form for 128-bit modes
;; and the AVX2 form for 256-bit modes.
(define_mode_attr sse2_avx2
  [(V16QI "sse2") (V32QI "avx2")
   (V8HI "sse2") (V16HI "avx2")
   (V4SI "sse2") (V8SI "avx2")
   (V2DI "sse2") (V4DI "avx2")
   (V1TI "sse2") (V2TI "avx2")])

(define_mode_attr ssse3_avx2
  [(V16QI "ssse3") (V32QI "avx2")
   (V8HI "ssse3") (V16HI "avx2")
   (V4SI "ssse3") (V8SI "avx2")
   (V2DI "ssse3") (V4DI "avx2")
   (TI "ssse3") (V2TI "avx2")])

(define_mode_attr sse4_1_avx2
  [(V16QI "sse4_1") (V32QI "avx2")
   (V8HI "sse4_1") (V16HI "avx2")
   (V4SI "sse4_1") (V8SI "avx2")
   (V2DI "sse4_1") (V4DI "avx2")])

(define_mode_attr avx_avx2
  [(V4SF "avx") (V2DF "avx")
   (V8SF "avx") (V4DF "avx")
   (V4SI "avx2") (V2DI "avx2")
   (V8SI "avx2") (V4DI "avx2")])

(define_mode_attr vec_avx2
  [(V16QI "vec") (V32QI "avx2")
   (V8HI "vec") (V16HI "avx2")
   (V4SI "vec") (V8SI "avx2")
   (V2DI "vec") (V4DI "avx2")])

(define_mode_attr ssedoublemode
  [(V16HI "V16SI") (V8HI "V8SI")])

(define_mode_attr ssebytemode
  [(V4DI "V32QI") (V2DI "V16QI")])
255
;; All 128bit vector integer modes
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])

;; All 256bit vector integer modes
(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])

;; Random 128bit vector integer mode combinations
(define_mode_iterator VI12_128 [V16QI V8HI])
(define_mode_iterator VI14_128 [V16QI V4SI])
(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
(define_mode_iterator VI24_128 [V8HI V4SI])
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
(define_mode_iterator VI48_128 [V4SI V2DI])

;; Random 256bit vector integer mode combinations
(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
(define_mode_iterator VI48_256 [V8SI V4DI])

;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])
280
;; Mapping from float mode to required SSE level
(define_mode_attr sse
  [(SF "sse") (DF "sse2")
   (V4SF "sse") (V2DF "sse2")
   (V8SF "avx") (V4DF "avx")])

(define_mode_attr sse2
  [(V16QI "sse2") (V32QI "avx")
   (V2DI "sse2") (V4DI "avx")])

(define_mode_attr sse3
  [(V16QI "sse3") (V32QI "avx")])

(define_mode_attr sse4_1
  [(V4SF "sse4_1") (V2DF "sse4_1")
   (V8SF "avx") (V4DF "avx")])

;; "256" for 256-bit modes, empty for 128-bit modes (instruction-name suffix).
(define_mode_attr avxsizesuffix
  [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
   (V8SF "256") (V4DF "256")
   (V4SF "") (V2DF "")])
303
;; SSE instruction mode
(define_mode_attr sseinsnmode
  [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
   (V8SF "V8SF") (V4DF "V4DF")
   (V4SF "V4SF") (V2DF "V2DF")
   (TI "TI")])

;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
  [(V8SF "V8SI") (V4DF "V4DI")
   (V4SF "V4SI") (V2DF "V2DI")
   (V8SI "V8SI") (V4DI "V4DI")
   (V4SI "V4SI") (V2DI "V2DI")
   (V16HI "V16HI") (V8HI "V8HI")
   (V32QI "V32QI") (V16QI "V16QI")])

;; Lower-case counterpart of sseintvecmode, for pattern-name substitution.
(define_mode_attr sseintvecmodelower
  [(V8SF "v8si") (V4DF "v4di")
   (V4SF "v4si") (V2DF "v2di")
   (V8SI "v8si") (V4DI "v4di")
   (V4SI "v4si") (V2DI "v2di")
   (V16HI "v16hi") (V8HI "v8hi")
   (V32QI "v32qi") (V16QI "v16qi")])

;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublevecmode
  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
   (V8SF "V16SF") (V4DF "V8DF")
   (V4SF "V8SF") (V2DF "V4DF")])

;; Mapping of vector modes to a vector mode of half size
(define_mode_attr ssehalfvecmode
  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
   (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
   (V8SF "V4SF") (V4DF "V2DF")
   (V4SF "V2SF")])

;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode
  [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
   (V8SF "SF") (V4DF "DF")
   (V4SF "SF") (V2DF "DF")])

;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum
  [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
   (V8SF "8") (V4DF "4")
   (V4SF "4") (V2DF "2")])
356
;; SSE prefix for integer vector modes
(define_mode_attr sseintprefix
  [(V2DI "p") (V2DF "")
   (V4DI "p") (V4DF "")
   (V4SI "p") (V4SF "")
   (V8SI "p") (V8SF "")])

;; SSE scalar suffix for vector modes
(define_mode_attr ssescalarmodesuffix
  [(SF "ss") (DF "sd")
   (V8SF "ss") (V4DF "sd")
   (V4SF "ss") (V2DF "sd")
   (V8SI "ss") (V4DI "sd")
   (V4SI "d")])

;; Pack/unpack vector modes
(define_mode_attr sseunpackmode
  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])

(define_mode_attr ssepackmode
  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])

;; Mapping of the max integer size for xop rotate immediate constraint
(define_mode_attr sserotatemax
  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])

;; Mapping of mode to cast intrinsic name
(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])

;; Instruction suffix for sign and zero extensions.
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])

;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
(define_mode_attr i128
  [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
   (V8SI "%~128") (V4DI "%~128")])

;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])

;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
402
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.
413
;; Generic vector move; ix86_expand_vector_move legitimizes the operands
;; (forcing at most one of them into memory) before emitting the move.
(define_expand "mov<mode>"
  [(set (match_operand:V16 0 "nonimmediate_operand" "")
	(match_operand:V16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
422
;; Vector move insn.  Alternative 0 materializes a standard SSE constant;
;; alternatives 1/2 are register<->memory moves, choosing between aps/apd/dqa
;; opcodes (or their unaligned AVX forms) based on the effective insn mode.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
	(match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      switch (get_attr_mode (insn))
	{
	case MODE_V8SF:
	case MODE_V4SF:
	  if (TARGET_AVX
	      && (misaligned_operand (operands[0], <MODE>mode)
		  || misaligned_operand (operands[1], <MODE>mode)))
	    return "vmovups\t{%1, %0|%0, %1}";
	  else
	    return "%vmovaps\t{%1, %0|%0, %1}";

	case MODE_V4DF:
	case MODE_V2DF:
	  if (TARGET_AVX
	      && (misaligned_operand (operands[0], <MODE>mode)
		  || misaligned_operand (operands[1], <MODE>mode)))
	    return "vmovupd\t{%1, %0|%0, %1}";
	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "%vmovaps\t{%1, %0|%0, %1}";
	  else
	    return "%vmovapd\t{%1, %0|%0, %1}";

	case MODE_OI:
	case MODE_TI:
	  if (TARGET_AVX
	      && (misaligned_operand (operands[0], <MODE>mode)
		  || misaligned_operand (operands[1], <MODE>mode)))
	    return "vmovdqu\t{%1, %0|%0, %1}";
	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "%vmovaps\t{%1, %0|%0, %1}";
	  else
	    return "%vmovdqa\t{%1, %0|%0, %1}";

	default:
	  gcc_unreachable ();
	}
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
	(cond [(match_test "TARGET_AVX")
		 (const_string "<sseinsnmode>")
	       (ior (ior (match_test "optimize_function_for_size_p (cfun)")
			 (not (match_test "TARGET_SSE2")))
		    (and (eq_attr "alternative" "2")
			 (match_test "TARGET_SSE_TYPELESS_STORES")))
		 (const_string "V4SF")
	       (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
		 (const_string "V4SF")
	       (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
		 (const_string "V2DF")
	      ]
	      (const_string "TI")))])
492
;; Move the low quadword of a V2DI and zero the high quadword (movq).
(define_insn "sse2_movq128"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(vec_concat:V2DI
	  (vec_select:DI
	    (match_operand:V2DI 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0)]))
	  (const_int 0)))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
505
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register.  To facilitate this happy circumstance, this pattern won't
;; split until after register allocation.  If the 64-bit value didn't
;; come from memory, this is the best we can do.  This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.

(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
	  (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
	 Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
				  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
				  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
					     operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
				   operands[1], const0_rtx));
  else
    gcc_unreachable ();
})
542
;; Rewrite a zero-extended scalar load of SF as a vec_merge of a duplicated
;; scalar with a zero vector, exposing the movss form to the optimizers.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
	(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
	(vec_merge:V4SF
	  (vec_duplicate:V4SF (match_dup 1))
	  (match_dup 2)
	  (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})
556
;; DF counterpart of the split above: a zero-extended scalar DF load becomes
;; a vec_concat with zero (the movsd form).
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
	(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})
566
;; Push a vector register onto the stack.
(define_expand "push<mode>1"
  [(match_operand:V16 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
574
;; Misaligned vector move; expands to the appropriate unaligned load/store.
(define_expand "movmisalign<mode>"
  [(set (match_operand:V16 0 "nonimmediate_operand" "")
	(match_operand:V16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
583
;; Unaligned packed-float load (movups/movupd), wrapped in an unspec so it
;; is not merged with aligned accesses.
(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x")
	(unspec:VF
	  [(match_operand:VF 1 "memory_operand" "m")]
	  UNSPEC_LOADU))]
  "TARGET_SSE"
  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
595
;; Unaligned packed-float store (movups/movupd).
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
	(unspec:VF
	  [(match_operand:VF 1 "register_operand" "x")]
	  UNSPEC_STOREU))]
  "TARGET_SSE"
  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
607
;; Unaligned integer vector load (movdqu).
(define_insn "<sse2>_loaddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
		    UNSPEC_LOADU))]
  "TARGET_SSE2"
  "%vmovdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
623
;; Unaligned integer vector store (movdqu).
(define_insn "<sse2>_storedqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "memory_operand" "=m")
	(unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
		    UNSPEC_STOREU))]
  "TARGET_SSE2"
  "%vmovdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
639
;; SSE3 lddqu: unaligned load optimized for cache-line-splitting loads.
(define_insn "<sse3>_lddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
		    UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "%vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "0")))
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
660
;; Non-temporal integer store from a general register (movnti).
(define_insn "sse2_movnti<mode>"
  [(set (match_operand:SWI48 0 "memory_operand" "=m")
	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
		      UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "<MODE>")])
670
;; Non-temporal packed-float store (movntps/movntpd).
(define_insn "<sse>_movnt<mode>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
	(unspec:VF [(match_operand:VF 1 "register_operand" "x")]
		   UNSPEC_MOVNT))]
  "TARGET_SSE"
  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
680
;; Non-temporal integer vector store (movntdq).
(define_insn "<sse2>_movnt<mode>"
  [(set (match_operand:VI8 0 "memory_operand" "=m")
	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
		    UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "%vmovntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
695
; Expand patterns for non-temporal stores.  At the moment, only those
; that directly map to insns are defined; it would be possible to
; define patterns for other modes that would expand to several insns.

;; Modes handled by storent patterns.
(define_mode_iterator STORENT_MODE
  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
   (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

(define_expand "storent<mode>"
  [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
	(unspec:STORENT_MODE
	  [(match_operand:STORENT_MODE 1 "register_operand" "")]
	  UNSPEC_MOVNT))]
  "TARGET_SSE")
714
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
720
;; Vector abs/neg, implemented as a masked and/xor against a sign-bit constant.
(define_expand "<code><mode>2"
  [(set (match_operand:VF 0 "register_operand" "")
	(absneg:VF
	  (match_operand:VF 1 "register_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
727
;; Split abs/neg into the underlying AND/XOR with the mask constant in
;; operand 2; NEG maps to XOR of the sign bit, ABS to AND of its complement.
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
	(match_operator:VF 3 "absneg_operator"
	  [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  enum rtx_code absneg_op;
  rtx op1, op2;
  rtx t;

  if (TARGET_AVX)
    {
      if (MEM_P (operands[1]))
	op1 = operands[2], op2 = operands[1];
      else
	op1 = operands[1], op2 = operands[2];
    }
  else
    {
      op1 = operands[0];
      if (rtx_equal_p (operands[0], operands[1]))
	op2 = operands[2];
      else
	op2 = operands[1];
    }

  absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
  t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
  t = gen_rtx_SET (VOIDmode, operands[0], t);
  emit_insn (t);
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
765
;; Packed float add/sub.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
	(plusminus:VF
	  (match_operand:VF 1 "nonimmediate_operand" "")
	  (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
	(plusminus:VF
	  (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
787
;; Scalar add/sub on the low element, merging the rest from operand 1.
(define_insn "<sse>_vm<plusminus_insn><mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (plusminus:VF_128
	    (match_operand:VF_128 1 "register_operand" "0,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
804
;; Packed float multiply.
(define_expand "mul<mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
	(mult:VF
	  (match_operand:VF 1 "nonimmediate_operand" "")
	  (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

(define_insn "*mul<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
	(mult:VF
	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   mul<ssemodesuffix>\t{%2, %0|%0, %2}
   vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
826
;; Scalar multiply on the low element (mulss/mulsd).
(define_insn "<sse>_vmmul<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (mult:VF_128
	    (match_operand:VF_128 1 "register_operand" "0,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
843
;; Packed DF divide.
(define_expand "div<mode>3"
  [(set (match_operand:VF2 0 "register_operand" "")
	(div:VF2 (match_operand:VF2 1 "register_operand" "")
		 (match_operand:VF2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")

;; Packed SF divide; under -ffast-math style flags it may be replaced by a
;; reciprocal approximation plus Newton-Raphson refinement.
(define_expand "div<mode>3"
  [(set (match_operand:VF1 0 "register_operand" "")
	(div:VF1 (match_operand:VF1 1 "register_operand" "")
		 (match_operand:VF1 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);

  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_DIV
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
      DONE;
    }
})
869
;; Packed float divide insn (divps/divpd).
(define_insn "<sse>_div<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
	(div:VF
	  (match_operand:VF 1 "register_operand" "0,x")
	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
  "@
   div<ssemodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Scalar divide on the low element (divss/divsd).
(define_insn "<sse>_vmdiv<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (div:VF_128
	    (match_operand:VF_128 1 "register_operand" "0,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
900
;; Approximate packed reciprocal (rcpps).
(define_insn "<sse>_rcp<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
	(unspec:VF1
	  [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "%vrcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Approximate scalar reciprocal on the low element (rcpss).
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_merge:V4SF
	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
		       UNSPEC_RCP)
	  (match_operand:V4SF 2 "register_operand" "0,x")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   rcpss\t{%1, %0|%0, %1}
   vrcpss\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
928
;; Packed DF square root.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF2 0 "register_operand" "")
	(sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2")

;; Packed SF square root; may be replaced by an rsqrt approximation plus
;; Newton-Raphson refinement under unsafe-math flags.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
	(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_SQRT
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
      DONE;
    }
})
949
;; Packed square root insn (sqrtps/sqrtpd).
(define_insn "<sse>_sqrt<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x")
	(sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Scalar square root on the low element (sqrtss/sqrtsd).
(define_insn "<sse>_vmsqrt<mode>2"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (sqrt:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
	  (match_operand:VF_128 2 "register_operand" "0,x")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
976
;; Reciprocal square root expander: emits the approximation plus
;; Newton-Raphson refinement.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
	(unspec:VF1
	  [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
  "TARGET_SSE_MATH"
{
  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
  DONE;
})

;; Raw approximate packed reciprocal square root (rsqrtps).
(define_insn "<sse>_rsqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
	(unspec:VF1
	  [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "%vrsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Approximate scalar reciprocal square root on the low element (rsqrtss).
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_merge:V4SF
	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
		       UNSPEC_RSQRT)
	  (match_operand:V4SF 2 "register_operand" "0,x")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   rsqrtss\t{%1, %0|%0, %1}
   vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
1012
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.

(define_expand "<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
	(smaxmin:VF
	  (match_operand:VF 1 "nonimmediate_operand" "")
	  (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ;; min/max are not commutative for NaNs, so pin operand 1 in a register
  ;; to preserve operand order unless finite math is assumed.
  if (!flag_finite_math_only)
    operands[1] = force_reg (<MODE>mode, operands[1]);
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
1028
;; min/max when -ffinite-math-only: operands may commute ("%0"), since
;; without NaNs and with unspecified -0.0 handling the operation is
;; treated as commutative.
1029(define_insn "*<code><mode>3_finite"
1030 [(set (match_operand:VF 0 "register_operand" "=x,x")
1031 (smaxmin:VF
1032 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1033 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1034 "TARGET_SSE && flag_finite_math_only
1035 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1036 "@
1037 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1038 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1039 [(set_attr "isa" "noavx,avx")
1040 (set_attr "type" "sseadd")
1041 (set_attr "prefix" "orig,vex")
1042 (set_attr "mode" "<MODE>")])
1043
;; min/max honoring NaNs: operand order matters (no "%" commutative
;; marker) because x86 minps/maxps are not symmetric in their operands.
1044(define_insn "*<code><mode>3"
1045 [(set (match_operand:VF 0 "register_operand" "=x,x")
1046 (smaxmin:VF
1047 (match_operand:VF 1 "register_operand" "0,x")
1048 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1049 "TARGET_SSE && !flag_finite_math_only"
1050 "@
1051 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1052 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1053 [(set_attr "isa" "noavx,avx")
1054 (set_attr "type" "sseadd")
1055 (set_attr "prefix" "orig,vex")
1056 (set_attr "mode" "<MODE>")])
1057
;; Scalar min/max (minss/maxss etc.): element 0 is the min/max of
;; operands 1 and 2; upper elements are taken from operand 1.
1058(define_insn "<sse>_vm<code><mode>3"
1059 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1060 (vec_merge:VF_128
1061 (smaxmin:VF_128
1062 (match_operand:VF_128 1 "register_operand" "0,x")
1063 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1064 (match_dup 1)
1065 (const_int 1)))]
1066 "TARGET_SSE"
1067 "@
1068 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1069 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1070 [(set_attr "isa" "noavx,avx")
1071 (set_attr "type" "sse")
1072 (set_attr "prefix" "orig,vex")
1073 (set_attr "mode" "<ssescalarmode>")])
1074
1075;; These versions of the min/max patterns implement exactly the operations
1076;; min = (op1 < op2 ? op1 : op2)
1077;; max = (!(op1 < op2) ? op1 : op2)
1078;; Their operands are not commutative, and thus they may be used in the
1079;; presence of -0.0 and NaN.
1080
;; IEEE-safe min, kept as an unspec so the RTL optimizers cannot assume
;; commutativity; implements exactly the x86 minps/minpd operand order.
1081(define_insn "*ieee_smin<mode>3"
1082 [(set (match_operand:VF 0 "register_operand" "=x,x")
1083 (unspec:VF
1084 [(match_operand:VF 1 "register_operand" "0,x")
1085 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1086 UNSPEC_IEEE_MIN))]
1087 "TARGET_SSE"
1088 "@
1089 min<ssemodesuffix>\t{%2, %0|%0, %2}
1090 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1091 [(set_attr "isa" "noavx,avx")
1092 (set_attr "type" "sseadd")
1093 (set_attr "prefix" "orig,vex")
1094 (set_attr "mode" "<MODE>")])
1095
;; IEEE-safe max, mirror of *ieee_smin<mode>3 above (unspec keeps the
;; non-commutative x86 maxps/maxpd semantics).
1096(define_insn "*ieee_smax<mode>3"
1097 [(set (match_operand:VF 0 "register_operand" "=x,x")
1098 (unspec:VF
1099 [(match_operand:VF 1 "register_operand" "0,x")
1100 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1101 UNSPEC_IEEE_MAX))]
1102 "TARGET_SSE"
1103 "@
1104 max<ssemodesuffix>\t{%2, %0|%0, %2}
1105 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1106 [(set_attr "isa" "noavx,avx")
1107 (set_attr "type" "sseadd")
1108 (set_attr "prefix" "orig,vex")
1109 (set_attr "mode" "<MODE>")])
1110
;; vaddsubpd (256-bit): vec_merge mask 10 = 0b1010 selects the plus
;; result for odd elements and the minus result for even elements.
1111(define_insn "avx_addsubv4df3"
1112 [(set (match_operand:V4DF 0 "register_operand" "=x")
1113 (vec_merge:V4DF
1114 (plus:V4DF
1115 (match_operand:V4DF 1 "register_operand" "x")
1116 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1117 (minus:V4DF (match_dup 1) (match_dup 2))
1118 (const_int 10)))]
1119 "TARGET_AVX"
1120 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1121 [(set_attr "type" "sseadd")
1122 (set_attr "prefix" "vex")
1123 (set_attr "mode" "V4DF")])
1124
;; addsubpd: mask 2 = 0b10 -> element 1 is op1+op2, element 0 is
;; op1-op2.
1125(define_insn "sse3_addsubv2df3"
1126 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1127 (vec_merge:V2DF
1128 (plus:V2DF
1129 (match_operand:V2DF 1 "register_operand" "0,x")
1130 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1131 (minus:V2DF (match_dup 1) (match_dup 2))
1132 (const_int 2)))]
1133 "TARGET_SSE3"
1134 "@
1135 addsubpd\t{%2, %0|%0, %2}
1136 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1137 [(set_attr "isa" "noavx,avx")
1138 (set_attr "type" "sseadd")
1139 (set_attr "atom_unit" "complex")
1140 (set_attr "prefix" "orig,vex")
1141 (set_attr "mode" "V2DF")])
1142
;; vaddsubps (256-bit): mask 170 = 0b10101010 -> add for odd elements,
;; subtract for even elements.
1143(define_insn "avx_addsubv8sf3"
1144 [(set (match_operand:V8SF 0 "register_operand" "=x")
1145 (vec_merge:V8SF
1146 (plus:V8SF
1147 (match_operand:V8SF 1 "register_operand" "x")
1148 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1149 (minus:V8SF (match_dup 1) (match_dup 2))
1150 (const_int 170)))]
1151 "TARGET_AVX"
1152 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1153 [(set_attr "type" "sseadd")
1154 (set_attr "prefix" "vex")
1155 (set_attr "mode" "V8SF")])
1156
;; addsubps: mask 10 = 0b1010 -> add for odd elements, subtract for
;; even elements.
1157(define_insn "sse3_addsubv4sf3"
1158 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1159 (vec_merge:V4SF
1160 (plus:V4SF
1161 (match_operand:V4SF 1 "register_operand" "0,x")
1162 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1163 (minus:V4SF (match_dup 1) (match_dup 2))
1164 (const_int 10)))]
1165 "TARGET_SSE3"
1166 "@
1167 addsubps\t{%2, %0|%0, %2}
1168 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1169 [(set_attr "isa" "noavx,avx")
1170 (set_attr "type" "sseadd")
1171 (set_attr "prefix" "orig,vex")
1172 (set_attr "prefix_rep" "1,*")
1173 (set_attr "mode" "V4SF")])
1174
;; vhaddpd/vhsubpd (256-bit).  Per the RTL, the result is
;; { op1[0]#op1[1], op2[0]#op2[1], op1[2]#op1[3], op2[2]#op2[3] }
;; (# = the plusminus op), i.e. horizontal pairs within each 128-bit
;; lane, interleaving op1 and op2.
1175(define_insn "avx_h<plusminus_insn>v4df3"
1176 [(set (match_operand:V4DF 0 "register_operand" "=x")
1177 (vec_concat:V4DF
1178 (vec_concat:V2DF
1179 (plusminus:DF
1180 (vec_select:DF
1181 (match_operand:V4DF 1 "register_operand" "x")
1182 (parallel [(const_int 0)]))
1183 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1184 (plusminus:DF
1185 (vec_select:DF
1186 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1187 (parallel [(const_int 0)]))
1188 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1189 (vec_concat:V2DF
1190 (plusminus:DF
1191 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1192 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1193 (plusminus:DF
1194 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1195 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1196 "TARGET_AVX"
1197 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1198 [(set_attr "type" "sseadd")
1199 (set_attr "prefix" "vex")
1200 (set_attr "mode" "V4DF")])
1201
;; haddpd/hsubpd: result = { op1[0]#op1[1], op2[0]#op2[1] }.
1202(define_insn "sse3_h<plusminus_insn>v2df3"
1203 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1204 (vec_concat:V2DF
1205 (plusminus:DF
1206 (vec_select:DF
1207 (match_operand:V2DF 1 "register_operand" "0,x")
1208 (parallel [(const_int 0)]))
1209 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1210 (plusminus:DF
1211 (vec_select:DF
1212 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1213 (parallel [(const_int 0)]))
1214 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1215 "TARGET_SSE3"
1216 "@
1217 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1218 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1219 [(set_attr "isa" "noavx,avx")
1220 (set_attr "type" "sseadd")
1221 (set_attr "prefix" "orig,vex")
1222 (set_attr "mode" "V2DF")])
1223
;; vhaddps/vhsubps (256-bit).  Horizontal pairs within each 128-bit
;; lane: low lane = { op1[0]#[1], op1[2]#[3], op2[0]#[1], op2[2]#[3] },
;; high lane = same with elements 4..7.
1224(define_insn "avx_h<plusminus_insn>v8sf3"
1225 [(set (match_operand:V8SF 0 "register_operand" "=x")
1226 (vec_concat:V8SF
1227 (vec_concat:V4SF
1228 (vec_concat:V2SF
1229 (plusminus:SF
1230 (vec_select:SF
1231 (match_operand:V8SF 1 "register_operand" "x")
1232 (parallel [(const_int 0)]))
1233 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1234 (plusminus:SF
1235 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1236 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1237 (vec_concat:V2SF
1238 (plusminus:SF
1239 (vec_select:SF
1240 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1241 (parallel [(const_int 0)]))
1242 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1243 (plusminus:SF
1244 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1245 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1246 (vec_concat:V4SF
1247 (vec_concat:V2SF
1248 (plusminus:SF
1249 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1250 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1251 (plusminus:SF
1252 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1253 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1254 (vec_concat:V2SF
1255 (plusminus:SF
1256 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1257 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1258 (plusminus:SF
1259 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1260 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1261 "TARGET_AVX"
1262 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1263 [(set_attr "type" "sseadd")
1264 (set_attr "prefix" "vex")
1265 (set_attr "mode" "V8SF")])
1266
;; haddps/hsubps: result = { op1[0]#[1], op1[2]#[3], op2[0]#[1],
;; op2[2]#[3] }.
1267(define_insn "sse3_h<plusminus_insn>v4sf3"
1268 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1269 (vec_concat:V4SF
1270 (vec_concat:V2SF
1271 (plusminus:SF
1272 (vec_select:SF
1273 (match_operand:V4SF 1 "register_operand" "0,x")
1274 (parallel [(const_int 0)]))
1275 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1276 (plusminus:SF
1277 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1278 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1279 (vec_concat:V2SF
1280 (plusminus:SF
1281 (vec_select:SF
1282 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1283 (parallel [(const_int 0)]))
1284 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1285 (plusminus:SF
1286 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1287 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1288 "TARGET_SSE3"
1289 "@
1290 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1291 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1292 [(set_attr "isa" "noavx,avx")
1293 (set_attr "type" "sseadd")
1294 (set_attr "atom_unit" "complex")
1295 (set_attr "prefix" "orig,vex")
1296 (set_attr "prefix_rep" "1,*")
1297 (set_attr "mode" "V4SF")])
1298
;; Sum-reduction of V4DF: one in-lane hadd, then swap the 128-bit
;; lanes with vperm2f128 (imm 1) and add, leaving the total in every
;; element of operand 0.
1299(define_expand "reduc_splus_v4df"
1300 [(match_operand:V4DF 0 "register_operand" "")
1301 (match_operand:V4DF 1 "register_operand" "")]
1302 "TARGET_AVX"
1303{
1304 rtx tmp = gen_reg_rtx (V4DFmode);
1305 rtx tmp2 = gen_reg_rtx (V4DFmode);
1306 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1307 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1308 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1309 DONE;
1310})
1311
;; Sum-reduction of V2DF: a single haddpd of the operand with itself.
1312(define_expand "reduc_splus_v2df"
1313 [(match_operand:V2DF 0 "register_operand" "")
1314 (match_operand:V2DF 1 "register_operand" "")]
1315 "TARGET_SSE3"
1316{
1317 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1318 DONE;
1319})
1320
;; Sum-reduction of V8SF: two in-lane hadds, then swap the 128-bit
;; lanes (vperm2f128 imm 1) and add to combine the per-lane sums.
1321(define_expand "reduc_splus_v8sf"
1322 [(match_operand:V8SF 0 "register_operand" "")
1323 (match_operand:V8SF 1 "register_operand" "")]
1324 "TARGET_AVX"
1325{
1326 rtx tmp = gen_reg_rtx (V8SFmode);
1327 rtx tmp2 = gen_reg_rtx (V8SFmode);
1328 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1329 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1330 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1331 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1332 DONE;
1333})
1334
;; Sum-reduction of V4SF: two haddps when SSE3 is available, otherwise
;; a generic shuffle-and-add sequence via ix86_expand_reduc.
1335(define_expand "reduc_splus_v4sf"
1336 [(match_operand:V4SF 0 "register_operand" "")
1337 (match_operand:V4SF 1 "register_operand" "")]
1338 "TARGET_SSE"
1339{
1340 if (TARGET_SSE3)
1341 {
1342 rtx tmp = gen_reg_rtx (V4SFmode);
1343 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1344 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1345 }
1346 else
1347 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1348 DONE;
1349})
1350
1351;; Modes handled by reduc_sm{in,ax}* patterns.
;; Modes handled by the signed-min/max reduction expander below, each
;; gated on the ISA that provides the element-wise min/max pattern.
1352(define_mode_iterator REDUC_SMINMAX_MODE
1353 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1354 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1355 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1356 (V4SF "TARGET_SSE")])
1357
;; Signed min/max reduction: delegates to the generic shuffle-based
;; helper using the element-wise smin/smax pattern.
1358(define_expand "reduc_<code>_<mode>"
1359 [(smaxmin:REDUC_SMINMAX_MODE
1360 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1361 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1362 ""
1363{
1364 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1365 DONE;
1366})
1367
;; Unsigned min/max reduction for 256-bit integer modes (AVX2 only).
1368(define_expand "reduc_<code>_<mode>"
1369 [(umaxmin:VI_256
1370 (match_operand:VI_256 0 "register_operand" "")
1371 (match_operand:VI_256 1 "register_operand" ""))]
1372 "TARGET_AVX2"
1373{
1374 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1375 DONE;
1376})
1377
;; Unsigned-min reduction of V8HI, using SSE4.1's uminv8hi3 pattern.
1378(define_expand "reduc_umin_v8hi"
1379 [(umin:V8HI
1380 (match_operand:V8HI 0 "register_operand" "")
1381 (match_operand:V8HI 1 "register_operand" ""))]
1382 "TARGET_SSE4_1"
1383{
1384 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1385 DONE;
1386})
1387
1388;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1389;;
1390;; Parallel floating point comparisons
1391;;
1392;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1393
;; AVX packed compare with an explicit predicate immediate (0..31),
;; i.e. vcmpps/vcmppd with the full 5-bit AVX predicate range.
1394(define_insn "avx_cmp<mode>3"
1395 [(set (match_operand:VF 0 "register_operand" "=x")
1396 (unspec:VF
1397 [(match_operand:VF 1 "register_operand" "x")
1398 (match_operand:VF 2 "nonimmediate_operand" "xm")
1399 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1400 UNSPEC_PCMP))]
1401 "TARGET_AVX"
1402 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1403 [(set_attr "type" "ssecmp")
1404 (set_attr "length_immediate" "1")
1405 (set_attr "prefix" "vex")
1406 (set_attr "mode" "<MODE>")])
1407
;; AVX scalar compare (vcmpss/vcmpsd) with predicate immediate; the
;; mask result lands in element 0, upper elements copied from op 1.
1408(define_insn "avx_vmcmp<mode>3"
1409 [(set (match_operand:VF_128 0 "register_operand" "=x")
1410 (vec_merge:VF_128
1411 (unspec:VF_128
1412 [(match_operand:VF_128 1 "register_operand" "x")
1413 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1414 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1415 UNSPEC_PCMP)
1416 (match_dup 1)
1417 (const_int 1)))]
1418 "TARGET_AVX"
1419 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1420 [(set_attr "type" "ssecmp")
1421 (set_attr "length_immediate" "1")
1422 (set_attr "prefix" "vex")
1423 (set_attr "mode" "<ssescalarmode>")])
1424
;; Mask-producing compare restricted to commutative comparison codes
;; (checked via GET_RTX_CLASS), so operand 1 may commute ("%0").
;; %D3 prints the cmpps predicate for the comparison operator.
1425(define_insn "*<sse>_maskcmp<mode>3_comm"
1426 [(set (match_operand:VF 0 "register_operand" "=x,x")
1427 (match_operator:VF 3 "sse_comparison_operator"
1428 [(match_operand:VF 1 "register_operand" "%0,x")
1429 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1430 "TARGET_SSE
1431 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1432 "@
1433 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1434 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1435 [(set_attr "isa" "noavx,avx")
1436 (set_attr "type" "ssecmp")
1437 (set_attr "length_immediate" "1")
1438 (set_attr "prefix" "orig,vex")
1439 (set_attr "mode" "<MODE>")])
1440
;; General mask-producing packed compare (cmpps/cmppd); non-commutative
;; comparisons allowed, so operand 1 must stay first.
1441(define_insn "<sse>_maskcmp<mode>3"
1442 [(set (match_operand:VF 0 "register_operand" "=x,x")
1443 (match_operator:VF 3 "sse_comparison_operator"
1444 [(match_operand:VF 1 "register_operand" "0,x")
1445 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1446 "TARGET_SSE"
1447 "@
1448 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1449 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1450 [(set_attr "isa" "noavx,avx")
1451 (set_attr "type" "ssecmp")
1452 (set_attr "length_immediate" "1")
1453 (set_attr "prefix" "orig,vex")
1454 (set_attr "mode" "<MODE>")])
1455
;; Scalar mask-producing compare (cmpss/cmpsd): the mask is written to
;; element 0 only, upper elements copied from operand 1.
1456(define_insn "<sse>_vmmaskcmp<mode>3"
1457 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1458 (vec_merge:VF_128
1459 (match_operator:VF_128 3 "sse_comparison_operator"
1460 [(match_operand:VF_128 1 "register_operand" "0,x")
1461 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1462 (match_dup 1)
1463 (const_int 1)))]
1464 "TARGET_SSE"
1465 "@
1466 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1467 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1468 [(set_attr "isa" "noavx,avx")
1469 (set_attr "type" "ssecmp")
1470 (set_attr "length_immediate" "1,*")
1471 (set_attr "prefix" "orig,vex")
1472 (set_attr "mode" "<ssescalarmode>")])
1473
;; comiss/comisd: compare element 0 of both operands and set EFLAGS
;; (CCFP).  The unordered variant below uses CCFPU/ucomi instead.
1474(define_insn "<sse>_comi"
1475 [(set (reg:CCFP FLAGS_REG)
1476 (compare:CCFP
1477 (vec_select:MODEF
1478 (match_operand:<ssevecmode> 0 "register_operand" "x")
1479 (parallel [(const_int 0)]))
1480 (vec_select:MODEF
1481 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1482 (parallel [(const_int 0)]))))]
1483 "SSE_FLOAT_MODE_P (<MODE>mode)"
1484 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1485 [(set_attr "type" "ssecomi")
1486 (set_attr "prefix" "maybe_vex")
1487 (set_attr "prefix_rep" "0")
1488 (set (attr "prefix_data16")
1489 (if_then_else (eq_attr "mode" "DF")
1490 (const_string "1")
1491 (const_string "0")))
1492 (set_attr "mode" "<MODE>")])
1493
;; ucomiss/ucomisd: unordered scalar compare setting EFLAGS (CCFPU),
;; the quiet counterpart of <sse>_comi above.
1494(define_insn "<sse>_ucomi"
1495 [(set (reg:CCFPU FLAGS_REG)
1496 (compare:CCFPU
1497 (vec_select:MODEF
1498 (match_operand:<ssevecmode> 0 "register_operand" "x")
1499 (parallel [(const_int 0)]))
1500 (vec_select:MODEF
1501 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1502 (parallel [(const_int 0)]))))]
1503 "SSE_FLOAT_MODE_P (<MODE>mode)"
1504 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1505 [(set_attr "type" "ssecomi")
1506 (set_attr "prefix" "maybe_vex")
1507 (set_attr "prefix_rep" "0")
1508 (set (attr "prefix_data16")
1509 (if_then_else (eq_attr "mode" "DF")
1510 (const_string "1")
1511 (const_string "0")))
1512 (set_attr "mode" "<MODE>")])
1513
;; 256-bit vcond: element-wise select driven by an FP comparison.  The
;; condition requires data and mask vectors to have equal element
;; counts; expansion is done by ix86_expand_fp_vcond.
1514(define_expand "vcond<V_256:mode><VF_256:mode>"
1515 [(set (match_operand:V_256 0 "register_operand" "")
1516 (if_then_else:V_256
1517 (match_operator 3 ""
1518 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1519 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1520 (match_operand:V_256 1 "general_operand" "")
1521 (match_operand:V_256 2 "general_operand" "")))]
1522 "TARGET_AVX
1523 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1524 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1525{
1526 bool ok = ix86_expand_fp_vcond (operands);
1527 gcc_assert (ok);
1528 DONE;
1529})
1530
;; 128-bit vcond, mirror of the 256-bit expander above.
1531(define_expand "vcond<V_128:mode><VF_128:mode>"
1532 [(set (match_operand:V_128 0 "register_operand" "")
1533 (if_then_else:V_128
1534 (match_operator 3 ""
1535 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1536 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1537 (match_operand:V_128 1 "general_operand" "")
1538 (match_operand:V_128 2 "general_operand" "")))]
1539 "TARGET_SSE
1540 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1541 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1542{
1543 bool ok = ix86_expand_fp_vcond (operands);
1544 gcc_assert (ok);
1545 DONE;
1546})
1547
1548;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1549;;
1550;; Parallel floating point logical operations
1551;;
1552;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1553
;; Packed andnot: (~op1) & op2, matching the andnps/andnpd operand
;; order.  The mnemonic suffix is forced to "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (single-precision logicals
;; are preferred on such CPUs); buf is static so the returned pointer
;; stays valid after this function returns.
1554(define_insn "<sse>_andnot<mode>3"
1555 [(set (match_operand:VF 0 "register_operand" "=x,x")
1556 (and:VF
1557 (not:VF
1558 (match_operand:VF 1 "register_operand" "0,x"))
1559 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1560 "TARGET_SSE"
1561{
1562 static char buf[32];
1563 const char *insn;
1564 const char *suffix
1565 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1566
1567 switch (which_alternative)
1568 {
1569 case 0:
1570 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1571 break;
1572 case 1:
1573 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1574 break;
1575 default:
1576 gcc_unreachable ();
1577 }
1578
1579 snprintf (buf, sizeof (buf), insn, suffix);
1580 return buf;
1581}
1582 [(set_attr "isa" "noavx,avx")
1583 (set_attr "type" "sselog")
1584 (set_attr "prefix" "orig,vex")
1585 (set_attr "mode" "<MODE>")])
1586
;; and/ior/xor expander for FP vectors; just canonicalizes operands.
1587(define_expand "<code><mode>3"
1588 [(set (match_operand:VF 0 "register_operand" "")
1589 (any_logic:VF
1590 (match_operand:VF 1 "nonimmediate_operand" "")
1591 (match_operand:VF 2 "nonimmediate_operand" "")))]
1592 "TARGET_SSE"
1593 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1594
;; andps/orps/xorps (and pd variants) on FP vectors; commutative
;; ("%0").  Like andnot above, the suffix is forced to "ps" when
;; single-precision logicals are optimal for the target CPU.
1595(define_insn "*<code><mode>3"
1596 [(set (match_operand:VF 0 "register_operand" "=x,x")
1597 (any_logic:VF
1598 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1599 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1600 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1601{
1602 static char buf[32];
1603 const char *insn;
1604 const char *suffix
1605 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1606
1607 switch (which_alternative)
1608 {
1609 case 0:
1610 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1611 break;
1612 case 1:
1613 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1614 break;
1615 default:
1616 gcc_unreachable ();
1617 }
1618
1619 snprintf (buf, sizeof (buf), insn, suffix);
1620 return buf;
1621}
1622 [(set_attr "isa" "noavx,avx")
1623 (set_attr "type" "sselog")
1624 (set_attr "prefix" "orig,vex")
1625 (set_attr "mode" "<MODE>")])
1626
;; copysign(x, y): clear the sign bits of op1, isolate the sign bits of
;; op2 using a sign-bit mask (operand 3, built by
;; ix86_build_signbit_mask), then OR the two halves together.
1628(define_expand "copysign<mode>3"
1629 [(set (match_dup 4)
1630 (and:VF
1631 (not:VF (match_dup 3))
1632 (match_operand:VF 1 "nonimmediate_operand" "")))
1633 (set (match_dup 5)
1634 (and:VF (match_dup 3)
1635 (match_operand:VF 2 "nonimmediate_operand" "")))
1636 (set (match_operand:VF 0 "register_operand" "")
1637 (ior:VF (match_dup 4) (match_dup 5)))]
1638 "TARGET_SSE"
1639{
1640 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1641
1642 operands[4] = gen_reg_rtx (<MODE>mode);
1643 operands[5] = gen_reg_rtx (<MODE>mode);
1644})
1644
1645;; Also define scalar versions. These are used for abs, neg, and
1646;; conditional move. Using subregs into vector modes causes register
1647;; allocation lossage. These patterns do not allow memory operands
1648;; because the native instructions read the full 128-bits.
1649
;; Scalar (SF/DF) andnot using the full-width vector instruction.
;; Operands are register-only: as the comment above notes, the native
;; instruction reads the full 128 bits.
1650(define_insn "*andnot<mode>3"
1651 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1652 (and:MODEF
1653 (not:MODEF
1654 (match_operand:MODEF 1 "register_operand" "0,x"))
1655 (match_operand:MODEF 2 "register_operand" "x,x")))]
1656 "SSE_FLOAT_MODE_P (<MODE>mode)"
1657{
1658 static char buf[32];
1659 const char *insn;
1660 const char *suffix
1661 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1662
1663 switch (which_alternative)
1664 {
1665 case 0:
1666 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1667 break;
1668 case 1:
1669 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1670 break;
1671 default:
1672 gcc_unreachable ();
1673 }
1674
1675 snprintf (buf, sizeof (buf), insn, suffix);
1676 return buf;
1677}
1678 [(set_attr "isa" "noavx,avx")
1679 (set_attr "type" "sselog")
1680 (set_attr "prefix" "orig,vex")
1681 (set_attr "mode" "<ssevecmode>")])
1682
;; Scalar (SF/DF) and/ior/xor via the full-width vector instruction;
;; register-only operands for the same 128-bit-read reason as above.
1683(define_insn "*<code><mode>3"
1684 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1685 (any_logic:MODEF
1686 (match_operand:MODEF 1 "register_operand" "%0,x")
1687 (match_operand:MODEF 2 "register_operand" "x,x")))]
1688 "SSE_FLOAT_MODE_P (<MODE>mode)"
1689{
1690 static char buf[32];
1691 const char *insn;
1692 const char *suffix
1693 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1694
1695 switch (which_alternative)
1696 {
1697 case 0:
1698 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1699 break;
1700 case 1:
1701 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1702 break;
1703 default:
1704 gcc_unreachable ();
1705 }
1706
1707 snprintf (buf, sizeof (buf), insn, suffix);
1708 return buf;
1709}
1710 [(set_attr "isa" "noavx,avx")
1711 (set_attr "type" "sselog")
1712 (set_attr "prefix" "orig,vex")
1713 (set_attr "mode" "<ssevecmode>")])
1714
1715;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1716;;
1717;; FMA floating point multiply/accumulate instructions. These include
1718;; scalar versions of the instructions as well as vector versions.
1719;;
1720;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1721
5ce9237c
JM
1722;; The standard names for scalar FMA are only available with SSE math enabled.
1723(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
1724 (DF "TARGET_SSE_MATH")
1725 V4SF V2DF V8SF V4DF])
e4b17023 1726
e4b17023 1727(define_expand "fma<mode>4"
5ce9237c
JM
1728 [(set (match_operand:FMAMODEM 0 "register_operand")
1729 (fma:FMAMODEM
1730 (match_operand:FMAMODEM 1 "nonimmediate_operand")
1731 (match_operand:FMAMODEM 2 "nonimmediate_operand")
1732 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
1733 "TARGET_FMA || TARGET_FMA4")
e4b17023
JM
1734
1735(define_expand "fms<mode>4"
5ce9237c
JM
1736 [(set (match_operand:FMAMODEM 0 "register_operand")
1737 (fma:FMAMODEM
1738 (match_operand:FMAMODEM 1 "nonimmediate_operand")
1739 (match_operand:FMAMODEM 2 "nonimmediate_operand")
1740 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
1741 "TARGET_FMA || TARGET_FMA4")
e4b17023
JM
1742
1743(define_expand "fnma<mode>4"
5ce9237c
JM
1744 [(set (match_operand:FMAMODEM 0 "register_operand")
1745 (fma:FMAMODEM
1746 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
1747 (match_operand:FMAMODEM 2 "nonimmediate_operand")
1748 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
1749 "TARGET_FMA || TARGET_FMA4")
e4b17023
JM
1750
1751(define_expand "fnms<mode>4"
5ce9237c
JM
1752 [(set (match_operand:FMAMODEM 0 "register_operand")
1753 (fma:FMAMODEM
1754 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
1755 (match_operand:FMAMODEM 2 "nonimmediate_operand")
1756 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
1757 "TARGET_FMA || TARGET_FMA4")
1758
1759;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Modes for the FMA intrinsic patterns (unconditional, unlike FMAMODEM).
1760(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
e4b17023 1761
e4b17023
JM
;; FMA intrinsic expander: fused a*b+c, available with FMA3 or FMA4.
1762(define_expand "fma4i_fmadd_<mode>"
1763 [(set (match_operand:FMAMODE 0 "register_operand")
1764 (fma:FMAMODE
1765 (match_operand:FMAMODE 1 "nonimmediate_operand")
1766 (match_operand:FMAMODE 2 "nonimmediate_operand")
1767 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1768 "TARGET_FMA || TARGET_FMA4")
1769
;; a*b+c.  Alternatives 0-2 pick the FMA3 132/213/231 form depending
;; on which operand is tied to the destination and which may be in
;; memory; alternatives 3-4 are the four-operand FMA4 encoding.
1770(define_insn "*fma_fmadd_<mode>"
1771 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1772 (fma:FMAMODE
1773 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1774 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1775 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1776 "TARGET_FMA || TARGET_FMA4"
1777 "@
1778 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1779 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1780 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1781 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1782 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1783 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1784 (set_attr "type" "ssemuladd")
1785 (set_attr "mode" "<MODE>")])
1786
;; a*b-c (third operand negated); same alternative scheme as
;; *fma_fmadd_<mode> above.
1787(define_insn "*fma_fmsub_<mode>"
1788 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1789 (fma:FMAMODE
1790 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1791 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1792 (neg:FMAMODE
1793 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1794 "TARGET_FMA || TARGET_FMA4"
1795 "@
1796 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1797 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1798 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1799 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1800 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1801 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1802 (set_attr "type" "ssemuladd")
1803 (set_attr "mode" "<MODE>")])
1804
;; -(a*b)+c (first operand negated); same alternative scheme as above.
1805(define_insn "*fma_fnmadd_<mode>"
1806 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1807 (fma:FMAMODE
1808 (neg:FMAMODE
1809 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1810 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1811 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1812 "TARGET_FMA || TARGET_FMA4"
1813 "@
1814 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1815 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1816 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1817 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1818 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1819 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1820 (set_attr "type" "ssemuladd")
1821 (set_attr "mode" "<MODE>")])
1822
;; -(a*b)-c (first and third operands negated); same alternative
;; scheme as above.
1823(define_insn "*fma_fnmsub_<mode>"
1824 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1825 (fma:FMAMODE
1826 (neg:FMAMODE
1827 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1828 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1829 (neg:FMAMODE
1830 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1831 "TARGET_FMA || TARGET_FMA4"
1832 "@
1833 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1834 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1835 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1836 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1837 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1838 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1839 (set_attr "type" "ssemuladd")
1840 (set_attr "mode" "<MODE>")])
1841
1842;; FMA parallel floating point multiply addsub and subadd operations.
1843
1844;; It would be possible to represent these without the UNSPEC as
1845;;
1846;; (vec_merge
1847;; (fma op1 op2 op3)
1848;; (fma op1 op2 (neg op3))
1849;; (merge-const))
1850;;
1851;; But this doesn't seem useful in practice.
1852
;; Alternating multiply add/sub, kept as an unspec (see rationale in
;; the comment block above).
1853(define_expand "fmaddsub_<mode>"
1854 [(set (match_operand:VF 0 "register_operand")
1855 (unspec:VF
1856 [(match_operand:VF 1 "nonimmediate_operand")
1857 (match_operand:VF 2 "nonimmediate_operand")
1858 (match_operand:VF 3 "nonimmediate_operand")]
1859 UNSPEC_FMADDSUB))]
1860 "TARGET_FMA || TARGET_FMA4")
1861
;; vfmaddsub: a*b with alternating subtract/add per element; same
;; 132/213/231 + FMA4 alternative scheme as the plain FMA insns.
1862(define_insn "*fma_fmaddsub_<mode>"
1863 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1864 (unspec:VF
1865 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1866 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1867 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
1868 UNSPEC_FMADDSUB))]
1869 "TARGET_FMA || TARGET_FMA4"
1870 "@
1871 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1872 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1873 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1874 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1875 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1876 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1877 (set_attr "type" "ssemuladd")
1878 (set_attr "mode" "<MODE>")])
1879
;; vfmsubadd: the fmaddsub unspec with a negated third operand, which
;; swaps the add/sub alternation; same alternative scheme as above.
1880(define_insn "*fma_fmsubadd_<mode>"
1881 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1882 (unspec:VF
1883 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1884 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1885 (neg:VF
1886 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
1887 UNSPEC_FMADDSUB))]
1888 "TARGET_FMA || TARGET_FMA4"
1889 "@
1890 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1891 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1892 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1893 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1894 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1895 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1896 (set_attr "type" "ssemuladd")
1897 (set_attr "mode" "<MODE>")])
1898
;; FMA3 floating point scalar intrinsics.  These merge result with
;; high-order elements from the destination register.

(define_expand "fmai_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand")
	    (match_operand:VF_128 2 "nonimmediate_operand")
	    (match_operand:VF_128 3 "nonimmediate_operand"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")

;; Scalar fused multiply-add: element 0 = fma(op1, op2, op3),
;; upper elements taken from op1 (which must match the destination).
(define_insn "*fmai_fmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
	    (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
	    (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA"
  "@
   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Scalar fma with negated addend: element 0 = fma(op1, op2, -op3).
(define_insn "*fmai_fmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
	    (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
	    (neg:VF_128
	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA"
  "@
   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Scalar fma with negated multiplicand: element 0 = fma(-op2, op1, op3).
(define_insn "*fmai_fnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
	    (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
	    (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Scalar fma with both multiplicand and addend negated:
;; element 0 = fma(-op2, op1, -op3).
(define_insn "*fmai_fnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
	    (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
	    (neg:VF_128
	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1980
;; FMA4 floating point scalar intrinsics.  These write the
;; entire destination register, with the high-order elements zeroed.

(define_expand "fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand")
	    (match_operand:VF_128 2 "nonimmediate_operand")
	    (match_operand:VF_128 3 "nonimmediate_operand"))
	  (match_dup 4)
	  (const_int 1)))]
  "TARGET_FMA4"
{
  ;; Upper elements are merged with a zero vector, per FMA4 semantics.
  operands[4] = CONST0_RTX (<MODE>mode);
})

(define_insn "*fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
	  (match_operand:VF_128 4 "const0_operand" "")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fma4i_vmfmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (neg:VF_128
	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
	  (match_operand:VF_128 4 "const0_operand" "")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fma4i_vmfnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
	  (match_operand:VF_128 4 "const0_operand" "")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fma4i_vmfnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (neg:VF_128
	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
	  (match_operand:VF_128 4 "const0_operand" "")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
2057
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; cvtpi2ps: convert two MMX SImode elements to SFmode, replacing the
;; low two elements of the V4SF destination (vec_merge mask 3).
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
	  (match_operand:V4SF 1 "register_operand" "0")
	  (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; cvtps2pi: round-convert the low two V4SF elements to V2SI in an MMX
;; register (UNSPEC_FIX_NOTRUNC = current-rounding-mode conversion).
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_select:V2SI
	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
		       UNSPEC_FIX_NOTRUNC)
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; cvttps2pi: truncating variant of the above (plain RTL fix).
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_select:V2SI
	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_rep" "0")
   (set_attr "mode" "SF")])
2099
;; cvtsi2ss: SImode integer to scalar float in element 0; upper
;; elements taken from operand 1.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
	  (match_operand:V4SF 1 "register_operand" "0,0,x")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   cvtsi2ss\t{%2, %0|%0, %2}
   cvtsi2ss\t{%2, %0|%0, %2}
   vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; 64-bit source variant (cvtsi2ssq), 64-bit targets only.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
	  (match_operand:V4SF 1 "register_operand" "0,0,x")
	  (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "@
   cvtsi2ssq\t{%2, %0|%0, %2}
   cvtsi2ssq\t{%2, %0|%0, %2}
   vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; cvtss2si: element 0 of a V4SF to SImode, rounding per MXCSR.
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(unspec:SI
	  [(vec_select:SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
2157
;; Scalar-source variant: SFmode operand directly, not a vec_select.
(define_insn "sse_cvtss2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
		   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; 64-bit destination (REX.W encoded "{q}" suffix), 64-bit targets only.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(unspec:DI
	  [(vec_select:SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

(define_insn "sse_cvtss2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
		   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; Truncating conversions (cvttss2si): plain RTL fix, no unspec.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(fix:SI
	  (vec_select:SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "%vcvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(fix:DI
	  (vec_select:SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvttss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
2233
;; Signed int vector -> float vector (cvtdq2ps), SSE2 and AVX widths.
(define_insn "float<sseintvecmodelower><mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
	(float:VF1
	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "%vcvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Unsigned int vector -> float vector has no direct instruction;
;; expanded via helper code in i386.c.
(define_expand "floatuns<sseintvecmodelower><mode>2"
  [(match_operand:VF1 0 "register_operand" "")
   (match_operand:<sseintvecmode> 1 "register_operand" "")]
  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
  ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
  DONE;
})

;; 256-bit float -> int with MXCSR rounding (vcvtps2dq).
(define_insn "avx_cvtps2dq256"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
2262
;; 128-bit float -> int with MXCSR rounding (cvtps2dq).
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Truncating float -> int, 256-bit (vcvttps2dq).
(define_insn "fix_truncv8sfv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Truncating float -> int, 128-bit (cvttps2dq).  The original text set
;; prefix_data16 twice (a conditional form followed by an unconditional
;; "0"); the redundant unconditional set_attr has been dropped.
(define_insn "fix_truncv4sfv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "%vcvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "0")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Unsigned truncation: adjust input into signed range, do a signed
;; fix_trunc, then xor in the correction mask computed by the helper.
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
  [(match_operand:<sseintvecmode> 0 "register_operand" "")
   (match_operand:VF1 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp[3];
  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
  tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
  emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
  DONE;
})
2319
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; cvtpi2pd: two SImode elements (MMX reg or memory) to two doubles.
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "prefix_data16" "1,*")
   (set_attr "mode" "V2DF")])

;; cvtpd2pi: two doubles to V2SI (MMX), MXCSR rounding.
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "DI")])

;; cvttpd2pi: truncating variant.
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2358
;; cvtsi2sd: SImode integer to scalar double in element 0; element 1
;; taken from operand 1.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
	  (match_operand:V2DF 1 "register_operand" "0,0,x")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsi2sd\t{%2, %0|%0, %2}
   cvtsi2sd\t{%2, %0|%0, %2}
   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])

;; 64-bit source variant (cvtsi2sdq), 64-bit targets only.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
	  (match_operand:V2DF 1 "register_operand" "0,0,x")
	  (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "@
   cvtsi2sdq\t{%2, %0|%0, %2}
   cvtsi2sdq\t{%2, %0|%0, %2}
   vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])
2400
;; cvtsd2si: element 0 of a V2DF to SImode, rounding per MXCSR.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(unspec:SI
	  [(vec_select:DF
	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; Scalar-source variant: DFmode operand directly.
(define_insn "sse2_cvtsd2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
		   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; 64-bit destination variants, 64-bit targets only.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(unspec:DI
	  [(vec_select:DF
	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

(define_insn "sse2_cvtsd2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
		   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; Truncating conversions (cvttsd2si): plain RTL fix, no unspec.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(fix:SI
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "%vcvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(fix:DI
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
2492
;; Four ints -> four doubles (vcvtdq2pd, 256-bit destination).
(define_insn "floatv4siv4df2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
	(float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Same, but sourcing the low half of a V8SI (%x1 prints the xmm view).
(define_insn "avx_cvtdq2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
	(float:V4DF
	  (vec_select:V4SI
	    (match_operand:V8SI 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Low two ints of a V4SI -> two doubles.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(float:V2DF
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtdq2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])

;; Four doubles -> four ints, MXCSR rounding ("{y}" disambiguates the
;; 256-bit memory form for the assembler).
(define_insn "avx_cvtpd2dq256"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Variant producing a full V8SI with the high half zeroed, matching
;; what the instruction actually writes.
(define_expand "avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "")
	(vec_concat:V8SI
	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")

(define_insn "*avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(vec_concat:V8SI
	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_operand:V4SI 2 "const0_operand" "")))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
2557
;; Two doubles -> two ints in the low half of a V4SI; high half zeroed,
;; matching the instruction's actual destination write.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
	(vec_concat:V4SI
	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(vec_concat:V4SI
	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  ;; AVX needs the explicit "{x}" 128-bit size suffix.
  if (TARGET_AVX)
    return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
  else
    return "cvtpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")])

;; Truncating four doubles -> four ints (vcvttpd2dq).
(define_insn "fix_truncv4dfv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; As above, modeled as a full V8SI write with zeroed high half.
(define_expand "avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "")
	(vec_concat:V8SI
	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
	  (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")

(define_insn "*avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(vec_concat:V8SI
	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
	  (match_operand:V4SI 2 "const0_operand" "")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Truncating two doubles -> two ints, high half of V4SI zeroed.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
	(vec_concat:V4SI
	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
	  (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(vec_concat:V4SI
	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
	  (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
  else
    return "cvttpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
2643
;; cvtsd2ss: scalar double -> scalar float in element 0; upper elements
;; from operand 1.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float_truncate:V2SF
	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
	  (match_operand:V4SF 1 "register_operand" "0,0,x")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsd2ss\t{%2, %0|%0, %2}
   cvtsd2ss\t{%2, %0|%0, %2}
   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; cvtss2sd: scalar float -> scalar double in element 0; element 1 from
;; operand 1.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
	(vec_merge:V2DF
	  (float_extend:V2DF
	    (vec_select:V2SF
	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
	      (parallel [(const_int 0) (const_int 1)])))
	  (match_operand:V2DF 1 "register_operand" "0,0,x")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtss2sd\t{%2, %0|%0, %2}
   cvtss2sd\t{%2, %0|%0, %2}
   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "athlon_decode" "direct,direct,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])
2686
;; Four doubles -> four floats (vcvtpd2ps, 256-bit source).
(define_insn "avx_cvtpd2ps256"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(float_truncate:V4SF
	  (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

;; Two doubles -> two floats in the low half of a V4SF; high half
;; zeroed, matching the instruction's destination write.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "nonimmediate_operand" ""))
	  (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")

(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
	  (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  ;; AVX needs the explicit "{x}" 128-bit size suffix.
  if (TARGET_AVX)
    return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
  else
    return "cvtpd2ps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])

;; Four floats -> four doubles (vcvtps2pd, 256-bit destination).
(define_insn "avx_cvtps2pd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
	(float_extend:V4DF
	  (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Same, sourcing the low half of a V8SF (%x1 prints the xmm view).
(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
	(float_extend:V4DF
	  (vec_select:V4SF
	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Low two floats of a V4SF -> two doubles.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(float_extend:V2DF
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "athlon_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])
2765
;; Widen the high two floats of a V4SF to V2DF: first shuffle the high
;; pair into the low positions of a scratch, then float_extend it.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_dup 2)
	    (match_operand:V4SF 1 "nonimmediate_operand" ""))
	  (parallel [(const_int 6) (const_int 7)
		     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
	(float_extend:V2DF
	  (vec_select:V2SF
	    (match_dup 2)
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SFmode);")

;; 256-bit: extract the high V4SF half, then extend it to V4DF.
(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
	(vec_select:V4SF
	  (match_operand:V8SF 1 "nonimmediate_operand" "")
	  (parallel [(const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
	(float_extend:V4DF
	  (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SFmode);")

;; Low halves map directly onto the cvtps2pd patterns above.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(float_extend:V2DF
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")

(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand" "")
	(float_extend:V4DF
	  (vec_select:V4SF
	    (match_operand:V8SF 1 "nonimmediate_operand" "")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")
2810
;; Map an integer vector mode to the float vector mode produced when
;; one half of it is unpacked (widened) and converted to floating point.
(define_mode_attr sseunpackfltmode
  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])

;; Generic int->float unpack expanders: first widen the chosen half
;; with the signed/unsigned integer unpack pattern, then emit a plain
;; FLOAT conversion of the widened value.

(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; Unsigned sources: the zero-extending unpack makes the widened value
;; non-negative, so a signed FLOAT of it is already correct.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})
2865
;; Signed SImode -> DFmode conversions (cvtdq2pd reads the low two
;; elements, so the "hi" variants shuffle the high half down first).

(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
	(vec_select:V4SI
	  (match_operand:V4SI 1 "nonimmediate_operand" "")
	  (parallel [(const_int 2) (const_int 3)
		     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
	(float:V2DF
	  (vec_select:V2SI
	    (match_dup 2)
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SImode);")

(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(float:V2DF
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")

;; 256-bit: extract the high 128-bit lane, then vcvtdq2pd all four.
(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
	(vec_select:V4SI
	  (match_operand:V8SI 1 "nonimmediate_operand" "")
	  (parallel [(const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
	(float:V4DF
	  (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SImode);")

(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand" "")
	(float:V4DF
	  (vec_select:V4SI
	    (match_operand:V8SI 1 "nonimmediate_operand" "")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")
2908
;; Unsigned SImode -> DFmode: there is no unsigned cvtdq2pd, so convert
;; as signed and then correct: inputs with bit 31 set come out 2^32 too
;; small (negative), so add 2^32 back under a (result < 0) mask.

(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
	(vec_select:V4SI
	  (match_operand:V4SI 1 "nonimmediate_operand" "")
	  (parallel [(const_int 2) (const_int 3)
		     (const_int 2) (const_int 3)])))
   (set (match_dup 6)
	(float:V2DF
	  (vec_select:V2SI
	    (match_dup 5)
	    (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 7)
	(lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
	(and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
	(plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x;
  int i;

  /* Build the 2^32 correction constant in DFmode.  */
  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
			   ix86_build_const_vector (V2DFmode, 1, x));

  operands[5] = gen_reg_rtx (V4SImode);

  for (i = 6; i < 9; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})

(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
	(float:V2DF
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "")
	    (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 6)
	(lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
	(and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
	(plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x;
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
			   ix86_build_const_vector (V2DFmode, 1, x));

  for (i = 5; i < 8; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})
2973
;; 256-bit unsigned SImode -> DFmode, same signed-convert-then-correct
;; scheme as the 128-bit patterns above, emitted as explicit insns.
;; tmp[0] = 0.0 vector, tmp[1] = 2^32 vector, tmp[2] = signed convert,
;; tmp[3] = (tmp[2] < 0) mask, tmp[4] = masked correction.

(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "register_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x, tmp[6];
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
  tmp[5] = gen_reg_rtx (V4SImode);

  for (i = 2; i < 5; i++)
    tmp[i] = gen_reg_rtx (V4DFmode);
  emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
  emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
			  gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
  DONE;
})

(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x, tmp[5];
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));

  for (i = 2; i < 5; i++)
    tmp[i] = gen_reg_rtx (V4DFmode);
  /* vcvtdq2pd of the low lane directly; no extract needed.  */
  emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
			  gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
  DONE;
})
3025
;; Narrowing float conversions: truncate two double vectors to float
;; and concatenate the results into one vector.

(define_expand "vec_pack_trunc_v4df"
  [(set (match_dup 3)
	(float_truncate:V4SF
	  (match_operand:V4DF 1 "nonimmediate_operand" "")))
   (set (match_dup 4)
	(float_truncate:V4SF
	  (match_operand:V4DF 2 "nonimmediate_operand" "")))
   (set (match_operand:V8SF 0 "register_operand" "")
	(vec_concat:V8SF
	  (match_dup 3)
	  (match_dup 4)))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4SFmode);
  operands[4] = gen_reg_rtx (V4SFmode);
})

(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      /* Concatenate into a 256-bit reg and do one vcvtpd2ps.  */
      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
    }
  else
    {
      /* Two cvtpd2ps, then merge the low halves with movlhps.  */
      tmp0 = gen_reg_rtx (V4SFmode);
      tmp1 = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
      emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
      emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
    }
  DONE;
})
3070
;; Pack two double vectors into one vector of signed ints using
;; truncating (toward-zero) conversion.

(define_expand "vec_pack_sfix_trunc_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx r1, r2;

  r1 = gen_reg_rtx (V4SImode);
  r2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
  emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
  DONE;
})

(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      /* One 256-bit vcvttpd2dq after concatenating the inputs.  */
      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
    }
  else
    {
      /* cvttpd2dq zeroes the high half, so merge the two results
	 with a DImode interleave of the low quadwords.  */
      tmp0 = gen_reg_rtx (V4SImode);
      tmp1 = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
      emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
				    gen_lowpart (V2DImode, tmp0),
				    gen_lowpart (V2DImode, tmp1)));
    }
  DONE;
})
3118
;; Map a double vector mode to the int vector mode of its packed fix.
(define_mode_attr ssepackfltmode
  [(V4DF "V8SI") (V2DF "V4SI")])

;; Unsigned pack-fix-trunc: the helper biases each input into signed
;; range and returns (in tmp[2]/tmp[3]) per-element XOR masks; the
;; signed pack is then un-biased by XORing the interleaved masks in.
(define_expand "vec_pack_ufix_trunc_<mode>"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "register_operand" "")
   (match_operand:VF2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp[7];
  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
  tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
  tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
  emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
  if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
    {
      tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
      ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
    }
  else
    {
      /* Without AVX2 there is no cross-lane V8SI extract; do it as
	 V8SF and view the result back as V8SI.  */
      tmp[5] = gen_reg_rtx (V8SFmode);
      ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
					gen_lowpart (V8SFmode, tmp[3]), 0);
      tmp[5] = gen_lowpart (V8SImode, tmp[5]);
    }
  tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
				operands[0], 0, OPTAB_DIRECT);
  if (tmp[6] != operands[0])
    emit_move_insn (operands[0], tmp[6]);
  DONE;
})
3151
;; Pack two double vectors into one vector of signed ints using the
;; current rounding mode (cvtpd2dq rather than cvttpd2dq).

(define_expand "vec_pack_sfix_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx r1, r2;

  r1 = gen_reg_rtx (V4SImode);
  r2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
  emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
  DONE;
})

(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
    }
  else
    {
      /* Merge the two half-width results via a DImode interleave.  */
      tmp0 = gen_reg_rtx (V4SImode);
      tmp1 = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
      emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
				    gen_lowpart (V2DImode, tmp0),
				    gen_lowpart (V2DImode, tmp1)));
    }
  DONE;
})
3199
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; movhlps: copy the high two SFmode elements of operand 2 into the
;; low half of operand 0, keeping operand 1's high half.

(define_expand "sse_movhlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "")
	    (match_operand:V4SF 2 "nonimmediate_operand" ""))
	  (parallel [(const_int 6)
		     (const_int 7)
		     (const_int 2)
		     (const_int 3)])))]
  "TARGET_SSE"
{
  /* Legitimize the operands (at most one memory).  */
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
	    (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
	  (parallel [(const_int 6)
		     (const_int 7)
		     (const_int 2)
		     (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   vmovhlps\t{%2, %1, %0|%0, %1, %2}
   movlps\t{%H2, %0|%0, %H2}
   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
   %vmovhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3250
;; movlhps: copy the low two SFmode elements of operand 2 into the
;; high half of operand 0, keeping operand 1's low half.

(define_expand "sse_movlhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "")
	    (match_operand:V4SF 2 "nonimmediate_operand" ""))
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 4)
		     (const_int 5)])))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
	    (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 4)
		     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3295
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpckhps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "x")
	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))]
  "TARGET_AVX"
  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; True cross-lane high interleave: do the in-lane low and high unpacks
;; into temporaries, then take the high 128-bit lanes of each.
(define_expand "vec_interleave_highv8sf"
  [(set (match_dup 3)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "x")
	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 1)
	    (match_dup 2))
	  (parallel [(const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 3)
	    (match_dup 4))
	  (parallel [(const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)
		     (const_int 12) (const_int 13)
		     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})

(define_insn "vec_interleave_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "register_operand" "0,x")
	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
	  (parallel [(const_int 2) (const_int 6)
		     (const_int 3) (const_int 7)])))]
  "TARGET_SSE"
  "@
   unpckhps\t{%2, %0|%0, %2}
   vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3363
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpcklps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "x")
	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)])))]
  "TARGET_AVX"
  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; True cross-lane low interleave, built from the two in-lane unpacks
;; followed by a lane-combining permute (low lanes of each temp).
(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "x")
	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 1)
	    (match_dup 2))
	  (parallel [(const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 3)
	    (match_dup 4))
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)
		     (const_int 8) (const_int 9)
		     (const_int 10) (const_int 11)])))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})

(define_insn "vec_interleave_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "register_operand" "0,x")
	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
	  (parallel [(const_int 0) (const_int 4)
		     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3431
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
(define_insn "avx_movshdup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
	    (match_dup 1))
	  (parallel [(const_int 1) (const_int 1)
		     (const_int 3) (const_int 3)
		     (const_int 5) (const_int 5)
		     (const_int 7) (const_int 7)])))]
  "TARGET_AVX"
  "vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
	    (match_dup 1))
	  (parallel [(const_int 1)
		     (const_int 1)
		     (const_int 7)
		     (const_int 7)])))]
  "TARGET_SSE3"
  "%vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])

(define_insn "avx_movsldup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
	    (match_dup 1))
	  (parallel [(const_int 0) (const_int 0)
		     (const_int 2) (const_int 2)
		     (const_int 4) (const_int 4)
		     (const_int 6) (const_int 6)])))]
  "TARGET_AVX"
  "vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
	    (match_dup 1))
	  (parallel [(const_int 0)
		     (const_int 0)
		     (const_int 6)
		     (const_int 6)])))]
  "TARGET_SSE3"
  "%vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
3499
;; vshufps (256-bit): decode the 8-bit immediate into eight explicit
;; element selectors (the same 2-bit field selects within each lane).
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
				  GEN_INT ((mask >> 0) & 3),
				  GEN_INT ((mask >> 2) & 3),
				  GEN_INT (((mask >> 4) & 3) + 8),
				  GEN_INT (((mask >> 6) & 3) + 8),
				  GEN_INT (((mask >> 0) & 3) + 4),
				  GEN_INT (((mask >> 2) & 3) + 4),
				  GEN_INT (((mask >> 4) & 3) + 12),
				  GEN_INT (((mask >> 6) & 3) + 12)));
  DONE;
})

;; One bit in mask selects 2 elements.
(define_insn "avx_shufps256_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "x")
	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(match_operand 3 "const_0_to_3_operand" "")
		     (match_operand 4 "const_0_to_3_operand" "")
		     (match_operand 5 "const_8_to_11_operand" "")
		     (match_operand 6 "const_8_to_11_operand" "")
		     (match_operand 7 "const_4_to_7_operand" "")
		     (match_operand 8 "const_4_to_7_operand" "")
		     (match_operand 9 "const_12_to_15_operand" "")
		     (match_operand 10 "const_12_to_15_operand" "")])))]
  "TARGET_AVX
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
  /* Both lanes must encode the same selection; rebuild the immediate
     from the low-lane selectors.  */
  int mask;
  mask = INTVAL (operands[3]);
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 8) << 4;
  mask |= (INTVAL (operands[6]) - 8) << 6;
  operands[3] = GEN_INT (mask);

  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3554
3555(define_expand "sse_shufps"
3556 [(match_operand:V4SF 0 "register_operand" "")
3557 (match_operand:V4SF 1 "register_operand" "")
3558 (match_operand:V4SF 2 "nonimmediate_operand" "")
3559 (match_operand:SI 3 "const_int_operand" "")]
3560 "TARGET_SSE"
356