1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2015 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
132 ;; For AVX512BW support
140 ;; For AVX512DQ support
145 ;; For AVX512IFMA support
149 ;; For AVX512VBMI support
153 (define_c_enum "unspecv" [
163 ;; All vector modes including V?TImode, used in move patterns.
;; 128-bit modes are always enabled; 256-bit modes require TARGET_AVX;
;; 512-bit modes require TARGET_AVX512F, except V4TI, which is gated on
;; TARGET_AVX512BW.
164 (define_mode_iterator VMOVE
165 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
166 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
167 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
168 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
169 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
170 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
171 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
173 ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as the baseline.
;; Covers SI/DI/SF/DF element vectors: the 512-bit modes are unconditional,
;; the 128- and 256-bit modes additionally require TARGET_AVX512VL.
174 (define_mode_iterator V48_AVX512VL
175 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
176 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
177 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
178 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
180 ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as the baseline.
;; The 512-bit modes are unconditional; the 128- and 256-bit modes
;; additionally require TARGET_AVX512VL.
181 (define_mode_iterator VI12_AVX512VL
182 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
183 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; QImode-element subset of the list above: V64QI unconditional, V16QI and
;; V32QI gated on TARGET_AVX512VL.
185 (define_mode_iterator VI1_AVX512VL
186 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; General vector mode list.  128-bit modes are unconditional except V2DF
;; (TARGET_SSE2); 256-bit modes require TARGET_AVX; 512-bit modes require
;; TARGET_AVX512F.  Note the list contains no 512-bit QI/HI element modes
;; (V64QI, V32HI).
189 (define_mode_iterator V
190 [(V32QI "TARGET_AVX") V16QI
191 (V16HI "TARGET_AVX") V8HI
192 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
193 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
194 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
195 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
197 ;; All 128bit vector modes
;; V2DF is only enabled with TARGET_SSE2; the other modes are unconditional.
198 (define_mode_iterator V_128
199 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
201 ;; All 256bit vector modes
;; (integer and float; no per-mode enabling conditions)
202 (define_mode_iterator V_256
203 [V32QI V16HI V8SI V4DI V8SF V4DF])
205 ;; All 512bit vector modes
;; (integer and float; no per-mode enabling conditions)
206 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
208 ;; All 256bit and 512bit vector modes
;; The 256-bit modes are unconditional; every 512-bit mode requires
;; TARGET_AVX512F.
209 (define_mode_iterator V_256_512
210 [V32QI V16HI V8SI V4DI V8SF V4DF
211 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
212 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
214 ;; All vector float modes
;; 512-bit modes require TARGET_AVX512F, 256-bit modes TARGET_AVX, and V2DF
;; requires TARGET_SSE2; V4SF is unconditional.
215 (define_mode_iterator VF
216 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
217 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
219 ;; 128- and 256-bit float vector modes
;; Same conditions as VF, without the 512-bit modes.
220 (define_mode_iterator VF_128_256
221 [(V8SF "TARGET_AVX") V4SF
222 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
224 ;; All SFmode vector float modes
225 (define_mode_iterator VF1
226 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
228 ;; 128- and 256-bit SF vector modes
229 (define_mode_iterator VF1_128_256
230 [(V8SF "TARGET_AVX") V4SF])
;; 128- and 256-bit SF vector modes, AVX512VL flavour: V8SF is unconditional
;; and V4SF requires TARGET_AVX512VL.
232 (define_mode_iterator VF1_128_256VL
233 [V8SF (V4SF "TARGET_AVX512VL")])
235 ;; All DFmode vector float modes
;; V8DF requires TARGET_AVX512F, V4DF requires TARGET_AVX, V2DF is
;; unconditional here.
236 (define_mode_iterator VF2
237 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
239 ;; 128- and 256-bit DF vector modes
240 (define_mode_iterator VF2_128_256
241 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes: V4DF is unconditional, V8DF requires
;; TARGET_AVX512F.
243 (define_mode_iterator VF2_512_256
244 [(V8DF "TARGET_AVX512F") V4DF])
;; 256- and 512-bit DF vector modes, AVX512VL flavour: V8DF is unconditional,
;; V4DF requires TARGET_AVX512VL.
246 (define_mode_iterator VF2_512_256VL
247 [V8DF (V4DF "TARGET_AVX512VL")])
249 ;; All 128bit vector float modes
;; V2DF is only enabled with TARGET_SSE2.
250 (define_mode_iterator VF_128
251 [V4SF (V2DF "TARGET_SSE2")])
253 ;; All 256bit vector float modes
254 (define_mode_iterator VF_256
257 ;; All 512bit vector float modes
258 (define_mode_iterator VF_512
;; SI and DI element integer vector modes: 512-bit modes are unconditional,
;; 128- and 256-bit modes require TARGET_AVX512VL.
261 (define_mode_iterator VI48_AVX512VL
262 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
263 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; SF and DF element vector modes: 512-bit modes are unconditional,
;; 128- and 256-bit modes require TARGET_AVX512VL.
265 (define_mode_iterator VF_AVX512VL
266 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
267 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; DF element subset of VF_AVX512VL.
269 (define_mode_iterator VF2_AVX512VL
270 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SF element subset of VF_AVX512VL.
272 (define_mode_iterator VF1_AVX512VL
273 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
275 ;; All vector integer modes
;; 128-bit modes are unconditional; 256-bit modes require TARGET_AVX;
;; 512-bit SI/DI modes require TARGET_AVX512F and 512-bit QI/HI modes
;; require TARGET_AVX512BW.
276 (define_mode_iterator VI
277 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
278 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
279 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
280 (V8SI "TARGET_AVX") V4SI
281 (V4DI "TARGET_AVX") V2DI])
;; Same mode set as VI, but the 256-bit modes are gated on TARGET_AVX2
;; instead of TARGET_AVX.
283 (define_mode_iterator VI_AVX2
284 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
285 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
286 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
287 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
289 ;; All QImode vector integer modes
;; (128- and 256-bit only; V64QI is not part of this list)
290 (define_mode_iterator VI1
291 [(V32QI "TARGET_AVX") V16QI])
293 (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
295 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
;; SI and DI element integer vector modes: 512-bit modes are unconditional,
;; 128- and 256-bit modes require TARGET_AVX512VL.
;; NOTE(review): the name suggests use by unaligned load/store patterns;
;; those users are not visible here -- confirm.
297 (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
298 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
299 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
301 ;; All DImode vector integer modes
;; V8DI requires TARGET_AVX512F, V4DI requires TARGET_AVX.
302 (define_mode_iterator VI8
303 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vector integer modes: V8DI is unconditional, the narrower modes
;; require TARGET_AVX512VL.
305 (define_mode_iterator VI8_AVX512VL
306 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 256- and 512-bit DImode vector integer modes: V8DI is unconditional,
;; V4DI requires TARGET_AVX512VL.
308 (define_mode_iterator VI8_256_512
309 [V8DI (V4DI "TARGET_AVX512VL")])
;; QImode vectors: V16QI unconditional, V32QI with TARGET_AVX2.
311 (define_mode_iterator VI1_AVX2
312 [(V32QI "TARGET_AVX2") V16QI])
;; QImode vectors including V64QI (TARGET_AVX512BW).
314 (define_mode_iterator VI1_AVX512
315 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
;; HImode vectors: V16HI with TARGET_AVX2, V32HI with TARGET_AVX512BW.
317 (define_mode_iterator VI2_AVX2
318 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; HImode vectors; unlike VI2_AVX2, V32HI is gated on TARGET_AVX512F here.
320 (define_mode_iterator VI2_AVX512F
321 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; 128- and 256-bit SImode vectors; V8SI with TARGET_AVX.
323 (define_mode_iterator VI4_AVX
324 [(V8SI "TARGET_AVX") V4SI])
;; 128- and 256-bit SImode vectors; V8SI with TARGET_AVX2.
326 (define_mode_iterator VI4_AVX2
327 [(V8SI "TARGET_AVX2") V4SI])
;; SImode vectors including V16SI (TARGET_AVX512F).
329 (define_mode_iterator VI4_AVX512F
330 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; SImode vectors: V16SI unconditional, narrower modes with TARGET_AVX512VL.
332 (define_mode_iterator VI4_AVX512VL
333 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
;; SI and DI element integer vectors.  V4SI and V8SI are unconditional;
;; the 512-bit modes require TARGET_AVX512F; V2DI and V4DI require
;; TARGET_AVX512VL.
335 (define_mode_iterator VI48_AVX512F_AVX512VL
336 [V4SI V8SI (V16SI "TARGET_AVX512F")
337 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
;; HImode vectors: V32HI unconditional, narrower modes with TARGET_AVX512VL.
339 (define_mode_iterator VI2_AVX512VL
340 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
;; DImode vectors; V8DI is gated on TARGET_AVX512BW, V4DI on TARGET_AVX2.
342 (define_mode_iterator VI8_AVX2_AVX512BW
343 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
;; 128- and 256-bit DImode vectors; V4DI with TARGET_AVX2.
345 (define_mode_iterator VI8_AVX2
346 [(V4DI "TARGET_AVX2") V2DI])
;; DImode vectors; V8DI is gated on TARGET_AVX512F, V4DI on TARGET_AVX2.
348 (define_mode_iterator VI8_AVX2_AVX512F
349 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
351 (define_mode_iterator VI4_128_8_256
355 (define_mode_iterator V8FI
359 (define_mode_iterator V16FI
;; TImode-element vector modes: V1TI unconditional, V2TI with TARGET_AVX2,
;; V4TI with TARGET_AVX512BW.
362 ;; ??? We should probably use TImode instead.
363 (define_mode_iterator VIMAX_AVX2
364 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
;; Same conditions as VIMAX_AVX2, but the 128-bit entry is scalar TI rather
;; than V1TI.
366 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
367 (define_mode_iterator SSESCALARMODE
368 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
;; QI and HI element vectors: 256-bit modes with TARGET_AVX2, 512-bit modes
;; with TARGET_AVX512BW.
370 (define_mode_iterator VI12_AVX2
371 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; 128- and 256-bit HI and SI element vectors; 256-bit modes with TARGET_AVX2.
374 (define_mode_iterator VI24_AVX2
375 [(V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
;; QI, HI and SI element vectors.  256-bit modes with TARGET_AVX2; V32HI and
;; V16SI with TARGET_AVX512F.  There is no V64QI entry.
378 (define_mode_iterator VI124_AVX512F
379 [(V32QI "TARGET_AVX2") V16QI
380 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; 128- and 256-bit QI, HI and SI element vectors; 256-bit modes with
;; TARGET_AVX2.
383 (define_mode_iterator VI124_AVX2
384 [(V32QI "TARGET_AVX2") V16QI
385 (V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI])
;; HImode vectors: V16HI with TARGET_AVX2, V32HI with TARGET_AVX512BW.
388 (define_mode_iterator VI2_AVX2_AVX512BW
389 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; 128- and 256-bit SI and DI element vectors; 256-bit modes with TARGET_AVX2.
391 (define_mode_iterator VI48_AVX2
392 [(V8SI "TARGET_AVX2") V4SI
393 (V4DI "TARGET_AVX2") V2DI])
;; HI, SI and DI element vectors; 256-bit modes with TARGET_AVX2.  The only
;; 512-bit entry is V8DI (TARGET_AVX512F).
395 (define_mode_iterator VI248_AVX2_8_AVX512F
396 [(V16HI "TARGET_AVX2") V8HI
397 (V8SI "TARGET_AVX2") V4SI
398 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; V16SI and V8DI are unconditional; V32HI requires TARGET_AVX512BW and
;; V4DI requires TARGET_AVX512VL.
400 (define_mode_iterator VI248_AVX512BW_AVX512VL
401 [(V32HI "TARGET_AVX512BW")
402 (V4DI "TARGET_AVX512VL") V16SI V8DI])
404 ;; Assumes TARGET_AVX512VL as the baseline
405 (define_mode_iterator VI24_AVX512BW_1
406 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
;; SI and DI element integer vectors: 128- and 256-bit modes are
;; unconditional, 512-bit modes require TARGET_AVX512F.
409 (define_mode_iterator VI48_AVX512F
410 [(V16SI "TARGET_AVX512F") V8SI V4SI
411 (V8DI "TARGET_AVX512F") V4DI V2DI])
413 (define_mode_iterator V48_AVX2
416 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
417 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA name string per vector mode: 128- and 256-bit modes give "avx512vl",
;; 512-bit QI/HI modes give "avx512bw", 512-bit SI/DI/SF/DF modes give
;; "avx512f".  Spliced into pattern names such as <avx512>_load<mode>_mask.
419 (define_mode_attr avx512
420 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
421 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
422 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
423 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
424 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
425 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; ISA name string per vector mode.  QI and SI element modes follow the
;; "sse2" / "avx" / "avx512f" progression by width, while the HI and DI
;; 128/256-bit entries are "avx512vl" and V32HI is "avx512bw".  All float
;; modes below 512 bits map to "avx".
427 (define_mode_attr sse2_avx_avx512f
428 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
429 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
430 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
431 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
432 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
433 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA name string for integer vector modes: "sse2" for 128-bit, "avx2" for
;; 256-bit; 512-bit is "avx512bw" for QI/HI/TI elements and "avx512f" for
;; SI/DI elements.
435 (define_mode_attr sse2_avx2
436 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
437 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
438 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
439 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
440 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA name string: "ssse3" for 128-bit and narrower entries (including V4HI
;; and scalar TI), "avx2" for 256-bit, "avx512bw" for the 512-bit entries
;; present.  There are no V16SI or V8DI entries.
442 (define_mode_attr ssse3_avx2
443 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
444 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
445 (V4SI "ssse3") (V8SI "avx2")
446 (V2DI "ssse3") (V4DI "avx2")
447 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA name string: "sse4_1" for 128-bit, "avx2" for 256-bit; 512-bit is
;; "avx512bw" for QI/HI, "avx512f" for V16SI and "avx512dq" for V8DI.
449 (define_mode_attr sse4_1_avx2
450 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
451 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
452 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
453 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
;; "avx" for SF/DF element vectors, "avx2" for SI/DI element vectors
;; (128- and 256-bit modes only).
455 (define_mode_attr avx_avx2
456 [(V4SF "avx") (V2DF "avx")
457 (V8SF "avx") (V4DF "avx")
458 (V4SI "avx2") (V2DI "avx2")
459 (V8SI "avx2") (V4DI "avx2")])
;; "vec" for 128-bit integer vectors, "avx2" for 256-bit integer vectors.
461 (define_mode_attr vec_avx2
462 [(V16QI "vec") (V32QI "avx2")
463 (V8HI "vec") (V16HI "avx2")
464 (V4SI "vec") (V8SI "avx2")
465 (V2DI "vec") (V4DI "avx2")])
;; For SI/DI/SF/DF element vectors: "avx2" below 512 bits, "avx512f" at
;; 512 bits.  HI element vectors map to "avx512vl" (128/256-bit) and
;; "avx512bw" (V32HI).
467 (define_mode_attr avx2_avx512
468 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
469 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
470 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
471 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
472 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; Element-type letter for shuffle/extract/broadcast mnemonics: "f" for
;; float element vectors, "i" for integer element vectors.  The letter is
;; spliced directly into mnemonics such as vextract<shuffletype>64x4 and
;; vbroadcast<shuffletype>32x4, so every entry must be "f" or "i"; any other
;; letter would produce a non-existent instruction name.
474 (define_mode_attr shuffletype
475 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
476 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
477 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
478 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
479 (V64QI "i") (V1TI "i") (V2TI "i")])
;; 512-bit SI/DI/SF/DF vector mode -> vector mode with the same element
;; type and a quarter of the elements.
481 (define_mode_attr ssequartermode
482 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Lower-case name of the mode with the same element count and the next
;; wider integer element (QI->hi, HI->si, SI->di).
484 (define_mode_attr ssedoublemodelower
485 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
486 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
487 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; NOTE(review): this table mixes two kinds of doubling.  The first two rows
;; double the element count (e.g. V8SF -> V16SF); the last two rows keep the
;; element count and widen the element (e.g. V16HI -> V16SI, V4SI -> V4DI).
489 (define_mode_attr ssedoublemode
490 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
491 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
492 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
493 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
;; DImode vector -> QImode vector of the same total size.
495 (define_mode_attr ssebytemode
496 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
498 ;; All 128bit vector integer modes
499 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
501 ;; All 256bit vector integer modes
502 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
504 ;; All 512bit vector integer modes
505 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
507 ;; Various 128bit vector integer mode combinations
;; The digits in each name give the element sizes in bytes that the list
;; contains (1 = QI, 2 = HI, 4 = SI, 8 = DI).  None has per-mode conditions.
508 (define_mode_iterator VI12_128 [V16QI V8HI])
509 (define_mode_iterator VI14_128 [V16QI V4SI])
510 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
511 (define_mode_iterator VI24_128 [V8HI V4SI])
512 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
513 (define_mode_iterator VI48_128 [V4SI V2DI])
515 ;; Various 256bit and 512bit vector integer mode combinations
;; 256-bit QI, HI and SI element vectors, unconditional.
516 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
517 (define_mode_iterator VI124_256_AVX512F_AVX512BW
519 (V64QI "TARGET_AVX512BW")
520 (V32HI "TARGET_AVX512BW")
521 (V16SI "TARGET_AVX512F")])
;; 256-bit SI and DI element vectors.
522 (define_mode_iterator VI48_256 [V8SI V4DI])
;; 512-bit SI and DI element vectors.
523 (define_mode_iterator VI48_512 [V16SI V8DI])
;; The 256-bit SI vector and the 512-bit DI vector (both have 8 elements).
524 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; 512-bit integer vectors: SI/DI modes unconditional, HI/QI modes gated on
;; TARGET_AVX512BW.
525 (define_mode_iterator VI_AVX512BW
526 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
528 ;; Int-float size matches
;; Each list pairs an integer vector mode with the float vector mode of the
;; same element size and element count.
529 (define_mode_iterator VI4F_128 [V4SI V4SF])
530 (define_mode_iterator VI8F_128 [V2DI V2DF])
531 (define_mode_iterator VI4F_256 [V8SI V8SF])
532 (define_mode_iterator VI8F_256 [V4DI V4DF])
;; 8-byte element pairs at 256 bits (unconditional) and 512 bits
;; (TARGET_AVX512F).
533 (define_mode_iterator VI8F_256_512
534 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
535 (define_mode_iterator VI48F_256_512
537 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
538 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
539 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; All six 512-bit vector modes (SI, SF, DI, DF, HI and QI elements),
;; unconditional.
540 (define_mode_iterator VF48_I1248
541 [V16SI V16SF V8DI V8DF V32HI V64QI])
;; SI/DI integer and SF/DF float vectors: 512-bit modes are unconditional,
;; 128- and 256-bit modes require TARGET_AVX512VL.
542 (define_mode_iterator VI48F
543 [V16SI V16SF V8DI V8DF
544 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
545 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
546 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
547 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; 256-bit SI/DI integer and SF/DF float vectors, unconditional.
548 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
550 ;; Mapping from float mode to required SSE level
;; SF element modes start at "sse", DF element modes at "sse2"; 256-bit
;; modes give "avx" and 512-bit modes "avx512f".  Spliced into pattern names
;; such as <sse>_loadu<ssemodesuffix><avxsizesuffix>.
551 (define_mode_attr sse
552 [(SF "sse") (DF "sse2")
553 (V4SF "sse") (V2DF "sse2")
554 (V16SF "avx512f") (V8SF "avx")
555 (V8DF "avx512f") (V4DF "avx")])
;; ISA name for QI and DI element integer vectors: "sse2" / "avx" /
;; "avx512f" by width.
557 (define_mode_attr sse2
558 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
559 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA name for QI element vectors: "sse3" for V16QI, "avx" for V32QI.
561 (define_mode_attr sse3
562 [(V16QI "sse3") (V32QI "avx")])
564 (define_mode_attr sse4_1
565 [(V4SF "sse4_1") (V2DF "sse4_1")
566 (V8SF "avx") (V4DF "avx")
;; Vector width suffix: "512" for 512-bit modes, "256" for 256-bit modes and
;; the empty string for 128-bit modes.  Spliced into pattern names such as
;; <sse>_loadu<ssemodesuffix><avxsizesuffix>.
569 (define_mode_attr avxsizesuffix
570 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
571 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
572 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
573 (V16SF "512") (V8DF "512")
574 (V8SF "256") (V4DF "256")
575 (V4SF "") (V2DF "")])
577 ;; SSE instruction mode
578 (define_mode_attr sseinsnmode
579 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
580 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
581 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
582 (V16SF "V16SF") (V8DF "V8DF")
583 (V8SF "V8SF") (V4DF "V4DF")
584 (V4SF "V4SF") (V2DF "V2DF")
587 ;; Mapping of vector modes to corresponding mask size
;; The result is an integer mode with at least as many bits as the vector
;; has elements: 64 elements -> DI, 32 -> SI, 16 -> HI, and 8 or fewer -> QI.
;; Used as the mode of the mask operand in the *_mask patterns.
588 (define_mode_attr avx512fmaskmode
589 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
590 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
591 (V16SI "HI") (V8SI "QI") (V4SI "QI")
592 (V8DI "QI") (V4DI "QI") (V2DI "QI")
593 (V16SF "HI") (V8SF "QI") (V4SF "QI")
594 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
596 ;; Mapping of vector float modes to an integer mode of the same size
;; Float vectors map to the integer vector with the same element size and
;; count; integer vector modes map to themselves.
597 (define_mode_attr sseintvecmode
598 [(V16SF "V16SI") (V8DF "V8DI")
599 (V8SF "V8SI") (V4DF "V4DI")
600 (V4SF "V4SI") (V2DF "V2DI")
601 (V16SI "V16SI") (V8DI "V8DI")
602 (V8SI "V8SI") (V4DI "V4DI")
603 (V4SI "V4SI") (V2DI "V2DI")
604 (V16HI "V16HI") (V8HI "V8HI")
605 (V32HI "V32HI") (V64QI "V64QI")
606 (V32QI "V32QI") (V16QI "V16QI")])
;; Float vector mode -> non-vector wide integer mode name (XI, OI or TI).
;; There is no V16SF entry.
608 (define_mode_attr sseintvecmode2
609 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
610 (V8SF "OI") (V4SF "TI")])
;; Lower-case variant of sseintvecmode.  Of the 512-bit modes only the
;; float ones (V16SF, V8DF) have entries here.
612 (define_mode_attr sseintvecmodelower
613 [(V16SF "v16si") (V8DF "v8di")
614 (V8SF "v8si") (V4DF "v4di")
615 (V4SF "v4si") (V2DF "v2di")
616 (V8SI "v8si") (V4DI "v4di")
617 (V4SI "v4si") (V2DI "v2di")
618 (V16HI "v16hi") (V8HI "v8hi")
619 (V32QI "v32qi") (V16QI "v16qi")])
621 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice as many elements.  Defined for 128- and 256-bit
;; source modes only.
622 (define_mode_attr ssedoublevecmode
623 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
624 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
625 (V8SF "V16SF") (V4DF "V8DF")
626 (V4SF "V8SF") (V2DF "V4DF")])
628 ;; Mapping of vector modes to a vector mode of half size
629 (define_mode_attr ssehalfvecmode
630 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
631 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
632 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
633 (V16SF "V8SF") (V8DF "V4DF")
634 (V8SF "V4SF") (V4DF "V2DF")
637 ;; Mapping of vector modes to packed single mode of the same size
;; 512-bit modes -> V16SF, 256-bit modes -> V8SF, 128-bit modes -> V4SF.
;; Used as a "mode" attribute value in *mov<mode>_internal.
638 (define_mode_attr ssePSmode
639 [(V16SI "V16SF") (V8DF "V16SF")
640 (V16SF "V16SF") (V8DI "V16SF")
641 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
642 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
643 (V8SI "V8SF") (V4SI "V4SF")
644 (V4DI "V8SF") (V2DI "V4SF")
645 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
646 (V8SF "V8SF") (V4SF "V4SF")
647 (V4DF "V8SF") (V2DF "V4SF")])
;; DImode vector -> SF vector with the same number of elements (half the
;; total size).
649 (define_mode_attr ssePSmode2
650 [(V8DI "V8SF") (V4DI "V4SF")])
652 ;; Mapping of vector modes back to the scalar modes
;; i.e. the mode of a single element of the vector.
653 (define_mode_attr ssescalarmode
654 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
655 (V32HI "HI") (V16HI "HI") (V8HI "HI")
656 (V16SI "SI") (V8SI "SI") (V4SI "SI")
657 (V8DI "DI") (V4DI "DI") (V2DI "DI")
658 (V16SF "SF") (V8SF "SF") (V4SF "SF")
659 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
661 ;; Mapping of vector modes to the 128bit modes
;; Same element type, 128 bits total; 128-bit modes map to themselves.
662 (define_mode_attr ssexmmmode
663 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
664 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
665 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
666 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
667 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
668 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
670 ;; Pointer size override for scalar modes (Intel asm dialect)
671 (define_mode_attr iptr
672 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
673 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
674 (V8SF "k") (V4DF "q")
675 (V4SF "k") (V2DF "q")
678 ;; Number of scalar elements in each vector type
;; Note there is no V32HI entry.
679 (define_mode_attr ssescalarnum
680 [(V64QI "64") (V16SI "16") (V8DI "8")
681 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
682 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
683 (V16SF "16") (V8DF "8")
684 (V8SF "8") (V4DF "4")
685 (V4SF "4") (V2DF "2")])
687 ;; Mask of scalar elements in each vector type
;; Each value is the element count minus one.  Only 128- and 256-bit modes
;; have entries.
688 (define_mode_attr ssescalarnummask
689 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
690 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
691 (V8SF "7") (V4DF "3")
692 (V4SF "3") (V2DF "1")])
;; Element size in bits.  Integer vectors of every width are covered; the
;; only float entries are V16SF and V8DF.  Also spliced into mnemonics, e.g.
;; vmovdqu<ssescalarsize>.
694 (define_mode_attr ssescalarsize
695 [(V8DI "64") (V4DI "64") (V2DI "64")
696 (V64QI "8") (V32QI "8") (V16QI "8")
697 (V32HI "16") (V16HI "16") (V8HI "16")
698 (V16SI "32") (V8SI "32") (V4SI "32")
699 (V16SF "32") (V8DF "64")])
701 ;; SSE prefix for integer vector modes
702 (define_mode_attr sseintprefix
703 [(V2DI "p") (V2DF "")
708 (V16SI "p") (V16SF "")
709 (V16QI "p") (V8HI "p")
710 (V32QI "p") (V16HI "p")
711 (V64QI "p") (V32HI "p")])
713 ;; SSE scalar suffix for vector modes
714 (define_mode_attr ssescalarmodesuffix
716 (V8SF "ss") (V4DF "sd")
717 (V4SF "ss") (V2DF "sd")
718 (V8SI "ss") (V4DI "sd")
721 ;; Pack/unpack vector modes
;; sseunpackmode: same total size, half as many elements, each twice as wide
;; (e.g. V16QI -> V8HI).
722 (define_mode_attr sseunpackmode
723 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
724 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
725 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; ssepackmode: the inverse mapping -- same total size, twice as many
;; elements, each half as wide (e.g. V8HI -> V16QI).
727 (define_mode_attr ssepackmode
728 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
729 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
730 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
732 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (128-bit modes only).
733 (define_mode_attr sserotatemax
734 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
736 ;; Mapping of mode to cast intrinsic name
;; "si" for integer, "ps" for SF and "pd" for DF vectors (256- and 512-bit).
737 (define_mode_attr castmode
738 [(V8SI "si") (V8SF "ps") (V4DF "pd")
739 (V16SI "si") (V16SF "ps") (V8DF "pd")])
741 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the RTL code), not a mode attribute.
742 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
744 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
745 ;; i64x4 or f64x4 for 512bit modes.
;; The 256-bit integer entries hold the literal "%~128"; the "%~" prefix is
;; left for output-time expansion (presumably to "i" or "f" as described
;; above -- confirm against the i386 operand-printing code).
746 (define_mode_attr i128
747 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
748 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
749 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; SI, SF and DF element vectors at 256 bits and at 512 bits respectively.
752 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
753 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
755 ;; Mapping for dbpsadbw modes
;; HImode vector -> QImode vector of the same total size.
756 (define_mode_attr dbpsadbwmode
757 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
759 ;; Mapping suffixes for broadcast
;; Keyed on element type: "b"/"w"/"d"/"q" for QI/HI/SI/DI elements and
;; "ss"/"sd" for SF/DF elements.
760 (define_mode_attr bcstscalarsuff
761 [(V64QI "b") (V32QI "b") (V16QI "b")
762 (V32HI "w") (V16HI "w") (V8HI "w")
763 (V16SI "d") (V8SI "d") (V4SI "d")
764 (V8DI "q") (V4DI "q") (V2DI "q")
765 (V16SF "ss") (V8SF "ss") (V4SF "ss")
766 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
768 ;; Tie mode of assembler operand to mode iterator
;; "t" for every 256-bit mode and "g" for every 512-bit mode.
;; NOTE(review): presumably these are the %t / %g operand-modifier letters
;; (compare the %g0 / %g1 operands in *mov<mode>_internal) -- confirm.
769 (define_mode_attr concat_tg_mode
770 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
771 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
774 ;; Include define_subst patterns for instructions with mask
777 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
785 ;; All of these patterns are enabled for SSE1 as well as SSE2.
786 ;; This is essential for maintaining stable calling conventions.
788 (define_expand "mov<mode>"
789 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
790 (match_operand:VMOVE 1 "nonimmediate_operand"))]
793 ix86_expand_vector_move (<MODE>mode, operands);
797 (define_insn "*mov<mode>_internal"
798 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
799 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
801 && (register_operand (operands[0], <MODE>mode)
802 || register_operand (operands[1], <MODE>mode))"
804 int mode = get_attr_mode (insn);
805 switch (which_alternative)
808 return standard_sse_constant_opcode (insn, operands[1]);
811 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
812 in avx512f, so we need to use workarounds, to access sse registers
813 16-31, which are evex-only. In avx512vl we don't need workarounds. */
814 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
815 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
816 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
818 if (memory_operand (operands[0], <MODE>mode))
820 if (<MODE_SIZE> == 32)
821 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
822 else if (<MODE_SIZE> == 16)
823 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
827 else if (memory_operand (operands[1], <MODE>mode))
829 if (<MODE_SIZE> == 32)
830 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
831 else if (<MODE_SIZE> == 16)
832 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
837 /* Reg -> reg move is always aligned. Just use wider move. */
842 return "vmovaps\t{%g1, %g0|%g0, %g1}";
845 return "vmovapd\t{%g1, %g0|%g0, %g1}";
848 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
859 && (misaligned_operand (operands[0], <MODE>mode)
860 || misaligned_operand (operands[1], <MODE>mode)))
861 return "vmovups\t{%1, %0|%0, %1}";
863 return "%vmovaps\t{%1, %0|%0, %1}";
869 && (misaligned_operand (operands[0], <MODE>mode)
870 || misaligned_operand (operands[1], <MODE>mode)))
871 return "vmovupd\t{%1, %0|%0, %1}";
873 return "%vmovapd\t{%1, %0|%0, %1}";
878 && (misaligned_operand (operands[0], <MODE>mode)
879 || misaligned_operand (operands[1], <MODE>mode)))
880 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
881 : "vmovdqu\t{%1, %0|%0, %1}";
883 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
884 : "%vmovdqa\t{%1, %0|%0, %1}";
886 if (misaligned_operand (operands[0], <MODE>mode)
887 || misaligned_operand (operands[1], <MODE>mode))
888 return "vmovdqu64\t{%1, %0|%0, %1}";
890 return "vmovdqa64\t{%1, %0|%0, %1}";
899 [(set_attr "type" "sselog1,ssemov,ssemov")
900 (set_attr "prefix" "maybe_vex")
902 (cond [(and (match_test "<MODE_SIZE> == 16")
903 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
904 (and (eq_attr "alternative" "2")
905 (match_test "TARGET_SSE_TYPELESS_STORES"))))
906 (const_string "<ssePSmode>")
907 (match_test "TARGET_AVX")
908 (const_string "<sseinsnmode>")
909 (ior (not (match_test "TARGET_SSE2"))
910 (match_test "optimize_function_for_size_p (cfun)"))
911 (const_string "V4SF")
912 (and (eq_attr "alternative" "0")
913 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
916 (const_string "<sseinsnmode>")))])
918 (define_insn "<avx512>_load<mode>_mask"
919 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
920 (vec_merge:V48_AVX512VL
921 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
922 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
923 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
926 static char buf [64];
929 const char *sse_suffix;
931 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
934 sse_suffix = "<ssemodesuffix>";
939 sse_suffix = "<ssescalarsize>";
942 if (misaligned_operand (operands[1], <MODE>mode))
947 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
948 insn_op, align, sse_suffix);
951 [(set_attr "type" "ssemov")
952 (set_attr "prefix" "evex")
953 (set_attr "memory" "none,load")
954 (set_attr "mode" "<sseinsnmode>")])
956 (define_insn "<avx512>_load<mode>_mask"
957 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
958 (vec_merge:VI12_AVX512VL
959 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
960 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
961 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
963 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
964 [(set_attr "type" "ssemov")
965 (set_attr "prefix" "evex")
966 (set_attr "memory" "none,load")
967 (set_attr "mode" "<sseinsnmode>")])
969 (define_insn "<avx512>_blendm<mode>"
970 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
971 (vec_merge:V48_AVX512VL
972 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
973 (match_operand:V48_AVX512VL 1 "register_operand" "v")
974 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
976 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
977 [(set_attr "type" "ssemov")
978 (set_attr "prefix" "evex")
979 (set_attr "mode" "<sseinsnmode>")])
981 (define_insn "<avx512>_blendm<mode>"
982 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
983 (vec_merge:VI12_AVX512VL
984 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
985 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
986 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
988 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
989 [(set_attr "type" "ssemov")
990 (set_attr "prefix" "evex")
991 (set_attr "mode" "<sseinsnmode>")])
993 (define_insn "<avx512>_store<mode>_mask"
994 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
995 (vec_merge:V48_AVX512VL
996 (match_operand:V48_AVX512VL 1 "register_operand" "v")
998 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1001 static char buf [64];
1003 const char *insn_op;
1004 const char *sse_suffix;
1006 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1009 sse_suffix = "<ssemodesuffix>";
1014 sse_suffix = "<ssescalarsize>";
1017 if (misaligned_operand (operands[1], <MODE>mode))
1022 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
1023 insn_op, align, sse_suffix);
1026 [(set_attr "type" "ssemov")
1027 (set_attr "prefix" "evex")
1028 (set_attr "memory" "store")
1029 (set_attr "mode" "<sseinsnmode>")])
1031 (define_insn "<avx512>_store<mode>_mask"
1032 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1033 (vec_merge:VI12_AVX512VL
1034 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1036 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1038 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1039 [(set_attr "type" "ssemov")
1040 (set_attr "prefix" "evex")
1041 (set_attr "memory" "store")
1042 (set_attr "mode" "<sseinsnmode>")])
1044 (define_insn "sse2_movq128"
1045 [(set (match_operand:V2DI 0 "register_operand" "=x")
1048 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1049 (parallel [(const_int 0)]))
1052 "%vmovq\t{%1, %0|%0, %q1}"
1053 [(set_attr "type" "ssemov")
1054 (set_attr "prefix" "maybe_vex")
1055 (set_attr "mode" "TI")])
1057 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1058 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1059 ;; from memory, we'd prefer to load the memory directly into the %xmm
1060 ;; register. To facilitate this happy circumstance, this pattern won't
1061 ;; split until after register allocation. If the 64-bit value didn't
1062 ;; come from memory, this is the best we can do. This is much better
1063 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1066 (define_insn_and_split "movdi_to_sse"
1068 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1069 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1070 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1071 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1073 "&& reload_completed"
1076 if (register_operand (operands[1], DImode))
1078 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1079 Assemble the 64-bit DImode value in an xmm register. */
1080 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1081 gen_rtx_SUBREG (SImode, operands[1], 0)));
1082 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1083 gen_rtx_SUBREG (SImode, operands[1], 4)));
1084 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1087 else if (memory_operand (operands[1], DImode))
1089 rtx tmp = gen_reg_rtx (V2DImode);
1090 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1091 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1098 [(set (match_operand:V4SF 0 "register_operand")
1099 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1100 "TARGET_SSE && reload_completed"
1103 (vec_duplicate:V4SF (match_dup 1))
1107 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1108 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload split (TARGET_SSE2 && reload_completed) of a V2DF register
;; set whose source satisfies zero_extended_scalar_load_operand.
;; The set is rewritten as a vec_concat:V2DF of operand 1 (narrowed to its
;; DFmode subreg at byte 0) and operand 2 (the DFmode zero constant).
1112 [(set (match_operand:V2DF 0 "register_operand")
1113 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1114 "TARGET_SSE2 && reload_completed"
1115 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1117 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1118 operands[2] = CONST0_RTX (DFmode);
;; movmisalign<mode>: misaligned move expander for every VMOVE mode.
;; Both operands are nonimmediate_operand; the expansion is delegated to
;; ix86_expand_vector_move_misalign with the mode and the operand array.
1121 (define_expand "movmisalign<mode>"
1122 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1123 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1126 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned floating-point vector load expander for VF modes, with an
;; optional masked variant (<mask_name>).
;; Condition: TARGET_SSE && <mask_mode512bit_condition>.
;;   operand 0: destination register.
;;   operand 1: nonimmediate source, wrapped in an unspec in the pattern.
;; When the guard containing misaligned_operand (operands[1], <MODE>mode)
;; holds, a plain SET of operand 0 is emitted instead of the unspec form.
;; Its source starts as operand 1 and is rebuilt as a VEC_MERGE with
;; operands[2 * <mask_applied>] and operands[3 * <mask_applied>].
;; NOTE(review): the VEC_MERGE is presumably built only for the masked
;; variant; the guarding line is not visible in this excerpt -- confirm.
1130 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1131 [(set (match_operand:VF 0 "register_operand")
1132 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1134 "TARGET_SSE && <mask_mode512bit_condition>"
1136 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1137 just fine if misaligned_operand is true, and without the UNSPEC it can
1138 be combined with arithmetic instructions. If misaligned_operand is
1139 false, still emit UNSPEC_LOADU insn to honor user's request for
1142 && misaligned_operand (operands[1], <MODE>mode))
1144 rtx src = operands[1];
1146 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1147 operands[2 * <mask_applied>],
1148 operands[3 * <mask_applied>]);
1149 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned floating-point vector load insn for VF modes (optionally
;; masked).  Condition: TARGET_SSE && <mask_mode512bit_condition>.
;;   operand 0: destination register ("v").
;;   operand 1: register or memory source ("vm").
;; The output code switches on get_attr_mode and returns either the
;; %vmovups template or the %vmovu<ssemodesuffix> template.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else <MODE> under TARGET_AVX,
;; else V4SF when optimizing the function for size, else <MODE>.
1154 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1155 [(set (match_operand:VF 0 "register_operand" "=v")
1157 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1159 "TARGET_SSE && <mask_mode512bit_condition>"
1161 switch (get_attr_mode (insn))
1166 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1168 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1171 [(set_attr "type" "ssemov")
1172 (set_attr "movu" "1")
1173 (set_attr "ssememalign" "8")
1174 (set_attr "prefix" "maybe_vex")
1176 (cond [(and (match_test "<MODE_SIZE> == 16")
1177 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1178 (const_string "<ssePSmode>")
1179 (match_test "TARGET_AVX")
1180 (const_string "<MODE>")
1181 (match_test "optimize_function_for_size_p (cfun)")
1182 (const_string "V4SF")
1184 (const_string "<MODE>")))])
;; Unaligned floating-point vector store insn for VF modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v").
;; The output code switches on get_attr_mode and returns either the
;; %vmovups template or the %vmovu<ssemodesuffix> template.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES is
;; set, else <MODE> under TARGET_AVX, else V4SF when optimizing the
;; function for size, else <MODE>.
1186 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1187 [(set (match_operand:VF 0 "memory_operand" "=m")
1189 [(match_operand:VF 1 "register_operand" "v")]
1193 switch (get_attr_mode (insn))
1198 return "%vmovups\t{%1, %0|%0, %1}";
1200 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1203 [(set_attr "type" "ssemov")
1204 (set_attr "movu" "1")
1205 (set_attr "ssememalign" "8")
1206 (set_attr "prefix" "maybe_vex")
1208 (cond [(and (match_test "<MODE_SIZE> == 16")
1209 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1210 (match_test "TARGET_SSE_TYPELESS_STORES")))
1211 (const_string "<ssePSmode>")
1212 (match_test "TARGET_AVX")
1213 (const_string "<MODE>")
1214 (match_test "optimize_function_for_size_p (cfun)")
1215 (const_string "V4SF")
1217 (const_string "<MODE>")))])
;; Masked unaligned floating-point vector store for VF_AVX512VL modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v"), inside a vec_merge.
;;   operand 2: mask register of mode <avx512fmaskmode> ("Yk").
;; The output code switches on get_attr_mode and returns either the
;; vmovups or the vmovu<ssemodesuffix> template; both print the mask as
;; {%2} after the memory operand.  Attributes mark the insn as an EVEX
;; store of mode <sseinsnmode>.
1219 (define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
1220 [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
1221 (vec_merge:VF_AVX512VL
1223 [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
1226 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1229 switch (get_attr_mode (insn))
1234 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1236 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1239 [(set_attr "type" "ssemov")
1240 (set_attr "movu" "1")
1241 (set_attr "memory" "store")
1242 (set_attr "prefix" "evex")
1243 (set_attr "mode" "<sseinsnmode>")])
1245 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1246 just fine if misaligned_operand is true, and without the UNSPEC it can
1247 be combined with arithmetic instructions. If misaligned_operand is
1248 false, still emit UNSPEC_LOADU insn to honor user's request for
;; Unaligned integer vector load expander for VI1 (byte-element) modes,
;; with an optional masked variant (<mask_name>).
;; Condition: TARGET_SSE2 && <mask_avx512vl_condition>
;; && <mask_avx512bw_condition>.
;;   operand 0: destination register.
;;   operand 1: nonimmediate source.
;; When the guard containing misaligned_operand (operands[1], <MODE>mode)
;; holds, a plain SET of operand 0 is emitted; its source starts as
;; operand 1 and is rebuilt as a VEC_MERGE with
;; operands[2 * <mask_applied>] and operands[3 * <mask_applied>].
;; NOTE(review): the VEC_MERGE is presumably built only for the masked
;; variant; the guarding line is not visible in this excerpt -- confirm.
1250 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1251 [(set (match_operand:VI1 0 "register_operand")
1253 [(match_operand:VI1 1 "nonimmediate_operand")]
1255 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1258 && misaligned_operand (operands[1], <MODE>mode))
1260 rtx src = operands[1];
1262 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1263 operands[2 * <mask_applied>],
1264 operands[3 * <mask_applied>]);
1265 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned integer vector load expander for the
;; VI_ULOADSTORE_BW_AVX512VL modes, with an optional masked variant.
;;   operand 0: destination register.
;;   operand 1: nonimmediate source, wrapped in an unspec in the pattern.
;; If operand 1 satisfies misaligned_operand, a plain SET of operand 0 is
;; emitted; its source starts as operand 1 and is rebuilt as a VEC_MERGE
;; with operands[2 * <mask_applied>] and operands[3 * <mask_applied>].
;; NOTE(review): the VEC_MERGE is presumably built only for the masked
;; variant; the guarding line is not visible in this excerpt -- confirm.
1270 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1271 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
1272 (unspec:VI_ULOADSTORE_BW_AVX512VL
1273 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
1277 if (misaligned_operand (operands[1], <MODE>mode))
1279 rtx src = operands[1];
1281 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1282 operands[2 * <mask_applied>],
1283 operands[3 * <mask_applied>]);
1284 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned integer vector load expander for the
;; VI_ULOADSTORE_F_AVX512VL modes, with an optional masked variant.
;;   operand 0: destination register.
;;   operand 1: nonimmediate source, wrapped in an unspec in the pattern.
;; If operand 1 satisfies misaligned_operand, a plain SET of operand 0 is
;; emitted; its source starts as operand 1 and is rebuilt as a VEC_MERGE
;; with operands[2 * <mask_applied>] and operands[3 * <mask_applied>].
;; NOTE(review): the VEC_MERGE is presumably built only for the masked
;; variant; the guarding line is not visible in this excerpt -- confirm.
1289 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1290 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
1291 (unspec:VI_ULOADSTORE_F_AVX512VL
1292 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
1296 if (misaligned_operand (operands[1], <MODE>mode))
1298 rtx src = operands[1];
1300 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1301 operands[2 * <mask_applied>],
1302 operands[3 * <mask_applied>]);
1303 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned integer vector load insn for VI1 modes (optionally masked).
;; Condition: TARGET_SSE2 && <mask_avx512vl_condition>
;; && <mask_avx512bw_condition>.
;;   operand 0: destination register ("v").
;;   operand 1: register or memory source ("vm").
;; The output code switches on get_attr_mode.  One arm returns %vmovups;
;; otherwise %vmovdqu is used unless both TARGET_AVX512VL and
;; TARGET_AVX512BW are enabled, in which case vmovdqu<ssescalarsize> is
;; emitted together with the mask operand.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else <sseinsnmode> under
;; TARGET_AVX, else V4SF when optimizing for size, else <sseinsnmode>.
1308 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1309 [(set (match_operand:VI1 0 "register_operand" "=v")
1311 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
1313 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1315 switch (get_attr_mode (insn))
1319 return "%vmovups\t{%1, %0|%0, %1}";
1321 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1322 return "%vmovdqu\t{%1, %0|%0, %1}";
1324 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1327 [(set_attr "type" "ssemov")
1328 (set_attr "movu" "1")
1329 (set_attr "ssememalign" "8")
1330 (set (attr "prefix_data16")
1332 (match_test "TARGET_AVX")
1334 (const_string "1")))
1335 (set_attr "prefix" "maybe_vex")
1337 (cond [(and (match_test "<MODE_SIZE> == 16")
1338 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1339 (const_string "<ssePSmode>")
1340 (match_test "TARGET_AVX")
1341 (const_string "<sseinsnmode>")
1342 (match_test "optimize_function_for_size_p (cfun)")
1343 (const_string "V4SF")
1345 (const_string "<sseinsnmode>")))])
;; Unaligned integer vector load insn for the VI_ULOADSTORE_BW_AVX512VL
;; modes (optionally masked).
;;   operand 0: destination register ("v").
;;   operand 1: register or memory source ("vm").
;; Always emits vmovdqu<ssescalarsize> with the optional mask operand.
;; The ';' following the template string starts an (empty) md comment and
;; has no effect on the pattern.
1347 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1348 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
1349 (unspec:VI_ULOADSTORE_BW_AVX512VL
1350 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
1353 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1354 [(set_attr "type" "ssemov")
1355 (set_attr "movu" "1")
1356 (set_attr "ssememalign" "8")
1357 (set_attr "prefix" "maybe_evex")])
;; Unaligned integer vector load insn for the VI_ULOADSTORE_F_AVX512VL
;; modes (optionally masked).
;;   operand 0: destination register ("v").
;;   operand 1: register or memory source ("vm").
;; Always emits vmovdqu<ssescalarsize> with the optional mask operand.
;; The ';' following the template string starts an (empty) md comment and
;; has no effect on the pattern.
1359 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1360 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
1361 (unspec:VI_ULOADSTORE_F_AVX512VL
1362 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
1365 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1366 [(set_attr "type" "ssemov")
1367 (set_attr "movu" "1")
1368 (set_attr "ssememalign" "8")
1369 (set_attr "prefix" "maybe_evex")])
;; Unaligned integer vector store insn for VI1 modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v").
;; The output code switches on get_attr_mode.  One arm returns %vmovups;
;; otherwise %vmovdqu is used unless both TARGET_AVX512VL and
;; TARGET_AVX512BW are enabled, in which case vmovdqu<ssescalarsize> is
;; emitted.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES is
;; set, else <sseinsnmode> under TARGET_AVX, else V4SF when optimizing for
;; size, else <sseinsnmode>.
1371 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1372 [(set (match_operand:VI1 0 "memory_operand" "=m")
1374 [(match_operand:VI1 1 "register_operand" "v")]
1378 switch (get_attr_mode (insn))
1383 return "%vmovups\t{%1, %0|%0, %1}";
1389 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1390 return "%vmovdqu\t{%1, %0|%0, %1}";
1392 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1396 [(set_attr "type" "ssemov")
1397 (set_attr "movu" "1")
1398 (set_attr "ssememalign" "8")
1399 (set (attr "prefix_data16")
1401 (match_test "TARGET_AVX")
1403 (const_string "1")))
1404 (set_attr "prefix" "maybe_vex")
1406 (cond [(and (match_test "<MODE_SIZE> == 16")
1407 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1408 (match_test "TARGET_SSE_TYPELESS_STORES")))
1409 (const_string "<ssePSmode>")
1410 (match_test "TARGET_AVX")
1411 (const_string "<sseinsnmode>")
1412 (match_test "optimize_function_for_size_p (cfun)")
1413 (const_string "V4SF")
1415 (const_string "<sseinsnmode>")))])
;; Unaligned integer vector store insn for the VI_ULOADSTORE_BW_AVX512VL
;; modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v"), wrapped in an unspec.
;; Always emits vmovdqu<ssescalarsize>; "prefix" is maybe_evex.
1417 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1418 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1419 (unspec:VI_ULOADSTORE_BW_AVX512VL
1420 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1423 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1424 [(set_attr "type" "ssemov")
1425 (set_attr "movu" "1")
1426 (set_attr "ssememalign" "8")
1427 (set_attr "prefix" "maybe_evex")])
;; Unaligned integer vector store insn for the VI_ULOADSTORE_F_AVX512VL
;; modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v"), wrapped in an unspec.
;; Always emits vmovdqu<ssescalarsize>.  The "prefix" attribute is
;; maybe_evex, matching the sibling VI_ULOADSTORE_BW_AVX512VL store and
;; both AVX-512 loaddqu insns, which emit the same mnemonic.
1429 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1430 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
1431 (unspec:VI_ULOADSTORE_F_AVX512VL
1432 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
1435 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1436 [(set_attr "type" "ssemov")
1437 (set_attr "movu" "1")
1438 (set_attr "ssememalign" "8")
1439 (set_attr "prefix" "maybe_evex")])
;; Masked unaligned integer vector store for VI48_AVX512VL modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v"), inside unspec and vec_merge.
;;   operand 2: mask register of mode <avx512fmaskmode> ("Yk").
;; Emits vmovdqu<ssescalarsize> with the mask printed as {%2} after the
;; memory operand; attributes mark it as an EVEX store.
1441 (define_insn "<avx512>_storedqu<mode>_mask"
1442 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1443 (vec_merge:VI48_AVX512VL
1444 (unspec:VI48_AVX512VL
1445 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
1448 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1450 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1451 [(set_attr "type" "ssemov")
1452 (set_attr "movu" "1")
1453 (set_attr "memory" "store")
1454 (set_attr "prefix" "evex")
1455 (set_attr "mode" "<sseinsnmode>")])
;; Masked unaligned integer vector store for VI12_AVX512VL modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v"), inside unspec and vec_merge.
;;   operand 2: mask register of mode <avx512fmaskmode> ("Yk").
;; Emits vmovdqu<ssescalarsize> with the mask printed as {%2} after the
;; memory operand; attributes mark it as an EVEX store.
1457 (define_insn "<avx512>_storedqu<mode>_mask"
1458 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1459 (vec_merge:VI12_AVX512VL
1460 (unspec:VI12_AVX512VL
1461 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1464 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1466 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1467 [(set_attr "type" "ssemov")
1468 (set_attr "movu" "1")
1469 (set_attr "memory" "store")
1470 (set_attr "prefix" "evex")
1471 (set_attr "mode" "<sseinsnmode>")])
;; lddqu load insn for VI1 modes.
;;   operand 0: destination register ("x").
;;   operand 1: memory source ("m"), wrapped in an unspec.
;; Emits %vlddqu.  The prefix_data16 and prefix_rep attributes are each
;; computed from a TARGET_AVX test; the values visible here are "0" for
;; prefix_data16 and "1" for prefix_rep.
;; NOTE(review): those are presumably the non-AVX arms -- confirm against
;; the full if_then_else, which is not visible in this excerpt.
1473 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1474 [(set (match_operand:VI1 0 "register_operand" "=x")
1475 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1478 "%vlddqu\t{%1, %0|%0, %1}"
1479 [(set_attr "type" "ssemov")
1480 (set_attr "movu" "1")
1481 (set_attr "ssememalign" "8")
1482 (set (attr "prefix_data16")
1484 (match_test "TARGET_AVX")
1486 (const_string "0")))
1487 (set (attr "prefix_rep")
1489 (match_test "TARGET_AVX")
1491 (const_string "1")))
1492 (set_attr "prefix" "maybe_vex")
1493 (set_attr "mode" "<sseinsnmode>")])
;; movnti store of a general register for the SWI48 integer modes.
;;   operand 0: memory destination ("m").
;;   operand 1: general register source ("r"), wrapped in an unspec.
;; prefix_data16 is fixed to 0 and the insn mode is <MODE>.
1495 (define_insn "sse2_movnti<mode>"
1496 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1497 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1500 "movnti\t{%1, %0|%0, %1}"
1501 [(set_attr "type" "ssemov")
1502 (set_attr "prefix_data16" "0")
1503 (set_attr "mode" "<MODE>")])
;; movnt store of a floating-point vector register for VF modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v").
;; Emits %vmovnt<ssemodesuffix>; "prefix" is maybe_vex.
1505 (define_insn "<sse>_movnt<mode>"
1506 [(set (match_operand:VF 0 "memory_operand" "=m")
1508 [(match_operand:VF 1 "register_operand" "v")]
1511 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1512 [(set_attr "type" "ssemov")
1513 (set_attr "prefix" "maybe_vex")
1514 (set_attr "mode" "<MODE>")])
;; movntdq store of an integer vector register for VI8 modes.
;;   operand 0: memory destination ("m").
;;   operand 1: source register ("v"), wrapped in an unspec.
;; Emits %vmovntdq.  Note the "type" attribute here is ssecvt, unlike the
;; ssemov type used by the neighbouring movnt patterns.  prefix_data16 is
;; computed from a TARGET_AVX test, with "1" as the visible value.
1516 (define_insn "<sse2>_movnt<mode>"
1517 [(set (match_operand:VI8 0 "memory_operand" "=m")
1518 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1521 "%vmovntdq\t{%1, %0|%0, %1}"
1522 [(set_attr "type" "ssecvt")
1523 (set (attr "prefix_data16")
1525 (match_test "TARGET_AVX")
1527 (const_string "1")))
1528 (set_attr "prefix" "maybe_vex")
1529 (set_attr "mode" "<sseinsnmode>")])
1531 ; Expand patterns for non-temporal stores. At the moment, only those
1532 ; that directly map to insns are defined; it would be possible to
1533 ; define patterns for other modes that would expand to several insns.
1535 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition enabling it:
;;   DI needs SSE2 and 64-bit; SI needs SSE2; SF and DF need SSE4A;
;;   512-bit vectors (V8DI, V16SF, V8DF) need AVX512F;
;;   256-bit vectors (V4DI, V8SF, V4DF) need AVX;
;;   V2DI and V2DF need SSE2; V4SF carries no extra condition.
1536 (define_mode_iterator STORENT_MODE
1537 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1538 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1539 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1540 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1541 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; storent<mode>: store expander over the STORENT_MODE modes.
;;   operand 0: memory destination.
;;   operand 1: register source, wrapped in an unspec of the same mode.
1543 (define_expand "storent<mode>"
1544 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1545 (unspec:STORENT_MODE
1546 [(match_operand:STORENT_MODE 1 "register_operand")]
1550 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1552 ;; Parallel floating point arithmetic
1554 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; <code><mode>2: unary expander over the VF modes, register to register.
;; All work is delegated to ix86_expand_fp_absneg_operator, called with
;; the rtx code, the mode and the operand array; the expander then
;; finishes with DONE.
1556 (define_expand "<code><mode>2"
1557 [(set (match_operand:VF 0 "register_operand")
1559 (match_operand:VF 1 "register_operand")))]
1561 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; *absneg<mode>2: abs/neg on VF vectors, split after reload into a
;; bitwise operation.
;;   operand 0: destination register.
;;   operand 1: source (register or memory), argument of operator 3.
;;   operand 2: extra VF operand referenced through (use ...).
;;              NOTE(review): presumably the sign-bit mask prepared by the
;;              expander -- confirm against ix86_expand_fp_absneg_operator.
;;   operand 3: the absneg_operator rtx.
;; In the split, op1/op2 are chosen so that a memory operand 1 ends up as
;; op2.  The rtx code is XOR when operator 3 is NEG and AND otherwise, and
;; the result is a SET of operand 0 to (op1 <code> op2).
;; Alternatives 0-1 are non-AVX (destination tied to an input); 2-3 are AVX.
1563 (define_insn_and_split "*absneg<mode>2"
1564 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1565 (match_operator:VF 3 "absneg_operator"
1566 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1567 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1570 "&& reload_completed"
1573 enum rtx_code absneg_op;
1579 if (MEM_P (operands[1]))
1580 op1 = operands[2], op2 = operands[1];
1582 op1 = operands[1], op2 = operands[2];
1587 if (rtx_equal_p (operands[0], operands[1]))
1593 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1594 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1595 t = gen_rtx_SET (VOIDmode, operands[0], t);
1599 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector add/sub expander for VF modes, with optional mask and
;; embedded-rounding variants.
;; Condition: TARGET_SSE && <mask_mode512bit_condition>
;; && <round_mode512bit_condition>.
;; Operands 1 and 2 use <round_nimm_predicate>; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy with the rtx code and mode.
1601 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1602 [(set (match_operand:VF 0 "register_operand")
1604 (match_operand:VF 1 "<round_nimm_predicate>")
1605 (match_operand:VF 2 "<round_nimm_predicate>")))]
1606 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1607 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector add/sub insn for VF modes (optionally masked / with rounding).
;; Condition additionally requires ix86_binary_operator_ok.
;;   alternative 0 (noavx): two-operand form, operand 1 tied to operand 0,
;;                          with the <comm> constraint modifier.
;;   alternative 1 (avx):   three-operand v-prefixed form with optional
;;                          mask and rounding operands.
1609 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1610 [(set (match_operand:VF 0 "register_operand" "=x,v")
1612 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1613 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1614 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1616 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1617 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1618 [(set_attr "isa" "noavx,avx")
1619 (set_attr "type" "sseadd")
1620 (set_attr "prefix" "<mask_prefix3>")
1621 (set_attr "mode" "<MODE>")])
;; Scalar-suffix add/sub insn on VF_128 vectors (optional rounding).
;;   operand 1: register, tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory (non-AVX), <round_constraint> (AVX).
;; Templates use <ssescalarmodesuffix>; the Intel-syntax side prints
;; operand 2 with the <iptr> modifier.  Insn mode is <ssescalarmode>.
1623 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1624 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1627 (match_operand:VF_128 1 "register_operand" "0,v")
1628 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1633 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1634 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1635 [(set_attr "isa" "noavx,avx")
1636 (set_attr "type" "sseadd")
1637 (set_attr "prefix" "<round_prefix>")
1638 (set_attr "mode" "<ssescalarmode>")])
;; Vector multiply expander for VF modes, with optional mask and
;; embedded-rounding variants.
;; Condition: TARGET_SSE && <mask_mode512bit_condition>
;; && <round_mode512bit_condition>.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with MULT and the mode.
1640 (define_expand "mul<mode>3<mask_name><round_name>"
1641 [(set (match_operand:VF 0 "register_operand")
1643 (match_operand:VF 1 "<round_nimm_predicate>")
1644 (match_operand:VF 2 "<round_nimm_predicate>")))]
1645 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1646 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector multiply insn for VF modes (optionally masked / with rounding).
;; Condition additionally requires ix86_binary_operator_ok for MULT.
;;   alternative 0 (noavx): mul<suffix>, operand 1 tied to operand 0 and
;;                          marked commutative ("%").
;;   alternative 1 (avx):   vmul<suffix>, three-operand form with optional
;;                          mask and rounding operands.
;; btver2_decode is direct for alternative 0 and double for alternative 1.
1648 (define_insn "*mul<mode>3<mask_name><round_name>"
1649 [(set (match_operand:VF 0 "register_operand" "=x,v")
1651 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1652 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1653 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1655 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1656 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1657 [(set_attr "isa" "noavx,avx")
1658 (set_attr "type" "ssemul")
1659 (set_attr "prefix" "<mask_prefix3>")
1660 (set_attr "btver2_decode" "direct,double")
1661 (set_attr "mode" "<MODE>")])
;; Scalar-suffix multiply/divide insn on VF_128 vectors (optional
;; rounding).
;;   operand 1: register, tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory (non-AVX), <round_constraint> (AVX).
;; The mnemonic and the "type" attribute (sse<multdiv_mnemonic>) both
;; follow <multdiv_mnemonic>.  Insn mode is <ssescalarmode>.
1663 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1664 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1667 (match_operand:VF_128 1 "register_operand" "0,v")
1668 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1673 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1674 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1675 [(set_attr "isa" "noavx,avx")
1676 (set_attr "type" "sse<multdiv_mnemonic>")
1677 (set_attr "prefix" "<round_prefix>")
1678 (set_attr "btver2_decode" "direct,double")
1679 (set_attr "mode" "<ssescalarmode>")])
;; Vector divide expander for the VF2 modes.
;;   operand 1: register dividend; operand 2: nonimmediate divisor.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with DIV and the mode.
1681 (define_expand "div<mode>3"
1682 [(set (match_operand:VF2 0 "register_operand")
1683 (div:VF2 (match_operand:VF2 1 "register_operand")
1684 (match_operand:VF2 2 "nonimmediate_operand")))]
1686 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Vector divide expander for the VF1 modes.
;;   operand 1: register dividend; operand 2: nonimmediate divisor.
;; After ix86_fixup_binary_operands_no_copy, a guarded path calls
;; ix86_emit_swdivsf (dest, dividend, divisor, mode) instead of using the
;; hardware divide.  The visible guard terms are TARGET_RECIP_VEC_DIV, not
;; optimizing the insn for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
;; NOTE(review): the first term of the guard is not visible in this
;; excerpt.
1688 (define_expand "div<mode>3"
1689 [(set (match_operand:VF1 0 "register_operand")
1690 (div:VF1 (match_operand:VF1 1 "register_operand")
1691 (match_operand:VF1 2 "nonimmediate_operand")))]
1694 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1697 && TARGET_RECIP_VEC_DIV
1698 && !optimize_insn_for_size_p ()
1699 && flag_finite_math_only && !flag_trapping_math
1700 && flag_unsafe_math_optimizations)
1702 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Vector divide insn for VF modes (optionally masked / with rounding).
;; Condition: TARGET_SSE && <mask_mode512bit_condition>
;; && <round_mode512bit_condition>.
;;   alternative 0 (noavx): div<suffix>, operand 1 tied to operand 0.
;;   alternative 1 (avx):   vdiv<suffix>, three-operand form with optional
;;                          mask and rounding operands.
1707 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1708 [(set (match_operand:VF 0 "register_operand" "=x,v")
1710 (match_operand:VF 1 "register_operand" "0,v")
1711 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1712 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1714 div<ssemodesuffix>\t{%2, %0|%0, %2}
1715 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1716 [(set_attr "isa" "noavx,avx")
1717 (set_attr "type" "ssediv")
1718 (set_attr "prefix" "<mask_prefix3>")
1719 (set_attr "mode" "<MODE>")])
;; Packed single-precision reciprocal (UNSPEC_RCP) for VF1_128_256 modes.
;;   operand 0: destination register ("x").
;;   operand 1: register or memory source ("xm").
;; Emits %vrcpps; tagged "rcp" for the atom and btver2 scheduling
;; attributes.
1721 (define_insn "<sse>_rcp<mode>2"
1722 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1724 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1726 "%vrcpps\t{%1, %0|%0, %1}"
1727 [(set_attr "type" "sse")
1728 (set_attr "atom_sse_attr" "rcp")
1729 (set_attr "btver2_sse_attr" "rcp")
1730 (set_attr "prefix" "maybe_vex")
1731 (set_attr "mode" "<MODE>")])
;; Scalar single-precision reciprocal on a V4SF register.
;;   operand 1: register or memory source of the unspec ("xm").
;;   operand 2: register; tied to operand 0 in the non-AVX alternative.
;; Emits rcpss (two-operand) or vrcpss (three-operand).  The Intel-syntax
;; side prints operand 1 with the %k modifier.  ssememalign is 32 and the
;; insn mode is SF.
1733 (define_insn "sse_vmrcpv4sf2"
1734 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1736 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1738 (match_operand:V4SF 2 "register_operand" "0,x")
1742 rcpss\t{%1, %0|%0, %k1}
1743 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1744 [(set_attr "isa" "noavx,avx")
1745 (set_attr "type" "sse")
1746 (set_attr "ssememalign" "32")
1747 (set_attr "atom_sse_attr" "rcp")
1748 (set_attr "btver2_sse_attr" "rcp")
1749 (set_attr "prefix" "orig,vex")
1750 (set_attr "mode" "SF")])
;; Packed vrcp14 for VF_AVX512VL modes (optionally masked).
;;   operand 0: destination register ("v").
;;   operand 1: register or memory source ("vm").
;; Emits vrcp14<ssemodesuffix> with the optional mask operand; EVEX only.
1752 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1753 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1755 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1758 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1759 [(set_attr "type" "sse")
1760 (set_attr "prefix" "evex")
1761 (set_attr "mode" "<MODE>")])
;; Scalar-suffix vrcp14 on VF_128 vectors.
;;   operand 1: register or memory source ("vm").
;;   operand 2: register ("v"), printed as the middle template operand.
;; Emits vrcp14<ssescalarmodesuffix>; EVEX only.
1763 (define_insn "srcp14<mode>"
1764 [(set (match_operand:VF_128 0 "register_operand" "=v")
1767 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1769 (match_operand:VF_128 2 "register_operand" "v")
1772 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1773 [(set_attr "type" "sse")
1774 (set_attr "prefix" "evex")
1775 (set_attr "mode" "<MODE>")])
;; Vector square-root expander for the VF2 modes: register destination,
;; nonimmediate source, no preparation code visible in this excerpt.
1777 (define_expand "sqrt<mode>2"
1778 [(set (match_operand:VF2 0 "register_operand")
1779 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Vector square-root expander for the VF1 modes.
;; A guarded path calls ix86_emit_swsqrtsf (dest, src, mode, false)
;; instead of using the hardware sqrt.  The visible guard terms are
;; TARGET_RECIP_VEC_SQRT, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
;; NOTE(review): the first term of the guard is not visible in this
;; excerpt.
1782 (define_expand "sqrt<mode>2"
1783 [(set (match_operand:VF1 0 "register_operand")
1784 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1788 && TARGET_RECIP_VEC_SQRT
1789 && !optimize_insn_for_size_p ()
1790 && flag_finite_math_only && !flag_trapping_math
1791 && flag_unsafe_math_optimizations)
1793 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Vector square-root insn for VF modes (optionally masked / with
;; rounding).  Condition: TARGET_SSE && <mask_mode512bit_condition>
;; && <round_mode512bit_condition>.
;;   operand 0: destination register ("v").
;;   operand 1: source, <round_nimm_predicate> / <round_constraint>.
;; Emits %vsqrt<ssemodesuffix>; tagged "sqrt" for the atom and btver2
;; scheduling attributes.
1798 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1799 [(set (match_operand:VF 0 "register_operand" "=v")
1800 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1801 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1802 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1803 [(set_attr "type" "sse")
1804 (set_attr "atom_sse_attr" "sqrt")
1805 (set_attr "btver2_sse_attr" "sqrt")
1806 (set_attr "prefix" "maybe_vex")
1807 (set_attr "mode" "<MODE>")])
;; Scalar-suffix square-root insn on VF_128 vectors (optional rounding).
;;   operand 1: source; register or memory (non-AVX), <round_constraint>
;;              (AVX).
;;   operand 2: register; tied to operand 0 in the non-AVX alternative.
;; Emits sqrt<suffix> (two-operand) or vsqrt<suffix> (three-operand); the
;; Intel-syntax side prints operand 1 with the <iptr> modifier.
1809 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1810 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1813 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1814 (match_operand:VF_128 2 "register_operand" "0,v")
1818 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1819 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1820 [(set_attr "isa" "noavx,avx")
1821 (set_attr "type" "sse")
1822 (set_attr "atom_sse_attr" "sqrt")
1823 (set_attr "prefix" "<round_prefix>")
1824 (set_attr "btver2_sse_attr" "sqrt")
1825 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander (UNSPEC_RSQRT) for VF1_128_256 modes.
;; The expansion calls ix86_emit_swsqrtsf (dest, src, mode, true).
1827 (define_expand "rsqrt<mode>2"
1828 [(set (match_operand:VF1_128_256 0 "register_operand")
1830 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1833 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision reciprocal square root (UNSPEC_RSQRT) for
;; VF1_128_256 modes.
;;   operand 0: destination register ("x").
;;   operand 1: register or memory source ("xm").
;; Emits %vrsqrtps; "prefix" is maybe_vex.
1837 (define_insn "<sse>_rsqrt<mode>2"
1838 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1840 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1842 "%vrsqrtps\t{%1, %0|%0, %1}"
1843 [(set_attr "type" "sse")
1844 (set_attr "prefix" "maybe_vex")
1845 (set_attr "mode" "<MODE>")])
;; Packed vrsqrt14 for VF_AVX512VL modes (optionally masked).
;;   operand 0: destination register ("v").
;;   operand 1: register or memory source ("vm").
;; Emits vrsqrt14<ssemodesuffix> with the optional mask operand; EVEX
;; only.
1847 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1848 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1850 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1853 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1854 [(set_attr "type" "sse")
1855 (set_attr "prefix" "evex")
1856 (set_attr "mode" "<MODE>")])
;; Scalar-suffix vrsqrt14 on VF_128 vectors.
;;   operand 1: register or memory source ("vm").
;;   operand 2: register ("v"), printed as the middle template operand.
;; Emits vrsqrt14<ssescalarmodesuffix>; EVEX only.
1858 (define_insn "rsqrt14<mode>"
1859 [(set (match_operand:VF_128 0 "register_operand" "=v")
1862 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1864 (match_operand:VF_128 2 "register_operand" "v")
1867 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1868 [(set_attr "type" "sse")
1869 (set_attr "prefix" "evex")
1870 (set_attr "mode" "<MODE>")])
;; Scalar single-precision reciprocal square root on a V4SF register.
;;   operand 1: register or memory source of the unspec ("xm").
;;   operand 2: register; tied to operand 0 in the non-AVX alternative.
;; Emits rsqrtss (two-operand) or vrsqrtss (three-operand).  The
;; Intel-syntax side prints operand 1 with the %k modifier.  ssememalign
;; is 32 and the insn mode is SF.
1872 (define_insn "sse_vmrsqrtv4sf2"
1873 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1875 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1877 (match_operand:V4SF 2 "register_operand" "0,x")
1881 rsqrtss\t{%1, %0|%0, %k1}
1882 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1883 [(set_attr "isa" "noavx,avx")
1884 (set_attr "type" "sse")
1885 (set_attr "ssememalign" "32")
1886 (set_attr "prefix" "orig,vex")
1887 (set_attr "mode" "SF")])
1889 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1890 ;; isn't really correct, as those rtl operators aren't defined when
1891 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Vector min/max expander for VF modes, with optional mask and
;; SAE-only rounding variants.
;; Condition: TARGET_SSE && <mask_mode512bit_condition>
;; && <round_saeonly_mode512bit_condition>.
;; When flag_finite_math_only is off, operand 1 is forced into a register
;; first; ix86_fixup_binary_operands_no_copy is then called.
1893 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1894 [(set (match_operand:VF 0 "register_operand")
1896 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1897 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1898 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1900 if (!flag_finite_math_only)
1901 operands[1] = force_reg (<MODE>mode, operands[1]);
1902 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Vector min/max insn for VF modes, used only when flag_finite_math_only
;; is set.  Operand 1 carries the commutative marker ("%") and the
;; condition also requires ix86_binary_operator_ok.
;;   alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;;   alternative 1 (avx):   three-operand v-prefixed form with optional
;;                          mask and SAE operands.
1905 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1906 [(set (match_operand:VF 0 "register_operand" "=x,v")
1908 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1909 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1910 "TARGET_SSE && flag_finite_math_only
1911 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1912 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1914 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1915 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1916 [(set_attr "isa" "noavx,avx")
1917 (set_attr "type" "sseadd")
1918 (set_attr "btver2_sse_attr" "maxmin")
1919 (set_attr "prefix" "<mask_prefix3>")
1920 (set_attr "mode" "<MODE>")])
;; Vector min/max insn for VF modes, used only when flag_finite_math_only
;; is NOT set.  Unlike the _finite variant, operand 1 must be a register
;; and has no commutative marker.
;;   alternative 0 (noavx): two-operand form, operand 1 tied to operand 0.
;;   alternative 1 (avx):   three-operand v-prefixed form with optional
;;                          mask and SAE operands.
1922 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1923 [(set (match_operand:VF 0 "register_operand" "=x,v")
1925 (match_operand:VF 1 "register_operand" "0,v")
1926 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1927 "TARGET_SSE && !flag_finite_math_only
1928 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1930 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1931 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1932 [(set_attr "isa" "noavx,avx")
1933 (set_attr "type" "sseadd")
1934 (set_attr "btver2_sse_attr" "maxmin")
1935 (set_attr "prefix" "<mask_prefix3>")
1936 (set_attr "mode" "<MODE>")])
;; Scalar-suffix min/max insn on VF_128 vectors (optional SAE operand).
;;   operand 1: register, tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory (non-AVX),
;;              <round_saeonly_constraint> (AVX).
;; The Intel-syntax side prints operand 2 with the <iptr> modifier.
;; "type" is sse here; insn mode is <ssescalarmode>.
1938 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1939 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1942 (match_operand:VF_128 1 "register_operand" "0,v")
1943 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1948 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1949 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1950 [(set_attr "isa" "noavx,avx")
1951 (set_attr "type" "sse")
1952 (set_attr "btver2_sse_attr" "maxmin")
1953 (set_attr "prefix" "<round_saeonly_prefix>")
1954 (set_attr "mode" "<ssescalarmode>")])
1956 ;; These versions of the min/max patterns implement exactly the operations
1957 ;; min = (op1 < op2 ? op1 : op2)
1958 ;; max = (!(op1 < op2) ? op1 : op2)
1959 ;; Their operands are not commutative, and thus they may be used in the
1960 ;; presence of -0.0 and NaN.
;; Non-commutative vector minimum for VF modes, expressed as an unspec of
;; operands 1 and 2 so the operand order is preserved.
;;   operand 1: register; tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory ("vm").
;; Emits min<suffix> (two-operand) or vmin<suffix> (three-operand).
1962 (define_insn "*ieee_smin<mode>3"
1963 [(set (match_operand:VF 0 "register_operand" "=v,v")
1965 [(match_operand:VF 1 "register_operand" "0,v")
1966 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1970 min<ssemodesuffix>\t{%2, %0|%0, %2}
1971 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1972 [(set_attr "isa" "noavx,avx")
1973 (set_attr "type" "sseadd")
1974 (set_attr "prefix" "orig,vex")
1975 (set_attr "mode" "<MODE>")])
;; Non-commutative vector maximum for VF modes, expressed as an unspec of
;; operands 1 and 2 so the operand order is preserved.
;;   operand 1: register; tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory ("vm").
;; Emits max<suffix> (two-operand) or vmax<suffix> (three-operand).
1977 (define_insn "*ieee_smax<mode>3"
1978 [(set (match_operand:VF 0 "register_operand" "=v,v")
1980 [(match_operand:VF 1 "register_operand" "0,v")
1981 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1985 max<ssemodesuffix>\t{%2, %0|%0, %2}
1986 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1987 [(set_attr "isa" "noavx,avx")
1988 (set_attr "type" "sseadd")
1989 (set_attr "prefix" "orig,vex")
1990 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF (AVX, VEX-only).
;;   operand 1: register ("x"); operand 2: register or memory ("xm").
;; The visible RTL contains (minus:V4DF op1 op2); the enclosing expression
;; that combines it with the other arithmetic result is not visible in
;; this excerpt.
1992 (define_insn "avx_addsubv4df3"
1993 [(set (match_operand:V4DF 0 "register_operand" "=x")
1996 (match_operand:V4DF 1 "register_operand" "x")
1997 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1998 (minus:V4DF (match_dup 1) (match_dup 2))
2001 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2002 [(set_attr "type" "sseadd")
2003 (set_attr "prefix" "vex")
2004 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF.
;;   operand 1: register; tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory ("xm").
;; The visible RTL contains (minus:V2DF op1 op2); the enclosing expression
;; is not visible in this excerpt.  atom_unit is "complex".
2006 (define_insn "sse3_addsubv2df3"
2007 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2010 (match_operand:V2DF 1 "register_operand" "0,x")
2011 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
2012 (minus:V2DF (match_dup 1) (match_dup 2))
2016 addsubpd\t{%2, %0|%0, %2}
2017 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2018 [(set_attr "isa" "noavx,avx")
2019 (set_attr "type" "sseadd")
2020 (set_attr "atom_unit" "complex")
2021 (set_attr "prefix" "orig,vex")
2022 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF (AVX, VEX-only).
;;   operand 1: register ("x"); operand 2: register or memory ("xm").
;; The visible RTL contains (minus:V8SF op1 op2); the enclosing expression
;; is not visible in this excerpt.
2024 (define_insn "avx_addsubv8sf3"
2025 [(set (match_operand:V8SF 0 "register_operand" "=x")
2028 (match_operand:V8SF 1 "register_operand" "x")
2029 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2030 (minus:V8SF (match_dup 1) (match_dup 2))
2033 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2034 [(set_attr "type" "sseadd")
2035 (set_attr "prefix" "vex")
2036 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF.
;;   operand 1: register; tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory ("xm").
;; The visible RTL contains (minus:V4SF op1 op2); the enclosing expression
;; is not visible in this excerpt.  prefix_rep is 1 for the non-AVX
;; alternative and "*" for the AVX one.
2038 (define_insn "sse3_addsubv4sf3"
2039 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2042 (match_operand:V4SF 1 "register_operand" "0,x")
2043 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2044 (minus:V4SF (match_dup 1) (match_dup 2))
2048 addsubps\t{%2, %0|%0, %2}
2049 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2050 [(set_attr "isa" "noavx,avx")
2051 (set_attr "type" "sseadd")
2052 (set_attr "prefix" "orig,vex")
2053 (set_attr "prefix_rep" "1,*")
2054 (set_attr "mode" "V4SF")])
;; Horizontal add/sub on V4DF (AVX, VEX-only): vh<plusminus_mnemonic>pd.
;;   operand 1: register ("x"); operand 2: register or memory ("xm").
;; The visible vec_selects pair up adjacent DF elements: (0,1) of
;; operand 1, (0,1) of operand 2, then (2,3) of operand 1 and (2,3) of
;; operand 2, in that order.
2056 (define_insn "avx_h<plusminus_insn>v4df3"
2057 [(set (match_operand:V4DF 0 "register_operand" "=x")
2062 (match_operand:V4DF 1 "register_operand" "x")
2063 (parallel [(const_int 0)]))
2064 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2067 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2068 (parallel [(const_int 0)]))
2069 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2072 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2073 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2075 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2076 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2078 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2079 [(set_attr "type" "sseadd")
2080 (set_attr "prefix" "vex")
2081 (set_attr "mode" "V4DF")])
;; Named expander for the V2DF horizontal add.
;;   operand 1: register; operand 2: nonimmediate.
;; The visible pattern selects elements 0 and 1 of operand 1 and elements
;; 0 and 1 of operand 2 with fixed const_int indices.
2083 (define_expand "sse3_haddv2df3"
2084 [(set (match_operand:V2DF 0 "register_operand")
2088 (match_operand:V2DF 1 "register_operand")
2089 (parallel [(const_int 0)]))
2090 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2093 (match_operand:V2DF 2 "nonimmediate_operand")
2094 (parallel [(const_int 0)]))
2095 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; haddpd / vhaddpd on V2DF, matching either element order.
;;   operand 1: register; tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory ("xm").
;;   operands 3-6: element selectors, each const_0_to_1_operand.
;; The condition requires operands 3 and 4 to differ and operands 5 and 6
;; to differ, so each pair selects both elements of its source vector.
2098 (define_insn "*sse3_haddv2df3"
2099 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2103 (match_operand:V2DF 1 "register_operand" "0,x")
2104 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2107 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2110 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2111 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2114 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2116 && INTVAL (operands[3]) != INTVAL (operands[4])
2117 && INTVAL (operands[5]) != INTVAL (operands[6])"
2119 haddpd\t{%2, %0|%0, %2}
2120 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2121 [(set_attr "isa" "noavx,avx")
2122 (set_attr "type" "sseadd")
2123 (set_attr "prefix" "orig,vex")
2124 (set_attr "mode" "V2DF")])
;; hsubpd / vhsubpd on V2DF.
;;   operand 1: register; tied to operand 0 in the non-AVX alternative.
;;   operand 2: register or memory ("xm").
;; Element indices are fixed: element 0 then element 1 of each source.
2126 (define_insn "sse3_hsubv2df3"
2127 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2131 (match_operand:V2DF 1 "register_operand" "0,x")
2132 (parallel [(const_int 0)]))
2133 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2136 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2137 (parallel [(const_int 0)]))
2138 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2141 hsubpd\t{%2, %0|%0, %2}
2142 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2143 [(set_attr "isa" "noavx,avx")
2144 (set_attr "type" "sseadd")
2145 (set_attr "prefix" "orig,vex")
2146 (set_attr "mode" "V2DF")])
;; DFmode result built from the two elements of a single V2DF register,
;; implemented with haddpd / vhaddpd using that register as both sources.
;;   operand 0: DF destination register.
;;   operand 1: V2DF register; tied to operand 0 in the non-AVX
;;              alternative.
;;   operands 2, 3: element selectors (const_0_to_1_operand); the
;;              condition requires them to differ.
2148 (define_insn "*sse3_haddv2df3_low"
2149 [(set (match_operand:DF 0 "register_operand" "=x,x")
2152 (match_operand:V2DF 1 "register_operand" "0,x")
2153 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2156 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2158 && INTVAL (operands[2]) != INTVAL (operands[3])"
2160 haddpd\t{%0, %0|%0, %0}
2161 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2162 [(set_attr "isa" "noavx,avx")
2163 (set_attr "type" "sseadd1")
2164 (set_attr "prefix" "orig,vex")
2165 (set_attr "mode" "V2DF")])
;; Scalar (DF) result from elements 0 and 1 of V2DF operand 1, with fixed
;; indices.  The same register is passed as both sources of
;; hsubpd / vhsubpd.
2167 (define_insn "*sse3_hsubv2df3_low"
2168 [(set (match_operand:DF 0 "register_operand" "=x,x")
2171 (match_operand:V2DF 1 "register_operand" "0,x")
2172 (parallel [(const_int 0)]))
2175 (parallel [(const_int 1)]))))]
2178 hsubpd\t{%0, %0|%0, %0}
2179 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2180 [(set_attr "isa" "noavx,avx")
2181 (set_attr "type" "sseadd1")
2182 (set_attr "prefix" "orig,vex")
2183 (set_attr "mode" "V2DF")])
;; 256-bit single-precision horizontal add/sub (vh<plusminus_mnemonic>ps).
;; The element selections appear in this order: elements 0-3 of operand 1,
;; elements 0-3 of operand 2, elements 4-7 of operand 1, elements 4-7 of
;; operand 2.  VEX-only, single alternative.
2185 (define_insn "avx_h<plusminus_insn>v8sf3"
2186 [(set (match_operand:V8SF 0 "register_operand" "=x")
2192 (match_operand:V8SF 1 "register_operand" "x")
2193 (parallel [(const_int 0)]))
2194 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2196 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2197 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2201 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2202 (parallel [(const_int 0)]))
2203 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2205 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2206 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2210 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2211 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2213 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2214 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2217 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2218 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2220 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2221 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2223 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2224 [(set_attr "type" "sseadd")
2225 (set_attr "prefix" "vex")
2226 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/sub.  Elements 0-3 of operand 1
;; are selected first, then elements 0-3 of operand 2.  Alternative 0
;; (isa noavx) is the two-operand form with operand 1 tied to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
2228 (define_insn "sse3_h<plusminus_insn>v4sf3"
2229 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2234 (match_operand:V4SF 1 "register_operand" "0,x")
2235 (parallel [(const_int 0)]))
2236 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2238 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2239 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2243 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2244 (parallel [(const_int 0)]))
2245 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2247 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2248 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2251 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2252 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2253 [(set_attr "isa" "noavx,avx")
2254 (set_attr "type" "sseadd")
2255 (set_attr "atom_unit" "complex")
2256 (set_attr "prefix" "orig,vex")
2257 (set_attr "prefix_rep" "1,*")
2258 (set_attr "mode" "V4SF")])
;; Sum reduction of V8DF operand 1 into operand 0, delegated to
;; ix86_expand_reduc with gen_addv8df3 as the combining operation.
2260 (define_expand "reduc_splus_v8df"
2261 [(match_operand:V8DF 0 "register_operand")
2262 (match_operand:V8DF 1 "register_operand")]
2265 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction of V4DF operand 1 into operand 0 in three steps:
;;   1. horizontal add of operand 1 with itself into tmp;
;;   2. vperm2f128 of tmp with immediate 1 into tmp2 (presumably swapping
;;      the 128-bit halves -- confirm against the vperm2f128 pattern);
;;   3. vector add of tmp and tmp2 into operand 0.
2269 (define_expand "reduc_splus_v4df"
2270 [(match_operand:V4DF 0 "register_operand")
2271 (match_operand:V4DF 1 "register_operand")]
2274 rtx tmp = gen_reg_rtx (V4DFmode);
2275 rtx tmp2 = gen_reg_rtx (V4DFmode);
2276 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2277 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2278 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of V2DF operand 1: a single sse3_haddv2df3 with operand 1
;; supplied as both sources, written directly to operand 0.
2282 (define_expand "reduc_splus_v2df"
2283 [(match_operand:V2DF 0 "register_operand")
2284 (match_operand:V2DF 1 "register_operand")]
2287 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of V16SF operand 1 into operand 0, delegated to
;; ix86_expand_reduc with gen_addv16sf3 as the combining operation.
2291 (define_expand "reduc_splus_v16sf"
2292 [(match_operand:V16SF 0 "register_operand")
2293 (match_operand:V16SF 1 "register_operand")]
2296 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum reduction of V8SF operand 1 into operand 0:
;;   1. horizontal add of operand 1 with itself into tmp;
;;   2. horizontal add of tmp with itself into tmp2;
;;   3. vperm2f128 of tmp2 with immediate 1, reusing tmp as destination;
;;   4. vector add of tmp and tmp2 into operand 0.
2300 (define_expand "reduc_splus_v8sf"
2301 [(match_operand:V8SF 0 "register_operand")
2302 (match_operand:V8SF 1 "register_operand")]
2305 rtx tmp = gen_reg_rtx (V8SFmode);
2306 rtx tmp2 = gen_reg_rtx (V8SFmode);
2307 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2308 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2309 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2310 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of V4SF operand 1 into operand 0.  Two strategies are
;; visible: two chained sse3_haddv4sf3 insns through a temporary, or the
;; generic ix86_expand_reduc with gen_addv4sf3.  The test that chooses
;; between them is not visible in this excerpt.
2314 (define_expand "reduc_splus_v4sf"
2315 [(match_operand:V4SF 0 "register_operand")
2316 (match_operand:V4SF 1 "register_operand")]
2321 rtx tmp = gen_reg_rtx (V4SFmode);
2322 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2323 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2326 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2330 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a vector mode with the target condition that enables
;; it; every mode listed here is conditional.
2331 (define_mode_iterator REDUC_SMINMAX_MODE
2332 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2333 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2334 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2335 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2336 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2337 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2338 (V8DF "TARGET_AVX512F")])
;; Signed min/max reduction over REDUC_SMINMAX_MODE, delegated to
;; ix86_expand_reduc with the matching gen_<code><mode>3 generator.
2340 (define_expand "reduc_<code>_<mode>"
2341 [(smaxmin:REDUC_SMINMAX_MODE
2342 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2343 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2346 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned min/max reduction over the VI_AVX512BW modes, delegated to
;; ix86_expand_reduc with the matching gen_<code><mode>3 generator.
2350 (define_expand "reduc_<code>_<mode>"
2351 [(umaxmin:VI_AVX512BW
2352 (match_operand:VI_AVX512BW 0 "register_operand")
2353 (match_operand:VI_AVX512BW 1 "register_operand"))]
2356 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Min/max reduction over the VI_256 modes, delegated to ix86_expand_reduc
;; with gen_<code><mode>3.  The code iterator line is not visible in this
;; excerpt.
2360 (define_expand "reduc_<code>_<mode>"
2362 (match_operand:VI_256 0 "register_operand")
2363 (match_operand:VI_256 1 "register_operand"))]
2366 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction of V8HI operand 1 into operand 0, delegated
;; to ix86_expand_reduc with gen_uminv8hi3.
2370 (define_expand "reduc_umin_v8hi"
2372 (match_operand:V8HI 0 "register_operand")
2373 (match_operand:V8HI 1 "register_operand"))]
2376 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
;; Packed vreduce<ssemodesuffix> on VF_AVX512VL modes.  Operand 1 is the
;; register-or-memory source and operand 2 an immediate in 0..255.  The
;; <mask_name> substitution adds the optional masking operand printed via
;; <mask_operand3>.  EVEX-encoded.
2380 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2381 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2383 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2384 (match_operand:SI 2 "const_0_to_255_operand")]
2387 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2388 [(set_attr "type" "sse")
2389 (set_attr "prefix" "evex")
2390 (set_attr "mode" "<MODE>")])
;; Scalar vreduce<ssescalarmodesuffix> on VF_128 modes.  Operand 1 is a
;; register, operand 2 a register-or-memory source, and operand 3 an
;; immediate in 0..255.  EVEX-encoded.
2392 (define_insn "reduces<mode>"
2393 [(set (match_operand:VF_128 0 "register_operand" "=v")
2396 [(match_operand:VF_128 1 "register_operand" "v")
2397 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2398 (match_operand:SI 3 "const_0_to_255_operand")]
2403 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2404 [(set_attr "type" "sse")
2405 (set_attr "prefix" "evex")
2406 (set_attr "mode" "<MODE>")])
2408 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2410 ;; Parallel floating point comparisons
2412 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed vcmp<ssemodesuffix> for 128/256-bit float modes.  The comparison
;; is chosen by immediate operand 3, restricted to 0..31.  VEX-encoded with
;; a one-byte immediate.
2414 (define_insn "avx_cmp<mode>3"
2415 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2417 [(match_operand:VF_128_256 1 "register_operand" "x")
2418 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2419 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2422 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2423 [(set_attr "type" "ssecmp")
2424 (set_attr "length_immediate" "1")
2425 (set_attr "prefix" "vex")
2426 (set_attr "mode" "<MODE>")])
;; Scalar vcmp<ssescalarmodesuffix> on VF_128 modes with an immediate
;; predicate (operand 3, 0..31).  In the Intel-syntax half of the template
;; operand 2 is printed with the <iptr> modifier.  The insn mode attribute
;; is the scalar mode.
2428 (define_insn "avx_vmcmp<mode>3"
2429 [(set (match_operand:VF_128 0 "register_operand" "=x")
2432 [(match_operand:VF_128 1 "register_operand" "x")
2433 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2439 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2440 [(set_attr "type" "ssecmp")
2441 (set_attr "length_immediate" "1")
2442 (set_attr "prefix" "vex")
2443 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare restricted to commutative comparison codes
;; (the condition checks for RTX_COMM_COMPARE), which is why operand 1
;; carries the commutative marker '%'.  The comparison operator itself is
;; operand 3 and is printed with %D3.
2445 (define_insn "*<sse>_maskcmp<mode>3_comm"
2446 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2447 (match_operator:VF_128_256 3 "sse_comparison_operator"
2448 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2449 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2451 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2453 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2454 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2455 [(set_attr "isa" "noavx,avx")
2456 (set_attr "type" "ssecmp")
2457 (set_attr "length_immediate" "1")
2458 (set_attr "prefix" "orig,vex")
2459 (set_attr "mode" "<MODE>")])
;; General mask-producing packed compare for 128/256-bit float modes.
;; Operand 3 is the comparison operator (printed with %D3); operand 1 is
;; not marked commutative here.  Alternative 0 is the two-operand non-AVX
;; form, alternative 1 the three-operand VEX form.
2461 (define_insn "<sse>_maskcmp<mode>3"
2462 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2463 (match_operator:VF_128_256 3 "sse_comparison_operator"
2464 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2465 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2468 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2469 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2470 [(set_attr "isa" "noavx,avx")
2471 (set_attr "type" "ssecmp")
2472 (set_attr "length_immediate" "1")
2473 (set_attr "prefix" "orig,vex")
2474 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare on VF_128 modes.  Operand 3 is the
;; comparison operator (printed with %D3); Intel syntax prints operand 2
;; with <iptr>.  NOTE(review): length_immediate is "1,*" here, whereas the
;; packed maskcmp patterns use "1" for both alternatives -- confirm this
;; difference is intended.
2476 (define_insn "<sse>_vmmaskcmp<mode>3"
2477 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2479 (match_operator:VF_128 3 "sse_comparison_operator"
2480 [(match_operand:VF_128 1 "register_operand" "0,x")
2481 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2486 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2487 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2488 [(set_attr "isa" "noavx,avx")
2489 (set_attr "type" "ssecmp")
2490 (set_attr "length_immediate" "1,*")
2491 (set_attr "prefix" "orig,vex")
2492 (set_attr "mode" "<ssescalarmode>")])
;; Predicate for the comparison immediate, keyed by vector mode:
;; floating-point modes accept 0..31, integer modes accept 0..7.
2494 (define_mode_attr cmp_imm_predicate
2495 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2496 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2497 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2498 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2499 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2500 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2501 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2502 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2503 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX-512 compare over V48_AVX512VL modes producing a result of mode
;; <avx512fmaskmode> in operand 0.  The immediate (operand 3) is validated
;; by <cmp_imm_predicate> for the mode.  The name and template carry the
;; mask_scalar_merge and round_saeonly substitutions.
2505 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2506 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2507 (unspec:<avx512fmaskmode>
2508 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2509 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2510 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2512 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2513 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2514 [(set_attr "type" "ssecmp")
2515 (set_attr "length_immediate" "1")
2516 (set_attr "prefix" "evex")
2517 (set_attr "mode" "<sseinsnmode>")])
;; vpcmp<ssemodesuffix> over VI12_AVX512VL modes, producing a result of
;; mode <avx512fmaskmode>.  The immediate (operand 3) is validated by
;; <cmp_imm_predicate>.
2519 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2520 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2521 (unspec:<avx512fmaskmode>
2522 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2523 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2524 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2527 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2528 [(set_attr "type" "ssecmp")
2529 (set_attr "length_immediate" "1")
2530 (set_attr "prefix" "evex")
2531 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned compare vpcmpu<ssemodesuffix> over VI12_AVX512VL modes
;; (UNSPEC_UNSIGNED_PCMP).  Operand 3 is an immediate in 0..7; the result
;; has mode <avx512fmaskmode>.
2533 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2534 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2535 (unspec:<avx512fmaskmode>
2536 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2537 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2538 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2539 UNSPEC_UNSIGNED_PCMP))]
2541 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2542 [(set_attr "type" "ssecmp")
2543 (set_attr "length_immediate" "1")
2544 (set_attr "prefix" "evex")
2545 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned compare vpcmpu<ssemodesuffix> over VI48_AVX512VL modes
;; (UNSPEC_UNSIGNED_PCMP).  Same shape as the VI12 variant: immediate in
;; 0..7, result of mode <avx512fmaskmode>.
2547 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2548 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2549 (unspec:<avx512fmaskmode>
2550 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2551 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2552 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2553 UNSPEC_UNSIGNED_PCMP))]
2555 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2556 [(set_attr "type" "ssecmp")
2557 (set_attr "length_immediate" "1")
2558 (set_attr "prefix" "evex")
2559 (set_attr "mode" "<sseinsnmode>")])
;; Scalar AVX-512 compare on VF_128 modes producing a result of mode
;; <avx512fmaskmode>.  The unspec result is wrapped in an AND (its second
;; arm is not visible in this excerpt).  Operand 3 is an immediate in
;; 0..31; the round_saeonly substitution supplies the optional SAE operand.
2561 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2562 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2563 (and:<avx512fmaskmode>
2564 (unspec:<avx512fmaskmode>
2565 [(match_operand:VF_128 1 "register_operand" "v")
2566 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2567 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2571 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2572 [(set_attr "type" "ssecmp")
2573 (set_attr "length_immediate" "1")
2574 (set_attr "prefix" "evex")
2575 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of the scalar AVX-512 compare.  Operand 4 is an extra
;; <avx512fmaskmode> register that is ANDed into the result and printed in
;; braces after the destination (%{%4%}).
2577 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2578 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2579 (and:<avx512fmaskmode>
2580 (unspec:<avx512fmaskmode>
2581 [(match_operand:VF_128 1 "register_operand" "v")
2582 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2583 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2585 (and:<avx512fmaskmode>
2586 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2589 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2590 [(set_attr "type" "ssecmp")
2591 (set_attr "length_immediate" "1")
2592 (set_attr "prefix" "evex")
2593 (set_attr "mode" "<ssescalarmode>")])
;; Packed float compare written as a comparison operator (operand 3,
;; printed with %D3) over VF modes, producing a result of mode
;; <avx512fmaskmode>.  EVEX-encoded.
2595 (define_insn "avx512f_maskcmp<mode>3"
2596 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2597 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2598 [(match_operand:VF 1 "register_operand" "v")
2599 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2601 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2602 [(set_attr "type" "ssecmp")
2603 (set_attr "length_immediate" "1")
2604 (set_attr "prefix" "evex")
2605 (set_attr "mode" "<sseinsnmode>")])
;; (v)comis[sd]: sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; and element 0 of operand 1.  The prefix_data16 attribute depends on
;; whether the insn mode is DF.
2607 (define_insn "<sse>_comi<round_saeonly_name>"
2608 [(set (reg:CCFP FLAGS_REG)
2611 (match_operand:<ssevecmode> 0 "register_operand" "v")
2612 (parallel [(const_int 0)]))
2614 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2615 (parallel [(const_int 0)]))))]
2616 "SSE_FLOAT_MODE_P (<MODE>mode)"
2617 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2618 [(set_attr "type" "ssecomi")
2619 (set_attr "prefix" "maybe_vex")
2620 (set_attr "prefix_rep" "0")
2621 (set (attr "prefix_data16")
2622 (if_then_else (eq_attr "mode" "DF")
2624 (const_string "0")))
2625 (set_attr "mode" "<MODE>")])
;; (v)ucomis[sd]: same shape as the comi pattern, but FLAGS_REG is set in
;; CCFPU mode.  Compares element 0 of operand 0 with element 0 of
;; operand 1.
2627 (define_insn "<sse>_ucomi<round_saeonly_name>"
2628 [(set (reg:CCFPU FLAGS_REG)
2631 (match_operand:<ssevecmode> 0 "register_operand" "v")
2632 (parallel [(const_int 0)]))
2634 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2635 (parallel [(const_int 0)]))))]
2636 "SSE_FLOAT_MODE_P (<MODE>mode)"
2637 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2638 [(set_attr "type" "ssecomi")
2639 (set_attr "prefix" "maybe_vex")
2640 (set_attr "prefix_rep" "0")
2641 (set (attr "prefix_data16")
2642 (if_then_else (eq_attr "mode" "DF")
2644 (const_string "0")))
2645 (set_attr "mode" "<MODE>")])
;; 512-bit vector conditional select: operand 3 compares float operands 4
;; and 5, and operands 1 and 2 are the two data values.  Enabled only when
;; the data mode and the comparison mode have the same number of elements.
;; Expansion is delegated to ix86_expand_fp_vcond.
2647 (define_expand "vcond<V_512:mode><VF_512:mode>"
2648 [(set (match_operand:V_512 0 "register_operand")
2650 (match_operator 3 ""
2651 [(match_operand:VF_512 4 "nonimmediate_operand")
2652 (match_operand:VF_512 5 "nonimmediate_operand")])
2653 (match_operand:V_512 1 "general_operand")
2654 (match_operand:V_512 2 "general_operand")))]
2656 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2657 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2659 bool ok = ix86_expand_fp_vcond (operands);
;; 256-bit vector conditional select: operand 3 compares float operands 4
;; and 5, and operands 1 and 2 are the two data values.  Enabled only when
;; the data mode and the comparison mode have the same number of elements.
;; Expansion is delegated to ix86_expand_fp_vcond.
2664 (define_expand "vcond<V_256:mode><VF_256:mode>"
2665 [(set (match_operand:V_256 0 "register_operand")
2667 (match_operator 3 ""
2668 [(match_operand:VF_256 4 "nonimmediate_operand")
2669 (match_operand:VF_256 5 "nonimmediate_operand")])
2670 (match_operand:V_256 1 "general_operand")
2671 (match_operand:V_256 2 "general_operand")))]
2673 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2674 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2676 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit vector conditional select: operand 3 compares float operands 4
;; and 5, and operands 1 and 2 are the two data values.  Enabled only when
;; the data mode and the comparison mode have the same number of elements.
;; Expansion is delegated to ix86_expand_fp_vcond.
2681 (define_expand "vcond<V_128:mode><VF_128:mode>"
2682 [(set (match_operand:V_128 0 "register_operand")
2684 (match_operator 3 ""
2685 [(match_operand:VF_128 4 "nonimmediate_operand")
2686 (match_operand:VF_128 5 "nonimmediate_operand")])
2687 (match_operand:V_128 1 "general_operand")
2688 (match_operand:V_128 2 "general_operand")))]
2690 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2691 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2693 bool ok = ix86_expand_fp_vcond (operands);
2698 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2700 ;; Parallel floating point logical operations
2702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AND-NOT for 128/256-bit float vectors, optionally masked.  The output
;; code formats the assembly into a static buffer with snprintf:
;;   - the suffix comes from the insn mode attribute, defaulting to
;;     <ssemodesuffix>;
;;   - alternative 0 uses the two-operand "andn%s" template, alternative 1
;;     the three-operand "vandn%s" template;
;;   - when masking is applied and AVX512DQ is unavailable, the integer
;;     "vpandn" with a q/d suffix is used instead.
;; The mode attribute picks <ssePSmode> or V4SF under the listed tuning and
;; size conditions, otherwise <MODE>.
2704 (define_insn "<sse>_andnot<mode>3<mask_name>"
2705 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2708 (match_operand:VF_128_256 1 "register_operand" "0,v"))
2709 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2710 "TARGET_SSE && <mask_avx512vl_condition>"
2712 static char buf[128];
2716 switch (get_attr_mode (insn))
2723 suffix = "<ssemodesuffix>";
2726 switch (which_alternative)
2729 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2732 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2738 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2739 if (<mask_applied> && !TARGET_AVX512DQ)
2741 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2742 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2745 snprintf (buf, sizeof (buf), ops, suffix);
2748 [(set_attr "isa" "noavx,avx")
2749 (set_attr "type" "sselog")
2750 (set_attr "prefix" "orig,maybe_evex")
2752 (cond [(and (match_test "<MODE_SIZE> == 16")
2753 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2754 (const_string "<ssePSmode>")
2755 (match_test "TARGET_AVX")
2756 (const_string "<MODE>")
2757 (match_test "optimize_function_for_size_p (cfun)")
2758 (const_string "V4SF")
2760 (const_string "<MODE>")))])
;; AND-NOT for 512-bit float vectors, optionally masked.  The mnemonic is
;; assembled with snprintf from a "v%sandn%s" template.  Without AVX512DQ
;; the suffix becomes q or d according to the element mode; per the inline
;; comment, this selects the integer vpandn[qd] form.
2763 (define_insn "<sse>_andnot<mode>3<mask_name>"
2764 [(set (match_operand:VF_512 0 "register_operand" "=v")
2767 (match_operand:VF_512 1 "register_operand" "v"))
2768 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2771 static char buf[128];
2775 suffix = "<ssemodesuffix>";
2778 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2779 if (!TARGET_AVX512DQ)
2781 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2785 snprintf (buf, sizeof (buf),
2786 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2790 [(set_attr "type" "sselog")
2791 (set_attr "prefix" "evex")
2792 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the any_logic operations on 128/256-bit float vectors,
;; optionally masked.  Operands are fixed up by
;; ix86_fixup_binary_operands_no_copy before matching.
2794 (define_expand "<code><mode>3<mask_name>"
2795 [(set (match_operand:VF_128_256 0 "register_operand")
2796 (any_logic:VF_128_256
2797 (match_operand:VF_128_256 1 "nonimmediate_operand")
2798 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2799 "TARGET_SSE && <mask_avx512vl_condition>"
2800 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit counterpart of the logic-operation expander.  Operands are
;; fixed up by ix86_fixup_binary_operands_no_copy.
2802 (define_expand "<code><mode>3<mask_name>"
2803 [(set (match_operand:VF_512 0 "register_operand")
2805 (match_operand:VF_512 1 "nonimmediate_operand")
2806 (match_operand:VF_512 2 "nonimmediate_operand")))]
2808 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the any_logic operations on 128/256-bit float vectors,
;; optionally masked; operand 1 is commutative ('%').  The output code
;; formats the assembly into a static buffer with snprintf:
;;   - the suffix comes from the insn mode attribute, defaulting to
;;     <ssemodesuffix>;
;;   - alternative 0 uses the two-operand "<logic>%s" template,
;;     alternative 1 the three-operand "v<logic>%s" template;
;;   - when masking is applied and AVX512DQ is unavailable, the integer
;;     "vp<logic>" with a q/d suffix is used instead.
2810 (define_insn "*<code><mode>3<mask_name>"
2811 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2812 (any_logic:VF_128_256
2813 (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
2814 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2815 "TARGET_SSE && <mask_avx512vl_condition>
2816 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2818 static char buf[128];
2822 switch (get_attr_mode (insn))
2829 suffix = "<ssemodesuffix>";
2832 switch (which_alternative)
2835 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2838 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2844 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2845 if (<mask_applied> && !TARGET_AVX512DQ)
2847 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2848 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2851 snprintf (buf, sizeof (buf), ops, suffix);
2854 [(set_attr "isa" "noavx,avx")
2855 (set_attr "type" "sselog")
2856 (set_attr "prefix" "orig,maybe_evex")
2858 (cond [(and (match_test "<MODE_SIZE> == 16")
2859 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2860 (const_string "<ssePSmode>")
2861 (match_test "TARGET_AVX")
2862 (const_string "<MODE>")
2863 (match_test "optimize_function_for_size_p (cfun)")
2864 (const_string "V4SF")
2866 (const_string "<MODE>")))])
;; Insn for the logic operations on 512-bit float vectors, optionally
;; masked.  The mnemonic is assembled with snprintf from a
;; "v%s<logic>%s" template.  When the mode is 64 bytes wide or masking is
;; applied, and AVX512DQ is unavailable, the suffix becomes q or d; per
;; the inline comment, this selects the integer vp<logic>[dq] form.
2868 (define_insn "*<code><mode>3<mask_name>"
2869 [(set (match_operand:VF_512 0 "register_operand" "=v")
2871 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
2872 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2873 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2875 static char buf[128];
2879 suffix = "<ssemodesuffix>";
2882 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2883 if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
2885 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2889 snprintf (buf, sizeof (buf),
2890 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2894 [(set_attr "type" "sselog")
2895 (set_attr "prefix" "evex")
2896 (set_attr "mode" "<sseinsnmode>")])
;; copysign for VF modes.  Operand 3 is a sign-bit mask built by
;; ix86_build_signbit_mask; operands 4 and 5 are fresh temporaries.
;; The visible RTL combines (not mask) with operand 1 and (mask AND
;; operand 2), then ORs operands 4 and 5 into operand 0.  The lines that
;; assign operands 4 and 5 are not visible here; presumably they hold
;; those two intermediate values.
2898 (define_expand "copysign<mode>3"
2901 (not:VF (match_dup 3))
2902 (match_operand:VF 1 "nonimmediate_operand")))
2904 (and:VF (match_dup 3)
2905 (match_operand:VF 2 "nonimmediate_operand")))
2906 (set (match_operand:VF 0 "register_operand")
2907 (ior:VF (match_dup 4) (match_dup 5)))]
2910 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2912 operands[4] = gen_reg_rtx (<MODE>mode);
2913 operands[5] = gen_reg_rtx (<MODE>mode);
2916 ;; Also define scalar versions. These are used for abs, neg, and
2917 ;; conditional move. Using subregs into vector modes causes register
2918 ;; allocation lossage. These patterns do not allow memory operands
2919 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) AND-NOT with register-only operands.  The mnemonic
;; suffix is "ps" when the insn mode attribute is V4SF, otherwise
;; <ssevecmodesuffix>.  Alternative 0 emits the two-operand andn form,
;; alternative 1 the three-operand VEX vandn form.
2921 (define_insn "*andnot<mode>3"
2922 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2925 (match_operand:MODEF 1 "register_operand" "0,x"))
2926 (match_operand:MODEF 2 "register_operand" "x,x")))]
2927 "SSE_FLOAT_MODE_P (<MODE>mode)"
2929 static char buf[32];
2932 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2934 switch (which_alternative)
2937 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2940 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2946 snprintf (buf, sizeof (buf), ops, suffix);
2949 [(set_attr "isa" "noavx,avx")
2950 (set_attr "type" "sselog")
2951 (set_attr "prefix" "orig,vex")
2953 (cond [(and (match_test "<MODE_SIZE> == 16")
2954 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2955 (const_string "V4SF")
2956 (match_test "TARGET_AVX")
2957 (const_string "<ssevecmode>")
2958 (match_test "optimize_function_for_size_p (cfun)")
2959 (const_string "V4SF")
2961 (const_string "<ssevecmode>")))])
;; TFmode AND-NOT.  The mnemonic is "andnps" when the insn mode attribute
;; is V4SF and "pandn" otherwise; alternative 1 prepends "v" and uses
;; three operands.  The mode attribute falls back to V4SF under the listed
;; tuning, no-SSE2 or optimize-for-size conditions, and to TI otherwise.
2963 (define_insn "*andnottf3"
2964 [(set (match_operand:TF 0 "register_operand" "=x,x")
2966 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2967 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2970 static char buf[32];
2973 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2975 switch (which_alternative)
2978 ops = "%s\t{%%2, %%0|%%0, %%2}";
2981 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2987 snprintf (buf, sizeof (buf), ops, tmp);
2990 [(set_attr "isa" "noavx,avx")
2991 (set_attr "type" "sselog")
2992 (set (attr "prefix_data16")
2994 (and (eq_attr "alternative" "0")
2995 (eq_attr "mode" "TI"))
2997 (const_string "*")))
2998 (set_attr "prefix" "orig,vex")
3000 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3001 (const_string "V4SF")
3002 (match_test "TARGET_AVX")
3004 (ior (not (match_test "TARGET_SSE2"))
3005 (match_test "optimize_function_for_size_p (cfun)"))
3006 (const_string "V4SF")
3008 (const_string "TI")))])
;; Scalar (MODEF) logic operation with register-only operands; operand 1
;; is commutative ('%').  The suffix is "ps" when the insn mode attribute
;; is V4SF, otherwise <ssevecmodesuffix>.  Alternative 0 emits the
;; two-operand form, alternative 1 the three-operand VEX form.
3010 (define_insn "*<code><mode>3"
3011 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3013 (match_operand:MODEF 1 "register_operand" "%0,x")
3014 (match_operand:MODEF 2 "register_operand" "x,x")))]
3015 "SSE_FLOAT_MODE_P (<MODE>mode)"
3017 static char buf[32];
3020 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3022 switch (which_alternative)
3025 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3028 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3034 snprintf (buf, sizeof (buf), ops, suffix);
3037 [(set_attr "isa" "noavx,avx")
3038 (set_attr "type" "sselog")
3039 (set_attr "prefix" "orig,vex")
3041 (cond [(and (match_test "<MODE_SIZE> == 16")
3042 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3043 (const_string "V4SF")
3044 (match_test "TARGET_AVX")
3045 (const_string "<ssevecmode>")
3046 (match_test "optimize_function_for_size_p (cfun)")
3047 (const_string "V4SF")
3049 (const_string "<ssevecmode>")))])
;; Expander for TFmode logic operations.  Operands are fixed up by
;; ix86_fixup_binary_operands_no_copy.
3051 (define_expand "<code>tf3"
3052 [(set (match_operand:TF 0 "register_operand")
3054 (match_operand:TF 1 "nonimmediate_operand")
3055 (match_operand:TF 2 "nonimmediate_operand")))]
3057 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn for TFmode logic operations; operand 1 is commutative ('%').  The
;; mnemonic is "<logic>ps" when the insn mode attribute is V4SF and
;; "p<logic>" otherwise; alternative 1 prepends "v" and uses three
;; operands.  The mode attribute follows the same V4SF/TI selection as
;; *andnottf3.
3059 (define_insn "*<code>tf3"
3060 [(set (match_operand:TF 0 "register_operand" "=x,x")
3062 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
3063 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3065 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3067 static char buf[32];
3070 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
3072 switch (which_alternative)
3075 ops = "%s\t{%%2, %%0|%%0, %%2}";
3078 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3084 snprintf (buf, sizeof (buf), ops, tmp);
3087 [(set_attr "isa" "noavx,avx")
3088 (set_attr "type" "sselog")
3089 (set (attr "prefix_data16")
3091 (and (eq_attr "alternative" "0")
3092 (eq_attr "mode" "TI"))
3094 (const_string "*")))
3095 (set_attr "prefix" "orig,vex")
3097 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3098 (const_string "V4SF")
3099 (match_test "TARGET_AVX")
3101 (ior (not (match_test "TARGET_SSE2"))
3102 (match_test "optimize_function_for_size_p (cfun)"))
3103 (const_string "V4SF")
3105 (const_string "TI")))])
3107 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3109 ;; FMA floating point multiply/accumulate instructions. These include
3110 ;; scalar versions of the instructions as well as vector versions.
3112 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3114 ;; The standard names for scalar FMA are only available with SSE math enabled.
3115 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3116 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3117 ;; and TARGET_FMA4 are both false.
3118 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3119 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3120 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3121 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name FMA expanders.  The scalar entries
;; additionally require TARGET_SSE_MATH; 128/256-bit vectors accept FMA,
;; FMA4 or AVX512VL; 512-bit vectors require AVX512F.
3122 (define_mode_iterator FMAMODEM
3123 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3124 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3125 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3126 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3127 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3128 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3129 (V16SF "TARGET_AVX512F")
3130 (V8DF "TARGET_AVX512F")])
;; Standard-name fma expander over FMAMODEM: three unmodified inputs
;; (operands 1-3), result in operand 0.  No preparation code.
3132 (define_expand "fma<mode>4"
3133 [(set (match_operand:FMAMODEM 0 "register_operand")
3135 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3136 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3137 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name fms expander over FMAMODEM: as fma, but operand 3 is
;; negated.
3139 (define_expand "fms<mode>4"
3140 [(set (match_operand:FMAMODEM 0 "register_operand")
3142 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3143 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3144 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-name fnma expander over FMAMODEM: as fma, but operand 1 is
;; negated.
3146 (define_expand "fnma<mode>4"
3147 [(set (match_operand:FMAMODEM 0 "register_operand")
3149 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3150 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3151 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name fnms expander over FMAMODEM: as fma, but both operand 1
;; and operand 3 are negated.
3153 (define_expand "fnms<mode>4"
3154 [(set (match_operand:FMAMODEM 0 "register_operand")
3156 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3157 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3158 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3160 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, except that the scalar SF/DF entries do not
;; require TARGET_SSE_MATH.
3161 (define_mode_iterator FMAMODE_AVX512
3162 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3163 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3164 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3165 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3166 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3167 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3168 (V16SF "TARGET_AVX512F")
3169 (V8DF "TARGET_AVX512F")])
;; Unconditional list of scalar and 128/256-bit float modes; no 512-bit
;; entries.
3171 (define_mode_iterator FMAMODE
3172 [SF DF V4SF V2DF V8SF V4DF])
;; Builtin-facing fused multiply-add expander over FMAMODE_AVX512, with
;; three unmodified inputs (operands 1-3).  No preparation code.
3174 (define_expand "fma4i_fmadd_<mode>"
3175 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3177 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3178 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3179 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
;; Zero-masking fmadd expander.  It forwards operands 0-3 to
;; gen_fma_fmadd_<mode>_maskz_1, inserting a zero vector
;; (CONST0_RTX (<MODE>mode)) ahead of mask operand 4, followed by any
;; rounding operand supplied by the round_expand substitution.
3181 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3182 [(match_operand:VF_AVX512VL 0 "register_operand")
3183 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3184 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3185 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3186 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3187 "TARGET_AVX512F && <round_mode512bit_condition>"
3189 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3190 operands[0], operands[1], operands[2], operands[3],
3191 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unmasked vfmadd over FMAMODE.  Alternatives 1-3 ("isa" fma) print the
;; three-operand 132/213/231 forms, chosen by which source is tied to the
;; destination; alternatives 4-5 ("isa" fma4) print the four-operand form.
3195 (define_insn "*fma_fmadd_<mode>"
3196 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3198 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3199 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3200 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3201 "TARGET_FMA || TARGET_FMA4"
3203 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3204 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3205 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3206 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3207 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3208 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3209 (set_attr "type" "ssemuladd")
3210 (set_attr "mode" "<MODE>")])
3212 ;; Assumes AVX-512F as the baseline.
;; Scalar SF/DF and 512-bit V16SF/V8DF are unconditional; the 128/256-bit
;; vector modes additionally require TARGET_AVX512VL.
3213 (define_mode_iterator VF_SF_AVX512VL
3214 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3215 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX-512 vfmadd over VF_SF_AVX512VL, decorated through the <sd_mask_*> and
;; <round_*> attributes.  The three alternatives print the 132, 213 and 231
;; forms: operand 1 is tied to the destination in the first two, operand 3
;; in the third.
3217 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3218 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3220 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3221 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3222 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3223 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3225 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3226 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3227 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3228 [(set_attr "type" "ssemuladd")
3229 (set_attr "mode" "<MODE>")])
;; Masked vfmadd (vec_merge under mask operand 4, constraint "Yk", printed
;; as {%4}).  Operand 1 is tied to the destination in both alternatives; the
;; operand carrying <round_constraint> is operand 2 (132 form) or operand 3
;; (213 form).
3231 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3232 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3233 (vec_merge:VF_AVX512VL
3235 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3236 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3237 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3239 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3240 "TARGET_AVX512F && <round_mode512bit_condition>"
3242 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3243 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3244 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3245 (set_attr "type" "ssemuladd")
3246 (set_attr "mode" "<MODE>")])
;; Masked vfmadd, 231 form: operand 3 is tied to the destination ("0") and
;; operand 4 is the "Yk" mask register printed as {%4}.
;; Operands 0 and 1 use the "v" register class, matching every other
;; *_mask3 pattern in this family (fmsub, fnmadd, fnmsub, fmaddsub,
;; fmsubadd); the insn is marked "fma_avx512f", so there is no reason to
;; restrict it to the narrower "x" class.
3248 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3249 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3250 (vec_merge:VF_AVX512VL
3252 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3253 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3254 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3256 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3258 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3259 [(set_attr "isa" "fma_avx512f")
3260 (set_attr "type" "ssemuladd")
3261 (set_attr "mode" "<MODE>")])
;; Unmasked vfmsub over FMAMODE.  Alternatives 1-3 ("isa" fma) print the
;; three-operand 132/213/231 forms; alternatives 4-5 ("isa" fma4) print the
;; four-operand form.
3263 (define_insn "*fma_fmsub_<mode>"
3264 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3266 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3267 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3269 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3270 "TARGET_FMA || TARGET_FMA4"
3272 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3273 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3274 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3275 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3276 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3277 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3278 (set_attr "type" "ssemuladd")
3279 (set_attr "mode" "<MODE>")])
;; AVX-512 vfmsub over VF_SF_AVX512VL with <sd_mask_*>/<round_*>
;; decorations.  Alternatives print the 132, 213 and 231 forms; operand 1 is
;; tied to the destination in the first two, operand 3 in the third.
3281 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3282 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3284 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3285 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3287 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3288 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3290 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3291 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3292 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3293 [(set_attr "type" "ssemuladd")
3294 (set_attr "mode" "<MODE>")])
;; Masked vfmsub (vec_merge under "Yk" mask operand 4, printed as {%4}).
;; Operand 1 is tied to the destination; alternatives print the 132 and 213
;; forms.
3296 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3297 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3298 (vec_merge:VF_AVX512VL
3300 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3301 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3303 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3305 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3308 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3309 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3310 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3311 (set_attr "type" "ssemuladd")
3312 (set_attr "mode" "<MODE>")])
;; Masked vfmsub, 231 form: operand 3 is tied to the destination ("0") and
;; operand 4 is the "Yk" mask register printed as {%4}.
3314 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3315 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3316 (vec_merge:VF_AVX512VL
3318 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3319 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3321 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3323 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3324 "TARGET_AVX512F && <round_mode512bit_condition>"
3325 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3326 [(set_attr "isa" "fma_avx512f")
3327 (set_attr "type" "ssemuladd")
3328 (set_attr "mode" "<MODE>")])
;; Unmasked vfnmadd over FMAMODE.  Alternatives 1-3 ("isa" fma) print the
;; three-operand 132/213/231 forms; alternatives 4-5 ("isa" fma4) print the
;; four-operand form.
3330 (define_insn "*fma_fnmadd_<mode>"
3331 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3334 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3335 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3336 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3337 "TARGET_FMA || TARGET_FMA4"
3339 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3340 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3341 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3342 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3343 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3344 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3345 (set_attr "type" "ssemuladd")
3346 (set_attr "mode" "<MODE>")])
;; AVX-512 vfnmadd over VF_SF_AVX512VL with <sd_mask_*>/<round_*>
;; decorations.  Alternatives print the 132, 213 and 231 forms; operand 1 is
;; tied to the destination in the first two, operand 3 in the third.
3348 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3349 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3352 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3353 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3354 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3355 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3357 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3358 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3359 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3360 [(set_attr "type" "ssemuladd")
3361 (set_attr "mode" "<MODE>")])
;; Masked vfnmadd (vec_merge under "Yk" mask operand 4, printed as {%4}).
;; Operand 1 is tied to the destination; alternatives print the 132 and 213
;; forms.
3363 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3364 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3365 (vec_merge:VF_AVX512VL
3368 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3369 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3370 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3372 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3373 "TARGET_AVX512F && <round_mode512bit_condition>"
3375 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3376 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3377 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3378 (set_attr "type" "ssemuladd")
3379 (set_attr "mode" "<MODE>")])
;; Masked vfnmadd, 231 form: operand 3 is tied to the destination ("0") and
;; operand 4 is the "Yk" mask register printed as {%4}.
3381 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3382 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3383 (vec_merge:VF_AVX512VL
3386 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3387 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3388 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3390 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3391 "TARGET_AVX512F && <round_mode512bit_condition>"
3392 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3393 [(set_attr "isa" "fma_avx512f")
3394 (set_attr "type" "ssemuladd")
3395 (set_attr "mode" "<MODE>")])
;; Unmasked vfnmsub over FMAMODE.  Alternatives 1-3 ("isa" fma) print the
;; three-operand 132/213/231 forms; alternatives 4-5 ("isa" fma4) print the
;; four-operand form.
;; The FMA3 templates are plain, exactly like *fma_fmadd/*fma_fmsub/
;; *fma_fnmadd: this pattern's name carries no mask/round suffix and its
;; condition is only "TARGET_FMA || TARGET_FMA4", so the AVX-512 mask and
;; rounding decorations do not belong in its output templates.
3397 (define_insn "*fma_fnmsub_<mode>"
3398 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3401 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3402 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3404 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3405 "TARGET_FMA || TARGET_FMA4"
3407 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3408 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3409 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3410 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3411 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3412 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3413 (set_attr "type" "ssemuladd")
3414 (set_attr "mode" "<MODE>")])
;; AVX-512 vfnmsub over VF_SF_AVX512VL with <sd_mask_*>/<round_*>
;; decorations.  Alternatives print the 132, 213 and 231 forms; operand 1 is
;; tied to the destination in the first two, operand 3 in the third.
3416 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3417 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3420 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3421 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3423 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3424 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3426 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3427 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3428 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3429 [(set_attr "type" "ssemuladd")
3430 (set_attr "mode" "<MODE>")])
;; Masked vfnmsub (vec_merge under "Yk" mask operand 4, printed as {%4}).
;; Operand 1 is tied to the destination; alternatives print the 132 and 213
;; forms.
3432 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3433 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3434 (vec_merge:VF_AVX512VL
3437 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3438 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3440 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3442 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3443 "TARGET_AVX512F && <round_mode512bit_condition>"
3445 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3446 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3447 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3448 (set_attr "type" "ssemuladd")
3449 (set_attr "mode" "<MODE>")])
;; Masked vfnmsub, 231 form: operand 3 is tied to the destination ("0") and
;; operand 4 is the "Yk" mask register printed as {%4}.
3451 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3452 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3453 (vec_merge:VF_AVX512VL
3456 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3457 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3459 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3461 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3463 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3464 [(set_attr "isa" "fma_avx512f")
3465 (set_attr "type" "ssemuladd")
3466 (set_attr "mode" "<MODE>")])
3468 ;; FMA parallel floating point multiply addsub and subadd operations.
3470 ;; It would be possible to represent these without the UNSPEC as
3472 ;; (vec_merge
3473 ;; (fma op1 op2 op3)
3474 ;; (fma op1 op2 (neg op3))
3475 ;; (merge-const))
3477 ;; But this doesn't seem useful in practice.
;; Expander over VF: register destination and three nonimmediate sources;
;; enabled when any of FMA, FMA4 or AVX512F is available.
3479 (define_expand "fmaddsub_<mode>"
3480 [(set (match_operand:VF 0 "register_operand")
3482 [(match_operand:VF 1 "nonimmediate_operand")
3483 (match_operand:VF 2 "nonimmediate_operand")
3484 (match_operand:VF 3 "nonimmediate_operand")]
3486 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masking expander for fmaddsub.  Forwards operands 0-3 to
;; gen_fma_fmaddsub_<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode) ahead
;; of the mask register (operand 4).
3488 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3489 [(match_operand:VF_AVX512VL 0 "register_operand")
3490 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3491 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3492 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3493 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3496 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3497 operands[0], operands[1], operands[2], operands[3],
3498 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unmasked vfmaddsub over VF_128_256.  Alternatives 1-3 ("isa" fma) print
;; the three-operand 132/213/231 forms; alternatives 4-5 ("isa" fma4) print
;; the four-operand form.
3502 (define_insn "*fma_fmaddsub_<mode>"
3503 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3505 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3506 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3507 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3509 "TARGET_FMA || TARGET_FMA4"
3511 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3512 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3513 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3514 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3515 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3516 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3517 (set_attr "type" "ssemuladd")
3518 (set_attr "mode" "<MODE>")])
;; AVX-512 vfmaddsub, expressed as an unspec over VF_SF_AVX512VL with
;; <sd_mask_*>/<round_*> decorations.  Alternatives print the 132, 213 and
;; 231 forms.
3520 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3521 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3522 (unspec:VF_SF_AVX512VL
3523 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3524 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3525 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3527 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3529 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3530 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3531 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3532 [(set_attr "type" "ssemuladd")
3533 (set_attr "mode" "<MODE>")])
;; Masked vfmaddsub (vec_merge under "Yk" mask operand 4, printed as {%4}).
;; Operand 1 is tied to the destination; alternatives print the 132 and 213
;; forms.
3535 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3536 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3537 (vec_merge:VF_AVX512VL
3539 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3540 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3541 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3544 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3547 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3548 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3549 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3550 (set_attr "type" "ssemuladd")
3551 (set_attr "mode" "<MODE>")])
;; Masked vfmaddsub, 231 form: operand 3 is tied to the destination ("0")
;; and operand 4 is the "Yk" mask register printed as {%4}.
3553 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3554 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3555 (vec_merge:VF_AVX512VL
3557 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3558 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3559 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3562 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3564 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3565 [(set_attr "isa" "fma_avx512f")
3566 (set_attr "type" "ssemuladd")
3567 (set_attr "mode" "<MODE>")])
;; Unmasked vfmsubadd over VF_128_256.  Alternatives 1-3 ("isa" fma) print
;; the three-operand 132/213/231 forms; alternatives 4-5 ("isa" fma4) print
;; the four-operand form.
3569 (define_insn "*fma_fmsubadd_<mode>"
3570 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3572 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3573 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3575 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3577 "TARGET_FMA || TARGET_FMA4"
3579 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3580 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3581 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3582 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3583 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3584 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3585 (set_attr "type" "ssemuladd")
3586 (set_attr "mode" "<MODE>")])
;; AVX-512 vfmsubadd, expressed as an unspec over VF_SF_AVX512VL with
;; <sd_mask_*>/<round_*> decorations.  Alternatives print the 132, 213 and
;; 231 forms.
3588 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3589 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3590 (unspec:VF_SF_AVX512VL
3591 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3592 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3594 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3596 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3598 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3599 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3600 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3601 [(set_attr "type" "ssemuladd")
3602 (set_attr "mode" "<MODE>")])
;; Masked vfmsubadd (vec_merge under "Yk" mask operand 4, printed as {%4}).
;; Operand 1 is tied to the destination; alternatives print the 132 and 213
;; forms.
3604 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3605 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3606 (vec_merge:VF_AVX512VL
3608 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3609 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3611 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3614 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3617 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3618 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3619 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3620 (set_attr "type" "ssemuladd")
3621 (set_attr "mode" "<MODE>")])
;; Masked vfmsubadd, 231 form: operand 3 is tied to the destination ("0")
;; and operand 4 is the "Yk" mask register printed as {%4}.
3623 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3624 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3625 (vec_merge:VF_AVX512VL
3627 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3628 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3630 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3633 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3635 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3636 [(set_attr "isa" "fma_avx512f")
3637 (set_attr "type" "ssemuladd")
3638 (set_attr "mode" "<MODE>")])
3640 ;; FMA3 floating point scalar intrinsics. These merge result with
3641 ;; high-order elements from the destination register.
;; Expander for the scalar FMA3 intrinsic over VF_128: register destination
;; and three sources using <round_nimm_predicate>.
3643 (define_expand "fmai_vmfmadd_<mode><round_name>"
3644 [(set (match_operand:VF_128 0 "register_operand")
3647 (match_operand:VF_128 1 "<round_nimm_predicate>")
3648 (match_operand:VF_128 2 "<round_nimm_predicate>")
3649 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar vfmadd132/213 (<ssescalarmodesuffix>) over VF_128; operand 1 is
;; tied to the destination in both alternatives.
;; NOTE(review): the name has no <round_name> suffix although the operands
;; and templates use <round_*> attributes, unlike *fmai_fnmadd/*fmai_fnmsub
;; -- confirm this is intentional.
3654 (define_insn "*fmai_fmadd_<mode>"
3655 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3658 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3659 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3660 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3663 "TARGET_FMA || TARGET_AVX512F"
3665 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3666 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3667 [(set_attr "type" "ssemuladd")
3668 (set_attr "mode" "<MODE>")])
;; Scalar vfmsub132/213 (<ssescalarmodesuffix>) over VF_128; operand 1 is
;; tied to the destination in both alternatives.
;; NOTE(review): the name has no <round_name> suffix although the operands
;; and templates use <round_*> attributes, unlike *fmai_fnmadd/*fmai_fnmsub
;; -- confirm this is intentional.
3670 (define_insn "*fmai_fmsub_<mode>"
3671 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3674 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3675 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3677 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3680 "TARGET_FMA || TARGET_AVX512F"
3682 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3683 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3684 [(set_attr "type" "ssemuladd")
3685 (set_attr "mode" "<MODE>")])
;; Scalar vfnmadd132/213 (<ssescalarmodesuffix>) over VF_128.  The RTL lists
;; operand 2 before operand 1; operand 1 is the one tied to the destination.
3687 (define_insn "*fmai_fnmadd_<mode><round_name>"
3688 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3692 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3693 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3694 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3697 "TARGET_FMA || TARGET_AVX512F"
3699 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3700 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3701 [(set_attr "type" "ssemuladd")
3702 (set_attr "mode" "<MODE>")])
;; Scalar vfnmsub132/213 (<ssescalarmodesuffix>) over VF_128.  The RTL lists
;; operand 2 before operand 1; operand 1 is the one tied to the destination.
3704 (define_insn "*fmai_fnmsub_<mode><round_name>"
3705 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3709 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3710 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3712 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3715 "TARGET_FMA || TARGET_AVX512F"
3717 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3718 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3719 [(set_attr "type" "ssemuladd")
3720 (set_attr "mode" "<MODE>")])
3722 ;; FMA4 floating point scalar intrinsics. These write the
3723 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar FMA4 intrinsic over VF_128.  Its preparation
;; statement sets operands[4] to CONST0_RTX (<MODE>mode).
3725 (define_expand "fma4i_vmfmadd_<mode>"
3726 [(set (match_operand:VF_128 0 "register_operand")
3729 (match_operand:VF_128 1 "nonimmediate_operand")
3730 (match_operand:VF_128 2 "nonimmediate_operand")
3731 (match_operand:VF_128 3 "nonimmediate_operand"))
3735 "operands[4] = CONST0_RTX (<MODE>mode);")
;; Four-operand scalar vfmadd (<ssescalarmodesuffix>).  All register
;; operands use class "x"; operand 4 must satisfy const0_operand.  Memory is
;; accepted for operand 3 (alternative 1) or operand 2 (alternative 2).
3737 (define_insn "*fma4i_vmfmadd_<mode>"
3738 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3741 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3742 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3743 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3744 (match_operand:VF_128 4 "const0_operand")
3747 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3748 [(set_attr "type" "ssemuladd")
3749 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfmsub (<ssescalarmodesuffix>).  All register
;; operands use class "x"; operand 4 must satisfy const0_operand.
3751 (define_insn "*fma4i_vmfmsub_<mode>"
3752 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3755 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3756 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3758 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3759 (match_operand:VF_128 4 "const0_operand")
3762 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3763 [(set_attr "type" "ssemuladd")
3764 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfnmadd (<ssescalarmodesuffix>).  All register
;; operands use class "x"; operand 4 must satisfy const0_operand.
3766 (define_insn "*fma4i_vmfnmadd_<mode>"
3767 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3771 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3772 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3773 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3774 (match_operand:VF_128 4 "const0_operand")
3777 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3778 [(set_attr "type" "ssemuladd")
3779 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfnmsub (<ssescalarmodesuffix>).  All register
;; operands use class "x"; operand 4 must satisfy const0_operand.
3781 (define_insn "*fma4i_vmfnmsub_<mode>"
3782 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3786 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3787 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3789 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3790 (match_operand:VF_128 4 "const0_operand")
3793 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3794 [(set_attr "type" "ssemuladd")
3795 (set_attr "mode" "<MODE>")])
3797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3799 ;; Parallel single-precision floating point conversion operations
3801 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: float-converts V2SI operand 2 (constraint "ym") to V2SF;
;; V4SF operand 1 is tied to the destination.
3803 (define_insn "sse_cvtpi2ps"
3804 [(set (match_operand:V4SF 0 "register_operand" "=x")
3807 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3808 (match_operand:V4SF 1 "register_operand" "0")
3811 "cvtpi2ps\t{%2, %0|%0, %2}"
3812 [(set_attr "type" "ssecvt")
3813 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 goes through a V4SI unspec and elements 0 and 1
;; are selected into the V2SI destination (constraint "y", unit "mmx").
3815 (define_insn "sse_cvtps2pi"
3816 [(set (match_operand:V2SI 0 "register_operand" "=y")
3818 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3820 (parallel [(const_int 0) (const_int 1)])))]
3822 "cvtps2pi\t{%1, %0|%0, %q1}"
3823 [(set_attr "type" "ssecvt")
3824 (set_attr "unit" "mmx")
3825 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF operand 1 is converted with fix:V4SI and elements 0 and 1
;; are selected into the V2SI destination (constraint "y", unit "mmx").
;; "prefix_rep" is explicitly set to 0 for this insn.
3827 (define_insn "sse_cvttps2pi"
3828 [(set (match_operand:V2SI 0 "register_operand" "=y")
3830 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3831 (parallel [(const_int 0) (const_int 1)])))]
3833 "cvttps2pi\t{%1, %0|%0, %q1}"
3834 [(set_attr "type" "ssecvt")
3835 (set_attr "unit" "mmx")
3836 (set_attr "prefix_rep" "0")
3837 (set_attr "mode" "SF")])
;; SI -> SF conversion into a V4SF register.  Alternatives 1-2 ("isa" noavx)
;; print two-operand cvtsi2ss with operand 1 tied to the destination (source
;; in a general register or memory); alternative 3 ("isa" avx) prints
;; three-operand vcvtsi2ss with an optional rounding operand.
3839 (define_insn "sse_cvtsi2ss<round_name>"
3840 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3843 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3844 (match_operand:V4SF 1 "register_operand" "0,0,v")
3848 cvtsi2ss\t{%2, %0|%0, %2}
3849 cvtsi2ss\t{%2, %0|%0, %2}
3850 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3851 [(set_attr "isa" "noavx,noavx,avx")
3852 (set_attr "type" "sseicvt")
3853 (set_attr "athlon_decode" "vector,double,*")
3854 (set_attr "amdfam10_decode" "vector,double,*")
3855 (set_attr "bdver1_decode" "double,direct,*")
3856 (set_attr "btver2_decode" "double,double,double")
3857 (set_attr "prefix" "orig,orig,maybe_evex")
3858 (set_attr "mode" "SF")])
;; DI -> SF conversion into a V4SF register; 64-bit targets only.
;; Alternatives 1-2 ("isa" noavx) print two-operand cvtsi2ssq with operand 1
;; tied to the destination; alternative 3 ("isa" avx) prints three-operand
;; vcvtsi2ssq with an optional rounding operand.
3860 (define_insn "sse_cvtsi2ssq<round_name>"
3861 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3864 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3865 (match_operand:V4SF 1 "register_operand" "0,0,v")
3867 "TARGET_SSE && TARGET_64BIT"
3869 cvtsi2ssq\t{%2, %0|%0, %2}
3870 cvtsi2ssq\t{%2, %0|%0, %2}
3871 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3872 [(set_attr "isa" "noavx,noavx,avx")
3873 (set_attr "type" "sseicvt")
3874 (set_attr "athlon_decode" "vector,double,*")
3875 (set_attr "amdfam10_decode" "vector,double,*")
3876 (set_attr "bdver1_decode" "double,direct,*")
3877 (set_attr "btver2_decode" "double,double,double")
3878 (set_attr "length_vex" "*,*,4")
3879 (set_attr "prefix_rex" "1,1,*")
3880 (set_attr "prefix" "orig,orig,maybe_evex")
3881 (set_attr "mode" "SF")])
;; (v)cvtss2si to an SI register: V4SF operand 1 with a (const_int 0)
;; element selector, wrapped in UNSPEC_FIX_NOTRUNC.
3883 (define_insn "sse_cvtss2si<round_name>"
3884 [(set (match_operand:SI 0 "register_operand" "=r,r")
3887 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3888 (parallel [(const_int 0)]))]
3889 UNSPEC_FIX_NOTRUNC))]
3891 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3892 [(set_attr "type" "sseicvt")
3893 (set_attr "athlon_decode" "double,vector")
3894 (set_attr "bdver1_decode" "double,double")
3895 (set_attr "prefix_rep" "1")
3896 (set_attr "prefix" "maybe_vex")
3897 (set_attr "mode" "SI")])
;; (v)cvtss2si to an SI register taking a scalar SF source (register "v" or
;; memory "m") directly, wrapped in UNSPEC_FIX_NOTRUNC.
3899 (define_insn "sse_cvtss2si_2"
3900 [(set (match_operand:SI 0 "register_operand" "=r,r")
3901 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3902 UNSPEC_FIX_NOTRUNC))]
3904 "%vcvtss2si\t{%1, %0|%0, %k1}"
3905 [(set_attr "type" "sseicvt")
3906 (set_attr "athlon_decode" "double,vector")
3907 (set_attr "amdfam10_decode" "double,double")
3908 (set_attr "bdver1_decode" "double,double")
3909 (set_attr "prefix_rep" "1")
3910 (set_attr "prefix" "maybe_vex")
3911 (set_attr "mode" "SI")])
;; (v)cvtss2si{q} to a DI register; 64-bit targets only.  V4SF operand 1
;; with a (const_int 0) element selector, wrapped in UNSPEC_FIX_NOTRUNC.
3913 (define_insn "sse_cvtss2siq<round_name>"
3914 [(set (match_operand:DI 0 "register_operand" "=r,r")
3917 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3918 (parallel [(const_int 0)]))]
3919 UNSPEC_FIX_NOTRUNC))]
3920 "TARGET_SSE && TARGET_64BIT"
3921 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3922 [(set_attr "type" "sseicvt")
3923 (set_attr "athlon_decode" "double,vector")
3924 (set_attr "bdver1_decode" "double,double")
3925 (set_attr "prefix_rep" "1")
3926 (set_attr "prefix" "maybe_vex")
3927 (set_attr "mode" "DI")])
;; (v)cvtss2si{q} to a DI register taking a scalar SF source (register "v"
;; or memory "m") directly, wrapped in UNSPEC_FIX_NOTRUNC; 64-bit targets
;; only.
3929 (define_insn "sse_cvtss2siq_2"
3930 [(set (match_operand:DI 0 "register_operand" "=r,r")
3931 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3932 UNSPEC_FIX_NOTRUNC))]
3933 "TARGET_SSE && TARGET_64BIT"
3934 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3935 [(set_attr "type" "sseicvt")
3936 (set_attr "athlon_decode" "double,vector")
3937 (set_attr "amdfam10_decode" "double,double")
3938 (set_attr "bdver1_decode" "double,double")
3939 (set_attr "prefix_rep" "1")
3940 (set_attr "prefix" "maybe_vex")
3941 (set_attr "mode" "DI")])
;; (v)cvttss2si to an SI register: V4SF operand 1 with a (const_int 0)
;; element selector; uses the <round_saeonly_*> attribute family.
3943 (define_insn "sse_cvttss2si<round_saeonly_name>"
3944 [(set (match_operand:SI 0 "register_operand" "=r,r")
3947 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3948 (parallel [(const_int 0)]))))]
3950 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3951 [(set_attr "type" "sseicvt")
3952 (set_attr "athlon_decode" "double,vector")
3953 (set_attr "amdfam10_decode" "double,double")
3954 (set_attr "bdver1_decode" "double,double")
3955 (set_attr "prefix_rep" "1")
3956 (set_attr "prefix" "maybe_vex")
3957 (set_attr "mode" "SI")])
;; (v)cvttss2si{q} to a DI register; 64-bit targets only.  V4SF operand 1
;; with a (const_int 0) element selector; uses the <round_saeonly_*>
;; attribute family.
;; The second alternative uses <round_saeonly_constraint2>, the same
;; attribute as the SImode sse_cvttss2si pattern, so both widths expose the
;; same two alternatives that the per-alternative decode attributes
;; ("double,vector") are keyed on.
3959 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3960 [(set (match_operand:DI 0 "register_operand" "=r,r")
3963 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3964 (parallel [(const_int 0)]))))]
3965 "TARGET_SSE && TARGET_64BIT"
3966 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3967 [(set_attr "type" "sseicvt")
3968 (set_attr "athlon_decode" "double,vector")
3969 (set_attr "amdfam10_decode" "double,double")
3970 (set_attr "bdver1_decode" "double,double")
3971 (set_attr "prefix_rep" "1")
3972 (set_attr "prefix" "maybe_vex")
3973 (set_attr "mode" "DI")])
;; vcvtusi2{ss,sd} from an SI source: unsigned_float to <ssescalarmode>,
;; duplicated across VF_128, with register operand 1 as the other vector
;; input.  Requires AVX512F and <round_modev4sf_condition>.
3975 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3976 [(set (match_operand:VF_128 0 "register_operand" "=v")
3978 (vec_duplicate:VF_128
3979 (unsigned_float:<ssescalarmode>
3980 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3981 (match_operand:VF_128 1 "register_operand" "v")
3983 "TARGET_AVX512F && <round_modev4sf_condition>"
3984 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3985 [(set_attr "type" "sseicvt")
3986 (set_attr "prefix" "evex")
3987 (set_attr "mode" "<ssescalarmode>")])
;; Unsigned DImode -> scalar float, same shape as the 32-bit pattern but with
;; a DI operand 2.  Emits vcvtusi2<suffix>.
;; Enabled for TARGET_AVX512F && TARGET_64BIT.
3989 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3990 [(set (match_operand:VF_128 0 "register_operand" "=v")
3992 (vec_duplicate:VF_128
3993 (unsigned_float:<ssescalarmode>
3994 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3995 (match_operand:VF_128 1 "register_operand" "v")
3997 "TARGET_AVX512F && TARGET_64BIT"
3998 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3999 [(set_attr "type" "sseicvt")
4000 (set_attr "prefix" "evex")
4001 (set_attr "mode" "<ssescalarmode>")])
;; Integer vector (<sseintvecmode>) -> VF1 float vector; emits cvtdq2ps.
;; Carries both <mask_name> and <round_name>; enabled for TARGET_SSE2 together
;; with the mask/round 512-bit mode conditions.
4003 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4004 [(set (match_operand:VF1 0 "register_operand" "=v")
4006 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4007 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4008 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4009 [(set_attr "type" "ssecvt")
4010 (set_attr "prefix" "maybe_vex")
4011 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned integer vector (<sseintvecmode>) -> VF1_AVX512VL float vector via
;; unsigned_float; emits vcvtudq2ps (EVEX prefix).
4013 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4014 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4015 (unsigned_float:VF1_AVX512VL
4016 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4018 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4019 [(set_attr "type" "ssecvt")
4020 (set_attr "prefix" "evex")
4021 (set_attr "mode" "<MODE>")])
;; Expander for unsigned integer vector -> VF1 float vector.
;; Enabled for TARGET_SSE2 when the mode is V4SF or TARGET_AVX2 is set.
;; The preparation code dispatches: V16SF uses gen_ufloatv16siv16sf2; under
;; TARGET_AVX512VL the V4SF / V8SF ufloat insns are emitted; the remaining
;; path calls ix86_expand_vector_convert_uns_vsivsf (presumably the generic
;; fallback -- its definition is not in this file section).
4023 (define_expand "floatuns<sseintvecmodelower><mode>2"
4024 [(match_operand:VF1 0 "register_operand")
4025 (match_operand:<sseintvecmode> 1 "register_operand")]
4026 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4028 if (<MODE>mode == V16SFmode)
4029 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4031 if (TARGET_AVX512VL)
4033 if (<MODE>mode == V4SFmode)
4034 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4036 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4039 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4045 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each 32-bit integer vector mode to the lower-case name of the
;; single-precision vector mode with the same element count.
4046 (define_mode_attr sf2simodelower
4047 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; <ssePSmode> float vector -> VI4_AVX integer vector under
;; UNSPEC_FIX_NOTRUNC; emits cvtps2dq.  prefix_data16 is computed from a
;; TARGET_AVX test.  Enabled for TARGET_SSE2 && <mask_mode512bit_condition>.
4049 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4050 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4052 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4053 UNSPEC_FIX_NOTRUNC))]
4054 "TARGET_SSE2 && <mask_mode512bit_condition>"
4055 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4056 [(set_attr "type" "ssecvt")
4057 (set (attr "prefix_data16")
4059 (match_test "TARGET_AVX")
4061 (const_string "1")))
4062 (set_attr "prefix" "maybe_vex")
4063 (set_attr "mode" "<sseinsnmode>")])
;; V16SF -> V16SI under UNSPEC_FIX_NOTRUNC with <mask_name>/<round_name>
;; variants; emits vcvtps2dq (EVEX, mode XI).
4065 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4066 [(set (match_operand:V16SI 0 "register_operand" "=v")
4068 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4069 UNSPEC_FIX_NOTRUNC))]
4071 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4072 [(set_attr "type" "ssecvt")
4073 (set_attr "prefix" "evex")
4074 (set_attr "mode" "XI")])
;; <ssePSmode> float vector -> VI4_AVX512VL integer vector under
;; UNSPEC_UNSIGNED_FIX_NOTRUNC; emits vcvtps2udq (EVEX).
4076 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4077 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4078 (unspec:VI4_AVX512VL
4079 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4080 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4082 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4083 [(set_attr "type" "ssecvt")
4084 (set_attr "prefix" "evex")
4085 (set_attr "mode" "<sseinsnmode>")])
;; <ssePSmode2> float vector -> VI8_256_512 integer vector under
;; UNSPEC_FIX_NOTRUNC; emits vcvtps2qq.
;; Enabled for TARGET_AVX512DQ && <round_mode512bit_condition>.
4087 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4088 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4089 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4090 UNSPEC_FIX_NOTRUNC))]
4091 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4092 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4093 [(set_attr "type" "ssecvt")
4094 (set_attr "prefix" "evex")
4095 (set_attr "mode" "<sseinsnmode>")])
;; V2DI variant of vcvtps2qq: the source is a V4SF operand with lane selector
;; (0, 1), wrapped in UNSPEC_FIX_NOTRUNC.
;; Enabled for TARGET_AVX512DQ && TARGET_AVX512VL.
4097 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4098 [(set (match_operand:V2DI 0 "register_operand" "=v")
4101 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4102 (parallel [(const_int 0) (const_int 1)]))]
4103 UNSPEC_FIX_NOTRUNC))]
4104 "TARGET_AVX512DQ && TARGET_AVX512VL"
4105 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4106 [(set_attr "type" "ssecvt")
4107 (set_attr "prefix" "evex")
4108 (set_attr "mode" "TI")])
;; <ssePSmode2> float vector -> VI8_256_512 integer vector under
;; UNSPEC_UNSIGNED_FIX_NOTRUNC; emits vcvtps2uqq.
;; Enabled for TARGET_AVX512DQ && <round_mode512bit_condition>.
4110 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4111 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4112 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4113 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4114 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4115 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4116 [(set_attr "type" "ssecvt")
4117 (set_attr "prefix" "evex")
4118 (set_attr "mode" "<sseinsnmode>")])
;; V2DI variant of vcvtps2uqq: the source is a V4SF operand with lane selector
;; (0, 1), wrapped in UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; Enabled for TARGET_AVX512DQ && TARGET_AVX512VL.
4120 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4121 [(set (match_operand:V2DI 0 "register_operand" "=v")
4124 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4125 (parallel [(const_int 0) (const_int 1)]))]
4126 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4127 "TARGET_AVX512DQ && TARGET_AVX512VL"
4128 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4129 [(set_attr "type" "ssecvt")
4130 (set_attr "prefix" "evex")
4131 (set_attr "mode" "TI")])
;; V16SF -> V16SI; emits vcvttps2<fixsuffix>dq, so <fixsuffix> selects the
;; mnemonic variant.  Has <mask_name> and <round_saeonly_name> variants.
4133 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4134 [(set (match_operand:V16SI 0 "register_operand" "=v")
4136 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4138 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4139 [(set_attr "type" "ssecvt")
4140 (set_attr "prefix" "evex")
4141 (set_attr "mode" "XI")])
;; V8SF -> V8SI via the RTL fix code; emits vcvttps2dq.
;; Enabled for TARGET_AVX && <mask_avx512vl_condition>.
4143 (define_insn "fix_truncv8sfv8si2<mask_name>"
4144 [(set (match_operand:V8SI 0 "register_operand" "=v")
4145 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4146 "TARGET_AVX && <mask_avx512vl_condition>"
4147 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4148 [(set_attr "type" "ssecvt")
4149 (set_attr "prefix" "<mask_prefix>")
4150 (set_attr "mode" "OI")])
;; V4SF -> V4SI via the RTL fix code; emits cvttps2dq.
;; Enabled for TARGET_SSE2 && <mask_avx512vl_condition>.
;; prefix_rep and prefix_data16 are computed from a TARGET_AVX test.
;; NOTE(review): prefix_data16 is specified twice below (the conditional
;; `set (attr ...)' and a plain `set_attr ... "0"') -- confirm which one is
;; intended and whether the other can be dropped.
4152 (define_insn "fix_truncv4sfv4si2<mask_name>"
4153 [(set (match_operand:V4SI 0 "register_operand" "=v")
4154 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4155 "TARGET_SSE2 && <mask_avx512vl_condition>"
4156 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4157 [(set_attr "type" "ssecvt")
4158 (set (attr "prefix_rep")
4160 (match_test "TARGET_AVX")
4162 (const_string "1")))
4163 (set (attr "prefix_data16")
4165 (match_test "TARGET_AVX")
4167 (const_string "0")))
4168 (set_attr "prefix_data16" "0")
4169 (set_attr "prefix" "<mask_prefix2>")
4170 (set_attr "mode" "TI")])
;; Expander for VF1 float vector -> unsigned integer vector.
;; V16SF is handled by gen_ufix_truncv16sfv16si2.  The other visible path
;; adjusts the input with ix86_expand_adjust_ufix_to_sfix_si (which also
;; fills tmp[2]), performs the signed fix_trunc into a fresh register, and
;; XORs that result with tmp[2] to produce operand 0.
4172 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4173 [(match_operand:<sseintvecmode> 0 "register_operand")
4174 (match_operand:VF1 1 "register_operand")]
4177 if (<MODE>mode == V16SFmode)
4178 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4183 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4184 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4185 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4186 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4193 ;; Parallel double-precision floating point conversion operations
4195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SI -> V2DF via the RTL float code; emits cvtpi2pd.  The source is either
;; a "y" register or memory; the "unit" and "prefix_data16" attributes differ
;; per alternative.
4197 (define_insn "sse2_cvtpi2pd"
4198 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4199 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4201 "cvtpi2pd\t{%1, %0|%0, %1}"
4202 [(set_attr "type" "ssecvt")
4203 (set_attr "unit" "mmx,*")
4204 (set_attr "prefix_data16" "1,*")
4205 (set_attr "mode" "V2DF")])
;; V2DF -> V2SI under UNSPEC_FIX_NOTRUNC; emits cvtpd2pi with the result in a
;; "y" register (unit "mmx").
4207 (define_insn "sse2_cvtpd2pi"
4208 [(set (match_operand:V2SI 0 "register_operand" "=y")
4209 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4210 UNSPEC_FIX_NOTRUNC))]
4212 "cvtpd2pi\t{%1, %0|%0, %1}"
4213 [(set_attr "type" "ssecvt")
4214 (set_attr "unit" "mmx")
4215 (set_attr "bdver1_decode" "double")
4216 (set_attr "btver2_decode" "direct")
4217 (set_attr "prefix_data16" "1")
4218 (set_attr "mode" "DI")])
;; V2DF -> V2SI via the RTL fix code; emits cvttpd2pi with the result in a
;; "y" register (unit "mmx").
4220 (define_insn "sse2_cvttpd2pi"
4221 [(set (match_operand:V2SI 0 "register_operand" "=y")
4222 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4224 "cvttpd2pi\t{%1, %0|%0, %1}"
4225 [(set_attr "type" "ssecvt")
4226 (set_attr "unit" "mmx")
4227 (set_attr "bdver1_decode" "double")
4228 (set_attr "prefix_data16" "1")
4229 (set_attr "mode" "TI")])
;; SImode -> DFmode (RTL float) combined with V2DF operand 1.
;; Three alternatives: two "noavx" ones where operand 1 is tied to the
;; destination and the two-operand cvtsi2sd is emitted, and one "avx"
;; alternative emitting the three-operand vcvtsi2sd.
4231 (define_insn "sse2_cvtsi2sd"
4232 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4235 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4236 (match_operand:V2DF 1 "register_operand" "0,0,x")
4240 cvtsi2sd\t{%2, %0|%0, %2}
4241 cvtsi2sd\t{%2, %0|%0, %2}
4242 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4243 [(set_attr "isa" "noavx,noavx,avx")
4244 (set_attr "type" "sseicvt")
4245 (set_attr "athlon_decode" "double,direct,*")
4246 (set_attr "amdfam10_decode" "vector,double,*")
4247 (set_attr "bdver1_decode" "double,direct,*")
4248 (set_attr "btver2_decode" "double,double,double")
4249 (set_attr "prefix" "orig,orig,vex")
4250 (set_attr "mode" "DF")])
;; DImode -> DFmode (RTL float) combined with V2DF operand 1.
;; Two "noavx" alternatives emit cvtsi2sdq; the "avx" alternative emits the
;; three-operand vcvtsi2sdq and is the only one using the <round_*>
;; attributes.  Enabled for TARGET_SSE2 && TARGET_64BIT.
4252 (define_insn "sse2_cvtsi2sdq<round_name>"
4253 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4256 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4257 (match_operand:V2DF 1 "register_operand" "0,0,v")
4259 "TARGET_SSE2 && TARGET_64BIT"
4261 cvtsi2sdq\t{%2, %0|%0, %2}
4262 cvtsi2sdq\t{%2, %0|%0, %2}
4263 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4264 [(set_attr "isa" "noavx,noavx,avx")
4265 (set_attr "type" "sseicvt")
4266 (set_attr "athlon_decode" "double,direct,*")
4267 (set_attr "amdfam10_decode" "vector,double,*")
4268 (set_attr "bdver1_decode" "double,direct,*")
4269 (set_attr "length_vex" "*,*,4")
4270 (set_attr "prefix_rex" "1,1,*")
4271 (set_attr "prefix" "orig,orig,maybe_evex")
4272 (set_attr "mode" "DF")])
;; Lane 0 of a V4SF operand -> SImode under UNSPEC_UNSIGNED_FIX_NOTRUNC;
;; emits vcvtss2usi (EVEX).
4274 (define_insn "avx512f_vcvtss2usi<round_name>"
4275 [(set (match_operand:SI 0 "register_operand" "=r")
4278 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4279 (parallel [(const_int 0)]))]
4280 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4282 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4283 [(set_attr "type" "sseicvt")
4284 (set_attr "prefix" "evex")
4285 (set_attr "mode" "SI")])
;; Lane 0 of a V4SF operand -> DImode under UNSPEC_UNSIGNED_FIX_NOTRUNC;
;; emits vcvtss2usi.  Enabled for TARGET_AVX512F && TARGET_64BIT.
4287 (define_insn "avx512f_vcvtss2usiq<round_name>"
4288 [(set (match_operand:DI 0 "register_operand" "=r")
4291 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4292 (parallel [(const_int 0)]))]
4293 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4294 "TARGET_AVX512F && TARGET_64BIT"
4295 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4296 [(set_attr "type" "sseicvt")
4297 (set_attr "prefix" "evex")
4298 (set_attr "mode" "DI")])
;; Lane 0 of a V4SF operand -> SImode; emits vcvttss2usi (EVEX), with
;; <round_saeonly_*> variants.
4300 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4301 [(set (match_operand:SI 0 "register_operand" "=r")
4304 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4305 (parallel [(const_int 0)]))))]
4307 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4308 [(set_attr "type" "sseicvt")
4309 (set_attr "prefix" "evex")
4310 (set_attr "mode" "SI")])
;; Lane 0 of a V4SF operand -> DImode; emits vcvttss2usi.
;; Enabled for TARGET_AVX512F && TARGET_64BIT.
4312 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4313 [(set (match_operand:DI 0 "register_operand" "=r")
4316 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4317 (parallel [(const_int 0)]))))]
4318 "TARGET_AVX512F && TARGET_64BIT"
4319 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4320 [(set_attr "type" "sseicvt")
4321 (set_attr "prefix" "evex")
4322 (set_attr "mode" "DI")])
;; Lane 0 of a V2DF operand -> SImode under UNSPEC_UNSIGNED_FIX_NOTRUNC;
;; emits vcvtsd2usi (EVEX).
4324 (define_insn "avx512f_vcvtsd2usi<round_name>"
4325 [(set (match_operand:SI 0 "register_operand" "=r")
4328 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4329 (parallel [(const_int 0)]))]
4330 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4332 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4333 [(set_attr "type" "sseicvt")
4334 (set_attr "prefix" "evex")
4335 (set_attr "mode" "SI")])
;; Lane 0 of a V2DF operand -> DImode under UNSPEC_UNSIGNED_FIX_NOTRUNC;
;; emits vcvtsd2usi.  Enabled for TARGET_AVX512F && TARGET_64BIT.
4337 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4338 [(set (match_operand:DI 0 "register_operand" "=r")
4341 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4342 (parallel [(const_int 0)]))]
4343 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4344 "TARGET_AVX512F && TARGET_64BIT"
4345 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4346 [(set_attr "type" "sseicvt")
4347 (set_attr "prefix" "evex")
4348 (set_attr "mode" "DI")])
;; Lane 0 of a V2DF operand -> SImode; emits vcvttsd2usi (EVEX), with
;; <round_saeonly_*> variants.
4350 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4351 [(set (match_operand:SI 0 "register_operand" "=r")
4354 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4355 (parallel [(const_int 0)]))))]
4357 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4358 [(set_attr "type" "sseicvt")
4359 (set_attr "prefix" "evex")
4360 (set_attr "mode" "SI")])
;; Lane 0 of a V2DF operand -> DImode; emits vcvttsd2usi.
;; Enabled for TARGET_AVX512F && TARGET_64BIT.
4362 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4363 [(set (match_operand:DI 0 "register_operand" "=r")
4366 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4367 (parallel [(const_int 0)]))))]
4368 "TARGET_AVX512F && TARGET_64BIT"
4369 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4370 [(set_attr "type" "sseicvt")
4371 (set_attr "prefix" "evex")
4372 (set_attr "mode" "DI")])
;; Lane 0 of a V2DF operand -> SImode under UNSPEC_FIX_NOTRUNC; emits
;; cvtsd2si.  Two alternatives: vector register, or <round_constraint2>.
4374 (define_insn "sse2_cvtsd2si<round_name>"
4375 [(set (match_operand:SI 0 "register_operand" "=r,r")
4378 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4379 (parallel [(const_int 0)]))]
4380 UNSPEC_FIX_NOTRUNC))]
4382 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4383 [(set_attr "type" "sseicvt")
4384 (set_attr "athlon_decode" "double,vector")
4385 (set_attr "bdver1_decode" "double,double")
4386 (set_attr "btver2_decode" "double,double")
4387 (set_attr "prefix_rep" "1")
4388 (set_attr "prefix" "maybe_vex")
4389 (set_attr "mode" "SI")])
;; DFmode -> SImode written as UNSPEC_FIX_NOTRUNC on a plain DF operand
;; (vector register or memory); emits cvtsd2si.
4391 (define_insn "sse2_cvtsd2si_2"
4392 [(set (match_operand:SI 0 "register_operand" "=r,r")
4393 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4394 UNSPEC_FIX_NOTRUNC))]
4396 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4397 [(set_attr "type" "sseicvt")
4398 (set_attr "athlon_decode" "double,vector")
4399 (set_attr "amdfam10_decode" "double,double")
4400 (set_attr "bdver1_decode" "double,double")
4401 (set_attr "prefix_rep" "1")
4402 (set_attr "prefix" "maybe_vex")
4403 (set_attr "mode" "SI")])
;; Lane 0 of a V2DF operand -> DImode under UNSPEC_FIX_NOTRUNC; emits
;; cvtsd2si with the {q} suffix.  Enabled for TARGET_SSE2 && TARGET_64BIT.
4405 (define_insn "sse2_cvtsd2siq<round_name>"
4406 [(set (match_operand:DI 0 "register_operand" "=r,r")
4409 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4410 (parallel [(const_int 0)]))]
4411 UNSPEC_FIX_NOTRUNC))]
4412 "TARGET_SSE2 && TARGET_64BIT"
4413 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4414 [(set_attr "type" "sseicvt")
4415 (set_attr "athlon_decode" "double,vector")
4416 (set_attr "bdver1_decode" "double,double")
4417 (set_attr "prefix_rep" "1")
4418 (set_attr "prefix" "maybe_vex")
4419 (set_attr "mode" "DI")])
;; DFmode -> DImode written as UNSPEC_FIX_NOTRUNC on a plain DF operand
;; (vector register or memory); emits cvtsd2si with the {q} suffix.
;; Enabled for TARGET_SSE2 && TARGET_64BIT.
4421 (define_insn "sse2_cvtsd2siq_2"
4422 [(set (match_operand:DI 0 "register_operand" "=r,r")
4423 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4424 UNSPEC_FIX_NOTRUNC))]
4425 "TARGET_SSE2 && TARGET_64BIT"
4426 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4427 [(set_attr "type" "sseicvt")
4428 (set_attr "athlon_decode" "double,vector")
4429 (set_attr "amdfam10_decode" "double,double")
4430 (set_attr "bdver1_decode" "double,double")
4431 (set_attr "prefix_rep" "1")
4432 (set_attr "prefix" "maybe_vex")
4433 (set_attr "mode" "DI")])
;; Lane 0 of a V2DF operand -> SImode; emits cvttsd2si.  Two alternatives:
;; vector register, or <round_saeonly_constraint2>.
4435 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4436 [(set (match_operand:SI 0 "register_operand" "=r,r")
4439 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4440 (parallel [(const_int 0)]))))]
4442 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4443 [(set_attr "type" "sseicvt")
4444 (set_attr "athlon_decode" "double,vector")
4445 (set_attr "amdfam10_decode" "double,double")
4446 (set_attr "bdver1_decode" "double,double")
4447 (set_attr "btver2_decode" "double,double")
4448 (set_attr "prefix_rep" "1")
4449 (set_attr "prefix" "maybe_vex")
4450 (set_attr "mode" "SI")])
;; Lane 0 of a V2DF operand -> DImode; emits cvttsd2si with the {q} suffix.
;; Enabled for TARGET_SSE2 && TARGET_64BIT.
4452 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4453 [(set (match_operand:DI 0 "register_operand" "=r,r")
4456 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4457 (parallel [(const_int 0)]))))]
4458 "TARGET_SSE2 && TARGET_64BIT"
4459 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4460 [(set_attr "type" "sseicvt")
4461 (set_attr "athlon_decode" "double,vector")
4462 (set_attr "amdfam10_decode" "double,double")
4463 (set_attr "bdver1_decode" "double,double")
4464 (set_attr "prefix_rep" "1")
4465 (set_attr "prefix" "maybe_vex")
4466 (set_attr "mode" "DI")])
4468 ;; For float<si2dfmode><mode>2 insn pattern
;; Maps a double-precision vector mode to the SImode vector mode with the
;; same number of elements.
4469 (define_mode_attr si2dfmode
4470 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the si2dfmode mapping, for use in pattern names.
4471 (define_mode_attr si2dfmodelower
4472 [(V8DF "v8si") (V4DF "v4si")])
;; <si2dfmode> integer vector -> VF2_512_256 double vector via the RTL float
;; code; emits vcvtdq2pd.  Enabled for TARGET_AVX &&
;; <mask_mode512bit_condition>.
4474 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4475 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4476 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4477 "TARGET_AVX && <mask_mode512bit_condition>"
4478 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4479 [(set_attr "type" "ssecvt")
4480 (set_attr "prefix" "maybe_vex")
4481 (set_attr "mode" "<MODE>")])
;; <sseintvecmode> integer vector -> VF2_AVX512VL double vector via any_float
;; (both signedness variants); emits vcvt<floatsuffix>qq2pd (EVEX).
;; Operand 1 uses <round_constraint>, like the other <round_name> patterns in
;; this file, so that its constraint follows the rounding substitution instead
;; of unconditionally allowing "vm".
4483 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4484 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4485 (any_float:VF2_AVX512VL
4486 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4488 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4489 [(set_attr "type" "ssecvt")
4490 (set_attr "prefix" "evex")
4491 (set_attr "mode" "<MODE>")])
4493 ;; For <floatsuffix>float<sselongvecmodelower><mode> insn patterns
;; Mnemonic suffix appended to vcvt*qq2ps: empty for V8SF, "{y}" for V4SF.
4494 (define_mode_attr qq2pssuff
4495 [(V8SF "") (V4SF "{y}")])
;; Maps a single-precision vector mode to a DImode vector mode:
;; V8SF -> V8DI, V4SF -> V4DI.
4497 (define_mode_attr sselongvecmode
4498 [(V8SF "V8DI") (V4SF "V4DI")])
;; Lower-case spelling of the sselongvecmode mapping, for pattern names.
4500 (define_mode_attr sselongvecmodelower
4501 [(V8SF "v8di") (V4SF "v4di")])
;; Maps float vector modes to the integer mode name (XI/OI/TI) that patterns
;; in this file use as their "mode" attribute value.
4503 (define_mode_attr sseintvecmode3
4504 [(V8SF "XI") (V4SF "OI")
4505 (V8DF "OI") (V4DF "TI")])
;; <sselongvecmode> integer vector -> VF1_128_256VL float vector via
;; any_float; emits vcvt<floatsuffix>qq2ps with the <qq2pssuff> suffix.
;; Enabled for TARGET_AVX512DQ && <round_modev8sf_condition>.
4507 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4508 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4509 (any_float:VF1_128_256VL
4510 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4511 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4512 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4513 [(set_attr "type" "ssecvt")
4514 (set_attr "prefix" "evex")
4515 (set_attr "mode" "<MODE>")])
;; V2DI -> V2SF via any_float, combined with a zero V2SF constant vector to
;; form the V4SF destination; emits vcvt<floatsuffix>qq2ps{x}.
;; Enabled for TARGET_AVX512DQ && TARGET_AVX512VL.
4517 (define_insn "*<floatsuffix>floatv2div2sf2"
4518 [(set (match_operand:V4SF 0 "register_operand" "=v")
4520 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4521 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4522 "TARGET_AVX512DQ && TARGET_AVX512VL"
4523 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4524 [(set_attr "type" "ssecvt")
4525 (set_attr "prefix" "evex")
4526 (set_attr "mode" "V4SF")])
;; Masked form of the V2DI -> V2SF conversion: operand 2 (V4SF, lanes 0-1
;; selected) is the merge source, operand 3 is the QImode mask in a "Yk"
;; register, and a zero V2SF constant vector completes the V4SF destination.
;; Emits vcvt<floatsuffix>qq2ps{x} with the %{%3%}%N2 mask decoration.
;; Enabled for TARGET_AVX512DQ && TARGET_AVX512VL.
4528 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4529 [(set (match_operand:V4SF 0 "register_operand" "=v")
4532 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4534 (match_operand:V4SF 2 "vector_move_operand" "0C")
4535 (parallel [(const_int 0) (const_int 1)]))
4536 (match_operand:QI 3 "register_operand" "Yk"))
4537 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4538 "TARGET_AVX512DQ && TARGET_AVX512VL"
4539 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4540 [(set_attr "type" "ssecvt")
4541 (set_attr "prefix" "evex")
4542 (set_attr "mode" "V4SF")])
;; <si2dfmode> integer vector -> VF2_512_256VL double vector via
;; unsigned_float; emits vcvtudq2pd (EVEX).
4544 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4545 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4546 (unsigned_float:VF2_512_256VL
4547 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4549 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4550 [(set_attr "type" "ssecvt")
4551 (set_attr "prefix" "evex")
4552 (set_attr "mode" "<MODE>")])
;; Lanes 0-1 of a V4SI operand -> V2DF via unsigned_float; emits vcvtudq2pd
;; (EVEX).
4554 (define_insn "ufloatv2siv2df2<mask_name>"
4555 [(set (match_operand:V2DF 0 "register_operand" "=v")
4556 (unsigned_float:V2DF
4558 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4559 (parallel [(const_int 0) (const_int 1)]))))]
4561 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4562 [(set_attr "type" "ssecvt")
4563 (set_attr "prefix" "evex")
4564 (set_attr "mode" "V2DF")])
;; Lanes 0-7 of a V16SI operand -> V8DF; emits vcvtdq2pd with operand 1
;; printed through the %t modifier.
4566 (define_insn "avx512f_cvtdq2pd512_2"
4567 [(set (match_operand:V8DF 0 "register_operand" "=v")
4570 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4571 (parallel [(const_int 0) (const_int 1)
4572 (const_int 2) (const_int 3)
4573 (const_int 4) (const_int 5)
4574 (const_int 6) (const_int 7)]))))]
4576 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4577 [(set_attr "type" "ssecvt")
4578 (set_attr "prefix" "evex")
4579 (set_attr "mode" "V8DF")])
;; Lanes 0-3 of a V8SI operand -> V4DF; emits vcvtdq2pd with operand 1
;; printed through the %x modifier.
4581 (define_insn "avx_cvtdq2pd256_2"
4582 [(set (match_operand:V4DF 0 "register_operand" "=v")
4585 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4586 (parallel [(const_int 0) (const_int 1)
4587 (const_int 2) (const_int 3)]))))]
4589 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4590 [(set_attr "type" "ssecvt")
4591 (set_attr "prefix" "maybe_evex")
4592 (set_attr "mode" "V4DF")])
;; Lanes 0-1 of a V4SI operand -> V2DF; emits cvtdq2pd.
;; Enabled for TARGET_SSE2 && <mask_avx512vl_condition>.
4594 (define_insn "sse2_cvtdq2pd<mask_name>"
4595 [(set (match_operand:V2DF 0 "register_operand" "=v")
4598 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4599 (parallel [(const_int 0) (const_int 1)]))))]
4600 "TARGET_SSE2 && <mask_avx512vl_condition>"
4601 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4602 [(set_attr "type" "ssecvt")
4603 (set_attr "prefix" "maybe_vex")
4604 (set_attr "ssememalign" "64")
4605 (set_attr "mode" "V2DF")])
;; V8DF -> V8SI under UNSPEC_FIX_NOTRUNC with <mask_name>/<round_name>
;; variants; emits vcvtpd2dq (EVEX).
4607 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4608 [(set (match_operand:V8SI 0 "register_operand" "=v")
4610 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4611 UNSPEC_FIX_NOTRUNC))]
4613 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4614 [(set_attr "type" "ssecvt")
4615 (set_attr "prefix" "evex")
4616 (set_attr "mode" "OI")])
;; V4DF -> V4SI under UNSPEC_FIX_NOTRUNC; emits vcvtpd2dq{y}.
;; Enabled for TARGET_AVX && <mask_avx512vl_condition>.
4618 (define_insn "avx_cvtpd2dq256<mask_name>"
4619 [(set (match_operand:V4SI 0 "register_operand" "=v")
4620 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4621 UNSPEC_FIX_NOTRUNC))]
4622 "TARGET_AVX && <mask_avx512vl_condition>"
4623 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4624 [(set_attr "type" "ssecvt")
4625 (set_attr "prefix" "<mask_prefix>")
4626 (set_attr "mode" "OI")])
;; Expander producing a V8SI destination from the V4DF -> V4SI unspec
;; conversion; the preparation statement sets operands[2] to the V4SI zero
;; constant.
4628 (define_expand "avx_cvtpd2dq256_2"
4629 [(set (match_operand:V8SI 0 "register_operand")
4631 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4635 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form matching the expander above: V8SI destination built from the
;; V4DF -> V4SI unspec conversion and a const0 V4SI operand 2.  Emits
;; vcvtpd2dq{y} with the destination printed through the %x modifier.
4637 (define_insn "*avx_cvtpd2dq256_2"
4638 [(set (match_operand:V8SI 0 "register_operand" "=x")
4640 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4642 (match_operand:V4SI 2 "const0_operand")))]
4644 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4645 [(set_attr "type" "ssecvt")
4646 (set_attr "prefix" "vex")
4647 (set_attr "btver2_decode" "vector")
4648 (set_attr "mode" "OI")])
;; V2DF -> V2SI unspec conversion, combined with a zero V2SI constant vector
;; to form the V4SI destination.  The output code returns either the
;; vcvtpd2dq{x} template (with mask decoration) or the plain cvtpd2dq one.
;; Enabled for TARGET_SSE2 && <mask_avx512vl_condition>.
4650 (define_insn "sse2_cvtpd2dq<mask_name>"
4651 [(set (match_operand:V4SI 0 "register_operand" "=v")
4653 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4655 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4656 "TARGET_SSE2 && <mask_avx512vl_condition>"
4659 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4661 return "cvtpd2dq\t{%1, %0|%0, %1}";
4663 [(set_attr "type" "ssecvt")
4664 (set_attr "prefix_rep" "1")
4665 (set_attr "prefix_data16" "0")
4666 (set_attr "prefix" "maybe_vex")
4667 (set_attr "mode" "TI")
4668 (set_attr "amdfam10_decode" "double")
4669 (set_attr "athlon_decode" "vector")
4670 (set_attr "bdver1_decode" "double")])
4672 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix appended to vcvtpd2udq: empty for V8DF, "{y}" for V4DF.
4673 (define_mode_attr pd2udqsuff
4674 [(V8DF "") (V4DF "{y}")])
;; VF2_512_256VL double vector -> <si2dfmode> integer vector under
;; UNSPEC_UNSIGNED_FIX_NOTRUNC; emits vcvtpd2udq with the <pd2udqsuff>
;; suffix (EVEX).
4676 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4677 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4679 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4680 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4682 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4683 [(set_attr "type" "ssecvt")
4684 (set_attr "prefix" "evex")
4685 (set_attr "mode" "<sseinsnmode>")])
;; V2DF operand under UNSPEC_UNSIGNED_FIX_NOTRUNC, combined with a zero V2SI
;; constant vector to form the V4SI destination; emits vcvtpd2udq{x} (EVEX).
4687 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4688 [(set (match_operand:V4SI 0 "register_operand" "=v")
4691 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4692 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4693 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4695 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4696 [(set_attr "type" "ssecvt")
4697 (set_attr "prefix" "evex")
4698 (set_attr "mode" "TI")])
;; V8DF -> V8SI; emits vcvttpd2<fixsuffix>dq, so <fixsuffix> selects the
;; mnemonic variant.  Has <mask_name> and <round_saeonly_name> variants.
4700 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4701 [(set (match_operand:V8SI 0 "register_operand" "=v")
4703 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4705 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4706 [(set_attr "type" "ssecvt")
4707 (set_attr "prefix" "evex")
4708 (set_attr "mode" "OI")])
;; V2DF -> V2SI via unsigned_fix, combined with a zero V2SI constant vector
;; to form the V4SI destination; emits vcvttpd2udq{x} (EVEX).
4710 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4711 [(set (match_operand:V4SI 0 "register_operand" "=v")
4713 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4714 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4716 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4717 [(set_attr "type" "ssecvt")
4718 (set_attr "prefix" "evex")
4719 (set_attr "mode" "TI")])
;; V4DF -> V4SI via the RTL fix code; emits vcvttpd2dq{y}.
;; Enabled for TARGET_AVX, or for TARGET_AVX512VL && TARGET_AVX512F.
4721 (define_insn "fix_truncv4dfv4si2<mask_name>"
4722 [(set (match_operand:V4SI 0 "register_operand" "=v")
4723 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4724 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4725 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4726 [(set_attr "type" "ssecvt")
4727 (set_attr "prefix" "maybe_evex")
4728 (set_attr "mode" "OI")])
;; V4DF -> V4SI via unsigned_fix; emits vcvttpd2udq{y}.
;; Enabled for TARGET_AVX512VL && TARGET_AVX512F.
4730 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4731 [(set (match_operand:V4SI 0 "register_operand" "=v")
4732 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4733 "TARGET_AVX512VL && TARGET_AVX512F"
4734 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4735 [(set_attr "type" "ssecvt")
4736 (set_attr "prefix" "maybe_evex")
4737 (set_attr "mode" "OI")])
;; VF2_AVX512VL double vector -> <sseintvecmode> integer vector via any_fix
;; (both signedness variants); emits vcvttpd2<fixsuffix>qq.
;; Enabled for TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>.
4739 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4740 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4741 (any_fix:<sseintvecmode>
4742 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4743 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4744 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4745 [(set_attr "type" "ssecvt")
4746 (set_attr "prefix" "evex")
4747 (set_attr "mode" "<sseintvecmode2>")])
;; VF2_AVX512VL double vector -> <sseintvecmode> integer vector under
;; UNSPEC_FIX_NOTRUNC; emits vcvtpd2qq.
;; Enabled for TARGET_AVX512DQ && <round_mode512bit_condition>.
4749 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4750 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4751 (unspec:<sseintvecmode>
4752 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4753 UNSPEC_FIX_NOTRUNC))]
4754 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4755 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4756 [(set_attr "type" "ssecvt")
4757 (set_attr "prefix" "evex")
4758 (set_attr "mode" "<sseintvecmode2>")])
;; VF2_AVX512VL double vector -> <sseintvecmode> integer vector under
;; UNSPEC_UNSIGNED_FIX_NOTRUNC; emits vcvtpd2uqq.
;; Enabled for TARGET_AVX512DQ && <round_mode512bit_condition>.
4760 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4761 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4762 (unspec:<sseintvecmode>
4763 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
4764 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4765 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4766 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4767 [(set_attr "type" "ssecvt")
4768 (set_attr "prefix" "evex")
4769 (set_attr "mode" "<sseintvecmode2>")])
;; VF1_128_256VL float vector -> <sselongvecmode> integer vector via any_fix;
;; emits vcvttps2<fixsuffix>qq.
;; Enabled for TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>.
4771 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4772 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4773 (any_fix:<sselongvecmode>
4774 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4775 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4776 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4777 [(set_attr "type" "ssecvt")
4778 (set_attr "prefix" "evex")
4779 (set_attr "mode" "<sseintvecmode3>")])
;; Lanes 0-1 of a V4SF operand -> V2DI; emits vcvttps2<fixsuffix>qq.
;; Enabled for TARGET_AVX512DQ && TARGET_AVX512VL.
4781 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4782 [(set (match_operand:V2DI 0 "register_operand" "=v")
4785 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4786 (parallel [(const_int 0) (const_int 1)]))))]
4787 "TARGET_AVX512DQ && TARGET_AVX512VL"
4788 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4789 [(set_attr "type" "ssecvt")
4790 (set_attr "prefix" "evex")
4791 (set_attr "mode" "TI")])
;; VF1_128_256VL float vector -> <sseintvecmode> integer vector via
;; unsigned_fix; emits vcvttps2udq (EVEX).
4793 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4794 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4795 (unsigned_fix:<sseintvecmode>
4796 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4798 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4799 [(set_attr "type" "ssecvt")
4800 (set_attr "prefix" "evex")
4801 (set_attr "mode" "<sseintvecmode2>")])
;; Expander producing a V8SI destination from the V4DF -> V4SI fix
;; conversion; the preparation statement sets operands[2] to the V4SI zero
;; constant.
4803 (define_expand "avx_cvttpd2dq256_2"
4804 [(set (match_operand:V8SI 0 "register_operand")
4806 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4809 "operands[2] = CONST0_RTX (V4SImode);")
;; V2DF -> V2SI via the RTL fix code, combined with a zero V2SI constant
;; vector to form the V4SI destination.  The output code returns either the
;; vcvttpd2dq{x} template (with mask decoration) or the plain cvttpd2dq one.
;; Enabled for TARGET_SSE2 && <mask_avx512vl_condition>.
4811 (define_insn "sse2_cvttpd2dq<mask_name>"
4812 [(set (match_operand:V4SI 0 "register_operand" "=v")
4814 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4815 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4816 "TARGET_SSE2 && <mask_avx512vl_condition>"
4819 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4821 return "cvttpd2dq\t{%1, %0|%0, %1}";
4823 [(set_attr "type" "ssecvt")
4824 (set_attr "amdfam10_decode" "double")
4825 (set_attr "athlon_decode" "vector")
4826 (set_attr "bdver1_decode" "double")
4827 (set_attr "prefix" "maybe_vex")
4828 (set_attr "mode" "TI")])
;; V2DF operand 2 -> V2SF via float_truncate, combined with V4SF operand 1.
;; Two "noavx" alternatives tie operand 1 to the destination and emit
;; cvtsd2ss; the "avx" alternative emits the three-operand vcvtsd2ss and is
;; the only one using the <round_*> attributes.
4830 (define_insn "sse2_cvtsd2ss<round_name>"
4831 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4834 (float_truncate:V2SF
4835 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4836 (match_operand:V4SF 1 "register_operand" "0,0,v")
4840 cvtsd2ss\t{%2, %0|%0, %2}
4841 cvtsd2ss\t{%2, %0|%0, %q2}
4842 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4843 [(set_attr "isa" "noavx,noavx,avx")
4844 (set_attr "type" "ssecvt")
4845 (set_attr "athlon_decode" "vector,double,*")
4846 (set_attr "amdfam10_decode" "vector,double,*")
4847 (set_attr "bdver1_decode" "direct,direct,*")
4848 (set_attr "btver2_decode" "double,double,double")
4849 (set_attr "prefix" "orig,orig,<round_prefix>")
4850 (set_attr "mode" "SF")])
;; Pattern for cvtss2sd / vcvtss2sd.  Elements 0 and 1 of the V4SF
;; operand 2 are selected and combined with the V2DF operand 1.
;; Alternatives 0 and 1 are the non-AVX forms (operand 1 tied to the
;; destination; source in a register or in memory respectively);
;; alternative 2 is the AVX three-operand form, which also carries
;; <round_saeonly_op3>.
4852 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4853 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4857 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
4858 (parallel [(const_int 0) (const_int 1)])))
4859 (match_operand:V2DF 1 "register_operand" "0,0,v")
4863 cvtss2sd\t{%2, %0|%0, %2}
4864 cvtss2sd\t{%2, %0|%0, %k2}
4865 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4866 [(set_attr "isa" "noavx,noavx,avx")
4867 (set_attr "type" "ssecvt")
4868 (set_attr "amdfam10_decode" "vector,double,*")
4869 (set_attr "athlon_decode" "direct,direct,*")
4870 (set_attr "bdver1_decode" "direct,direct,*")
4871 (set_attr "btver2_decode" "double,double,double")
4872 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4873 (set_attr "mode" "DF")])
;; 512-bit double -> single conversion: float_truncate of the V8DF
;; operand 1 into the V8SF register operand 0, emitted as vcvtpd2ps.  The
;; template carries both the masking (<mask_operand2>) and rounding
;; (<round_mask_op2>) substitutions.
4875 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4876 [(set (match_operand:V8SF 0 "register_operand" "=v")
4877 (float_truncate:V8SF
4878 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4880 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4881 [(set_attr "type" "ssecvt")
4882 (set_attr "prefix" "evex")
4883 (set_attr "mode" "V8SF")])
;; 256-bit double -> single conversion: float_truncate of the V4DF
;; operand 1 (register or memory) into a V4SF register, emitted as
;; vcvtpd2ps{y}.  Requires TARGET_AVX and <mask_avx512vl_condition>.
4885 (define_insn "avx_cvtpd2ps256<mask_name>"
4886 [(set (match_operand:V4SF 0 "register_operand" "=v")
4887 (float_truncate:V4SF
4888 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4889 "TARGET_AVX && <mask_avx512vl_condition>"
4890 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4891 [(set_attr "type" "ssecvt")
4892 (set_attr "prefix" "maybe_evex")
4893 (set_attr "btver2_decode" "vector")
4894 (set_attr "mode" "V4SF")])
;; Expander for the 128-bit double -> single conversion: float_truncate of
;; the V2DF operand 1 to V2SF within a V4SF destination.  operands[2] is
;; set to the V2SF zero vector (presumably the upper half of the result --
;; confirm against the complete pattern).
4896 (define_expand "sse2_cvtpd2ps"
4897 [(set (match_operand:V4SF 0 "register_operand")
4899 (float_truncate:V2SF
4900 (match_operand:V2DF 1 "nonimmediate_operand"))
4903 "operands[2] = CONST0_RTX (V2SFmode);")
;; Masked counterpart of the 128-bit double -> single expander.  Takes the
;; same float_truncate of V2DF operand 1 plus a V4SF register operand 2 and
;; a QImode register operand 3 (presumably the merge source and the write
;; mask -- confirm).  operands[4] is set to the V2SF zero vector.
4905 (define_expand "sse2_cvtpd2ps_mask"
4906 [(set (match_operand:V4SF 0 "register_operand")
4909 (float_truncate:V2SF
4910 (match_operand:V2DF 1 "nonimmediate_operand"))
4912 (match_operand:V4SF 2 "register_operand")
4913 (match_operand:QI 3 "register_operand")))]
4915 "operands[4] = CONST0_RTX (V2SFmode);")
;; Insn for the 128-bit double -> single conversion: float_truncate of the
;; V2DF operand 1 to V2SF, with operand 2 required to be a V2SF zero
;; constant (const0_operand).  Enabled for TARGET_SSE2 together with
;; <mask_avx512vl_condition>.  The output code returns either the
;; vcvtpd2ps{x} template (with <mask_operand3>) or the plain cvtpd2ps one.
4917 (define_insn "*sse2_cvtpd2ps<mask_name>"
4918 [(set (match_operand:V4SF 0 "register_operand" "=v")
4920 (float_truncate:V2SF
4921 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4922 (match_operand:V2SF 2 "const0_operand")))]
4923 "TARGET_SSE2 && <mask_avx512vl_condition>"
4926 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
4928 return "cvtpd2ps\t{%1, %0|%0, %1}";
4930 [(set_attr "type" "ssecvt")
4931 (set_attr "amdfam10_decode" "double")
4932 (set_attr "athlon_decode" "vector")
4933 (set_attr "bdver1_decode" "double")
4934 (set_attr "prefix_data16" "1")
4935 (set_attr "prefix" "maybe_vex")
4936 (set_attr "mode" "V4SF")])
4938 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a double-precision vector mode to the single-precision vector mode
;; with the same number of elements (V8DF -> V8SF, V4DF -> V4SF).
4939 (define_mode_attr sf2dfmode
4940 [(V8DF "V8SF") (V4DF "V4SF")])
;; Single -> double widening: float_extend of the <sf2dfmode> operand 1
;; into a VF2_512_256 register, emitted as vcvtps2pd.  The condition
;; requires TARGET_AVX plus the mask and round_saeonly 512-bit conditions.
4942 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4943 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4944 (float_extend:VF2_512_256
4945 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4946 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4947 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4948 [(set_attr "type" "ssecvt")
4949 (set_attr "prefix" "maybe_vex")
4950 (set_attr "mode" "<MODE>")])
;; Variant taking a full V8SF operand 1, of which elements 0..3 are
;; selected for the V4DF result.  The template prints operand 1 with the
;; %x modifier (presumably the 128-bit register name -- confirm in
;; ix86_print_operand).
4952 (define_insn "*avx_cvtps2pd256_2"
4953 [(set (match_operand:V4DF 0 "register_operand" "=x")
4956 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4957 (parallel [(const_int 0) (const_int 1)
4958 (const_int 2) (const_int 3)]))))]
4960 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4961 [(set_attr "type" "ssecvt")
4962 (set_attr "prefix" "vex")
4963 (set_attr "mode" "V4DF")])
;; Selects elements 0..7 of the V16SF operand 1 for the V8DF result and
;; emits vcvtps2pd.  Operand 1 is printed with the %t modifier (presumably
;; the 256-bit register name -- confirm in ix86_print_operand).
4965 (define_insn "vec_unpacks_lo_v16sf"
4966 [(set (match_operand:V8DF 0 "register_operand" "=v")
4969 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4970 (parallel [(const_int 0) (const_int 1)
4971 (const_int 2) (const_int 3)
4972 (const_int 4) (const_int 5)
4973 (const_int 6) (const_int 7)]))))]
4975 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4976 [(set_attr "type" "ssecvt")
4977 (set_attr "prefix" "evex")
4978 (set_attr "mode" "V8DF")])
;; Vector -> mask conversion for the VI12_AVX512VL modes: wraps the vector
;; register operand 1 in UNSPEC_CVTINT2MASK and writes a mask register
;; ("Yk" constraint) of mode <avx512fmaskmode>.  Emits
;; vpmov<ssemodesuffix>2m.
4980 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4981 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4982 (unspec:<avx512fmaskmode>
4983 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
4984 UNSPEC_CVTINT2MASK))]
4986 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4987 [(set_attr "prefix" "evex")
4988 (set_attr "mode" "<sseinsnmode>")])
;; Vector -> mask conversion for the VI48_AVX512VL modes: wraps the vector
;; register operand 1 in UNSPEC_CVTINT2MASK and writes a mask register
;; ("Yk" constraint) of mode <avx512fmaskmode>.  Emits
;; vpmov<ssemodesuffix>2m.
4990 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4991 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4992 (unspec:<avx512fmaskmode>
4993 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
4994 UNSPEC_CVTINT2MASK))]
4996 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4997 [(set_attr "prefix" "evex")
4998 (set_attr "mode" "<sseinsnmode>")])
;; Expander for mask -> vector conversion in the VI12_AVX512VL modes.  The
;; result is a vec_merge controlled by the mask register operand 1; the
;; preparation statements supply operands[2] = CONSTM1_RTX (minus-one
;; vector) and operands[3] = CONST0_RTX (zero vector), both in <MODE>mode.
5000 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5001 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5002 (vec_merge:VI12_AVX512VL
5005 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5008 operands[2] = CONSTM1_RTX (<MODE>mode);
5009 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn matching the mask -> vector vec_merge for the VI12_AVX512VL modes:
;; operand 2 must satisfy constm1_operand, operand 3 must satisfy
;; const0_operand, and operand 1 is the selecting mask register ("Yk").
;; Emits vpmovm2<ssemodesuffix>.
5012 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5013 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5014 (vec_merge:VI12_AVX512VL
5015 (match_operand:VI12_AVX512VL 2 "constm1_operand")
5016 (match_operand:VI12_AVX512VL 3 "const0_operand")
5017 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5019 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5020 [(set_attr "prefix" "evex")
5021 (set_attr "mode" "<sseinsnmode>")])
;; Expander for mask -> vector conversion in the VI48_AVX512VL modes.  The
;; result is a vec_merge controlled by the mask register operand 1; the
;; preparation statements supply operands[2] = CONSTM1_RTX (minus-one
;; vector) and operands[3] = CONST0_RTX (zero vector), both in <MODE>mode.
5023 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5024 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5025 (vec_merge:VI48_AVX512VL
5028 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5031 operands[2] = CONSTM1_RTX (<MODE>mode);
5032 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn matching the mask -> vector vec_merge for the VI48_AVX512VL modes:
;; operand 2 must satisfy constm1_operand, operand 3 must satisfy
;; const0_operand, and operand 1 is the selecting mask register ("Yk").
;; Emits vpmovm2<ssemodesuffix>.
5035 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5036 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5037 (vec_merge:VI48_AVX512VL
5038 (match_operand:VI48_AVX512VL 2 "constm1_operand")
5039 (match_operand:VI48_AVX512VL 3 "const0_operand")
5040 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5042 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5043 [(set_attr "prefix" "evex")
5044 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit single -> double conversion: elements 0 and 1 of the V4SF
;; operand 1 (register or memory) feed the V2DF result.  Enabled for
;; TARGET_SSE2 together with <mask_avx512vl_condition>.  The template is
;; %vcvtps2pd; in the Intel-syntax half operand 1 is printed with %q.
5046 (define_insn "sse2_cvtps2pd<mask_name>"
5047 [(set (match_operand:V2DF 0 "register_operand" "=v")
5050 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5051 (parallel [(const_int 0) (const_int 1)]))))]
5052 "TARGET_SSE2 && <mask_avx512vl_condition>"
5053 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5054 [(set_attr "type" "ssecvt")
5055 (set_attr "amdfam10_decode" "direct")
5056 (set_attr "athlon_decode" "double")
5057 (set_attr "bdver1_decode" "double")
5058 (set_attr "prefix_data16" "0")
5059 (set_attr "prefix" "maybe_vex")
5060 (set_attr "mode" "V2DF")])
;; Two-step expansion for widening the high half of a V4SF.  The first
;; step is a selection with indices 6, 7, 2, 3 involving the V4SF
;; operand 1; the second sets the V2DF operand 0 from elements 0 and 1.
;; operands[2] is a fresh V4SF pseudo, presumably the intermediate that
;; holds the shuffled value -- confirm against the complete pattern.
5062 (define_expand "vec_unpacks_hi_v4sf"
5067 (match_operand:V4SF 1 "nonimmediate_operand"))
5068 (parallel [(const_int 6) (const_int 7)
5069 (const_int 2) (const_int 3)])))
5070 (set (match_operand:V2DF 0 "register_operand")
5074 (parallel [(const_int 0) (const_int 1)]))))]
5076 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expansion for widening the high half of a V8SF: elements 4..7
;; of the V8SF register operand 1 are selected, then the V4DF operand 0 is
;; set.  operands[2] is a fresh V4SF pseudo, presumably the intermediate
;; holding the selected half -- confirm against the complete pattern.
5078 (define_expand "vec_unpacks_hi_v8sf"
5081 (match_operand:V8SF 1 "register_operand")
5082 (parallel [(const_int 4) (const_int 5)
5083 (const_int 6) (const_int 7)])))
5084 (set (match_operand:V4DF 0 "register_operand")
5088 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expansion for widening the high half of a V16SF: elements
;; 8..15 of the V16SF register operand 1 are selected, then the V8DF
;; operand 0 is set.  operands[2] is a fresh V8SF pseudo, presumably the
;; intermediate holding the selected half -- confirm against the complete
;; pattern.
5090 (define_expand "vec_unpacks_hi_v16sf"
5093 (match_operand:V16SF 1 "register_operand")
5094 (parallel [(const_int 8) (const_int 9)
5095 (const_int 10) (const_int 11)
5096 (const_int 12) (const_int 13)
5097 (const_int 14) (const_int 15)])))
5098 (set (match_operand:V8DF 0 "register_operand")
5102 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Widening of the low half of a V4SF: elements 0 and 1 of operand 1
;; (register or memory) feed the V2DF operand 0.
5104 (define_expand "vec_unpacks_lo_v4sf"
5105 [(set (match_operand:V2DF 0 "register_operand")
5108 (match_operand:V4SF 1 "nonimmediate_operand")
5109 (parallel [(const_int 0) (const_int 1)]))))]
;; Widening of the low half of a V8SF: elements 0..3 of operand 1
;; (register or memory) feed the V4DF operand 0.
5112 (define_expand "vec_unpacks_lo_v8sf"
5113 [(set (match_operand:V4DF 0 "register_operand")
5116 (match_operand:V8SF 1 "nonimmediate_operand")
5117 (parallel [(const_int 0) (const_int 1)
5118 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the floating-point vector mode used as
;; the result of the vec_unpack*_float_* expanders: the result has half as
;; many elements, each twice as wide (e.g. V8HI -> V4SF, V4SI -> V2DF).
5121 (define_mode_attr sseunpackfltmode
5122 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5123 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Signed unpack-high followed by an integer -> float conversion for the
;; VI2_AVX512F modes.  Operand 1 is unpacked by gen_vec_unpacks_hi_<mode>
;; into a fresh <sseunpackmode> temporary, and operand 0 is then set to the
;; FLOAT of that temporary in <sseunpackfltmode>.
5125 (define_expand "vec_unpacks_float_hi_<mode>"
5126 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5127 (match_operand:VI2_AVX512F 1 "register_operand")]
5130 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5132 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5133 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5134 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-low followed by an integer -> float conversion for the
;; VI2_AVX512F modes.  Operand 1 is unpacked by gen_vec_unpacks_lo_<mode>
;; into a fresh <sseunpackmode> temporary, and operand 0 is then set to the
;; FLOAT of that temporary in <sseunpackfltmode>.
5138 (define_expand "vec_unpacks_float_lo_<mode>"
5139 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5140 (match_operand:VI2_AVX512F 1 "register_operand")]
5143 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5145 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5146 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5147 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-high followed by an integer -> float conversion for the
;; VI2_AVX512F modes.  Only the unpack step is the unsigned one
;; (gen_vec_unpacku_hi_<mode>); the conversion itself is an ordinary FLOAT
;; of the unpacked <sseunpackmode> temporary -- presumably sufficient
;; because the widened values are non-negative; confirm.
5151 (define_expand "vec_unpacku_float_hi_<mode>"
5152 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5153 (match_operand:VI2_AVX512F 1 "register_operand")]
5156 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5158 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5159 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5160 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-low followed by an integer -> float conversion for the
;; VI2_AVX512F modes.  Only the unpack step is the unsigned one
;; (gen_vec_unpacku_lo_<mode>); the conversion itself is an ordinary FLOAT
;; of the unpacked <sseunpackmode> temporary -- presumably sufficient
;; because the widened values are non-negative; confirm.
5164 (define_expand "vec_unpacku_float_lo_<mode>"
5165 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5166 (match_operand:VI2_AVX512F 1 "register_operand")]
5169 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5171 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5172 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5173 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed conversion of the high half of a V4SI to V2DF in two steps:
;; elements 2, 3, 2, 3 of operand 1 are selected first, then elements 0
;; and 1 feed the V2DF operand 0.  operands[2] is a fresh V4SI pseudo,
;; presumably the intermediate holding the shuffled vector -- confirm.
5177 (define_expand "vec_unpacks_float_hi_v4si"
5180 (match_operand:V4SI 1 "nonimmediate_operand")
5181 (parallel [(const_int 2) (const_int 3)
5182 (const_int 2) (const_int 3)])))
5183 (set (match_operand:V2DF 0 "register_operand")
5187 (parallel [(const_int 0) (const_int 1)]))))]
5189 "operands[2] = gen_reg_rtx (V4SImode);")
;; Conversion of the low half of a V4SI to V2DF: elements 0 and 1 of
;; operand 1 (register or memory) feed the V2DF operand 0.
5191 (define_expand "vec_unpacks_float_lo_v4si"
5192 [(set (match_operand:V2DF 0 "register_operand")
5195 (match_operand:V4SI 1 "nonimmediate_operand")
5196 (parallel [(const_int 0) (const_int 1)]))))]
;; Conversion of the high half of a V8SI to V4DF in two steps: elements
;; 4..7 of operand 1 are selected, then the V4DF operand 0 is set.
;; operands[2] is a fresh V4SI pseudo, presumably the intermediate holding
;; the selected half -- confirm against the complete pattern.
5199 (define_expand "vec_unpacks_float_hi_v8si"
5202 (match_operand:V8SI 1 "nonimmediate_operand")
5203 (parallel [(const_int 4) (const_int 5)
5204 (const_int 6) (const_int 7)])))
5205 (set (match_operand:V4DF 0 "register_operand")
5209 "operands[2] = gen_reg_rtx (V4SImode);")
;; Conversion of the low half of a V8SI to V4DF: elements 0..3 of
;; operand 1 (register or memory) feed the V4DF operand 0.
5211 (define_expand "vec_unpacks_float_lo_v8si"
5212 [(set (match_operand:V4DF 0 "register_operand")
5215 (match_operand:V8SI 1 "nonimmediate_operand")
5216 (parallel [(const_int 0) (const_int 1)
5217 (const_int 2) (const_int 3)]))))]
;; Conversion of the high half of a V16SI to V8DF in two steps: elements
;; 8..15 of operand 1 are selected, then the V8DF operand 0 is set.
;; operands[2] is a fresh V8SI pseudo, presumably the intermediate holding
;; the selected half -- confirm against the complete pattern.
5220 (define_expand "vec_unpacks_float_hi_v16si"
5223 (match_operand:V16SI 1 "nonimmediate_operand")
5224 (parallel [(const_int 8) (const_int 9)
5225 (const_int 10) (const_int 11)
5226 (const_int 12) (const_int 13)
5227 (const_int 14) (const_int 15)])))
5228 (set (match_operand:V8DF 0 "register_operand")
5232 "operands[2] = gen_reg_rtx (V8SImode);")
;; Conversion of the low half of a V16SI to V8DF: elements 0..7 of
;; operand 1 (register or memory) feed the V8DF operand 0.
5234 (define_expand "vec_unpacks_float_lo_v16si"
5235 [(set (match_operand:V8DF 0 "register_operand")
5238 (match_operand:V16SI 1 "nonimmediate_operand")
5239 (parallel [(const_int 0) (const_int 1)
5240 (const_int 2) (const_int 3)
5241 (const_int 4) (const_int 5)
5242 (const_int 6) (const_int 7)]))))]
;; Unsigned conversion of the high half of a V4SI to V2DF, built as a
;; conversion plus a correction step.  Elements 2, 3, 2, 3 of operand 1
;; are selected and elements 0 and 1 of that are converted; operands[6] is
;; then compared (lt) against the zero vector operands[3], operands[7] is
;; ANDed with operands[4] (a V2DF constant built from 2^32, computed by
;; real_ldexp (1.0, 32)), and operand 0 is set to operands[6] +
;; operands[8] -- presumably adding 2^32 to the lanes whose signed
;; conversion came out negative; confirm against the complete pattern.
;; operands[5] is a V4SI pseudo and operands[6..8] are V2DF pseudos.
5245 (define_expand "vec_unpacku_float_hi_v4si"
5248 (match_operand:V4SI 1 "nonimmediate_operand")
5249 (parallel [(const_int 2) (const_int 3)
5250 (const_int 2) (const_int 3)])))
5255 (parallel [(const_int 0) (const_int 1)]))))
5257 (lt:V2DF (match_dup 6) (match_dup 3)))
5259 (and:V2DF (match_dup 7) (match_dup 4)))
5260 (set (match_operand:V2DF 0 "register_operand")
5261 (plus:V2DF (match_dup 6) (match_dup 8)))]
5264 REAL_VALUE_TYPE TWO32r;
5268 real_ldexp (&TWO32r, &dconst1, 32);
5269 x = const_double_from_real_value (TWO32r, DFmode);
5271 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5272 operands[4] = force_reg (V2DFmode,
5273 ix86_build_const_vector (V2DFmode, 1, x));
5275 operands[5] = gen_reg_rtx (V4SImode);
5277 for (i = 6; i < 9; i++)
5278 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the low half of a V4SI to V2DF, built as a
;; conversion plus a correction step.  Elements 0 and 1 of operand 1 are
;; converted; operands[5] is then compared (lt) against the zero vector
;; operands[3], operands[6] is ANDed with operands[4] (a V2DF constant
;; built from 2^32, computed by real_ldexp (1.0, 32)), and operand 0 is set
;; to operands[5] + operands[7] -- presumably adding 2^32 to the lanes
;; whose signed conversion came out negative; confirm against the complete
;; pattern.  operands[5..7] are V2DF pseudos.
5281 (define_expand "vec_unpacku_float_lo_v4si"
5285 (match_operand:V4SI 1 "nonimmediate_operand")
5286 (parallel [(const_int 0) (const_int 1)]))))
5288 (lt:V2DF (match_dup 5) (match_dup 3)))
5290 (and:V2DF (match_dup 6) (match_dup 4)))
5291 (set (match_operand:V2DF 0 "register_operand")
5292 (plus:V2DF (match_dup 5) (match_dup 7)))]
5295 REAL_VALUE_TYPE TWO32r;
5299 real_ldexp (&TWO32r, &dconst1, 32);
5300 x = const_double_from_real_value (TWO32r, DFmode);
5302 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5303 operands[4] = force_reg (V2DFmode,
5304 ix86_build_const_vector (V2DFmode, 1, x));
5306 for (i = 5; i < 8; i++)
5307 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the high half of a V8SI to V4DF.  The high V4SI
;; half is extracted (gen_vec_extract_hi_v8si) and converted with
;; gen_floatv4siv4df2; the converted value is compared LT against a zero
;; vector, the comparison result is ANDed with a V4DF constant built from
;; 2^32 (real_ldexp (1.0, 32)), and that is added to the converted value to
;; form operand 0.
5310 (define_expand "vec_unpacku_float_hi_v8si"
5311 [(match_operand:V4DF 0 "register_operand")
5312 (match_operand:V8SI 1 "register_operand")]
5315 REAL_VALUE_TYPE TWO32r;
5319 real_ldexp (&TWO32r, &dconst1, 32);
5320 x = const_double_from_real_value (TWO32r, DFmode);
5322 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5323 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5324 tmp[5] = gen_reg_rtx (V4SImode);
5326 for (i = 2; i < 5; i++)
5327 tmp[i] = gen_reg_rtx (V4DFmode);
5328 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5329 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5330 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5331 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5332 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5333 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned conversion of the high half of a V16SI to V8DF.  The high V8SI
;; half is extracted (gen_vec_extract_hi_v16si) and converted with
;; gen_floatv8siv8df2; a QImode value k is set from the LT comparison of
;; the converted value against a zero vector, and gen_addv8df3_mask is
;; then invoked with the converted value, a V8DF constant built from 2^32
;; (real_ldexp (1.0, 32)) and k.  The result is moved to operand 0.
5337 (define_expand "vec_unpacku_float_hi_v16si"
5338 [(match_operand:V8DF 0 "register_operand")
5339 (match_operand:V16SI 1 "register_operand")]
5342 REAL_VALUE_TYPE TWO32r;
5345 real_ldexp (&TWO32r, &dconst1, 32);
5346 x = const_double_from_real_value (TWO32r, DFmode);
5348 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5349 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5350 tmp[2] = gen_reg_rtx (V8DFmode);
5351 tmp[3] = gen_reg_rtx (V8SImode);
5352 k = gen_reg_rtx (QImode);
5354 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5355 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5356 emit_insn (gen_rtx_SET (VOIDmode, k,
5357 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5358 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5359 emit_move_insn (operands[0], tmp[2]);
;; Unsigned conversion of the low half of a V8SI to V4DF.  Operand 1 is
;; converted directly with gen_avx_cvtdq2pd256_2 (no separate extract
;; step); the converted value is compared LT against a zero vector, the
;; comparison result is ANDed with a V4DF constant built from 2^32
;; (real_ldexp (1.0, 32)), and that is added to the converted value to form
;; operand 0.
5363 (define_expand "vec_unpacku_float_lo_v8si"
5364 [(match_operand:V4DF 0 "register_operand")
5365 (match_operand:V8SI 1 "nonimmediate_operand")]
5368 REAL_VALUE_TYPE TWO32r;
5372 real_ldexp (&TWO32r, &dconst1, 32);
5373 x = const_double_from_real_value (TWO32r, DFmode);
5375 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5376 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5378 for (i = 2; i < 5; i++)
5379 tmp[i] = gen_reg_rtx (V4DFmode);
5380 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5381 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5382 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5383 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5384 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned conversion of the low half of a V16SI to V8DF.  Operand 1 is
;; converted directly with gen_avx512f_cvtdq2pd512_2; a QImode value k is
;; set from the LT comparison of the converted value against a zero
;; vector, and gen_addv8df3_mask is then invoked with the converted value,
;; a V8DF constant built from 2^32 (real_ldexp (1.0, 32)) and k.  The
;; result is moved to operand 0.
5388 (define_expand "vec_unpacku_float_lo_v16si"
5389 [(match_operand:V8DF 0 "register_operand")
5390 (match_operand:V16SI 1 "nonimmediate_operand")]
5393 REAL_VALUE_TYPE TWO32r;
5396 real_ldexp (&TWO32r, &dconst1, 32);
5397 x = const_double_from_real_value (TWO32r, DFmode);
5399 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5400 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5401 tmp[2] = gen_reg_rtx (V8DFmode);
5402 k = gen_reg_rtx (QImode);
5404 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5405 emit_insn (gen_rtx_SET (VOIDmode, k,
5406 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5407 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5408 emit_move_insn (operands[0], tmp[2]);
;; Packs two VF2_512_256 (double) vectors into one <ssePSmode> vector.
;; Operands 1 and 2 are each narrowed with float_truncate to <sf2dfmode>,
;; and operand 0 is set to a vec_concat.  operands[3] and operands[4] are
;; fresh <sf2dfmode> pseudos, presumably the two narrowed halves that are
;; concatenated -- confirm against the complete pattern.
5412 (define_expand "vec_pack_trunc_<mode>"
5414 (float_truncate:<sf2dfmode>
5415 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5417 (float_truncate:<sf2dfmode>
5418 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5419 (set (match_operand:<ssePSmode> 0 "register_operand")
5420 (vec_concat:<ssePSmode>
5425 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5426 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Packs two V2DF vectors into one V4SF vector.  Two code sequences
;; appear.  The one guarded by TARGET_AVX && !TARGET_PREFER_AVX128 &&
;; optimize_insn_for_speed_p () concatenates both inputs into a V4DF
;; temporary (operand 1 forced into a register first) and converts it with
;; a single gen_avx_cvtpd2ps256.  The other converts each input separately
;; with gen_sse2_cvtpd2ps into V4SF temporaries and combines them with
;; gen_sse_movlhps.
5429 (define_expand "vec_pack_trunc_v2df"
5430 [(match_operand:V4SF 0 "register_operand")
5431 (match_operand:V2DF 1 "nonimmediate_operand")
5432 (match_operand:V2DF 2 "nonimmediate_operand")]
5437 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5439 tmp0 = gen_reg_rtx (V4DFmode);
5440 tmp1 = force_reg (V2DFmode, operands[1]);
5442 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5443 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5447 tmp0 = gen_reg_rtx (V4SFmode);
5448 tmp1 = gen_reg_rtx (V4SFmode);
5450 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5451 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5452 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Packs two V8DF vectors into one V16SI vector with truncating signed
;; conversion: each input is converted with gen_fix_truncv8dfv8si2 into a
;; V8SI temporary and the two halves are joined by gen_avx_vec_concatv16si.
5457 (define_expand "vec_pack_sfix_trunc_v8df"
5458 [(match_operand:V16SI 0 "register_operand")
5459 (match_operand:V8DF 1 "nonimmediate_operand")
5460 (match_operand:V8DF 2 "nonimmediate_operand")]
5465 r1 = gen_reg_rtx (V8SImode);
5466 r2 = gen_reg_rtx (V8SImode);
5468 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5469 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5470 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Packs two V4DF vectors into one V8SI vector with truncating signed
;; conversion: each input is converted with gen_fix_truncv4dfv4si2 into a
;; V4SI temporary and the two halves are joined by gen_avx_vec_concatv8si.
5474 (define_expand "vec_pack_sfix_trunc_v4df"
5475 [(match_operand:V8SI 0 "register_operand")
5476 (match_operand:V4DF 1 "nonimmediate_operand")
5477 (match_operand:V4DF 2 "nonimmediate_operand")]
5482 r1 = gen_reg_rtx (V4SImode);
5483 r2 = gen_reg_rtx (V4SImode);
5485 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5486 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5487 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF vectors into one V4SI vector with truncating signed
;; conversion.  Two code sequences appear.  The one guarded by TARGET_AVX
;; && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p () concatenates
;; both inputs into a V4DF temporary and converts it with a single
;; gen_fix_truncv4dfv4si2.  The other converts each input with
;; gen_sse2_cvttpd2dq into a V4SI temporary, combines the two results with
;; gen_vec_interleave_lowv2di on their V2DI lowparts, and moves the V4SI
;; lowpart of that into operand 0.
5491 (define_expand "vec_pack_sfix_trunc_v2df"
5492 [(match_operand:V4SI 0 "register_operand")
5493 (match_operand:V2DF 1 "nonimmediate_operand")
5494 (match_operand:V2DF 2 "nonimmediate_operand")]
5497 rtx tmp0, tmp1, tmp2;
5499 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5501 tmp0 = gen_reg_rtx (V4DFmode);
5502 tmp1 = force_reg (V2DFmode, operands[1]);
5504 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5505 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5509 tmp0 = gen_reg_rtx (V4SImode);
5510 tmp1 = gen_reg_rtx (V4SImode);
5511 tmp2 = gen_reg_rtx (V2DImode);
5513 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5514 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5515 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5516 gen_lowpart (V2DImode, tmp0),
5517 gen_lowpart (V2DImode, tmp1)));
5518 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements (V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI); used
;; as the result mode of vec_pack_ufix_trunc_<mode>.
5523 (define_mode_attr ssepackfltmode
5524 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Packs two VF2 (double) vectors into one <ssepackfltmode> vector with
;; unsigned truncating conversion.
;; For V8DFmode each input is converted with gen_ufix_truncv8dfv8si2 and
;; the two V8SI halves are joined by gen_avx_vec_concatv16si.
;; For the other modes each input is first passed through
;; ix86_expand_adjust_ufix_to_sfix_si, which returns an adjusted input and
;; stores a second value through its pointer argument (tmp[2] / tmp[3]).
;; The adjusted inputs are packed with the signed
;; gen_vec_pack_sfix_trunc_<mode>, tmp[2] and tmp[3] are combined by
;; ix86_expand_vec_extract_even_odd, and the two results are XORed into
;; operand 0.  When the result mode is not V4SImode and TARGET_AVX2 is not
;; set, the extraction is done on V8SFmode lowparts and the result is
;; viewed back as V8SImode.
;; NOTE(review): the XOR presumably re-applies the bias removed by the
;; adjust helper -- confirm against ix86_expand_adjust_ufix_to_sfix_si.
5526 (define_expand "vec_pack_ufix_trunc_<mode>"
5527 [(match_operand:<ssepackfltmode> 0 "register_operand")
5528 (match_operand:VF2 1 "register_operand")
5529 (match_operand:VF2 2 "register_operand")]
5532 if (<MODE>mode == V8DFmode)
5536 r1 = gen_reg_rtx (V8SImode);
5537 r2 = gen_reg_rtx (V8SImode);
5539 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5540 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5541 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5546 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5547 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5548 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5549 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5550 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5552 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5553 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5557 tmp[5] = gen_reg_rtx (V8SFmode);
5558 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5559 gen_lowpart (V8SFmode, tmp[3]), 0);
5560 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5562 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5563 operands[0], 0, OPTAB_DIRECT);
5564 if (tmp[6] != operands[0])
5565 emit_move_insn (operands[0], tmp[6]);
;; Packs two V4DF vectors into one V8SI vector: each input is converted
;; with gen_avx_cvtpd2dq256 into a V4SI temporary and the two halves are
;; joined by gen_avx_vec_concatv8si.
5571 (define_expand "vec_pack_sfix_v4df"
5572 [(match_operand:V8SI 0 "register_operand")
5573 (match_operand:V4DF 1 "nonimmediate_operand")
5574 (match_operand:V4DF 2 "nonimmediate_operand")]
5579 r1 = gen_reg_rtx (V4SImode);
5580 r2 = gen_reg_rtx (V4SImode);
5582 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5583 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5584 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF vectors into one V4SI vector.  Two code sequences
;; appear.  The one guarded by TARGET_AVX && !TARGET_PREFER_AVX128 &&
;; optimize_insn_for_speed_p () concatenates both inputs into a V4DF
;; temporary and converts it with a single gen_avx_cvtpd2dq256.  The other
;; converts each input with gen_sse2_cvtpd2dq into a V4SI temporary,
;; combines the two results with gen_vec_interleave_lowv2di on their V2DI
;; lowparts, and moves the V4SI lowpart of that into operand 0.
5588 (define_expand "vec_pack_sfix_v2df"
5589 [(match_operand:V4SI 0 "register_operand")
5590 (match_operand:V2DF 1 "nonimmediate_operand")
5591 (match_operand:V2DF 2 "nonimmediate_operand")]
5594 rtx tmp0, tmp1, tmp2;
5596 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5598 tmp0 = gen_reg_rtx (V4DFmode);
5599 tmp1 = force_reg (V2DFmode, operands[1]);
5601 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5602 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5606 tmp0 = gen_reg_rtx (V4SImode);
5607 tmp1 = gen_reg_rtx (V4SImode);
5608 tmp2 = gen_reg_rtx (V2DImode);
5610 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5611 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5612 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5613 gen_lowpart (V2DImode, tmp0),
5614 gen_lowpart (V2DImode, tmp1)));
5615 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5620 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5622 ;; Parallel single-precision floating point element swizzling
5624 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts nonimmediate operands
;; in all three positions.  ix86_fixup_binary_operands returns the
;; destination to use (dst); the insn is emitted into dst and, when dst
;; differs from operands[0], the result is copied to operands[0] afterwards.
5626 (define_expand "sse_movhlps_exp"
5627 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5630 (match_operand:V4SF 1 "nonimmediate_operand")
5631 (match_operand:V4SF 2 "nonimmediate_operand"))
5632 (parallel [(const_int 6)
5638 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5640 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5642 /* Fix up the destination if needed. */
5643 if (dst != operands[0])
5644 emit_move_insn (operands[0], dst);
;; movhlps and its memory forms, in five alternatives:
;;   0/1: register sources, movhlps (non-AVX) / vmovhlps (AVX);
;;   2/3: operand 2 in offsettable memory ("o"), movlps / vmovlps with the
;;        operand printed through %H2;
;;   4:   memory destination with operand 1 tied to it, %vmovhps.
;; The condition requires TARGET_SSE and rejects the case where operands 1
;; and 2 are both in memory.
5649 (define_insn "sse_movhlps"
5650 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5653 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5654 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5655 (parallel [(const_int 6)
5659 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5661 movhlps\t{%2, %0|%0, %2}
5662 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5663 movlps\t{%H2, %0|%0, %H2}
5664 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5665 %vmovhps\t{%2, %0|%q0, %2}"
5666 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5667 (set_attr "type" "ssemov")
5668 (set_attr "ssememalign" "64")
5669 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5670 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts nonimmediate operands
;; in all three positions.  ix86_fixup_binary_operands returns the
;; destination to use (dst); the insn is emitted into dst and, when dst
;; differs from operands[0], the result is copied to operands[0] afterwards.
5672 (define_expand "sse_movlhps_exp"
5673 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5676 (match_operand:V4SF 1 "nonimmediate_operand")
5677 (match_operand:V4SF 2 "nonimmediate_operand"))
5678 (parallel [(const_int 0)
5684 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5686 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5688 /* Fix up the destination if needed. */
5689 if (dst != operands[0])
5690 emit_move_insn (operands[0], dst);
;; movlhps and its memory forms, in five alternatives:
;;   0/1: register sources, movlhps (non-AVX) / vmovlhps (AVX);
;;   2/3: operand 2 in memory, movhps / vmovhps;
;;   4:   offsettable-memory destination ("o") with operand 1 tied to it,
;;        %vmovlps storing to the location printed through %H0.
;; The condition requires TARGET_SSE and defers operand validity to
;; ix86_binary_operator_ok.
5695 (define_insn "sse_movlhps"
5696 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5699 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5700 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5701 (parallel [(const_int 0)
5705 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5707 movlhps\t{%2, %0|%0, %2}
5708 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5709 movhps\t{%2, %0|%0, %q2}
5710 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5711 %vmovlps\t{%2, %H0|%H0, %2}"
5712 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5713 (set_attr "type" "ssemov")
5714 (set_attr "ssememalign" "64")
5715 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5716 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps.  The selection list pairs index i of operand 1 with
;; index i + 16 (the matching element of operand 2) for i = 2, 3, 6, 7,
;; 10, 11, 14, 15, i.e. the upper two elements of every group of four.
;; <mask_operand3> is appended to the destination for the masked variant.
5718 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5719 [(set (match_operand:V16SF 0 "register_operand" "=v")
5722 (match_operand:V16SF 1 "register_operand" "v")
5723 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5724 (parallel [(const_int 2) (const_int 18)
5725 (const_int 3) (const_int 19)
5726 (const_int 6) (const_int 22)
5727 (const_int 7) (const_int 23)
5728 (const_int 10) (const_int 26)
5729 (const_int 11) (const_int 27)
5730 (const_int 14) (const_int 30)
5731 (const_int 15) (const_int 31)])))]
5733 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5734 [(set_attr "type" "sselog")
5735 (set_attr "prefix" "evex")
5736 (set_attr "mode" "V16SF")])
5738 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  The selection list pairs index i of operand 1 with
;; index i + 8 (the matching element of operand 2) for i = 2, 3, 6, 7.
;; Requires TARGET_AVX and <mask_avx512vl_condition>.
5739 (define_insn "avx_unpckhps256<mask_name>"
5740 [(set (match_operand:V8SF 0 "register_operand" "=v")
5743 (match_operand:V8SF 1 "register_operand" "v")
5744 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5745 (parallel [(const_int 2) (const_int 10)
5746 (const_int 3) (const_int 11)
5747 (const_int 6) (const_int 14)
5748 (const_int 7) (const_int 15)])))]
5749 "TARGET_AVX && <mask_avx512vl_condition>"
5750 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5751 [(set_attr "type" "sselog")
5752 (set_attr "prefix" "vex")
5753 (set_attr "mode" "V8SF")])
;; Full-width high interleave of two V8SF vectors, built from three
;; selections.  The first two use the index lists of the in-lane low
;; (0 8 1 9 4 12 5 13) and high (2 10 3 11 6 14 7 15) unpacks; the third
;; sets operand 0 from indices 4..7 and 12..15.  operands[3] and
;; operands[4] are fresh V8SF pseudos, presumably the two intermediate
;; unpack results that the final selection draws from -- confirm against
;; the complete pattern.
5755 (define_expand "vec_interleave_highv8sf"
5759 (match_operand:V8SF 1 "register_operand" "x")
5760 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5761 (parallel [(const_int 0) (const_int 8)
5762 (const_int 1) (const_int 9)
5763 (const_int 4) (const_int 12)
5764 (const_int 5) (const_int 13)])))
5770 (parallel [(const_int 2) (const_int 10)
5771 (const_int 3) (const_int 11)
5772 (const_int 6) (const_int 14)
5773 (const_int 7) (const_int 15)])))
5774 (set (match_operand:V8SF 0 "register_operand")
5779 (parallel [(const_int 4) (const_int 5)
5780 (const_int 6) (const_int 7)
5781 (const_int 12) (const_int 13)
5782 (const_int 14) (const_int 15)])))]
5785 operands[3] = gen_reg_rtx (V8SFmode);
5786 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave (unpckhps).  The selection list is 2, 6, 3, 7:
;; elements 2 and 3 of operand 1 alternating with the matching elements of
;; operand 2.  Alternative 0 is the non-AVX two-operand form (operand 1
;; tied to the destination); alternative 1 is the AVX three-operand form,
;; which carries <mask_operand3>.  Requires TARGET_SSE and
;; <mask_avx512vl_condition>.
5789 (define_insn "vec_interleave_highv4sf<mask_name>"
5790 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5793 (match_operand:V4SF 1 "register_operand" "0,v")
5794 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
5795 (parallel [(const_int 2) (const_int 6)
5796 (const_int 3) (const_int 7)])))]
5797 "TARGET_SSE && <mask_avx512vl_condition>"
5799 unpckhps\t{%2, %0|%0, %2}
5800 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5801 [(set_attr "isa" "noavx,avx")
5802 (set_attr "type" "sselog")
5803 (set_attr "prefix" "orig,vex")
5804 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps.  The selection list pairs index i of operand 1 with
;; index i + 16 (the matching element of operand 2) for i = 0, 1, 4, 5,
;; 8, 9, 12, 13, i.e. the lower two elements of every group of four.
;; <mask_operand3> is appended to the destination for the masked variant.
5806 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5807 [(set (match_operand:V16SF 0 "register_operand" "=v")
5810 (match_operand:V16SF 1 "register_operand" "v")
5811 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5812 (parallel [(const_int 0) (const_int 16)
5813 (const_int 1) (const_int 17)
5814 (const_int 4) (const_int 20)
5815 (const_int 5) (const_int 21)
5816 (const_int 8) (const_int 24)
5817 (const_int 9) (const_int 25)
5818 (const_int 12) (const_int 28)
5819 (const_int 13) (const_int 29)])))]
5821 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5822 [(set_attr "type" "sselog")
5823 (set_attr "prefix" "evex")
5824 (set_attr "mode" "V16SF")])
5826 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  The selection list pairs index i of operand 1 with
;; index i + 8 (the matching element of operand 2) for i = 0, 1, 4, 5.
;; Requires TARGET_AVX and <mask_avx512vl_condition>.
5827 (define_insn "avx_unpcklps256<mask_name>"
5828 [(set (match_operand:V8SF 0 "register_operand" "=v")
5831 (match_operand:V8SF 1 "register_operand" "v")
5832 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5833 (parallel [(const_int 0) (const_int 8)
5834 (const_int 1) (const_int 9)
5835 (const_int 4) (const_int 12)
5836 (const_int 5) (const_int 13)])))]
5837 "TARGET_AVX && <mask_avx512vl_condition>"
5838 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5839 [(set_attr "type" "sselog")
5840 (set_attr "prefix" "vex")
5841 (set_attr "mode" "V8SF")])
;; Explicitly masked 128-bit vunpcklps.  The selection list is 0, 4, 1, 5
;; (elements 0 and 1 of operand 1 alternating with the matching elements
;; of operand 2).  Operand 3 is a vector_move_operand with constraint "0C"
;; and operand 4 is a QImode mask register ("Yk"); the template prints the
;; mask as %{%4%} followed by %N3 (presumably the zero-masking suffix when
;; operand 3 is the zero constant -- confirm in ix86_print_operand).
5843 (define_insn "unpcklps128_mask"
5844 [(set (match_operand:V4SF 0 "register_operand" "=v")
5848 (match_operand:V4SF 1 "register_operand" "v")
5849 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5850 (parallel [(const_int 0) (const_int 4)
5851 (const_int 1) (const_int 5)]))
5852 (match_operand:V4SF 3 "vector_move_operand" "0C")
5853 (match_operand:QI 4 "register_operand" "Yk")))]
5855 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5856 [(set_attr "type" "sselog")
5857 (set_attr "prefix" "evex")
5858 (set_attr "mode" "V4SF")])
;; Full-width low interleave of two V8SF vectors, built from three
;; selections.  The first two use the index lists of the in-lane low
;; (0 8 1 9 4 12 5 13) and high (2 10 3 11 6 14 7 15) unpacks; the third
;; sets operand 0 from indices 0..3 and 8..11.  operands[3] and
;; operands[4] are fresh V8SF pseudos, presumably the two intermediate
;; unpack results that the final selection draws from -- confirm against
;; the complete pattern.
5860 (define_expand "vec_interleave_lowv8sf"
5864 (match_operand:V8SF 1 "register_operand" "x")
5865 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5866 (parallel [(const_int 0) (const_int 8)
5867 (const_int 1) (const_int 9)
5868 (const_int 4) (const_int 12)
5869 (const_int 5) (const_int 13)])))
5875 (parallel [(const_int 2) (const_int 10)
5876 (const_int 3) (const_int 11)
5877 (const_int 6) (const_int 14)
5878 (const_int 7) (const_int 15)])))
5879 (set (match_operand:V8SF 0 "register_operand")
5884 (parallel [(const_int 0) (const_int 1)
5885 (const_int 2) (const_int 3)
5886 (const_int 8) (const_int 9)
5887 (const_int 10) (const_int 11)])))]
5890 operands[3] = gen_reg_rtx (V8SFmode);
5891 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave (unpcklps).  The selection list is 0, 4, 1, 5:
;; elements 0 and 1 of operand 1 alternating with the matching elements of
;; operand 2.  Alternative 0 is the non-AVX two-operand form (operand 1
;; tied to the destination); alternative 1 is the AVX three-operand form.
5894 (define_insn "vec_interleave_lowv4sf"
5895 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5898 (match_operand:V4SF 1 "register_operand" "0,x")
5899 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5900 (parallel [(const_int 0) (const_int 4)
5901 (const_int 1) (const_int 5)])))]
5904 unpcklps\t{%2, %0|%0, %2}
5905 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5906 [(set_attr "isa" "noavx,avx")
5907 (set_attr "type" "sselog")
5908 (set_attr "prefix" "orig,vex")
5909 (set_attr "mode" "V4SF")])
5911 ;; These are modeled with the same vec_concat as the others so that we
5912 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selection list 1 1 3 3 5 5 7 7 duplicates each
;; odd-indexed element of the V8SF operand 1.  Requires TARGET_AVX and
;; <mask_avx512vl_condition>.
5913 (define_insn "avx_movshdup256<mask_name>"
5914 [(set (match_operand:V8SF 0 "register_operand" "=v")
5917 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5919 (parallel [(const_int 1) (const_int 1)
5920 (const_int 3) (const_int 3)
5921 (const_int 5) (const_int 5)
5922 (const_int 7) (const_int 7)])))]
5923 "TARGET_AVX && <mask_avx512vl_condition>"
5924 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5925 [(set_attr "type" "sse")
5926 (set_attr "prefix" "vex")
5927 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on the V4SF operand 1 (register or memory); the
;; selection list starts at index 1.  Requires TARGET_SSE3 and
;; <mask_avx512vl_condition>; the template is %vmovshdup and carries
;; <mask_operand2> on the destination.
5929 (define_insn "sse3_movshdup<mask_name>"
5930 [(set (match_operand:V4SF 0 "register_operand" "=v")
5933 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5935 (parallel [(const_int 1)
5939 "TARGET_SSE3 && <mask_avx512vl_condition>"
5940 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5941 [(set_attr "type" "sse")
5942 (set_attr "prefix_rep" "1")
5943 (set_attr "prefix" "maybe_vex")
5944 (set_attr "mode" "V4SF")])
;; V16SF vmovshdup: each odd index (1, 3, ..., 15) is selected twice.
;; Always EVEX-encoded per the prefix attribute.
5946 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5947 [(set (match_operand:V16SF 0 "register_operand" "=v")
5950 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5952 (parallel [(const_int 1) (const_int 1)
5953 (const_int 3) (const_int 3)
5954 (const_int 5) (const_int 5)
5955 (const_int 7) (const_int 7)
5956 (const_int 9) (const_int 9)
5957 (const_int 11) (const_int 11)
5958 (const_int 13) (const_int 13)
5959 (const_int 15) (const_int 15)])))]
5961 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5962 [(set_attr "type" "sse")
5963 (set_attr "prefix" "evex")
5964 (set_attr "mode" "V16SF")])
;; V8SF vmovsldup: the selection below picks indices 0,0,2,2,4,4,6,6.
;; The <mask_name> variant is additionally gated by <mask_avx512vl_condition>.
5966 (define_insn "avx_movsldup256<mask_name>"
5967 [(set (match_operand:V8SF 0 "register_operand" "=v")
5970 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5972 (parallel [(const_int 0) (const_int 0)
5973 (const_int 2) (const_int 2)
5974 (const_int 4) (const_int 4)
5975 (const_int 6) (const_int 6)])))]
5976 "TARGET_AVX && <mask_avx512vl_condition>"
5977 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5978 [(set_attr "type" "sse")
5979 (set_attr "prefix" "vex")
5980 (set_attr "mode" "V8SF")])
;; V4SF movsldup, enabled under TARGET_SSE3 (plus <mask_avx512vl_condition>
;; for the masked variant).  The prefix attribute is maybe_vex.
5982 (define_insn "sse3_movsldup<mask_name>"
5983 [(set (match_operand:V4SF 0 "register_operand" "=v")
5986 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5988 (parallel [(const_int 0)
5992 "TARGET_SSE3 && <mask_avx512vl_condition>"
5993 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5994 [(set_attr "type" "sse")
5995 (set_attr "prefix_rep" "1")
5996 (set_attr "prefix" "maybe_vex")
5997 (set_attr "mode" "V4SF")])
;; V16SF vmovsldup: each even index (0, 2, ..., 14) is selected twice.
;; Always EVEX-encoded per the prefix attribute.
5999 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6000 [(set (match_operand:V16SF 0 "register_operand" "=v")
6003 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6005 (parallel [(const_int 0) (const_int 0)
6006 (const_int 2) (const_int 2)
6007 (const_int 4) (const_int 4)
6008 (const_int 6) (const_int 6)
6009 (const_int 8) (const_int 8)
6010 (const_int 10) (const_int 10)
6011 (const_int 12) (const_int 12)
6012 (const_int 14) (const_int 14)])))]
6014 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6015 [(set_attr "type" "sse")
6016 (set_attr "prefix" "evex")
6017 (set_attr "mode" "V16SF")])
;; Expander: decodes the 8-bit shuffle immediate (operand 3) into eight
;; per-element selector constants for avx_shufps256_1.  Each 2-bit field of
;; the immediate is emitted twice, the second time offset by 4; the fields
;; taken from bits 4-5 and 6-7 are additionally biased by 8.
6019 (define_expand "avx_shufps256<mask_expand4_name>"
6020 [(match_operand:V8SF 0 "register_operand")
6021 (match_operand:V8SF 1 "register_operand")
6022 (match_operand:V8SF 2 "nonimmediate_operand")
6023 (match_operand:SI 3 "const_int_operand")]
6026 int mask = INTVAL (operands[3]);
6027 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6030 GEN_INT ((mask >> 0) & 3),
6031 GEN_INT ((mask >> 2) & 3),
6032 GEN_INT (((mask >> 4) & 3) + 8),
6033 GEN_INT (((mask >> 6) & 3) + 8),
6034 GEN_INT (((mask >> 0) & 3) + 4),
6035 GEN_INT (((mask >> 2) & 3) + 4),
6036 GEN_INT (((mask >> 4) & 3) + 12),
6037 GEN_INT (((mask >> 6) & 3) + 12)
6038 <mask_expand4_args>));
6042 ;; One bit in mask selects 2 elements.
;; The insn condition requires selectors 7-10 to equal selectors 3-6 plus 4.
;; The output code rebuilds the 8-bit immediate from operands 3-6 (removing
;; the bias of 8 from operands 5 and 6) and stores it back in operands[3]
;; so the template can print it as %3.
6043 (define_insn "avx_shufps256_1<mask_name>"
6044 [(set (match_operand:V8SF 0 "register_operand" "=v")
6047 (match_operand:V8SF 1 "register_operand" "v")
6048 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6049 (parallel [(match_operand 3 "const_0_to_3_operand" )
6050 (match_operand 4 "const_0_to_3_operand" )
6051 (match_operand 5 "const_8_to_11_operand" )
6052 (match_operand 6 "const_8_to_11_operand" )
6053 (match_operand 7 "const_4_to_7_operand" )
6054 (match_operand 8 "const_4_to_7_operand" )
6055 (match_operand 9 "const_12_to_15_operand")
6056 (match_operand 10 "const_12_to_15_operand")])))]
6058 && <mask_avx512vl_condition>
6059 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6060 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6061 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6062 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6065 mask = INTVAL (operands[3]);
6066 mask |= INTVAL (operands[4]) << 2;
6067 mask |= (INTVAL (operands[5]) - 8) << 4;
6068 mask |= (INTVAL (operands[6]) - 8) << 6;
6069 operands[3] = GEN_INT (mask);
6071 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6073 [(set_attr "type" "sseshuf")
6074 (set_attr "length_immediate" "1")
6075 (set_attr "prefix" "<mask_prefix>")
6076 (set_attr "mode" "V8SF")])
;; Expander: decodes the 8-bit shuffle immediate (operand 3) into four
;; selector constants for sse_shufps_v4sf.  The fields taken from bits 4-5
;; and 6-7 are biased by 4.
6078 (define_expand "sse_shufps<mask_expand4_name>"
6079 [(match_operand:V4SF 0 "register_operand")
6080 (match_operand:V4SF 1 "register_operand")
6081 (match_operand:V4SF 2 "nonimmediate_operand")
6082 (match_operand:SI 3 "const_int_operand")]
6085 int mask = INTVAL (operands[3]);
6086 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6089 GEN_INT ((mask >> 0) & 3),
6090 GEN_INT ((mask >> 2) & 3),
6091 GEN_INT (((mask >> 4) & 3) + 4),
6092 GEN_INT (((mask >> 6) & 3) + 4)
6093 <mask_expand4_args>));
;; Masked V4SF vshufps (EVEX).  Operands 3-6 are the element selectors,
;; operand 7 the merge source and operand 8 the mask register.  The output
;; code packs the selectors into an 8-bit immediate (subtracting the bias of
;; 4 from operands 5 and 6) and stores it back in operands[3].
6097 (define_insn "sse_shufps_v4sf_mask"
6098 [(set (match_operand:V4SF 0 "register_operand" "=v")
6102 (match_operand:V4SF 1 "register_operand" "v")
6103 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6104 (parallel [(match_operand 3 "const_0_to_3_operand")
6105 (match_operand 4 "const_0_to_3_operand")
6106 (match_operand 5 "const_4_to_7_operand")
6107 (match_operand 6 "const_4_to_7_operand")]))
6108 (match_operand:V4SF 7 "vector_move_operand" "0C")
6109 (match_operand:QI 8 "register_operand" "Yk")))]
6113 mask |= INTVAL (operands[3]) << 0;
6114 mask |= INTVAL (operands[4]) << 2;
6115 mask |= (INTVAL (operands[5]) - 4) << 4;
6116 mask |= (INTVAL (operands[6]) - 4) << 6;
6117 operands[3] = GEN_INT (mask);
6119 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6121 [(set_attr "type" "sseshuf")
6122 (set_attr "length_immediate" "1")
6123 (set_attr "prefix" "evex")
6124 (set_attr "mode" "V4SF")])
;; Unmasked shufps over the VI4F_128 modes.  Operands 3-4 select from
;; operand 1 (indices 0-3) and operands 5-6 from operand 2 (indices 4-7 of
;; the concatenation).  The output code packs the four selectors into an
;; 8-bit immediate and emits the SSE or AVX spelling per alternative.
6126 (define_insn "sse_shufps_<mode>"
6127 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
6128 (vec_select:VI4F_128
6129 (vec_concat:<ssedoublevecmode>
6130 (match_operand:VI4F_128 1 "register_operand" "0,x")
6131 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
6132 (parallel [(match_operand 3 "const_0_to_3_operand")
6133 (match_operand 4 "const_0_to_3_operand")
6134 (match_operand 5 "const_4_to_7_operand")
6135 (match_operand 6 "const_4_to_7_operand")])))]
6139 mask |= INTVAL (operands[3]) << 0;
6140 mask |= INTVAL (operands[4]) << 2;
6141 mask |= (INTVAL (operands[5]) - 4) << 4;
6142 mask |= (INTVAL (operands[6]) - 4) << 6;
6143 operands[3] = GEN_INT (mask);
6145 switch (which_alternative)
6148 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6150 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6155 [(set_attr "isa" "noavx,avx")
6156 (set_attr "type" "sseshuf")
6157 (set_attr "length_immediate" "1")
6158 (set_attr "prefix" "orig,vex")
6159 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory (movhps), register to register (movhlps),
;; and load from a memory source with constraint "o" (movlps on %H1).
6161 (define_insn "sse_storehps"
6162 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6164 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6165 (parallel [(const_int 2) (const_int 3)])))]
6168 %vmovhps\t{%1, %0|%q0, %1}
6169 %vmovhlps\t{%1, %d0|%d0, %1}
6170 %vmovlps\t{%H1, %d0|%d0, %H1}"
6171 [(set_attr "type" "ssemov")
6172 (set_attr "ssememalign" "64")
6173 (set_attr "prefix" "maybe_vex")
6174 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadhps: passes the operands through
;; ix86_fixup_binary_operands, emits the insn into the destination it
;; returns, and copies that to operands[0] if the two differ.
6176 (define_expand "sse_loadhps_exp"
6177 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6180 (match_operand:V4SF 1 "nonimmediate_operand")
6181 (parallel [(const_int 0) (const_int 1)]))
6182 (match_operand:V2SF 2 "nonimmediate_operand")))]
6185 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6187 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6189 /* Fix up the destination if needed. */
6190 if (dst != operands[0])
6191 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives 0-1 take operand 2 from memory (movhps), 2-3 from a register
;; (movlhps); alternative 4 has an "o" memory destination and writes
;; operand 2 to %H0 with movlps.
6196 (define_insn "sse_loadhps"
6197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6200 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6201 (parallel [(const_int 0) (const_int 1)]))
6202 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
6205 movhps\t{%2, %0|%0, %q2}
6206 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6207 movlhps\t{%2, %0|%0, %2}
6208 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6209 %vmovlps\t{%2, %H0|%H0, %2}"
6210 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6211 (set_attr "type" "ssemov")
6212 (set_attr "ssememalign" "64")
6213 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6214 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store to memory (movlps), register copy (movaps), and
;; load from memory (movlps).
6216 (define_insn "sse_storelps"
6217 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6219 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
6220 (parallel [(const_int 0) (const_int 1)])))]
6223 %vmovlps\t{%1, %0|%q0, %1}
6224 %vmovaps\t{%1, %0|%0, %1}
6225 %vmovlps\t{%1, %d0|%d0, %q1}"
6226 [(set_attr "type" "ssemov")
6227 (set_attr "prefix" "maybe_vex")
6228 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadlps: passes the operands through
;; ix86_fixup_binary_operands, emits the insn into the destination it
;; returns, and copies that to operands[0] if the two differ.
6230 (define_expand "sse_loadlps_exp"
6231 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6233 (match_operand:V2SF 2 "nonimmediate_operand")
6235 (match_operand:V4SF 1 "nonimmediate_operand")
6236 (parallel [(const_int 2) (const_int 3)]))))]
6239 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6241 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6243 /* Fix up the destination if needed. */
6244 if (dst != operands[0])
6245 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives 0-1 (register operand 2) use shufps with immediate 0xe4,
;; 2-3 load operand 2 from memory with movlps, and alternative 4 stores
;; operand 2 to a memory destination.
6250 (define_insn "sse_loadlps"
6251 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6253 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
6255 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
6256 (parallel [(const_int 2) (const_int 3)]))))]
6259 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6260 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6261 movlps\t{%2, %0|%0, %q2}
6262 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6263 %vmovlps\t{%2, %0|%q0, %2}"
6264 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6265 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6266 (set_attr "ssememalign" "64")
6267 (set_attr "length_immediate" "1,1,*,*,*")
6268 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6269 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-to-register movss on V4SF operands.  Alternative 0 is the SSE
;; form with operand 1 tied to the destination; alternative 1 is the
;; three-operand AVX form.
;; NOTE(review): the merge selector sits on a line not shown here.
6271 (define_insn "sse_movss"
6272 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6274 (match_operand:V4SF 2 "register_operand" " x,x")
6275 (match_operand:V4SF 1 "register_operand" " 0,x")
6279 movss\t{%2, %0|%0, %2}
6280 vmovss\t{%2, %1, %0|%0, %1, %2}"
6281 [(set_attr "isa" "noavx,avx")
6282 (set_attr "type" "ssemov")
6283 (set_attr "prefix" "orig,vex")
6284 (set_attr "mode" "SF")])
;; Broadcast element 0 of V4SF register operand 1 across a VF1_128_256
;; destination using vbroadcastss.
6286 (define_insn "avx2_vec_dup<mode>"
6287 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6288 (vec_duplicate:VF1_128_256
6290 (match_operand:V4SF 1 "register_operand" "x")
6291 (parallel [(const_int 0)]))))]
6293 "vbroadcastss\t{%1, %0|%0, %1}"
6294 [(set_attr "type" "sselog1")
6295 (set_attr "prefix" "vex")
6296 (set_attr "mode" "<MODE>")])
;; Broadcast element 0 of V8SF register operand 1 across the V8SF
;; destination; the source is printed with the %x operand modifier.
6298 (define_insn "avx2_vec_dupv8sf_1"
6299 [(set (match_operand:V8SF 0 "register_operand" "=x")
6302 (match_operand:V8SF 1 "register_operand" "x")
6303 (parallel [(const_int 0)]))))]
6305 "vbroadcastss\t{%x1, %0|%0, %x1}"
6306 [(set_attr "type" "sselog1")
6307 (set_attr "prefix" "vex")
6308 (set_attr "mode" "V8SF")])
;; Broadcast element 0 of a VF_512 register across a VF_512 destination.
;; The mnemonic suffix comes from <bcstscalarsuff>; EVEX-encoded.
6310 (define_insn "avx512f_vec_dup<mode>_1"
6311 [(set (match_operand:VF_512 0 "register_operand" "=v")
6312 (vec_duplicate:VF_512
6313 (vec_select:<ssescalarmode>
6314 (match_operand:VF_512 1 "register_operand" "v")
6315 (parallel [(const_int 0)]))))]
6317 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6318 [(set_attr "type" "sselog1")
6319 (set_attr "prefix" "evex")
6320 (set_attr "mode" "<MODE>")])
6322 ;; Although insertps takes register source, we prefer
6323 ;; unpcklps with register source since it is shorter.
;; Nine alternatives, in order: three unpcklps forms (register operand 2),
;; three insertps forms (memory operand 2), one movss form (memory operand 1
;; with operand 2 matching constraint "C"), and two MMX-register forms
;; (punpckldq and movd).
6324 (define_insn "*vec_concatv2sf_sse4_1"
6325 [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
6327 (match_operand:SF 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,m, 0 , m")
6328 (match_operand:SF 2 "vector_move_operand" " Yr,*x,x, m,m, m,C,*ym, C")))]
6331 unpcklps\t{%2, %0|%0, %2}
6332 unpcklps\t{%2, %0|%0, %2}
6333 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6334 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6335 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6336 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6337 %vmovss\t{%1, %0|%0, %1}
6338 punpckldq\t{%2, %0|%0, %2}
6339 movd\t{%1, %0|%0, %1}"
6340 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
6341 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6342 (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
6343 (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
6344 (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
6345 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
6346 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6348 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6349 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6350 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four alternatives: unpcklps and movss for SSE registers, punpckldq and
;; movd for MMX registers.  Operand 2 is restricted to reg_or_0_operand.
6351 (define_insn "*vec_concatv2sf_sse"
6352 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6354 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6355 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6358 unpcklps\t{%2, %0|%0, %2}
6359 movss\t{%1, %0|%0, %1}
6360 punpckldq\t{%2, %0|%0, %2}
6361 movd\t{%1, %0|%0, %1}"
6362 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6363 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands.  A register operand 2 uses
;; movlhps/vmovlhps; a memory operand 2 uses movhps/vmovhps.
6365 (define_insn "*vec_concatv4sf"
6366 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6368 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6369 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6372 movlhps\t{%2, %0|%0, %2}
6373 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6374 movhps\t{%2, %0|%0, %q2}
6375 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6376 [(set_attr "isa" "noavx,avx,noavx,avx")
6377 (set_attr "type" "ssemov")
6378 (set_attr "prefix" "orig,vex,orig,vex")
6379 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initializer for the V_128 modes: forwards operands 0 and 1 to
;; ix86_expand_vector_init with a first argument of false.
6381 (define_expand "vec_init<mode>"
6382 [(match_operand:V_128 0 "register_operand")
6386 ix86_expand_vector_init (false, operands[0], operands[1]);
6390 ;; Avoid combining registers from different units in a single alternative,
6391 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Merge a duplicated scalar (operand 2) into VI4F_128 operand 1.
;; Thirteen alternatives: 0-1 insertps, 2-3 plain scalar moves, 4-6
;; movss/vmovss, 7-9 pinsrd/vpinsrd, and 10-12 with a memory destination.
;; NOTE(review): the merge selector and the templates of alternatives 10-12
;; sit on lines not shown here.
6392 (define_insn "vec_set<mode>_0"
6393 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6394 "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6396 (vec_duplicate:VI4F_128
6397 (match_operand:<ssescalarmode> 2 "general_operand"
6398 " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6399 (match_operand:VI4F_128 1 "vector_move_operand"
6400 " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6404 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6405 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6406 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6407 %vmovd\t{%2, %0|%0, %2}
6408 movss\t{%2, %0|%0, %2}
6409 movss\t{%2, %0|%0, %2}
6410 vmovss\t{%2, %1, %0|%0, %1, %2}
6411 pinsrd\t{$0, %2, %0|%0, %2, 0}
6412 pinsrd\t{$0, %2, %0|%0, %2, 0}
6413 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6417 [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
6419 (cond [(eq_attr "alternative" "0,1,7,8,9")
6420 (const_string "sselog")
6421 (eq_attr "alternative" "11")
6422 (const_string "imov")
6423 (eq_attr "alternative" "12")
6424 (const_string "fmov")
6426 (const_string "ssemov")))
6427 (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6428 (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6429 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
6430 (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
6432 ;; A subset is vec_setv4sf.
;; Operand 3 must be a single-bit mask whose bit index is below the number
;; of V4SF elements (checked with exact_log2).  The output code turns that
;; index into the insertps immediate by shifting it left by 4.
6433 (define_insn "*vec_setv4sf_sse4_1"
6434 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6437 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
6438 (match_operand:V4SF 1 "register_operand" "0,0,x")
6439 (match_operand:SI 3 "const_int_operand")))]
6441 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6442 < GET_MODE_NUNITS (V4SFmode))"
6444 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6445 switch (which_alternative)
6449 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6451 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6456 [(set_attr "isa" "noavx,noavx,avx")
6457 (set_attr "type" "sselog")
6458 (set_attr "prefix_data16" "1,1,*")
6459 (set_attr "prefix_extra" "1")
6460 (set_attr "length_immediate" "1")
6461 (set_attr "prefix" "orig,orig,vex")
6462 (set_attr "mode" "V4SF")])
;; insertps as an unspec with an 8-bit immediate (operand 3).  When operand
;; 2 is in memory the output code computes count_s = imm >> 6, masks the
;; immediate to its low six bits, and re-addresses operand 2 as an SFmode
;; reference at byte offset count_s * 4.
6464 (define_insn "sse4_1_insertps"
6465 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6466 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
6467 (match_operand:V4SF 1 "register_operand" "0,0,x")
6468 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
6472 if (MEM_P (operands[2]))
6474 unsigned count_s = INTVAL (operands[3]) >> 6;
6476 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6477 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6479 switch (which_alternative)
6483 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6485 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6490 [(set_attr "isa" "noavx,noavx,avx")
6491 (set_attr "type" "sselog")
6492 (set_attr "prefix_data16" "1,1,*")
6493 (set_attr "prefix_extra" "1")
6494 (set_attr "length_immediate" "1")
6495 (set_attr "prefix" "orig,orig,vex")
6496 (set_attr "mode" "V4SF")])
6499 [(set (match_operand:VI4F_128 0 "memory_operand")
6501 (vec_duplicate:VI4F_128
6502 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6505 "TARGET_SSE && reload_completed"
6506 [(set (match_dup 0) (match_dup 1))]
6507 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Element-set expander for all V modes: forwards the vector (operand 0),
;; the scalar (operand 1) and the constant index (operand 2) to
;; ix86_expand_vector_set with a first argument of false.
6509 (define_expand "vec_set<mode>"
6510 [(match_operand:V 0 "register_operand")
6511 (match_operand:<ssescalarmode> 1 "register_operand")
6512 (match_operand 2 "const_int_operand")]
6515 ix86_expand_vector_set (false, operands[0], operands[1],
6516 INTVAL (operands[2]));
;; Extract element 0 of a V4SF as SF.  After reload it is split into a plain
;; SF move: a register source is rebuilt as an SFmode register with the same
;; register number, and a memory source is re-addressed as SFmode at offset 0.
6520 (define_insn_and_split "*vec_extractv4sf_0"
6521 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6523 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6524 (parallel [(const_int 0)])))]
6525 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6527 "&& reload_completed"
6528 [(set (match_dup 0) (match_dup 1))]
6530 if (REG_P (operands[1]))
6531 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
6533 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extract element operand 2 (0-3) of a V4SF register.  Alternatives 0-1
;; emit extractps to a general register or memory.  Alternatives 2-3 have an
;; SSE-register destination and are split after reload: the destination is
;; viewed as V4SF and filled by either a shufps that repeats the selected
;; index in all four positions or vec_interleave_highv4sf, chosen by a
;; switch on operand 2.
6536 (define_insn_and_split "*sse4_1_extractps"
6537 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
6539 (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
6540 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
6543 %vextractps\t{%2, %1, %0|%0, %1, %2}
6544 %vextractps\t{%2, %1, %0|%0, %1, %2}
6547 "&& reload_completed && SSE_REG_P (operands[0])"
6550 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6551 switch (INTVAL (operands[2]))
6555 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6556 operands[2], operands[2],
6557 GEN_INT (INTVAL (operands[2]) + 4),
6558 GEN_INT (INTVAL (operands[2]) + 4)));
6561 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6564 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6569 [(set_attr "isa" "*,*,noavx,avx")
6570 (set_attr "type" "sselog,sselog,*,*")
6571 (set_attr "prefix_data16" "1,1,*,*")
6572 (set_attr "prefix_extra" "1,1,*,*")
6573 (set_attr "length_immediate" "1,1,*,*")
6574 (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
6575 (set_attr "mode" "V4SF,V4SF,*,*")])
;; Extract element operand 2 from a V4SF in memory.  After reload it is
;; split into a scalar SF load from byte offset (operand 2 * 4) within the
;; original memory reference.
6577 (define_insn_and_split "*vec_extractv4sf_mem"
6578 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6580 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6581 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6584 "&& reload_completed"
6585 [(set (match_dup 0) (match_dup 1))]
6587 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; ISA name used as the pattern-name prefix for 128-bit extracts from
;; 512-bit vectors: 32-bit element modes map to avx512f, 64-bit to avx512dq.
6590 (define_mode_attr extract_type
6591 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
;; Mnemonic suffix for 128-bit extracts: 32x4 for 32-bit element modes,
;; 64x2 for 64-bit element modes.
6593 (define_mode_attr extract_suf
6594 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
;; 512-bit modes for the 128-bit extract expander.  The 64-bit element modes
;; are only enabled under TARGET_AVX512DQ.
6596 (define_mode_iterator AVX512_VEC
6597 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Expander for the masked 128-bit extract from a 512-bit vector.  Operand 2
;; (0-3) is the quarter index.  It is expanded into explicit element indices:
;; mask*4 .. mask*4+3 for the V16SI/V16SF modes, mask*2 and mask*2+1
;; otherwise.  Operand 3 is passed through to the generated insn.
;; NOTE(review): force_reg replaces a memory destination with a fresh
;; register, and nothing visible here stores the result back -- confirm.
6599 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6600 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6601 (match_operand:AVX512_VEC 1 "register_operand")
6602 (match_operand:SI 2 "const_0_to_3_operand")
6603 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6604 (match_operand:QI 4 "register_operand")]
6608 mask = INTVAL (operands[2]);
6610 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6611 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6613 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6614 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6615 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6616 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6619 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6620 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
;; Masked 128-bit extract (two 64-bit elements) from a V8FI register stored
;; to memory, merging with the existing memory contents (operand 4 must be
;; the same location as operand 0).  The condition requires an even start
;; index followed by its successor; the output code converts the start
;; index into the instruction immediate by shifting it right by 1.
;; The mask operand uses "Yk", matching the other masked-store patterns,
;; rather than "k", which would also admit k0.
6625 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6626 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6627 (vec_merge:<ssequartermode>
6628 (vec_select:<ssequartermode>
6629 (match_operand:V8FI 1 "register_operand" "v")
6630 (parallel [(match_operand 2 "const_0_to_7_operand")
6631 (match_operand 3 "const_0_to_7_operand")]))
6632 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6633 (match_operand:QI 5 "register_operand" "Yk")))]
6635 && (INTVAL (operands[2]) % 2 == 0)
6636 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
6637 && rtx_equal_p (operands[4], operands[0])"
6639 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6640 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6642 [(set_attr "type" "sselog")
6643 (set_attr "prefix_extra" "1")
6644 (set_attr "length_immediate" "1")
6645 (set_attr "memory" "store")
6646 (set_attr "prefix" "evex")
6647 (set_attr "mode" "<sseinsnmode>")])
;; Masked 128-bit extract (four 32-bit elements) from a V16FI register
;; stored to memory, merging with the existing memory contents (operand 6
;; must be the same location as operand 0).  The condition requires a start
;; index that is a multiple of 4 followed by three consecutive indices; the
;; output code converts the start index into the instruction immediate by
;; shifting it right by 2.
6649 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6650 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6651 (vec_merge:<ssequartermode>
6652 (vec_select:<ssequartermode>
6653 (match_operand:V16FI 1 "register_operand" "v")
6654 (parallel [(match_operand 2 "const_0_to_15_operand")
6655 (match_operand 3 "const_0_to_15_operand")
6656 (match_operand 4 "const_0_to_15_operand")
6657 (match_operand 5 "const_0_to_15_operand")]))
6658 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6659 (match_operand:QI 7 "register_operand" "Yk")))]
6661 && ((INTVAL (operands[2]) % 4 == 0)
6662 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6663 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6664 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
6665 && rtx_equal_p (operands[6], operands[0])"
6667 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6668 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6670 [(set_attr "type" "sselog")
6671 (set_attr "prefix_extra" "1")
6672 (set_attr "length_immediate" "1")
6673 (set_attr "memory" "store")
6674 (set_attr "prefix" "evex")
6675 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit extract (two 64-bit elements) from a V8FI register.  Operands 2
;; and 3 are the selected element indices.  The output code converts the
;; start index into the instruction immediate by shifting it right by 1, so
;; the condition must reject odd start indices: without the "% 2 == 0" test
;; a selection such as {1, 2} would match and be emitted with immediate 0.
;; The _maskm variant of this pattern carries the same test.
6677 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6678 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6679 (vec_select:<ssequartermode>
6680 (match_operand:V8FI 1 "register_operand" "v")
6681 (parallel [(match_operand 2 "const_0_to_7_operand")
6682 (match_operand 3 "const_0_to_7_operand")])))]
6683 "TARGET_AVX512DQ && (INTVAL (operands[2]) % 2 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6685 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6686 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6688 [(set_attr "type" "sselog1")
6689 (set_attr "prefix_extra" "1")
6690 (set_attr "length_immediate" "1")
6691 (set_attr "prefix" "evex")
6692 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit extract (four 32-bit elements) from a V16FI register.  Operands
;; 2-5 are the selected element indices and must be consecutive.  The output
;; code converts the start index into the instruction immediate by shifting
;; it right by 2, so the condition must also require the start index to be a
;; multiple of 4: otherwise a selection such as {1, 2, 3, 4} would match and
;; be emitted with immediate 0.  The _maskm variant carries the same test.
6694 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6695 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6696 (vec_select:<ssequartermode>
6697 (match_operand:V16FI 1 "register_operand" "v")
6698 (parallel [(match_operand 2 "const_0_to_15_operand")
6699 (match_operand 3 "const_0_to_15_operand")
6700 (match_operand 4 "const_0_to_15_operand")
6701 (match_operand 5 "const_0_to_15_operand")])))]
6703 && (INTVAL (operands[2]) % 4 == 0 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6704 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6705 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6707 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6708 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6710 [(set_attr "type" "sselog1")
6711 (set_attr "prefix_extra" "1")
6712 (set_attr "length_immediate" "1")
6713 (set_attr "prefix" "evex")
6714 (set_attr "mode" "<sseinsnmode>")])
;; ISA name used as the pattern-name prefix for 256-bit extracts from
;; 512-bit vectors: 32-bit element modes map to avx512dq, 64-bit to avx512f.
6716 (define_mode_attr extract_type_2
6717 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
;; Mnemonic suffix for 256-bit extracts: 32x8 for 32-bit element modes,
;; 64x4 for 64-bit element modes.
6719 (define_mode_attr extract_suf_2
6720 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; 512-bit modes for the 256-bit extract expander.  The 32-bit element modes
;; are only enabled under TARGET_AVX512DQ.
6722 (define_mode_iterator AVX512_VEC_2
6723 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Expander for the masked 256-bit extract from a 512-bit vector.  Operand 2
;; (0 or 1) picks gen_vec_extract_lo_<mode>_mask or
;; gen_vec_extract_hi_<mode>_mask; operands 3 and 4 are passed through to it.
;; Operand 0 has mode <ssehalfvecmode>, so the force_reg below must use that
;; mode too (the avx512vl_vextractf128<mode> expander does the same); the
;; previous <ssequartermode> did not match the operand's mode.
;; NOTE(review): force_reg replaces a memory destination with a fresh
;; register, and nothing visible here stores the result back -- confirm.
6725 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6726 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6727 (match_operand:AVX512_VEC_2 1 "register_operand")
6728 (match_operand:SI 2 "const_0_to_1_operand")
6729 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6730 (match_operand:QI 4 "register_operand")]
6733 rtx (*insn)(rtx, rtx, rtx, rtx);
6735 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6736 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6738 switch (INTVAL (operands[2]))
6741 insn = gen_vec_extract_lo_<mode>_mask;
6744 insn = gen_vec_extract_hi_<mode>_mask;
6750 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6755 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6756 (vec_select:<ssehalfvecmode>
6757 (match_operand:V8FI 1 "nonimmediate_operand")
6758 (parallel [(const_int 0) (const_int 1)
6759 (const_int 2) (const_int 3)])))]
6760 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6761 && reload_completed"
6764 rtx op1 = operands[1];
6766 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6768 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6769 emit_move_insn (operands[0], op1);
;; Masked store of the low half (elements 0-3) of a V8FI register to memory,
;; merging with the existing memory contents (operand 2 must be the same
;; location as operand 0).  Emits vextract*64x4 with immediate 0.
6773 (define_insn "vec_extract_lo_<mode>_maskm"
6774 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6775 (vec_merge:<ssehalfvecmode>
6776 (vec_select:<ssehalfvecmode>
6777 (match_operand:V8FI 1 "register_operand" "v")
6778 (parallel [(const_int 0) (const_int 1)
6779 (const_int 2) (const_int 3)]))
6780 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6781 (match_operand:QI 3 "register_operand" "Yk")))]
6783 && rtx_equal_p (operands[2], operands[0])"
6784 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6785 [(set_attr "type" "sselog1")
6786 (set_attr "prefix_extra" "1")
6787 (set_attr "length_immediate" "1")
6788 (set_attr "prefix" "evex")
6789 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0-3) of a V8FI operand.  Source and
;; destination may not both be memory.  The visible return emits
;; vextract*64x4 with immediate 0.
;; NOTE(review): the code choosing between return paths is not shown here.
6791 (define_insn "vec_extract_lo_<mode><mask_name>"
6792 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6793 (vec_select:<ssehalfvecmode>
6794 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6795 (parallel [(const_int 0) (const_int 1)
6796 (const_int 2) (const_int 3)])))]
6797 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6800 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6804 [(set_attr "type" "sselog1")
6805 (set_attr "prefix_extra" "1")
6806 (set_attr "length_immediate" "1")
6807 (set_attr "prefix" "evex")
6808 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4-7) of a V8FI register to
;; memory, merging with the existing memory contents (operand 2 must be the
;; same location as operand 0).  Emits vextract*64x4 with immediate 1.
6810 (define_insn "vec_extract_hi_<mode>_maskm"
6811 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6812 (vec_merge:<ssehalfvecmode>
6813 (vec_select:<ssehalfvecmode>
6814 (match_operand:V8FI 1 "register_operand" "v")
6815 (parallel [(const_int 4) (const_int 5)
6816 (const_int 6) (const_int 7)]))
6817 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6818 (match_operand:QI 3 "register_operand" "Yk")))]
6820 && rtx_equal_p (operands[2], operands[0])"
6821 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6822 [(set_attr "type" "sselog")
6823 (set_attr "prefix_extra" "1")
6824 (set_attr "length_immediate" "1")
6825 (set_attr "memory" "store")
6826 (set_attr "prefix" "evex")
6827 (set_attr "mode" "<sseinsnmode>")])
;; Extract the high half (elements 4-7) of a V8FI register with
;; vextract*64x4 and immediate 1.
6829 (define_insn "vec_extract_hi_<mode><mask_name>"
6830 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6831 (vec_select:<ssehalfvecmode>
6832 (match_operand:V8FI 1 "register_operand" "v")
6833 (parallel [(const_int 4) (const_int 5)
6834 (const_int 6) (const_int 7)])))]
6836 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6837 [(set_attr "type" "sselog1")
6838 (set_attr "prefix_extra" "1")
6839 (set_attr "length_immediate" "1")
6840 (set_attr "prefix" "evex")
6841 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 8-15) of a V16FI register to
;; memory, merging with the existing memory contents (operand 2 must be the
;; same location as operand 0).  Emits vextract*32x8 with immediate 1.
;; The mask operand uses "Yk", matching the V8FI _maskm patterns, rather
;; than "k", which would also admit k0.
6843 (define_insn "vec_extract_hi_<mode>_maskm"
6844 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6845 (vec_merge:<ssehalfvecmode>
6846 (vec_select:<ssehalfvecmode>
6847 (match_operand:V16FI 1 "register_operand" "v")
6848 (parallel [(const_int 8) (const_int 9)
6849 (const_int 10) (const_int 11)
6850 (const_int 12) (const_int 13)
6851 (const_int 14) (const_int 15)]))
6852 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6853 (match_operand:QI 3 "register_operand" "Yk")))]
6855 && rtx_equal_p (operands[2], operands[0])"
6856 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6857 [(set_attr "type" "sselog1")
6858 (set_attr "prefix_extra" "1")
6859 (set_attr "length_immediate" "1")
6860 (set_attr "prefix" "evex")
6861 (set_attr "mode" "<sseinsnmode>")])
;; Extract the high half (elements 8-15) of a V16FI register.  Alternative
;; 0 (isa avx512dq) emits vextract*32x8 and carries the mask operand;
;; alternative 1 (isa noavx512dq) emits vextracti64x4 without a mask.
6863 (define_insn "vec_extract_hi_<mode><mask_name>"
6864 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6865 (vec_select:<ssehalfvecmode>
6866 (match_operand:V16FI 1 "register_operand" "v,v")
6867 (parallel [(const_int 8) (const_int 9)
6868 (const_int 10) (const_int 11)
6869 (const_int 12) (const_int 13)
6870 (const_int 14) (const_int 15)])))]
6871 "TARGET_AVX512F && <mask_avx512dq_condition>"
6873 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6874 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6875 [(set_attr "type" "sselog1")
6876 (set_attr "prefix_extra" "1")
6877 (set_attr "isa" "avx512dq,noavx512dq")
6878 (set_attr "length_immediate" "1")
6879 (set_attr "prefix" "evex")
6880 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 128-bit extract from a 256-bit VI48F_256 vector
;; (requires AVX512DQ and AVX512VL).  Operand 2 (0 or 1) picks
;; gen_vec_extract_lo_<mode>_mask or gen_vec_extract_hi_<mode>_mask;
;; operands 3 and 4 are passed through to it.
;; NOTE(review): force_reg replaces a memory destination with a fresh
;; register, and nothing visible here stores the result back -- confirm.
6882 (define_expand "avx512vl_vextractf128<mode>"
6883 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6884 (match_operand:VI48F_256 1 "register_operand")
6885 (match_operand:SI 2 "const_0_to_1_operand")
6886 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6887 (match_operand:QI 4 "register_operand")]
6888 "TARGET_AVX512DQ && TARGET_AVX512VL"
6890 rtx (*insn)(rtx, rtx, rtx, rtx);
6892 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6893 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6895 switch (INTVAL (operands[2]))
6898 insn = gen_vec_extract_lo_<mode>_mask;
6901 insn = gen_vec_extract_hi_<mode>_mask;
6907 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Unmasked 128-bit extract from a 256-bit V_256 vector.  Operand 2 (constant
;; 0 or 1) selects gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>,
;; which is then emitted with (operands[0], operands[1]).
6911 (define_expand "avx_vextractf128<mode>"
6912 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6913 (match_operand:V_256 1 "register_operand")
6914 (match_operand:SI 2 "const_0_to_1_operand")]
6917 rtx (*insn)(rtx, rtx);
6919 switch (INTVAL (operands[2]))
6922 insn = gen_vec_extract_lo_<mode>;
6925 insn = gen_vec_extract_hi_<mode>;
6931 emit_insn (insn (operands[0], operands[1]));
;; Lower-half extract for 512-bit V16FI vectors: operand 0 receives elements
;; 0..7 of operand 1.  The condition rejects the case where both operands are
;; in memory.  The visible output statement emits vextract<shuffletype>32x8
;; with immediate 0x0 and the <mask_operand2> decoration.
6935 (define_insn "vec_extract_lo_<mode><mask_name>"
6936 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6937 (vec_select:<ssehalfvecmode>
6938 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6939 (parallel [(const_int 0) (const_int 1)
6940 (const_int 2) (const_int 3)
6941 (const_int 4) (const_int 5)
6942 (const_int 6) (const_int 7)])))]
6944 && <mask_mode512bit_condition>
6945 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6948 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
;; After reload (reload_completed), a low-half selection (elements 0..7) of a
;; V16FI operand is replaced by a plain move: op1 is re-expressed in the
;; half-width mode -- either as the same hard register number in
;; <ssehalfvecmode> or through gen_lowpart -- and moved into operand 0.
;; Not applicable when both operands are memory.
6954 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6955 (vec_select:<ssehalfvecmode>
6956 (match_operand:V16FI 1 "nonimmediate_operand")
6957 (parallel [(const_int 0) (const_int 1)
6958 (const_int 2) (const_int 3)
6959 (const_int 4) (const_int 5)
6960 (const_int 6) (const_int 7)])))]
6961 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6962 && reload_completed"
6965 rtx op1 = operands[1];
6967 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6969 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6970 emit_move_insn (operands[0], op1);
;; Lower-half extract for 256-bit VI8F_256 vectors: operand 0 receives
;; elements 0 and 1 of operand 1.  The visible output statement emits
;; vextract<shuffletype>64x2 with immediate 0x0 and a literal %{%3%} mask
;; decoration.
;; NOTE(review): sibling patterns print the mask through <mask_operand2>;
;; the literal %{%3%} here and the "XI" mode attribute on a 256-bit source
;; look inconsistent with them -- confirm.
6974 (define_insn "vec_extract_lo_<mode><mask_name>"
6975 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
6976 (vec_select:<ssehalfvecmode>
6977 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
6978 (parallel [(const_int 0) (const_int 1)])))]
6980 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
6981 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6984 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
6988 [(set_attr "type" "sselog")
6989 (set_attr "prefix_extra" "1")
6990 (set_attr "length_immediate" "1")
6991 (set_attr "memory" "none,store")
6992 (set_attr "prefix" "evex")
6993 (set_attr "mode" "XI")])
;; After reload, a low-half selection (elements 0 and 1) of a VI8F_256
;; operand is replaced by a plain move of op1 re-expressed in the half-width
;; mode (same hard register number, or gen_lowpart) into operand 0.
;; Not applicable when both operands are memory.
6996 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6997 (vec_select:<ssehalfvecmode>
6998 (match_operand:VI8F_256 1 "nonimmediate_operand")
6999 (parallel [(const_int 0) (const_int 1)])))]
7000 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7001 && reload_completed"
7004 rtx op1 = operands[1];
7006 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7008 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7009 emit_move_insn (operands[0], op1);
;; Upper-half extract for 256-bit VI8F_256 vectors: operand 0 receives
;; elements 2 and 3 of register operand 1.  With TARGET_AVX512DQ and
;; TARGET_AVX512VL the output is vextract<shuffletype>64x2 (with the
;; <mask_operand2> decoration); the other visible return emits
;; vextract<i128>.  Both use immediate 0x1.
;; NOTE(review): the "prefix" attribute is "vex" although one output form
;; is an AVX-512 instruction -- confirm.
7013 (define_insn "vec_extract_hi_<mode><mask_name>"
7014 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7015 (vec_select:<ssehalfvecmode>
7016 (match_operand:VI8F_256 1 "register_operand" "v,v")
7017 (parallel [(const_int 2) (const_int 3)])))]
7020 if (TARGET_AVX512DQ && TARGET_AVX512VL)
7021 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7023 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7025 [(set_attr "type" "sselog")
7026 (set_attr "prefix_extra" "1")
7027 (set_attr "length_immediate" "1")
7028 (set_attr "memory" "none,store")
7029 (set_attr "prefix" "vex")
7030 (set_attr "mode" "<sseinsnmode>")])
;; After reload, a low-half selection (elements 0..3) of a VI4F_256 operand
;; is replaced by a plain move of op1 re-expressed in the half-width mode
;; (same hard register number, or gen_lowpart) into operand 0.
;; Not applicable when both operands are memory.
7033 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7034 (vec_select:<ssehalfvecmode>
7035 (match_operand:VI4F_256 1 "nonimmediate_operand")
7036 (parallel [(const_int 0) (const_int 1)
7037 (const_int 2) (const_int 3)])))]
7038 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
7041 rtx op1 = operands[1];
7043 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7045 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7046 emit_move_insn (operands[0], op1);
;; Lower-half extract for 256-bit VI4F_256 vectors: operand 0 receives
;; elements 0..3 of operand 1.  The visible output statement emits
;; vextract<shuffletype>32x4 with immediate 0x0 and the <mask_operand2>
;; decoration.
7051 (define_insn "vec_extract_lo_<mode><mask_name>"
7052 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7053 (vec_select:<ssehalfvecmode>
7054 (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
7055 (parallel [(const_int 0) (const_int 1)
7056 (const_int 2) (const_int 3)])))]
7057 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7060 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7064 [(set_attr "type" "sselog1")
7065 (set_attr "prefix_extra" "1")
7066 (set_attr "length_immediate" "1")
7067 (set_attr "prefix" "evex")
7068 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the low half of a VI4F_256 register into memory.
;;   operand 0: destination memory (half-width vector)
;;   operand 1: 256-bit source register; elements 0..3 are selected
;;   operand 2: merge source; must be the same memory as operand 0
;;              (constraint "0" plus the rtx_equal_p check), so unselected
;;              elements keep their old memory contents
;;   operand 3: QImode write-mask, printed as %{%3%}
;; The mask uses constraint "Yk" rather than "k": it is emitted as an EVEX
;; predicate, and k0 cannot be used for that purpose, so it must be
;; excluded from allocation (as in the other masked patterns in this file).
7070 (define_insn "vec_extract_lo_<mode>_maskm"
7071 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7072 (vec_merge:<ssehalfvecmode>
7073 (vec_select:<ssehalfvecmode>
7074 (match_operand:VI4F_256 1 "register_operand" "v")
7075 (parallel [(const_int 0) (const_int 1)
7076 (const_int 2) (const_int 3)]))
7077 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7078 (match_operand:QI 3 "register_operand" "Yk")))]
7079 "TARGET_AVX512VL && TARGET_AVX512F
7080 && rtx_equal_p (operands[2], operands[0])"
7081 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7082 [(set_attr "type" "sselog1")
7083 (set_attr "prefix_extra" "1")
7084 (set_attr "length_immediate" "1")
7085 (set_attr "prefix" "evex")
7086 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half of a VI4F_256 register into memory.
;;   operand 0: destination memory (half-width vector)
;;   operand 1: 256-bit source register; elements 4..7 are selected
;;   operand 2: merge source; must be the same memory as operand 0
;;              (constraint "0" plus the rtx_equal_p check), so unselected
;;              elements keep their old memory contents
;;   operand 3: QImode write-mask, printed as %{%3%}
;; The mask operand of vec_merge is a scalar bit mask held in a mask
;; register, so it is declared QImode (matching vec_extract_lo_<mode>_maskm)
;; instead of the vector mode <ssehalfvecmode>.  It uses constraint "Yk"
;; rather than "k" because it is emitted as an EVEX predicate and k0 cannot
;; be used for that purpose.
7088 (define_insn "vec_extract_hi_<mode>_maskm"
7089 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7090 (vec_merge:<ssehalfvecmode>
7091 (vec_select:<ssehalfvecmode>
7092 (match_operand:VI4F_256 1 "register_operand" "v")
7093 (parallel [(const_int 4) (const_int 5)
7094 (const_int 6) (const_int 7)]))
7095 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7096 (match_operand:QI 3 "register_operand" "Yk")))]
7097 "TARGET_AVX512F && TARGET_AVX512VL
7098 && rtx_equal_p (operands[2], operands[0])"
7099 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7100 [(set_attr "type" "sselog1")
7101 (set_attr "prefix_extra" "1")
7102 (set_attr "length_immediate" "1")
7103 (set_attr "prefix" "evex")
7104 (set_attr "mode" "<sseinsnmode>")])
;; Upper-half extract for 256-bit VI4F_256 vectors: operand 0 receives
;; elements 4..7 of register operand 1.  With TARGET_AVX512VL the output is
;; vextract<shuffletype>32x4 (with the <mask_operand2> decoration); the other
;; visible return emits vextract<i128>.  The "prefix" attribute follows the
;; same test: "evex" when TARGET_AVX512VL, otherwise "vex".
7106 (define_insn "vec_extract_hi_<mode><mask_name>"
7107 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7108 (vec_select:<ssehalfvecmode>
7109 (match_operand:VI4F_256 1 "register_operand" "v")
7110 (parallel [(const_int 4) (const_int 5)
7111 (const_int 6) (const_int 7)])))]
7112 "TARGET_AVX && <mask_avx512vl_condition>"
7114 if (TARGET_AVX512VL)
7115 return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7117 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7119 [(set_attr "type" "sselog1")
7120 (set_attr "prefix_extra" "1")
7121 (set_attr "length_immediate" "1")
7122 (set (attr "prefix")
7124 (match_test "TARGET_AVX512VL")
7125 (const_string "evex")
7126 (const_string "vex")))
7127 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..15) of a V32HI vector as V16HI.  Matched while at
;; most one operand is in memory; after reload it is split into a plain
;; (set op0 op1) where op1 is narrowed to V16HImode: a register operand keeps
;; its register number, the other visible path uses adjust_address at
;; offset 0.
7129 (define_insn_and_split "vec_extract_lo_v32hi"
7130 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7132 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7133 (parallel [(const_int 0) (const_int 1)
7134 (const_int 2) (const_int 3)
7135 (const_int 4) (const_int 5)
7136 (const_int 6) (const_int 7)
7137 (const_int 8) (const_int 9)
7138 (const_int 10) (const_int 11)
7139 (const_int 12) (const_int 13)
7140 (const_int 14) (const_int 15)])))]
7141 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7143 "&& reload_completed"
7144 [(set (match_dup 0) (match_dup 1))]
7146 if (REG_P (operands[1]))
7147 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
7149 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; High half (elements 16..31) of a V32HI vector as V16HI, emitted as
;; vextracti64x4 with immediate 0x1.  Destination is a register or memory
;; (second alternative is tagged "store" in the "memory" attribute).
7152 (define_insn "vec_extract_hi_v32hi"
7153 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7155 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
7156 (parallel [(const_int 16) (const_int 17)
7157 (const_int 18) (const_int 19)
7158 (const_int 20) (const_int 21)
7159 (const_int 22) (const_int 23)
7160 (const_int 24) (const_int 25)
7161 (const_int 26) (const_int 27)
7162 (const_int 28) (const_int 29)
7163 (const_int 30) (const_int 31)])))]
7165 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7166 [(set_attr "type" "sselog")
7167 (set_attr "prefix_extra" "1")
7168 (set_attr "length_immediate" "1")
7169 (set_attr "memory" "none,store")
7170 (set_attr "prefix" "evex")
7171 (set_attr "mode" "XI")])
;; Low half (elements 0..7) of a V16HI vector as V8HI.  Matched while at most
;; one operand is in memory; after reload it is split into a plain
;; (set op0 op1) where op1 is narrowed to V8HImode: a register operand keeps
;; its register number, the other visible path uses adjust_address at
;; offset 0.
7173 (define_insn_and_split "vec_extract_lo_v16hi"
7174 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7176 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
7177 (parallel [(const_int 0) (const_int 1)
7178 (const_int 2) (const_int 3)
7179 (const_int 4) (const_int 5)
7180 (const_int 6) (const_int 7)])))]
7181 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7183 "&& reload_completed"
7184 [(set (match_dup 0) (match_dup 1))]
7186 if (REG_P (operands[1]))
7187 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
7189 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; High half (elements 8..15) of a V16HI register as V8HI, emitted as
;; vextract%~128 with immediate 0x1.  Destination is a register or memory
;; (second alternative is tagged "store" in the "memory" attribute).
7192 (define_insn "vec_extract_hi_v16hi"
7193 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7195 (match_operand:V16HI 1 "register_operand" "x,x")
7196 (parallel [(const_int 8) (const_int 9)
7197 (const_int 10) (const_int 11)
7198 (const_int 12) (const_int 13)
7199 (const_int 14) (const_int 15)])))]
7201 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7202 [(set_attr "type" "sselog")
7203 (set_attr "prefix_extra" "1")
7204 (set_attr "length_immediate" "1")
7205 (set_attr "memory" "none,store")
7206 (set_attr "prefix" "vex")
7207 (set_attr "mode" "OI")])
;; Low half (elements 0..31) of a V64QI vector as V32QI.  Matched while at
;; most one operand is in memory; after reload it is split into a plain
;; (set op0 op1) where op1 is narrowed to V32QImode: a register operand keeps
;; its register number, the other visible path uses adjust_address at
;; offset 0.
7209 (define_insn_and_split "vec_extract_lo_v64qi"
7210 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7212 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7213 (parallel [(const_int 0) (const_int 1)
7214 (const_int 2) (const_int 3)
7215 (const_int 4) (const_int 5)
7216 (const_int 6) (const_int 7)
7217 (const_int 8) (const_int 9)
7218 (const_int 10) (const_int 11)
7219 (const_int 12) (const_int 13)
7220 (const_int 14) (const_int 15)
7221 (const_int 16) (const_int 17)
7222 (const_int 18) (const_int 19)
7223 (const_int 20) (const_int 21)
7224 (const_int 22) (const_int 23)
7225 (const_int 24) (const_int 25)
7226 (const_int 26) (const_int 27)
7227 (const_int 28) (const_int 29)
7228 (const_int 30) (const_int 31)])))]
7229 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7231 "&& reload_completed"
7232 [(set (match_dup 0) (match_dup 1))]
7234 if (REG_P (operands[1]))
7235 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
7237 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; High half (elements 32..63) of a V64QI vector as V32QI, emitted as
;; vextracti64x4 with immediate 0x1.  Destination is a register or memory
;; (second alternative is tagged "store" in the "memory" attribute).
7240 (define_insn "vec_extract_hi_v64qi"
7241 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7243 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
7244 (parallel [(const_int 32) (const_int 33)
7245 (const_int 34) (const_int 35)
7246 (const_int 36) (const_int 37)
7247 (const_int 38) (const_int 39)
7248 (const_int 40) (const_int 41)
7249 (const_int 42) (const_int 43)
7250 (const_int 44) (const_int 45)
7251 (const_int 46) (const_int 47)
7252 (const_int 48) (const_int 49)
7253 (const_int 50) (const_int 51)
7254 (const_int 52) (const_int 53)
7255 (const_int 54) (const_int 55)
7256 (const_int 56) (const_int 57)
7257 (const_int 58) (const_int 59)
7258 (const_int 60) (const_int 61)
7259 (const_int 62) (const_int 63)])))]
7261 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7262 [(set_attr "type" "sselog")
7263 (set_attr "prefix_extra" "1")
7264 (set_attr "length_immediate" "1")
7265 (set_attr "memory" "none,store")
7266 (set_attr "prefix" "evex")
7267 (set_attr "mode" "XI")])
;; Low half (elements 0..15) of a V32QI vector as V16QI.  Matched while at
;; most one operand is in memory; after reload it is split into a plain
;; (set op0 op1) where op1 is narrowed to V16QImode: a register operand keeps
;; its register number, the other visible path uses adjust_address at
;; offset 0.
7269 (define_insn_and_split "vec_extract_lo_v32qi"
7270 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7272 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
7273 (parallel [(const_int 0) (const_int 1)
7274 (const_int 2) (const_int 3)
7275 (const_int 4) (const_int 5)
7276 (const_int 6) (const_int 7)
7277 (const_int 8) (const_int 9)
7278 (const_int 10) (const_int 11)
7279 (const_int 12) (const_int 13)
7280 (const_int 14) (const_int 15)])))]
7281 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7283 "&& reload_completed"
7284 [(set (match_dup 0) (match_dup 1))]
7286 if (REG_P (operands[1]))
7287 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
7289 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; High half (elements 16..31) of a V32QI register as V16QI, emitted as
;; vextract%~128 with immediate 0x1.  Destination is a register or memory
;; (second alternative is tagged "store" in the "memory" attribute).
7292 (define_insn "vec_extract_hi_v32qi"
7293 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7295 (match_operand:V32QI 1 "register_operand" "x,x")
7296 (parallel [(const_int 16) (const_int 17)
7297 (const_int 18) (const_int 19)
7298 (const_int 20) (const_int 21)
7299 (const_int 22) (const_int 23)
7300 (const_int 24) (const_int 25)
7301 (const_int 26) (const_int 27)
7302 (const_int 28) (const_int 29)
7303 (const_int 30) (const_int 31)])))]
7305 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7306 [(set_attr "type" "sselog")
7307 (set_attr "prefix_extra" "1")
7308 (set_attr "length_immediate" "1")
7309 (set_attr "memory" "none,store")
7310 (set_attr "prefix" "vex")
7311 (set_attr "mode" "OI")])
7313 ;; Modes handled by vec_extract patterns.
;; Each row lists the 512-, 256- and 128-bit mode for one element type.
;; 128-bit modes are unconditional; 256-bit modes require TARGET_AVX;
;; 512-bit byte/word modes require TARGET_AVX512BW and the remaining 512-bit
;; modes require TARGET_AVX512F.
7314 (define_mode_iterator VEC_EXTRACT_MODE
7315 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7316 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7317 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7318 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7319 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7320 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Scalar element extract for every mode in VEC_EXTRACT_MODE.
;;   operand 0: scalar destination register (<ssescalarmode>)
;;   operand 1: source vector register
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_extract, called with `false`
;; as its first argument and the integer value of operand 2 as the index.
7322 (define_expand "vec_extract<mode>"
7323 [(match_operand:<ssescalarmode> 0 "register_operand")
7324 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7325 (match_operand 2 "const_int_operand")]
7328 ix86_expand_vector_extract (false, operands[0], operands[1],
7329 INTVAL (operands[2]));
7333 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7335 ;; Parallel double-precision floating point element swizzling
7337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd on V8DF.  The index list is 1, 9, 3, 11, 5, 13, 7, 15,
;; i.e. the odd-numbered element of each pair, alternating between the two
;; inputs.  Operand 2 may be a register or memory; the output template
;; carries the <mask_operand3> decoration.
7339 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7340 [(set (match_operand:V8DF 0 "register_operand" "=v")
7343 (match_operand:V8DF 1 "nonimmediate_operand" "v")
7344 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7345 (parallel [(const_int 1) (const_int 9)
7346 (const_int 3) (const_int 11)
7347 (const_int 5) (const_int 13)
7348 (const_int 7) (const_int 15)])))]
7350 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7351 [(set_attr "type" "sselog")
7352 (set_attr "prefix" "evex")
7353 (set_attr "mode" "V8DF")])
7355 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd on V4DF with index list 1, 5, 3, 7.  Operand 2 may be a
;; register or memory; the output template carries the <mask_operand3>
;; decoration.
7356 (define_insn "avx_unpckhpd256<mask_name>"
7357 [(set (match_operand:V4DF 0 "register_operand" "=v")
7360 (match_operand:V4DF 1 "register_operand" "v")
7361 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7362 (parallel [(const_int 1) (const_int 5)
7363 (const_int 3) (const_int 7)])))]
7364 "TARGET_AVX && <mask_avx512vl_condition>"
7365 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7366 [(set_attr "type" "sselog")
7367 (set_attr "prefix" "vex")
7368 (set_attr "mode" "V4DF")])
;; Interleave-high for V4DF, expanded as three steps: two intermediate
;; shuffles of operands 1 and 2 with index lists 0,4,2,6 and 1,5,3,7, then a
;; final selection 2,3,6,7 into operand 0.  operands[3] and operands[4] are
;; fresh V4DF pseudo registers allocated in the preparation code.
7370 (define_expand "vec_interleave_highv4df"
7374 (match_operand:V4DF 1 "register_operand" "x")
7375 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7376 (parallel [(const_int 0) (const_int 4)
7377 (const_int 2) (const_int 6)])))
7383 (parallel [(const_int 1) (const_int 5)
7384 (const_int 3) (const_int 7)])))
7385 (set (match_operand:V4DF 0 "register_operand")
7390 (parallel [(const_int 2) (const_int 3)
7391 (const_int 6) (const_int 7)])))]
7394 operands[3] = gen_reg_rtx (V4DFmode);
7395 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpckhpd on V2DF with index list 1, 3.
;;   operand 3: merge source (constraint "0C": tied to the destination, or a
;;              constant accepted by vector_move_operand)
;;   operand 4: QImode mask register ("Yk"), printed as %{%4%}
;; %N3 is appended after the mask in the output template.
7399 (define_insn "avx512vl_unpckhpd128_mask"
7400 [(set (match_operand:V2DF 0 "register_operand" "=v")
7404 (match_operand:V2DF 1 "register_operand" "v")
7405 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7406 (parallel [(const_int 1) (const_int 3)]))
7407 (match_operand:V2DF 3 "vector_move_operand" "0C")
7408 (match_operand:QI 4 "register_operand" "Yk")))]
7410 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7411 [(set_attr "type" "sselog")
7412 (set_attr "prefix" "evex")
7413 (set_attr "mode" "V2DF")])
;; Interleave-high expander for V2DF.  Both inputs may be registers or
;; memory; if ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the
;; operand combination, operand 2 is forced into a register.
7415 (define_expand "vec_interleave_highv2df"
7416 [(set (match_operand:V2DF 0 "register_operand")
7419 (match_operand:V2DF 1 "nonimmediate_operand")
7420 (match_operand:V2DF 2 "nonimmediate_operand"))
7421 (parallel [(const_int 1)
7425 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7426 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matcher for V2DF interleave-high, valid when TARGET_SSE2 and
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) hold.  Six
;; alternatives, in order (isa attribute in parentheses):
;;   0 unpckhpd  (noavx)      3 movlpd from %H1   (noavx)
;;   1 vunpckhpd (avx)        4 vmovlpd from %H1  (avx)
;;   2 %vmovddup from %H1 (sse3)   5 %vmovhpd to a memory destination (*)
7429 (define_insn "*vec_interleave_highv2df"
7430 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7433 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7434 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7435 (parallel [(const_int 1)
7437 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7439 unpckhpd\t{%2, %0|%0, %2}
7440 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7441 %vmovddup\t{%H1, %0|%0, %H1}
7442 movlpd\t{%H1, %0|%0, %H1}
7443 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7444 %vmovhpd\t{%1, %0|%q0, %1}"
7445 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7446 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7447 (set_attr "ssememalign" "64")
7448 (set_attr "prefix_data16" "*,*,*,1,*,1")
7449 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7450 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Expander for the 512-bit movddup form on V8DF: a single register-or-memory
;; input (operand 1) with index list 0, 8, 2, 10, 4, 12, 6, 14.
7452 (define_expand "avx512f_movddup512<mask_name>"
7453 [(set (match_operand:V8DF 0 "register_operand")
7456 (match_operand:V8DF 1 "nonimmediate_operand")
7458 (parallel [(const_int 0) (const_int 8)
7459 (const_int 2) (const_int 10)
7460 (const_int 4) (const_int 12)
7461 (const_int 6) (const_int 14)])))]
;; Expander for 512-bit unpcklpd on V8DF: register operand 1 and
;; register-or-memory operand 2 with index list 0, 8, 2, 10, 4, 12, 6, 14.
7464 (define_expand "avx512f_unpcklpd512<mask_name>"
7465 [(set (match_operand:V8DF 0 "register_operand")
7468 (match_operand:V8DF 1 "register_operand")
7469 (match_operand:V8DF 2 "nonimmediate_operand"))
7470 (parallel [(const_int 0) (const_int 8)
7471 (const_int 2) (const_int 10)
7472 (const_int 4) (const_int 12)
7473 (const_int 6) (const_int 14)])))]
;; Matcher shared by the 512-bit movddup and unpcklpd expanders (index list
;; 0, 8, 2, 10, 4, 12, 6, 14).  Alternative 0 ties operand 2 to operand 1
;; (constraint "1") and emits vmovddup; alternative 1 takes two distinct
;; inputs and emits vunpcklpd.  Both carry the <mask_operand3> decoration.
7476 (define_insn "*avx512f_unpcklpd512<mask_name>"
7477 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7480 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7481 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7482 (parallel [(const_int 0) (const_int 8)
7483 (const_int 2) (const_int 10)
7484 (const_int 4) (const_int 12)
7485 (const_int 6) (const_int 14)])))]
7488 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7489 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7490 [(set_attr "type" "sselog")
7491 (set_attr "prefix" "evex")
7492 (set_attr "mode" "V8DF")])
7494 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup form on V4DF: a single register-or-memory
;; input (operand 1) with index list 0, 4, 2, 6.
7495 (define_expand "avx_movddup256<mask_name>"
7496 [(set (match_operand:V4DF 0 "register_operand")
7499 (match_operand:V4DF 1 "nonimmediate_operand")
7501 (parallel [(const_int 0) (const_int 4)
7502 (const_int 2) (const_int 6)])))]
7503 "TARGET_AVX && <mask_avx512vl_condition>")
;; Expander for 256-bit unpcklpd on V4DF: register operand 1 and
;; register-or-memory operand 2 with index list 0, 4, 2, 6.
7505 (define_expand "avx_unpcklpd256<mask_name>"
7506 [(set (match_operand:V4DF 0 "register_operand")
7509 (match_operand:V4DF 1 "register_operand")
7510 (match_operand:V4DF 2 "nonimmediate_operand"))
7511 (parallel [(const_int 0) (const_int 4)
7512 (const_int 2) (const_int 6)])))]
7513 "TARGET_AVX && <mask_avx512vl_condition>")
;; Matcher shared by the 256-bit unpcklpd and movddup expanders (index list
;; 0, 4, 2, 6).  Alternative 0 takes two distinct inputs and emits vunpcklpd;
;; alternative 1 has a memory operand 1 with operand 2 tied to it
;; (constraint "1") and emits vmovddup.  Both carry <mask_operand3>.
7515 (define_insn "*avx_unpcklpd256<mask_name>"
7516 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7519 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7520 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7521 (parallel [(const_int 0) (const_int 4)
7522 (const_int 2) (const_int 6)])))]
7523 "TARGET_AVX && <mask_avx512vl_condition>"
7525 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7526 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7527 [(set_attr "type" "sselog")
7528 (set_attr "prefix" "vex")
7529 (set_attr "mode" "V4DF")])
;; Interleave-low for V4DF, expanded as three steps: two intermediate
;; shuffles of operands 1 and 2 with index lists 0,4,2,6 and 1,5,3,7, then a
;; final selection 0,1,4,5 into operand 0.  operands[3] and operands[4] are
;; fresh V4DF pseudo registers allocated in the preparation code.
7531 (define_expand "vec_interleave_lowv4df"
7535 (match_operand:V4DF 1 "register_operand" "x")
7536 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7537 (parallel [(const_int 0) (const_int 4)
7538 (const_int 2) (const_int 6)])))
7544 (parallel [(const_int 1) (const_int 5)
7545 (const_int 3) (const_int 7)])))
7546 (set (match_operand:V4DF 0 "register_operand")
7551 (parallel [(const_int 0) (const_int 1)
7552 (const_int 4) (const_int 5)])))]
7555 operands[3] = gen_reg_rtx (V4DFmode);
7556 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpcklpd on V2DF with index list 0, 2.
;;   operand 3: merge source (constraint "0C": tied to the destination, or a
;;              constant accepted by vector_move_operand)
;;   operand 4: QImode mask register ("Yk"), printed as %{%4%}
;; %N3 is appended after the mask in the output template.
7559 (define_insn "avx512vl_unpcklpd128_mask"
7560 [(set (match_operand:V2DF 0 "register_operand" "=v")
7564 (match_operand:V2DF 1 "register_operand" "v")
7565 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7566 (parallel [(const_int 0) (const_int 2)]))
7567 (match_operand:V2DF 3 "vector_move_operand" "0C")
7568 (match_operand:QI 4 "register_operand" "Yk")))]
7570 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7571 [(set_attr "type" "sselog")
7572 (set_attr "prefix" "evex")
7573 (set_attr "mode" "V2DF")])
;; Interleave-low expander for V2DF.  Both inputs may be registers or
;; memory; if ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the
;; operand combination, operand 1 is forced into a register.
7575 (define_expand "vec_interleave_lowv2df"
7576 [(set (match_operand:V2DF 0 "register_operand")
7579 (match_operand:V2DF 1 "nonimmediate_operand")
7580 (match_operand:V2DF 2 "nonimmediate_operand"))
7581 (parallel [(const_int 0)
7585 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7586 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for V2DF interleave-low, valid when TARGET_SSE2 and
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) hold.  Six
;; alternatives, in order (isa attribute in parentheses):
;;   0 unpcklpd  (noavx)      3 movhpd from memory operand 2   (noavx)
;;   1 vunpcklpd (avx)        4 vmovhpd from memory operand 2  (avx)
;;   2 %vmovddup (sse3)       5 %vmovlpd to %H0 of a memory destination (*)
7589 (define_insn "*vec_interleave_lowv2df"
7590 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7593 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7594 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7595 (parallel [(const_int 0)
7597 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7599 unpcklpd\t{%2, %0|%0, %2}
7600 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7601 %vmovddup\t{%1, %0|%0, %q1}
7602 movhpd\t{%2, %0|%0, %q2}
7603 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7604 %vmovlpd\t{%2, %H0|%H0, %2}"
7605 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7606 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7607 (set_attr "ssememalign" "64")
7608 (set_attr "prefix_data16" "*,*,*,1,*,1")
7609 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7610 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; After reload (TARGET_SSE3), a V2DF store to memory whose source is built
;; from register operand 1 is rewritten as two DFmode stores of the same
;; value: `low` is operand 1's register viewed in DFmode, written to byte
;; offsets 0 and 8 of the destination.
7613 [(set (match_operand:V2DF 0 "memory_operand")
7616 (match_operand:V2DF 1 "register_operand")
7618 (parallel [(const_int 0)
7620 "TARGET_SSE3 && reload_completed"
7623 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7624 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7625 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; With TARGET_SSE3, a two-element selection from memory operand 1 whose
;; indices are (op2, op3) with op3 == op2 + 2 is rewritten as a vec_duplicate
;; of one DFmode element: operand 1 is re-addressed in DFmode at byte offset
;; op2 * 8.
7630 [(set (match_operand:V2DF 0 "register_operand")
7633 (match_operand:V2DF 1 "memory_operand")
7635 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7636 (match_operand:SI 3 "const_int_operand")])))]
7637 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7638 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7640 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalef on VF_128 operands: emits vscalef<ssescalarmodesuffix> with
;; operands 1 and 2; operand 2's predicate and constraint come from the
;; <round_*> substitution attributes, and <round_op3> is printed next to it.
7643 (define_insn "avx512f_vmscalef<mode><round_name>"
7644 [(set (match_operand:VF_128 0 "register_operand" "=v")
7647 [(match_operand:VF_128 1 "register_operand" "v")
7648 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7653 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7654 [(set_attr "prefix" "evex")
7655 (set_attr "mode" "<ssescalarmode>")])
;; Packed vscalef on VF_AVX512VL operands: emits vscalef<ssemodesuffix> with
;; operands 1 and 2, carrying the <mask_operand3> and <round_mask_op3>
;; decorations in the output template.
7657 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7658 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7660 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7661 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7664 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7665 [(set_attr "prefix" "evex")
7666 (set_attr "mode" "<MODE>")])
;; Zero-masking vpternlog expander.  Forwards operands 0..4 to
;; gen_<avx512>_vternlog<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode)
;; before the mask register (operand 5).
;;   operand 4: 8-bit immediate (0..255)
;;   operand 5: mask register in <avx512fmaskmode>
7668 (define_expand "<avx512>_vternlog<mode>_maskz"
7669 [(match_operand:VI48_AVX512VL 0 "register_operand")
7670 (match_operand:VI48_AVX512VL 1 "register_operand")
7671 (match_operand:VI48_AVX512VL 2 "register_operand")
7672 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7673 (match_operand:SI 4 "const_0_to_255_operand")
7674 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7677 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7678 operands[0], operands[1], operands[2], operands[3],
7679 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog on VI48_AVX512VL vectors.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 is a register, operand 3 a register or memory,
;; operand 4 an 8-bit immediate.  The template prints %0, %2, %3, %4 plus the
;; <sd_mask_op5> decoration.
7683 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7684 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7685 (unspec:VI48_AVX512VL
7686 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7687 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7688 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7689 (match_operand:SI 4 "const_0_to_255_operand")]
7692 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7693 [(set_attr "type" "sselog")
7694 (set_attr "prefix" "evex")
7695 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpternlog: the unspec result is combined through vec_merge
;; under mask operand 5 (mask register, constraint "Yk", printed as %{%5%}).
;; Operand 1 is tied to the destination (constraint "0").
7697 (define_insn "<avx512>_vternlog<mode>_mask"
7698 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7699 (vec_merge:VI48_AVX512VL
7700 (unspec:VI48_AVX512VL
7701 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7702 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7703 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7704 (match_operand:SI 4 "const_0_to_255_operand")]
7707 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7709 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7710 [(set_attr "type" "sselog")
7711 (set_attr "prefix" "evex")
7712 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexp on VF_AVX512VL: one input (operand 1) whose predicate and
;; constraint come from the <round_saeonly_*> attributes; the template
;; carries the <mask_operand2> and <round_saeonly_mask_op2> decorations.
;; The ';' after the template string begins an (empty) comment.
7714 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7715 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7716 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7719 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7720 [(set_attr "prefix" "evex")
7721 (set_attr "mode" "<MODE>")])
;; Scalar vgetexp on VF_128: emits vgetexp<ssescalarmodesuffix> with operands
;; 1 and 2; operand 2's predicate and constraint come from the
;; <round_saeonly_*> attributes.  The ';' after the template string begins an
;; (empty) comment.
7723 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7724 [(set (match_operand:VF_128 0 "register_operand" "=v")
7727 [(match_operand:VF_128 1 "register_operand" "v")
7728 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7733 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7734 [(set_attr "prefix" "evex")
7735 (set_attr "mode" "<ssescalarmode>")])
;; valign on VI48_AVX512VL vectors: register operand 1, register-or-memory
;; operand 2 and an 8-bit immediate (operand 3); the template carries the
;; <mask_operand4> decoration.  The ';' after the template string begins an
;; (empty) comment.
7737 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7738 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7739 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7740 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7741 (match_operand:SI 3 "const_0_to_255_operand")]
7744 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7745 [(set_attr "prefix" "evex")
7746 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit shufps expander.  The 8-bit immediate (operand 3) is
;; decoded into sixteen explicit element indices for
;; gen_avx512f_shufps512_1_mask.  For each group base b in {0, 4, 8, 12}:
;;   bits 1:0 and 3:2 of the immediate give two indices offset by b
;;   bits 5:4 and 7:6 give two indices offset by 16 + b
;; Operand 4 (V16SF) and operand 5 (HImode mask) are passed through last.
7748 (define_expand "avx512f_shufps512_mask"
7749 [(match_operand:V16SF 0 "register_operand")
7750 (match_operand:V16SF 1 "register_operand")
7751 (match_operand:V16SF 2 "nonimmediate_operand")
7752 (match_operand:SI 3 "const_0_to_255_operand")
7753 (match_operand:V16SF 4 "register_operand")
7754 (match_operand:HI 5 "register_operand")]
7757 int mask = INTVAL (operands[3]);
7758 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7759 GEN_INT ((mask >> 0) & 3),
7760 GEN_INT ((mask >> 2) & 3),
7761 GEN_INT (((mask >> 4) & 3) + 16),
7762 GEN_INT (((mask >> 6) & 3) + 16),
7763 GEN_INT (((mask >> 0) & 3) + 4),
7764 GEN_INT (((mask >> 2) & 3) + 4),
7765 GEN_INT (((mask >> 4) & 3) + 20),
7766 GEN_INT (((mask >> 6) & 3) + 20),
7767 GEN_INT (((mask >> 0) & 3) + 8),
7768 GEN_INT (((mask >> 2) & 3) + 8),
7769 GEN_INT (((mask >> 4) & 3) + 24),
7770 GEN_INT (((mask >> 6) & 3) + 24),
7771 GEN_INT (((mask >> 0) & 3) + 12),
7772 GEN_INT (((mask >> 2) & 3) + 12),
7773 GEN_INT (((mask >> 4) & 3) + 28),
7774 GEN_INT (((mask >> 6) & 3) + 28),
7775 operands[4], operands[5]));
;; Zero-masking packed vfixupimm expander.  Forwards operands 0..4 to
;; gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name>, inserting
;; CONST0_RTX (<MODE>mode) before the mask register (operand 5) and
;; appending <round_saeonly_expand_operand6>.
7780 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7781 [(match_operand:VF_AVX512VL 0 "register_operand")
7782 (match_operand:VF_AVX512VL 1 "register_operand")
7783 (match_operand:VF_AVX512VL 2 "register_operand")
7784 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7785 (match_operand:SI 4 "const_0_to_255_operand")
7786 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7789 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7790 operands[0], operands[1], operands[2], operands[3],
7791 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7792 <round_saeonly_expand_operand6>));
;; Packed vfixupimm on VF_AVX512VL.  Operand 1 is tied to the destination
;; (constraint "0"); operand 3 is an integer vector (<sseintvecmode>) and
;; operand 4 an 8-bit immediate.  The template carries the <sd_mask_op5> and
;; <round_saeonly_sd_mask_op5> decorations.  The ';' after the template
;; string begins an (empty) comment.
7796 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7797 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7799 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7800 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7801 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7802 (match_operand:SI 4 "const_0_to_255_operand")]
7805 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7806 [(set_attr "prefix" "evex")
7807 (set_attr "mode" "<MODE>")])
;; Merge-masking packed vfixupimm: the result is combined through vec_merge
;; under mask operand 5 (mask register, constraint "Yk", printed as %{%5%}).
;; Operand 1 is tied to the destination (constraint "0").  The ';' after the
;; template string begins an (empty) comment.
7809 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7810 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7811 (vec_merge:VF_AVX512VL
7813 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7814 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7815 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7816 (match_operand:SI 4 "const_0_to_255_operand")]
7819 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7821 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7822 [(set_attr "prefix" "evex")
7823 (set_attr "mode" "<MODE>")])
;; Zero-masking scalar vfixupimm expander.  Forwards operands 0..4 to
;; gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name>, inserting
;; CONST0_RTX (<MODE>mode) before the mask register (operand 5) and
;; appending <round_saeonly_expand_operand6>.
7825 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7826 [(match_operand:VF_128 0 "register_operand")
7827 (match_operand:VF_128 1 "register_operand")
7828 (match_operand:VF_128 2 "register_operand")
7829 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7830 (match_operand:SI 4 "const_0_to_255_operand")
7831 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7834 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7835 operands[0], operands[1], operands[2], operands[3],
7836 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7837 <round_saeonly_expand_operand6>));
;; Scalar vfixupimm on VF_128.  Operand 1 is tied to the destination
;; (constraint "0"); operand 3 is an integer vector (<sseintvecmode>) and
;; operand 4 an 8-bit immediate.  The template carries the <sd_mask_op5> and
;; <round_saeonly_sd_mask_op5> decorations.  The ';' after the template
;; string begins an (empty) comment.
7841 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7842 [(set (match_operand:VF_128 0 "register_operand" "=v")
7845 [(match_operand:VF_128 1 "register_operand" "0")
7846 (match_operand:VF_128 2 "register_operand" "v")
7847 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7848 (match_operand:SI 4 "const_0_to_255_operand")]
7853 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7854 [(set_attr "prefix" "evex")
7855 (set_attr "mode" "<ssescalarmode>")])
;; Merge-masking scalar vfixupimm: mask operand 5 is a mask register
;; (constraint "Yk") printed as %{%5%}; operand 1 is tied to the destination
;; (constraint "0").  The ';' after the template string begins an (empty)
;; comment.
7857 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7858 [(set (match_operand:VF_128 0 "register_operand" "=v")
7862 [(match_operand:VF_128 1 "register_operand" "0")
7863 (match_operand:VF_128 2 "register_operand" "v")
7864 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7865 (match_operand:SI 4 "const_0_to_255_operand")]
7870 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7872 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7873 [(set_attr "prefix" "evex")
7874 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale insn over the VF_AVX512VL modes.  Operand 1 is the
;; source, operand 2 an 8-bit immediate (hence length_immediate 1); the
;; optional mask is printed via <mask_operand3>.
7876 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7877 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7879 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7880 (match_operand:SI 2 "const_0_to_255_operand")]
7883 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7884 [(set_attr "length_immediate" "1")
7885 (set_attr "prefix" "evex")
7886 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale insn (suffix chosen by <ssescalarmodesuffix>) on
;; 128-bit float vectors: two vector sources (operands 1 and 2) and an
;; 8-bit immediate in operand 3.
7888 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7889 [(set (match_operand:VF_128 0 "register_operand" "=v")
7892 [(match_operand:VF_128 1 "register_operand" "v")
7893 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7894 (match_operand:SI 3 "const_0_to_255_operand")]
7899 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7900 [(set_attr "length_immediate" "1")
7901 (set_attr "prefix" "evex")
7902 (set_attr "mode" "<MODE>")])
7904 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps with the shuffle written out as 16 selector operands
;; (3..18).  The insn condition requires operands 7-10, 11-14 and 15-18 to
;; equal operands 3-6 plus 4, 8 and 12 respectively, so only operands 3-6
;; are folded into the 8-bit immediate: two bits each, with operands 5 and
;; 6 rebased by 16.  The rebuilt immediate overwrites operands[3] before
;; the template is printed.
7905 (define_insn "avx512f_shufps512_1<mask_name>"
7906 [(set (match_operand:V16SF 0 "register_operand" "=v")
7909 (match_operand:V16SF 1 "register_operand" "v")
7910 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7911 (parallel [(match_operand 3 "const_0_to_3_operand")
7912 (match_operand 4 "const_0_to_3_operand")
7913 (match_operand 5 "const_16_to_19_operand")
7914 (match_operand 6 "const_16_to_19_operand")
7915 (match_operand 7 "const_4_to_7_operand")
7916 (match_operand 8 "const_4_to_7_operand")
7917 (match_operand 9 "const_20_to_23_operand")
7918 (match_operand 10 "const_20_to_23_operand")
7919 (match_operand 11 "const_8_to_11_operand")
7920 (match_operand 12 "const_8_to_11_operand")
7921 (match_operand 13 "const_24_to_27_operand")
7922 (match_operand 14 "const_24_to_27_operand")
7923 (match_operand 15 "const_12_to_15_operand")
7924 (match_operand 16 "const_12_to_15_operand")
7925 (match_operand 17 "const_28_to_31_operand")
7926 (match_operand 18 "const_28_to_31_operand")])))]
7928 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7929 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7930 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7931 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7932 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7933 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7934 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7935 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7936 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7937 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7938 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7939 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7942 mask = INTVAL (operands[3]);
7943 mask |= INTVAL (operands[4]) << 2;
7944 mask |= (INTVAL (operands[5]) - 16) << 4;
7945 mask |= (INTVAL (operands[6]) - 16) << 6;
7946 operands[3] = GEN_INT (mask);
7948 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7950 [(set_attr "type" "sselog")
7951 (set_attr "length_immediate" "1")
7952 (set_attr "prefix" "evex")
7953 (set_attr "mode" "V16SF")])
;; Expander for masked 512-bit vshufpd.  It decodes the 8-bit immediate in
;; operand 3: each tested bit picks one of two adjacent element indices
;; (e.g. bit 1 picks 9 or 8) that is passed as a selector to the
;; avx512f_shufpd512_1_mask generator, followed by the merge source
;; (operand 4) and the QImode mask (operand 5).
7955 (define_expand "avx512f_shufpd512_mask"
7956 [(match_operand:V8DF 0 "register_operand")
7957 (match_operand:V8DF 1 "register_operand")
7958 (match_operand:V8DF 2 "nonimmediate_operand")
7959 (match_operand:SI 3 "const_0_to_255_operand")
7960 (match_operand:V8DF 4 "register_operand")
7961 (match_operand:QI 5 "register_operand")]
7964 int mask = INTVAL (operands[3]);
7965 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
7967 GEN_INT (mask & 2 ? 9 : 8),
7968 GEN_INT (mask & 4 ? 3 : 2),
7969 GEN_INT (mask & 8 ? 11 : 10),
7970 GEN_INT (mask & 16 ? 5 : 4),
7971 GEN_INT (mask & 32 ? 13 : 12),
7972 GEN_INT (mask & 64 ? 7 : 6),
7973 GEN_INT (mask & 128 ? 15 : 14),
7974 operands[4], operands[5]));
;; 512-bit vshufpd with eight explicit selector operands (3..10).  Each
;; selector's predicate allows exactly two values, so subtracting the lower
;; one leaves a single bit; the bits are packed in operand order into the
;; 8-bit immediate, which then overwrites operands[3] for printing.
7978 (define_insn "avx512f_shufpd512_1<mask_name>"
7979 [(set (match_operand:V8DF 0 "register_operand" "=v")
7982 (match_operand:V8DF 1 "register_operand" "v")
7983 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7984 (parallel [(match_operand 3 "const_0_to_1_operand")
7985 (match_operand 4 "const_8_to_9_operand")
7986 (match_operand 5 "const_2_to_3_operand")
7987 (match_operand 6 "const_10_to_11_operand")
7988 (match_operand 7 "const_4_to_5_operand")
7989 (match_operand 8 "const_12_to_13_operand")
7990 (match_operand 9 "const_6_to_7_operand")
7991 (match_operand 10 "const_14_to_15_operand")])))]
7995 mask = INTVAL (operands[3]);
7996 mask |= (INTVAL (operands[4]) - 8) << 1;
7997 mask |= (INTVAL (operands[5]) - 2) << 2;
7998 mask |= (INTVAL (operands[6]) - 10) << 3;
7999 mask |= (INTVAL (operands[7]) - 4) << 4;
8000 mask |= (INTVAL (operands[8]) - 12) << 5;
8001 mask |= (INTVAL (operands[9]) - 6) << 6;
8002 mask |= (INTVAL (operands[10]) - 14) << 7;
8003 operands[3] = GEN_INT (mask);
8005 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8007 [(set_attr "type" "sselog")
8008 (set_attr "length_immediate" "1")
8009 (set_attr "prefix" "evex")
8010 (set_attr "mode" "V8DF")])
;; Expander for 256-bit vshufpd.  It turns bits of the integer immediate in
;; operand 3 into explicit element selectors (each bit picks one of two
;; adjacent indices) and hands them to the avx_shufpd256_1 generator, plus
;; any extra masking arguments from <mask_expand4_args>.
8012 (define_expand "avx_shufpd256<mask_expand4_name>"
8013 [(match_operand:V4DF 0 "register_operand")
8014 (match_operand:V4DF 1 "register_operand")
8015 (match_operand:V4DF 2 "nonimmediate_operand")
8016 (match_operand:SI 3 "const_int_operand")]
8019 int mask = INTVAL (operands[3]);
8020 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8024 GEN_INT (mask & 2 ? 5 : 4),
8025 GEN_INT (mask & 4 ? 3 : 2),
8026 GEN_INT (mask & 8 ? 7 : 6)
8027 <mask_expand4_args>));
;; 256-bit vshufpd with four explicit selector operands (3..6).  The output
;; code converts the selectors back into a 4-bit immediate (one bit per
;; selector after subtracting its base index) and stores it in operands[3].
;; NOTE(review): the prefix attribute is a fixed "vex" although the pattern
;; is <mask_name>-substituted; confirm whether <mask_prefix> is intended.
8031 (define_insn "avx_shufpd256_1<mask_name>"
8032 [(set (match_operand:V4DF 0 "register_operand" "=v")
8035 (match_operand:V4DF 1 "register_operand" "v")
8036 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8037 (parallel [(match_operand 3 "const_0_to_1_operand")
8038 (match_operand 4 "const_4_to_5_operand")
8039 (match_operand 5 "const_2_to_3_operand")
8040 (match_operand 6 "const_6_to_7_operand")])))]
8041 "TARGET_AVX && <mask_avx512vl_condition>"
8044 mask = INTVAL (operands[3]);
8045 mask |= (INTVAL (operands[4]) - 4) << 1;
8046 mask |= (INTVAL (operands[5]) - 2) << 2;
8047 mask |= (INTVAL (operands[6]) - 6) << 3;
8048 operands[3] = GEN_INT (mask);
8050 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8052 [(set_attr "type" "sseshuf")
8053 (set_attr "length_immediate" "1")
8054 (set_attr "prefix" "vex")
8055 (set_attr "mode" "V4DF")])
;; Expander for 128-bit shufpd.  Bit 0 of the immediate becomes the first
;; selector (0 or 1) and bit 1 picks 3 or 2 for the second; both are passed
;; to the sse2_shufpd_v2df generator together with <mask_expand4_args>.
8057 (define_expand "sse2_shufpd<mask_expand4_name>"
8058 [(match_operand:V2DF 0 "register_operand")
8059 (match_operand:V2DF 1 "register_operand")
8060 (match_operand:V2DF 2 "nonimmediate_operand")
8061 (match_operand:SI 3 "const_int_operand")]
8064 int mask = INTVAL (operands[3]);
8065 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8066 operands[2], GEN_INT (mask & 1),
8067 GEN_INT (mask & 2 ? 3 : 2)
8068 <mask_expand4_args>));
;; Masked 128-bit vshufpd.  Operands 3 and 4 are the two element selectors;
;; the output code folds them into a 2-bit immediate stored back into
;; operands[3].  Operand 5 is the merge source (tied to the destination or
;; zero, constraint "0C") and operand 6 is the QImode mask register, printed
;; in braces after the destination and followed by %N5.
;; Both dialect halves of the template must print the mask as %{%6%}; the
;; Intel half previously had %{6%}, which emits a literal "{6}".
8072 (define_insn "sse2_shufpd_v2df_mask"
8073 [(set (match_operand:V2DF 0 "register_operand" "=v")
8077 (match_operand:V2DF 1 "register_operand" "v")
8078 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8079 (parallel [(match_operand 3 "const_0_to_1_operand")
8080 (match_operand 4 "const_2_to_3_operand")]))
8081 (match_operand:V2DF 5 "vector_move_operand" "0C")
8082 (match_operand:QI 6 "register_operand" "Yk")))]
8086 mask = INTVAL (operands[3]);
8087 mask |= (INTVAL (operands[4]) - 2) << 1;
8088 operands[3] = GEN_INT (mask);
8090 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8092 [(set_attr "type" "sseshuf")
8093 (set_attr "length_immediate" "1")
8094 (set_attr "prefix" "evex")
8095 (set_attr "mode" "V2DF")])
8097 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq on V4DI; the optional mask is printed via
;; <mask_operand3>.
;; NOTE(review): the prefix attribute is a fixed "vex" although the pattern
;; is <mask_name>-substituted; confirm whether <mask_prefix> is intended.
8098 (define_insn "avx2_interleave_highv4di<mask_name>"
8099 [(set (match_operand:V4DI 0 "register_operand" "=v")
8102 (match_operand:V4DI 1 "register_operand" "v")
8103 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8104 (parallel [(const_int 1)
8108 "TARGET_AVX2 && <mask_avx512vl_condition>"
8109 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8110 [(set_attr "type" "sselog")
8111 (set_attr "prefix" "vex")
8112 (set_attr "mode" "OI")])
;; 512-bit vpunpckhqdq on V8DI.  The selector takes the odd-numbered
;; elements, alternating between the two inputs (1, 9, 3, 11, 5, 13, 7, 15).
8114 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8115 [(set (match_operand:V8DI 0 "register_operand" "=v")
8118 (match_operand:V8DI 1 "register_operand" "v")
8119 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8120 (parallel [(const_int 1) (const_int 9)
8121 (const_int 3) (const_int 11)
8122 (const_int 5) (const_int 13)
8123 (const_int 7) (const_int 15)])))]
8125 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8126 [(set_attr "type" "sselog")
8127 (set_attr "prefix" "evex")
8128 (set_attr "mode" "XI")])
;; 128-bit punpckhqdq on V2DI with two alternatives: the two-operand legacy
;; form (operand 1 tied to the destination, isa noavx) and the
;; three-operand vpunpckhqdq form (isa avx), whose prefix attribute follows
;; <mask_prefix>.
8130 (define_insn "vec_interleave_highv2di<mask_name>"
8131 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8134 (match_operand:V2DI 1 "register_operand" "0,v")
8135 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8136 (parallel [(const_int 1)
8138 "TARGET_SSE2 && <mask_avx512vl_condition>"
8140 punpckhqdq\t{%2, %0|%0, %2}
8141 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8142 [(set_attr "isa" "noavx,avx")
8143 (set_attr "type" "sselog")
8144 (set_attr "prefix_data16" "1,*")
8145 (set_attr "prefix" "orig,<mask_prefix>")
8146 (set_attr "mode" "TI")])
;; 256-bit vpunpcklqdq on V4DI; the optional mask is printed via
;; <mask_operand3>.
;; NOTE(review): the prefix attribute is a fixed "vex" although the pattern
;; is <mask_name>-substituted; confirm whether <mask_prefix> is intended.
8148 (define_insn "avx2_interleave_lowv4di<mask_name>"
8149 [(set (match_operand:V4DI 0 "register_operand" "=v")
8152 (match_operand:V4DI 1 "register_operand" "v")
8153 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8154 (parallel [(const_int 0)
8158 "TARGET_AVX2 && <mask_avx512vl_condition>"
8159 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8160 [(set_attr "type" "sselog")
8161 (set_attr "prefix" "vex")
8162 (set_attr "mode" "OI")])
;; 512-bit vpunpcklqdq on V8DI.  The selector takes the even-numbered
;; elements, alternating between the two inputs (0, 8, 2, 10, 4, 12, 6, 14).
8164 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8165 [(set (match_operand:V8DI 0 "register_operand" "=v")
8168 (match_operand:V8DI 1 "register_operand" "v")
8169 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8170 (parallel [(const_int 0) (const_int 8)
8171 (const_int 2) (const_int 10)
8172 (const_int 4) (const_int 12)
8173 (const_int 6) (const_int 14)])))]
8175 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8176 [(set_attr "type" "sselog")
8177 (set_attr "prefix" "evex")
8178 (set_attr "mode" "XI")])
;; 128-bit punpcklqdq on V2DI with two alternatives: the two-operand legacy
;; form (operand 1 tied to the destination, isa noavx) and the
;; three-operand vpunpcklqdq form (isa avx), which may carry a mask via
;; <mask_operand3>.  The second alternative's prefix attribute uses
;; <mask_prefix>, as vec_interleave_highv2di<mask_name> does, instead of a
;; fixed "vex", so the masked variant is not described as VEX-encoded.
8180 (define_insn "vec_interleave_lowv2di<mask_name>"
8181 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8184 (match_operand:V2DI 1 "register_operand" "0,v")
8185 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8186 (parallel [(const_int 0)
8188 "TARGET_SSE2 && <mask_avx512vl_condition>"
8190 punpcklqdq\t{%2, %0|%0, %2}
8191 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8192 [(set_attr "isa" "noavx,avx")
8193 (set_attr "type" "sselog")
8194 (set_attr "prefix_data16" "1,*")
8195 (set_attr "prefix" "orig,<mask_prefix>")
8196 (set_attr "mode" "TI")])
;; Unmasked 128-bit shufpd over the VI8F_128 modes.  It selects one element
;; from each half of the concatenation of operands 1 and 2; the two
;; selectors are folded into a 2-bit immediate stored in operands[3].
;; The switch on which_alternative prints either the two-operand legacy
;; form or the three-operand VEX form.
8198 (define_insn "sse2_shufpd_<mode>"
8199 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8200 (vec_select:VI8F_128
8201 (vec_concat:<ssedoublevecmode>
8202 (match_operand:VI8F_128 1 "register_operand" "0,x")
8203 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
8204 (parallel [(match_operand 3 "const_0_to_1_operand")
8205 (match_operand 4 "const_2_to_3_operand")])))]
8209 mask = INTVAL (operands[3]);
8210 mask |= (INTVAL (operands[4]) - 2) << 1;
8211 operands[3] = GEN_INT (mask);
8213 switch (which_alternative)
8216 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8218 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8223 [(set_attr "isa" "noavx,avx")
8224 (set_attr "type" "sseshuf")
8225 (set_attr "length_immediate" "1")
8226 (set_attr "prefix" "orig,vex")
8227 (set_attr "mode" "V2DF")])
8229 ;; Avoid combining registers from different units in a single alternative,
8230 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of a V2DF source into a DF destination.  There are
;; six alternatives (types ssemov, sselog1, sselog1, ssemov, fmov, imov);
;; the condition rejects the case where both operands are in memory.
;; The data16 prefix expression involves only alternative 0 when AVX is
;; not enabled.
8231 (define_insn "sse2_storehpd"
8232 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
8234 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
8235 (parallel [(const_int 1)])))]
8236 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8238 %vmovhpd\t{%1, %0|%0, %1}
8240 vunpckhpd\t{%d1, %0|%0, %d1}
8244 [(set_attr "isa" "*,noavx,avx,*,*,*")
8245 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8246 (set (attr "prefix_data16")
8248 (and (eq_attr "alternative" "0")
8249 (not (match_test "TARGET_AVX")))
8251 (const_string "*")))
8252 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8253 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; After reload, extracting element 1 of a V2DF memory operand into a
;; register becomes a plain DF move: the memory operand is re-addressed as
;; DFmode at byte offset 8.
8256 [(set (match_operand:DF 0 "register_operand")
8258 (match_operand:V2DF 1 "memory_operand")
8259 (parallel [(const_int 1)])))]
8260 "TARGET_SSE2 && reload_completed"
8261 [(set (match_dup 0) (match_dup 1))]
8262 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extraction of element 1 of a V2DF when SSE is available but SSE2 is not.
;; It uses the single-precision moves movhps, movhlps and movlps (the last
;; one on the high part of a memory source, %H1).
8264 (define_insn "*vec_extractv2df_1_sse"
8265 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8267 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8268 (parallel [(const_int 1)])))]
8269 "!TARGET_SSE2 && TARGET_SSE
8270 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8272 movhps\t{%1, %0|%q0, %1}
8273 movhlps\t{%1, %0|%0, %1}
8274 movlps\t{%H1, %0|%0, %H1}"
8275 [(set_attr "type" "ssemov")
8276 (set_attr "ssememalign" "64")
8277 (set_attr "mode" "V2SF,V4SF,V2SF")])
8279 ;; Avoid combining registers from different units in a single alternative,
8280 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of a V2DF source into a DF destination.  There are
;; five alternatives (types ssemov x3, fmov, imov); the condition rejects
;; the case where both operands are in memory.
8281 (define_insn "sse2_storelpd"
8282 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8284 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8285 (parallel [(const_int 0)])))]
8286 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8288 %vmovlpd\t{%1, %0|%0, %1}
8293 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8294 (set_attr "prefix_data16" "1,*,*,*,*")
8295 (set_attr "prefix" "maybe_vex")
8296 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF into a register becomes a
;; plain DF move.  A register source is re-created as a DFmode register
;; with the same register number; otherwise the source is re-addressed as
;; DFmode at byte offset 0.
8299 [(set (match_operand:DF 0 "register_operand")
8301 (match_operand:V2DF 1 "nonimmediate_operand")
8302 (parallel [(const_int 0)])))]
8303 "TARGET_SSE2 && reload_completed"
8304 [(set (match_dup 0) (match_dup 1))]
8306 if (REG_P (operands[1]))
8307 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
8309 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Extraction of element 0 of a V2DF when SSE is available but SSE2 is not.
;; It uses movlps for the memory alternatives and movaps for the
;; register-to-register one.
8312 (define_insn "*vec_extractv2df_0_sse"
8313 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8315 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8316 (parallel [(const_int 0)])))]
8317 "!TARGET_SSE2 && TARGET_SSE
8318 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8320 movlps\t{%1, %0|%0, %1}
8321 movaps\t{%1, %0|%0, %1}
8322 movlps\t{%1, %0|%0, %q1}"
8323 [(set_attr "type" "ssemov")
8324 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse2_loadhpd.  The operands are first passed through
;; ix86_fixup_binary_operands, which returns the destination to use; the
;; insn is emitted into that destination and, if it differs from
;; operands[0], the result is copied back with a move.
8326 (define_expand "sse2_loadhpd_exp"
8327 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8330 (match_operand:V2DF 1 "nonimmediate_operand")
8331 (parallel [(const_int 0)]))
8332 (match_operand:DF 2 "nonimmediate_operand")))]
8335 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8337 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8339 /* Fix up the destination if needed. */
8340 if (dst != operands[0])
8341 emit_move_insn (operands[0], dst);
8346 ;; Avoid combining registers from different units in a single alternative,
8347 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines element 0 of V2DF operand 1 with the DF value in operand 2.
;; There are seven alternatives: movhpd/vmovhpd for a memory operand 2,
;; unpcklpd/vunpcklpd for a register operand 2, and three further
;; alternatives typed ssemov, fmov and imov.
;; The condition rejects the case where operands 1 and 2 are both memory.
8348 (define_insn "sse2_loadhpd"
8349 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8353 (match_operand:V2DF 1 "nonimmediate_operand"
8355 (parallel [(const_int 0)]))
8356 (match_operand:DF 2 "nonimmediate_operand"
8357 " m,m,x,x,x,*f,r")))]
8358 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8360 movhpd\t{%2, %0|%0, %2}
8361 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8362 unpcklpd\t{%2, %0|%0, %2}
8363 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8367 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8368 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8369 (set_attr "ssememalign" "64")
8370 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8371 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8372 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload, replacing the high element of a V2DF memory operand (its
;; own element 0 is kept via match_dup) with a DF register becomes a plain
;; DF store to that memory at byte offset 8.
8375 [(set (match_operand:V2DF 0 "memory_operand")
8377 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8378 (match_operand:DF 1 "register_operand")))]
8379 "TARGET_SSE2 && reload_completed"
8380 [(set (match_dup 0) (match_dup 1))]
8381 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapping sse2_loadlpd.  The operands are first passed through
;; ix86_fixup_binary_operands, which returns the destination to use; the
;; insn is emitted into that destination and, if it differs from
;; operands[0], the result is copied back with a move.
8383 (define_expand "sse2_loadlpd_exp"
8384 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8386 (match_operand:DF 2 "nonimmediate_operand")
8388 (match_operand:V2DF 1 "nonimmediate_operand")
8389 (parallel [(const_int 1)]))))]
8392 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8394 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8396 /* Fix up the destination if needed. */
8397 if (dst != operands[0])
8398 emit_move_insn (operands[0], dst);
8403 ;; Avoid combining registers from different units in a single alternative,
8404 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines the DF value in operand 2 with element 1 of V2DF operand 1
;; (which may also be the zero constant, constraint "C").  There are eleven
;; alternatives; per the type attribute, alternative 5 (shufpd) is sselog,
;; alternatives 9 and 10 are fmov and imov, and the rest are ssemov.
;; The condition rejects the case where operands 1 and 2 are both memory.
8405 (define_insn "sse2_loadlpd"
8406 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8407 "=x,x,x,x,x,x,x,x,m,m ,m")
8409 (match_operand:DF 2 "nonimmediate_operand"
8410 " m,m,m,x,x,0,0,x,x,*f,r")
8412 (match_operand:V2DF 1 "vector_move_operand"
8413 " C,0,x,0,x,x,o,o,0,0 ,0")
8414 (parallel [(const_int 1)]))))]
8415 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8417 %vmovsd\t{%2, %0|%0, %2}
8418 movlpd\t{%2, %0|%0, %2}
8419 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8420 movsd\t{%2, %0|%0, %2}
8421 vmovsd\t{%2, %1, %0|%0, %1, %2}
8422 shufpd\t{$2, %1, %0|%0, %1, 2}
8423 movhpd\t{%H1, %0|%0, %H1}
8424 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8428 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8430 (cond [(eq_attr "alternative" "5")
8431 (const_string "sselog")
8432 (eq_attr "alternative" "9")
8433 (const_string "fmov")
8434 (eq_attr "alternative" "10")
8435 (const_string "imov")
8437 (const_string "ssemov")))
8438 (set_attr "ssememalign" "64")
8439 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8440 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8441 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8442 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; After reload, replacing the low element of a V2DF memory operand (its
;; own element 1 is kept via match_dup) with a DF register becomes a plain
;; DF store to that memory at byte offset 0.
8445 [(set (match_operand:V2DF 0 "memory_operand")
8447 (match_operand:DF 1 "register_operand")
8448 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8449 "TARGET_SSE2 && reload_completed"
8450 [(set (match_dup 0) (match_dup 1))]
8451 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; movsd-style merge of two V2DF operands with nine alternatives.  The
;; templates use movsd/vmovsd, movlpd/vmovlpd (load and store), shufpd with
;; immediate 2, and movhps/vmovhps on the high half (%H) of a memory
;; operand.  Only alternative 5 (shufpd) is typed sselog and carries an
;; immediate; the data16 prefix expression involves alternatives 2 and 4
;; when AVX is not enabled.
8453 (define_insn "sse2_movsd"
8454 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8456 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8457 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8461 movsd\t{%2, %0|%0, %2}
8462 vmovsd\t{%2, %1, %0|%0, %1, %2}
8463 movlpd\t{%2, %0|%0, %q2}
8464 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8465 %vmovlpd\t{%2, %0|%q0, %2}
8466 shufpd\t{$2, %1, %0|%0, %1, 2}
8467 movhps\t{%H1, %0|%0, %H1}
8468 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8469 %vmovhps\t{%1, %H0|%H0, %1}"
8470 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8473 (eq_attr "alternative" "5")
8474 (const_string "sselog")
8475 (const_string "ssemov")))
8476 (set (attr "prefix_data16")
8478 (and (eq_attr "alternative" "2,4")
8479 (not (match_test "TARGET_AVX")))
8481 (const_string "*")))
8482 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8483 (set_attr "ssememalign" "64")
8484 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8485 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Builds a V2DF from the DF value in operand 1.  There are two
;; alternatives: a non-AVX one with operand 1 tied to the destination, and
;; an SSE3 one printing (v)movddup, optionally masked via <mask_operand2>.
8487 (define_insn "vec_dupv2df<mask_name>"
8488 [(set (match_operand:V2DF 0 "register_operand" "=x,v")
8490 (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
8491 "TARGET_SSE2 && <mask_avx512vl_condition>"
8494 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8495 [(set_attr "isa" "noavx,sse3")
8496 (set_attr "type" "sselog1")
8497 (set_attr "prefix" "orig,maybe_vex")
8498 (set_attr "mode" "V2DF,DF")])
;; Builds a V2DF from two DF operands with eight alternatives.  Alternative
;; 2 (movddup) requires operand 2 to match operand 1 (constraint "1") and
;; alternative 5 (movsd) requires operand 2 to be zero (constraint "C").
;; Alternatives 0-2 are typed sselog, the others ssemov.
8500 (define_insn "*vec_concatv2df"
8501 [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
8503 (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
8504 (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
8507 unpcklpd\t{%2, %0|%0, %2}
8508 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8509 %vmovddup\t{%1, %0|%0, %1}
8510 movhpd\t{%2, %0|%0, %2}
8511 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8512 %vmovsd\t{%1, %0|%0, %1}
8513 movlhps\t{%2, %0|%0, %2}
8514 movhps\t{%2, %0|%0, %2}"
8515 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
8518 (eq_attr "alternative" "0,1,2")
8519 (const_string "sselog")
8520 (const_string "ssemov")))
8521 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
8522 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
8523 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
8525 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8527 ;; Parallel integer down-conversion operations
8529 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the truncating moves from V16SI and V8DI sources.
;; pmov_src_mode and pmov_src_lower give the matching source mode in upper
;; and lower case; pmov_suff_1 gives the instruction suffix for each
;; destination mode.
8531 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8532 (define_mode_attr pmov_src_mode
8533 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8534 (define_mode_attr pmov_src_lower
8535 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8536 (define_mode_attr pmov_suff_1
8537 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked vpmov* truncation from a <pmov_src_mode> register to a
;; PMOV_DST_MODE_1 register or memory destination (second alternative is a
;; store, per the "memory" attribute).
8539 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8540 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8541 (any_truncate:PMOV_DST_MODE_1
8542 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8544 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8545 [(set_attr "type" "ssemov")
8546 (set_attr "memory" "none,store")
8547 (set_attr "prefix" "evex")
8548 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpmov* truncation: the truncated value is vec_merged with operand
;; 2 under the mask in operand 3.  For the register destination, operand 2
;; may be the destination itself or zero ("0C"); for the memory destination
;; it must be the destination ("0").
8550 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8551 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8552 (vec_merge:PMOV_DST_MODE_1
8553 (any_truncate:PMOV_DST_MODE_1
8554 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8555 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8556 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8558 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8559 [(set_attr "type" "ssemov")
8560 (set_attr "memory" "none,store")
8561 (set_attr "prefix" "evex")
8562 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store: the destination must be a
;; memory operand and the mask register is operand 2.
8564 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8565 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8566 (vec_merge:PMOV_DST_MODE_1
8567 (any_truncate:PMOV_DST_MODE_1
8568 (match_operand:<pmov_src_mode> 1 "register_operand"))
8570 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Unmasked vpmov*wb truncation from a V32HI register to a V32QI register
;; or memory destination.
8573 (define_insn "*avx512bw_<code>v32hiv32qi2"
8574 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8576 (match_operand:V32HI 1 "register_operand" "v,v")))]
8578 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
8579 [(set_attr "type" "ssemov")
8580 (set_attr "memory" "none,store")
8581 (set_attr "prefix" "evex")
8582 (set_attr "mode" "XI")])
;; Masked vpmov*wb truncation from V32HI to V32QI.  Operand 2 is the merge
;; source and operand 3 is an SImode mask register (constraint "Yk").
8584 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
8585 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8588 (match_operand:V32HI 1 "register_operand" "v,v"))
8589 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
8590 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
8592 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8593 [(set_attr "type" "ssemov")
8594 (set_attr "memory" "none,store")
8595 (set_attr "prefix" "evex")
8596 (set_attr "mode" "XI")])
;; Expander for the masked V32HI to V32QI truncating store; the SImode mask
;; register is operand 2.
;; NOTE(review): operand 0 uses "nonimmediate_operand" here, whereas
;; avx512f_<code><pmov_src_lower><mode>2_mask_store uses "memory_operand";
;; confirm whether the looser predicate is intended.
8598 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
8599 [(set (match_operand:V32QI 0 "nonimmediate_operand")
8602 (match_operand:V32HI 1 "register_operand"))
8604 (match_operand:SI 2 "register_operand")))]
;; Destination modes of the truncating moves whose source is the
;; <ssedoublemode> of the destination; V16QI is only enabled with
;; TARGET_AVX512BW.  pmov_suff_2 gives the instruction suffix per mode.
8607 (define_mode_iterator PMOV_DST_MODE_2
8608 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
8609 (define_mode_attr pmov_suff_2
8610 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Unmasked vpmov* truncation from an <ssedoublemode> register to a
;; PMOV_DST_MODE_2 register or memory destination.
8612 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8613 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8614 (any_truncate:PMOV_DST_MODE_2
8615 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8617 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8618 [(set_attr "type" "ssemov")
8619 (set_attr "memory" "none,store")
8620 (set_attr "prefix" "evex")
8621 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpmov* truncation to a PMOV_DST_MODE_2 destination: the truncated
;; value is vec_merged with operand 2 under the mask in operand 3.
8623 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8624 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8625 (vec_merge:PMOV_DST_MODE_2
8626 (any_truncate:PMOV_DST_MODE_2
8627 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8628 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8629 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8631 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8632 [(set_attr "type" "ssemov")
8633 (set_attr "memory" "none,store")
8634 (set_attr "prefix" "evex")
8635 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store to a PMOV_DST_MODE_2
;; destination; the mask register is operand 2.
8637 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
8638 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8639 (vec_merge:PMOV_DST_MODE_2
8640 (any_truncate:PMOV_DST_MODE_2
8641 (match_operand:<ssedoublemode> 1 "register_operand"))
8643 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Source modes for truncations to QImode elements (V8HI only with
;; TARGET_AVX512BW).  pmov_dst_3 is the mode of the truncated part,
;; pmov_dst_zeroed_3 the mode of the accompanying zero part (the two
;; element counts add up to 16), and pmov_suff_3 the instruction suffix.
8646 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
8647 (define_mode_attr pmov_dst_3
8648 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
8649 (define_mode_attr pmov_dst_zeroed_3
8650 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
8651 (define_mode_attr pmov_suff_3
8652 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Register-to-register vpmov* truncation to QImode elements.  The V16QI
;; result is formed from the truncated <pmov_dst_3> value and a zero
;; operand of mode <pmov_dst_zeroed_3> (operand 2, const0_operand).
8654 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8655 [(set (match_operand:V16QI 0 "register_operand" "=v")
8657 (any_truncate:<pmov_dst_3>
8658 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8659 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8661 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8662 [(set_attr "type" "ssemov")
8663 (set_attr "prefix" "evex")
8664 (set_attr "mode" "TI")])
;; Store form of the V2DI to QImode truncation (vpmov*qb): the destination
;; is a V16QI memory operand and the selector lists elements 2-15.
8666 (define_insn "*avx512vl_<code>v2div2qi2_store"
8667 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8670 (match_operand:V2DI 1 "register_operand" "v"))
8673 (parallel [(const_int 2) (const_int 3)
8674 (const_int 4) (const_int 5)
8675 (const_int 6) (const_int 7)
8676 (const_int 8) (const_int 9)
8677 (const_int 10) (const_int 11)
8678 (const_int 12) (const_int 13)
8679 (const_int 14) (const_int 15)]))))]
8681 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8682 [(set_attr "type" "ssemov")
8683 (set_attr "memory" "store")
8684 (set_attr "prefix" "evex")
8685 (set_attr "mode" "TI")])
;; Masked register form of the V2DI to QImode truncation (vpmov*qb).
;; Elements 0-1 of operand 2 serve as the merge source under the QImode
;; mask in operand 3; the pattern also contains a 14-element zero vector.
8687 (define_insn "avx512vl_<code>v2div2qi2_mask"
8688 [(set (match_operand:V16QI 0 "register_operand" "=v")
8692 (match_operand:V2DI 1 "register_operand" "v"))
8694 (match_operand:V16QI 2 "vector_move_operand" "0C")
8695 (parallel [(const_int 0) (const_int 1)]))
8696 (match_operand:QI 3 "register_operand" "Yk"))
8697 (const_vector:V14QI [(const_int 0) (const_int 0)
8698 (const_int 0) (const_int 0)
8699 (const_int 0) (const_int 0)
8700 (const_int 0) (const_int 0)
8701 (const_int 0) (const_int 0)
8702 (const_int 0) (const_int 0)
8703 (const_int 0) (const_int 0)])))]
8705 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8706 [(set_attr "type" "ssemov")
8707 (set_attr "prefix" "evex")
8708 (set_attr "mode" "TI")])
;; Masked store form of the V2DI to QImode truncation (vpmov*qb).  The
;; QImode mask is operand 2; the visible selectors cover elements 0-1 and
;; elements 2-15.
8710 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
8711 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8715 (match_operand:V2DI 1 "register_operand" "v"))
8718 (parallel [(const_int 0) (const_int 1)]))
8719 (match_operand:QI 2 "register_operand" "Yk"))
8722 (parallel [(const_int 2) (const_int 3)
8723 (const_int 4) (const_int 5)
8724 (const_int 6) (const_int 7)
8725 (const_int 8) (const_int 9)
8726 (const_int 10) (const_int 11)
8727 (const_int 12) (const_int 13)
8728 (const_int 14) (const_int 15)]))))]
8730 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8731 [(set_attr "type" "ssemov")
8732 (set_attr "memory" "store")
8733 (set_attr "prefix" "evex")
8734 (set_attr "mode" "TI")])
;; Store form of the truncation of a VI4_128_8_256 source to four QImode
;; elements: the destination is a V16QI memory operand and the selector
;; lists elements 4-15.
8736 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8737 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8740 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8743 (parallel [(const_int 4) (const_int 5)
8744 (const_int 6) (const_int 7)
8745 (const_int 8) (const_int 9)
8746 (const_int 10) (const_int 11)
8747 (const_int 12) (const_int 13)
8748 (const_int 14) (const_int 15)]))))]
8750 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8751 [(set_attr "type" "ssemov")
8752 (set_attr "memory" "store")
8753 (set_attr "prefix" "evex")
8754 (set_attr "mode" "TI")])
;; Masked register form of the truncation of a VI4_128_8_256 source to four
;; QImode elements.  Elements 0-3 of operand 2 serve as the merge source
;; under the QImode mask in operand 3; the pattern also contains a
;; 12-element zero vector.
8756 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8757 [(set (match_operand:V16QI 0 "register_operand" "=v")
8761 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8763 (match_operand:V16QI 2 "vector_move_operand" "0C")
8764 (parallel [(const_int 0) (const_int 1)
8765 (const_int 2) (const_int 3)]))
8766 (match_operand:QI 3 "register_operand" "Yk"))
8767 (const_vector:V12QI [(const_int 0) (const_int 0)
8768 (const_int 0) (const_int 0)
8769 (const_int 0) (const_int 0)
8770 (const_int 0) (const_int 0)
8771 (const_int 0) (const_int 0)
8772 (const_int 0) (const_int 0)])))]
8774 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8775 [(set_attr "type" "ssemov")
8776 (set_attr "prefix" "evex")
8777 (set_attr "mode" "TI")])
;; Masked store form of the truncation of a VI4_128_8_256 source to four
;; QImode elements.  The QImode mask is operand 2; the visible selectors
;; cover elements 0-3 and elements 4-15.
8779 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
8780 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8784 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8787 (parallel [(const_int 0) (const_int 1)
8788 (const_int 2) (const_int 3)]))
8789 (match_operand:QI 2 "register_operand" "Yk"))
8792 (parallel [(const_int 4) (const_int 5)
8793 (const_int 6) (const_int 7)
8794 (const_int 8) (const_int 9)
8795 (const_int 10) (const_int 11)
8796 (const_int 12) (const_int 13)
8797 (const_int 14) (const_int 15)]))))]
8799 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8800 [(set_attr "type" "ssemov")
8801 (set_attr "memory" "store")
8802 (set_attr "prefix" "evex")
8803 (set_attr "mode" "TI")])
;; Source modes for the truncations to eight QImode elements: V8HI (only
;; with TARGET_AVX512BW) and V8SI.
8805 (define_mode_iterator VI2_128_BW_4_256
8806 [(V8HI "TARGET_AVX512BW") V8SI])
;; Store form of the truncation of a VI2_128_BW_4_256 source to eight
;; QImode elements: the destination is a V16QI memory operand and the
;; selector lists elements 8-15.
8808 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8809 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8812 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8815 (parallel [(const_int 8) (const_int 9)
8816 (const_int 10) (const_int 11)
8817 (const_int 12) (const_int 13)
8818 (const_int 14) (const_int 15)]))))]
8820 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8821 [(set_attr "type" "ssemov")
8822 (set_attr "memory" "store")
8823 (set_attr "prefix" "evex")
8824 (set_attr "mode" "TI")])
;; Masked register form of the truncation of a VI2_128_BW_4_256 source to
;; eight QImode elements.  Elements 0-7 of operand 2 serve as the merge
;; source under the QImode mask in operand 3; the pattern also contains an
;; 8-element zero vector.
8826 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8827 [(set (match_operand:V16QI 0 "register_operand" "=v")
8831 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8833 (match_operand:V16QI 2 "vector_move_operand" "0C")
8834 (parallel [(const_int 0) (const_int 1)
8835 (const_int 2) (const_int 3)
8836 (const_int 4) (const_int 5)
8837 (const_int 6) (const_int 7)]))
8838 (match_operand:QI 3 "register_operand" "Yk"))
8839 (const_vector:V8QI [(const_int 0) (const_int 0)
8840 (const_int 0) (const_int 0)
8841 (const_int 0) (const_int 0)
8842 (const_int 0) (const_int 0)])))]
8844 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8845 [(set_attr "type" "ssemov")
8846 (set_attr "prefix" "evex")
8847 (set_attr "mode" "TI")])
;; Masked store form of the ...v8qi2 pattern: register operand 1 is written
;; to V16QI memory operand 0 under the QImode mask in operand 2 ("Yk").
;; The two parallels select elements 0-7 and 8-15 respectively.
8849 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
8850 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8854 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8857 (parallel [(const_int 0) (const_int 1)
8858 (const_int 2) (const_int 3)
8859 (const_int 4) (const_int 5)
8860 (const_int 6) (const_int 7)]))
8861 (match_operand:QI 2 "register_operand" "Yk"))
8864 (parallel [(const_int 8) (const_int 9)
8865 (const_int 10) (const_int 11)
8866 (const_int 12) (const_int 13)
8867 (const_int 14) (const_int 15)]))))]
8869 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8870 [(set_attr "type" "ssemov")
8871 (set_attr "memory" "store")
8872 (set_attr "prefix" "evex")
8873 (set_attr "mode" "TI")])
;; Source modes for the word-destination (V8HI) vpmov pattern below, plus
;; the per-mode attributes it uses:
;;   pmov_dst_4         - mode of the converted (any_truncate) result;
;;   pmov_dst_zeroed_4  - mode of the const0 operand paired with that result
;;                        (element counts add up to 8 with pmov_dst_4);
;;   pmov_suff_4        - mnemonic suffix ("qw" for DI sources, "dw" for SI).
8875 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
8876 (define_mode_attr pmov_dst_4
8877 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
8878 (define_mode_attr pmov_dst_zeroed_4
8879 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
8880 (define_mode_attr pmov_suff_4
8881 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Unmasked register form: any_truncate of operand 1 (a PMOV_SRC_MODE_4
;; register) to <pmov_dst_4>, combined with a const0 operand of mode
;; <pmov_dst_zeroed_4>, producing a V8HI register.  Emitted as
;; vpmov<trunsuffix><pmov_suff_4>.
8883 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
8884 [(set (match_operand:V8HI 0 "register_operand" "=v")
8886 (any_truncate:<pmov_dst_4>
8887 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
8888 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
8890 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8891 [(set_attr "type" "ssemov")
8892 (set_attr "prefix" "evex")
8893 (set_attr "mode" "TI")])
;; Unmasked store form of the ...v4hi2 pattern: vpmov<trunsuffix><pmov_suff_4>
;; from register operand 1 to V8HI memory operand 0.  The visible parallel
;; selects elements 4-7.
8895 (define_insn "*avx512vl_<code><mode>v4hi2_store"
8896 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8899 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8902 (parallel [(const_int 4) (const_int 5)
8903 (const_int 6) (const_int 7)]))))]
8905 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8906 [(set_attr "type" "ssemov")
8907 (set_attr "memory" "store")
8908 (set_attr "prefix" "evex")
8909 (set_attr "mode" "TI")])
;; Masked register form of the ...v4hi2 pattern.  Operand 1 is the source,
;; operand 3 (QImode, "Yk") the mask, and operand 2 (V8HI, "0C") is tied to
;; the destination or a constant and is printed via %N2.  The trailing
;; V4HI const_vector is all zeros.
8911 (define_insn "avx512vl_<code><mode>v4hi2_mask"
8912 [(set (match_operand:V8HI 0 "register_operand" "=v")
8916 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8918 (match_operand:V8HI 2 "vector_move_operand" "0C")
8919 (parallel [(const_int 0) (const_int 1)
8920 (const_int 2) (const_int 3)]))
8921 (match_operand:QI 3 "register_operand" "Yk"))
8922 (const_vector:V4HI [(const_int 0) (const_int 0)
8923 (const_int 0) (const_int 0)])))]
8925 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8926 [(set_attr "type" "ssemov")
8927 (set_attr "prefix" "evex")
8928 (set_attr "mode" "TI")])
;; Masked store form of the ...v4hi2 pattern: register operand 1 goes to
;; V8HI memory operand 0 under the QImode mask in operand 2 ("Yk").
;; The two parallels select elements 0-3 and 4-7 respectively.
8930 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
8931 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8935 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8938 (parallel [(const_int 0) (const_int 1)
8939 (const_int 2) (const_int 3)]))
8940 (match_operand:QI 2 "register_operand" "Yk"))
8943 (parallel [(const_int 4) (const_int 5)
8944 (const_int 6) (const_int 7)]))))]
8946 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8947 [(set_attr "type" "ssemov")
8948 (set_attr "memory" "store")
8949 (set_attr "prefix" "evex")
8950 (set_attr "mode" "TI")])
;; Unmasked store form for a V2DI source: vpmov<trunsuffix>qw from register
;; operand 1 to V8HI memory operand 0.  The visible parallel selects
;; elements 2-7.
8952 (define_insn "*avx512vl_<code>v2div2hi2_store"
8953 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8956 (match_operand:V2DI 1 "register_operand" "v"))
8959 (parallel [(const_int 2) (const_int 3)
8960 (const_int 4) (const_int 5)
8961 (const_int 6) (const_int 7)]))))]
8963 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
8964 [(set_attr "type" "ssemov")
8965 (set_attr "memory" "store")
8966 (set_attr "prefix" "evex")
8967 (set_attr "mode" "TI")])
;; Masked register form for a V2DI source, emitted as vpmov<trunsuffix>qw.
;; Operand 3 (QImode, "Yk") is the mask; operand 2 (V8HI, "0C") is tied to
;; the destination or a constant and is printed via %N2.  The trailing
;; V6HI const_vector is all zeros.
8969 (define_insn "avx512vl_<code>v2div2hi2_mask"
8970 [(set (match_operand:V8HI 0 "register_operand" "=v")
8974 (match_operand:V2DI 1 "register_operand" "v"))
8976 (match_operand:V8HI 2 "vector_move_operand" "0C")
8977 (parallel [(const_int 0) (const_int 1)]))
8978 (match_operand:QI 3 "register_operand" "Yk"))
8979 (const_vector:V6HI [(const_int 0) (const_int 0)
8980 (const_int 0) (const_int 0)
8981 (const_int 0) (const_int 0)])))]
8983 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8984 [(set_attr "type" "ssemov")
8985 (set_attr "prefix" "evex")
8986 (set_attr "mode" "TI")])
;; Masked store form for a V2DI source: vpmov<trunsuffix>qw from register
;; operand 1 to V8HI memory operand 0 under the QImode mask in operand 2.
;; The two parallels select elements 0-1 and 2-7 respectively.
8988 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
8989 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8993 (match_operand:V2DI 1 "register_operand" "v"))
8996 (parallel [(const_int 0) (const_int 1)]))
8997 (match_operand:QI 2 "register_operand" "Yk"))
9000 (parallel [(const_int 2) (const_int 3)
9001 (const_int 4) (const_int 5)
9002 (const_int 6) (const_int 7)]))))]
9004 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9005 [(set_attr "type" "ssemov")
9006 (set_attr "memory" "store")
9007 (set_attr "prefix" "evex")
9008 (set_attr "mode" "TI")])
;; Unmasked register form for a V2DI source with a V4SI destination,
;; emitted as vpmov<trunsuffix>qd.  Operand 2 must be a V2SI zero constant
;; (const0_operand).
9010 (define_insn "*avx512vl_<code>v2div2si2"
9011 [(set (match_operand:V4SI 0 "register_operand" "=v")
9014 (match_operand:V2DI 1 "register_operand" "v"))
9015 (match_operand:V2SI 2 "const0_operand")))]
9017 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9018 [(set_attr "type" "ssemov")
9019 (set_attr "prefix" "evex")
9020 (set_attr "mode" "TI")])
;; Unmasked store form: vpmov<trunsuffix>qd from V2DI register operand 1 to
;; V4SI memory operand 0.  The visible parallel selects elements 2-3.
9022 (define_insn "*avx512vl_<code>v2div2si2_store"
9023 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9026 (match_operand:V2DI 1 "register_operand" "v"))
9029 (parallel [(const_int 2) (const_int 3)]))))]
9031 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9032 [(set_attr "type" "ssemov")
9033 (set_attr "memory" "store")
9034 (set_attr "prefix" "evex")
9035 (set_attr "mode" "TI")])
;; Masked register form, emitted as vpmov<trunsuffix>qd.  Operand 3 (QImode,
;; "Yk") is the mask; operand 2 (V4SI, "0C") is tied to the destination or
;; a constant and is printed via %N2.  The trailing V2SI const_vector is
;; all zeros.
9037 (define_insn "avx512vl_<code>v2div2si2_mask"
9038 [(set (match_operand:V4SI 0 "register_operand" "=v")
9042 (match_operand:V2DI 1 "register_operand" "v"))
9044 (match_operand:V4SI 2 "vector_move_operand" "0C")
9045 (parallel [(const_int 0) (const_int 1)]))
9046 (match_operand:QI 3 "register_operand" "Yk"))
9047 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9049 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9050 [(set_attr "type" "ssemov")
9051 (set_attr "prefix" "evex")
9052 (set_attr "mode" "TI")])
;; Masked store form: vpmov<trunsuffix>qd from V2DI register operand 1 to
;; V4SI memory operand 0 under the QImode mask in operand 2 ("Yk").
;; The two parallels select elements 0-1 and 2-3 respectively.
9054 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9055 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9059 (match_operand:V2DI 1 "register_operand" "v"))
9062 (parallel [(const_int 0) (const_int 1)]))
9063 (match_operand:QI 2 "register_operand" "Yk"))
9066 (parallel [(const_int 2) (const_int 3)]))))]
9068 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9069 [(set_attr "type" "ssemov")
9070 (set_attr "memory" "store")
9071 (set_attr "prefix" "evex")
9072 (set_attr "mode" "TI")])
;; Unmasked register form for a V8DI source with a V16QI destination,
;; emitted as vpmov<trunsuffix>qb.  The V8QI const_vector paired with the
;; converted value is all zeros.
9074 (define_insn "*avx512f_<code>v8div16qi2"
9075 [(set (match_operand:V16QI 0 "register_operand" "=v")
9078 (match_operand:V8DI 1 "register_operand" "v"))
9079 (const_vector:V8QI [(const_int 0) (const_int 0)
9080 (const_int 0) (const_int 0)
9081 (const_int 0) (const_int 0)
9082 (const_int 0) (const_int 0)])))]
9084 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9085 [(set_attr "type" "ssemov")
9086 (set_attr "prefix" "evex")
9087 (set_attr "mode" "TI")])
;; Unmasked store form: vpmov<trunsuffix>qb from V8DI register operand 1 to
;; V16QI memory operand 0.  The visible parallel selects elements 8-15.
9089 (define_insn "*avx512f_<code>v8div16qi2_store"
9090 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9093 (match_operand:V8DI 1 "register_operand" "v"))
9096 (parallel [(const_int 8) (const_int 9)
9097 (const_int 10) (const_int 11)
9098 (const_int 12) (const_int 13)
9099 (const_int 14) (const_int 15)]))))]
9101 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9102 [(set_attr "type" "ssemov")
9103 (set_attr "memory" "store")
9104 (set_attr "prefix" "evex")
9105 (set_attr "mode" "TI")])
;; Masked register form, emitted as vpmov<trunsuffix>qb.  Operand 3 (QImode,
;; "Yk") is the mask; operand 2 (V16QI, "0C") is tied to the destination or
;; a constant and is printed via %N2.  The trailing V8QI const_vector is
;; all zeros.
9107 (define_insn "avx512f_<code>v8div16qi2_mask"
9108 [(set (match_operand:V16QI 0 "register_operand" "=v")
9112 (match_operand:V8DI 1 "register_operand" "v"))
9114 (match_operand:V16QI 2 "vector_move_operand" "0C")
9115 (parallel [(const_int 0) (const_int 1)
9116 (const_int 2) (const_int 3)
9117 (const_int 4) (const_int 5)
9118 (const_int 6) (const_int 7)]))
9119 (match_operand:QI 3 "register_operand" "Yk"))
9120 (const_vector:V8QI [(const_int 0) (const_int 0)
9121 (const_int 0) (const_int 0)
9122 (const_int 0) (const_int 0)
9123 (const_int 0) (const_int 0)])))]
9125 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9126 [(set_attr "type" "ssemov")
9127 (set_attr "prefix" "evex")
9128 (set_attr "mode" "TI")])
;; Masked store form: vpmov<trunsuffix>qb from V8DI register operand 1 to
;; V16QI memory operand 0 under the QImode mask in operand 2 ("Yk").
;; The two parallels select elements 0-7 and 8-15 respectively.
9130 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9131 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9135 (match_operand:V8DI 1 "register_operand" "v"))
9138 (parallel [(const_int 0) (const_int 1)
9139 (const_int 2) (const_int 3)
9140 (const_int 4) (const_int 5)
9141 (const_int 6) (const_int 7)]))
9142 (match_operand:QI 2 "register_operand" "Yk"))
9145 (parallel [(const_int 8) (const_int 9)
9146 (const_int 10) (const_int 11)
9147 (const_int 12) (const_int 13)
9148 (const_int 14) (const_int 15)]))))]
9150 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9151 [(set_attr "type" "ssemov")
9152 (set_attr "memory" "store")
9153 (set_attr "prefix" "evex")
9154 (set_attr "mode" "TI")])
9156 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9158 ;; Parallel integral arithmetic
9160 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector integer negate expander for every VI_AVX2 mode.  The preparation
;; statement creates operands[2] as a register holding the all-zero vector
;; CONST0_RTX (<MODE>mode) for use by the RTL template.
;; NOTE(review): presumably the template computes operand 2 minus
;; operand 1 -- confirm against the full pattern.
9162 (define_expand "neg<mode>2"
9163 [(set (match_operand:VI_AVX2 0 "register_operand")
9166 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
9168 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for vector integer add/subtract over VI_AVX2 modes.
;; Both inputs may be register or memory; the preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy before the insn is
;; emitted.
9170 (define_expand "<plusminus_insn><mode>3"
9171 [(set (match_operand:VI_AVX2 0 "register_operand")
9173 (match_operand:VI_AVX2 1 "nonimmediate_operand")
9174 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
9176 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/subtract expander for VI48_AVX512VL modes.  The result is a
;; vec_merge of (operand 1 <op> operand 2) with operand 3, selected by the
;; mask in operand 4 (mode <avx512fmaskmode>).  Operands are normalised by
;; ix86_fixup_binary_operands_no_copy first.
9178 (define_expand "<plusminus_insn><mode>3_mask"
9179 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9180 (vec_merge:VI48_AVX512VL
9181 (plusminus:VI48_AVX512VL
9182 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9183 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9184 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9185 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9187 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/subtract expander for VI12_AVX512VL modes; same shape as the
;; VI48_AVX512VL expander of the same name: vec_merge of the arithmetic
;; result with operand 3 under the mask in operand 4.
9189 (define_expand "<plusminus_insn><mode>3_mask"
9190 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9191 (vec_merge:VI12_AVX512VL
9192 (plusminus:VI12_AVX512VL
9193 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9194 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9195 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9196 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9198 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for unmasked vector integer add/subtract.  Two
;; alternatives: the "noavx" one is the two-operand p<op><suffix> form with
;; operand 1 tied to the destination, the "avx" one is the three-operand
;; vp<op><suffix> form.  <comm> supplies the commutative marker where the
;; operation allows it.  The condition also requires
;; ix86_binary_operator_ok.
9200 (define_insn "*<plusminus_insn><mode>3"
9201 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9203 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9204 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9206 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9208 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9209 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9210 [(set_attr "isa" "noavx,avx")
9211 (set_attr "type" "sseiadd")
9212 (set_attr "prefix_data16" "1,*")
9213 (set_attr "prefix" "<mask_prefix3>")
9214 (set_attr "mode" "<sseinsnmode>")])
;; Matching insn for masked add/subtract on VI48_AVX512VL modes.  Operand 4
;; is the mask ("Yk") and operand 3 ("0C") is tied to the destination or a
;; constant; it is printed via %N3.  Always EVEX-encoded.
9216 (define_insn "*<plusminus_insn><mode>3_mask"
9217 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9218 (vec_merge:VI48_AVX512VL
9219 (plusminus:VI48_AVX512VL
9220 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9221 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9222 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9223 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9225 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9226 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9227 [(set_attr "type" "sseiadd")
9228 (set_attr "prefix" "evex")
9229 (set_attr "mode" "<sseinsnmode>")])
;; Matching insn for masked add/subtract on VI12_AVX512VL modes.  Requires
;; TARGET_AVX512BW and ix86_binary_operator_ok.  Operand 4 is the mask
;; ("Yk"); operand 3 ("0C") is printed via %N3.
9231 (define_insn "*<plusminus_insn><mode>3_mask"
9232 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9233 (vec_merge:VI12_AVX512VL
9234 (plusminus:VI12_AVX512VL
9235 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9236 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9237 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9238 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9239 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9240 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9241 [(set_attr "type" "sseiadd")
9242 (set_attr "prefix" "evex")
9243 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus codes on VI12_AVX2 modes.  Enabled under
;; TARGET_SSE2 together with the <mask_mode512bit_condition> and
;; <mask_avx512bw_condition> substitutions; operands are normalised by
;; ix86_fixup_binary_operands_no_copy.
9245 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9246 [(set (match_operand:VI12_AVX2 0 "register_operand")
9247 (sat_plusminus:VI12_AVX2
9248 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
9249 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
9250 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9251 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus codes on VI12_AVX2 modes.
;; Alternative 0 ("noavx") is the two-operand legacy form with operand 1
;; tied to the destination; alternative 1 ("avx") is the three-operand
;; v-prefixed form, which also carries the optional <mask_operand3>.
9253 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9254 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9255 (sat_plusminus:VI12_AVX2
9256 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9257 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9258 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9259 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9261 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9262 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9263 [(set_attr "isa" "noavx,avx")
9264 (set_attr "type" "sseiadd")
9265 (set_attr "prefix_data16" "1,*")
9266 (set_attr "prefix" "orig,maybe_evex")
9267 (set_attr "mode" "TI")])
;; Byte-element vector multiply expander (VI1_AVX512 modes).  Both inputs
;; must be registers; the expansion is delegated to
;; ix86_expand_vecop_qihi with rtx code MULT.
9269 (define_expand "mul<mode>3<mask_name>"
9270 [(set (match_operand:VI1_AVX512 0 "register_operand")
9271 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9272 (match_operand:VI1_AVX512 2 "register_operand")))]
9273 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9275 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Vector multiply expander for VI2_AVX2 modes.  Inputs may be register or
;; memory; operands are normalised by ix86_fixup_binary_operands_no_copy
;; and the pattern is then matched by the *mul<mode>3<mask_name> insn.
9279 (define_expand "mul<mode>3<mask_name>"
9280 [(set (match_operand:VI2_AVX2 0 "register_operand")
9281 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
9282 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
9283 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9284 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the VI2_AVX2 multiply: pmullw (two-operand, "noavx")
;; or vpmullw (three-operand, "avx", with optional <mask_operand3>).
;; The '%' on operand 1 marks operands 1 and 2 as commutative.
9286 (define_insn "*mul<mode>3<mask_name>"
9287 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9288 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
9289 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9291 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9292 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9294 pmullw\t{%2, %0|%0, %2}
9295 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9296 [(set_attr "isa" "noavx,avx")
9297 (set_attr "type" "sseimul")
9298 (set_attr "prefix_data16" "1,*")
9299 (set_attr "prefix" "orig,vex")
9300 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for VI2_AVX2 modes.  Both inputs are
;; extended (any_extend) to <ssedoublemode>, multiplied, and the product
;; is logically shifted right (lshiftrt) in the double-width mode.
;; Operands are normalised by ix86_fixup_binary_operands_no_copy.
9302 (define_expand "<s>mul<mode>3_highpart<mask_name>"
9303 [(set (match_operand:VI2_AVX2 0 "register_operand")
9305 (lshiftrt:<ssedoublemode>
9306 (mult:<ssedoublemode>
9307 (any_extend:<ssedoublemode>
9308 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
9309 (any_extend:<ssedoublemode>
9310 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
9313 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9314 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the high-part multiply: pmulh<u>w (two-operand,
;; "noavx") or vpmulh<u>w (three-operand, "avx", with optional
;; <mask_operand3>).  Operands 1 and 2 are commutative ('%').
9316 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
9317 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9319 (lshiftrt:<ssedoublemode>
9320 (mult:<ssedoublemode>
9321 (any_extend:<ssedoublemode>
9322 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
9323 (any_extend:<ssedoublemode>
9324 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
9327 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9328 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9330 pmulh<u>w\t{%2, %0|%0, %2}
9331 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9332 [(set_attr "isa" "noavx,avx")
9333 (set_attr "type" "sseimul")
9334 (set_attr "prefix_data16" "1,*")
9335 (set_attr "prefix" "orig,vex")
9336 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the unsigned widening multiply of the even-indexed
;; elements (0, 2, ..., 14) of two V16SI inputs into a V8DI result.
;; Operands are normalised by ix86_fixup_binary_operands_no_copy.
9338 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9339 [(set (match_operand:V8DI 0 "register_operand")
9343 (match_operand:V16SI 1 "nonimmediate_operand")
9344 (parallel [(const_int 0) (const_int 2)
9345 (const_int 4) (const_int 6)
9346 (const_int 8) (const_int 10)
9347 (const_int 12) (const_int 14)])))
9350 (match_operand:V16SI 2 "nonimmediate_operand")
9351 (parallel [(const_int 0) (const_int 2)
9352 (const_int 4) (const_int 6)
9353 (const_int 8) (const_int 10)
9354 (const_int 12) (const_int 14)])))))]
9356 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; Matching insn for the V16SI even-element unsigned widening multiply,
;; emitted as vpmuludq.  Requires TARGET_AVX512F; operands 1 and 2 are
;; commutative ('%') and at most operand 2 may be memory per constraints.
9358 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9359 [(set (match_operand:V8DI 0 "register_operand" "=v")
9363 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9364 (parallel [(const_int 0) (const_int 2)
9365 (const_int 4) (const_int 6)
9366 (const_int 8) (const_int 10)
9367 (const_int 12) (const_int 14)])))
9370 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9371 (parallel [(const_int 0) (const_int 2)
9372 (const_int 4) (const_int 6)
9373 (const_int 8) (const_int 10)
9374 (const_int 12) (const_int 14)])))))]
9375 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9376 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9377 [(set_attr "isa" "avx512f")
9378 (set_attr "type" "sseimul")
9379 (set_attr "prefix_extra" "1")
9380 (set_attr "prefix" "evex")
9381 (set_attr "mode" "XI")])
;; Expander for the unsigned widening multiply of the even-indexed
;; elements (0, 2, 4, 6) of two V8SI inputs into a V4DI result.
;; Enabled under TARGET_AVX2 && <mask_avx512vl_condition>.
9383 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9384 [(set (match_operand:V4DI 0 "register_operand")
9388 (match_operand:V8SI 1 "nonimmediate_operand")
9389 (parallel [(const_int 0) (const_int 2)
9390 (const_int 4) (const_int 6)])))
9393 (match_operand:V8SI 2 "nonimmediate_operand")
9394 (parallel [(const_int 0) (const_int 2)
9395 (const_int 4) (const_int 6)])))))]
9396 "TARGET_AVX2 && <mask_avx512vl_condition>"
9397 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for the V8SI even-element unsigned widening multiply,
;; emitted as vpmuludq.  Operands 1 and 2 are commutative ('%'); the
;; "prefix" attribute is "maybe_evex".
9399 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9400 [(set (match_operand:V4DI 0 "register_operand" "=v")
9404 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9405 (parallel [(const_int 0) (const_int 2)
9406 (const_int 4) (const_int 6)])))
9409 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9410 (parallel [(const_int 0) (const_int 2)
9411 (const_int 4) (const_int 6)])))))]
9412 "TARGET_AVX2 && <mask_avx512vl_condition>
9413 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9414 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9415 [(set_attr "type" "sseimul")
9416 (set_attr "prefix" "maybe_evex")
9417 (set_attr "mode" "OI")])
;; Expander for the unsigned widening multiply of elements 0 and 2 of two
;; V4SI inputs into a V2DI result.  Enabled under
;; TARGET_SSE2 && <mask_avx512vl_condition>.
9419 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9420 [(set (match_operand:V2DI 0 "register_operand")
9424 (match_operand:V4SI 1 "nonimmediate_operand")
9425 (parallel [(const_int 0) (const_int 2)])))
9428 (match_operand:V4SI 2 "nonimmediate_operand")
9429 (parallel [(const_int 0) (const_int 2)])))))]
9430 "TARGET_SSE2 && <mask_avx512vl_condition>"
9431 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for the V4SI even-element unsigned widening multiply:
;; pmuludq (two-operand, "noavx", operand 1 tied to the destination) or
;; vpmuludq (three-operand, "avx", with optional <mask_operand3>).
9433 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9434 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9438 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9439 (parallel [(const_int 0) (const_int 2)])))
9442 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9443 (parallel [(const_int 0) (const_int 2)])))))]
9444 "TARGET_SSE2 && <mask_avx512vl_condition>
9445 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9447 pmuludq\t{%2, %0|%0, %2}
9448 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9449 [(set_attr "isa" "noavx,avx")
9450 (set_attr "type" "sseimul")
9451 (set_attr "prefix_data16" "1,*")
9452 (set_attr "prefix" "orig,maybe_evex")
9453 (set_attr "mode" "TI")])
;; Expander for the signed counterpart of vec_widen_umult_even_v16si:
;; even-indexed elements (0, 2, ..., 14) of two V16SI inputs are multiplied
;; into a V8DI result.  Operands are normalised by
;; ix86_fixup_binary_operands_no_copy.
9455 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9456 [(set (match_operand:V8DI 0 "register_operand")
9460 (match_operand:V16SI 1 "nonimmediate_operand")
9461 (parallel [(const_int 0) (const_int 2)
9462 (const_int 4) (const_int 6)
9463 (const_int 8) (const_int 10)
9464 (const_int 12) (const_int 14)])))
9467 (match_operand:V16SI 2 "nonimmediate_operand")
9468 (parallel [(const_int 0) (const_int 2)
9469 (const_int 4) (const_int 6)
9470 (const_int 8) (const_int 10)
9471 (const_int 12) (const_int 14)])))))]
9473 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; Matching insn for the V16SI even-element signed widening multiply,
;; emitted as vpmuldq.  Requires TARGET_AVX512F; operands 1 and 2 are
;; commutative ('%').
9475 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9476 [(set (match_operand:V8DI 0 "register_operand" "=v")
9480 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9481 (parallel [(const_int 0) (const_int 2)
9482 (const_int 4) (const_int 6)
9483 (const_int 8) (const_int 10)
9484 (const_int 12) (const_int 14)])))
9487 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9488 (parallel [(const_int 0) (const_int 2)
9489 (const_int 4) (const_int 6)
9490 (const_int 8) (const_int 10)
9491 (const_int 12) (const_int 14)])))))]
9492 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9493 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9494 [(set_attr "isa" "avx512f")
9495 (set_attr "type" "sseimul")
9496 (set_attr "prefix_extra" "1")
9497 (set_attr "prefix" "evex")
9498 (set_attr "mode" "XI")])
;; Expander for the signed widening multiply of the even-indexed elements
;; (0, 2, 4, 6) of two V8SI inputs into a V4DI result.  Enabled under
;; TARGET_AVX2 && <mask_avx512vl_condition>.
9500 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9501 [(set (match_operand:V4DI 0 "register_operand")
9505 (match_operand:V8SI 1 "nonimmediate_operand")
9506 (parallel [(const_int 0) (const_int 2)
9507 (const_int 4) (const_int 6)])))
9510 (match_operand:V8SI 2 "nonimmediate_operand")
9511 (parallel [(const_int 0) (const_int 2)
9512 (const_int 4) (const_int 6)])))))]
9513 "TARGET_AVX2 && <mask_avx512vl_condition>"
9514 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for the V8SI even-element signed widening multiply,
;; emitted as vpmuldq.  Operand 1 carries the '%' commutative modifier,
;; like the V16SI signed variant and all the unsigned variants: the
;; multiply is commutative and the condition already goes through
;; ix86_binary_operator_ok (MULT, ...), so the register allocator may swap
;; operands 1 and 2 to keep a memory input in the "vm" slot instead of
;; reloading it into a register.
9516 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9517 [(set (match_operand:V4DI 0 "register_operand" "=v")
9521 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9522 (parallel [(const_int 0) (const_int 2)
9523 (const_int 4) (const_int 6)])))
9526 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9527 (parallel [(const_int 0) (const_int 2)
9528 (const_int 4) (const_int 6)])))))]
9530 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9531 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9532 [(set_attr "type" "sseimul")
9533 (set_attr "prefix_extra" "1")
9534 (set_attr "prefix" "vex")
9535 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; inputs into a V2DI result, enabled under
;; TARGET_SSE4_1 && <mask_avx512vl_condition>.  Operands are normalised by
;; ix86_fixup_binary_operands_no_copy.
9537 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9538 [(set (match_operand:V2DI 0 "register_operand")
9542 (match_operand:V4SI 1 "nonimmediate_operand")
9543 (parallel [(const_int 0) (const_int 2)])))
9546 (match_operand:V4SI 2 "nonimmediate_operand")
9547 (parallel [(const_int 0) (const_int 2)])))))]
9548 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
9549 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for sse4_1_mulv2siv2di3.  Three alternatives: two
;; "noavx" ones emitting two-operand pmuldq (constraints "Yr" and "*x",
;; operand 1 tied to the destination) and one "avx" alternative emitting
;; three-operand vpmuldq with optional <mask_operand3>.
9551 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
9552 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
9556 (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
9557 (parallel [(const_int 0) (const_int 2)])))
9560 (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
9561 (parallel [(const_int 0) (const_int 2)])))))]
9562 "TARGET_SSE4_1 && <mask_avx512vl_condition>
9563 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9565 pmuldq\t{%2, %0|%0, %2}
9566 pmuldq\t{%2, %0|%0, %2}
9567 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9568 [(set_attr "isa" "noavx,noavx,avx")
9569 (set_attr "type" "sseimul")
9570 (set_attr "prefix_data16" "1,1,*")
9571 (set_attr "prefix_extra" "1")
9572 (set_attr "prefix" "orig,orig,vex")
9573 (set_attr "mode" "TI")])
;; vpmaddwd expressed as UNSPEC_PMADDWD512 over two VI2_AVX2 inputs,
;; producing a result in <sseunpackmode>.  Requires TARGET_AVX512BW and
;; <mask_mode512bit_condition>; operand 2 may be memory.
;; The "mode" attribute follows the iterated vector mode via
;; <sseinsnmode>, as the other VI2_AVX2 insns in this file do, instead of
;; claiming XI for every variant.  The stray ';' that used to follow the
;; output template (harmless only because it starts a comment) is removed.
9575 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
9576 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
9577 (unspec:<sseunpackmode>
9578 [(match_operand:VI2_AVX2 1 "register_operand" "v")
9579 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
9580 UNSPEC_PMADDWD512))]
9581 "TARGET_AVX512BW && <mask_mode512bit_condition>"
9582 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9583 [(set_attr "type" "sseiadd")
9584 (set_attr "prefix" "evex")
9585 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 256-bit multiply-add of V16HI inputs into V8SI.
;; The template selects the even-indexed elements (0, 2, ..., 14) and the
;; odd-indexed elements (1, 3, ..., 15) of both inputs as V8HI vectors;
;; operands 1 and 2 are reused through match_dup for the odd halves.
;; Operands are normalised by ix86_fixup_binary_operands_no_copy.
9587 (define_expand "avx2_pmaddwd"
9588 [(set (match_operand:V8SI 0 "register_operand")
9593 (match_operand:V16HI 1 "nonimmediate_operand")
9594 (parallel [(const_int 0) (const_int 2)
9595 (const_int 4) (const_int 6)
9596 (const_int 8) (const_int 10)
9597 (const_int 12) (const_int 14)])))
9600 (match_operand:V16HI 2 "nonimmediate_operand")
9601 (parallel [(const_int 0) (const_int 2)
9602 (const_int 4) (const_int 6)
9603 (const_int 8) (const_int 10)
9604 (const_int 12) (const_int 14)]))))
9607 (vec_select:V8HI (match_dup 1)
9608 (parallel [(const_int 1) (const_int 3)
9609 (const_int 5) (const_int 7)
9610 (const_int 9) (const_int 11)
9611 (const_int 13) (const_int 15)])))
9613 (vec_select:V8HI (match_dup 2)
9614 (parallel [(const_int 1) (const_int 3)
9615 (const_int 5) (const_int 7)
9616 (const_int 9) (const_int 11)
9617 (const_int 13) (const_int 15)]))))))]
9619 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matching insn for avx2_pmaddwd, emitted as three-operand vpmaddwd.
;; Requires TARGET_AVX2 and ix86_binary_operator_ok; operands 1 and 2 are
;; commutative ('%') and only operand 2 may be memory per constraints.
9621 (define_insn "*avx2_pmaddwd"
9622 [(set (match_operand:V8SI 0 "register_operand" "=x")
9627 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
9628 (parallel [(const_int 0) (const_int 2)
9629 (const_int 4) (const_int 6)
9630 (const_int 8) (const_int 10)
9631 (const_int 12) (const_int 14)])))
9634 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9635 (parallel [(const_int 0) (const_int 2)
9636 (const_int 4) (const_int 6)
9637 (const_int 8) (const_int 10)
9638 (const_int 12) (const_int 14)]))))
9641 (vec_select:V8HI (match_dup 1)
9642 (parallel [(const_int 1) (const_int 3)
9643 (const_int 5) (const_int 7)
9644 (const_int 9) (const_int 11)
9645 (const_int 13) (const_int 15)])))
9647 (vec_select:V8HI (match_dup 2)
9648 (parallel [(const_int 1) (const_int 3)
9649 (const_int 5) (const_int 7)
9650 (const_int 9) (const_int 11)
9651 (const_int 13) (const_int 15)]))))))]
9652 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9653 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9654 [(set_attr "type" "sseiadd")
9655 (set_attr "prefix" "vex")
9656 (set_attr "mode" "OI")])
;; Expander for the 128-bit multiply-add of V8HI inputs into V4SI.
;; The template selects the even-indexed elements (0, 2, 4, 6) and the
;; odd-indexed elements (1, 3, 5, 7) of both inputs as V4HI vectors;
;; operands 1 and 2 are reused through match_dup for the odd halves.
9658 (define_expand "sse2_pmaddwd"
9659 [(set (match_operand:V4SI 0 "register_operand")
9664 (match_operand:V8HI 1 "nonimmediate_operand")
9665 (parallel [(const_int 0) (const_int 2)
9666 (const_int 4) (const_int 6)])))
9669 (match_operand:V8HI 2 "nonimmediate_operand")
9670 (parallel [(const_int 0) (const_int 2)
9671 (const_int 4) (const_int 6)]))))
9674 (vec_select:V4HI (match_dup 1)
9675 (parallel [(const_int 1) (const_int 3)
9676 (const_int 5) (const_int 7)])))
9678 (vec_select:V4HI (match_dup 2)
9679 (parallel [(const_int 1) (const_int 3)
9680 (const_int 5) (const_int 7)]))))))]
9682 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for sse2_pmaddwd: pmaddwd (two-operand, "noavx", operand 1
;; tied to the destination) or vpmaddwd (three-operand, "avx").
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
9684 (define_insn "*sse2_pmaddwd"
9685 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9690 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9691 (parallel [(const_int 0) (const_int 2)
9692 (const_int 4) (const_int 6)])))
9695 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9696 (parallel [(const_int 0) (const_int 2)
9697 (const_int 4) (const_int 6)]))))
9700 (vec_select:V4HI (match_dup 1)
9701 (parallel [(const_int 1) (const_int 3)
9702 (const_int 5) (const_int 7)])))
9704 (vec_select:V4HI (match_dup 2)
9705 (parallel [(const_int 1) (const_int 3)
9706 (const_int 5) (const_int 7)]))))))]
9707 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9709 pmaddwd\t{%2, %0|%0, %2}
9710 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9711 [(set_attr "isa" "noavx,avx")
9712 (set_attr "type" "sseiadd")
9713 (set_attr "atom_unit" "simul")
9714 (set_attr "prefix_data16" "1,*")
9715 (set_attr "prefix" "orig,vex")
9716 (set_attr "mode" "TI")])
;; VI8 multiply emitted as vpmullq.  Requires TARGET_AVX512DQ and
;; <mask_mode512bit_condition>.  Operand 1 must be a register; operand 2
;; may be register or memory.
9718 (define_insn "avx512dq_mul<mode>3<mask_name>"
9719 [(set (match_operand:VI8 0 "register_operand" "=v")
9721 (match_operand:VI8 1 "register_operand" "v")
9722 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9723 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9724 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9725 [(set_attr "type" "sseimul")
9726 (set_attr "prefix" "evex")
9727 (set_attr "mode" "<sseinsnmode>")])
;; Vector multiply expander for VI4_AVX512F modes, enabled under
;; TARGET_SSE2 && <mask_mode512bit_condition>.  Two expansion paths are
;; visible: one forces any input that is not a nonimmediate_operand into a
;; register and then calls ix86_fixup_binary_operands_no_copy; the other
;; calls ix86_expand_sse2_mulv4si3.
;; NOTE(review): the test choosing between the two paths is not shown
;; here -- presumably SSE4.1 availability; confirm.
9729 (define_expand "mul<mode>3<mask_name>"
9730 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9732 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9733 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9734 "TARGET_SSE2 && <mask_mode512bit_condition>"
9738 if (!nonimmediate_operand (operands[1], <MODE>mode))
9739 operands[1] = force_reg (<MODE>mode, operands[1]);
9740 if (!nonimmediate_operand (operands[2], <MODE>mode))
9741 operands[2] = force_reg (<MODE>mode, operands[2]);
9742 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9746 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Matching insn for the VI4_AVX512F multiply.  Requires TARGET_SSE4_1,
;; ix86_binary_operator_ok and <mask_mode512bit_condition>.  Two "noavx"
;; alternatives emit two-operand pmulld; the "avx" alternative emits
;; three-operand vpmulld with optional <mask_operand3>.
9751 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9752 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
9754 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
9755 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
9756 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9758 pmulld\t{%2, %0|%0, %2}
9759 pmulld\t{%2, %0|%0, %2}
9760 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9761 [(set_attr "isa" "noavx,noavx,avx")
9762 (set_attr "type" "sseimul")
9763 (set_attr "prefix_extra" "1")
9764 (set_attr "prefix" "<mask_prefix4>")
9765 (set_attr "btver2_decode" "vector,vector,vector")
9766 (set_attr "mode" "<sseinsnmode>")])
;; Vector multiply expander for VI8_AVX2_AVX512F modes.  Both inputs must
;; be registers; the whole expansion is delegated to
;; ix86_expand_sse2_mulvxdi3.
9768 (define_expand "mul<mode>3"
9769 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9770 (mult:VI8_AVX2_AVX512F
9771 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9772 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9775 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; "hi" widening multiply expander for VI124_AVX2 modes with an
;; <sseunpackmode> result.  The any_extend wrapper in the operand list only
;; drives the <s> iteration of the name; the expansion itself is done by
;; ix86_expand_mul_widen_hilo.
9779 (define_expand "vec_widen_<s>mult_hi_<mode>"
9780 [(match_operand:<sseunpackmode> 0 "register_operand")
9781 (any_extend:<sseunpackmode>
9782 (match_operand:VI124_AVX2 1 "register_operand"))
9783 (match_operand:VI124_AVX2 2 "register_operand")]
9786 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; "lo" counterpart of vec_widen_<s>mult_hi_<mode>: same operands, also
;; expanded through ix86_expand_mul_widen_hilo.
9791 (define_expand "vec_widen_<s>mult_lo_<mode>"
9792 [(match_operand:<sseunpackmode> 0 "register_operand")
9793 (any_extend:<sseunpackmode>
9794 (match_operand:VI124_AVX2 1 "register_operand"))
9795 (match_operand:VI124_AVX2 2 "register_operand")]
9798 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9803 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
9804 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; This expander takes V4SI inputs (register or memory), produces a V2DI
;; register, and defers to ix86_expand_mul_widen_evenodd.
9805 (define_expand "vec_widen_smult_even_v4si"
9806 [(match_operand:V2DI 0 "register_operand")
9807 (match_operand:V4SI 1 "nonimmediate_operand")
9808 (match_operand:V4SI 2 "nonimmediate_operand")]
9811 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Odd-element widening multiply expander for VI4_AVX512F modes, signed
;; and unsigned via any_extend.  Inputs are general_vector_operand; the
;; expansion is done by ix86_expand_mul_widen_evenodd.
9816 (define_expand "vec_widen_<s>mult_odd_<mode>"
9817 [(match_operand:<sseunpackmode> 0 "register_operand")
9818 (any_extend:<sseunpackmode>
9819 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9820 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9823 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix appended to gen_<sse2_avx2>_pmaddwd in sdot_prod<mode> so that
;; the V32HI variant calls the "...pmaddwd512v32hi" generator while V16HI
;; and V8HI use the plain avx2_pmaddwd / sse2_pmaddwd names.
9828 (define_mode_attr SDOT_PMADD_SUF
9829 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; Expander for sdot_prod<mode> over the VI2_AVX2 modes.  Operands 1 and 2
;; are the narrow inputs, operands 0 and 3 are registers in the wider
;; <sseunpackmode>.  A pmaddwd of operands 1 and 2 is emitted into a fresh
;; temporary t, and operand 0 is then set to a PLUS in <sseunpackmode>
;; (presumably t + operand 3 -- confirm against the full pattern).
9831 (define_expand "sdot_prod<mode>"
9832 [(match_operand:<sseunpackmode> 0 "register_operand")
9833 (match_operand:VI2_AVX2 1 "register_operand")
9834 (match_operand:VI2_AVX2 2 "register_operand")
9835 (match_operand:<sseunpackmode> 3 "register_operand")]
9838 rtx t = gen_reg_rtx (<sseunpackmode>mode);
9839 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
9840 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9841 gen_rtx_PLUS (<sseunpackmode>mode,
9846 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9847 ;; back together when madd is available.
;; V4SI -> V2DI dot product built from two XOP patterns.  gen_xop_pmacsdqh
;; takes operands 1, 2 and 3 and writes a fresh V2DI temporary t;
;; gen_xop_pmacsdql then takes operands 1, 2 and t and writes operand 0.
9848 (define_expand "sdot_prodv4si"
9849 [(match_operand:V2DI 0 "register_operand")
9850 (match_operand:V4SI 1 "register_operand")
9851 (match_operand:V4SI 2 "register_operand")
9852 (match_operand:V2DI 3 "register_operand")]
9855 rtx t = gen_reg_rtx (V2DImode);
9856 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
9857 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Expander for usadv16qi.  Steps visible in the preparation code:
;;   1. psadbw of operands 1 and 2 into a V2DI temporary (t1);
;;   2. convert_move of t1 into a V4SI temporary (t2);
;;   3. V4SI add of t2 and operand 3 into operand 0.
;; Operands 2 and 3 may live in memory (nonimmediate_operand).
9861 (define_expand "usadv16qi"
9862 [(match_operand:V4SI 0 "register_operand")
9863 (match_operand:V16QI 1 "register_operand")
9864 (match_operand:V16QI 2 "nonimmediate_operand")
9865 (match_operand:V4SI 3 "nonimmediate_operand")]
9868 rtx t1 = gen_reg_rtx (V2DImode);
9869 rtx t2 = gen_reg_rtx (V4SImode);
9870 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
9871 convert_move (t2, t1, 0);
9872 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; 256-bit counterpart of usadv16qi: avx2 psadbw of operands 1 and 2 into
;; a V4DI temporary, convert_move into a V8SI temporary, then a V8SI add
;; with operand 3 into operand 0.
9876 (define_expand "usadv32qi"
9877 [(match_operand:V8SI 0 "register_operand")
9878 (match_operand:V32QI 1 "register_operand")
9879 (match_operand:V32QI 2 "nonimmediate_operand")
9880 (match_operand:V8SI 3 "nonimmediate_operand")]
9883 rtx t1 = gen_reg_rtx (V4DImode);
9884 rtx t2 = gen_reg_rtx (V8SImode);
9885 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
9886 convert_move (t2, t1, 0);
9887 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; Shift insn printing psra<ssemodesuffix> for the VI24_AVX2 modes.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied to
;; the destination ("0"); alternative 1 (isa avx) is the three-operand
;; vpsra form.  Operand 2 is an SImode count that must not be in memory.
;; length_immediate is chosen by testing whether operand 2 is a const_int.
9891 (define_insn "ashr<mode>3"
9892 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
9894 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
9895 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
9898 psra<ssemodesuffix>\t{%2, %0|%0, %2}
9899 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9900 [(set_attr "isa" "noavx,avx")
9901 (set_attr "type" "sseishft")
9902 (set (attr "length_immediate")
9903 (if_then_else (match_operand 2 "const_int_operand")
9905 (const_string "0")))
9906 (set_attr "prefix_data16" "1,*")
9907 (set_attr "prefix" "orig,vex")
9908 (set_attr "mode" "<sseinsnmode>")])
;; ashiftrt insn for the VI24_AVX512BW_1 modes, printed as
;; vpsra<ssemodesuffix> with the optional <mask_operand3> suffix.
;; Alternative 0 takes the count from a "v" register; alternative 1 uses
;; the "N" constraint for the count and lets operand 1 be in memory.
9910 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
9911 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
9912 (ashiftrt:VI24_AVX512BW_1
9913 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
9914 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9916 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9917 [(set_attr "type" "sseishft")
9918 (set (attr "length_immediate")
9919 (if_then_else (match_operand 2 "const_int_operand")
9921 (const_string "0")))
9922 (set_attr "mode" "<sseinsnmode>")])
;; V2DI shift insn printed as vpsraq with the optional <mask_operand3>
;; suffix.  Unlike the other ashr patterns here, the count (operand 2) is
;; DImode.  The "mode" attribute is fixed to TI.
9924 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
9925 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
9927 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
9928 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
9930 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9931 [(set_attr "type" "sseishft")
9932 (set (attr "length_immediate")
9933 (if_then_else (match_operand 2 "const_int_operand")
9935 (const_string "0")))
9936 (set_attr "mode" "TI")])
;; ashiftrt insn for the VI248_AVX512BW_AVX512VL modes, printed as
;; vpsra<ssemodesuffix> with the optional <mask_operand3> suffix.  Same
;; two alternatives as the other EVEX ashr patterns: count in a "v"
;; register, or "N" count with operand 1 allowed in memory.
9938 (define_insn "ashr<mode>3<mask_name>"
9939 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
9940 (ashiftrt:VI248_AVX512BW_AVX512VL
9941 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
9942 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9944 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9945 [(set_attr "type" "sseishft")
9946 (set (attr "length_immediate")
9947 (if_then_else (match_operand 2 "const_int_operand")
9949 (const_string "0")))
9950 (set_attr "mode" "<sseinsnmode>")])
;; any_lshift shifts for the VI2_AVX2_AVX512BW modes.  Enabled under
;; TARGET_SSE2 together with <mask_mode512bit_condition> and
;; <mask_avx512bw_condition>.  Alternative 0 (isa noavx) prints the
;; two-operand p<vshift> form with operand 1 tied to the destination;
;; alternative 1 (isa avx) prints the three-operand vp<vshift> form with
;; the optional mask suffix.
9952 (define_insn "<shift_insn><mode>3<mask_name>"
9953 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
9954 (any_lshift:VI2_AVX2_AVX512BW
9955 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
9956 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9957 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9959 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9960 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9961 [(set_attr "isa" "noavx,avx")
9962 (set_attr "type" "sseishft")
9963 (set (attr "length_immediate")
9964 (if_then_else (match_operand 2 "const_int_operand")
9966 (const_string "0")))
9967 (set_attr "prefix_data16" "1,*")
9968 (set_attr "prefix" "orig,vex")
9969 (set_attr "mode" "<sseinsnmode>")])
;; any_lshift shifts for the VI48_AVX2 modes.  Enabled under TARGET_SSE2
;; together with <mask_mode512bit_condition>.  Alternative layout matches
;; the VI2 variant: two-operand p<vshift> (isa noavx) and three-operand
;; vp<vshift> with the optional mask suffix (isa avx).
9971 (define_insn "<shift_insn><mode>3<mask_name>"
9972 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
9973 (any_lshift:VI48_AVX2
9974 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
9975 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9976 "TARGET_SSE2 && <mask_mode512bit_condition>"
9978 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9979 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9980 [(set_attr "isa" "noavx,avx")
9981 (set_attr "type" "sseishft")
9982 (set (attr "length_immediate")
9983 (if_then_else (match_operand 2 "const_int_operand")
9985 (const_string "0")))
9986 (set_attr "prefix_data16" "1,*")
9987 (set_attr "prefix" "orig,vex")
9988 (set_attr "mode" "<sseinsnmode>")])
;; any_lshift shifts for the VI48_512 modes, enabled under TARGET_AVX512F
;; together with <mask_mode512bit_condition>.  Alternative 0 takes
;; operand 1 from a register with a "vN" count; alternative 1 takes
;; operand 1 from memory and then only accepts the "N" constraint for the
;; count.  Always printed in the three-operand vp<vshift> form.
9990 (define_insn "<shift_insn><mode>3<mask_name>"
9991 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
9992 (any_lshift:VI48_512
9993 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
9994 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
9995 "TARGET_AVX512F && <mask_mode512bit_condition>"
9996 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9997 [(set_attr "isa" "avx512f")
9998 (set_attr "type" "sseishft")
9999 (set (attr "length_immediate")
10000 (if_then_else (match_operand 2 "const_int_operand")
10002 (const_string "0")))
10003 (set_attr "prefix" "evex")
10004 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vec_shl_<mode> over the VI_128 modes.  The shift is done
;; in V1TImode: operand 1 is re-viewed as V1TI with gen_lowpart, the
;; result goes to a fresh V1TI pseudo (operand 3), and operand 4 is that
;; pseudo viewed back in <MODE>mode, which the second set copies into
;; operand 0.  Operand 2 must satisfy const_0_to_255_mul_8_operand.
10007 (define_expand "vec_shl_<mode>"
10008 [(set (match_dup 3)
10010 (match_operand:VI_128 1 "register_operand")
10011 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10012 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10015 operands[1] = gen_lowpart (V1TImode, operands[1]);
10016 operands[3] = gen_reg_rtx (V1TImode);
10017 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Shift insn for the VIMAX_AVX2 modes, printed as pslldq (alternative 0,
;; destination tied to operand 1) or vpslldq (alternative 1).  Operand 2
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing, so the RTL count and the printed immediate
;; differ by that factor.
10020 (define_insn "<sse2_avx2>_ashl<mode>3"
10021 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10023 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10024 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10027 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10029 switch (which_alternative)
10032 return "pslldq\t{%2, %0|%0, %2}";
10034 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10036 gcc_unreachable ();
10039 [(set_attr "isa" "noavx,avx")
10040 (set_attr "type" "sseishft")
10041 (set_attr "length_immediate" "1")
10042 (set_attr "prefix_data16" "1,*")
10043 (set_attr "prefix" "orig,vex")
10044 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vec_shr_<mode> over the VI_128 modes; mirrors vec_shl.
;; Operand 1 is re-viewed as V1TI with gen_lowpart, the shifted value goes
;; to a fresh V1TI pseudo (operand 3), and operand 4 is that pseudo viewed
;; in <MODE>mode, which the second set copies into operand 0.
10046 (define_expand "vec_shr_<mode>"
10047 [(set (match_dup 3)
10049 (match_operand:VI_128 1 "register_operand")
10050 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10051 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10054 operands[1] = gen_lowpart (V1TImode, operands[1]);
10055 operands[3] = gen_reg_rtx (V1TImode);
10056 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; lshiftrt insn for the VIMAX_AVX2 modes, printed as psrldq (alternative
;; 0, destination tied to operand 1) or vpsrldq (alternative 1).  As in
;; the ashl counterpart, operand 2 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before printing.
;; This pattern additionally sets atom_unit to "sishuf".
10059 (define_insn "<sse2_avx2>_lshr<mode>3"
10060 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10061 (lshiftrt:VIMAX_AVX2
10062 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10063 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10066 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10068 switch (which_alternative)
10071 return "psrldq\t{%2, %0|%0, %2}";
10073 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10075 gcc_unreachable ();
10078 [(set_attr "isa" "noavx,avx")
10079 (set_attr "type" "sseishft")
10080 (set_attr "length_immediate" "1")
10081 (set_attr "atom_unit" "sishuf")
10082 (set_attr "prefix_data16" "1,*")
10083 (set_attr "prefix" "orig,vex")
10084 (set_attr "mode" "<sseinsnmode>")])
;; Rotate (any_rotate) for the VI48_AVX512VL modes where the count is
;; itself a vector (operand 2 has the same vector mode and may be in
;; memory).  Printed as vp<rotate>v<ssemodesuffix> with the optional mask
;; suffix.
10086 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10087 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10088 (any_rotate:VI48_AVX512VL
10089 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10090 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10092 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10093 [(set_attr "prefix" "evex")
10094 (set_attr "mode" "<sseinsnmode>")])
;; Rotate (any_rotate) for the VI48_AVX512VL modes by a constant count:
;; operand 2 is an SImode value satisfying const_0_to_255_operand, and
;; operand 1 may be a register or memory.  Printed as
;; vp<rotate><ssemodesuffix> with the optional mask suffix.
10096 (define_insn "<avx512>_<rotate><mode><mask_name>"
10097 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10098 (any_rotate:VI48_AVX512VL
10099 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10100 (match_operand:SI 2 "const_0_to_255_operand")))]
10102 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10103 [(set_attr "prefix" "evex")
10104 (set_attr "mode" "<sseinsnmode>")])
;; maxmin expander for the VI124_256_AVX512F_AVX512BW modes.  The RTL
;; template is used as written; the preparation statement only normalises
;; the operands via ix86_fixup_binary_operands_no_copy.
10106 (define_expand "<code><mode>3"
10107 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10108 (maxmin:VI124_256_AVX512F_AVX512BW
10109 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10110 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10112 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Min/max insn for the VI124_256 modes under TARGET_AVX2, printed as
;; vp<maxmin_int><ssemodesuffix>.  Operand 1 is marked commutative with
;; operand 2 ("%"); ix86_binary_operator_ok must accept the operands.
;; The "mode" attribute is fixed to OI.
10114 (define_insn "*avx2_<code><mode>3"
10115 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10117 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10118 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10119 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10120 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10121 [(set_attr "type" "sseiadd")
10122 (set_attr "prefix_extra" "1")
10123 (set_attr "prefix" "vex")
10124 (set_attr "mode" "OI")])
;; Masked maxmin expander for the VI48_AVX512VL modes: the maxmin result
;; is vec_merge'd with operand 3 under the mask in operand 4
;; (<avx512fmaskmode>).  The preparation statement only normalises the
;; operands via ix86_fixup_binary_operands_no_copy.
10126 (define_expand "<code><mode>3_mask"
10127 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10128 (vec_merge:VI48_AVX512VL
10129 (maxmin:VI48_AVX512VL
10130 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10131 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10132 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10133 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10135 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; maxmin insn for the VI48_AVX512VL modes, printed as
;; vp<maxmin_int><ssemodesuffix> with the optional mask suffix.  Operand 1
;; is marked commutative with operand 2 ("%").
;; NOTE(review): the pattern name says "avx512bw", but the iterator is
;; VI48_AVX512VL and the condition is TARGET_AVX512F -- the name looks
;; like a misnomer; confirm before relying on it in dumps.
10137 (define_insn "*avx512bw_<code><mode>3<mask_name>"
10138 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10139 (maxmin:VI48_AVX512VL
10140 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10141 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10142 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10143 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10144 [(set_attr "type" "sseiadd")
10145 (set_attr "prefix_extra" "1")
10146 (set_attr "prefix" "maybe_evex")
10147 (set_attr "mode" "<sseinsnmode>")])
;; maxmin insn for the VI12_AVX512VL modes, printed as
;; vp<maxmin_int><ssemodesuffix> with the optional mask suffix.  Operand 1
;; must be a register (no commutative marker here); operand 2 may be in
;; memory.  Prefix attribute is "evex".
10149 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10150 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10151 (maxmin:VI12_AVX512VL
10152 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10153 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10155 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10156 [(set_attr "type" "sseiadd")
10157 (set_attr "prefix" "evex")
10158 (set_attr "mode" "<sseinsnmode>")])
;; maxmin expander for the VI8_AVX2_AVX512BW modes.  Two paths:
;;  - a direct path (its guard ends in
;;    "<MODE>mode == V8DImode || TARGET_AVX512VL") that only normalises
;;    the operands via ix86_fixup_binary_operands_no_copy;
;;  - a fallback that synthesises the operation with
;;    ix86_expand_int_vcond.  xops[1]/xops[2] are operands 1/2 for
;;    SMAX/UMAX and swapped otherwise; the comparison in xops[3] is GTU
;;    for UMAX/UMIN and GT for the signed codes, applied to operands 1
;;    and 2, which are also passed as xops[4] and xops[5].
10160 (define_expand "<code><mode>3"
10161 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10162 (maxmin:VI8_AVX2_AVX512BW
10163 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10164 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10168 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10169 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10172 enum rtx_code code;
10177 xops[0] = operands[0];
10179 if (<CODE> == SMAX || <CODE> == UMAX)
10181 xops[1] = operands[1];
10182 xops[2] = operands[2];
10186 xops[1] = operands[2];
10187 xops[2] = operands[1];
10190 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10192 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10193 xops[4] = operands[1];
10194 xops[5] = operands[2];
10196 ok = ix86_expand_int_vcond (xops);
;; Min/max expander for the VI124_128 modes (the fallback tests
;; <CODE> == SMAX and compares with GT, so presumably this is the signed
;; variant).  With TARGET_SSE4_1, or for V8HImode, only the operands are
;; normalised.  Otherwise both inputs are forced into registers and the
;; operation is synthesised via ix86_expand_int_vcond using a GT
;; comparison of operands 1 and 2; xops[1]/xops[2] keep the operand order
;; for SMAX and are swapped otherwise.
10202 (define_expand "<code><mode>3"
10203 [(set (match_operand:VI124_128 0 "register_operand")
10205 (match_operand:VI124_128 1 "nonimmediate_operand")
10206 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10209 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10210 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10216 xops[0] = operands[0];
10217 operands[1] = force_reg (<MODE>mode, operands[1]);
10218 operands[2] = force_reg (<MODE>mode, operands[2]);
10220 if (<CODE> == SMAX)
10222 xops[1] = operands[1];
10223 xops[2] = operands[2];
10227 xops[1] = operands[2];
10228 xops[2] = operands[1];
10231 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10232 xops[4] = operands[1];
10233 xops[5] = operands[2];
10235 ok = ix86_expand_int_vcond (xops);
;; Min/max insn for the VI14_128 modes with three alternatives.
;; Alternatives 0 and 1 (isa noavx) print the two-operand
;; p<maxmin_int> form with operand 1 tied to the destination and differ
;; only in register class ("Yr" vs "*x").  Alternative 2 (isa avx) prints
;; the three-operand vp<maxmin_int> form with the optional mask suffix.
;; The "mode" attribute is fixed to TI.
10241 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10242 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
10244 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
10245 (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10247 && <mask_mode512bit_condition>
10248 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10250 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10251 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10252 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10253 [(set_attr "isa" "noavx,noavx,avx")
10254 (set_attr "type" "sseiadd")
10255 (set_attr "prefix_extra" "1,1,*")
10256 (set_attr "prefix" "orig,orig,vex")
10257 (set_attr "mode" "TI")])
;; V8HI min/max insn available from TARGET_SSE2, printed as
;; p<maxmin_int>w (alternative 0, destination tied to operand 1) or
;; vp<maxmin_int>w (alternative 1).  Operand 1 is marked commutative with
;; operand 2 ("%").
10259 (define_insn "*<code>v8hi3"
10260 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10262 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10263 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
10264 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10266 p<maxmin_int>w\t{%2, %0|%0, %2}
10267 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10268 [(set_attr "isa" "noavx,avx")
10269 (set_attr "type" "sseiadd")
10270 (set_attr "prefix_data16" "1,*")
10271 (set_attr "prefix_extra" "*,1")
10272 (set_attr "prefix" "orig,vex")
10273 (set_attr "mode" "TI")])
;; Min/max expander for the VI124_128 modes (the fallbacks test
;; <CODE> == UMAX and compare with GTU, so presumably this is the
;; unsigned variant).  Three paths:
;;  - TARGET_SSE4_1 or V16QImode: only normalise the operands;
;;  - UMAX on V8HImode: emit sse2_ussubv8hi3 (operand 1, operand 2) into
;;    op3 and then addv8hi3 (op3, operand 2) into operand 0.  op3 is
;;    operand 0 unless operand 0 equals operand 2, in which case a fresh
;;    V8HI pseudo is used so operand 2 is not overwritten before the add;
;;  - otherwise: force both inputs into registers and synthesise via
;;    ix86_expand_int_vcond with a GTU comparison, keeping the operand
;;    order in xops[1]/xops[2] for UMAX and swapping it otherwise.
10275 (define_expand "<code><mode>3"
10276 [(set (match_operand:VI124_128 0 "register_operand")
10278 (match_operand:VI124_128 1 "nonimmediate_operand")
10279 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10282 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10283 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10284 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10286 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10287 operands[1] = force_reg (<MODE>mode, operands[1]);
10288 if (rtx_equal_p (op3, op2))
10289 op3 = gen_reg_rtx (V8HImode);
10290 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10291 emit_insn (gen_addv8hi3 (op0, op3, op2));
10299 operands[1] = force_reg (<MODE>mode, operands[1]);
10300 operands[2] = force_reg (<MODE>mode, operands[2]);
10302 xops[0] = operands[0];
10304 if (<CODE> == UMAX)
10306 xops[1] = operands[1];
10307 xops[2] = operands[2];
10311 xops[1] = operands[2];
10312 xops[2] = operands[1];
10315 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10316 xops[4] = operands[1];
10317 xops[5] = operands[2];
10319 ok = ix86_expand_int_vcond (xops);
;; Min/max insn for the VI24_128 modes; same three-alternative layout as
;; the VI14_128 pattern of the same name: two legacy two-operand
;; alternatives ("Yr" and "*x" register classes, isa noavx) and one
;; three-operand vp<maxmin_int> alternative with the optional mask suffix
;; (isa avx).
10325 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10326 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
10328 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
10329 (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10331 && <mask_mode512bit_condition>
10332 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10334 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10335 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10336 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10337 [(set_attr "isa" "noavx,noavx,avx")
10338 (set_attr "type" "sseiadd")
10339 (set_attr "prefix_extra" "1,1,*")
10340 (set_attr "prefix" "orig,orig,vex")
10341 (set_attr "mode" "TI")])
;; V16QI min/max insn available from TARGET_SSE2, printed as
;; p<maxmin_int>b (alternative 0, destination tied to operand 1) or
;; vp<maxmin_int>b (alternative 1).  Operand 1 is marked commutative with
;; operand 2 ("%").
10343 (define_insn "*<code>v16qi3"
10344 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10346 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10347 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10348 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10350 p<maxmin_int>b\t{%2, %0|%0, %2}
10351 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10352 [(set_attr "isa" "noavx,avx")
10353 (set_attr "type" "sseiadd")
10354 (set_attr "prefix_data16" "1,*")
10355 (set_attr "prefix_extra" "*,1")
10356 (set_attr "prefix" "orig,vex")
10357 (set_attr "mode" "TI")])
10359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10361 ;; Parallel integral comparisons
10363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for equality comparison on the VI_256 modes.  Both
;; inputs may be in memory; the preparation statement normalises them for
;; the EQ code via ix86_fixup_binary_operands_no_copy.
10365 (define_expand "avx2_eq<mode>3"
10366 [(set (match_operand:VI_256 0 "register_operand")
10368 (match_operand:VI_256 1 "nonimmediate_operand")
10369 (match_operand:VI_256 2 "nonimmediate_operand")))]
10371 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Equality-compare insn for the VI_256 modes under TARGET_AVX2, printed
;; as vpcmpeq<ssemodesuffix>.  Operand 1 is marked commutative with
;; operand 2 ("%").  The "mode" attribute is fixed to OI.
10373 (define_insn "*avx2_eq<mode>3"
10374 [(set (match_operand:VI_256 0 "register_operand" "=x")
10376 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10377 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10378 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10379 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10380 [(set_attr "type" "ssecmp")
10381 (set_attr "prefix_extra" "1")
10382 (set_attr "prefix" "vex")
10383 (set_attr "mode" "OI")])
;; Expander for the mask-producing equality compare on the VI12_AVX512VL
;; modes.  The result (operand 0) is in <avx512fmaskmode> and the compare
;; is represented as an UNSPEC_MASKED_EQ unspec of operands 1 and 2.  The
;; preparation statement normalises the operands for the EQ code.
10385 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10386 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10387 (unspec:<avx512fmaskmode>
10388 [(match_operand:VI12_AVX512VL 1 "register_operand")
10389 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10390 UNSPEC_MASKED_EQ))]
10392 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for the mask-producing equality compare on the VI48_AVX512VL
;; modes.  Same shape as the VI12 expander: result in <avx512fmaskmode>,
;; compare represented as an UNSPEC_MASKED_EQ unspec, operands normalised
;; for the EQ code in the preparation statement.
10394 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10395 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10396 (unspec:<avx512fmaskmode>
10397 [(match_operand:VI48_AVX512VL 1 "register_operand")
10398 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10399 UNSPEC_MASKED_EQ))]
10401 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Mask-producing equality compare for the VI12_AVX512VL modes (8- and
;; 16-bit elements).  Prints vpcmpeq<ssemodesuffix> with the result in a
;; "Yk" register of <avx512fmaskmode>; operand 1 is marked commutative
;; with operand 2 ("%").
;; The byte/word forms of vpcmpeq with a mask destination are AVX512BW
;; instructions, so this insn is gated on TARGET_AVX512BW rather than
;; plain TARGET_AVX512F (which is only sufficient for the VI48 variant).
10403 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10404 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10405 (unspec:<avx512fmaskmode>
10406 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10407 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10408 UNSPEC_MASKED_EQ))]
10409 "TARGET_AVX512BW && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10410 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10411 [(set_attr "type" "ssecmp")
10412 (set_attr "prefix_extra" "1")
10413 (set_attr "prefix" "evex")
10414 (set_attr "mode" "<sseinsnmode>")])
;; Mask-producing equality compare for the VI48_AVX512VL modes, enabled
;; under TARGET_AVX512F when ix86_binary_operator_ok accepts the
;; operands.  Prints vpcmpeq<ssemodesuffix> with the result in a "Yk"
;; register of <avx512fmaskmode>; operand 1 is marked commutative with
;; operand 2 ("%").
10416 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10417 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10418 (unspec:<avx512fmaskmode>
10419 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10420 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10421 UNSPEC_MASKED_EQ))]
10422 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10423 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10424 [(set_attr "type" "ssecmp")
10425 (set_attr "prefix_extra" "1")
10426 (set_attr "prefix" "evex")
10427 (set_attr "mode" "<sseinsnmode>")])
;; V2DI equality-compare insn under TARGET_SSE4_1.  Alternatives 0 and 1
;; (isa noavx) print the two-operand pcmpeqq with operand 1 tied to the
;; destination and differ only in register class ("Yr" vs "*x");
;; alternative 2 (isa avx) prints the three-operand vpcmpeqq.
10429 (define_insn "*sse4_1_eqv2di3"
10430 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10432 (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
10433 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10434 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10436 pcmpeqq\t{%2, %0|%0, %2}
10437 pcmpeqq\t{%2, %0|%0, %2}
10438 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10439 [(set_attr "isa" "noavx,noavx,avx")
10440 (set_attr "type" "ssecmp")
10441 (set_attr "prefix_extra" "1")
10442 (set_attr "prefix" "orig,orig,vex")
10443 (set_attr "mode" "TI")])
;; Equality-compare insn for the VI124_128 modes, enabled for TARGET_SSE2
;; when TARGET_XOP is off.  Prints pcmpeq<ssemodesuffix> (alternative 0,
;; destination tied to operand 1) or vpcmpeq<ssemodesuffix> (alternative
;; 1).  Operand 1 is marked commutative with operand 2 ("%").
10445 (define_insn "*sse2_eq<mode>3"
10446 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10448 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10449 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10450 "TARGET_SSE2 && !TARGET_XOP
10451 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10453 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10454 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10455 [(set_attr "isa" "noavx,avx")
10456 (set_attr "type" "ssecmp")
10457 (set_attr "prefix_data16" "1,*")
10458 (set_attr "prefix" "orig,vex")
10459 (set_attr "mode" "TI")])
;; Named expander for equality comparison on the VI124_128 modes, enabled
;; for TARGET_SSE2 when TARGET_XOP is off.  The preparation statement
;; normalises the operands for the EQ code.
10461 (define_expand "sse2_eq<mode>3"
10462 [(set (match_operand:VI124_128 0 "register_operand")
10464 (match_operand:VI124_128 1 "nonimmediate_operand")
10465 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10466 "TARGET_SSE2 && !TARGET_XOP "
10467 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for V2DI equality comparison.  Both inputs may be in
;; memory; the preparation statement normalises them for the EQ code via
;; ix86_fixup_binary_operands_no_copy.
10469 (define_expand "sse4_1_eqv2di3"
10470 [(set (match_operand:V2DI 0 "register_operand")
10472 (match_operand:V2DI 1 "nonimmediate_operand")
10473 (match_operand:V2DI 2 "nonimmediate_operand")))]
10475 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI compare insn printed as pcmpgtq (alternatives 0 and 1, isa noavx,
;; destination tied to operand 1) or vpcmpgtq (alternative 2, isa avx).
;; Unlike the eq patterns, operand 1 must be a register and carries no
;; commutative marker.
10477 (define_insn "sse4_2_gtv2di3"
10478 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10480 (match_operand:V2DI 1 "register_operand" "0,0,x")
10481 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10484 pcmpgtq\t{%2, %0|%0, %2}
10485 pcmpgtq\t{%2, %0|%0, %2}
10486 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10487 [(set_attr "isa" "noavx,noavx,avx")
10488 (set_attr "type" "ssecmp")
10489 (set_attr "prefix_extra" "1")
10490 (set_attr "prefix" "orig,orig,vex")
10491 (set_attr "mode" "TI")])
;; Compare insn for the VI_256 modes, printed as
;; vpcmpgt<ssemodesuffix>.  Operand 1 must be a register (no commutative
;; marker); operand 2 may be in memory.  The "mode" attribute is fixed to
;; OI.
10493 (define_insn "avx2_gt<mode>3"
10494 [(set (match_operand:VI_256 0 "register_operand" "=x")
10496 (match_operand:VI_256 1 "register_operand" "x")
10497 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10499 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10500 [(set_attr "type" "ssecmp")
10501 (set_attr "prefix_extra" "1")
10502 (set_attr "prefix" "vex")
10503 (set_attr "mode" "OI")])
;; Mask-producing compare for the VI48_AVX512VL modes, represented as an
;; UNSPEC_MASKED_GT unspec and printed as vpcmpgt<ssemodesuffix>.  The
;; result goes to a "Yk" register of <avx512fmaskmode>.
10505 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10506 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10507 (unspec:<avx512fmaskmode>
10508 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10509 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10511 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10512 [(set_attr "type" "ssecmp")
10513 (set_attr "prefix_extra" "1")
10514 (set_attr "prefix" "evex")
10515 (set_attr "mode" "<sseinsnmode>")])
;; Mask-producing compare for the VI12_AVX512VL modes, represented as an
;; UNSPEC_MASKED_GT unspec and printed as vpcmpgt<ssemodesuffix>.  The
;; result goes to a "Yk" register of <avx512fmaskmode>.
10517 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10518 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10519 (unspec:<avx512fmaskmode>
10520 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10521 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10523 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10524 [(set_attr "type" "ssecmp")
10525 (set_attr "prefix_extra" "1")
10526 (set_attr "prefix" "evex")
10527 (set_attr "mode" "<sseinsnmode>")])
;; Compare insn for the VI124_128 modes, enabled for TARGET_SSE2 when
;; TARGET_XOP is off.  Prints pcmpgt<ssemodesuffix> (alternative 0,
;; destination tied to operand 1) or vpcmpgt<ssemodesuffix> (alternative
;; 1).  Operand 1 must be a register and carries no commutative marker.
10529 (define_insn "sse2_gt<mode>3"
10530 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10532 (match_operand:VI124_128 1 "register_operand" "0,x")
10533 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10534 "TARGET_SSE2 && !TARGET_XOP"
10536 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10537 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10538 [(set_attr "isa" "noavx,avx")
10539 (set_attr "type" "ssecmp")
10540 (set_attr "prefix_data16" "1,*")
10541 (set_attr "prefix" "orig,vex")
10542 (set_attr "mode" "TI")])
;; Vector conditional select for V_512 data with a VI_512 comparison:
;; operand 0 = (operand 3 applied to operands 4 and 5) ? operand 1
;; : operand 2.  The condition requires the data and comparison modes to
;; have the same number of units.  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is captured in "ok".
10544 (define_expand "vcond<V_512:mode><VI_512:mode>"
10545 [(set (match_operand:V_512 0 "register_operand")
10546 (if_then_else:V_512
10547 (match_operator 3 ""
10548 [(match_operand:VI_512 4 "nonimmediate_operand")
10549 (match_operand:VI_512 5 "general_operand")])
10550 (match_operand:V_512 1)
10551 (match_operand:V_512 2)))]
10553 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10554 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10556 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V_256 data with a VI_256 comparison;
;; same shape as the 512-bit vcond.  The condition requires equal unit
;; counts in the data and comparison modes; expansion is delegated to
;; ix86_expand_int_vcond.
10561 (define_expand "vcond<V_256:mode><VI_256:mode>"
10562 [(set (match_operand:V_256 0 "register_operand")
10563 (if_then_else:V_256
10564 (match_operator 3 ""
10565 [(match_operand:VI_256 4 "nonimmediate_operand")
10566 (match_operand:VI_256 5 "general_operand")])
10567 (match_operand:V_256 1)
10568 (match_operand:V_256 2)))]
10570 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10571 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10573 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V_128 data with a VI124_128 comparison.
;; The condition requires equal unit counts in the data and comparison
;; modes; expansion is delegated to ix86_expand_int_vcond.
10578 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10579 [(set (match_operand:V_128 0 "register_operand")
10580 (if_then_else:V_128
10581 (match_operator 3 ""
10582 [(match_operand:VI124_128 4 "nonimmediate_operand")
10583 (match_operand:VI124_128 5 "general_operand")])
10584 (match_operand:V_128 1)
10585 (match_operand:V_128 2)))]
10587 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10588 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10590 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for VI8F_128 data where the comparison
;; operands (4 and 5) are V2DI.  Expansion is delegated to
;; ix86_expand_int_vcond.
10595 (define_expand "vcond<VI8F_128:mode>v2di"
10596 [(set (match_operand:VI8F_128 0 "register_operand")
10597 (if_then_else:VI8F_128
10598 (match_operator 3 ""
10599 [(match_operand:V2DI 4 "nonimmediate_operand")
10600 (match_operand:V2DI 5 "general_operand")])
10601 (match_operand:VI8F_128 1)
10602 (match_operand:VI8F_128 2)))]
10605 bool ok = ix86_expand_int_vcond (operands);
;; vcondu counterpart of the 512-bit vcond.  Differences visible in the
;; template: operand 5 must be a nonimmediate_operand and operands 1 and 2
;; carry an explicit general_operand predicate.  The condition requires
;; equal unit counts; expansion is delegated to ix86_expand_int_vcond.
10610 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10611 [(set (match_operand:V_512 0 "register_operand")
10612 (if_then_else:V_512
10613 (match_operator 3 ""
10614 [(match_operand:VI_512 4 "nonimmediate_operand")
10615 (match_operand:VI_512 5 "nonimmediate_operand")])
10616 (match_operand:V_512 1 "general_operand")
10617 (match_operand:V_512 2 "general_operand")))]
10619 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10620 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10622 bool ok = ix86_expand_int_vcond (operands);
;; vcondu for V_256 data with a VI_256 comparison.  Comparison operands 4
;; and 5 are nonimmediate_operand; value operands 1 and 2 are
;; general_operand.  The condition requires equal unit counts; expansion
;; is delegated to ix86_expand_int_vcond.
10627 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10628 [(set (match_operand:V_256 0 "register_operand")
10629 (if_then_else:V_256
10630 (match_operator 3 ""
10631 [(match_operand:VI_256 4 "nonimmediate_operand")
10632 (match_operand:VI_256 5 "nonimmediate_operand")])
10633 (match_operand:V_256 1 "general_operand")
10634 (match_operand:V_256 2 "general_operand")))]
10636 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10637 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10639 bool ok = ix86_expand_int_vcond (operands);
;; vcondu for V_128 data with a VI124_128 comparison.  Comparison
;; operands 4 and 5 are nonimmediate_operand; value operands 1 and 2 are
;; general_operand.  The condition requires equal unit counts; expansion
;; is delegated to ix86_expand_int_vcond.
10644 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10645 [(set (match_operand:V_128 0 "register_operand")
10646 (if_then_else:V_128
10647 (match_operator 3 ""
10648 [(match_operand:VI124_128 4 "nonimmediate_operand")
10649 (match_operand:VI124_128 5 "nonimmediate_operand")])
10650 (match_operand:V_128 1 "general_operand")
10651 (match_operand:V_128 2 "general_operand")))]
10653 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10654 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10656 bool ok = ix86_expand_int_vcond (operands);
;; vcondu for VI8F_128 data where the comparison operands (4 and 5) are
;; V2DI nonimmediate operands.  Expansion is delegated to
;; ix86_expand_int_vcond.
10661 (define_expand "vcondu<VI8F_128:mode>v2di"
10662 [(set (match_operand:VI8F_128 0 "register_operand")
10663 (if_then_else:VI8F_128
10664 (match_operator 3 ""
10665 [(match_operand:V2DI 4 "nonimmediate_operand")
10666 (match_operand:V2DI 5 "nonimmediate_operand")])
10667 (match_operand:VI8F_128 1 "general_operand")
10668 (match_operand:VI8F_128 2 "general_operand")))]
10671 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by vec_perm<mode>.  V16QI, V8HI, V4SI, V2DI, V4SF and
;; V2DF are unconditional; the V32QI/V16HI/V8SI/V4DI/V8SF/V4DF entries
;; need TARGET_AVX2; V16SF/V8DF/V16SI/V8DI need TARGET_AVX512F; V32HI
;; needs TARGET_AVX512BW and V64QI needs TARGET_AVX512VBMI.
10676 (define_mode_iterator VEC_PERM_AVX2
10677 [V16QI V8HI V4SI V2DI V4SF V2DF
10678 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10679 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10680 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10681 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10682 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10683 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; Expander for vec_perm<mode> over VEC_PERM_AVX2.  Operands 0-2 are
;; registers in the data mode; operand 3 is a register in the matching
;; integer vector mode (<sseintvecmode>).  Enabled when any of
;; TARGET_SSSE3, TARGET_AVX or TARGET_XOP holds (in addition to the
;; per-mode iterator conditions).  Expansion is delegated to
;; ix86_expand_vec_perm.
10685 (define_expand "vec_perm<mode>"
10686 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10687 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10688 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10689 (match_operand:<sseintvecmode> 3 "register_operand")]
10690 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10692 ix86_expand_vec_perm (operands);
;; Modes accepted by vec_perm_const<mode>, each with its own target
;; condition: V4SF/V4SI/V2DF/V2DI need TARGET_SSE, V16QI/V8HI need
;; TARGET_SSE2, V8SF/V4DF/V8SI/V4DI need TARGET_AVX, V32QI/V16HI need
;; TARGET_AVX2, V16SI/V8DI/V16SF/V8DF need TARGET_AVX512F, and
;; V32HI/V64QI need TARGET_AVX512BW.
10696 (define_mode_iterator VEC_PERM_CONST
10697 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10698 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10699 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10700 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10701 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10702 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10703 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10704 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10705 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
;; Expander for vec_perm_const<mode> over VEC_PERM_CONST.  Operands 0-2
;; are registers in the data mode; operand 3 is in <sseintvecmode> and
;; has no predicate.  The preparation code calls
;; ix86_expand_vec_perm_const and branches on its return value.
10707 (define_expand "vec_perm_const<mode>"
10708 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10709 (match_operand:VEC_PERM_CONST 1 "register_operand")
10710 (match_operand:VEC_PERM_CONST 2 "register_operand")
10711 (match_operand:<sseintvecmode> 3)]
10714 if (ix86_expand_vec_perm_const (operands))
10720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10722 ;; Parallel bitwise logical operations
10724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for one_cmpl<mode>2 over the VI modes, implemented as an XOR
;; of operand 1 with a constant.  The preparation code builds a
;; CONST_VECTOR with GET_MODE_NUNITS elements, each constm1_rtx, and
;; forces it into a register as operand 2.
10726 (define_expand "one_cmpl<mode>2"
10727 [(set (match_operand:VI 0 "register_operand")
10728 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
10732 int i, n = GET_MODE_NUNITS (<MODE>mode);
10733 rtvec v = rtvec_alloc (n);
10735 for (i = 0; i < n; ++i)
10736 RTVEC_ELT (v, i) = constm1_rtx;
10738 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the and-not operation on the VI_AVX2 modes.  The
;; template combines (not operand 1) with operand 2; operand 1 must be a
;; register, operand 2 may be in memory.
10741 (define_expand "<sse2_avx2>_andnot<mode>3"
10742 [(set (match_operand:VI_AVX2 0 "register_operand")
10744 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10745 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
;; Masked and-not expander for the VI48_AVX512VL modes: the and-not of
;; operands 1 and 2 is vec_merge'd with operand 3 under the mask in
;; operand 4 (<avx512fmaskmode>).
10748 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10749 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10750 (vec_merge:VI48_AVX512VL
10753 (match_operand:VI48_AVX512VL 1 "register_operand"))
10754 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10755 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10756 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Masked and-not expander for the VI12_AVX512VL modes; same shape as the
;; VI48 variant: result vec_merge'd with operand 3 under the mask in
;; operand 4 (<avx512fmaskmode>).
10759 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10760 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10761 (vec_merge:VI12_AVX512VL
10764 (match_operand:VI12_AVX512VL 1 "register_operand"))
10765 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10766 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10767 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; And-not insn for all VI modes.  The output code picks a mnemonic into
;; "tmp" based on get_attr_mode (insn) and <MODE>mode (asserting the
;; matching TARGET_* flag on each path), then formats it with snprintf
;; into a static 64-byte buffer.  Alternative 0 uses the two-operand
;; template; alternative 1 uses the "v"-prefixed three-operand template
;; with the optional <mask_operand3_1> suffix.
;; The "mode" attribute is computed: <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <sseinsnmode> under
;; TARGET_AVX2, V8SF for modes wider than 16 bytes under plain
;; TARGET_AVX, V4SF without SSE2 or when optimising for size, and
;; <sseinsnmode> otherwise.
10770 (define_insn "*andnot<mode>3"
10771 [(set (match_operand:VI 0 "register_operand" "=x,v")
10773 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10774 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10777 static char buf[64];
10781 switch (get_attr_mode (insn))
10784 gcc_assert (TARGET_AVX512F);
10786 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10788 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10789 switch (<MODE>mode)
10793 if (TARGET_AVX512F)
10795 tmp = "pandn<ssemodesuffix>";
10802 if (TARGET_AVX512VL)
10804 tmp = "pandn<ssemodesuffix>";
10808 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
10813 gcc_assert (TARGET_AVX512F);
10815 gcc_assert (TARGET_AVX);
10817 gcc_assert (TARGET_SSE);
10823 gcc_unreachable ();
10826 switch (which_alternative)
10829 ops = "%s\t{%%2, %%0|%%0, %%2}";
10832 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10835 gcc_unreachable ();
10838 snprintf (buf, sizeof (buf), ops, tmp);
10841 [(set_attr "isa" "noavx,avx")
10842 (set_attr "type" "sselog")
10843 (set (attr "prefix_data16")
10845 (and (eq_attr "alternative" "0")
10846 (eq_attr "mode" "TI"))
10848 (const_string "*")))
10849 (set_attr "prefix" "orig,vex")
10851 (cond [(and (match_test "<MODE_SIZE> == 16")
10852 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10853 (const_string "<ssePSmode>")
10854 (match_test "TARGET_AVX2")
10855 (const_string "<sseinsnmode>")
10856 (match_test "TARGET_AVX")
10858 (match_test "<MODE_SIZE> > 16")
10859 (const_string "V8SF")
10860 (const_string "<sseinsnmode>"))
10861 (ior (not (match_test "TARGET_SSE2"))
10862 (match_test "optimize_function_for_size_p (cfun)"))
10863 (const_string "V4SF")
10865 (const_string "<sseinsnmode>")))])
;; Masked and-not instruction for the VI48_AVX512VL modes.  Operand 3 is
;; either tied to the destination ("0") or matches constraint "C"; operand 4
;; is a mask register ("Yk").  The template prints vpandn<ssemodesuffix>
;; with the mask as %{%4%} followed by %N3 on the destination.
;; The ";" after the template string begins an (empty) md comment; it is
;; not a C statement terminator.
10867 (define_insn "*andnot<mode>3_mask"
10868 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10869 (vec_merge:VI48_AVX512VL
10872 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
10873 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10874 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10875 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10877 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10878 [(set_attr "type" "sselog")
10879 (set_attr "prefix" "evex")
10880 (set_attr "mode" "<sseinsnmode>")])
;; Masked and-not instruction for the VI12_AVX512VL modes; operand layout
;; and template are the same as in the VI48_AVX512VL pattern of this name.
;; NOTE(review): with these modes vpandn<ssemodesuffix> would presumably
;; print a byte or word suffix, but the EVEX and-not instruction appears to
;; exist only in dword/qword forms (VPANDND / VPANDNQ) -- confirm against
;; the ISA reference whether this pattern can ever emit valid assembly.
;; The ";" after the template string begins an (empty) md comment.
10882 (define_insn "*andnot<mode>3_mask"
10883 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10884 (vec_merge:VI12_AVX512VL
10887 (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
10888 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10889 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10890 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10892 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10893 [(set_attr "type" "sselog")
10894 (set_attr "prefix" "evex")
10895 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for the vector logical operations iterated by <code> on
;; all VI modes.  Both inputs may be registers, memory or constant vectors;
;; the work is delegated to ix86_expand_vector_logical_operator, which
;; receives the rtx code, the mode and the operand array.
10897 (define_expand "<code><mode>3"
10898 [(set (match_operand:VI 0 "register_operand")
10900 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
10901 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
10904 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Vector logical instruction (iterated by <code>) for all VI modes, with an
;; optional masked variant through <mask_name>.  Operand 1 is marked
;; commutative ("%").  The pattern is enabled for TARGET_SSE when
;; <mask_mode512bit_condition> holds and ix86_binary_operator_ok accepts the
;; operands.  The output code mirrors the and-not pattern: it picks the
;; mnemonic stem in `tmp' from the "mode" attribute and <MODE>mode, picks
;; the two-operand (alternative 0) or "v"-prefixed three-operand
;; (alternative 1) format in `ops', and formats the result into a static
;; 64-byte buffer with snprintf.
10908 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10909 [(set (match_operand:VI 0 "register_operand" "=x,v")
10911 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
10912 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10913 "TARGET_SSE && <mask_mode512bit_condition>
10914 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10916 static char buf[64];
10920 switch (get_attr_mode (insn))
10923 gcc_assert (TARGET_AVX512F);
10925 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10927 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10928 switch (<MODE>mode)
10932 if (TARGET_AVX512F)
10934 tmp = "p<logic><ssemodesuffix>";
10941 if (TARGET_AVX512VL)
10943 tmp = "p<logic><ssemodesuffix>";
10947 tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
10952 gcc_assert (TARGET_AVX512F);
10954 gcc_assert (TARGET_AVX);
10956 gcc_assert (TARGET_SSE);
10962 gcc_unreachable ();
10965 switch (which_alternative)
10968 ops = "%s\t{%%2, %%0|%%0, %%2}";
10971 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10974 gcc_unreachable ();
10977 snprintf (buf, sizeof (buf), ops, tmp);
10980 [(set_attr "isa" "noavx,avx")
10981 (set_attr "type" "sselog")
10982 (set (attr "prefix_data16")
10984 (and (eq_attr "alternative" "0")
10985 (eq_attr "mode" "TI"))
10987 (const_string "*")))
10988 (set_attr "prefix" "<mask_prefix3>")
10990 (cond [(and (match_test "<MODE_SIZE> == 16")
10991 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10992 (const_string "<ssePSmode>")
10993 (match_test "TARGET_AVX2")
10994 (const_string "<sseinsnmode>")
10995 (match_test "TARGET_AVX")
10997 (match_test "<MODE_SIZE> > 16")
10998 (const_string "V8SF")
10999 (const_string "<sseinsnmode>"))
11000 (ior (not (match_test "TARGET_SSE2"))
11001 (match_test "optimize_function_for_size_p (cfun)"))
11002 (const_string "V4SF")
11004 (const_string "<sseinsnmode>")))])
;; vptestm for the VI12_AVX512VL modes.  The result is a mask register
;; ("Yk") of mode <avx512fmaskmode>, modelled as an unspec over register
;; operand 1 and register-or-memory operand 2.  The optional
;; <mask_scalar_merge_name> variant adds <mask_scalar_merge_operand3> to the
;; destination in the template.
11006 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11007 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11008 (unspec:<avx512fmaskmode>
11009 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11010 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11013 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11014 [(set_attr "prefix" "evex")
11015 (set_attr "mode" "<sseinsnmode>")])
;; vptestm for the VI48_AVX512VL modes.  The result is a mask register
;; ("Yk") of mode <avx512fmaskmode>, modelled as an unspec over register
;; operand 1 and register-or-memory operand 2.  The optional
;; <mask_scalar_merge_name> variant adds <mask_scalar_merge_operand3> to the
;; destination in the template.
11017 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11018 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11019 (unspec:<avx512fmaskmode>
11020 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11021 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11024 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11025 [(set_attr "prefix" "evex")
11026 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI12_AVX512VL modes.  Same shape as the vptestm
;; patterns: a mask-register result ("Yk") defined as an unspec over
;; register operand 1 and register-or-memory operand 2, with an optional
;; <mask_scalar_merge_name> variant.
11028 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11029 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11030 (unspec:<avx512fmaskmode>
11031 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11032 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11035 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11036 [(set_attr "prefix" "evex")
11037 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI48_AVX512VL modes.  Same shape as the vptestm
;; patterns: a mask-register result ("Yk") defined as an unspec over
;; register operand 1 and register-or-memory operand 2, with an optional
;; <mask_scalar_merge_name> variant.
11039 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11040 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11041 (unspec:<avx512fmaskmode>
11042 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11043 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11046 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11047 [(set_attr "prefix" "evex")
11048 (set_attr "mode" "<sseinsnmode>")])
11050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11052 ;; Parallel integral element swizzling
11054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for truncating pack of two VI248_AVX2_8_AVX512F vectors into one
;; <ssepackmode> vector.  Both inputs are reinterpreted in <ssepackmode>
;; with gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a
;; final argument of 0.
11056 (define_expand "vec_pack_trunc_<mode>"
11057 [(match_operand:<ssepackmode> 0 "register_operand")
11058 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
11059 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
11062 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11063 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11064 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Signed-saturating pack of words to bytes: the result is the vec_concat of
;; the ss_truncate of operand 1 and the ss_truncate of operand 2, each to
;; <ssehalfvecmode>.  Alternative 0 is the two-operand packsswb with
;; operand 1 tied to the destination; alternative 1 is vpacksswb with an
;; optional mask operand.
;; NOTE(review): the destination constraint is "x" in both alternatives
;; while the inputs of alternative 1 allow "v"; confirm this is deliberate.
11068 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11069 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11070 (vec_concat:VI1_AVX512
11071 (ss_truncate:<ssehalfvecmode>
11072 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11073 (ss_truncate:<ssehalfvecmode>
11074 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11075 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11077 packsswb\t{%2, %0|%0, %2}
11078 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11079 [(set_attr "isa" "noavx,avx")
11080 (set_attr "type" "sselog")
11081 (set_attr "prefix_data16" "1,*")
11082 (set_attr "prefix" "orig,maybe_evex")
11083 (set_attr "mode" "<sseinsnmode>")])
;; Signed-saturating pack of dwords to words: the result is the vec_concat
;; of the ss_truncate of operand 1 and the ss_truncate of operand 2, each to
;; <ssehalfvecmode>.  Alternative 0 is the two-operand packssdw with
;; operand 1 tied to the destination; alternative 1 is vpackssdw with an
;; optional mask operand.
11085 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11086 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11087 (vec_concat:VI2_AVX2
11088 (ss_truncate:<ssehalfvecmode>
11089 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11090 (ss_truncate:<ssehalfvecmode>
11091 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11092 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11094 packssdw\t{%2, %0|%0, %2}
11095 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11096 [(set_attr "isa" "noavx,avx")
11097 (set_attr "type" "sselog")
11098 (set_attr "prefix_data16" "1,*")
11099 (set_attr "prefix" "orig,vex")
11100 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned-saturating pack of words to bytes: the result is the vec_concat
;; of the us_truncate of operand 1 and the us_truncate of operand 2, each to
;; <ssehalfvecmode>.  Alternative 0 is the two-operand packuswb with
;; operand 1 tied to the destination; alternative 1 is vpackuswb with an
;; optional mask operand.
;; NOTE(review): the destination constraint is "x" in both alternatives
;; while the inputs of alternative 1 allow "v"; confirm this is deliberate.
11102 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11103 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11104 (vec_concat:VI1_AVX512
11105 (us_truncate:<ssehalfvecmode>
11106 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11107 (us_truncate:<ssehalfvecmode>
11108 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11109 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11111 packuswb\t{%2, %0|%0, %2}
11112 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11113 [(set_attr "isa" "noavx,avx")
11114 (set_attr "type" "sselog")
11115 (set_attr "prefix_data16" "1,*")
11116 (set_attr "prefix" "orig,vex")
11117 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V64QI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+64 for i = 8..15, 24..31,
;; 40..47 and 56..63, i.e. the upper eight bytes of each 16-byte group.
11119 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
11120 [(set (match_operand:V64QI 0 "register_operand" "=v")
11123 (match_operand:V64QI 1 "register_operand" "v")
11124 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11125 (parallel [(const_int 8) (const_int 72)
11126 (const_int 9) (const_int 73)
11127 (const_int 10) (const_int 74)
11128 (const_int 11) (const_int 75)
11129 (const_int 12) (const_int 76)
11130 (const_int 13) (const_int 77)
11131 (const_int 14) (const_int 78)
11132 (const_int 15) (const_int 79)
11133 (const_int 24) (const_int 88)
11134 (const_int 25) (const_int 89)
11135 (const_int 26) (const_int 90)
11136 (const_int 27) (const_int 91)
11137 (const_int 28) (const_int 92)
11138 (const_int 29) (const_int 93)
11139 (const_int 30) (const_int 94)
11140 (const_int 31) (const_int 95)
11141 (const_int 40) (const_int 104)
11142 (const_int 41) (const_int 105)
11143 (const_int 42) (const_int 106)
11144 (const_int 43) (const_int 107)
11145 (const_int 44) (const_int 108)
11146 (const_int 45) (const_int 109)
11147 (const_int 46) (const_int 110)
11148 (const_int 47) (const_int 111)
11149 (const_int 56) (const_int 120)
11150 (const_int 57) (const_int 121)
11151 (const_int 58) (const_int 122)
11152 (const_int 59) (const_int 123)
11153 (const_int 60) (const_int 124)
11154 (const_int 61) (const_int 125)
11155 (const_int 62) (const_int 126)
11156 (const_int 63) (const_int 127)])))]
11158 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11159 [(set_attr "type" "sselog")
11160 (set_attr "prefix" "evex")
11161 (set_attr "mode" "XI")])
;; vpunpckhbw on V32QI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+32 for i = 8..15 and 24..31,
;; i.e. the upper eight bytes of each 16-byte group.
;; The insn condition includes <mask_avx512bw_condition>: the masked form
;; masks at byte granularity, so it must be gated the same way as the
;; matching low-half pattern avx2_interleave_lowv32qi and the word
;; interleave patterns in this file.
11163 (define_insn "avx2_interleave_highv32qi<mask_name>"
11164 [(set (match_operand:V32QI 0 "register_operand" "=v")
11167 (match_operand:V32QI 1 "register_operand" "v")
11168 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11169 (parallel [(const_int 8) (const_int 40)
11170 (const_int 9) (const_int 41)
11171 (const_int 10) (const_int 42)
11172 (const_int 11) (const_int 43)
11173 (const_int 12) (const_int 44)
11174 (const_int 13) (const_int 45)
11175 (const_int 14) (const_int 46)
11176 (const_int 15) (const_int 47)
11177 (const_int 24) (const_int 56)
11178 (const_int 25) (const_int 57)
11179 (const_int 26) (const_int 58)
11180 (const_int 27) (const_int 59)
11181 (const_int 28) (const_int 60)
11182 (const_int 29) (const_int 61)
11183 (const_int 30) (const_int 62)
11184 (const_int 31) (const_int 63)])))]
11185 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11186 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11187 [(set_attr "type" "sselog")
11188 (set_attr "prefix" "<mask_prefix>")
11189 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI, with an optional masked variant via
;; <mask_name>.  The selector pairs element i with element i+16 for
;; i = 8..15.  Alternative 0 is the two-operand form with operand 1 tied to
;; the destination; alternative 1 is the "v"-prefixed three-operand form.
;; The insn condition includes <mask_avx512bw_condition>: the masked form
;; masks at byte granularity, so it must be gated the same way as the
;; matching low-half pattern vec_interleave_lowv16qi.
11191 (define_insn "vec_interleave_highv16qi<mask_name>"
11192 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11195 (match_operand:V16QI 1 "register_operand" "0,v")
11196 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11197 (parallel [(const_int 8) (const_int 24)
11198 (const_int 9) (const_int 25)
11199 (const_int 10) (const_int 26)
11200 (const_int 11) (const_int 27)
11201 (const_int 12) (const_int 28)
11202 (const_int 13) (const_int 29)
11203 (const_int 14) (const_int 30)
11204 (const_int 15) (const_int 31)])))]
11205 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11207 punpckhbw\t{%2, %0|%0, %2}
11208 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11209 [(set_attr "isa" "noavx,avx")
11210 (set_attr "type" "sselog")
11211 (set_attr "prefix_data16" "1,*")
11212 (set_attr "prefix" "orig,<mask_prefix>")
11213 (set_attr "mode" "TI")])
;; vpunpcklbw on V64QI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+64 for i = 0..7, 16..23,
;; 32..39 and 48..55, i.e. the lower eight bytes of each 16-byte group.
11215 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
11216 [(set (match_operand:V64QI 0 "register_operand" "=v")
11219 (match_operand:V64QI 1 "register_operand" "v")
11220 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11221 (parallel [(const_int 0) (const_int 64)
11222 (const_int 1) (const_int 65)
11223 (const_int 2) (const_int 66)
11224 (const_int 3) (const_int 67)
11225 (const_int 4) (const_int 68)
11226 (const_int 5) (const_int 69)
11227 (const_int 6) (const_int 70)
11228 (const_int 7) (const_int 71)
11229 (const_int 16) (const_int 80)
11230 (const_int 17) (const_int 81)
11231 (const_int 18) (const_int 82)
11232 (const_int 19) (const_int 83)
11233 (const_int 20) (const_int 84)
11234 (const_int 21) (const_int 85)
11235 (const_int 22) (const_int 86)
11236 (const_int 23) (const_int 87)
11237 (const_int 32) (const_int 96)
11238 (const_int 33) (const_int 97)
11239 (const_int 34) (const_int 98)
11240 (const_int 35) (const_int 99)
11241 (const_int 36) (const_int 100)
11242 (const_int 37) (const_int 101)
11243 (const_int 38) (const_int 102)
11244 (const_int 39) (const_int 103)
11245 (const_int 48) (const_int 112)
11246 (const_int 49) (const_int 113)
11247 (const_int 50) (const_int 114)
11248 (const_int 51) (const_int 115)
11249 (const_int 52) (const_int 116)
11250 (const_int 53) (const_int 117)
11251 (const_int 54) (const_int 118)
11252 (const_int 55) (const_int 119)])))]
11254 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11255 [(set_attr "type" "sselog")
11256 (set_attr "prefix" "evex")
11257 (set_attr "mode" "XI")])
;; vpunpcklbw on V32QI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+32 for i = 0..7 and 16..23,
;; i.e. the lower eight bytes of each 16-byte group.  Enabled for
;; TARGET_AVX2, subject to the VL and BW mask conditions.
11259 (define_insn "avx2_interleave_lowv32qi<mask_name>"
11260 [(set (match_operand:V32QI 0 "register_operand" "=v")
11263 (match_operand:V32QI 1 "register_operand" "v")
11264 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11265 (parallel [(const_int 0) (const_int 32)
11266 (const_int 1) (const_int 33)
11267 (const_int 2) (const_int 34)
11268 (const_int 3) (const_int 35)
11269 (const_int 4) (const_int 36)
11270 (const_int 5) (const_int 37)
11271 (const_int 6) (const_int 38)
11272 (const_int 7) (const_int 39)
11273 (const_int 16) (const_int 48)
11274 (const_int 17) (const_int 49)
11275 (const_int 18) (const_int 50)
11276 (const_int 19) (const_int 51)
11277 (const_int 20) (const_int 52)
11278 (const_int 21) (const_int 53)
11279 (const_int 22) (const_int 54)
11280 (const_int 23) (const_int 55)])))]
11281 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11282 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11283 [(set_attr "type" "sselog")
11284 (set_attr "prefix" "maybe_vex")
11285 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI, with an optional masked variant via
;; <mask_name>.  The selector pairs element i with element i+16 for
;; i = 0..7.  Alternative 0 is the two-operand form with operand 1 tied to
;; the destination; alternative 1 is the "v"-prefixed three-operand form.
11287 (define_insn "vec_interleave_lowv16qi<mask_name>"
11288 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11291 (match_operand:V16QI 1 "register_operand" "0,v")
11292 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11293 (parallel [(const_int 0) (const_int 16)
11294 (const_int 1) (const_int 17)
11295 (const_int 2) (const_int 18)
11296 (const_int 3) (const_int 19)
11297 (const_int 4) (const_int 20)
11298 (const_int 5) (const_int 21)
11299 (const_int 6) (const_int 22)
11300 (const_int 7) (const_int 23)])))]
11301 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11303 punpcklbw\t{%2, %0|%0, %2}
11304 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11305 [(set_attr "isa" "noavx,avx")
11306 (set_attr "type" "sselog")
11307 (set_attr "prefix_data16" "1,*")
11308 (set_attr "prefix" "orig,vex")
11309 (set_attr "mode" "TI")])
;; vpunpckhwd on V32HI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+32 for i = 4..7, 12..15,
;; 20..23 and 28..31, i.e. the upper four words of each 8-word group.
11311 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
11312 [(set (match_operand:V32HI 0 "register_operand" "=v")
11315 (match_operand:V32HI 1 "register_operand" "v")
11316 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11317 (parallel [(const_int 4) (const_int 36)
11318 (const_int 5) (const_int 37)
11319 (const_int 6) (const_int 38)
11320 (const_int 7) (const_int 39)
11321 (const_int 12) (const_int 44)
11322 (const_int 13) (const_int 45)
11323 (const_int 14) (const_int 46)
11324 (const_int 15) (const_int 47)
11325 (const_int 20) (const_int 52)
11326 (const_int 21) (const_int 53)
11327 (const_int 22) (const_int 54)
11328 (const_int 23) (const_int 55)
11329 (const_int 28) (const_int 60)
11330 (const_int 29) (const_int 61)
11331 (const_int 30) (const_int 62)
11332 (const_int 31) (const_int 63)])))]
11334 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11335 [(set_attr "type" "sselog")
11336 (set_attr "prefix" "evex")
11337 (set_attr "mode" "XI")])
;; vpunpckhwd on V16HI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+16 for i = 4..7 and 12..15,
;; i.e. the upper four words of each 8-word group.  Enabled for
;; TARGET_AVX2, subject to the VL and BW mask conditions.
11339 (define_insn "avx2_interleave_highv16hi<mask_name>"
11340 [(set (match_operand:V16HI 0 "register_operand" "=v")
11343 (match_operand:V16HI 1 "register_operand" "v")
11344 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11345 (parallel [(const_int 4) (const_int 20)
11346 (const_int 5) (const_int 21)
11347 (const_int 6) (const_int 22)
11348 (const_int 7) (const_int 23)
11349 (const_int 12) (const_int 28)
11350 (const_int 13) (const_int 29)
11351 (const_int 14) (const_int 30)
11352 (const_int 15) (const_int 31)])))]
11353 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11354 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11355 [(set_attr "type" "sselog")
11356 (set_attr "prefix" "maybe_evex")
11357 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on V8HI, with an optional masked variant via
;; <mask_name>.  The selector pairs element i with element i+8 for
;; i = 4..7.  Alternative 0 is the two-operand form with operand 1 tied to
;; the destination; alternative 1 is the "v"-prefixed three-operand form.
11359 (define_insn "vec_interleave_highv8hi<mask_name>"
11360 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11363 (match_operand:V8HI 1 "register_operand" "0,v")
11364 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11365 (parallel [(const_int 4) (const_int 12)
11366 (const_int 5) (const_int 13)
11367 (const_int 6) (const_int 14)
11368 (const_int 7) (const_int 15)])))]
11369 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11371 punpckhwd\t{%2, %0|%0, %2}
11372 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11373 [(set_attr "isa" "noavx,avx")
11374 (set_attr "type" "sselog")
11375 (set_attr "prefix_data16" "1,*")
11376 (set_attr "prefix" "orig,maybe_vex")
11377 (set_attr "mode" "TI")])
;; vpunpcklwd on V32HI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+32 for i = 0..3, 8..11,
;; 16..19 and 24..27, i.e. the lower four words of each 8-word group.
11379 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11380 [(set (match_operand:V32HI 0 "register_operand" "=v")
11383 (match_operand:V32HI 1 "register_operand" "v")
11384 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11385 (parallel [(const_int 0) (const_int 32)
11386 (const_int 1) (const_int 33)
11387 (const_int 2) (const_int 34)
11388 (const_int 3) (const_int 35)
11389 (const_int 8) (const_int 40)
11390 (const_int 9) (const_int 41)
11391 (const_int 10) (const_int 42)
11392 (const_int 11) (const_int 43)
11393 (const_int 16) (const_int 48)
11394 (const_int 17) (const_int 49)
11395 (const_int 18) (const_int 50)
11396 (const_int 19) (const_int 51)
11397 (const_int 24) (const_int 56)
11398 (const_int 25) (const_int 57)
11399 (const_int 26) (const_int 58)
11400 (const_int 27) (const_int 59)])))]
11402 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11403 [(set_attr "type" "sselog")
11404 (set_attr "prefix" "evex")
11405 (set_attr "mode" "XI")])
;; vpunpcklwd on V16HI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+16 for i = 0..3 and 8..11,
;; i.e. the lower four words of each 8-word group.  Enabled for
;; TARGET_AVX2, subject to the VL and BW mask conditions.
11407 (define_insn "avx2_interleave_lowv16hi<mask_name>"
11408 [(set (match_operand:V16HI 0 "register_operand" "=v")
11411 (match_operand:V16HI 1 "register_operand" "v")
11412 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11413 (parallel [(const_int 0) (const_int 16)
11414 (const_int 1) (const_int 17)
11415 (const_int 2) (const_int 18)
11416 (const_int 3) (const_int 19)
11417 (const_int 8) (const_int 24)
11418 (const_int 9) (const_int 25)
11419 (const_int 10) (const_int 26)
11420 (const_int 11) (const_int 27)])))]
11421 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11422 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11423 [(set_attr "type" "sselog")
11424 (set_attr "prefix" "maybe_evex")
11425 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on V8HI, with an optional masked variant via
;; <mask_name>.  The selector pairs element i with element i+8 for
;; i = 0..3.  Alternative 0 is the two-operand form with operand 1 tied to
;; the destination; alternative 1 is the "v"-prefixed three-operand form.
11427 (define_insn "vec_interleave_lowv8hi<mask_name>"
11428 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11431 (match_operand:V8HI 1 "register_operand" "0,v")
11432 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11433 (parallel [(const_int 0) (const_int 8)
11434 (const_int 1) (const_int 9)
11435 (const_int 2) (const_int 10)
11436 (const_int 3) (const_int 11)])))]
11437 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11439 punpcklwd\t{%2, %0|%0, %2}
11440 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11441 [(set_attr "isa" "noavx,avx")
11442 (set_attr "type" "sselog")
11443 (set_attr "prefix_data16" "1,*")
11444 (set_attr "prefix" "orig,maybe_evex")
11445 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+8 for i = 2, 3, 6 and 7,
;; i.e. the upper two dwords of each 4-dword group.
11447 (define_insn "avx2_interleave_highv8si<mask_name>"
11448 [(set (match_operand:V8SI 0 "register_operand" "=v")
11451 (match_operand:V8SI 1 "register_operand" "v")
11452 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11453 (parallel [(const_int 2) (const_int 10)
11454 (const_int 3) (const_int 11)
11455 (const_int 6) (const_int 14)
11456 (const_int 7) (const_int 15)])))]
11457 "TARGET_AVX2 && <mask_avx512vl_condition>"
11458 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11459 [(set_attr "type" "sselog")
11460 (set_attr "prefix" "maybe_evex")
11461 (set_attr "mode" "OI")])
;; vpunpckhdq on V16SI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+16 for i = 2, 3, 6, 7, 10,
;; 11, 14 and 15, i.e. the upper two dwords of each 4-dword group.
11463 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11464 [(set (match_operand:V16SI 0 "register_operand" "=v")
11467 (match_operand:V16SI 1 "register_operand" "v")
11468 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11469 (parallel [(const_int 2) (const_int 18)
11470 (const_int 3) (const_int 19)
11471 (const_int 6) (const_int 22)
11472 (const_int 7) (const_int 23)
11473 (const_int 10) (const_int 26)
11474 (const_int 11) (const_int 27)
11475 (const_int 14) (const_int 30)
11476 (const_int 15) (const_int 31)])))]
11478 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11479 [(set_attr "type" "sselog")
11480 (set_attr "prefix" "evex")
11481 (set_attr "mode" "XI")])
;; punpckhdq / vpunpckhdq on V4SI, with an optional masked variant via
;; <mask_name>.  The selector pairs element i with element i+4 for i = 2
;; and 3.  Alternative 0 is the two-operand form with operand 1 tied to the
;; destination; alternative 1 is the "v"-prefixed three-operand form.
11484 (define_insn "vec_interleave_highv4si<mask_name>"
11485 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11488 (match_operand:V4SI 1 "register_operand" "0,v")
11489 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11490 (parallel [(const_int 2) (const_int 6)
11491 (const_int 3) (const_int 7)])))]
11492 "TARGET_SSE2 && <mask_avx512vl_condition>"
11494 punpckhdq\t{%2, %0|%0, %2}
11495 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11496 [(set_attr "isa" "noavx,avx")
11497 (set_attr "type" "sselog")
11498 (set_attr "prefix_data16" "1,*")
11499 (set_attr "prefix" "orig,maybe_vex")
11500 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+8 for i = 0, 1, 4 and 5,
;; i.e. the lower two dwords of each 4-dword group.
11502 (define_insn "avx2_interleave_lowv8si<mask_name>"
11503 [(set (match_operand:V8SI 0 "register_operand" "=v")
11506 (match_operand:V8SI 1 "register_operand" "v")
11507 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11508 (parallel [(const_int 0) (const_int 8)
11509 (const_int 1) (const_int 9)
11510 (const_int 4) (const_int 12)
11511 (const_int 5) (const_int 13)])))]
11512 "TARGET_AVX2 && <mask_avx512vl_condition>"
11513 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11514 [(set_attr "type" "sselog")
11515 (set_attr "prefix" "maybe_evex")
11516 (set_attr "mode" "OI")])
;; vpunpckldq on V16SI, with an optional masked variant via <mask_name>.
;; The selector pairs element i with element i+16 for i = 0, 1, 4, 5, 8, 9,
;; 12 and 13, i.e. the lower two dwords of each 4-dword group.
11518 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11519 [(set (match_operand:V16SI 0 "register_operand" "=v")
11522 (match_operand:V16SI 1 "register_operand" "v")
11523 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11524 (parallel [(const_int 0) (const_int 16)
11525 (const_int 1) (const_int 17)
11526 (const_int 4) (const_int 20)
11527 (const_int 5) (const_int 21)
11528 (const_int 8) (const_int 24)
11529 (const_int 9) (const_int 25)
11530 (const_int 12) (const_int 28)
11531 (const_int 13) (const_int 29)])))]
11533 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11534 [(set_attr "type" "sselog")
11535 (set_attr "prefix" "evex")
11536 (set_attr "mode" "XI")])
;; punpckldq / vpunpckldq on V4SI, with an optional masked variant via
;; <mask_name>.  The selector pairs element i with element i+4 for i = 0
;; and 1.  Alternative 0 is the two-operand form with operand 1 tied to the
;; destination; alternative 1 is the "v"-prefixed three-operand form.
11538 (define_insn "vec_interleave_lowv4si<mask_name>"
11539 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11542 (match_operand:V4SI 1 "register_operand" "0,v")
11543 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11544 (parallel [(const_int 0) (const_int 4)
11545 (const_int 1) (const_int 5)])))]
11546 "TARGET_SSE2 && <mask_avx512vl_condition>"
11548 punpckldq\t{%2, %0|%0, %2}
11549 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11550 [(set_attr "isa" "noavx,avx")
11551 (set_attr "type" "sselog")
11552 (set_attr "prefix_data16" "1,*")
11553 (set_attr "prefix" "orig,vex")
11554 (set_attr "mode" "TI")])
;; Expander for a full-width "interleave high" on the VI_256 modes.  It
;; emits both avx2_interleave_low<mode> and avx2_interleave_high<mode> on
;; the inputs into temporaries t1 and t2, then combines their V4DImode
;; views with avx2_permv2ti using immediate 1 + (3 << 4) and moves the
;; result, viewed back in <MODE>mode, into operand 0.
;; NOTE(review): the constraint strings ("=x", "x", "xm") sit on a
;; define_expand, where they are presumably not consulted; confirm and
;; consider dropping them.
11556 (define_expand "vec_interleave_high<mode>"
11557 [(match_operand:VI_256 0 "register_operand" "=x")
11558 (match_operand:VI_256 1 "register_operand" "x")
11559 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11562 rtx t1 = gen_reg_rtx (<MODE>mode);
11563 rtx t2 = gen_reg_rtx (<MODE>mode);
11564 rtx t3 = gen_reg_rtx (V4DImode);
11565 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11566 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11567 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11568 gen_lowpart (V4DImode, t2),
11569 GEN_INT (1 + (3 << 4))));
11570 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for a full-width "interleave low" on the VI_256 modes.  It
;; emits both avx2_interleave_low<mode> and avx2_interleave_high<mode> on
;; the inputs into temporaries t1 and t2, then combines their V4DImode
;; views with avx2_permv2ti using immediate 0 + (2 << 4) and moves the
;; result, viewed back in <MODE>mode, into operand 0.
;; NOTE(review): the constraint strings ("=x", "x", "xm") sit on a
;; define_expand, where they are presumably not consulted; confirm and
;; consider dropping them.
11574 (define_expand "vec_interleave_low<mode>"
11575 [(match_operand:VI_256 0 "register_operand" "=x")
11576 (match_operand:VI_256 1 "register_operand" "x")
11577 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11580 rtx t1 = gen_reg_rtx (<MODE>mode);
11581 rtx t2 = gen_reg_rtx (<MODE>mode);
11582 rtx t3 = gen_reg_rtx (V4DImode);
11583 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11584 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11585 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11586 gen_lowpart (V4DImode, t2),
11587 GEN_INT (0 + (2 << 4))));
11588 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11592 ;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition in the iterator; V16QI and V4SI require
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
11593 (define_mode_iterator PINSR_MODE
11594 [(V16QI "TARGET_SSE4_1") V8HI
11595 (V4SI "TARGET_SSE4_1")
11596 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix for the pinsr pattern: "sse2" for V8HI and "sse4_1" for
;; V16QI, V4SI and V2DI.
11598 (define_mode_attr sse2p4_1
11599 [(V16QI "sse4_1") (V8HI "sse2")
11600 (V4SI "sse4_1") (V2DI "sse4_1")])
11602 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Element insert (pinsr / vpinsr) for the PINSR_MODE modes.  The RTL is a
;; vec_merge of the duplicated scalar operand 2 into vector operand 1,
;; selected by operand 3, which must be a constant whose exact_log2 is a
;; valid element number (< GET_MODE_NUNITS).  The output code rewrites
;; operands[3] to that element number before printing.  Alternatives 0-1
;; ("noavx") tie operand 1 to the destination; alternatives 2-3 ("avx")
;; print the three-operand "v" form.  When the scalar mode is narrower than
;; SImode the source is printed with the %k modifier.
11603 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11604 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11605 (vec_merge:PINSR_MODE
11606 (vec_duplicate:PINSR_MODE
11607 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11608 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11609 (match_operand:SI 3 "const_int_operand")))]
11611 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11612 < GET_MODE_NUNITS (<MODE>mode))"
11614 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11616 switch (which_alternative)
11619 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11620 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11623 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11625 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11626 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11629 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11631 gcc_unreachable ();
11634 [(set_attr "isa" "noavx,noavx,avx,avx")
11635 (set_attr "type" "sselog")
11636 (set (attr "prefix_rex")
11638 (and (not (match_test "TARGET_AVX"))
11639 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11641 (const_string "*")))
11642 (set (attr "prefix_data16")
11644 (and (not (match_test "TARGET_AVX"))
11645 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11647 (const_string "*")))
11648 (set (attr "prefix_extra")
11650 (and (not (match_test "TARGET_AVX"))
11651 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11653 (const_string "1")))
11654 (set_attr "length_immediate" "1")
11655 (set_attr "prefix" "orig,orig,vex,vex")
11656 (set_attr "mode" "TI")])
;; Masked quarter-vector insert expander for the AVX512_VEC modes.
;; Operand 3 is the position immediate (0..3).  It is converted into a
;; vec_merge selector: for 4-byte elements 0xFFFF ^ (0xF000 >> mask * 4),
;; otherwise 0xFF ^ (0xC0 >> mask * 2).  The selector is then passed,
;; together with operands 0, 1, 2, 4 and 5, to the corresponding _1_mask
;; insn generator.
11658 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11659 [(match_operand:AVX512_VEC 0 "register_operand")
11660 (match_operand:AVX512_VEC 1 "register_operand")
11661 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11662 (match_operand:SI 3 "const_0_to_3_operand")
11663 (match_operand:AVX512_VEC 4 "register_operand")
11664 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11668 mask = INTVAL (operands[3]);
11669 selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ?
11670 0xFFFF ^ (0xF000 >> mask * 4)
11671 : 0xFF ^ (0xC0 >> mask * 2);
11672 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11673 (operands[0], operands[1], operands[2], GEN_INT (selector),
11674 operands[4], operands[5]));
;; Quarter-vector insert instruction for the AVX512_VEC modes, with an
;; optional masked variant via <mask_name>.  Operand 3 arrives as a
;; vec_merge selector; the output code recognises four selector values per
;; element width (0xFFF / 0xF0FF / 0xFF0F / 0xFFF0 and 0x3F / 0xCF / 0xF3 /
;; 0xFC), hits gcc_unreachable for anything else, and replaces operands[3]
;; with GEN_INT (mask) before printing vinsert<shuffletype><extract_suf>.
;; 0xFFF is not a typo for 0xFFFF: it equals 0xFFFF ^ 0xF000, the value the
;; _mask expander of this family computes for position 0.
11678 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11679 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11680 (vec_merge:AVX512_VEC
11681 (match_operand:AVX512_VEC 1 "register_operand" "v")
11682 (vec_duplicate:AVX512_VEC
11683 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11684 (match_operand:SI 3 "const_int_operand" "n")))]
11688 int selector = INTVAL (operands[3]);
11690 if (selector == 0xFFF || selector == 0x3F)
11692 else if ( selector == 0xF0FF || selector == 0xCF)
11694 else if ( selector == 0xFF0F || selector == 0xF3)
11696 else if ( selector == 0xFFF0 || selector == 0xFC)
11699 gcc_unreachable ();
11701 operands[3] = GEN_INT (mask);
11703 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
11705 [(set_attr "type" "sselog")
11706 (set_attr "length_immediate" "1")
11707 (set_attr "prefix" "evex")
11708 (set_attr "mode" "<sseinsnmode>")])
;; Masked half-vector insert expander for the AVX512_VEC_2 modes.
;; Operand 3 is the position immediate (0 or 1), read into `mask'.  The
;; expander emits either vec_set_lo_<mode>_mask or vec_set_hi_<mode>_mask
;; with operands 0, 1, 2, 4 and 5 (presumably the low variant for 0 and the
;; high variant for 1).
11710 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
11711 [(match_operand:AVX512_VEC_2 0 "register_operand")
11712 (match_operand:AVX512_VEC_2 1 "register_operand")
11713 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11714 (match_operand:SI 3 "const_0_to_1_operand")
11715 (match_operand:AVX512_VEC_2 4 "register_operand")
11716 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11719 int mask = INTVAL (operands[3]);
11721 emit_insn (gen_vec_set_lo_<mode>_mask
11722 (operands[0], operands[1], operands[2],
11723 operands[4], operands[5]));
11725 emit_insn (gen_vec_set_hi_<mode>_mask
11726 (operands[0], operands[1], operands[2],
11727 operands[4], operands[5]));
;; Replace the low half of a V16FI vector: the result combines the
;; half-width operand 2 with elements 8..15 selected from operand 1, and is
;; printed as vinsert<shuffletype>32x8 with immediate $0x0.  An optional
;; masked variant is provided via <mask_name>.
11731 (define_insn "vec_set_lo_<mode><mask_name>"
11732 [(set (match_operand:V16FI 0 "register_operand" "=v")
11734 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11735 (vec_select:<ssehalfvecmode>
11736 (match_operand:V16FI 1 "register_operand" "v")
11737 (parallel [(const_int 8) (const_int 9)
11738 (const_int 10) (const_int 11)
11739 (const_int 12) (const_int 13)
11740 (const_int 14) (const_int 15)]))))]
11742 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
11743 [(set_attr "type" "sselog")
11744 (set_attr "length_immediate" "1")
11745 (set_attr "prefix" "evex")
11746 (set_attr "mode" "<sseinsnmode>")])
;; V16FI high-half insert: operand 2 (mode <ssehalfvecmode>) is combined
;; with elements 0..7 selected from operand 1, and the result is emitted
;; as vinsert<shuffletype>32x8 with immediate 1.
;; The output template has two dialect halves, {AT&T|Intel}.  Only the
;; AT&T half may carry the `$' immediate prefix; the Intel half must spell
;; the immediate bare, otherwise -masm=intel output is not a valid
;; immediate operand.
11748 (define_insn "vec_set_hi_<mode><mask_name>"
11749 [(set (match_operand:V16FI 0 "register_operand" "=v")
11751 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11752 (vec_select:<ssehalfvecmode>
11753 (match_operand:V16FI 1 "register_operand" "v")
11754 (parallel [(const_int 0) (const_int 1)
11755 (const_int 2) (const_int 3)
11756 (const_int 4) (const_int 5)
11757 (const_int 6) (const_int 7)]))))]
11759 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
11760 [(set_attr "type" "sselog")
11761 (set_attr "length_immediate" "1")
11762 (set_attr "prefix" "evex")
11763 (set_attr "mode" "<sseinsnmode>")])
;; V8FI low-half insert: operand 2 (mode <ssehalfvecmode>) is combined
;; with elements 4..7 selected from operand 1, and the result is emitted
;; as vinsert<shuffletype>64x4 with immediate 0.
;; The output template has two dialect halves, {AT&T|Intel}.  Only the
;; AT&T half may carry the `$' immediate prefix; the Intel half must spell
;; the immediate bare, otherwise -masm=intel output is not a valid
;; immediate operand.
11765 (define_insn "vec_set_lo_<mode><mask_name>"
11766 [(set (match_operand:V8FI 0 "register_operand" "=v")
11768 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11769 (vec_select:<ssehalfvecmode>
11770 (match_operand:V8FI 1 "register_operand" "v")
11771 (parallel [(const_int 4) (const_int 5)
11772 (const_int 6) (const_int 7)]))))]
11774 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
11775 [(set_attr "type" "sselog")
11776 (set_attr "length_immediate" "1")
11777 (set_attr "prefix" "evex")
11778 (set_attr "mode" "XI")])
;; V8FI high-half insert: operand 2 (mode <ssehalfvecmode>) is combined
;; with elements 0..3 selected from operand 1, and the result is emitted
;; as vinsert<shuffletype>64x4 with immediate 1.
;; The output template has two dialect halves, {AT&T|Intel}.  Only the
;; AT&T half may carry the `$' immediate prefix; the Intel half must spell
;; the immediate bare, otherwise -masm=intel output is not a valid
;; immediate operand.
11780 (define_insn "vec_set_hi_<mode><mask_name>"
11781 [(set (match_operand:V8FI 0 "register_operand" "=v")
11783 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11784 (vec_select:<ssehalfvecmode>
11785 (match_operand:V8FI 1 "register_operand" "v")
11786 (parallel [(const_int 0) (const_int 1)
11787 (const_int 2) (const_int 3)]))))]
11789 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
11790 [(set_attr "type" "sselog")
11791 (set_attr "length_immediate" "1")
11792 (set_attr "prefix" "evex")
11793 (set_attr "mode" "XI")])
;; Masked vshuf<shuffletype>64x2 expander for VI8F_256 modes.
;; Operand 3 is an immediate accepted by const_0_to_3_operand.  Each of
;; its two bits is turned into a pair of consecutive element indices for
;; avx512dq_shuf_<shuffletype>64x2_1_mask:
;;   bit 0 -> (2*b + 0, 2*b + 1)
;;   bit 1 -> (2*b + 4, 2*b + 5)
;; Operands 4 and 5 are forwarded as the final two arguments.
11795 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
11796 [(match_operand:VI8F_256 0 "register_operand")
11797 (match_operand:VI8F_256 1 "register_operand")
11798 (match_operand:VI8F_256 2 "nonimmediate_operand")
11799 (match_operand:SI 3 "const_0_to_3_operand")
11800 (match_operand:VI8F_256 4 "register_operand")
11801 (match_operand:QI 5 "register_operand")]
11804 int mask = INTVAL (operands[3]);
11805 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
11806 (operands[0], operands[1], operands[2],
11807 GEN_INT (((mask >> 0) & 1) * 2 + 0),
11808 GEN_INT (((mask >> 0) & 1) * 2 + 1),
11809 GEN_INT (((mask >> 1) & 1) * 2 + 4),
11810 GEN_INT (((mask >> 1) & 1) * 2 + 5),
11811 operands[4], operands[5]));
;; vshuf<shuffletype>64x2 on VI8F_256: a vec_select of four elements from
;; the concatenation of operands 1 and 2.
;; The condition requires the indices to come in consecutive pairs
;; (operand 3 + 1 == operand 4, operand 5 + 1 == operand 6).
;; The immediate is rebuilt as
;;   operand 3 / 2  |  ((operand 5 - 4) / 2) << 1
;; and stored back into operands[3] before printing.
;; NOTE(review): the "mode" attribute is "XI" although the operands use
;; the VI8F_256 iterator; confirm whether "<sseinsnmode>" was intended.
11815 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
11816 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
11817 (vec_select:VI8F_256
11818 (vec_concat:<ssedoublemode>
11819 (match_operand:VI8F_256 1 "register_operand" "v")
11820 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
11821 (parallel [(match_operand 3 "const_0_to_3_operand")
11822 (match_operand 4 "const_0_to_3_operand")
11823 (match_operand 5 "const_4_to_7_operand")
11824 (match_operand 6 "const_4_to_7_operand")])))]
11826 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11827 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
11830 mask = INTVAL (operands[3]) / 2;
11831 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
11832 operands[3] = GEN_INT (mask);
11833 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
11835 [(set_attr "type" "sselog")
11836 (set_attr "length_immediate" "1")
11837 (set_attr "prefix" "evex")
11838 (set_attr "mode" "XI")])
;; Masked vshuf<shuffletype>64x2 expander for V8FI modes.
;; Operand 3 is an 8-bit immediate (const_0_to_255_operand) treated as
;; four 2-bit fields.  Each field f becomes a pair of consecutive indices:
;;   bits 0-1 and 2-3 -> (2*f,     2*f + 1)
;;   bits 4-5 and 6-7 -> (2*f + 8, 2*f + 9)
;; The eight indices plus operands 4 and 5 are passed to
;; avx512f_shuf_<shuffletype>64x2_1_mask.
11840 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
11841 [(match_operand:V8FI 0 "register_operand")
11842 (match_operand:V8FI 1 "register_operand")
11843 (match_operand:V8FI 2 "nonimmediate_operand")
11844 (match_operand:SI 3 "const_0_to_255_operand")
11845 (match_operand:V8FI 4 "register_operand")
11846 (match_operand:QI 5 "register_operand")]
11849 int mask = INTVAL (operands[3]);
11850 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
11851 (operands[0], operands[1], operands[2],
11852 GEN_INT (((mask >> 0) & 3) * 2),
11853 GEN_INT (((mask >> 0) & 3) * 2 + 1),
11854 GEN_INT (((mask >> 2) & 3) * 2),
11855 GEN_INT (((mask >> 2) & 3) * 2 + 1),
11856 GEN_INT (((mask >> 4) & 3) * 2 + 8),
11857 GEN_INT (((mask >> 4) & 3) * 2 + 9),
11858 GEN_INT (((mask >> 6) & 3) * 2 + 8),
11859 GEN_INT (((mask >> 6) & 3) * 2 + 9),
11860 operands[4], operands[5]));
;; vshuf<shuffletype>64x2 on V8FI: eight index operands select from the
;; concatenation of operands 1 and 2 (operands 3-6 in 0..7, operands 7-10
;; in 8..15).
;; The condition requires each adjacent pair of indices to be consecutive
;; (3/4, 5/6, 7/8, 9/10).  The immediate is rebuilt from the first index
;; of every pair:
;;   op3 / 2 | (op5 / 2) << 2 | ((op7 - 8) / 2) << 4 | ((op9 - 8) / 2) << 6
;; and stored back into operands[3] before printing.
11864 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
11865 [(set (match_operand:V8FI 0 "register_operand" "=v")
11867 (vec_concat:<ssedoublemode>
11868 (match_operand:V8FI 1 "register_operand" "v")
11869 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
11870 (parallel [(match_operand 3 "const_0_to_7_operand")
11871 (match_operand 4 "const_0_to_7_operand")
11872 (match_operand 5 "const_0_to_7_operand")
11873 (match_operand 6 "const_0_to_7_operand")
11874 (match_operand 7 "const_8_to_15_operand")
11875 (match_operand 8 "const_8_to_15_operand")
11876 (match_operand 9 "const_8_to_15_operand")
11877 (match_operand 10 "const_8_to_15_operand")])))]
11879 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11880 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
11881 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11882 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
11885 mask = INTVAL (operands[3]) / 2;
11886 mask |= INTVAL (operands[5]) / 2 << 2;
11887 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
11888 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
11889 operands[3] = GEN_INT (mask);
11891 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11893 [(set_attr "type" "sselog")
11894 (set_attr "length_immediate" "1")
11895 (set_attr "prefix" "evex")
11896 (set_attr "mode" "<sseinsnmode>")])
;; Masked vshuf<shuffletype>32x4 expander for VI4F_256 modes.
;; Operand 3 is an immediate accepted by const_0_to_3_operand.  Each of
;; its two bits becomes a run of four consecutive element indices:
;;   bit 0 -> 4*b + 0 .. 4*b + 3
;;   bit 1 -> 4*b + 8 .. 4*b + 11
;; The eight indices plus operands 4 and 5 are passed to
;; avx512vl_shuf_<shuffletype>32x4_1_mask.
11898 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
11899 [(match_operand:VI4F_256 0 "register_operand")
11900 (match_operand:VI4F_256 1 "register_operand")
11901 (match_operand:VI4F_256 2 "nonimmediate_operand")
11902 (match_operand:SI 3 "const_0_to_3_operand")
11903 (match_operand:VI4F_256 4 "register_operand")
11904 (match_operand:QI 5 "register_operand")]
11907 int mask = INTVAL (operands[3]);
11908 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
11909 (operands[0], operands[1], operands[2],
11910 GEN_INT (((mask >> 0) & 1) * 4 + 0),
11911 GEN_INT (((mask >> 0) & 1) * 4 + 1),
11912 GEN_INT (((mask >> 0) & 1) * 4 + 2),
11913 GEN_INT (((mask >> 0) & 1) * 4 + 3),
11914 GEN_INT (((mask >> 1) & 1) * 4 + 8),
11915 GEN_INT (((mask >> 1) & 1) * 4 + 9),
11916 GEN_INT (((mask >> 1) & 1) * 4 + 10),
11917 GEN_INT (((mask >> 1) & 1) * 4 + 11),
11918 operands[4], operands[5]));
;; vshuf<shuffletype>32x4 on VI4F_256: eight index operands select from
;; the concatenation of operands 1 and 2 (operands 3-6 in 0..7, operands
;; 7-10 in 8..15).
;; The condition requires operands 3..6 to be four consecutive values and
;; operands 7..10 to be four consecutive values.  The immediate is
;;   op3 / 4 | ((op7 - 8) / 4) << 1
;; and is stored back into operands[3] before printing.
11922 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
11923 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
11924 (vec_select:VI4F_256
11925 (vec_concat:<ssedoublemode>
11926 (match_operand:VI4F_256 1 "register_operand" "v")
11927 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
11928 (parallel [(match_operand 3 "const_0_to_7_operand")
11929 (match_operand 4 "const_0_to_7_operand")
11930 (match_operand 5 "const_0_to_7_operand")
11931 (match_operand 6 "const_0_to_7_operand")
11932 (match_operand 7 "const_8_to_15_operand")
11933 (match_operand 8 "const_8_to_15_operand")
11934 (match_operand 9 "const_8_to_15_operand")
11935 (match_operand 10 "const_8_to_15_operand")])))]
11937 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11938 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11939 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11940 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11941 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11942 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
11945 mask = INTVAL (operands[3]) / 4;
11946 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
11947 operands[3] = GEN_INT (mask);
11949 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11951 [(set_attr "type" "sselog")
11952 (set_attr "length_immediate" "1")
11953 (set_attr "prefix" "evex")
11954 (set_attr "mode" "<sseinsnmode>")])
;; Masked vshuf<shuffletype>32x4 expander for V16FI modes.
;; Operand 3 is an 8-bit immediate (const_0_to_255_operand) treated as
;; four 2-bit fields.  Each field f becomes four consecutive indices:
;;   bits 0-1 and 2-3 -> 4*f      .. 4*f + 3
;;   bits 4-5 and 6-7 -> 4*f + 16 .. 4*f + 19
;; The sixteen indices plus operands 4 and 5 are passed to
;; avx512f_shuf_<shuffletype>32x4_1_mask.
11956 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
11957 [(match_operand:V16FI 0 "register_operand")
11958 (match_operand:V16FI 1 "register_operand")
11959 (match_operand:V16FI 2 "nonimmediate_operand")
11960 (match_operand:SI 3 "const_0_to_255_operand")
11961 (match_operand:V16FI 4 "register_operand")
11962 (match_operand:HI 5 "register_operand")]
11965 int mask = INTVAL (operands[3]);
11966 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
11967 (operands[0], operands[1], operands[2],
11968 GEN_INT (((mask >> 0) & 3) * 4),
11969 GEN_INT (((mask >> 0) & 3) * 4 + 1),
11970 GEN_INT (((mask >> 0) & 3) * 4 + 2),
11971 GEN_INT (((mask >> 0) & 3) * 4 + 3),
11972 GEN_INT (((mask >> 2) & 3) * 4),
11973 GEN_INT (((mask >> 2) & 3) * 4 + 1),
11974 GEN_INT (((mask >> 2) & 3) * 4 + 2),
11975 GEN_INT (((mask >> 2) & 3) * 4 + 3),
11976 GEN_INT (((mask >> 4) & 3) * 4 + 16),
11977 GEN_INT (((mask >> 4) & 3) * 4 + 17),
11978 GEN_INT (((mask >> 4) & 3) * 4 + 18),
11979 GEN_INT (((mask >> 4) & 3) * 4 + 19),
11980 GEN_INT (((mask >> 6) & 3) * 4 + 16),
11981 GEN_INT (((mask >> 6) & 3) * 4 + 17),
11982 GEN_INT (((mask >> 6) & 3) * 4 + 18),
11983 GEN_INT (((mask >> 6) & 3) * 4 + 19),
11984 operands[4], operands[5]));
;; vshuf<shuffletype>32x4 on V16FI: sixteen index operands select from
;; the concatenation of operands 1 and 2 (operands 3-10 in 0..15,
;; operands 11-18 in 16..31).
;; The condition requires each group of four index operands (3-6, 7-10,
;; 11-14, 15-18) to hold four consecutive values.  The immediate is
;; rebuilt from the first index of every group:
;;   op3 / 4 | (op7 / 4) << 2 | ((op11 - 16) / 4) << 4
;;           | ((op15 - 16) / 4) << 6
;; and stored back into operands[3] before printing.
11988 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
11989 [(set (match_operand:V16FI 0 "register_operand" "=v")
11991 (vec_concat:<ssedoublemode>
11992 (match_operand:V16FI 1 "register_operand" "v")
11993 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
11994 (parallel [(match_operand 3 "const_0_to_15_operand")
11995 (match_operand 4 "const_0_to_15_operand")
11996 (match_operand 5 "const_0_to_15_operand")
11997 (match_operand 6 "const_0_to_15_operand")
11998 (match_operand 7 "const_0_to_15_operand")
11999 (match_operand 8 "const_0_to_15_operand")
12000 (match_operand 9 "const_0_to_15_operand")
12001 (match_operand 10 "const_0_to_15_operand")
12002 (match_operand 11 "const_16_to_31_operand")
12003 (match_operand 12 "const_16_to_31_operand")
12004 (match_operand 13 "const_16_to_31_operand")
12005 (match_operand 14 "const_16_to_31_operand")
12006 (match_operand 15 "const_16_to_31_operand")
12007 (match_operand 16 "const_16_to_31_operand")
12008 (match_operand 17 "const_16_to_31_operand")
12009 (match_operand 18 "const_16_to_31_operand")])))]
12011 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12012 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12013 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12014 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12015 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12016 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12017 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12018 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12019 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12020 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12021 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12022 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12025 mask = INTVAL (operands[3]) / 4;
12026 mask |= INTVAL (operands[7]) / 4 << 2;
12027 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12028 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12029 operands[3] = GEN_INT (mask);
12031 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12033 [(set_attr "type" "sselog")
12034 (set_attr "length_immediate" "1")
12035 (set_attr "prefix" "evex")
12036 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpshufd expander for V16SI.
;; Operand 2 is an 8-bit immediate (const_0_to_255_operand) split into
;; four 2-bit fields.  The same four fields are emitted four times with
;; offsets 0, 4, 8 and 12, giving the sixteen index arguments of
;; avx512f_pshufd_1_mask.  Operands 3 and 4 are forwarded as the final
;; two arguments.
12038 (define_expand "avx512f_pshufdv3_mask"
12039 [(match_operand:V16SI 0 "register_operand")
12040 (match_operand:V16SI 1 "nonimmediate_operand")
12041 (match_operand:SI 2 "const_0_to_255_operand")
12042 (match_operand:V16SI 3 "register_operand")
12043 (match_operand:HI 4 "register_operand")]
12046 int mask = INTVAL (operands[2]);
12047 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12048 GEN_INT ((mask >> 0) & 3),
12049 GEN_INT ((mask >> 2) & 3),
12050 GEN_INT ((mask >> 4) & 3),
12051 GEN_INT ((mask >> 6) & 3),
12052 GEN_INT (((mask >> 0) & 3) + 4),
12053 GEN_INT (((mask >> 2) & 3) + 4),
12054 GEN_INT (((mask >> 4) & 3) + 4),
12055 GEN_INT (((mask >> 6) & 3) + 4),
12056 GEN_INT (((mask >> 0) & 3) + 8),
12057 GEN_INT (((mask >> 2) & 3) + 8),
12058 GEN_INT (((mask >> 4) & 3) + 8),
12059 GEN_INT (((mask >> 6) & 3) + 8),
12060 GEN_INT (((mask >> 0) & 3) + 12),
12061 GEN_INT (((mask >> 2) & 3) + 12),
12062 GEN_INT (((mask >> 4) & 3) + 12),
12063 GEN_INT (((mask >> 6) & 3) + 12),
12064 operands[3], operands[4]));
;; vpshufd on V16SI with sixteen explicit index operands (2-17), grouped
;; in fours by range: 0..3, 4..7, 8..11 and 12..15.
;; The condition requires the second, third and fourth groups to equal
;; the first group plus 4, 8 and 12 respectively, so one 8-bit immediate
;; describes the whole shuffle.  That immediate is packed from operands
;; 2-5 (two bits each, shifts 0/2/4/6) and stored back into operands[2].
12068 (define_insn "avx512f_pshufd_1<mask_name>"
12069 [(set (match_operand:V16SI 0 "register_operand" "=v")
12071 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12072 (parallel [(match_operand 2 "const_0_to_3_operand")
12073 (match_operand 3 "const_0_to_3_operand")
12074 (match_operand 4 "const_0_to_3_operand")
12075 (match_operand 5 "const_0_to_3_operand")
12076 (match_operand 6 "const_4_to_7_operand")
12077 (match_operand 7 "const_4_to_7_operand")
12078 (match_operand 8 "const_4_to_7_operand")
12079 (match_operand 9 "const_4_to_7_operand")
12080 (match_operand 10 "const_8_to_11_operand")
12081 (match_operand 11 "const_8_to_11_operand")
12082 (match_operand 12 "const_8_to_11_operand")
12083 (match_operand 13 "const_8_to_11_operand")
12084 (match_operand 14 "const_12_to_15_operand")
12085 (match_operand 15 "const_12_to_15_operand")
12086 (match_operand 16 "const_12_to_15_operand")
12087 (match_operand 17 "const_12_to_15_operand")])))]
12089 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12090 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12091 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12092 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12093 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12094 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12095 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12096 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12097 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12098 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12099 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12100 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12103 mask |= INTVAL (operands[2]) << 0;
12104 mask |= INTVAL (operands[3]) << 2;
12105 mask |= INTVAL (operands[4]) << 4;
12106 mask |= INTVAL (operands[5]) << 6;
12107 operands[2] = GEN_INT (mask);
12109 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12111 [(set_attr "type" "sselog1")
12112 (set_attr "prefix" "evex")
12113 (set_attr "length_immediate" "1")
12114 (set_attr "mode" "XI")])
;; Masked vpshufd expander for V8SI.
;; The 8-bit immediate in operand 2 is split into four 2-bit fields,
;; emitted once as-is and once with +4, giving the eight index arguments
;; of avx2_pshufd_1_mask.  Operands 3 and 4 are forwarded as the final
;; two arguments.
12116 (define_expand "avx512vl_pshufdv3_mask"
12117 [(match_operand:V8SI 0 "register_operand")
12118 (match_operand:V8SI 1 "nonimmediate_operand")
12119 (match_operand:SI 2 "const_0_to_255_operand")
12120 (match_operand:V8SI 3 "register_operand")
12121 (match_operand:QI 4 "register_operand")]
12124 int mask = INTVAL (operands[2]);
12125 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
12126 GEN_INT ((mask >> 0) & 3),
12127 GEN_INT ((mask >> 2) & 3),
12128 GEN_INT ((mask >> 4) & 3),
12129 GEN_INT ((mask >> 6) & 3),
12130 GEN_INT (((mask >> 0) & 3) + 4),
12131 GEN_INT (((mask >> 2) & 3) + 4),
12132 GEN_INT (((mask >> 4) & 3) + 4),
12133 GEN_INT (((mask >> 6) & 3) + 4),
12134 operands[3], operands[4]));
;; Unmasked vpshufd expander for V8SI.
;; The 8-bit immediate in operand 2 is split into four 2-bit fields,
;; emitted once as-is and once with +4, giving the eight index arguments
;; of avx2_pshufd_1.
12138 (define_expand "avx2_pshufdv3"
12139 [(match_operand:V8SI 0 "register_operand")
12140 (match_operand:V8SI 1 "nonimmediate_operand")
12141 (match_operand:SI 2 "const_0_to_255_operand")]
12144 int mask = INTVAL (operands[2]);
12145 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
12146 GEN_INT ((mask >> 0) & 3),
12147 GEN_INT ((mask >> 2) & 3),
12148 GEN_INT ((mask >> 4) & 3),
12149 GEN_INT ((mask >> 6) & 3),
12150 GEN_INT (((mask >> 0) & 3) + 4),
12151 GEN_INT (((mask >> 2) & 3) + 4),
12152 GEN_INT (((mask >> 4) & 3) + 4),
12153 GEN_INT (((mask >> 6) & 3) + 4)));
;; vpshufd on V8SI with eight explicit index operands: operands 2-5 in
;; 0..3 and operands 6-9 in 4..7.
;; The condition includes <mask_avx512vl_condition> and requires operands
;; 6-9 to equal operands 2-5 plus 4.  The immediate is packed from
;; operands 2-5 (two bits each, shifts 0/2/4/6) and stored back into
;; operands[2] before printing.
12157 (define_insn "avx2_pshufd_1<mask_name>"
12158 [(set (match_operand:V8SI 0 "register_operand" "=v")
12160 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
12161 (parallel [(match_operand 2 "const_0_to_3_operand")
12162 (match_operand 3 "const_0_to_3_operand")
12163 (match_operand 4 "const_0_to_3_operand")
12164 (match_operand 5 "const_0_to_3_operand")
12165 (match_operand 6 "const_4_to_7_operand")
12166 (match_operand 7 "const_4_to_7_operand")
12167 (match_operand 8 "const_4_to_7_operand")
12168 (match_operand 9 "const_4_to_7_operand")])))]
12170 && <mask_avx512vl_condition>
12171 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12172 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12173 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12174 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
12177 mask |= INTVAL (operands[2]) << 0;
12178 mask |= INTVAL (operands[3]) << 2;
12179 mask |= INTVAL (operands[4]) << 4;
12180 mask |= INTVAL (operands[5]) << 6;
12181 operands[2] = GEN_INT (mask);
12183 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12185 [(set_attr "type" "sselog1")
12186 (set_attr "prefix" "maybe_evex")
12187 (set_attr "length_immediate" "1")
12188 (set_attr "mode" "OI")])
;; Masked pshufd expander for V4SI.
;; The 8-bit immediate in operand 2 is split into four 2-bit indices for
;; sse2_pshufd_1_mask; operands 3 and 4 are forwarded as the final two
;; arguments.
12190 (define_expand "avx512vl_pshufd_mask"
12191 [(match_operand:V4SI 0 "register_operand")
12192 (match_operand:V4SI 1 "nonimmediate_operand")
12193 (match_operand:SI 2 "const_0_to_255_operand")
12194 (match_operand:V4SI 3 "register_operand")
12195 (match_operand:QI 4 "register_operand")]
12198 int mask = INTVAL (operands[2]);
12199 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
12200 GEN_INT ((mask >> 0) & 3),
12201 GEN_INT ((mask >> 2) & 3),
12202 GEN_INT ((mask >> 4) & 3),
12203 GEN_INT ((mask >> 6) & 3),
12204 operands[3], operands[4]));
;; Unmasked pshufd expander for V4SI.
;; Operand 2 is any const_int; only its low eight bits are used, split
;; into four 2-bit indices that are passed to sse2_pshufd_1.
12208 (define_expand "sse2_pshufd"
12209 [(match_operand:V4SI 0 "register_operand")
12210 (match_operand:V4SI 1 "nonimmediate_operand")
12211 (match_operand:SI 2 "const_int_operand")]
12214 int mask = INTVAL (operands[2]);
12215 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
12216 GEN_INT ((mask >> 0) & 3),
12217 GEN_INT ((mask >> 2) & 3),
12218 GEN_INT ((mask >> 4) & 3),
12219 GEN_INT ((mask >> 6) & 3)));
;; pshufd on V4SI with four explicit index operands (2-5), each in 0..3.
;; Enabled under TARGET_SSE2 && <mask_avx512vl_condition>.
;; The immediate is packed from operands 2-5 (two bits each, shifts
;; 0/2/4/6) and stored back into operands[2] before printing.
12223 (define_insn "sse2_pshufd_1<mask_name>"
12224 [(set (match_operand:V4SI 0 "register_operand" "=v")
12226 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
12227 (parallel [(match_operand 2 "const_0_to_3_operand")
12228 (match_operand 3 "const_0_to_3_operand")
12229 (match_operand 4 "const_0_to_3_operand")
12230 (match_operand 5 "const_0_to_3_operand")])))]
12231 "TARGET_SSE2 && <mask_avx512vl_condition>"
12234 mask |= INTVAL (operands[2]) << 0;
12235 mask |= INTVAL (operands[3]) << 2;
12236 mask |= INTVAL (operands[4]) << 4;
12237 mask |= INTVAL (operands[5]) << 6;
12238 operands[2] = GEN_INT (mask);
12240 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12242 [(set_attr "type" "sselog1")
12243 (set_attr "prefix_data16" "1")
12244 (set_attr "prefix" "<mask_prefix2>")
12245 (set_attr "length_immediate" "1")
12246 (set_attr "mode" "TI")])
;; vpshuflw on V32HI.  Operand 1 is the source vector and operand 2 an
;; immediate accepted by const_0_to_255_operand; the immediate is printed
;; directly, without being decomposed into element indices.
12248 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12249 [(set (match_operand:V32HI 0 "register_operand" "=v")
12251 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12252 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12255 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12256 [(set_attr "type" "sselog")
12257 (set_attr "prefix" "evex")
12258 (set_attr "mode" "XI")])
;; Masked vpshuflw expander for V16HI; requires TARGET_AVX512VL and
;; TARGET_AVX512BW.
;; The 8-bit immediate in operand 2 is split into four 2-bit fields,
;; emitted once as-is and once with +8, giving the eight index arguments
;; of avx2_pshuflw_1_mask.  Operands 3 and 4 are forwarded as the final
;; two arguments.
12260 (define_expand "avx512vl_pshuflwv3_mask"
12261 [(match_operand:V16HI 0 "register_operand")
12262 (match_operand:V16HI 1 "nonimmediate_operand")
12263 (match_operand:SI 2 "const_0_to_255_operand")
12264 (match_operand:V16HI 3 "register_operand")
12265 (match_operand:HI 4 "register_operand")]
12266 "TARGET_AVX512VL && TARGET_AVX512BW"
12268 int mask = INTVAL (operands[2]);
12269 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
12270 GEN_INT ((mask >> 0) & 3),
12271 GEN_INT ((mask >> 2) & 3),
12272 GEN_INT ((mask >> 4) & 3),
12273 GEN_INT ((mask >> 6) & 3),
12274 GEN_INT (((mask >> 0) & 3) + 8),
12275 GEN_INT (((mask >> 2) & 3) + 8),
12276 GEN_INT (((mask >> 4) & 3) + 8),
12277 GEN_INT (((mask >> 6) & 3) + 8),
12278 operands[3], operands[4]));
;; Unmasked vpshuflw expander for V16HI.
;; The 8-bit immediate in operand 2 is split into four 2-bit fields,
;; emitted once as-is and once with +8, giving the eight index arguments
;; of avx2_pshuflw_1.
12282 (define_expand "avx2_pshuflwv3"
12283 [(match_operand:V16HI 0 "register_operand")
12284 (match_operand:V16HI 1 "nonimmediate_operand")
12285 (match_operand:SI 2 "const_0_to_255_operand")]
12288 int mask = INTVAL (operands[2]);
12289 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
12290 GEN_INT ((mask >> 0) & 3),
12291 GEN_INT ((mask >> 2) & 3),
12292 GEN_INT ((mask >> 4) & 3),
12293 GEN_INT ((mask >> 6) & 3),
12294 GEN_INT (((mask >> 0) & 3) + 8),
12295 GEN_INT (((mask >> 2) & 3) + 8),
12296 GEN_INT (((mask >> 4) & 3) + 8),
12297 GEN_INT (((mask >> 6) & 3) + 8)));
;; vpshuflw on V16HI with eight variable index operands: operands 2-5 in
;; 0..3 and operands 6-9 in 8..11.
;; The condition includes <mask_avx512bw_condition> and
;; <mask_avx512vl_condition>, and requires operands 6-9 to equal operands
;; 2-5 plus 8.  The immediate is packed from operands 2-5 (two bits each,
;; shifts 0/2/4/6) and stored back into operands[2] before printing.
12301 (define_insn "avx2_pshuflw_1<mask_name>"
12302 [(set (match_operand:V16HI 0 "register_operand" "=v")
12304 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12305 (parallel [(match_operand 2 "const_0_to_3_operand")
12306 (match_operand 3 "const_0_to_3_operand")
12307 (match_operand 4 "const_0_to_3_operand")
12308 (match_operand 5 "const_0_to_3_operand")
12313 (match_operand 6 "const_8_to_11_operand")
12314 (match_operand 7 "const_8_to_11_operand")
12315 (match_operand 8 "const_8_to_11_operand")
12316 (match_operand 9 "const_8_to_11_operand")
12320 (const_int 15)])))]
12322 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12323 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12324 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12325 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12326 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12329 mask |= INTVAL (operands[2]) << 0;
12330 mask |= INTVAL (operands[3]) << 2;
12331 mask |= INTVAL (operands[4]) << 4;
12332 mask |= INTVAL (operands[5]) << 6;
12333 operands[2] = GEN_INT (mask);
12335 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12337 [(set_attr "type" "sselog")
12338 (set_attr "prefix" "maybe_evex")
12339 (set_attr "length_immediate" "1")
12340 (set_attr "mode" "OI")])
;; Masked pshuflw expander for V8HI; requires TARGET_AVX512VL and
;; TARGET_AVX512BW.
;; The 8-bit immediate in operand 2 is split into four 2-bit indices for
;; sse2_pshuflw_1_mask; operands 3 and 4 are forwarded as the final two
;; arguments.
12342 (define_expand "avx512vl_pshuflw_mask"
12343 [(match_operand:V8HI 0 "register_operand")
12344 (match_operand:V8HI 1 "nonimmediate_operand")
12345 (match_operand:SI 2 "const_0_to_255_operand")
12346 (match_operand:V8HI 3 "register_operand")
12347 (match_operand:QI 4 "register_operand")]
12348 "TARGET_AVX512VL && TARGET_AVX512BW"
12350 int mask = INTVAL (operands[2]);
12351 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
12352 GEN_INT ((mask >> 0) & 3),
12353 GEN_INT ((mask >> 2) & 3),
12354 GEN_INT ((mask >> 4) & 3),
12355 GEN_INT ((mask >> 6) & 3),
12356 operands[3], operands[4]));
;; Unmasked pshuflw expander for V8HI.
;; Operand 2 is any const_int; only its low eight bits are used, split
;; into four 2-bit indices that are passed to sse2_pshuflw_1.
12360 (define_expand "sse2_pshuflw"
12361 [(match_operand:V8HI 0 "register_operand")
12362 (match_operand:V8HI 1 "nonimmediate_operand")
12363 (match_operand:SI 2 "const_int_operand")]
12366 int mask = INTVAL (operands[2]);
12367 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
12368 GEN_INT ((mask >> 0) & 3),
12369 GEN_INT ((mask >> 2) & 3),
12370 GEN_INT ((mask >> 4) & 3),
12371 GEN_INT ((mask >> 6) & 3)));
;; pshuflw on V8HI with four variable index operands (2-5), each in 0..3.
;; Enabled under TARGET_SSE2 && <mask_avx512bw_condition>
;; && <mask_avx512vl_condition>.
;; The immediate is packed from operands 2-5 (two bits each, shifts
;; 0/2/4/6) and stored back into operands[2] before printing.
12375 (define_insn "sse2_pshuflw_1<mask_name>"
12376 [(set (match_operand:V8HI 0 "register_operand" "=v")
12378 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12379 (parallel [(match_operand 2 "const_0_to_3_operand")
12380 (match_operand 3 "const_0_to_3_operand")
12381 (match_operand 4 "const_0_to_3_operand")
12382 (match_operand 5 "const_0_to_3_operand")
12387 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12390 mask |= INTVAL (operands[2]) << 0;
12391 mask |= INTVAL (operands[3]) << 2;
12392 mask |= INTVAL (operands[4]) << 4;
12393 mask |= INTVAL (operands[5]) << 6;
12394 operands[2] = GEN_INT (mask);
12396 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12398 [(set_attr "type" "sselog")
12399 (set_attr "prefix_data16" "0")
12400 (set_attr "prefix_rep" "1")
12401 (set_attr "prefix" "maybe_vex")
12402 (set_attr "length_immediate" "1")
12403 (set_attr "mode" "TI")])
;; Unmasked vpshufhw expander for V16HI.
;; The 8-bit immediate in operand 2 is split into four 2-bit fields,
;; emitted once with +4 and once with +12, giving the eight index
;; arguments of avx2_pshufhw_1.
12405 (define_expand "avx2_pshufhwv3"
12406 [(match_operand:V16HI 0 "register_operand")
12407 (match_operand:V16HI 1 "nonimmediate_operand")
12408 (match_operand:SI 2 "const_0_to_255_operand")]
12411 int mask = INTVAL (operands[2]);
12412 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
12413 GEN_INT (((mask >> 0) & 3) + 4),
12414 GEN_INT (((mask >> 2) & 3) + 4),
12415 GEN_INT (((mask >> 4) & 3) + 4),
12416 GEN_INT (((mask >> 6) & 3) + 4),
12417 GEN_INT (((mask >> 0) & 3) + 12),
12418 GEN_INT (((mask >> 2) & 3) + 12),
12419 GEN_INT (((mask >> 4) & 3) + 12),
12420 GEN_INT (((mask >> 6) & 3) + 12)));
;; vpshufhw on V32HI.  Operand 1 is the source vector and operand 2 an
;; immediate accepted by const_0_to_255_operand; the immediate is printed
;; directly, without being decomposed into element indices.
12424 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12425 [(set (match_operand:V32HI 0 "register_operand" "=v")
12427 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12428 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12431 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12432 [(set_attr "type" "sselog")
12433 (set_attr "prefix" "evex")
12434 (set_attr "mode" "XI")])
;; Masked vpshufhw expander for V16HI; requires TARGET_AVX512VL and
;; TARGET_AVX512BW.
;; The 8-bit immediate in operand 2 is split into four 2-bit fields,
;; emitted once with +4 and once with +12, giving the eight index
;; arguments of avx2_pshufhw_1_mask.  Operands 3 and 4 are forwarded as
;; the final two arguments.
12436 (define_expand "avx512vl_pshufhwv3_mask"
12437 [(match_operand:V16HI 0 "register_operand")
12438 (match_operand:V16HI 1 "nonimmediate_operand")
12439 (match_operand:SI 2 "const_0_to_255_operand")
12440 (match_operand:V16HI 3 "register_operand")
12441 (match_operand:HI 4 "register_operand")]
12442 "TARGET_AVX512VL && TARGET_AVX512BW"
12444 int mask = INTVAL (operands[2]);
12445 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
12446 GEN_INT (((mask >> 0) & 3) + 4),
12447 GEN_INT (((mask >> 2) & 3) + 4),
12448 GEN_INT (((mask >> 4) & 3) + 4),
12449 GEN_INT (((mask >> 6) & 3) + 4),
12450 GEN_INT (((mask >> 0) & 3) + 12),
12451 GEN_INT (((mask >> 2) & 3) + 12),
12452 GEN_INT (((mask >> 4) & 3) + 12),
12453 GEN_INT (((mask >> 6) & 3) + 12),
12454 operands[3], operands[4]));
;; vpshufhw on V16HI with eight variable index operands: operands 2-5 in
;; 4..7 and operands 6-9 in 12..15.
;; The condition includes <mask_avx512bw_condition> and
;; <mask_avx512vl_condition>, and requires operands 6-9 to equal operands
;; 2-5 plus 8.  The immediate is packed from (operand - 4) for operands
;; 2-5 (two bits each, shifts 0/2/4/6) and stored back into operands[2].
12458 (define_insn "avx2_pshufhw_1<mask_name>"
12459 [(set (match_operand:V16HI 0 "register_operand" "=v")
12461 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12462 (parallel [(const_int 0)
12466 (match_operand 2 "const_4_to_7_operand")
12467 (match_operand 3 "const_4_to_7_operand")
12468 (match_operand 4 "const_4_to_7_operand")
12469 (match_operand 5 "const_4_to_7_operand")
12474 (match_operand 6 "const_12_to_15_operand")
12475 (match_operand 7 "const_12_to_15_operand")
12476 (match_operand 8 "const_12_to_15_operand")
12477 (match_operand 9 "const_12_to_15_operand")])))]
12479 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12480 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12481 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12482 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12483 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12486 mask |= (INTVAL (operands[2]) - 4) << 0;
12487 mask |= (INTVAL (operands[3]) - 4) << 2;
12488 mask |= (INTVAL (operands[4]) - 4) << 4;
12489 mask |= (INTVAL (operands[5]) - 4) << 6;
12490 operands[2] = GEN_INT (mask);
12492 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12494 [(set_attr "type" "sselog")
12495 (set_attr "prefix" "maybe_evex")
12496 (set_attr "length_immediate" "1")
12497 (set_attr "mode" "OI")])
;; Masked pshufhw expander for V8HI; requires TARGET_AVX512VL and
;; TARGET_AVX512BW.
;; The 8-bit immediate in operand 2 is split into four 2-bit fields, each
;; offset by +4, giving the four index arguments of sse2_pshufhw_1_mask.
;; Operands 3 and 4 are forwarded as the final two arguments.
12499 (define_expand "avx512vl_pshufhw_mask"
12500 [(match_operand:V8HI 0 "register_operand")
12501 (match_operand:V8HI 1 "nonimmediate_operand")
12502 (match_operand:SI 2 "const_0_to_255_operand")
12503 (match_operand:V8HI 3 "register_operand")
12504 (match_operand:QI 4 "register_operand")]
12505 "TARGET_AVX512VL && TARGET_AVX512BW"
12507 int mask = INTVAL (operands[2]);
12508 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12509 GEN_INT (((mask >> 0) & 3) + 4),
12510 GEN_INT (((mask >> 2) & 3) + 4),
12511 GEN_INT (((mask >> 4) & 3) + 4),
12512 GEN_INT (((mask >> 6) & 3) + 4),
12513 operands[3], operands[4]));
;; Unmasked pshufhw expander for V8HI.
;; Operand 2 is any const_int; only its low eight bits are used, split
;; into four 2-bit fields that are each offset by +4 and passed to
;; sse2_pshufhw_1.
12517 (define_expand "sse2_pshufhw"
12518 [(match_operand:V8HI 0 "register_operand")
12519 (match_operand:V8HI 1 "nonimmediate_operand")
12520 (match_operand:SI 2 "const_int_operand")]
12523 int mask = INTVAL (operands[2]);
12524 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12525 GEN_INT (((mask >> 0) & 3) + 4),
12526 GEN_INT (((mask >> 2) & 3) + 4),
12527 GEN_INT (((mask >> 4) & 3) + 4),
12528 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw on V8HI with four variable index operands (2-5), each in 4..7.
;; Enabled under TARGET_SSE2 && <mask_avx512bw_condition>
;; && <mask_avx512vl_condition>.
;; The immediate is packed from (operand - 4) for operands 2-5 (two bits
;; each, shifts 0/2/4/6) and stored back into operands[2] before printing.
12532 (define_insn "sse2_pshufhw_1<mask_name>"
12533 [(set (match_operand:V8HI 0 "register_operand" "=v")
12535 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12536 (parallel [(const_int 0)
12540 (match_operand 2 "const_4_to_7_operand")
12541 (match_operand 3 "const_4_to_7_operand")
12542 (match_operand 4 "const_4_to_7_operand")
12543 (match_operand 5 "const_4_to_7_operand")])))]
12544 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12547 mask |= (INTVAL (operands[2]) - 4) << 0;
12548 mask |= (INTVAL (operands[3]) - 4) << 2;
12549 mask |= (INTVAL (operands[4]) - 4) << 4;
12550 mask |= (INTVAL (operands[5]) - 4) << 6;
12551 operands[2] = GEN_INT (mask);
12553 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12555 [(set_attr "type" "sselog")
12556 (set_attr "prefix_rep" "1")
12557 (set_attr "prefix_data16" "0")
12558 (set_attr "prefix" "maybe_vex")
12559 (set_attr "length_immediate" "1")
12560 (set_attr "mode" "TI")])
;; Expander that loads the SImode operand 1 into a V4SI register via a
;; vec_duplicate.  The preparation statement sets operands[2] to the
;; V4SImode zero constant (CONST0_RTX).
12562 (define_expand "sse2_loadd"
12563 [(set (match_operand:V4SI 0 "register_operand")
12565 (vec_duplicate:V4SI
12566 (match_operand:SI 1 "nonimmediate_operand"))
12570 "operands[2] = CONST0_RTX (V4SImode);")
;; Five-alternative pattern built from a vec_duplicate of the SImode
;; operand 2 and the V4SI operand 1 (reg_or_0_operand).
;;   alternatives 0-1: operand 1 has constraint "C"; printed as %vmovd
;;                     with operand 2 in memory or in a general register
;;   alternative 2:    operand 1 has constraint "C"; movss from memory
;;   alternative 3:    operand 1 tied to the destination; movss
;;   alternative 4:    three-operand vmovss
;; The "isa" attribute restricts alternatives 2-3 to noavx and
;; alternative 4 to avx.
12572 (define_insn "sse2_loadld"
12573 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12575 (vec_duplicate:V4SI
12576 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12577 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12581 %vmovd\t{%2, %0|%0, %2}
12582 %vmovd\t{%2, %0|%0, %2}
12583 movss\t{%2, %0|%0, %2}
12584 movss\t{%2, %0|%0, %2}
12585 vmovss\t{%2, %1, %0|%0, %1, %2}"
12586 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
12587 (set_attr "type" "ssemov")
12588 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12589 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one element of a VI12_128 vector (operand 1), selected by the
;; constant operand 2, into a general register (alternative 0) or memory
;; (alternative 1), using %vpextr<ssemodesuffix>.
;; Alternative 0 prints the destination with the %k0 modifier.
;; prefix_data16 and prefix_extra are computed per alternative, with a
;; special case for alternative 0 in V8HImode.
12591 (define_insn "*vec_extract<mode>"
12592 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
12593 (vec_select:<ssescalarmode>
12594 (match_operand:VI12_128 1 "register_operand" "x,x")
12596 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12599 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12600 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12601 [(set_attr "type" "sselog1")
12602 (set (attr "prefix_data16")
12604 (and (eq_attr "alternative" "0")
12605 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12607 (const_string "*")))
12608 (set (attr "prefix_extra")
12610 (and (eq_attr "alternative" "0")
12611 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12613 (const_string "1")))
12614 (set_attr "length_immediate" "1")
12615 (set_attr "prefix" "maybe_vex")
12616 (set_attr "mode" "TI")])
;; Extract one HImode element of a V8HI register (index in 0..7) into a
;; general register with pextrw.  Enabled only for TARGET_SSE2 without
;; TARGET_SSE4_1.
12618 (define_insn "*vec_extractv8hi_sse2"
12619 [(set (match_operand:HI 0 "register_operand" "=r")
12621 (match_operand:V8HI 1 "register_operand" "x")
12623 [(match_operand:SI 2 "const_0_to_7_operand")])))]
12624 "TARGET_SSE2 && !TARGET_SSE4_1"
12625 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
12626 [(set_attr "type" "sselog1")
12627 (set_attr "prefix_data16" "1")
12628 (set_attr "length_immediate" "1")
12629 (set_attr "mode" "TI")])
;; Extract one element of a V16QI register (index in 0..15) into an SWI48
;; general register with %vpextrb; the destination is printed with the
;; %k0 modifier.
12631 (define_insn "*vec_extractv16qi_zext"
12632 [(set (match_operand:SWI48 0 "register_operand" "=r")
12635 (match_operand:V16QI 1 "register_operand" "x")
12637 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
12639 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
12640 [(set_attr "type" "sselog1")
12641 (set_attr "prefix_extra" "1")
12642 (set_attr "length_immediate" "1")
12643 (set_attr "prefix" "maybe_vex")
12644 (set_attr "mode" "TI")])
;; Word extraction from a V8HI register (index 0..7) into an SWI48
;; general register with (v)pextrw; the destination is printed with the
;; %k modifier.  NOTE(review): the "_zext" name suggests the result is
;; zero-extended to the SWI48 mode -- confirm against the full RTL.
12646 (define_insn "*vec_extractv8hi_zext"
12647 [(set (match_operand:SWI48 0 "register_operand" "=r")
12650 (match_operand:V8HI 1 "register_operand" "x")
12652 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
12654 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
12655 [(set_attr "type" "sselog1")
12656 (set_attr "prefix_data16" "1")
12657 (set_attr "length_immediate" "1")
12658 (set_attr "prefix" "maybe_vex")
12659 (set_attr "mode" "TI")])
;; Element extraction where the VI12_128 source vector lives in
;; offsettable memory ("o") and the scalar result goes to a general
;; register.  The index is an immediate accepted by
;; const_0_to_<ssescalarnummask>_operand.
12661 (define_insn "*vec_extract<mode>_mem"
12662 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12663 (vec_select:<ssescalarmode>
12664 (match_operand:VI12_128 1 "memory_operand" "o")
12666 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of an <ssevecmode> vector into an SWI48 destination
;; (general register, SSE register or memory, depending on the
;; alternative).  The condition rejects the memory-to-memory combination.
;; Alternative 1 is restricted to isa "sse4".
12670 (define_insn "*vec_extract<ssevecmodelower>_0"
12671 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12673 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12674 (parallel [(const_int 0)])))]
12675 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12677 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register extended into a DImode general register.
;; Enabled for 64-bit SSE2 targets that allow inter-unit moves out of
;; vector registers.  After reload it is split into a zero_extend:DI of
;; the same hard register viewed in SImode (see the preparation code).
12679 (define_insn_and_split "*vec_extractv4si_0_zext"
12680 [(set (match_operand:DI 0 "register_operand" "=r")
12683 (match_operand:V4SI 1 "register_operand" "x")
12684 (parallel [(const_int 0)]))))]
12685 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12687 "&& reload_completed"
12688 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12689 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element 0 of a V2DI operand into a DImode SSE register or memory, for
;; SSE targets that are not 64-bit.  The memory-to-memory combination is
;; rejected by the condition.
12691 (define_insn "*vec_extractv2di_0_sse"
12692 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12694 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12695 (parallel [(const_int 0)])))]
12696 "TARGET_SSE && !TARGET_64BIT
12697 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; After reload, an element-0 extraction from an <ssevecmode> register is
;; rewritten as a plain move: operand 1 is replaced by the same hard
;; register number re-created in <MODE>mode.
12701 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12703 (match_operand:<ssevecmode> 1 "register_operand")
12704 (parallel [(const_int 0)])))]
12705 "TARGET_SSE && reload_completed"
12706 [(set (match_dup 0) (match_dup 1))]
12707 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; SImode element extraction from a V4SI register with an immediate index
;; (0..3).  The output code either emits (v)pextrd directly, or scales the
;; index by 4 to get a byte count and shifts the vector right with psrldq
;; (operand 1 tied to operand 0) or the three-operand vpsrldq.
;; The "isa" attribute marks the psrldq alternatives noavx and the last
;; alternative avx.
12709 (define_insn "*vec_extractv4si"
12710 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
12712 (match_operand:V4SI 1 "register_operand" "x,0,0,x")
12713 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12716 switch (which_alternative)
12719 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12723 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12724 return "psrldq\t{%2, %0|%0, %2}";
12727 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12728 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12731 gcc_unreachable ();
12734 [(set_attr "isa" "*,noavx,noavx,avx")
12735 (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
12736 (set_attr "prefix_extra" "1,*,*,*")
12737 (set_attr "length_immediate" "1")
12738 (set_attr "prefix" "maybe_vex,orig,orig,vex")
12739 (set_attr "mode" "TI")])
;; V4SI element extraction (index 0..3) into a DImode general register,
;; for 64-bit SSE4.1 targets.  Emits (v)pextrd with the destination
;; printed through the %k modifier.
12741 (define_insn "*vec_extractv4si_zext"
12742 [(set (match_operand:DI 0 "register_operand" "=r")
12745 (match_operand:V4SI 1 "register_operand" "x")
12746 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12747 "TARGET_64BIT && TARGET_SSE4_1"
12748 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
12749 [(set_attr "type" "sselog1")
12750 (set_attr "prefix_extra" "1")
12751 (set_attr "length_immediate" "1")
12752 (set_attr "prefix" "maybe_vex")
12753 (set_attr "mode" "TI")])
;; SImode element extraction where the V4SI source is in offsettable
;; memory ("o"); the destination is an SSE register (alternative 0) or a
;; general register (alternative 1).
12755 (define_insn "*vec_extractv4si_mem"
12756 [(set (match_operand:SI 0 "register_operand" "=x,r")
12758 (match_operand:V4SI 1 "memory_operand" "o,o")
12759 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; DImode-destination variant of the V4SI-from-memory extraction, for
;; 64-bit SSE targets.  After reload it is split into a zero_extend:DI of
;; an SImode memory reference: operand 1 is re-addressed with
;; adjust_address at byte offset index * 4.
12763 (define_insn_and_split "*vec_extractv4si_zext_mem"
12764 [(set (match_operand:DI 0 "register_operand" "=x,r")
12767 (match_operand:V4SI 1 "memory_operand" "o,o")
12768 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12769 "TARGET_64BIT && TARGET_SSE"
12771 "&& reload_completed"
12772 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12774 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract element 1 of a V2DI operand into a DImode destination.
;; The templates shown cover (v)pextrq with immediate 1, (v)movhps,
;; psrldq / vpsrldq by 8 bytes, and movhlps; the last two alternatives
;; take the source from offsettable memory.  Memory-to-memory is rejected
;; by the condition, and the "isa" attribute gates each alternative.
12777 (define_insn "*vec_extractv2di_1"
12778 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
12780 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
12781 (parallel [(const_int 1)])))]
12782 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12784 %vpextrq\t{$1, %1, %0|%0, %1, 1}
12785 %vmovhps\t{%1, %0|%0, %1}
12786 psrldq\t{$8, %0|%0, 8}
12787 vpsrldq\t{$8, %1, %0|%0, %1, 8}
12788 movhlps\t{%1, %0|%0, %1}
12791 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
12792 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
12793 (set_attr "length_immediate" "1,*,1,1,*,*,*")
12794 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
12795 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
12796 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
12797 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; After reload, an element extraction from a VI_128 vector in memory
;; becomes a plain scalar load: the memory operand is re-addressed in
;; <ssescalarmode> at byte offset index * GET_MODE_SIZE (<ssescalarmode>).
12800 [(set (match_operand:<ssescalarmode> 0 "register_operand")
12801 (vec_select:<ssescalarmode>
12802 (match_operand:VI_128 1 "memory_operand")
12804 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12805 "TARGET_SSE && reload_completed"
12806 [(set (match_dup 0) (match_dup 1))]
12808 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12810 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
12813 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
12814 ;; vector modes into vec_extract*.
;; Applies while new pseudos can still be created, when operand 1 is a
;; SUBREG at byte 0 of a REG whose mode is an integer or float vector of
;; 16, 32 or 64 bytes (64 bytes additionally requires TARGET_AVX512F).
;; The move is rewritten as a vec_select of element 0.  For the wider
;; sources the preparation code first narrows the vector with the
;; gen_vec_extract_lo_* expanders into fresh pseudos, then takes the
;; <ssevecmode> lowpart.
12816 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12817 (match_operand:SWI48x 1 "register_operand"))]
12818 "can_create_pseudo_p ()
12819 && GET_CODE (operands[1]) == SUBREG
12820 && REG_P (SUBREG_REG (operands[1]))
12821 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
12822 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
12823 == MODE_VECTOR_FLOAT))
12824 && SUBREG_BYTE (operands[1]) == 0
12826 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
12827 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
12829 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
12830 && TARGET_AVX512F))
12831 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
12832 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
12833 (parallel [(const_int 0)])))]
12836 operands[1] = SUBREG_REG (operands[1]);
12837 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
12840 if (<MODE>mode == SImode)
12842 tmp = gen_reg_rtx (V8SImode);
12843 emit_insn (gen_vec_extract_lo_v16si (tmp,
12844 gen_lowpart (V16SImode,
12849 tmp = gen_reg_rtx (V4DImode);
12850 emit_insn (gen_vec_extract_lo_v8di (tmp,
12851 gen_lowpart (V8DImode,
12857 tmp = gen_reg_rtx (<ssevecmode>mode);
12858 if (<MODE>mode == SImode)
12859 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
12862 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
12867 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI value from two SImode operands.
;; Templates, in alternative order: pinsrd (twice) and vpinsrd insert
;; operand 2 at position 1; punpckldq (twice) and vpunpckldq interleave
;; two registers; (v)movd is used when operand 2 is the zero constant
;; ("C"); the last two alternatives are the MMX-register ("y") forms
;; punpckldq and movd.
12872 (define_insn "*vec_concatv2si_sse4_1"
12873 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y")
12875 (match_operand:SI 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,rm, 0,rm")
12876 (match_operand:SI 2 "vector_move_operand" " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
12879 pinsrd\t{$1, %2, %0|%0, %2, 1}
12880 pinsrd\t{$1, %2, %0|%0, %2, 1}
12881 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
12882 punpckldq\t{%2, %0|%0, %2}
12883 punpckldq\t{%2, %0|%0, %2}
12884 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
12885 %vmovd\t{%1, %0|%0, %1}
12886 punpckldq\t{%2, %0|%0, %2}
12887 movd\t{%1, %0|%0, %1}"
12888 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
12889 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
12890 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
12891 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
12892 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
12893 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
12895 ;; ??? In theory we can match memory for the MMX alternative, but allowing
12896 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
12897 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI construction from two SImode operands for SSE targets without
;; SSE4.1 (see the condition).  Operand 2 is a register or zero.
;; Alternatives 0-2 need SSE2 (punpckldq / movd), alternatives 3-4 use
;; the SSE1 unpcklps / movss forms, and alternatives 5-6 are the
;; MMX-register ("y") forms.
12898 (define_insn "*vec_concatv2si"
12899 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
12901 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
12902 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
12903 "TARGET_SSE && !TARGET_SSE4_1"
12905 punpckldq\t{%2, %0|%0, %2}
12906 movd\t{%1, %0|%0, %1}
12907 movd\t{%1, %0|%0, %1}
12908 unpcklps\t{%2, %0|%0, %2}
12909 movss\t{%1, %0|%0, %1}
12910 punpckldq\t{%2, %0|%0, %2}
12911 movd\t{%1, %0|%0, %1}"
12912 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
12913 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
12914 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI register from two V2SI operands.  Register-register
;; alternatives use punpcklqdq / vpunpcklqdq or movlhps; when operand 2
;; is in memory, movhps / vmovhps is used and the memory operand is
;; printed with the %q modifier in Intel syntax.
12916 (define_insn "*vec_concatv4si"
12917 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
12919 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
12920 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
12923 punpcklqdq\t{%2, %0|%0, %2}
12924 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12925 movlhps\t{%2, %0|%0, %2}
12926 movhps\t{%2, %0|%0, %q2}
12927 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
12928 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
12929 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12930 (set_attr "prefix" "orig,vex,orig,orig,vex")
12931 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
12933 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI register from two DImode operands (11 alternatives).
;; Alternatives 0-2 insert operand 2 at position 1 with pinsrq / vpinsrq.
;; Alternatives 3-5 have a zero operand 2 ("C") and only move operand 1
;; in: alternative 3 picks %vmovq or %vmovd depending on
;; HAVE_AS_IX86_INTERUNIT_MOVQ, alternative 4 uses %vmovq, alternative 5
;; movq2dq from an MMX register.  Alternatives 6-10 combine two values
;; with punpcklqdq / vpunpcklqdq, movlhps or movhps / vmovhps.
12934 (define_insn "vec_concatv2di"
12935 [(set (match_operand:V2DI 0 "register_operand"
12936 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
12938 (match_operand:DI 1 "nonimmediate_operand"
12939 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
12940 (match_operand:DI 2 "vector_move_operand"
12941 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
12944 pinsrq\t{$1, %2, %0|%0, %2, 1}
12945 pinsrq\t{$1, %2, %0|%0, %2, 1}
12946 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
12947 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
12948 %vmovq\t{%1, %0|%0, %1}
12949 movq2dq\t{%1, %0|%0, %1}
12950 punpcklqdq\t{%2, %0|%0, %2}
12951 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12952 movlhps\t{%2, %0|%0, %2}
12953 movhps\t{%2, %0|%0, %2}
12954 vmovhps\t{%2, %1, %0|%0, %1, %2}"
12955 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
12958 (eq_attr "alternative" "0,1,2,6,7")
12959 (const_string "sselog")
12960 (const_string "ssemov")))
12961 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
12962 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
12963 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
12964 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
12965 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for the vec_unpacks_lo_<mode> named pattern: delegates to
;; ix86_expand_sse_unpack with (false, false) as the last two arguments.
;; NOTE(review): judging by the four sibling expanders, the third argument
;; follows the "u" (unsigned) variant and the fourth the hi/lo variant.
12967 (define_expand "vec_unpacks_lo_<mode>"
12968 [(match_operand:<sseunpackmode> 0 "register_operand")
12969 (match_operand:VI124_AVX512F 1 "register_operand")]
12971 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander for the vec_unpacks_hi_<mode> named pattern: delegates to
;; ix86_expand_sse_unpack with (false, true) as the last two arguments.
12973 (define_expand "vec_unpacks_hi_<mode>"
12974 [(match_operand:<sseunpackmode> 0 "register_operand")
12975 (match_operand:VI124_AVX512F 1 "register_operand")]
12977 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander for the vec_unpacku_lo_<mode> named pattern: delegates to
;; ix86_expand_sse_unpack with (true, false) as the last two arguments.
12979 (define_expand "vec_unpacku_lo_<mode>"
12980 [(match_operand:<sseunpackmode> 0 "register_operand")
12981 (match_operand:VI124_AVX512F 1 "register_operand")]
12983 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander for the vec_unpacku_hi_<mode> named pattern: delegates to
;; ix86_expand_sse_unpack with (true, true) as the last two arguments.
12985 (define_expand "vec_unpacku_hi_<mode>"
12986 [(match_operand:<sseunpackmode> 0 "register_operand")
12987 (match_operand:VI124_AVX512F 1 "register_operand")]
12989 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
12991 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12995 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned average of two VI12_AVX2 vectors.  The RTL
;; zero-extends both inputs to <ssedoublemode>, adds them together with a
;; further operand (<mask_expand_op3>), shifts right logically and
;; truncates back to the element mode.  The preparation code sets
;; operands[3] to the all-ones constant vector (CONST1_RTX), canonicalizes
;; the PLUS operands, and in the masked variant copies that constant into
;; operands[5].
12997 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
12998 [(set (match_operand:VI12_AVX2 0 "register_operand")
12999 (truncate:VI12_AVX2
13000 (lshiftrt:<ssedoublemode>
13001 (plus:<ssedoublemode>
13002 (plus:<ssedoublemode>
13003 (zero_extend:<ssedoublemode>
13004 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
13005 (zero_extend:<ssedoublemode>
13006 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
13007 (match_dup <mask_expand_op3>))
13009 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13012 if (<mask_applied>)
13014 operands[3] = CONST1_RTX(<MODE>mode);
13015 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
13017 if (<mask_applied>)
13019 operands[5] = operands[3];
;; Matching insn for the unsigned-average RTL: operands 1 and 2 are
;; commutative ("%"), and the extra addend must satisfy const1_operand.
;; Alternative 0 is the two-operand legacy pavg<suffix> (isa noavx),
;; alternative 1 the three-operand vpavg<suffix>, which also prints the
;; optional mask operand.
13024 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13025 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
13026 (truncate:VI12_AVX2
13027 (lshiftrt:<ssedoublemode>
13028 (plus:<ssedoublemode>
13029 (plus:<ssedoublemode>
13030 (zero_extend:<ssedoublemode>
13031 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
13032 (zero_extend:<ssedoublemode>
13033 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
13034 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
13036 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13037 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
13039 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
13040 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13041 [(set_attr "isa" "noavx,avx")
13042 (set_attr "type" "sseiadd")
13043 (set_attr "prefix_data16" "1,*")
13044 (set_attr "prefix" "orig,<mask_prefix>")
13045 (set_attr "mode" "<sseinsnmode>")])
13047 ;; The correct representation for this is absolutely enormous, and
13048 ;; surely not generally useful.
;; psadbw / vpsadbw modelled as an unspec over two <ssebytemode> inputs
;; producing a VI8_AVX2_AVX512BW result.  Alternative 0 is the legacy
;; two-operand form (operand 1 tied to the destination), alternative 1
;; the three-operand VEX/EVEX form.
13049 (define_insn "<sse2_avx2>_psadbw"
13050 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
13051 (unspec:VI8_AVX2_AVX512BW
13052 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
13053 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
13057 psadbw\t{%2, %0|%0, %2}
13058 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13059 [(set_attr "isa" "noavx,avx")
13060 (set_attr "type" "sseiadd")
13061 (set_attr "atom_unit" "simul")
13062 (set_attr "prefix_data16" "1,*")
13063 (set_attr "prefix" "orig,maybe_evex")
13064 (set_attr "mode" "<sseinsnmode>")])
;; (v)movmskps / (v)movmskpd: produce an SImode general-register result
;; from a VF_128_256 floating-point vector register.
13066 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
13067 [(set (match_operand:SI 0 "register_operand" "=r")
13069 [(match_operand:VF_128_256 1 "register_operand" "x")]
13072 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
13073 [(set_attr "type" "ssemov")
13074 (set_attr "prefix" "maybe_vex")
13075 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a V32QI register, modelled as an unspec with an SImode
;; general-register result.  Always VEX-encoded.
13077 (define_insn "avx2_pmovmskb"
13078 [(set (match_operand:SI 0 "register_operand" "=r")
13079 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
13082 "vpmovmskb\t{%1, %0|%0, %1}"
13083 [(set_attr "type" "ssemov")
13084 (set_attr "prefix" "vex")
13085 (set_attr "mode" "DI")])
;; (v)pmovmskb on a V16QI register, modelled as an unspec with an SImode
;; general-register result.
13087 (define_insn "sse2_pmovmskb"
13088 [(set (match_operand:SI 0 "register_operand" "=r")
13089 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
13092 "%vpmovmskb\t{%1, %0|%0, %1}"
13093 [(set_attr "type" "ssemov")
13094 (set_attr "prefix_data16" "1")
13095 (set_attr "prefix" "maybe_vex")
13096 (set_attr "mode" "SI")])
;; Expander for the masked byte store: a V16QI memory destination
;; (operand 0) is set from an unspec over two V16QI register operands
;; (operands 1 and 2).
13098 (define_expand "sse2_maskmovdqu"
13099 [(set (match_operand:V16QI 0 "memory_operand")
13100 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13101 (match_operand:V16QI 2 "register_operand")
;; (v)maskmovdqu insn.  The destination address is a Pmode register
;; constrained to "D"; the previous memory contents take part in the
;; unspec as (mem (match_dup 0)).  When Pmode differs from word_mode the
;; output code prints an addr32 prefix by hand, and length_address and
;; length_vex are computed explicitly to account for it and for the
;; implicit address operand.
13106 (define_insn "*sse2_maskmovdqu"
13107 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
13108 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13109 (match_operand:V16QI 2 "register_operand" "x")
13110 (mem:V16QI (match_dup 0))]
13114 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13115 that requires %v to be at the beginning of the opcode name. */
13116 if (Pmode != word_mode)
13117 fputs ("\taddr32", asm_out_file);
13118 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13120 [(set_attr "type" "ssemov")
13121 (set_attr "prefix_data16" "1")
13122 (set (attr "length_address")
13123 (symbol_ref ("Pmode != word_mode")))
13124 ;; The implicit %rdi operand confuses default length_vex computation.
13125 (set (attr "length_vex")
13126 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
13127 (set_attr "prefix" "maybe_vex")
13128 (set_attr "mode" "TI")])
;; Volatile unspec that takes an SImode memory operand; the "memory"
;; attribute marks it as a load and atom_sse_attr as an mxcsr access.
13130 (define_insn "sse_ldmxcsr"
13131 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13135 [(set_attr "type" "sse")
13136 (set_attr "atom_sse_attr" "mxcsr")
13137 (set_attr "prefix" "maybe_vex")
13138 (set_attr "memory" "load")])
;; Stores the result of the UNSPECV_STMXCSR volatile unspec into an
;; SImode memory operand; the "memory" attribute marks it as a store.
13140 (define_insn "sse_stmxcsr"
13141 [(set (match_operand:SI 0 "memory_operand" "=m")
13142 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13145 [(set_attr "type" "sse")
13146 (set_attr "atom_sse_attr" "mxcsr")
13147 (set_attr "prefix" "maybe_vex")
13148 (set_attr "memory" "store")])
;; Volatile unspec taking an address operand ("p").  Its memory effect is
;; declared "unknown" and it is classed as a fence for Atom scheduling.
13150 (define_insn "sse2_clflush"
13151 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13155 [(set_attr "type" "sse")
13156 (set_attr "atom_sse_attr" "fence")
13157 (set_attr "memory" "unknown")])
;; Volatile unspec with two SImode register operands pinned to the "a"
;; and "c" register classes.  The insn length is fixed at 3 bytes.
13160 (define_insn "sse3_mwait"
13161 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
13162 (match_operand:SI 1 "register_operand" "c")]
13165 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
13166 ;; Since 32bit register operands are implicitly zero extended to 64bit,
13167 ;; we only need to set up 32bit registers.
13169 [(set_attr "length" "3")])
;; Volatile unspec with a Pmode address register pinned to "a" and two
;; SImode registers pinned to "c" and "d".  The length attribute is 3,
;; plus 1 when Pmode differs from word_mode.
13171 (define_insn "sse3_monitor_<mode>"
13172 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
13173 (match_operand:SI 1 "register_operand" "c")
13174 (match_operand:SI 2 "register_operand" "d")]
13177 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
13178 ;; RCX and RDX are used. Since 32bit register operands are implicitly
13179 ;; zero extended to 64bit, we only need to set up 32bit registers.
13181 [(set (attr "length")
13182 (symbol_ref ("(Pmode != word_mode) + 3")))])
13184 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13186 ;; SSSE3 instructions
13188 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four element operations used by the
;; horizontal add/subtract patterns below: plain and signed-saturating
;; addition and subtraction.
13190 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/subtract on HImode elements (vph<op>w).
;; Each result element applies the ssse3_plusminus operation to one pair
;; of adjacent elements -- (0,1), (2,3), ... (14,15) -- first of operand 1
;; (register) and then of operand 2 (register or memory).
;; Three-operand VEX form only.
13192 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
13193 [(set (match_operand:V16HI 0 "register_operand" "=x")
13198 (ssse3_plusminus:HI
13200 (match_operand:V16HI 1 "register_operand" "x")
13201 (parallel [(const_int 0)]))
13202 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13203 (ssse3_plusminus:HI
13204 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13205 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13207 (ssse3_plusminus:HI
13208 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13209 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13210 (ssse3_plusminus:HI
13211 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13212 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13215 (ssse3_plusminus:HI
13216 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13217 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
13218 (ssse3_plusminus:HI
13219 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13220 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13222 (ssse3_plusminus:HI
13223 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13224 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
13225 (ssse3_plusminus:HI
13226 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13227 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13231 (ssse3_plusminus:HI
13233 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13234 (parallel [(const_int 0)]))
13235 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13236 (ssse3_plusminus:HI
13237 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13238 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13240 (ssse3_plusminus:HI
13241 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13242 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13243 (ssse3_plusminus:HI
13244 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13245 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13248 (ssse3_plusminus:HI
13249 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13250 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
13251 (ssse3_plusminus:HI
13252 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13253 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13255 (ssse3_plusminus:HI
13256 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13257 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
13258 (ssse3_plusminus:HI
13259 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13260 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13262 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13263 [(set_attr "type" "sseiadd")
13264 (set_attr "prefix_extra" "1")
13265 (set_attr "prefix" "vex")
13266 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract on HImode elements.  Each result
;; element applies the ssse3_plusminus operation to one pair of adjacent
;; elements -- (0,1) ... (6,7) -- of operand 1 and then of operand 2.
;; Alternative 0 is the legacy two-operand ph<op>w with operand 1 tied to
;; the destination; alternative 1 is the VEX three-operand vph<op>w.
13268 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
13269 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13273 (ssse3_plusminus:HI
13275 (match_operand:V8HI 1 "register_operand" "0,x")
13276 (parallel [(const_int 0)]))
13277 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13278 (ssse3_plusminus:HI
13279 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13280 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13282 (ssse3_plusminus:HI
13283 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13284 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13285 (ssse3_plusminus:HI
13286 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13287 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13290 (ssse3_plusminus:HI
13292 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
13293 (parallel [(const_int 0)]))
13294 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13295 (ssse3_plusminus:HI
13296 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13297 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13299 (ssse3_plusminus:HI
13300 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13301 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13302 (ssse3_plusminus:HI
13303 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13304 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13307 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13308 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13309 [(set_attr "isa" "noavx,avx")
13310 (set_attr "type" "sseiadd")
13311 (set_attr "atom_unit" "complex")
13312 (set_attr "prefix_data16" "1,*")
13313 (set_attr "prefix_extra" "1")
13314 (set_attr "prefix" "orig,vex")
13315 (set_attr "mode" "TI")])
;; 64-bit MMX-register ("y") horizontal add/subtract on HImode elements:
;; pairs (0,1) and (2,3) of operand 1, then of operand 2.  Two-operand
;; form with operand 1 tied to the destination; prefix_rex is computed
;; from whether the insn mentions an extended register.
13317 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
13318 [(set (match_operand:V4HI 0 "register_operand" "=y")
13321 (ssse3_plusminus:HI
13323 (match_operand:V4HI 1 "register_operand" "0")
13324 (parallel [(const_int 0)]))
13325 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13326 (ssse3_plusminus:HI
13327 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13328 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13330 (ssse3_plusminus:HI
13332 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13333 (parallel [(const_int 0)]))
13334 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13335 (ssse3_plusminus:HI
13336 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13337 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13339 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
13340 [(set_attr "type" "sseiadd")
13341 (set_attr "atom_unit" "complex")
13342 (set_attr "prefix_extra" "1")
13343 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13344 (set_attr "mode" "DI")])
;; 256-bit horizontal add/subtract on SImode elements (vph<op>d).
;; The RTL pairs adjacent elements -- (0,1), (2,3), (4,5), (6,7) -- of
;; operand 1 (register) and then of operand 2 (register or memory).
;; Three-operand VEX form only.
13346 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
13347 [(set (match_operand:V8SI 0 "register_operand" "=x")
13353 (match_operand:V8SI 1 "register_operand" "x")
13354 (parallel [(const_int 0)]))
13355 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13357 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13358 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13361 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13362 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
13364 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13365 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13370 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13371 (parallel [(const_int 0)]))
13372 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13374 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13375 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13378 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13379 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
13381 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13382 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13384 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13385 [(set_attr "type" "sseiadd")
13386 (set_attr "prefix_extra" "1")
13387 (set_attr "prefix" "vex")
13388 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract on SImode elements: pairs (0,1) and
;; (2,3) of operand 1, then of operand 2.  Alternative 0 is the legacy
;; two-operand ph<op>d (operand 1 tied to the destination); alternative 1
;; is the VEX three-operand vph<op>d.
13390 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
13391 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13396 (match_operand:V4SI 1 "register_operand" "0,x")
13397 (parallel [(const_int 0)]))
13398 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13400 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13401 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13405 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
13406 (parallel [(const_int 0)]))
13407 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13409 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13410 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
13413 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
13414 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13415 [(set_attr "isa" "noavx,avx")
13416 (set_attr "type" "sseiadd")
13417 (set_attr "atom_unit" "complex")
13418 (set_attr "prefix_data16" "1,*")
13419 (set_attr "prefix_extra" "1")
13420 (set_attr "prefix" "orig,vex")
13421 (set_attr "mode" "TI")])
;; 64-bit MMX-register ("y") horizontal add/subtract on SImode elements:
;; the pair (0,1) of operand 1, then of operand 2.  Two-operand form with
;; operand 1 tied to the destination.
13423 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
13424 [(set (match_operand:V2SI 0 "register_operand" "=y")
13428 (match_operand:V2SI 1 "register_operand" "0")
13429 (parallel [(const_int 0)]))
13430 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13433 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
13434 (parallel [(const_int 0)]))
13435 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
13437 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
13438 [(set_attr "type" "sseiadd")
13439 (set_attr "atom_unit" "complex")
13440 (set_attr "prefix_extra" "1")
13441 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13442 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw with a V16HI result.  The RTL selects the
;; even-indexed bytes (0, 2, ... 30) of both V32QI operands and,
;; separately, their odd-indexed bytes (1, 3, ... 31) as V16QI vectors
;; that feed the arithmetic.  Three-operand VEX form only.
13444 (define_insn "avx2_pmaddubsw256"
13445 [(set (match_operand:V16HI 0 "register_operand" "=x")
13450 (match_operand:V32QI 1 "register_operand" "x")
13451 (parallel [(const_int 0) (const_int 2)
13452 (const_int 4) (const_int 6)
13453 (const_int 8) (const_int 10)
13454 (const_int 12) (const_int 14)
13455 (const_int 16) (const_int 18)
13456 (const_int 20) (const_int 22)
13457 (const_int 24) (const_int 26)
13458 (const_int 28) (const_int 30)])))
13461 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
13462 (parallel [(const_int 0) (const_int 2)
13463 (const_int 4) (const_int 6)
13464 (const_int 8) (const_int 10)
13465 (const_int 12) (const_int 14)
13466 (const_int 16) (const_int 18)
13467 (const_int 20) (const_int 22)
13468 (const_int 24) (const_int 26)
13469 (const_int 28) (const_int 30)]))))
13472 (vec_select:V16QI (match_dup 1)
13473 (parallel [(const_int 1) (const_int 3)
13474 (const_int 5) (const_int 7)
13475 (const_int 9) (const_int 11)
13476 (const_int 13) (const_int 15)
13477 (const_int 17) (const_int 19)
13478 (const_int 21) (const_int 23)
13479 (const_int 25) (const_int 27)
13480 (const_int 29) (const_int 31)])))
13482 (vec_select:V16QI (match_dup 2)
13483 (parallel [(const_int 1) (const_int 3)
13484 (const_int 5) (const_int 7)
13485 (const_int 9) (const_int 11)
13486 (const_int 13) (const_int 15)
13487 (const_int 17) (const_int 19)
13488 (const_int 21) (const_int 23)
13489 (const_int 25) (const_int 27)
13490 (const_int 29) (const_int 31)]))))))]
13492 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13493 [(set_attr "type" "sseiadd")
13494 (set_attr "prefix_extra" "1")
13495 (set_attr "prefix" "vex")
13496 (set_attr "mode" "OI")])
13498 ;; The correct representation for this is absolutely enormous, and
13499 ;; surely not generally useful.
;; EVEX vpmaddubsw, kept as an opaque UNSPEC_PMADDUBSW512 over two
;; <dbpsadbwmode> inputs (operand 1 a register, operand 2 a register or
;; memory) with a VI2_AVX512VL result.  The template prints the optional
;; mask operand after the destination.
13500 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
13501 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
13502 (unspec:VI2_AVX512VL
13503 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
13504 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
13505 UNSPEC_PMADDUBSW512))]
13507 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13508 [(set_attr "type" "sseiadd")
13509 (set_attr "prefix" "evex")
13510 (set_attr "mode" "XI")])
;; 512-bit vpmulhrsw on V32HI.  Operands 1 and 2 are commutative ("%");
;; the RTL includes a V32HI constant vector of ones as an addend.
;; EVEX-encoded; the template prints the optional mask operand after the
;; destination.
13512 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
13513 [(set (match_operand:V32HI 0 "register_operand" "=v")
13520 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
13522 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
13524 (const_vector:V32HI [(const_int 1) (const_int 1)
13525 (const_int 1) (const_int 1)
13526 (const_int 1) (const_int 1)
13527 (const_int 1) (const_int 1)
13528 (const_int 1) (const_int 1)
13529 (const_int 1) (const_int 1)
13530 (const_int 1) (const_int 1)
13531 (const_int 1) (const_int 1)
13532 (const_int 1) (const_int 1)
13533 (const_int 1) (const_int 1)
13534 (const_int 1) (const_int 1)
13535 (const_int 1) (const_int 1)
13536 (const_int 1) (const_int 1)
13537 (const_int 1) (const_int 1)
13538 (const_int 1) (const_int 1)
13539 (const_int 1) (const_int 1)]))
13542 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13543 [(set_attr "type" "sseimul")
13544 (set_attr "prefix" "evex")
13545 (set_attr "mode" "XI")])
;; 128-bit pmaddubsw with a V8HI result.  The RTL selects the
;; even-indexed bytes (0, 2, ... 14) of both V16QI operands and,
;; separately, their odd-indexed bytes (1, 3, ... 15) as V8QI vectors
;; that feed the arithmetic.  Alternative 0 is the legacy two-operand
;; form (operand 1 tied to the destination), alternative 1 the VEX
;; three-operand form.
13547 (define_insn "ssse3_pmaddubsw128"
13548 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13553 (match_operand:V16QI 1 "register_operand" "0,x")
13554 (parallel [(const_int 0) (const_int 2)
13555 (const_int 4) (const_int 6)
13556 (const_int 8) (const_int 10)
13557 (const_int 12) (const_int 14)])))
13560 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
13561 (parallel [(const_int 0) (const_int 2)
13562 (const_int 4) (const_int 6)
13563 (const_int 8) (const_int 10)
13564 (const_int 12) (const_int 14)]))))
13567 (vec_select:V8QI (match_dup 1)
13568 (parallel [(const_int 1) (const_int 3)
13569 (const_int 5) (const_int 7)
13570 (const_int 9) (const_int 11)
13571 (const_int 13) (const_int 15)])))
13573 (vec_select:V8QI (match_dup 2)
13574 (parallel [(const_int 1) (const_int 3)
13575 (const_int 5) (const_int 7)
13576 (const_int 9) (const_int 11)
13577 (const_int 13) (const_int 15)]))))))]
13580 pmaddubsw\t{%2, %0|%0, %2}
13581 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13582 [(set_attr "isa" "noavx,avx")
13583 (set_attr "type" "sseiadd")
13584 (set_attr "atom_unit" "simul")
13585 (set_attr "prefix_data16" "1,*")
13586 (set_attr "prefix_extra" "1")
13587 (set_attr "prefix" "orig,vex")
13588 (set_attr "mode" "TI")])
;; 64-bit MMX-register ("y") pmaddubsw with a V4HI result.  The RTL
;; selects the even-indexed bytes (0, 2, 4, 6) of both V8QI operands and,
;; separately, their odd-indexed bytes (1, 3, 5, 7) as V4QI vectors.
;; Two-operand form with operand 1 tied to the destination.
13590 (define_insn "ssse3_pmaddubsw"
13591 [(set (match_operand:V4HI 0 "register_operand" "=y")
13596 (match_operand:V8QI 1 "register_operand" "0")
13597 (parallel [(const_int 0) (const_int 2)
13598 (const_int 4) (const_int 6)])))
13601 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13602 (parallel [(const_int 0) (const_int 2)
13603 (const_int 4) (const_int 6)]))))
13606 (vec_select:V4QI (match_dup 1)
13607 (parallel [(const_int 1) (const_int 3)
13608 (const_int 5) (const_int 7)])))
13610 (vec_select:V4QI (match_dup 2)
13611 (parallel [(const_int 1) (const_int 3)
13612 (const_int 5) (const_int 7)]))))))]
13614 "pmaddubsw\t{%2, %0|%0, %2}"
13615 [(set_attr "type" "sseiadd")
13616 (set_attr "atom_unit" "simul")
13617 (set_attr "prefix_extra" "1")
13618 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13619 (set_attr "mode" "DI")])
;; Modes accepted by the pmulhrsw expanders: V4HI and V8HI always,
;; V16HI only when TARGET_AVX2 is enabled.
13621 (define_mode_iterator PMULHRSW
13622 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Masked pmulhrsw expander (requires AVX512BW and AVX512VL).
;; Operands 1 and 2 are sign-extended to <ssedoublemode> and multiplied;
;; operand 3 is the vec_merge fall-through value, operand 4 the mask register.
;; The preparation code sets operands[5] to CONST1_RTX (<MODE>mode) and runs
;; ix86_fixup_binary_operands_no_copy for MULT on the operand array.
13624 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
13625 [(set (match_operand:PMULHRSW 0 "register_operand")
13626 (vec_merge:PMULHRSW
13628 (lshiftrt:<ssedoublemode>
13629 (plus:<ssedoublemode>
13630 (lshiftrt:<ssedoublemode>
13631 (mult:<ssedoublemode>
13632 (sign_extend:<ssedoublemode>
13633 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13634 (sign_extend:<ssedoublemode>
13635 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13639 (match_operand:PMULHRSW 3 "register_operand")
13640 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13641 "TARGET_AVX512BW && TARGET_AVX512VL"
13643 operands[5] = CONST1_RTX(<MODE>mode);
13644 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Unmasked pmulhrsw expander for the PMULHRSW modes.
;; Operands 1 and 2 are sign-extended to <ssedoublemode> and multiplied,
;; then shifted/added/shifted per the RTL below.
;; The preparation code sets operands[3] to CONST1_RTX (<MODE>mode) and runs
;; ix86_fixup_binary_operands_no_copy for MULT on the operand array.
13647 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
13648 [(set (match_operand:PMULHRSW 0 "register_operand")
13650 (lshiftrt:<ssedoublemode>
13651 (plus:<ssedoublemode>
13652 (lshiftrt:<ssedoublemode>
13653 (mult:<ssedoublemode>
13654 (sign_extend:<ssedoublemode>
13655 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13656 (sign_extend:<ssedoublemode>
13657 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13663 operands[3] = CONST1_RTX(<MODE>mode);
13664 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matcher for (v)pmulhrsw on VI2_AVX2 modes, optionally masked via
;; <mask_name>.  Operand 3 must satisfy const1_operand.  The "%" in the
;; operand 1 constraint marks operands 1 and 2 as commutative.
;; The insn condition additionally requires ix86_binary_operator_ok for MULT.
;; Alternative 0: two-operand SSE form; alternative 1: three-operand
;; VEX/EVEX form (mask suffix printed through <mask_operand4>).
13667 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
13668 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
13670 (lshiftrt:<ssedoublemode>
13671 (plus:<ssedoublemode>
13672 (lshiftrt:<ssedoublemode>
13673 (mult:<ssedoublemode>
13674 (sign_extend:<ssedoublemode>
13675 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
13676 (sign_extend:<ssedoublemode>
13677 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
13679 (match_operand:VI2_AVX2 3 "const1_operand"))
13681 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13682 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
13684 pmulhrsw\t{%2, %0|%0, %2}
13685 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
13686 [(set_attr "isa" "noavx,avx")
13687 (set_attr "type" "sseimul")
13688 (set_attr "prefix_data16" "1,*")
13689 (set_attr "prefix_extra" "1")
13690 (set_attr "prefix" "orig,maybe_evex")
13691 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw on "y"-constraint (MMX) registers.  Operand 1 is tied to
;; the destination and commutative with operand 2 ("%0"); operand 3 must
;; satisfy const1_operand.  Requires TARGET_SSSE3 and
;; ix86_binary_operator_ok for MULT.
13693 (define_insn "*ssse3_pmulhrswv4hi3"
13694 [(set (match_operand:V4HI 0 "register_operand" "=y")
13701 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
13703 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
13705 (match_operand:V4HI 3 "const1_operand"))
13707 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
13708 "pmulhrsw\t{%2, %0|%0, %2}"
13709 [(set_attr "type" "sseimul")
13710 (set_attr "prefix_extra" "1")
13711 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13712 (set_attr "mode" "DI")])
;; (V)PSHUFB on VI1_AVX512 byte-vector modes, optionally masked via
;; <mask_name>.  Operand 1 is a register, operand 2 a register or memory.
;; Alternative 0: two-operand SSE form; alternative 1: three-operand
;; VEX/EVEX form (mask suffix printed through <mask_operand3>).
13714 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
13715 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
13717 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
13718 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
13720 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13722 pshufb\t{%2, %0|%0, %2}
13723 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13724 [(set_attr "isa" "noavx,avx")
13725 (set_attr "type" "sselog1")
13726 (set_attr "prefix_data16" "1,*")
13727 (set_attr "prefix_extra" "1")
13728 (set_attr "prefix" "orig,maybe_evex")
13729 (set_attr "btver2_decode" "vector,vector")
13730 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on "y"-constraint (MMX) registers, modelled as an unspec of
;; operands 1 and 2.  Operand 1 is tied to the destination.
;; Note: the ';' after the output template begins an (empty) md comment.
13732 (define_insn "ssse3_pshufbv8qi3"
13733 [(set (match_operand:V8QI 0 "register_operand" "=y")
13734 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
13735 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
13738 "pshufb\t{%2, %0|%0, %2}";
13739 [(set_attr "type" "sselog1")
13740 (set_attr "prefix_extra" "1")
13741 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13742 (set_attr "mode" "DI")])
;; (V)PSIGN{B,W,D} on VI124_AVX2 modes; the element-size suffix comes from
;; <ssemodesuffix>.  Alternative 0: two-operand SSE form with operand 1 tied
;; to the destination; alternative 1: three-operand VEX form.
13744 (define_insn "<ssse3_avx2>_psign<mode>3"
13745 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
13747 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
13748 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
13752 psign<ssemodesuffix>\t{%2, %0|%0, %2}
13753 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13754 [(set_attr "isa" "noavx,avx")
13755 (set_attr "type" "sselog1")
13756 (set_attr "prefix_data16" "1,*")
13757 (set_attr "prefix_extra" "1")
13758 (set_attr "prefix" "orig,vex")
13759 (set_attr "mode" "<sseinsnmode>")])
;; psign on "y"-constraint (MMX) registers for the MMXMODEI modes; the
;; element-size suffix comes from <mmxvecsize>.  Operand 1 is tied to the
;; destination.
;; Note: the ';' after the output template begins an (empty) md comment.
13761 (define_insn "ssse3_psign<mode>3"
13762 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13764 [(match_operand:MMXMODEI 1 "register_operand" "0")
13765 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
13768 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
13769 [(set_attr "type" "sselog1")
13770 (set_attr "prefix_extra" "1")
13771 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13772 (set_attr "mode" "DI")])
;; Masked vpalignr for VI1_AVX512 modes (AVX512BW; 64-byte modes or
;; AVX512VL).  Operand 3 is accepted by const_0_to_255_mul_8_operand and is
;; divided by 8 in the output code before being printed as the immediate.
;; Operand 4 is the vec_merge fall-through value ("0C": tied to the
;; destination or a constant), operand 5 the mask register ("Yk").
13774 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
13775 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
13776 (vec_merge:VI1_AVX512
13778 [(match_operand:VI1_AVX512 1 "register_operand" "v")
13779 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
13780 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13782 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
13783 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
13784 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
13786 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13787 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
13789 [(set_attr "type" "sseishft")
13790 (set_attr "atom_unit" "sishuf")
13791 (set_attr "prefix_extra" "1")
13792 (set_attr "length_immediate" "1")
13793 (set_attr "prefix" "evex")
13794 (set_attr "mode" "<sseinsnmode>")])
;; (V)PALIGNR for SSESCALARMODE modes, modelled as an unspec.
;; Operand 3 is accepted by const_0_to_255_mul_8_operand and is divided by
;; 8 in the output code before being printed as the immediate.
;; The output code switches on which_alternative: the two-operand SSE form
;; or the three-operand VEX form; any other value is gcc_unreachable.
13796 (define_insn "<ssse3_avx2>_palignr<mode>"
13797 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
13798 (unspec:SSESCALARMODE
13799 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
13800 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
13801 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
13805 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13807 switch (which_alternative)
13810 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13812 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13814 gcc_unreachable ();
13817 [(set_attr "isa" "noavx,avx")
13818 (set_attr "type" "sseishft")
13819 (set_attr "atom_unit" "sishuf")
13820 (set_attr "prefix_data16" "1,*")
13821 (set_attr "prefix_extra" "1")
13822 (set_attr "length_immediate" "1")
13823 (set_attr "prefix" "orig,vex")
13824 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on "y"-constraint (MMX) registers, modelled as an unspec.
;; Operand 1 is tied to the destination.  Operand 3 is accepted by
;; const_0_to_255_mul_8_operand and is divided by 8 in the output code
;; before being printed as the immediate.
13826 (define_insn "ssse3_palignrdi"
13827 [(set (match_operand:DI 0 "register_operand" "=y")
13828 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
13829 (match_operand:DI 2 "nonimmediate_operand" "ym")
13830 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13834 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13835 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13837 [(set_attr "type" "sseishft")
13838 (set_attr "atom_unit" "sishuf")
13839 (set_attr "prefix_extra" "1")
13840 (set_attr "length_immediate" "1")
13841 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13842 (set_attr "mode" "DI")])
13844 ;; Mode iterator for the abs patterns.  It exists because the V2DI and V4DI
13845 ;; modes have no abs instruction on pre-AVX-512 targets.
;; Per-mode enabling conditions: 512-bit byte/word modes need AVX512BW,
;; 512-bit dword/qword modes need AVX512F, 256-bit byte/word/dword modes
;; need AVX2, and V4DI/V2DI need AVX512VL.  V16QI, V8HI and V4SI carry no
;; extra condition here.
13846 (define_mode_iterator VI1248_AVX512VL_AVX512BW
13847 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
13848 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
13849 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
13850 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Matcher for vector abs over VI1248_AVX512VL_AVX512BW.  Operand 1 may be
;; a register or memory.  Emits pabs<ssemodesuffix>; the "%v" template
;; prefix goes together with the "maybe_vex" prefix attribute.
13852 (define_insn "*abs<mode>2"
13853 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
13854 (abs:VI1248_AVX512VL_AVX512BW
13855 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))]
13857 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
13858 [(set_attr "type" "sselog1")
13859 (set_attr "prefix_data16" "1")
13860 (set_attr "prefix_extra" "1")
13861 (set_attr "prefix" "maybe_vex")
13862 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs for the VI48_AVX512VL modes (EVEX-encoded).
;; Operand 2 is the vec_merge fall-through value ("0C": tied to the
;; destination or a constant), operand 3 the mask register ("Yk").
;; The template prints the mask as {%3} followed by the %N2 modifier.
13864 (define_insn "abs<mode>2_mask"
13865 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13866 (vec_merge:VI48_AVX512VL
13868 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
13869 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
13870 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13872 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13873 [(set_attr "type" "sselog1")
13874 (set_attr "prefix" "evex")
13875 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs for the VI12_AVX512VL modes (EVEX-encoded).  Same shape as
;; the VI48_AVX512VL pattern of the same name template; the mode iterators
;; differ, so the generated insn names do not collide.
;; Operand 2 is the vec_merge fall-through value ("0C"), operand 3 the
;; mask register ("Yk").
13877 (define_insn "abs<mode>2_mask"
13878 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
13879 (vec_merge:VI12_AVX512VL
13881 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
13882 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
13883 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13885 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13886 [(set_attr "type" "sselog1")
13887 (set_attr "prefix" "evex")
13888 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for vector abs over VI1248_AVX512VL_AVX512BW.
;; Its preparation code uses ix86_expand_sse2_abs (operands[0], operands[1])
;; to open-code the operation.
;; NOTE(review): confirm under which target condition that helper is used.
13890 (define_expand "abs<mode>2"
13891 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
13892 (abs:VI1248_AVX512VL_AVX512BW
13893 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))]
13898 ix86_expand_sse2_abs (operands[0], operands[1]);
;; pabs on "y"-constraint (MMX) registers for the MMXMODEI modes; the
;; element-size suffix comes from <mmxvecsize>.  Operand 1 may be a
;; register or memory.
;; Note: the ';' after the output template begins an (empty) md comment.
13903 (define_insn "abs<mode>2"
13904 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13906 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
13908 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
13909 [(set_attr "type" "sselog1")
13910 (set_attr "prefix_rep" "0")
13911 (set_attr "prefix_extra" "1")
13912 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13913 (set_attr "mode" "DI")])
13915 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13917 ;; AMD SSE4A instructions
13919 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movnt<ssemodesuffix>: stores the MODEF scalar in SSE register
;; operand 1 to memory operand 0.  Modelled as an unspec.
13921 (define_insn "sse4a_movnt<mode>"
13922 [(set (match_operand:MODEF 0 "memory_operand" "=m")
13924 [(match_operand:MODEF 1 "register_operand" "x")]
13927 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
13928 [(set_attr "type" "ssemov")
13929 (set_attr "mode" "<MODE>")])
;; SSE4A movnt<ssescalarmodesuffix> taking a VF_128 vector register:
;; element 0 of operand 1 is selected and stored to the scalar memory
;; operand 0.  Modelled as an unspec.
13931 (define_insn "sse4a_vmmovnt<mode>"
13932 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
13933 (unspec:<ssescalarmode>
13934 [(vec_select:<ssescalarmode>
13935 (match_operand:VF_128 1 "register_operand" "x")
13936 (parallel [(const_int 0)]))]
13939 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
13940 [(set_attr "type" "ssemov")
13941 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form: operates in place on V2DI operand 1 (tied
;; to the destination) with two immediates, each accepted by
;; const_0_to_255_operand (hence length_immediate 2).
13943 (define_insn "sse4a_extrqi"
13944 [(set (match_operand:V2DI 0 "register_operand" "=x")
13945 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13946 (match_operand 2 "const_0_to_255_operand")
13947 (match_operand 3 "const_0_to_255_operand")]
13950 "extrq\t{%3, %2, %0|%0, %2, %3}"
13951 [(set_attr "type" "sse")
13952 (set_attr "prefix_data16" "1")
13953 (set_attr "length_immediate" "2")
13954 (set_attr "mode" "TI")])
;; SSE4A extrq, register form: operates in place on V2DI operand 1 (tied
;; to the destination); the control value is the V16QI register operand 2.
13956 (define_insn "sse4a_extrq"
13957 [(set (match_operand:V2DI 0 "register_operand" "=x")
13958 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13959 (match_operand:V16QI 2 "register_operand" "x")]
13962 "extrq\t{%2, %0|%0, %2}"
13963 [(set_attr "type" "sse")
13964 (set_attr "prefix_data16" "1")
13965 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form: V2DI operand 1 is tied to the
;; destination, operand 2 is the V2DI source register, and operands 3 and 4
;; are immediates accepted by const_0_to_255_operand (length_immediate 2).
13967 (define_insn "sse4a_insertqi"
13968 [(set (match_operand:V2DI 0 "register_operand" "=x")
13969 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13970 (match_operand:V2DI 2 "register_operand" "x")
13971 (match_operand 3 "const_0_to_255_operand")
13972 (match_operand 4 "const_0_to_255_operand")]
13975 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
13976 [(set_attr "type" "sseins")
13977 (set_attr "prefix_data16" "0")
13978 (set_attr "prefix_rep" "1")
13979 (set_attr "length_immediate" "2")
13980 (set_attr "mode" "TI")])
;; SSE4A insertq, register form: V2DI operand 1 is tied to the destination
;; and operand 2 is the V2DI source register.  No immediates.
13982 (define_insn "sse4a_insertq"
13983 [(set (match_operand:V2DI 0 "register_operand" "=x")
13984 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13985 (match_operand:V2DI 2 "register_operand" "x")]
13988 "insertq\t{%2, %0|%0, %2}"
13989 [(set_attr "type" "sseins")
13990 (set_attr "prefix_data16" "0")
13991 (set_attr "prefix_rep" "1")
13992 (set_attr "mode" "TI")])
13994 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13996 ;; Intel SSE4.1 instructions
13998 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14000 ;; Mapping of immediate bits for blend instructions
;; The value is the largest immediate for the mode; it is spliced into the
;; predicate name const_0_to_<blendbits>_operand by the blend pattern.
14001 (define_mode_attr blendbits
14002 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Immediate-controlled FP blend for VF_128_256, expressed as a vec_merge
;; of operand 2 (first arm) and operand 1 (second arm) under the immediate
;; operand 3, whose range is bounded per mode through <blendbits>.
;; Alternatives 0 and 1 are the two-operand non-AVX form (operand 1 tied to
;; the destination); alternative 2 is the three-operand VEX form.
14004 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
14005 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14006 (vec_merge:VF_128_256
14007 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14008 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
14009 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
14012 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14013 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14014 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14015 [(set_attr "isa" "noavx,noavx,avx")
14016 (set_attr "type" "ssemov")
14017 (set_attr "length_immediate" "1")
14018 (set_attr "prefix_data16" "1,1,*")
14019 (set_attr "prefix_extra" "1")
14020 (set_attr "prefix" "orig,orig,vex")
14021 (set_attr "mode" "<MODE>")])
;; Register-controlled FP blend (blendv) for VF_128_256.  Operand 3 is the
;; selector register: it uses the "Yz" constraint in the two non-AVX
;; alternatives and any "x" register in the VEX alternative.
;; Alternatives 0 and 1 tie operand 1 to the destination; alternative 2 is
;; the VEX form with separate source operands.
14023 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
14024 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14026 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14027 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14028 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
14032 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14033 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14034 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14035 [(set_attr "isa" "noavx,noavx,avx")
14036 (set_attr "type" "ssemov")
14037 (set_attr "length_immediate" "1")
14038 (set_attr "prefix_data16" "1,1,*")
14039 (set_attr "prefix_extra" "1")
14040 (set_attr "prefix" "orig,orig,vex")
14041 (set_attr "btver2_decode" "vector,vector,vector")
14042 (set_attr "mode" "<MODE>")])
;; (V)DPPS/(V)DPPD for VF_128_256 with an 8-bit immediate (operand 3,
;; const_0_to_255_operand).  The "%" on operand 1 marks operands 1 and 2
;; as commutative.  Alternatives 0 and 1 are the two-operand non-AVX form;
;; alternative 2 is the three-operand VEX form.
14044 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
14045 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14047 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
14048 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14049 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14053 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14054 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14055 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14056 [(set_attr "isa" "noavx,noavx,avx")
14057 (set_attr "type" "ssemul")
14058 (set_attr "length_immediate" "1")
14059 (set_attr "prefix_data16" "1,1,*")
14060 (set_attr "prefix_extra" "1")
14061 (set_attr "prefix" "orig,orig,vex")
14062 (set_attr "btver2_decode" "vector,vector,vector")
14063 (set_attr "mode" "<MODE>")])
14065 ;; Mode attribute used by `vmovntdqa' pattern
;; Maps the vector mode to the ISA prefix used in the pattern's name:
;; V2DI -> sse4_1, V4DI -> avx2, V8DI -> avx512f.
14066 (define_mode_attr vi8_sse4_1_avx2_avx512
14067 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; (V)MOVNTDQA load: operand 1 must be memory in every alternative and the
;; result goes to a vector register.  Modelled as an unspec over
;; VI8_AVX2_AVX512F.  Alternatives 0 and 1 are "maybe_vex"; alternative 2
;; (constraint "v") is EVEX-encoded.
14069 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
14070 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14071 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
14074 "%vmovntdqa\t{%1, %0|%0, %1}"
14075 [(set_attr "type" "ssemov")
14076 (set_attr "prefix_extra" "1,1,*")
14077 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
14078 (set_attr "mode" "<sseinsnmode>")])
;; (V)MPSADBW for VI1_AVX2 modes with an 8-bit immediate (operand 3,
;; const_0_to_255_operand).  Alternatives 0 and 1 are the two-operand
;; non-AVX form (operand 1 tied to the destination); alternative 2 is the
;; three-operand VEX form.
14080 (define_insn "<sse4_1_avx2>_mpsadbw"
14081 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14083 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14084 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14085 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14089 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14090 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14091 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14092 [(set_attr "isa" "noavx,noavx,avx")
14093 (set_attr "type" "sselog1")
14094 (set_attr "length_immediate" "1")
14095 (set_attr "prefix_extra" "1")
14096 (set_attr "prefix" "orig,orig,vex")
14097 (set_attr "btver2_decode" "vector,vector,vector")
14098 (set_attr "mode" "<sseinsnmode>")])
;; (V)PACKUSDW: the result is the vec_concat of the unsigned-saturating
;; truncations (us_truncate) of operand 1 and operand 2, each in
;; <sseunpackmode>.  Optionally masked via <mask_name>.
;; Alternatives 0 and 1 are the two-operand non-AVX form; alternative 2 is
;; the three-operand VEX/EVEX form.
14100 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
14101 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
14102 (vec_concat:VI2_AVX2
14103 (us_truncate:<ssehalfvecmode>
14104 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
14105 (us_truncate:<ssehalfvecmode>
14106 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
14107 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14109 packusdw\t{%2, %0|%0, %2}
14110 packusdw\t{%2, %0|%0, %2}
14111 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14112 [(set_attr "isa" "noavx,noavx,avx")
14113 (set_attr "type" "sselog")
14114 (set_attr "prefix_extra" "1")
14115 (set_attr "prefix" "orig,orig,maybe_evex")
14116 (set_attr "mode" "<sseinsnmode>")])
;; (V)PBLENDVB for VI1_AVX2 modes.  Operand 3 is the selector register: it
;; uses the "Yz" constraint in the two non-AVX alternatives and any "x"
;; register in the VEX alternative, which is also the only alternative
;; with length_immediate set to 1.
14118 (define_insn "<sse4_1_avx2>_pblendvb"
14119 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14121 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14122 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14123 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
14127 pblendvb\t{%3, %2, %0|%0, %2, %3}
14128 pblendvb\t{%3, %2, %0|%0, %2, %3}
14129 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14130 [(set_attr "isa" "noavx,noavx,avx")
14131 (set_attr "type" "ssemov")
14132 (set_attr "prefix_extra" "1")
14133 (set_attr "length_immediate" "*,*,1")
14134 (set_attr "prefix" "orig,orig,vex")
14135 (set_attr "btver2_decode" "vector,vector,vector")
14136 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit (V)PBLENDW on V8HI: operand 2 and operand 1 are blended under
;; the 8-bit immediate operand 3 (const_0_to_255_operand).
;; Alternatives 0 and 1 are the two-operand non-AVX form (operand 1 tied to
;; the destination); alternative 2 is the three-operand VEX form.
14138 (define_insn "sse4_1_pblendw"
14139 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
14141 (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
14142 (match_operand:V8HI 1 "register_operand" "0,0,x")
14143 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
14146 pblendw\t{%3, %2, %0|%0, %2, %3}
14147 pblendw\t{%3, %2, %0|%0, %2, %3}
14148 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14149 [(set_attr "isa" "noavx,noavx,avx")
14150 (set_attr "type" "ssemov")
14151 (set_attr "prefix_extra" "1")
14152 (set_attr "length_immediate" "1")
14153 (set_attr "prefix" "orig,orig,vex")
14154 (set_attr "mode" "TI")])
14156 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code keeps the low 8 bits of operand 3 and replicates
;; them into bits 8..15 (val << 8 | val), giving one selector bit for each
;; of the 16 V16HI elements.
14157 (define_expand "avx2_pblendw"
14158 [(set (match_operand:V16HI 0 "register_operand")
14160 (match_operand:V16HI 2 "nonimmediate_operand")
14161 (match_operand:V16HI 1 "register_operand")
14162 (match_operand:SI 3 "const_0_to_255_operand")))]
14165 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14166 operands[3] = GEN_INT (val << 8 | val);
;; Matcher for 256-bit vpblendw on V16HI.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it back to its low 8 bits
;; before printing it as the instruction immediate.
14169 (define_insn "*avx2_pblendw"
14170 [(set (match_operand:V16HI 0 "register_operand" "=x")
14172 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14173 (match_operand:V16HI 1 "register_operand" "x")
14174 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
14177 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14178 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14180 [(set_attr "type" "ssemov")
14181 (set_attr "prefix_extra" "1")
14182 (set_attr "length_immediate" "1")
14183 (set_attr "prefix" "vex")
14184 (set_attr "mode" "OI")])
;; vpblendd for VI4_AVX2 modes: a vec_merge of operand 2 (first arm) and
;; operand 1 (second arm) under the 8-bit immediate operand 3.
;; VEX-encoded, three-operand form only.
14186 (define_insn "avx2_pblendd<mode>"
14187 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14188 (vec_merge:VI4_AVX2
14189 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14190 (match_operand:VI4_AVX2 1 "register_operand" "x")
14191 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14193 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14194 [(set_attr "type" "ssemov")
14195 (set_attr "prefix_extra" "1")
14196 (set_attr "length_immediate" "1")
14197 (set_attr "prefix" "vex")
14198 (set_attr "mode" "<sseinsnmode>")])
;; (V)PHMINPOSUW on V8HI, modelled as UNSPEC_PHMINPOSUW of operand 1
;; (register or memory).  The "%v" template prefix goes together with the
;; "maybe_vex" prefix attribute.
14200 (define_insn "sse4_1_phminposuw"
14201 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14202 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
14203 UNSPEC_PHMINPOSUW))]
14205 "%vphminposuw\t{%1, %0|%0, %1}"
14206 [(set_attr "type" "sselog1")
14207 (set_attr "prefix_extra" "1")
14208 (set_attr "prefix" "maybe_vex")
14209 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw, 256-bit: all 16 bytes of V16QI operand 1 widen to
;; V16HI.  The whole source operand is used, so the Intel-syntax operand is
;; printed with plain %1.  Masked variant needs AVX512BW and AVX512VL.
14211 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14212 [(set (match_operand:V16HI 0 "register_operand" "=v")
14214 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14215 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14216 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14217 [(set_attr "type" "ssemov")
14218 (set_attr "prefix_extra" "1")
14219 (set_attr "prefix" "maybe_evex")
14220 (set_attr "mode" "OI")])
;; vpmov<extsuffix>bw, 512-bit: all 32 bytes of V32QI operand 1 widen to
;; V32HI.  EVEX-encoded; optionally masked via <mask_name>.
14222 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14223 [(set (match_operand:V32HI 0 "register_operand" "=v")
14225 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14227 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14228 [(set_attr "type" "ssemov")
14229 (set_attr "prefix_extra" "1")
14230 (set_attr "prefix" "evex")
14231 (set_attr "mode" "XI")])
;; (v)pmov<extsuffix>bw, 128-bit: the low 8 bytes (elements 0..7) of V16QI
;; operand 1 widen to V8HI.  Only 64 bits of the source are read, hence
;; the %q1 modifier on the Intel-syntax operand and ssememalign 64.
14233 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
14234 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
14237 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14238 (parallel [(const_int 0) (const_int 1)
14239 (const_int 2) (const_int 3)
14240 (const_int 4) (const_int 5)
14241 (const_int 6) (const_int 7)]))))]
14242 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14243 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14244 [(set_attr "type" "ssemov")
14245 (set_attr "ssememalign" "64")
14246 (set_attr "prefix_extra" "1")
14247 (set_attr "prefix" "maybe_vex")
14248 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd, 512-bit: all 16 bytes of V16QI operand 1 widen to
;; V16SI.  EVEX-encoded; optionally masked via <mask_name>.
;; The source is the full 128-bit operand (no vec_select), so the
;; Intel-syntax operand is printed with plain %1; a %q1 modifier would
;; print a 64-bit (QWORD PTR) memory size for a 128-bit access.
14250 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
14251 [(set (match_operand:V16SI 0 "register_operand" "=v")
14253 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14255 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14256 [(set_attr "type" "ssemov")
14257 (set_attr "prefix" "evex")
14258 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bd, 256-bit: the low 8 bytes (elements 0..7) of V16QI
;; operand 1 widen to V8SI.  Only 64 bits of the source are read, hence
;; the %q1 modifier on the Intel-syntax operand.
14260 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
14261 [(set (match_operand:V8SI 0 "register_operand" "=v")
14264 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14265 (parallel [(const_int 0) (const_int 1)
14266 (const_int 2) (const_int 3)
14267 (const_int 4) (const_int 5)
14268 (const_int 6) (const_int 7)]))))]
14269 "TARGET_AVX2 && <mask_avx512vl_condition>"
14270 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14271 [(set_attr "type" "ssemov")
14272 (set_attr "prefix_extra" "1")
14273 (set_attr "prefix" "maybe_evex")
14274 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>bd, 128-bit: the low 4 bytes (elements 0..3) of V16QI
;; operand 1 widen to V4SI.  Only 32 bits of the source are read, hence
;; the %k1 modifier on the Intel-syntax operand and ssememalign 32.
14276 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
14277 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14280 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14281 (parallel [(const_int 0) (const_int 1)
14282 (const_int 2) (const_int 3)]))))]
14283 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14284 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14285 [(set_attr "type" "ssemov")
14286 (set_attr "ssememalign" "32")
14287 (set_attr "prefix_extra" "1")
14288 (set_attr "prefix" "maybe_vex")
14289 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd, 512-bit: all 16 words of V16HI operand 1 widen to
;; V16SI.  EVEX-encoded; optionally masked via <mask_name>.
14291 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
14292 [(set (match_operand:V16SI 0 "register_operand" "=v")
14294 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14296 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14297 [(set_attr "type" "ssemov")
14298 (set_attr "prefix" "evex")
14299 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wd, 256-bit: all 8 words of V8HI operand 1 widen to
;; V8SI.  The masked variant additionally needs AVX512VL.
14301 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
14302 [(set (match_operand:V8SI 0 "register_operand" "=v")
14304 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14305 "TARGET_AVX2 && <mask_avx512vl_condition>"
14306 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14307 [(set_attr "type" "ssemov")
14308 (set_attr "prefix_extra" "1")
14309 (set_attr "prefix" "maybe_evex")
14310 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>wd, 128-bit: the low 4 words (elements 0..3) of V8HI
;; operand 1 widen to V4SI.  Only 64 bits of the source are read, hence
;; the %q1 modifier on the Intel-syntax operand and ssememalign 64.
14312 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
14313 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14316 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14317 (parallel [(const_int 0) (const_int 1)
14318 (const_int 2) (const_int 3)]))))]
14319 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14320 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14321 [(set_attr "type" "ssemov")
14322 (set_attr "ssememalign" "64")
14323 (set_attr "prefix_extra" "1")
14324 (set_attr "prefix" "maybe_vex")
14325 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq, 512-bit: the low 8 bytes (elements 0..7) of V16QI
;; operand 1 widen to V8DI.  EVEX-encoded; optionally masked via
;; <mask_name>.
;; Eight source bytes are read, i.e. 64 bits, so the Intel-syntax operand
;; uses the %q1 modifier (QWORD PTR for memory); %k1 would print a 32-bit
;; (DWORD PTR) memory size.
14327 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
14328 [(set (match_operand:V8DI 0 "register_operand" "=v")
14331 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14332 (parallel [(const_int 0) (const_int 1)
14333 (const_int 2) (const_int 3)
14334 (const_int 4) (const_int 5)
14335 (const_int 6) (const_int 7)]))))]
14337 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14338 [(set_attr "type" "ssemov")
14339 (set_attr "prefix" "evex")
14340 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bq, 256-bit: the low 4 bytes (elements 0..3) of V16QI
;; operand 1 widen to V4DI.  Only 32 bits of the source are read, hence
;; the %k1 modifier on the Intel-syntax operand.
14342 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
14343 [(set (match_operand:V4DI 0 "register_operand" "=v")
14346 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14347 (parallel [(const_int 0) (const_int 1)
14348 (const_int 2) (const_int 3)]))))]
14349 "TARGET_AVX2 && <mask_avx512vl_condition>"
14350 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14351 [(set_attr "type" "ssemov")
14352 (set_attr "prefix_extra" "1")
14353 (set_attr "prefix" "maybe_evex")
14354 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>bq, 128-bit: the low 2 bytes (elements 0..1) of V16QI
;; operand 1 widen to V2DI.  Only 16 bits of the source are read, hence
;; the %w1 modifier on the Intel-syntax operand and ssememalign 16.
14356 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
14357 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14360 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14361 (parallel [(const_int 0) (const_int 1)]))))]
14362 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14363 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
14364 [(set_attr "type" "ssemov")
14365 (set_attr "ssememalign" "16")
14366 (set_attr "prefix_extra" "1")
14367 (set_attr "prefix" "maybe_vex")
14368 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq, 512-bit: all 8 words of V8HI operand 1 widen to
;; V8DI.  EVEX-encoded; optionally masked via <mask_name>.
;; The source is the full 128-bit operand (no vec_select), so the
;; Intel-syntax operand is printed with plain %1; a %q1 modifier would
;; print a 64-bit (QWORD PTR) memory size for a 128-bit access.
14370 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
14371 [(set (match_operand:V8DI 0 "register_operand" "=v")
14373 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14375 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14376 [(set_attr "type" "ssemov")
14377 (set_attr "prefix" "evex")
14378 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wq, 256-bit: the low 4 words (elements 0..3) of V8HI
;; operand 1 widen to V4DI.  Only 64 bits of the source are read, hence
;; the %q1 modifier on the Intel-syntax operand.
14380 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
14381 [(set (match_operand:V4DI 0 "register_operand" "=v")
14384 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14385 (parallel [(const_int 0) (const_int 1)
14386 (const_int 2) (const_int 3)]))))]
14387 "TARGET_AVX2 && <mask_avx512vl_condition>"
14388 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14389 [(set_attr "type" "ssemov")
14390 (set_attr "prefix_extra" "1")
14391 (set_attr "prefix" "maybe_evex")
14392 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>wq, 128-bit: the low 2 words (elements 0..1) of V8HI
;; operand 1 widen to V2DI.  Only 32 bits of the source are read, hence
;; the %k1 modifier on the Intel-syntax operand and ssememalign 32.
14394 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14395 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14398 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14399 (parallel [(const_int 0) (const_int 1)]))))]
14400 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14401 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14402 [(set_attr "type" "ssemov")
14403 (set_attr "ssememalign" "32")
14404 (set_attr "prefix_extra" "1")
14405 (set_attr "prefix" "maybe_vex")
14406 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq, 512-bit: all 8 dwords of V8SI operand 1 widen to
;; V8DI.  EVEX-encoded; optionally masked via <mask_name>.
14408 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
14409 [(set (match_operand:V8DI 0 "register_operand" "=v")
14411 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14413 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14414 [(set_attr "type" "ssemov")
14415 (set_attr "prefix" "evex")
14416 (set_attr "mode" "XI")])
;; vpmov<extsuffix>dq, 256-bit: all 4 dwords of V4SI operand 1 widen to
;; V4DI.  The masked variant additionally needs AVX512VL.
14418 (define_insn "avx2_<code>v4siv4di2<mask_name>"
14419 [(set (match_operand:V4DI 0 "register_operand" "=v")
14421 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14422 "TARGET_AVX2 && <mask_avx512vl_condition>"
14423 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14424 [(set_attr "type" "ssemov")
14425 (set_attr "prefix" "maybe_evex")
14426 (set_attr "prefix_extra" "1")
14427 (set_attr "mode" "OI")])
;; (v)pmov<extsuffix>dq, 128-bit: the low 2 dwords (elements 0..1) of V4SI
;; operand 1 widen to V2DI.  Only 64 bits of the source are read, hence
;; the %q1 modifier on the Intel-syntax operand and ssememalign 64.
14429 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
14430 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14433 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
14434 (parallel [(const_int 0) (const_int 1)]))))]
14435 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14436 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14437 [(set_attr "type" "ssemov")
14438 (set_attr "ssememalign" "64")
14439 (set_attr "prefix_extra" "1")
14440 (set_attr "prefix" "maybe_vex")
14441 (set_attr "mode" "TI")])
14443 ;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
14444 ;; set FLAGS_REG, but they are not really compare instructions.
;; The pattern sets (reg:CC FLAGS_REG) from an unspec of operand 0
;; (register) and operand 1 (register or memory); no vector register is
;; written.
14445 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
14446 [(set (reg:CC FLAGS_REG)
14447 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14448 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
14451 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
14452 [(set_attr "type" "ssecomi")
14453 (set_attr "prefix_extra" "1")
14454 (set_attr "prefix" "vex")
14455 (set_attr "mode" "<MODE>")])
14457 ;; ptest is very similar to comiss and ucomiss in that it sets FLAGS_REG,
14458 ;; but it is not really a compare instruction.
;; 256-bit form on V4DI: sets (reg:CC FLAGS_REG) from an unspec of operand 0
;; (register) and operand 1 (register or memory).  VEX-encoded.
14459 (define_insn "avx_ptest256"
14460 [(set (reg:CC FLAGS_REG)
14461 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
14462 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
14465 "vptest\t{%1, %0|%0, %1}"
14466 [(set_attr "type" "ssecomi")
14467 (set_attr "prefix_extra" "1")
14468 (set_attr "prefix" "vex")
14469 (set_attr "btver2_decode" "vector")
14470 (set_attr "mode" "OI")])
;; 128-bit (v)ptest on V2DI: sets (reg:CC FLAGS_REG) from an unspec of
;; operand 0 (register) and operand 1 (register or memory).  The "%v"
;; template prefix goes together with the "maybe_vex" prefix attribute.
14472 (define_insn "sse4_1_ptest"
14473 [(set (reg:CC FLAGS_REG)
14474 (unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x")
14475 (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")]
14478 "%vptest\t{%1, %0|%0, %1}"
14479 [(set_attr "type" "ssecomi")
14480 (set_attr "prefix_extra" "1")
14481 (set_attr "prefix" "maybe_vex")
14482 (set_attr "mode" "TI")])
;; (v)roundps/(v)roundpd for VF_128_256.  Operand 1 is the source (register
;; or memory) and operand 2 a 4-bit immediate (const_0_to_15_operand).
;; The prefix_data16 attribute is computed from a TARGET_AVX test rather
;; than being a fixed per-alternative string.
14484 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
14485 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
14487 [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
14488 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
14491 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14492 [(set_attr "type" "ssecvt")
14493 (set (attr "prefix_data16")
14495 (match_test "TARGET_AVX")
14497 (const_string "1")))
14498 (set_attr "prefix_extra" "1")
14499 (set_attr "length_immediate" "1")
14500 (set_attr "prefix" "maybe_vex")
14501 (set_attr "mode" "<MODE>")])
;; Expander: round the single-precision vector in operand 1 (immediate in
;; operand 2) into a fresh temporary of the float mode, then convert that
;; temporary to the integer vector operand 0 with the fix_trunc pattern.
14503 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
14504 [(match_operand:<sseintvecmode> 0 "register_operand")
14505 (match_operand:VF1_128_256 1 "nonimmediate_operand")
14506 (match_operand:SI 2 "const_0_to_15_operand")]
14509 rtx tmp = gen_reg_rtx (<MODE>mode);
14512 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14515 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander for a V8DF round: forwards all three operands unchanged to the
;; avx512f_rndscalev8df pattern.
14519 (define_expand "avx512f_roundpd512"
14520 [(match_operand:V8DF 0 "register_operand")
14521 (match_operand:V8DF 1 "nonimmediate_operand")
14522 (match_operand:SI 2 "const_0_to_15_operand")]
14525 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Expander: round two VF2 vectors (operands 1 and 2) using immediate
;; operand 3 and deliver the truncated integer results in operand 0.
;; For V2DF when AVX is available, AVX128 is not preferred and the insn is
;; optimized for speed, the two inputs are concatenated into one V4DF,
;; rounded once with avx_roundpd256 and converted by fix_truncv4dfv4si2.
;; The other path rounds each input into its own temporary and combines
;; the pair with vec_pack_sfix_trunc_<mode>.
14529 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
14530 [(match_operand:<ssepackfltmode> 0 "register_operand")
14531 (match_operand:VF2 1 "nonimmediate_operand")
14532 (match_operand:VF2 2 "nonimmediate_operand")
14533 (match_operand:SI 3 "const_0_to_15_operand")]
14538 if (<MODE>mode == V2DFmode
14539 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14541 rtx tmp2 = gen_reg_rtx (V4DFmode);
14543 tmp0 = gen_reg_rtx (V4DFmode);
14544 tmp1 = force_reg (V2DFmode, operands[1]);
14546 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14547 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14548 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14552 tmp0 = gen_reg_rtx (<MODE>mode);
14553 tmp1 = gen_reg_rtx (<MODE>mode);
14556 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14559 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14562 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round on VF_128 vectors.  Operand 2 is the vector being rounded,
;; operand 3 the immediate, and operand 1 a second vector input.  In the
;; two "noavx" alternatives operand 1 is tied to the destination ("0") and
;; the two-register round form is printed; the "avx" alternative prints the
;; vround form with operand 1 as a separate source register.
14567 (define_insn "sse4_1_round<ssescalarmodesuffix>"
14568 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
14571 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
14572 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
14574 (match_operand:VF_128 1 "register_operand" "0,0,x")
14578 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14579 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14580 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14581 [(set_attr "isa" "noavx,noavx,avx")
14582 (set_attr "type" "ssecvt")
14583 (set_attr "length_immediate" "1")
14584 (set_attr "prefix_data16" "1,1,*")
14585 (set_attr "prefix_extra" "1")
14586 (set_attr "prefix" "orig,orig,vex")
14587 (set_attr "mode" "<MODE>")])
;; Expander for round<mode>2 on VF vectors, enabled only with TARGET_ROUND
;; and when trapping math is disabled.  The preparation code builds a
;; constant vector whose elements are nextafter (0.5, 0.0), copies the sign
;; of operand 1 onto it with copysign (result in operands[3]), allocates a
;; temporary as operands[4] and sets operands[5] to ROUND_TRUNC.  The
;; template first sets operands[4] and then computes operand 0 from
;; operands[4] and operands[5].
14589 (define_expand "round<mode>2"
14590 [(set (match_dup 4)
14592 (match_operand:VF 1 "register_operand")
14594 (set (match_operand:VF 0 "register_operand")
14596 [(match_dup 4) (match_dup 5)]
14598 "TARGET_ROUND && !flag_trapping_math"
14600 machine_mode scalar_mode;
14601 const struct real_format *fmt;
14602 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14603 rtx half, vec_half;
14605 scalar_mode = GET_MODE_INNER (<MODE>mode);
14607 /* load nextafter (0.5, 0.0) */
14608 fmt = REAL_MODE_FORMAT (scalar_mode);
14609 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14610 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14611 half = const_double_from_real_value (pred_half, scalar_mode);
14613 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14614 vec_half = force_reg (<MODE>mode, vec_half);
14616 operands[3] = gen_reg_rtx (<MODE>mode);
14617 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14619 operands[4] = gen_reg_rtx (<MODE>mode);
14620 operands[5] = GEN_INT (ROUND_TRUNC);
;; Expander: apply round<mode>2 to operand 1 into a fresh temporary, then
;; convert the temporary to the integer vector operand 0 with fix_trunc.
;; Enabled under the same condition as round<mode>2 (TARGET_ROUND and no
;; trapping math).
14623 (define_expand "round<mode>2_sfix"
14624 [(match_operand:<sseintvecmode> 0 "register_operand")
14625 (match_operand:VF1_128_256 1 "register_operand")]
14626 "TARGET_ROUND && !flag_trapping_math"
14628 rtx tmp = gen_reg_rtx (<MODE>mode);
14630 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14633 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander: round two VF2 register operands (1 and 2) and deliver the
;; truncated integer results in operand 0.  Requires TARGET_ROUND and no
;; trapping math.  For V2DF when AVX is available, AVX128 is not preferred
;; and the insn is optimized for speed, both inputs are concatenated into
;; one V4DF, rounded with roundv4df2 and converted by fix_truncv4dfv4si2.
;; The other path rounds each input separately with round<mode>2 and
;; combines the results with vec_pack_sfix_trunc_<mode>.
14637 (define_expand "round<mode>2_vec_pack_sfix"
14638 [(match_operand:<ssepackfltmode> 0 "register_operand")
14639 (match_operand:VF2 1 "register_operand")
14640 (match_operand:VF2 2 "register_operand")]
14641 "TARGET_ROUND && !flag_trapping_math"
14645 if (<MODE>mode == V2DFmode
14646 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14648 rtx tmp2 = gen_reg_rtx (V4DFmode);
14650 tmp0 = gen_reg_rtx (V4DFmode);
14651 tmp1 = force_reg (V2DFmode, operands[1]);
14653 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14654 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14655 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14659 tmp0 = gen_reg_rtx (<MODE>mode);
14660 tmp1 = gen_reg_rtx (<MODE>mode);
14662 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14663 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14666 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14671 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14673 ;; Intel SSE4.2 string/text processing instructions
14675 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern.  From one set of inputs (vectors in operands
;; 2 and 4, SI registers constrained to "a" and "d" in operands 3 and 5,
;; and an 8-bit immediate in operand 6) it sets three results: SI operand 0
;; (constraint "c"), V16QI operand 1 (constraint "Yz") and FLAGS_REG.
;; The split runs only while pseudos can still be created.  Its code reads
;; the REG_UNUSED notes of the current insn to learn which results are
;; live (the locals ecx, xmm0 and flags) and uses pcmpestri for operand 0,
;; pcmpestrm for operand 1, the cconly pattern when only the flags are
;; live, and a NOTE_INSN_DELETED when nothing is live.
14677 (define_insn_and_split "sse4_2_pcmpestr"
14678 [(set (match_operand:SI 0 "register_operand" "=c,c")
14680 [(match_operand:V16QI 2 "register_operand" "x,x")
14681 (match_operand:SI 3 "register_operand" "a,a")
14682 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14683 (match_operand:SI 5 "register_operand" "d,d")
14684 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14686 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14694 (set (reg:CC FLAGS_REG)
14703 && can_create_pseudo_p ()"
14708 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14709 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14710 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14713 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14714 operands[3], operands[4],
14715 operands[5], operands[6]));
14717 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14718 operands[3], operands[4],
14719 operands[5], operands[6]));
14720 if (flags && !(ecx || xmm0))
14721 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14722 operands[2], operands[3],
14723 operands[4], operands[5],
14725 if (!(flags || ecx || xmm0))
14726 emit_note (NOTE_INSN_DELETED);
14730 [(set_attr "type" "sselog")
14731 (set_attr "prefix_data16" "1")
14732 (set_attr "prefix_extra" "1")
14733 (set_attr "ssememalign" "8")
14734 (set_attr "length_immediate" "1")
14735 (set_attr "memory" "none,load")
14736 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose second vector input is a
;; memory operand (operand 4) wrapped in an UNSPEC_LOADU, so there is a
;; single alternative and the memory attribute is always "load".
;; The split code has the same shape as in sse4_2_pcmpestr: it checks the
;; REG_UNUSED notes for operand 0, operand 1 and FLAGS_REG and passes the
;; bare operands[4] to pcmpestri, pcmpestrm or the cconly pattern, or
;; leaves a NOTE_INSN_DELETED when no result is live.
14738 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14739 [(set (match_operand:SI 0 "register_operand" "=c")
14741 [(match_operand:V16QI 2 "register_operand" "x")
14742 (match_operand:SI 3 "register_operand" "a")
14744 [(match_operand:V16QI 4 "memory_operand" "m")]
14746 (match_operand:SI 5 "register_operand" "d")
14747 (match_operand:SI 6 "const_0_to_255_operand" "n")]
14749 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14753 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14757 (set (reg:CC FLAGS_REG)
14761 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14766 && can_create_pseudo_p ()"
14771 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14772 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14773 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14776 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14777 operands[3], operands[4],
14778 operands[5], operands[6]));
14780 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14781 operands[3], operands[4],
14782 operands[5], operands[6]));
14783 if (flags && !(ecx || xmm0))
14784 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14785 operands[2], operands[3],
14786 operands[4], operands[5],
14788 if (!(flags || ecx || xmm0))
14789 emit_note (NOTE_INSN_DELETED);
14793 [(set_attr "type" "sselog")
14794 (set_attr "prefix_data16" "1")
14795 (set_attr "prefix_extra" "1")
14796 (set_attr "ssememalign" "8")
14797 (set_attr "length_immediate" "1")
14798 (set_attr "memory" "load")
14799 (set_attr "mode" "TI")])
;; pcmpestri: sets SI operand 0 (constraint "c") and FLAGS_REG.  Only the
;; two vector inputs (operands 1 and 3) and the immediate (operand 5) are
;; printed in the template; the SI inputs in operands 2 and 4 are pinned
;; by their "a" and "d" constraints and do not appear in the assembly text.
;; The second alternative takes operand 3 from memory.
14801 (define_insn "sse4_2_pcmpestri"
14802 [(set (match_operand:SI 0 "register_operand" "=c,c")
14804 [(match_operand:V16QI 1 "register_operand" "x,x")
14805 (match_operand:SI 2 "register_operand" "a,a")
14806 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14807 (match_operand:SI 4 "register_operand" "d,d")
14808 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14810 (set (reg:CC FLAGS_REG)
14819 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
14820 [(set_attr "type" "sselog")
14821 (set_attr "prefix_data16" "1")
14822 (set_attr "prefix_extra" "1")
14823 (set_attr "prefix" "maybe_vex")
14824 (set_attr "ssememalign" "8")
14825 (set_attr "length_immediate" "1")
14826 (set_attr "btver2_decode" "vector")
14827 (set_attr "memory" "none,load")
14828 (set_attr "mode" "TI")])
;; pcmpestrm: sets V16QI operand 0 (constraint "Yz") and FLAGS_REG.  The
;; template prints the two vector inputs (operands 1 and 3) and the
;; immediate (operand 5); the SI inputs in operands 2 and 4 are pinned by
;; their "a" and "d" constraints.  The second alternative takes operand 3
;; from memory.
14830 (define_insn "sse4_2_pcmpestrm"
14831 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14833 [(match_operand:V16QI 1 "register_operand" "x,x")
14834 (match_operand:SI 2 "register_operand" "a,a")
14835 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14836 (match_operand:SI 4 "register_operand" "d,d")
14837 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14839 (set (reg:CC FLAGS_REG)
14848 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
14849 [(set_attr "type" "sselog")
14850 (set_attr "prefix_data16" "1")
14851 (set_attr "prefix_extra" "1")
14852 (set_attr "ssememalign" "8")
14853 (set_attr "length_immediate" "1")
14854 (set_attr "prefix" "maybe_vex")
14855 (set_attr "btver2_decode" "vector")
14856 (set_attr "memory" "none,load")
14857 (set_attr "mode" "TI")])
;; Flags-only form of pcmpestr: the sole set is FLAGS_REG, while operands 0
;; (V16QI) and 1 (SI) are clobbered scratches.  Four alternatives:
;; 0 and 1 print pcmpestrm and need the "Yz" scratch (the SI scratch is
;; "X"); 2 and 3 print pcmpestri and need the "c" scratch (the V16QI
;; scratch is "X").  Within each pair the second alternative takes
;; operand 4 from memory.
14859 (define_insn "sse4_2_pcmpestr_cconly"
14860 [(set (reg:CC FLAGS_REG)
14862 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14863 (match_operand:SI 3 "register_operand" "a,a,a,a")
14864 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14865 (match_operand:SI 5 "register_operand" "d,d,d,d")
14866 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
14868 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14869 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14872 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14873 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14874 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
14875 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
14876 [(set_attr "type" "sselog")
14877 (set_attr "prefix_data16" "1")
14878 (set_attr "prefix_extra" "1")
14879 (set_attr "ssememalign" "8")
14880 (set_attr "length_immediate" "1")
14881 (set_attr "memory" "none,load,none,load")
14882 (set_attr "btver2_decode" "vector,vector,vector,vector")
14883 (set_attr "prefix" "maybe_vex")
14884 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern.  From two V16QI inputs (operands 2 and 3) and
;; an 8-bit immediate (operand 4) it sets three results: SI operand 0
;; (constraint "c"), V16QI operand 1 (constraint "Yz") and FLAGS_REG.
;; The split runs only while pseudos can still be created.  Its code reads
;; the REG_UNUSED notes of the current insn to learn which results are
;; live (the locals ecx, xmm0 and flags) and uses pcmpistri for operand 0,
;; pcmpistrm for operand 1, the cconly pattern when only the flags are
;; live, and a NOTE_INSN_DELETED when nothing is live.
14886 (define_insn_and_split "sse4_2_pcmpistr"
14887 [(set (match_operand:SI 0 "register_operand" "=c,c")
14889 [(match_operand:V16QI 2 "register_operand" "x,x")
14890 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14891 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
14893 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14899 (set (reg:CC FLAGS_REG)
14906 && can_create_pseudo_p ()"
14911 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14912 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14913 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14916 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14917 operands[3], operands[4]));
14919 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14920 operands[3], operands[4]));
14921 if (flags && !(ecx || xmm0))
14922 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14923 operands[2], operands[3],
14925 if (!(flags || ecx || xmm0))
14926 emit_note (NOTE_INSN_DELETED);
14930 [(set_attr "type" "sselog")
14931 (set_attr "prefix_data16" "1")
14932 (set_attr "prefix_extra" "1")
14933 (set_attr "ssememalign" "8")
14934 (set_attr "length_immediate" "1")
14935 (set_attr "memory" "none,load")
14936 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose second vector input is a
;; memory operand (operand 3) wrapped in an UNSPEC_LOADU, so there is a
;; single alternative and the memory attribute is always "load".
;; The split code has the same shape as in sse4_2_pcmpistr: it checks the
;; REG_UNUSED notes for operand 0, operand 1 and FLAGS_REG and passes the
;; bare operands[3] to pcmpistri, pcmpistrm or the cconly pattern, or
;; leaves a NOTE_INSN_DELETED when no result is live.
14938 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
14939 [(set (match_operand:SI 0 "register_operand" "=c")
14941 [(match_operand:V16QI 2 "register_operand" "x")
14943 [(match_operand:V16QI 3 "memory_operand" "m")]
14945 (match_operand:SI 4 "const_0_to_255_operand" "n")]
14947 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14950 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14953 (set (reg:CC FLAGS_REG)
14956 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14960 && can_create_pseudo_p ()"
14965 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14966 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14967 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14970 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14971 operands[3], operands[4]));
14973 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14974 operands[3], operands[4]));
14975 if (flags && !(ecx || xmm0))
14976 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14977 operands[2], operands[3],
14979 if (!(flags || ecx || xmm0))
14980 emit_note (NOTE_INSN_DELETED);
14984 [(set_attr "type" "sselog")
14985 (set_attr "prefix_data16" "1")
14986 (set_attr "prefix_extra" "1")
14987 (set_attr "ssememalign" "8")
14988 (set_attr "length_immediate" "1")
14989 (set_attr "memory" "load")
14990 (set_attr "mode" "TI")])
;; pcmpistri: sets SI operand 0 (constraint "c") and FLAGS_REG from two
;; V16QI inputs (operands 1 and 2) and an 8-bit immediate (operand 3).
;; The second alternative takes operand 2 from memory.
14992 (define_insn "sse4_2_pcmpistri"
14993 [(set (match_operand:SI 0 "register_operand" "=c,c")
14995 [(match_operand:V16QI 1 "register_operand" "x,x")
14996 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
14997 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14999 (set (reg:CC FLAGS_REG)
15006 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15007 [(set_attr "type" "sselog")
15008 (set_attr "prefix_data16" "1")
15009 (set_attr "prefix_extra" "1")
15010 (set_attr "ssememalign" "8")
15011 (set_attr "length_immediate" "1")
15012 (set_attr "prefix" "maybe_vex")
15013 (set_attr "memory" "none,load")
15014 (set_attr "btver2_decode" "vector")
15015 (set_attr "mode" "TI")])
;; pcmpistrm: sets V16QI operand 0 (constraint "Yz") and FLAGS_REG from two
;; V16QI inputs (operands 1 and 2) and an 8-bit immediate (operand 3).
;; The second alternative takes operand 2 from memory.
15017 (define_insn "sse4_2_pcmpistrm"
15018 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15020 [(match_operand:V16QI 1 "register_operand" "x,x")
15021 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15022 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15024 (set (reg:CC FLAGS_REG)
15031 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
15032 [(set_attr "type" "sselog")
15033 (set_attr "prefix_data16" "1")
15034 (set_attr "prefix_extra" "1")
15035 (set_attr "ssememalign" "8")
15036 (set_attr "length_immediate" "1")
15037 (set_attr "prefix" "maybe_vex")
15038 (set_attr "memory" "none,load")
15039 (set_attr "btver2_decode" "vector")
15040 (set_attr "mode" "TI")])
;; Flags-only form of pcmpistr: the sole set is FLAGS_REG, while operands 0
;; (V16QI) and 1 (SI) are clobbered scratches.  Four alternatives:
;; 0 and 1 print pcmpistrm and need the "Yz" scratch (the SI scratch is
;; "X"); 2 and 3 print pcmpistri and need the "c" scratch (the V16QI
;; scratch is "X").  Within each pair the second alternative takes
;; operand 3 from memory.
15042 (define_insn "sse4_2_pcmpistr_cconly"
15043 [(set (reg:CC FLAGS_REG)
15045 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15046 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15047 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
15049 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15050 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15053 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15054 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15055 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15056 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
15057 [(set_attr "type" "sselog")
15058 (set_attr "prefix_data16" "1")
15059 (set_attr "prefix_extra" "1")
15060 (set_attr "ssememalign" "8")
15061 (set_attr "length_immediate" "1")
15062 (set_attr "memory" "none,load,none,load")
15063 (set_attr "prefix" "maybe_vex")
15064 (set_attr "btver2_decode" "vector,vector,vector,vector")
15065 (set_attr "mode" "TI")])
15067 ;; Packed float variants
;; Maps the index-vector mode of a packed-float gather/scatter prefetch to
;; the float vector mode used for its memory reference: V8DI -> V8SF and
;; V16SI -> V16SF.
15068 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15069 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the packed-float gather prefetch (UNSPEC_GATHER_PREFETCH).
;; Operand 0 is the mask (a mask register or constant -1), operand 2 the
;; VSIB address operand, operand 1 the VI48_512 register vector, operand 3
;; a const1248 immediate and operand 4 a const_2_to_3 immediate.
;; The preparation code combines operands 2, 1 and 3 into a Pmode
;; UNSPEC_VSIBADDR rtx.
15071 (define_expand "avx512pf_gatherpf<mode>sf"
15073 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15074 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15076 [(match_operand 2 "vsib_address_operand")
15077 (match_operand:VI48_512 1 "register_operand")
15078 (match_operand:SI 3 "const1248_operand")]))
15079 (match_operand:SI 4 "const_2_to_3_operand")]
15080 UNSPEC_GATHER_PREFETCH)]
15084 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15085 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float gather prefetch.  Operand 0 is the mask register
;; ("Yk"); operand 5 is the VSIB memory reference matched by
;; vsib_mem_operator.  The output code switches on the value of operand 4
;; to print either vgatherpf0...ps or vgatherpf1...ps, with the mask
;; printed in braces after the memory operand; any other value is
;; unreachable.
15088 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
15090 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15091 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15093 [(match_operand:P 2 "vsib_address_operand" "Tv")
15094 (match_operand:VI48_512 1 "register_operand" "v")
15095 (match_operand:SI 3 "const1248_operand" "n")]
15097 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15098 UNSPEC_GATHER_PREFETCH)]
15101 switch (INTVAL (operands[4]))
15104 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15106 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15108 gcc_unreachable ();
15111 [(set_attr "type" "sse")
15112 (set_attr "prefix" "evex")
15113 (set_attr "mode" "XI")])
;; Packed-float gather prefetch without a mask-register operand.  Operand 4
;; is the VSIB memory reference; the output code switches on the value of
;; operand 3 to print either vgatherpf0...ps or vgatherpf1...ps, and any
;; other value is unreachable.
15115 (define_insn "*avx512pf_gatherpf<mode>sf"
15118 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15120 [(match_operand:P 1 "vsib_address_operand" "Tv")
15121 (match_operand:VI48_512 0 "register_operand" "v")
15122 (match_operand:SI 2 "const1248_operand" "n")]
15124 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15125 UNSPEC_GATHER_PREFETCH)]
15128 switch (INTVAL (operands[3]))
15131 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
15133 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
15135 gcc_unreachable ();
15138 [(set_attr "type" "sse")
15139 (set_attr "prefix" "evex")
15140 (set_attr "mode" "XI")])
15142 ;; Packed double variants
;; Expander for the packed-double gather prefetch (UNSPEC_GATHER_PREFETCH).
;; Operand 0 is the mask (a mask register or constant -1), operand 2 the
;; VSIB address operand, operand 1 the VI4_256_8_512 register vector,
;; operand 3 a const1248 immediate and operand 4 a const_2_to_3 immediate.
;; The preparation code combines operands 2, 1 and 3 into a Pmode
;; UNSPEC_VSIBADDR rtx.
15143 (define_expand "avx512pf_gatherpf<mode>df"
15145 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15148 [(match_operand 2 "vsib_address_operand")
15149 (match_operand:VI4_256_8_512 1 "register_operand")
15150 (match_operand:SI 3 "const1248_operand")]))
15151 (match_operand:SI 4 "const_2_to_3_operand")]
15152 UNSPEC_GATHER_PREFETCH)]
15156 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15157 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double gather prefetch.  Operand 0 is the mask register
;; ("Yk"); operand 5 is the V8DF VSIB memory reference.  The output code
;; switches on the value of operand 4 to print either vgatherpf0...pd or
;; vgatherpf1...pd, with the mask printed in braces after the memory
;; operand; any other value is unreachable.
15160 (define_insn "*avx512pf_gatherpf<mode>df_mask"
15162 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15163 (match_operator:V8DF 5 "vsib_mem_operator"
15165 [(match_operand:P 2 "vsib_address_operand" "Tv")
15166 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15167 (match_operand:SI 3 "const1248_operand" "n")]
15169 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15170 UNSPEC_GATHER_PREFETCH)]
15173 switch (INTVAL (operands[4]))
15176 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15178 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15180 gcc_unreachable ();
15183 [(set_attr "type" "sse")
15184 (set_attr "prefix" "evex")
15185 (set_attr "mode" "XI")])
;; Packed-double gather prefetch without a mask-register operand.
;; Operand 4 is the V8DF VSIB memory reference; the output code switches on
;; the value of operand 3 to print either vgatherpf0...pd or
;; vgatherpf1...pd, and any other value is unreachable.
15187 (define_insn "*avx512pf_gatherpf<mode>df"
15190 (match_operator:V8DF 4 "vsib_mem_operator"
15192 [(match_operand:P 1 "vsib_address_operand" "Tv")
15193 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15194 (match_operand:SI 2 "const1248_operand" "n")]
15196 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15197 UNSPEC_GATHER_PREFETCH)]
15200 switch (INTVAL (operands[3]))
15203 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
15205 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
15207 gcc_unreachable ();
15210 [(set_attr "type" "sse")
15211 (set_attr "prefix" "evex")
15212 (set_attr "mode" "XI")])
15214 ;; Packed float variants
;; Expander for the packed-float scatter prefetch
;; (UNSPEC_SCATTER_PREFETCH).  Operand 0 is the mask (a mask register or
;; constant -1), operand 2 the VSIB address operand, operand 1 the
;; VI48_512 register vector, operand 3 a const1248 immediate and operand 4
;; an immediate accepted by const2367_operand.  The preparation code
;; combines operands 2, 1 and 3 into a Pmode UNSPEC_VSIBADDR rtx.
15215 (define_expand "avx512pf_scatterpf<mode>sf"
15217 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15218 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15220 [(match_operand 2 "vsib_address_operand")
15221 (match_operand:VI48_512 1 "register_operand")
15222 (match_operand:SI 3 "const1248_operand")]))
15223 (match_operand:SI 4 "const2367_operand")]
15224 UNSPEC_SCATTER_PREFETCH)]
15228 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15229 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float scatter prefetch.  Operand 0 is the mask register
;; ("Yk"); operand 5 is the VSIB memory reference.  The output code
;; switches on the value of operand 4 to print either vscatterpf0...ps or
;; vscatterpf1...ps, with the mask printed in braces after the memory
;; operand; any other value is unreachable.
15232 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
15234 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15235 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15237 [(match_operand:P 2 "vsib_address_operand" "Tv")
15238 (match_operand:VI48_512 1 "register_operand" "v")
15239 (match_operand:SI 3 "const1248_operand" "n")]
15241 (match_operand:SI 4 "const2367_operand" "n")]
15242 UNSPEC_SCATTER_PREFETCH)]
15245 switch (INTVAL (operands[4]))
15249 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15252 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15254 gcc_unreachable ();
15257 [(set_attr "type" "sse")
15258 (set_attr "prefix" "evex")
15259 (set_attr "mode" "XI")])
;; Packed-float scatter prefetch without a mask-register operand.
;; Operand 4 is the VSIB memory reference; the output code switches on the
;; value of operand 3 to print either vscatterpf0...ps or
;; vscatterpf1...ps, and any other value is unreachable.
15261 (define_insn "*avx512pf_scatterpf<mode>sf"
15264 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15266 [(match_operand:P 1 "vsib_address_operand" "Tv")
15267 (match_operand:VI48_512 0 "register_operand" "v")
15268 (match_operand:SI 2 "const1248_operand" "n")]
15270 (match_operand:SI 3 "const2367_operand" "n")]
15271 UNSPEC_SCATTER_PREFETCH)]
15274 switch (INTVAL (operands[3]))
15278 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
15281 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
15283 gcc_unreachable ();
15286 [(set_attr "type" "sse")
15287 (set_attr "prefix" "evex")
15288 (set_attr "mode" "XI")])
15290 ;; Packed double variants
;; Expander for the packed-double scatter prefetch
;; (UNSPEC_SCATTER_PREFETCH).  Operand 0 is the mask (a mask register or
;; constant -1), operand 2 the VSIB address operand, operand 1 the
;; VI4_256_8_512 register vector, operand 3 a const1248 immediate and
;; operand 4 an immediate accepted by const2367_operand.  The preparation
;; code combines operands 2, 1 and 3 into a Pmode UNSPEC_VSIBADDR rtx.
15291 (define_expand "avx512pf_scatterpf<mode>df"
15293 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15296 [(match_operand 2 "vsib_address_operand")
15297 (match_operand:VI4_256_8_512 1 "register_operand")
15298 (match_operand:SI 3 "const1248_operand")]))
15299 (match_operand:SI 4 "const2367_operand")]
15300 UNSPEC_SCATTER_PREFETCH)]
15304 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15305 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double scatter prefetch.  Operand 0 is the mask register
;; ("Yk"); operand 5 is the V8DF VSIB memory reference.  The output code
;; switches on the value of operand 4 to print either vscatterpf0...pd or
;; vscatterpf1...pd, with the mask printed in braces after the memory
;; operand; any other value is unreachable.
15308 (define_insn "*avx512pf_scatterpf<mode>df_mask"
15310 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15311 (match_operator:V8DF 5 "vsib_mem_operator"
15313 [(match_operand:P 2 "vsib_address_operand" "Tv")
15314 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15315 (match_operand:SI 3 "const1248_operand" "n")]
15317 (match_operand:SI 4 "const2367_operand" "n")]
15318 UNSPEC_SCATTER_PREFETCH)]
15321 switch (INTVAL (operands[4]))
15325 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15328 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15330 gcc_unreachable ();
15333 [(set_attr "type" "sse")
15334 (set_attr "prefix" "evex")
15335 (set_attr "mode" "XI")])
;; Packed-double scatter prefetch without a mask-register operand.
;; Operand 4 is the V8DF VSIB memory reference; the output code switches on
;; the value of operand 3 to print either vscatterpf0...pd or
;; vscatterpf1...pd, and any other value is unreachable.
15337 (define_insn "*avx512pf_scatterpf<mode>df"
15340 (match_operator:V8DF 4 "vsib_mem_operator"
15342 [(match_operand:P 1 "vsib_address_operand" "Tv")
15343 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15344 (match_operand:SI 2 "const1248_operand" "n")]
15346 (match_operand:SI 3 "const2367_operand" "n")]
15347 UNSPEC_SCATTER_PREFETCH)]
15350 switch (INTVAL (operands[3]))
15354 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
15357 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
15359 gcc_unreachable ();
15362 [(set_attr "type" "sse")
15363 (set_attr "prefix" "evex")
15364 (set_attr "mode" "XI")])
;; vexp2 on a 512-bit float vector (VF_512): one source operand, result in
;; register operand 0.  The predicate and constraint of operand 1, and the
;; extra text in the template, come from the round_saeonly and mask
;; substitution attributes named in the pattern.  EVEX-encoded.
15366 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15367 [(set (match_operand:VF_512 0 "register_operand" "=v")
15369 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15372 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15373 [(set_attr "prefix" "evex")
15374 (set_attr "type" "sse")
15375 (set_attr "mode" "<MODE>")])
;; vrcp28 on a 512-bit float vector (VF_512): one source operand, result in
;; register operand 0.  The predicate and constraint of operand 1, and the
;; extra text in the template, come from the round_saeonly and mask
;; substitution attributes named in the pattern.  EVEX-encoded.
15377 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15378 [(set (match_operand:VF_512 0 "register_operand" "=v")
15380 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15383 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15384 [(set_attr "prefix" "evex")
15385 (set_attr "type" "sse")
15386 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28 on VF_128 vectors: operand 1 is the source of the
;; operation and operand 2 a second register input; the template prints
;; %1, %2, %0 in AT&T order.  EVEX-encoded.
;; NOTE(review): length_immediate is set to "1" although the template shows
;; no immediate operand -- confirm this is intended.
15388 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15389 [(set (match_operand:VF_128 0 "register_operand" "=v")
15392 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15394 (match_operand:VF_128 2 "register_operand" "v")
15397 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15398 [(set_attr "length_immediate" "1")
15399 (set_attr "prefix" "evex")
15400 (set_attr "type" "sse")
15401 (set_attr "mode" "<MODE>")])
;; vrsqrt28 on a 512-bit float vector (VF_512): one source operand, result
;; in register operand 0.  The predicate and constraint of operand 1, and
;; the extra text in the template, come from the round_saeonly and mask
;; substitution attributes named in the pattern.  EVEX-encoded.
15403 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
15404 [(set (match_operand:VF_512 0 "register_operand" "=v")
15406 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15409 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15410 [(set_attr "prefix" "evex")
15411 (set_attr "type" "sse")
15412 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28 on VF_128 vectors: operand 1 is the source of the
;; operation and operand 2 a second register input; the template prints
;; %1, %2, %0 in AT&T order.  EVEX-encoded.
;; NOTE(review): length_immediate is set to "1" although the template shows
;; no immediate operand -- confirm this is intended.
15414 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15415 [(set (match_operand:VF_128 0 "register_operand" "=v")
15418 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15420 (match_operand:VF_128 2 "register_operand" "v")
15423 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15424 [(set_attr "length_immediate" "1")
15425 (set_attr "type" "sse")
15426 (set_attr "prefix" "evex")
15427 (set_attr "mode" "<MODE>")])
15429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15431 ;; XOP instructions
15433 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over the plain and signed-saturating add codes.  The
;; two code attributes give the mnemonic fragment for each: "macs"/"madcs"
;; for plus and "macss"/"madcss" for ss_plus.
15435 (define_code_iterator xop_plus [plus ss_plus])
15437 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
15438 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15440 ;; XOP parallel integer multiply/add instructions.
;; Same-width XOP multiply/accumulate on VI24_128 vectors: operands 1 and
;; 2 are the two commutative ("%") inputs, of which only operand 2 may be
;; in memory, and operand 3 is a further register input.  Printed as
;; vp<macs> with the element suffix repeated twice.
15442 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15443 [(set (match_operand:VI24_128 0 "register_operand" "=x")
15446 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15447 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
15448 (match_operand:VI24_128 3 "register_operand" "x")))]
15450 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15451 [(set_attr "type" "ssemuladd")
15452 (set_attr "mode" "TI")])
;; vp<macs>dql: elements 0 and 2 are selected from each of the V4SI inputs
;; (operands 1 and 2); operand 3 and the result are V2DI.  Only operand 2
;; may be in memory.
15454 (define_insn "xop_p<macs>dql"
15455 [(set (match_operand:V2DI 0 "register_operand" "=x")
15460 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15461 (parallel [(const_int 0) (const_int 2)])))
15464 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15465 (parallel [(const_int 0) (const_int 2)]))))
15466 (match_operand:V2DI 3 "register_operand" "x")))]
15468 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15469 [(set_attr "type" "ssemuladd")
15470 (set_attr "mode" "TI")])
;; vp<macs>dqh: elements 1 and 3 are selected from each of the V4SI inputs
;; (operands 1 and 2); operand 3 and the result are V2DI.  Only operand 2
;; may be in memory.
15472 (define_insn "xop_p<macs>dqh"
15473 [(set (match_operand:V2DI 0 "register_operand" "=x")
15478 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15479 (parallel [(const_int 1) (const_int 3)])))
15482 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15483 (parallel [(const_int 1) (const_int 3)]))))
15484 (match_operand:V2DI 3 "register_operand" "x")))]
15486 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15487 [(set_attr "type" "ssemuladd")
15488 (set_attr "mode" "TI")])
15490 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: the odd-indexed elements (1, 3, 5, 7) are selected from each
;; of the V8HI inputs (operands 1 and 2); operand 3 and the result are
;; V4SI.  Only operand 2 may be in memory.
15491 (define_insn "xop_p<macs>wd"
15492 [(set (match_operand:V4SI 0 "register_operand" "=x")
15497 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15498 (parallel [(const_int 1) (const_int 3)
15499 (const_int 5) (const_int 7)])))
15502 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15503 (parallel [(const_int 1) (const_int 3)
15504 (const_int 5) (const_int 7)]))))
15505 (match_operand:V4SI 3 "register_operand" "x")))]
15507 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15508 [(set_attr "type" "ssemuladd")
15509 (set_attr "mode" "TI")])
;; vp<madcs>wd: uses both the even-indexed (0, 2, 4, 6) and the odd-indexed
;; (1, 3, 5, 7) element selections of the V8HI inputs (operands 1 and 2);
;; operand 3 and the result are V4SI.  Only operand 2 may be in memory.
15511 (define_insn "xop_p<madcs>wd"
15512 [(set (match_operand:V4SI 0 "register_operand" "=x")
15518 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15519 (parallel [(const_int 0) (const_int 2)
15520 (const_int 4) (const_int 6)])))
15523 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15524 (parallel [(const_int 0) (const_int 2)
15525 (const_int 4) (const_int 6)]))))
15530 (parallel [(const_int 1) (const_int 3)
15531 (const_int 5) (const_int 7)])))
15535 (parallel [(const_int 1) (const_int 3)
15536 (const_int 5) (const_int 7)])))))
15537 (match_operand:V4SI 3 "register_operand" "x")))]
15539 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15540 [(set_attr "type" "ssemuladd")
15541 (set_attr "mode" "TI")])
15543 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in iterator V.  Operand 3 comes first in the RTL
;; and operands 1 and 2 follow.  The two alternatives allow a memory
;; operand either for operand 2 (first alternative) or for operand 3
;; (second alternative), never both; operand 1 is always a register.
15544 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
15545 [(set (match_operand:V 0 "register_operand" "=x,x")
15547 (match_operand:V 3 "nonimmediate_operand" "x,m")
15548 (match_operand:V 1 "register_operand" "x,x")
15549 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
15551 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15552 [(set_attr "type" "sse4arg")])
15554 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: V16QI source (register or memory), V8HI result.  The two
;; element selections are the even-indexed bytes (0, 2, ..., 14) and the
;; odd-indexed bytes (1, 3, ..., 15).
15555 (define_insn "xop_phadd<u>bw"
15556 [(set (match_operand:V8HI 0 "register_operand" "=x")
15560 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15561 (parallel [(const_int 0) (const_int 2)
15562 (const_int 4) (const_int 6)
15563 (const_int 8) (const_int 10)
15564 (const_int 12) (const_int 14)])))
15568 (parallel [(const_int 1) (const_int 3)
15569 (const_int 5) (const_int 7)
15570 (const_int 9) (const_int 11)
15571 (const_int 13) (const_int 15)])))))]
15573 "vphadd<u>bw\t{%1, %0|%0, %1}"
15574 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: V16QI source (register or memory), V4SI result.  There are
;; four element selections with stride 4: {0,4,8,12}, {1,5,9,13},
;; {2,6,10,14} and {3,7,11,15}.
15576 (define_insn "xop_phadd<u>bd"
15577 [(set (match_operand:V4SI 0 "register_operand" "=x")
15582 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15583 (parallel [(const_int 0) (const_int 4)
15584 (const_int 8) (const_int 12)])))
15588 (parallel [(const_int 1) (const_int 5)
15589 (const_int 9) (const_int 13)]))))
15594 (parallel [(const_int 2) (const_int 6)
15595 (const_int 10) (const_int 14)])))
15599 (parallel [(const_int 3) (const_int 7)
15600 (const_int 11) (const_int 15)]))))))]
15602 "vphadd<u>bd\t{%1, %0|%0, %1}"
15603 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: V16QI source (register or memory), V2DI result.  There are
;; eight element selections with stride 8: {0,8}, {1,9}, {2,10}, {3,11},
;; {4,12}, {5,13}, {6,14} and {7,15}.
15605 (define_insn "xop_phadd<u>bq"
15606 [(set (match_operand:V2DI 0 "register_operand" "=x")
15612 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15613 (parallel [(const_int 0) (const_int 8)])))
15617 (parallel [(const_int 1) (const_int 9)]))))
15622 (parallel [(const_int 2) (const_int 10)])))
15626 (parallel [(const_int 3) (const_int 11)])))))
15632 (parallel [(const_int 4) (const_int 12)])))
15636 (parallel [(const_int 5) (const_int 13)]))))
15641 (parallel [(const_int 6) (const_int 14)])))
15645 (parallel [(const_int 7) (const_int 15)])))))))]
15647 "vphadd<u>bq\t{%1, %0|%0, %1}"
15648 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: V8HI source (register or memory), V4SI result.  The two
;; element selections are the even-indexed words (0, 2, 4, 6) and the
;; odd-indexed words (1, 3, 5, 7).
15650 (define_insn "xop_phadd<u>wd"
15651 [(set (match_operand:V4SI 0 "register_operand" "=x")
15655 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15656 (parallel [(const_int 0) (const_int 2)
15657 (const_int 4) (const_int 6)])))
15661 (parallel [(const_int 1) (const_int 3)
15662 (const_int 5) (const_int 7)])))))]
15664 "vphadd<u>wd\t{%1, %0|%0, %1}"
15665 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: V8HI source (register or memory), V2DI result.  There are
;; four element selections with stride 4: {0,4}, {1,5}, {2,6} and {3,7}.
15667 (define_insn "xop_phadd<u>wq"
15668 [(set (match_operand:V2DI 0 "register_operand" "=x")
15673 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15674 (parallel [(const_int 0) (const_int 4)])))
15678 (parallel [(const_int 1) (const_int 5)]))))
15683 (parallel [(const_int 2) (const_int 6)])))
15687 (parallel [(const_int 3) (const_int 7)]))))))]
15689 "vphadd<u>wq\t{%1, %0|%0, %1}"
15690 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: V4SI source (register or memory), V2DI result.  The two
;; element selections are {0,2} and {1,3}.
15692 (define_insn "xop_phadd<u>dq"
15693 [(set (match_operand:V2DI 0 "register_operand" "=x")
15697 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15698 (parallel [(const_int 0) (const_int 2)])))
15702 (parallel [(const_int 1) (const_int 3)])))))]
15704 "vphadd<u>dq\t{%1, %0|%0, %1}"
15705 [(set_attr "type" "sseiadd1")])
;; XOP pattern that outputs vphsubbw.  Operand 0 is a V8HI register and
;; operand 1 a V16QI register or memory operand.  Two eight-element
;; selections from operand 1 are visible: the even bytes {0,2,...,14}
;; followed by the odd bytes {1,3,...,15}.
15707 (define_insn "xop_phsubbw"
15708 [(set (match_operand:V8HI 0 "register_operand" "=x")
15712 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15713 (parallel [(const_int 0) (const_int 2)
15714 (const_int 4) (const_int 6)
15715 (const_int 8) (const_int 10)
15716 (const_int 12) (const_int 14)])))
15720 (parallel [(const_int 1) (const_int 3)
15721 (const_int 5) (const_int 7)
15722 (const_int 9) (const_int 11)
15723 (const_int 13) (const_int 15)])))))]
15725 "vphsubbw\t{%1, %0|%0, %1}"
15726 [(set_attr "type" "sseiadd1")])
;; XOP pattern that outputs vphsubwd.  Operand 0 is a V4SI register and
;; operand 1 a V8HI register or memory operand.  The visible selections
;; take the even elements {0,2,4,6} and then the odd elements {1,3,5,7}.
15728 (define_insn "xop_phsubwd"
15729 [(set (match_operand:V4SI 0 "register_operand" "=x")
15733 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15734 (parallel [(const_int 0) (const_int 2)
15735 (const_int 4) (const_int 6)])))
15739 (parallel [(const_int 1) (const_int 3)
15740 (const_int 5) (const_int 7)])))))]
15742 "vphsubwd\t{%1, %0|%0, %1}"
15743 [(set_attr "type" "sseiadd1")])
;; XOP pattern that outputs vphsubdq.  Operand 0 is a V2DI register and
;; operand 1 a V4SI register or memory operand.  The visible selections
;; take the even elements {0,2} and then the odd elements {1,3}.
15745 (define_insn "xop_phsubdq"
15746 [(set (match_operand:V2DI 0 "register_operand" "=x")
15750 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15751 (parallel [(const_int 0) (const_int 2)])))
15755 (parallel [(const_int 1) (const_int 3)])))))]
15757 "vphsubdq\t{%1, %0|%0, %1}"
15758 [(set_attr "type" "sseiadd1")])
15760 ;; XOP permute instructions
;; vpperm on three V16QI inputs, modelled as UNSPEC_XOP_PERMUTE.
;; Operand 1 must be a register.  The two alternatives allow either
;; operand 3 (alternative 0) or operand 2 (alternative 1) to be memory,
;; and the insn condition rejects the case where both are memory.
15761 (define_insn "xop_pperm"
15762 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15764 [(match_operand:V16QI 1 "register_operand" "x,x")
15765 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15766 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15767 UNSPEC_XOP_PERMUTE))]
15768 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15769 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15770 [(set_attr "type" "sse4arg")
15771 (set_attr "mode" "TI")])
15773 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into a V4SI result with vpperm.
;; Operand 3 is a V16QI value referenced through a `use' and passed to
;; vpperm as its last source.  At most one of operands 2 and 3 may be
;; memory (see the insn condition).
;; NOTE(review): the RTL operators around operands 1 and 2 are not visible
;; in this excerpt.
15774 (define_insn "xop_pperm_pack_v2di_v4si"
15775 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15778 (match_operand:V2DI 1 "register_operand" "x,x"))
15780 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15781 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15782 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15783 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15784 [(set_attr "type" "sse4arg")
15785 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into a V8HI result with vpperm.
;; Operand 3 is a V16QI value referenced through a `use' and passed to
;; vpperm as its last source.  At most one of operands 2 and 3 may be
;; memory (see the insn condition).
15787 (define_insn "xop_pperm_pack_v4si_v8hi"
15788 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15791 (match_operand:V4SI 1 "register_operand" "x,x"))
15793 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15794 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15795 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15796 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15797 [(set_attr "type" "sse4arg")
15798 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into a V16QI result with vpperm.
;; Operand 3 is a V16QI value referenced through a `use' and passed to
;; vpperm as its last source.  At most one of operands 2 and 3 may be
;; memory (see the insn condition).
15800 (define_insn "xop_pperm_pack_v8hi_v16qi"
15801 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15804 (match_operand:V8HI 1 "register_operand" "x,x"))
15806 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15807 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15808 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15809 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15810 [(set_attr "type" "sse4arg")
15811 (set_attr "mode" "TI")])
15813 ;; XOP packed rotate instructions
;; Rotate-left expander for VI_128 modes with an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand,
;; the preparation code:
;;   - converts the count to <ssescalarmode> with convert_move if its mode
;;     differs,
;;   - builds a PARALLEL holding <ssescalarnum> copies of it,
;;   - initialises a fresh vector register from that PARALLEL with
;;     gen_vec_init<mode>, and
;;   - emits gen_xop_vrotl<mode>3 with that per-element count vector.
;; NOTE(review): the insn condition and the statements that follow the
;; emit calls are not visible in this excerpt.
15814 (define_expand "rotl<mode>3"
15815 [(set (match_operand:VI_128 0 "register_operand")
15817 (match_operand:VI_128 1 "nonimmediate_operand")
15818 (match_operand:SI 2 "general_operand")))]
15821 /* If we were given a scalar, convert it to parallel */
15822 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15824 rtvec vs = rtvec_alloc (<ssescalarnum>);
15825 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15826 rtx reg = gen_reg_rtx (<MODE>mode);
15827 rtx op2 = operands[2];
15830 if (GET_MODE (op2) != <ssescalarmode>mode)
15832 op2 = gen_reg_rtx (<ssescalarmode>mode);
15833 convert_move (op2, operands[2], false);
15836 for (i = 0; i < <ssescalarnum>; i++)
15837 RTVEC_ELT (vs, i) = op2;
15839 emit_insn (gen_vec_init<mode> (reg, par));
15840 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for VI_128 modes with an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand,
;; the count is converted to <ssescalarmode> if needed, replicated into a
;; vector register via gen_vec_init<mode>, negated with gen_neg<mode>2,
;; and the negated vector is passed to gen_xop_vrotl<mode>3.  A right
;; rotate is thus expressed as the variable-rotate pattern with negated
;; per-element counts.
;; NOTE(review): the insn condition and the statements that follow the
;; emit calls are not visible in this excerpt.
15845 (define_expand "rotr<mode>3"
15846 [(set (match_operand:VI_128 0 "register_operand")
15848 (match_operand:VI_128 1 "nonimmediate_operand")
15849 (match_operand:SI 2 "general_operand")))]
15852 /* If we were given a scalar, convert it to parallel */
15853 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15855 rtvec vs = rtvec_alloc (<ssescalarnum>);
15856 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15857 rtx neg = gen_reg_rtx (<MODE>mode);
15858 rtx reg = gen_reg_rtx (<MODE>mode);
15859 rtx op2 = operands[2];
15862 if (GET_MODE (op2) != <ssescalarmode>mode)
15864 op2 = gen_reg_rtx (<ssescalarmode>mode);
15865 convert_move (op2, operands[2], false);
15868 for (i = 0; i < <ssescalarnum>; i++)
15869 RTVEC_ELT (vs, i) = op2;
15871 emit_insn (gen_vec_init<mode> (reg, par));
15872 emit_insn (gen_neg<mode>2 (neg, reg));
15873 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate with an immediate count: outputs vprot<ssemodesuffix> with
;; operand 2 as the immediate.  Operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand; operand 1 may be a register or
;; memory.
15878 (define_insn "xop_rotl<mode>3"
15879 [(set (match_operand:VI_128 0 "register_operand" "=x")
15881 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15882 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15884 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15885 [(set_attr "type" "sseishft")
15886 (set_attr "length_immediate" "1")
15887 (set_attr "mode" "TI")])
;; Immediate-count rotate pattern that is also output as
;; vprot<ssemodesuffix>.  The output code computes
;; (element bit size - operand 2) as a new constant, and the template
;; prints %3 as the immediate instead of %2.
;; NOTE(review): the left-hand side of the assignment is on a line not
;; visible in this excerpt; presumably it is operands[3], since the
;; template prints %3 -- confirm.
15889 (define_insn "xop_rotr<mode>3"
15890 [(set (match_operand:VI_128 0 "register_operand" "=x")
15892 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15893 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15897 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15898 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15900 [(set_attr "type" "sseishft")
15901 (set_attr "length_immediate" "1")
15902 (set_attr "mode" "TI")])
;; Variable rotate-right expander: all three operands are VI_128
;; registers.  Operand 2 (the per-element counts) is negated into a fresh
;; register with gen_neg<mode>2, and the result is handed to
;; gen_xop_vrotl<mode>3.
15904 (define_expand "vrotr<mode>3"
15905 [(match_operand:VI_128 0 "register_operand")
15906 (match_operand:VI_128 1 "register_operand")
15907 (match_operand:VI_128 2 "register_operand")]
15910 rtx reg = gen_reg_rtx (<MODE>mode);
15911 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15912 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Variable rotate-left expander: all three operands are VI_128 registers
;; and are forwarded unchanged to gen_xop_vrotl<mode>3.
15916 (define_expand "vrotl<mode>3"
15917 [(match_operand:VI_128 0 "register_operand")
15918 (match_operand:VI_128 1 "register_operand")
15919 (match_operand:VI_128 2 "register_operand")]
15922 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable rotate: outputs vprot<ssemodesuffix> with operand 1
;; as the data and operand 2 as the count vector.  The RTL is an
;; if_then_else keyed on operand 2, and its last visible arm uses
;; (neg operand 2).  Either operand 1 or operand 2 may be memory, but the
;; insn condition rejects both being memory.
;; NOTE(review): the comparison and the rotate operators inside the
;; if_then_else are not visible in this excerpt.
15926 (define_insn "xop_vrotl<mode>3"
15927 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15928 (if_then_else:VI_128
15930 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15933 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15937 (neg:VI_128 (match_dup 2)))))]
15938 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15939 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15940 [(set_attr "type" "sseishft")
15941 (set_attr "prefix_data16" "0")
15942 (set_attr "prefix_extra" "2")
15943 (set_attr "mode" "TI")])
15945 ;; XOP packed shift instructions.
;; Per-element logical shift right for VI12_128 modes.  The count vector
;; (operand 2) is negated into a fresh register with gen_neg<mode>2 and the
;; shift is emitted through gen_xop_shl<mode>3.
;; NOTE(review): the insn condition is not visible in this excerpt.
15946 (define_expand "vlshr<mode>3"
15947 [(set (match_operand:VI12_128 0 "register_operand")
15949 (match_operand:VI12_128 1 "register_operand")
15950 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15953 rtx neg = gen_reg_rtx (<MODE>mode);
15954 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15955 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element logical shift right for VI48_128 modes, available when
;; TARGET_AVX2 or TARGET_XOP.  The visible preparation code negates the
;; count vector (operand 2) and emits gen_xop_shl<mode>3.
;; NOTE(review): the guard that selects this XOP path is not visible in
;; this excerpt.
15959 (define_expand "vlshr<mode>3"
15960 [(set (match_operand:VI48_128 0 "register_operand")
15962 (match_operand:VI48_128 1 "register_operand")
15963 (match_operand:VI48_128 2 "nonimmediate_operand")))]
15964 "TARGET_AVX2 || TARGET_XOP"
15968 rtx neg = gen_reg_rtx (<MODE>mode);
15969 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15970 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_512 modes: register destination and source,
;; register-or-memory count vector.  No preparation code is visible.
;; NOTE(review): the insn condition is not visible in this excerpt.
15975 (define_expand "vlshr<mode>3"
15976 [(set (match_operand:VI48_512 0 "register_operand")
15978 (match_operand:VI48_512 1 "register_operand")
15979 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr<mode>3 for VI48_256 modes: register destination and source,
;; register-or-memory count vector.  No preparation code is visible.
;; NOTE(review): the insn condition is not visible in this excerpt.
15982 (define_expand "vlshr<mode>3"
15983 [(set (match_operand:VI48_256 0 "register_operand")
15985 (match_operand:VI48_256 1 "register_operand")
15986 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vashrv8hi3 expander, enabled for TARGET_XOP or for
;; TARGET_AVX512BW && TARGET_AVX512VL.  The visible preparation code
;; negates the V8HI count vector (operand 2) and emits gen_xop_shav8hi3.
;; NOTE(review): the guard that selects this XOP path is not visible in
;; this excerpt.
15989 (define_expand "vashrv8hi3<mask_name>"
15990 [(set (match_operand:V8HI 0 "register_operand")
15992 (match_operand:V8HI 1 "register_operand")
15993 (match_operand:V8HI 2 "nonimmediate_operand")))]
15994 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
15998 rtx neg = gen_reg_rtx (V8HImode);
15999 emit_insn (gen_negv8hi2 (neg, operands[2]));
16000 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
;; vashrv16qi3 expander.  The V16QI count vector (operand 2) is negated
;; into a fresh register and the shift is emitted through
;; gen_xop_shav16qi3.
;; NOTE(review): the insn condition is not visible in this excerpt.
16005 (define_expand "vashrv16qi3"
16006 [(set (match_operand:V16QI 0 "register_operand")
16008 (match_operand:V16QI 1 "register_operand")
16009 (match_operand:V16QI 2 "nonimmediate_operand")))]
16012 rtx neg = gen_reg_rtx (V16QImode);
16013 emit_insn (gen_negv16qi2 (neg, operands[2]));
16014 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
;; vashrv2di3 expander, enabled for TARGET_XOP or TARGET_AVX512VL.  The
;; visible preparation code negates the V2DI count vector (operand 2) and
;; emits gen_xop_shav2di3.
;; NOTE(review): the guard that selects this XOP path is not visible in
;; this excerpt.
16018 (define_expand "vashrv2di3<mask_name>"
16019 [(set (match_operand:V2DI 0 "register_operand")
16021 (match_operand:V2DI 1 "register_operand")
16022 (match_operand:V2DI 2 "nonimmediate_operand")))]
16023 "TARGET_XOP || TARGET_AVX512VL"
16027 rtx neg = gen_reg_rtx (V2DImode);
16028 emit_insn (gen_negv2di2 (neg, operands[2]));
16029 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right of V4SI (ashiftrt), enabled for
;; TARGET_AVX2 or TARGET_XOP.  The visible preparation code negates the
;; count vector (operand 2) and emits gen_xop_shav4si3.
;; NOTE(review): the guard that selects this XOP path is not visible in
;; this excerpt.
16034 (define_expand "vashrv4si3"
16035 [(set (match_operand:V4SI 0 "register_operand")
16036 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16037 (match_operand:V4SI 2 "nonimmediate_operand")))]
16038 "TARGET_AVX2 || TARGET_XOP"
16042 rtx neg = gen_reg_rtx (V4SImode);
16043 emit_insn (gen_negv4si2 (neg, operands[2]));
16044 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right of V16SI (ashiftrt) by a V16SI count
;; vector.  No preparation code is visible.
;; NOTE(review): the insn condition is not visible in this excerpt.
16049 (define_expand "vashrv16si3"
16050 [(set (match_operand:V16SI 0 "register_operand")
16051 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16052 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Per-element arithmetic shift right of V8SI (ashiftrt) by a V8SI count
;; vector.  No preparation code is visible.
;; NOTE(review): the insn condition is not visible in this excerpt.
16055 (define_expand "vashrv8si3"
16056 [(set (match_operand:V8SI 0 "register_operand")
16057 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16058 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Per-element shift left for VI12_128 modes.  The operands are forwarded
;; unchanged to gen_xop_sha<mode>3 (no negation of the count vector).
;; NOTE(review): the insn condition is not visible in this excerpt.
16061 (define_expand "vashl<mode>3"
16062 [(set (match_operand:VI12_128 0 "register_operand")
16064 (match_operand:VI12_128 1 "register_operand")
16065 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16068 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element shift left for VI48_128 modes, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The visible preparation code forces the count vector
;; (operand 2) into a register and emits gen_xop_sha<mode>3.
;; NOTE(review): the guard that selects this XOP path is not visible in
;; this excerpt.
16072 (define_expand "vashl<mode>3"
16073 [(set (match_operand:VI48_128 0 "register_operand")
16075 (match_operand:VI48_128 1 "register_operand")
16076 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16077 "TARGET_AVX2 || TARGET_XOP"
16081 operands[2] = force_reg (<MODE>mode, operands[2]);
16082 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_512 modes: register destination and source,
;; register-or-memory count vector.  No preparation code is visible.
;; NOTE(review): the insn condition is not visible in this excerpt.
16087 (define_expand "vashl<mode>3"
16088 [(set (match_operand:VI48_512 0 "register_operand")
16090 (match_operand:VI48_512 1 "register_operand")
16091 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl<mode>3 for VI48_256 modes: register destination and source,
;; register-or-memory count vector.  No preparation code is visible.
;; NOTE(review): the insn condition is not visible in this excerpt.
16094 (define_expand "vashl<mode>3"
16095 [(set (match_operand:VI48_256 0 "register_operand")
16097 (match_operand:VI48_256 1 "register_operand")
16098 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Per-element variable shift that outputs vpsha<ssemodesuffix>, with
;; operand 1 as the data and operand 2 as the count vector.  The RTL is an
;; if_then_else keyed on operand 2, and its last visible arm uses
;; (neg operand 2).  The vashr* expanders in this file call this pattern
;; with a negated count vector.  Either source may be memory, but not both.
;; NOTE(review): the comparison and the shift operators inside the
;; if_then_else are not visible in this excerpt.
16101 (define_insn "xop_sha<mode>3"
16102 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16103 (if_then_else:VI_128
16105 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16108 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16112 (neg:VI_128 (match_dup 2)))))]
16113 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16114 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16115 [(set_attr "type" "sseishft")
16116 (set_attr "prefix_data16" "0")
16117 (set_attr "prefix_extra" "2")
16118 (set_attr "mode" "TI")])
;; Per-element variable shift that outputs vpshl<ssemodesuffix>, with
;; operand 1 as the data and operand 2 as the count vector.  The RTL is an
;; if_then_else keyed on operand 2, and its last visible arm uses
;; (neg operand 2).  The vlshr* expanders in this file call this pattern
;; with a negated count vector.  Either source may be memory, but not both.
;; NOTE(review): the comparison and the shift operators inside the
;; if_then_else are not visible in this excerpt.
16120 (define_insn "xop_shl<mode>3"
16121 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16122 (if_then_else:VI_128
16124 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16127 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16131 (neg:VI_128 (match_dup 2)))))]
16132 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16133 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16134 [(set_attr "type" "sseishft")
16135 (set_attr "prefix_data16" "0")
16136 (set_attr "prefix_extra" "2")
16137 (set_attr "mode" "TI")])
;; Shift of a byte vector (VI1_AVX512 modes) by a scalar SImode count, for
;; every code in any_shift.
;; With TARGET_XOP and V16QImode the expansion goes through the XOP
;; variable-shift patterns:
;;   - for codes other than ASHIFT a constant count is negated at expand
;;     time with GEN_INT (-INTVAL (...));
;;   - the count is replicated 16 times into a PARALLEL and loaded into a
;;     V16QI register with gen_vec_initv16qi;
;;   - gen_negv16qi2 can negate that register in place;
;;   - LSHIFTRT uses gen_xop_shlv16qi3, the other codes gen_xop_shav16qi3.
;; The last visible statement calls ix86_expand_vecop_qihi.
;; NOTE(review): the lines that set `negate', guard the gen_negv16qi2
;; call and separate the XOP path from the ix86_expand_vecop_qihi call are
;; not visible in this excerpt; presumably `negate' is set for
;; non-constant right-shift counts and the final call is the non-XOP
;; fallback -- confirm.
16139 (define_expand "<shift_insn><mode>3"
16140 [(set (match_operand:VI1_AVX512 0 "register_operand")
16141 (any_shift:VI1_AVX512
16142 (match_operand:VI1_AVX512 1 "register_operand")
16143 (match_operand:SI 2 "nonmemory_operand")))]
16146 if (TARGET_XOP && <MODE>mode == V16QImode)
16148 bool negate = false;
16149 rtx (*gen) (rtx, rtx, rtx);
16153 if (<CODE> != ASHIFT)
16155 if (CONST_INT_P (operands[2]))
16156 operands[2] = GEN_INT (-INTVAL (operands[2]));
16160 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16161 for (i = 0; i < 16; i++)
16162 XVECEXP (par, 0, i) = operands[2];
16164 tmp = gen_reg_rtx (V16QImode);
16165 emit_insn (gen_vec_initv16qi (tmp, par));
16168 emit_insn (gen_negv16qi2 (tmp, tmp));
16170 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16171 emit_insn (gen (operands[0], operands[1], tmp));
16174 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; ashrv2di3 expander: V2DI shifted by a scalar DImode count, enabled for
;; TARGET_XOP or TARGET_AVX512VL.
;; When TARGET_AVX512VL is not set, the expansion uses the XOP pattern:
;;   - a constant count is negated at expand time;
;;   - the count is replicated into a two-element PARALLEL and loaded into
;;     a V2DI register with gen_vec_initv2di;
;;   - gen_negv2di2 can negate that register in place;
;;   - the shift is emitted with gen_xop_shav2di3.
;; NOTE(review): the lines that set `negate' and guard the gen_negv2di2
;; call are not visible in this excerpt; presumably the run-time negation
;; is used only for non-constant counts -- confirm.
16178 (define_expand "ashrv2di3"
16179 [(set (match_operand:V2DI 0 "register_operand")
16181 (match_operand:V2DI 1 "register_operand")
16182 (match_operand:DI 2 "nonmemory_operand")))]
16183 "TARGET_XOP || TARGET_AVX512VL"
16185 if (!TARGET_AVX512VL)
16187 rtx reg = gen_reg_rtx (V2DImode);
16189 bool negate = false;
16192 if (CONST_INT_P (operands[2]))
16193 operands[2] = GEN_INT (-INTVAL (operands[2]));
16197 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16198 for (i = 0; i < 2; i++)
16199 XVECEXP (par, 0, i) = operands[2];
16201 emit_insn (gen_vec_initv2di (reg, par));
16204 emit_insn (gen_negv2di2 (reg, reg));
16206 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16211 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> for every mode in FMAMODE: register destination,
;; register-or-memory source.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
16212 (define_insn "xop_frcz<mode>2"
16213 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16215 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16218 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16219 [(set_attr "type" "ssecvt1")
16220 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 vfrcz pattern.  The preparation statement sets
;; operands[2] to the all-zero constant of the vector mode; the
;; *xop_vmfrcz<mode>2 insn below requires operand 2 to be a const0_operand.
16222 (define_expand "xop_vmfrcz<mode>2"
16223 [(set (match_operand:VF_128 0 "register_operand")
16226 [(match_operand:VF_128 1 "nonimmediate_operand")]
16231 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Insn for the VF_128 vfrcz pattern: outputs
;; vfrcz<ssescalarmodesuffix>.  Operand 2 must be a zero constant
;; (const0_operand).  The Intel-syntax half of the template prints the
;; source as %<iptr>1.
16233 (define_insn "*xop_vmfrcz<mode>2"
16234 [(set (match_operand:VF_128 0 "register_operand" "=x")
16237 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16239 (match_operand:VF_128 2 "const0_operand")
16242 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16243 [(set_attr "type" "ssecvt1")
16244 (set_attr "mode" "<MODE>")])
;; XOP vector compare producing a VI_128 result.  Operand 1 is the
;; comparison operator itself (any code accepted by
;; ix86_comparison_int_operator) applied to operand 2 (register) and
;; operand 3 (register or memory).  The mnemonic is built as
;; vpcom%Y1<ssemodesuffix>, where %Y1 prints operand 1.
16246 (define_insn "xop_maskcmp<mode>3"
16247 [(set (match_operand:VI_128 0 "register_operand" "=x")
16248 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16249 [(match_operand:VI_128 2 "register_operand" "x")
16250 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16252 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16253 [(set_attr "type" "sse4arg")
16254 (set_attr "prefix_data16" "0")
16255 (set_attr "prefix_rep" "0")
16256 (set_attr "prefix_extra" "2")
16257 (set_attr "length_immediate" "1")
16258 (set_attr "mode" "TI")])
;; Same shape as xop_maskcmp<mode>3, but operand 1 must be accepted by
;; ix86_comparison_uns_operator, and the mnemonic carries a `u' before the
;; mode suffix: vpcom%Y1u<ssemodesuffix>.
16260 (define_insn "xop_maskcmp_uns<mode>3"
16261 [(set (match_operand:VI_128 0 "register_operand" "=x")
16262 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16263 [(match_operand:VI_128 2 "register_operand" "x")
16264 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16266 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16267 [(set_attr "type" "ssecmp")
16268 (set_attr "prefix_data16" "0")
16269 (set_attr "prefix_rep" "0")
16270 (set_attr "prefix_extra" "2")
16271 (set_attr "length_immediate" "1")
16272 (set_attr "mode" "TI")])
16274 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
16275 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
16276 ;; the exact instruction generated for the intrinsic.
;; The comparison (operand 1, an ix86_comparison_uns_operator on operands
;; 2 and 3) is wrapped in UNSPEC_XOP_UNSIGNED_CMP rather than appearing as
;; a bare comparison.  The output template is the same
;; vpcom%Y1u<ssemodesuffix> as in xop_maskcmp_uns<mode>3.
16277 (define_insn "xop_maskcmp_uns2<mode>3"
16278 [(set (match_operand:VI_128 0 "register_operand" "=x")
16280 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16281 [(match_operand:VI_128 2 "register_operand" "x")
16282 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
16283 UNSPEC_XOP_UNSIGNED_CMP))]
16285 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16286 [(set_attr "type" "ssecmp")
16287 (set_attr "prefix_data16" "0")
16288 (set_attr "prefix_extra" "2")
16289 (set_attr "length_immediate" "1")
16290 (set_attr "mode" "TI")])
16292 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
16293 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE over operand 1 (register), operand 2 (register or
;; memory) and operand 3 (an integer constant).  The output code selects
;; the mnemonic from operand 3: non-zero gives vpcomtrue<ssemodesuffix>,
;; zero gives vpcomfalse<ssemodesuffix>.
16294 (define_insn "xop_pcom_tf<mode>3"
16295 [(set (match_operand:VI_128 0 "register_operand" "=x")
16297 [(match_operand:VI_128 1 "register_operand" "x")
16298 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
16299 (match_operand:SI 3 "const_int_operand" "n")]
16300 UNSPEC_XOP_TRUEFALSE))]
16303 return ((INTVAL (operands[3]) != 0)
16304 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16305 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
16307 [(set_attr "type" "ssecmp")
16308 (set_attr "prefix_data16" "0")
16309 (set_attr "prefix_extra" "2")
16310 (set_attr "length_immediate" "1")
16311 (set_attr "mode" "TI")])
;; vpermil2<ssemodesuffix> for VF_128_256 modes.  Operands 1 and 2 are the
;; two data sources, operand 3 is a selector vector of mode
;; <sseintvecmode> (register or memory), and operand 4 is an immediate in
;; the range 0..3.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
16313 (define_insn "xop_vpermil2<mode>3"
16314 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16316 [(match_operand:VF_128_256 1 "register_operand" "x")
16317 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
16318 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
16319 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16322 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
16323 [(set_attr "type" "sse4arg")
16324 (set_attr "length_immediate" "1")
16325 (set_attr "mode" "<MODE>")])
16327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI values, modelled as an unspec.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and prints
;; the two-operand aesenc; alternative 1 (isa avx) prints the
;; three-operand vaesenc.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
16329 (define_insn "aesenc"
16330 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16331 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16332 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16336 aesenc\t{%2, %0|%0, %2}
16337 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16338 [(set_attr "isa" "noavx,avx")
16339 (set_attr "type" "sselog1")
16340 (set_attr "prefix_extra" "1")
16341 (set_attr "prefix" "orig,vex")
16342 (set_attr "btver2_decode" "double,double")
16343 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI values, modelled as
;; UNSPEC_AESENCLAST.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and prints the two-operand form; alternative 1 (isa avx)
;; prints the three-operand VEX form.
16345 (define_insn "aesenclast"
16346 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16347 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16348 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16349 UNSPEC_AESENCLAST))]
16352 aesenclast\t{%2, %0|%0, %2}
16353 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16354 [(set_attr "isa" "noavx,avx")
16355 (set_attr "type" "sselog1")
16356 (set_attr "prefix_extra" "1")
16357 (set_attr "prefix" "orig,vex")
16358 (set_attr "btver2_decode" "double,double")
16359 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI values, modelled as an unspec.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and prints
;; the two-operand aesdec; alternative 1 (isa avx) prints the
;; three-operand vaesdec.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
16361 (define_insn "aesdec"
16362 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16363 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16364 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16368 aesdec\t{%2, %0|%0, %2}
16369 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16370 [(set_attr "isa" "noavx,avx")
16371 (set_attr "type" "sselog1")
16372 (set_attr "prefix_extra" "1")
16373 (set_attr "prefix" "orig,vex")
16374 (set_attr "btver2_decode" "double,double")
16375 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI values, modelled as
;; UNSPEC_AESDECLAST.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination and prints the two-operand form; alternative 1 (isa avx)
;; prints the three-operand VEX form.
16377 (define_insn "aesdeclast"
16378 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16379 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16380 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16381 UNSPEC_AESDECLAST))]
16384 aesdeclast\t{%2, %0|%0, %2}
16385 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16386 [(set_attr "isa" "noavx,avx")
16387 (set_attr "type" "sselog1")
16388 (set_attr "prefix_extra" "1")
16389 (set_attr "prefix" "orig,vex")
16390 (set_attr "btver2_decode" "double,double")
16391 (set_attr "mode" "TI")])
;; aesimc on a V2DI register-or-memory source, modelled as an unspec.
;; A single template with the %v prefix marker and prefix "maybe_vex"
;; serves both the legacy and the VEX encoding.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
16393 (define_insn "aesimc"
16394 [(set (match_operand:V2DI 0 "register_operand" "=x")
16395 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
16398 "%vaesimc\t{%1, %0|%0, %1}"
16399 [(set_attr "type" "sselog1")
16400 (set_attr "prefix_extra" "1")
16401 (set_attr "prefix" "maybe_vex")
16402 (set_attr "mode" "TI")])
;; aeskeygenassist, modelled as UNSPEC_AESKEYGENASSIST over a V2DI
;; register-or-memory source (operand 1) and an immediate in 0..255
;; (operand 2).  A single %v-prefixed template with prefix "maybe_vex"
;; serves both encodings.
16404 (define_insn "aeskeygenassist"
16405 [(set (match_operand:V2DI 0 "register_operand" "=x")
16406 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
16407 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16408 UNSPEC_AESKEYGENASSIST))]
16410 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
16411 [(set_attr "type" "sselog1")
16412 (set_attr "prefix_extra" "1")
16413 (set_attr "length_immediate" "1")
16414 (set_attr "prefix" "maybe_vex")
16415 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI values with an immediate in 0..255
;; (operand 3), modelled as an unspec.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination and prints the legacy form; alternative 1
;; (isa avx) prints the three-source VEX form.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
16417 (define_insn "pclmulqdq"
16418 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16419 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16420 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
16421 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16425 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
16426 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16427 [(set_attr "isa" "noavx,avx")
16428 (set_attr "type" "sselog1")
16429 (set_attr "prefix_extra" "1")
16430 (set_attr "length_immediate" "1")
16431 (set_attr "prefix" "orig,vex")
16432 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL later matched by *avx_vzeroall.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.  The PARALLEL has
;; nregs + 1 elements: element 0 is an UNSPEC_VOLATILE over (const_int 0),
;; and elements 1..nregs each SET the V8SImode register SSE_REGNO (regno)
;; to the V8SImode zero constant.
;; NOTE(review): the unspec_volatile code passed in element 0 is on a line
;; not visible in this excerpt.
16434 (define_expand "avx_vzeroall"
16435 [(match_par_dup 0 [(const_int 0)])]
16438 int nregs = TARGET_64BIT ? 16 : 8;
16441 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
16443 XVECEXP (operands[0], 0, 0)
16444 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
16447 for (regno = 0; regno < nregs; regno++)
16448 XVECEXP (operands[0], 0, regno + 1)
16449 = gen_rtx_SET (VOIDmode,
16450 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
16451 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; mark it as a VEX-encoded insn with no modrm byte and no memory access.
;; NOTE(review): the insn condition and the output template are not
;; visible in this excerpt.
16454 (define_insn "*avx_vzeroall"
16455 [(match_parallel 0 "vzeroall_operation"
16456 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
16459 [(set_attr "type" "sse")
16460 (set_attr "modrm" "0")
16461 (set_attr "memory" "none")
16462 (set_attr "prefix" "vex")
16463 (set_attr "btver2_decode" "vector")
16464 (set_attr "mode" "OI")])
16466 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
16467 ;; if the upper 128bits are unused.
;; Modelled as a bare UNSPECV_VZEROUPPER unspec_volatile with no operands.
;; The attributes mark it as a VEX-encoded insn with no modrm byte and no
;; memory access.
;; NOTE(review): the insn condition and the output template are not
;; visible in this excerpt.
16468 (define_insn "avx_vzeroupper"
16469 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
16472 [(set_attr "type" "sse")
16473 (set_attr "modrm" "0")
16474 (set_attr "memory" "none")
16475 (set_attr "prefix" "vex")
16476 (set_attr "btver2_decode" "vector")
16477 (set_attr "mode" "OI")])
;; vpbroadcast<ssemodesuffix> for every VI mode.  Element 0 of operand 1 (a
;; <ssexmmmode> register or memory operand) is selected as a
;; <ssescalarmode> value.  The Intel-syntax half of the template prints the
;; source as %<iptr>1.
;; NOTE(review): the operator that wraps the vec_select and the insn
;; condition are not visible in this excerpt.
16479 (define_insn "avx2_pbroadcast<mode>"
16480 [(set (match_operand:VI 0 "register_operand" "=x")
16482 (vec_select:<ssescalarmode>
16483 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
16484 (parallel [(const_int 0)]))))]
16486 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16487 [(set_attr "type" "ssemov")
16488 (set_attr "prefix_extra" "1")
16489 (set_attr "prefix" "vex")
16490 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcast<ssemodesuffix> whose source is itself a VI_256 vector:
;; element 0 of operand 1 is duplicated into every element of operand 0.
;; Alternative 0 takes the source from memory and prints %<iptr>1 in Intel
;; syntax; alternative 1 takes it from a register and prints it with the
;; %x1 modifier.
;; NOTE(review): the insn condition is not visible in this excerpt.
16492 (define_insn "avx2_pbroadcast<mode>_1"
16493 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
16494 (vec_duplicate:VI_256
16495 (vec_select:<ssescalarmode>
16496 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
16497 (parallel [(const_int 0)]))))]
16500 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
16501 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
16502 [(set_attr "type" "ssemov")
16503 (set_attr "prefix_extra" "1")
16504 (set_attr "prefix" "vex")
16505 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute (vperm<ssemodesuffix>) for VI48F_256_512 modes,
;; modelled as an unspec.  Operand 1 is the data (register or memory) and
;; operand 2 an <sseintvecmode> register.  Requires TARGET_AVX2 together
;; with <mask_mode512bit_condition>; the masking operand is printed via
;; <mask_operand3>.
;; NOTE(review): the unspec code is not visible in this excerpt.
16507 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
16508 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
16509 (unspec:VI48F_256_512
16510 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
16511 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16513 "TARGET_AVX2 && <mask_mode512bit_condition>"
16514 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16515 [(set_attr "type" "sselog")
16516 (set_attr "prefix" "<mask_prefix2>")
16517 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute (vperm<ssemodesuffix>) for VI1_AVX512VL modes,
;; modelled as an unspec.  Operand 1 is the data (register or memory) and
;; operand 2 an <sseintvecmode> register.  Requires TARGET_AVX512VBMI
;; together with <mask_mode512bit_condition>.
;; NOTE(review): the unspec code is not visible in this excerpt.
16519 (define_insn "<avx512>_permvar<mode><mask_name>"
16520 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
16521 (unspec:VI1_AVX512VL
16522 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
16523 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16525 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
16526 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16527 [(set_attr "type" "sselog")
16528 (set_attr "prefix" "<mask_prefix2>")
16529 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute (vperm<ssemodesuffix>) for VI2_AVX512VL modes,
;; modelled as an unspec.  Operand 1 is the data (register or memory) and
;; operand 2 an <sseintvecmode> register.  Requires TARGET_AVX512BW
;; together with <mask_mode512bit_condition>.
;; NOTE(review): the unspec code is not visible in this excerpt.
16531 (define_insn "<avx512>_permvar<mode><mask_name>"
16532 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16533 (unspec:VI2_AVX512VL
16534 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
16535 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16537 "TARGET_AVX512BW && <mask_mode512bit_condition>"
16538 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16539 [(set_attr "type" "sselog")
16540 (set_attr "prefix" "<mask_prefix2>")
16541 (set_attr "mode" "<sseinsnmode>")])
;; Immediate permute expander for VI8F_256_512 modes.  The 8-bit immediate
;; (operand 2) is split into four 2-bit fields (bits 1:0, 3:2, 5:4 and
;; 7:6), which are passed as separate constants to
;; gen_<avx2_avx512>_perm<mode>_1.
;; NOTE(review): the insn condition is not visible in this excerpt.
16543 (define_expand "<avx2_avx512>_perm<mode>"
16544 [(match_operand:VI8F_256_512 0 "register_operand")
16545 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16546 (match_operand:SI 2 "const_0_to_255_operand")]
16549 int mask = INTVAL (operands[2]);
16550 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
16551 GEN_INT ((mask >> 0) & 3),
16552 GEN_INT ((mask >> 2) & 3),
16553 GEN_INT ((mask >> 4) & 3),
16554 GEN_INT ((mask >> 6) & 3)));
;; Masked variant of the immediate permute expander.  The 8-bit immediate
;; (operand 2) is split into four 2-bit fields as in the unmasked
;; expander, and operand 3 (a vector_move_operand) and operand 4 (an
;; <avx512fmaskmode> register) are forwarded as the trailing arguments of
;; gen_<avx2_avx512>_perm<mode>_1_mask.
;; NOTE(review): the insn condition is not visible in this excerpt.
16558 (define_expand "<avx512>_perm<mode>_mask"
16559 [(match_operand:VI8F_256_512 0 "register_operand")
16560 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16561 (match_operand:SI 2 "const_0_to_255_operand")
16562 (match_operand:VI8F_256_512 3 "vector_move_operand")
16563 (match_operand:<avx512fmaskmode> 4 "register_operand")]
16566 int mask = INTVAL (operands[2]);
16567 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
16568 GEN_INT ((mask >> 0) & 3),
16569 GEN_INT ((mask >> 2) & 3),
16570 GEN_INT ((mask >> 4) & 3),
16571 GEN_INT ((mask >> 6) & 3),
16572 operands[3], operands[4]));
;; Immediate permute insn for VI8F_256_512 modes, written as a vec_select
;; of operand 1 with four constant indices (operands 2-5, each in 0..3).
;; The output code packs the four indices back into one 8-bit immediate
;; (operand 2 in bits 1:0, operand 3 in bits 3:2, operand 4 in bits 5:4,
;; operand 5 in bits 7:6), overwrites operands[2] with it and returns the
;; vperm<ssemodesuffix> template that prints %2 as the immediate.
;; NOTE(review): the declaration and initial value of `mask' are on a line
;; not visible in this excerpt.
16576 (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
16577 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
16578 (vec_select:VI8F_256_512
16579 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
16580 (parallel [(match_operand 2 "const_0_to_3_operand")
16581 (match_operand 3 "const_0_to_3_operand")
16582 (match_operand 4 "const_0_to_3_operand")
16583 (match_operand 5 "const_0_to_3_operand")])))]
16584 "TARGET_AVX2 && <mask_mode512bit_condition>"
16587 mask |= INTVAL (operands[2]) << 0;
16588 mask |= INTVAL (operands[3]) << 2;
16589 mask |= INTVAL (operands[4]) << 4;
16590 mask |= INTVAL (operands[5]) << 6;
16591 operands[2] = GEN_INT (mask);
16592 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16594 [(set_attr "type" "sselog")
16595 (set_attr "prefix" "<mask_prefix2>")
16596 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 on V4DI values: operand 1 is a register, operand 2 a
;; register or memory operand, and operand 3 an immediate in 0..255.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
16598 (define_insn "avx2_permv2ti"
16599 [(set (match_operand:V4DI 0 "register_operand" "=x")
16601 [(match_operand:V4DI 1 "register_operand" "x")
16602 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
16603 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16606 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16607 [(set_attr "type" "sselog")
16608 (set_attr "prefix" "vex")
16609 (set_attr "mode" "OI")])
;; vbroadcastsd with a register source: element 0 of the V2DF register in
;; operand 1 is duplicated into all four elements of the V4DF result.
;; NOTE(review): the vec_select line and the insn condition are not
;; visible in this excerpt.
16611 (define_insn "avx2_vec_dupv4df"
16612 [(set (match_operand:V4DF 0 "register_operand" "=x")
16613 (vec_duplicate:V4DF
16615 (match_operand:V2DF 1 "register_operand" "x")
16616 (parallel [(const_int 0)]))))]
16618 "vbroadcastsd\t{%1, %0|%0, %1}"
16619 [(set_attr "type" "sselog1")
16620 (set_attr "prefix" "vex")
16621 (set_attr "mode" "V4DF")])
;; Broadcast element 0 of operand 1 (itself a VI_AVX512BW vector) into
;; every element of operand 0 with vpbroadcast<ssemodesuffix>.
;; Alternative 0 takes the source from a register, alternative 1 from
;; memory.
;;
;; The vec_select extracts a single element, so its result mode is the
;; scalar <ssescalarmode>, exactly as in avx2_pbroadcast<mode>_1.
;;
;; The output is an "@" multi-alternative template, one line per
;; constraint alternative and in the same order as the constraints "v,m":
;; the register alternative prints the source with %x1, the memory
;; alternative prints %<iptr>1 in Intel syntax.
;; NOTE(review): the insn condition is not visible in this excerpt.
16623 (define_insn "<avx512>_vec_dup<mode>_1"
16624 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
16625 (vec_duplicate:VI_AVX512BW
16626 (vec_select:<ssescalarmode>
16627 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
16628 (parallel [(const_int 0)]))))]
"@
16630 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
16631 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16632 [(set_attr "type" "ssemov")
16633 (set_attr "prefix" "evex")
16634 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast for V48_AVX512VL modes: element 0 of operand 1 (a
;; <ssexmmmode> register or memory operand) is selected as a
;; <ssescalarmode> value and duplicated into every element of operand 0.
;; The mnemonic is assembled from v<sseintprefix>broadcast<bcstscalarsuff>,
;; and the masking operand is printed via <mask_operand2>.
;; NOTE(review): the insn condition is not visible in this excerpt.
16636 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16637 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
16638 (vec_duplicate:V48_AVX512VL
16639 (vec_select:<ssescalarmode>
16640 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16641 (parallel [(const_int 0)]))))]
16643 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16644 [(set_attr "type" "ssemov")
16645 (set_attr "prefix" "evex")
16646 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast for VI12_AVX512VL modes: element 0 of operand 1 (a
;; <ssexmmmode> register or memory operand) is selected as a
;; <ssescalarmode> value and duplicated into every element of operand 0
;; with vpbroadcast<bcstscalarsuff>.  The masking operand is printed via
;; <mask_operand2>.
;; NOTE(review): the insn condition is not visible in this excerpt.
16648 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16649 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16650 (vec_duplicate:VI12_AVX512VL
16651 (vec_select:<ssescalarmode>
16652 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16653 (parallel [(const_int 0)]))))]
16655 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16656 [(set_attr "type" "ssemov")
16657 (set_attr "prefix" "evex")
16658 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a <ssexmmmode> value (operand 1) across a V16FI vector.
;; Alternative 0 (register source) prints vshuf<shuffletype>32x4 with
;; immediate 0 and the source given twice through the %g1 modifier;
;; alternative 1 (memory source) prints vbroadcast<shuffletype>32x4.
;; NOTE(review): the insn condition is not visible in this excerpt.
16660 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16661 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16662 (vec_duplicate:V16FI
16663 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16666 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16667 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16668 [(set_attr "type" "ssemov")
16669 (set_attr "prefix" "evex")
16670 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a <ssehalfvecmode> value (operand 1) across a V8FI vector.
;; Alternative 0 (register source) prints vshuf<shuffletype>64x2 with
;; immediate 0x44 and the source given twice through the %g1 modifier;
;; alternative 1 (memory source) prints vbroadcast<shuffletype>64x4.
;; NOTE(review): the insn condition is not visible in this excerpt.
16672 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16673 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16674 (vec_duplicate:V8FI
16675 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16678 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16679 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16680 [(set_attr "type" "ssemov")
16681 (set_attr "prefix" "evex")
16682 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a <ssescalarmode> scalar across a VI12_AVX512VL vector with
;; vpbroadcast<bcstscalarsuff>.  Alternative 0 takes the scalar from a
;; vector register or memory ("vm"); alternative 1 takes it from a
;; general-purpose register ("r") and prints it with the %k1 modifier.
;; NOTE(review): the insn condition is not visible in this excerpt.
16684 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16685 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
16686 (vec_duplicate:VI12_AVX512VL
16687 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16690 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
16691 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
16692 [(set_attr "type" "ssemov")
16693 (set_attr "prefix" "evex")
16694 (set_attr "mode" "<sseinsnmode>")
;; Duplicate a <ssescalarmode> scalar across a V48_AVX512VL vector with
;; v<sseintprefix>broadcast<bcstscalarsuff>.  Alternative 0 takes the
;; scalar from a vector register or memory ("vm"), alternative 1 from a
;; general-purpose register ("r").  The "enabled" attribute makes
;; alternative 1 depend on the scalar mode being of class MODE_INT and,
;; for DImode, on TARGET_64BIT.
;; NOTE(review): the insn condition and the remaining arms of the
;; "enabled" if_then_else are not visible in this excerpt.
16696 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16697 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
16698 (vec_duplicate:V48_AVX512VL
16699 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16701 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16702 [(set_attr "type" "ssemov")
16703 (set_attr "prefix" "evex")
16704 (set_attr "mode" "<sseinsnmode>")
16705 (set (attr "enabled")
16706 (if_then_else (eq_attr "alternative" "1")
16707 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
16708 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; Duplicate an SFmode scalar into all four elements of a V4SF register.
;; Per the "isa" attribute: alternative 0 (avx, register source) uses
;; vshufps with immediate 0; alternative 1 (avx, memory source) uses
;; vbroadcastss; alternative 2 (noavx, source tied to the destination via
;; "0") uses two-operand shufps with immediate 0.
16711 (define_insn "vec_dupv4sf"
16712 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16713 (vec_duplicate:V4SF
16714 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
16717 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
16718 vbroadcastss\t{%1, %0|%0, %1}
16719 shufps\t{$0, %0, %0|%0, %0, 0}"
16720 [(set_attr "isa" "avx,avx,noavx")
16721 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
16722 (set_attr "length_immediate" "1,0,1")
16723 (set_attr "prefix_extra" "0,1,*")
16724 (set_attr "prefix" "vex,vex,orig")
16725 (set_attr "mode" "V4SF")])
;; Duplicate an SImode scalar into all four elements of a V4SI register.
;; Per the "isa" attribute: alternative 0 (sse2, register source) uses
;; %vpshufd with immediate 0; alternative 1 (avx, memory source) uses
;; vbroadcastss; alternative 2 (noavx, source tied to the destination)
;; uses shufps with immediate 0.  The last two run in V4SF mode, as the
;; "mode" attribute records.
16727 (define_insn "*vec_dupv4si"
16728 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
16729 (vec_duplicate:V4SI
16730 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
16733 %vpshufd\t{$0, %1, %0|%0, %1, 0}
16734 vbroadcastss\t{%1, %0|%0, %1}
16735 shufps\t{$0, %0, %0|%0, %0, 0}"
16736 [(set_attr "isa" "sse2,avx,noavx")
16737 (set_attr "type" "sselog1,ssemov,sselog1")
16738 (set_attr "length_immediate" "1,0,1")
16739 (set_attr "prefix_extra" "0,1,*")
16740 (set_attr "prefix" "maybe_vex,vex,orig")
16741 (set_attr "mode" "TI,V4SF,V4SF")])
;; Duplicate a DImode scalar into both elements of a V2DI register.
;; There are four alternatives, gated by the "isa" attribute as
;; sse2_noavx, avx, sse3 and noavx; the source is respectively tied to the
;; destination, in a register, in memory, and tied to the destination.
16743 (define_insn "*vec_dupv2di"
16744 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
16745 (vec_duplicate:V2DI
16746 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
16750 vpunpcklqdq\t{%d1, %0|%0, %d1}
16751 %vmovddup\t{%1, %0|%0, %1}
16753 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
16754 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
16755 (set_attr "prefix" "orig,vex,maybe_vex,orig")
16756 (set_attr "mode" "TI,TI,DF,V4SF")])
;; vbroadcasti128: load a <ssehalfvecmode> value from memory (the only
;; accepted source, "memory_operand"/"m") into a VI_256 destination.
16758 (define_insn "avx2_vbroadcasti128_<mode>"
16759 [(set (match_operand:VI_256 0 "register_operand" "=x")
16761 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16764 "vbroadcasti128\t{%1, %0|%0, %1}"
16765 [(set_attr "type" "ssemov")
16766 (set_attr "prefix_extra" "1")
16767 (set_attr "prefix" "vex")
16768 (set_attr "mode" "OI")])
;; AVX_VEC_DUP_MODE covers V8SI, V8SF, V4DI and V4DF.
16770 ;; Modes handled by AVX vec_dup patterns.
16771 (define_mode_iterator AVX_VEC_DUP_MODE
16772 [V8SI V8SF V4DI V4DF])
;; AVX2_VEC_DUP_MODE covers the integer modes V32QI, V16QI, V16HI, V8HI,
;; V8SI and V4SI.
16773 ;; Modes handled by AVX2 vec_dup patterns.
16774 (define_mode_iterator AVX2_VEC_DUP_MODE
16775 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; Duplicate a scalar into an AVX2_VEC_DUP_MODE vector.  Alternative 0
;; takes the scalar from memory and alternative 1 from an SSE register
;; (printed via %x1); both use v<sseintprefix>broadcast<bcstscalarsuff>.
;; Alternative 2 takes the scalar from a general register ("$r") with a
;; "Yi" destination.
16777 (define_insn "*vec_dup<mode>"
16778 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
16779 (vec_duplicate:AVX2_VEC_DUP_MODE
16780 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
16783 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16784 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16786 [(set_attr "type" "ssemov")
16787 (set_attr "prefix_extra" "1")
16788 (set_attr "prefix" "maybe_evex")
16789 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a scalar into an AVX_VEC_DUP_MODE vector.  The four
;; alternatives are gated by "isa" as avx2, noavx2, avx2, noavx2:
;; memory source with v<sseintprefix>broadcast<bcstscalarsuff> (avx2) or
;; vbroadcast<ssescalarmodesuffix> (noavx2), then register source with the
;; broadcast printed via %x1 (avx2) and a discouraged "?x" register
;; alternative (noavx2).
16791 (define_insn "vec_dup<mode>"
16792 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x")
16793 (vec_duplicate:AVX_VEC_DUP_MODE
16794 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))]
16797 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16798 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
16799 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16801 [(set_attr "type" "ssemov")
16802 (set_attr "prefix_extra" "1")
16803 (set_attr "prefix" "maybe_evex")
16804 (set_attr "isa" "avx2,noavx2,avx2,noavx2")
16805 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")])
;; After reload, split an AVX2_VEC_DUP_MODE duplicate whose source is a
;; general register: first place the scalar (as SImode) in element 0 of
;; the destination viewed as V4SI, with the other elements zero, then
;; broadcast from that register with avx2_pbroadcast<mode>.
16808 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
16809 (vec_duplicate:AVX2_VEC_DUP_MODE
16810 (match_operand:<ssescalarmode> 1 "register_operand")))]
16812 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
16813 available, because then we can broadcast from GPRs directly.
16814 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
16815 for V*SI mode it requires just -mavx512vl. */
16816 && !(TARGET_AVX512VL
16817 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
16818 && reload_completed && GENERAL_REG_P (operands[1])"
16821 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
16822 CONST0_RTX (V4SImode),
16823 gen_lowpart (SImode, operands[1])));
16824 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
16825 gen_lowpart (<ssexmmmode>mode,
;; After reload on AVX without AVX2, split an AVX_VEC_DUP_MODE duplicate
;; of a register into two steps: duplicate the scalar into operand 2,
;; which is the destination's hard register in <ssehalfvecmode>, then
;; vec_concat operand 2 with itself to form the full-width result.
16831 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16832 (vec_duplicate:AVX_VEC_DUP_MODE
16833 (match_operand:<ssescalarmode> 1 "register_operand")))]
16834 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16835 [(set (match_dup 2)
16836 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16838 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16839 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Replicate a <ssehalfvecmode> operand into a V_256 destination.
;; Alternative 0 (memory source) uses vbroadcast<i128>; alternative 1
;; (source tied to the destination, "0") uses vinsert<i128> with
;; immediate 1; alternative 2 (other register, "?x") uses vperm2<i128>
;; with immediate 0 on the source printed via %t1.
16841 (define_insn "avx_vbroadcastf128_<mode>"
16842 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16844 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16848 vbroadcast<i128>\t{%1, %0|%0, %1}
16849 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16850 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16851 [(set_attr "type" "ssemov,sselog1,sselog1")
16852 (set_attr "prefix_extra" "1")
16853 (set_attr "length_immediate" "0,1,1")
16854 (set_attr "prefix" "vex")
16855 (set_attr "mode" "<sseinsnmode>")])
16857 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
;; The V8SI, V4SI and V8SF entries are enabled only with TARGET_AVX512VL;
;; V16SI and V16SF are unconditional members of the iterator.
16858 (define_mode_iterator VI4F_BRCST32x2
16859 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16860 V16SF (V8SF "TARGET_AVX512VL")])
;; Map a 64-bit-element vector mode to the two-element vector mode with
;; the same element type (V2DF or V2DI).
16862 (define_mode_attr 64x2mode
16863 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
;; Map a 32-bit-element vector mode to the two-element vector mode with
;; the same element type (V2SF or V2SI).
16865 (define_mode_attr 32x2mode
16866 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16867 (V8SF "V2SF") (V4SI "V2SI")])
;; vbroadcast<shuffletype>32x2: duplicate the pair formed by elements 0
;; and 1 of operand 1 (mode <ssexmmmode>, register or memory) across a
;; VI4F_BRCST32x2 destination.
16869 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16870 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16871 (vec_duplicate:VI4F_BRCST32x2
16872 (vec_select:<32x2mode>
16873 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16874 (parallel [(const_int 0) (const_int 1)]))))]
16876 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16877 [(set_attr "type" "ssemov")
16878 (set_attr "prefix_extra" "1")
16879 (set_attr "prefix" "evex")
16880 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a <ssexmmmode> operand across a VI4F_256 destination.
;; Alternative 0 (register source) uses vshuf<shuffletype>32x4 with
;; immediate 0x0 on the source printed via %t1; alternative 1 (memory
;; source) uses vbroadcast<shuffletype>32x4.
16882 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16883 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16884 (vec_duplicate:VI4F_256
16885 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16888 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16889 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16890 [(set_attr "type" "ssemov")
16891 (set_attr "prefix_extra" "1")
16892 (set_attr "prefix" "evex")
16893 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a <ssehalfvecmode> operand across a V16FI destination.
;; Alternative 0 (register source) uses vshuf<shuffletype>32x4 with
;; immediate 0x44 on the source printed via %g1; alternative 1 (memory
;; source) uses vbroadcast<shuffletype>32x8.
16895 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16896 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16897 (vec_duplicate:V16FI
16898 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16901 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16902 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16903 [(set_attr "type" "ssemov")
16904 (set_attr "prefix_extra" "1")
16905 (set_attr "prefix" "evex")
16906 (set_attr "mode" "<sseinsnmode>")])
16908 ;; For broadcast[i|f]64x2
;; V8DI and V8DF are unconditional; V4DI and V4DF need TARGET_AVX512VL.
16909 (define_mode_iterator VI8F_BRCST64x2
16910 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; Duplicate a <64x2mode> operand across a VI8F_BRCST64x2 destination.
;; Alternative 0 (register source) uses vshuf<shuffletype>64x2 with
;; immediate 0x0 on the source printed via %<concat_tg_mode>1;
;; alternative 1 (memory source) uses vbroadcast<shuffletype>64x2.
16912 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16913 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
16914 (vec_duplicate:VI8F_BRCST64x2
16915 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
16918 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
16919 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16920 [(set_attr "type" "ssemov")
16921 (set_attr "prefix_extra" "1")
16922 (set_attr "prefix" "evex")
16923 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcastmb2q: duplicate a value derived from a QImode "Yk"-class
;; register (operand 1) into every element of a VI8_AVX512VL destination.
16925 (define_insn "avx512cd_maskb_vec_dup<mode>"
16926 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
16927 (vec_duplicate:VI8_AVX512VL
16929 (match_operand:QI 1 "register_operand" "Yk"))))]
16931 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
16932 [(set_attr "type" "mskmov")
16933 (set_attr "prefix" "evex")
16934 (set_attr "mode" "XI")])
;; vpbroadcastmw2d: duplicate a value derived from an HImode "Yk"-class
;; register (operand 1) into every element of a VI4_AVX512VL destination.
16936 (define_insn "avx512cd_maskw_vec_dup<mode>"
16937 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
16938 (vec_duplicate:VI4_AVX512VL
16940 (match_operand:HI 1 "register_operand" "Yk"))))]
16942 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
16943 [(set_attr "type" "mskmov")
16944 (set_attr "prefix" "evex")
16945 (set_attr "mode" "XI")])
16947 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
16948 ;; If it so happens that the input is in memory, use vbroadcast.
16949 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF form.  Operand 3 is the index of the element being broadcast.
;; For a memory source the address is narrowed to SFmode at byte offset
;; elt * 4 and vbroadcastss is emitted; for a register source vpermilps
;; is emitted with the immediate elt * 0x55.
16950 (define_insn "*avx_vperm_broadcast_v4sf"
16951 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16953 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
16954 (match_parallel 2 "avx_vbroadcast_operand"
16955 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16958 int elt = INTVAL (operands[3]);
16959 switch (which_alternative)
16963 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
16964 return "vbroadcastss\t{%1, %0|%0, %k1}";
16966 operands[2] = GEN_INT (elt * 0x55);
16967 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
16969 gcc_unreachable ();
16972 [(set_attr "type" "ssemov,ssemov,sselog1")
16973 (set_attr "prefix_extra" "1")
16974 (set_attr "length_immediate" "0,0,1")
16975 (set_attr "prefix" "vex")
16976 (set_attr "mode" "SF,SF,V4SF")])
;; VF_256 broadcast expressed as a vec_select; split after reload, except
;; for V4DFmode when TARGET_AVX2.  Operand 3 is the element index.  The
;; splitter code contains three strategies: with TARGET_AVX2 and element
;; 0, a plain vec_dup of the low scalar; otherwise a vpermil that
;; replicates the element within its 128-bit lane followed by a
;; vperm2f128 that copies that lane to both halves; and a form that
;; re-addresses operand 1 at the selected element so that the replacement
;; pattern (vec_duplicate of operand 1) applies.
16978 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
16979 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
16981 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
16982 (match_parallel 2 "avx_vbroadcast_operand"
16983 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16986 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
16987 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
16989 rtx op0 = operands[0], op1 = operands[1];
16990 int elt = INTVAL (operands[3]);
16996 if (TARGET_AVX2 && elt == 0)
16998 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17003 /* Shuffle element we care about into all elements of the 128-bit lane.
17004 The other lane gets shuffled too, but we don't care. */
17005 if (<MODE>mode == V4DFmode)
17006 mask = (elt & 1 ? 15 : 0);
17008 mask = (elt & 3) * 0x55;
17009 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17011 /* Shuffle the lane we care about into both lanes of the dest. */
17012 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17013 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
17017 operands[1] = adjust_address (op1, <ssescalarmode>mode,
17018 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expand an immediate-controlled vpermil for VF2 modes by turning the
;; 8-bit immediate into an explicit selection vector.  Elements are
;; handled in pairs starting at index i: bit i of the immediate chooses
;; element i or i + 1 for position i, and bit i + 1 chooses among the same
;; two for position i + 1.
17021 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17022 [(set (match_operand:VF2 0 "register_operand")
17024 (match_operand:VF2 1 "nonimmediate_operand")
17025 (match_operand:SI 2 "const_0_to_255_operand")))]
17026 "TARGET_AVX && <mask_mode512bit_condition>"
17028 int mask = INTVAL (operands[2]);
17029 rtx perm[<ssescalarnum>];
17032 for (i = 0; i < <ssescalarnum>; i = i + 2)
17034 perm[i] = GEN_INT (((mask >> i) & 1) + i);
17035 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
17039 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expand an immediate-controlled vpermil for VF1 modes by turning the
;; 8-bit immediate into an explicit selection vector.  The immediate's
;; four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7) select the source element
;; for positions 0..3 of each group of four, and the same fields are
;; reused for every group with the group's base index i added.
17042 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17043 [(set (match_operand:VF1 0 "register_operand")
17045 (match_operand:VF1 1 "nonimmediate_operand")
17046 (match_operand:SI 2 "const_0_to_255_operand")))]
17047 "TARGET_AVX && <mask_mode512bit_condition>"
17049 int mask = INTVAL (operands[2]);
17050 rtx perm[<ssescalarnum>];
17053 for (i = 0; i < <ssescalarnum>; i = i + 4)
17055 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
17056 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
17057 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
17058 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
17062 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Match a selection vector (operand 2) that avx_vpermilp_parallel
;; accepts for <MODE>mode and emit vpermil<ssemodesuffix> with an
;; immediate.  The helper's nonzero return value minus 1 is the immediate,
;; which replaces operand 2 before the template is printed.
17065 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
17066 [(set (match_operand:VF 0 "register_operand" "=v")
17068 (match_operand:VF 1 "nonimmediate_operand" "vm")
17069 (match_parallel 2 ""
17070 [(match_operand 3 "const_int_operand")])))]
17071 "TARGET_AVX && <mask_mode512bit_condition>
17072 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
17074 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
17075 operands[2] = GEN_INT (mask);
17076 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
17078 [(set_attr "type" "sselog")
17079 (set_attr "prefix_extra" "1")
17080 (set_attr "length_immediate" "1")
17081 (set_attr "prefix" "<mask_prefix>")
17082 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control vpermil<ssemodesuffix>: operand 1 is the VF data
;; register and operand 2 is the control vector in <sseintvecmode>, which
;; may be a register or memory.
17084 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
17085 [(set (match_operand:VF 0 "register_operand" "=v")
17087 [(match_operand:VF 1 "register_operand" "v")
17088 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
17090 "TARGET_AVX && <mask_mode512bit_condition>"
17091 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17092 [(set_attr "type" "sselog")
17093 (set_attr "prefix_extra" "1")
17094 (set_attr "btver2_decode" "vector")
17095 (set_attr "prefix" "<mask_prefix>")
17096 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking vpermi2var expander for VI48F modes.  Forwards operands
;; 0-3 to the _maskz_1 generator, supplying an all-zero vector of
;; <MODE>mode and then the mask (operand 4).
;; NOTE(review): the operands here carry constraint strings, while the
;; VI1_AVX512VL expander of the same name has none; constraints are
;; presumably unused in a define_expand -- confirm and consider dropping.
17098 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17099 [(match_operand:VI48F 0 "register_operand" "=v")
17100 (match_operand:VI48F 1 "register_operand" "v")
17101 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17102 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17103 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17106 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17107 operands[0], operands[1], operands[2], operands[3],
17108 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking vpermi2var expander for VI1_AVX512VL modes, available
;; with TARGET_AVX512VBMI.  Forwards operands 0-3 to the _maskz_1
;; generator, supplying an all-zero vector of <MODE>mode and then the mask
;; (operand 4).
17112 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17113 [(match_operand:VI1_AVX512VL 0 "register_operand")
17114 (match_operand:VI1_AVX512VL 1 "register_operand")
17115 (match_operand:<sseintvecmode> 2 "register_operand")
17116 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17117 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17118 "TARGET_AVX512VBMI"
17120 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17121 operands[0], operands[1], operands[2], operands[3],
17122 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking vpermi2var expander for VI2_AVX512VL modes.  Forwards
;; operands 0-3 to the _maskz_1 generator, supplying an all-zero vector of
;; <MODE>mode and then the mask (operand 4).
;; NOTE(review): the operands here carry constraint strings, while the
;; VI1_AVX512VL expander of the same name has none; constraints are
;; presumably unused in a define_expand -- confirm and consider dropping.
17126 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17127 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17128 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17129 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17130 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17131 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17134 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17135 operands[0], operands[1], operands[2], operands[3],
17136 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2<ssemodesuffix> for VI48F modes.  Operand 2, the
;; <sseintvecmode> vector, must be in the destination register
;; (constraint "0"); the template therefore prints only operands 3 and 1
;; as sources.
17140 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17141 [(set (match_operand:VI48F 0 "register_operand" "=v")
17143 [(match_operand:VI48F 1 "register_operand" "v")
17144 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17145 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17148 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17149 [(set_attr "type" "sselog")
17150 (set_attr "prefix" "evex")
17151 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2<ssemodesuffix> for VI1_AVX512VL modes, available with
;; TARGET_AVX512VBMI.  Operand 2, the <sseintvecmode> vector, must be in
;; the destination register (constraint "0"); the template prints only
;; operands 3 and 1 as sources.
17153 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17154 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17155 (unspec:VI1_AVX512VL
17156 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17157 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17158 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17160 "TARGET_AVX512VBMI"
17161 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17162 [(set_attr "type" "sselog")
17163 (set_attr "prefix" "evex")
17164 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2<ssemodesuffix> for VI2_AVX512VL modes.  Operand 2, the
;; <sseintvecmode> vector, must be in the destination register
;; (constraint "0"); the template prints only operands 3 and 1 as sources.
17166 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17167 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17168 (unspec:VI2_AVX512VL
17169 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17170 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17171 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17174 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17175 [(set_attr "type" "sselog")
17176 (set_attr "prefix" "evex")
17177 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix> for VI48F modes
;; (UNSPEC_VPERMI2_MASK).  Operand 2 is tied to the destination and
;; operand 4 is the "Yk" mask register printed inside {} after the
;; destination.
17179 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17180 [(set (match_operand:VI48F 0 "register_operand" "=v")
17183 [(match_operand:VI48F 1 "register_operand" "v")
17184 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17185 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17186 UNSPEC_VPERMI2_MASK)
17188 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17190 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17191 [(set_attr "type" "sselog")
17192 (set_attr "prefix" "evex")
17193 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix> for VI1_AVX512VL modes, available with
;; TARGET_AVX512VBMI.  The UNSPEC_VPERMI2_MASK result is vec_merge'd
;; under the "Yk" mask in operand 4; operand 2 is tied to the destination.
17195 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17196 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17197 (vec_merge:VI1_AVX512VL
17198 (unspec:VI1_AVX512VL
17199 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17200 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17201 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17202 UNSPEC_VPERMI2_MASK)
17204 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17205 "TARGET_AVX512VBMI"
17206 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17207 [(set_attr "type" "sselog")
17208 (set_attr "prefix" "evex")
17209 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix> for VI2_AVX512VL modes.  The
;; UNSPEC_VPERMI2_MASK result is vec_merge'd under the "Yk" mask in
;; operand 4; operand 2 is tied to the destination.
17211 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17212 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17213 (vec_merge:VI2_AVX512VL
17214 (unspec:VI2_AVX512VL
17215 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17216 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17217 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17218 UNSPEC_VPERMI2_MASK)
17220 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17222 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17223 [(set_attr "type" "sselog")
17224 (set_attr "prefix" "evex")
17225 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking vpermt2var expander for VI48F modes.  Forwards operands
;; 0-3 to the _maskz_1 generator, supplying an all-zero vector of
;; <MODE>mode and then the mask (operand 4).  Operand 1 is the
;; <sseintvecmode> vector here.
;; NOTE(review): constraint strings on define_expand operands are
;; presumably unused -- confirm and consider dropping.
17227 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17228 [(match_operand:VI48F 0 "register_operand" "=v")
17229 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17230 (match_operand:VI48F 2 "register_operand" "0")
17231 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17232 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17235 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17236 operands[0], operands[1], operands[2], operands[3],
17237 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking vpermt2var expander for VI1_AVX512VL modes, available
;; with TARGET_AVX512VBMI.  Forwards operands 0-3 to the _maskz_1
;; generator, supplying an all-zero vector of <MODE>mode and then the mask
;; (operand 4).
;; NOTE(review): constraint strings on define_expand operands are
;; presumably unused -- confirm and consider dropping.
17241 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17242 [(match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17243 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17244 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17245 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")
17246 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17247 "TARGET_AVX512VBMI"
17249 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17250 operands[0], operands[1], operands[2], operands[3],
17251 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking vpermt2var expander for VI2_AVX512VL modes.  Forwards
;; operands 0-3 to the _maskz_1 generator, supplying an all-zero vector of
;; <MODE>mode and then the mask (operand 4).
;; NOTE(review): constraint strings on define_expand operands are
;; presumably unused -- confirm and consider dropping.
17255 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17256 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17257 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17258 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17259 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17260 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17263 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17264 operands[0], operands[1], operands[2], operands[3],
17265 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2<ssemodesuffix> for VI48F modes.  Operand 1 is the
;; <sseintvecmode> vector; operand 2 is a data vector that must be in the
;; destination register (constraint "0"), so the template prints only
;; operands 3 and 1 as sources.
17269 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17270 [(set (match_operand:VI48F 0 "register_operand" "=v")
17272 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17273 (match_operand:VI48F 2 "register_operand" "0")
17274 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17277 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17278 [(set_attr "type" "sselog")
17279 (set_attr "prefix" "evex")
17280 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2<ssemodesuffix> for VI1_AVX512VL modes, available with
;; TARGET_AVX512VBMI.  Operand 1 is the <sseintvecmode> vector; operand 2
;; must be in the destination register (constraint "0").
17282 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17283 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17284 (unspec:VI1_AVX512VL
17285 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17286 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17287 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17289 "TARGET_AVX512VBMI"
17290 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17291 [(set_attr "type" "sselog")
17292 (set_attr "prefix" "evex")
17293 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2<ssemodesuffix> for VI2_AVX512VL modes.  Operand 1 is the
;; <sseintvecmode> vector; operand 2 must be in the destination register
;; (constraint "0").
17295 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17296 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17297 (unspec:VI2_AVX512VL
17298 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17299 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17300 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17303 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17304 [(set_attr "type" "sselog")
17305 (set_attr "prefix" "evex")
17306 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix> for VI48F modes.  Operand 2 is tied to
;; the destination and operand 4 is the "Yk" mask register printed inside
;; {} after the destination.
17308 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17309 [(set (match_operand:VI48F 0 "register_operand" "=v")
17312 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17313 (match_operand:VI48F 2 "register_operand" "0")
17314 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17317 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17319 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17320 [(set_attr "type" "sselog")
17321 (set_attr "prefix" "evex")
17322 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix> for VI1_AVX512VL modes, available with
;; TARGET_AVX512VBMI.  The unspec result is vec_merge'd under the "Yk"
;; mask in operand 4; operand 2 is tied to the destination.
17324 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17325 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17326 (vec_merge:VI1_AVX512VL
17327 (unspec:VI1_AVX512VL
17328 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17329 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17330 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17333 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17334 "TARGET_AVX512VBMI"
17335 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17336 [(set_attr "type" "sselog")
17337 (set_attr "prefix" "evex")
17338 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix> for VI2_AVX512VL modes.  The unspec
;; result is vec_merge'd under the "Yk" mask in operand 4; operand 2 is
;; tied to the destination.
17340 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17341 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17342 (vec_merge:VI2_AVX512VL
17343 (unspec:VI2_AVX512VL
17344 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17345 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17346 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17349 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17351 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17352 [(set_attr "type" "sselog")
17353 (set_attr "prefix" "evex")
17354 (set_attr "mode" "<sseinsnmode>")])
;; Expand vperm2f128 for AVX256MODE2P modes.  The pattern itself is the
;; UNSPEC_VPERMIL2F128 form.  When the immediate has neither bit 3 nor
;; bit 7 set ((mask & 0x88) == 0), the preparation code instead builds a
;; vec_select over the vec_concat of operands 1 and 2: bits 0-1 choose
;; which half-vector of the concatenation becomes the low half of the
;; result and bits 4-5 choose the high half.
17356 (define_expand "avx_vperm2f128<mode>3"
17357 [(set (match_operand:AVX256MODE2P 0 "register_operand")
17358 (unspec:AVX256MODE2P
17359 [(match_operand:AVX256MODE2P 1 "register_operand")
17360 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17361 (match_operand:SI 3 "const_0_to_255_operand")]
17362 UNSPEC_VPERMIL2F128))]
17365 int mask = INTVAL (operands[3]);
17366 if ((mask & 0x88) == 0)
17368 rtx perm[<ssescalarnum>], t1, t2;
17369 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17371 base = (mask & 3) * nelt2;
17372 for (i = 0; i < nelt2; ++i)
17373 perm[i] = GEN_INT (base + i);
17375 base = ((mask >> 4) & 3) * nelt2;
17376 for (i = 0; i < nelt2; ++i)
17377 perm[i + nelt2] = GEN_INT (base + i);
17379 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
17380 operands[1], operands[2]);
17381 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17382 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
17383 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
17389 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17390 ;; means that in order to represent this properly in rtl we'd have to
17391 ;; nest *another* vec_concat with a zero operand and do the select from
17392 ;; a 4x wide vector. That doesn't seem very nice.
;; This pattern therefore keeps the operation as UNSPEC_VPERMIL2F128 and
;; passes the 8-bit immediate (operand 3) straight through to
;; vperm2<i128>.
17393 (define_insn "*avx_vperm2f128<mode>_full"
17394 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17395 (unspec:AVX256MODE2P
17396 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17397 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17398 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17399 UNSPEC_VPERMIL2F128))]
17401 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17402 [(set_attr "type" "sselog")
17403 (set_attr "prefix_extra" "1")
17404 (set_attr "length_immediate" "1")
17405 (set_attr "prefix" "vex")
17406 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expressed as a vec_select over the vec_concat of operands 1
;; and 2.  It matches only selection vectors accepted by
;; avx_vperm2f128_parallel; that helper's return value minus 1 is the
;; immediate.  The output code can emit vinsert<i128> with immediate 0 or
;; 1 (operand 2 printed via %x2) and otherwise emits vperm2<i128> with the
;; computed immediate.
17408 (define_insn "*avx_vperm2f128<mode>_nozero"
17409 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17410 (vec_select:AVX256MODE2P
17411 (vec_concat:<ssedoublevecmode>
17412 (match_operand:AVX256MODE2P 1 "register_operand" "x")
17413 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
17414 (match_parallel 3 ""
17415 [(match_operand 4 "const_int_operand")])))]
17417 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
17419 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
17421 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
17423 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
17424 operands[3] = GEN_INT (mask);
17425 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17427 [(set_attr "type" "sselog")
17428 (set_attr "prefix_extra" "1")
17429 (set_attr "length_immediate" "1")
17430 (set_attr "prefix" "vex")
17431 (set_attr "mode" "<sseinsnmode>")])
;; Recognize a single-input V_128 permutation accepted by palignr_operand
;; and emit palignr.  Operand 3 is an element index; before output it is
;; multiplied by the element size in bytes and stored as operand 2, the
;; immediate.  The noavx alternative (source tied to the destination)
;; prints palignr; the avx alternative prints vpalignr with operand 1 as
;; both sources.
17433 (define_insn "*ssse3_palignr<mode>_perm"
17434 [(set (match_operand:V_128 0 "register_operand" "=x,x")
17436 (match_operand:V_128 1 "register_operand" "0,x")
17437 (match_parallel 2 "palignr_operand"
17438 [(match_operand 3 "const_int_operand" "n, n")])))]
17441 machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
17442 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
17444 switch (which_alternative)
17447 return "palignr\t{%2, %1, %0|%0, %1, %2}";
17449 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
17451 gcc_unreachable ();
17454 [(set_attr "isa" "noavx,avx")
17455 (set_attr "type" "sseishft")
17456 (set_attr "atom_unit" "sishuf")
17457 (set_attr "prefix_data16" "1,*")
17458 (set_attr "prefix_extra" "1")
17459 (set_attr "length_immediate" "1")
17460 (set_attr "prefix" "orig,vex")])
;; Masked half-vector insert for VI48F_256 modes.  Operand 3 (0 or 1)
;; chooses between the gen_vec_set_lo_<mode>_mask and
;; gen_vec_set_hi_<mode>_mask generators; the chosen one receives the
;; destination, the base vector (operand 1), the half-vector to insert
;; (operand 2) and the merge source (operand 4).  Operand 5 is the mask.
17462 (define_expand "avx512vl_vinsert<mode>"
17463 [(match_operand:VI48F_256 0 "register_operand")
17464 (match_operand:VI48F_256 1 "register_operand")
17465 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17466 (match_operand:SI 3 "const_0_to_1_operand")
17467 (match_operand:VI48F_256 4 "register_operand")
17468 (match_operand:<avx512fmaskmode> 5 "register_operand")]
17471 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17473 switch (INTVAL (operands[3]))
17476 insn = gen_vec_set_lo_<mode>_mask;
17479 insn = gen_vec_set_hi_<mode>_mask;
17482 gcc_unreachable ();
17485 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Half-vector insert for V_256 modes.  Operand 3 (0 or 1) chooses
;; between the gen_vec_set_lo_<mode> and gen_vec_set_hi_<mode>
;; generators, which receive the destination, the base vector (operand 1)
;; and the half-vector to insert (operand 2).
17490 (define_expand "avx_vinsertf128<mode>"
17491 [(match_operand:V_256 0 "register_operand")
17492 (match_operand:V_256 1 "register_operand")
17493 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17494 (match_operand:SI 3 "const_0_to_1_operand")]
17497 rtx (*insn)(rtx, rtx, rtx);
17499 switch (INTVAL (operands[3]))
17502 insn = gen_vec_set_lo_<mode>;
17505 insn = gen_vec_set_hi_<mode>;
17508 gcc_unreachable ();
17511 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a VI8F_256 vector: the result is operand 2
;; concatenated with elements 2 and 3 of operand 1.  With TARGET_AVX512VL
;; it prints vinsert<shuffletype>64x2 (which also takes the mask
;; operand), otherwise vinsert<i128>, both with immediate 0.
;; NOTE(review): vinsert{f,i}64x2 is documented as an AVX512DQ
;; instruction, yet only TARGET_AVX512VL is tested here, and the "prefix"
;; attribute stays "vex" even on the EVEX branch -- confirm both.
17515 (define_insn "vec_set_lo_<mode><mask_name>"
17516 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17517 (vec_concat:VI8F_256
17518 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17519 (vec_select:<ssehalfvecmode>
17520 (match_operand:VI8F_256 1 "register_operand" "v")
17521 (parallel [(const_int 2) (const_int 3)]))))]
17524 if (TARGET_AVX512VL)
17525 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17527 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17529 [(set_attr "type" "sselog")
17530 (set_attr "prefix_extra" "1")
17531 (set_attr "length_immediate" "1")
17532 (set_attr "prefix" "vex")
17533 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI8F_256 vector: the result is elements 0
;; and 1 of operand 1 concatenated with operand 2.  With TARGET_AVX512VL
;; it prints vinsert<shuffletype>64x2 (which also takes the mask
;; operand), otherwise vinsert<i128>, both with immediate 1.
;; NOTE(review): vinsert{f,i}64x2 is documented as an AVX512DQ
;; instruction, yet only TARGET_AVX512VL is tested here, and the "prefix"
;; attribute stays "vex" even on the EVEX branch -- confirm both.
17535 (define_insn "vec_set_hi_<mode><mask_name>"
17536 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17537 (vec_concat:VI8F_256
17538 (vec_select:<ssehalfvecmode>
17539 (match_operand:VI8F_256 1 "register_operand" "v")
17540 (parallel [(const_int 0) (const_int 1)]))
17541 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17544 if (TARGET_AVX512VL)
17545 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17547 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17549 [(set_attr "type" "sselog")
17550 (set_attr "prefix_extra" "1")
17551 (set_attr "length_immediate" "1")
17552 (set_attr "prefix" "vex")
17553 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a VI4F_256 vector: the result is operand 2
;; concatenated with elements 4-7 of operand 1.  With TARGET_AVX512VL it
;; prints vinsert<shuffletype>32x4 (which also takes the mask operand),
;; otherwise vinsert<i128>, both with immediate 0.
;; NOTE(review): the "prefix" attribute stays "vex" even on the
;; TARGET_AVX512VL branch, which prints an EVEX mnemonic -- confirm.
17555 (define_insn "vec_set_lo_<mode><mask_name>"
17556 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17557 (vec_concat:VI4F_256
17558 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17559 (vec_select:<ssehalfvecmode>
17560 (match_operand:VI4F_256 1 "register_operand" "v")
17561 (parallel [(const_int 4) (const_int 5)
17562 (const_int 6) (const_int 7)]))))]
17565 if (TARGET_AVX512VL)
17566 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17568 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17570 [(set_attr "type" "sselog")
17571 (set_attr "prefix_extra" "1")
17572 (set_attr "length_immediate" "1")
17573 (set_attr "prefix" "vex")
17574 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI4F_256 vector: the result is elements 0-3
;; of operand 1 concatenated with operand 2.  With TARGET_AVX512VL it
;; prints vinsert<shuffletype>32x4 (which also takes the mask operand),
;; otherwise vinsert<i128>, both with immediate 1.
;; NOTE(review): the "prefix" attribute stays "vex" even on the
;; TARGET_AVX512VL branch, which prints an EVEX mnemonic -- confirm.
17576 (define_insn "vec_set_hi_<mode><mask_name>"
17577 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17578 (vec_concat:VI4F_256
17579 (vec_select:<ssehalfvecmode>
17580 (match_operand:VI4F_256 1 "register_operand" "v")
17581 (parallel [(const_int 0) (const_int 1)
17582 (const_int 2) (const_int 3)]))
17583 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17586 if (TARGET_AVX512VL)
17587 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17589 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17591 [(set_attr "type" "sselog")
17592 (set_attr "prefix_extra" "1")
17593 (set_attr "length_immediate" "1")
17594 (set_attr "prefix" "vex")
17595 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a V16HI vector: operand 2 (V8HI, register or
;; memory) is combined with elements 8-15 of operand 1.  Emits
;; vinsert%~128 with immediate 0.
17597 (define_insn "vec_set_lo_v16hi"
17598 [(set (match_operand:V16HI 0 "register_operand" "=x")
17600 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
17602 (match_operand:V16HI 1 "register_operand" "x")
17603 (parallel [(const_int 8) (const_int 9)
17604 (const_int 10) (const_int 11)
17605 (const_int 12) (const_int 13)
17606 (const_int 14) (const_int 15)]))))]
17608 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17609 [(set_attr "type" "sselog")
17610 (set_attr "prefix_extra" "1")
17611 (set_attr "length_immediate" "1")
17612 (set_attr "prefix" "vex")
17613 (set_attr "mode" "OI")])
;; V16HI counterpart of vec_set_hi: elements 0..7 are selected from V16HI
;; operand 1 and operand 2 is a V8HI register or memory value.
;; Emitted as vinsert%~128 with immediate 0x1.
17615 (define_insn "vec_set_hi_v16hi"
17616 [(set (match_operand:V16HI 0 "register_operand" "=x")
17619 (match_operand:V16HI 1 "register_operand" "x")
17620 (parallel [(const_int 0) (const_int 1)
17621 (const_int 2) (const_int 3)
17622 (const_int 4) (const_int 5)
17623 (const_int 6) (const_int 7)]))
17624 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
17626 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17627 [(set_attr "type" "sselog")
17628 (set_attr "prefix_extra" "1")
17629 (set_attr "length_immediate" "1")
17630 (set_attr "prefix" "vex")
17631 (set_attr "mode" "OI")])
;; V32QI counterpart of vec_set_lo: operand 2 is a V16QI register or memory
;; value and elements 16..31 are selected from V32QI operand 1.
;; Emitted as vinsert%~128 with immediate 0x0.
17633 (define_insn "vec_set_lo_v32qi"
17634 [(set (match_operand:V32QI 0 "register_operand" "=x")
17636 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
17638 (match_operand:V32QI 1 "register_operand" "x")
17639 (parallel [(const_int 16) (const_int 17)
17640 (const_int 18) (const_int 19)
17641 (const_int 20) (const_int 21)
17642 (const_int 22) (const_int 23)
17643 (const_int 24) (const_int 25)
17644 (const_int 26) (const_int 27)
17645 (const_int 28) (const_int 29)
17646 (const_int 30) (const_int 31)]))))]
17648 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17649 [(set_attr "type" "sselog")
17650 (set_attr "prefix_extra" "1")
17651 (set_attr "length_immediate" "1")
17652 (set_attr "prefix" "vex")
17653 (set_attr "mode" "OI")])
;; V32QI counterpart of vec_set_hi: elements 0..15 are selected from V32QI
;; operand 1 and operand 2 is a V16QI register or memory value.
;; Emitted as vinsert%~128 with immediate 0x1.
17655 (define_insn "vec_set_hi_v32qi"
17656 [(set (match_operand:V32QI 0 "register_operand" "=x")
17659 (match_operand:V32QI 1 "register_operand" "x")
17660 (parallel [(const_int 0) (const_int 1)
17661 (const_int 2) (const_int 3)
17662 (const_int 4) (const_int 5)
17663 (const_int 6) (const_int 7)
17664 (const_int 8) (const_int 9)
17665 (const_int 10) (const_int 11)
17666 (const_int 12) (const_int 13)
17667 (const_int 14) (const_int 15)]))
17668 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
17670 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17671 [(set_attr "type" "sselog")
17672 (set_attr "prefix_extra" "1")
17673 (set_attr "length_immediate" "1")
17674 (set_attr "prefix" "vex")
17675 (set_attr "mode" "OI")])
;; Masked vector load for the V48_AVX2 modes.  Register operand 0 is set
;; from mask register operand 2 (<sseintvecmode>) and memory operand 1.
;; Emitted as v<sseintprefix>maskmov<ssemodesuffix> with the memory operand
;; as the source.
17677 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
17678 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
17680 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
17681 (match_operand:V48_AVX2 1 "memory_operand" "m")]
17684 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
17685 [(set_attr "type" "sselog1")
17686 (set_attr "prefix_extra" "1")
17687 (set_attr "prefix" "vex")
17688 (set_attr "btver2_decode" "vector")
17689 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store for the V48_AVX2 modes.  Memory operand 0 is
;; read-write ("+m"); mask register operand 1 (<sseintvecmode>) and data
;; register operand 2 are the inputs.
;; Emitted as v<sseintprefix>maskmov<ssemodesuffix> with the memory operand
;; as the destination.
17691 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
17692 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
17694 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
17695 (match_operand:V48_AVX2 2 "register_operand" "x")
17699 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17700 [(set_attr "type" "sselog1")
17701 (set_attr "prefix_extra" "1")
17702 (set_attr "prefix" "vex")
17703 (set_attr "btver2_decode" "vector")
17704 (set_attr "mode" "<sseinsnmode>")])
;; Named expander "maskload<mode>" for the V48_AVX2 modes: register
;; destination (operand 0), memory source (operand 1) and a
;; <sseintvecmode> mask register (operand 2).
17706 (define_expand "maskload<mode>"
17707 [(set (match_operand:V48_AVX2 0 "register_operand")
17709 [(match_operand:<sseintvecmode> 2 "register_operand")
17710 (match_operand:V48_AVX2 1 "memory_operand")]
;; Named expander "maskstore<mode>" for the V48_AVX2 modes: memory
;; destination (operand 0), register source (operand 1) and a
;; <sseintvecmode> mask register (operand 2).
17714 (define_expand "maskstore<mode>"
17715 [(set (match_operand:V48_AVX2 0 "memory_operand")
17717 [(match_operand:<sseintvecmode> 2 "register_operand")
17718 (match_operand:V48_AVX2 1 "register_operand")
;; Cast of a <ssehalfvecmode> value (operand 1) to the 256-bit mode
;; AVX256MODE2P (operand 0), with register<-reg/mem and mem<-register
;; alternatives.  The pattern is split once reload has completed: the
;; split code re-creates op0 as a <ssehalfvecmode> register with the same
;; REGNO, or op1 as a <MODE>mode register with the same REGNO, and then
;; emits an ordinary move between the two.
;; NOTE(review): confirm which operand form selects which rewrite.
17723 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
17724 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
17725 (unspec:AVX256MODE2P
17726 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
17730 "&& reload_completed"
17733 rtx op0 = operands[0];
17734 rtx op1 = operands[1];
17736 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
17738 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
17739 emit_move_insn (op0, op1);
;; vec_init expander for the V_256 modes: initialization of register
;; operand 0 from operand 1 is delegated to ix86_expand_vector_init, called
;; with false as its first argument.
17743 (define_expand "vec_init<mode>"
17744 [(match_operand:V_256 0 "register_operand")
17748 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init expander for the VF48_I1248 modes: same delegation to
;; ix86_expand_vector_init (false, operands[0], operands[1]) as the V_256
;; expander.
17752 (define_expand "vec_init<mode>"
17753 [(match_operand:VF48_I1248 0 "register_operand")
17757 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Variable arithmetic right shift for the VI48_AVX512F_AVX512VL modes:
;; operand 1 is shifted by the vector of counts in operand 2 (register or
;; memory).  Requires TARGET_AVX2 plus <mask_mode512bit_condition>.
;; Emitted as vpsrav<ssemodesuffix>, optionally with mask operand 3.
17761 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17762 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
17763 (ashiftrt:VI48_AVX512F_AVX512VL
17764 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
17765 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
17766 "TARGET_AVX2 && <mask_mode512bit_condition>"
17767 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17768 [(set_attr "type" "sseishft")
17769 (set_attr "prefix" "maybe_evex")
17770 (set_attr "mode" "<sseinsnmode>")])
;; Variable arithmetic right shift for the VI2_AVX512VL (16-bit element)
;; modes: operand 1 is shifted by the vector of counts in operand 2.
;; Emitted as vpsravw, optionally with mask operand 3.
17772 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17773 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17774 (ashiftrt:VI2_AVX512VL
17775 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17776 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17778 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17779 [(set_attr "type" "sseishft")
17780 (set_attr "prefix" "maybe_evex")
17781 (set_attr "mode" "<sseinsnmode>")])
;; Variable shifts covered by the any_lshift code iterator for the
;; VI48_AVX512F modes: operand 1 is shifted by the vector of counts in
;; operand 2.  Requires TARGET_AVX2 plus <mask_mode512bit_condition>.
;; Emitted as vp<vshift>v<ssemodesuffix>, optionally with mask operand 3.
17783 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17784 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
17785 (any_lshift:VI48_AVX512F
17786 (match_operand:VI48_AVX512F 1 "register_operand" "v")
17787 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
17788 "TARGET_AVX2 && <mask_mode512bit_condition>"
17789 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17790 [(set_attr "type" "sseishft")
17791 (set_attr "prefix" "maybe_evex")
17792 (set_attr "mode" "<sseinsnmode>")])
;; Variable shifts covered by the any_lshift code iterator for the
;; VI2_AVX512VL (16-bit element) modes: operand 1 is shifted by the vector
;; of counts in operand 2.
;; Emitted as vp<vshift>v<ssemodesuffix>, optionally with mask operand 3.
17794 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17795 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17796 (any_lshift:VI2_AVX512VL
17797 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17798 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17800 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17801 [(set_attr "type" "sseishft")
17802 (set_attr "prefix" "maybe_evex")
17803 (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two <ssehalfvecmode> values into a V_256_512 vector.
;; Operand 1 is always a register; operand 2 is a register or memory value
;; in the first alternative and a constant matching "C" in the second.
;; The output code switches on which_alternative.  One arm emits
;; vinsert<i128> with immediate 0x1; the other emits a plain
;; vmovaps/vmovapd/vmovdqa of operand 1 into the %t0 or %x0 form of the
;; destination, chosen by get_attr_mode.  The per-alternative attributes
;; ("sselog,ssemov", "length_immediate" "1,*") pair the insert with the
;; first alternative and the move with the second.
17805 (define_insn "avx_vec_concat<mode>"
17806 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
17807 (vec_concat:V_256_512
17808 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
17809 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
17812 switch (which_alternative)
17815 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
17817 switch (get_attr_mode (insn))
17820 return "vmovaps\t{%1, %t0|%t0, %1}";
17822 return "vmovapd\t{%1, %t0|%t0, %1}";
17824 return "vmovaps\t{%1, %x0|%x0, %1}";
17826 return "vmovapd\t{%1, %x0|%x0, %1}";
17828 return "vmovdqa\t{%1, %t0|%t0, %1}";
17830 return "vmovdqa\t{%1, %x0|%x0, %1}";
17832 gcc_unreachable ();
17835 gcc_unreachable ();
17838 [(set_attr "type" "sselog,ssemov")
17839 (set_attr "prefix_extra" "1,*")
17840 (set_attr "length_immediate" "1,*")
17841 (set_attr "prefix" "maybe_evex")
17842 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision to single-precision conversion, 128-bit register form.
;; The V4SF result is elements 0..3 of a V8SF unspec applied to V8HI
;; register operand 1.  Available with TARGET_F16C or TARGET_AVX512VL;
;; emitted as vcvtph2ps with optional mask operand 2.
17844 (define_insn "vcvtph2ps<mask_name>"
17845 [(set (match_operand:V4SF 0 "register_operand" "=v")
17847 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
17849 (parallel [(const_int 0) (const_int 1)
17850 (const_int 2) (const_int 3)])))]
17851 "TARGET_F16C || TARGET_AVX512VL"
17852 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17853 [(set_attr "type" "ssecvt")
17854 (set_attr "prefix" "maybe_evex")
17855 (set_attr "mode" "V4SF")])
;; Memory-source form of the 128-bit vcvtph2ps: the V4SF result is
;; UNSPEC_VCVTPH2PS applied directly to a V4HI memory operand.
;; Available with TARGET_F16C or TARGET_AVX512VL.
17857 (define_insn "*vcvtph2ps_load<mask_name>"
17858 [(set (match_operand:V4SF 0 "register_operand" "=v")
17859 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
17860 UNSPEC_VCVTPH2PS))]
17861 "TARGET_F16C || TARGET_AVX512VL"
17862 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17863 [(set_attr "type" "ssecvt")
17864 (set_attr "prefix" "vex")
17865 (set_attr "mode" "V8SF")])
;; 256-bit half-precision to single-precision conversion: the V8SF result
;; is UNSPEC_VCVTPH2PS of a V8HI register or memory operand.
;; Available with TARGET_F16C or TARGET_AVX512VL.
17867 (define_insn "vcvtph2ps256<mask_name>"
17868 [(set (match_operand:V8SF 0 "register_operand" "=v")
17869 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
17870 UNSPEC_VCVTPH2PS))]
17871 "TARGET_F16C || TARGET_AVX512VL"
17872 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17873 [(set_attr "type" "ssecvt")
17874 (set_attr "prefix" "vex")
17875 (set_attr "btver2_decode" "double")
17876 (set_attr "mode" "V8SF")])
;; 512-bit half-precision to single-precision conversion: the V16SF result
;; is UNSPEC_VCVTPH2PS of V16HI operand 1.  The operand predicate and
;; constraint come from the round_saeonly substitution, and the template
;; carries <round_saeonly_mask_op2> plus the optional mask operand 2.
17878 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
17879 [(set (match_operand:V16SF 0 "register_operand" "=v")
17881 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
17882 UNSPEC_VCVTPH2PS))]
17884 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
17885 [(set_attr "type" "ssecvt")
17886 (set_attr "prefix" "evex")
17887 (set_attr "mode" "V16SF")])
;; Expander for the masked 128-bit single-precision to half-precision
;; conversion.  Operand 1 is the V4SF source, operand 2 an immediate in
;; 0..255, operand 3 a V8HI vector_move_operand and operand 4 a QI mask
;; register.  The preparation statement sets operands[5] to the V4HI zero
;; constant.
17889 (define_expand "vcvtps2ph_mask"
17890 [(set (match_operand:V8HI 0 "register_operand")
17893 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17894 (match_operand:SI 2 "const_0_to_255_operand")]
17897 (match_operand:V8HI 3 "vector_move_operand")
17898 (match_operand:QI 4 "register_operand")))]
17900 "operands[5] = CONST0_RTX (V4HImode);")
;; Expander for the unmasked 128-bit single-precision to half-precision
;; conversion.  Operand 1 is the V4SF source and operand 2 an immediate in
;; 0..255.  The preparation statement sets operands[3] to the V4HI zero
;; constant.
17902 (define_expand "vcvtps2ph"
17903 [(set (match_operand:V8HI 0 "register_operand")
17905 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17906 (match_operand:SI 2 "const_0_to_255_operand")]
17910 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination vcvtps2ph producing a V8HI value from a V4HI
;; unspec of V4SF operand 1 and immediate operand 2; operand 3 must be the
;; V4HI zero constant (const0_operand).
;; Requires (TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>.
17912 (define_insn "*vcvtps2ph<mask_name>"
17913 [(set (match_operand:V8HI 0 "register_operand" "=v")
17915 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
17916 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17918 (match_operand:V4HI 3 "const0_operand")))]
17919 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
17920 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17921 [(set_attr "type" "ssecvt")
17922 (set_attr "prefix" "maybe_evex")
17923 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit vcvtps2ph: a V4HI memory operand
;; is set to UNSPEC_VCVTPS2PH of V4SF register operand 1 and immediate
;; operand 2.  Available with TARGET_F16C or TARGET_AVX512VL.
17925 (define_insn "*vcvtps2ph_store<mask_name>"
17926 [(set (match_operand:V4HI 0 "memory_operand" "=m")
17927 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
17928 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17929 UNSPEC_VCVTPS2PH))]
17930 "TARGET_F16C || TARGET_AVX512VL"
17931 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17932 [(set_attr "type" "ssecvt")
17933 (set_attr "prefix" "maybe_evex")
17934 (set_attr "mode" "V4SF")])
;; 256-bit single-precision to half-precision conversion: a V8HI register
;; or memory destination is set to UNSPEC_VCVTPS2PH of V8SF register
;; operand 1 and immediate operand 2.
;; Available with TARGET_F16C or TARGET_AVX512VL.
17936 (define_insn "vcvtps2ph256<mask_name>"
17937 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
17938 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
17939 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17940 UNSPEC_VCVTPS2PH))]
17941 "TARGET_F16C || TARGET_AVX512VL"
17942 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17943 [(set_attr "type" "ssecvt")
17944 (set_attr "prefix" "maybe_evex")
17945 (set_attr "btver2_decode" "vector")
17946 (set_attr "mode" "V8SF")])
;; 512-bit single-precision to half-precision conversion: a V16HI register
;; or memory destination is set to UNSPEC_VCVTPS2PH of V16SF register
;; operand 1 and immediate operand 2.  Always EVEX-prefixed.
17948 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
17949 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
17951 [(match_operand:V16SF 1 "register_operand" "v")
17952 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17953 UNSPEC_VCVTPS2PH))]
17955 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17956 [(set_attr "type" "ssecvt")
17957 (set_attr "prefix" "evex")
17958 (set_attr "mode" "V16SF")])
17960 ;; For gather* insn patterns
;; Data modes iterated over by the avx2_gather* patterns: the 128-bit and
;; 256-bit vectors with 4-byte or 8-byte integer/float elements.
17961 (define_mode_iterator VEC_GATHER_MODE
17962 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Maps a gather/scatter data mode to the SImode-element index vector mode
;; used by the *si patterns (for example V2DI and V4DI both use V4SI,
;; V8DI uses V8SI).
17963 (define_mode_attr VEC_GATHER_IDXSI
17964 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
17965 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
17966 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
17967 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Maps a gather/scatter data mode to the DImode-element index vector mode
;; used by the *di patterns.  For 4-byte element data modes the index
;; vector has half as many elements as the data mode (V8SI -> V4DI,
;; V16SF -> V8DI), except for V4SI/V4SF, which use V2DI.
17969 (define_mode_attr VEC_GATHER_IDXDI
17970 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17971 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
17972 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
17973 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Maps a data mode to the mode of the source/mask/data vector operands of
;; the DImode-index gather and scatter patterns.  It equals the data mode
;; except for the wider 4-byte element modes, which are halved:
;; V8SI -> V4SI, V16SI -> V8SI, V8SF -> V4SF, V16SF -> V8SF.
17975 (define_mode_attr VEC_GATHER_SRCDI
17976 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17977 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
17978 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
17979 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for the AVX2 gathers with an SImode-element index vector.
;; Operand 0 is the destination, operand 1 the previous/source vector,
;; operand 2 the VSIB base address, operand 3 the <VEC_GATHER_IDXSI> index
;; vector, operand 4 the mask vector and operand 5 a constant accepted by
;; const1248_operand.  Operand 6 is a scratch in the data mode.
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR address.
17981 (define_expand "avx2_gathersi<mode>"
17982 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17983 (unspec:VEC_GATHER_MODE
17984 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
17985 (mem:<ssescalarmode>
17987 [(match_operand 2 "vsib_address_operand")
17988 (match_operand:<VEC_GATHER_IDXSI>
17989 3 "register_operand")
17990 (match_operand:SI 5 "const1248_operand")]))
17991 (mem:BLK (scratch))
17992 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
17994 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
17998 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17999 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the SImode-index AVX2 gather.  Operand 2 (previous
;; destination contents) is tied to output 0, and mask operand 5 is tied to
;; the early-clobbered scratch operand 1.  Operand 7 is the VSIB memory
;; operator over address operand 3, index vector operand 4 and constant
;; operand 6.
;; Emitted as v<sseintprefix>gatherd<ssemodesuffix> with %1 as the mask
;; register, %7 as the memory reference and %0 as the destination.
18002 (define_insn "*avx2_gathersi<mode>"
18003 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18004 (unspec:VEC_GATHER_MODE
18005 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18006 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18008 [(match_operand:P 3 "vsib_address_operand" "Tv")
18009 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18010 (match_operand:SI 6 "const1248_operand" "n")]
18012 (mem:BLK (scratch))
18013 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18015 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18017 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18018 [(set_attr "type" "ssemov")
18019 (set_attr "prefix" "vex")
18020 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the SImode-index AVX2 gather.  No match_operand is tied
;; to output 0 here, so the remaining operands are numbered one lower than
;; in *avx2_gathersi<mode>: operand 6 is the VSIB memory operator over
;; operands 2, 3 and 5, and mask operand 4 is tied to scratch operand 1.
;; Emitted as v<sseintprefix>gatherd<ssemodesuffix> with %1, %6 and %0.
18022 (define_insn "*avx2_gathersi<mode>_2"
18023 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18024 (unspec:VEC_GATHER_MODE
18026 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18028 [(match_operand:P 2 "vsib_address_operand" "Tv")
18029 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18030 (match_operand:SI 5 "const1248_operand" "n")]
18032 (mem:BLK (scratch))
18033 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18035 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18037 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18038 [(set_attr "type" "ssemov")
18039 (set_attr "prefix" "vex")
18040 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX2 gathers with a DImode-element index vector.
;; Operand 0 is the destination in the data mode.  Operands 1 (previous
;; contents) and 4 (mask) use <VEC_GATHER_SRCDI>.  Operand 2 is the VSIB
;; base address, operand 3 the <VEC_GATHER_IDXDI> index vector and operand
;; 5 a constant accepted by const1248_operand.  Operand 6 is a scratch in
;; the data mode.
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR address.
18042 (define_expand "avx2_gatherdi<mode>"
18043 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18044 (unspec:VEC_GATHER_MODE
18045 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18046 (mem:<ssescalarmode>
18048 [(match_operand 2 "vsib_address_operand")
18049 (match_operand:<VEC_GATHER_IDXDI>
18050 3 "register_operand")
18051 (match_operand:SI 5 "const1248_operand")]))
18052 (mem:BLK (scratch))
18053 (match_operand:<VEC_GATHER_SRCDI>
18054 4 "register_operand")]
18056 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18060 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18061 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the DImode-index AVX2 gather.  Operand 2
;; (<VEC_GATHER_SRCDI>, tied to output 0) and mask operand 5
;; (<VEC_GATHER_SRCDI>, tied to scratch operand 1) are the operands printed
;; in the template, so the register names follow the <VEC_GATHER_SRCDI>
;; mode rather than the data mode of operand 0.
;; Emitted as v<sseintprefix>gatherq<ssemodesuffix> with %5, %7 and %2.
18064 (define_insn "*avx2_gatherdi<mode>"
18065 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18066 (unspec:VEC_GATHER_MODE
18067 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18068 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18070 [(match_operand:P 3 "vsib_address_operand" "Tv")
18071 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18072 (match_operand:SI 6 "const1248_operand" "n")]
18074 (mem:BLK (scratch))
18075 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18077 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18079 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
18080 [(set_attr "type" "ssemov")
18081 (set_attr "prefix" "vex")
18082 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the DImode-index AVX2 gather, with no match_operand tied
;; to output 0.  Mask operand 4 (<VEC_GATHER_SRCDI>) is tied to scratch
;; operand 1; operand 6 is the VSIB memory operator over operands 2, 3, 5.
;; Because operand 0 is in the data mode, the output code prints it as %x0
;; when <MODE>mode differs from <VEC_GATHER_SRCDI>mode and as plain %0
;; otherwise.
18084 (define_insn "*avx2_gatherdi<mode>_2"
18085 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18086 (unspec:VEC_GATHER_MODE
18088 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18090 [(match_operand:P 2 "vsib_address_operand" "Tv")
18091 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18092 (match_operand:SI 5 "const1248_operand" "n")]
18094 (mem:BLK (scratch))
18095 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18097 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18100 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18101 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
18102 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
18104 [(set_attr "type" "ssemov")
18105 (set_attr "prefix" "vex")
18106 (set_attr "mode" "<sseinsnmode>")])
;; DImode-index AVX2 gather whose result is used only through a vec_select
;; of elements 0..3: output 0 is in <VEC_GATHER_SRCDI> mode and the scratch
;; operand 1 is a VI4F_256 register.  Operand 2 is tied to output 0 and
;; mask operand 5 is tied to the scratch.
;; Emitted as v<sseintprefix>gatherq<ssemodesuffix> with %5, %7 and %0.
18108 (define_insn "*avx2_gatherdi<mode>_3"
18109 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18110 (vec_select:<VEC_GATHER_SRCDI>
18112 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18113 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18115 [(match_operand:P 3 "vsib_address_operand" "Tv")
18116 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18117 (match_operand:SI 6 "const1248_operand" "n")]
18119 (mem:BLK (scratch))
18120 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18122 (parallel [(const_int 0) (const_int 1)
18123 (const_int 2) (const_int 3)])))
18124 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18126 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18127 [(set_attr "type" "ssemov")
18128 (set_attr "prefix" "vex")
18129 (set_attr "mode" "<sseinsnmode>")])
;; Like *avx2_gatherdi<mode>_3 (vec_select of elements 0..3, output in
;; <VEC_GATHER_SRCDI> mode, VI4F_256 scratch) but without a match_operand
;; tied to output 0.  Mask operand 4 is tied to scratch operand 1 and
;; operand 6 is the VSIB memory operator over operands 2, 3 and 5.
;; Emitted as v<sseintprefix>gatherq<ssemodesuffix> with %4, %6 and %0.
18131 (define_insn "*avx2_gatherdi<mode>_4"
18132 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18133 (vec_select:<VEC_GATHER_SRCDI>
18136 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18138 [(match_operand:P 2 "vsib_address_operand" "Tv")
18139 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18140 (match_operand:SI 5 "const1248_operand" "n")]
18142 (mem:BLK (scratch))
18143 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18145 (parallel [(const_int 0) (const_int 1)
18146 (const_int 2) (const_int 3)])))
18147 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18149 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18150 [(set_attr "type" "ssemov")
18151 (set_attr "prefix" "vex")
18152 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gathers with an SImode-element index vector
;; over the VI48F modes.  Operand 1 is the previous/source vector, operand
;; 4 the <avx512fmaskmode> mask register, operand 2 the VSIB base address,
;; operand 3 the <VEC_GATHER_IDXSI> index vector and operand 5 a constant
;; accepted by const1248_operand.  Operand 7 is a mask-mode scratch.
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR address.
18154 (define_expand "<avx512>_gathersi<mode>"
18155 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18157 [(match_operand:VI48F 1 "register_operand")
18158 (match_operand:<avx512fmaskmode> 4 "register_operand")
18159 (mem:<ssescalarmode>
18161 [(match_operand 2 "vsib_address_operand")
18162 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18163 (match_operand:SI 5 "const1248_operand")]))]
18165 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18169 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18170 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the SImode-index AVX-512 gather.  Operand 1 is tied to
;; output 0, and mask operand 7 is tied to the early-clobbered "Yk" scratch
;; operand 2.  Operand 6 is the VSIB memory operator over address operand
;; 4, index vector operand 3 and constant operand 5.
;; Emitted as v<sseintprefix>gatherd<ssemodesuffix> with the destination
;; written as %0{%2}; the Intel half prints the memory operand as %g6.
18173 (define_insn "*avx512f_gathersi<mode>"
18174 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18176 [(match_operand:VI48F 1 "register_operand" "0")
18177 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18178 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18180 [(match_operand:P 4 "vsib_address_operand" "Tv")
18181 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18182 (match_operand:SI 5 "const1248_operand" "n")]
18183 UNSPEC_VSIBADDR)])]
18185 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
18187 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18188 [(set_attr "type" "ssemov")
18189 (set_attr "prefix" "evex")
18190 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the SImode-index AVX-512 gather, with no match_operand
;; tied to output 0.  Mask operand 6 is tied to the "Yk" scratch operand 1
;; and operand 5 is the VSIB memory operator over operands 3, 2 and 4.
;; Emitted as v<sseintprefix>gatherd<ssemodesuffix> with the destination
;; written as %0{%1}.
18192 (define_insn "*avx512f_gathersi<mode>_2"
18193 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18196 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18197 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18199 [(match_operand:P 3 "vsib_address_operand" "Tv")
18200 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18201 (match_operand:SI 4 "const1248_operand" "n")]
18202 UNSPEC_VSIBADDR)])]
18204 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18206 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18207 [(set_attr "type" "ssemov")
18208 (set_attr "prefix" "evex")
18209 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gathers with a DImode-element index vector over
;; the VI48F modes.  Operand 1 (previous contents) uses
;; <VEC_GATHER_SRCDI>, operand 4 is a QImode mask register, operand 2 the
;; VSIB base address, operand 3 the <VEC_GATHER_IDXDI> index vector and
;; operand 5 a constant accepted by const1248_operand.  Operand 7 is a
;; QImode scratch.
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR address.
18212 (define_expand "<avx512>_gatherdi<mode>"
18213 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18215 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18216 (match_operand:QI 4 "register_operand")
18217 (mem:<ssescalarmode>
18219 [(match_operand 2 "vsib_address_operand")
18220 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18221 (match_operand:SI 5 "const1248_operand")]))]
18223 (clobber (match_scratch:QI 7))])]
18227 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18228 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the DImode-index AVX-512 gather.  Operand 1
;; (<VEC_GATHER_SRCDI>) is tied to output 0 and is the operand printed as
;; the destination, so the register name follows <VEC_GATHER_SRCDI> rather
;; than the data mode.  QImode mask operand 7 is tied to the "Yk" scratch
;; operand 2; operand 6 is the VSIB memory operator.
;; Emitted as v<sseintprefix>gatherq<ssemodesuffix> with the destination
;; written as %1{%2}.
18231 (define_insn "*avx512f_gatherdi<mode>"
18232 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18234 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18235 (match_operand:QI 7 "register_operand" "2")
18236 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18238 [(match_operand:P 4 "vsib_address_operand" "Tv")
18239 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18240 (match_operand:SI 5 "const1248_operand" "n")]
18241 UNSPEC_VSIBADDR)])]
18243 (clobber (match_scratch:QI 2 "=&Yk"))]
18245 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18246 [(set_attr "type" "ssemov")
18247 (set_attr "prefix" "evex")
18248 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the DImode-index AVX-512 gather, with no match_operand
;; tied to output 0.  QImode mask operand 6 is tied to the "Yk" scratch
;; operand 1; operand 5 is the VSIB memory operator over operands 3, 2, 4.
;; Operand 0 is in the data mode, so when <MODE>mode differs from
;; <VEC_GATHER_SRCDI>mode the destination must be printed narrowed: %x0
;; when <MODE_SIZE> is not 64 and %t0 when it is.  The AT&T and Intel
;; halves of each template name the same register width.
18250 (define_insn "*avx512f_gatherdi<mode>_2"
18251 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18254 (match_operand:QI 6 "register_operand" "1")
18255 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18257 [(match_operand:P 3 "vsib_address_operand" "Tv")
18258 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18259 (match_operand:SI 4 "const1248_operand" "n")]
18260 UNSPEC_VSIBADDR)])]
18262 (clobber (match_scratch:QI 1 "=&Yk"))]
18265 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18267 if (<MODE_SIZE> != 64)
18268 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18270 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18272 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18274 [(set_attr "type" "ssemov")
18275 (set_attr "prefix" "evex")
18276 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatters with an SImode-element index vector
;; over the VI48F modes.  Operand 0 is the VSIB base address, operand 2 the
;; <VEC_GATHER_IDXSI> index vector, operand 4 a constant accepted by
;; const1248_operand, operand 1 the <avx512fmaskmode> mask register and
;; operand 3 the data vector.  Operand 6 is a mask-mode scratch.
;; The preparation code packs operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR address.
18278 (define_expand "<avx512>_scattersi<mode>"
18279 [(parallel [(set (mem:VI48F
18281 [(match_operand 0 "vsib_address_operand")
18282 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18283 (match_operand:SI 4 "const1248_operand")]))
18285 [(match_operand:<avx512fmaskmode> 1 "register_operand")
18286 (match_operand:VI48F 3 "register_operand")]
18288 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18292 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18293 operands[4]), UNSPEC_VSIBADDR);
;; Matching insn for the SImode-index AVX-512 scatter.  Operand 5 is the
;; VSIB memory operator (the destination) over address operand 0, index
;; vector operand 2 and constant operand 4.  Mask operand 6 is tied to the
;; early-clobbered "Yk" scratch operand 1 and operand 3 is the data vector.
;; Emitted as v<sseintprefix>scatterd<ssemodesuffix> with the memory
;; operand written as %5{%1}.
18296 (define_insn "*avx512f_scattersi<mode>"
18297 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18299 [(match_operand:P 0 "vsib_address_operand" "Tv")
18300 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18301 (match_operand:SI 4 "const1248_operand" "n")]
18304 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18305 (match_operand:VI48F 3 "register_operand" "v")]
18307 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18309 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18310 [(set_attr "type" "ssemov")
18311 (set_attr "prefix" "evex")
18312 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatters with a DImode-element index vector
;; over the VI48F modes.  Operand 0 is the VSIB base address, operand 2 the
;; <VEC_GATHER_IDXDI> index vector, operand 4 a constant accepted by
;; const1248_operand, operand 1 a QImode mask register and operand 3 the
;; data vector in <VEC_GATHER_SRCDI> mode.  Operand 6 is a QImode scratch.
;; The preparation code packs operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR address.
18314 (define_expand "<avx512>_scatterdi<mode>"
18315 [(parallel [(set (mem:VI48F
18317 [(match_operand 0 "vsib_address_operand")
18318 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
18319 (match_operand:SI 4 "const1248_operand")]))
18321 [(match_operand:QI 1 "register_operand")
18322 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18324 (clobber (match_scratch:QI 6))])]
18328 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18329 operands[4]), UNSPEC_VSIBADDR);
;; Matching insn for the DImode-index AVX-512 scatter.  Operand 5 is the
;; VSIB memory operator (the destination) over address operand 0, index
;; vector operand 2 and constant operand 4.  QImode mask operand 6 is tied
;; to the "Yk" scratch operand 1 and operand 3 is the data vector in
;; <VEC_GATHER_SRCDI> mode.
;; Emitted as v<sseintprefix>scatterq<ssemodesuffix> with the memory
;; operand written as %5{%1}.
18332 (define_insn "*avx512f_scatterdi<mode>"
18333 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18335 [(match_operand:P 0 "vsib_address_operand" "Tv")
18336 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18337 (match_operand:SI 4 "const1248_operand" "n")]
18340 [(match_operand:QI 6 "register_operand" "1")
18341 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18343 (clobber (match_scratch:QI 1 "=&Yk"))]
18345 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18346 [(set_attr "type" "ssemov")
18347 (set_attr "prefix" "evex")
18348 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress into a register for the VI48F modes.  Operand 1 is the
;; source register, operand 2 is either tied to the destination or a
;; constant matching "C", and operand 3 is the "Yk" mask register.
;; Emitted as v<sseintprefix>compress<ssemodesuffix> with the destination
;; written as %0{%3}%N2.
18350 (define_insn "<avx512>_compress<mode>_mask"
18351 [(set (match_operand:VI48F 0 "register_operand" "=v")
18353 [(match_operand:VI48F 1 "register_operand" "v")
18354 (match_operand:VI48F 2 "vector_move_operand" "0C")
18355 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
18358 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18359 [(set_attr "type" "ssemov")
18360 (set_attr "prefix" "evex")
18361 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress directly to memory for the VI48F modes
;; (UNSPEC_COMPRESS_STORE).  Operand 0 is the memory destination, operand 1
;; the source register and operand 2 the "Yk" mask register.
;; Emitted as v<sseintprefix>compress<ssemodesuffix> with the destination
;; written as %0{%2}; the "memory" attribute is "store".
18363 (define_insn "<avx512>_compressstore<mode>_mask"
18364 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18366 [(match_operand:VI48F 1 "register_operand" "x")
18368 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
18369 UNSPEC_COMPRESS_STORE))]
18371 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
18372 [(set_attr "type" "ssemov")
18373 (set_attr "prefix" "evex")
18374 (set_attr "memory" "store")
18375 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for the VI48F expand operation.  Operand 1 is the
;; register or memory source and operand 3 the mask register; the
;; preparation statement overwrites operands[2] with the zero constant of
;; <MODE>mode.
18377 (define_expand "<avx512>_expand<mode>_maskz"
18378 [(set (match_operand:VI48F 0 "register_operand")
18380 [(match_operand:VI48F 1 "nonimmediate_operand")
18381 (match_operand:VI48F 2 "vector_move_operand")
18382 (match_operand:<avx512fmaskmode> 3 "register_operand")]
18385 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand for the VI48F modes, with a register-source and a
;; memory-source alternative ("memory" attribute "none,load").  Operand 2
;; is either tied to the destination or a constant matching "C", and
;; operand 3 is the "Yk" mask register.
;; Emitted as v<sseintprefix>expand<ssemodesuffix> with the destination
;; written as %0{%3}%N2.
18387 (define_insn "<avx512>_expand<mode>_mask"
18388 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
18390 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
18391 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
18392 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
18395 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18396 [(set_attr "type" "ssemov")
18397 (set_attr "prefix" "evex")
18398 (set_attr "memory" "none,load")
18399 (set_attr "mode" "<sseinsnmode>")])
;; Packed vrange for the VF_AVX512VL modes: an unspec of register operand
;; 1, operand 2 (predicate and constraint supplied by the round_saeonly
;; substitution) and an immediate operand 3 in 0..15.
;; Requires TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>.
;; Emitted as vrange<ssemodesuffix> with optional mask operand 4.
18401 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
18402 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18403 (unspec:VF_AVX512VL
18404 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18405 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18406 (match_operand:SI 3 "const_0_to_15_operand")]
18408 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
18409 "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
18410 [(set_attr "type" "sse")
18411 (set_attr "prefix" "evex")
18412 (set_attr "mode" "<MODE>")])
;; Scalar vrange for the VF_128 modes: register operand 1, operand 2
;; (predicate and constraint supplied by the round_saeonly substitution)
;; and an immediate operand 3 in 0..15.
;; Emitted as vrange<ssescalarmodesuffix>; this pattern has no mask
;; operand.
18414 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
18415 [(set (match_operand:VF_128 0 "register_operand" "=v")
18418 [(match_operand:VF_128 1 "register_operand" "v")
18419 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18420 (match_operand:SI 3 "const_0_to_15_operand")]
18425 "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
18426 [(set_attr "type" "sse")
18427 (set_attr "prefix" "evex")
18428 (set_attr "mode" "<MODE>")])
;; Packed vfpclass for the VF_AVX512VL modes: the result is a
;; <avx512fmaskmode> value in a "Yk" mask register, computed from vector
;; register operand 1 and an immediate operand 2 in 0..255.
;; The ';' that follows the output template string starts an (empty)
;; machine-description comment.
18430 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
18431 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18432 (unspec:<avx512fmaskmode>
18433 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18434 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18437 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
18438 [(set_attr "type" "sse")
18439 (set_attr "length_immediate" "1")
18440 (set_attr "prefix" "evex")
18441 (set_attr "mode" "<MODE>")])
;; Scalar vfpclass for the VF_128 modes: the mask-register result is the
;; AND of an unspec over vector register operand 1 and immediate operand 2
;; (0..255) with a second value.
;; NOTE(review): confirm the second AND operand against the full pattern.
;; The ';' that follows the output template string starts an (empty)
;; machine-description comment.
18443 (define_insn "avx512dq_vmfpclass<mode>"
18444 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18445 (and:<avx512fmaskmode>
18446 (unspec:<avx512fmaskmode>
18447 [(match_operand:VF_128 1 "register_operand" "v")
18448 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18452 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
18453 [(set_attr "type" "sse")
18454 (set_attr "length_immediate" "1")
18455 (set_attr "prefix" "evex")
18456 (set_attr "mode" "<MODE>")])
;; Packed vgetmant for the VF_AVX512VL modes: an unspec of operand 1
;; (nonimmediate_operand, constraint supplied by the round_saeonly
;; substitution) and an immediate operand 2 in 0..15.
;; Emitted as vgetmant<ssemodesuffix> with optional mask operand 3.
;; The ';' that follows the output template string starts an (empty)
;; machine-description comment.
18458 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
18459 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18460 (unspec:VF_AVX512VL
18461 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
18462 (match_operand:SI 2 "const_0_to_15_operand")]
18465 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
18466 [(set_attr "prefix" "evex")
18467 (set_attr "mode" "<MODE>")])
;; Scalar vgetmant for the VF_128 modes: register operand 1, operand 2
;; (predicate and constraint supplied by the round_saeonly substitution)
;; and an immediate operand 3 in 0..15.
;; Emitted as vgetmant<ssescalarmodesuffix>; the "mode" attribute is the
;; scalar mode.
;; The ';' that follows the output template string starts an (empty)
;; machine-description comment.
18469 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
18470 [(set (match_operand:VF_128 0 "register_operand" "=v")
18473 [(match_operand:VF_128 1 "register_operand" "v")
18474 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18475 (match_operand:SI 3 "const_0_to_15_operand")]
18480 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
18481 [(set_attr "prefix" "evex")
18482 (set_attr "mode" "<ssescalarmode>")])
18484 ;; The correct representation for this is absolutely enormous, and
18485 ;; surely not generally useful.
;; The operation is therefore modelled as an opaque unspec over the
;; VI2_AVX512VL modes: operands 1 and 2 are <dbpsadbwmode> vectors
;; (operand 2 may be memory) and operand 3 is an immediate in 0..255.
;; Emitted as vdbpsadbw with optional mask operand 4.
18486 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
18487 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18488 (unspec:VI2_AVX512VL
18489 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
18490 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
18491 (match_operand:SI 3 "const_0_to_255_operand")]
18494 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
18495 [(set_attr "isa" "avx")
18496 (set_attr "type" "sselog1")
18497 (set_attr "length_immediate" "1")
18498 (set_attr "prefix" "evex")
18499 (set_attr "mode" "<sseinsnmode>")])
;; Named pattern "clz<mode>2" for the VI48_AVX512VL modes, taking a
;; register or memory operand 1.
;; Emitted as vplzcnt<ssemodesuffix> with optional mask operand 2.
18501 (define_insn "clz<mode>2<mask_name>"
18502 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18504 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
18506 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18507 [(set_attr "type" "sse")
18508 (set_attr "prefix" "evex")
18509 (set_attr "mode" "<sseinsnmode>")])
;; Conflict detection for the VI48_AVX512VL modes, modelled as an unspec of
;; a single register or memory operand.
;; Emitted as vpconflict<ssemodesuffix> with optional mask operand 2.
18511 (define_insn "<mask_codefor>conflict<mode><mask_name>"
18512 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18513 (unspec:VI48_AVX512VL
18514 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
18517 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18518 [(set_attr "type" "sse")
18519 (set_attr "prefix" "evex")
18520 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1 on V4SI: a two-operand instruction in which operand 1 is tied
;; to the destination (constraint "0") and operand 2 is a register or
;; memory value.
18522 (define_insn "sha1msg1"
18523 [(set (match_operand:V4SI 0 "register_operand" "=x")
18525 [(match_operand:V4SI 1 "register_operand" "0")
18526 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18529 "sha1msg1\t{%2, %0|%0, %2}"
18530 [(set_attr "type" "sselog1")
18531 (set_attr "mode" "TI")])
;; SHA1MSG2 on V4SI.  Same operand shape as sha1msg1: operand 1 is tied
;; to the destination ("0"), operand 2 is a register or memory ("xm"),
;; and the template prints %0 and %2 only.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this listing -- confirm against the full source.
18533 (define_insn "sha1msg2"
18534 [(set (match_operand:V4SI 0 "register_operand" "=x")
18536 [(match_operand:V4SI 1 "register_operand" "0")
18537 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18540 "sha1msg2\t{%2, %0|%0, %2}"
18541 [(set_attr "type" "sselog1")
18542 (set_attr "mode" "TI")])
;; SHA1NEXTE on V4SI, represented as UNSPEC_SHA1NEXTE of operands 1 and
;; 2.  Operand 1 is tied to the destination ("0"); operand 2 is a
;; register or memory ("xm").  The template prints %0 and %2 only.
18544 (define_insn "sha1nexte"
18545 [(set (match_operand:V4SI 0 "register_operand" "=x")
18547 [(match_operand:V4SI 1 "register_operand" "0")
18548 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18549 UNSPEC_SHA1NEXTE))]
18551 "sha1nexte\t{%2, %0|%0, %2}"
18552 [(set_attr "type" "sselog1")
18553 (set_attr "mode" "TI")])
;; SHA1RNDS4 on V4SI, represented as UNSPEC_SHA1RNDS4.  Operand 1 is
;; tied to the destination ("0"), operand 2 is a register or memory
;; ("xm"), and operand 3 is a constant accepted by const_0_to_3_operand.
;; The "length_immediate" attribute of 1 accounts for that constant,
;; which is printed as the first AT&T / last Intel operand.
18555 (define_insn "sha1rnds4"
18556 [(set (match_operand:V4SI 0 "register_operand" "=x")
18558 [(match_operand:V4SI 1 "register_operand" "0")
18559 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18560 (match_operand:SI 3 "const_0_to_3_operand" "n")]
18561 UNSPEC_SHA1RNDS4))]
18563 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
18564 [(set_attr "type" "sselog1")
18565 (set_attr "length_immediate" "1")
18566 (set_attr "mode" "TI")])
;; SHA256MSG1 on V4SI, represented as UNSPEC_SHA256MSG1 of operands 1
;; and 2.  Operand 1 is tied to the destination ("0"); operand 2 is a
;; register or memory ("xm").  The template prints %0 and %2 only.
18568 (define_insn "sha256msg1"
18569 [(set (match_operand:V4SI 0 "register_operand" "=x")
18571 [(match_operand:V4SI 1 "register_operand" "0")
18572 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18573 UNSPEC_SHA256MSG1))]
18575 "sha256msg1\t{%2, %0|%0, %2}"
18576 [(set_attr "type" "sselog1")
18577 (set_attr "mode" "TI")])
;; SHA256MSG2 on V4SI, represented as UNSPEC_SHA256MSG2 of operands 1
;; and 2.  Operand 1 is tied to the destination ("0"); operand 2 is a
;; register or memory ("xm").  The template prints %0 and %2 only.
18579 (define_insn "sha256msg2"
18580 [(set (match_operand:V4SI 0 "register_operand" "=x")
18582 [(match_operand:V4SI 1 "register_operand" "0")
18583 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18584 UNSPEC_SHA256MSG2))]
18586 "sha256msg2\t{%2, %0|%0, %2}"
18587 [(set_attr "type" "sselog1")
18588 (set_attr "mode" "TI")])
;; SHA256RNDS2 on V4SI, represented as UNSPEC_SHA256RNDS2.  Operand 1 is
;; tied to the destination ("0"), operand 2 is a register or memory
;; ("xm"), and operand 3 is a V4SI register restricted by the "Yz"
;; constraint (the instruction's implicit first-SSE-register source).
;; None of the operands is an immediate, so unlike sha1rnds4 this insn
;; does not set "length_immediate"; the target default is used instead.
18590 (define_insn "sha256rnds2"
18591 [(set (match_operand:V4SI 0 "register_operand" "=x")
18593 [(match_operand:V4SI 1 "register_operand" "0")
18594 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18595 (match_operand:V4SI 3 "register_operand" "Yz")]
18596 UNSPEC_SHA256RNDS2))]
18598 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
18599 [(set_attr "type" "sselog1")
18601 (set_attr "mode" "TI")])
;; Cast from a <ssequartermode> value to the wider AVX512MODE2P mode,
;; kept as an unspec until it is split after reload ("&& reload_completed")
;; into a single emit_move_insn.  Two alternatives are accepted: register
;; destination with register-or-memory source ("=x" / "xm"), or memory
;; destination with register source ("m" / "x").
;; The split body re-creates a register operand with gen_rtx_REG in the
;; other side's mode (operand 0 in <ssequartermode>mode, or operand 1 in
;; <MODE>mode) before emitting the move.
;; NOTE(review): the statements that choose between the two gen_rtx_REG
;; assignments are not visible in this listing; presumably only one of
;; them runs, depending on which operand is a register -- confirm.
18603 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
18604 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18605 (unspec:AVX512MODE2P
18606 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
18610 "&& reload_completed"
18613 rtx op0 = operands[0];
18614 rtx op1 = operands[1];
18616 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
18618 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18619 emit_move_insn (op0, op1);
;; Cast from a <ssehalfvecmode> value to the wider AVX512MODE2P mode,
;; kept as an unspec until it is split after reload ("&& reload_completed")
;; into a single emit_move_insn.  Two alternatives are accepted: register
;; destination with register-or-memory source ("=x" / "xm"), or memory
;; destination with register source ("m" / "x").
;; The split body re-creates a register operand with gen_rtx_REG in the
;; other side's mode (operand 0 in <ssehalfvecmode>mode, or operand 1 in
;; <MODE>mode) before emitting the move.
;; NOTE(review): the statements that choose between the two gen_rtx_REG
;; assignments are not visible in this listing; presumably only one of
;; them runs, depending on which operand is a register -- confirm.
18623 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
18624 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18625 (unspec:AVX512MODE2P
18626 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18630 "&& reload_completed"
18633 rtx op0 = operands[0];
18634 rtx op1 = operands[1];
18636 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
18638 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18639 emit_move_insn (op0, op1);
;; Int iterator over the two VPMADD52 unspec codes.  The vpmadd52type
;; attribute maps each code to the suffix ("luq" or "huq") that is
;; spliced into pattern names and output templates via <vpmadd52type>.
18643 (define_int_iterator VPMADD52
18644 [UNSPEC_VPMADD52LUQ
18645 UNSPEC_VPMADD52HUQ])
18647 (define_int_attr vpmadd52type
18648 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
;; Expander for the "huq" "_maskz" form under TARGET_AVX512IFMA.  It
;; forwards operands 0-3 to gen_vpamdd52huq<mode>_maskz_1 and inserts
;; CONST0_RTX (<MODE>mode) between operand 3 and the mask register
;; (operand 4, in <avx512fmaskmode>).
;; NOTE(review): the pattern name is spelled "vpamdd52" whereas the
;; mnemonic emitted by the matching insns is "vpmadd52".  The name is
;; part of the generated gen_* interface, so any rename must update
;; every user at the same time.
18650 (define_expand "vpamdd52huq<mode>_maskz"
18651 [(match_operand:VI8_AVX512VL 0 "register_operand")
18652 (match_operand:VI8_AVX512VL 1 "register_operand")
18653 (match_operand:VI8_AVX512VL 2 "register_operand")
18654 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18655 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18656 "TARGET_AVX512IFMA"
18658 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
18659 operands[0], operands[1], operands[2], operands[3],
18660 CONST0_RTX (<MODE>mode), operands[4]));
;; Expander for the "luq" "_maskz" form under TARGET_AVX512IFMA.  It
;; forwards operands 0-3 to gen_vpamdd52luq<mode>_maskz_1 and inserts
;; CONST0_RTX (<MODE>mode) between operand 3 and the mask register
;; (operand 4, in <avx512fmaskmode>).
;; NOTE(review): the pattern name is spelled "vpamdd52" whereas the
;; mnemonic emitted by the matching insns is "vpmadd52".  The name is
;; part of the generated gen_* interface, so any rename must update
;; every user at the same time.
18664 (define_expand "vpamdd52luq<mode>_maskz"
18665 [(match_operand:VI8_AVX512VL 0 "register_operand")
18666 (match_operand:VI8_AVX512VL 1 "register_operand")
18667 (match_operand:VI8_AVX512VL 2 "register_operand")
18668 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18669 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18670 "TARGET_AVX512IFMA"
18672 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
18673 operands[0], operands[1], operands[2], operands[3],
18674 CONST0_RTX (<MODE>mode), operands[4]));
;; vpmadd52luq / vpmadd52huq for the VI8_AVX512VL modes under
;; TARGET_AVX512IFMA, modelled as an unspec of three operands.  Operand
;; 1 is tied to the destination ("0"), operand 2 is a register and
;; operand 3 a register or memory ("vm"); the template prints %0, %2 and
;; %3.  <sd_maskz_name> / <sd_mask_op4> come from a substitution defined
;; elsewhere (presumably producing the "_maskz_1" variant used by the
;; expanders -- confirm).
;; NOTE(review): the unspec code line is not visible in this listing.
18678 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
18679 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18680 (unspec:VI8_AVX512VL
18681 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18682 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18683 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18685 "TARGET_AVX512IFMA"
18686 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18687 [(set_attr "type" "ssemuladd")
18688 (set_attr "prefix" "evex")
18689 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpmadd52luq / vpmadd52huq under TARGET_AVX512IFMA: a
;; vec_merge of the three-operand unspec, selected by operand 4, a
;; <avx512fmaskmode> register with constraint "Yk".  Operand 1 is tied
;; to the destination ("0").  The template prints the mask as %{%4%}
;; directly after %0.
;; NOTE(review): the unspec code and the second vec_merge arm are not
;; visible in this listing -- confirm against the full source.
18691 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
18692 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18693 (vec_merge:VI8_AVX512VL
18694 (unspec:VI8_AVX512VL
18695 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18696 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18697 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18700 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18701 "TARGET_AVX512IFMA"
18702 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
18703 [(set_attr "type" "ssemuladd")
18704 (set_attr "prefix" "evex")
18705 (set_attr "mode" "<sseinsnmode>")])
;; vpmultishiftqb for the VI1_AVX512VL modes, enabled under
;; TARGET_AVX512VBMI.  Modelled as UNSPEC_VPMULTISHIFT of operand 1 (a
;; register) and operand 2 (a register or memory, "vm"), written to a
;; register destination.  <mask_operand3> is filled in by the
;; <mask_name> substitution defined elsewhere in the file.
18707 (define_insn "vpmultishiftqb<mode><mask_name>"
18708 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18709 (unspec:VI1_AVX512VL
18710 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18711 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
18712 UNSPEC_VPMULTISHIFT))]
18713 "TARGET_AVX512VBMI"
18714 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18715 [(set_attr "type" "sselog")
18716 (set_attr "prefix" "evex")
18717 (set_attr "mode" "<sseinsnmode>")])