1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2015 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For embed. rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
132 ;; For AVX512BW support
140 ;; For AVX512DQ support
145 ;; For AVX512IFMA support
149 ;; For AVX512VBMI support
153 (define_c_enum "unspecv" [
;; Every vector mode handled by the move patterns, the V?TI modes included.
;; Entries are listed per element type as 512-bit, 256-bit, 128-bit; the
;; 128-bit mode of each element type carries no extra condition.
(define_mode_iterator VMOVE
  [(V64QI "TARGET_AVX512F")
   (V32QI "TARGET_AVX")
   V16QI
   (V32HI "TARGET_AVX512F")
   (V16HI "TARGET_AVX")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   V2DI
   (V4TI "TARGET_AVX512BW")
   (V2TI "TARGET_AVX")
   V1TI
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   V2DF])
;; All AVX-512{F,VL} vector modes with SI, DI, SF or DF elements.
;; TARGET_AVX512F is assumed as the baseline, so the 512-bit modes are
;; unconditional and the narrower ones depend on TARGET_AVX512VL.
(define_mode_iterator V48_AVX512VL
  [V16SI
   (V8SI "TARGET_AVX512VL")
   (V4SI "TARGET_AVX512VL")
   V8DI
   (V4DI "TARGET_AVX512VL")
   (V2DI "TARGET_AVX512VL")
   V16SF
   (V8SF "TARGET_AVX512VL")
   (V4SF "TARGET_AVX512VL")
   V8DF
   (V4DF "TARGET_AVX512VL")
   (V2DF "TARGET_AVX512VL")])
;; AVX-512{BW,VL} vector modes with 1- or 2-byte elements.
;; TARGET_AVX512BW is assumed as the baseline, so the 512-bit modes are
;; unconditional and the narrower ones depend on TARGET_AVX512VL.
(define_mode_iterator VI12_AVX512VL
  [V64QI
   (V16QI "TARGET_AVX512VL")
   (V32QI "TARGET_AVX512VL")
   V32HI
   (V16HI "TARGET_AVX512VL")
   (V8HI "TARGET_AVX512VL")])
185 (define_mode_iterator VI1_AVX512VL
186 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; General vector mode iterator.  QI and HI element modes go up to
;; 256 bits; SI, DI, SF and DF element modes go up to 512 bits.
;; V2DF is the only 128-bit mode with a condition (TARGET_SSE2).
(define_mode_iterator V
  [(V32QI "TARGET_AVX")
   V16QI
   (V16HI "TARGET_AVX")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   V2DI
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])
197 ;; All 128bit vector modes
198 (define_mode_iterator V_128
199 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
201 ;; All 256bit vector modes
202 (define_mode_iterator V_256
203 [V32QI V16HI V8SI V4DI V8SF V4DF])
205 ;; All 512bit vector modes
206 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
;; All 256-bit and 512-bit vector modes.  The 256-bit modes are
;; unconditional; every 512-bit mode requires TARGET_AVX512F.
(define_mode_iterator V_256_512
  [V32QI
   V16HI
   V8SI
   V4DI
   V8SF
   V4DF
   (V64QI "TARGET_AVX512F")
   (V32HI "TARGET_AVX512F")
   (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")])
;; Every floating-point vector mode: single precision first, then
;; double precision, each listed from 512 bits down to 128 bits.
(define_mode_iterator VF
  [(V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])
219 ;; 128- and 256-bit float vector modes
220 (define_mode_iterator VF_128_256
221 [(V8SF "TARGET_AVX") V4SF
222 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
224 ;; All SFmode vector float modes
225 (define_mode_iterator VF1
226 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
228 ;; 128- and 256-bit SF vector modes
229 (define_mode_iterator VF1_128_256
230 [(V8SF "TARGET_AVX") V4SF])
232 (define_mode_iterator VF1_128_256VL
233 [V8SF (V4SF "TARGET_AVX512VL")])
235 ;; All DFmode vector float modes
236 (define_mode_iterator VF2
237 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
239 ;; 128- and 256-bit DF vector modes
240 (define_mode_iterator VF2_128_256
241 [(V4DF "TARGET_AVX") V2DF])
243 (define_mode_iterator VF2_512_256
244 [(V8DF "TARGET_AVX512F") V4DF])
246 (define_mode_iterator VF2_512_256VL
247 [V8DF (V4DF "TARGET_AVX512VL")])
249 ;; All 128bit vector float modes
250 (define_mode_iterator VF_128
251 [V4SF (V2DF "TARGET_SSE2")])
253 ;; All 256bit vector float modes
254 (define_mode_iterator VF_256
257 ;; All 512bit vector float modes
258 (define_mode_iterator VF_512
261 (define_mode_iterator VI48_AVX512VL
262 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
263 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
265 (define_mode_iterator VF_AVX512VL
266 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
267 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
269 (define_mode_iterator VF2_AVX512VL
270 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
272 (define_mode_iterator VF1_AVX512VL
273 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
;; Every integer vector mode.  512-bit SI/DI element modes require
;; TARGET_AVX512F, 512-bit QI/HI element modes require TARGET_AVX512BW,
;; 256-bit modes require TARGET_AVX and 128-bit modes are unconditional.
(define_mode_iterator VI
  [(V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V64QI "TARGET_AVX512BW")
   (V32QI "TARGET_AVX")
   V16QI
   (V32HI "TARGET_AVX512BW")
   (V16HI "TARGET_AVX")
   V8HI
   (V8SI "TARGET_AVX")
   V4SI
   (V4DI "TARGET_AVX")
   V2DI])
;; Integer vector modes where the 256-bit forms require TARGET_AVX2.
;; 512-bit QI/HI element modes require TARGET_AVX512BW, 512-bit SI/DI
;; element modes require TARGET_AVX512F, 128-bit modes are unconditional.
(define_mode_iterator VI_AVX2
  [(V64QI "TARGET_AVX512BW")
   (V32QI "TARGET_AVX2")
   V16QI
   (V32HI "TARGET_AVX512BW")
   (V16HI "TARGET_AVX2")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX2")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX2")
   V2DI])
289 ;; All QImode vector integer modes
290 (define_mode_iterator VI1
291 [(V32QI "TARGET_AVX") V16QI])
293 (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
295 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
297 (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
298 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
299 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
301 ;; All DImode vector integer modes
302 (define_mode_iterator VI8
303 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
305 (define_mode_iterator VI8_AVX512VL
306 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
308 (define_mode_iterator VI8_256_512
309 [V8DI (V4DI "TARGET_AVX512VL")])
311 (define_mode_iterator VI1_AVX2
312 [(V32QI "TARGET_AVX2") V16QI])
314 (define_mode_iterator VI1_AVX512
315 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
317 (define_mode_iterator VI2_AVX2
318 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
320 (define_mode_iterator VI2_AVX512F
321 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
323 (define_mode_iterator VI4_AVX
324 [(V8SI "TARGET_AVX") V4SI])
326 (define_mode_iterator VI4_AVX2
327 [(V8SI "TARGET_AVX2") V4SI])
329 (define_mode_iterator VI4_AVX512F
330 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
332 (define_mode_iterator VI4_AVX512VL
333 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
335 (define_mode_iterator VI48_AVX512F_AVX512VL
336 [V4SI V8SI (V16SI "TARGET_AVX512F")
337 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
339 (define_mode_iterator VI2_AVX512VL
340 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
342 (define_mode_iterator VI8_AVX2_AVX512BW
343 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
345 (define_mode_iterator VI8_AVX2
346 [(V4DI "TARGET_AVX2") V2DI])
348 (define_mode_iterator VI8_AVX2_AVX512F
349 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
351 (define_mode_iterator VI4_128_8_256
355 (define_mode_iterator V8FI
359 (define_mode_iterator V16FI
362 ;; ??? We should probably use TImode instead.
363 (define_mode_iterator VIMAX_AVX2
364 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
366 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
367 (define_mode_iterator SSESCALARMODE
368 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
370 (define_mode_iterator VI12_AVX2
371 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
374 (define_mode_iterator VI24_AVX2
375 [(V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
378 (define_mode_iterator VI124_AVX512F
379 [(V32QI "TARGET_AVX2") V16QI
380 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
383 (define_mode_iterator VI124_AVX2
384 [(V32QI "TARGET_AVX2") V16QI
385 (V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI])
388 (define_mode_iterator VI2_AVX2_AVX512BW
389 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
391 (define_mode_iterator VI48_AVX2
392 [(V8SI "TARGET_AVX2") V4SI
393 (V4DI "TARGET_AVX2") V2DI])
395 (define_mode_iterator VI248_AVX2_8_AVX512F
396 [(V16HI "TARGET_AVX2") V8HI
397 (V8SI "TARGET_AVX2") V4SI
398 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
400 (define_mode_iterator VI248_AVX512BW_AVX512VL
401 [(V32HI "TARGET_AVX512BW")
402 (V4DI "TARGET_AVX512VL") V16SI V8DI])
;; Assumes TARGET_AVX512VL as the baseline.
405 (define_mode_iterator VI24_AVX512BW_1
406 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
409 (define_mode_iterator VI48_AVX512F
410 [(V16SI "TARGET_AVX512F") V8SI V4SI
411 (V8DI "TARGET_AVX512F") V4DI V2DI])
413 (define_mode_iterator V48_AVX2
416 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
417 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; AVX-512 feature name for each vector mode, used as the prefix of
;; pattern names such as "<avx512>_load<mode>_mask".
(define_mode_attr avx512
  [;; 128-bit and 256-bit modes.
   (V16QI "avx512vl") (V32QI "avx512vl")
   (V8HI "avx512vl") (V16HI "avx512vl")
   (V4SI "avx512vl") (V8SI "avx512vl")
   (V2DI "avx512vl") (V4DI "avx512vl")
   (V4SF "avx512vl") (V8SF "avx512vl")
   (V2DF "avx512vl") (V4DF "avx512vl")
   ;; 512-bit modes with QI or HI elements.
   (V64QI "avx512bw") (V32HI "avx512bw")
   ;; 512-bit modes with SI, DI, SF or DF elements.
   (V16SI "avx512f") (V8DI "avx512f")
   (V16SF "avx512f") (V8DF "avx512f")])
;; Feature-name string per vector mode; one line per element type,
;; ordered 128-bit, 256-bit, 512-bit.
(define_mode_attr sse2_avx_avx512f
  [(V16QI "sse2")     (V32QI "avx")      (V64QI "avx512f")
   (V8HI "avx512vl")  (V16HI "avx512vl") (V32HI "avx512bw")
   (V4SI "sse2")      (V8SI "avx")       (V16SI "avx512f")
   (V2DI "avx512vl")  (V4DI "avx512vl")  (V8DI "avx512f")
   (V4SF "avx")       (V8SF "avx")       (V16SF "avx512f")
   (V2DF "avx")       (V4DF "avx")       (V8DF "avx512f")])
;; Feature-name string per integer vector mode, grouped by the name
;; each mode maps to.
(define_mode_attr sse2_avx2
  [;; 128-bit modes.
   (V16QI "sse2") (V8HI "sse2") (V4SI "sse2") (V2DI "sse2") (V1TI "sse2")
   ;; 256-bit modes.
   (V32QI "avx2") (V16HI "avx2") (V8SI "avx2") (V4DI "avx2") (V2TI "avx2")
   ;; 512-bit modes.
   (V64QI "avx512bw") (V32HI "avx512bw") (V4TI "avx512bw")
   (V16SI "avx512f") (V8DI "avx512f")])
442 (define_mode_attr ssse3_avx2
443 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
444 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
445 (V4SI "ssse3") (V8SI "avx2")
446 (V2DI "ssse3") (V4DI "avx2")
447 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
;; Feature-name string per integer vector mode, grouped by the name
;; each mode maps to.  Note that V8DI maps to "avx512dq" while V16SI
;; maps to "avx512f".
(define_mode_attr sse4_1_avx2
  [;; 128-bit modes.
   (V16QI "sse4_1") (V8HI "sse4_1") (V4SI "sse4_1") (V2DI "sse4_1")
   ;; 256-bit modes.
   (V32QI "avx2") (V16HI "avx2") (V8SI "avx2") (V4DI "avx2")
   ;; 512-bit modes.
   (V64QI "avx512bw") (V32HI "avx512bw")
   (V16SI "avx512f")
   (V8DI "avx512dq")])
455 (define_mode_attr avx_avx2
456 [(V4SF "avx") (V2DF "avx")
457 (V8SF "avx") (V4DF "avx")
458 (V4SI "avx2") (V2DI "avx2")
459 (V8SI "avx2") (V4DI "avx2")])
461 (define_mode_attr vec_avx2
462 [(V16QI "vec") (V32QI "avx2")
463 (V8HI "vec") (V16HI "avx2")
464 (V4SI "vec") (V8SI "avx2")
465 (V2DI "vec") (V4DI "avx2")])
;; Feature-name string per vector mode, grouped by the name each mode
;; maps to.  HI element modes map to AVX-512 names at every width.
(define_mode_attr avx2_avx512
  [;; 128-bit and 256-bit modes with SI, DI, SF or DF elements.
   (V4SI "avx2") (V8SI "avx2")
   (V2DI "avx2") (V4DI "avx2")
   (V4SF "avx2") (V8SF "avx2")
   (V2DF "avx2") (V4DF "avx2")
   ;; 512-bit modes with SI, DI, SF or DF elements.
   (V16SI "avx512f") (V8DI "avx512f")
   (V16SF "avx512f") (V8DF "avx512f")
   ;; HI element modes.
   (V8HI "avx512vl") (V16HI "avx512vl")
   (V32HI "avx512bw")])
;; Domain letter spliced into lane-shuffle mnemonics such as
;; vextract<shuffletype>64x4 and vbroadcast<shuffletype>32x4:
;; "f" for floating-point element modes, "i" for integer element modes.
;; Those are the only two letters that form valid mnemonics, so every
;; integer mode (V16HI included) must map to "i".
(define_mode_attr shuffletype
  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
   (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
   (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
   (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
   (V64QI "i") (V1TI "i") (V2TI "i")])
481 (define_mode_attr ssequartermode
482 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
484 (define_mode_attr ssedoublemodelower
485 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
486 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
487 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; Mode of twice the size for each listed vector mode.  The table mixes
;; two conventions: some modes double the element count, others keep
;; the element count and double the element width.
(define_mode_attr ssedoublemode
  [;; Same element type, twice as many elements.
   (V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
   (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
   ;; Same element count, element twice as wide.
   (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
   (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
495 (define_mode_attr ssebytemode
496 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
498 ;; All 128bit vector integer modes
499 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
501 ;; All 256bit vector integer modes
502 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
504 ;; All 512bit vector integer modes
505 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
507 ;; Various 128bit vector integer mode combinations
508 (define_mode_iterator VI12_128 [V16QI V8HI])
509 (define_mode_iterator VI14_128 [V16QI V4SI])
510 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
511 (define_mode_iterator VI24_128 [V8HI V4SI])
512 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
513 (define_mode_iterator VI48_128 [V4SI V2DI])
515 ;; Various 256bit and 512 vector integer mode combinations
516 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
517 (define_mode_iterator VI124_256_AVX512F_AVX512BW
519 (V64QI "TARGET_AVX512BW")
520 (V32HI "TARGET_AVX512BW")
521 (V16SI "TARGET_AVX512F")])
522 (define_mode_iterator VI48_256 [V8SI V4DI])
523 (define_mode_iterator VI48_512 [V16SI V8DI])
524 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
525 (define_mode_iterator VI_AVX512BW
526 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
528 ;; Int-float size matches
529 (define_mode_iterator VI4F_128 [V4SI V4SF])
530 (define_mode_iterator VI8F_128 [V2DI V2DF])
531 (define_mode_iterator VI4F_256 [V8SI V8SF])
532 (define_mode_iterator VI8F_256 [V4DI V4DF])
533 (define_mode_iterator VI8F_256_512
534 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
535 (define_mode_iterator VI48F_256_512
537 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
538 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
539 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
540 (define_mode_iterator VF48_I1248
541 [V16SI V16SF V8DI V8DF V32HI V64QI])
542 (define_mode_iterator VI48F
543 [V16SI V16SF V8DI V8DF
544 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
545 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
546 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
547 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
548 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
;; Required SSE level for each floating-point mode, scalar or vector.
;; Single-precision modes come first, then double-precision modes.
(define_mode_attr sse
  [(SF "sse")
   (V4SF "sse")
   (V8SF "avx")
   (V16SF "avx512f")
   (DF "sse2")
   (V2DF "sse2")
   (V4DF "avx")
   (V8DF "avx512f")])
557 (define_mode_attr sse2
558 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
559 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
561 (define_mode_attr sse3
562 [(V16QI "sse3") (V32QI "avx")])
564 (define_mode_attr sse4_1
565 [(V4SF "sse4_1") (V2DF "sse4_1")
566 (V8SF "avx") (V4DF "avx")
;; Size suffix per vector mode: "512" for 512-bit modes, "256" for
;; 256-bit modes and the empty string for 128-bit modes.
(define_mode_attr avxsizesuffix
  [;; 512-bit modes.
   (V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
   (V16SF "512") (V8DF "512")
   ;; 256-bit modes.
   (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
   (V8SF "256") (V4DF "256")
   ;; 128-bit modes.
   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
   (V4SF "") (V2DF "")])
577 ;; SSE instruction mode
578 (define_mode_attr sseinsnmode
579 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
580 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
581 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
582 (V16SF "V16SF") (V8DF "V8DF")
583 (V8SF "V8SF") (V4DF "V4DF")
584 (V4SF "V4SF") (V2DF "V2DF")
;; Integer mode of the mask operand that goes with each vector mode.
;; In this table the mask mode always has at least one bit per vector
;; element; QImode is the smallest mask mode used.
(define_mode_attr avx512fmaskmode
  [;; 64 elements.
   (V64QI "DI")
   ;; 32 elements.
   (V32QI "SI") (V32HI "SI")
   ;; 16 elements.
   (V16QI "HI") (V16HI "HI") (V16SI "HI") (V16SF "HI")
   ;; 8 elements or fewer.
   (V8HI "QI") (V4HI "QI")
   (V8SI "QI") (V4SI "QI")
   (V8DI "QI") (V4DI "QI") (V2DI "QI")
   (V8SF "QI") (V4SF "QI")
   (V8DF "QI") (V4DF "QI") (V2DF "QI")])
;; Integer vector mode with the same size and element count as the
;; given vector mode.  Float modes map to their integer counterparts;
;; integer modes map to themselves.
(define_mode_attr sseintvecmode
  [;; Float modes.
   (V16SF "V16SI") (V8SF "V8SI") (V4SF "V4SI")
   (V8DF "V8DI") (V4DF "V4DI") (V2DF "V2DI")
   ;; Integer modes (identity).
   (V16SI "V16SI") (V8SI "V8SI") (V4SI "V4SI")
   (V8DI "V8DI") (V4DI "V4DI") (V2DI "V2DI")
   (V32HI "V32HI") (V16HI "V16HI") (V8HI "V8HI")
   (V64QI "V64QI") (V32QI "V32QI") (V16QI "V16QI")])
608 (define_mode_attr sseintvecmode2
609 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
610 (V8SF "OI") (V4SF "TI")])
612 (define_mode_attr sseintvecmodelower
613 [(V16SF "v16si") (V8DF "v8di")
614 (V8SF "v8si") (V4DF "v4di")
615 (V4SF "v4si") (V2DF "v2di")
616 (V8SI "v8si") (V4DI "v4di")
617 (V4SI "v4si") (V2DI "v2di")
618 (V16HI "v16hi") (V8HI "v8hi")
619 (V32QI "v32qi") (V16QI "v16qi")])
;; Vector mode with the same element type and twice as many elements.
(define_mode_attr ssedoublevecmode
  [;; 128-bit to 256-bit.
   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
   (V4SF "V8SF") (V2DF "V4DF")
   ;; 256-bit to 512-bit.
   (V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
   (V8SF "V16SF") (V4DF "V8DF")])
628 ;; Mapping of vector modes to a vector mode of half size
629 (define_mode_attr ssehalfvecmode
630 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
631 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
632 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
633 (V16SF "V8SF") (V8DF "V4DF")
634 (V8SF "V4SF") (V4DF "V2DF")
;; Mapping of vector modes to the packed single mode of the same size
;; Packed single-precision vector mode of the same total size as the
;; given mode, grouped here by the resulting mode.
(define_mode_attr ssePSmode
  [;; 512-bit modes.
   (V16SF "V16SF") (V8DF "V16SF")
   (V64QI "V16SF") (V32HI "V16SF") (V16SI "V16SF") (V8DI "V16SF")
   (V4TI "V16SF")
   ;; 256-bit modes.
   (V8SF "V8SF") (V4DF "V8SF")
   (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")
   (V2TI "V8SF")
   ;; 128-bit modes.
   (V4SF "V4SF") (V2DF "V4SF")
   (V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
   (V1TI "V4SF")])
649 (define_mode_attr ssePSmode2
650 [(V8DI "V8SF") (V4DI "V4SF")])
;; Scalar element mode of each vector mode; one line per element type,
;; ordered 128-bit, 256-bit, 512-bit.
(define_mode_attr ssescalarmode
  [(V16QI "QI") (V32QI "QI") (V64QI "QI")
   (V8HI "HI")  (V16HI "HI") (V32HI "HI")
   (V4SI "SI")  (V8SI "SI")  (V16SI "SI")
   (V2DI "DI")  (V4DI "DI")  (V8DI "DI")
   (V4SF "SF")  (V8SF "SF")  (V16SF "SF")
   (V2DF "DF")  (V4DF "DF")  (V8DF "DF")])
;; 128-bit vector mode with the same element type as the given mode;
;; one line per element type, ordered 128-bit, 256-bit, 512-bit.
(define_mode_attr ssexmmmode
  [(V16QI "V16QI") (V32QI "V16QI") (V64QI "V16QI")
   (V8HI "V8HI")   (V16HI "V8HI")  (V32HI "V8HI")
   (V4SI "V4SI")   (V8SI "V4SI")   (V16SI "V4SI")
   (V2DI "V2DI")   (V4DI "V2DI")   (V8DI "V2DI")
   (V4SF "V4SF")   (V8SF "V4SF")   (V16SF "V4SF")
   (V2DF "V2DF")   (V4DF "V2DF")   (V8DF "V2DF")])
670 ;; Pointer size override for scalar modes (Intel asm dialect)
671 (define_mode_attr iptr
672 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
673 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
674 (V8SF "k") (V4DF "q")
675 (V4SF "k") (V2DF "q")
;; Element count of each vector mode, as a decimal string.
(define_mode_attr ssescalarnum
  [;; 512-bit modes.
   (V64QI "64") (V16SI "16") (V8DI "8")
   (V16SF "16") (V8DF "8")
   ;; 256-bit modes.
   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
   (V8SF "8") (V4DF "4")
   ;; 128-bit modes.
   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
   (V4SF "4") (V2DF "2")])
;; Element-index mask for each vector mode: the element count minus
;; one, as a decimal string.  Grouped here by element count.
(define_mode_attr ssescalarnummask
  [(V32QI "31")
   (V16QI "15") (V16HI "15")
   (V8HI "7") (V8SI "7") (V8SF "7")
   (V4SI "3") (V4DI "3") (V4SF "3") (V4DF "3")
   (V2DI "1") (V2DF "1")])
;; Element size in bits for each listed vector mode, as a decimal
;; string.  Among the float modes only V16SF and V8DF are listed.
(define_mode_attr ssescalarsize
  [(V64QI "8") (V32QI "8") (V16QI "8")
   (V32HI "16") (V16HI "16") (V8HI "16")
   (V16SI "32") (V8SI "32") (V4SI "32")
   (V8DI "64") (V4DI "64") (V2DI "64")
   (V16SF "32")
   (V8DF "64")])
701 ;; SSE prefix for integer vector modes
702 (define_mode_attr sseintprefix
703 [(V2DI "p") (V2DF "")
708 (V16SI "p") (V16SF "")
709 (V16QI "p") (V8HI "p")
710 (V32QI "p") (V16HI "p")
711 (V64QI "p") (V32HI "p")])
713 ;; SSE scalar suffix for vector modes
714 (define_mode_attr ssescalarmodesuffix
716 (V8SF "ss") (V4DF "sd")
717 (V4SF "ss") (V2DF "sd")
718 (V8SI "ss") (V4DI "sd")
;; Unpack direction of the pack/unpack mode pairs: integer vector mode
;; of the same size with elements twice as wide and half as many.
(define_mode_attr sseunpackmode
  [;; 128-bit modes.
   (V16QI "V8HI")
   (V8HI "V4SI")
   (V4SI "V2DI")
   ;; 256-bit modes.
   (V32QI "V16HI")
   (V16HI "V8SI")
   (V8SI "V4DI")
   ;; 512-bit modes.
   (V32HI "V16SI")
   (V64QI "V32HI")
   (V16SI "V8DI")])
;; Pack direction of the pack/unpack mode pairs: integer vector mode of
;; the same size with elements half as wide and twice as many.
(define_mode_attr ssepackmode
  [;; 128-bit modes.
   (V8HI "V16QI")
   (V4SI "V8HI")
   (V2DI "V4SI")
   ;; 256-bit modes.
   (V16HI "V32QI")
   (V8SI "V16HI")
   (V4DI "V8SI")
   ;; 512-bit modes.
   (V32HI "V64QI")
   (V16SI "V32HI")
   (V8DI "V16SI")])
732 ;; Mapping of the max integer size for xop rotate immediate constraint
733 (define_mode_attr sserotatemax
734 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
736 ;; Mapping of mode to cast intrinsic name
737 (define_mode_attr castmode
738 [(V8SI "si") (V8SF "ps") (V4DF "pd")
739 (V16SI "si") (V16SF "ps") (V8DF "pd")])
741 ;; Instruction suffix for sign and zero extensions.
742 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; Lane-type suffix per 256-bit and 512-bit vector mode.
;; 256-bit float modes use "f128"; 256-bit integer modes use "%~128",
;; which gives i128 with TARGET_AVX2 and f128 otherwise.
;; 512-bit modes use "f64x4" (float) or "i64x4" (integer).
(define_mode_attr i128
  [;; 256-bit modes.
   (V8SF "f128") (V4DF "f128")
   (V32QI "%~128") (V16HI "%~128") (V8SI "%~128") (V4DI "%~128")
   ;; 512-bit modes.
   (V16SF "f64x4") (V8DF "f64x4")
   (V64QI "i64x4") (V32HI "i64x4") (V16SI "i64x4") (V8DI "i64x4")])
752 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
753 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
;; Mapping for dbpsadbw modes
756 (define_mode_attr dbpsadbwmode
757 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
;; Broadcast suffix for each vector mode, keyed on its element type;
;; one line per element type, ordered 128-bit, 256-bit, 512-bit.
(define_mode_attr bcstscalarsuff
  [(V16QI "b") (V32QI "b") (V64QI "b")
   (V8HI "w")  (V16HI "w") (V32HI "w")
   (V4SI "d")  (V8SI "d")  (V16SI "d")
   (V2DI "q")  (V4DI "q")  (V8DI "q")
   (V4SF "ss") (V8SF "ss") (V16SF "ss")
   (V2DF "sd") (V4DF "sd") (V8DF "sd")])
;; Ties the mode of an assembler operand to the mode iterator:
;; "t" for every 256-bit mode, "g" for every 512-bit mode.
(define_mode_attr concat_tg_mode
  [;; 256-bit modes.
   (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t")
   (V8SF "t") (V4DF "t")
   ;; 512-bit modes.
   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g")
   (V16SF "g") (V8DF "g")])
774 ;; Include define_subst patterns for instructions with mask
777 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
785 ;; All of these patterns are enabled for SSE1 as well as SSE2.
786 ;; This is essential for maintaining stable calling conventions.
788 (define_expand "mov<mode>"
789 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
790 (match_operand:VMOVE 1 "nonimmediate_operand"))]
793 ix86_expand_vector_move (<MODE>mode, operands);
797 (define_insn "*mov<mode>_internal"
798 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
799 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
801 && (register_operand (operands[0], <MODE>mode)
802 || register_operand (operands[1], <MODE>mode))"
804 int mode = get_attr_mode (insn);
805 switch (which_alternative)
808 return standard_sse_constant_opcode (insn, operands[1]);
811 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
812 in avx512f, so we need to use workarounds, to access sse registers
813 16-31, which are evex-only. In avx512vl we don't need workarounds. */
814 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
815 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
816 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
818 if (memory_operand (operands[0], <MODE>mode))
820 if (<MODE_SIZE> == 32)
821 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
822 else if (<MODE_SIZE> == 16)
823 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
827 else if (memory_operand (operands[1], <MODE>mode))
829 if (<MODE_SIZE> == 32)
830 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
831 else if (<MODE_SIZE> == 16)
832 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
837 /* Reg -> reg move is always aligned. Just use wider move. */
842 return "vmovaps\t{%g1, %g0|%g0, %g1}";
845 return "vmovapd\t{%g1, %g0|%g0, %g1}";
848 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
859 && (misaligned_operand (operands[0], <MODE>mode)
860 || misaligned_operand (operands[1], <MODE>mode)))
861 return "vmovups\t{%1, %0|%0, %1}";
863 return "%vmovaps\t{%1, %0|%0, %1}";
869 && (misaligned_operand (operands[0], <MODE>mode)
870 || misaligned_operand (operands[1], <MODE>mode)))
871 return "vmovupd\t{%1, %0|%0, %1}";
873 return "%vmovapd\t{%1, %0|%0, %1}";
878 && (misaligned_operand (operands[0], <MODE>mode)
879 || misaligned_operand (operands[1], <MODE>mode)))
880 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
881 : "vmovdqu\t{%1, %0|%0, %1}";
883 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
884 : "%vmovdqa\t{%1, %0|%0, %1}";
886 if (misaligned_operand (operands[0], <MODE>mode)
887 || misaligned_operand (operands[1], <MODE>mode))
888 return "vmovdqu64\t{%1, %0|%0, %1}";
890 return "vmovdqa64\t{%1, %0|%0, %1}";
899 [(set_attr "type" "sselog1,ssemov,ssemov")
900 (set_attr "prefix" "maybe_vex")
902 (cond [(and (match_test "<MODE_SIZE> == 16")
903 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
904 (and (eq_attr "alternative" "2")
905 (match_test "TARGET_SSE_TYPELESS_STORES"))))
906 (const_string "<ssePSmode>")
907 (match_test "TARGET_AVX")
908 (const_string "<sseinsnmode>")
909 (ior (not (match_test "TARGET_SSE2"))
910 (match_test "optimize_function_for_size_p (cfun)"))
911 (const_string "V4SF")
912 (and (eq_attr "alternative" "0")
913 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
916 (const_string "<sseinsnmode>")))])
918 (define_insn "<avx512>_load<mode>_mask"
919 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
920 (vec_merge:V48_AVX512VL
921 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
922 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
923 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
926 static char buf [64];
929 const char *sse_suffix;
931 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
934 sse_suffix = "<ssemodesuffix>";
939 sse_suffix = "<ssescalarsize>";
942 if (misaligned_operand (operands[1], <MODE>mode))
947 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
948 insn_op, align, sse_suffix);
951 [(set_attr "type" "ssemov")
952 (set_attr "prefix" "evex")
953 (set_attr "memory" "none,load")
954 (set_attr "mode" "<sseinsnmode>")])
956 (define_insn "<avx512>_load<mode>_mask"
957 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
958 (vec_merge:VI12_AVX512VL
959 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
960 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
961 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
963 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
964 [(set_attr "type" "ssemov")
965 (set_attr "prefix" "evex")
966 (set_attr "memory" "none,load")
967 (set_attr "mode" "<sseinsnmode>")])
;; Masked blend for the SI/DI/SF/DF element modes: elements of operand 0
;; come from operand 2 where the corresponding bit of mask operand 3 is
;; set, and from operand 1 otherwise.
;; V48_AVX512VL contains integer as well as float modes.  The integer
;; forms are spelled vpblendmd/vpblendmq and the float forms
;; vblendmps/vblendmpd, so the mnemonic needs <sseintprefix> ("p" for
;; integer modes, empty for float modes) in front of "blendm".
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
981 (define_insn "<avx512>_blendm<mode>"
982 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
983 (vec_merge:VI12_AVX512VL
984 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
985 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
986 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
988 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
989 [(set_attr "type" "ssemov")
990 (set_attr "prefix" "evex")
991 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of register operand 1 into memory operand 0 under mask
;; operand 2, for the SI/DI/SF/DF element modes.  The mnemonic is
;; assembled at output time from an opcode stem, an alignment letter
;; and an element suffix.
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  static char buf [64];
  const char *insn_op;
  const char *sse_suffix;
  /* Float element modes take the <ssemodesuffix> suffix, integer
     element modes the element size in bits.  */
  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
      sse_suffix = "<ssemodesuffix>";
      sse_suffix = "<ssescalarsize>";
  /* The memory reference is the destination, operand 0.  Operand 1 is
     always a register, so testing it could never detect a misaligned
     store.  */
  if (misaligned_operand (operands[0], <MODE>mode))
  snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
	    insn_op, align, sse_suffix);
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1031 (define_insn "<avx512>_store<mode>_mask"
1032 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1033 (vec_merge:VI12_AVX512VL
1034 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1036 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1038 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1039 [(set_attr "type" "ssemov")
1040 (set_attr "prefix" "evex")
1041 (set_attr "memory" "store")
1042 (set_attr "mode" "<sseinsnmode>")])
1044 (define_insn "sse2_movq128"
1045 [(set (match_operand:V2DI 0 "register_operand" "=x")
1048 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1049 (parallel [(const_int 0)]))
1052 "%vmovq\t{%1, %0|%0, %q1}"
1053 [(set_attr "type" "ssemov")
1054 (set_attr "prefix" "maybe_vex")
1055 (set_attr "mode" "TI")])
1057 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1058 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1059 ;; from memory, we'd prefer to load the memory directly into the %xmm
1060 ;; register. To facilitate this happy circumstance, this pattern won't
1061 ;; split until after register allocation. If the 64-bit value didn't
1062 ;; come from memory, this is the best we can do. This is much better
1063 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1066 (define_insn_and_split "movdi_to_sse"
1068 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1069 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1070 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1071 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1073 "&& reload_completed"
1076 if (register_operand (operands[1], DImode))
1078 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1079 Assemble the 64-bit DImode value in an xmm register. */
1080 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1081 gen_lowpart (SImode, operands[1])));
1082 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1083 gen_highpart (SImode, operands[1])));
1084 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1087 else if (memory_operand (operands[1], DImode))
1089 rtx tmp = gen_reg_rtx (V2DImode);
1090 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1091 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1098 [(set (match_operand:V4SF 0 "register_operand")
1099 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1100 "TARGET_SSE && reload_completed"
1103 (vec_duplicate:V4SF (match_dup 1))
1107 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1108 operands[2] = CONST0_RTX (V4SFmode);
;; After reload (TARGET_SSE2), rewrite a V2DF set whose source is a
;; zero_extended_scalar_load_operand as a vec_concat of operand 1
;; (narrowed to its DFmode subreg at byte offset 0) with a DFmode zero.
1112 [(set (match_operand:V2DF 0 "register_operand")
1113 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1114 "TARGET_SSE2 && reload_completed"
1115 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1117 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1118 operands[2] = CONST0_RTX (DFmode);
;; Expander for misaligned vector moves over every VMOVE mode.  Both
;; operands are nonimmediate; the expansion itself is delegated to
;; ix86_expand_vector_move_misalign.
1121 (define_expand "movmisalign<mode>"
1122 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1123 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1126 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned floating-point vector load (VF modes),
;; optionally masked via the <mask_name> substitution.  When the source is
;; a misaligned_operand a plain SET is emitted instead of the unspec form;
;; its source is operand 1, or a VEC_MERGE of operand 1 with the operands
;; indexed by 2 * <mask_applied> and 3 * <mask_applied>.
1130 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1131 [(set (match_operand:VF 0 "register_operand")
1132 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1134 "TARGET_SSE && <mask_mode512bit_condition>"
1136 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1137 just fine if misaligned_operand is true, and without the UNSPEC it can
1138 be combined with arithmetic instructions. If misaligned_operand is
1139 false, still emit UNSPEC_LOADU insn to honor user's request for
1142 && misaligned_operand (operands[1], <MODE>mode))
1144 rtx src = operands[1];
1146 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1147 operands[2 * <mask_applied>],
1148 operands[3 * <mask_applied>]);
1149 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn for the unaligned floating-point vector load (VF modes), optionally
;; masked.  The output code switches on the insn's "mode" attribute and
;; returns either the movups template or the movu<ssemodesuffix> template.
;; The "mode" attribute is computed as:
;;   <ssePSmode>  for 16-byte modes when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;;   <MODE>       when TARGET_AVX,
;;   V4SF         when optimizing the function for size,
;;   <MODE>       otherwise.
1154 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1155 [(set (match_operand:VF 0 "register_operand" "=v")
1157 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1159 "TARGET_SSE && <mask_mode512bit_condition>"
1161 switch (get_attr_mode (insn))
1166 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1168 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1171 [(set_attr "type" "ssemov")
1172 (set_attr "movu" "1")
1173 (set_attr "ssememalign" "8")
1174 (set_attr "prefix" "maybe_vex")
1176 (cond [(and (match_test "<MODE_SIZE> == 16")
1177 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1178 (const_string "<ssePSmode>")
1179 (match_test "TARGET_AVX")
1180 (const_string "<MODE>")
1181 (match_test "optimize_function_for_size_p (cfun)")
1182 (const_string "V4SF")
1184 (const_string "<MODE>")))])
;; Unaligned store of a floating-point vector (VF modes): register operand 1
;; to memory operand 0.  The output code switches on the "mode" attribute
;; and returns either the movups or the movu<ssemodesuffix> template.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES holds,
;; <MODE> under TARGET_AVX, V4SF when optimizing for size, else <MODE>.
1186 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1187 [(set (match_operand:VF 0 "memory_operand" "=m")
1189 [(match_operand:VF 1 "register_operand" "v")]
1193 switch (get_attr_mode (insn))
1198 return "%vmovups\t{%1, %0|%0, %1}";
1200 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1203 [(set_attr "type" "ssemov")
1204 (set_attr "movu" "1")
1205 (set_attr "ssememalign" "8")
1206 (set_attr "prefix" "maybe_vex")
1208 (cond [(and (match_test "<MODE_SIZE> == 16")
1209 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1210 (match_test "TARGET_SSE_TYPELESS_STORES")))
1211 (const_string "<ssePSmode>")
1212 (match_test "TARGET_AVX")
1213 (const_string "<MODE>")
1214 (match_test "optimize_function_for_size_p (cfun)")
1215 (const_string "V4SF")
1217 (const_string "<MODE>")))])
;; Masked unaligned store of a floating-point vector (VF_AVX512VL modes).
;; The RTL is a vec_merge controlled by mask register operand 2
;; (constraint "Yk"); the emitted vmovups / vmovu<ssemodesuffix> template
;; carries the mask as %{%2%} on the memory destination.
1219 (define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
1220 [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
1221 (vec_merge:VF_AVX512VL
1223 [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
1226 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1229 switch (get_attr_mode (insn))
1234 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1236 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1239 [(set_attr "type" "ssemov")
1240 (set_attr "movu" "1")
1241 (set_attr "memory" "store")
1242 (set_attr "prefix" "evex")
1243 (set_attr "mode" "<sseinsnmode>")])
1245 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1246 just fine if misaligned_operand is true, and without the UNSPEC it can
1247 be combined with arithmetic instructions. If misaligned_operand is
1248 false, still emit UNSPEC_LOADU insn to honor user's request for misaligned load. */
;; Expander for the unaligned integer vector load over the VI1 modes,
;; optionally masked.  When the source is a misaligned_operand a plain SET
;; is emitted; its source is operand 1, or a VEC_MERGE of operand 1 with
;; the operands indexed by 2 * <mask_applied> and 3 * <mask_applied>.
1250 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1251 [(set (match_operand:VI1 0 "register_operand")
1253 [(match_operand:VI1 1 "nonimmediate_operand")]
1255 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1258 && misaligned_operand (operands[1], <MODE>mode))
1260 rtx src = operands[1];
1262 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1263 operands[2 * <mask_applied>],
1264 operands[3 * <mask_applied>]);
1265 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Expander for the unaligned integer vector load over the
;; VI_ULOADSTORE_BW_AVX512VL modes, optionally masked.  For a
;; misaligned_operand source a plain SET is emitted, with the same
;; VEC_MERGE construction as the VI1 variant of this expander.
1270 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1271 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
1272 (unspec:VI_ULOADSTORE_BW_AVX512VL
1273 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
1277 if (misaligned_operand (operands[1], <MODE>mode))
1279 rtx src = operands[1];
1281 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1282 operands[2 * <mask_applied>],
1283 operands[3 * <mask_applied>]);
1284 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Expander for the unaligned integer vector load over the
;; VI_ULOADSTORE_F_AVX512VL modes, optionally masked.  For a
;; misaligned_operand source a plain SET is emitted; its source is
;; operand 1, or a VEC_MERGE of operand 1 with the operands indexed by
;; 2 * <mask_applied> and 3 * <mask_applied>.
1289 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1290 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
1291 (unspec:VI_ULOADSTORE_F_AVX512VL
1292 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
1296 if (misaligned_operand (operands[1], <MODE>mode))
1298 rtx src = operands[1];
1300 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1301 operands[2 * <mask_applied>],
1302 operands[3 * <mask_applied>]);
1303 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn for the unaligned integer vector load over the VI1 modes,
;; optionally masked.  The output code switches on the "mode" attribute:
;; it returns the movups template, or movdqu when AVX512VL and AVX512BW are
;; not both enabled, or vmovdqu<ssescalarsize> with the mask operand.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <sseinsnmode> under TARGET_AVX,
;; V4SF when optimizing for size, else <sseinsnmode>.
1308 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1309 [(set (match_operand:VI1 0 "register_operand" "=v")
1311 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
1313 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1315 switch (get_attr_mode (insn))
1319 return "%vmovups\t{%1, %0|%0, %1}";
1321 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1322 return "%vmovdqu\t{%1, %0|%0, %1}";
1324 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1327 [(set_attr "type" "ssemov")
1328 (set_attr "movu" "1")
1329 (set_attr "ssememalign" "8")
1330 (set (attr "prefix_data16")
1332 (match_test "TARGET_AVX")
1334 (const_string "1")))
1335 (set_attr "prefix" "maybe_vex")
1337 (cond [(and (match_test "<MODE_SIZE> == 16")
1338 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1339 (const_string "<ssePSmode>")
1340 (match_test "TARGET_AVX")
1341 (const_string "<sseinsnmode>")
1342 (match_test "optimize_function_for_size_p (cfun)")
1343 (const_string "V4SF")
1345 (const_string "<sseinsnmode>")))])
;; Insn for the unaligned integer vector load over the
;; VI_ULOADSTORE_BW_AVX512VL modes, optionally masked.  Always emits
;; vmovdqu<ssescalarsize>, with the mask operand on the destination.
;; The output template is a plain string with no trailing `;', as in the
;; matching store patterns.
1347 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1348 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
1349 (unspec:VI_ULOADSTORE_BW_AVX512VL
1350 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
1353 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1354 [(set_attr "type" "ssemov")
1355 (set_attr "movu" "1")
1356 (set_attr "ssememalign" "8")
1357 (set_attr "prefix" "maybe_evex")])
;; Insn for the unaligned integer vector load over the
;; VI_ULOADSTORE_F_AVX512VL modes, optionally masked.  Always emits
;; vmovdqu<ssescalarsize>, with the mask operand on the destination.
;; The output template is a plain string with no trailing `;', as in the
;; matching store patterns.
1359 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1360 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
1361 (unspec:VI_ULOADSTORE_F_AVX512VL
1362 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
1365 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1366 [(set_attr "type" "ssemov")
1367 (set_attr "movu" "1")
1368 (set_attr "ssememalign" "8")
1369 (set_attr "prefix" "maybe_evex")])
;; Unaligned store of an integer vector (VI1 modes): register operand 1 to
;; memory operand 0.  The output code switches on the "mode" attribute:
;; it returns the movups template, or movdqu when AVX512VL and AVX512BW are
;; not both enabled, or vmovdqu<ssescalarsize>.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES holds,
;; <sseinsnmode> under TARGET_AVX, V4SF when optimizing for size, else
;; <sseinsnmode>.
1371 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1372 [(set (match_operand:VI1 0 "memory_operand" "=m")
1374 [(match_operand:VI1 1 "register_operand" "v")]
1378 switch (get_attr_mode (insn))
1383 return "%vmovups\t{%1, %0|%0, %1}";
1389 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1390 return "%vmovdqu\t{%1, %0|%0, %1}";
1392 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1396 [(set_attr "type" "ssemov")
1397 (set_attr "movu" "1")
1398 (set_attr "ssememalign" "8")
1399 (set (attr "prefix_data16")
1401 (match_test "TARGET_AVX")
1403 (const_string "1")))
1404 (set_attr "prefix" "maybe_vex")
1406 (cond [(and (match_test "<MODE_SIZE> == 16")
1407 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1408 (match_test "TARGET_SSE_TYPELESS_STORES")))
1409 (const_string "<ssePSmode>")
1410 (match_test "TARGET_AVX")
1411 (const_string "<sseinsnmode>")
1412 (match_test "optimize_function_for_size_p (cfun)")
1413 (const_string "V4SF")
1415 (const_string "<sseinsnmode>")))])
;; Unaligned store of an integer vector over the VI_ULOADSTORE_BW_AVX512VL
;; modes: register operand 1 to memory operand 0, always emitted as
;; vmovdqu<ssescalarsize>.
1417 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1418 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1419 (unspec:VI_ULOADSTORE_BW_AVX512VL
1420 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1423 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1424 [(set_attr "type" "ssemov")
1425 (set_attr "movu" "1")
1426 (set_attr "ssememalign" "8")
1427 (set_attr "prefix" "maybe_evex")])
;; Unaligned store of an integer vector over the VI_ULOADSTORE_F_AVX512VL
;; modes: register operand 1 to memory operand 0, always emitted as
;; vmovdqu<ssescalarsize>.  The "prefix" attribute is "maybe_evex", matching
;; the BW store variant and the loaddqu patterns that emit the same
;; mnemonic.
1429 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1430 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
1431 (unspec:VI_ULOADSTORE_F_AVX512VL
1432 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
1435 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1436 [(set_attr "type" "ssemov")
1437 (set_attr "movu" "1")
1438 (set_attr "ssememalign" "8")
1439 (set_attr "prefix" "maybe_evex")])
;; Masked unaligned store of an integer vector over the VI48_AVX512VL
;; modes.  The RTL is a vec_merge controlled by mask register operand 2
;; (constraint "Yk"); the template is vmovdqu<ssescalarsize> with the mask
;; written as %{%2%} on the memory destination.
1441 (define_insn "<avx512>_storedqu<mode>_mask"
1442 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1443 (vec_merge:VI48_AVX512VL
1444 (unspec:VI48_AVX512VL
1445 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
1448 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1450 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1451 [(set_attr "type" "ssemov")
1452 (set_attr "movu" "1")
1453 (set_attr "memory" "store")
1454 (set_attr "prefix" "evex")
1455 (set_attr "mode" "<sseinsnmode>")])
;; Masked unaligned store of an integer vector over the VI12_AVX512VL
;; modes.  Same shape as the VI48_AVX512VL variant: a vec_merge under mask
;; register operand 2, emitted as vmovdqu<ssescalarsize> with %{%2%}.
1457 (define_insn "<avx512>_storedqu<mode>_mask"
1458 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1459 (vec_merge:VI12_AVX512VL
1460 (unspec:VI12_AVX512VL
1461 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1464 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1466 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1467 [(set_attr "type" "ssemov")
1468 (set_attr "movu" "1")
1469 (set_attr "memory" "store")
1470 (set_attr "prefix" "evex")
1471 (set_attr "mode" "<sseinsnmode>")])
;; lddqu load over the VI1 modes: memory operand 1 into register operand 0
;; (constraint "x").  The prefix_data16 and prefix_rep attributes are
;; conditional on TARGET_AVX; the visible arms give "0" and "1"
;; respectively.
1473 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1474 [(set (match_operand:VI1 0 "register_operand" "=x")
1475 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1478 "%vlddqu\t{%1, %0|%0, %1}"
1479 [(set_attr "type" "ssemov")
1480 (set_attr "movu" "1")
1481 (set_attr "ssememalign" "8")
1482 (set (attr "prefix_data16")
1484 (match_test "TARGET_AVX")
1486 (const_string "0")))
1487 (set (attr "prefix_rep")
1489 (match_test "TARGET_AVX")
1491 (const_string "1")))
1492 (set_attr "prefix" "maybe_vex")
1493 (set_attr "mode" "<sseinsnmode>")])
;; movnti: store general register operand 1 to memory operand 0 for the
;; SWI48 integer modes, expressed as an unspec of the source.
1495 (define_insn "sse2_movnti<mode>"
1496 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1497 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1500 "movnti\t{%1, %0|%0, %1}"
1501 [(set_attr "type" "ssemov")
1502 (set_attr "prefix_data16" "0")
1503 (set_attr "mode" "<MODE>")])
;; movnt<ssemodesuffix>: store floating-point vector register operand 1
;; (VF modes) to memory operand 0, expressed as an unspec of the source.
1505 (define_insn "<sse>_movnt<mode>"
1506 [(set (match_operand:VF 0 "memory_operand" "=m")
1508 [(match_operand:VF 1 "register_operand" "v")]
1511 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1512 [(set_attr "type" "ssemov")
1513 (set_attr "prefix" "maybe_vex")
1514 (set_attr "mode" "<MODE>")])
;; movntdq: store integer vector register operand 1 (VI8 modes) to memory
;; operand 0, expressed as an unspec of the source.
;; NOTE(review): "type" is "ssecvt" here, whereas the movnti and
;; movnt<ssemodesuffix> patterns next to it use "ssemov" -- confirm this
;; is intentional.
1516 (define_insn "<sse2>_movnt<mode>"
1517 [(set (match_operand:VI8 0 "memory_operand" "=m")
1518 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1521 "%vmovntdq\t{%1, %0|%0, %1}"
1522 [(set_attr "type" "ssecvt")
1523 (set (attr "prefix_data16")
1525 (match_test "TARGET_AVX")
1527 (const_string "1")))
1528 (set_attr "prefix" "maybe_vex")
1529 (set_attr "mode" "<sseinsnmode>")])
1531 ; Expand patterns for non-temporal stores. At the moment, only those
1532 ; that directly map to insns are defined; it would be possible to
1533 ; define patterns for other modes that would expand to several insns.
1535 ;; Modes handled by storent patterns.
;; Each entry is gated by the target condition attached to it: DI needs
;; SSE2 on a 64-bit target, SI needs SSE2, SF and DF need SSE4A, 512-bit
;; vectors need AVX512F, 256-bit vectors need AVX, V2DI and V2DF need SSE2,
;; and V4SF carries no extra condition.
1536 (define_mode_iterator STORENT_MODE
1537 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1538 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1539 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1540 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1541 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; storent<mode> expander over STORENT_MODE: a set of memory operand 0
;; from an unspec of register operand 1.
1543 (define_expand "storent<mode>"
1544 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1545 (unspec:STORENT_MODE
1546 [(match_operand:STORENT_MODE 1 "register_operand")]
1550 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1552 ;; Parallel floating point arithmetic
1554 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unary <code> operation on VF vectors; the expansion is
;; delegated to ix86_expand_fp_absneg_operator.
1556 (define_expand "<code><mode>2"
1557 [(set (match_operand:VF 0 "register_operand")
1559 (match_operand:VF 1 "register_operand")))]
1561 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Abs/neg of a VF vector (operator 3 matches absneg_operator) with an
;; additional (use ...) of operand 2.  Split after reload into a single
;; logical operation on op1 and op2: XOR when operator 3 is NEG, AND
;; otherwise.  When operand 1 is in memory the two inputs are swapped so
;; that op1 is operand 2 and op2 is operand 1.
1563 (define_insn_and_split "*absneg<mode>2"
1564 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1565 (match_operator:VF 3 "absneg_operator"
1566 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1567 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1570 "&& reload_completed"
1573 enum rtx_code absneg_op;
1579 if (MEM_P (operands[1]))
1580 op1 = operands[2], op2 = operands[1];
1582 op1 = operands[1], op2 = operands[2];
1587 if (rtx_equal_p (operands[0], operands[1]))
1593 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1594 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1595 t = gen_rtx_SET (VOIDmode, operands[0], t);
1599 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander for vector add/subtract on VF modes, with optional mask and
;; rounding substitutions.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1601 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1602 [(set (match_operand:VF 0 "register_operand")
1604 (match_operand:VF 1 "<round_nimm_predicate>")
1605 (match_operand:VF 2 "<round_nimm_predicate>")))]
1606 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1607 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for vector add/subtract on VF modes.  Two alternatives, selected by
;; the "isa" attribute: the two-operand form (noavx, operand 1 tied to
;; operand 0) and the three-operand v-prefixed form (avx) that also
;; carries the mask and rounding operands.  The condition additionally
;; requires ix86_binary_operator_ok.
1609 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1610 [(set (match_operand:VF 0 "register_operand" "=x,v")
1612 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1613 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1614 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1616 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1617 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1618 [(set_attr "isa" "noavx,avx")
1619 (set_attr "type" "sseadd")
1620 (set_attr "prefix" "<mask_prefix3>")
1621 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed add/subtract insn on the 128-bit VF_128 modes, with an
;; optional rounding substitution.  Two alternatives by "isa": two-operand
;; (noavx) and three-operand v-prefixed (avx).  The "mode" attribute is
;; the scalar mode <ssescalarmode>.
1623 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1624 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1627 (match_operand:VF_128 1 "register_operand" "0,v")
1628 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1633 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1634 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1635 [(set_attr "isa" "noavx,avx")
1636 (set_attr "type" "sseadd")
1637 (set_attr "prefix" "<round_prefix>")
1638 (set_attr "mode" "<ssescalarmode>")])
;; Expander for vector multiply on VF modes, with optional mask and
;; rounding substitutions.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy (MULT, ...).
1640 (define_expand "mul<mode>3<mask_name><round_name>"
1641 [(set (match_operand:VF 0 "register_operand")
1643 (match_operand:VF 1 "<round_nimm_predicate>")
1644 (match_operand:VF 2 "<round_nimm_predicate>")))]
1645 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1646 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn for vector multiply on VF modes.  Operand 1 is marked commutative
;; ("%") in the first alternative.  Alternatives by "isa": mul<suffix>
;; (noavx, two-operand) and vmul<suffix> (avx, three-operand, with mask and
;; rounding operands).  btver2_decode is "direct" / "double" respectively.
1648 (define_insn "*mul<mode>3<mask_name><round_name>"
1649 [(set (match_operand:VF 0 "register_operand" "=x,v")
1651 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1652 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1653 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1655 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1656 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1657 [(set_attr "isa" "noavx,avx")
1658 (set_attr "type" "ssemul")
1659 (set_attr "prefix" "<mask_prefix3>")
1660 (set_attr "btver2_decode" "direct,double")
1661 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed multiply/divide insn on the 128-bit VF_128 modes, with
;; an optional rounding substitution.  The "type" attribute is derived from
;; the mnemonic ("sse<multdiv_mnemonic>"); the "mode" attribute is the
;; scalar mode <ssescalarmode>.
1663 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1664 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1667 (match_operand:VF_128 1 "register_operand" "0,v")
1668 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1673 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1674 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1675 [(set_attr "isa" "noavx,avx")
1676 (set_attr "type" "sse<multdiv_mnemonic>")
1677 (set_attr "prefix" "<round_prefix>")
1678 (set_attr "btver2_decode" "direct,double")
1679 (set_attr "mode" "<ssescalarmode>")])
;; Expander for vector divide on the VF2 modes; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy (DIV, ...).
1681 (define_expand "div<mode>3"
1682 [(set (match_operand:VF2 0 "register_operand")
1683 (div:VF2 (match_operand:VF2 1 "register_operand")
1684 (match_operand:VF2 2 "nonimmediate_operand")))]
1686 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Expander for vector divide on the VF1 modes.  After the usual operand
;; fix-up, the division is expanded through ix86_emit_swdivsf when a
;; conjunction of conditions holds, including TARGET_RECIP_VEC_DIV, not
;; optimizing the insn for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
1688 (define_expand "div<mode>3"
1689 [(set (match_operand:VF1 0 "register_operand")
1690 (div:VF1 (match_operand:VF1 1 "register_operand")
1691 (match_operand:VF1 2 "nonimmediate_operand")))]
1694 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1697 && TARGET_RECIP_VEC_DIV
1698 && !optimize_insn_for_size_p ()
1699 && flag_finite_math_only && !flag_trapping_math
1700 && flag_unsafe_math_optimizations)
1702 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Insn for vector divide on VF modes, with optional mask and rounding
;; substitutions.  Operand 1 must be a register.  Alternatives by "isa":
;; div<suffix> (noavx, two-operand) and vdiv<suffix> (avx, three-operand).
1707 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1708 [(set (match_operand:VF 0 "register_operand" "=x,v")
1710 (match_operand:VF 1 "register_operand" "0,v")
1711 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1712 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1714 div<ssemodesuffix>\t{%2, %0|%0, %2}
1715 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1716 [(set_attr "isa" "noavx,avx")
1717 (set_attr "type" "ssediv")
1718 (set_attr "prefix" "<mask_prefix3>")
1719 (set_attr "mode" "<MODE>")])
;; rcpps on the VF1_128_256 modes, modelled as UNSPEC_RCP of operand 1.
1721 (define_insn "<sse>_rcp<mode>2"
1722 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1724 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1726 "%vrcpps\t{%1, %0|%0, %1}"
1727 [(set_attr "type" "sse")
1728 (set_attr "atom_sse_attr" "rcp")
1729 (set_attr "btver2_sse_attr" "rcp")
1730 (set_attr "prefix" "maybe_vex")
1731 (set_attr "mode" "<MODE>")])
;; rcpss / vrcpss on V4SF: an unspec of operand 1 combined with register
;; operand 2.  In the noavx alternative operand 2 is tied to operand 0;
;; the avx alternative names it explicitly.  The "mode" attribute is SF.
1733 (define_insn "sse_vmrcpv4sf2"
1734 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1736 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1738 (match_operand:V4SF 2 "register_operand" "0,x")
1742 rcpss\t{%1, %0|%0, %k1}
1743 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1744 [(set_attr "isa" "noavx,avx")
1745 (set_attr "type" "sse")
1746 (set_attr "ssememalign" "32")
1747 (set_attr "atom_sse_attr" "rcp")
1748 (set_attr "btver2_sse_attr" "rcp")
1749 (set_attr "prefix" "orig,vex")
1750 (set_attr "mode" "SF")])
;; vrcp14<suffix> on the VF_AVX512VL modes, optionally masked; EVEX only.
1752 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1753 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1755 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1758 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1759 [(set_attr "type" "sse")
1760 (set_attr "prefix" "evex")
1761 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed vrcp14 on the VF_128 modes: operand 1 is the source of
;; the operation and register operand 2 is the second template input.
;; NOTE(review): "mode" is <MODE> here, while the other scalar-suffixed
;; VF_128 patterns in this file use <ssescalarmode> -- confirm intended.
1763 (define_insn "srcp14<mode>"
1764 [(set (match_operand:VF_128 0 "register_operand" "=v")
1767 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1769 (match_operand:VF_128 2 "register_operand" "v")
1772 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1773 [(set_attr "type" "sse")
1774 (set_attr "prefix" "evex")
1775 (set_attr "mode" "<MODE>")])
;; Expander for vector square root on the VF2 modes.
1777 (define_expand "sqrt<mode>2"
1778 [(set (match_operand:VF2 0 "register_operand")
1779 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Expander for vector square root on the VF1 modes.  The square root is
;; expanded through ix86_emit_swsqrtsf (last argument false) when a
;; conjunction of conditions holds, including TARGET_RECIP_VEC_SQRT, not
;; optimizing the insn for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
1782 (define_expand "sqrt<mode>2"
1783 [(set (match_operand:VF1 0 "register_operand")
1784 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1788 && TARGET_RECIP_VEC_SQRT
1789 && !optimize_insn_for_size_p ()
1790 && flag_finite_math_only && !flag_trapping_math
1791 && flag_unsafe_math_optimizations)
1793 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Insn for vector square root on VF modes, with optional mask and rounding
;; substitutions; a single alternative emitting sqrt<ssemodesuffix>.
1798 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1799 [(set (match_operand:VF 0 "register_operand" "=v")
1800 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1801 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1802 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1803 [(set_attr "type" "sse")
1804 (set_attr "atom_sse_attr" "sqrt")
1805 (set_attr "btver2_sse_attr" "sqrt")
1806 (set_attr "prefix" "maybe_vex")
1807 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed square root on the VF_128 modes, with an optional
;; rounding substitution.  Operand 1 is the source of the operation and
;; register operand 2 the second input (tied to operand 0 in the noavx
;; alternative).  The "mode" attribute is <ssescalarmode>.
1809 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1810 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1813 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1814 (match_operand:VF_128 2 "register_operand" "0,v")
1818 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1819 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1820 [(set_attr "isa" "noavx,avx")
1821 (set_attr "type" "sse")
1822 (set_attr "atom_sse_attr" "sqrt")
1823 (set_attr "prefix" "<round_prefix>")
1824 (set_attr "btver2_sse_attr" "sqrt")
1825 (set_attr "mode" "<ssescalarmode>")])
;; Expander for reciprocal square root on the VF1_128_256 modes
;; (UNSPEC_RSQRT); expanded through ix86_emit_swsqrtsf with its last
;; argument true.
1827 (define_expand "rsqrt<mode>2"
1828 [(set (match_operand:VF1_128_256 0 "register_operand")
1830 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1833 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; rsqrtps on the VF1_128_256 modes, modelled as UNSPEC_RSQRT of operand 1.
1837 (define_insn "<sse>_rsqrt<mode>2"
1838 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1840 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1842 "%vrsqrtps\t{%1, %0|%0, %1}"
1843 [(set_attr "type" "sse")
1844 (set_attr "prefix" "maybe_vex")
1845 (set_attr "mode" "<MODE>")])
;; vrsqrt14<suffix> on the VF_AVX512VL modes, optionally masked; EVEX only.
1847 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1848 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1850 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1853 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1854 [(set_attr "type" "sse")
1855 (set_attr "prefix" "evex")
1856 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed vrsqrt14 on the VF_128 modes: operand 1 is the source of
;; the operation and register operand 2 is the second template input.
;; NOTE(review): "mode" is <MODE> here, while the other scalar-suffixed
;; VF_128 patterns in this file use <ssescalarmode> -- confirm intended.
1858 (define_insn "rsqrt14<mode>"
1859 [(set (match_operand:VF_128 0 "register_operand" "=v")
1862 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1864 (match_operand:VF_128 2 "register_operand" "v")
1867 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1868 [(set_attr "type" "sse")
1869 (set_attr "prefix" "evex")
1870 (set_attr "mode" "<MODE>")])
;; rsqrtss / vrsqrtss on V4SF: an unspec of operand 1 combined with
;; register operand 2.  In the noavx alternative operand 2 is tied to
;; operand 0.  The "mode" attribute is SF.
1872 (define_insn "sse_vmrsqrtv4sf2"
1873 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1875 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1877 (match_operand:V4SF 2 "register_operand" "0,x")
1881 rsqrtss\t{%1, %0|%0, %k1}
1882 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1883 [(set_attr "isa" "noavx,avx")
1884 (set_attr "type" "sse")
1885 (set_attr "ssememalign" "32")
1886 (set_attr "prefix" "orig,vex")
1887 (set_attr "mode" "SF")])
1889 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1890 ;; isn't really correct, as those rtl operators aren't defined when
1891 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for vector min/max (<code>) on VF modes, with optional mask and
;; SAE-only rounding substitutions.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy.
1893 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1894 [(set (match_operand:VF 0 "register_operand")
1896 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1897 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1898 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1900 if (!flag_finite_math_only)
1901 operands[1] = force_reg (<MODE>mode, operands[1]);
1902 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Vector min/max insn used when flag_finite_math_only is set.  Operand 1
;; is marked commutative ("%") and may be any operand accepted by the
;; rounding predicate; ix86_binary_operator_ok must also hold.
1905 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1906 [(set (match_operand:VF 0 "register_operand" "=x,v")
1908 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1909 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1910 "TARGET_SSE && flag_finite_math_only
1911 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1912 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1914 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1915 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1916 [(set_attr "isa" "noavx,avx")
1917 (set_attr "type" "sseadd")
1918 (set_attr "btver2_sse_attr" "maxmin")
1919 (set_attr "prefix" "<mask_prefix3>")
1920 (set_attr "mode" "<MODE>")])
;; Vector min/max insn used when flag_finite_math_only is not set.  Unlike
;; the _finite variant, operand 1 must be a register and is not marked
;; commutative.
1922 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1923 [(set (match_operand:VF 0 "register_operand" "=x,v")
1925 (match_operand:VF 1 "register_operand" "0,v")
1926 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1927 "TARGET_SSE && !flag_finite_math_only
1928 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1930 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1931 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1932 [(set_attr "isa" "noavx,avx")
1933 (set_attr "type" "sseadd")
1934 (set_attr "btver2_sse_attr" "maxmin")
1935 (set_attr "prefix" "<mask_prefix3>")
1936 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed min/max insn on the VF_128 modes, with an optional
;; SAE-only rounding substitution.  The "mode" attribute is
;; <ssescalarmode>.
;; NOTE(review): "type" is "sse" here, while the full-vector min/max
;; patterns use "sseadd" -- confirm intended.
1938 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1939 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1942 (match_operand:VF_128 1 "register_operand" "0,v")
1943 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1948 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1949 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1950 [(set_attr "isa" "noavx,avx")
1951 (set_attr "type" "sse")
1952 (set_attr "btver2_sse_attr" "maxmin")
1953 (set_attr "prefix" "<round_saeonly_prefix>")
1954 (set_attr "mode" "<ssescalarmode>")])
1956 ;; These versions of the min/max patterns implement exactly the operations
1957 ;; min = (op1 < op2 ? op1 : op2)
1958 ;; max = (!(op1 < op2) ? op1 : op2)
1959 ;; Their operands are not commutative, and thus they may be used in the
1960 ;; presence of -0.0 and NaN.
;; Non-commutative min on VF modes: operand 1 must be a register and is
;; not marked commutative.  Alternatives by "isa": min<suffix> (noavx) and
;; vmin<suffix> (avx).
1962 (define_insn "*ieee_smin<mode>3"
1963 [(set (match_operand:VF 0 "register_operand" "=v,v")
1965 [(match_operand:VF 1 "register_operand" "0,v")
1966 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1970 min<ssemodesuffix>\t{%2, %0|%0, %2}
1971 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1972 [(set_attr "isa" "noavx,avx")
1973 (set_attr "type" "sseadd")
1974 (set_attr "prefix" "orig,vex")
1975 (set_attr "mode" "<MODE>")])
;; Non-commutative max on VF modes: operand 1 must be a register and is
;; not marked commutative.  Alternatives by "isa": max<suffix> (noavx) and
;; vmax<suffix> (avx).
1977 (define_insn "*ieee_smax<mode>3"
1978 [(set (match_operand:VF 0 "register_operand" "=v,v")
1980 [(match_operand:VF 1 "register_operand" "0,v")
1981 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1985 max<ssemodesuffix>\t{%2, %0|%0, %2}
1986 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1987 [(set_attr "isa" "noavx,avx")
1988 (set_attr "type" "sseadd")
1989 (set_attr "prefix" "orig,vex")
1990 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF (VEX only).  The visible RTL combines an operation on
;; operands 1 and 2 with (minus (match_dup 1) (match_dup 2)).
1992 (define_insn "avx_addsubv4df3"
1993 [(set (match_operand:V4DF 0 "register_operand" "=x")
1996 (match_operand:V4DF 1 "register_operand" "x")
1997 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1998 (minus:V4DF (match_dup 1) (match_dup 2))
2001 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2002 [(set_attr "type" "sseadd")
2003 (set_attr "prefix" "vex")
2004 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF.  Alternatives by "isa": two-operand
;; (noavx, operand 1 tied to operand 0) and three-operand (avx).
2006 (define_insn "sse3_addsubv2df3"
2007 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2010 (match_operand:V2DF 1 "register_operand" "0,x")
2011 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
2012 (minus:V2DF (match_dup 1) (match_dup 2))
2016 addsubpd\t{%2, %0|%0, %2}
2017 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2018 [(set_attr "isa" "noavx,avx")
2019 (set_attr "type" "sseadd")
2020 (set_attr "atom_unit" "complex")
2021 (set_attr "prefix" "orig,vex")
2022 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF (VEX only).  The visible RTL combines an operation on
;; operands 1 and 2 with (minus (match_dup 1) (match_dup 2)).
2024 (define_insn "avx_addsubv8sf3"
2025 [(set (match_operand:V8SF 0 "register_operand" "=x")
2028 (match_operand:V8SF 1 "register_operand" "x")
2029 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2030 (minus:V8SF (match_dup 1) (match_dup 2))
2033 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2034 [(set_attr "type" "sseadd")
2035 (set_attr "prefix" "vex")
2036 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF.  Alternatives by "isa": two-operand
;; (noavx, operand 1 tied to operand 0) and three-operand (avx).  The
;; prefix_rep attribute is "1" for the noavx alternative only.
2038 (define_insn "sse3_addsubv4sf3"
2039 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2042 (match_operand:V4SF 1 "register_operand" "0,x")
2043 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2044 (minus:V4SF (match_dup 1) (match_dup 2))
2048 addsubps\t{%2, %0|%0, %2}
2049 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2050 [(set_attr "isa" "noavx,avx")
2051 (set_attr "type" "sseadd")
2052 (set_attr "prefix" "orig,vex")
2053 (set_attr "prefix_rep" "1,*")
2054 (set_attr "mode" "V4SF")])
;; Horizontal add/subtract on V4DF (vhaddpd / vhsubpd, VEX only).  The RTL
;; pairs DF elements selected from operands 1 and 2: elements 0 and 1 of
;; operand 1, then 0 and 1 of operand 2, then 2 and 3 of operand 1, then
;; 2 and 3 of operand 2.
2056 (define_insn "avx_h<plusminus_insn>v4df3"
2057 [(set (match_operand:V4DF 0 "register_operand" "=x")
2062 (match_operand:V4DF 1 "register_operand" "x")
2063 (parallel [(const_int 0)]))
2064 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2067 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2068 (parallel [(const_int 0)]))
2069 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2072 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2073 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2075 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2076 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2078 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2079 [(set_attr "type" "sseadd")
2080 (set_attr "prefix" "vex")
2081 (set_attr "mode" "V4DF")])
;; Expander for horizontal add on V2DF: pairs DF elements 0 and 1 of
;; operand 1, then elements 0 and 1 of operand 2.
2083 (define_expand "sse3_haddv2df3"
2084 [(set (match_operand:V2DF 0 "register_operand")
2088 (match_operand:V2DF 1 "register_operand")
2089 (parallel [(const_int 0)]))
2090 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2093 (match_operand:V2DF 2 "nonimmediate_operand")
2094 (parallel [(const_int 0)]))
2095 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; haddpd / vhaddpd on V2DF.  The element indices are matched as operands
;; 3..6 (each 0 or 1) rather than fixed constants; the insn condition
;; requires the two indices used for operand 1 to differ, and likewise the
;; two indices used for operand 2, so either selection order is accepted.
2098 (define_insn "*sse3_haddv2df3"
2099 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2103 (match_operand:V2DF 1 "register_operand" "0,x")
2104 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2107 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2110 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2111 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2114 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2116 && INTVAL (operands[3]) != INTVAL (operands[4])
2117 && INTVAL (operands[5]) != INTVAL (operands[6])"
2119 haddpd\t{%2, %0|%0, %2}
2120 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2121 [(set_attr "isa" "noavx,avx")
2122 (set_attr "type" "sseadd")
2123 (set_attr "prefix" "orig,vex")
2124 (set_attr "mode" "V2DF")])
;; hsubpd / vhsubpd on V2DF.  The element indices are fixed constants:
;; elements 0 and 1 of operand 1, then elements 0 and 1 of operand 2.
2126 (define_insn "sse3_hsubv2df3"
2127 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2131 (match_operand:V2DF 1 "register_operand" "0,x")
2132 (parallel [(const_int 0)]))
2133 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2136 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2137 (parallel [(const_int 0)]))
2138 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2141 hsubpd\t{%2, %0|%0, %2}
2142 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2143 [(set_attr "isa" "noavx,avx")
2144 (set_attr "type" "sseadd")
2145 (set_attr "prefix" "orig,vex")
2146 (set_attr "mode" "V2DF")])
;; DFmode result formed from two elements of the single V2DF operand 1,
;; emitted as haddpd / vhaddpd with that register used for both inputs.
;; The element indices are operands 2 and 3 (each 0 or 1) and must differ.
2148 (define_insn "*sse3_haddv2df3_low"
2149 [(set (match_operand:DF 0 "register_operand" "=x,x")
2152 (match_operand:V2DF 1 "register_operand" "0,x")
2153 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2156 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2158 && INTVAL (operands[2]) != INTVAL (operands[3])"
2160 haddpd\t{%0, %0|%0, %0}
2161 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2162 [(set_attr "isa" "noavx,avx")
2163 (set_attr "type" "sseadd1")
2164 (set_attr "prefix" "orig,vex")
2165 (set_attr "mode" "V2DF")])
;; Scalar (DF) counterpart of the horizontal subtract: lanes 0 and 1 of
;; the single V2DF operand 1 are selected in that fixed order.  Emitted
;; as hsubpd %0, %0 (noavx) or vhsubpd %1, %1 (avx).
2167 (define_insn "*sse3_hsubv2df3_low"
2168 [(set (match_operand:DF 0 "register_operand" "=x,x")
2171 (match_operand:V2DF 1 "register_operand" "0,x")
2172 (parallel [(const_int 0)]))
2175 (parallel [(const_int 1)]))))]
2178 hsubpd\t{%0, %0|%0, %0}
2179 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2180 [(set_attr "isa" "noavx,avx")
2181 (set_attr "type" "sseadd1")
2182 (set_attr "prefix" "orig,vex")
2183 (set_attr "mode" "V2DF")])
;; 256-bit single-precision horizontal add/sub (one pattern for both via
;; <plusminus_insn>/<plusminus_mnemonic>).  The selections appear in the
;; order: lanes 0-3 of operand 1, lanes 0-3 of operand 2, then lanes 4-7
;; of operand 1, lanes 4-7 of operand 2.  Single VEX-only alternative.
2185 (define_insn "avx_h<plusminus_insn>v8sf3"
2186 [(set (match_operand:V8SF 0 "register_operand" "=x")
2192 (match_operand:V8SF 1 "register_operand" "x")
2193 (parallel [(const_int 0)]))
2194 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2196 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2197 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2201 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2202 (parallel [(const_int 0)]))
2203 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2205 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2206 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2210 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2211 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2213 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2214 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2217 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2218 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2220 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2221 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2223 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2224 [(set_attr "type" "sseadd")
2225 (set_attr "prefix" "vex")
2226 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/sub.  Lanes 0-3 of operand 1
;; are selected first, then lanes 0-3 of operand 2.
;; Alternative 0 (noavx) is the two-operand legacy form with operand 1
;; tied to the destination and prefix_rep set to 1; alternative 1 (avx)
;; is the three-operand VEX form.
2228 (define_insn "sse3_h<plusminus_insn>v4sf3"
2229 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2234 (match_operand:V4SF 1 "register_operand" "0,x")
2235 (parallel [(const_int 0)]))
2236 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2238 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2239 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2243 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2244 (parallel [(const_int 0)]))
2245 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2247 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2248 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2251 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2252 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2253 [(set_attr "isa" "noavx,avx")
2254 (set_attr "type" "sseadd")
2255 (set_attr "atom_unit" "complex")
2256 (set_attr "prefix" "orig,vex")
2257 (set_attr "prefix_rep" "1,*")
2258 (set_attr "mode" "V4SF")])
;; Plus-reduction of a V8DF register (operand 1) into operand 0.  The
;; work is delegated to ix86_expand_reduc with gen_addv8df3 as the
;; combining operation.
2260 (define_expand "reduc_splus_v8df"
2261 [(match_operand:V8DF 0 "register_operand")
2262 (match_operand:V8DF 1 "register_operand")]
2265 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Plus-reduction of a V4DF register, open-coded in three steps:
;;   1. tmp  = haddv4df3 (op1, op1)
;;   2. tmp2 = vperm2f128 (tmp, tmp, 1)
;;   3. op0  = tmp + tmp2
;; NOTE(review): immediate 1 presumably swaps the two 128-bit halves so
;; step 3 combines the per-half sums -- confirm against vperm2f128.
2269 (define_expand "reduc_splus_v4df"
2270 [(match_operand:V4DF 0 "register_operand")
2271 (match_operand:V4DF 1 "register_operand")]
2274 rtx tmp = gen_reg_rtx (V4DFmode);
2275 rtx tmp2 = gen_reg_rtx (V4DFmode);
2276 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2277 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2278 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Plus-reduction of a V2DF register: a single sse3_haddv2df3 of
;; operand 1 with itself, written directly to operand 0.
2282 (define_expand "reduc_splus_v2df"
2283 [(match_operand:V2DF 0 "register_operand")
2284 (match_operand:V2DF 1 "register_operand")]
2287 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Plus-reduction of a V16SF register (operand 1) into operand 0,
;; delegated to ix86_expand_reduc with gen_addv16sf3.
2291 (define_expand "reduc_splus_v16sf"
2292 [(match_operand:V16SF 0 "register_operand")
2293 (match_operand:V16SF 1 "register_operand")]
2296 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Plus-reduction of a V8SF register, open-coded:
;;   1. tmp  = haddv8sf3 (op1, op1)
;;   2. tmp2 = haddv8sf3 (tmp, tmp)
;;   3. tmp  = vperm2f128 (tmp2, tmp2, 1)   ; tmp is reused as scratch
;;   4. op0  = tmp + tmp2
;; NOTE(review): immediate 1 presumably swaps the 128-bit halves -- confirm.
2300 (define_expand "reduc_splus_v8sf"
2301 [(match_operand:V8SF 0 "register_operand")
2302 (match_operand:V8SF 1 "register_operand")]
2305 rtx tmp = gen_reg_rtx (V8SFmode);
2306 rtx tmp2 = gen_reg_rtx (V8SFmode);
2307 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2308 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2309 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2310 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Plus-reduction of a V4SF register.  Two strategies are visible:
;;   * two back-to-back sse3_haddv4sf3 insns through a temporary, or
;;   * the generic ix86_expand_reduc helper with gen_addv4sf3.
;; NOTE(review): the test that chooses between them is not visible in
;; this listing; presumably it depends on SSE3 availability -- confirm.
2314 (define_expand "reduc_splus_v4sf"
2315 [(match_operand:V4SF 0 "register_operand")
2316 (match_operand:V4SF 1 "register_operand")]
2321 rtx tmp = gen_reg_rtx (V4SFmode);
2322 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2323 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2326 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2330 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Every entry carries its own enabling condition: 256-bit integer modes
;; need TARGET_AVX2, 256-bit float modes TARGET_AVX, V4SF TARGET_SSE,
;; V64QI/V32HI TARGET_AVX512BW and the remaining 512-bit modes
;; TARGET_AVX512F.
2331 (define_mode_iterator REDUC_SMINMAX_MODE
2332 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2333 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2334 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2335 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2336 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2337 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2338 (V8DF "TARGET_AVX512F")])
;; Signed min/max reduction (smaxmin code iterator) over every mode in
;; REDUC_SMINMAX_MODE.  Delegates to ix86_expand_reduc, passing the
;; element-wise gen_<code><mode>3 generator as the combining operation.
2340 (define_expand "reduc_<code>_<mode>"
2341 [(smaxmin:REDUC_SMINMAX_MODE
2342 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2343 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2346 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned min/max reduction (umaxmin code iterator) over the
;; VI_AVX512BW integer modes.  Delegates to ix86_expand_reduc with
;; gen_<code><mode>3 as the combining operation.
2350 (define_expand "reduc_<code>_<mode>"
2351 [(umaxmin:VI_AVX512BW
2352 (match_operand:VI_AVX512BW 0 "register_operand")
2353 (match_operand:VI_AVX512BW 1 "register_operand"))]
2356 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Min/max reduction over the 256-bit integer modes (VI_256), delegated
;; to ix86_expand_reduc with gen_<code><mode>3.
;; NOTE(review): the code-iterator line is not visible in this listing,
;; so which of the min/max families this covers must be confirmed.
2360 (define_expand "reduc_<code>_<mode>"
2362 (match_operand:VI_256 0 "register_operand")
2363 (match_operand:VI_256 1 "register_operand"))]
2366 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction of a V8HI register, delegated to
;; ix86_expand_reduc with gen_uminv8hi3 as the combining operation.
2370 (define_expand "reduc_umin_v8hi"
2372 (match_operand:V8HI 0 "register_operand")
2373 (match_operand:V8HI 1 "register_operand"))]
2376 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
;; Packed vreduce<suffix> for the VF_AVX512VL modes, EVEX-encoded.
;; Operand 1 is the source (register or memory), operand 2 an immediate
;; in 0..255.  The <mask_name> substitution supplies the optional
;; masking text printed through <mask_operand3>.
2380 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2381 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2383 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2384 (match_operand:SI 2 "const_0_to_255_operand")]
2387 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2388 [(set_attr "type" "sse")
2389 (set_attr "prefix" "evex")
2390 (set_attr "mode" "<MODE>")])
;; Scalar vreduce<scalar suffix> on the 128-bit float modes (VF_128),
;; EVEX-encoded.  Operand 1 is a register, operand 2 a register or
;; memory source, and operand 3 an immediate in 0..255.
2392 (define_insn "reduces<mode>"
2393 [(set (match_operand:VF_128 0 "register_operand" "=v")
2396 [(match_operand:VF_128 1 "register_operand" "v")
2397 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2398 (match_operand:SI 3 "const_0_to_255_operand")]
2403 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2404 [(set_attr "type" "sse")
2405 (set_attr "prefix" "evex")
2406 (set_attr "mode" "<MODE>")])
2408 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2410 ;; Parallel floating point comparisons
2412 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded packed compare for 128/256-bit float modes.  The
;; comparison is chosen by immediate operand 3, restricted to 0..31;
;; the result is a vector in operand 0.
2414 (define_insn "avx_cmp<mode>3"
2415 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2417 [(match_operand:VF_128_256 1 "register_operand" "x")
2418 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2419 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2422 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2423 [(set_attr "type" "ssecmp")
2424 (set_attr "length_immediate" "1")
2425 (set_attr "prefix" "vex")
2426 (set_attr "mode" "<MODE>")])
;; VEX-encoded scalar compare (vcmpss/vcmpsd form) with the comparison
;; given by immediate operand 3 in 0..31.  In Intel syntax operand 2 is
;; printed with the <iptr> size prefix; the insn mode is the scalar mode.
2428 (define_insn "avx_vmcmp<mode>3"
2429 [(set (match_operand:VF_128 0 "register_operand" "=x")
2432 [(match_operand:VF_128 1 "register_operand" "x")
2433 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2439 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2440 [(set_attr "type" "ssecmp")
2441 (set_attr "length_immediate" "1")
2442 (set_attr "prefix" "vex")
2443 (set_attr "mode" "<ssescalarmode>")])
;; Commutative variant of the packed mask compare: it matches only when
;; the comparison operator (operand 3) belongs to RTX_COMM_COMPARE, and
;; operand 1 carries the "%" commutativity marker so the operands may be
;; swapped.  The comparison name is printed via the %D3 modifier.
2445 (define_insn "*<sse>_maskcmp<mode>3_comm"
2446 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2447 (match_operator:VF_128_256 3 "sse_comparison_operator"
2448 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2449 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2451 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2453 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2454 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2455 [(set_attr "isa" "noavx,avx")
2456 (set_attr "type" "ssecmp")
2457 (set_attr "length_immediate" "1")
2458 (set_attr "prefix" "orig,vex")
2459 (set_attr "mode" "<MODE>")])
;; Packed mask compare for 128/256-bit float modes, any operator accepted
;; by sse_comparison_operator.  Operand 1 is not commutative here.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form; the comparison name is printed via %D3.
2461 (define_insn "<sse>_maskcmp<mode>3"
2462 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2463 (match_operator:VF_128_256 3 "sse_comparison_operator"
2464 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2465 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2468 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2469 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2470 [(set_attr "isa" "noavx,avx")
2471 (set_attr "type" "ssecmp")
2472 (set_attr "length_immediate" "1")
2473 (set_attr "prefix" "orig,vex")
2474 (set_attr "mode" "<MODE>")])
;; Scalar mask compare on VF_128: legacy and VEX alternatives, with the
;; comparison name printed via %D3 and operand 2 printed with <iptr> in
;; Intel syntax.  Note that length_immediate is "1,*" here, i.e. only the
;; legacy alternative sets it explicitly.
2476 (define_insn "<sse>_vmmaskcmp<mode>3"
2477 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2479 (match_operator:VF_128 3 "sse_comparison_operator"
2480 [(match_operand:VF_128 1 "register_operand" "0,x")
2481 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2486 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2487 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2488 [(set_attr "isa" "noavx,avx")
2489 (set_attr "type" "ssecmp")
2490 (set_attr "length_immediate" "1,*")
2491 (set_attr "prefix" "orig,vex")
2492 (set_attr "mode" "<ssescalarmode>")])
;; Predicate for the comparison immediate, keyed by vector mode:
;; floating-point vector modes accept 0..31, integer vector modes 0..7.
2494 (define_mode_attr cmp_imm_predicate
2495 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2496 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2497 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2498 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2499 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2500 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2501 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2502 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2503 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX-512 compare into a mask register (constraint "Yk") for the
;; 32/64-bit element modes in V48_AVX512VL.  The immediate predicate is
;; picked per mode through <cmp_imm_predicate>.  The pattern is
;; instantiated with the mask-merge and SAE-only substitutions, which
;; contribute the extra operand text in the output template.
2505 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2506 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2507 (unspec:<avx512fmaskmode>
2508 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2509 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2510 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2512 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2513 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2514 [(set_attr "type" "ssecmp")
2515 (set_attr "length_immediate" "1")
2516 (set_attr "prefix" "evex")
2517 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 vpcmp into a mask register for the 8/16-bit element integer
;; modes in VI12_AVX512VL.  The immediate predicate comes from
;; <cmp_imm_predicate>; the optional merge mask is printed through
;; <mask_scalar_merge_operand4>.
2519 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2520 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2521 (unspec:<avx512fmaskmode>
2522 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2523 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2524 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2527 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2528 [(set_attr "type" "ssecmp")
2529 (set_attr "length_immediate" "1")
2530 (set_attr "prefix" "evex")
2531 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned AVX-512 compare (vpcmpu, UNSPEC_UNSIGNED_PCMP) into a mask
;; register for the 8/16-bit element integer modes in VI12_AVX512VL.
;; The comparison immediate (operand 3) is restricted to 0..7.
2533 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2534 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2535 (unspec:<avx512fmaskmode>
2536 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2537 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2538 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2539 UNSPEC_UNSIGNED_PCMP))]
2541 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2542 [(set_attr "type" "ssecmp")
2543 (set_attr "length_immediate" "1")
2544 (set_attr "prefix" "evex")
2545 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned AVX-512 compare (vpcmpu, UNSPEC_UNSIGNED_PCMP) into a mask
;; register for the 32/64-bit element integer modes in VI48_AVX512VL.
;; Same shape as the VI12 pattern of the same name; only the mode
;; iterator differs.  The comparison immediate is restricted to 0..7.
2547 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2548 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2549 (unspec:<avx512fmaskmode>
2550 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2551 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2552 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2553 UNSPEC_UNSIGNED_PCMP))]
2555 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2556 [(set_attr "type" "ssecmp")
2557 (set_attr "length_immediate" "1")
2558 (set_attr "prefix" "evex")
2559 (set_attr "mode" "<sseinsnmode>")])
;; EVEX scalar compare into a mask register.  The unspec result is
;; wrapped in an AND on the mask mode; the comparison immediate
;; (operand 3) is in 0..31.  Operand 2's predicate and constraint come
;; from the SAE-only substitution.
;; NOTE(review): the second AND operand is not visible in this listing.
2561 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2562 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2563 (and:<avx512fmaskmode>
2564 (unspec:<avx512fmaskmode>
2565 [(match_operand:VF_128 1 "register_operand" "v")
2566 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2567 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2571 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2572 [(set_attr "type" "ssecmp")
2573 (set_attr "length_immediate" "1")
2574 (set_attr "prefix" "evex")
2575 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of the EVEX scalar compare: the compare result is ANDed
;; with a further AND expression involving mask register operand 4,
;; which the template prints as a {%4} write-mask on the destination.
;; The SAE operand moves to position 5 (<round_saeonly_op5>).
2577 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2578 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2579 (and:<avx512fmaskmode>
2580 (unspec:<avx512fmaskmode>
2581 [(match_operand:VF_128 1 "register_operand" "v")
2582 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2583 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2585 (and:<avx512fmaskmode>
2586 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2589 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2590 [(set_attr "type" "ssecmp")
2591 (set_attr "length_immediate" "1")
2592 (set_attr "prefix" "evex")
2593 (set_attr "mode" "<ssescalarmode>")])
;; EVEX packed compare expressed with a real comparison operator
;; (sse_comparison_operator, operand 3) whose result is a mask register.
;; The comparison name is printed via the %D3 modifier.
2595 (define_insn "avx512f_maskcmp<mode>3"
2596 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2597 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2598 [(match_operand:VF 1 "register_operand" "v")
2599 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2601 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2602 [(set_attr "type" "ssecmp")
2603 (set_attr "length_immediate" "1")
2604 (set_attr "prefix" "evex")
2605 (set_attr "mode" "<sseinsnmode>")])
;; comi/vcomi: sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; and element 0 of operand 1.  The "%v" prefix in the template and the
;; "maybe_vex" prefix attribute cover both legacy and VEX encodings.
;; prefix_data16 is computed from whether the insn mode is DF.
2607 (define_insn "<sse>_comi<round_saeonly_name>"
2608 [(set (reg:CCFP FLAGS_REG)
2611 (match_operand:<ssevecmode> 0 "register_operand" "v")
2612 (parallel [(const_int 0)]))
2614 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2615 (parallel [(const_int 0)]))))]
2616 "SSE_FLOAT_MODE_P (<MODE>mode)"
2617 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2618 [(set_attr "type" "ssecomi")
2619 (set_attr "prefix" "maybe_vex")
2620 (set_attr "prefix_rep" "0")
2621 (set (attr "prefix_data16")
2622 (if_then_else (eq_attr "mode" "DF")
2624 (const_string "0")))
2625 (set_attr "mode" "<MODE>")])
;; ucomi/vucomi: same shape as the comi pattern, but FLAGS_REG is set in
;; CCFPU mode.  It compares element 0 of operand 0 with element 0 of
;; operand 1; prefix_data16 is computed from whether the mode is DF.
2627 (define_insn "<sse>_ucomi<round_saeonly_name>"
2628 [(set (reg:CCFPU FLAGS_REG)
2631 (match_operand:<ssevecmode> 0 "register_operand" "v")
2632 (parallel [(const_int 0)]))
2634 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2635 (parallel [(const_int 0)]))))]
2636 "SSE_FLOAT_MODE_P (<MODE>mode)"
2637 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2638 [(set_attr "type" "ssecomi")
2639 (set_attr "prefix" "maybe_vex")
2640 (set_attr "prefix_rep" "0")
2641 (set (attr "prefix_data16")
2642 (if_then_else (eq_attr "mode" "DF")
2644 (const_string "0")))
2645 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 512-bit data (V_512) controlled by a
;; 512-bit float comparison (VF_512) of operands 4 and 5; operands 1 and
;; 2 are the two value arms.  It applies only when the data and compare
;; modes have the same number of elements.  Expansion is done by
;; ix86_expand_fp_vcond, whose boolean result is captured in "ok".
2647 (define_expand "vcond<V_512:mode><VF_512:mode>"
2648 [(set (match_operand:V_512 0 "register_operand")
2650 (match_operator 3 ""
2651 [(match_operand:VF_512 4 "nonimmediate_operand")
2652 (match_operand:VF_512 5 "nonimmediate_operand")])
2653 (match_operand:V_512 1 "general_operand")
2654 (match_operand:V_512 2 "general_operand")))]
2656 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2657 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2659 bool ok = ix86_expand_fp_vcond (operands);
;; 256-bit counterpart of the vector conditional select: data mode V_256,
;; compare mode VF_256, enabled only when both have the same number of
;; elements.  Expansion is done by ix86_expand_fp_vcond.
2664 (define_expand "vcond<V_256:mode><VF_256:mode>"
2665 [(set (match_operand:V_256 0 "register_operand")
2667 (match_operator 3 ""
2668 [(match_operand:VF_256 4 "nonimmediate_operand")
2669 (match_operand:VF_256 5 "nonimmediate_operand")])
2670 (match_operand:V_256 1 "general_operand")
2671 (match_operand:V_256 2 "general_operand")))]
2673 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2674 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2676 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vector conditional select: data mode V_128,
;; compare mode VF_128, enabled only when both have the same number of
;; elements.  Expansion is done by ix86_expand_fp_vcond.
2681 (define_expand "vcond<V_128:mode><VF_128:mode>"
2682 [(set (match_operand:V_128 0 "register_operand")
2684 (match_operator 3 ""
2685 [(match_operand:VF_128 4 "nonimmediate_operand")
2686 (match_operand:VF_128 5 "nonimmediate_operand")])
2687 (match_operand:V_128 1 "general_operand")
2688 (match_operand:V_128 2 "general_operand")))]
2690 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2691 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2693 bool ok = ix86_expand_fp_vcond (operands);
2698 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2700 ;; Parallel floating point logical operations
2702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AND-NOT for 128/256-bit float vectors, with optional AVX-512VL masking.
;; The mnemonic is assembled at output time into a static buffer:
;;   * the suffix depends on the insn's "mode" attribute,
;;   * alternative 0 uses the legacy two-operand andn form, alternative 1
;;     the three-operand vandn form,
;;   * when a mask is applied and AVX512DQ is unavailable, the integer
;;     vpandn{d,q} form is used instead (q for DF elements, d otherwise).
;; The "mode" attribute prefers the packed-single mode for 16-byte
;; vectors on targets where that is optimal, and V4SF when optimizing
;; for size without AVX.
2704 (define_insn "<sse>_andnot<mode>3<mask_name>"
2705 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2708 (match_operand:VF_128_256 1 "register_operand" "0,v"))
2709 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2710 "TARGET_SSE && <mask_avx512vl_condition>"
2712 static char buf[128];
2716 switch (get_attr_mode (insn))
2723 suffix = "<ssemodesuffix>";
2726 switch (which_alternative)
2729 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2732 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2738 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2739 if (<mask_applied> && !TARGET_AVX512DQ)
2741 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2742 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2745 snprintf (buf, sizeof (buf), ops, suffix);
2748 [(set_attr "isa" "noavx,avx")
2749 (set_attr "type" "sselog")
2750 (set_attr "prefix" "orig,maybe_evex")
2752 (cond [(and (match_test "<MODE_SIZE> == 16")
2753 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2754 (const_string "<ssePSmode>")
2755 (match_test "TARGET_AVX")
2756 (const_string "<MODE>")
2757 (match_test "optimize_function_for_size_p (cfun)")
2758 (const_string "V4SF")
2760 (const_string "<MODE>")))])
;; AND-NOT for 512-bit float vectors (EVEX only).  Without AVX512DQ the
;; float form is unavailable, so the suffix switches to the integer
;; q/d element suffix (q for DF elements, d otherwise).
;; NOTE(review): the argument filling the first %s of "v%sandn%s" is not
;; visible in this listing; presumably "p" in the integer case -- confirm.
2763 (define_insn "<sse>_andnot<mode>3<mask_name>"
2764 [(set (match_operand:VF_512 0 "register_operand" "=v")
2767 (match_operand:VF_512 1 "register_operand" "v"))
2768 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2771 static char buf[128];
2775 suffix = "<ssemodesuffix>";
2778 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2779 if (!TARGET_AVX512DQ)
2781 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2785 snprintf (buf, sizeof (buf),
2786 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2790 [(set_attr "type" "sselog")
2791 (set_attr "prefix" "evex")
2792 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the bitwise logic operations (any_logic code iterator) on
;; 128/256-bit float vectors, optionally masked under AVX-512VL.  Both
;; inputs may be memory at expand time; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy to legitimize them.
2794 (define_expand "<code><mode>3<mask_name>"
2795 [(set (match_operand:VF_128_256 0 "register_operand")
2796 (any_logic:VF_128_256
2797 (match_operand:VF_128_256 1 "nonimmediate_operand")
2798 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2799 "TARGET_SSE && <mask_avx512vl_condition>"
2800 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit counterpart of the bitwise-logic expander: operands are VF_512
;; and are legitimized with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the operator and condition lines are not visible here.
2802 (define_expand "<code><mode>3<mask_name>"
2803 [(set (match_operand:VF_512 0 "register_operand")
2805 (match_operand:VF_512 1 "nonimmediate_operand")
2806 (match_operand:VF_512 2 "nonimmediate_operand")))]
2808 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for bitwise logic (any_logic) on 128/256-bit float vectors.
;; Operand 1 is commutative ("%"); ix86_binary_operator_ok must accept
;; the operand combination.  The mnemonic is built at output time:
;;   * the suffix depends on the insn's "mode" attribute,
;;   * alternative 0 is the legacy two-operand form, alternative 1 the
;;     three-operand v<logic> form,
;;   * with a mask applied and no AVX512DQ, the integer vp<logic>{d,q}
;;     form is substituted (q for DF elements, d otherwise).
2810 (define_insn "*<code><mode>3<mask_name>"
2811 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2812 (any_logic:VF_128_256
2813 (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
2814 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2815 "TARGET_SSE && <mask_avx512vl_condition>
2816 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2818 static char buf[128];
2822 switch (get_attr_mode (insn))
2829 suffix = "<ssemodesuffix>";
2832 switch (which_alternative)
2835 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2838 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2844 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2845 if (<mask_applied> && !TARGET_AVX512DQ)
2847 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2848 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2851 snprintf (buf, sizeof (buf), ops, suffix);
2854 [(set_attr "isa" "noavx,avx")
2855 (set_attr "type" "sselog")
2856 (set_attr "prefix" "orig,maybe_evex")
2858 (cond [(and (match_test "<MODE_SIZE> == 16")
2859 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2860 (const_string "<ssePSmode>")
2861 (match_test "TARGET_AVX")
2862 (const_string "<MODE>")
2863 (match_test "optimize_function_for_size_p (cfun)")
2864 (const_string "V4SF")
2866 (const_string "<MODE>")))])
;; Matcher for bitwise logic on 512-bit float vectors (EVEX only).
;; Requires TARGET_AVX512F and an operand combination accepted by
;; ix86_binary_operator_ok.  When the vector is 64 bytes or a mask is
;; applied, and AVX512DQ is unavailable, the integer q/d element suffix
;; is selected (q for DF elements, d otherwise).
;; NOTE(review): the argument filling the first %s of "v%s<logic>%s" is
;; not visible in this listing.
2868 (define_insn "*<code><mode>3<mask_name>"
2869 [(set (match_operand:VF_512 0 "register_operand" "=v")
2871 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
2872 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2873 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2875 static char buf[128];
2879 suffix = "<ssemodesuffix>";
2882 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2883 if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
2885 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2889 snprintf (buf, sizeof (buf),
2890 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2894 [(set_attr "type" "sselog")
2895 (set_attr "prefix" "evex")
2896 (set_attr "mode" "<sseinsnmode>")])
;; Vector copysign.  Operand 3 is a mask built by ix86_build_signbit_mask;
;; operands 4 and 5 are fresh temporaries.  Visible steps:
;;   (not mask) combined with operand 1,
;;   mask AND operand 2,
;;   operand 0 = temp 4 IOR temp 5.
;; NOTE(review): the lines naming the destinations of the first two sets
;; are not visible; presumably they are temporaries 4 and 5 -- confirm.
2898 (define_expand "copysign<mode>3"
2901 (not:VF (match_dup 3))
2902 (match_operand:VF 1 "nonimmediate_operand")))
2904 (and:VF (match_dup 3)
2905 (match_operand:VF 2 "nonimmediate_operand")))
2906 (set (match_operand:VF 0 "register_operand")
2907 (ior:VF (match_dup 4) (match_dup 5)))]
2910 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2912 operands[4] = gen_reg_rtx (<MODE>mode);
2913 operands[5] = gen_reg_rtx (<MODE>mode);
2916 ;; Also define scalar versions. These are used for abs, neg, and
2917 ;; conditional move. Using subregs into vector modes causes register
2918 ;; allocation lossage. These patterns do not allow memory operands
2919 ;; because the native instructions read the full 128-bits.
;;
;; Scalar (MODEF) AND-NOT: all operands are registers.  The mnemonic
;; suffix is "ps" when the insn's mode attribute is V4SF, otherwise the
;; vector suffix for the scalar's mode.  Alternative 0 is the legacy
;; andn form, alternative 1 the VEX vandn form.
2921 (define_insn "*andnot<mode>3"
2922 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2925 (match_operand:MODEF 1 "register_operand" "0,x"))
2926 (match_operand:MODEF 2 "register_operand" "x,x")))]
2927 "SSE_FLOAT_MODE_P (<MODE>mode)"
2929 static char buf[32];
2932 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2934 switch (which_alternative)
2937 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2940 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2946 snprintf (buf, sizeof (buf), ops, suffix);
2949 [(set_attr "isa" "noavx,avx")
2950 (set_attr "type" "sselog")
2951 (set_attr "prefix" "orig,vex")
2953 (cond [(and (match_test "<MODE_SIZE> == 16")
2954 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2955 (const_string "V4SF")
2956 (match_test "TARGET_AVX")
2957 (const_string "<ssevecmode>")
2958 (match_test "optimize_function_for_size_p (cfun)")
2959 (const_string "V4SF")
2961 (const_string "<ssevecmode>")))])
;; AND-NOT on TFmode values.  The mnemonic is "andnps" when the insn's
;; mode attribute is V4SF and "pandn" otherwise, prefixed with "v" in the
;; AVX alternative.  The "mode" attribute picks V4SF when packed-single
;; is optimal, or when SSE2 is missing or the function is optimized for
;; size; the final fallback is TI.
2963 (define_insn "*andnottf3"
2964 [(set (match_operand:TF 0 "register_operand" "=x,x")
2966 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2967 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2970 static char buf[32];
2973 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2975 switch (which_alternative)
2978 ops = "%s\t{%%2, %%0|%%0, %%2}";
2981 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2987 snprintf (buf, sizeof (buf), ops, tmp);
2990 [(set_attr "isa" "noavx,avx")
2991 (set_attr "type" "sselog")
2992 (set (attr "prefix_data16")
2994 (and (eq_attr "alternative" "0")
2995 (eq_attr "mode" "TI"))
2997 (const_string "*")))
2998 (set_attr "prefix" "orig,vex")
3000 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3001 (const_string "V4SF")
3002 (match_test "TARGET_AVX")
3004 (ior (not (match_test "TARGET_SSE2"))
3005 (match_test "optimize_function_for_size_p (cfun)"))
3006 (const_string "V4SF")
3008 (const_string "TI")))])
;; Scalar (MODEF) bitwise logic, register operands only; operand 1 is
;; commutative ("%").  The suffix is "ps" when the insn's mode attribute
;; is V4SF, otherwise the vector suffix for the scalar's mode.
;; Alternative 0 is the legacy form, alternative 1 the VEX form.
3010 (define_insn "*<code><mode>3"
3011 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3013 (match_operand:MODEF 1 "register_operand" "%0,x")
3014 (match_operand:MODEF 2 "register_operand" "x,x")))]
3015 "SSE_FLOAT_MODE_P (<MODE>mode)"
3017 static char buf[32];
3020 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3022 switch (which_alternative)
3025 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3028 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3034 snprintf (buf, sizeof (buf), ops, suffix);
3037 [(set_attr "isa" "noavx,avx")
3038 (set_attr "type" "sselog")
3039 (set_attr "prefix" "orig,vex")
3041 (cond [(and (match_test "<MODE_SIZE> == 16")
3042 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3043 (const_string "V4SF")
3044 (match_test "TARGET_AVX")
3045 (const_string "<ssevecmode>")
3046 (match_test "optimize_function_for_size_p (cfun)")
3047 (const_string "V4SF")
3049 (const_string "<ssevecmode>")))])
;; Expander for bitwise logic on TFmode.  Both inputs may be memory at
;; expand time and are legitimized by ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the operator and condition lines are not visible here.
3051 (define_expand "<code>tf3"
3052 [(set (match_operand:TF 0 "register_operand")
3054 (match_operand:TF 1 "nonimmediate_operand")
3055 (match_operand:TF 2 "nonimmediate_operand")))]
3057 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Matcher for bitwise logic on TFmode; operand 1 is commutative and the
;; combination must satisfy ix86_binary_operator_ok.  The mnemonic is
;; "<logic>ps" when the insn's mode attribute is V4SF and "p<logic>"
;; otherwise, prefixed with "v" in the AVX alternative.  The "mode"
;; attribute is selected the same way as in *andnottf3.
3059 (define_insn "*<code>tf3"
3060 [(set (match_operand:TF 0 "register_operand" "=x,x")
3062 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
3063 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3065 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3067 static char buf[32];
3070 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
3072 switch (which_alternative)
3075 ops = "%s\t{%%2, %%0|%%0, %%2}";
3078 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3084 snprintf (buf, sizeof (buf), ops, tmp);
3087 [(set_attr "isa" "noavx,avx")
3088 (set_attr "type" "sselog")
3089 (set (attr "prefix_data16")
3091 (and (eq_attr "alternative" "0")
3092 (eq_attr "mode" "TI"))
3094 (const_string "*")))
3095 (set_attr "prefix" "orig,vex")
3097 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3098 (const_string "V4SF")
3099 (match_test "TARGET_AVX")
3101 (ior (not (match_test "TARGET_SSE2"))
3102 (match_test "optimize_function_for_size_p (cfun)"))
3103 (const_string "V4SF")
3105 (const_string "TI")))])
3107 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3109 ;; FMA floating point multiply/accumulate instructions. These include
3110 ;; scalar versions of the instructions as well as vector versions.
3112 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3114 ;; The standard names for scalar FMA are only available with SSE math enabled.
3115 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3116 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3117 ;; and TARGET_FMA4 are both false.
3118 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3119 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3120 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3121 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named FMA expanders.  Scalar SF/DF additionally
;; require TARGET_SSE_MATH; 128/256-bit vectors need FMA, FMA4 or
;; AVX512VL; 512-bit vectors need AVX512F.
3122 (define_mode_iterator FMAMODEM
3123 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3124 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3125 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3126 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3127 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3128 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3129 (V16SF "TARGET_AVX512F")
3130 (V8DF "TARGET_AVX512F")])
;; Standard-named fused multiply-add expander over FMAMODEM: operands 1,
;; 2 and 3 are all used unnegated.  There is no preparation code; the RTL
;; template is emitted as-is.
3132 (define_expand "fma<mode>4"
3133 [(set (match_operand:FMAMODEM 0 "register_operand")
3135 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3136 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3137 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-named fused multiply-subtract expander over FMAMODEM: the
;; addend (operand 3) is negated; operands 1 and 2 are used as given.
3139 (define_expand "fms<mode>4"
3140 [(set (match_operand:FMAMODEM 0 "register_operand")
3142 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3143 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3144 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-named negated fused multiply-add expander over FMAMODEM: the
;; first multiplicand (operand 1) is negated; operands 2 and 3 are used
;; as given.
3146 (define_expand "fnma<mode>4"
3147 [(set (match_operand:FMAMODEM 0 "register_operand")
3149 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3150 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3151 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-named negated fused multiply-subtract expander over FMAMODEM:
;; both the first multiplicand (operand 1) and the addend (operand 3)
;; are negated.
3153 (define_expand "fnms<mode>4"
3154 [(set (match_operand:FMAMODEM 0 "register_operand")
3156 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3157 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3158 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3160 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Apart from dropping the TARGET_SSE_MATH requirement on SF/DF, the
;; per-mode conditions are the same as in FMAMODEM.
3161 (define_mode_iterator FMAMODE_AVX512
3162 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3163 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3164 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3165 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3166 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3167 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3168 (V16SF "TARGET_AVX512F")
3169 (V8DF "TARGET_AVX512F")])
;; Unconditional list of scalar and 128/256-bit float modes; no 512-bit
;; modes and no per-mode target conditions.
3171 (define_mode_iterator FMAMODE
3172 [SF DF V4SF V2DF V8SF V4DF])
;; Expander "fma4i_fmadd_<mode>" over FMAMODE_AVX512 (so it is not gated
;; on SSE math): a multiply-add of operands 1, 2 and 3 with no negation
;; and no preparation code.
3174 (define_expand "fma4i_fmadd_<mode>"
3175 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3177 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3178 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3179 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
;; Zero-masking FMA expander.  It forwards operands 0..3 to the
;; fma_fmadd_<mode>_maskz_1 generator, inserting a zero vector
;; (CONST0_RTX) before the mask (operand 4); the rounding operand, if
;; any, is appended by the <round_expand_operand> substitution.
3181 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3182 [(match_operand:VF_AVX512VL 0 "register_operand")
3183 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3184 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3185 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3186 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3187 "TARGET_AVX512F && <round_mode512bit_condition>"
3189 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3190 operands[0], operands[1], operands[2], operands[3],
3191 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unmasked fused multiply-add for the FMAMODE modes, enabled by FMA or FMA4.
;; Alternatives 0-2 are tagged isa "fma" and emit the three-operand
;; vfmadd132/213/231 forms (operand 1 or 3 tied to the destination);
;; alternatives 3-4 are tagged "fma4" and emit the four-operand vfmadd form.
3195 (define_insn "*fma_fmadd_<mode>"
3196 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3198 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3199 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3200 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3201 "TARGET_FMA || TARGET_FMA4"
3203 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3204 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3205 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3206 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3207 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3208 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3209 (set_attr "type" "ssemuladd")
3210 (set_attr "mode" "<MODE>")])
3212 ;; Suppose AVX-512F as baseline
;; SF, DF and the 512-bit vectors are listed unconditionally; the 128- and
;; 256-bit vector modes additionally require TARGET_AVX512VL.
3213 (define_mode_iterator VF_SF_AVX512VL
3214 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3215 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX-512 fused multiply-add over VF_SF_AVX512VL, three-operand forms only
;; (vfmadd132/213/231).  Name, predicates, constraints, condition and
;; templates are parameterised by the <sd_...> and <round_...> attributes,
;; which presumably produce the zero-masked and embedded-rounding variants
;; (their definitions are not in this section).
3217 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3218 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3220 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3221 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3222 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3223 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3225 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3226 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3227 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3228 [(set_attr "type" "ssemuladd")
3229 (set_attr "mode" "<MODE>")])
;; Merge-masked fused multiply-add: the result is vec_merge'd under mask
;; operand 4 (constraint "Yk") and printed as %0{%4}.  Operand 1 is tied to
;; the destination in both alternatives; they differ in whether operand 2
;; (vfmadd132) or operand 3 (vfmadd213) takes <round_constraint>.
3231 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3232 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3233 (vec_merge:VF_AVX512VL
3235 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3236 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3237 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3239 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3240 "TARGET_AVX512F && <round_mode512bit_condition>"
3242 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3243 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3244 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3245 (set_attr "type" "ssemuladd")
3246 (set_attr "mode" "<MODE>")])
;; Merge-masked fused multiply-add in which operand 3 is tied to the
;; destination; emitted as vfmadd231 with the result written under mask
;; operand 4 ("Yk").  The register operands use the "v" constraint, like
;; the fmsub/fnmadd/fnmsub/fmaddsub/fmsubadd *_mask3 patterns, rather than
;; the narrower "x": this masked form is AVX-512 only, so there is no reason
;; to keep the allocator away from the extended SSE registers.
3248 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3249 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3250 (vec_merge:VF_AVX512VL
3252 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3253 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3254 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3256 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3258 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3259 [(set_attr "isa" "fma_avx512f")
3260 (set_attr "type" "ssemuladd")
3261 (set_attr "mode" "<MODE>")])
;; Unmasked fused multiply-subtract for the FMAMODE modes, enabled by FMA or
;; FMA4.  Same alternative layout as *fma_fmadd_<mode>: alternatives 0-2
;; (isa "fma") emit vfmsub132/213/231, alternatives 3-4 (isa "fma4") emit
;; the four-operand vfmsub form.
3263 (define_insn "*fma_fmsub_<mode>"
3264 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3266 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3267 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3269 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3270 "TARGET_FMA || TARGET_FMA4"
3272 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3273 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3274 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3275 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3276 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3277 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3278 (set_attr "type" "ssemuladd")
3279 (set_attr "mode" "<MODE>")])
;; AVX-512 fused multiply-subtract over VF_SF_AVX512VL (vfmsub132/213/231).
;; Parameterised by the <sd_...> and <round_...> attributes in the same way
;; as the corresponding fmadd pattern.
3281 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3282 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3284 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3285 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3287 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3288 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3290 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3291 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3292 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3293 [(set_attr "type" "ssemuladd")
3294 (set_attr "mode" "<MODE>")])
;; Merge-masked fused multiply-subtract under mask operand 4 ("Yk").
;; Operand 1 is tied to the destination; alternative 0 emits vfmsub132 and
;; alternative 1 emits vfmsub213, depending on which of operands 2/3 takes
;; <round_constraint>.
3296 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3297 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3298 (vec_merge:VF_AVX512VL
3300 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3301 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3303 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3305 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3308 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3309 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3310 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3311 (set_attr "type" "ssemuladd")
3312 (set_attr "mode" "<MODE>")])
;; Merge-masked fused multiply-subtract with operand 3 tied to the
;; destination; single alternative, emitted as vfmsub231 under mask
;; operand 4 ("Yk").
3314 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3315 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3316 (vec_merge:VF_AVX512VL
3318 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3319 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3321 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3323 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3324 "TARGET_AVX512F && <round_mode512bit_condition>"
3325 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3326 [(set_attr "isa" "fma_avx512f")
3327 (set_attr "type" "ssemuladd")
3328 (set_attr "mode" "<MODE>")])
;; Unmasked fused negate-multiply-add for the FMAMODE modes, enabled by FMA
;; or FMA4.  Alternatives 0-2 (isa "fma") emit vfnmadd132/213/231;
;; alternatives 3-4 (isa "fma4") emit the four-operand vfnmadd form.
3330 (define_insn "*fma_fnmadd_<mode>"
3331 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3334 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3335 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3336 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3337 "TARGET_FMA || TARGET_FMA4"
3339 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3340 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3341 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3342 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3343 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3344 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3345 (set_attr "type" "ssemuladd")
3346 (set_attr "mode" "<MODE>")])
;; AVX-512 fused negate-multiply-add over VF_SF_AVX512VL
;; (vfnmadd132/213/231), parameterised by the <sd_...> and <round_...>
;; attributes like the other patterns of this shape.
3348 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3349 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3352 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3353 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3354 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3355 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3357 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3358 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3359 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3360 [(set_attr "type" "ssemuladd")
3361 (set_attr "mode" "<MODE>")])
;; Merge-masked fused negate-multiply-add under mask operand 4 ("Yk").
;; Operand 1 is tied to the destination; alternative 0 emits vfnmadd132,
;; alternative 1 emits vfnmadd213.
3363 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3364 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3365 (vec_merge:VF_AVX512VL
3368 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3369 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3370 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3372 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3373 "TARGET_AVX512F && <round_mode512bit_condition>"
3375 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3376 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3377 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3378 (set_attr "type" "ssemuladd")
3379 (set_attr "mode" "<MODE>")])
;; Merge-masked fused negate-multiply-add with operand 3 tied to the
;; destination; single alternative, emitted as vfnmadd231 under mask
;; operand 4 ("Yk").
3381 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3382 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3383 (vec_merge:VF_AVX512VL
3386 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3387 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3388 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3390 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3391 "TARGET_AVX512F && <round_mode512bit_condition>"
3392 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3393 [(set_attr "isa" "fma_avx512f")
3394 (set_attr "type" "ssemuladd")
3395 (set_attr "mode" "<MODE>")])
;; Unmasked fused negate-multiply-subtract for the FMAMODE modes, enabled by
;; FMA or FMA4.  Alternatives 0-2 (isa "fma") emit vfnmsub132/213/231;
;; alternatives 3-4 (isa "fma4") emit the four-operand vfnmsub form.
;; The templates deliberately carry no <sd_...>/<round_...> attributes: this
;; is the plain pattern (like *fma_fmadd/_fmsub/_fnmadd_<mode>), and the
;; masked/rounding forms are provided by the separate AVX-512 pattern.
3397 (define_insn "*fma_fnmsub_<mode>"
3398 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3401 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3402 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3404 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3405 "TARGET_FMA || TARGET_FMA4"
3407 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3408 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3409 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3410 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3411 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3412 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3413 (set_attr "type" "ssemuladd")
3414 (set_attr "mode" "<MODE>")])
;; AVX-512 fused negate-multiply-subtract over VF_SF_AVX512VL
;; (vfnmsub132/213/231), parameterised by the <sd_...> and <round_...>
;; attributes like the other patterns of this shape.
3416 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3417 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3420 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3421 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3423 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3424 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3426 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3427 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3428 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3429 [(set_attr "type" "ssemuladd")
3430 (set_attr "mode" "<MODE>")])
;; Merge-masked fused negate-multiply-subtract under mask operand 4 ("Yk").
;; Operand 1 is tied to the destination; alternative 0 emits vfnmsub132,
;; alternative 1 emits vfnmsub213.
3432 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3433 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3434 (vec_merge:VF_AVX512VL
3437 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3438 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3440 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3442 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3443 "TARGET_AVX512F && <round_mode512bit_condition>"
3445 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3446 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3447 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3448 (set_attr "type" "ssemuladd")
3449 (set_attr "mode" "<MODE>")])
;; Merge-masked fused negate-multiply-subtract with operand 3 tied to the
;; destination; single alternative, emitted as vfnmsub231 under mask
;; operand 4 ("Yk").
3451 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3452 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3453 (vec_merge:VF_AVX512VL
3456 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3457 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3459 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3461 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3463 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3464 [(set_attr "isa" "fma_avx512f")
3465 (set_attr "type" "ssemuladd")
3466 (set_attr "mode" "<MODE>")])
3468 ;; FMA parallel floating point multiply addsub and subadd operations.
3470 ;; It would be possible to represent these without the UNSPEC as
3473 ;; (fma op1 op2 op3)
3474 ;; (fma op1 op2 (neg op3))
3477 ;; But this doesn't seem useful in practice.
;; Expander for the parallel multiply add/sub operation over the VF modes.
;; The three inputs are passed as a bracketed operand vector (the unspec
;; form mentioned in the comment above); enabled when any of FMA, FMA4 or
;; AVX512F is available.
3479 (define_expand "fmaddsub_<mode>"
3480 [(set (match_operand:VF 0 "register_operand")
3482 [(match_operand:VF 1 "nonimmediate_operand")
3483 (match_operand:VF 2 "nonimmediate_operand")
3484 (match_operand:VF 3 "nonimmediate_operand")]
3486 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masking expander for fmaddsub.  It forwards operands 0-3 to
;; gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name>, inserting CONST0_RTX
;; of the vector mode ahead of the mask (operand 4), followed by whatever
;; <round_expand_operand> expands to.
3488 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3489 [(match_operand:VF_AVX512VL 0 "register_operand")
3490 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3491 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3492 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3493 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3496 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3497 operands[0], operands[1], operands[2], operands[3],
3498 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unmasked fmaddsub for the 128/256-bit float vector modes (VF_128_256),
;; enabled by FMA or FMA4.  Alternatives 0-2 (isa "fma") emit
;; vfmaddsub132/213/231; alternatives 3-4 (isa "fma4") emit the four-operand
;; vfmaddsub form.
3502 (define_insn "*fma_fmaddsub_<mode>"
3503 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3505 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3506 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3507 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3509 "TARGET_FMA || TARGET_FMA4"
3511 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3512 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3513 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3514 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3515 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3516 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3517 (set_attr "type" "ssemuladd")
3518 (set_attr "mode" "<MODE>")])
;; AVX-512 fmaddsub expressed as an unspec over VF_SF_AVX512VL
;; (vfmaddsub132/213/231), parameterised by the <sd_...> and <round_...>
;; attributes.
;; NOTE(review): the iterator also lists SF and DF; whether a scalar
;; fmaddsub is ever meant to match here cannot be told from this section.
3520 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3521 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3522 (unspec:VF_SF_AVX512VL
3523 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3524 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3525 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3527 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3529 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3530 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3531 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3532 [(set_attr "type" "ssemuladd")
3533 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub under mask operand 4 ("Yk").  Operand 1 is tied to
;; the destination; alternative 0 emits vfmaddsub132, alternative 1 emits
;; vfmaddsub213.
3535 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3536 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3537 (vec_merge:VF_AVX512VL
3539 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3540 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3541 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3544 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3547 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3548 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3549 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3550 (set_attr "type" "ssemuladd")
3551 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub with operand 3 tied to the destination; single
;; alternative, emitted as vfmaddsub231 under mask operand 4 ("Yk").
3553 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3554 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3555 (vec_merge:VF_AVX512VL
3557 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3558 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3559 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3562 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3564 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3565 [(set_attr "isa" "fma_avx512f")
3566 (set_attr "type" "ssemuladd")
3567 (set_attr "mode" "<MODE>")])
;; Unmasked fmsubadd for the 128/256-bit float vector modes (VF_128_256),
;; enabled by FMA or FMA4.  Alternatives 0-2 (isa "fma") emit
;; vfmsubadd132/213/231; alternatives 3-4 (isa "fma4") emit the four-operand
;; vfmsubadd form.
3569 (define_insn "*fma_fmsubadd_<mode>"
3570 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3572 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3573 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3575 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3577 "TARGET_FMA || TARGET_FMA4"
3579 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3580 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3581 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3582 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3583 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3584 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3585 (set_attr "type" "ssemuladd")
3586 (set_attr "mode" "<MODE>")])
;; AVX-512 fmsubadd expressed as an unspec over VF_SF_AVX512VL
;; (vfmsubadd132/213/231), parameterised by the <sd_...> and <round_...>
;; attributes like the fmaddsub counterpart.
3588 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3589 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3590 (unspec:VF_SF_AVX512VL
3591 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3592 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3594 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3596 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3598 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3599 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3600 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3601 [(set_attr "type" "ssemuladd")
3602 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd under mask operand 4 ("Yk").  Operand 1 is tied to
;; the destination; alternative 0 emits vfmsubadd132, alternative 1 emits
;; vfmsubadd213.
3604 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3605 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3606 (vec_merge:VF_AVX512VL
3608 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3609 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3611 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3614 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3617 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3618 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3619 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3620 (set_attr "type" "ssemuladd")
3621 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd with operand 3 tied to the destination; single
;; alternative, emitted as vfmsubadd231 under mask operand 4 ("Yk").
3623 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3624 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3625 (vec_merge:VF_AVX512VL
3627 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3628 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3630 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3633 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3635 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3636 [(set_attr "isa" "fma_avx512f")
3637 (set_attr "type" "ssemuladd")
3638 (set_attr "mode" "<MODE>")])
3640 ;; FMA3 floating point scalar intrinsics. These merge result with
3641 ;; high-order elements from the destination register.
;; Expander for the scalar FMA3 intrinsic over the 128-bit float vector
;; modes (VF_128).  All three inputs use <round_nimm_predicate>, so the
;; accepted operand kinds depend on whether the rounding variant is being
;; instantiated.
3643 (define_expand "fmai_vmfmadd_<mode><round_name>"
3644 [(set (match_operand:VF_128 0 "register_operand")
3647 (match_operand:VF_128 1 "<round_nimm_predicate>")
3648 (match_operand:VF_128 2 "<round_nimm_predicate>")
3649 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar FMA3 fused multiply-add on VF_128, enabled by FMA or AVX512F.
;; Operand 1 is tied to the destination in both alternatives; alternative 0
;; emits vfmadd132 and alternative 1 emits vfmadd213, using the scalar
;; mnemonic suffix.  The name carries <round_name>, like the
;; *fmai_fnmadd/*fmai_fnmsub patterns, because the body already uses the
;; <round_...> attributes; the pattern stays anonymous (leading '*').
3654 (define_insn "*fmai_fmadd_<mode><round_name>"
3655 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3658 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3659 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3660 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3663 "TARGET_FMA || TARGET_AVX512F"
3665 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3666 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3667 [(set_attr "type" "ssemuladd")
3668 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fused multiply-subtract on VF_128, enabled by FMA or AVX512F.
;; Operand 1 is tied to the destination in both alternatives; alternative 0
;; emits vfmsub132 and alternative 1 emits vfmsub213, using the scalar
;; mnemonic suffix.  The name carries <round_name>, like the
;; *fmai_fnmadd/*fmai_fnmsub patterns, because the body already uses the
;; <round_...> attributes; the pattern stays anonymous (leading '*').
3670 (define_insn "*fmai_fmsub_<mode><round_name>"
3671 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3674 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3675 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3677 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3680 "TARGET_FMA || TARGET_AVX512F"
3682 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3683 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3684 [(set_attr "type" "ssemuladd")
3685 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fused negate-multiply-add on VF_128, enabled by FMA or
;; AVX512F.  Note the operand order in the RTL: operand 2 is listed first
;; and operand 1 (tied to the destination) second.  Alternative 0 emits
;; vfnmadd132, alternative 1 emits vfnmadd213.
3687 (define_insn "*fmai_fnmadd_<mode><round_name>"
3688 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3692 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3693 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3694 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3697 "TARGET_FMA || TARGET_AVX512F"
3699 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3700 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3701 [(set_attr "type" "ssemuladd")
3702 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fused negate-multiply-subtract on VF_128, enabled by FMA or
;; AVX512F.  As in *fmai_fnmadd, operand 2 is listed first in the RTL and
;; operand 1 (tied to the destination) second.  Alternative 0 emits
;; vfnmsub132, alternative 1 emits vfnmsub213.
3704 (define_insn "*fmai_fnmsub_<mode><round_name>"
3705 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3709 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3710 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3712 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3715 "TARGET_FMA || TARGET_AVX512F"
3717 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3718 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3719 [(set_attr "type" "ssemuladd")
3720 (set_attr "mode" "<MODE>")])
3722 ;; FMA4 floating point scalar intrinsics. These write the
3723 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar FMA4 intrinsic over VF_128.  The preparation
;; statement sets operands[4] to CONST0_RTX of the vector mode, i.e. the
;; all-zero vector matched as operand 4 by the *fma4i_vm* insns.
3725 (define_expand "fma4i_vmfmadd_<mode>"
3726 [(set (match_operand:VF_128 0 "register_operand")
3729 (match_operand:VF_128 1 "nonimmediate_operand")
3730 (match_operand:VF_128 2 "nonimmediate_operand")
3731 (match_operand:VF_128 3 "nonimmediate_operand"))
3735 "operands[4] = CONST0_RTX (<MODE>mode);")
;; Scalar FMA4 fused multiply-add on VF_128.  Operand 4 must be a zero
;; constant (const0_operand).  Both alternatives emit the four-operand
;; vfmadd form; they differ in whether operand 2 or operand 3 may be memory.
3737 (define_insn "*fma4i_vmfmadd_<mode>"
3738 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3741 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3742 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3743 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3744 (match_operand:VF_128 4 "const0_operand")
3747 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3748 [(set_attr "type" "ssemuladd")
3749 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 fused multiply-subtract on VF_128; operand 4 must be a zero
;; constant.  Both alternatives emit the four-operand vfmsub form and differ
;; in whether operand 2 or operand 3 may be memory.
3751 (define_insn "*fma4i_vmfmsub_<mode>"
3752 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3755 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3756 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3758 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3759 (match_operand:VF_128 4 "const0_operand")
3762 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3763 [(set_attr "type" "ssemuladd")
3764 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 fused negate-multiply-add on VF_128; operand 4 must be a zero
;; constant.  Both alternatives emit the four-operand vfnmadd form and
;; differ in whether operand 2 or operand 3 may be memory.
3766 (define_insn "*fma4i_vmfnmadd_<mode>"
3767 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3771 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3772 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3773 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3774 (match_operand:VF_128 4 "const0_operand")
3777 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3778 [(set_attr "type" "ssemuladd")
3779 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 fused negate-multiply-subtract on VF_128; operand 4 must be a
;; zero constant.  Both alternatives emit the four-operand vfnmsub form and
;; differ in whether operand 2 or operand 3 may be memory.
3781 (define_insn "*fma4i_vmfnmsub_<mode>"
3782 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3786 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3787 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3789 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3790 (match_operand:VF_128 4 "const0_operand")
3793 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3794 [(set_attr "type" "ssemuladd")
3795 (set_attr "mode" "<MODE>")])
3797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3799 ;; Parallel single-precision floating point conversion operations
3801 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 ("ym") to V2SF with float and combines
;; the result with V4SF operand 1, which is tied to the destination.
3803 (define_insn "sse_cvtpi2ps"
3804 [(set (match_operand:V4SF 0 "register_operand" "=x")
3807 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3808 (match_operand:V4SF 1 "register_operand" "0")
3811 "cvtpi2ps\t{%2, %0|%0, %2}"
3812 [(set_attr "type" "ssecvt")
3813 (set_attr "mode" "V4SF")])
;; cvtps2pi: applies a V4SI unspec to V4SF operand 1 (register or memory)
;; and keeps elements 0 and 1 as the V2SI result in a "y" register.  Marked
;; as executing in the "mmx" unit.
3815 (define_insn "sse_cvtps2pi"
3816 [(set (match_operand:V2SI 0 "register_operand" "=y")
3818 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3820 (parallel [(const_int 0) (const_int 1)])))]
3822 "cvtps2pi\t{%1, %0|%0, %q1}"
3823 [(set_attr "type" "ssecvt")
3824 (set_attr "unit" "mmx")
3825 (set_attr "mode" "DI")])
;; cvttps2pi: applies fix:V4SI to V4SF operand 1 (register or memory) and
;; keeps elements 0 and 1 as the V2SI result in a "y" register.  Marked as
;; executing in the "mmx" unit, with prefix_rep forced to 0.
3827 (define_insn "sse_cvttps2pi"
3828 [(set (match_operand:V2SI 0 "register_operand" "=y")
3830 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3831 (parallel [(const_int 0) (const_int 1)])))]
3833 "cvttps2pi\t{%1, %0|%0, %q1}"
3834 [(set_attr "type" "ssecvt")
3835 (set_attr "unit" "mmx")
3836 (set_attr "prefix_rep" "0")
3837 (set_attr "mode" "SF")])
;; Converts SImode operand 2 to SF (float:SF) and combines it with V4SF
;; operand 1.  Alternatives 0-1 (isa "noavx") are the two-operand cvtsi2ss
;; with operand 1 tied to the destination, taking a general register or
;; memory source; alternative 2 (isa "avx") is the three-operand vcvtsi2ss.
3839 (define_insn "sse_cvtsi2ss<round_name>"
3840 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3843 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3844 (match_operand:V4SF 1 "register_operand" "0,0,v")
3848 cvtsi2ss\t{%2, %0|%0, %2}
3849 cvtsi2ss\t{%2, %0|%0, %2}
3850 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3851 [(set_attr "isa" "noavx,noavx,avx")
3852 (set_attr "type" "sseicvt")
3853 (set_attr "athlon_decode" "vector,double,*")
3854 (set_attr "amdfam10_decode" "vector,double,*")
3855 (set_attr "bdver1_decode" "double,direct,*")
3856 (set_attr "btver2_decode" "double,double,double")
3857 (set_attr "prefix" "orig,orig,maybe_evex")
3858 (set_attr "mode" "SF")])
;; 64-bit-source variant of sse_cvtsi2ss: converts DImode operand 2 to SF.
;; Requires TARGET_SSE && TARGET_64BIT.  Alternatives 0-1 (isa "noavx") emit
;; cvtsi2ssq with a REX prefix (prefix_rex "1"); alternative 2 (isa "avx")
;; emits vcvtsi2ssq with length_vex 4.
3860 (define_insn "sse_cvtsi2ssq<round_name>"
3861 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3864 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3865 (match_operand:V4SF 1 "register_operand" "0,0,v")
3867 "TARGET_SSE && TARGET_64BIT"
3869 cvtsi2ssq\t{%2, %0|%0, %2}
3870 cvtsi2ssq\t{%2, %0|%0, %2}
3871 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3872 [(set_attr "isa" "noavx,noavx,avx")
3873 (set_attr "type" "sseicvt")
3874 (set_attr "athlon_decode" "vector,double,*")
3875 (set_attr "amdfam10_decode" "vector,double,*")
3876 (set_attr "bdver1_decode" "double,direct,*")
3877 (set_attr "btver2_decode" "double,double,double")
3878 (set_attr "length_vex" "*,*,4")
3879 (set_attr "prefix_rex" "1,1,*")
3880 (set_attr "prefix" "orig,orig,maybe_evex")
3881 (set_attr "mode" "SF")])
;; %vcvtss2si on element 0 of V4SF operand 1, expressed as an
;; UNSPEC_FIX_NOTRUNC and producing an SImode general register.
;; NOTE(review): unlike sse_cvtss2si_2, no amdfam10_decode attribute is set
;; here -- confirm that is intentional.
3883 (define_insn "sse_cvtss2si<round_name>"
3884 [(set (match_operand:SI 0 "register_operand" "=r,r")
3887 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3888 (parallel [(const_int 0)]))]
3889 UNSPEC_FIX_NOTRUNC))]
3891 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3892 [(set_attr "type" "sseicvt")
3893 (set_attr "athlon_decode" "double,vector")
3894 (set_attr "bdver1_decode" "double,double")
3895 (set_attr "prefix_rep" "1")
3896 (set_attr "prefix" "maybe_vex")
3897 (set_attr "mode" "SI")])
;; %vcvtss2si taking a scalar SF operand directly (register or memory)
;; instead of selecting element 0 of a vector; UNSPEC_FIX_NOTRUNC result in
;; an SImode general register.
3899 (define_insn "sse_cvtss2si_2"
3900 [(set (match_operand:SI 0 "register_operand" "=r,r")
3901 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3902 UNSPEC_FIX_NOTRUNC))]
3904 "%vcvtss2si\t{%1, %0|%0, %k1}"
3905 [(set_attr "type" "sseicvt")
3906 (set_attr "athlon_decode" "double,vector")
3907 (set_attr "amdfam10_decode" "double,double")
3908 (set_attr "bdver1_decode" "double,double")
3909 (set_attr "prefix_rep" "1")
3910 (set_attr "prefix" "maybe_vex")
3911 (set_attr "mode" "SI")])
;; 64-bit-result variant of sse_cvtss2si: UNSPEC_FIX_NOTRUNC on element 0 of
;; V4SF operand 1, producing a DImode general register.  Requires
;; TARGET_SSE && TARGET_64BIT; the mnemonic gets a {q} suffix.
3913 (define_insn "sse_cvtss2siq<round_name>"
3914 [(set (match_operand:DI 0 "register_operand" "=r,r")
3917 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3918 (parallel [(const_int 0)]))]
3919 UNSPEC_FIX_NOTRUNC))]
3920 "TARGET_SSE && TARGET_64BIT"
3921 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3922 [(set_attr "type" "sseicvt")
3923 (set_attr "athlon_decode" "double,vector")
3924 (set_attr "bdver1_decode" "double,double")
3925 (set_attr "prefix_rep" "1")
3926 (set_attr "prefix" "maybe_vex")
3927 (set_attr "mode" "DI")])
;; 64-bit-result %vcvtss2si{q} taking a scalar SF operand directly (register
;; or memory).  Requires TARGET_SSE && TARGET_64BIT.
3929 (define_insn "sse_cvtss2siq_2"
3930 [(set (match_operand:DI 0 "register_operand" "=r,r")
3931 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3932 UNSPEC_FIX_NOTRUNC))]
3933 "TARGET_SSE && TARGET_64BIT"
3934 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3935 [(set_attr "type" "sseicvt")
3936 (set_attr "athlon_decode" "double,vector")
3937 (set_attr "amdfam10_decode" "double,double")
3938 (set_attr "bdver1_decode" "double,double")
3939 (set_attr "prefix_rep" "1")
3940 (set_attr "prefix" "maybe_vex")
3941 (set_attr "mode" "DI")])
;; %vcvttss2si on element 0 of V4SF operand 1, producing an SImode general
;; register.  Uses the <round_saeonly_...> attribute family for the name,
;; predicate, constraint and template.
3943 (define_insn "sse_cvttss2si<round_saeonly_name>"
3944 [(set (match_operand:SI 0 "register_operand" "=r,r")
3947 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3948 (parallel [(const_int 0)]))))]
3950 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3951 [(set_attr "type" "sseicvt")
3952 (set_attr "athlon_decode" "double,vector")
3953 (set_attr "amdfam10_decode" "double,double")
3954 (set_attr "bdver1_decode" "double,double")
3955 (set_attr "prefix_rep" "1")
3956 (set_attr "prefix" "maybe_vex")
3957 (set_attr "mode" "SI")])
;; 64-bit-result variant of sse_cvttss2si: %vcvttss2si{q} on element 0 of
;; V4SF operand 1, producing a DImode general register.  Requires
;; TARGET_SSE && TARGET_64BIT.
;; NOTE(review): alternative 1 uses <round_saeonly_constraint>, whereas
;; sse_cvttss2si uses <round_saeonly_constraint2> in the same position --
;; confirm whether the difference is intentional.
3959 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3960 [(set (match_operand:DI 0 "register_operand" "=r,r")
3963 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3964 (parallel [(const_int 0)]))))]
3965 "TARGET_SSE && TARGET_64BIT"
3966 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3967 [(set_attr "type" "sseicvt")
3968 (set_attr "athlon_decode" "double,vector")
3969 (set_attr "amdfam10_decode" "double,double")
3970 (set_attr "bdver1_decode" "double,double")
3971 (set_attr "prefix_rep" "1")
3972 (set_attr "prefix" "maybe_vex")
3973 (set_attr "mode" "DI")])
;; vcvtusi2<suffix> from a 32-bit source: unsigned_float of SImode operand 2
;; to the scalar element mode, wrapped in vec_duplicate and combined with
;; VF_128 operand 1.  Requires AVX512F plus <round_modev4sf_condition>;
;; always EVEX-encoded.
3975 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3976 [(set (match_operand:VF_128 0 "register_operand" "=v")
3978 (vec_duplicate:VF_128
3979 (unsigned_float:<ssescalarmode>
3980 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3981 (match_operand:VF_128 1 "register_operand" "v")
3983 "TARGET_AVX512F && <round_modev4sf_condition>"
3984 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3985 [(set_attr "type" "sseicvt")
3986 (set_attr "prefix" "evex")
3987 (set_attr "mode" "<ssescalarmode>")])
;; cvtusi2<ssescalarmodesuffix>64<round_name>: same shape as the 32-bit
;; pattern but operand 2 is DImode, so it is gated on
;; TARGET_AVX512F && TARGET_64BIT.  Unlike the 32-bit pattern, the
;; condition carries no <round_modev4sf_condition> term.
3989 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3990 [(set (match_operand:VF_128 0 "register_operand" "=v")
3992 (vec_duplicate:VF_128
3993 (unsigned_float:<ssescalarmode>
3994 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3995 (match_operand:VF_128 1 "register_operand" "v")
3997 "TARGET_AVX512F && TARGET_64BIT"
3998 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3999 [(set_attr "type" "sseicvt")
4000 (set_attr "prefix" "evex")
4001 (set_attr "mode" "<ssescalarmode>")])
;; float<sseintvecmodelower><mode>2<mask_name><round_name>: emits
;; [v]cvtdq2ps, producing a VF1 vector from the <sseintvecmode> vector in
;; operand 1.  Gated on TARGET_SSE2 together with the 512-bit conditions
;; contributed by the mask and rounding placeholders.
4003 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4004 [(set (match_operand:VF1 0 "register_operand" "=v")
4006 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4007 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4008 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4009 [(set_attr "type" "ssecvt")
4010 (set_attr "prefix" "maybe_vex")
4011 (set_attr "mode" "<sseinsnmode>")])
;; ufloat<sseintvecmodelower><mode>2<mask_name><round_name>: unsigned
;; integer-vector -> float-vector conversion (unsigned_float) for the
;; VF1_AVX512VL modes, emitted as EVEX-prefixed vcvtudq2ps.
;; NOTE(review): operand 1 pairs the fixed predicate nonimmediate_operand
;; with <round_constraint>, whereas the signed float pattern uses
;; <round_nimm_predicate>; confirm this is intended.
4013 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4014 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4015 (unsigned_float:VF1_AVX512VL
4016 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4018 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4019 [(set_attr "type" "ssecvt")
4020 (set_attr "prefix" "evex")
4021 (set_attr "mode" "<MODE>")])
;; floatuns<sseintvecmodelower><mode>2: expander for unsigned
;; integer-vector -> VF1 conversion; both operands must be registers.
;; Available when TARGET_SSE2 holds and either the mode is V4SFmode or
;; TARGET_AVX2 is set.
;; Dispatch visible in the body: V16SFmode uses gen_ufloatv16siv16sf2;
;; under TARGET_AVX512VL, V4SFmode uses gen_ufloatv4siv4sf2 and the V8SF
;; insn gen_ufloatv8siv8sf2 is used as well; the helper
;; ix86_expand_vector_convert_uns_vsivsf (not defined in this part of
;; the file) performs the remaining expansion.
4023 (define_expand "floatuns<sseintvecmodelower><mode>2"
4024 [(match_operand:VF1 0 "register_operand")
4025 (match_operand:<sseintvecmode> 1 "register_operand")]
4026 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4028 if (<MODE>mode == V16SFmode)
4029 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4031 if (TARGET_AVX512VL)
4033 if (<MODE>mode == V4SFmode)
4034 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4036 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4039 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4045 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each 32-bit-integer vector mode (V16SI/V8SI/V4SI) to the
;; lower-case name of the float vector mode with the same element count;
;; it is spliced into insn names such as ..._fix_notrunc<sf2simodelower><mode>.
4046 (define_mode_attr sf2simodelower
4047 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>:
;; emits [v]cvtps2dq, converting the <ssePSmode> operand 1 (register or
;; memory) into a VI4_AVX integer vector via UNSPEC_FIX_NOTRUNC.
;; Gated on TARGET_SSE2 && <mask_mode512bit_condition>.
;; "prefix_data16" is computed from a TARGET_AVX test rather than being a
;; plain constant.
4049 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4050 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4052 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4053 UNSPEC_FIX_NOTRUNC))]
4054 "TARGET_SSE2 && <mask_mode512bit_condition>"
4055 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4056 [(set_attr "type" "ssecvt")
4057 (set (attr "prefix_data16")
4059 (match_test "TARGET_AVX")
4061 (const_string "1")))
4062 (set_attr "prefix" "maybe_vex")
4063 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_fix_notruncv16sfv16si: vcvtps2dq on whole V16SF -> V16SI
;; vectors via UNSPEC_FIX_NOTRUNC.  The <mask_name>/<round_name>
;; placeholders add the optional mask and rounding operands printed by
;; <mask_operand2> and <round_mask_op2>.  EVEX-encoded, mode XI.
4065 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4066 [(set (match_operand:V16SI 0 "register_operand" "=v")
4068 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4069 UNSPEC_FIX_NOTRUNC))]
4071 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4072 [(set_attr "type" "ssecvt")
4073 (set_attr "prefix" "evex")
4074 (set_attr "mode" "XI")])
;; <avx512>_ufix_notrunc<sf2simodelower><mode>: emits vcvtps2udq,
;; converting the <ssePSmode> operand 1 into a VI4_AVX512VL vector via
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-encoded; mask and rounding
;; operands come from the <mask_name>/<round_name> placeholders.
4076 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4077 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4078 (unspec:VI4_AVX512VL
4079 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4080 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4082 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4083 [(set_attr "type" "ssecvt")
4084 (set_attr "prefix" "evex")
4085 (set_attr "mode" "<sseinsnmode>")])
;; avx512dq_cvtps2qq<mode>: emits vcvtps2qq, converting the <ssePSmode2>
;; operand 1 into a VI8_256_512 vector via UNSPEC_FIX_NOTRUNC.
;; Requires TARGET_AVX512DQ && <round_mode512bit_condition>.
4087 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4088 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4089 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4090 UNSPEC_FIX_NOTRUNC))]
4091 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4092 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4093 [(set_attr "type" "ssecvt")
4094 (set_attr "prefix" "evex")
4095 (set_attr "mode" "<sseinsnmode>")])
;; avx512dq_cvtps2qqv2di: V2DI-destination form of vcvtps2qq.  Operand 1
;; is a V4SF vector from which elements 0 and 1 are selected (the
;; `parallel' below) before the UNSPEC_FIX_NOTRUNC conversion.
;; Requires TARGET_AVX512DQ && TARGET_AVX512VL; no rounding placeholder.
4097 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4098 [(set (match_operand:V2DI 0 "register_operand" "=v")
4101 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4102 (parallel [(const_int 0) (const_int 1)]))]
4103 UNSPEC_FIX_NOTRUNC))]
4104 "TARGET_AVX512DQ && TARGET_AVX512VL"
4105 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4106 [(set_attr "type" "ssecvt")
4107 (set_attr "prefix" "evex")
4108 (set_attr "mode" "TI")])
;; avx512dq_cvtps2uqq<mode>: emits vcvtps2uqq, converting the
;; <ssePSmode2> operand 1 into a VI8_256_512 vector via
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; Requires TARGET_AVX512DQ && <round_mode512bit_condition>.
4110 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4111 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4112 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4113 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4114 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4115 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4116 [(set_attr "type" "ssecvt")
4117 (set_attr "prefix" "evex")
4118 (set_attr "mode" "<sseinsnmode>")])
;; avx512dq_cvtps2uqqv2di: V2DI-destination form of vcvtps2uqq.
;; Elements 0 and 1 of the V4SF operand 1 are selected and converted via
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; Requires TARGET_AVX512DQ && TARGET_AVX512VL.
4120 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4121 [(set (match_operand:V2DI 0 "register_operand" "=v")
4124 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4125 (parallel [(const_int 0) (const_int 1)]))]
4126 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4127 "TARGET_AVX512DQ && TARGET_AVX512VL"
4128 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4129 [(set_attr "type" "ssecvt")
4130 (set_attr "prefix" "evex")
4131 (set_attr "mode" "TI")])
;; <fixsuffix>fix_truncv16sfv16si2: emits vcvttps2<fixsuffix>dq on
;; V16SF -> V16SI vectors; <fixsuffix> appears both in the insn name and
;; in the mnemonic.  The <round_saeonly_*> placeholders supply the
;; predicate, constraint and extra template operand.  EVEX, mode XI.
4133 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4134 [(set (match_operand:V16SI 0 "register_operand" "=v")
4136 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4138 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4139 [(set_attr "type" "ssecvt")
4140 (set_attr "prefix" "evex")
4141 (set_attr "mode" "XI")])
;; fix_truncv8sfv8si2<mask_name>: V8SF -> V8SI conversion expressed with
;; the `fix' RTL code and emitted as vcvttps2dq.
;; Requires TARGET_AVX && <mask_avx512vl_condition>; the prefix attribute
;; is taken from <mask_prefix>.
4143 (define_insn "fix_truncv8sfv8si2<mask_name>"
4144 [(set (match_operand:V8SI 0 "register_operand" "=v")
4145 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4146 "TARGET_AVX && <mask_avx512vl_condition>"
4147 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4148 [(set_attr "type" "ssecvt")
4149 (set_attr "prefix" "<mask_prefix>")
4150 (set_attr "mode" "OI")])
;; fix_truncv4sfv4si2<mask_name>: V4SF -> V4SI conversion expressed with
;; the `fix' RTL code and emitted as [v]cvttps2dq.
;; Requires TARGET_SSE2 && <mask_avx512vl_condition>.
;; "prefix_rep" is computed from a TARGET_AVX test.
;; NOTE(review): "prefix_data16" is assigned twice below -- once through
;; a TARGET_AVX-dependent (set (attr ...)) and again by the unconditional
;; (set_attr "prefix_data16" "0").  One of the two looks redundant;
;; confirm which value is intended and drop the other.
4152 (define_insn "fix_truncv4sfv4si2<mask_name>"
4153 [(set (match_operand:V4SI 0 "register_operand" "=v")
4154 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4155 "TARGET_SSE2 && <mask_avx512vl_condition>"
4156 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4157 [(set_attr "type" "ssecvt")
4158 (set (attr "prefix_rep")
4160 (match_test "TARGET_AVX")
4162 (const_string "1")))
4163 (set (attr "prefix_data16")
4165 (match_test "TARGET_AVX")
4167 (const_string "0")))
4168 (set_attr "prefix_data16" "0")
4169 (set_attr "prefix" "<mask_prefix2>")
4170 (set_attr "mode" "TI")])
;; fixuns_trunc<mode><sseintvecmodelower>2: expander for VF1 ->
;; unsigned integer-vector conversion; both operands are registers.
;; V16SFmode is handed to gen_ufix_truncv16sfv16si2.  The other visible
;; path builds the result from the signed conversion:
;;   tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2])
;;   tmp[1] = signed fix_trunc of tmp[0] (fresh pseudo register)
;;   operands[0] = tmp[1] XOR tmp[2]
;; The helper is not defined in this part of the file; presumably tmp[2]
;; is the correction mask it returns -- TODO confirm against i386.c.
4172 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4173 [(match_operand:<sseintvecmode> 0 "register_operand")
4174 (match_operand:VF1 1 "register_operand")]
4177 if (<MODE>mode == V16SFmode)
4178 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4183 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4184 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4185 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4186 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4193 ;; Parallel double-precision floating point conversion operations
4195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_cvtpi2pd: V2SI -> V2DF conversion (`float' RTL code) emitted as
;; cvtpi2pd.  Alternative 0 reads the source from a "y" register and is
;; tagged unit "mmx" / prefix_data16 "1"; alternative 1 reads it from
;; memory and leaves both attributes at "*".
4197 (define_insn "sse2_cvtpi2pd"
4198 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4199 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4201 "cvtpi2pd\t{%1, %0|%0, %1}"
4202 [(set_attr "type" "ssecvt")
4203 (set_attr "unit" "mmx,*")
4204 (set_attr "prefix_data16" "1,*")
4205 (set_attr "mode" "V2DF")])
;; sse2_cvtpd2pi: V2DF -> V2SI conversion via UNSPEC_FIX_NOTRUNC, emitted
;; as cvtpd2pi.  The destination is a "y" register; the source may be an
;; "x" register or memory.  Tagged unit "mmx".
4207 (define_insn "sse2_cvtpd2pi"
4208 [(set (match_operand:V2SI 0 "register_operand" "=y")
4209 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4210 UNSPEC_FIX_NOTRUNC))]
4212 "cvtpd2pi\t{%1, %0|%0, %1}"
4213 [(set_attr "type" "ssecvt")
4214 (set_attr "unit" "mmx")
4215 (set_attr "bdver1_decode" "double")
4216 (set_attr "btver2_decode" "direct")
4217 (set_attr "prefix_data16" "1")
4218 (set_attr "mode" "DI")])
;; sse2_cvttpd2pi: V2DF -> V2SI conversion using the `fix' RTL code,
;; emitted as cvttpd2pi with a "y"-register destination.
;; NOTE(review): the "mode" attribute is "TI" here but "DI" on the
;; otherwise parallel sse2_cvtpd2pi pattern; confirm the difference is
;; intended.
4220 (define_insn "sse2_cvttpd2pi"
4221 [(set (match_operand:V2SI 0 "register_operand" "=y")
4222 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4224 "cvttpd2pi\t{%1, %0|%0, %1}"
4225 [(set_attr "type" "ssecvt")
4226 (set_attr "unit" "mmx")
4227 (set_attr "bdver1_decode" "double")
4228 (set_attr "prefix_data16" "1")
4229 (set_attr "mode" "TI")])
;; sse2_cvtsi2sd: converts the SImode operand 2 to DFmode (`float') and
;; combines it with the V2DF register operand 1.
;; Three alternatives, selected through the "isa" attribute:
;;   0, 1 (noavx): two-operand cvtsi2sd; operand 1 is tied to the
;;                 destination ("0"); source in a register / in memory.
;;   2    (avx):   three-operand vcvtsi2sd with operand 1 printed
;;                 separately.
;; The per-CPU *_decode attributes are listed per alternative.
4231 (define_insn "sse2_cvtsi2sd"
4232 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4235 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4236 (match_operand:V2DF 1 "register_operand" "0,0,x")
4240 cvtsi2sd\t{%2, %0|%0, %2}
4241 cvtsi2sd\t{%2, %0|%0, %2}
4242 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4243 [(set_attr "isa" "noavx,noavx,avx")
4244 (set_attr "type" "sseicvt")
4245 (set_attr "athlon_decode" "double,direct,*")
4246 (set_attr "amdfam10_decode" "vector,double,*")
4247 (set_attr "bdver1_decode" "double,direct,*")
4248 (set_attr "btver2_decode" "double,double,double")
4249 (set_attr "prefix" "orig,orig,vex")
4250 (set_attr "mode" "DF")])
;; sse2_cvtsi2sdq<round_name>: DImode-source variant, gated on
;; TARGET_SSE2 && TARGET_64BIT.
;; Alternatives 0 and 1 (noavx) emit two-operand cvtsi2sdq with operand 1
;; tied to the destination and prefix_rex "1"; alternative 2 (avx) emits
;; three-operand vcvtsi2sdq, may carry the <round_op3> operand, and is
;; given length_vex "4" and prefix "maybe_evex".
4252 (define_insn "sse2_cvtsi2sdq<round_name>"
4253 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4256 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4257 (match_operand:V2DF 1 "register_operand" "0,0,v")
4259 "TARGET_SSE2 && TARGET_64BIT"
4261 cvtsi2sdq\t{%2, %0|%0, %2}
4262 cvtsi2sdq\t{%2, %0|%0, %2}
4263 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4264 [(set_attr "isa" "noavx,noavx,avx")
4265 (set_attr "type" "sseicvt")
4266 (set_attr "athlon_decode" "double,direct,*")
4267 (set_attr "amdfam10_decode" "vector,double,*")
4268 (set_attr "bdver1_decode" "double,direct,*")
4269 (set_attr "length_vex" "*,*,4")
4270 (set_attr "prefix_rex" "1,1,*")
4271 (set_attr "prefix" "orig,orig,maybe_evex")
4272 (set_attr "mode" "DF")])
;; avx512f_vcvtss2usi<round_name>: emits vcvtss2usi.  Element 0 of the
;; V4SF operand 1 is converted via UNSPEC_UNSIGNED_FIX_NOTRUNC into an
;; SImode general register.  EVEX-encoded.
4274 (define_insn "avx512f_vcvtss2usi<round_name>"
4275 [(set (match_operand:SI 0 "register_operand" "=r")
4278 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4279 (parallel [(const_int 0)]))]
4280 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4282 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4283 [(set_attr "type" "sseicvt")
4284 (set_attr "prefix" "evex")
4285 (set_attr "mode" "SI")])
;; avx512f_vcvtss2usiq<round_name>: DImode-destination variant of
;; vcvtss2usi (element 0 of the V4SF operand 1, via
;; UNSPEC_UNSIGNED_FIX_NOTRUNC).  Requires TARGET_AVX512F && TARGET_64BIT.
4287 (define_insn "avx512f_vcvtss2usiq<round_name>"
4288 [(set (match_operand:DI 0 "register_operand" "=r")
4291 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4292 (parallel [(const_int 0)]))]
4293 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4294 "TARGET_AVX512F && TARGET_64BIT"
4295 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4296 [(set_attr "type" "sseicvt")
4297 (set_attr "prefix" "evex")
4298 (set_attr "mode" "DI")])
;; avx512f_vcvttss2usi<round_saeonly_name>: emits vcvttss2usi, taking
;; element 0 of the V4SF operand 1 and writing an SImode general
;; register.  Uses the <round_saeonly_*> placeholders.  EVEX-encoded.
4300 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4301 [(set (match_operand:SI 0 "register_operand" "=r")
4304 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4305 (parallel [(const_int 0)]))))]
4307 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4308 [(set_attr "type" "sseicvt")
4309 (set_attr "prefix" "evex")
4310 (set_attr "mode" "SI")])
;; avx512f_vcvttss2usiq<round_saeonly_name>: DImode-destination variant
;; of vcvttss2usi (element 0 of the V4SF operand 1).
;; Requires TARGET_AVX512F && TARGET_64BIT.
4312 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4313 [(set (match_operand:DI 0 "register_operand" "=r")
4316 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4317 (parallel [(const_int 0)]))))]
4318 "TARGET_AVX512F && TARGET_64BIT"
4319 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4320 [(set_attr "type" "sseicvt")
4321 (set_attr "prefix" "evex")
4322 (set_attr "mode" "DI")])
;; avx512f_vcvtsd2usi<round_name>: emits vcvtsd2usi.  Element 0 of the
;; V2DF operand 1 is converted via UNSPEC_UNSIGNED_FIX_NOTRUNC into an
;; SImode general register.  EVEX-encoded.
4324 (define_insn "avx512f_vcvtsd2usi<round_name>"
4325 [(set (match_operand:SI 0 "register_operand" "=r")
4328 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4329 (parallel [(const_int 0)]))]
4330 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4332 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4333 [(set_attr "type" "sseicvt")
4334 (set_attr "prefix" "evex")
4335 (set_attr "mode" "SI")])
;; avx512f_vcvtsd2usiq<round_name>: DImode-destination variant of
;; vcvtsd2usi (element 0 of the V2DF operand 1, via
;; UNSPEC_UNSIGNED_FIX_NOTRUNC).  Requires TARGET_AVX512F && TARGET_64BIT.
4337 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4338 [(set (match_operand:DI 0 "register_operand" "=r")
4341 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4342 (parallel [(const_int 0)]))]
4343 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4344 "TARGET_AVX512F && TARGET_64BIT"
4345 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4346 [(set_attr "type" "sseicvt")
4347 (set_attr "prefix" "evex")
4348 (set_attr "mode" "DI")])
;; avx512f_vcvttsd2usi<round_saeonly_name>: emits vcvttsd2usi, taking
;; element 0 of the V2DF operand 1 and writing an SImode general
;; register.  Uses the <round_saeonly_*> placeholders.  EVEX-encoded.
4350 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4351 [(set (match_operand:SI 0 "register_operand" "=r")
4354 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4355 (parallel [(const_int 0)]))))]
4357 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4358 [(set_attr "type" "sseicvt")
4359 (set_attr "prefix" "evex")
4360 (set_attr "mode" "SI")])
;; avx512f_vcvttsd2usiq<round_saeonly_name>: DImode-destination variant
;; of vcvttsd2usi (element 0 of the V2DF operand 1).
;; Requires TARGET_AVX512F && TARGET_64BIT.
4362 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4363 [(set (match_operand:DI 0 "register_operand" "=r")
4366 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4367 (parallel [(const_int 0)]))))]
4368 "TARGET_AVX512F && TARGET_64BIT"
4369 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4370 [(set_attr "type" "sseicvt")
4371 (set_attr "prefix" "evex")
4372 (set_attr "mode" "DI")])
;; sse2_cvtsd2si<round_name>: emits [v]cvtsd2si.  Element 0 of the V2DF
;; operand 1 is converted via UNSPEC_FIX_NOTRUNC into an SImode general
;; register.  Two alternatives: a "v" register source, or whatever
;; <round_constraint2> expands to.  The Intel-dialect template prints
;; operand 1 through the %q modifier.
4374 (define_insn "sse2_cvtsd2si<round_name>"
4375 [(set (match_operand:SI 0 "register_operand" "=r,r")
4378 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4379 (parallel [(const_int 0)]))]
4380 UNSPEC_FIX_NOTRUNC))]
4382 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4383 [(set_attr "type" "sseicvt")
4384 (set_attr "athlon_decode" "double,vector")
4385 (set_attr "bdver1_decode" "double,double")
4386 (set_attr "btver2_decode" "double,double")
4387 (set_attr "prefix_rep" "1")
4388 (set_attr "prefix" "maybe_vex")
4389 (set_attr "mode" "SI")])
;; sse2_cvtsd2si_2: same [v]cvtsd2si instruction, but matching a bare
;; DFmode source operand (register "v" or memory) instead of element 0
;; of a V2DF vector.  Result is an SImode general register, modeled with
;; UNSPEC_FIX_NOTRUNC.
4391 (define_insn "sse2_cvtsd2si_2"
4392 [(set (match_operand:SI 0 "register_operand" "=r,r")
4393 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4394 UNSPEC_FIX_NOTRUNC))]
4396 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4397 [(set_attr "type" "sseicvt")
4398 (set_attr "athlon_decode" "double,vector")
4399 (set_attr "amdfam10_decode" "double,double")
4400 (set_attr "bdver1_decode" "double,double")
4401 (set_attr "prefix_rep" "1")
4402 (set_attr "prefix" "maybe_vex")
4403 (set_attr "mode" "SI")])
;; sse2_cvtsd2siq<round_name>: DImode-destination variant, emitted as
;; [v]cvtsd2si{q}.  Element 0 of the V2DF operand 1 is converted via
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT.
4405 (define_insn "sse2_cvtsd2siq<round_name>"
4406 [(set (match_operand:DI 0 "register_operand" "=r,r")
4409 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4410 (parallel [(const_int 0)]))]
4411 UNSPEC_FIX_NOTRUNC))]
4412 "TARGET_SSE2 && TARGET_64BIT"
4413 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4414 [(set_attr "type" "sseicvt")
4415 (set_attr "athlon_decode" "double,vector")
4416 (set_attr "bdver1_decode" "double,double")
4417 (set_attr "prefix_rep" "1")
4418 (set_attr "prefix" "maybe_vex")
4419 (set_attr "mode" "DI")])
;; sse2_cvtsd2siq_2: [v]cvtsd2si{q} matching a bare DFmode source
;; (register "v" or memory) and producing a DImode general register via
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT.
4421 (define_insn "sse2_cvtsd2siq_2"
4422 [(set (match_operand:DI 0 "register_operand" "=r,r")
4423 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4424 UNSPEC_FIX_NOTRUNC))]
4425 "TARGET_SSE2 && TARGET_64BIT"
4426 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4427 [(set_attr "type" "sseicvt")
4428 (set_attr "athlon_decode" "double,vector")
4429 (set_attr "amdfam10_decode" "double,double")
4430 (set_attr "bdver1_decode" "double,double")
4431 (set_attr "prefix_rep" "1")
4432 (set_attr "prefix" "maybe_vex")
4433 (set_attr "mode" "DI")])
;; sse2_cvttsd2si<round_saeonly_name>: emits [v]cvttsd2si, taking
;; element 0 of the V2DF operand 1 and writing an SImode general
;; register.  Two alternatives: a "v" register source, or whatever
;; <round_saeonly_constraint2> expands to.
4435 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4436 [(set (match_operand:SI 0 "register_operand" "=r,r")
4439 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4440 (parallel [(const_int 0)]))))]
4442 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4443 [(set_attr "type" "sseicvt")
4444 (set_attr "athlon_decode" "double,vector")
4445 (set_attr "amdfam10_decode" "double,double")
4446 (set_attr "bdver1_decode" "double,double")
4447 (set_attr "btver2_decode" "double,double")
4448 (set_attr "prefix_rep" "1")
4449 (set_attr "prefix" "maybe_vex")
4450 (set_attr "mode" "SI")])
;; sse2_cvttsd2siq<round_saeonly_name>: DImode-destination variant,
;; emitted as [v]cvttsd2si{q} from element 0 of the V2DF operand 1.
;; Requires TARGET_SSE2 && TARGET_64BIT.
4452 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4453 [(set (match_operand:DI 0 "register_operand" "=r,r")
4456 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4457 (parallel [(const_int 0)]))))]
4458 "TARGET_SSE2 && TARGET_64BIT"
4459 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4460 [(set_attr "type" "sseicvt")
4461 (set_attr "athlon_decode" "double,vector")
4462 (set_attr "amdfam10_decode" "double,double")
4463 (set_attr "bdver1_decode" "double,double")
4464 (set_attr "prefix_rep" "1")
4465 (set_attr "prefix" "maybe_vex")
4466 (set_attr "mode" "DI")])
4468 ;; For float<si2dfmode><mode>2 insn pattern
;; Maps a double-precision vector mode to the 32-bit-integer vector mode
;; with the same element count (V8DF -> V8SI, V4DF -> V4SI); used as the
;; operand mode of the integer side of the conversions.
4469 (define_mode_attr si2dfmode
4470 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the si2dfmode mapping, spliced into insn names
;; such as float<si2dfmodelower><mode>2.
4471 (define_mode_attr si2dfmodelower
4472 [(V8DF "v8si") (V4DF "v4si")])
;; float<si2dfmodelower><mode>2<mask_name>: <si2dfmode> integer vector
;; -> VF2_512_256 double vector (`float' RTL code), emitted as vcvtdq2pd.
;; Requires TARGET_AVX && <mask_mode512bit_condition>.
4474 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4475 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4476 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4477 "TARGET_AVX && <mask_mode512bit_condition>"
4478 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4479 [(set_attr "type" "ssecvt")
4480 (set_attr "prefix" "maybe_vex")
4481 (set_attr "mode" "<MODE>")])
;; <floatsuffix>float<sseintvecmodelower><mode>2: <sseintvecmode>
;; integer vector -> VF2_AVX512VL double vector, iterated over any_float
;; and emitted as vcvt<floatsuffix>qq2pd.  EVEX-encoded.
;; NOTE(review): the pattern name and template use the <round_name> /
;; <round_mask_op2> placeholders, but operand 1 has the fixed predicate
;; nonimmediate_operand and constraint "vm"; confirm this is intended.
4483 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4484 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4485 (any_float:VF2_AVX512VL
4486 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
4488 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4489 [(set_attr "type" "ssecvt")
4490 (set_attr "prefix" "evex")
4491 (set_attr "mode" "<MODE>")])
4493 ;; For <floatsuffix>float<sselongvecmodelower><mode> insn patterns
;; Mnemonic suffix appended to vcvt<floatsuffix>qq2ps: empty for V8SF,
;; "{y}" for V4SF.
4494 (define_mode_attr qq2pssuff
4495 [(V8SF "") (V4SF "{y}")])
;; Maps a single-precision vector mode to the 64-bit-integer vector mode
;; with the same element count (V8SF -> V8DI, V4SF -> V4DI).
4497 (define_mode_attr sselongvecmode
4498 [(V8SF "V8DI") (V4SF "V4DI")])
;; Lower-case spelling of the sselongvecmode mapping, spliced into insn
;; names.
4500 (define_mode_attr sselongvecmodelower
4501 [(V8SF "v8di") (V4SF "v4di")])
;; Maps float vector modes to an integer insn mode name (XI/OI/TI); it
;; is used as the value of the "mode" attribute by the
;; <fixsuffix>fix_trunc<mode><sselongvecmodelower>2 pattern.
4503 (define_mode_attr sseintvecmode3
4504 [(V8SF "XI") (V4SF "OI")
4505 (V8DF "OI") (V4DF "TI")])
;; <floatsuffix>float<sselongvecmodelower><mode>2: <sselongvecmode>
;; 64-bit-integer vector -> VF1_128_256VL float vector, iterated over
;; any_float and emitted as vcvt<floatsuffix>qq2ps<qq2pssuff>.
;; Requires TARGET_AVX512DQ && <round_modev8sf_condition>.
4507 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4508 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4509 (any_float:VF1_128_256VL
4510 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4511 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4512 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4513 [(set_attr "type" "ssecvt")
4514 (set_attr "prefix" "evex")
4515 (set_attr "mode" "<MODE>")])
;; *<floatsuffix>floatv2div2sf2: V2DI -> V2SF conversion (any_float)
;; whose result is paired with a constant-zero V2SF vector to form the
;; V4SF destination.  Emitted as vcvt<floatsuffix>qq2ps{x}.
;; Requires TARGET_AVX512DQ && TARGET_AVX512VL.
4517 (define_insn "*<floatsuffix>floatv2div2sf2"
4518 [(set (match_operand:V4SF 0 "register_operand" "=v")
4520 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4521 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4522 "TARGET_AVX512DQ && TARGET_AVX512VL"
4523 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4524 [(set_attr "type" "ssecvt")
4525 (set_attr "prefix" "evex")
4526 (set_attr "mode" "V4SF")])
;; <floatsuffix>floatv2div2sf2_mask: hand-written masked form of the
;; V2DI -> V2SF conversion.  Operand 2 ("0C": tied to the destination or
;; a constant) supplies elements 0 and 1 as the second input, and
;; operand 3 is a QImode mask register ("Yk").  The template prints the
;; mask as %{%3%} followed by %N2 on the destination.
;; Requires TARGET_AVX512DQ && TARGET_AVX512VL.
4528 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4529 [(set (match_operand:V4SF 0 "register_operand" "=v")
4532 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4534 (match_operand:V4SF 2 "vector_move_operand" "0C")
4535 (parallel [(const_int 0) (const_int 1)]))
4536 (match_operand:QI 3 "register_operand" "Yk"))
4537 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4538 "TARGET_AVX512DQ && TARGET_AVX512VL"
4539 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4540 [(set_attr "type" "ssecvt")
4541 (set_attr "prefix" "evex")
4542 (set_attr "mode" "V4SF")])
;; ufloat<si2dfmodelower><mode>2<mask_name>: <si2dfmode> integer vector
;; -> VF2_512_256VL double vector using unsigned_float, emitted as
;; EVEX-prefixed vcvtudq2pd.
4544 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4545 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4546 (unsigned_float:VF2_512_256VL
4547 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4549 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4550 [(set_attr "type" "ssecvt")
4551 (set_attr "prefix" "evex")
4552 (set_attr "mode" "<MODE>")])
;; ufloatv2siv2df2<mask_name>: V2DF result from unsigned_float applied
;; to elements 0 and 1 of the V4SI operand 1.  Emitted as EVEX-prefixed
;; vcvtudq2pd.
4554 (define_insn "ufloatv2siv2df2<mask_name>"
4555 [(set (match_operand:V2DF 0 "register_operand" "=v")
4556 (unsigned_float:V2DF
4558 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4559 (parallel [(const_int 0) (const_int 1)]))))]
4561 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4562 [(set_attr "type" "ssecvt")
4563 (set_attr "prefix" "evex")
4564 (set_attr "mode" "V2DF")])
;; avx512f_cvtdq2pd512_2: V8DF result computed from elements 0..7 of the
;; V16SI operand 1.  Emitted as vcvtdq2pd with operand 1 printed through
;; the %t modifier in both dialects.  EVEX-encoded.
4566 (define_insn "avx512f_cvtdq2pd512_2"
4567 [(set (match_operand:V8DF 0 "register_operand" "=v")
4570 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4571 (parallel [(const_int 0) (const_int 1)
4572 (const_int 2) (const_int 3)
4573 (const_int 4) (const_int 5)
4574 (const_int 6) (const_int 7)]))))]
4576 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4577 [(set_attr "type" "ssecvt")
4578 (set_attr "prefix" "evex")
4579 (set_attr "mode" "V8DF")])
;; avx_cvtdq2pd256_2: V4DF result computed from elements 0..3 of the
;; V8SI operand 1.  Emitted as vcvtdq2pd with operand 1 printed through
;; the %x modifier in both dialects.
4581 (define_insn "avx_cvtdq2pd256_2"
4582 [(set (match_operand:V4DF 0 "register_operand" "=v")
4585 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4586 (parallel [(const_int 0) (const_int 1)
4587 (const_int 2) (const_int 3)]))))]
4589 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4590 [(set_attr "type" "ssecvt")
4591 (set_attr "prefix" "maybe_evex")
4592 (set_attr "mode" "V4DF")])
;; sse2_cvtdq2pd<mask_name>: V2DF result computed from elements 0 and 1
;; of the V4SI operand 1, emitted as [v]cvtdq2pd.  The Intel-dialect
;; template prints operand 1 through the %q modifier.
;; Requires TARGET_SSE2 && <mask_avx512vl_condition>; carries
;; ssememalign "64".
4594 (define_insn "sse2_cvtdq2pd<mask_name>"
4595 [(set (match_operand:V2DF 0 "register_operand" "=v")
4598 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4599 (parallel [(const_int 0) (const_int 1)]))))]
4600 "TARGET_SSE2 && <mask_avx512vl_condition>"
4601 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4602 [(set_attr "type" "ssecvt")
4603 (set_attr "prefix" "maybe_vex")
4604 (set_attr "ssememalign" "64")
4605 (set_attr "mode" "V2DF")])
;; avx512f_cvtpd2dq512: V8DF -> V8SI conversion via UNSPEC_FIX_NOTRUNC,
;; emitted as vcvtpd2dq with optional mask/rounding operands from the
;; <mask_name>/<round_name> placeholders.  EVEX-encoded, mode OI.
4607 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4608 [(set (match_operand:V8SI 0 "register_operand" "=v")
4610 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4611 UNSPEC_FIX_NOTRUNC))]
4613 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4614 [(set_attr "type" "ssecvt")
4615 (set_attr "prefix" "evex")
4616 (set_attr "mode" "OI")])
;; avx_cvtpd2dq256<mask_name>: V4DF -> V4SI conversion via
;; UNSPEC_FIX_NOTRUNC, emitted as vcvtpd2dq{y}.
;; Requires TARGET_AVX && <mask_avx512vl_condition>.
4618 (define_insn "avx_cvtpd2dq256<mask_name>"
4619 [(set (match_operand:V4SI 0 "register_operand" "=v")
4620 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4621 UNSPEC_FIX_NOTRUNC))]
4622 "TARGET_AVX && <mask_avx512vl_condition>"
4623 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4624 [(set_attr "type" "ssecvt")
4625 (set_attr "prefix" "<mask_prefix>")
4626 (set_attr "mode" "OI")])
;; avx_cvtpd2dq256_2: expander producing a V8SI result that contains the
;; V4SI unspec conversion of the V4DF operand 1.  The preparation
;; statement sets operands[2] to the V4SImode zero vector
;; (CONST0_RTX (V4SImode)).
4628 (define_expand "avx_cvtpd2dq256_2"
4629 [(set (match_operand:V8SI 0 "register_operand")
4631 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4635 "operands[2] = CONST0_RTX (V4SImode);")
;; *avx_cvtpd2dq256_2: insn matching the V8SI form, whose operand 2 must
;; satisfy const0_operand.  Emitted as vcvtpd2dq{y} with the destination
;; printed through the %x modifier.  VEX-encoded.
4637 (define_insn "*avx_cvtpd2dq256_2"
4638 [(set (match_operand:V8SI 0 "register_operand" "=x")
4640 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4642 (match_operand:V4SI 2 "const0_operand")))]
4644 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4645 [(set_attr "type" "ssecvt")
4646 (set_attr "prefix" "vex")
4647 (set_attr "btver2_decode" "vector")
4648 (set_attr "mode" "OI")])
;; sse2_cvtpd2dq<mask_name>: V2DF -> V2SI unspec conversion whose result
;; is paired with a constant-zero V2SI vector to form the V4SI
;; destination.  The output code returns either the vcvtpd2dq{x}
;; template (which carries <mask_operand2>) or the plain cvtpd2dq
;; template.  Requires TARGET_SSE2 && <mask_avx512vl_condition>.
4650 (define_insn "sse2_cvtpd2dq<mask_name>"
4651 [(set (match_operand:V4SI 0 "register_operand" "=v")
4653 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4655 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4656 "TARGET_SSE2 && <mask_avx512vl_condition>"
4659 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4661 return "cvtpd2dq\t{%1, %0|%0, %1}";
4663 [(set_attr "type" "ssecvt")
4664 (set_attr "prefix_rep" "1")
4665 (set_attr "prefix_data16" "0")
4666 (set_attr "prefix" "maybe_vex")
4667 (set_attr "mode" "TI")
4668 (set_attr "amdfam10_decode" "double")
4669 (set_attr "athlon_decode" "vector")
4670 (set_attr "bdver1_decode" "double")])
4672 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix appended to vcvtpd2udq: empty for V8DF, "{y}" for
;; V4DF.
4673 (define_mode_attr pd2udqsuff
4674 [(V8DF "") (V4DF "{y}")])
;; ufix_notrunc<mode><si2dfmodelower>2: VF2_512_256VL double vector ->
;; <si2dfmode> integer vector via UNSPEC_UNSIGNED_FIX_NOTRUNC, emitted as
;; vcvtpd2udq<pd2udqsuff>.  EVEX-encoded.
4676 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4677 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4679 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4680 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4682 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4683 [(set_attr "type" "ssecvt")
4684 (set_attr "prefix" "evex")
4685 (set_attr "mode" "<sseinsnmode>")])
;; ufix_notruncv2dfv2si2<mask_name>: V2DF source converted via
;; UNSPEC_UNSIGNED_FIX_NOTRUNC and paired with a constant-zero V2SI
;; vector to form the V4SI destination.  Emitted as EVEX-prefixed
;; vcvtpd2udq{x}.
4687 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4688 [(set (match_operand:V4SI 0 "register_operand" "=v")
4691 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4692 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4693 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4695 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4696 [(set_attr "type" "ssecvt")
4697 (set_attr "prefix" "evex")
4698 (set_attr "mode" "TI")])
;; <fixsuffix>fix_truncv8dfv8si2: V8DF -> V8SI conversion emitted as
;; vcvttpd2<fixsuffix>dq; <fixsuffix> appears both in the insn name and
;; in the mnemonic.  Uses the <round_saeonly_*> placeholders.
;; EVEX-encoded, mode OI.
4700 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4701 [(set (match_operand:V8SI 0 "register_operand" "=v")
4703 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4705 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4706 [(set_attr "type" "ssecvt")
4707 (set_attr "prefix" "evex")
4708 (set_attr "mode" "OI")])
;; ufix_truncv2dfv2si2<mask_name>: unsigned_fix of the V2DF operand 1 to
;; V2SI, paired with a constant-zero V2SI vector to form the V4SI
;; destination.  Emitted as EVEX-prefixed vcvttpd2udq{x}.
4710 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4711 [(set (match_operand:V4SI 0 "register_operand" "=v")
4713 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4714 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4716 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4717 [(set_attr "type" "ssecvt")
4718 (set_attr "prefix" "evex")
4719 (set_attr "mode" "TI")])
;; fix_truncv4dfv4si2<mask_name>: V4DF -> V4SI conversion (`fix' RTL
;; code) emitted as vcvttpd2dq{y}.
;; NOTE(review): the condition is
;; "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)", whereas other
;; <mask_name> patterns in this file (e.g. fix_truncv8sfv8si2) use
;; "TARGET_AVX && <mask_avx512vl_condition>".  Confirm that the masked
;; variant of this pattern is gated as intended.
4721 (define_insn "fix_truncv4dfv4si2<mask_name>"
4722 [(set (match_operand:V4SI 0 "register_operand" "=v")
4723 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4724 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4725 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4726 [(set_attr "type" "ssecvt")
4727 (set_attr "prefix" "maybe_evex")
4728 (set_attr "mode" "OI")])
;; ufix_truncv4dfv4si2<mask_name>: V4DF -> V4SI conversion using
;; unsigned_fix, emitted as vcvttpd2udq{y}.
;; Requires TARGET_AVX512VL && TARGET_AVX512F.
4730 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4731 [(set (match_operand:V4SI 0 "register_operand" "=v")
4732 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4733 "TARGET_AVX512VL && TARGET_AVX512F"
4734 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4735 [(set_attr "type" "ssecvt")
4736 (set_attr "prefix" "maybe_evex")
4737 (set_attr "mode" "OI")])
;; <fixsuffix>fix_trunc<mode><sseintvecmodelower>2: VF2_AVX512VL double
;; vector -> <sseintvecmode> integer vector, iterated over any_fix and
;; emitted as vcvttpd2<fixsuffix>qq.
;; Requires TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>.
4739 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4740 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4741 (any_fix:<sseintvecmode>
4742 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4743 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4744 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4745 [(set_attr "type" "ssecvt")
4746 (set_attr "prefix" "evex")
4747 (set_attr "mode" "<sseintvecmode2>")])
;; fix_notrunc<mode><sseintvecmodelower>2: VF2_AVX512VL double vector ->
;; <sseintvecmode> integer vector via UNSPEC_FIX_NOTRUNC, emitted as
;; vcvtpd2qq.  Requires TARGET_AVX512DQ && <round_mode512bit_condition>.
4749 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4750 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4751 (unspec:<sseintvecmode>
4752 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4753 UNSPEC_FIX_NOTRUNC))]
4754 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4755 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4756 [(set_attr "type" "ssecvt")
4757 (set_attr "prefix" "evex")
4758 (set_attr "mode" "<sseintvecmode2>")])
;; ufix_notrunc<mode><sseintvecmodelower>2: VF2_AVX512VL double vector
;; -> <sseintvecmode> integer vector via UNSPEC_UNSIGNED_FIX_NOTRUNC,
;; emitted as vcvtpd2uqq.
;; Requires TARGET_AVX512DQ && <round_mode512bit_condition>.
;; NOTE(review): operand 1 uses the fixed predicate nonimmediate_operand
;; while the signed fix_notrunc<mode><sseintvecmodelower>2 pattern uses
;; <round_nimm_predicate>; confirm this is intended.
4760 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4761 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4762 (unspec:<sseintvecmode>
4763 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
4764 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4765 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4766 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4767 [(set_attr "type" "ssecvt")
4768 (set_attr "prefix" "evex")
4769 (set_attr "mode" "<sseintvecmode2>")])
;; <fixsuffix>fix_trunc<mode><sselongvecmodelower>2: VF1_128_256VL float
;; vector -> <sselongvecmode> 64-bit-integer vector, iterated over
;; any_fix and emitted as vcvttps2<fixsuffix>qq.
;; Requires TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>; the
;; "mode" attribute comes from <sseintvecmode3>.
4771 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4772 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4773 (any_fix:<sselongvecmode>
4774 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4775 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4776 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4777 [(set_attr "type" "ssecvt")
4778 (set_attr "prefix" "evex")
4779 (set_attr "mode" "<sseintvecmode3>")])
;; <fixsuffix>fix_truncv2sfv2di2<mask_name>: V2DI result computed from
;; elements 0 and 1 of the V4SF operand 1, emitted as
;; vcvttps2<fixsuffix>qq.  Requires TARGET_AVX512DQ && TARGET_AVX512VL.
4781 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4782 [(set (match_operand:V2DI 0 "register_operand" "=v")
4785 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4786 (parallel [(const_int 0) (const_int 1)]))))]
4787 "TARGET_AVX512DQ && TARGET_AVX512VL"
4788 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4789 [(set_attr "type" "ssecvt")
4790 (set_attr "prefix" "evex")
4791 (set_attr "mode" "TI")])
;; ufix_trunc<mode><sseintvecmodelower>2<mask_name>: VF1_128_256VL float
;; vector -> <sseintvecmode> integer vector using unsigned_fix, emitted
;; as EVEX-prefixed vcvttps2udq.
4793 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4794 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4795 (unsigned_fix:<sseintvecmode>
4796 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4798 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4799 [(set_attr "type" "ssecvt")
4800 (set_attr "prefix" "evex")
4801 (set_attr "mode" "<sseintvecmode2>")])
;; avx_cvttpd2dq256_2: expander producing a V8SI result that contains
;; the V4SI `fix' conversion of the V4DF operand 1.  The preparation
;; statement sets operands[2] to the V4SImode zero vector
;; (CONST0_RTX (V4SImode)).
4803 (define_expand "avx_cvttpd2dq256_2"
4804 [(set (match_operand:V8SI 0 "register_operand")
4806 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4809 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_cvttpd2dq<mask_name>: `fix' of the V2DF operand 1 to V2SI,
;; paired with a constant-zero V2SI vector to form the V4SI destination.
;; The output code returns either the vcvttpd2dq{x} template (which
;; carries <mask_operand2>) or the plain cvttpd2dq template.
;; Requires TARGET_SSE2 && <mask_avx512vl_condition>.
4811 (define_insn "sse2_cvttpd2dq<mask_name>"
4812 [(set (match_operand:V4SI 0 "register_operand" "=v")
4814 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4815 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4816 "TARGET_SSE2 && <mask_avx512vl_condition>"
4819 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4821 return "cvttpd2dq\t{%1, %0|%0, %1}";
4823 [(set_attr "type" "ssecvt")
4824 (set_attr "amdfam10_decode" "double")
4825 (set_attr "athlon_decode" "vector")
4826 (set_attr "bdver1_decode" "double")
4827 (set_attr "prefix" "maybe_vex")
4828 (set_attr "mode" "TI")])
;; sse2_cvtsd2ss<round_name>: float_truncate of the V2DF operand 2 to
;; V2SF, combined with the V4SF register operand 1.
;; Three alternatives, selected through the "isa" attribute:
;;   0, 1 (noavx): two-operand cvtsd2ss; operand 1 is tied to the
;;                 destination; source in an "x" register / in memory
;;                 (memory form printed with %q in the Intel dialect).
;;   2    (avx):   three-operand vcvtsd2ss, optionally carrying
;;                 <round_op3>; prefix comes from <round_prefix>.
4830 (define_insn "sse2_cvtsd2ss<round_name>"
4831 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4834 (float_truncate:V2SF
4835 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4836 (match_operand:V4SF 1 "register_operand" "0,0,v")
4840 cvtsd2ss\t{%2, %0|%0, %2}
4841 cvtsd2ss\t{%2, %0|%0, %q2}
4842 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4843 [(set_attr "isa" "noavx,noavx,avx")
4844 (set_attr "type" "ssecvt")
4845 (set_attr "athlon_decode" "vector,double,*")
4846 (set_attr "amdfam10_decode" "vector,double,*")
4847 (set_attr "bdver1_decode" "direct,direct,*")
4848 (set_attr "btver2_decode" "double,double,double")
4849 (set_attr "prefix" "orig,orig,<round_prefix>")
4850 (set_attr "mode" "SF")])
;; Single -> double conversion into a V2DF destination.  The V4SF operand 2
;; is the conversion source (the visible selector names elements 0 and 1);
;; V2DF operand 1 is the other input.
;; Alternatives 0 and 1 (isa noavx) tie operand 1 to the destination;
;; alternative 2 (isa avx) is the three-operand vcvtss2sd form and may carry
;; <round_saeonly_op3>.
4852 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4853 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4857 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
4858 (parallel [(const_int 0) (const_int 1)])))
4859 (match_operand:V2DF 1 "register_operand" "0,0,v")
4863 cvtss2sd\t{%2, %0|%0, %2}
4864 cvtss2sd\t{%2, %0|%0, %k2}
4865 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4866 [(set_attr "isa" "noavx,noavx,avx")
4867 (set_attr "type" "ssecvt")
4868 (set_attr "amdfam10_decode" "vector,double,*")
4869 (set_attr "athlon_decode" "direct,direct,*")
4870 (set_attr "bdver1_decode" "direct,direct,*")
4871 (set_attr "btver2_decode" "double,double,double")
4872 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4873 (set_attr "mode" "DF")])
;; 512-bit narrowing conversion: float_truncate of the V8DF operand 1 into
;; the V8SF operand 0.  Printed as vcvtpd2ps with the optional
;; <round_mask_op2> and <mask_operand2> pieces.
4875 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4876 [(set (match_operand:V8SF 0 "register_operand" "=v")
4877 (float_truncate:V8SF
4878 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4880 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4881 [(set_attr "type" "ssecvt")
4882 (set_attr "prefix" "evex")
4883 (set_attr "mode" "V8SF")])
;; 256-bit narrowing conversion: float_truncate of the V4DF operand 1
;; (register or memory) into the V4SF operand 0.  Requires TARGET_AVX and
;; <mask_avx512vl_condition>; printed as vcvtpd2ps{y}.
4885 (define_insn "avx_cvtpd2ps256<mask_name>"
4886 [(set (match_operand:V4SF 0 "register_operand" "=v")
4887 (float_truncate:V4SF
4888 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4889 "TARGET_AVX && <mask_avx512vl_condition>"
4890 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4891 [(set_attr "type" "ssecvt")
4892 (set_attr "prefix" "maybe_evex")
4893 (set_attr "btver2_decode" "vector")
4894 (set_attr "mode" "V4SF")])
;; Expander for the V2DF -> V2SF float_truncate whose result lives in a V4SF
;; destination.  The preparation statement supplies operand 2 as the
;; all-zero V2SF constant.
4896 (define_expand "sse2_cvtpd2ps"
4897 [(set (match_operand:V4SF 0 "register_operand")
4899 (float_truncate:V2SF
4900 (match_operand:V2DF 1 "nonimmediate_operand"))
4903 "operands[2] = CONST0_RTX (V2SFmode);")
;; Expander for the _mask form of the V2DF -> V2SF float_truncate.  Besides
;; the source (operand 1) it takes a V4SF register (operand 2) and a QImode
;; register (operand 3); the preparation statement supplies operand 4 as the
;; all-zero V2SF constant.
4905 (define_expand "sse2_cvtpd2ps_mask"
4906 [(set (match_operand:V4SF 0 "register_operand")
4909 (float_truncate:V2SF
4910 (match_operand:V2DF 1 "nonimmediate_operand"))
4912 (match_operand:V4SF 2 "register_operand")
4913 (match_operand:QI 3 "register_operand")))]
4915 "operands[4] = CONST0_RTX (V2SFmode);")
;; Insn for the V2DF -> V2SF float_truncate into a V4SF register; operand 2
;; must satisfy const0_operand.  Requires TARGET_SSE2 and
;; <mask_avx512vl_condition>.  The output code returns either the
;; vcvtpd2ps{x} template (which carries <mask_operand3>) or the legacy
;; cvtpd2ps template.
4917 (define_insn "*sse2_cvtpd2ps<mask_name>"
4918 [(set (match_operand:V4SF 0 "register_operand" "=v")
4920 (float_truncate:V2SF
4921 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4922 (match_operand:V2SF 2 "const0_operand")))]
4923 "TARGET_SSE2 && <mask_avx512vl_condition>"
4926 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
4928 return "cvtpd2ps\t{%1, %0|%0, %1}";
4930 [(set_attr "type" "ssecvt")
4931 (set_attr "amdfam10_decode" "double")
4932 (set_attr "athlon_decode" "vector")
4933 (set_attr "bdver1_decode" "double")
4934 (set_attr "prefix_data16" "1")
4935 (set_attr "prefix" "maybe_vex")
4936 (set_attr "mode" "V4SF")])
4938 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a double-precision vector mode to the single-precision vector mode
;; with the same number of elements (V8DF -> V8SF, V4DF -> V4SF).
4939 (define_mode_attr sf2dfmode
4940 [(V8DF "V8SF") (V4DF "V4SF")])
;; Widening conversion: float_extend of the <sf2dfmode> operand 1 into the
;; VF2_512_256 operand 0.  Requires TARGET_AVX plus the mask/SAE 512-bit
;; conditions; printed as vcvtps2pd with the optional
;; <round_saeonly_mask_op2> and <mask_operand2> pieces.
4942 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4943 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4944 (float_extend:VF2_512_256
4945 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4946 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4947 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4948 [(set_attr "type" "ssecvt")
4949 (set_attr "prefix" "maybe_vex")
4950 (set_attr "mode" "<MODE>")])
;; vcvtps2pd into a V4DF register, with elements 0..3 of the V8SF operand 1
;; named by the selector.  The source is printed with the %x operand
;; modifier in both syntaxes.
4952 (define_insn "*avx_cvtps2pd256_2"
4953 [(set (match_operand:V4DF 0 "register_operand" "=x")
4956 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4957 (parallel [(const_int 0) (const_int 1)
4958 (const_int 2) (const_int 3)]))))]
4960 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4961 [(set_attr "type" "ssecvt")
4962 (set_attr "prefix" "vex")
4963 (set_attr "mode" "V4DF")])
;; vcvtps2pd into a V8DF register, with elements 0..7 of the V16SF operand 1
;; named by the selector.  The source is printed with the %t operand
;; modifier in both syntaxes.
4965 (define_insn "vec_unpacks_lo_v16sf"
4966 [(set (match_operand:V8DF 0 "register_operand" "=v")
4969 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4970 (parallel [(const_int 0) (const_int 1)
4971 (const_int 2) (const_int 3)
4972 (const_int 4) (const_int 5)
4973 (const_int 6) (const_int 7)]))))]
4975 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4976 [(set_attr "type" "ssecvt")
4977 (set_attr "prefix" "evex")
4978 (set_attr "mode" "V8DF")])
;; Vector -> mask conversion for the VI12_AVX512VL modes.  Operand 0
;; (constraint Yk, mode <avx512fmaskmode>) is set from UNSPEC_CVTINT2MASK of
;; the vector register operand 1; printed as vpmov<ssemodesuffix>2m.
4980 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4981 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4982 (unspec:<avx512fmaskmode>
4983 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
4984 UNSPEC_CVTINT2MASK))]
4986 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4987 [(set_attr "prefix" "evex")
4988 (set_attr "mode" "<sseinsnmode>")])
;; Vector -> mask conversion for the VI48_AVX512VL modes.  Operand 0
;; (constraint Yk, mode <avx512fmaskmode>) is set from UNSPEC_CVTINT2MASK of
;; the vector register operand 1; printed as vpmov<ssemodesuffix>2m.
4990 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4991 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4992 (unspec:<avx512fmaskmode>
4993 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
4994 UNSPEC_CVTINT2MASK))]
4996 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4997 [(set_attr "prefix" "evex")
4998 (set_attr "mode" "<sseinsnmode>")])
;; Expander for mask -> vector conversion in the VI12_AVX512VL modes.  The
;; result is a vec_merge controlled by the mask register operand 1; the
;; preparation code supplies operand 2 as the all-ones constant and
;; operand 3 as the all-zeros constant of <MODE>.
5000 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5001 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5002 (vec_merge:VI12_AVX512VL
5005 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5008 operands[2] = CONSTM1_RTX (<MODE>mode);
5009 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for mask -> vector conversion in the VI12_AVX512VL modes: a
;; vec_merge of an all-ones operand 2 and an all-zeros operand 3 under the
;; mask register operand 1 (constraint Yk).  Printed as
;; vpmovm2<ssemodesuffix>.
5012 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5013 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5014 (vec_merge:VI12_AVX512VL
5015 (match_operand:VI12_AVX512VL 2 "constm1_operand")
5016 (match_operand:VI12_AVX512VL 3 "const0_operand")
5017 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5019 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5020 [(set_attr "prefix" "evex")
5021 (set_attr "mode" "<sseinsnmode>")])
;; Expander for mask -> vector conversion in the VI48_AVX512VL modes.  The
;; result is a vec_merge controlled by the mask register operand 1; the
;; preparation code supplies operand 2 as the all-ones constant and
;; operand 3 as the all-zeros constant of <MODE>.
5023 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5024 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5025 (vec_merge:VI48_AVX512VL
5028 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5031 operands[2] = CONSTM1_RTX (<MODE>mode);
5032 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for mask -> vector conversion in the VI48_AVX512VL modes: a
;; vec_merge of an all-ones operand 2 and an all-zeros operand 3 under the
;; mask register operand 1 (constraint Yk).  Printed as
;; vpmovm2<ssemodesuffix>.
5035 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5036 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5037 (vec_merge:VI48_AVX512VL
5038 (match_operand:VI48_AVX512VL 2 "constm1_operand")
5039 (match_operand:VI48_AVX512VL 3 "const0_operand")
5040 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5042 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5043 [(set_attr "prefix" "evex")
5044 (set_attr "mode" "<sseinsnmode>")])
;; cvtps2pd into a V2DF register, with elements 0 and 1 of the V4SF
;; operand 1 named by the selector.  Requires TARGET_SSE2 and
;; <mask_avx512vl_condition>.  The Intel-syntax half of the template prints
;; the source with the %q modifier.
5046 (define_insn "sse2_cvtps2pd<mask_name>"
5047 [(set (match_operand:V2DF 0 "register_operand" "=v")
5050 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5051 (parallel [(const_int 0) (const_int 1)]))))]
5052 "TARGET_SSE2 && <mask_avx512vl_condition>"
5053 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5054 [(set_attr "type" "ssecvt")
5055 (set_attr "amdfam10_decode" "direct")
5056 (set_attr "athlon_decode" "double")
5057 (set_attr "bdver1_decode" "double")
5058 (set_attr "prefix_data16" "0")
5059 (set_attr "prefix" "maybe_vex")
5060 (set_attr "mode" "V2DF")])
;; Two-step expander producing a V2DF result from the V4SF operand 1.  The
;; first step's selector is (6 7 2 3); the step that sets operand 0 selects
;; elements 0 and 1.  Operand 2 is a fresh V4SF pseudo created by the
;; preparation statement.
5062 (define_expand "vec_unpacks_hi_v4sf"
5067 (match_operand:V4SF 1 "nonimmediate_operand"))
5068 (parallel [(const_int 6) (const_int 7)
5069 (const_int 2) (const_int 3)])))
5070 (set (match_operand:V2DF 0 "register_operand")
5074 (parallel [(const_int 0) (const_int 1)]))))]
5076 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander producing a V4DF result from the V8SF register
;; operand 1.  The first step's selector picks elements 4..7.  Operand 2 is
;; a fresh V4SF pseudo created by the preparation statement.
5078 (define_expand "vec_unpacks_hi_v8sf"
5081 (match_operand:V8SF 1 "register_operand")
5082 (parallel [(const_int 4) (const_int 5)
5083 (const_int 6) (const_int 7)])))
5084 (set (match_operand:V4DF 0 "register_operand")
5088 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander producing a V8DF result from the V16SF register
;; operand 1.  The first step's selector picks elements 8..15.  Operand 2 is
;; a fresh V8SF pseudo created by the preparation statement.
5090 (define_expand "vec_unpacks_hi_v16sf"
5093 (match_operand:V16SF 1 "register_operand")
5094 (parallel [(const_int 8) (const_int 9)
5095 (const_int 10) (const_int 11)
5096 (const_int 12) (const_int 13)
5097 (const_int 14) (const_int 15)])))
5098 (set (match_operand:V8DF 0 "register_operand")
5102 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Expander producing a V2DF result from the V4SF operand 1, with elements
;; 0 and 1 named by the selector.
5104 (define_expand "vec_unpacks_lo_v4sf"
5105 [(set (match_operand:V2DF 0 "register_operand")
5108 (match_operand:V4SF 1 "nonimmediate_operand")
5109 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing a V4DF result from the V8SF operand 1, with elements
;; 0..3 named by the selector.
5112 (define_expand "vec_unpacks_lo_v8sf"
5113 [(set (match_operand:V4DF 0 "register_operand")
5116 (match_operand:V8SF 1 "nonimmediate_operand")
5117 (parallel [(const_int 0) (const_int 1)
5118 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to a floating-point vector mode with half as
;; many elements of twice the width (e.g. V8HI -> V4SF, V4SI -> V2DF).
5121 (define_mode_attr sseunpackfltmode
5122 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5123 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Expander for the VI2_AVX512F modes: runs gen_vec_unpacks_hi_<mode> on
;; operand 1 into a <sseunpackmode> temporary, then sets operand 0 to the
;; FLOAT conversion of that temporary in <sseunpackfltmode>.
5125 (define_expand "vec_unpacks_float_hi_<mode>"
5126 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5127 (match_operand:VI2_AVX512F 1 "register_operand")]
5130 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5132 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5133 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5134 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander for the VI2_AVX512F modes: runs gen_vec_unpacks_lo_<mode> on
;; operand 1 into a <sseunpackmode> temporary, then sets operand 0 to the
;; FLOAT conversion of that temporary in <sseunpackfltmode>.
5138 (define_expand "vec_unpacks_float_lo_<mode>"
5139 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5140 (match_operand:VI2_AVX512F 1 "register_operand")]
5143 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5145 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5146 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5147 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander for the VI2_AVX512F modes: runs gen_vec_unpacku_hi_<mode> on
;; operand 1 into a <sseunpackmode> temporary, then sets operand 0 to the
;; FLOAT conversion of that temporary in <sseunpackfltmode>.
5151 (define_expand "vec_unpacku_float_hi_<mode>"
5152 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5153 (match_operand:VI2_AVX512F 1 "register_operand")]
5156 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5158 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5159 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5160 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander for the VI2_AVX512F modes: runs gen_vec_unpacku_lo_<mode> on
;; operand 1 into a <sseunpackmode> temporary, then sets operand 0 to the
;; FLOAT conversion of that temporary in <sseunpackfltmode>.
5164 (define_expand "vec_unpacku_float_lo_<mode>"
5165 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5166 (match_operand:VI2_AVX512F 1 "register_operand")]
5169 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5171 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5172 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5173 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expander producing a V2DF result from the V4SI operand 1.  The
;; first step's selector is (2 3 2 3); the step that sets operand 0 selects
;; elements 0 and 1.  Operand 2 is a fresh V4SI pseudo created by the
;; preparation statement.
5177 (define_expand "vec_unpacks_float_hi_v4si"
5180 (match_operand:V4SI 1 "nonimmediate_operand")
5181 (parallel [(const_int 2) (const_int 3)
5182 (const_int 2) (const_int 3)])))
5183 (set (match_operand:V2DF 0 "register_operand")
5187 (parallel [(const_int 0) (const_int 1)]))))]
5189 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V2DF result from the V4SI operand 1, with elements
;; 0 and 1 named by the selector.
5191 (define_expand "vec_unpacks_float_lo_v4si"
5192 [(set (match_operand:V2DF 0 "register_operand")
5195 (match_operand:V4SI 1 "nonimmediate_operand")
5196 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander producing a V4DF result from the V8SI operand 1.  The
;; first step's selector picks elements 4..7.  Operand 2 is a fresh V4SI
;; pseudo created by the preparation statement.
5199 (define_expand "vec_unpacks_float_hi_v8si"
5202 (match_operand:V8SI 1 "nonimmediate_operand")
5203 (parallel [(const_int 4) (const_int 5)
5204 (const_int 6) (const_int 7)])))
5205 (set (match_operand:V4DF 0 "register_operand")
5209 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V4DF result from the V8SI operand 1, with elements
;; 0..3 named by the selector.
5211 (define_expand "vec_unpacks_float_lo_v8si"
5212 [(set (match_operand:V4DF 0 "register_operand")
5215 (match_operand:V8SI 1 "nonimmediate_operand")
5216 (parallel [(const_int 0) (const_int 1)
5217 (const_int 2) (const_int 3)]))))]
;; Two-step expander producing a V8DF result from the V16SI operand 1.  The
;; first step's selector picks elements 8..15.  Operand 2 is a fresh V8SI
;; pseudo created by the preparation statement.
5220 (define_expand "vec_unpacks_float_hi_v16si"
5223 (match_operand:V16SI 1 "nonimmediate_operand")
5224 (parallel [(const_int 8) (const_int 9)
5225 (const_int 10) (const_int 11)
5226 (const_int 12) (const_int 13)
5227 (const_int 14) (const_int 15)])))
5228 (set (match_operand:V8DF 0 "register_operand")
5232 "operands[2] = gen_reg_rtx (V8SImode);")
;; Expander producing a V8DF result from the V16SI operand 1, with elements
;; 0..7 named by the selector.
5234 (define_expand "vec_unpacks_float_lo_v16si"
5235 [(set (match_operand:V8DF 0 "register_operand")
5238 (match_operand:V16SI 1 "nonimmediate_operand")
5239 (parallel [(const_int 0) (const_int 1)
5240 (const_int 2) (const_int 3)
5241 (const_int 4) (const_int 5)
5242 (const_int 6) (const_int 7)]))))]
;; Multi-step expander producing a V2DF result from the V4SI operand 1.
;; Visible steps: a selection with indices (2 3 2 3), a selection of
;; elements 0 and 1, an lt comparison of operand 6 against operand 3, an
;; AND of operand 7 with operand 4, and a final add of operands 6 and 8
;; into operand 0.
;; Preparation code: operand 3 is the V2DF zero vector forced into a
;; register; operand 4 is a V2DF constant built from 2^32 (dconst1 scaled by
;; real_ldexp with exponent 32); operand 5 is a V4SI pseudo; operands 6..8
;; are V2DF pseudos.
5245 (define_expand "vec_unpacku_float_hi_v4si"
5248 (match_operand:V4SI 1 "nonimmediate_operand")
5249 (parallel [(const_int 2) (const_int 3)
5250 (const_int 2) (const_int 3)])))
5255 (parallel [(const_int 0) (const_int 1)]))))
5257 (lt:V2DF (match_dup 6) (match_dup 3)))
5259 (and:V2DF (match_dup 7) (match_dup 4)))
5260 (set (match_operand:V2DF 0 "register_operand")
5261 (plus:V2DF (match_dup 6) (match_dup 8)))]
5264 REAL_VALUE_TYPE TWO32r;
5268 real_ldexp (&TWO32r, &dconst1, 32);
5269 x = const_double_from_real_value (TWO32r, DFmode);
5271 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5272 operands[4] = force_reg (V2DFmode,
5273 ix86_build_const_vector (V2DFmode, 1, x));
5275 operands[5] = gen_reg_rtx (V4SImode);
5277 for (i = 6; i < 9; i++)
5278 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander producing a V2DF result from the V4SI operand 1.
;; Visible steps: a selection of elements 0 and 1, an lt comparison of
;; operand 5 against operand 3, an AND of operand 6 with operand 4, and a
;; final add of operands 5 and 7 into operand 0.
;; Preparation code: operand 3 is the V2DF zero vector forced into a
;; register; operand 4 is a V2DF constant built from 2^32 (dconst1 scaled by
;; real_ldexp with exponent 32); operands 5..7 are V2DF pseudos.
5281 (define_expand "vec_unpacku_float_lo_v4si"
5285 (match_operand:V4SI 1 "nonimmediate_operand")
5286 (parallel [(const_int 0) (const_int 1)]))))
5288 (lt:V2DF (match_dup 5) (match_dup 3)))
5290 (and:V2DF (match_dup 6) (match_dup 4)))
5291 (set (match_operand:V2DF 0 "register_operand")
5292 (plus:V2DF (match_dup 5) (match_dup 7)))]
5295 REAL_VALUE_TYPE TWO32r;
5299 real_ldexp (&TWO32r, &dconst1, 32);
5300 x = const_double_from_real_value (TWO32r, DFmode);
5302 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5303 operands[4] = force_reg (V2DFmode,
5304 ix86_build_const_vector (V2DFmode, 1, x));
5306 for (i = 5; i < 8; i++)
5307 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing a V4DF result from the V8SI register operand 1.
;; Sequence emitted:
;;   tmp[5] = gen_vec_extract_hi_v8si of operand 1 (V4SI)
;;   tmp[2] = gen_floatv4siv4df2 (tmp[5])
;;   tmp[3] = LT (tmp[2], tmp[0])     where tmp[0] is the V4DF zero vector
;;   tmp[4] = tmp[3] AND tmp[1]       where tmp[1] is built from 2^32
;;   operand 0 = tmp[2] + tmp[4]
5310 (define_expand "vec_unpacku_float_hi_v8si"
5311 [(match_operand:V4DF 0 "register_operand")
5312 (match_operand:V8SI 1 "register_operand")]
5315 REAL_VALUE_TYPE TWO32r;
5319 real_ldexp (&TWO32r, &dconst1, 32);
5320 x = const_double_from_real_value (TWO32r, DFmode);
5322 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5323 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5324 tmp[5] = gen_reg_rtx (V4SImode);
5326 for (i = 2; i < 5; i++)
5327 tmp[i] = gen_reg_rtx (V4DFmode);
5328 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5329 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5330 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5331 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5332 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5333 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander producing a V8DF result from the V16SI register operand 1.
;; Sequence emitted:
;;   tmp[3] = gen_vec_extract_hi_v16si of operand 1 (V8SI)
;;   tmp[2] = gen_floatv8siv8df2 (tmp[3])
;;   k      = LT (tmp[2], tmp[0]) in QImode, tmp[0] being the V8DF zero vector
;;   gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k), where tmp[1] is
;;     built from 2^32 -- presumably adds it only in lanes selected by k;
;;     confirm against the addv8df3_mask pattern
;;   operand 0 = tmp[2]
5337 (define_expand "vec_unpacku_float_hi_v16si"
5338 [(match_operand:V8DF 0 "register_operand")
5339 (match_operand:V16SI 1 "register_operand")]
5342 REAL_VALUE_TYPE TWO32r;
5345 real_ldexp (&TWO32r, &dconst1, 32);
5346 x = const_double_from_real_value (TWO32r, DFmode);
5348 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5349 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5350 tmp[2] = gen_reg_rtx (V8DFmode);
5351 tmp[3] = gen_reg_rtx (V8SImode);
5352 k = gen_reg_rtx (QImode);
5354 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5355 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5356 emit_insn (gen_rtx_SET (VOIDmode, k,
5357 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5358 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5359 emit_move_insn (operands[0], tmp[2]);
;; Expander producing a V4DF result from the V8SI operand 1.
;; Sequence emitted:
;;   tmp[2] = gen_avx_cvtdq2pd256_2 (operand 1)
;;   tmp[3] = LT (tmp[2], tmp[0])     where tmp[0] is the V4DF zero vector
;;   tmp[4] = tmp[3] AND tmp[1]       where tmp[1] is built from 2^32
;;   operand 0 = tmp[2] + tmp[4]
5363 (define_expand "vec_unpacku_float_lo_v8si"
5364 [(match_operand:V4DF 0 "register_operand")
5365 (match_operand:V8SI 1 "nonimmediate_operand")]
5368 REAL_VALUE_TYPE TWO32r;
5372 real_ldexp (&TWO32r, &dconst1, 32);
5373 x = const_double_from_real_value (TWO32r, DFmode);
5375 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5376 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5378 for (i = 2; i < 5; i++)
5379 tmp[i] = gen_reg_rtx (V4DFmode);
5380 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5381 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5382 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5383 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5384 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander producing a V8DF result from the V16SI operand 1.
;; Sequence emitted:
;;   tmp[2] = gen_avx512f_cvtdq2pd512_2 (operand 1)
;;   k      = LT (tmp[2], tmp[0]) in QImode, tmp[0] being the V8DF zero vector
;;   gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k), where tmp[1] is
;;     built from 2^32 -- presumably adds it only in lanes selected by k;
;;     confirm against the addv8df3_mask pattern
;;   operand 0 = tmp[2]
5388 (define_expand "vec_unpacku_float_lo_v16si"
5389 [(match_operand:V8DF 0 "register_operand")
5390 (match_operand:V16SI 1 "nonimmediate_operand")]
5393 REAL_VALUE_TYPE TWO32r;
5396 real_ldexp (&TWO32r, &dconst1, 32);
5397 x = const_double_from_real_value (TWO32r, DFmode);
5399 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5400 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5401 tmp[2] = gen_reg_rtx (V8DFmode);
5402 k = gen_reg_rtx (QImode);
5404 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5405 emit_insn (gen_rtx_SET (VOIDmode, k,
5406 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5407 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5408 emit_move_insn (operands[0], tmp[2]);
;; Expander for the VF2_512_256 modes: operands 1 and 2 are each
;; float_truncate'd to <sf2dfmode>, and operand 0 (<ssePSmode>) is set from
;; a vec_concat.  Operands 3 and 4 are fresh <sf2dfmode> pseudos created by
;; the preparation code.
5412 (define_expand "vec_pack_trunc_<mode>"
5414 (float_truncate:<sf2dfmode>
5415 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5417 (float_truncate:<sf2dfmode>
5418 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5419 (set (match_operand:<ssePSmode> 0 "register_operand")
5420 (vec_concat:<ssePSmode>
5425 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5426 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Expander packing two V2DF operands into one V4SF result.
;; When TARGET_AVX is set, TARGET_PREFER_AVX128 is clear and the insn is
;; optimized for speed, the operands are concatenated into a V4DF temporary
;; and converted with a single gen_avx_cvtpd2ps256.  The other visible
;; sequence converts each operand with gen_sse2_cvtpd2ps and combines the
;; two results with gen_sse_movlhps.
5429 (define_expand "vec_pack_trunc_v2df"
5430 [(match_operand:V4SF 0 "register_operand")
5431 (match_operand:V2DF 1 "nonimmediate_operand")
5432 (match_operand:V2DF 2 "nonimmediate_operand")]
5437 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5439 tmp0 = gen_reg_rtx (V4DFmode);
5440 tmp1 = force_reg (V2DFmode, operands[1]);
5442 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5443 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5447 tmp0 = gen_reg_rtx (V4SFmode);
5448 tmp1 = gen_reg_rtx (V4SFmode);
5450 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5451 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5452 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Expander packing two V8DF operands into one V16SI result: each operand is
;; converted with gen_fix_truncv8dfv8si2 into a V8SI temporary, and the two
;; temporaries are joined with gen_avx_vec_concatv16si.
5457 (define_expand "vec_pack_sfix_trunc_v8df"
5458 [(match_operand:V16SI 0 "register_operand")
5459 (match_operand:V8DF 1 "nonimmediate_operand")
5460 (match_operand:V8DF 2 "nonimmediate_operand")]
5465 r1 = gen_reg_rtx (V8SImode);
5466 r2 = gen_reg_rtx (V8SImode);
5468 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5469 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5470 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Expander packing two V4DF operands into one V8SI result: each operand is
;; converted with gen_fix_truncv4dfv4si2 into a V4SI temporary, and the two
;; temporaries are joined with gen_avx_vec_concatv8si.
5474 (define_expand "vec_pack_sfix_trunc_v4df"
5475 [(match_operand:V8SI 0 "register_operand")
5476 (match_operand:V4DF 1 "nonimmediate_operand")
5477 (match_operand:V4DF 2 "nonimmediate_operand")]
5482 r1 = gen_reg_rtx (V4SImode);
5483 r2 = gen_reg_rtx (V4SImode);
5485 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5486 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5487 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander packing two V2DF operands into one V4SI result with truncating
;; conversion.
;; When TARGET_AVX is set, TARGET_PREFER_AVX128 is clear and the insn is
;; optimized for speed, the operands are concatenated into a V4DF temporary
;; and converted with a single gen_fix_truncv4dfv4si2.  The other visible
;; sequence converts each operand with gen_sse2_cvttpd2dq, combines the two
;; results (viewed as V2DI) with gen_vec_interleave_lowv2di, and moves the
;; V4SI view of that into operand 0.
5491 (define_expand "vec_pack_sfix_trunc_v2df"
5492 [(match_operand:V4SI 0 "register_operand")
5493 (match_operand:V2DF 1 "nonimmediate_operand")
5494 (match_operand:V2DF 2 "nonimmediate_operand")]
5497 rtx tmp0, tmp1, tmp2;
5499 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5501 tmp0 = gen_reg_rtx (V4DFmode);
5502 tmp1 = force_reg (V2DFmode, operands[1]);
5504 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5505 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5509 tmp0 = gen_reg_rtx (V4SImode);
5510 tmp1 = gen_reg_rtx (V4SImode);
5511 tmp2 = gen_reg_rtx (V2DImode);
5513 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5514 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5515 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5516 gen_lowpart (V2DImode, tmp0),
5517 gen_lowpart (V2DImode, tmp1)));
5518 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a double-precision vector mode to the SImode vector mode with twice
;; as many elements (V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI).
5523 (define_mode_attr ssepackfltmode
5524 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Expander packing two VF2 operands into one <ssepackfltmode> result with
;; unsigned truncating conversion.
;; For V8DF each operand is converted with gen_ufix_truncv8dfv8si2 and the
;; two V8SI halves are joined with gen_avx_vec_concatv16si.
;; The other visible sequence:
;;   - ix86_expand_adjust_ufix_to_sfix_si adjusts each operand and returns
;;     an extra value through tmp[2] / tmp[3];
;;   - gen_vec_pack_sfix_trunc_<mode> packs the adjusted operands into
;;     tmp[4];
;;   - ix86_expand_vec_extract_even_odd combines tmp[2] and tmp[3] into
;;     tmp[5] (done on V8SF views when the result mode is not V4SI and
;;     TARGET_AVX2 is not set);
;;   - operand 0 = tmp[4] XOR tmp[5].
5526 (define_expand "vec_pack_ufix_trunc_<mode>"
5527 [(match_operand:<ssepackfltmode> 0 "register_operand")
5528 (match_operand:VF2 1 "register_operand")
5529 (match_operand:VF2 2 "register_operand")]
5532 if (<MODE>mode == V8DFmode)
5536 r1 = gen_reg_rtx (V8SImode);
5537 r2 = gen_reg_rtx (V8SImode);
5539 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5540 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5541 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5546 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5547 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5548 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5549 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5550 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5552 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5553 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5557 tmp[5] = gen_reg_rtx (V8SFmode);
5558 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5559 gen_lowpart (V8SFmode, tmp[3]), 0);
5560 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5562 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5563 operands[0], 0, OPTAB_DIRECT);
5564 if (tmp[6] != operands[0])
5565 emit_move_insn (operands[0], tmp[6]);
;; Expander packing two V4DF operands into one V8SI result: each operand is
;; converted with gen_avx_cvtpd2dq256 into a V4SI temporary, and the two
;; temporaries are joined with gen_avx_vec_concatv8si.
5571 (define_expand "vec_pack_sfix_v4df"
5572 [(match_operand:V8SI 0 "register_operand")
5573 (match_operand:V4DF 1 "nonimmediate_operand")
5574 (match_operand:V4DF 2 "nonimmediate_operand")]
5579 r1 = gen_reg_rtx (V4SImode);
5580 r2 = gen_reg_rtx (V4SImode);
5582 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5583 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5584 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander packing two V2DF operands into one V4SI result.
;; When TARGET_AVX is set, TARGET_PREFER_AVX128 is clear and the insn is
;; optimized for speed, the operands are concatenated into a V4DF temporary
;; and converted with a single gen_avx_cvtpd2dq256.  The other visible
;; sequence converts each operand with gen_sse2_cvtpd2dq, combines the two
;; results (viewed as V2DI) with gen_vec_interleave_lowv2di, and moves the
;; V4SI view of that into operand 0.
5588 (define_expand "vec_pack_sfix_v2df"
5589 [(match_operand:V4SI 0 "register_operand")
5590 (match_operand:V2DF 1 "nonimmediate_operand")
5591 (match_operand:V2DF 2 "nonimmediate_operand")]
5594 rtx tmp0, tmp1, tmp2;
5596 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5598 tmp0 = gen_reg_rtx (V4DFmode);
5599 tmp1 = force_reg (V2DFmode, operands[1]);
5601 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5602 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5606 tmp0 = gen_reg_rtx (V4SImode);
5607 tmp1 = gen_reg_rtx (V4SImode);
5608 tmp2 = gen_reg_rtx (V2DImode);
5610 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5611 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5612 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5613 gen_lowpart (V2DImode, tmp0),
5614 gen_lowpart (V2DImode, tmp1)));
5615 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5620 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5622 ;; Parallel single-precision floating point element swizzling
5624 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for sse_movhlps that accepts nonimmediate operands
;; everywhere.  ix86_fixup_binary_operands returns the destination to use;
;; gen_sse_movhlps is emitted into it, and the result is copied to
;; operand 0 when the two differ.
5626 (define_expand "sse_movhlps_exp"
5627 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5630 (match_operand:V4SF 1 "nonimmediate_operand")
5631 (match_operand:V4SF 2 "nonimmediate_operand"))
5632 (parallel [(const_int 6)
5638 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5640 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5642 /* Fix up the destination if needed. */
5643 if (dst != operands[0])
5644 emit_move_insn (operands[0], dst);
;; movhlps-style V4SF shuffle with five alternatives; requires TARGET_SSE
;; and rejects operands 1 and 2 both being in memory.
;;   0: movhlps  (noavx, register source, operand 1 tied to destination)
;;   1: vmovhlps (avx, three-operand register form)
;;   2: movlps   (noavx) loading from operand 2 printed with %H
;;   3: vmovlps  (avx) loading from operand 2 printed with %H
;;   4: %vmovhps storing operand 2 to the memory destination
5649 (define_insn "sse_movhlps"
5650 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5653 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5654 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5655 (parallel [(const_int 6)
5659 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5661 movhlps\t{%2, %0|%0, %2}
5662 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5663 movlps\t{%H2, %0|%0, %H2}
5664 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5665 %vmovhps\t{%2, %0|%q0, %2}"
5666 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5667 (set_attr "type" "ssemov")
5668 (set_attr "ssememalign" "64")
5669 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5670 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander front end for sse_movlhps that accepts nonimmediate operands
;; everywhere.  ix86_fixup_binary_operands returns the destination to use;
;; gen_sse_movlhps is emitted into it, and the result is copied to
;; operand 0 when the two differ.
5672 (define_expand "sse_movlhps_exp"
5673 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5676 (match_operand:V4SF 1 "nonimmediate_operand")
5677 (match_operand:V4SF 2 "nonimmediate_operand"))
5678 (parallel [(const_int 0)
5684 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5686 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5688 /* Fix up the destination if needed. */
5689 if (dst != operands[0])
5690 emit_move_insn (operands[0], dst);
;; movlhps-style V4SF shuffle with five alternatives; requires TARGET_SSE
;; and ix86_binary_operator_ok on the operands.
;;   0: movlhps  (noavx, register source, operand 1 tied to destination)
;;   1: vmovlhps (avx, three-operand register form)
;;   2: movhps   (noavx) loading operand 2 from memory
;;   3: vmovhps  (avx) loading operand 2 from memory
;;   4: %vmovlps storing operand 2 to the destination printed with %H
5695 (define_insn "sse_movlhps"
5696 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5699 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5700 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5701 (parallel [(const_int 0)
5705 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5707 movlhps\t{%2, %0|%0, %2}
5708 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5709 movhps\t{%2, %0|%0, %q2}
5710 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5711 %vmovlps\t{%2, %H0|%H0, %2}"
5712 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5713 (set_attr "type" "ssemov")
5714 (set_attr "ssememalign" "64")
5715 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5716 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps on V16SF operands 1 and 2.  In the selector, indices
;; 16..31 differ from their left neighbours by 16; within each group of four
;; source elements, elements 2 and 3 are taken and interleaved pairwise.
5718 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5719 [(set (match_operand:V16SF 0 "register_operand" "=v")
5722 (match_operand:V16SF 1 "register_operand" "v")
5723 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5724 (parallel [(const_int 2) (const_int 18)
5725 (const_int 3) (const_int 19)
5726 (const_int 6) (const_int 22)
5727 (const_int 7) (const_int 23)
5728 (const_int 10) (const_int 26)
5729 (const_int 11) (const_int 27)
5730 (const_int 14) (const_int 30)
5731 (const_int 15) (const_int 31)])))]
5733 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5734 [(set_attr "type" "sselog")
5735 (set_attr "prefix" "evex")
5736 (set_attr "mode" "V16SF")])
5738 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps on V8SF operands 1 and 2; requires TARGET_AVX and
;; <mask_avx512vl_condition>.  The selector takes elements 2, 3 and 6, 7,
;; each paired with the index 8 higher.
5739 (define_insn "avx_unpckhps256<mask_name>"
5740 [(set (match_operand:V8SF 0 "register_operand" "=v")
5743 (match_operand:V8SF 1 "register_operand" "v")
5744 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5745 (parallel [(const_int 2) (const_int 10)
5746 (const_int 3) (const_int 11)
5747 (const_int 6) (const_int 14)
5748 (const_int 7) (const_int 15)])))]
5749 "TARGET_AVX && <mask_avx512vl_condition>"
5750 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5751 [(set_attr "type" "sselog")
5752 (set_attr "prefix" "vex")
5753 (set_attr "mode" "V8SF")])
;; Three-step expander on V8SF operands 1 and 2.  The first two selectors
;; are (0 8 1 9 4 12 5 13) and (2 10 3 11 6 14 7 15); the step that sets
;; operand 0 uses selector (4 5 6 7 12 13 14 15).  Operands 3 and 4 are
;; fresh V8SF pseudos created by the preparation code.
5755 (define_expand "vec_interleave_highv8sf"
5759 (match_operand:V8SF 1 "register_operand" "x")
5760 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5761 (parallel [(const_int 0) (const_int 8)
5762 (const_int 1) (const_int 9)
5763 (const_int 4) (const_int 12)
5764 (const_int 5) (const_int 13)])))
5770 (parallel [(const_int 2) (const_int 10)
5771 (const_int 3) (const_int 11)
5772 (const_int 6) (const_int 14)
5773 (const_int 7) (const_int 15)])))
5774 (set (match_operand:V8SF 0 "register_operand")
5779 (parallel [(const_int 4) (const_int 5)
5780 (const_int 6) (const_int 7)
5781 (const_int 12) (const_int 13)
5782 (const_int 14) (const_int 15)])))]
5785 operands[3] = gen_reg_rtx (V8SFmode);
5786 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpckhps on V4SF operands 1 and 2 with selector (2 6 3 7);
;; requires TARGET_SSE and <mask_avx512vl_condition>.
;; Alternative 0 (noavx) is the two-operand form with operand 1 tied to the
;; destination; alternative 1 (avx) is the three-operand vunpckhps form
;; carrying <mask_operand3>.
5789 (define_insn "vec_interleave_highv4sf<mask_name>"
5790 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5793 (match_operand:V4SF 1 "register_operand" "0,v")
5794 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
5795 (parallel [(const_int 2) (const_int 6)
5796 (const_int 3) (const_int 7)])))]
5797 "TARGET_SSE && <mask_avx512vl_condition>"
5799 unpckhps\t{%2, %0|%0, %2}
5800 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5801 [(set_attr "isa" "noavx,avx")
5802 (set_attr "type" "sselog")
5803 (set_attr "prefix" "orig,vex")
5804 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps on V16SF operands 1 and 2.  In the selector, indices
;; 16..31 differ from their left neighbours by 16; within each group of four
;; source elements, elements 0 and 1 are taken and interleaved pairwise.
5806 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5807 [(set (match_operand:V16SF 0 "register_operand" "=v")
5810 (match_operand:V16SF 1 "register_operand" "v")
5811 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5812 (parallel [(const_int 0) (const_int 16)
5813 (const_int 1) (const_int 17)
5814 (const_int 4) (const_int 20)
5815 (const_int 5) (const_int 21)
5816 (const_int 8) (const_int 24)
5817 (const_int 9) (const_int 25)
5818 (const_int 12) (const_int 28)
5819 (const_int 13) (const_int 29)])))]
5821 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5822 [(set_attr "type" "sselog")
5823 (set_attr "prefix" "evex")
5824 (set_attr "mode" "V16SF")])
5826 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps on V8SF operands 1 and 2; requires TARGET_AVX and
;; <mask_avx512vl_condition>.  The selector takes elements 0, 1 and 4, 5,
;; each paired with the index 8 higher.
5827 (define_insn "avx_unpcklps256<mask_name>"
5828 [(set (match_operand:V8SF 0 "register_operand" "=v")
5831 (match_operand:V8SF 1 "register_operand" "v")
5832 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5833 (parallel [(const_int 0) (const_int 8)
5834 (const_int 1) (const_int 9)
5835 (const_int 4) (const_int 12)
5836 (const_int 5) (const_int 13)])))]
5837 "TARGET_AVX && <mask_avx512vl_condition>"
5838 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5839 [(set_attr "type" "sselog")
5840 (set_attr "prefix" "vex")
5841 (set_attr "mode" "V8SF")])
;; Explicitly masked 128-bit vunpcklps on V4SF operands 1 and 2 with
;; selector (0 4 1 5).  Operand 3 (constraint "0C") and the QImode mask
;; register operand 4 (constraint Yk) are the masking operands; the
;; template prints the mask as %{%4%} followed by %N3.
5843 (define_insn "unpcklps128_mask"
5844 [(set (match_operand:V4SF 0 "register_operand" "=v")
5848 (match_operand:V4SF 1 "register_operand" "v")
5849 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5850 (parallel [(const_int 0) (const_int 4)
5851 (const_int 1) (const_int 5)]))
5852 (match_operand:V4SF 3 "vector_move_operand" "0C")
5853 (match_operand:QI 4 "register_operand" "Yk")))]
5855 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5856 [(set_attr "type" "sselog")
5857 (set_attr "prefix" "evex")
5858 (set_attr "mode" "V4SF")])
;; Three-step expander on V8SF operands 1 and 2.  The first two selectors
;; are (0 8 1 9 4 12 5 13) and (2 10 3 11 6 14 7 15); the step that sets
;; operand 0 uses selector (0 1 2 3 8 9 10 11).  Operands 3 and 4 are fresh
;; V8SF pseudos created by the preparation code.
5860 (define_expand "vec_interleave_lowv8sf"
5864 (match_operand:V8SF 1 "register_operand" "x")
5865 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5866 (parallel [(const_int 0) (const_int 8)
5867 (const_int 1) (const_int 9)
5868 (const_int 4) (const_int 12)
5869 (const_int 5) (const_int 13)])))
5875 (parallel [(const_int 2) (const_int 10)
5876 (const_int 3) (const_int 11)
5877 (const_int 6) (const_int 14)
5878 (const_int 7) (const_int 15)])))
5879 (set (match_operand:V8SF 0 "register_operand")
5884 (parallel [(const_int 0) (const_int 1)
5885 (const_int 2) (const_int 3)
5886 (const_int 8) (const_int 9)
5887 (const_int 10) (const_int 11)])))]
5890 operands[3] = gen_reg_rtx (V8SFmode);
5891 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpcklps on V4SF operands 1 and 2 with selector (0 4 1 5).
;; Alternative 0 (noavx) is the two-operand form with operand 1 tied to the
;; destination; alternative 1 (avx) is the three-operand vunpcklps form.
5894 (define_insn "vec_interleave_lowv4sf"
5895 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5898 (match_operand:V4SF 1 "register_operand" "0,x")
5899 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5900 (parallel [(const_int 0) (const_int 4)
5901 (const_int 1) (const_int 5)])))]
5904 unpcklps\t{%2, %0|%0, %2}
5905 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5906 [(set_attr "isa" "noavx,avx")
5907 (set_attr "type" "sselog")
5908 (set_attr "prefix" "orig,vex")
5909 (set_attr "mode" "V4SF")])
5911 ;; These are modeled with the same vec_concat as the others so that we
5912 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup on the V8SF operand 1; requires TARGET_AVX and
;; <mask_avx512vl_condition>.  The selector duplicates each odd-indexed
;; element: (1 1 3 3 5 5 7 7).
5913 (define_insn "avx_movshdup256<mask_name>"
5914 [(set (match_operand:V8SF 0 "register_operand" "=v")
5917 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5919 (parallel [(const_int 1) (const_int 1)
5920 (const_int 3) (const_int 3)
5921 (const_int 5) (const_int 5)
5922 (const_int 7) (const_int 7)])))]
5923 "TARGET_AVX && <mask_avx512vl_condition>"
5924 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5925 [(set_attr "type" "sse")
5926 (set_attr "prefix" "vex")
5927 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on the V4SF operand 1; requires TARGET_SSE3 and
;; <mask_avx512vl_condition>.  The visible part of the selector begins with
;; element 1.  Printed with the %v prefix, so the same template serves the
;; legacy and VEX forms.
5929 (define_insn "sse3_movshdup<mask_name>"
5930 [(set (match_operand:V4SF 0 "register_operand" "=v")
5933 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5935 (parallel [(const_int 1)
5939 "TARGET_SSE3 && <mask_avx512vl_condition>"
5940 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5941 [(set_attr "type" "sse")
5942 (set_attr "prefix_rep" "1")
5943 (set_attr "prefix" "maybe_vex")
5944 (set_attr "mode" "V4SF")])
;; V16SF form of movshdup: each odd-indexed element of operand 1 is
;; duplicated (selection 1,1,3,3,...,15,15).  Emitted as vmovshdup with
;; the "evex" prefix attribute.
5946 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5947 [(set (match_operand:V16SF 0 "register_operand" "=v")
5950 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5952 (parallel [(const_int 1) (const_int 1)
5953 (const_int 3) (const_int 3)
5954 (const_int 5) (const_int 5)
5955 (const_int 7) (const_int 7)
5956 (const_int 9) (const_int 9)
5957 (const_int 11) (const_int 11)
5958 (const_int 13) (const_int 13)
5959 (const_int 15) (const_int 15)])))]
5961 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5962 [(set_attr "type" "sse")
5963 (set_attr "prefix" "evex")
5964 (set_attr "mode" "V16SF")])
;; V8SF form of movsldup: every even-indexed element of operand 1 is
;; duplicated (selection 0,0,2,2,4,4,6,6) and the insn is emitted as
;; vmovsldup.  Enabled under TARGET_AVX together with
;; <mask_avx512vl_condition>.
5966 (define_insn "avx_movsldup256<mask_name>"
5967 [(set (match_operand:V8SF 0 "register_operand" "=v")
5970 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5972 (parallel [(const_int 0) (const_int 0)
5973 (const_int 2) (const_int 2)
5974 (const_int 4) (const_int 4)
5975 (const_int 6) (const_int 6)])))]
5976 "TARGET_AVX && <mask_avx512vl_condition>"
5977 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5978 [(set_attr "type" "sse")
5979 (set_attr "prefix" "vex")
5980 (set_attr "mode" "V8SF")])
;; V4SF counterpart of the movsldup pattern: operand 1 may be a register
;; or memory, the result goes to a register, and the insn is printed with
;; the %v prefix (prefix attribute "maybe_vex").  Requires TARGET_SSE3
;; together with <mask_avx512vl_condition>.
5982 (define_insn "sse3_movsldup<mask_name>"
5983 [(set (match_operand:V4SF 0 "register_operand" "=v")
5986 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5988 (parallel [(const_int 0)
5992 "TARGET_SSE3 && <mask_avx512vl_condition>"
5993 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5994 [(set_attr "type" "sse")
5995 (set_attr "prefix_rep" "1")
5996 (set_attr "prefix" "maybe_vex")
5997 (set_attr "mode" "V4SF")])
;; V16SF form of movsldup: each even-indexed element of operand 1 is
;; duplicated (selection 0,0,2,2,...,14,14).  Emitted as vmovsldup with
;; the "evex" prefix attribute.
5999 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6000 [(set (match_operand:V16SF 0 "register_operand" "=v")
6003 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6005 (parallel [(const_int 0) (const_int 0)
6006 (const_int 2) (const_int 2)
6007 (const_int 4) (const_int 4)
6008 (const_int 6) (const_int 6)
6009 (const_int 8) (const_int 8)
6010 (const_int 10) (const_int 10)
6011 (const_int 12) (const_int 12)
6012 (const_int 14) (const_int 14)])))]
6014 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6015 [(set_attr "type" "sse")
6016 (set_attr "prefix" "evex")
6017 (set_attr "mode" "V16SF")])
;; Expander for the 256-bit shufps builtin form.  Operand 3 is the 8-bit
;; immediate; it is unpacked into eight explicit selector constants for
;; gen_avx_shufps256_1: the four 2-bit fields of the immediate offset by
;; 0, 0, 8, 8, followed by the same four fields offset by 4, 4, 12, 12.
6019 (define_expand "avx_shufps256<mask_expand4_name>"
6020 [(match_operand:V8SF 0 "register_operand")
6021 (match_operand:V8SF 1 "register_operand")
6022 (match_operand:V8SF 2 "nonimmediate_operand")
6023 (match_operand:SI 3 "const_int_operand")]
6026 int mask = INTVAL (operands[3]);
6027 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6030 GEN_INT ((mask >> 0) & 3),
6031 GEN_INT ((mask >> 2) & 3),
6032 GEN_INT (((mask >> 4) & 3) + 8),
6033 GEN_INT (((mask >> 6) & 3) + 8),
6034 GEN_INT (((mask >> 0) & 3) + 4),
6035 GEN_INT (((mask >> 2) & 3) + 4),
6036 GEN_INT (((mask >> 4) & 3) + 12),
6037 GEN_INT (((mask >> 6) & 3) + 12)
6038 <mask_expand4_args>));
6042 ;; One bit in mask selects 2 elements.
;; Operands 3..10 are the eight element selectors.  The insn condition
;; only accepts selections where operands 7..10 equal operands 3..6
;; plus 4, so a single 8-bit immediate can describe the shuffle.  The
;; output code rebuilds that immediate from operands 3..6 (removing the
;; +8 offset from operands 5 and 6) and stores it back in operands[3]
;; before printing vshufps.
6043 (define_insn "avx_shufps256_1<mask_name>"
6044 [(set (match_operand:V8SF 0 "register_operand" "=v")
6047 (match_operand:V8SF 1 "register_operand" "v")
6048 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6049 (parallel [(match_operand 3 "const_0_to_3_operand" )
6050 (match_operand 4 "const_0_to_3_operand" )
6051 (match_operand 5 "const_8_to_11_operand" )
6052 (match_operand 6 "const_8_to_11_operand" )
6053 (match_operand 7 "const_4_to_7_operand" )
6054 (match_operand 8 "const_4_to_7_operand" )
6055 (match_operand 9 "const_12_to_15_operand")
6056 (match_operand 10 "const_12_to_15_operand")])))]
6058 && <mask_avx512vl_condition>
6059 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6060 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6061 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6062 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6065 mask = INTVAL (operands[3]);
6066 mask |= INTVAL (operands[4]) << 2;
6067 mask |= (INTVAL (operands[5]) - 8) << 4;
6068 mask |= (INTVAL (operands[6]) - 8) << 6;
6069 operands[3] = GEN_INT (mask);
6071 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6073 [(set_attr "type" "sseshuf")
6074 (set_attr "length_immediate" "1")
6075 (set_attr "prefix" "<mask_prefix>")
6076 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit shufps builtin form.  The 8-bit immediate in
;; operand 3 is split into four 2-bit selectors for gen_sse_shufps_v4sf;
;; the upper two selectors get +4 added.
6078 (define_expand "sse_shufps<mask_expand4_name>"
6079 [(match_operand:V4SF 0 "register_operand")
6080 (match_operand:V4SF 1 "register_operand")
6081 (match_operand:V4SF 2 "nonimmediate_operand")
6082 (match_operand:SI 3 "const_int_operand")]
6085 int mask = INTVAL (operands[3]);
6086 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6089 GEN_INT ((mask >> 0) & 3),
6090 GEN_INT ((mask >> 2) & 3),
6091 GEN_INT (((mask >> 4) & 3) + 4),
6092 GEN_INT (((mask >> 6) & 3) + 4)
6093 <mask_expand4_args>));
;; Masked V4SF shufps.  Operands 3..6 are the four element selectors,
;; operand 7 is the merge source (tied to the destination or constraint
;; "C") and operand 8 is the QImode mask register ("Yk").  The output
;; code packs operands 3..6 into the 8-bit immediate (removing the +4
;; offset from operands 5 and 6) and prints vshufps with the {%8}%N7
;; mask decoration.
6097 (define_insn "sse_shufps_v4sf_mask"
6098 [(set (match_operand:V4SF 0 "register_operand" "=v")
6102 (match_operand:V4SF 1 "register_operand" "v")
6103 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6104 (parallel [(match_operand 3 "const_0_to_3_operand")
6105 (match_operand 4 "const_0_to_3_operand")
6106 (match_operand 5 "const_4_to_7_operand")
6107 (match_operand 6 "const_4_to_7_operand")]))
6108 (match_operand:V4SF 7 "vector_move_operand" "0C")
6109 (match_operand:QI 8 "register_operand" "Yk")))]
6113 mask |= INTVAL (operands[3]) << 0;
6114 mask |= INTVAL (operands[4]) << 2;
6115 mask |= (INTVAL (operands[5]) - 4) << 4;
6116 mask |= (INTVAL (operands[6]) - 4) << 6;
6117 operands[3] = GEN_INT (mask);
6119 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6121 [(set_attr "type" "sseshuf")
6122 (set_attr "length_immediate" "1")
6123 (set_attr "prefix" "evex")
6124 (set_attr "mode" "V4SF")])
;; Unmasked shufps over the VI4F_128 modes.  The first two selectors
;; (operands 3, 4) index 0..3 and the last two (operands 5, 6) index
;; 4..7 of the concatenation of operands 1 and 2.  The output code packs
;; them into the 8-bit immediate and picks the two-operand shufps
;; (operand 1 tied to the destination) or the three-operand vshufps by
;; alternative.
6126 (define_insn "sse_shufps_<mode>"
6127 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
6128 (vec_select:VI4F_128
6129 (vec_concat:<ssedoublevecmode>
6130 (match_operand:VI4F_128 1 "register_operand" "0,x")
6131 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
6132 (parallel [(match_operand 3 "const_0_to_3_operand")
6133 (match_operand 4 "const_0_to_3_operand")
6134 (match_operand 5 "const_4_to_7_operand")
6135 (match_operand 6 "const_4_to_7_operand")])))]
6139 mask |= INTVAL (operands[3]) << 0;
6140 mask |= INTVAL (operands[4]) << 2;
6141 mask |= (INTVAL (operands[5]) - 4) << 4;
6142 mask |= (INTVAL (operands[6]) - 4) << 6;
6143 operands[3] = GEN_INT (mask);
6145 switch (which_alternative)
6148 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6150 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6155 [(set_attr "isa" "noavx,avx")
6156 (set_attr "type" "sseshuf")
6157 (set_attr "length_immediate" "1")
6158 (set_attr "prefix" "orig,vex")
6159 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Three alternatives: register -> memory (movhps), register -> register
;; (movhlps), and offsettable memory -> register (movlps from %H1).
6161 (define_insn "sse_storehps"
6162 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6164 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6165 (parallel [(const_int 2) (const_int 3)])))]
6168 %vmovhps\t{%1, %0|%q0, %1}
6169 %vmovhlps\t{%1, %d0|%d0, %1}
6170 %vmovlps\t{%H1, %d0|%d0, %H1}"
6171 [(set_attr "type" "ssemov")
6172 (set_attr "ssememalign" "64")
6173 (set_attr "prefix" "maybe_vex")
6174 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  The operands are first passed
;; through ix86_fixup_binary_operands; the insn is emitted on the
;; destination that call returns, and the result is copied back to
;; operands[0] when the two differ.
6176 (define_expand "sse_loadhps_exp"
6177 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6180 (match_operand:V4SF 1 "nonimmediate_operand")
6181 (parallel [(const_int 0) (const_int 1)]))
6182 (match_operand:V2SF 2 "nonimmediate_operand")))]
6185 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6187 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6189 /* Fix up the destination if needed. */
6190 if (dst != operands[0])
6191 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives: memory source (movhps / vmovhps), register source
;; (movlhps / vmovlhps), and an offsettable-memory destination tied to
;; operand 1, where operand 2 is stored to %H0 with movlps.
6196 (define_insn "sse_loadhps"
6197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6200 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6201 (parallel [(const_int 0) (const_int 1)]))
6202 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
6205 movhps\t{%2, %0|%0, %q2}
6206 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6207 movlhps\t{%2, %0|%0, %2}
6208 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6209 %vmovlps\t{%2, %H0|%H0, %2}"
6210 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6211 (set_attr "type" "ssemov")
6212 (set_attr "ssememalign" "64")
6213 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6214 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Three alternatives: register -> memory (movlps), register -> register
;; (movaps), and memory -> register (movlps).
6216 (define_insn "sse_storelps"
6217 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6219 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
6220 (parallel [(const_int 0) (const_int 1)])))]
6223 %vmovlps\t{%1, %0|%q0, %1}
6224 %vmovaps\t{%1, %0|%0, %1}
6225 %vmovlps\t{%1, %d0|%d0, %q1}"
6226 [(set_attr "type" "ssemov")
6227 (set_attr "prefix" "maybe_vex")
6228 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  The operands are first passed
;; through ix86_fixup_binary_operands; the insn is emitted on the
;; destination that call returns, and the result is copied back to
;; operands[0] when the two differ.
6230 (define_expand "sse_loadlps_exp"
6231 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6233 (match_operand:V2SF 2 "nonimmediate_operand")
6235 (match_operand:V4SF 1 "nonimmediate_operand")
6236 (parallel [(const_int 2) (const_int 3)]))))]
6239 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6241 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6243 /* Fix up the destination if needed. */
6244 if (dst != operands[0])
6245 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Register-source alternatives use shufps / vshufps with immediate
;; 0xe4; memory-source alternatives use movlps / vmovlps; the last
;; alternative stores operand 2 with movlps into a memory destination
;; that is tied to operand 1.
6250 (define_insn "sse_loadlps"
6251 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6253 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
6255 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
6256 (parallel [(const_int 2) (const_int 3)]))))]
6259 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6260 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6261 movlps\t{%2, %0|%0, %q2}
6262 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6263 %vmovlps\t{%2, %0|%q0, %2}"
6264 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6265 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6266 (set_attr "ssememalign" "64")
6267 (set_attr "length_immediate" "1,1,*,*,*")
6268 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6269 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss pattern on V4SF operands 1 and 2.  Alternative 0
;; (isa "noavx") ties operand 1 to the destination and emits movss;
;; alternative 1 (isa "avx") emits the three-operand vmovss.
6271 (define_insn "sse_movss"
6272 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6274 (match_operand:V4SF 2 "register_operand" " x,x")
6275 (match_operand:V4SF 1 "register_operand" " 0,x")
6279 movss\t{%2, %0|%0, %2}
6280 vmovss\t{%2, %1, %0|%0, %1, %2}"
6281 [(set_attr "isa" "noavx,avx")
6282 (set_attr "type" "ssemov")
6283 (set_attr "prefix" "orig,vex")
6284 (set_attr "mode" "SF")])
;; Broadcast of element 0 of a V4SF register (operand 1) to every
;; element of a VF1_128_256 destination, emitted as vbroadcastss.
6286 (define_insn "avx2_vec_dup<mode>"
6287 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6288 (vec_duplicate:VF1_128_256
6290 (match_operand:V4SF 1 "register_operand" "x")
6291 (parallel [(const_int 0)]))))]
6293 "vbroadcastss\t{%1, %0|%0, %1}"
6294 [(set_attr "type" "sselog1")
6295 (set_attr "prefix" "vex")
6296 (set_attr "mode" "<MODE>")])
;; Broadcast of element 0 of a V8SF register (operand 1) into a V8SF
;; destination, emitted as vbroadcastss.  The source is printed with the
;; %x operand modifier (presumably the 128-bit register name -- confirm
;; against the operand-printing code).
6298 (define_insn "avx2_vec_dupv8sf_1"
6299 [(set (match_operand:V8SF 0 "register_operand" "=x")
6302 (match_operand:V8SF 1 "register_operand" "x")
6303 (parallel [(const_int 0)]))))]
6305 "vbroadcastss\t{%x1, %0|%0, %x1}"
6306 [(set_attr "type" "sselog1")
6307 (set_attr "prefix" "vex")
6308 (set_attr "mode" "V8SF")])
;; Broadcast of element 0 of a VF_512 register (operand 1) to every
;; element of a destination of the same mode.  The mnemonic suffix comes
;; from <bcstscalarsuff>; the source is printed with the %x modifier.
6310 (define_insn "avx512f_vec_dup<mode>_1"
6311 [(set (match_operand:VF_512 0 "register_operand" "=v")
6312 (vec_duplicate:VF_512
6313 (vec_select:<ssescalarmode>
6314 (match_operand:VF_512 1 "register_operand" "v")
6315 (parallel [(const_int 0)]))))]
6317 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6318 [(set_attr "type" "sselog1")
6319 (set_attr "prefix" "evex")
6320 (set_attr "mode" "<MODE>")])
6322 ;; Although insertps takes register source, we prefer
6323 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF value from SF operands 1 and 2.  Nine alternatives:
;; 0-2 register second operand (unpcklps / vunpcklps), 3-5 memory second
;; operand (insertps / vinsertps with immediate 0x10), 6 loads operand 1
;; with movss when operand 2 matches constraint "C", and 7-8 are the MMX
;; register forms (punpckldq, movd).
6324 (define_insn "*vec_concatv2sf_sse4_1"
6325 [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
6327 (match_operand:SF 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,m, 0 , m")
6328 (match_operand:SF 2 "vector_move_operand" " Yr,*x,x, m,m, m,C,*ym, C")))]
6331 unpcklps\t{%2, %0|%0, %2}
6332 unpcklps\t{%2, %0|%0, %2}
6333 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6334 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6335 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6336 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6337 %vmovss\t{%1, %0|%0, %1}
6338 punpckldq\t{%2, %0|%0, %2}
6339 movd\t{%1, %0|%0, %1}"
6340 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
6341 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6342 (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
6343 (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
6344 (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
6345 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
6346 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6348 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6349 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6350 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF value from SF operands 1 and 2 using four alternatives:
;; SSE register pair (unpcklps), SSE load of operand 1 when operand 2
;; matches "C" (movss), and the two MMX forms (punpckldq, movd).
6351 (define_insn "*vec_concatv2sf_sse"
6352 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6354 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6355 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6358 unpcklps\t{%2, %0|%0, %2}
6359 movss\t{%1, %0|%0, %1}
6360 punpckldq\t{%2, %0|%0, %2}
6361 movd\t{%1, %0|%0, %1}"
6362 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6363 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF value from V2SF operands 1 and 2.  A register second
;; operand uses movlhps / vmovlhps; a memory second operand uses
;; movhps / vmovhps.  The non-AVX alternatives tie operand 1 to the
;; destination.
6365 (define_insn "*vec_concatv4sf"
6366 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6368 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6369 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6372 movlhps\t{%2, %0|%0, %2}
6373 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6374 movhps\t{%2, %0|%0, %q2}
6375 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6376 [(set_attr "isa" "noavx,avx,noavx,avx")
6377 (set_attr "type" "ssemov")
6378 (set_attr "prefix" "orig,vex,orig,vex")
6379 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialisation expander for the V_128 modes; all the work is
;; delegated to ix86_expand_vector_init, called with `false' as its
;; first argument.
6381 (define_expand "vec_init<mode>"
6382 [(match_operand:V_128 0 "register_operand")
6386 ix86_expand_vector_init (false, operands[0], operands[1]);
6390 ;; Avoid combining registers from different units in a single alternative,
6391 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Pattern over the VI4F_128 modes with thirteen alternatives.  Operand 2
;; is the scalar (wrapped in vec_duplicate) and operand 1 is the other
;; vector operand, which is constraint "C" in alternatives 0-3.  The "isa"
;; attribute gates each alternative (sse4 / sse2 / noavx / avx /
;; sse4_noavx), and the "type" attribute is sselog for alternatives
;; 0,1,7,8,9, imov for 11, fmov for 12 and ssemov otherwise.
6392 (define_insn "vec_set<mode>_0"
6393 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6394 "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6396 (vec_duplicate:VI4F_128
6397 (match_operand:<ssescalarmode> 2 "general_operand"
6398 " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6399 (match_operand:VI4F_128 1 "vector_move_operand"
6400 " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6404 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6405 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6406 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6407 %vmovd\t{%2, %0|%0, %2}
6408 movss\t{%2, %0|%0, %2}
6409 movss\t{%2, %0|%0, %2}
6410 vmovss\t{%2, %1, %0|%0, %1, %2}
6411 pinsrd\t{$0, %2, %0|%0, %2, 0}
6412 pinsrd\t{$0, %2, %0|%0, %2, 0}
6413 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6417 [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
6419 (cond [(eq_attr "alternative" "0,1,7,8,9")
6420 (const_string "sselog")
6421 (eq_attr "alternative" "11")
6422 (const_string "imov")
6423 (eq_attr "alternative" "12")
6424 (const_string "fmov")
6426 (const_string "ssemov")))
6427 (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6428 (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6429 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
6430 (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
6432 ;; A subset is vec_setv4sf.
;; insertps / vinsertps pattern on V4SF operand 1 and SF operand 2.
;; Operand 3 must be a constant whose exact_log2 is below the number of
;; V4SF units.  The output code replaces operand 3 with that log2
;; shifted left by 4 before printing it as the instruction immediate.
6433 (define_insn "*vec_setv4sf_sse4_1"
6434 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6437 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
6438 (match_operand:V4SF 1 "register_operand" "0,0,x")
6439 (match_operand:SI 3 "const_int_operand")))]
6441 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6442 < GET_MODE_NUNITS (V4SFmode))"
6444 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6445 switch (which_alternative)
6449 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6451 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6456 [(set_attr "isa" "noavx,noavx,avx")
6457 (set_attr "type" "sselog")
6458 (set_attr "prefix_data16" "1,1,*")
6459 (set_attr "prefix_extra" "1")
6460 (set_attr "length_immediate" "1")
6461 (set_attr "prefix" "orig,orig,vex")
6462 (set_attr "mode" "V4SF")])
;; insertps builtin pattern, modelled as an unspec over V4SF operands 2
;; and 1 plus an 8-bit immediate (operand 3).  When operand 2 is in
;; memory, the two top bits of the immediate (count_s) are folded into
;; the address as an SFmode reference at byte offset count_s * 4, and
;; the immediate is reduced to its low six bits.
6464 (define_insn "sse4_1_insertps"
6465 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6466 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
6467 (match_operand:V4SF 1 "register_operand" "0,0,x")
6468 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
6472 if (MEM_P (operands[2]))
6474 unsigned count_s = INTVAL (operands[3]) >> 6;
6476 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6477 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6479 switch (which_alternative)
6483 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6485 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6490 [(set_attr "isa" "noavx,noavx,avx")
6491 (set_attr "type" "sselog")
6492 (set_attr "prefix_data16" "1,1,*")
6493 (set_attr "prefix_extra" "1")
6494 (set_attr "length_immediate" "1")
6495 (set_attr "prefix" "orig,orig,vex")
6496 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE): a VI4F_128 memory destination whose
;; source involves a vec_duplicate of non-memory scalar operand 1 is
;; rewritten as a plain set of operand 0 from operand 1, with operand 0
;; re-addressed in the scalar mode at offset 0.
6499 [(set (match_operand:VI4F_128 0 "memory_operand")
6501 (vec_duplicate:VI4F_128
6502 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6505 "TARGET_SSE && reload_completed"
6506 [(set (match_dup 0) (match_dup 1))]
6507 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Element-set expander for all modes of iterator V: operand 0 is the
;; vector, operand 1 the scalar value and operand 2 the constant element
;; index.  The work is delegated to ix86_expand_vector_set, called with
;; `false' as its first argument.
6509 (define_expand "vec_set<mode>"
6510 [(match_operand:V 0 "register_operand")
6511 (match_operand:<ssescalarmode> 1 "register_operand")
6512 (match_operand 2 "const_int_operand")]
6515 ix86_expand_vector_set (false, operands[0], operands[1],
6516 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF value into an SF destination; at
;; most one of the two operands may be memory.  After reload it is split
;; into a plain SF move: a register source is re-expressed as an SFmode
;; REG with the same register number, otherwise the source is
;; re-addressed as SFmode at offset 0.
6520 (define_insn_and_split "*vec_extractv4sf_0"
6521 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6523 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6524 (parallel [(const_int 0)])))]
6525 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6527 "&& reload_completed"
6528 [(set (match_dup 0) (match_dup 1))]
6530 if (REG_P (operands[1]))
6531 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
6533 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extraction of element N (operand 2, 0..3) of a V4SF register into an
;; SF destination.  The first two alternatives (general register or
;; memory destination) emit vextractps directly.  For the SSE-register
;; destination alternatives the insn is split after reload: the
;; destination is viewed as a V4SF register and filled by either a
;; shufps (gen_sse_shufps_v4sf) or gen_vec_interleave_highv4sf, chosen
;; by the element index.
6536 (define_insn_and_split "*sse4_1_extractps"
6537 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
6539 (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
6540 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
6543 %vextractps\t{%2, %1, %0|%0, %1, %2}
6544 %vextractps\t{%2, %1, %0|%0, %1, %2}
6547 "&& reload_completed && SSE_REG_P (operands[0])"
6550 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6551 switch (INTVAL (operands[2]))
6555 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6556 operands[2], operands[2],
6557 GEN_INT (INTVAL (operands[2]) + 4),
6558 GEN_INT (INTVAL (operands[2]) + 4)));
6561 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6564 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6569 [(set_attr "isa" "*,*,noavx,avx")
6570 (set_attr "type" "sselog,sselog,*,*")
6571 (set_attr "prefix_data16" "1,1,*,*")
6572 (set_attr "prefix_extra" "1,1,*,*")
6573 (set_attr "length_immediate" "1,1,*,*")
6574 (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
6575 (set_attr "mode" "V4SF,V4SF,*,*")])
;; Extraction of element N (operand 2, 0..3) from a V4SF value held in
;; offsettable memory.  After reload it is split into a plain SF load
;; from the same address advanced by N * 4 bytes.
6577 (define_insn_and_split "*vec_extractv4sf_mem"
6578 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6580 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6581 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6584 "&& reload_completed"
6585 [(set (match_dup 0) (match_dup 1))]
6587 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Pattern-name prefix per 512-bit mode: "avx512f" for the 32-bit
;; element modes, "avx512dq" for the 64-bit element modes.
6590 (define_mode_attr extract_type
6591 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
;; Matching name suffix: "32x4" or "64x2".
6593 (define_mode_attr extract_suf
6594 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
;; The 512-bit modes covered by the expander that uses the two
;; attributes above; V8DF and V8DI additionally require TARGET_AVX512DQ.
6596 (define_mode_iterator AVX512_VEC
6597 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Masked quarter-vector extract expander.  Operand 2 (0..3) chooses the
;; quarter, operand 3 is the merge source and operand 4 the QImode mask.
;; A memory destination combined with a CONST_VECTOR merge source is
;; first passed through force_reg.  V16SI/V16SF dispatch to the 32x4
;; pattern with element indices mask*4 .. mask*4+3; the other modes
;; dispatch to the 64x2 pattern with indices mask*2 and mask*2+1.
6599 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6600 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6601 (match_operand:AVX512_VEC 1 "register_operand")
6602 (match_operand:SI 2 "const_0_to_3_operand")
6603 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6604 (match_operand:QI 4 "register_operand")]
6608 mask = INTVAL (operands[2]);
6610 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6611 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6613 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6614 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6615 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6616 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6619 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6620 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
;; Masked 64x2 extract that stores straight to memory.  Operand 4 (the
;; merge source) must be the same memory as the destination.  Operands 2
;; and 3 are the two element indices: the first must be even and the
;; second must follow it.  The emitted immediate is operand 2 shifted
;; right by one, i.e. the 128-bit chunk number.
;; The mask operand uses "Yk", matching the other masked-store patterns
;; in this file, rather than the broader "k", because the register is
;; printed as a {%k} write mask.
6625 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6626 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6627 (vec_merge:<ssequartermode>
6628 (vec_select:<ssequartermode>
6629 (match_operand:V8FI 1 "register_operand" "v")
6630 (parallel [(match_operand 2 "const_0_to_7_operand")
6631 (match_operand 3 "const_0_to_7_operand")]))
6632 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6633 (match_operand:QI 5 "register_operand" "Yk")))]
6635 && (INTVAL (operands[2]) % 2 == 0)
6636 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
6637 && rtx_equal_p (operands[4], operands[0])"
6639 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6640 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6642 [(set_attr "type" "sselog")
6643 (set_attr "prefix_extra" "1")
6644 (set_attr "length_immediate" "1")
6645 (set_attr "memory" "store")
6646 (set_attr "prefix" "evex")
6647 (set_attr "mode" "<sseinsnmode>")])
;; Masked 32x4 extract that stores straight to memory.  Operand 6 (the
;; merge source) must be the same memory as the destination.  Operands
;; 2..5 are the four element indices: the first must be a multiple of 4
;; and the rest must be consecutive.  The emitted immediate is operand 2
;; shifted right by two, i.e. the 128-bit chunk number.
6649 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6650 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6651 (vec_merge:<ssequartermode>
6652 (vec_select:<ssequartermode>
6653 (match_operand:V16FI 1 "register_operand" "v")
6654 (parallel [(match_operand 2 "const_0_to_15_operand")
6655 (match_operand 3 "const_0_to_15_operand")
6656 (match_operand 4 "const_0_to_15_operand")
6657 (match_operand 5 "const_0_to_15_operand")]))
6658 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6659 (match_operand:QI 7 "register_operand" "Yk")))]
6661 && ((INTVAL (operands[2]) % 4 == 0)
6662 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6663 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6664 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
6665 && rtx_equal_p (operands[6], operands[0])"
6667 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6668 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6670 [(set_attr "type" "sselog")
6671 (set_attr "prefix_extra" "1")
6672 (set_attr "length_immediate" "1")
6673 (set_attr "memory" "store")
6674 (set_attr "prefix" "evex")
6675 (set_attr "mode" "<sseinsnmode>")])
;; 64x2 extract of two consecutive elements of a V8FI register.
;; Operands 2 and 3 are the element indices.  The immediate is operand 2
;; shifted right by one, so the pair must start on an even index;
;; without that check an unaligned pair such as (1, 2) would match and
;; be emitted as chunk 0.  The condition therefore mirrors the one on
;; the _maskm variant.
6677 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6678 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6679 (vec_select:<ssequartermode>
6680 (match_operand:V8FI 1 "register_operand" "v")
6681 (parallel [(match_operand 2 "const_0_to_7_operand")
6682 (match_operand 3 "const_0_to_7_operand")])))]
6683 "TARGET_AVX512DQ && (INTVAL (operands[2]) % 2 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6685 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6686 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6688 [(set_attr "type" "sselog1")
6689 (set_attr "prefix_extra" "1")
6690 (set_attr "length_immediate" "1")
6691 (set_attr "prefix" "evex")
6692 (set_attr "mode" "<sseinsnmode>")])
;; 32x4 extract of four consecutive elements of a V16FI register.
;; Operands 2..5 are the element indices.  The immediate is operand 2
;; shifted right by two, so the run must start on a multiple of 4;
;; without that check an unaligned run such as (1, 2, 3, 4) would match
;; and be emitted as chunk 0.  The condition therefore mirrors the one
;; on the _maskm variant.
6694 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6695 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6696 (vec_select:<ssequartermode>
6697 (match_operand:V16FI 1 "register_operand" "v")
6698 (parallel [(match_operand 2 "const_0_to_15_operand")
6699 (match_operand 3 "const_0_to_15_operand")
6700 (match_operand 4 "const_0_to_15_operand")
6701 (match_operand 5 "const_0_to_15_operand")])))]
6703 && (INTVAL (operands[2]) % 4 == 0 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6704 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6705 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6707 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6708 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6710 [(set_attr "type" "sselog1")
6711 (set_attr "prefix_extra" "1")
6712 (set_attr "length_immediate" "1")
6713 (set_attr "prefix" "evex")
6714 (set_attr "mode" "<sseinsnmode>")])
;; Pattern-name prefix per 512-bit mode for the half-vector extracts:
;; "avx512dq" for the 32-bit element modes, "avx512f" for the 64-bit
;; element modes.
6716 (define_mode_attr extract_type_2
6717 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
;; Matching name suffix: "32x8" or "64x4".
6719 (define_mode_attr extract_suf_2
6720 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; The 512-bit modes covered by the expander that uses the two
;; attributes above; V16SF and V16SI additionally require
;; TARGET_AVX512DQ.
6722 (define_mode_iterator AVX512_VEC_2
6723 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Masked half-vector extract expander.  Operand 2 (0 or 1) chooses the
;; low or high half, operand 3 is the merge source and operand 4 the
;; QImode mask; the matching gen_vec_extract_{lo,hi}_<mode>_mask is then
;; emitted.  A memory destination combined with a CONST_VECTOR merge
;; source is first forced into a register.  Operand 0 has mode
;; <ssehalfvecmode>, so force_reg must be given that mode (not the
;; quarter-vector mode).
6725 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6726 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6727 (match_operand:AVX512_VEC_2 1 "register_operand")
6728 (match_operand:SI 2 "const_0_to_1_operand")
6729 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6730 (match_operand:QI 4 "register_operand")]
6733 rtx (*insn)(rtx, rtx, rtx, rtx);
6735 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6736 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6738 switch (INTVAL (operands[2]))
6741 insn = gen_vec_extract_lo_<mode>_mask;
6744 insn = gen_vec_extract_hi_<mode>_mask;
6750 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Post-reload split (TARGET_AVX512F, at most one memory operand):
;; selecting elements 0..3 of a V8FI value becomes a plain move.  The
;; source is re-expressed in the half-vector mode, through gen_rtx_REG
;; on the same register number or through gen_lowpart, and then moved
;; into operand 0.
6755 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6756 (vec_select:<ssehalfvecmode>
6757 (match_operand:V8FI 1 "nonimmediate_operand")
6758 (parallel [(const_int 0) (const_int 1)
6759 (const_int 2) (const_int 3)])))]
6760 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6761 && reload_completed"
6764 rtx op1 = operands[1];
6766 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6768 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6769 emit_move_insn (operands[0], op1);
;; Masked store of the low half (elements 0..3) of a V8FI register to
;; memory.  Operand 2 (the merge source) must be the same memory as the
;; destination and operand 3 is the mask register.  Emitted as the 64x4
;; extract with immediate 0.
6773 (define_insn "vec_extract_lo_<mode>_maskm"
6774 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6775 (vec_merge:<ssehalfvecmode>
6776 (vec_select:<ssehalfvecmode>
6777 (match_operand:V8FI 1 "register_operand" "v")
6778 (parallel [(const_int 0) (const_int 1)
6779 (const_int 2) (const_int 3)]))
6780 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6781 (match_operand:QI 3 "register_operand" "Yk")))]
6783 && rtx_equal_p (operands[2], operands[0])"
6784 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6785 [(set_attr "type" "sselog1")
6786 (set_attr "prefix_extra" "1")
6787 (set_attr "length_immediate" "1")
6788 (set_attr "prefix" "evex")
6789 (set_attr "mode" "<sseinsnmode>")])
;; Low-half (elements 0..3) extract of a V8FI operand, optionally
;; masked.  At most one of the two operands may be memory; the second
;; alternative loads from memory into a register.  One output path
;; prints the 64x4 extract with immediate 0.
6791 (define_insn "vec_extract_lo_<mode><mask_name>"
6792 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6793 (vec_select:<ssehalfvecmode>
6794 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6795 (parallel [(const_int 0) (const_int 1)
6796 (const_int 2) (const_int 3)])))]
6797 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6800 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6804 [(set_attr "type" "sselog1")
6805 (set_attr "prefix_extra" "1")
6806 (set_attr "length_immediate" "1")
6807 (set_attr "prefix" "evex")
6808 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4..7) of a V8FI register to
;; memory.  Operand 2 (the merge source) must be the same memory as the
;; destination and operand 3 is the mask register.  Emitted as the 64x4
;; extract with immediate 1.
6810 (define_insn "vec_extract_hi_<mode>_maskm"
6811 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6812 (vec_merge:<ssehalfvecmode>
6813 (vec_select:<ssehalfvecmode>
6814 (match_operand:V8FI 1 "register_operand" "v")
6815 (parallel [(const_int 4) (const_int 5)
6816 (const_int 6) (const_int 7)]))
6817 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6818 (match_operand:QI 3 "register_operand" "Yk")))]
6820 && rtx_equal_p (operands[2], operands[0])"
6821 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6822 [(set_attr "type" "sselog")
6823 (set_attr "prefix_extra" "1")
6824 (set_attr "length_immediate" "1")
6825 (set_attr "memory" "store")
6826 (set_attr "prefix" "evex")
6827 (set_attr "mode" "<sseinsnmode>")])
;; High-half (elements 4..7) extract of a V8FI register, optionally
;; masked.  Emitted as the 64x4 extract with immediate 1.
6829 (define_insn "vec_extract_hi_<mode><mask_name>"
6830 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6831 (vec_select:<ssehalfvecmode>
6832 (match_operand:V8FI 1 "register_operand" "v")
6833 (parallel [(const_int 4) (const_int 5)
6834 (const_int 6) (const_int 7)])))]
6836 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6837 [(set_attr "type" "sselog1")
6838 (set_attr "prefix_extra" "1")
6839 (set_attr "length_immediate" "1")
6840 (set_attr "prefix" "evex")
6841 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 8..15) of a V16FI register to
;; memory.  Operand 2 (the merge source) must be the same memory as the
;; destination.  Emitted as the 32x8 extract with immediate 1.
;; The mask operand uses "Yk", matching the other masked-store patterns
;; in this file, rather than the broader "k", because the register is
;; printed as a {%k} write mask.
6843 (define_insn "vec_extract_hi_<mode>_maskm"
6844 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6845 (vec_merge:<ssehalfvecmode>
6846 (vec_select:<ssehalfvecmode>
6847 (match_operand:V16FI 1 "register_operand" "v")
6848 (parallel [(const_int 8) (const_int 9)
6849 (const_int 10) (const_int 11)
6850 (const_int 12) (const_int 13)
6851 (const_int 14) (const_int 15)]))
6852 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6853 (match_operand:QI 3 "register_operand" "Yk")))]
6855 && rtx_equal_p (operands[2], operands[0])"
6856 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6857 [(set_attr "type" "sselog1")
6858 (set_attr "prefix_extra" "1")
6859 (set_attr "length_immediate" "1")
6860 (set_attr "prefix" "evex")
6861 (set_attr "mode" "<sseinsnmode>")])
;; High-half (elements 8..15) extract of a V16FI register.  Alternative
;; 0 (isa "avx512dq") prints the 32x8 extract with the optional mask
;; operand; alternative 1 (isa "noavx512dq") prints vextracti64x4 with
;; no mask decoration.  Both use immediate 1.
6863 (define_insn "vec_extract_hi_<mode><mask_name>"
6864 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6865 (vec_select:<ssehalfvecmode>
6866 (match_operand:V16FI 1 "register_operand" "v,v")
6867 (parallel [(const_int 8) (const_int 9)
6868 (const_int 10) (const_int 11)
6869 (const_int 12) (const_int 13)
6870 (const_int 14) (const_int 15)])))]
6871 "TARGET_AVX512F && <mask_avx512dq_condition>"
6873 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6874 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6875 [(set_attr "type" "sselog1")
6876 (set_attr "prefix_extra" "1")
6877 (set_attr "isa" "avx512dq,noavx512dq")
6878 (set_attr "length_immediate" "1")
6879 (set_attr "prefix" "evex")
6880 (set_attr "mode" "<sseinsnmode>")])
;; Masked 128-bit extract from a 256-bit VI48F_256 register; requires
;; TARGET_AVX512DQ and TARGET_AVX512VL.  Operand 2 (0 or 1) chooses the
;; low or high half, operand 3 is the merge source and operand 4 the
;; QImode mask.  A memory destination combined with a CONST_VECTOR merge
;; source is first passed through force_reg in the half-vector mode;
;; then the matching gen_vec_extract_{lo,hi}_<mode>_mask is emitted.
6882 (define_expand "avx512vl_vextractf128<mode>"
6883 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6884 (match_operand:VI48F_256 1 "register_operand")
6885 (match_operand:SI 2 "const_0_to_1_operand")
6886 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6887 (match_operand:QI 4 "register_operand")]
6888 "TARGET_AVX512DQ && TARGET_AVX512VL"
6890 rtx (*insn)(rtx, rtx, rtx, rtx);
6892 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6893 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6895 switch (INTVAL (operands[2]))
6898 insn = gen_vec_extract_lo_<mode>_mask;
6901 insn = gen_vec_extract_hi_<mode>_mask;
6907 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Expander for an unmasked half-vector extract from a 256-bit V_256 source.
;; Operand 2 (constant 0 or 1) is switched on to choose between
;; gen_vec_extract_lo_<mode> and gen_vec_extract_hi_<mode>; the chosen
;; generator is then called with (operands[0], operands[1]).
6911 (define_expand "avx_vextractf128<mode>"
6912 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6913 (match_operand:V_256 1 "register_operand")
6914 (match_operand:SI 2 "const_0_to_1_operand")]
6917 rtx (*insn)(rtx, rtx);
6919 switch (INTVAL (operands[2]))
6922 insn = gen_vec_extract_lo_<mode>;
6925 insn = gen_vec_extract_hi_<mode>;
6931 emit_insn (insn (operands[0], operands[1]));
;; Low-half extraction for 16-element vectors (V16FI): vec_select copies
;; elements 0..7 of operand 1 into operand 0.  The insn condition rejects the
;; memory-to-memory combination.  The output code visible here returns a
;; vextract<shuffletype>32x8 template with immediate 0x0.
6935 (define_insn "vec_extract_lo_<mode><mask_name>"
6936 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6937 (vec_select:<ssehalfvecmode>
6938 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6939 (parallel [(const_int 0) (const_int 1)
6940 (const_int 2) (const_int 3)
6941 (const_int 4) (const_int 5)
6942 (const_int 6) (const_int 7)])))]
6944 && <mask_mode512bit_condition>
6945 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6948 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
;; NOTE(review): the opening line of this definition is not visible in this
;; excerpt; it reads like the post-reload split for the V16FI low-half extract.
;; Matches a select of elements 0..7 once reload_completed (and not
;; memory-to-memory).  Operand 1 is re-expressed in <ssehalfvecmode> -- via
;; gen_rtx_REG on the same REGNO or via gen_lowpart -- and a plain
;; emit_move_insn into operand 0 is emitted.
6954 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6955 (vec_select:<ssehalfvecmode>
6956 (match_operand:V16FI 1 "nonimmediate_operand")
6957 (parallel [(const_int 0) (const_int 1)
6958 (const_int 2) (const_int 3)
6959 (const_int 4) (const_int 5)
6960 (const_int 6) (const_int 7)])))]
6961 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6962 && reload_completed"
6965 rtx op1 = operands[1];
6967 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6969 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6970 emit_move_insn (operands[0], op1);
;; Low-half extraction for 4-element 256-bit vectors (VI8F_256): vec_select
;; copies elements 0 and 1 of operand 1 into operand 0.  Memory-to-memory is
;; rejected by the condition.  The output code visible here returns a
;; vextract<shuffletype>64x2 template with immediate 0x0 and a {%3} mask
;; decoration.  Alternative 0 is register-only, alternative 1 is a store
;; (see the "memory" attribute).
6974 (define_insn "vec_extract_lo_<mode><mask_name>"
6975 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
6976 (vec_select:<ssehalfvecmode>
6977 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
6978 (parallel [(const_int 0) (const_int 1)])))]
6980 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
6981 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6984 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
6988 [(set_attr "type" "sselog")
6989 (set_attr "prefix_extra" "1")
6990 (set_attr "length_immediate" "1")
6991 (set_attr "memory" "none,store")
6992 (set_attr "prefix" "evex")
6993 (set_attr "mode" "XI")])
;; NOTE(review): the opening line of this definition is not visible in this
;; excerpt; it reads like the post-reload split for the VI8F_256 low-half
;; extract.  Matches a select of elements 0..1 under TARGET_AVX once
;; reload_completed (and not memory-to-memory).  Operand 1 is re-expressed in
;; <ssehalfvecmode> -- via gen_rtx_REG on the same REGNO or via gen_lowpart --
;; and a plain emit_move_insn into operand 0 is emitted.
6996 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6997 (vec_select:<ssehalfvecmode>
6998 (match_operand:VI8F_256 1 "nonimmediate_operand")
6999 (parallel [(const_int 0) (const_int 1)])))]
7000 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7001 && reload_completed"
7004 rtx op1 = operands[1];
7006 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7008 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7009 emit_move_insn (operands[0], op1);
;; High-half extraction for 4-element 256-bit vectors (VI8F_256): vec_select
;; copies elements 2 and 3 of register operand 1 into operand 0.
;; The output code tests TARGET_AVX512VL and TARGET_AVX512DQ and returns one
;; of three templates, all with immediate 0x1:
;;   vextract<shuffletype>64x2 (the only one carrying <mask_operand2>),
;;   vextract<shuffletype>32x4, or vextract<i128>.
;; NOTE(review): the exact nesting of the two tests is not fully visible in
;; this excerpt.
7013 (define_insn "vec_extract_hi_<mode><mask_name>"
7014 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7015 (vec_select:<ssehalfvecmode>
7016 (match_operand:VI8F_256 1 "register_operand" "v,v")
7017 (parallel [(const_int 2) (const_int 3)])))]
7018 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7020 if (TARGET_AVX512VL)
7022 if (TARGET_AVX512DQ)
7023 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7025 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7028 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7030 [(set_attr "type" "sselog")
7031 (set_attr "prefix_extra" "1")
7032 (set_attr "length_immediate" "1")
7033 (set_attr "memory" "none,store")
7034 (set_attr "prefix" "vex")
7035 (set_attr "mode" "<sseinsnmode>")])
;; NOTE(review): the opening line of this definition is not visible in this
;; excerpt; it reads like the post-reload split for the VI4F_256 low-half
;; extract.  Matches a select of elements 0..3 under TARGET_AVX once
;; reload_completed (and not memory-to-memory).  Operand 1 is re-expressed in
;; <ssehalfvecmode> -- via gen_rtx_REG on the same REGNO or via gen_lowpart --
;; and a plain emit_move_insn into operand 0 is emitted.
7038 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7039 (vec_select:<ssehalfvecmode>
7040 (match_operand:VI4F_256 1 "nonimmediate_operand")
7041 (parallel [(const_int 0) (const_int 1)
7042 (const_int 2) (const_int 3)])))]
7043 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
7046 rtx op1 = operands[1];
7048 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7050 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7051 emit_move_insn (operands[0], op1);
;; Low-half extraction for 8-element 256-bit vectors (VI4F_256): vec_select
;; copies elements 0..3 of operand 1 into operand 0.  The output code visible
;; here returns a vextract<shuffletype>32x4 template with immediate 0x0 and
;; the <mask_operand2> decoration.
;; NOTE(review): operand 1 uses predicate nonimmediate_operand but its only
;; constraint is "v" -- confirm that accepting MEM here is intentional.
7056 (define_insn "vec_extract_lo_<mode><mask_name>"
7057 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7058 (vec_select:<ssehalfvecmode>
7059 (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
7060 (parallel [(const_int 0) (const_int 1)
7061 (const_int 2) (const_int 3)])))]
7062 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7065 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7069 [(set_attr "type" "sselog1")
7070 (set_attr "prefix_extra" "1")
7071 (set_attr "length_immediate" "1")
7072 (set_attr "prefix" "evex")
7073 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the low half (elements 0..3) of a VI4F_256 register to
;; memory.  The vec_merge combines the selected half with operand 2 under the
;; QImode mask in operand 3 (mask register, constraint "k").  Operand 2 is
;; tied to the destination (constraint "0") and the condition additionally
;; requires rtx_equal_p (operands[2], operands[0]), so the merge source is the
;; destination memory itself.  Emits vextract<shuffletype>32x4 with
;; immediate 0x0 and a {%3} mask decoration.
7075 (define_insn "vec_extract_lo_<mode>_maskm"
7076 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7077 (vec_merge:<ssehalfvecmode>
7078 (vec_select:<ssehalfvecmode>
7079 (match_operand:VI4F_256 1 "register_operand" "v")
7080 (parallel [(const_int 0) (const_int 1)
7081 (const_int 2) (const_int 3)]))
7082 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7083 (match_operand:QI 3 "register_operand" "k")))]
7084 "TARGET_AVX512VL && TARGET_AVX512F
7085 && rtx_equal_p (operands[2], operands[0])"
7086 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7087 [(set_attr "type" "sselog1")
7088 (set_attr "prefix_extra" "1")
7089 (set_attr "length_immediate" "1")
7090 (set_attr "prefix" "evex")
7091 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4..7) of a VI4F_256 register to
;; memory.  The vec_merge combines the selected half with operand 2 under the
;; mask in operand 3.  Operand 2 is tied to the destination (constraint "0")
;; and the condition additionally requires
;; rtx_equal_p (operands[2], operands[0]), so the merge source is the
;; destination memory itself.  Emits vextract<shuffletype>32x4 with
;; immediate 0x1 and a {%3} mask decoration.
;; Operand 3 is the vec_merge selector held in a mask register ("k"), so it
;; is a scalar QImode value, not a <ssehalfvecmode> vector; this matches the
;; low-half _maskm pattern.
7093 (define_insn "vec_extract_hi_<mode>_maskm"
7094 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7095 (vec_merge:<ssehalfvecmode>
7096 (vec_select:<ssehalfvecmode>
7097 (match_operand:VI4F_256 1 "register_operand" "v")
7098 (parallel [(const_int 4) (const_int 5)
7099 (const_int 6) (const_int 7)]))
7100 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7101 (match_operand:QI 3 "register_operand" "k")))]
7102 "TARGET_AVX512F && TARGET_AVX512VL
7103 && rtx_equal_p (operands[2], operands[0])"
7104 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7105 [(set_attr "type" "sselog1")
7106 (set_attr "prefix_extra" "1")
7107 (set_attr "length_immediate" "1")
7108 (set_attr "prefix" "evex")
7109 (set_attr "mode" "<sseinsnmode>")])
;; High-half extraction for 8-element 256-bit vectors (VI4F_256): vec_select
;; copies elements 4..7 of register operand 1 into operand 0.
;; With TARGET_AVX512VL the output is vextract<shuffletype>32x4 (carrying
;; <mask_operand2>); the other visible template is vextract<i128>.  Both use
;; immediate 0x1.  The "prefix" attribute follows the same test: "evex" when
;; TARGET_AVX512VL, else "vex".
7111 (define_insn "vec_extract_hi_<mode><mask_name>"
7112 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7113 (vec_select:<ssehalfvecmode>
7114 (match_operand:VI4F_256 1 "register_operand" "v")
7115 (parallel [(const_int 4) (const_int 5)
7116 (const_int 6) (const_int 7)])))]
7117 "TARGET_AVX && <mask_avx512vl_condition>"
7119 if (TARGET_AVX512VL)
7120 return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7122 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7124 [(set_attr "type" "sselog1")
7125 (set_attr "prefix_extra" "1")
7126 (set_attr "length_immediate" "1")
7127 (set (attr "prefix")
7129 (match_test "TARGET_AVX512VL")
7130 (const_string "evex")
7131 (const_string "vex")))
7132 (set_attr "mode" "<sseinsnmode>")])
;; Low half of a V32HI vector: selects elements 0..15 of operand 1 into the
;; V16HI operand 0 (memory-to-memory rejected).
;; Split once reload_completed into a plain (set (match_dup 0) (match_dup 1))
;; after operand 1 has been rewritten to V16HImode: the same hard register
;; number when it is a REG; adjust_address at offset 0 is the other visible
;; rewrite (presumably for the memory case).
7134 (define_insn_and_split "vec_extract_lo_v32hi"
7135 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7137 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7138 (parallel [(const_int 0) (const_int 1)
7139 (const_int 2) (const_int 3)
7140 (const_int 4) (const_int 5)
7141 (const_int 6) (const_int 7)
7142 (const_int 8) (const_int 9)
7143 (const_int 10) (const_int 11)
7144 (const_int 12) (const_int 13)
7145 (const_int 14) (const_int 15)])))]
7146 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7148 "&& reload_completed"
7149 [(set (match_dup 0) (match_dup 1))]
7151 if (REG_P (operands[1]))
7152 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
7154 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; High half of a V32HI vector: selects elements 16..31 of operand 1 into the
;; V16HI operand 0 (register or memory destination).  Emits vextracti64x4
;; with immediate 0x1; alternative 1 is a store (see the "memory" attribute).
7157 (define_insn "vec_extract_hi_v32hi"
7158 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7160 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
7161 (parallel [(const_int 16) (const_int 17)
7162 (const_int 18) (const_int 19)
7163 (const_int 20) (const_int 21)
7164 (const_int 22) (const_int 23)
7165 (const_int 24) (const_int 25)
7166 (const_int 26) (const_int 27)
7167 (const_int 28) (const_int 29)
7168 (const_int 30) (const_int 31)])))]
7170 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7171 [(set_attr "type" "sselog")
7172 (set_attr "prefix_extra" "1")
7173 (set_attr "length_immediate" "1")
7174 (set_attr "memory" "none,store")
7175 (set_attr "prefix" "evex")
7176 (set_attr "mode" "XI")])
;; Low half of a V16HI vector: selects elements 0..7 of operand 1 into the
;; V8HI operand 0 (memory-to-memory rejected).
;; Split once reload_completed into a plain (set (match_dup 0) (match_dup 1))
;; after operand 1 has been rewritten to V8HImode: the same hard register
;; number when it is a REG; adjust_address at offset 0 is the other visible
;; rewrite (presumably for the memory case).
7178 (define_insn_and_split "vec_extract_lo_v16hi"
7179 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7181 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
7182 (parallel [(const_int 0) (const_int 1)
7183 (const_int 2) (const_int 3)
7184 (const_int 4) (const_int 5)
7185 (const_int 6) (const_int 7)])))]
7186 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7188 "&& reload_completed"
7189 [(set (match_dup 0) (match_dup 1))]
7191 if (REG_P (operands[1]))
7192 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
7194 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; High half of a V16HI vector: selects elements 8..15 of register operand 1
;; into the V8HI operand 0 (register or memory destination).  Emits
;; vextract%~128 with immediate 0x1; alternative 1 is a store (see the
;; "memory" attribute).
7197 (define_insn "vec_extract_hi_v16hi"
7198 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7200 (match_operand:V16HI 1 "register_operand" "x,x")
7201 (parallel [(const_int 8) (const_int 9)
7202 (const_int 10) (const_int 11)
7203 (const_int 12) (const_int 13)
7204 (const_int 14) (const_int 15)])))]
7206 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7207 [(set_attr "type" "sselog")
7208 (set_attr "prefix_extra" "1")
7209 (set_attr "length_immediate" "1")
7210 (set_attr "memory" "none,store")
7211 (set_attr "prefix" "vex")
7212 (set_attr "mode" "OI")])
;; Low half of a V64QI vector: selects elements 0..31 of operand 1 into the
;; V32QI operand 0 (memory-to-memory rejected).
;; Split once reload_completed into a plain (set (match_dup 0) (match_dup 1))
;; after operand 1 has been rewritten to V32QImode: the same hard register
;; number when it is a REG; adjust_address at offset 0 is the other visible
;; rewrite (presumably for the memory case).
7214 (define_insn_and_split "vec_extract_lo_v64qi"
7215 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7217 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7218 (parallel [(const_int 0) (const_int 1)
7219 (const_int 2) (const_int 3)
7220 (const_int 4) (const_int 5)
7221 (const_int 6) (const_int 7)
7222 (const_int 8) (const_int 9)
7223 (const_int 10) (const_int 11)
7224 (const_int 12) (const_int 13)
7225 (const_int 14) (const_int 15)
7226 (const_int 16) (const_int 17)
7227 (const_int 18) (const_int 19)
7228 (const_int 20) (const_int 21)
7229 (const_int 22) (const_int 23)
7230 (const_int 24) (const_int 25)
7231 (const_int 26) (const_int 27)
7232 (const_int 28) (const_int 29)
7233 (const_int 30) (const_int 31)])))]
7234 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7236 "&& reload_completed"
7237 [(set (match_dup 0) (match_dup 1))]
7239 if (REG_P (operands[1]))
7240 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
7242 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; High half of a V64QI vector: selects elements 32..63 of operand 1 into the
;; V32QI operand 0 (register or memory destination).  Emits vextracti64x4
;; with immediate 0x1; alternative 1 is a store (see the "memory" attribute).
7245 (define_insn "vec_extract_hi_v64qi"
7246 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7248 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
7249 (parallel [(const_int 32) (const_int 33)
7250 (const_int 34) (const_int 35)
7251 (const_int 36) (const_int 37)
7252 (const_int 38) (const_int 39)
7253 (const_int 40) (const_int 41)
7254 (const_int 42) (const_int 43)
7255 (const_int 44) (const_int 45)
7256 (const_int 46) (const_int 47)
7257 (const_int 48) (const_int 49)
7258 (const_int 50) (const_int 51)
7259 (const_int 52) (const_int 53)
7260 (const_int 54) (const_int 55)
7261 (const_int 56) (const_int 57)
7262 (const_int 58) (const_int 59)
7263 (const_int 60) (const_int 61)
7264 (const_int 62) (const_int 63)])))]
7266 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7267 [(set_attr "type" "sselog")
7268 (set_attr "prefix_extra" "1")
7269 (set_attr "length_immediate" "1")
7270 (set_attr "memory" "none,store")
7271 (set_attr "prefix" "evex")
7272 (set_attr "mode" "XI")])
;; Low half of a V32QI vector: selects elements 0..15 of operand 1 into the
;; V16QI operand 0 (memory-to-memory rejected).
;; Split once reload_completed into a plain (set (match_dup 0) (match_dup 1))
;; after operand 1 has been rewritten to V16QImode: the same hard register
;; number when it is a REG; adjust_address at offset 0 is the other visible
;; rewrite (presumably for the memory case).
7274 (define_insn_and_split "vec_extract_lo_v32qi"
7275 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7277 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
7278 (parallel [(const_int 0) (const_int 1)
7279 (const_int 2) (const_int 3)
7280 (const_int 4) (const_int 5)
7281 (const_int 6) (const_int 7)
7282 (const_int 8) (const_int 9)
7283 (const_int 10) (const_int 11)
7284 (const_int 12) (const_int 13)
7285 (const_int 14) (const_int 15)])))]
7286 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7288 "&& reload_completed"
7289 [(set (match_dup 0) (match_dup 1))]
7291 if (REG_P (operands[1]))
7292 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
7294 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; High half of a V32QI vector: selects elements 16..31 of register operand 1
;; into the V16QI operand 0 (register or memory destination).  Emits
;; vextract%~128 with immediate 0x1; alternative 1 is a store (see the
;; "memory" attribute).
7297 (define_insn "vec_extract_hi_v32qi"
7298 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7300 (match_operand:V32QI 1 "register_operand" "x,x")
7301 (parallel [(const_int 16) (const_int 17)
7302 (const_int 18) (const_int 19)
7303 (const_int 20) (const_int 21)
7304 (const_int 22) (const_int 23)
7305 (const_int 24) (const_int 25)
7306 (const_int 26) (const_int 27)
7307 (const_int 28) (const_int 29)
7308 (const_int 30) (const_int 31)])))]
7310 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7311 [(set_attr "type" "sselog")
7312 (set_attr "prefix_extra" "1")
7313 (set_attr "length_immediate" "1")
7314 (set_attr "memory" "none,store")
7315 (set_attr "prefix" "vex")
7316 (set_attr "mode" "OI")])
7318 ;; Modes handled by vec_extract patterns.
;; Per-mode enabling conditions: the 512-bit QI/HI modes require
;; TARGET_AVX512BW, the other 512-bit modes TARGET_AVX512F, and all 256-bit
;; modes TARGET_AVX; the 128-bit modes carry no extra condition here.
7319 (define_mode_iterator VEC_EXTRACT_MODE
7320 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7321 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7322 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7323 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7324 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7325 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Scalar element extraction expander for every mode in VEC_EXTRACT_MODE.
;;   operand 0: scalar destination register (<ssescalarmode>)
;;   operand 1: source vector register
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_extract, called with `false`
;; as its first argument and INTVAL (operands[2]) as the index.
7327 (define_expand "vec_extract<mode>"
7328 [(match_operand:<ssescalarmode> 0 "register_operand")
7329 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7330 (match_operand 2 "const_int_operand")]
7333 ix86_expand_vector_extract (false, operands[0], operands[1],
7334 INTVAL (operands[2]));
7338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7340 ;; Parallel double-precision floating point element swizzling
7342 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd: from the pair (operand 1, operand 2) select elements
;; 1,9,3,11,5,13,7,15 into the V8DF register operand 0.
;; Operand 1 only has the register constraint "v", so its predicate is
;; register_operand (as in the 256-bit avx_unpckhpd256 pattern); a memory
;; source is only accepted for operand 2 ("vm").
7344 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7345 [(set (match_operand:V8DF 0 "register_operand" "=v")
7348 (match_operand:V8DF 1 "register_operand" "v")
7349 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7350 (parallel [(const_int 1) (const_int 9)
7351 (const_int 3) (const_int 11)
7352 (const_int 5) (const_int 13)
7353 (const_int 7) (const_int 15)])))]
7355 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7356 [(set_attr "type" "sselog")
7357 (set_attr "prefix" "evex")
7358 (set_attr "mode" "V8DF")])
7360 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: from the pair (register operand 1, register-or-memory
;; operand 2) select elements 1,5,3,7 into the V4DF register operand 0.
;; Enabled under TARGET_AVX && <mask_avx512vl_condition>.
7361 (define_insn "avx_unpckhpd256<mask_name>"
7362 [(set (match_operand:V4DF 0 "register_operand" "=v")
7365 (match_operand:V4DF 1 "register_operand" "v")
7366 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7367 (parallel [(const_int 1) (const_int 5)
7368 (const_int 3) (const_int 7)])))]
7369 "TARGET_AVX && <mask_avx512vl_condition>"
7370 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7371 [(set_attr "type" "sselog")
7372 (set_attr "prefix" "vex")
7373 (set_attr "mode" "V4DF")])
;; V4DF high-interleave expander built from three selections: two with index
;; sets {0,4,2,6} and {1,5,3,7}, then a final {2,3,6,7} selection into
;; operand 0.  operands[3] and operands[4] are fresh V4DF pseudos created in
;; the preparation code (presumably the two intermediate results -- the lines
;; that name their uses are not visible in this excerpt).
7375 (define_expand "vec_interleave_highv4df"
7379 (match_operand:V4DF 1 "register_operand" "x")
7380 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7381 (parallel [(const_int 0) (const_int 4)
7382 (const_int 2) (const_int 6)])))
7388 (parallel [(const_int 1) (const_int 5)
7389 (const_int 3) (const_int 7)])))
7390 (set (match_operand:V4DF 0 "register_operand")
7395 (parallel [(const_int 2) (const_int 3)
7396 (const_int 6) (const_int 7)])))]
7399 operands[3] = gen_reg_rtx (V4DFmode);
7400 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpckhpd: elements 1 and 3 of the pair (operand 1,
;; operand 2) are selected, with operand 3 (tied to the destination, "0", or
;; constraint "C") and the QImode mask register operand 4 ("Yk") as the
;; remaining merge inputs.  The template prints the mask as {%4} followed by
;; %N3.
7404 (define_insn "avx512vl_unpckhpd128_mask"
7405 [(set (match_operand:V2DF 0 "register_operand" "=v")
7409 (match_operand:V2DF 1 "register_operand" "v")
7410 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7411 (parallel [(const_int 1) (const_int 3)]))
7412 (match_operand:V2DF 3 "vector_move_operand" "0C")
7413 (match_operand:QI 4 "register_operand" "Yk")))]
7415 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7416 [(set_attr "type" "sselog")
7417 (set_attr "prefix" "evex")
7418 (set_attr "mode" "V2DF")])
;; V2DF high-interleave expander.  Both sources may be register or memory;
;; if ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register before the pattern is
;; emitted.
7420 (define_expand "vec_interleave_highv2df"
7421 [(set (match_operand:V2DF 0 "register_operand")
7424 (match_operand:V2DF 1 "nonimmediate_operand")
7425 (match_operand:V2DF 2 "nonimmediate_operand"))
7426 (parallel [(const_int 1)
7430 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7431 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matcher for the V2DF high interleave, valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.
;; Six alternatives, in order (isa attribute in parentheses):
;;   0 (noavx) unpckhpd      1 (avx) vunpckhpd      2 (sse3) %vmovddup
;;   3 (noavx) movlpd        4 (avx) vmovlpd        5 (*)    %vmovhpd to memory
;; Alternatives 2-4 read operand 1 through the %H1 operand modifier.
7434 (define_insn "*vec_interleave_highv2df"
7435 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7438 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7439 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7440 (parallel [(const_int 1)
7442 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7444 unpckhpd\t{%2, %0|%0, %2}
7445 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7446 %vmovddup\t{%H1, %0|%0, %H1}
7447 movlpd\t{%H1, %0|%0, %H1}
7448 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7449 %vmovhpd\t{%1, %0|%q0, %1}"
7450 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7451 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7452 (set_attr "ssememalign" "64")
7453 (set_attr "prefix_data16" "*,*,*,1,*,1")
7454 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7455 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; 512-bit movddup expander: a selection with indices 0,8,2,10,4,12,6,14
;; whose only visible input is the register-or-memory operand 1
;; (NOTE(review): the second half of the concatenation is on a line not
;; visible in this excerpt).
7457 (define_expand "avx512f_movddup512<mask_name>"
7458 [(set (match_operand:V8DF 0 "register_operand")
7461 (match_operand:V8DF 1 "nonimmediate_operand")
7463 (parallel [(const_int 0) (const_int 8)
7464 (const_int 2) (const_int 10)
7465 (const_int 4) (const_int 12)
7466 (const_int 6) (const_int 14)])))]
;; 512-bit unpcklpd expander: from the pair (register operand 1,
;; register-or-memory operand 2) select elements 0,8,2,10,4,12,6,14 into the
;; V8DF register operand 0.
7469 (define_expand "avx512f_unpcklpd512<mask_name>"
7470 [(set (match_operand:V8DF 0 "register_operand")
7473 (match_operand:V8DF 1 "register_operand")
7474 (match_operand:V8DF 2 "nonimmediate_operand"))
7475 (parallel [(const_int 0) (const_int 8)
7476 (const_int 2) (const_int 10)
7477 (const_int 4) (const_int 12)
7478 (const_int 6) (const_int 14)])))]
;; Matcher for the 512-bit low interleave (indices 0,8,2,10,4,12,6,14).
;; Alternative 0: operand 2 is tied to operand 1 ("1"), i.e. both inputs are
;;   the same register-or-memory value, and vmovddup is emitted.
;; Alternative 1: register operand 1 with register-or-memory operand 2, and
;;   vunpcklpd is emitted.
7481 (define_insn "*avx512f_unpcklpd512<mask_name>"
7482 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7485 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7486 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7487 (parallel [(const_int 0) (const_int 8)
7488 (const_int 2) (const_int 10)
7489 (const_int 4) (const_int 12)
7490 (const_int 6) (const_int 14)])))]
7493 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7494 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7495 [(set_attr "type" "sselog")
7496 (set_attr "prefix" "evex")
7497 (set_attr "mode" "V8DF")])
7499 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit movddup expander: a selection with indices 0,4,2,6 whose only
;; visible input is the register-or-memory operand 1 (NOTE(review): the
;; second half of the concatenation is on a line not visible in this
;; excerpt).  Enabled under TARGET_AVX && <mask_avx512vl_condition>.
7500 (define_expand "avx_movddup256<mask_name>"
7501 [(set (match_operand:V4DF 0 "register_operand")
7504 (match_operand:V4DF 1 "nonimmediate_operand")
7506 (parallel [(const_int 0) (const_int 4)
7507 (const_int 2) (const_int 6)])))]
7508 "TARGET_AVX && <mask_avx512vl_condition>")
;; 256-bit unpcklpd expander: from the pair (register operand 1,
;; register-or-memory operand 2) select elements 0,4,2,6 into the V4DF
;; register operand 0.  Enabled under TARGET_AVX && <mask_avx512vl_condition>.
7510 (define_expand "avx_unpcklpd256<mask_name>"
7511 [(set (match_operand:V4DF 0 "register_operand")
7514 (match_operand:V4DF 1 "register_operand")
7515 (match_operand:V4DF 2 "nonimmediate_operand"))
7516 (parallel [(const_int 0) (const_int 4)
7517 (const_int 2) (const_int 6)])))]
7518 "TARGET_AVX && <mask_avx512vl_condition>")
;; Matcher for the 256-bit low interleave (indices 0,4,2,6).
;; Alternative 0: register operand 1 with register-or-memory operand 2;
;;   emits vunpcklpd.
;; Alternative 1: memory operand 1 with operand 2 tied to it ("1"), i.e. both
;;   inputs are the same memory value; emits vmovddup.
7520 (define_insn "*avx_unpcklpd256<mask_name>"
7521 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7524 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7525 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7526 (parallel [(const_int 0) (const_int 4)
7527 (const_int 2) (const_int 6)])))]
7528 "TARGET_AVX && <mask_avx512vl_condition>"
7530 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7531 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7532 [(set_attr "type" "sselog")
7533 (set_attr "prefix" "vex")
7534 (set_attr "mode" "V4DF")])
;; V4DF low-interleave expander built from three selections: two with index
;; sets {0,4,2,6} and {1,5,3,7}, then a final {0,1,4,5} selection into
;; operand 0.  operands[3] and operands[4] are fresh V4DF pseudos created in
;; the preparation code (presumably the two intermediate results -- the lines
;; that name their uses are not visible in this excerpt).
7536 (define_expand "vec_interleave_lowv4df"
7540 (match_operand:V4DF 1 "register_operand" "x")
7541 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7542 (parallel [(const_int 0) (const_int 4)
7543 (const_int 2) (const_int 6)])))
7549 (parallel [(const_int 1) (const_int 5)
7550 (const_int 3) (const_int 7)])))
7551 (set (match_operand:V4DF 0 "register_operand")
7556 (parallel [(const_int 0) (const_int 1)
7557 (const_int 4) (const_int 5)])))]
7560 operands[3] = gen_reg_rtx (V4DFmode);
7561 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpcklpd: elements 0 and 2 of the pair (operand 1,
;; operand 2) are selected, with operand 3 (tied to the destination, "0", or
;; constraint "C") and the QImode mask register operand 4 ("Yk") as the
;; remaining merge inputs.  The template prints the mask as {%4} followed by
;; %N3.
7564 (define_insn "avx512vl_unpcklpd128_mask"
7565 [(set (match_operand:V2DF 0 "register_operand" "=v")
7569 (match_operand:V2DF 1 "register_operand" "v")
7570 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7571 (parallel [(const_int 0) (const_int 2)]))
7572 (match_operand:V2DF 3 "vector_move_operand" "0C")
7573 (match_operand:QI 4 "register_operand" "Yk")))]
7575 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7576 [(set_attr "type" "sselog")
7577 (set_attr "prefix" "evex")
7578 (set_attr "mode" "V2DF")])
;; V2DF low-interleave expander.  Both sources may be register or memory;
;; if ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register before the pattern is
;; emitted.
7580 (define_expand "vec_interleave_lowv2df"
7581 [(set (match_operand:V2DF 0 "register_operand")
7584 (match_operand:V2DF 1 "nonimmediate_operand")
7585 (match_operand:V2DF 2 "nonimmediate_operand"))
7586 (parallel [(const_int 0)
7590 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7591 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for the V2DF low interleave, valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.
;; Six alternatives, in order (isa attribute in parentheses):
;;   0 (noavx) unpcklpd      1 (avx) vunpcklpd      2 (sse3) %vmovddup
;;   3 (noavx) movhpd        4 (avx) vmovhpd        5 (*)    %vmovlpd to %H0
;; Alternative 5 has an offsettable-memory destination ("o") and writes it
;; through the %H0 operand modifier.
7594 (define_insn "*vec_interleave_lowv2df"
7595 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7598 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7599 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7600 (parallel [(const_int 0)
7602 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7604 unpcklpd\t{%2, %0|%0, %2}
7605 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7606 %vmovddup\t{%1, %0|%0, %q1}
7607 movhpd\t{%2, %0|%0, %q2}
7608 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7609 %vmovlpd\t{%2, %H0|%H0, %2}"
7610 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7611 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7612 (set_attr "ssememalign" "64")
7613 (set_attr "prefix_data16" "*,*,*,1,*,1")
7614 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7615 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; NOTE(review): the opening line of this definition is not visible in this
;; excerpt; it reads like a post-reload split (TARGET_SSE3 && reload_completed)
;; for a V2DF store to memory from register operand 1.
;; The replacement takes the DFmode view of operand 1's register (same REGNO)
;; and stores that same value twice: to the destination at byte offset 0 and
;; again at byte offset 8.
7618 [(set (match_operand:V2DF 0 "memory_operand")
7621 (match_operand:V2DF 1 "register_operand")
7623 (parallel [(const_int 0)
7625 "TARGET_SSE3 && reload_completed"
7628 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7629 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7630 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; NOTE(review): the opening line of this definition is not visible in this
;; excerpt; it reads like a split for a V2DF selection from memory operand 1.
;; Matches when the two selector constants satisfy op2 + 2 == op3 (op2 is 0
;; or 1) under TARGET_SSE3, and rewrites the operation as a vec_duplicate of
;; the single DFmode element at byte offset op2 * 8 of operand 1.
7635 [(set (match_operand:V2DF 0 "register_operand")
7638 (match_operand:V2DF 1 "memory_operand")
7640 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7641 (match_operand:SI 3 "const_int_operand")])))]
7642 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7643 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7645 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalef on 128-bit float vectors (VF_128): two inputs (register
;; operand 1; operand 2 governed by <round_nimm_predicate> /
;; <round_constraint>), EVEX-encoded, with the optional <round_op3> operand
;; printed in the template.  The "mode" attribute is the scalar mode.
7648 (define_insn "avx512f_vmscalef<mode><round_name>"
7649 [(set (match_operand:VF_128 0 "register_operand" "=v")
7652 [(match_operand:VF_128 1 "register_operand" "v")
7653 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7658 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7659 [(set_attr "prefix" "evex")
7660 (set_attr "mode" "<ssescalarmode>")])
;; Packed vscalef over VF_AVX512VL modes: register operand 1 and operand 2
;; (nonimmediate_operand, constraint <round_constraint>), EVEX-encoded.  The
;; template prints <round_mask_op3> next to operand 2 and <mask_operand3>
;; after the destination.
7662 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7663 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7665 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7666 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7669 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7670 [(set_attr "prefix" "evex")
7671 (set_attr "mode" "<MODE>")])
;; Zero-masking vternlog expander.  Forwards operands 0-4 unchanged to
;; gen_<avx512>_vternlog<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode) as
;; the sixth argument ahead of the mask register (operand 5).
;;   operands 1, 2: register sources; operand 3: register or memory source
;;   operand 4: 8-bit immediate (0..255)
7673 (define_expand "<avx512>_vternlog<mode>_maskz"
7674 [(match_operand:VI48_AVX512VL 0 "register_operand")
7675 (match_operand:VI48_AVX512VL 1 "register_operand")
7676 (match_operand:VI48_AVX512VL 2 "register_operand")
7677 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7678 (match_operand:SI 4 "const_0_to_255_operand")
7679 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7682 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7683 operands[0], operands[1], operands[2], operands[3],
7684 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog as an unspec over VI48_AVX512VL modes.  Operand 1 is tied to the
;; destination ("0") and is therefore not printed; the template lists the
;; immediate (operand 4), operand 3 (register or memory) and register
;; operand 2, plus the <sd_mask_op5> decoration on the destination.
7688 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7689 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7690 (unspec:VI48_AVX512VL
7691 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7692 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7693 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7694 (match_operand:SI 4 "const_0_to_255_operand")]
7697 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7698 [(set_attr "type" "sselog")
7699 (set_attr "prefix" "evex")
7700 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpternlog: the unspec result is combined through vec_merge
;; under the mask register operand 5 ("Yk"), printed as {%5}.  Operand 1 is
;; tied to the destination ("0") and is not printed in the template.
7702 (define_insn "<avx512>_vternlog<mode>_mask"
7703 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7704 (vec_merge:VI48_AVX512VL
7705 (unspec:VI48_AVX512VL
7706 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7707 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7708 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7709 (match_operand:SI 4 "const_0_to_255_operand")]
7712 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7714 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7715 [(set_attr "type" "sselog")
7716 (set_attr "prefix" "evex")
7717 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexp as a single-input unspec over VF_AVX512VL modes,
;; EVEX-encoded.  The template prints <round_saeonly_mask_op2> next to the
;; source and <mask_operand2> after the destination.  (The `;` after the
;; template string starts an empty comment.)
7719 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7720 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7721 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7724 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7725 [(set_attr "prefix" "evex")
7726 (set_attr "mode" "<MODE>")])
;; Scalar vgetexp on 128-bit float vectors (VF_128): register operand 1 and
;; operand 2 (governed by the <round_saeonly_*> predicate/constraint),
;; EVEX-encoded, with <round_saeonly_op3> printed next to operand 2.  The
;; "mode" attribute is the scalar mode.
7728 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7729 [(set (match_operand:VF_128 0 "register_operand" "=v")
7732 [(match_operand:VF_128 1 "register_operand" "v")
7733 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7738 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7739 [(set_attr "prefix" "evex")
7740 (set_attr "mode" "<ssescalarmode>")])
;; valign as an unspec over VI48_AVX512VL modes: register operand 1,
;; register-or-memory operand 2 and an 8-bit immediate (operand 3),
;; EVEX-encoded, with <mask_operand4> printed after the destination.
7742 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7743 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7744 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7745 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7746 (match_operand:SI 3 "const_0_to_255_operand")]
7749 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7750 [(set_attr "prefix" "evex")
7751 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit shufps expander.  Decodes the 8-bit immediate (operand 3)
;; into the sixteen explicit element selectors expected by
;; gen_avx512f_shufps512_1_mask.  The immediate holds four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7); the same four fields are reused for each group
;; of four result elements with a base of 0, 4, 8 and 12.  Within a group the
;; first two selectors are field + base and the last two are
;; field + base + 16.  Operand 4 and the HImode mask (operand 5) are passed
;; through unchanged as the last two arguments.
7753 (define_expand "avx512f_shufps512_mask"
7754 [(match_operand:V16SF 0 "register_operand")
7755 (match_operand:V16SF 1 "register_operand")
7756 (match_operand:V16SF 2 "nonimmediate_operand")
7757 (match_operand:SI 3 "const_0_to_255_operand")
7758 (match_operand:V16SF 4 "register_operand")
7759 (match_operand:HI 5 "register_operand")]
7762 int mask = INTVAL (operands[3]);
7763 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7764 GEN_INT ((mask >> 0) & 3),
7765 GEN_INT ((mask >> 2) & 3),
7766 GEN_INT (((mask >> 4) & 3) + 16),
7767 GEN_INT (((mask >> 6) & 3) + 16),
7768 GEN_INT (((mask >> 0) & 3) + 4),
7769 GEN_INT (((mask >> 2) & 3) + 4),
7770 GEN_INT (((mask >> 4) & 3) + 20),
7771 GEN_INT (((mask >> 6) & 3) + 20),
7772 GEN_INT (((mask >> 0) & 3) + 8),
7773 GEN_INT (((mask >> 2) & 3) + 8),
7774 GEN_INT (((mask >> 4) & 3) + 24),
7775 GEN_INT (((mask >> 6) & 3) + 24),
7776 GEN_INT (((mask >> 0) & 3) + 12),
7777 GEN_INT (((mask >> 2) & 3) + 12),
7778 GEN_INT (((mask >> 4) & 3) + 28),
7779 GEN_INT (((mask >> 6) & 3) + 28),
7780 operands[4], operands[5]));
;; Zero-masking packed vfixupimm expander.  Forwards operands 0-4 unchanged
;; to gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name>,
;; inserting CONST0_RTX (<MODE>mode) ahead of the mask register (operand 5)
;; and appending <round_saeonly_expand_operand6>.
;;   operand 3: integer-vector (<sseintvecmode>) input
;;   operand 4: 8-bit immediate (0..255)
7785 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7786 [(match_operand:VF_AVX512VL 0 "register_operand")
7787 (match_operand:VF_AVX512VL 1 "register_operand")
7788 (match_operand:VF_AVX512VL 2 "register_operand")
7789 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7790 (match_operand:SI 4 "const_0_to_255_operand")
7791 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7794 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7795 operands[0], operands[1], operands[2], operands[3],
7796 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7797 <round_saeonly_expand_operand6>));
;; Packed vfixupimm over VF_AVX512VL modes.  Operand 1 is tied to the
;; destination ("0") and is not printed; the template lists the immediate
;; (operand 4), the integer-vector operand 3 (with
;; <round_saeonly_sd_mask_op5>) and register operand 2, plus <sd_mask_op5>
;; on the destination.  (The `;` after the template string starts an empty
;; comment.)
7801 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7802 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7804 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7805 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7806 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7807 (match_operand:SI 4 "const_0_to_255_operand")]
7810 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7811 [(set_attr "prefix" "evex")
7812 (set_attr "mode" "<MODE>")])
;; Merge-masking packed vfixupimm: the result is combined through vec_merge
;; under the mask register operand 5 ("Yk"), printed as {%5}.  Operand 1 is
;; tied to the destination ("0") and is not printed; <round_saeonly_op6> is
;; printed next to operand 3.
7814 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7815 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7816 (vec_merge:VF_AVX512VL
7818 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7819 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7820 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7821 (match_operand:SI 4 "const_0_to_255_operand")]
7824 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7826 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7827 [(set_attr "prefix" "evex")
7828 (set_attr "mode" "<MODE>")])
;; Zero-masking scalar vfixupimm expander (VF_128).  Forwards operands 0-4
;; unchanged to gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name>,
;; inserting CONST0_RTX (<MODE>mode) ahead of the mask register (operand 5)
;; and appending <round_saeonly_expand_operand6>.
7830 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7831 [(match_operand:VF_128 0 "register_operand")
7832 (match_operand:VF_128 1 "register_operand")
7833 (match_operand:VF_128 2 "register_operand")
7834 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7835 (match_operand:SI 4 "const_0_to_255_operand")
7836 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7839 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7840 operands[0], operands[1], operands[2], operands[3],
7841 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7842 <round_saeonly_expand_operand6>));
;; Scalar vfixupimm on VF_128 modes.  Operand 1 is tied to the destination
;; ("0") and is not printed; the template lists the immediate (operand 4),
;; the integer-vector operand 3 (with <round_saeonly_sd_mask_op5>) and
;; register operand 2, plus <sd_mask_op5> on the destination.  The "mode"
;; attribute is the scalar mode.
7846 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7847 [(set (match_operand:VF_128 0 "register_operand" "=v")
7850 [(match_operand:VF_128 1 "register_operand" "0")
7851 (match_operand:VF_128 2 "register_operand" "v")
7852 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7853 (match_operand:SI 4 "const_0_to_255_operand")]
7858 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7859 [(set_attr "prefix" "evex")
7860 (set_attr "mode" "<ssescalarmode>")])
;; Merge-masking scalar vfixupimm on VF_128 modes: the mask register is
;; operand 5 ("Yk"), printed as {%5}.  Operand 1 is tied to the destination
;; ("0") and is not printed; <round_saeonly_op6> is printed next to
;; operand 3.  The "mode" attribute is the scalar mode.
7862 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7863 [(set (match_operand:VF_128 0 "register_operand" "=v")
7867 [(match_operand:VF_128 1 "register_operand" "0")
7868 (match_operand:VF_128 2 "register_operand" "v")
7869 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7870 (match_operand:SI 4 "const_0_to_255_operand")]
7875 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7877 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7878 [(set_attr "prefix" "evex")
7879 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale on the VF_AVX512VL modes.  Operand 1 is the source
;; (register or memory, subject to <round_saeonly_constraint>) and
;; operand 2 is an immediate accepted by const_0_to_255_operand, hence
;; length_immediate is 1.  Masking and rounding text in the template is
;; supplied by <mask_operand3> and <round_saeonly_mask_op3>.
7881 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7882 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7884 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7885 (match_operand:SI 2 "const_0_to_255_operand")]
7888 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7889 [(set_attr "length_immediate" "1")
7890 (set_attr "prefix" "evex")
7891 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale on VF_128 modes.  This is the three-source form:
;; operand 1 is a vector register, operand 2 is governed by the
;; <round_saeonly_nimm_predicate>/<round_saeonly_constraint> pair and
;; operand 3 is an immediate accepted by const_0_to_255_operand.  The
;; mnemonic suffix comes from <ssescalarmodesuffix>.
7893 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7894 [(set (match_operand:VF_128 0 "register_operand" "=v")
7897 [(match_operand:VF_128 1 "register_operand" "v")
7898 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7899 (match_operand:SI 3 "const_0_to_255_operand")]
7904 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7905 [(set_attr "length_immediate" "1")
7906 (set_attr "prefix" "evex")
7907 (set_attr "mode" "<MODE>")])
7909 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps expressed as an explicit selection of 16 element
;; indices (operands 3-18) from the pair (operand 1, operand 2).
;; The insn condition only accepts selections in which the indices for
;; the second, third and fourth group of four repeat the first group
;; (operands 3-6) offset by 4, 8 and 12 respectively.  The output code
;; then folds operands 3-6 into one 8-bit immediate:
;;   op3 | op4 << 2 | (op5 - 16) << 4 | (op6 - 16) << 6
;; and stores it back into operands[3] so the template can print it as
;; %3.  <mask_operand19> supplies the optional masking text.
7910 (define_insn "avx512f_shufps512_1<mask_name>"
7911 [(set (match_operand:V16SF 0 "register_operand" "=v")
7914 (match_operand:V16SF 1 "register_operand" "v")
7915 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7916 (parallel [(match_operand 3 "const_0_to_3_operand")
7917 (match_operand 4 "const_0_to_3_operand")
7918 (match_operand 5 "const_16_to_19_operand")
7919 (match_operand 6 "const_16_to_19_operand")
7920 (match_operand 7 "const_4_to_7_operand")
7921 (match_operand 8 "const_4_to_7_operand")
7922 (match_operand 9 "const_20_to_23_operand")
7923 (match_operand 10 "const_20_to_23_operand")
7924 (match_operand 11 "const_8_to_11_operand")
7925 (match_operand 12 "const_8_to_11_operand")
7926 (match_operand 13 "const_24_to_27_operand")
7927 (match_operand 14 "const_24_to_27_operand")
7928 (match_operand 15 "const_12_to_15_operand")
7929 (match_operand 16 "const_12_to_15_operand")
7930 (match_operand 17 "const_28_to_31_operand")
7931 (match_operand 18 "const_28_to_31_operand")])))]
7933 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7934 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7935 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7936 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7937 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7938 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7939 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7940 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7941 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7942 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7943 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7944 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7947 mask = INTVAL (operands[3]);
7948 mask |= INTVAL (operands[4]) << 2;
7949 mask |= (INTVAL (operands[5]) - 16) << 4;
7950 mask |= (INTVAL (operands[6]) - 16) << 6;
7951 operands[3] = GEN_INT (mask);
7953 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7955 [(set_attr "type" "sselog")
7956 (set_attr "length_immediate" "1")
7957 (set_attr "prefix" "evex")
7958 (set_attr "mode" "V16SF")])
;; Expander for the masked 512-bit shufpd.  Operand 3 is an immediate
;; accepted by const_0_to_255_operand; the preparation code decodes it
;; bit by bit into explicit element indices (each tested bit chooses
;; between two adjacent indices, e.g. bit 1 picks 9 or 8) and hands
;; them to gen_avx512f_shufpd512_1_mask together with operand 4 (a V8DF
;; register) and operand 5 (a QImode register).
7960 (define_expand "avx512f_shufpd512_mask"
7961 [(match_operand:V8DF 0 "register_operand")
7962 (match_operand:V8DF 1 "register_operand")
7963 (match_operand:V8DF 2 "nonimmediate_operand")
7964 (match_operand:SI 3 "const_0_to_255_operand")
7965 (match_operand:V8DF 4 "register_operand")
7966 (match_operand:QI 5 "register_operand")]
7969 int mask = INTVAL (operands[3]);
7970 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
7972 GEN_INT (mask & 2 ? 9 : 8),
7973 GEN_INT (mask & 4 ? 3 : 2),
7974 GEN_INT (mask & 8 ? 11 : 10),
7975 GEN_INT (mask & 16 ? 5 : 4),
7976 GEN_INT (mask & 32 ? 13 : 12),
7977 GEN_INT (mask & 64 ? 7 : 6),
7978 GEN_INT (mask & 128 ? 15 : 14),
7979 operands[4], operands[5]));
;; 512-bit vshufpd expressed as an explicit selection of eight element
;; indices (operands 3-10).  Each index operand is restricted by its
;; predicate to one of two adjacent values, so it carries one bit of
;; information.  The output code subtracts each predicate's lower bound
;; and packs the resulting bits, operand 3 in bit 0 up to operand 10 in
;; bit 7, into the immediate that replaces operands[3] and is printed
;; as %3.  <mask_operand11> supplies the optional masking text.
7983 (define_insn "avx512f_shufpd512_1<mask_name>"
7984 [(set (match_operand:V8DF 0 "register_operand" "=v")
7987 (match_operand:V8DF 1 "register_operand" "v")
7988 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7989 (parallel [(match_operand 3 "const_0_to_1_operand")
7990 (match_operand 4 "const_8_to_9_operand")
7991 (match_operand 5 "const_2_to_3_operand")
7992 (match_operand 6 "const_10_to_11_operand")
7993 (match_operand 7 "const_4_to_5_operand")
7994 (match_operand 8 "const_12_to_13_operand")
7995 (match_operand 9 "const_6_to_7_operand")
7996 (match_operand 10 "const_14_to_15_operand")])))]
8000 mask = INTVAL (operands[3]);
8001 mask |= (INTVAL (operands[4]) - 8) << 1;
8002 mask |= (INTVAL (operands[5]) - 2) << 2;
8003 mask |= (INTVAL (operands[6]) - 10) << 3;
8004 mask |= (INTVAL (operands[7]) - 4) << 4;
8005 mask |= (INTVAL (operands[8]) - 12) << 5;
8006 mask |= (INTVAL (operands[9]) - 6) << 6;
8007 mask |= (INTVAL (operands[10]) - 14) << 7;
8008 operands[3] = GEN_INT (mask);
8010 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8012 [(set_attr "type" "sselog")
8013 (set_attr "length_immediate" "1")
8014 (set_attr "prefix" "evex")
8015 (set_attr "mode" "V8DF")])
;; Expander for 256-bit shufpd.  Operand 3 is a const_int whose bits
;; are decoded into explicit element indices (each tested bit chooses
;; between two adjacent indices) and passed to the
;; gen_avx_shufpd256_1<mask_expand4_name> generator; any additional
;; masking arguments come from <mask_expand4_args>.
8017 (define_expand "avx_shufpd256<mask_expand4_name>"
8018 [(match_operand:V4DF 0 "register_operand")
8019 (match_operand:V4DF 1 "register_operand")
8020 (match_operand:V4DF 2 "nonimmediate_operand")
8021 (match_operand:SI 3 "const_int_operand")]
8024 int mask = INTVAL (operands[3]);
8025 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8029 GEN_INT (mask & 2 ? 5 : 4),
8030 GEN_INT (mask & 4 ? 3 : 2),
8031 GEN_INT (mask & 8 ? 7 : 6)
8032 <mask_expand4_args>));
;; 256-bit vshufpd expressed as an explicit selection of four element
;; indices (operands 3-6), each limited by its predicate to one of two
;; adjacent values.  The output code subtracts each predicate's lower
;; bound and packs the four resulting bits (operand 3 in bit 0 through
;; operand 6 in bit 3) into the immediate stored back in operands[3].
;; Requires TARGET_AVX plus whatever <mask_avx512vl_condition> adds.
8036 (define_insn "avx_shufpd256_1<mask_name>"
8037 [(set (match_operand:V4DF 0 "register_operand" "=v")
8040 (match_operand:V4DF 1 "register_operand" "v")
8041 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8042 (parallel [(match_operand 3 "const_0_to_1_operand")
8043 (match_operand 4 "const_4_to_5_operand")
8044 (match_operand 5 "const_2_to_3_operand")
8045 (match_operand 6 "const_6_to_7_operand")])))]
8046 "TARGET_AVX && <mask_avx512vl_condition>"
8049 mask = INTVAL (operands[3]);
8050 mask |= (INTVAL (operands[4]) - 4) << 1;
8051 mask |= (INTVAL (operands[5]) - 2) << 2;
8052 mask |= (INTVAL (operands[6]) - 6) << 3;
8053 operands[3] = GEN_INT (mask);
8055 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8057 [(set_attr "type" "sseshuf")
8058 (set_attr "length_immediate" "1")
8059 (set_attr "prefix" "vex")
8060 (set_attr "mode" "V4DF")])
;; Expander for 128-bit shufpd.  Bit 0 of the const_int operand 3 is
;; passed through as the first element index (0 or 1) and bit 1 chooses
;; index 3 or 2 for the second; both go to the
;; gen_sse2_shufpd_v2df<mask_expand4_name> generator, followed by any
;; masking arguments from <mask_expand4_args>.
8062 (define_expand "sse2_shufpd<mask_expand4_name>"
8063 [(match_operand:V2DF 0 "register_operand")
8064 (match_operand:V2DF 1 "register_operand")
8065 (match_operand:V2DF 2 "nonimmediate_operand")
8066 (match_operand:SI 3 "const_int_operand")]
8069 int mask = INTVAL (operands[3]);
8070 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8071 operands[2], GEN_INT (mask & 1),
8072 GEN_INT (mask & 2 ? 3 : 2)
8073 <mask_expand4_args>));
;; Masked 128-bit vshufpd.  Operands 3 and 4 are element indices
;; (0..1 and 2..3); the output code packs them into a 2-bit immediate
;; (op3 | (op4 - 2) << 1) that replaces operands[3].  Operand 5 is the
;; merge source (tied to the destination or a constant, constraint
;; "0C") and operand 6 is the QImode mask register ("Yk").
;; Both assembler dialects must print the mask as %{%6%}: "%{" and "%}"
;; emit literal braces and "%6" the mask register.  The Intel half used
;; to read "%{6%}", which printed a literal "{6}" instead of the
;; register name.
8077 (define_insn "sse2_shufpd_v2df_mask"
8078 [(set (match_operand:V2DF 0 "register_operand" "=v")
8082 (match_operand:V2DF 1 "register_operand" "v")
8083 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8084 (parallel [(match_operand 3 "const_0_to_1_operand")
8085 (match_operand 4 "const_2_to_3_operand")]))
8086 (match_operand:V2DF 5 "vector_move_operand" "0C")
8087 (match_operand:QI 6 "register_operand" "Yk")))]
8091 mask = INTVAL (operands[3]);
8092 mask |= (INTVAL (operands[4]) - 2) << 1;
8093 operands[3] = GEN_INT (mask);
8095 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8097 [(set_attr "type" "sseshuf")
8098 (set_attr "length_immediate" "1")
8099 (set_attr "prefix" "evex")
8100 (set_attr "mode" "V2DF")])
8102 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq on V4DI: operand 1 is a register, operand 2 a
;; register or memory operand.  Requires TARGET_AVX2 plus whatever
;; <mask_avx512vl_condition> adds; <mask_operand3> supplies the optional
;; masking text in the template.
8103 (define_insn "avx2_interleave_highv4di<mask_name>"
8104 [(set (match_operand:V4DI 0 "register_operand" "=v")
8107 (match_operand:V4DI 1 "register_operand" "v")
8108 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8109 (parallel [(const_int 1)
8113 "TARGET_AVX2 && <mask_avx512vl_condition>"
8114 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8115 [(set_attr "type" "sselog")
8116 (set_attr "prefix" "vex")
8117 (set_attr "mode" "OI")])
;; 512-bit vpunpckhqdq on V8DI.  The selector picks the odd-numbered
;; elements alternately from operand 1 (indices 1, 3, 5, 7) and from
;; operand 2 (indices 9, 11, 13, 15).  <mask_operand3> supplies the
;; optional masking text in the template.
8119 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8120 [(set (match_operand:V8DI 0 "register_operand" "=v")
8123 (match_operand:V8DI 1 "register_operand" "v")
8124 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8125 (parallel [(const_int 1) (const_int 9)
8126 (const_int 3) (const_int 11)
8127 (const_int 5) (const_int 13)
8128 (const_int 7) (const_int 15)])))]
8130 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8131 [(set_attr "type" "sselog")
8132 (set_attr "prefix" "evex")
8133 (set_attr "mode" "XI")])
;; 128-bit punpckhqdq on V2DI with two alternatives:
;;   0: legacy SSE2 encoding (isa "noavx"), operand 1 tied to the
;;      destination, data16 prefix;
;;   1: VEX/EVEX three-operand encoding (isa "avx"), optionally masked
;;      via <mask_operand3>; its prefix attribute follows <mask_prefix>.
8135 (define_insn "vec_interleave_highv2di<mask_name>"
8136 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8139 (match_operand:V2DI 1 "register_operand" "0,v")
8140 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8141 (parallel [(const_int 1)
8143 "TARGET_SSE2 && <mask_avx512vl_condition>"
8145 punpckhqdq\t{%2, %0|%0, %2}
8146 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8147 [(set_attr "isa" "noavx,avx")
8148 (set_attr "type" "sselog")
8149 (set_attr "prefix_data16" "1,*")
8150 (set_attr "prefix" "orig,<mask_prefix>")
8151 (set_attr "mode" "TI")])
;; 256-bit vpunpcklqdq on V4DI: operand 1 is a register, operand 2 a
;; register or memory operand.  Requires TARGET_AVX2 plus whatever
;; <mask_avx512vl_condition> adds; <mask_operand3> supplies the optional
;; masking text in the template.
8153 (define_insn "avx2_interleave_lowv4di<mask_name>"
8154 [(set (match_operand:V4DI 0 "register_operand" "=v")
8157 (match_operand:V4DI 1 "register_operand" "v")
8158 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8159 (parallel [(const_int 0)
8163 "TARGET_AVX2 && <mask_avx512vl_condition>"
8164 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8165 [(set_attr "type" "sselog")
8166 (set_attr "prefix" "vex")
8167 (set_attr "mode" "OI")])
;; 512-bit vpunpcklqdq on V8DI.  The selector picks the even-numbered
;; elements alternately from operand 1 (indices 0, 2, 4, 6) and from
;; operand 2 (indices 8, 10, 12, 14).  <mask_operand3> supplies the
;; optional masking text in the template.
8169 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8170 [(set (match_operand:V8DI 0 "register_operand" "=v")
8173 (match_operand:V8DI 1 "register_operand" "v")
8174 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8175 (parallel [(const_int 0) (const_int 8)
8176 (const_int 2) (const_int 10)
8177 (const_int 4) (const_int 12)
8178 (const_int 6) (const_int 14)])))]
8180 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8181 [(set_attr "type" "sselog")
8182 (set_attr "prefix" "evex")
8183 (set_attr "mode" "XI")])
;; 128-bit punpcklqdq on V2DI with two alternatives:
;;   0: legacy SSE2 encoding (isa "noavx"), operand 1 tied to the
;;      destination, data16 prefix;
;;   1: VEX/EVEX three-operand encoding (isa "avx"), optionally masked
;;      via <mask_operand3>.
;; The prefix attribute of alternative 1 uses <mask_prefix>, matching
;; vec_interleave_highv2di<mask_name>, so that the masked variant
;; generated through <mask_name> is not described with a hard-coded
;; "vex" prefix.
8185 (define_insn "vec_interleave_lowv2di<mask_name>"
8186 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8189 (match_operand:V2DI 1 "register_operand" "0,v")
8190 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8191 (parallel [(const_int 0)
8193 "TARGET_SSE2 && <mask_avx512vl_condition>"
8195 punpcklqdq\t{%2, %0|%0, %2}
8196 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8197 [(set_attr "isa" "noavx,avx")
8198 (set_attr "type" "sselog")
8199 (set_attr "prefix_data16" "1,*")
8200 (set_attr "prefix" "orig,<mask_prefix>")
8201 (set_attr "mode" "TI")])
;; Unmasked 128-bit shufpd on the VI8F_128 modes, written as a
;; vec_select of two element indices (operand 3 in 0..1, operand 4 in
;; 2..3) from the concatenation of operands 1 and 2.  The output code
;; packs the indices into a 2-bit immediate (op3 | (op4 - 2) << 1),
;; stores it in operands[3] and then returns the two-operand shufpd or
;; the three-operand vshufpd template depending on which_alternative
;; (isa "noavx" / "avx").
8203 (define_insn "sse2_shufpd_<mode>"
8204 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8205 (vec_select:VI8F_128
8206 (vec_concat:<ssedoublevecmode>
8207 (match_operand:VI8F_128 1 "register_operand" "0,x")
8208 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
8209 (parallel [(match_operand 3 "const_0_to_1_operand")
8210 (match_operand 4 "const_2_to_3_operand")])))]
8214 mask = INTVAL (operands[3]);
8215 mask |= (INTVAL (operands[4]) - 2) << 1;
8216 operands[3] = GEN_INT (mask);
8218 switch (which_alternative)
8221 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8223 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8228 [(set_attr "isa" "noavx,avx")
8229 (set_attr "type" "sseshuf")
8230 (set_attr "length_immediate" "1")
8231 (set_attr "prefix" "orig,vex")
8232 (set_attr "mode" "V2DF")])
8234 ;; Avoid combining registers from different units in a single alternative,
8235 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 (selector (parallel [(const_int 1)])) of the V2DF
;; operand 1 into the DF operand 0.  Six alternatives cover a memory
;; destination, SSE register destinations (legacy and AVX forms) and,
;; for an offsettable-memory source ("o"), destinations in an SSE
;; register, an x87 register ("*f") or a general register ("r").
;; The condition rules out memory-to-memory.  prefix_data16 is computed
;; from alternative 0 when TARGET_AVX is not enabled.
8236 (define_insn "sse2_storehpd"
8237 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
8239 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
8240 (parallel [(const_int 1)])))]
8241 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8243 %vmovhpd\t{%1, %0|%0, %1}
8245 vunpckhpd\t{%d1, %0|%0, %d1}
8249 [(set_attr "isa" "*,noavx,avx,*,*,*")
8250 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8251 (set (attr "prefix_data16")
8253 (and (eq_attr "alternative" "0")
8254 (not (match_test "TARGET_AVX")))
8256 (const_string "*")))
8257 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8258 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; NOTE(review): the introducing keyword line is not visible here; the
;; shape (pattern, condition, replacement, preparation code) looks like
;; a define_split -- confirm.
;; After reload, an extraction of element 1 from a V2DF memory operand
;; into a DF register is rewritten as a plain move: the memory
;; reference is re-addressed as DFmode at byte offset 8.
8261 [(set (match_operand:DF 0 "register_operand")
8263 (match_operand:V2DF 1 "memory_operand")
8264 (parallel [(const_int 1)])))]
8265 "TARGET_SSE2 && reload_completed"
8266 [(set (match_dup 0) (match_dup 1))]
8267 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extraction of element 1 of a V2DF value for targets with SSE but
;; without SSE2 (condition "!TARGET_SSE2 && TARGET_SSE"), using the
;; single-precision move instructions movhps / movhlps / movlps.
;; The third alternative reads the high half of an offsettable memory
;; source via the %H1 operand modifier.  Memory-to-memory is rejected.
8269 (define_insn "*vec_extractv2df_1_sse"
8270 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8272 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8273 (parallel [(const_int 1)])))]
8274 "!TARGET_SSE2 && TARGET_SSE
8275 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8277 movhps\t{%1, %0|%q0, %1}
8278 movhlps\t{%1, %0|%0, %1}
8279 movlps\t{%H1, %0|%0, %H1}"
8280 [(set_attr "type" "ssemov")
8281 (set_attr "ssememalign" "64")
8282 (set_attr "mode" "V2SF,V4SF,V2SF")])
8284 ;; Avoid combining registers from different units in a single alternative,
8285 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 (selector (parallel [(const_int 0)])) of the V2DF
;; operand 1 into the DF operand 0.  Five alternatives: store from an
;; SSE register to memory, SSE register to SSE register, and loads from
;; memory into an SSE register, an x87 register ("*f") or a general
;; register ("r").  Memory-to-memory is rejected by the condition.
8286 (define_insn "sse2_storelpd"
8287 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8289 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8290 (parallel [(const_int 0)])))]
8291 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8293 %vmovlpd\t{%1, %0|%0, %1}
8298 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8299 (set_attr "prefix_data16" "1,*,*,*,*")
8300 (set_attr "prefix" "maybe_vex")
8301 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; NOTE(review): the introducing keyword line is not visible here; the
;; shape (pattern, condition, replacement, preparation code) looks like
;; a define_split -- confirm.
;; After reload, an extraction of element 0 of a V2DF operand into a DF
;; register becomes a plain DF move.  A register source is re-created
;; as a DFmode register with the same register number; the
;; adjust_address call re-addresses the operand as DFmode at offset 0
;; (presumably the memory case; the branch keyword is not visible).
8304 [(set (match_operand:DF 0 "register_operand")
8306 (match_operand:V2DF 1 "nonimmediate_operand")
8307 (parallel [(const_int 0)])))]
8308 "TARGET_SSE2 && reload_completed"
8309 [(set (match_dup 0) (match_dup 1))]
8311 if (REG_P (operands[1]))
8312 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
8314 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Extraction of element 0 of a V2DF value for targets with SSE but
;; without SSE2 (condition "!TARGET_SSE2 && TARGET_SSE"), using movlps
;; for the memory alternatives and movaps for register-to-register.
;; Memory-to-memory is rejected by the condition.
8317 (define_insn "*vec_extractv2df_0_sse"
8318 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8320 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8321 (parallel [(const_int 0)])))]
8322 "!TARGET_SSE2 && TARGET_SSE
8323 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8325 movlps\t{%1, %0|%0, %1}
8326 movaps\t{%1, %0|%0, %1}
8327 movlps\t{%1, %0|%0, %q1}"
8328 [(set_attr "type" "ssemov")
8329 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; gen_sse2_loadhpd is emitted into that destination and, if it is
;; not operands[0] itself, the result is copied to operands[0]
;; afterwards.
8331 (define_expand "sse2_loadhpd_exp"
8332 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8335 (match_operand:V2DF 1 "nonimmediate_operand")
8336 (parallel [(const_int 0)]))
8337 (match_operand:DF 2 "nonimmediate_operand")))]
8340 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8342 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8344 /* Fix up the destination if needed. */
8345 if (dst != operands[0])
8346 emit_move_insn (operands[0], dst);
8351 ;; Avoid combining registers from different units in a single alternative,
8352 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF value from element 0 of operand 1 (selector
;; (parallel [(const_int 0)])) and the DF operand 2.  Seven
;; alternatives are declared; the visible templates are movhpd /
;; vmovhpd for a memory operand 2 and unpcklpd / vunpcklpd for a
;; register operand 2, each in a legacy ("noavx") and an AVX form.
;; The condition forbids operands 1 and 2 both being memory.
8353 (define_insn "sse2_loadhpd"
8354 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8358 (match_operand:V2DF 1 "nonimmediate_operand"
8360 (parallel [(const_int 0)]))
8361 (match_operand:DF 2 "nonimmediate_operand"
8362 " m,m,x,x,x,*f,r")))]
8363 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8365 movhpd\t{%2, %0|%0, %2}
8366 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8367 unpcklpd\t{%2, %0|%0, %2}
8368 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8372 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8373 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8374 (set_attr "ssememalign" "64")
8375 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8376 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8377 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; NOTE(review): the introducing keyword line is not visible here; the
;; shape (pattern, condition, replacement, preparation code) looks like
;; a define_split -- confirm.
;; After reload, when the V2DF destination is memory and element 0 is
;; taken from that same memory (match_dup 0), only the other half
;; changes: the insn becomes a plain DF store of operand 1 to the
;; destination re-addressed as DFmode at byte offset 8.
8380 [(set (match_operand:V2DF 0 "memory_operand")
8382 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8383 (match_operand:DF 1 "register_operand")))]
8384 "TARGET_SSE2 && reload_completed"
8385 [(set (match_dup 0) (match_dup 1))]
8386 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; gen_sse2_loadlpd is emitted into that destination and, if it is
;; not operands[0] itself, the result is copied to operands[0]
;; afterwards.
8388 (define_expand "sse2_loadlpd_exp"
8389 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8391 (match_operand:DF 2 "nonimmediate_operand")
8393 (match_operand:V2DF 1 "nonimmediate_operand")
8394 (parallel [(const_int 1)]))))]
8397 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8399 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8401 /* Fix up the destination if needed. */
8402 if (dst != operands[0])
8403 emit_move_insn (operands[0], dst);
8408 ;; Avoid combining registers from different units in a single alternative,
8409 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF value from the DF operand 2 and element 1 of operand 1
;; (selector (parallel [(const_int 1)])).  Eleven alternatives are
;; declared.  Per the constraint strings, alternative 0 takes a constant
;; ("C") operand 1 with a memory operand 2; alternatives 1-7 combine
;; register/memory sources in legacy and AVX forms; alternatives 8-10
;; have a memory destination with operand 2 in an SSE register, an x87
;; register ("*f") or a general register ("r").
;; The type attribute is "sselog" for alternative 5 (the shufpd
;; template), "fmov" for 9, "imov" for 10 and "ssemov" otherwise.
;; The condition forbids operands 1 and 2 both being memory.
8410 (define_insn "sse2_loadlpd"
8411 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8412 "=x,x,x,x,x,x,x,x,m,m ,m")
8414 (match_operand:DF 2 "nonimmediate_operand"
8415 " m,m,m,x,x,0,0,x,x,*f,r")
8417 (match_operand:V2DF 1 "vector_move_operand"
8418 " C,0,x,0,x,x,o,o,0,0 ,0")
8419 (parallel [(const_int 1)]))))]
8420 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8422 %vmovsd\t{%2, %0|%0, %2}
8423 movlpd\t{%2, %0|%0, %2}
8424 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8425 movsd\t{%2, %0|%0, %2}
8426 vmovsd\t{%2, %1, %0|%0, %1, %2}
8427 shufpd\t{$2, %1, %0|%0, %1, 2}
8428 movhpd\t{%H1, %0|%0, %H1}
8429 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8433 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8435 (cond [(eq_attr "alternative" "5")
8436 (const_string "sselog")
8437 (eq_attr "alternative" "9")
8438 (const_string "fmov")
8439 (eq_attr "alternative" "10")
8440 (const_string "imov")
8442 (const_string "ssemov")))
8443 (set_attr "ssememalign" "64")
8444 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8445 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8446 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8447 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; NOTE(review): the introducing keyword line is not visible here; the
;; shape (pattern, condition, replacement, preparation code) looks like
;; a define_split -- confirm.
;; After reload, when the V2DF destination is memory and element 1 is
;; taken from that same memory (match_dup 0), only the other half
;; changes: the insn becomes a plain DF store of operand 1 to the
;; destination re-addressed as DFmode at byte offset 0.
8450 [(set (match_operand:V2DF 0 "memory_operand")
8452 (match_operand:DF 1 "register_operand")
8453 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8454 "TARGET_SSE2 && reload_completed"
8455 [(set (match_dup 0) (match_dup 1))]
8456 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; movsd-style combination of two V2DF operands, with nine
;; alternatives.  The templates use movsd/vmovsd for register sources,
;; movlpd/vmovlpd when operand 2 is memory, a store via %vmovlpd when
;; the destination is memory, shufpd with immediate 2 when operand 2 is
;; tied to the destination, and movhps/vmovhps on the high half (%H
;; modifier) of an offsettable memory operand.
;; The type attribute is "sselog" for alternative 5 (the shufpd
;; template) and "ssemov" otherwise; prefix_data16 depends on
;; alternatives 2 and 4 when TARGET_AVX is not enabled.
8458 (define_insn "sse2_movsd"
8459 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8461 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8462 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8466 movsd\t{%2, %0|%0, %2}
8467 vmovsd\t{%2, %1, %0|%0, %1, %2}
8468 movlpd\t{%2, %0|%0, %q2}
8469 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8470 %vmovlpd\t{%2, %0|%q0, %2}
8471 shufpd\t{$2, %1, %0|%0, %1, 2}
8472 movhps\t{%H1, %0|%0, %H1}
8473 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8474 %vmovhps\t{%1, %H0|%H0, %1}"
8475 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8478 (eq_attr "alternative" "5")
8479 (const_string "sselog")
8480 (const_string "ssemov")))
8481 (set (attr "prefix_data16")
8483 (and (eq_attr "alternative" "2,4")
8484 (not (match_test "TARGET_AVX")))
8486 (const_string "*")))
8487 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8488 (set_attr "ssememalign" "64")
8489 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8490 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; V2DF result built from the single DF operand 1, with two
;; alternatives: a legacy one (isa "noavx") with operand 1 tied to the
;; destination, and an SSE3 one (isa "sse3") that prints %vmovddup from
;; a register or memory source, optionally masked via <mask_operand2>.
;; Requires TARGET_SSE2 plus whatever <mask_avx512vl_condition> adds.
8492 (define_insn "vec_dupv2df<mask_name>"
8493 [(set (match_operand:V2DF 0 "register_operand" "=x,v")
8495 (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
8496 "TARGET_SSE2 && <mask_avx512vl_condition>"
8499 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8500 [(set_attr "isa" "noavx,sse3")
8501 (set_attr "type" "sselog1")
8502 (set_attr "prefix" "orig,maybe_vex")
8503 (set_attr "mode" "V2DF,DF")])
;; V2DF result built from the two DF operands 1 and 2, with eight
;; alternatives selected by the "isa" attribute:
;;   0/1  unpcklpd / vunpcklpd   both operands in registers
;;   2    %vmovddup              operand 2 is the same as operand 1
;;                               (constraint "1"), operand 1 in memory
;;   3/4  movhpd / vmovhpd       operand 2 in memory
;;   5    %vmovsd                operand 2 is constant ("C"), operand 1
;;                               in memory
;;   6/7  movlhps / movhps       non-AVX fallbacks (isa "noavx")
;; Alternatives 0-2 are typed "sselog", the rest "ssemov".
8505 (define_insn "*vec_concatv2df"
8506 [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
8508 (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
8509 (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
8512 unpcklpd\t{%2, %0|%0, %2}
8513 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8514 %vmovddup\t{%1, %0|%0, %1}
8515 movhpd\t{%2, %0|%0, %2}
8516 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8517 %vmovsd\t{%1, %0|%0, %1}
8518 movlhps\t{%2, %0|%0, %2}
8519 movhps\t{%2, %0|%0, %2}"
8520 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
8523 (eq_attr "alternative" "0,1,2")
8524 (const_string "sselog")
8525 (const_string "ssemov")))
8526 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
8527 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
8528 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
8530 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8532 ;; Parallel integer down-conversion operations
8534 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the down-conversion patterns that follow, with
;; three per-mode lookup tables:
;;   pmov_src_mode   the source vector mode (V16SI or V8DI)
;;   pmov_src_lower  the same mode name in lower case, used in pattern
;;                   names
;;   pmov_suff_1     the two-letter suffix appended to the vpmov
;;                   mnemonic
8536 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8537 (define_mode_attr pmov_src_mode
8538 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8539 (define_mode_attr pmov_src_lower
8540 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8541 (define_mode_attr pmov_suff_1
8542 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked truncation (any_truncate) of a <pmov_src_mode> register into
;; a PMOV_DST_MODE_1 destination, printed as
;; vpmov<trunsuffix><pmov_suff_1>.  Two alternatives: register
;; destination and memory destination (attr "memory" = none,store).
8544 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8545 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8546 (any_truncate:PMOV_DST_MODE_1
8547 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8549 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8550 [(set_attr "type" "ssemov")
8551 (set_attr "memory" "none,store")
8552 (set_attr "prefix" "evex")
8553 (set_attr "mode" "<sseinsnmode>")])
;; Masked truncation: a vec_merge of the truncated operand 1 with
;; operand 2 under the mask register operand 3 ("Yk").  With a register
;; destination operand 2 may be the destination itself or a constant
;; ("0C"); with a memory destination it must be the destination ("0").
;; The template prints the mask as {%3} followed by the %N2 modifier.
8555 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8556 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8557 (vec_merge:PMOV_DST_MODE_1
8558 (any_truncate:PMOV_DST_MODE_1
8559 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8560 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8561 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8563 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8564 [(set_attr "type" "ssemov")
8565 (set_attr "memory" "none,store")
8566 (set_attr "prefix" "evex")
8567 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store: the destination (operand
;; 0) must be memory, operand 1 is the source register and operand 2
;; the mask register.  The middle vec_merge operand is on a line not
;; visible here.
8569 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8570 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8571 (vec_merge:PMOV_DST_MODE_1
8572 (any_truncate:PMOV_DST_MODE_1
8573 (match_operand:<pmov_src_mode> 1 "register_operand"))
8575 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Unmasked V32HI -> V32QI down-conversion, printed as
;; vpmov<trunsuffix>wb.  Two alternatives: register destination and
;; memory destination (attr "memory" = none,store).
8578 (define_insn "*avx512bw_<code>v32hiv32qi2"
8579 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8581 (match_operand:V32HI 1 "register_operand" "v,v")))]
8583 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
8584 [(set_attr "type" "ssemov")
8585 (set_attr "memory" "none,store")
8586 (set_attr "prefix" "evex")
8587 (set_attr "mode" "XI")])
;; Masked V32HI -> V32QI down-conversion.  Operand 2 is the merge
;; source ("0C" for a register destination, "0" for memory) and
;; operand 3 an SImode mask register ("Yk"), printed as {%3} followed
;; by the %N2 modifier.
8589 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
8590 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8593 (match_operand:V32HI 1 "register_operand" "v,v"))
8594 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
8595 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
8597 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8598 [(set_attr "type" "ssemov")
8599 (set_attr "memory" "none,store")
8600 (set_attr "prefix" "evex")
8601 (set_attr "mode" "XI")])
;; Expander for the masked V32HI -> V32QI store form: operand 1 is the
;; V32HI source register and operand 2 an SImode mask register.
;; NOTE(review): operand 0 uses nonimmediate_operand here, whereas the
;; PMOV_DST_MODE_1 ..._mask_store expander uses memory_operand --
;; confirm whether that difference is intended.
8603 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
8604 [(set (match_operand:V32QI 0 "nonimmediate_operand")
8607 (match_operand:V32HI 1 "register_operand"))
8609 (match_operand:SI 2 "register_operand")))]
;; Destination modes for the down-conversions whose source is the
;; <ssedoublemode> of the destination; V16QI is only enabled with
;; TARGET_AVX512BW.  pmov_suff_2 maps each destination mode to the
;; two-letter suffix appended to the vpmov mnemonic.
8612 (define_mode_iterator PMOV_DST_MODE_2
8613 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
8614 (define_mode_attr pmov_suff_2
8615 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Unmasked truncation (any_truncate) of an <ssedoublemode> register
;; into a PMOV_DST_MODE_2 destination, printed as
;; vpmov<trunsuffix><pmov_suff_2>.  Two alternatives: register
;; destination and memory destination (attr "memory" = none,store).
8617 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8618 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8619 (any_truncate:PMOV_DST_MODE_2
8620 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8622 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8623 [(set_attr "type" "ssemov")
8624 (set_attr "memory" "none,store")
8625 (set_attr "prefix" "evex")
8626 (set_attr "mode" "<sseinsnmode>")])
;; Masked truncation for the PMOV_DST_MODE_2 modes: a vec_merge of the
;; truncated operand 1 with operand 2 under the mask register operand 3
;; ("Yk").  Operand 2 is "0C" for a register destination and "0" for a
;; memory destination.  The template prints the mask as {%3} followed
;; by the %N2 modifier.
8628 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8629 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8630 (vec_merge:PMOV_DST_MODE_2
8631 (any_truncate:PMOV_DST_MODE_2
8632 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8633 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8634 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8636 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8637 [(set_attr "type" "ssemov")
8638 (set_attr "memory" "none,store")
8639 (set_attr "prefix" "evex")
8640 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store form of the PMOV_DST_MODE_2
;; patterns: operand 1 is the <ssedoublemode> source register and
;; operand 2 the mask register.  The middle vec_merge operand is on a
;; line not visible here.
;; NOTE(review): operand 0 uses nonimmediate_operand here, whereas the
;; PMOV_DST_MODE_1 ..._mask_store expander uses memory_operand --
;; confirm whether that difference is intended.
8642 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
8643 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8644 (vec_merge:PMOV_DST_MODE_2
8645 (any_truncate:PMOV_DST_MODE_2
8646 (match_operand:<ssedoublemode> 1 "register_operand"))
8648 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Source modes of the down-conversions to QImode elements whose
;; result occupies only part of a V16QI destination; V8HI is only
;; enabled with TARGET_AVX512BW.  Per-mode tables:
;;   pmov_dst_3         mode of the truncated result
;;   pmov_dst_zeroed_3  mode of the const0 operand that accompanies the
;;                      truncated result in the V16QI pattern
;;   pmov_suff_3        suffix appended to the vpmov mnemonic
8651 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
8652 (define_mode_attr pmov_dst_3
8653 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
8654 (define_mode_attr pmov_dst_zeroed_3
8655 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
8656 (define_mode_attr pmov_suff_3
8657 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Unmasked down-conversion of a PMOV_SRC_MODE_3 register to QImode
;; elements in a V16QI register.  The pattern pairs the truncated
;; <pmov_dst_3> value with operand 2, which must be a zero constant
;; (const0_operand) of mode <pmov_dst_zeroed_3>.  Printed as
;; vpmov<trunsuffix><pmov_suff_3>.
8659 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8660 [(set (match_operand:V16QI 0 "register_operand" "=v")
8662 (any_truncate:<pmov_dst_3>
8663 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8664 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8666 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8667 [(set_attr "type" "ssemov")
8668 (set_attr "prefix" "evex")
8669 (set_attr "mode" "TI")])
;; Unmasked V2DI -> QI down-conversion stored to a V16QI memory
;; destination, printed as vpmov<trunsuffix>qb.  The visible selector
;; lists byte indices 2-15, presumably the bytes of the destination
;; that are not overwritten by the two result bytes (the enclosing RTL
;; lines are not visible here -- confirm).
8671 (define_insn "*avx512vl_<code>v2div2qi2_store"
8672 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8675 (match_operand:V2DI 1 "register_operand" "v"))
8678 (parallel [(const_int 2) (const_int 3)
8679 (const_int 4) (const_int 5)
8680 (const_int 6) (const_int 7)
8681 (const_int 8) (const_int 9)
8682 (const_int 10) (const_int 11)
8683 (const_int 12) (const_int 13)
8684 (const_int 14) (const_int 15)]))))]
8686 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8687 [(set_attr "type" "ssemov")
8688 (set_attr "memory" "store")
8689 (set_attr "prefix" "evex")
8690 (set_attr "mode" "TI")])
;; Masked V2DI -> QI down-conversion into a V16QI register.  Bytes 0-1
;; of operand 2 (tied to the destination or a constant, "0C") are
;; selected alongside the truncated source under the QImode mask
;; register operand 3 ("Yk"); the remaining fourteen bytes are given as
;; an all-zero V14QI constant vector.  The template prints the mask as
;; {%3} followed by the %N2 modifier.
8692 (define_insn "avx512vl_<code>v2div2qi2_mask"
8693 [(set (match_operand:V16QI 0 "register_operand" "=v")
8697 (match_operand:V2DI 1 "register_operand" "v"))
8699 (match_operand:V16QI 2 "vector_move_operand" "0C")
8700 (parallel [(const_int 0) (const_int 1)]))
8701 (match_operand:QI 3 "register_operand" "Yk"))
8702 (const_vector:V14QI [(const_int 0) (const_int 0)
8703 (const_int 0) (const_int 0)
8704 (const_int 0) (const_int 0)
8705 (const_int 0) (const_int 0)
8706 (const_int 0) (const_int 0)
8707 (const_int 0) (const_int 0)
8708 (const_int 0) (const_int 0)])))]
8710 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8711 [(set_attr "type" "ssemov")
8712 (set_attr "prefix" "evex")
8713 (set_attr "mode" "TI")])
;; Masked V2DI -> QI down-conversion stored to a V16QI memory
;; destination under the QImode mask register operand 2 ("Yk").  Two
;; selectors are visible: byte indices 0-1 next to the mask operand and
;; byte indices 2-15 afterwards (presumably the written and the
;; untouched bytes respectively; the enclosing RTL lines are not
;; visible here -- confirm).  The template prints the mask as {%2}.
8715 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
8716 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8720 (match_operand:V2DI 1 "register_operand" "v"))
8723 (parallel [(const_int 0) (const_int 1)]))
8724 (match_operand:QI 2 "register_operand" "Yk"))
8727 (parallel [(const_int 2) (const_int 3)
8728 (const_int 4) (const_int 5)
8729 (const_int 6) (const_int 7)
8730 (const_int 8) (const_int 9)
8731 (const_int 10) (const_int 11)
8732 (const_int 12) (const_int 13)
8733 (const_int 14) (const_int 15)]))))]
8735 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8736 [(set_attr "type" "ssemov")
8737 (set_attr "memory" "store")
8738 (set_attr "prefix" "evex")
8739 (set_attr "mode" "TI")])
;; Unmasked down-conversion of a VI4_128_8_256 register to four QImode
;; elements, stored to a V16QI memory destination and printed as
;; vpmov<trunsuffix><pmov_suff_3>.  The visible selector lists byte
;; indices 4-15, presumably the bytes of the destination that are not
;; overwritten (the enclosing RTL lines are not visible here --
;; confirm).
8741 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8742 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8745 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8748 (parallel [(const_int 4) (const_int 5)
8749 (const_int 6) (const_int 7)
8750 (const_int 8) (const_int 9)
8751 (const_int 10) (const_int 11)
8752 (const_int 12) (const_int 13)
8753 (const_int 14) (const_int 15)]))))]
8755 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8756 [(set_attr "type" "ssemov")
8757 (set_attr "memory" "store")
8758 (set_attr "prefix" "evex")
8759 (set_attr "mode" "TI")])
;; Masked down-conversion of a VI4_128_8_256 register to four QImode
;; elements in a V16QI register.  Bytes 0-3 of operand 2 (tied to the
;; destination or a constant, "0C") are selected alongside the
;; truncated source under the QImode mask register operand 3 ("Yk");
;; the remaining twelve bytes are given as an all-zero V12QI constant
;; vector.  The template prints the mask as {%3} followed by %N2.
8761 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8762 [(set (match_operand:V16QI 0 "register_operand" "=v")
8766 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8768 (match_operand:V16QI 2 "vector_move_operand" "0C")
8769 (parallel [(const_int 0) (const_int 1)
8770 (const_int 2) (const_int 3)]))
8771 (match_operand:QI 3 "register_operand" "Yk"))
8772 (const_vector:V12QI [(const_int 0) (const_int 0)
8773 (const_int 0) (const_int 0)
8774 (const_int 0) (const_int 0)
8775 (const_int 0) (const_int 0)
8776 (const_int 0) (const_int 0)
8777 (const_int 0) (const_int 0)])))]
8779 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8780 [(set_attr "type" "ssemov")
8781 (set_attr "prefix" "evex")
8782 (set_attr "mode" "TI")])
;; Masked down-conversion of a VI4_128_8_256 register to four QImode
;; elements, stored to a V16QI memory destination under the QImode mask
;; register operand 2 ("Yk").  Two selectors are visible: byte indices
;; 0-3 next to the mask operand and byte indices 4-15 afterwards
;; (presumably the written and the untouched bytes respectively; the
;; enclosing RTL lines are not visible here -- confirm).  The template
;; prints the mask as {%2}.
8784 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
8785 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8789 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8792 (parallel [(const_int 0) (const_int 1)
8793 (const_int 2) (const_int 3)]))
8794 (match_operand:QI 2 "register_operand" "Yk"))
8797 (parallel [(const_int 4) (const_int 5)
8798 (const_int 6) (const_int 7)
8799 (const_int 8) (const_int 9)
8800 (const_int 10) (const_int 11)
8801 (const_int 12) (const_int 13)
8802 (const_int 14) (const_int 15)]))))]
8804 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8805 [(set_attr "type" "ssemov")
8806 (set_attr "memory" "store")
8807 (set_attr "prefix" "evex")
8808 (set_attr "mode" "TI")])
;; Source modes with eight elements used by the ...v8qi2 patterns
;; below: V8SI unconditionally and V8HI only with TARGET_AVX512BW.
8810 (define_mode_iterator VI2_128_BW_4_256
8811 [(V8HI "TARGET_AVX512BW") V8SI])
;; Unmasked down-conversion of a VI2_128_BW_4_256 register to eight
;; QImode elements, stored to a V16QI memory destination and printed as
;; vpmov<trunsuffix><pmov_suff_3>.  The visible selector lists byte
;; indices 8-15, presumably the bytes of the destination that are not
;; overwritten (the enclosing RTL lines are not visible here --
;; confirm).
8813 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8814 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8817 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8820 (parallel [(const_int 8) (const_int 9)
8821 (const_int 10) (const_int 11)
8822 (const_int 12) (const_int 13)
8823 (const_int 14) (const_int 15)]))))]
8825 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8826 [(set_attr "type" "ssemov")
8827 (set_attr "memory" "store")
8828 (set_attr "prefix" "evex")
8829 (set_attr "mode" "TI")])
;; Masked down-conversion of a VI2_128_BW_4_256 register to eight
;; QImode elements in a V16QI register.  Bytes 0-7 of operand 2 (tied
;; to the destination or a constant, "0C") are selected alongside the
;; truncated source under the QImode mask register operand 3 ("Yk");
;; the remaining eight bytes are given as an all-zero V8QI constant
;; vector.  The template prints the mask as {%3} followed by %N2.
8831 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8832 [(set (match_operand:V16QI 0 "register_operand" "=v")
8836 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8838 (match_operand:V16QI 2 "vector_move_operand" "0C")
8839 (parallel [(const_int 0) (const_int 1)
8840 (const_int 2) (const_int 3)
8841 (const_int 4) (const_int 5)
8842 (const_int 6) (const_int 7)]))
8843 (match_operand:QI 3 "register_operand" "Yk"))
8844 (const_vector:V8QI [(const_int 0) (const_int 0)
8845 (const_int 0) (const_int 0)
8846 (const_int 0) (const_int 0)
8847 (const_int 0) (const_int 0)])))]
8849 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8850 [(set_attr "type" "ssemov")
8851 (set_attr "prefix" "evex")
8852 (set_attr "mode" "TI")])
;; Masked store form of the v8qi narrowing move.  Operand 0 is a V16QI
;; memory destination, operand 1 the source register, and QImode register
;; operand 2 (constraint "Yk") is printed inside %{...%} after the
;; destination.  The two parallels list indices 0..7 and 8..15.
8854 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
8855 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8859 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8862 (parallel [(const_int 0) (const_int 1)
8863 (const_int 2) (const_int 3)
8864 (const_int 4) (const_int 5)
8865 (const_int 6) (const_int 7)]))
8866 (match_operand:QI 2 "register_operand" "Yk"))
8869 (parallel [(const_int 8) (const_int 9)
8870 (const_int 10) (const_int 11)
8871 (const_int 12) (const_int 13)
8872 (const_int 14) (const_int 15)]))))]
8874 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8875 [(set_attr "type" "ssemov")
8876 (set_attr "memory" "store")
8877 (set_attr "prefix" "evex")
8878 (set_attr "mode" "TI")])
;; Source modes and per-mode attributes for the narrowing moves to HImode
;; elements:
;;   pmov_dst_4        - mode of the any_truncate result;
;;   pmov_dst_zeroed_4 - mode of the const0 operand that accompanies it
;;                       (element counts of the two always sum to 8,
;;                       matching the V8HI destination);
;;   pmov_suff_4       - mnemonic suffix, "qw" for DImode-element sources
;;                       and "dw" for V4SI.
8880 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
8881 (define_mode_attr pmov_dst_4
8882 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
8883 (define_mode_attr pmov_dst_zeroed_4
8884 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
8885 (define_mode_attr pmov_suff_4
8886 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Register form of the narrowing move to HImode elements: any_truncate of
;; register operand 1 to <pmov_dst_4>, followed by const0 operand 2 of mode
;; <pmov_dst_zeroed_4>; the destination is V8HI register operand 0.
8888 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
8889 [(set (match_operand:V8HI 0 "register_operand" "=v")
8891 (any_truncate:<pmov_dst_4>
8892 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
8893 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
8895 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8896 [(set_attr "type" "ssemov")
8897 (set_attr "prefix" "evex")
8898 (set_attr "mode" "TI")])
;; Unmasked store form of the v4hi narrowing move: V8HI memory operand 0 is
;; written from VI4_128_8_256 register operand 1.  The trailing parallel
;; lists element indices 4..7.
8900 (define_insn "*avx512vl_<code><mode>v4hi2_store"
8901 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8904 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8907 (parallel [(const_int 4) (const_int 5)
8908 (const_int 6) (const_int 7)]))))]
8910 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8911 [(set_attr "type" "ssemov")
8912 (set_attr "memory" "store")
8913 (set_attr "prefix" "evex")
8914 (set_attr "mode" "TI")])
;; Masked register form of the v4hi narrowing move.  Operand 2 is a V8HI
;; vector_move_operand (constraint "0C") referenced through %N2; QImode
;; register operand 3 (constraint "Yk") is printed inside %{...%}.  The
;; pattern ends with an all-zero V4HI constant vector.
8916 (define_insn "avx512vl_<code><mode>v4hi2_mask"
8917 [(set (match_operand:V8HI 0 "register_operand" "=v")
8921 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8923 (match_operand:V8HI 2 "vector_move_operand" "0C")
8924 (parallel [(const_int 0) (const_int 1)
8925 (const_int 2) (const_int 3)]))
8926 (match_operand:QI 3 "register_operand" "Yk"))
8927 (const_vector:V4HI [(const_int 0) (const_int 0)
8928 (const_int 0) (const_int 0)])))]
8930 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8931 [(set_attr "type" "ssemov")
8932 (set_attr "prefix" "evex")
8933 (set_attr "mode" "TI")])
;; Masked store form of the v4hi narrowing move.  Operand 0 is a V8HI memory
;; destination, operand 1 the source register, and QImode register operand 2
;; (constraint "Yk") is printed inside %{...%} after the destination.  The
;; two parallels list indices 0..3 and 4..7.
8935 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
8936 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8940 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8943 (parallel [(const_int 0) (const_int 1)
8944 (const_int 2) (const_int 3)]))
8945 (match_operand:QI 2 "register_operand" "Yk"))
8948 (parallel [(const_int 4) (const_int 5)
8949 (const_int 6) (const_int 7)]))))]
8951 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8952 [(set_attr "type" "ssemov")
8953 (set_attr "memory" "store")
8954 (set_attr "prefix" "evex")
8955 (set_attr "mode" "TI")])
;; Unmasked store form of the V2DI -> 2 x HImode narrowing move
;; (vpmov<trunsuffix>qw): V8HI memory operand 0 is written from V2DI
;; register operand 1.  The trailing parallel lists element indices 2..7.
8957 (define_insn "*avx512vl_<code>v2div2hi2_store"
8958 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8961 (match_operand:V2DI 1 "register_operand" "v"))
8964 (parallel [(const_int 2) (const_int 3)
8965 (const_int 4) (const_int 5)
8966 (const_int 6) (const_int 7)]))))]
8968 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
8969 [(set_attr "type" "ssemov")
8970 (set_attr "memory" "store")
8971 (set_attr "prefix" "evex")
8972 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qw from a V2DI source.  Operand 2
;; is a V8HI vector_move_operand (constraint "0C") referenced through %N2;
;; QImode register operand 3 (constraint "Yk") is printed inside %{...%}.
;; The pattern ends with an all-zero V6HI constant vector.
8974 (define_insn "avx512vl_<code>v2div2hi2_mask"
8975 [(set (match_operand:V8HI 0 "register_operand" "=v")
8979 (match_operand:V2DI 1 "register_operand" "v"))
8981 (match_operand:V8HI 2 "vector_move_operand" "0C")
8982 (parallel [(const_int 0) (const_int 1)]))
8983 (match_operand:QI 3 "register_operand" "Yk"))
8984 (const_vector:V6HI [(const_int 0) (const_int 0)
8985 (const_int 0) (const_int 0)
8986 (const_int 0) (const_int 0)])))]
8988 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8989 [(set_attr "type" "ssemov")
8990 (set_attr "prefix" "evex")
8991 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qw from a V2DI source.  Operand 0 is
;; a V8HI memory destination and QImode register operand 2 (constraint "Yk")
;; is printed inside %{...%} after it.  The two parallels list indices 0..1
;; and 2..7.
8993 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
8994 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8998 (match_operand:V2DI 1 "register_operand" "v"))
9001 (parallel [(const_int 0) (const_int 1)]))
9002 (match_operand:QI 2 "register_operand" "Yk"))
9005 (parallel [(const_int 2) (const_int 3)
9006 (const_int 4) (const_int 5)
9007 (const_int 6) (const_int 7)]))))]
9009 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9010 [(set_attr "type" "ssemov")
9011 (set_attr "memory" "store")
9012 (set_attr "prefix" "evex")
9013 (set_attr "mode" "TI")])
;; Register form of vpmov<trunsuffix>qd from a V2DI source: V4SI register
;; operand 0 is set from register operand 1 together with V2SI const0
;; operand 2.
9015 (define_insn "*avx512vl_<code>v2div2si2"
9016 [(set (match_operand:V4SI 0 "register_operand" "=v")
9019 (match_operand:V2DI 1 "register_operand" "v"))
9020 (match_operand:V2SI 2 "const0_operand")))]
9022 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9023 [(set_attr "type" "ssemov")
9024 (set_attr "prefix" "evex")
9025 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qd: V4SI memory operand 0 is
;; written from V2DI register operand 1.  The trailing parallel lists
;; element indices 2..3.
9027 (define_insn "*avx512vl_<code>v2div2si2_store"
9028 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9031 (match_operand:V2DI 1 "register_operand" "v"))
9034 (parallel [(const_int 2) (const_int 3)]))))]
9036 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9037 [(set_attr "type" "ssemov")
9038 (set_attr "memory" "store")
9039 (set_attr "prefix" "evex")
9040 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qd.  Operand 2 is a V4SI
;; vector_move_operand (constraint "0C") referenced through %N2; QImode
;; register operand 3 (constraint "Yk") is printed inside %{...%}.  The
;; pattern ends with an all-zero V2SI constant vector.
9042 (define_insn "avx512vl_<code>v2div2si2_mask"
9043 [(set (match_operand:V4SI 0 "register_operand" "=v")
9047 (match_operand:V2DI 1 "register_operand" "v"))
9049 (match_operand:V4SI 2 "vector_move_operand" "0C")
9050 (parallel [(const_int 0) (const_int 1)]))
9051 (match_operand:QI 3 "register_operand" "Yk"))
9052 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9054 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9055 [(set_attr "type" "ssemov")
9056 (set_attr "prefix" "evex")
9057 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qd.  Operand 0 is a V4SI memory
;; destination and QImode register operand 2 (constraint "Yk") is printed
;; inside %{...%} after it.  The two parallels list indices 0..1 and 2..3.
9059 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9060 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9064 (match_operand:V2DI 1 "register_operand" "v"))
9067 (parallel [(const_int 0) (const_int 1)]))
9068 (match_operand:QI 2 "register_operand" "Yk"))
9071 (parallel [(const_int 2) (const_int 3)]))))]
9073 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9074 [(set_attr "type" "ssemov")
9075 (set_attr "memory" "store")
9076 (set_attr "prefix" "evex")
9077 (set_attr "mode" "TI")])
;; Register form of vpmov<trunsuffix>qb from a V8DI source: V16QI register
;; operand 0 is set from register operand 1 together with an all-zero V8QI
;; constant vector.
9079 (define_insn "*avx512f_<code>v8div16qi2"
9080 [(set (match_operand:V16QI 0 "register_operand" "=v")
9083 (match_operand:V8DI 1 "register_operand" "v"))
9084 (const_vector:V8QI [(const_int 0) (const_int 0)
9085 (const_int 0) (const_int 0)
9086 (const_int 0) (const_int 0)
9087 (const_int 0) (const_int 0)])))]
9089 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9090 [(set_attr "type" "ssemov")
9091 (set_attr "prefix" "evex")
9092 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qb: V16QI memory operand 0 is
;; written from V8DI register operand 1.  The trailing parallel lists
;; element indices 8..15.
9094 (define_insn "*avx512f_<code>v8div16qi2_store"
9095 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9098 (match_operand:V8DI 1 "register_operand" "v"))
9101 (parallel [(const_int 8) (const_int 9)
9102 (const_int 10) (const_int 11)
9103 (const_int 12) (const_int 13)
9104 (const_int 14) (const_int 15)]))))]
9106 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9107 [(set_attr "type" "ssemov")
9108 (set_attr "memory" "store")
9109 (set_attr "prefix" "evex")
9110 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb from a V8DI source.  Operand 2
;; is a V16QI vector_move_operand (constraint "0C") referenced through %N2;
;; QImode register operand 3 (constraint "Yk") is printed inside %{...%}.
;; The pattern ends with an all-zero V8QI constant vector.
9112 (define_insn "avx512f_<code>v8div16qi2_mask"
9113 [(set (match_operand:V16QI 0 "register_operand" "=v")
9117 (match_operand:V8DI 1 "register_operand" "v"))
9119 (match_operand:V16QI 2 "vector_move_operand" "0C")
9120 (parallel [(const_int 0) (const_int 1)
9121 (const_int 2) (const_int 3)
9122 (const_int 4) (const_int 5)
9123 (const_int 6) (const_int 7)]))
9124 (match_operand:QI 3 "register_operand" "Yk"))
9125 (const_vector:V8QI [(const_int 0) (const_int 0)
9126 (const_int 0) (const_int 0)
9127 (const_int 0) (const_int 0)
9128 (const_int 0) (const_int 0)])))]
9130 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9131 [(set_attr "type" "ssemov")
9132 (set_attr "prefix" "evex")
9133 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qb from a V8DI source.  Operand 0 is
;; a V16QI memory destination and QImode register operand 2 (constraint
;; "Yk") is printed inside %{...%} after it.  The two parallels list indices
;; 0..7 and 8..15.
9135 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9136 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9140 (match_operand:V8DI 1 "register_operand" "v"))
9143 (parallel [(const_int 0) (const_int 1)
9144 (const_int 2) (const_int 3)
9145 (const_int 4) (const_int 5)
9146 (const_int 6) (const_int 7)]))
9147 (match_operand:QI 2 "register_operand" "Yk"))
9150 (parallel [(const_int 8) (const_int 9)
9151 (const_int 10) (const_int 11)
9152 (const_int 12) (const_int 13)
9153 (const_int 14) (const_int 15)]))))]
9155 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9156 [(set_attr "type" "ssemov")
9157 (set_attr "memory" "store")
9158 (set_attr "prefix" "evex")
9159 (set_attr "mode" "TI")])
9161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9163 ;; Parallel integral arithmetic
9165 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for neg<mode>2 on VI_AVX2 modes.  The preparation statement
;; creates operands[2] as a register holding the all-zero constant of
;; <MODE>mode (CONST0_RTX forced into a register).
9167 (define_expand "neg<mode>2"
9168 [(set (match_operand:VI_AVX2 0 "register_operand")
9171 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
9173 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for vector integer add/subtract on VI_AVX2 modes.  Both inputs
;; may be register or memory; the operands are passed through
;; ix86_fixup_binary_operands_no_copy with the iterator's <CODE> first.
9175 (define_expand "<plusminus_insn><mode>3"
9176 [(set (match_operand:VI_AVX2 0 "register_operand")
9178 (match_operand:VI_AVX2 1 "nonimmediate_operand")
9179 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
9181 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/subtract expander for VI48_AVX512VL modes: a vec_merge of the
;; plusminus result with operand 3 (vector_move_operand) under mask register
;; operand 4 of mode <avx512fmaskmode>.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before matching.
9183 (define_expand "<plusminus_insn><mode>3_mask"
9184 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9185 (vec_merge:VI48_AVX512VL
9186 (plusminus:VI48_AVX512VL
9187 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9188 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9189 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9190 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9192 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/subtract expander for VI12_AVX512VL modes; same shape as the
;; VI48_AVX512VL expander: vec_merge of the plusminus result with operand 3
;; under mask register operand 4 of mode <avx512fmaskmode>.
9194 (define_expand "<plusminus_insn><mode>3_mask"
9195 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9196 (vec_merge:VI12_AVX512VL
9197 (plusminus:VI12_AVX512VL
9198 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9199 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9200 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9201 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9203 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector integer add/subtract insn for VI_AVX2 modes.  Two alternatives,
;; selected by the "isa" attribute:
;;   noavx - two-operand p<plusminus_mnemonic><ssemodesuffix>, operand 1
;;           tied to operand 0 (constraint "<comm>0");
;;   avx   - three-operand vp<plusminus_mnemonic><ssemodesuffix>.
;; The condition includes ix86_binary_operator_ok for <CODE>.
9205 (define_insn "*<plusminus_insn><mode>3"
9206 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9208 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9209 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9211 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9213 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9214 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9215 [(set_attr "isa" "noavx,avx")
9216 (set_attr "type" "sseiadd")
9217 (set_attr "prefix_data16" "1,*")
9218 (set_attr "prefix" "<mask_prefix3>")
9219 (set_attr "mode" "<sseinsnmode>")])
;; Masked add/subtract insn for VI48_AVX512VL modes.  Operand 3 (constraint
;; "0C") is referenced through %N3 and mask register operand 4 (constraint
;; "Yk") is printed inside %{...%} after the destination.  EVEX-only.
9221 (define_insn "*<plusminus_insn><mode>3_mask"
9222 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9223 (vec_merge:VI48_AVX512VL
9224 (plusminus:VI48_AVX512VL
9225 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9226 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9227 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9228 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9230 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9231 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9232 [(set_attr "type" "sseiadd")
9233 (set_attr "prefix" "evex")
9234 (set_attr "mode" "<sseinsnmode>")])
;; Masked add/subtract insn for VI12_AVX512VL modes; requires
;; TARGET_AVX512BW and ix86_binary_operator_ok.  Operand 3 (constraint "0C")
;; is referenced through %N3 and mask register operand 4 (constraint "Yk")
;; is printed inside %{...%} after the destination.
9236 (define_insn "*<plusminus_insn><mode>3_mask"
9237 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9238 (vec_merge:VI12_AVX512VL
9239 (plusminus:VI12_AVX512VL
9240 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9241 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9242 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9243 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9244 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9245 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9246 [(set_attr "type" "sseiadd")
9247 (set_attr "prefix" "evex")
9248 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus operations on VI12_AVX2 modes.  Enabled
;; under TARGET_SSE2 together with the <mask_mode512bit_condition> and
;; <mask_avx512bw_condition> substitutions; operands are passed through
;; ix86_fixup_binary_operands_no_copy.
9250 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9251 [(set (match_operand:VI12_AVX2 0 "register_operand")
9252 (sat_plusminus:VI12_AVX2
9253 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
9254 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
9255 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9256 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus operations on VI12_AVX2 modes.  Alternatives
;; (per the "isa" attribute): noavx uses the two-operand p<...> form with
;; operand 1 tied to operand 0; avx uses the three-operand vp<...> form,
;; which also carries the <mask_operand3> substitution.
9258 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9259 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9260 (sat_plusminus:VI12_AVX2
9261 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9262 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9263 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9264 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9266 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9267 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9268 [(set_attr "isa" "noavx,avx")
9269 (set_attr "type" "sseiadd")
9270 (set_attr "prefix_data16" "1,*")
9271 (set_attr "prefix" "orig,maybe_evex")
9272 (set_attr "mode" "TI")])
;; Multiply expander for VI1_AVX512 modes.  Both inputs must be registers;
;; the expansion is delegated to ix86_expand_vecop_qihi with code MULT.
9274 (define_expand "mul<mode>3<mask_name>"
9275 [(set (match_operand:VI1_AVX512 0 "register_operand")
9276 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9277 (match_operand:VI1_AVX512 2 "register_operand")))]
9278 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9280 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiply expander for VI2_AVX2 modes.  Inputs may be register or memory;
;; operands are passed through ix86_fixup_binary_operands_no_copy with MULT.
9284 (define_expand "mul<mode>3<mask_name>"
9285 [(set (match_operand:VI2_AVX2 0 "register_operand")
9286 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
9287 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
9288 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9289 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiply insn for VI2_AVX2 modes: pmullw (noavx, operand 1 tied to
;; operand 0) or three-operand vpmullw (avx).  The "%" in operand 1's
;; constraint marks operands 1 and 2 as commutative.
9291 (define_insn "*mul<mode>3<mask_name>"
9292 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9293 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
9294 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9296 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9297 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9299 pmullw\t{%2, %0|%0, %2}
9300 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9301 [(set_attr "isa" "noavx,avx")
9302 (set_attr "type" "sseimul")
9303 (set_attr "prefix_data16" "1,*")
9304 (set_attr "prefix" "orig,vex")
9305 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for VI2_AVX2 modes.  The RTL widens both
;; inputs with any_extend to <ssedoublemode>, multiplies them, and applies a
;; logical right shift (lshiftrt) to the product.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy with MULT.
9307 (define_expand "<s>mul<mode>3_highpart<mask_name>"
9308 [(set (match_operand:VI2_AVX2 0 "register_operand")
9310 (lshiftrt:<ssedoublemode>
9311 (mult:<ssedoublemode>
9312 (any_extend:<ssedoublemode>
9313 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
9314 (any_extend:<ssedoublemode>
9315 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
9318 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9319 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn for VI2_AVX2 modes: pmulh<u>w (noavx, operand 1
;; tied to operand 0) or three-operand vpmulh<u>w (avx).  The RTL is the
;; same widen / multiply / lshiftrt shape used by the expander of the same
;; name.
9321 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
9322 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9324 (lshiftrt:<ssedoublemode>
9325 (mult:<ssedoublemode>
9326 (any_extend:<ssedoublemode>
9327 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
9328 (any_extend:<ssedoublemode>
9329 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
9332 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9333 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9335 pmulh<u>w\t{%2, %0|%0, %2}
9336 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9337 [(set_attr "isa" "noavx,avx")
9338 (set_attr "type" "sseimul")
9339 (set_attr "prefix_data16" "1,*")
9340 (set_attr "prefix" "orig,vex")
9341 (set_attr "mode" "<sseinsnmode>")])
;; Expander producing a V8DI result from the even-indexed elements
;; (0, 2, ..., 14) of V16SI operands 1 and 2.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with MULT.
9343 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9344 [(set (match_operand:V8DI 0 "register_operand")
9348 (match_operand:V16SI 1 "nonimmediate_operand")
9349 (parallel [(const_int 0) (const_int 2)
9350 (const_int 4) (const_int 6)
9351 (const_int 8) (const_int 10)
9352 (const_int 12) (const_int 14)])))
9355 (match_operand:V16SI 2 "nonimmediate_operand")
9356 (parallel [(const_int 0) (const_int 2)
9357 (const_int 4) (const_int 6)
9358 (const_int 8) (const_int 10)
9359 (const_int 12) (const_int 14)])))))]
9361 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuludq on V16SI inputs producing V8DI, using the even-indexed elements
;; (0, 2, ..., 14) of each input.  Requires TARGET_AVX512F and
;; ix86_binary_operator_ok; operands 1 and 2 are commutative ("%").
9363 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9364 [(set (match_operand:V8DI 0 "register_operand" "=v")
9368 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9369 (parallel [(const_int 0) (const_int 2)
9370 (const_int 4) (const_int 6)
9371 (const_int 8) (const_int 10)
9372 (const_int 12) (const_int 14)])))
9375 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9376 (parallel [(const_int 0) (const_int 2)
9377 (const_int 4) (const_int 6)
9378 (const_int 8) (const_int 10)
9379 (const_int 12) (const_int 14)])))))]
9380 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9381 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9382 [(set_attr "isa" "avx512f")
9383 (set_attr "type" "sseimul")
9384 (set_attr "prefix_extra" "1")
9385 (set_attr "prefix" "evex")
9386 (set_attr "mode" "XI")])
;; Expander producing a V4DI result from the even-indexed elements
;; (0, 2, 4, 6) of V8SI operands 1 and 2.  Enabled under TARGET_AVX2 plus the
;; <mask_avx512vl_condition> substitution.
9388 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9389 [(set (match_operand:V4DI 0 "register_operand")
9393 (match_operand:V8SI 1 "nonimmediate_operand")
9394 (parallel [(const_int 0) (const_int 2)
9395 (const_int 4) (const_int 6)])))
9398 (match_operand:V8SI 2 "nonimmediate_operand")
9399 (parallel [(const_int 0) (const_int 2)
9400 (const_int 4) (const_int 6)])))))]
9401 "TARGET_AVX2 && <mask_avx512vl_condition>"
9402 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on V8SI inputs producing V4DI, using the even-indexed elements
;; (0, 2, 4, 6) of each input.  Requires TARGET_AVX2, the
;; <mask_avx512vl_condition> substitution and ix86_binary_operator_ok.
9404 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9405 [(set (match_operand:V4DI 0 "register_operand" "=v")
9409 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9410 (parallel [(const_int 0) (const_int 2)
9411 (const_int 4) (const_int 6)])))
9414 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9415 (parallel [(const_int 0) (const_int 2)
9416 (const_int 4) (const_int 6)])))))]
9417 "TARGET_AVX2 && <mask_avx512vl_condition>
9418 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9419 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9420 [(set_attr "type" "sseimul")
9421 (set_attr "prefix" "maybe_evex")
9422 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from elements 0 and 2 of V4SI operands 1
;; and 2.  Enabled under TARGET_SSE2 plus the <mask_avx512vl_condition>
;; substitution.
9424 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9425 [(set (match_operand:V2DI 0 "register_operand")
9429 (match_operand:V4SI 1 "nonimmediate_operand")
9430 (parallel [(const_int 0) (const_int 2)])))
9433 (match_operand:V4SI 2 "nonimmediate_operand")
9434 (parallel [(const_int 0) (const_int 2)])))))]
9435 "TARGET_SSE2 && <mask_avx512vl_condition>"
9436 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq on V4SI inputs producing V2DI, using elements 0 and 2
;; of each input.  The noavx alternative ties operand 1 to operand 0; the
;; avx alternative is three-operand and carries <mask_operand3>.
9438 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9439 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9443 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9444 (parallel [(const_int 0) (const_int 2)])))
9447 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9448 (parallel [(const_int 0) (const_int 2)])))))]
9449 "TARGET_SSE2 && <mask_avx512vl_condition>
9450 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9452 pmuludq\t{%2, %0|%0, %2}
9453 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9454 [(set_attr "isa" "noavx,avx")
9455 (set_attr "type" "sseimul")
9456 (set_attr "prefix_data16" "1,*")
9457 (set_attr "prefix" "orig,maybe_evex")
9458 (set_attr "mode" "TI")])
;; Signed counterpart (by name) of vec_widen_umult_even_v16si: V8DI result
;; from the even-indexed elements (0, 2, ..., 14) of V16SI operands 1 and 2.
;; Operands are passed through ix86_fixup_binary_operands_no_copy with MULT.
9460 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9461 [(set (match_operand:V8DI 0 "register_operand")
9465 (match_operand:V16SI 1 "nonimmediate_operand")
9466 (parallel [(const_int 0) (const_int 2)
9467 (const_int 4) (const_int 6)
9468 (const_int 8) (const_int 10)
9469 (const_int 12) (const_int 14)])))
9472 (match_operand:V16SI 2 "nonimmediate_operand")
9473 (parallel [(const_int 0) (const_int 2)
9474 (const_int 4) (const_int 6)
9475 (const_int 8) (const_int 10)
9476 (const_int 12) (const_int 14)])))))]
9478 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuldq on V16SI inputs producing V8DI, using the even-indexed elements
;; (0, 2, ..., 14) of each input.  Requires TARGET_AVX512F and
;; ix86_binary_operator_ok; operands 1 and 2 are commutative ("%").
9480 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9481 [(set (match_operand:V8DI 0 "register_operand" "=v")
9485 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9486 (parallel [(const_int 0) (const_int 2)
9487 (const_int 4) (const_int 6)
9488 (const_int 8) (const_int 10)
9489 (const_int 12) (const_int 14)])))
9492 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9493 (parallel [(const_int 0) (const_int 2)
9494 (const_int 4) (const_int 6)
9495 (const_int 8) (const_int 10)
9496 (const_int 12) (const_int 14)])))))]
9497 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9498 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9499 [(set_attr "isa" "avx512f")
9500 (set_attr "type" "sseimul")
9501 (set_attr "prefix_extra" "1")
9502 (set_attr "prefix" "evex")
9503 (set_attr "mode" "XI")])
;; Signed counterpart (by name) of vec_widen_umult_even_v8si: V4DI result
;; from the even-indexed elements (0, 2, 4, 6) of V8SI operands 1 and 2.
;; Enabled under TARGET_AVX2 plus the <mask_avx512vl_condition> substitution.
9505 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9506 [(set (match_operand:V4DI 0 "register_operand")
9510 (match_operand:V8SI 1 "nonimmediate_operand")
9511 (parallel [(const_int 0) (const_int 2)
9512 (const_int 4) (const_int 6)])))
9515 (match_operand:V8SI 2 "nonimmediate_operand")
9516 (parallel [(const_int 0) (const_int 2)
9517 (const_int 4) (const_int 6)])))))]
9518 "TARGET_AVX2 && <mask_avx512vl_condition>"
9519 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on V8SI inputs producing V4DI, using the even-indexed elements
;; (0, 2, 4, 6) of each input.
;; NOTE(review): the "prefix" attribute here is "vex", while the unsigned
;; V8SI counterpart uses "maybe_evex" with the same "v" constraints and
;; <mask_operand3> template -- confirm whether the difference is intended.
9521 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9522 [(set (match_operand:V4DI 0 "register_operand" "=v")
9526 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9527 (parallel [(const_int 0) (const_int 2)
9528 (const_int 4) (const_int 6)])))
9531 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9532 (parallel [(const_int 0) (const_int 2)
9533 (const_int 4) (const_int 6)])))))]
9535 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9536 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9537 [(set_attr "type" "sseimul")
9538 (set_attr "prefix_extra" "1")
9539 (set_attr "prefix" "vex")
9540 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from elements 0 and 2 of V4SI operands 1
;; and 2.  Enabled under TARGET_SSE4_1 plus the <mask_avx512vl_condition>
;; substitution; operands are passed through
;; ix86_fixup_binary_operands_no_copy with MULT.
9542 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9543 [(set (match_operand:V2DI 0 "register_operand")
9547 (match_operand:V4SI 1 "nonimmediate_operand")
9548 (parallel [(const_int 0) (const_int 2)])))
9551 (match_operand:V4SI 2 "nonimmediate_operand")
9552 (parallel [(const_int 0) (const_int 2)])))))]
9553 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
9554 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq / vpmuldq on V4SI inputs producing V2DI, using elements 0 and 2 of
;; each input.  Three alternatives: two noavx forms (destination constraints
;; "Yr" and "*x", both with operand 1 tied to operand 0) emitting pmuldq, and
;; one avx form emitting three-operand vpmuldq with <mask_operand3>.
9556 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
9557 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
9561 (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
9562 (parallel [(const_int 0) (const_int 2)])))
9565 (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
9566 (parallel [(const_int 0) (const_int 2)])))))]
9567 "TARGET_SSE4_1 && <mask_avx512vl_condition>
9568 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9570 pmuldq\t{%2, %0|%0, %2}
9571 pmuldq\t{%2, %0|%0, %2}
9572 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9573 [(set_attr "isa" "noavx,noavx,avx")
9574 (set_attr "type" "sseimul")
9575 (set_attr "prefix_data16" "1,1,*")
9576 (set_attr "prefix_extra" "1")
9577 (set_attr "prefix" "orig,orig,vex")
9578 (set_attr "mode" "TI")])
;; vpmaddwd expressed as UNSPEC_PMADDWD512 over two VI2_AVX2 inputs
;; (operand 1 must be a register, operand 2 may be memory); the result has
;; mode <sseunpackmode>.  Requires TARGET_AVX512BW together with the
;; <mask_mode512bit_condition> substitution.
9580 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
9581 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
9582 (unspec:<sseunpackmode>
9583 [(match_operand:VI2_AVX2 1 "register_operand" "v")
9584 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
9585 UNSPEC_PMADDWD512))]
9586 "TARGET_AVX512BW && <mask_mode512bit_condition>"
9587 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9588 [(set_attr "type" "sseiadd")
9589 (set_attr "prefix" "evex")
9590 (set_attr "mode" "XI")])
;; Expander for the 256-bit pmaddwd pattern: V8SI result built from V16HI
;; operands 1 and 2.  The RTL selects the even-indexed elements
;; (0, 2, ..., 14) of each operand and, via match_dup, the odd-indexed
;; elements (1, 3, ..., 15) of the same operands as V8HI vectors.
;; Operands are passed through ix86_fixup_binary_operands_no_copy with MULT.
9592 (define_expand "avx2_pmaddwd"
9593 [(set (match_operand:V8SI 0 "register_operand")
9598 (match_operand:V16HI 1 "nonimmediate_operand")
9599 (parallel [(const_int 0) (const_int 2)
9600 (const_int 4) (const_int 6)
9601 (const_int 8) (const_int 10)
9602 (const_int 12) (const_int 14)])))
9605 (match_operand:V16HI 2 "nonimmediate_operand")
9606 (parallel [(const_int 0) (const_int 2)
9607 (const_int 4) (const_int 6)
9608 (const_int 8) (const_int 10)
9609 (const_int 12) (const_int 14)]))))
9612 (vec_select:V8HI (match_dup 1)
9613 (parallel [(const_int 1) (const_int 3)
9614 (const_int 5) (const_int 7)
9615 (const_int 9) (const_int 11)
9616 (const_int 13) (const_int 15)])))
9618 (vec_select:V8HI (match_dup 2)
9619 (parallel [(const_int 1) (const_int 3)
9620 (const_int 5) (const_int 7)
9621 (const_int 9) (const_int 11)
9622 (const_int 13) (const_int 15)]))))))]
9624 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmaddwd on V16HI inputs producing V8SI.  The RTL combines the
;; even-indexed and (via match_dup) odd-indexed V8HI selections of operands 1
;; and 2.  Requires TARGET_AVX2 and ix86_binary_operator_ok; operands 1 and 2
;; are commutative ("%").
9626 (define_insn "*avx2_pmaddwd"
9627 [(set (match_operand:V8SI 0 "register_operand" "=x")
9632 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
9633 (parallel [(const_int 0) (const_int 2)
9634 (const_int 4) (const_int 6)
9635 (const_int 8) (const_int 10)
9636 (const_int 12) (const_int 14)])))
9639 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9640 (parallel [(const_int 0) (const_int 2)
9641 (const_int 4) (const_int 6)
9642 (const_int 8) (const_int 10)
9643 (const_int 12) (const_int 14)]))))
9646 (vec_select:V8HI (match_dup 1)
9647 (parallel [(const_int 1) (const_int 3)
9648 (const_int 5) (const_int 7)
9649 (const_int 9) (const_int 11)
9650 (const_int 13) (const_int 15)])))
9652 (vec_select:V8HI (match_dup 2)
9653 (parallel [(const_int 1) (const_int 3)
9654 (const_int 5) (const_int 7)
9655 (const_int 9) (const_int 11)
9656 (const_int 13) (const_int 15)]))))))]
9657 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9658 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9659 [(set_attr "type" "sseiadd")
9660 (set_attr "prefix" "vex")
9661 (set_attr "mode" "OI")])
;; Expander for the 128-bit pmaddwd pattern: V4SI result built from V8HI
;; operands 1 and 2.  The RTL selects the even-indexed elements (0, 2, 4, 6)
;; of each operand and, via match_dup, the odd-indexed elements (1, 3, 5, 7)
;; as V4HI vectors.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with MULT.
9663 (define_expand "sse2_pmaddwd"
9664 [(set (match_operand:V4SI 0 "register_operand")
9669 (match_operand:V8HI 1 "nonimmediate_operand")
9670 (parallel [(const_int 0) (const_int 2)
9671 (const_int 4) (const_int 6)])))
9674 (match_operand:V8HI 2 "nonimmediate_operand")
9675 (parallel [(const_int 0) (const_int 2)
9676 (const_int 4) (const_int 6)]))))
9679 (vec_select:V4HI (match_dup 1)
9680 (parallel [(const_int 1) (const_int 3)
9681 (const_int 5) (const_int 7)])))
9683 (vec_select:V4HI (match_dup 2)
9684 (parallel [(const_int 1) (const_int 3)
9685 (const_int 5) (const_int 7)]))))))]
9687 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd / vpmaddwd on V8HI inputs producing V4SI.  The noavx alternative
;; ties operand 1 to operand 0 and emits two-operand pmaddwd; the avx
;; alternative emits three-operand vpmaddwd.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
9689 (define_insn "*sse2_pmaddwd"
9690 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9695 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9696 (parallel [(const_int 0) (const_int 2)
9697 (const_int 4) (const_int 6)])))
9700 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9701 (parallel [(const_int 0) (const_int 2)
9702 (const_int 4) (const_int 6)]))))
9705 (vec_select:V4HI (match_dup 1)
9706 (parallel [(const_int 1) (const_int 3)
9707 (const_int 5) (const_int 7)])))
9709 (vec_select:V4HI (match_dup 2)
9710 (parallel [(const_int 1) (const_int 3)
9711 (const_int 5) (const_int 7)]))))))]
9712 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9714 pmaddwd\t{%2, %0|%0, %2}
9715 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9716 [(set_attr "isa" "noavx,avx")
9717 (set_attr "type" "sseiadd")
9718 (set_attr "atom_unit" "simul")
9719 (set_attr "prefix_data16" "1,*")
9720 (set_attr "prefix" "orig,vex")
9721 (set_attr "mode" "TI")])
;; vpmullq for VI8 modes.  Operand 1 must be a register, operand 2 may be
;; memory.  Requires TARGET_AVX512DQ together with the
;; <mask_mode512bit_condition> substitution; EVEX-encoded.
9723 (define_insn "avx512dq_mul<mode>3<mask_name>"
9724 [(set (match_operand:VI8 0 "register_operand" "=v")
9726 (match_operand:VI8 1 "register_operand" "v")
9727 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9728 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9729 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9730 [(set_attr "type" "sseimul")
9731 (set_attr "prefix" "evex")
9732 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for VI4_AVX512F modes; inputs are
;; general_vector_operand.  Two expansion paths are visible:
;;   - force any input that is not a nonimmediate_operand into a register,
;;     then run ix86_fixup_binary_operands_no_copy with MULT;
;;   - call ix86_expand_sse2_mulv4si3 on the three operands.
;; NOTE(review): the selector between the two paths is presumably
;; TARGET_SSE4_1 (the matching insn below requires it) -- confirm.
9734 (define_expand "mul<mode>3<mask_name>"
9735 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9737 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9738 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9739 "TARGET_SSE2 && <mask_mode512bit_condition>"
9743 if (!nonimmediate_operand (operands[1], <MODE>mode))
9744 operands[1] = force_reg (<MODE>mode, operands[1]);
9745 if (!nonimmediate_operand (operands[2], <MODE>mode))
9746 operands[2] = force_reg (<MODE>mode, operands[2]);
9747 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9751 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; pmulld / vpmulld for VI4_AVX512F modes.  Three alternatives: two noavx
;; forms (destination constraints "Yr" and "*x", operand 1 tied to operand 0)
;; emitting pmulld, and one avx form emitting three-operand vpmulld with
;; <mask_operand3>.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
9756 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9757 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
9759 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
9760 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
9761 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9763 pmulld\t{%2, %0|%0, %2}
9764 pmulld\t{%2, %0|%0, %2}
9765 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9766 [(set_attr "isa" "noavx,noavx,avx")
9767 (set_attr "type" "sseimul")
9768 (set_attr "prefix_extra" "1")
9769 (set_attr "prefix" "<mask_prefix4>")
9770 (set_attr "btver2_decode" "vector,vector,vector")
9771 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for VI8_AVX2_AVX512F modes.  All three operands are
;; registers; the expansion is delegated to ix86_expand_sse2_mulvxdi3.
9773 (define_expand "mul<mode>3"
9774 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9775 (mult:VI8_AVX2_AVX512F
9776 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9777 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9780 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply expander (hi variant) for VI124_AVX2 inputs; result
;; operand 0 has mode <sseunpackmode>.  The expansion is delegated to
;; ix86_expand_mul_widen_hilo.
9784 (define_expand "vec_widen_<s>mult_hi_<mode>"
9785 [(match_operand:<sseunpackmode> 0 "register_operand")
9786 (any_extend:<sseunpackmode>
9787 (match_operand:VI124_AVX2 1 "register_operand"))
9788 (match_operand:VI124_AVX2 2 "register_operand")]
9791 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply expander (lo variant) for VI124_AVX2 inputs; result
;; operand 0 has mode <sseunpackmode>.  The expansion is delegated to
;; ix86_expand_mul_widen_hilo.
9796 (define_expand "vec_widen_<s>mult_lo_<mode>"
9797 [(match_operand:<sseunpackmode> 0 "register_operand")
9798 (any_extend:<sseunpackmode>
9799 (match_operand:VI124_AVX2 1 "register_operand"))
9800 (match_operand:VI124_AVX2 2 "register_operand")]
9803 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9808 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
9809 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Operands: 0 is the V2DI result register; 1 and 2 are the V4SI inputs
;; (register or memory).  The expansion is delegated to
;; ix86_expand_mul_widen_evenodd.
9810 (define_expand "vec_widen_smult_even_v4si"
9811 [(match_operand:V2DI 0 "register_operand")
9812 (match_operand:V4SI 1 "nonimmediate_operand")
9813 (match_operand:V4SI 2 "nonimmediate_operand")]
9816 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply expander (odd variant) for VI4_AVX512F inputs, which
;; are general_vector_operand; result operand 0 has mode <sseunpackmode>.
;; The expansion is delegated to ix86_expand_mul_widen_evenodd.
9821 (define_expand "vec_widen_<s>mult_odd_<mode>"
9822 [(match_operand:<sseunpackmode> 0 "register_operand")
9823 (any_extend:<sseunpackmode>
9824 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9825 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9828 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix appended to the pmaddwd generator name in sdot_prod<mode>:
;; "512v32hi" for V32HI, empty for V16HI and V8HI.
9833 (define_mode_attr SDOT_PMADD_SUF
9834 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; Signed dot-product expander for the VI2_AVX2 modes.
;; Operands 1 and 2 are the inputs; operands 0 and 3 are registers in
;; <sseunpackmode>.  The code emits pmaddwd of operands 1 and 2 into a
;; fresh temporary T, then a SET of operand 0 to a PLUS in
;; <sseunpackmode> (presumably T + operand 3 -- confirm against the
;; full definition).
9836 (define_expand "sdot_prod<mode>"
9837 [(match_operand:<sseunpackmode> 0 "register_operand")
9838 (match_operand:VI2_AVX2 1 "register_operand")
9839 (match_operand:VI2_AVX2 2 "register_operand")
9840 (match_operand:<sseunpackmode> 3 "register_operand")]
9843 rtx t = gen_reg_rtx (<sseunpackmode>mode);
9844 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
9845 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9846 gen_rtx_PLUS (<sseunpackmode>mode,
9851 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9852 ;; back together when madd is available.
;; Two XOP insns are chained: pmacsdqh takes operand 3 as its last
;; argument and writes temporary T; pmacsdql then takes T as its last
;; argument and writes operand 0.
9853 (define_expand "sdot_prodv4si"
9854 [(match_operand:V2DI 0 "register_operand")
9855 (match_operand:V4SI 1 "register_operand")
9856 (match_operand:V4SI 2 "register_operand")
9857 (match_operand:V2DI 3 "register_operand")]
9860 rtx t = gen_reg_rtx (V2DImode);
9861 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
9862 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Sum-of-absolute-differences expander for V16QI inputs.
;; psadbw of operands 1 and 2 goes into a V2DI temporary, which is moved
;; with convert_move into a V4SI temporary; that is then added to
;; operand 3 with addv4si3, giving operand 0.
9866 (define_expand "usadv16qi"
9867 [(match_operand:V4SI 0 "register_operand")
9868 (match_operand:V16QI 1 "register_operand")
9869 (match_operand:V16QI 2 "nonimmediate_operand")
9870 (match_operand:V4SI 3 "nonimmediate_operand")]
9873 rtx t1 = gen_reg_rtx (V2DImode);
9874 rtx t2 = gen_reg_rtx (V4SImode);
9875 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
9876 convert_move (t2, t1, 0);
9877 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; Sum-of-absolute-differences expander for V32QI inputs.
;; Same sequence as usadv16qi at double width: avx2_psadbw into a V4DI
;; temporary, convert_move into a V8SI temporary, then addv8si3 with
;; operand 3 into operand 0.
9881 (define_expand "usadv32qi"
9882 [(match_operand:V8SI 0 "register_operand")
9883 (match_operand:V32QI 1 "register_operand")
9884 (match_operand:V32QI 2 "nonimmediate_operand")
9885 (match_operand:V8SI 3 "nonimmediate_operand")]
9888 rtx t1 = gen_reg_rtx (V4DImode);
9889 rtx t2 = gen_reg_rtx (V8SImode);
9890 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
9891 convert_move (t2, t1, 0);
9892 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; Shift insn for the VI24_AVX2 modes, emitting psra / vpsra.
;; Alternative 0 (noavx) ties the source to the destination and emits the
;; two-operand psra form; alternative 1 (avx) emits three-operand vpsra.
;; The SI count (operand 2) uses constraint "xN" in both alternatives.
;; length_immediate falls back to "0" when operand 2 is not a const_int.
9896 (define_insn "ashr<mode>3"
9897 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
9899 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
9900 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
9903 psra<ssemodesuffix>\t{%2, %0|%0, %2}
9904 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9905 [(set_attr "isa" "noavx,avx")
9906 (set_attr "type" "sseishft")
9907 (set (attr "length_immediate")
9908 (if_then_else (match_operand 2 "const_int_operand")
9910 (const_string "0")))
9911 (set_attr "prefix_data16" "1,*")
9912 (set_attr "prefix" "orig,vex")
9913 (set_attr "mode" "<sseinsnmode>")])
;; Arithmetic right shift (vpsra) for the VI24_AVX512BW_1 modes, with
;; optional masking via <mask_name> / <mask_operand3>.
;; Alternative 0: register source, count in a "v" register.
;; Alternative 1: register-or-memory source, count as an "N" immediate.
9915 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
9916 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
9917 (ashiftrt:VI24_AVX512BW_1
9918 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
9919 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9921 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9922 [(set_attr "type" "sseishft")
9923 (set (attr "length_immediate")
9924 (if_then_else (match_operand 2 "const_int_operand")
9926 (const_string "0")))
9927 (set_attr "mode" "<sseinsnmode>")])
;; V2DI shift emitting vpsraq, with optional masking.  Unlike the
;; sibling shift patterns the count operand here is DImode.
;; Alternative 0: register source, count in a "v" register.
;; Alternative 1: register-or-memory source, count as an "N" immediate.
9929 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
9930 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
9932 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
9933 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
9935 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9936 [(set_attr "type" "sseishft")
9937 (set (attr "length_immediate")
9938 (if_then_else (match_operand 2 "const_int_operand")
9940 (const_string "0")))
9941 (set_attr "mode" "TI")])
;; Arithmetic right shift (vpsra) for the VI248_AVX512BW_AVX512VL modes,
;; with optional masking.  Same two alternatives as the patterns above:
;; register count with register source, or immediate count with
;; register-or-memory source.
9943 (define_insn "ashr<mode>3<mask_name>"
9944 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
9945 (ashiftrt:VI248_AVX512BW_AVX512VL
9946 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
9947 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9949 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9950 [(set_attr "type" "sseishft")
9951 (set (attr "length_immediate")
9952 (if_then_else (match_operand 2 "const_int_operand")
9954 (const_string "0")))
9955 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) for the VI2_AVX2_AVX512BW modes.
;; Requires TARGET_SSE2 plus the <mask_mode512bit_condition> and
;; <mask_avx512bw_condition> substitutions.
;; Alternative 0 (noavx): destructive two-operand p<vshift> form.
;; Alternative 1 (avx): three-operand vp<vshift> form, optionally masked.
9957 (define_insn "<shift_insn><mode>3<mask_name>"
9958 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
9959 (any_lshift:VI2_AVX2_AVX512BW
9960 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
9961 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9962 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9964 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9965 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9966 [(set_attr "isa" "noavx,avx")
9967 (set_attr "type" "sseishft")
9968 (set (attr "length_immediate")
9969 (if_then_else (match_operand 2 "const_int_operand")
9971 (const_string "0")))
9972 (set_attr "prefix_data16" "1,*")
9973 (set_attr "prefix" "orig,vex")
9974 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) for the VI48_AVX2 modes.
;; Requires TARGET_SSE2 and <mask_mode512bit_condition>.
;; Alternative 0 (noavx): destructive two-operand p<vshift> form.
;; Alternative 1 (avx): three-operand vp<vshift> form, optionally masked.
9976 (define_insn "<shift_insn><mode>3<mask_name>"
9977 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
9978 (any_lshift:VI48_AVX2
9979 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
9980 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9981 "TARGET_SSE2 && <mask_mode512bit_condition>"
9983 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9984 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9985 [(set_attr "isa" "noavx,avx")
9986 (set_attr "type" "sseishft")
9987 (set (attr "length_immediate")
9988 (if_then_else (match_operand 2 "const_int_operand")
9990 (const_string "0")))
9991 (set_attr "prefix_data16" "1,*")
9992 (set_attr "prefix" "orig,vex")
9993 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) for the VI48_512 modes; requires
;; TARGET_AVX512F and always uses the EVEX-prefixed vp<vshift> form.
;; Alternative 0: register source, count in a register or as immediate.
;; Alternative 1: memory source, which only pairs with an immediate count.
9995 (define_insn "<shift_insn><mode>3<mask_name>"
9996 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
9997 (any_lshift:VI48_512
9998 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
9999 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
10000 "TARGET_AVX512F && <mask_mode512bit_condition>"
10001 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10002 [(set_attr "isa" "avx512f")
10003 (set_attr "type" "sseishft")
10004 (set (attr "length_immediate")
10005 (if_then_else (match_operand 2 "const_int_operand")
10007 (const_string "0")))
10008 (set_attr "prefix" "evex")
10009 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes.
;; The shift is performed in V1TImode: operand 1 is reinterpreted with
;; gen_lowpart, the result goes to a fresh V1TI register (operand 3), and
;; operand 4 is that register viewed back in <MODE>mode, which the second
;; SET copies into operand 0.  The count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand.
10012 (define_expand "vec_shl_<mode>"
10013 [(set (match_dup 3)
10015 (match_operand:VI_128 1 "register_operand")
10016 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10017 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10020 operands[1] = gen_lowpart (V1TImode, operands[1]);
10021 operands[3] = gen_reg_rtx (V1TImode);
10022 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-register left shift for the VIMAX_AVX2 modes, emitting pslldq
;; (alternative 0, destructive) or vpslldq (alternative 1).
;; Operand 2 is matched by const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
10025 (define_insn "<sse2_avx2>_ashl<mode>3"
10026 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10028 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10029 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10032 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10034 switch (which_alternative)
10037 return "pslldq\t{%2, %0|%0, %2}";
10039 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10041 gcc_unreachable ();
10044 [(set_attr "isa" "noavx,avx")
10045 (set_attr "type" "sseishft")
10046 (set_attr "length_immediate" "1")
10047 (set_attr "prefix_data16" "1,*")
10048 (set_attr "prefix" "orig,vex")
10049 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes, counterpart of
;; vec_shl_<mode>.  The shift is performed in V1TImode: operand 1 is
;; reinterpreted with gen_lowpart, the result goes to a fresh V1TI
;; register (operand 3), and operand 4 is that register viewed in
;; <MODE>mode, which the second SET copies into operand 0.
10051 (define_expand "vec_shr_<mode>"
10052 [(set (match_dup 3)
10054 (match_operand:VI_128 1 "register_operand")
10055 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10056 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10059 operands[1] = gen_lowpart (V1TImode, operands[1]);
10060 operands[3] = gen_reg_rtx (V1TImode);
10061 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-register logical right shift for the VIMAX_AVX2 modes, emitting
;; psrldq (alternative 0, destructive) or vpsrldq (alternative 1).
;; Operand 2 is matched by const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
10064 (define_insn "<sse2_avx2>_lshr<mode>3"
10065 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10066 (lshiftrt:VIMAX_AVX2
10067 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10068 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10071 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10073 switch (which_alternative)
10076 return "psrldq\t{%2, %0|%0, %2}";
10078 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10080 gcc_unreachable ();
10083 [(set_attr "isa" "noavx,avx")
10084 (set_attr "type" "sseishft")
10085 (set_attr "length_immediate" "1")
10086 (set_attr "atom_unit" "sishuf")
10087 (set_attr "prefix_data16" "1,*")
10088 (set_attr "prefix" "orig,vex")
10089 (set_attr "mode" "<sseinsnmode>")])
;; Variable rotate (any_rotate) for the VI48_AVX512VL modes: the rotate
;; amounts come from a vector operand (register or memory).  Emits
;; vp<rotate>v with optional masking, EVEX-encoded.
10091 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10092 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10093 (any_rotate:VI48_AVX512VL
10094 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10095 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10097 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10098 [(set_attr "prefix" "evex")
10099 (set_attr "mode" "<sseinsnmode>")])
;; Rotate by immediate (any_rotate) for the VI48_AVX512VL modes: the
;; source may be a register or memory and the count is a constant
;; matched by const_0_to_255_operand.  Emits vp<rotate> with optional
;; masking, EVEX-encoded.
10101 (define_insn "<avx512>_<rotate><mode><mask_name>"
10102 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10103 (any_rotate:VI48_AVX512VL
10104 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10105 (match_operand:SI 2 "const_0_to_255_operand")))]
10107 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10108 [(set_attr "prefix" "evex")
10109 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max expander (maxmin iterator) for the
;; VI124_256_AVX512F_AVX512BW modes.  The only preparation is
;; ix86_fixup_binary_operands_no_copy on the operand array.
10111 (define_expand "<code><mode>3"
10112 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10113 (maxmin:VI124_256_AVX512F_AVX512BW
10114 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10115 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10117 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer min/max insn for the VI124_256 modes.  Operand 1 carries
;; the "%" commutative marker; ix86_binary_operator_ok is part of the
;; insn condition.  Emits the VEX-encoded vp<maxmin_int> form.
10119 (define_insn "*avx2_<code><mode>3"
10120 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10122 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10123 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10124 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10125 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10126 [(set_attr "type" "sseiadd")
10127 (set_attr "prefix_extra" "1")
10128 (set_attr "prefix" "vex")
10129 (set_attr "mode" "OI")])
;; Masked integer min/max expander for the VI48_AVX512VL modes.
;; The min/max result is merged (vec_merge) with operand 3 under the mask
;; register in operand 4.  Preparation is limited to
;; ix86_fixup_binary_operands_no_copy.
10131 (define_expand "<code><mode>3_mask"
10132 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10133 (vec_merge:VI48_AVX512VL
10134 (maxmin:VI48_AVX512VL
10135 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10136 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10137 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10138 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10140 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer min/max insn for the VI48_AVX512VL modes, optionally masked.
;; Operand 1 is commutative ("%"); the condition is TARGET_AVX512F plus
;; ix86_binary_operator_ok.
;; NOTE(review): the pattern name says "avx512bw", but the modes are VI48
;; and the condition tests TARGET_AVX512F, so the name looks misleading.
;; The pattern is anonymous ("*"), so renaming would only affect dumps.
10142 (define_insn "*avx512bw_<code><mode>3<mask_name>"
10143 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10144 (maxmin:VI48_AVX512VL
10145 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10146 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10147 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10148 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10149 [(set_attr "type" "sseiadd")
10150 (set_attr "prefix_extra" "1")
10151 (set_attr "prefix" "maybe_evex")
10152 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max insn for the VI12_AVX512VL modes, optionally masked.
;; Operand 1 must be a register (no commutative marker here); operand 2
;; may be a register or memory.  Always EVEX-encoded.
10154 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10155 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10156 (maxmin:VI12_AVX512VL
10157 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10158 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10160 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10161 [(set_attr "type" "sseiadd")
10162 (set_attr "prefix" "evex")
10163 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max expander (maxmin iterator) for the VI8_AVX2_AVX512BW
;; modes.  When the visible condition holds (V8DImode, or
;; TARGET_AVX512VL, together with a term not shown here) the operands are
;; only fixed up for a direct insn match.  Otherwise the operation is
;; lowered to a vector conditional select through ix86_expand_int_vcond:
;;   xops[0]    destination
;;   xops[1..2] values selected: (op1, op2) for SMAX/UMAX, swapped for
;;              SMIN/UMIN
;;   xops[3]    comparison op1 > op2, GTU for UMAX/UMIN and GT otherwise
;;   xops[4..5] the compared operands
10165 (define_expand "<code><mode>3"
10166 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10167 (maxmin:VI8_AVX2_AVX512BW
10168 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10169 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10173 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10174 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10177 enum rtx_code code;
10182 xops[0] = operands[0];
10184 if (<CODE> == SMAX || <CODE> == UMAX)
10186 xops[1] = operands[1];
10187 xops[2] = operands[2];
10191 xops[1] = operands[2];
10192 xops[2] = operands[1];
10195 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10197 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10198 xops[4] = operands[1];
10199 xops[5] = operands[2];
10201 ok = ix86_expand_int_vcond (xops);
;; Min/max expander for the VI124_128 modes; the body tests <CODE>
;; against SMAX and compares with GT, i.e. the signed variant.
;; With SSE4.1, or for V8HImode, the operands are only fixed up for a
;; direct insn match.  Otherwise both inputs are forced into registers
;; and the operation is lowered to a select on op1 > op2 via
;; ix86_expand_int_vcond, picking (op1, op2) for SMAX and the swapped
;; pair otherwise.
10207 (define_expand "<code><mode>3"
10208 [(set (match_operand:VI124_128 0 "register_operand")
10210 (match_operand:VI124_128 1 "nonimmediate_operand")
10211 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10214 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10215 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10221 xops[0] = operands[0];
10222 operands[1] = force_reg (<MODE>mode, operands[1]);
10223 operands[2] = force_reg (<MODE>mode, operands[2]);
10225 if (<CODE> == SMAX)
10227 xops[1] = operands[1];
10228 xops[2] = operands[2];
10232 xops[1] = operands[2];
10233 xops[2] = operands[1];
10236 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10237 xops[4] = operands[1];
10238 xops[5] = operands[2];
10240 ok = ix86_expand_int_vcond (xops);
;; Min/max insn for the VI14_128 modes, optionally masked.
;; Alternatives 0 and 1 (noavx) are the destructive p<maxmin_int> form
;; with the Yr and *x register classes; alternative 2 (avx) is the
;; three-operand vp<maxmin_int> form.  Operand 1 is commutative ("%").
10246 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10247 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
10249 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
10250 (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10252 && <mask_mode512bit_condition>
10253 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10255 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10256 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10257 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10258 [(set_attr "isa" "noavx,noavx,avx")
10259 (set_attr "type" "sseiadd")
10260 (set_attr "prefix_extra" "1,1,*")
10261 (set_attr "prefix" "orig,orig,vex")
10262 (set_attr "mode" "TI")])
;; V8HI min/max insn available from SSE2, emitting p<maxmin_int>w
;; (alternative 0, destructive) or vp<maxmin_int>w (alternative 1).
;; Operand 1 is commutative ("%").
10264 (define_insn "*<code>v8hi3"
10265 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10267 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10268 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
10269 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10271 p<maxmin_int>w\t{%2, %0|%0, %2}
10272 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10273 [(set_attr "isa" "noavx,avx")
10274 (set_attr "type" "sseiadd")
10275 (set_attr "prefix_data16" "1,*")
10276 (set_attr "prefix_extra" "*,1")
10277 (set_attr "prefix" "orig,vex")
10278 (set_attr "mode" "TI")])
;; Min/max expander for the VI124_128 modes; the body tests <CODE>
;; against UMAX and compares with GTU, i.e. the unsigned variant.
;; Three strategies are visible:
;;  * SSE4.1, or V16QImode: fix up operands for a direct insn match.
;;  * UMAX on V8HImode: emit sse2_ussubv8hi3 (op1, op2) followed by
;;    addv8hi3 with op2.  The intermediate goes to a fresh register when
;;    the destination is the same rtx as op2, so op2 is still intact for
;;    the add.
;;  * Otherwise: force both inputs into registers and lower to a select
;;    on op1 >u op2 via ix86_expand_int_vcond, picking (op1, op2) for
;;    UMAX and the swapped pair otherwise.
10280 (define_expand "<code><mode>3"
10281 [(set (match_operand:VI124_128 0 "register_operand")
10283 (match_operand:VI124_128 1 "nonimmediate_operand")
10284 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10287 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10288 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10289 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10291 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10292 operands[1] = force_reg (<MODE>mode, operands[1]);
10293 if (rtx_equal_p (op3, op2))
10294 op3 = gen_reg_rtx (V8HImode);
10295 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10296 emit_insn (gen_addv8hi3 (op0, op3, op2));
10304 operands[1] = force_reg (<MODE>mode, operands[1]);
10305 operands[2] = force_reg (<MODE>mode, operands[2]);
10307 xops[0] = operands[0];
10309 if (<CODE> == UMAX)
10311 xops[1] = operands[1];
10312 xops[2] = operands[2];
10316 xops[1] = operands[2];
10317 xops[2] = operands[1];
10320 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10321 xops[4] = operands[1];
10322 xops[5] = operands[2];
10324 ok = ix86_expand_int_vcond (xops);
;; Min/max insn for the VI24_128 modes, optionally masked.
;; Alternatives 0 and 1 (noavx) are the destructive p<maxmin_int> form
;; with the Yr and *x register classes; alternative 2 (avx) is the
;; three-operand vp<maxmin_int> form.  Operand 1 is commutative ("%").
10330 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10331 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
10333 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
10334 (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10336 && <mask_mode512bit_condition>
10337 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10339 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10340 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10341 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10342 [(set_attr "isa" "noavx,noavx,avx")
10343 (set_attr "type" "sseiadd")
10344 (set_attr "prefix_extra" "1,1,*")
10345 (set_attr "prefix" "orig,orig,vex")
10346 (set_attr "mode" "TI")])
;; V16QI min/max insn available from SSE2, emitting p<maxmin_int>b
;; (alternative 0, destructive) or vp<maxmin_int>b (alternative 1).
;; Operand 1 is commutative ("%").
10348 (define_insn "*<code>v16qi3"
10349 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10351 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10352 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10353 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10355 p<maxmin_int>b\t{%2, %0|%0, %2}
10356 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10357 [(set_attr "isa" "noavx,avx")
10358 (set_attr "type" "sseiadd")
10359 (set_attr "prefix_data16" "1,*")
10360 (set_attr "prefix_extra" "*,1")
10361 (set_attr "prefix" "orig,vex")
10362 (set_attr "mode" "TI")])
10364 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10366 ;; Parallel integral comparisons
10368 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Equality-compare expander for the VI_256 modes.  Both inputs may be
;; registers or memory; preparation is limited to
;; ix86_fixup_binary_operands_no_copy with code EQ.
10370 (define_expand "avx2_eq<mode>3"
10371 [(set (match_operand:VI_256 0 "register_operand")
10373 (match_operand:VI_256 1 "nonimmediate_operand")
10374 (match_operand:VI_256 2 "nonimmediate_operand")))]
10376 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality-compare insn for the VI_256 modes, emitting vpcmpeq.
;; Operand 1 is commutative ("%"); ix86_binary_operator_ok is part of the
;; insn condition.
10378 (define_insn "*avx2_eq<mode>3"
10379 [(set (match_operand:VI_256 0 "register_operand" "=x")
10381 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10382 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10383 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10384 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10385 [(set_attr "type" "ssecmp")
10386 (set_attr "prefix_extra" "1")
10387 (set_attr "prefix" "vex")
10388 (set_attr "mode" "OI")])
;; Mask-producing equality-compare expander for the VI12_AVX512VL modes.
;; The result (operand 0) is in <avx512fmaskmode> and is expressed as
;; UNSPEC_MASKED_EQ of the two inputs.  Preparation is limited to
;; ix86_fixup_binary_operands_no_copy with code EQ.
10390 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10391 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10392 (unspec:<avx512fmaskmode>
10393 [(match_operand:VI12_AVX512VL 1 "register_operand")
10394 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10395 UNSPEC_MASKED_EQ))]
10397 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Mask-producing equality-compare expander for the VI48_AVX512VL modes.
;; The result (operand 0) is in <avx512fmaskmode> and is expressed as
;; UNSPEC_MASKED_EQ of the two inputs.  Preparation is limited to
;; ix86_fixup_binary_operands_no_copy with code EQ.
10399 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10400 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10401 (unspec:<avx512fmaskmode>
10402 [(match_operand:VI48_AVX512VL 1 "register_operand")
10403 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10404 UNSPEC_MASKED_EQ))]
10406 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Mask-producing equality-compare insn for the VI12_AVX512VL (byte and
;; word element) modes.  The result is written to a mask register ("Yk")
;; and is expressed as UNSPEC_MASKED_EQ of the two inputs; operand 1 is
;; commutative ("%").
;; The condition requires TARGET_AVX512BW: the EVEX forms of vpcmpeqb and
;; vpcmpeqw that write a mask register are AVX512BW instructions, so
;; TARGET_AVX512F alone is not a sufficient gate for these modes.
10408 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10409 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10410 (unspec:<avx512fmaskmode>
10411 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10412 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10413 UNSPEC_MASKED_EQ))]
10414 "TARGET_AVX512BW && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10415 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10416 [(set_attr "type" "ssecmp")
10417 (set_attr "prefix_extra" "1")
10418 (set_attr "prefix" "evex")
10419 (set_attr "mode" "<sseinsnmode>")])
;; Mask-producing equality-compare insn for the VI48_AVX512VL modes.
;; The result is written to a mask register ("Yk") and is expressed as
;; UNSPEC_MASKED_EQ of the two inputs; operand 1 is commutative ("%").
;; Requires TARGET_AVX512F and ix86_binary_operator_ok; EVEX-encoded.
10421 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10422 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10423 (unspec:<avx512fmaskmode>
10424 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10425 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10426 UNSPEC_MASKED_EQ))]
10427 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10428 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10429 [(set_attr "type" "ssecmp")
10430 (set_attr "prefix_extra" "1")
10431 (set_attr "prefix" "evex")
10432 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 V2DI equality-compare insn.  Alternatives 0 and 1 (noavx) emit
;; destructive pcmpeqq using the Yr and *x register classes; alternative
;; 2 (avx) emits three-operand vpcmpeqq.  Operand 1 is commutative ("%").
10434 (define_insn "*sse4_1_eqv2di3"
10435 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10437 (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
10438 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10439 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10441 pcmpeqq\t{%2, %0|%0, %2}
10442 pcmpeqq\t{%2, %0|%0, %2}
10443 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10444 [(set_attr "isa" "noavx,noavx,avx")
10445 (set_attr "type" "ssecmp")
10446 (set_attr "prefix_extra" "1")
10447 (set_attr "prefix" "orig,orig,vex")
10448 (set_attr "mode" "TI")])
;; SSE2 equality-compare insn for the VI124_128 modes; disabled when
;; TARGET_XOP is set.  Alternative 0 (noavx) emits destructive pcmpeq,
;; alternative 1 (avx) emits three-operand vpcmpeq.  Operand 1 is
;; commutative ("%").
10450 (define_insn "*sse2_eq<mode>3"
10451 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10453 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10454 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10455 "TARGET_SSE2 && !TARGET_XOP
10456 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10458 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10459 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10460 [(set_attr "isa" "noavx,avx")
10461 (set_attr "type" "ssecmp")
10462 (set_attr "prefix_data16" "1,*")
10463 (set_attr "prefix" "orig,vex")
10464 (set_attr "mode" "TI")])
;; Named equality-compare expander for the VI124_128 modes, enabled for
;; SSE2 without XOP.  Preparation is limited to
;; ix86_fixup_binary_operands_no_copy with code EQ.
10466 (define_expand "sse2_eq<mode>3"
10467 [(set (match_operand:VI124_128 0 "register_operand")
10469 (match_operand:VI124_128 1 "nonimmediate_operand")
10470 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10471 "TARGET_SSE2 && !TARGET_XOP "
10472 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named V2DI equality-compare expander.  Preparation is limited to
;; ix86_fixup_binary_operands_no_copy with code EQ.
10474 (define_expand "sse4_1_eqv2di3"
10475 [(set (match_operand:V2DI 0 "register_operand")
10477 (match_operand:V2DI 1 "nonimmediate_operand")
10478 (match_operand:V2DI 2 "nonimmediate_operand")))]
10480 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than compare insn, emitting pcmpgtq (alternatives 0 and
;; 1, destructive, noavx) or vpcmpgtq (alternative 2, avx).  Operand 1
;; must be a register and is not marked commutative.
10482 (define_insn "sse4_2_gtv2di3"
10483 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10485 (match_operand:V2DI 1 "register_operand" "0,0,x")
10486 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10489 pcmpgtq\t{%2, %0|%0, %2}
10490 pcmpgtq\t{%2, %0|%0, %2}
10491 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10492 [(set_attr "isa" "noavx,noavx,avx")
10493 (set_attr "type" "ssecmp")
10494 (set_attr "prefix_extra" "1")
10495 (set_attr "prefix" "orig,orig,vex")
10496 (set_attr "mode" "TI")])
;; Greater-than compare insn for the VI_256 modes, emitting VEX-encoded
;; vpcmpgt.  Operand 1 must be a register; operand 2 may be memory.
10498 (define_insn "avx2_gt<mode>3"
10499 [(set (match_operand:VI_256 0 "register_operand" "=x")
10501 (match_operand:VI_256 1 "register_operand" "x")
10502 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10504 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10505 [(set_attr "type" "ssecmp")
10506 (set_attr "prefix_extra" "1")
10507 (set_attr "prefix" "vex")
10508 (set_attr "mode" "OI")])
;; Mask-producing greater-than compare insn for the VI48_AVX512VL modes.
;; The result goes to a mask register ("Yk") and is expressed as
;; UNSPEC_MASKED_GT of the two inputs.  Emits EVEX-encoded vpcmpgt.
10510 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10511 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10512 (unspec:<avx512fmaskmode>
10513 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10514 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10516 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10517 [(set_attr "type" "ssecmp")
10518 (set_attr "prefix_extra" "1")
10519 (set_attr "prefix" "evex")
10520 (set_attr "mode" "<sseinsnmode>")])
;; Mask-producing greater-than compare insn for the VI12_AVX512VL modes.
;; The result goes to a mask register ("Yk") and is expressed as
;; UNSPEC_MASKED_GT of the two inputs.  Emits EVEX-encoded vpcmpgt.
10522 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10523 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10524 (unspec:<avx512fmaskmode>
10525 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10526 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10528 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10529 [(set_attr "type" "ssecmp")
10530 (set_attr "prefix_extra" "1")
10531 (set_attr "prefix" "evex")
10532 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 greater-than compare insn for the VI124_128 modes; disabled when
;; TARGET_XOP is set.  Alternative 0 (noavx) emits destructive pcmpgt,
;; alternative 1 (avx) emits three-operand vpcmpgt.  Operand 1 must be a
;; register and is not marked commutative.
10534 (define_insn "sse2_gt<mode>3"
10535 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10537 (match_operand:VI124_128 1 "register_operand" "0,x")
10538 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10539 "TARGET_SSE2 && !TARGET_XOP"
10541 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10542 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10543 [(set_attr "isa" "noavx,avx")
10544 (set_attr "type" "ssecmp")
10545 (set_attr "prefix_data16" "1,*")
10546 (set_attr "prefix" "orig,vex")
10547 (set_attr "mode" "TI")])
;; Vector conditional select for 512-bit modes:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; The data mode (V_512) and comparison mode (VI_512) are iterated
;; independently; the condition requires both to have the same number of
;; units.  Expansion is done by ix86_expand_int_vcond.
10549 (define_expand "vcond<V_512:mode><VI_512:mode>"
10550 [(set (match_operand:V_512 0 "register_operand")
10551 (if_then_else:V_512
10552 (match_operator 3 ""
10553 [(match_operand:VI_512 4 "nonimmediate_operand")
10554 (match_operand:VI_512 5 "general_operand")])
10555 (match_operand:V_512 1)
10556 (match_operand:V_512 2)))]
10558 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10559 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10561 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 256-bit modes:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; The data mode (V_256) and comparison mode (VI_256) must have the same
;; number of units.  Expansion is done by ix86_expand_int_vcond.
10566 (define_expand "vcond<V_256:mode><VI_256:mode>"
10567 [(set (match_operand:V_256 0 "register_operand")
10568 (if_then_else:V_256
10569 (match_operator 3 ""
10570 [(match_operand:VI_256 4 "nonimmediate_operand")
10571 (match_operand:VI_256 5 "general_operand")])
10572 (match_operand:V_256 1)
10573 (match_operand:V_256 2)))]
10575 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10576 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10578 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 128-bit modes with a VI124_128
;; comparison:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; The data mode (V_128) and comparison mode must have the same number of
;; units.  Expansion is done by ix86_expand_int_vcond.
10583 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10584 [(set (match_operand:V_128 0 "register_operand")
10585 (if_then_else:V_128
10586 (match_operator 3 ""
10587 [(match_operand:VI124_128 4 "nonimmediate_operand")
10588 (match_operand:VI124_128 5 "general_operand")])
10589 (match_operand:V_128 1)
10590 (match_operand:V_128 2)))]
10592 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10593 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10595 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select with a V2DI comparison and a VI8F_128 data
;; mode:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Expansion is done by ix86_expand_int_vcond.
10600 (define_expand "vcond<VI8F_128:mode>v2di"
10601 [(set (match_operand:VI8F_128 0 "register_operand")
10602 (if_then_else:VI8F_128
10603 (match_operator 3 ""
10604 [(match_operand:V2DI 4 "nonimmediate_operand")
10605 (match_operand:V2DI 5 "general_operand")])
10606 (match_operand:VI8F_128 1)
10607 (match_operand:VI8F_128 2)))]
10610 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 512-bit vcond expander:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Here operand 5 is a nonimmediate_operand and the selected values
;; (operands 1 and 2) are general_operands.  The data and comparison
;; modes must have the same number of units.  Expansion is done by
;; ix86_expand_int_vcond.
10615 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10616 [(set (match_operand:V_512 0 "register_operand")
10617 (if_then_else:V_512
10618 (match_operator 3 ""
10619 [(match_operand:VI_512 4 "nonimmediate_operand")
10620 (match_operand:VI_512 5 "nonimmediate_operand")])
10621 (match_operand:V_512 1 "general_operand")
10622 (match_operand:V_512 2 "general_operand")))]
10624 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10625 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10627 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 256-bit vcond expander:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Operand 5 is a nonimmediate_operand; operands 1 and 2 are
;; general_operands.  The data and comparison modes must have the same
;; number of units.  Expansion is done by ix86_expand_int_vcond.
10632 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10633 [(set (match_operand:V_256 0 "register_operand")
10634 (if_then_else:V_256
10635 (match_operator 3 ""
10636 [(match_operand:VI_256 4 "nonimmediate_operand")
10637 (match_operand:VI_256 5 "nonimmediate_operand")])
10638 (match_operand:V_256 1 "general_operand")
10639 (match_operand:V_256 2 "general_operand")))]
10641 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10642 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10644 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 128-bit vcond expander with a VI124_128
;; comparison:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Operand 5 is a nonimmediate_operand; operands 1 and 2 are
;; general_operands.  The data and comparison modes must have the same
;; number of units.  Expansion is done by ix86_expand_int_vcond.
10649 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10650 [(set (match_operand:V_128 0 "register_operand")
10651 (if_then_else:V_128
10652 (match_operator 3 ""
10653 [(match_operand:VI124_128 4 "nonimmediate_operand")
10654 (match_operand:VI124_128 5 "nonimmediate_operand")])
10655 (match_operand:V_128 1 "general_operand")
10656 (match_operand:V_128 2 "general_operand")))]
10658 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10659 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10661 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of vcond<VI8F_128:mode>v2di:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; The comparison is on V2DI; operand 5 is a nonimmediate_operand and
;; operands 1 and 2 are general_operands.  Expansion is done by
;; ix86_expand_int_vcond.
10666 (define_expand "vcondu<VI8F_128:mode>v2di"
10667 [(set (match_operand:VI8F_128 0 "register_operand")
10668 (if_then_else:VI8F_128
10669 (match_operator 3 ""
10670 [(match_operand:V2DI 4 "nonimmediate_operand")
10671 (match_operand:V2DI 5 "nonimmediate_operand")])
10672 (match_operand:VI8F_128 1 "general_operand")
10673 (match_operand:VI8F_128 2 "general_operand")))]
10676 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the vec_perm<mode> expander.  The 128-bit modes are
;; unconditional; 256-bit modes need TARGET_AVX2; 512-bit SI/DI/SF/DF
;; modes need TARGET_AVX512F; V32HI needs TARGET_AVX512BW and V64QI needs
;; TARGET_AVX512VBMI.
10681 (define_mode_iterator VEC_PERM_AVX2
10682 [V16QI V8HI V4SI V2DI V4SF V2DF
10683 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10684 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10685 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10686 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10687 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10688 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; Variable permute expander for the VEC_PERM_AVX2 modes.  Operands 1 and
;; 2 are the input vectors and operand 3 is the selector, a register in
;; <sseintvecmode>.  Enabled when any of SSSE3, AVX or XOP is available;
;; all the work is done by ix86_expand_vec_perm.
10690 (define_expand "vec_perm<mode>"
10691 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10692 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10693 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10694 (match_operand:<sseintvecmode> 3 "register_operand")]
10695 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10697 ix86_expand_vec_perm (operands);
;; Modes handled by the vec_perm_const<mode> expander, each with its own
;; enabling target flag: SSE for the 4- and 2-element 128-bit modes, SSE2
;; for V16QI/V8HI, AVX for 256-bit SI/DI/SF/DF modes, AVX2 for
;; V32QI/V16HI, AVX512F for 512-bit SI/DI/SF/DF modes and AVX512BW for
;; V32HI/V64QI.
10701 (define_mode_iterator VEC_PERM_CONST
10702 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10703 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10704 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10705 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10706 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10707 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10708 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10709 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10710 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
;; Constant permute expander for the VEC_PERM_CONST modes.  Operand 3 is
;; the selector in <sseintvecmode> and has no predicate.  The expansion
;; is attempted with ix86_expand_vec_perm_const, whose return value is
;; tested.
10712 (define_expand "vec_perm_const<mode>"
10713 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10714 (match_operand:VEC_PERM_CONST 1 "register_operand")
10715 (match_operand:VEC_PERM_CONST 2 "register_operand")
10716 (match_operand:<sseintvecmode> 3)]
10719 if (ix86_expand_vec_perm_const (operands))
10725 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10727 ;; Parallel bitwise logical operations
10729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expander for the VI modes, implemented as an XOR.
;; The preparation code builds a CONST_VECTOR with every element set to
;; constm1_rtx (one element per unit of <MODE>mode), forces it into a
;; register and stores it as operand 2.
10731 (define_expand "one_cmpl<mode>2"
10732 [(set (match_operand:VI 0 "register_operand")
10733 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
10737 int i, n = GET_MODE_NUNITS (<MODE>mode);
10738 rtvec v = rtvec_alloc (n);
10740 for (i = 0; i < n; ++i)
10741 RTVEC_ELT (v, i) = constm1_rtx;
10743 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not expander for the VI_AVX2 modes: operand 1 (a register) is
;; complemented before being combined with operand 2 (register or
;; memory).
10746 (define_expand "<sse2_avx2>_andnot<mode>3"
10747 [(set (match_operand:VI_AVX2 0 "register_operand")
10749 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10750 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
;; Masked and-not expander for the VI48_AVX512VL modes.  The result is
;; merged (vec_merge) with operand 3 under the mask register in
;; operand 4.
10753 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10754 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10755 (vec_merge:VI48_AVX512VL
10758 (match_operand:VI48_AVX512VL 1 "register_operand"))
10759 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10760 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10761 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Masked and-not expander for the VI12_AVX512VL modes.  The result is
;; merged (vec_merge) with operand 3 under the mask register in
;; operand 4.
10764 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10765 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10766 (vec_merge:VI12_AVX512VL
10769 (match_operand:VI12_AVX512VL 1 "register_operand"))
10770 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10771 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10772 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Unnamed insn *andnot<mode>3 over all VI modes.
;;
;; Alternatives (operand 0 / 1 / 2):
;;   0: =x / 0 / xm   -- isa "noavx", prefix "orig"
;;   1: =v / v / vm   -- isa "avx",   prefix "vex"
;;
;; The assembler text is built at output time:
;;   * a mnemonic is stored in `tmp', chosen by get_attr_mode (insn) and,
;;     for the integer forms, by <MODE>mode; the gcc_assert calls check
;;     that the ISA needed for the selected mode is enabled.  The visible
;;     integer mnemonics are "pandn<ssemodesuffix>", "pandnq" and "pandn".
;;   * a format is stored in `ops', chosen by which_alternative; the two
;;     visible formats are the two-operand "%s" form and the "v"-prefixed
;;     three-operand form.
;;   * snprintf combines both into the static 64-byte buffer `buf'.
;;
;; The "mode" attribute is a cond: <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL; <sseinsnmode> under TARGET_AVX2;
;; under TARGET_AVX, V8SF for modes wider than 16 bytes and <sseinsnmode>
;; otherwise; V4SF when SSE2 is unavailable or the function is optimized
;; for size; <sseinsnmode> as the default.
;;
;; NOTE(review): the case labels, the floating-point mnemonics and the
;; statement returning the buffer are not visible in this view.
10775 (define_insn "*andnot<mode>3"
10776 [(set (match_operand:VI 0 "register_operand" "=x,v")
10778 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10779 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10782 static char buf[64];
10786 switch (get_attr_mode (insn))
10789 gcc_assert (TARGET_AVX512F);
10791 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10793 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10794 switch (<MODE>mode)
10798 if (TARGET_AVX512F)
10800 tmp = "pandn<ssemodesuffix>";
10807 if (TARGET_AVX512VL)
10809 tmp = "pandn<ssemodesuffix>";
10813 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
10818 gcc_assert (TARGET_AVX512F);
10820 gcc_assert (TARGET_AVX);
10822 gcc_assert (TARGET_SSE);
10828 gcc_unreachable ();
10831 switch (which_alternative)
10834 ops = "%s\t{%%2, %%0|%%0, %%2}";
10837 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10840 gcc_unreachable ();
10843 snprintf (buf, sizeof (buf), ops, tmp);
10846 [(set_attr "isa" "noavx,avx")
10847 (set_attr "type" "sselog")
10848 (set (attr "prefix_data16")
10850 (and (eq_attr "alternative" "0")
10851 (eq_attr "mode" "TI"))
10853 (const_string "*")))
10854 (set_attr "prefix" "orig,vex")
10856 (cond [(and (match_test "<MODE_SIZE> == 16")
10857 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10858 (const_string "<ssePSmode>")
10859 (match_test "TARGET_AVX2")
10860 (const_string "<sseinsnmode>")
10861 (match_test "TARGET_AVX")
10863 (match_test "<MODE_SIZE> > 16")
10864 (const_string "V8SF")
10865 (const_string "<sseinsnmode>"))
10866 (ior (not (match_test "TARGET_SSE2"))
10867 (match_test "optimize_function_for_size_p (cfun)"))
10868 (const_string "V4SF")
10870 (const_string "<sseinsnmode>")))])
;; Masked andnot insn for the VI48_AVX512VL modes (EVEX encoded).
;; Operand 1 is a register, operand 2 a register or memory; operand 3 is
;; the other vec_merge arm (constraint "0C": tied to the destination, or
;; the C constant constraint) and operand 4 is the mask register (Yk).
;; The template emits vpandn<ssemodesuffix> with the mask printed as
;; %{%4%} followed by %N3.
;; The ';' after the template string begins a comment in .md syntax, so it
;; has no effect on the pattern.
10872 (define_insn "*andnot<mode>3_mask"
10873 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10874 (vec_merge:VI48_AVX512VL
10877 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
10878 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10879 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10880 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10882 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10883 [(set_attr "type" "sselog")
10884 (set_attr "prefix" "evex")
10885 (set_attr "mode" "<sseinsnmode>")])
;; Masked andnot insn for the VI12_AVX512VL modes (EVEX encoded).
;; Operand layout and template match the VI48_AVX512VL insn of the same
;; name: operands 1/2 are the inputs, operand 3 ("0C") is the other
;; vec_merge arm, operand 4 (Yk) is the mask register.
;; The ';' after the template string begins a comment in .md syntax, so it
;; has no effect on the pattern.
;; NOTE(review): the enabling condition is not visible in this view;
;; confirm that vpandn<ssemodesuffix> yields a valid mnemonic for the
;; byte/word element modes.
10887 (define_insn "*andnot<mode>3_mask"
10888 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10889 (vec_merge:VI12_AVX512VL
10892 (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
10893 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10894 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10895 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10897 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10898 [(set_attr "type" "sselog")
10899 (set_attr "prefix" "evex")
10900 (set_attr "mode" "<sseinsnmode>")])
;; Expander <code><mode>3 for the logical operations over the VI modes.
;; Both inputs accept a register, memory or a constant vector
;; (nonimmediate_or_const_vector_operand).  All work is delegated to
;; ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands).
;; NOTE(review): the set of codes iterated by <code> is not visible here.
10902 (define_expand "<code><mode>3"
10903 [(set (match_operand:VI 0 "register_operand")
10905 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
10906 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
10909 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn for the logical operations <code> over all VI modes, with an
;; optional masked form generated through <mask_name>.
;;
;; Enabled when TARGET_SSE && <mask_mode512bit_condition> and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 carries the
;; '%' constraint modifier, marking operands 1 and 2 as commutative.
;;
;; Alternatives (operand 0 / 1 / 2):
;;   0: =x / %0 / xm   -- isa "noavx"
;;   1: =v / v  / vm   -- isa "avx"
;;
;; The assembler text is built at output time, exactly as in
;; *andnot<mode>3: a mnemonic is stored in `tmp' based on
;; get_attr_mode (insn) and <MODE>mode (the visible integer mnemonics are
;; "p<logic><ssemodesuffix>", "p<logic>q" and "p<logic>"), a format is
;; stored in `ops' based on which_alternative, and snprintf writes the
;; combination into the static 64-byte buffer `buf'.
;;
;; The "mode" attribute uses the same cond as *andnot<mode>3; the
;; "prefix" attribute comes from <mask_prefix3>.
;;
;; NOTE(review): the case labels, the floating-point mnemonics and the
;; statement returning the buffer are not visible in this view.
10913 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10914 [(set (match_operand:VI 0 "register_operand" "=x,v")
10916 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
10917 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10918 "TARGET_SSE && <mask_mode512bit_condition>
10919 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10921 static char buf[64];
10925 switch (get_attr_mode (insn))
10928 gcc_assert (TARGET_AVX512F);
10930 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10932 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10933 switch (<MODE>mode)
10937 if (TARGET_AVX512F)
10939 tmp = "p<logic><ssemodesuffix>";
10946 if (TARGET_AVX512VL)
10948 tmp = "p<logic><ssemodesuffix>";
10952 tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
10957 gcc_assert (TARGET_AVX512F);
10959 gcc_assert (TARGET_AVX);
10961 gcc_assert (TARGET_SSE);
10967 gcc_unreachable ();
10970 switch (which_alternative)
10973 ops = "%s\t{%%2, %%0|%%0, %%2}";
10976 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10979 gcc_unreachable ();
10982 snprintf (buf, sizeof (buf), ops, tmp);
10985 [(set_attr "isa" "noavx,avx")
10986 (set_attr "type" "sselog")
10987 (set (attr "prefix_data16")
10989 (and (eq_attr "alternative" "0")
10990 (eq_attr "mode" "TI"))
10992 (const_string "*")))
10993 (set_attr "prefix" "<mask_prefix3>")
10995 (cond [(and (match_test "<MODE_SIZE> == 16")
10996 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10997 (const_string "<ssePSmode>")
10998 (match_test "TARGET_AVX2")
10999 (const_string "<sseinsnmode>")
11000 (match_test "TARGET_AVX")
11002 (match_test "<MODE_SIZE> > 16")
11003 (const_string "V8SF")
11004 (const_string "<sseinsnmode>"))
11005 (ior (not (match_test "TARGET_SSE2"))
11006 (match_test "optimize_function_for_size_p (cfun)"))
11007 (const_string "V4SF")
11009 (const_string "<sseinsnmode>")))])
;; vptestm for the VI12_AVX512VL modes: an unspec of two vector inputs
;; (operand 1 register, operand 2 register or memory) producing a mask
;; register (Yk) in <avx512fmaskmode>.  <mask_scalar_merge_name> adds the
;; variant that prints <mask_scalar_merge_operand3> after the destination.
;; NOTE(review): the unspec code and the enabling condition are not
;; visible in this view.
11011 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11012 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11013 (unspec:<avx512fmaskmode>
11014 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11015 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11018 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11019 [(set_attr "prefix" "evex")
11020 (set_attr "mode" "<sseinsnmode>")])
;; vptestm for the VI48_AVX512VL modes: an unspec of two vector inputs
;; (operand 1 register, operand 2 register or memory) producing a mask
;; register (Yk) in <avx512fmaskmode>.  <mask_scalar_merge_name> adds the
;; variant that prints <mask_scalar_merge_operand3> after the destination.
;; NOTE(review): the unspec code and the enabling condition are not
;; visible in this view.
11022 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11023 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11024 (unspec:<avx512fmaskmode>
11025 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11026 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11029 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11030 [(set_attr "prefix" "evex")
11031 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI12_AVX512VL modes: same operand layout as the
;; matching testm insn (two vector inputs, mask-register result in
;; <avx512fmaskmode>), but the template emits vptestnm<ssemodesuffix>.
;; NOTE(review): the unspec code and the enabling condition are not
;; visible in this view.
11033 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11034 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11035 (unspec:<avx512fmaskmode>
11036 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11037 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11040 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11041 [(set_attr "prefix" "evex")
11042 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI48_AVX512VL modes: same operand layout as the
;; matching testm insn (two vector inputs, mask-register result in
;; <avx512fmaskmode>), but the template emits vptestnm<ssemodesuffix>.
;; NOTE(review): the unspec code and the enabling condition are not
;; visible in this view.
11044 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11045 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11046 (unspec:<avx512fmaskmode>
11047 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11048 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11051 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11052 [(set_attr "prefix" "evex")
11053 (set_attr "mode" "<sseinsnmode>")])
11055 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11057 ;; Parallel integral element swizzling
11059 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander vec_pack_trunc_<mode>: packs two wide-element vectors
;; (operands 1 and 2, VI248_AVX2_8_AVX512F) into one vector of the
;; narrower-element mode <ssepackmode> (operand 0).
;; Both inputs are reinterpreted in <ssepackmode> with gen_lowpart and
;; passed to ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the 0 presumably requests the even-indexed elements,
;; i.e. the low part of each wide element -- confirm against the helper.
11061 (define_expand "vec_pack_trunc_<mode>"
11062 [(match_operand:<ssepackmode> 0 "register_operand")
11063 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
11064 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
11067 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11068 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11069 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb: concatenates the signed-saturating truncations
;; (ss_truncate) of operands 1 and 2, each in <sseunpackmode>, into one
;; VI1_AVX512 result.  <mask_name> adds the masked variant.
;; Alternative 0 is the two-operand legacy form (isa "noavx"), alternative
;; 1 the three-operand "v" form (isa "avx").
;; NOTE(review): operand 0 uses "=x,x" while the inputs of alternative 1
;; use "v"/"vm", and the sibling packssdw pattern uses "=x,v" -- confirm
;; whether restricting the destination to "x" is intentional.
11073 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11074 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11075 (vec_concat:VI1_AVX512
11076 (ss_truncate:<ssehalfvecmode>
11077 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11078 (ss_truncate:<ssehalfvecmode>
11079 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11080 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11082 packsswb\t{%2, %0|%0, %2}
11083 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11084 [(set_attr "isa" "noavx,avx")
11085 (set_attr "type" "sselog")
11086 (set_attr "prefix_data16" "1,*")
11087 (set_attr "prefix" "orig,maybe_evex")
11088 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw: concatenates the signed-saturating truncations
;; (ss_truncate) of operands 1 and 2, each in <sseunpackmode>, into one
;; VI2_AVX2 result.  <mask_name> adds the masked variant.
;; Alternative 0 is the two-operand legacy form (isa "noavx"), alternative
;; 1 the three-operand "v" form (isa "avx").
;; NOTE(review): the "prefix" attribute is "orig,vex" here but
;; "orig,maybe_evex" on packsswb even though both have a masked variant --
;; confirm whether the difference is intentional.
11090 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11091 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11092 (vec_concat:VI2_AVX2
11093 (ss_truncate:<ssehalfvecmode>
11094 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11095 (ss_truncate:<ssehalfvecmode>
11096 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11097 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11099 packssdw\t{%2, %0|%0, %2}
11100 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11101 [(set_attr "isa" "noavx,avx")
11102 (set_attr "type" "sselog")
11103 (set_attr "prefix_data16" "1,*")
11104 (set_attr "prefix" "orig,vex")
11105 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb: concatenates the unsigned-saturating truncations
;; (us_truncate) of operands 1 and 2, each in <sseunpackmode>, into one
;; VI1_AVX512 result.  <mask_name> adds the masked variant.
;; Alternative 0 is the two-operand legacy form (isa "noavx"), alternative
;; 1 the three-operand "v" form (isa "avx").
;; NOTE(review): operand 0 uses "=x,x" while the inputs of alternative 1
;; use "v"/"vm", and "prefix" is "orig,vex" where packsswb has
;; "orig,maybe_evex" -- confirm whether both are intentional.
11107 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11108 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11109 (vec_concat:VI1_AVX512
11110 (us_truncate:<ssehalfvecmode>
11111 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11112 (us_truncate:<ssehalfvecmode>
11113 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11114 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11116 packuswb\t{%2, %0|%0, %2}
11117 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11118 [(set_attr "isa" "noavx,avx")
11119 (set_attr "type" "sselog")
11120 (set_attr "prefix_data16" "1,*")
11121 (set_attr "prefix" "orig,vex")
11122 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V64QI (512-bit, EVEX).  The selection list pairs index i
;; with index i + 64 for i in 8..15, 24..31, 40..47 and 56..63, i.e. the
;; upper eight bytes of every 16-byte group.
;; NOTE(review): indices >= 64 presumably address operand 2 within a
;; concatenation of the two inputs; the enclosing vec_select/vec_concat
;; lines and the enabling condition are not visible in this view.
11124 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
11125 [(set (match_operand:V64QI 0 "register_operand" "=v")
11128 (match_operand:V64QI 1 "register_operand" "v")
11129 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11130 (parallel [(const_int 8) (const_int 72)
11131 (const_int 9) (const_int 73)
11132 (const_int 10) (const_int 74)
11133 (const_int 11) (const_int 75)
11134 (const_int 12) (const_int 76)
11135 (const_int 13) (const_int 77)
11136 (const_int 14) (const_int 78)
11137 (const_int 15) (const_int 79)
11138 (const_int 24) (const_int 88)
11139 (const_int 25) (const_int 89)
11140 (const_int 26) (const_int 90)
11141 (const_int 27) (const_int 91)
11142 (const_int 28) (const_int 92)
11143 (const_int 29) (const_int 93)
11144 (const_int 30) (const_int 94)
11145 (const_int 31) (const_int 95)
11146 (const_int 40) (const_int 104)
11147 (const_int 41) (const_int 105)
11148 (const_int 42) (const_int 106)
11149 (const_int 43) (const_int 107)
11150 (const_int 44) (const_int 108)
11151 (const_int 45) (const_int 109)
11152 (const_int 46) (const_int 110)
11153 (const_int 47) (const_int 111)
11154 (const_int 56) (const_int 120)
11155 (const_int 57) (const_int 121)
11156 (const_int 58) (const_int 122)
11157 (const_int 59) (const_int 123)
11158 (const_int 60) (const_int 124)
11159 (const_int 61) (const_int 125)
11160 (const_int 62) (const_int 126)
11161 (const_int 63) (const_int 127)])))]
11163 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11164 [(set_attr "type" "sselog")
11165 (set_attr "prefix" "evex")
11166 (set_attr "mode" "XI")])
;; vpunpckhbw on V32QI (256-bit).  The selection list pairs index i with
;; index i + 32 for i in 8..15 and 24..31, i.e. the upper eight bytes of
;; each 16-byte group.  Enabled under TARGET_AVX2 plus
;; <mask_avx512vl_condition> for the masked variant.
;; NOTE(review): the condition omits <mask_avx512bw_condition>, which the
;; matching avx2_interleave_lowv32qi pattern includes -- confirm.
11168 (define_insn "avx2_interleave_highv32qi<mask_name>"
11169 [(set (match_operand:V32QI 0 "register_operand" "=v")
11172 (match_operand:V32QI 1 "register_operand" "v")
11173 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11174 (parallel [(const_int 8) (const_int 40)
11175 (const_int 9) (const_int 41)
11176 (const_int 10) (const_int 42)
11177 (const_int 11) (const_int 43)
11178 (const_int 12) (const_int 44)
11179 (const_int 13) (const_int 45)
11180 (const_int 14) (const_int 46)
11181 (const_int 15) (const_int 47)
11182 (const_int 24) (const_int 56)
11183 (const_int 25) (const_int 57)
11184 (const_int 26) (const_int 58)
11185 (const_int 27) (const_int 59)
11186 (const_int 28) (const_int 60)
11187 (const_int 29) (const_int 61)
11188 (const_int 30) (const_int 62)
11189 (const_int 31) (const_int 63)])))]
11190 "TARGET_AVX2 && <mask_avx512vl_condition>"
11191 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11192 [(set_attr "type" "sselog")
11193 (set_attr "prefix" "<mask_prefix>")
11194 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI (128-bit).  The selection list pairs
;; index i with index i + 16 for i in 8..15 (the upper eight bytes).
;; Alternative 0 is the two-operand legacy form (isa "noavx"),
;; alternative 1 the three-operand "v" form (isa "avx").
;; NOTE(review): the condition omits <mask_avx512bw_condition>, which the
;; matching vec_interleave_lowv16qi pattern includes -- confirm.
11196 (define_insn "vec_interleave_highv16qi<mask_name>"
11197 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11200 (match_operand:V16QI 1 "register_operand" "0,v")
11201 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11202 (parallel [(const_int 8) (const_int 24)
11203 (const_int 9) (const_int 25)
11204 (const_int 10) (const_int 26)
11205 (const_int 11) (const_int 27)
11206 (const_int 12) (const_int 28)
11207 (const_int 13) (const_int 29)
11208 (const_int 14) (const_int 30)
11209 (const_int 15) (const_int 31)])))]
11210 "TARGET_SSE2 && <mask_avx512vl_condition>"
11212 punpckhbw\t{%2, %0|%0, %2}
11213 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11214 [(set_attr "isa" "noavx,avx")
11215 (set_attr "type" "sselog")
11216 (set_attr "prefix_data16" "1,*")
11217 (set_attr "prefix" "orig,<mask_prefix>")
11218 (set_attr "mode" "TI")])
;; vpunpcklbw on V64QI (512-bit, EVEX).  The selection list pairs index i
;; with index i + 64 for i in 0..7, 16..23, 32..39 and 48..55, i.e. the
;; lower eight bytes of every 16-byte group.
;; NOTE(review): indices >= 64 presumably address operand 2 within a
;; concatenation of the two inputs; the enclosing vec_select/vec_concat
;; lines and the enabling condition are not visible in this view.
11220 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
11221 [(set (match_operand:V64QI 0 "register_operand" "=v")
11224 (match_operand:V64QI 1 "register_operand" "v")
11225 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11226 (parallel [(const_int 0) (const_int 64)
11227 (const_int 1) (const_int 65)
11228 (const_int 2) (const_int 66)
11229 (const_int 3) (const_int 67)
11230 (const_int 4) (const_int 68)
11231 (const_int 5) (const_int 69)
11232 (const_int 6) (const_int 70)
11233 (const_int 7) (const_int 71)
11234 (const_int 16) (const_int 80)
11235 (const_int 17) (const_int 81)
11236 (const_int 18) (const_int 82)
11237 (const_int 19) (const_int 83)
11238 (const_int 20) (const_int 84)
11239 (const_int 21) (const_int 85)
11240 (const_int 22) (const_int 86)
11241 (const_int 23) (const_int 87)
11242 (const_int 32) (const_int 96)
11243 (const_int 33) (const_int 97)
11244 (const_int 34) (const_int 98)
11245 (const_int 35) (const_int 99)
11246 (const_int 36) (const_int 100)
11247 (const_int 37) (const_int 101)
11248 (const_int 38) (const_int 102)
11249 (const_int 39) (const_int 103)
11250 (const_int 48) (const_int 112)
11251 (const_int 49) (const_int 113)
11252 (const_int 50) (const_int 114)
11253 (const_int 51) (const_int 115)
11254 (const_int 52) (const_int 116)
11255 (const_int 53) (const_int 117)
11256 (const_int 54) (const_int 118)
11257 (const_int 55) (const_int 119)])))]
11259 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11260 [(set_attr "type" "sselog")
11261 (set_attr "prefix" "evex")
11262 (set_attr "mode" "XI")])
;; vpunpcklbw on V32QI (256-bit).  The selection list pairs index i with
;; index i + 32 for i in 0..7 and 16..23, i.e. the lower eight bytes of
;; each 16-byte group.  Enabled under TARGET_AVX2 plus the AVX512VL and
;; AVX512BW mask conditions for the masked variant.
;; NOTE(review): "prefix" is "maybe_vex" here but "<mask_prefix>" on the
;; matching avx2_interleave_highv32qi pattern -- confirm.
11264 (define_insn "avx2_interleave_lowv32qi<mask_name>"
11265 [(set (match_operand:V32QI 0 "register_operand" "=v")
11268 (match_operand:V32QI 1 "register_operand" "v")
11269 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11270 (parallel [(const_int 0) (const_int 32)
11271 (const_int 1) (const_int 33)
11272 (const_int 2) (const_int 34)
11273 (const_int 3) (const_int 35)
11274 (const_int 4) (const_int 36)
11275 (const_int 5) (const_int 37)
11276 (const_int 6) (const_int 38)
11277 (const_int 7) (const_int 39)
11278 (const_int 16) (const_int 48)
11279 (const_int 17) (const_int 49)
11280 (const_int 18) (const_int 50)
11281 (const_int 19) (const_int 51)
11282 (const_int 20) (const_int 52)
11283 (const_int 21) (const_int 53)
11284 (const_int 22) (const_int 54)
11285 (const_int 23) (const_int 55)])))]
11286 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11287 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11288 [(set_attr "type" "sselog")
11289 (set_attr "prefix" "maybe_vex")
11290 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI (128-bit).  The selection list pairs
;; index i with index i + 16 for i in 0..7 (the lower eight bytes).
;; Alternative 0 is the two-operand legacy form (isa "noavx"),
;; alternative 1 the three-operand "v" form (isa "avx").
;; NOTE(review): "prefix" is "orig,vex" here but "orig,<mask_prefix>" on
;; the matching vec_interleave_highv16qi pattern -- confirm.
11292 (define_insn "vec_interleave_lowv16qi<mask_name>"
11293 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11296 (match_operand:V16QI 1 "register_operand" "0,v")
11297 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11298 (parallel [(const_int 0) (const_int 16)
11299 (const_int 1) (const_int 17)
11300 (const_int 2) (const_int 18)
11301 (const_int 3) (const_int 19)
11302 (const_int 4) (const_int 20)
11303 (const_int 5) (const_int 21)
11304 (const_int 6) (const_int 22)
11305 (const_int 7) (const_int 23)])))]
11306 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11308 punpcklbw\t{%2, %0|%0, %2}
11309 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11310 [(set_attr "isa" "noavx,avx")
11311 (set_attr "type" "sselog")
11312 (set_attr "prefix_data16" "1,*")
11313 (set_attr "prefix" "orig,vex")
11314 (set_attr "mode" "TI")])
;; vpunpckhwd on V32HI (512-bit, EVEX).  The selection list pairs index i
;; with index i + 32 for i in 4..7, 12..15, 20..23 and 28..31, i.e. the
;; upper four words of every eight-word group.
;; NOTE(review): the enclosing vec_select/vec_concat lines and the
;; enabling condition are not visible in this view.
11316 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
11317 [(set (match_operand:V32HI 0 "register_operand" "=v")
11320 (match_operand:V32HI 1 "register_operand" "v")
11321 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11322 (parallel [(const_int 4) (const_int 36)
11323 (const_int 5) (const_int 37)
11324 (const_int 6) (const_int 38)
11325 (const_int 7) (const_int 39)
11326 (const_int 12) (const_int 44)
11327 (const_int 13) (const_int 45)
11328 (const_int 14) (const_int 46)
11329 (const_int 15) (const_int 47)
11330 (const_int 20) (const_int 52)
11331 (const_int 21) (const_int 53)
11332 (const_int 22) (const_int 54)
11333 (const_int 23) (const_int 55)
11334 (const_int 28) (const_int 60)
11335 (const_int 29) (const_int 61)
11336 (const_int 30) (const_int 62)
11337 (const_int 31) (const_int 63)])))]
11339 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11340 [(set_attr "type" "sselog")
11341 (set_attr "prefix" "evex")
11342 (set_attr "mode" "XI")])
;; vpunpckhwd on V16HI (256-bit).  The selection list pairs index i with
;; index i + 16 for i in 4..7 and 12..15, i.e. the upper four words of
;; each eight-word group.  Enabled under TARGET_AVX2 plus the AVX512VL and
;; AVX512BW mask conditions for the masked variant.
11344 (define_insn "avx2_interleave_highv16hi<mask_name>"
11345 [(set (match_operand:V16HI 0 "register_operand" "=v")
11348 (match_operand:V16HI 1 "register_operand" "v")
11349 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11350 (parallel [(const_int 4) (const_int 20)
11351 (const_int 5) (const_int 21)
11352 (const_int 6) (const_int 22)
11353 (const_int 7) (const_int 23)
11354 (const_int 12) (const_int 28)
11355 (const_int 13) (const_int 29)
11356 (const_int 14) (const_int 30)
11357 (const_int 15) (const_int 31)])))]
11358 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11359 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11360 [(set_attr "type" "sselog")
11361 (set_attr "prefix" "maybe_evex")
11362 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on V8HI (128-bit).  The selection list pairs
;; index i with index i + 8 for i in 4..7 (the upper four words).
;; Alternative 0 is the two-operand legacy form (isa "noavx"),
;; alternative 1 the three-operand "v" form (isa "avx").
;; NOTE(review): "prefix" is "orig,maybe_vex" here but "orig,maybe_evex"
;; on the matching vec_interleave_lowv8hi pattern -- confirm.
11364 (define_insn "vec_interleave_highv8hi<mask_name>"
11365 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11368 (match_operand:V8HI 1 "register_operand" "0,v")
11369 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11370 (parallel [(const_int 4) (const_int 12)
11371 (const_int 5) (const_int 13)
11372 (const_int 6) (const_int 14)
11373 (const_int 7) (const_int 15)])))]
11374 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11376 punpckhwd\t{%2, %0|%0, %2}
11377 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11378 [(set_attr "isa" "noavx,avx")
11379 (set_attr "type" "sselog")
11380 (set_attr "prefix_data16" "1,*")
11381 (set_attr "prefix" "orig,maybe_vex")
11382 (set_attr "mode" "TI")])
;; vpunpcklwd on V32HI (512-bit, EVEX).  The selection list pairs index i
;; with index i + 32 for i in 0..3, 8..11, 16..19 and 24..27, i.e. the
;; lower four words of every eight-word group.
;; NOTE(review): this name carries a <mask_codefor> prefix while the
;; matching avx512bw_interleave_highv32hi pattern does not -- confirm.
;; The enabling condition is not visible in this view.
11384 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11385 [(set (match_operand:V32HI 0 "register_operand" "=v")
11388 (match_operand:V32HI 1 "register_operand" "v")
11389 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11390 (parallel [(const_int 0) (const_int 32)
11391 (const_int 1) (const_int 33)
11392 (const_int 2) (const_int 34)
11393 (const_int 3) (const_int 35)
11394 (const_int 8) (const_int 40)
11395 (const_int 9) (const_int 41)
11396 (const_int 10) (const_int 42)
11397 (const_int 11) (const_int 43)
11398 (const_int 16) (const_int 48)
11399 (const_int 17) (const_int 49)
11400 (const_int 18) (const_int 50)
11401 (const_int 19) (const_int 51)
11402 (const_int 24) (const_int 56)
11403 (const_int 25) (const_int 57)
11404 (const_int 26) (const_int 58)
11405 (const_int 27) (const_int 59)])))]
11407 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11408 [(set_attr "type" "sselog")
11409 (set_attr "prefix" "evex")
11410 (set_attr "mode" "XI")])
;; vpunpcklwd on V16HI (256-bit).  The selection list pairs index i with
;; index i + 16 for i in 0..3 and 8..11, i.e. the lower four words of
;; each eight-word group.  Enabled under TARGET_AVX2 plus the AVX512VL and
;; AVX512BW mask conditions for the masked variant.
11412 (define_insn "avx2_interleave_lowv16hi<mask_name>"
11413 [(set (match_operand:V16HI 0 "register_operand" "=v")
11416 (match_operand:V16HI 1 "register_operand" "v")
11417 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11418 (parallel [(const_int 0) (const_int 16)
11419 (const_int 1) (const_int 17)
11420 (const_int 2) (const_int 18)
11421 (const_int 3) (const_int 19)
11422 (const_int 8) (const_int 24)
11423 (const_int 9) (const_int 25)
11424 (const_int 10) (const_int 26)
11425 (const_int 11) (const_int 27)])))]
11426 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11427 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11428 [(set_attr "type" "sselog")
11429 (set_attr "prefix" "maybe_evex")
11430 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on V8HI (128-bit).  The selection list pairs
;; index i with index i + 8 for i in 0..3 (the lower four words).
;; Alternative 0 is the two-operand legacy form (isa "noavx"),
;; alternative 1 the three-operand "v" form (isa "avx").
11432 (define_insn "vec_interleave_lowv8hi<mask_name>"
11433 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11436 (match_operand:V8HI 1 "register_operand" "0,v")
11437 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11438 (parallel [(const_int 0) (const_int 8)
11439 (const_int 1) (const_int 9)
11440 (const_int 2) (const_int 10)
11441 (const_int 3) (const_int 11)])))]
11442 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11444 punpcklwd\t{%2, %0|%0, %2}
11445 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11446 [(set_attr "isa" "noavx,avx")
11447 (set_attr "type" "sselog")
11448 (set_attr "prefix_data16" "1,*")
11449 (set_attr "prefix" "orig,maybe_evex")
11450 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI (256-bit).  The selection list pairs index i with
;; index i + 8 for i in {2, 3, 6, 7}, i.e. the upper two dwords of each
;; four-dword group.  Enabled under TARGET_AVX2 plus
;; <mask_avx512vl_condition> for the masked variant.
11452 (define_insn "avx2_interleave_highv8si<mask_name>"
11453 [(set (match_operand:V8SI 0 "register_operand" "=v")
11456 (match_operand:V8SI 1 "register_operand" "v")
11457 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11458 (parallel [(const_int 2) (const_int 10)
11459 (const_int 3) (const_int 11)
11460 (const_int 6) (const_int 14)
11461 (const_int 7) (const_int 15)])))]
11462 "TARGET_AVX2 && <mask_avx512vl_condition>"
11463 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11464 [(set_attr "type" "sselog")
11465 (set_attr "prefix" "maybe_evex")
11466 (set_attr "mode" "OI")])
;; vpunpckhdq on V16SI (512-bit, EVEX).  The selection list pairs index i
;; with index i + 16 for i in {2, 3, 6, 7, 10, 11, 14, 15}, i.e. the upper
;; two dwords of every four-dword group.
;; NOTE(review): the enclosing vec_select/vec_concat lines and the
;; enabling condition are not visible in this view.
11468 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11469 [(set (match_operand:V16SI 0 "register_operand" "=v")
11472 (match_operand:V16SI 1 "register_operand" "v")
11473 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11474 (parallel [(const_int 2) (const_int 18)
11475 (const_int 3) (const_int 19)
11476 (const_int 6) (const_int 22)
11477 (const_int 7) (const_int 23)
11478 (const_int 10) (const_int 26)
11479 (const_int 11) (const_int 27)
11480 (const_int 14) (const_int 30)
11481 (const_int 15) (const_int 31)])))]
11483 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11484 [(set_attr "type" "sselog")
11485 (set_attr "prefix" "evex")
11486 (set_attr "mode" "XI")])
;; punpckhdq / vpunpckhdq on V4SI (128-bit).  The selection list pairs
;; index i with index i + 4 for i in {2, 3} (the upper two dwords).
;; Alternative 0 is the two-operand legacy form (isa "noavx"),
;; alternative 1 the three-operand "v" form (isa "avx").
11489 (define_insn "vec_interleave_highv4si<mask_name>"
11490 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11493 (match_operand:V4SI 1 "register_operand" "0,v")
11494 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11495 (parallel [(const_int 2) (const_int 6)
11496 (const_int 3) (const_int 7)])))]
11497 "TARGET_SSE2 && <mask_avx512vl_condition>"
11499 punpckhdq\t{%2, %0|%0, %2}
11500 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11501 [(set_attr "isa" "noavx,avx")
11502 (set_attr "type" "sselog")
11503 (set_attr "prefix_data16" "1,*")
11504 (set_attr "prefix" "orig,maybe_vex")
11505 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI (256-bit).  The selection list pairs index i with
;; index i + 8 for i in {0, 1, 4, 5}, i.e. the lower two dwords of each
;; four-dword group.  Enabled under TARGET_AVX2 plus
;; <mask_avx512vl_condition> for the masked variant.
11507 (define_insn "avx2_interleave_lowv8si<mask_name>"
11508 [(set (match_operand:V8SI 0 "register_operand" "=v")
11511 (match_operand:V8SI 1 "register_operand" "v")
11512 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11513 (parallel [(const_int 0) (const_int 8)
11514 (const_int 1) (const_int 9)
11515 (const_int 4) (const_int 12)
11516 (const_int 5) (const_int 13)])))]
11517 "TARGET_AVX2 && <mask_avx512vl_condition>"
11518 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11519 [(set_attr "type" "sselog")
11520 (set_attr "prefix" "maybe_evex")
11521 (set_attr "mode" "OI")])
;; vpunpckldq on V16SI (512-bit, EVEX).  The selection list pairs index i
;; with index i + 16 for i in {0, 1, 4, 5, 8, 9, 12, 13}, i.e. the lower
;; two dwords of every four-dword group.
;; NOTE(review): the enclosing vec_select/vec_concat lines and the
;; enabling condition are not visible in this view.
11523 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11524 [(set (match_operand:V16SI 0 "register_operand" "=v")
11527 (match_operand:V16SI 1 "register_operand" "v")
11528 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11529 (parallel [(const_int 0) (const_int 16)
11530 (const_int 1) (const_int 17)
11531 (const_int 4) (const_int 20)
11532 (const_int 5) (const_int 21)
11533 (const_int 8) (const_int 24)
11534 (const_int 9) (const_int 25)
11535 (const_int 12) (const_int 28)
11536 (const_int 13) (const_int 29)])))]
11538 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11539 [(set_attr "type" "sselog")
11540 (set_attr "prefix" "evex")
11541 (set_attr "mode" "XI")])
;; punpckldq / vpunpckldq on V4SI (128-bit).  The selection list pairs
;; index i with index i + 4 for i in {0, 1} (the lower two dwords).
;; Alternative 0 is the two-operand legacy form (isa "noavx"),
;; alternative 1 the three-operand "v" form (isa "avx").
;; NOTE(review): "prefix" is "orig,vex" here but "orig,maybe_vex" on the
;; matching vec_interleave_highv4si pattern -- confirm.
11543 (define_insn "vec_interleave_lowv4si<mask_name>"
11544 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11547 (match_operand:V4SI 1 "register_operand" "0,v")
11548 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11549 (parallel [(const_int 0) (const_int 4)
11550 (const_int 1) (const_int 5)])))]
11551 "TARGET_SSE2 && <mask_avx512vl_condition>"
11553 punpckldq\t{%2, %0|%0, %2}
11554 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11555 [(set_attr "isa" "noavx,avx")
11556 (set_attr "type" "sselog")
11557 (set_attr "prefix_data16" "1,*")
11558 (set_attr "prefix" "orig,vex")
11559 (set_attr "mode" "TI")])
;; Expander vec_interleave_high<mode> for the VI_256 modes, built from
;; three insns:
;;   t1 = avx2_interleave_low<mode>  (operands 1, 2)
;;   t2 = avx2_interleave_high<mode> (operands 1, 2)
;;   t3 = avx2_permv2ti (t1, t2 viewed as V4DI, immediate 1 + (3 << 4))
;; and then moves t3, viewed back in <MODE>mode, into operand 0.
;; NOTE(review): the immediate 0x31 presumably selects the upper 128-bit
;; half of t1 followed by the upper half of t2 -- confirm against the
;; avx2_permv2ti pattern.  The enabling condition is not visible here.
11561 (define_expand "vec_interleave_high<mode>"
11562 [(match_operand:VI_256 0 "register_operand" "=x")
11563 (match_operand:VI_256 1 "register_operand" "x")
11564 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11567 rtx t1 = gen_reg_rtx (<MODE>mode);
11568 rtx t2 = gen_reg_rtx (<MODE>mode);
11569 rtx t3 = gen_reg_rtx (V4DImode);
11570 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11571 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11572 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11573 gen_lowpart (V4DImode, t2),
11574 GEN_INT (1 + (3 << 4))));
11575 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander vec_interleave_low<mode> for the VI_256 modes, built from
;; three insns:
;;   t1 = avx2_interleave_low<mode>  (operands 1, 2)
;;   t2 = avx2_interleave_high<mode> (operands 1, 2)
;;   t3 = avx2_permv2ti (t1, t2 viewed as V4DI, immediate 0 + (2 << 4))
;; and then moves t3, viewed back in <MODE>mode, into operand 0.
;; NOTE(review): the immediate 0x20 presumably selects the lower 128-bit
;; half of t1 followed by the lower half of t2 -- confirm against the
;; avx2_permv2ti pattern.  The enabling condition is not visible here.
11579 (define_expand "vec_interleave_low<mode>"
11580 [(match_operand:VI_256 0 "register_operand" "=x")
11581 (match_operand:VI_256 1 "register_operand" "x")
11582 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11585 rtx t1 = gen_reg_rtx (<MODE>mode);
11586 rtx t2 = gen_reg_rtx (<MODE>mode);
11587 rtx t3 = gen_reg_rtx (V4DImode);
11588 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11589 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11590 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11591 gen_lowpart (V4DImode, t2),
11592 GEN_INT (0 + (2 << 4))));
11593 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11597 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional within the iterator; V16QI and V4SI require
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
11598 (define_mode_iterator PINSR_MODE
11599 [(V16QI "TARGET_SSE4_1") V8HI
11600 (V4SI "TARGET_SSE4_1")
11601 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Maps each PINSR_MODE mode to the ISA prefix used in the pinsr pattern
;; name: "sse2" for V8HI, "sse4_1" for V16QI, V4SI and V2DI.
11603 (define_mode_attr sse2p4_1
11604 [(V16QI "sse4_1") (V8HI "sse2")
11605 (V4SI "sse4_1") (V2DI "sse4_1")])
11607 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;;
;; pinsr / vpinsr: inserts scalar operand 2 into vector operand 1 as a
;; vec_merge of (vec_duplicate operand 2) with operand 1, selected by the
;; constant operand 3.
;;
;; Alternatives (operand 0 / 1 / 2), isa "noavx,noavx,avx,avx":
;;   0: =x / 0 / r     1: =x / 0 / m
;;   2: =x / x / r     3: =x / x / m
;;
;; The insn condition requires exact_log2 (INTVAL (operands[3])), taken
;; as unsigned, to be below GET_MODE_NUNITS (<MODE>mode), so operand 3
;; must be a single-bit merge mask naming a valid element.  The output
;; code replaces operand 3 with that element index before printing.  When
;; the scalar mode is narrower than SImode, operand 2 is printed with the
;; %k modifier.
;;
;; The prefix_rex, prefix_data16 and prefix_extra attributes are computed
;; from !TARGET_AVX combined with the mode (V2DI for prefix_rex, V8HI for
;; the other two).
;; NOTE(review): the case labels, the first part of the insn condition
;; and the first value of each conditional attribute are not visible in
;; this view.
11608 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11609 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11610 (vec_merge:PINSR_MODE
11611 (vec_duplicate:PINSR_MODE
11612 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11613 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11614 (match_operand:SI 3 "const_int_operand")))]
11616 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11617 < GET_MODE_NUNITS (<MODE>mode))"
11619 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11621 switch (which_alternative)
11624 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11625 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11628 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11630 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11631 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11634 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11636 gcc_unreachable ();
11639 [(set_attr "isa" "noavx,noavx,avx,avx")
11640 (set_attr "type" "sselog")
11641 (set (attr "prefix_rex")
11643 (and (not (match_test "TARGET_AVX"))
11644 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11646 (const_string "*")))
11647 (set (attr "prefix_data16")
11649 (and (not (match_test "TARGET_AVX"))
11650 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11652 (const_string "*")))
11653 (set (attr "prefix_extra")
11655 (and (not (match_test "TARGET_AVX"))
11656 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11658 (const_string "1")))
11659 (set_attr "length_immediate" "1")
11660 (set_attr "prefix" "orig,orig,vex,vex")
11661 (set_attr "mode" "TI")])
;; Masked expander for inserting a quarter-width vector (operand 2,
;; <ssequartermode>) into an AVX512_VEC vector (operand 1) at the quarter
;; index given by operand 3 (0..3).  Operand 4 is the merge source and
;; operand 5 the mask register.
;;
;; The quarter index is converted into a vec_merge selector for the _1
;; insn: the selector is all ones except for the element bits of the
;; chosen quarter.
;;   4-byte elements (16 per vector): 0xFFFF ^ (0xF000 >> index * 4)
;;     index 0..3 -> 0x0FFF, 0xF0FF, 0xFF0F, 0xFFF0
;;   otherwise (8 per vector):        0xFF ^ (0xC0 >> index * 2)
;;     index 0..3 -> 0x3F, 0xCF, 0xF3, 0xFC
;; NOTE(review): the declarations of `mask' and `selector' and the
;; enabling condition are not visible in this view.
11663 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11664 [(match_operand:AVX512_VEC 0 "register_operand")
11665 (match_operand:AVX512_VEC 1 "register_operand")
11666 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11667 (match_operand:SI 3 "const_0_to_3_operand")
11668 (match_operand:AVX512_VEC 4 "register_operand")
11669 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11673 mask = INTVAL (operands[3]);
11674 selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ?
11675 0xFFFF ^ (0xF000 >> mask * 4)
11676 : 0xFF ^ (0xC0 >> mask * 2);
11677 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11678 (operands[0], operands[1], operands[2], GEN_INT (selector),
11679 operands[4], operands[5]));
;; Insn behind the quarter-width vinsert: a vec_merge of operand 1 with
;; (vec_duplicate operand 2), selected by the constant operand 3.
;;
;; The output code recognises four selector values per element count
;; (0xFFF/0x3F, 0xF0FF/0xCF, 0xFF0F/0xF3, 0xFFF0/0xFC) and aborts via
;; gcc_unreachable for anything else.  These are the selectors the _mask
;; expander produces for quarter indices 0..3 (0xFFF equals the 0x0FFF it
;; computes for index 0).  Operand 3 is then replaced by GEN_INT (mask)
;; before the vinsert template is printed.
;; NOTE(review): the assignments to `mask' in each branch are not visible
;; in this view; presumably they are 0, 1, 2 and 3 in that order.
11683 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11684 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11685 (vec_merge:AVX512_VEC
11686 (match_operand:AVX512_VEC 1 "register_operand" "v")
11687 (vec_duplicate:AVX512_VEC
11688 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11689 (match_operand:SI 3 "const_int_operand" "n")))]
11693 int selector = INTVAL (operands[3]);
11695 if (selector == 0xFFF || selector == 0x3F)
11697 else if ( selector == 0xF0FF || selector == 0xCF)
11699 else if ( selector == 0xFF0F || selector == 0xF3)
11701 else if ( selector == 0xFFF0 || selector == 0xFC)
11704 gcc_unreachable ();
11706 operands[3] = GEN_INT (mask);
11708 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
11710 [(set_attr "type" "sselog")
11711 (set_attr "length_immediate" "1")
11712 (set_attr "prefix" "evex")
11713 (set_attr "mode" "<sseinsnmode>")])
;; Masked expander for inserting a half-width vector (operand 2,
;; <ssehalfvecmode>) into an AVX512_VEC_2 vector (operand 1).  Operand 3
;; is the half index (0 or 1), operand 4 the merge source and operand 5
;; the mask register.  The expander emits either
;; gen_vec_set_lo_<mode>_mask or gen_vec_set_hi_<mode>_mask, dropping the
;; index operand.
;; NOTE(review): the test on `mask' that chooses between the two calls is
;; not visible in this view; presumably index 0 selects the "lo" insn.
11715 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
11716 [(match_operand:AVX512_VEC_2 0 "register_operand")
11717 (match_operand:AVX512_VEC_2 1 "register_operand")
11718 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11719 (match_operand:SI 3 "const_0_to_1_operand")
11720 (match_operand:AVX512_VEC_2 4 "register_operand")
11721 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11724 int mask = INTVAL (operands[3]);
11726 emit_insn (gen_vec_set_lo_<mode>_mask
11727 (operands[0], operands[1], operands[2],
11728 operands[4], operands[5]));
11730 emit_insn (gen_vec_set_hi_<mode>_mask
11731 (operands[0], operands[1], operands[2],
11732 operands[4], operands[5]));
;; vec_set_lo for the V16FI modes: combines the half-width operand 2 with
;; elements 8..15 (the upper half) of operand 1, and emits
;; vinsert<shuffletype>32x8 with immediate 0.
;; NOTE(review): the enclosing vec_concat line and the enabling condition
;; are not visible in this view.  The Intel-syntax half of the template
;; also spells the immediate as "$0x0" -- confirm that is intended.
11736 (define_insn "vec_set_lo_<mode><mask_name>"
11737 [(set (match_operand:V16FI 0 "register_operand" "=v")
11739 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11740 (vec_select:<ssehalfvecmode>
11741 (match_operand:V16FI 1 "register_operand" "v")
11742 (parallel [(const_int 8) (const_int 9)
11743 (const_int 10) (const_int 11)
11744 (const_int 12) (const_int 13)
11745 (const_int 14) (const_int 15)]))))]
11747 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
11748 [(set_attr "type" "sselog")
11749 (set_attr "length_immediate" "1")
11750 (set_attr "prefix" "evex")
11751 (set_attr "mode" "<sseinsnmode>")])
;; V16FI half-width insert with a fixed immediate of 1.  The pattern pairs
;; the half-width operand 2 with elements 0..7 selected from operand 1,
;; and the template emits vinsert<shuffletype>32x8 $0x1.
;; NOTE(review): operand 2 is listed BEFORE the vec_select of elements
;; 0..7, i.e. in the same position vec_set_lo_<mode> uses.  If the
;; enclosing rtx (not visible in this excerpt) is a vec_concat, this RTL
;; describes operand 2 landing in the low half and the low half of
;; operand 1 moving to the high half, which does not look like what an
;; insert with immediate 1 does.  Confirm whether the two sub-expressions
;; should be swapped so the vec_select comes first.
11753 (define_insn "vec_set_hi_<mode><mask_name>"
11754 [(set (match_operand:V16FI 0 "register_operand" "=v")
11756 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11757 (vec_select:<ssehalfvecmode>
11758 (match_operand:V16FI 1 "register_operand" "v")
11759 (parallel [(const_int 0) (const_int 1)
11760 (const_int 2) (const_int 3)
11761 (const_int 4) (const_int 5)
11762 (const_int 6) (const_int 7)]))))]
11764 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
11765 [(set_attr "type" "sselog")
11766 (set_attr "length_immediate" "1")
11767 (set_attr "prefix" "evex")
11768 (set_attr "mode" "<sseinsnmode>")])
;; V8FI half-width insert with a fixed immediate of 0.  The pattern pairs
;; the half-width operand 2 with elements 4..7 selected from operand 1,
;; and the template emits vinsert<shuffletype>64x4 $0x0.
11770 (define_insn "vec_set_lo_<mode><mask_name>"
11771 [(set (match_operand:V8FI 0 "register_operand" "=v")
11773 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11774 (vec_select:<ssehalfvecmode>
11775 (match_operand:V8FI 1 "register_operand" "v")
11776 (parallel [(const_int 4) (const_int 5)
11777 (const_int 6) (const_int 7)]))))]
11779 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
11780 [(set_attr "type" "sselog")
11781 (set_attr "length_immediate" "1")
11782 (set_attr "prefix" "evex")
11783 (set_attr "mode" "XI")])
;; V8FI half-width insert with a fixed immediate of 1.  The pattern pairs
;; the half-width operand 2 with elements 0..3 selected from operand 1,
;; and the template emits vinsert<shuffletype>64x4 $0x1.
;; NOTE(review): operand 2 is listed BEFORE the vec_select of elements
;; 0..3, the same position the V8FI vec_set_lo_<mode> pattern uses.  If
;; the enclosing rtx (not visible in this excerpt) is a vec_concat, the
;; RTL describes operand 2 in the low half, which does not match an
;; insert with immediate 1.  Confirm whether the two sub-expressions
;; should be swapped.
11785 (define_insn "vec_set_hi_<mode><mask_name>"
11786 [(set (match_operand:V8FI 0 "register_operand" "=v")
11788 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11789 (vec_select:<ssehalfvecmode>
11790 (match_operand:V8FI 1 "register_operand" "v")
11791 (parallel [(const_int 0) (const_int 1)
11792 (const_int 2) (const_int 3)]))))]
11794 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
11795 [(set_attr "type" "sselog")
11796 (set_attr "length_immediate" "1")
11797 (set_attr "prefix" "evex")
11798 (set_attr "mode" "XI")])
;; Expander for the masked 64x2 shuffle on VI8F_256 modes.  Operand 3 is
;; an immediate in 0..3.  It is decoded into four element indices for
;; avx512dq_shuf_<shuffletype>64x2_1_mask: bit 0 yields the pair
;; (0,1) or (2,3); bit 1 yields the pair (4,5) or (6,7).  Operands 4 and 5
;; are passed through unchanged as the last two arguments.
11800 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
11801 [(match_operand:VI8F_256 0 "register_operand")
11802 (match_operand:VI8F_256 1 "register_operand")
11803 (match_operand:VI8F_256 2 "nonimmediate_operand")
11804 (match_operand:SI 3 "const_0_to_3_operand")
11805 (match_operand:VI8F_256 4 "register_operand")
11806 (match_operand:QI 5 "register_operand")]
11809 int mask = INTVAL (operands[3]);
11810 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
11811 (operands[0], operands[1], operands[2],
11812 GEN_INT (((mask >> 0) & 1) * 2 + 0),
11813 GEN_INT (((mask >> 0) & 1) * 2 + 1),
11814 GEN_INT (((mask >> 1) & 1) * 2 + 4),
11815 GEN_INT (((mask >> 1) & 1) * 2 + 5),
11816 operands[4], operands[5]));
;; Insn for vshuf<shuffletype>64x2 on VI8F_256 modes.  Four elements are
;; selected from the vec_concat of operands 1 and 2: operands 3 and 4 are
;; indices in 0..3, operands 5 and 6 indices in 4..7.  The visible part of
;; the condition requires each pair to be consecutive (op3 + 1 == op4,
;; op5 + 1 == op6).  The output code folds the indices back into a 2-bit
;; immediate, bit 0 = op3 / 2 and bit 1 = (op5 - 4) / 2, and stores it in
;; operands[3] so %3 prints the immediate.
;; NOTE(review): the mode attribute is "XI" although every vector operand
;; is VI8F_256; the sibling avx512vl_shuf_<shuffletype>32x4_1 pattern uses
;; "<sseinsnmode>".  Confirm whether "XI" is intended here.
11820 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
11821 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
11822 (vec_select:VI8F_256
11823 (vec_concat:<ssedoublemode>
11824 (match_operand:VI8F_256 1 "register_operand" "v")
11825 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
11826 (parallel [(match_operand 3 "const_0_to_3_operand")
11827 (match_operand 4 "const_0_to_3_operand")
11828 (match_operand 5 "const_4_to_7_operand")
11829 (match_operand 6 "const_4_to_7_operand")])))]
11831 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11832 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
11835 mask = INTVAL (operands[3]) / 2;
11836 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
11837 operands[3] = GEN_INT (mask);
11838 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
11840 [(set_attr "type" "sselog")
11841 (set_attr "length_immediate" "1")
11842 (set_attr "prefix" "evex")
11843 (set_attr "mode" "XI")])
;; Expander for the masked 64x2 shuffle on V8FI modes.  Operand 3 is an
;; immediate in 0..255, treated as four 2-bit fields.  Each field f
;; becomes the consecutive index pair (2f, 2f + 1); the pairs derived from
;; bits 4-5 and bits 6-7 are additionally offset by 8.  The eight indices
;; plus operands 4 and 5 are handed to
;; avx512f_shuf_<shuffletype>64x2_1_mask.
11845 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
11846 [(match_operand:V8FI 0 "register_operand")
11847 (match_operand:V8FI 1 "register_operand")
11848 (match_operand:V8FI 2 "nonimmediate_operand")
11849 (match_operand:SI 3 "const_0_to_255_operand")
11850 (match_operand:V8FI 4 "register_operand")
11851 (match_operand:QI 5 "register_operand")]
11854 int mask = INTVAL (operands[3]);
11855 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
11856 (operands[0], operands[1], operands[2],
11857 GEN_INT (((mask >> 0) & 3) * 2),
11858 GEN_INT (((mask >> 0) & 3) * 2 + 1),
11859 GEN_INT (((mask >> 2) & 3) * 2),
11860 GEN_INT (((mask >> 2) & 3) * 2 + 1),
11861 GEN_INT (((mask >> 4) & 3) * 2 + 8),
11862 GEN_INT (((mask >> 4) & 3) * 2 + 9),
11863 GEN_INT (((mask >> 6) & 3) * 2 + 8),
11864 GEN_INT (((mask >> 6) & 3) * 2 + 9),
11865 operands[4], operands[5]));
;; Insn for vshuf<shuffletype>64x2 on V8FI modes.  Eight element indices
;; address the vec_concat of operands 1 and 2: operands 3..6 lie in 0..7
;; and operands 7..10 in 8..15.  The visible part of the condition requires
;; the indices to form four consecutive pairs (3/4, 5/6, 7/8, 9/10).  The
;; output code rebuilds the 8-bit immediate two bits at a time from the
;; first index of each pair (subtracting 8 for the upper two pairs) and
;; stores it in operands[3] for printing as %3.
11869 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
11870 [(set (match_operand:V8FI 0 "register_operand" "=v")
11872 (vec_concat:<ssedoublemode>
11873 (match_operand:V8FI 1 "register_operand" "v")
11874 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
11875 (parallel [(match_operand 3 "const_0_to_7_operand")
11876 (match_operand 4 "const_0_to_7_operand")
11877 (match_operand 5 "const_0_to_7_operand")
11878 (match_operand 6 "const_0_to_7_operand")
11879 (match_operand 7 "const_8_to_15_operand")
11880 (match_operand 8 "const_8_to_15_operand")
11881 (match_operand 9 "const_8_to_15_operand")
11882 (match_operand 10 "const_8_to_15_operand")])))]
11884 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11885 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
11886 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11887 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
11890 mask = INTVAL (operands[3]) / 2;
11891 mask |= INTVAL (operands[5]) / 2 << 2;
11892 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
11893 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
11894 operands[3] = GEN_INT (mask);
11896 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11898 [(set_attr "type" "sselog")
11899 (set_attr "length_immediate" "1")
11900 (set_attr "prefix" "evex")
11901 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 32x4 shuffle on VI4F_256 modes.  Operand 3 is
;; an immediate in 0..3.  Bit 0 yields four consecutive indices starting
;; at 0 or 4; bit 1 yields four consecutive indices starting at 8 or 12.
;; The eight indices plus operands 4 and 5 are handed to
;; avx512vl_shuf_<shuffletype>32x4_1_mask.
11903 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
11904 [(match_operand:VI4F_256 0 "register_operand")
11905 (match_operand:VI4F_256 1 "register_operand")
11906 (match_operand:VI4F_256 2 "nonimmediate_operand")
11907 (match_operand:SI 3 "const_0_to_3_operand")
11908 (match_operand:VI4F_256 4 "register_operand")
11909 (match_operand:QI 5 "register_operand")]
11912 int mask = INTVAL (operands[3]);
11913 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
11914 (operands[0], operands[1], operands[2],
11915 GEN_INT (((mask >> 0) & 1) * 4 + 0),
11916 GEN_INT (((mask >> 0) & 1) * 4 + 1),
11917 GEN_INT (((mask >> 0) & 1) * 4 + 2),
11918 GEN_INT (((mask >> 0) & 1) * 4 + 3),
11919 GEN_INT (((mask >> 1) & 1) * 4 + 8),
11920 GEN_INT (((mask >> 1) & 1) * 4 + 9),
11921 GEN_INT (((mask >> 1) & 1) * 4 + 10),
11922 GEN_INT (((mask >> 1) & 1) * 4 + 11),
11923 operands[4], operands[5]));
;; Insn for vshuf<shuffletype>32x4 on VI4F_256 modes.  Eight element
;; indices address the vec_concat of operands 1 and 2: operands 3..6 lie
;; in 0..7 and operands 7..10 in 8..15.  The visible part of the condition
;; requires operands 3..6 to be four consecutive values and likewise
;; operands 7..10.  The output code rebuilds a 2-bit immediate, bit 0 =
;; op3 / 4 and bit 1 = (op7 - 8) / 4, and stores it in operands[3] for
;; printing as %3.
11927 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
11928 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
11929 (vec_select:VI4F_256
11930 (vec_concat:<ssedoublemode>
11931 (match_operand:VI4F_256 1 "register_operand" "v")
11932 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
11933 (parallel [(match_operand 3 "const_0_to_7_operand")
11934 (match_operand 4 "const_0_to_7_operand")
11935 (match_operand 5 "const_0_to_7_operand")
11936 (match_operand 6 "const_0_to_7_operand")
11937 (match_operand 7 "const_8_to_15_operand")
11938 (match_operand 8 "const_8_to_15_operand")
11939 (match_operand 9 "const_8_to_15_operand")
11940 (match_operand 10 "const_8_to_15_operand")])))]
11942 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11943 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11944 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11945 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11946 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11947 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
11950 mask = INTVAL (operands[3]) / 4;
11951 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
11952 operands[3] = GEN_INT (mask);
11954 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11956 [(set_attr "type" "sselog")
11957 (set_attr "length_immediate" "1")
11958 (set_attr "prefix" "evex")
11959 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 32x4 shuffle on V16FI modes.  Operand 3 is an
;; immediate in 0..255, treated as four 2-bit fields.  Each field f
;; becomes the four consecutive indices 4f .. 4f + 3; the groups derived
;; from bits 4-5 and bits 6-7 are additionally offset by 16.  The sixteen
;; indices plus operands 4 and 5 are handed to
;; avx512f_shuf_<shuffletype>32x4_1_mask.
11961 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
11962 [(match_operand:V16FI 0 "register_operand")
11963 (match_operand:V16FI 1 "register_operand")
11964 (match_operand:V16FI 2 "nonimmediate_operand")
11965 (match_operand:SI 3 "const_0_to_255_operand")
11966 (match_operand:V16FI 4 "register_operand")
11967 (match_operand:HI 5 "register_operand")]
11970 int mask = INTVAL (operands[3]);
11971 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
11972 (operands[0], operands[1], operands[2],
11973 GEN_INT (((mask >> 0) & 3) * 4),
11974 GEN_INT (((mask >> 0) & 3) * 4 + 1),
11975 GEN_INT (((mask >> 0) & 3) * 4 + 2),
11976 GEN_INT (((mask >> 0) & 3) * 4 + 3),
11977 GEN_INT (((mask >> 2) & 3) * 4),
11978 GEN_INT (((mask >> 2) & 3) * 4 + 1),
11979 GEN_INT (((mask >> 2) & 3) * 4 + 2),
11980 GEN_INT (((mask >> 2) & 3) * 4 + 3),
11981 GEN_INT (((mask >> 4) & 3) * 4 + 16),
11982 GEN_INT (((mask >> 4) & 3) * 4 + 17),
11983 GEN_INT (((mask >> 4) & 3) * 4 + 18),
11984 GEN_INT (((mask >> 4) & 3) * 4 + 19),
11985 GEN_INT (((mask >> 6) & 3) * 4 + 16),
11986 GEN_INT (((mask >> 6) & 3) * 4 + 17),
11987 GEN_INT (((mask >> 6) & 3) * 4 + 18),
11988 GEN_INT (((mask >> 6) & 3) * 4 + 19),
11989 operands[4], operands[5]));
;; Insn for vshuf<shuffletype>32x4 on V16FI modes.  Sixteen element
;; indices address the vec_concat of operands 1 and 2: operands 3..10 lie
;; in 0..15 and operands 11..18 in 16..31.  The visible part of the
;; condition requires the indices to form four runs of four consecutive
;; values (3..6, 7..10, 11..14, 15..18).  The output code rebuilds the
;; 8-bit immediate two bits at a time from the first index of each run
;; (subtracting 16 for the upper two runs) and stores it in operands[3]
;; for printing as %3.
11993 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
11994 [(set (match_operand:V16FI 0 "register_operand" "=v")
11996 (vec_concat:<ssedoublemode>
11997 (match_operand:V16FI 1 "register_operand" "v")
11998 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
11999 (parallel [(match_operand 3 "const_0_to_15_operand")
12000 (match_operand 4 "const_0_to_15_operand")
12001 (match_operand 5 "const_0_to_15_operand")
12002 (match_operand 6 "const_0_to_15_operand")
12003 (match_operand 7 "const_0_to_15_operand")
12004 (match_operand 8 "const_0_to_15_operand")
12005 (match_operand 9 "const_0_to_15_operand")
12006 (match_operand 10 "const_0_to_15_operand")
12007 (match_operand 11 "const_16_to_31_operand")
12008 (match_operand 12 "const_16_to_31_operand")
12009 (match_operand 13 "const_16_to_31_operand")
12010 (match_operand 14 "const_16_to_31_operand")
12011 (match_operand 15 "const_16_to_31_operand")
12012 (match_operand 16 "const_16_to_31_operand")
12013 (match_operand 17 "const_16_to_31_operand")
12014 (match_operand 18 "const_16_to_31_operand")])))]
12016 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12017 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12018 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12019 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12020 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12021 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12022 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12023 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12024 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12025 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12026 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12027 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12030 mask = INTVAL (operands[3]) / 4;
12031 mask |= INTVAL (operands[7]) / 4 << 2;
12032 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12033 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12034 operands[3] = GEN_INT (mask);
12036 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12038 [(set_attr "type" "sselog")
12039 (set_attr "length_immediate" "1")
12040 (set_attr "prefix" "evex")
12041 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked V16SI vpshufd.  Operand 2 is an immediate in
;; 0..255, split into four 2-bit fields.  The same four field values are
;; emitted four times, with offsets 0, 4, 8 and 12, giving the sixteen
;; element indices that avx512f_pshufd_1_mask expects.  Operands 3 and 4
;; are passed through unchanged as the last two arguments.
12043 (define_expand "avx512f_pshufdv3_mask"
12044 [(match_operand:V16SI 0 "register_operand")
12045 (match_operand:V16SI 1 "nonimmediate_operand")
12046 (match_operand:SI 2 "const_0_to_255_operand")
12047 (match_operand:V16SI 3 "register_operand")
12048 (match_operand:HI 4 "register_operand")]
12051 int mask = INTVAL (operands[2]);
12052 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12053 GEN_INT ((mask >> 0) & 3),
12054 GEN_INT ((mask >> 2) & 3),
12055 GEN_INT ((mask >> 4) & 3),
12056 GEN_INT ((mask >> 6) & 3),
12057 GEN_INT (((mask >> 0) & 3) + 4),
12058 GEN_INT (((mask >> 2) & 3) + 4),
12059 GEN_INT (((mask >> 4) & 3) + 4),
12060 GEN_INT (((mask >> 6) & 3) + 4),
12061 GEN_INT (((mask >> 0) & 3) + 8),
12062 GEN_INT (((mask >> 2) & 3) + 8),
12063 GEN_INT (((mask >> 4) & 3) + 8),
12064 GEN_INT (((mask >> 6) & 3) + 8),
12065 GEN_INT (((mask >> 0) & 3) + 12),
12066 GEN_INT (((mask >> 2) & 3) + 12),
12067 GEN_INT (((mask >> 4) & 3) + 12),
12068 GEN_INT (((mask >> 6) & 3) + 12),
12069 operands[3], operands[4]));
;; Insn for the V16SI vpshufd.  Sixteen element indices select from
;; operand 1, four per range 0..3, 4..7, 8..11 and 12..15.  The visible
;; part of the condition requires operands 6..9, 10..13 and 14..17 to
;; equal operands 2..5 plus 4, 8 and 12 respectively, i.e. the same
;; four-element choice repeated in every group.  The output code packs
;; operands 2..5 into an immediate, two bits each, and stores it in
;; operands[2] for printing as %2.
;; NOTE(review): the declaration/initialisation of `mask' is not visible
;; in this excerpt; the code only shows `|=' updates.
12073 (define_insn "avx512f_pshufd_1<mask_name>"
12074 [(set (match_operand:V16SI 0 "register_operand" "=v")
12076 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12077 (parallel [(match_operand 2 "const_0_to_3_operand")
12078 (match_operand 3 "const_0_to_3_operand")
12079 (match_operand 4 "const_0_to_3_operand")
12080 (match_operand 5 "const_0_to_3_operand")
12081 (match_operand 6 "const_4_to_7_operand")
12082 (match_operand 7 "const_4_to_7_operand")
12083 (match_operand 8 "const_4_to_7_operand")
12084 (match_operand 9 "const_4_to_7_operand")
12085 (match_operand 10 "const_8_to_11_operand")
12086 (match_operand 11 "const_8_to_11_operand")
12087 (match_operand 12 "const_8_to_11_operand")
12088 (match_operand 13 "const_8_to_11_operand")
12089 (match_operand 14 "const_12_to_15_operand")
12090 (match_operand 15 "const_12_to_15_operand")
12091 (match_operand 16 "const_12_to_15_operand")
12092 (match_operand 17 "const_12_to_15_operand")])))]
12094 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12095 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12096 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12097 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12098 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12099 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12100 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12101 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12102 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12103 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12104 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12105 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12108 mask |= INTVAL (operands[2]) << 0;
12109 mask |= INTVAL (operands[3]) << 2;
12110 mask |= INTVAL (operands[4]) << 4;
12111 mask |= INTVAL (operands[5]) << 6;
12112 operands[2] = GEN_INT (mask);
12114 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12116 [(set_attr "type" "sselog1")
12117 (set_attr "prefix" "evex")
12118 (set_attr "length_immediate" "1")
12119 (set_attr "mode" "XI")])
;; Expander for the masked V8SI vpshufd.  The 0..255 immediate in
;; operand 2 is split into four 2-bit fields, emitted once as-is and once
;; with 4 added, giving the eight indices avx2_pshufd_1_mask expects.
;; Operands 3 and 4 are passed through unchanged as the last two
;; arguments.
12121 (define_expand "avx512vl_pshufdv3_mask"
12122 [(match_operand:V8SI 0 "register_operand")
12123 (match_operand:V8SI 1 "nonimmediate_operand")
12124 (match_operand:SI 2 "const_0_to_255_operand")
12125 (match_operand:V8SI 3 "register_operand")
12126 (match_operand:QI 4 "register_operand")]
12129 int mask = INTVAL (operands[2]);
12130 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
12131 GEN_INT ((mask >> 0) & 3),
12132 GEN_INT ((mask >> 2) & 3),
12133 GEN_INT ((mask >> 4) & 3),
12134 GEN_INT ((mask >> 6) & 3),
12135 GEN_INT (((mask >> 0) & 3) + 4),
12136 GEN_INT (((mask >> 2) & 3) + 4),
12137 GEN_INT (((mask >> 4) & 3) + 4),
12138 GEN_INT (((mask >> 6) & 3) + 4),
12139 operands[3], operands[4]));
;; Unmasked V8SI vpshufd expander.  The 0..255 immediate in operand 2 is
;; split into four 2-bit fields, emitted once as-is and once with 4 added,
;; and the eight resulting indices are handed to avx2_pshufd_1.
12143 (define_expand "avx2_pshufdv3"
12144 [(match_operand:V8SI 0 "register_operand")
12145 (match_operand:V8SI 1 "nonimmediate_operand")
12146 (match_operand:SI 2 "const_0_to_255_operand")]
12149 int mask = INTVAL (operands[2]);
12150 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
12151 GEN_INT ((mask >> 0) & 3),
12152 GEN_INT ((mask >> 2) & 3),
12153 GEN_INT ((mask >> 4) & 3),
12154 GEN_INT ((mask >> 6) & 3),
12155 GEN_INT (((mask >> 0) & 3) + 4),
12156 GEN_INT (((mask >> 2) & 3) + 4),
12157 GEN_INT (((mask >> 4) & 3) + 4),
12158 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn for the V8SI vpshufd.  Operands 2..5 are indices in 0..3 and
;; operands 6..9 indices in 4..7; the visible part of the condition
;; requires operands 6..9 to equal operands 2..5 plus 4 and also includes
;; <mask_avx512vl_condition>.  The output code packs operands 2..5 into an
;; immediate, two bits each, and stores it in operands[2] for printing
;; as %2.
12162 (define_insn "avx2_pshufd_1<mask_name>"
12163 [(set (match_operand:V8SI 0 "register_operand" "=v")
12165 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
12166 (parallel [(match_operand 2 "const_0_to_3_operand")
12167 (match_operand 3 "const_0_to_3_operand")
12168 (match_operand 4 "const_0_to_3_operand")
12169 (match_operand 5 "const_0_to_3_operand")
12170 (match_operand 6 "const_4_to_7_operand")
12171 (match_operand 7 "const_4_to_7_operand")
12172 (match_operand 8 "const_4_to_7_operand")
12173 (match_operand 9 "const_4_to_7_operand")])))]
12175 && <mask_avx512vl_condition>
12176 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12177 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12178 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12179 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
12182 mask |= INTVAL (operands[2]) << 0;
12183 mask |= INTVAL (operands[3]) << 2;
12184 mask |= INTVAL (operands[4]) << 4;
12185 mask |= INTVAL (operands[5]) << 6;
12186 operands[2] = GEN_INT (mask);
12188 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12190 [(set_attr "type" "sselog1")
12191 (set_attr "prefix" "maybe_evex")
12192 (set_attr "length_immediate" "1")
12193 (set_attr "mode" "OI")])
;; Expander for the masked V4SI pshufd.  The 0..255 immediate in operand 2
;; is split into four 2-bit element indices, which are handed to
;; sse2_pshufd_1_mask followed by operands 3 and 4 unchanged.
12195 (define_expand "avx512vl_pshufd_mask"
12196 [(match_operand:V4SI 0 "register_operand")
12197 (match_operand:V4SI 1 "nonimmediate_operand")
12198 (match_operand:SI 2 "const_0_to_255_operand")
12199 (match_operand:V4SI 3 "register_operand")
12200 (match_operand:QI 4 "register_operand")]
12203 int mask = INTVAL (operands[2]);
12204 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
12205 GEN_INT ((mask >> 0) & 3),
12206 GEN_INT ((mask >> 2) & 3),
12207 GEN_INT ((mask >> 4) & 3),
12208 GEN_INT ((mask >> 6) & 3),
12209 operands[3], operands[4]));
;; Unmasked V4SI pshufd expander.  Operand 2 is accepted as any const_int
;; (const_int_operand, no range predicate); only its low eight bits are
;; used, as four 2-bit element indices handed to sse2_pshufd_1.
12213 (define_expand "sse2_pshufd"
12214 [(match_operand:V4SI 0 "register_operand")
12215 (match_operand:V4SI 1 "nonimmediate_operand")
12216 (match_operand:SI 2 "const_int_operand")]
12219 int mask = INTVAL (operands[2]);
12220 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
12221 GEN_INT ((mask >> 0) & 3),
12222 GEN_INT ((mask >> 2) & 3),
12223 GEN_INT ((mask >> 4) & 3),
12224 GEN_INT ((mask >> 6) & 3)));
;; Insn for the V4SI pshufd, enabled by TARGET_SSE2 together with
;; <mask_avx512vl_condition>.  Operands 2..5 are element indices in 0..3
;; selecting from operand 1.  The output code packs them into an
;; immediate, two bits each, and stores it in operands[2] for printing
;; as %2.
12228 (define_insn "sse2_pshufd_1<mask_name>"
12229 [(set (match_operand:V4SI 0 "register_operand" "=v")
12231 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
12232 (parallel [(match_operand 2 "const_0_to_3_operand")
12233 (match_operand 3 "const_0_to_3_operand")
12234 (match_operand 4 "const_0_to_3_operand")
12235 (match_operand 5 "const_0_to_3_operand")])))]
12236 "TARGET_SSE2 && <mask_avx512vl_condition>"
12239 mask |= INTVAL (operands[2]) << 0;
12240 mask |= INTVAL (operands[3]) << 2;
12241 mask |= INTVAL (operands[4]) << 4;
12242 mask |= INTVAL (operands[5]) << 6;
12243 operands[2] = GEN_INT (mask);
12245 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12247 [(set_attr "type" "sselog1")
12248 (set_attr "prefix_data16" "1")
12249 (set_attr "prefix" "<mask_prefix2>")
12250 (set_attr "length_immediate" "1")
12251 (set_attr "mode" "TI")])
;; V32HI vpshuflw.  Unlike the narrower pshuflw patterns in this section,
;; the 0..255 immediate (operand 2) is carried as-is next to operand 1 and
;; printed directly as %2; there is no per-element index list.
;; NOTE(review): the rtx wrapping the bracketed operand list and the insn
;; condition are not visible in this excerpt.  Also, no length_immediate
;; attribute is set here although the template prints an immediate and
;; the avx2_pshuflw_1 / sse2_pshuflw_1 patterns do set it -- confirm
;; whether that omission is intentional.
12253 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12254 [(set (match_operand:V32HI 0 "register_operand" "=v")
12256 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12257 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12260 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12261 [(set_attr "type" "sselog")
12262 (set_attr "prefix" "evex")
12263 (set_attr "mode" "XI")])
;; Expander for the masked V16HI vpshuflw, enabled by TARGET_AVX512VL &&
;; TARGET_AVX512BW.  The 0..255 immediate in operand 2 is split into four
;; 2-bit fields, emitted once as-is and once with 8 added, and handed to
;; avx2_pshuflw_1_mask followed by operands 3 and 4 unchanged.
12265 (define_expand "avx512vl_pshuflwv3_mask"
12266 [(match_operand:V16HI 0 "register_operand")
12267 (match_operand:V16HI 1 "nonimmediate_operand")
12268 (match_operand:SI 2 "const_0_to_255_operand")
12269 (match_operand:V16HI 3 "register_operand")
12270 (match_operand:HI 4 "register_operand")]
12271 "TARGET_AVX512VL && TARGET_AVX512BW"
12273 int mask = INTVAL (operands[2]);
12274 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
12275 GEN_INT ((mask >> 0) & 3),
12276 GEN_INT ((mask >> 2) & 3),
12277 GEN_INT ((mask >> 4) & 3),
12278 GEN_INT ((mask >> 6) & 3),
12279 GEN_INT (((mask >> 0) & 3) + 8),
12280 GEN_INT (((mask >> 2) & 3) + 8),
12281 GEN_INT (((mask >> 4) & 3) + 8),
12282 GEN_INT (((mask >> 6) & 3) + 8),
12283 operands[3], operands[4]));
;; Unmasked V16HI vpshuflw expander.  The 0..255 immediate in operand 2 is
;; split into four 2-bit fields, emitted once as-is and once with 8 added,
;; and the eight resulting indices are handed to avx2_pshuflw_1.
12287 (define_expand "avx2_pshuflwv3"
12288 [(match_operand:V16HI 0 "register_operand")
12289 (match_operand:V16HI 1 "nonimmediate_operand")
12290 (match_operand:SI 2 "const_0_to_255_operand")]
12293 int mask = INTVAL (operands[2]);
12294 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
12295 GEN_INT ((mask >> 0) & 3),
12296 GEN_INT ((mask >> 2) & 3),
12297 GEN_INT ((mask >> 4) & 3),
12298 GEN_INT ((mask >> 6) & 3),
12299 GEN_INT (((mask >> 0) & 3) + 8),
12300 GEN_INT (((mask >> 2) & 3) + 8),
12301 GEN_INT (((mask >> 4) & 3) + 8),
12302 GEN_INT (((mask >> 6) & 3) + 8)));
;; Insn for the V16HI vpshuflw.  Operands 2..5 are indices in 0..3 and
;; operands 6..9 indices in 8..11; the visible part of the condition
;; requires operands 6..9 to equal operands 2..5 plus 8 and also includes
;; <mask_avx512bw_condition> and <mask_avx512vl_condition>.  The output
;; code packs operands 2..5 into an immediate, two bits each, and stores
;; it in operands[2] for printing as %2.
;; NOTE(review): several lines of the parallel (the fixed const_int
;; entries between the operand groups) are not visible in this excerpt.
12306 (define_insn "avx2_pshuflw_1<mask_name>"
12307 [(set (match_operand:V16HI 0 "register_operand" "=v")
12309 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12310 (parallel [(match_operand 2 "const_0_to_3_operand")
12311 (match_operand 3 "const_0_to_3_operand")
12312 (match_operand 4 "const_0_to_3_operand")
12313 (match_operand 5 "const_0_to_3_operand")
12318 (match_operand 6 "const_8_to_11_operand")
12319 (match_operand 7 "const_8_to_11_operand")
12320 (match_operand 8 "const_8_to_11_operand")
12321 (match_operand 9 "const_8_to_11_operand")
12325 (const_int 15)])))]
12327 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12328 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12329 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12330 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12331 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12334 mask |= INTVAL (operands[2]) << 0;
12335 mask |= INTVAL (operands[3]) << 2;
12336 mask |= INTVAL (operands[4]) << 4;
12337 mask |= INTVAL (operands[5]) << 6;
12338 operands[2] = GEN_INT (mask);
12340 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12342 [(set_attr "type" "sselog")
12343 (set_attr "prefix" "maybe_evex")
12344 (set_attr "length_immediate" "1")
12345 (set_attr "mode" "OI")])
;; Expander for the masked V8HI pshuflw, enabled by TARGET_AVX512VL &&
;; TARGET_AVX512BW.  The 0..255 immediate in operand 2 is split into four
;; 2-bit element indices, which are handed to sse2_pshuflw_1_mask followed
;; by operands 3 and 4 unchanged.
12347 (define_expand "avx512vl_pshuflw_mask"
12348 [(match_operand:V8HI 0 "register_operand")
12349 (match_operand:V8HI 1 "nonimmediate_operand")
12350 (match_operand:SI 2 "const_0_to_255_operand")
12351 (match_operand:V8HI 3 "register_operand")
12352 (match_operand:QI 4 "register_operand")]
12353 "TARGET_AVX512VL && TARGET_AVX512BW"
12355 int mask = INTVAL (operands[2]);
12356 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
12357 GEN_INT ((mask >> 0) & 3),
12358 GEN_INT ((mask >> 2) & 3),
12359 GEN_INT ((mask >> 4) & 3),
12360 GEN_INT ((mask >> 6) & 3),
12361 operands[3], operands[4]));
;; Unmasked V8HI pshuflw expander.  Operand 2 is accepted as any const_int
;; (const_int_operand, no range predicate); only its low eight bits are
;; used, as four 2-bit element indices handed to sse2_pshuflw_1.
12365 (define_expand "sse2_pshuflw"
12366 [(match_operand:V8HI 0 "register_operand")
12367 (match_operand:V8HI 1 "nonimmediate_operand")
12368 (match_operand:SI 2 "const_int_operand")]
12371 int mask = INTVAL (operands[2]);
12372 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
12373 GEN_INT ((mask >> 0) & 3),
12374 GEN_INT ((mask >> 2) & 3),
12375 GEN_INT ((mask >> 4) & 3),
12376 GEN_INT ((mask >> 6) & 3)));
;; Insn for the V8HI pshuflw, enabled by TARGET_SSE2 together with
;; <mask_avx512bw_condition> and <mask_avx512vl_condition>.  Operands 2..5
;; are element indices in 0..3 selecting from operand 1.  The output code
;; packs them into an immediate, two bits each, and stores it in
;; operands[2] for printing as %2.
;; NOTE(review): the remaining entries of the parallel after operand 5
;; are not visible in this excerpt.
12380 (define_insn "sse2_pshuflw_1<mask_name>"
12381 [(set (match_operand:V8HI 0 "register_operand" "=v")
12383 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12384 (parallel [(match_operand 2 "const_0_to_3_operand")
12385 (match_operand 3 "const_0_to_3_operand")
12386 (match_operand 4 "const_0_to_3_operand")
12387 (match_operand 5 "const_0_to_3_operand")
12392 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12395 mask |= INTVAL (operands[2]) << 0;
12396 mask |= INTVAL (operands[3]) << 2;
12397 mask |= INTVAL (operands[4]) << 4;
12398 mask |= INTVAL (operands[5]) << 6;
12399 operands[2] = GEN_INT (mask);
12401 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12403 [(set_attr "type" "sselog")
12404 (set_attr "prefix_data16" "0")
12405 (set_attr "prefix_rep" "1")
12406 (set_attr "prefix" "maybe_vex")
12407 (set_attr "length_immediate" "1")
12408 (set_attr "mode" "TI")])
;; Unmasked V16HI vpshufhw expander.  The 0..255 immediate in operand 2 is
;; split into four 2-bit fields, emitted once with 4 added and once with
;; 12 added, and the eight resulting indices are handed to
;; avx2_pshufhw_1.
12410 (define_expand "avx2_pshufhwv3"
12411 [(match_operand:V16HI 0 "register_operand")
12412 (match_operand:V16HI 1 "nonimmediate_operand")
12413 (match_operand:SI 2 "const_0_to_255_operand")]
12416 int mask = INTVAL (operands[2]);
12417 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
12418 GEN_INT (((mask >> 0) & 3) + 4),
12419 GEN_INT (((mask >> 2) & 3) + 4),
12420 GEN_INT (((mask >> 4) & 3) + 4),
12421 GEN_INT (((mask >> 6) & 3) + 4),
12422 GEN_INT (((mask >> 0) & 3) + 12),
12423 GEN_INT (((mask >> 2) & 3) + 12),
12424 GEN_INT (((mask >> 4) & 3) + 12),
12425 GEN_INT (((mask >> 6) & 3) + 12)));
;; V32HI vpshufhw.  Unlike the narrower pshufhw patterns in this section,
;; the 0..255 immediate (operand 2) is carried as-is next to operand 1 and
;; printed directly as %2; there is no per-element index list.
;; NOTE(review): the rtx wrapping the bracketed operand list and the insn
;; condition are not visible in this excerpt.  Also, no length_immediate
;; attribute is set here although the template prints an immediate and
;; the avx2_pshufhw_1 / sse2_pshufhw_1 patterns do set it -- confirm
;; whether that omission is intentional.
12429 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12430 [(set (match_operand:V32HI 0 "register_operand" "=v")
12432 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12433 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12436 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12437 [(set_attr "type" "sselog")
12438 (set_attr "prefix" "evex")
12439 (set_attr "mode" "XI")])
;; Expander for the masked V16HI vpshufhw, enabled by TARGET_AVX512VL &&
;; TARGET_AVX512BW.  The 0..255 immediate in operand 2 is split into four
;; 2-bit fields, emitted once with 4 added and once with 12 added, and
;; handed to avx2_pshufhw_1_mask followed by operands 3 and 4 unchanged.
12441 (define_expand "avx512vl_pshufhwv3_mask"
12442 [(match_operand:V16HI 0 "register_operand")
12443 (match_operand:V16HI 1 "nonimmediate_operand")
12444 (match_operand:SI 2 "const_0_to_255_operand")
12445 (match_operand:V16HI 3 "register_operand")
12446 (match_operand:HI 4 "register_operand")]
12447 "TARGET_AVX512VL && TARGET_AVX512BW"
12449 int mask = INTVAL (operands[2]);
12450 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
12451 GEN_INT (((mask >> 0) & 3) + 4),
12452 GEN_INT (((mask >> 2) & 3) + 4),
12453 GEN_INT (((mask >> 4) & 3) + 4),
12454 GEN_INT (((mask >> 6) & 3) + 4),
12455 GEN_INT (((mask >> 0) & 3) + 12),
12456 GEN_INT (((mask >> 2) & 3) + 12),
12457 GEN_INT (((mask >> 4) & 3) + 12),
12458 GEN_INT (((mask >> 6) & 3) + 12),
12459 operands[3], operands[4]));
;; Insn for the V16HI vpshufhw.  Operands 2..5 are indices in 4..7 and
;; operands 6..9 indices in 12..15; the visible part of the condition
;; requires operands 6..9 to equal operands 2..5 plus 8 and also includes
;; <mask_avx512bw_condition> and <mask_avx512vl_condition>.  The output
;; code subtracts 4 from each of operands 2..5, packs the results into an
;; immediate two bits each, and stores it in operands[2] for printing
;; as %2.
;; NOTE(review): several lines of the parallel (the fixed const_int
;; entries around the operand groups) are not visible in this excerpt.
12463 (define_insn "avx2_pshufhw_1<mask_name>"
12464 [(set (match_operand:V16HI 0 "register_operand" "=v")
12466 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12467 (parallel [(const_int 0)
12471 (match_operand 2 "const_4_to_7_operand")
12472 (match_operand 3 "const_4_to_7_operand")
12473 (match_operand 4 "const_4_to_7_operand")
12474 (match_operand 5 "const_4_to_7_operand")
12479 (match_operand 6 "const_12_to_15_operand")
12480 (match_operand 7 "const_12_to_15_operand")
12481 (match_operand 8 "const_12_to_15_operand")
12482 (match_operand 9 "const_12_to_15_operand")])))]
12484 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12485 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12486 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12487 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12488 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12491 mask |= (INTVAL (operands[2]) - 4) << 0;
12492 mask |= (INTVAL (operands[3]) - 4) << 2;
12493 mask |= (INTVAL (operands[4]) - 4) << 4;
12494 mask |= (INTVAL (operands[5]) - 4) << 6;
12495 operands[2] = GEN_INT (mask);
12497 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12499 [(set_attr "type" "sselog")
12500 (set_attr "prefix" "maybe_evex")
12501 (set_attr "length_immediate" "1")
12502 (set_attr "mode" "OI")])
;; Expander for the masked V8HI pshufhw, enabled by TARGET_AVX512VL &&
;; TARGET_AVX512BW.  The 0..255 immediate in operand 2 is split into four
;; 2-bit fields, each offset by 4, and handed to sse2_pshufhw_1_mask
;; followed by operands 3 and 4 unchanged.
12504 (define_expand "avx512vl_pshufhw_mask"
12505 [(match_operand:V8HI 0 "register_operand")
12506 (match_operand:V8HI 1 "nonimmediate_operand")
12507 (match_operand:SI 2 "const_0_to_255_operand")
12508 (match_operand:V8HI 3 "register_operand")
12509 (match_operand:QI 4 "register_operand")]
12510 "TARGET_AVX512VL && TARGET_AVX512BW"
12512 int mask = INTVAL (operands[2]);
12513 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12514 GEN_INT (((mask >> 0) & 3) + 4),
12515 GEN_INT (((mask >> 2) & 3) + 4),
12516 GEN_INT (((mask >> 4) & 3) + 4),
12517 GEN_INT (((mask >> 6) & 3) + 4),
12518 operands[3], operands[4]));
;; Unmasked V8HI pshufhw expander.  Operand 2 is accepted as any const_int
;; (const_int_operand, no range predicate); only its low eight bits are
;; used, as four 2-bit fields each offset by 4 and handed to
;; sse2_pshufhw_1.
12522 (define_expand "sse2_pshufhw"
12523 [(match_operand:V8HI 0 "register_operand")
12524 (match_operand:V8HI 1 "nonimmediate_operand")
12525 (match_operand:SI 2 "const_int_operand")]
12528 int mask = INTVAL (operands[2]);
12529 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12530 GEN_INT (((mask >> 0) & 3) + 4),
12531 GEN_INT (((mask >> 2) & 3) + 4),
12532 GEN_INT (((mask >> 4) & 3) + 4),
12533 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn for the V8HI pshufhw, enabled by TARGET_SSE2 together with
;; <mask_avx512bw_condition> and <mask_avx512vl_condition>.  Operands 2..5
;; are element indices in 4..7 selecting from operand 1.  The output code
;; subtracts 4 from each, packs the results into an immediate two bits
;; each, and stores it in operands[2] for printing as %2.
;; NOTE(review): the fixed const_int entries of the parallel between
;; (const_int 0) and operand 2 are not visible in this excerpt.
12537 (define_insn "sse2_pshufhw_1<mask_name>"
12538 [(set (match_operand:V8HI 0 "register_operand" "=v")
12540 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12541 (parallel [(const_int 0)
12545 (match_operand 2 "const_4_to_7_operand")
12546 (match_operand 3 "const_4_to_7_operand")
12547 (match_operand 4 "const_4_to_7_operand")
12548 (match_operand 5 "const_4_to_7_operand")])))]
12549 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12552 mask |= (INTVAL (operands[2]) - 4) << 0;
12553 mask |= (INTVAL (operands[3]) - 4) << 2;
12554 mask |= (INTVAL (operands[4]) - 4) << 4;
12555 mask |= (INTVAL (operands[5]) - 4) << 6;
12556 operands[2] = GEN_INT (mask);
12558 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12560 [(set_attr "type" "sselog")
12561 (set_attr "prefix_rep" "1")
12562 (set_attr "prefix_data16" "0")
12563 (set_attr "prefix" "maybe_vex")
12564 (set_attr "length_immediate" "1")
12565 (set_attr "mode" "TI")])
;; Expander that loads the SImode operand 1 into the V4SI operand 0 via a
;; vec_duplicate.  The preparation statement sets operands[2] to the
;; V4SImode zero vector (CONST0_RTX).
;; NOTE(review): the rtx that combines the vec_duplicate with operand 2
;; is not visible in this excerpt; presumably a vec_merge that keeps only
;; the lowest element from operand 1 -- confirm.
12567 (define_expand "sse2_loadd"
12568 [(set (match_operand:V4SI 0 "register_operand")
12570 (vec_duplicate:V4SI
12571 (match_operand:SI 1 "nonimmediate_operand"))
12575 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn combining a vec_duplicate of the SImode operand 2 with the V4SI
;; operand 1 (register or zero).  Five alternatives:
;;   0, 1: operand 1 is constraint "C", operand 2 from memory or a general
;;         register -> %vmovd;
;;   2:    operand 1 is constraint "C", operand 2 from memory -> movss
;;         (noavx);
;;   3:    operand 1 tied to operand 0, operand 2 in an SSE register ->
;;         movss (noavx);
;;   4:    three-operand vmovss (avx).
12577 (define_insn "sse2_loadld"
12578 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12580 (vec_duplicate:V4SI
12581 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12582 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12586 %vmovd\t{%2, %0|%0, %2}
12587 %vmovd\t{%2, %0|%0, %2}
12588 movss\t{%2, %0|%0, %2}
12589 movss\t{%2, %0|%0, %2}
12590 vmovss\t{%2, %1, %0|%0, %1, %2}"
12591 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
12592 (set_attr "type" "ssemov")
12593 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12594 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one scalar element from a VI12_128 vector register using
;; %vpextr<ssemodesuffix>.  Operand 2 is the element index, bounded by
;; <ssescalarnummask>.  Alternative 0 writes a general register (printed
;; with the %k0 modifier); alternative 1 writes memory.  The
;; prefix_data16 and prefix_extra attributes are special-cased for
;; alternative 0 when the mode is V8HImode.
12596 (define_insn "*vec_extract<mode>"
12597 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
12598 (vec_select:<ssescalarmode>
12599 (match_operand:VI12_128 1 "register_operand" "x,x")
12601 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12604 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12605 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12606 [(set_attr "type" "sselog1")
12607 (set (attr "prefix_data16")
12609 (and (eq_attr "alternative" "0")
12610 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12612 (const_string "*")))
12613 (set (attr "prefix_extra")
12615 (and (eq_attr "alternative" "0")
12616 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12618 (const_string "1")))
12619 (set_attr "length_immediate" "1")
12620 (set_attr "prefix" "maybe_vex")
12621 (set_attr "mode" "TI")])
;; Extract one HImode element (index 0..7) from a V8HI register into a
;; general register with pextrw.  Enabled only when TARGET_SSE2 is set
;; and TARGET_SSE4_1 is not.
12623 (define_insn "*vec_extractv8hi_sse2"
12624 [(set (match_operand:HI 0 "register_operand" "=r")
12626 (match_operand:V8HI 1 "register_operand" "x")
12628 [(match_operand:SI 2 "const_0_to_7_operand")])))]
12629 "TARGET_SSE2 && !TARGET_SSE4_1"
12630 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
12631 [(set_attr "type" "sselog1")
12632 (set_attr "prefix_data16" "1")
12633 (set_attr "length_immediate" "1")
12634 (set_attr "mode" "TI")])
;; Byte extraction from a V16QI register with an SImode or DImode (SWI48)
;; general-register destination, as the pattern name says zero-extended.
;; Emits (v)pextrb; the destination is always printed via %k0.
12636 (define_insn "*vec_extractv16qi_zext"
12637 [(set (match_operand:SWI48 0 "register_operand" "=r")
12640 (match_operand:V16QI 1 "register_operand" "x")
12642 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
12644 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
12645 [(set_attr "type" "sselog1")
12646 (set_attr "prefix_extra" "1")
12647 (set_attr "length_immediate" "1")
12648 (set_attr "prefix" "maybe_vex")
12649 (set_attr "mode" "TI")])
;; Word extraction from a V8HI register with an SImode or DImode (SWI48)
;; general-register destination, as the pattern name says zero-extended.
;; Emits (v)pextrw; the destination is always printed via %k0.
12651 (define_insn "*vec_extractv8hi_zext"
12652 [(set (match_operand:SWI48 0 "register_operand" "=r")
12655 (match_operand:V8HI 1 "register_operand" "x")
12657 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
12659 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
12660 [(set_attr "type" "sselog1")
12661 (set_attr "prefix_data16" "1")
12662 (set_attr "length_immediate" "1")
12663 (set_attr "prefix" "maybe_vex")
12664 (set_attr "mode" "TI")])
;; Element extraction for VI12_128 vectors that live in memory: the source
;; must be an offsettable memory operand ("o"), the destination a general
;; register, and the index a constant within the vector's element range.
12666 (define_insn "*vec_extract<mode>_mem"
12667 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12668 (vec_select:<ssescalarmode>
12669 (match_operand:VI12_128 1 "memory_operand" "o")
12671 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extraction of element 0 (the parallel selects const_int 0) of the vector
;; mode matching an SImode/DImode (SWI48) scalar.  Four alternatives cover
;; general-register, SSE-register and memory destinations; the condition
;; rejects the memory-to-memory combination.  Only alternative 1 carries an
;; isa restriction (sse4).
12675 (define_insn "*vec_extract<ssevecmodelower>_0"
12676 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12678 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12679 (parallel [(const_int 0)])))]
12680 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12682 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register, extended into a DImode general register.
;; Requires a 64-bit SSE2 target where vector-to-integer register moves are
;; enabled.  After reload it is split into a zero_extend:DI whose source is
;; the same hard register as operand 1, re-expressed in SImode.
12684 (define_insn_and_split "*vec_extractv4si_0_zext"
12685 [(set (match_operand:DI 0 "register_operand" "=r")
12688 (match_operand:V4SI 1 "register_operand" "x")
12689 (parallel [(const_int 0)]))))]
12690 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12692 "&& reload_completed"
12693 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12694 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element 0 of a V2DI value on 32-bit targets (!TARGET_64BIT): the DImode
;; destination is an SSE register or memory, never a general register.
;; The memory-to-memory combination is rejected by the condition.
12696 (define_insn "*vec_extractv2di_0_sse"
12697 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12699 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12700 (parallel [(const_int 0)])))]
12701 "TARGET_SSE && !TARGET_64BIT
12702 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; After reload, extraction of element 0 from a vector *register* becomes a
;; plain scalar move: operand 1 is replaced by the same hard register
;; number viewed in the scalar mode <MODE>mode.
12706 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12708 (match_operand:<ssevecmode> 1 "register_operand")
12709 (parallel [(const_int 0)])))]
12710 "TARGET_SSE && reload_completed"
12711 [(set (match_dup 0) (match_dup 1))]
12712 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; SImode element extraction from a V4SI register at constant index
;; operand 2 (0..3).  The C output code dispatches on which_alternative:
;;   - (v)pextrd with the index as immediate (type sselog1);
;;   - psrldq on the tied register, after rewriting operand 2 to the byte
;;     count index * 4 (isa noavx);
;;   - three-operand vpsrldq, with the same index * 4 rewrite (isa avx).
;; Any other alternative is a compiler bug (gcc_unreachable).
12714 (define_insn "*vec_extractv4si"
12715 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
12717 (match_operand:V4SI 1 "register_operand" "x,0,0,x")
12718 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12721 switch (which_alternative)
12724 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12728 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12729 return "psrldq\t{%2, %0|%0, %2}";
12732 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12733 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12736 gcc_unreachable ();
12739 [(set_attr "isa" "*,noavx,noavx,avx")
12740 (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
12741 (set_attr "prefix_extra" "1,*,*,*")
12742 (set_attr "length_immediate" "1")
12743 (set_attr "prefix" "maybe_vex,orig,orig,vex")
12744 (set_attr "mode" "TI")])
;; V4SI element extraction at constant index 0..3 into a DImode general
;; register on 64-bit SSE4.1 targets.  Emits (v)pextrd with the destination
;; printed through %k0.
12746 (define_insn "*vec_extractv4si_zext"
12747 [(set (match_operand:DI 0 "register_operand" "=r")
12750 (match_operand:V4SI 1 "register_operand" "x")
12751 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12752 "TARGET_64BIT && TARGET_SSE4_1"
12753 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
12754 [(set_attr "type" "sselog1")
12755 (set_attr "prefix_extra" "1")
12756 (set_attr "length_immediate" "1")
12757 (set_attr "prefix" "maybe_vex")
12758 (set_attr "mode" "TI")])
;; SImode element extraction from a V4SI value in offsettable memory ("o")
;; into either an SSE register or a general register.
12760 (define_insn "*vec_extractv4si_mem"
12761 [(set (match_operand:SI 0 "register_operand" "=x,r")
12763 (match_operand:V4SI 1 "memory_operand" "o,o")
12764 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; DImode-destination extraction of a V4SI element that lives in
;; offsettable memory (64-bit targets only).  After reload it is split into
;; a zero_extend:DI of an SImode memory reference at byte offset index * 4
;; from the original address.
12768 (define_insn_and_split "*vec_extractv4si_zext_mem"
12769 [(set (match_operand:DI 0 "register_operand" "=x,r")
12772 (match_operand:V4SI 1 "memory_operand" "o,o")
12773 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12774 "TARGET_64BIT && TARGET_SSE"
12776 "&& reload_completed"
12777 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12779 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extraction of the high element (index 1) of a V2DI value.  Seven
;; alternatives; the visible templates are, in order:
;;   0  (v)pextrq $1 to register or memory   (isa x64_sse4)
;;   1  (v)movhps store to memory
;;   2  psrldq $8 on the tied register       (isa sse2_noavx)
;;   3  vpsrldq $8, three-operand            (isa avx)
;;   4  movhlps                              (isa noavx)
;; Alternatives 5 and 6 take the source from offsettable memory ("o") into
;; an SSE register and a general register (isa x64) respectively.
;; The memory-to-memory combination is rejected by the condition.
12782 (define_insn "*vec_extractv2di_1"
12783 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
12785 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
12786 (parallel [(const_int 1)])))]
12787 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12789 %vpextrq\t{$1, %1, %0|%0, %1, 1}
12790 %vmovhps\t{%1, %0|%0, %1}
12791 psrldq\t{$8, %0|%0, 8}
12792 vpsrldq\t{$8, %1, %0|%0, %1, 8}
12793 movhlps\t{%1, %0|%0, %1}
12796 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
12797 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
12798 (set_attr "length_immediate" "1,*,1,1,*,*,*")
12799 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
12800 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
12801 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
12802 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; After reload, extraction of a constant-index element from a 128-bit
;; integer vector in *memory* becomes a scalar load: the address is advanced
;; by index * element size and the reference is narrowed to the scalar mode.
12805 [(set (match_operand:<ssescalarmode> 0 "register_operand")
12806 (vec_select:<ssescalarmode>
12807 (match_operand:VI_128 1 "memory_operand")
12809 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12810 "TARGET_SSE && reload_completed"
12811 [(set (match_dup 0) (match_dup 1))]
12813 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12815 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
12818 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
12819 ;; vector modes into vec_extract*.
;; Matching requirements (all from the condition below): new pseudos may
;; still be created; operand 1 is a SUBREG at byte offset 0 of a REG whose
;; mode is an integer or float vector of 16, 32 or 64 bytes (the 64-byte
;; case needs TARGET_AVX512F); and the scalar is SImode, or the target is
;; 64-bit, or the destination is memory.
;; The preparation code strips the SUBREG and then dispatches on the vector
;; size, narrowing wide vectors through the vec_extract_lo_* expanders
;; (v16si/v8di, then v8si/v4di) into fresh pseudos before the final
;; gen_lowpart to <ssevecmode>mode.
12821 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12822 (match_operand:SWI48x 1 "register_operand"))]
12823 "can_create_pseudo_p ()
12824 && GET_CODE (operands[1]) == SUBREG
12825 && REG_P (SUBREG_REG (operands[1]))
12826 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
12827 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
12828 == MODE_VECTOR_FLOAT))
12829 && SUBREG_BYTE (operands[1]) == 0
12831 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
12832 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
12834 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
12835 && TARGET_AVX512F))
12836 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
12837 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
12838 (parallel [(const_int 0)])))]
12841 operands[1] = SUBREG_REG (operands[1]);
12842 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
12845 if (<MODE>mode == SImode)
12847 tmp = gen_reg_rtx (V8SImode);
12848 emit_insn (gen_vec_extract_lo_v16si (tmp,
12849 gen_lowpart (V16SImode,
12854 tmp = gen_reg_rtx (V4DImode);
12855 emit_insn (gen_vec_extract_lo_v8di (tmp,
12856 gen_lowpart (V8DImode,
12862 tmp = gen_reg_rtx (<ssevecmode>mode);
12863 if (<MODE>mode == SImode)
12864 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
12867 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
12872 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI value from two SImode operands (operand 1, then operand 2).
;; Nine alternatives, in template order:
;;   0-2  (v)pinsrd $1: insert operand 2 (register or memory) at index 1
;;   3-5  (v)punpckldq: both halves already in SSE registers
;;   6    (v)movd of operand 1 alone, operand 2 is constant "C"
;;   7    MMX punpckldq (type mmxcvt)
;;   8    MMX movd of operand 1 alone, operand 2 is constant "C"
;; The isa attribute splits 0-5 into non-AVX and AVX encodings.
12877 (define_insn "*vec_concatv2si_sse4_1"
12878 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y")
12880 (match_operand:SI 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,rm, 0,rm")
12881 (match_operand:SI 2 "vector_move_operand" " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
12884 pinsrd\t{$1, %2, %0|%0, %2, 1}
12885 pinsrd\t{$1, %2, %0|%0, %2, 1}
12886 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
12887 punpckldq\t{%2, %0|%0, %2}
12888 punpckldq\t{%2, %0|%0, %2}
12889 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
12890 %vmovd\t{%1, %0|%0, %1}
12891 punpckldq\t{%2, %0|%0, %2}
12892 movd\t{%1, %0|%0, %1}"
12893 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
12894 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
12895 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
12896 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
12897 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
12898 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
12900 ;; ??? In theory we can match memory for the MMX alternative, but allowing
12901 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
12902 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI concatenation for SSE targets without SSE4.1.  Operand 2 is a
;; register or the constant "C".  Seven alternatives, in template order:
;;   0  punpckldq (isa sse2)      3  unpcklps
;;   1  movd      (isa sse2)      4  movss from memory
;;   2  movd to MMX (isa sse2)    5  MMX punpckldq
;;                                6  MMX movd
;; The movd/movss forms move operand 1 alone and require operand 2 to be "C".
12903 (define_insn "*vec_concatv2si"
12904 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
12906 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
12907 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
12908 "TARGET_SSE && !TARGET_SSE4_1"
12910 punpckldq\t{%2, %0|%0, %2}
12911 movd\t{%1, %0|%0, %1}
12912 movd\t{%1, %0|%0, %1}
12913 unpcklps\t{%2, %0|%0, %2}
12914 movss\t{%1, %0|%0, %1}
12915 punpckldq\t{%2, %0|%0, %2}
12916 movd\t{%1, %0|%0, %1}"
12917 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
12918 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
12919 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI value from two V2SI halves (operand 1, then operand 2).
;; Register-register forms use (v)punpcklqdq or movlhps; when operand 2 is
;; in memory, (v)movhps loads it, with the memory operand printed via %q2
;; in the Intel-syntax half of the template.
12921 (define_insn "*vec_concatv4si"
12922 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
12924 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
12925 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
12928 punpcklqdq\t{%2, %0|%0, %2}
12929 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12930 movlhps\t{%2, %0|%0, %2}
12931 movhps\t{%2, %0|%0, %q2}
12932 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
12933 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
12934 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12935 (set_attr "prefix" "orig,vex,orig,orig,vex")
12936 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
12938 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI value from two DImode operands (operand 1, then operand 2).
;; Eleven alternatives, in template order:
;;   0-2   (v)pinsrq $1: insert operand 2 at index 1 (64-bit SSE4.1 / AVX)
;;   3     general register to SSE register; the mnemonic is chosen at
;;         output time: movq if HAVE_AS_IX86_INTERUNIT_MOVQ, otherwise movd
;;   4     (v)movq from SSE register or memory
;;   5     movq2dq from an MMX register
;;   6-7   (v)punpcklqdq
;;   8     movlhps
;;   9-10  (v)movhps with operand 2 in memory
;; Alternatives 3-5 move operand 1 alone and require operand 2 to be "C".
;; The type attribute is sselog for alternatives 0,1,2,6,7, ssemov otherwise.
12939 (define_insn "vec_concatv2di"
12940 [(set (match_operand:V2DI 0 "register_operand"
12941 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
12943 (match_operand:DI 1 "nonimmediate_operand"
12944 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
12945 (match_operand:DI 2 "vector_move_operand"
12946 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
12949 pinsrq\t{$1, %2, %0|%0, %2, 1}
12950 pinsrq\t{$1, %2, %0|%0, %2, 1}
12951 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
12952 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
12953 %vmovq\t{%1, %0|%0, %1}
12954 movq2dq\t{%1, %0|%0, %1}
12955 punpcklqdq\t{%2, %0|%0, %2}
12956 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12957 movlhps\t{%2, %0|%0, %2}
12958 movhps\t{%2, %0|%0, %2}
12959 vmovhps\t{%2, %1, %0|%0, %1, %2}"
12960 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
12963 (eq_attr "alternative" "0,1,2,6,7")
12964 (const_string "sselog")
12965 (const_string "ssemov")))
12966 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
12967 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
12968 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
12969 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
12970 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Named expander for the signed, low-half vector unpack.  All work is
;; delegated to ix86_expand_sse_unpack, called with its third and fourth
;; arguments both false.
12972 (define_expand "vec_unpacks_lo_<mode>"
12973 [(match_operand:<sseunpackmode> 0 "register_operand")
12974 (match_operand:VI124_AVX512F 1 "register_operand")]
12976 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Named expander for the signed, high-half vector unpack.  Delegates to
;; ix86_expand_sse_unpack with its third argument false and fourth true.
12978 (define_expand "vec_unpacks_hi_<mode>"
12979 [(match_operand:<sseunpackmode> 0 "register_operand")
12980 (match_operand:VI124_AVX512F 1 "register_operand")]
12982 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Named expander for the unsigned, low-half vector unpack.  Delegates to
;; ix86_expand_sse_unpack with its third argument true and fourth false.
12984 (define_expand "vec_unpacku_lo_<mode>"
12985 [(match_operand:<sseunpackmode> 0 "register_operand")
12986 (match_operand:VI124_AVX512F 1 "register_operand")]
12988 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Named expander for the unsigned, high-half vector unpack.  Delegates to
;; ix86_expand_sse_unpack with its third and fourth arguments both true.
12990 (define_expand "vec_unpacku_hi_<mode>"
12991 [(match_operand:<sseunpackmode> 0 "register_operand")
12992 (match_operand:VI124_AVX512F 1 "register_operand")]
12994 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
12996 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned byte/word average (optionally masked).  The
;; RTL widens both inputs with zero_extend to the double-width mode, adds
;; them, adds a further operand referenced as <mask_expand_op3>, shifts
;; right logically and truncates back to the element width.
;; The preparation code stores CONST1_RTX of the vector mode in operands[3]
;; and canonicalizes the PLUS operands; when <mask_applied> is set it
;; additionally copies that constant to operands[5].
13002 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
13003 [(set (match_operand:VI12_AVX2 0 "register_operand")
13004 (truncate:VI12_AVX2
13005 (lshiftrt:<ssedoublemode>
13006 (plus:<ssedoublemode>
13007 (plus:<ssedoublemode>
13008 (zero_extend:<ssedoublemode>
13009 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
13010 (zero_extend:<ssedoublemode>
13011 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
13012 (match_dup <mask_expand_op3>))
13014 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13017 if (<mask_applied>)
13019 operands[3] = CONST1_RTX(<MODE>mode);
13020 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
13022 if (<mask_applied>)
13024 operands[5] = operands[3];
;; Matching insn for the unsigned average: same widened-add/shift/truncate
;; shape as the expander, with the extra addend required to satisfy
;; const1_operand.  Operand 1 is marked commutative ("%").  Alternative 0
;; emits the two-operand pavg<suffix> (isa noavx), alternative 1 the
;; three-operand vpavg<suffix>, which also prints <mask_operand3>.
13029 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13030 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
13031 (truncate:VI12_AVX2
13032 (lshiftrt:<ssedoublemode>
13033 (plus:<ssedoublemode>
13034 (plus:<ssedoublemode>
13035 (zero_extend:<ssedoublemode>
13036 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
13037 (zero_extend:<ssedoublemode>
13038 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
13039 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
13041 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13042 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
13044 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
13045 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13046 [(set_attr "isa" "noavx,avx")
13047 (set_attr "type" "sseiadd")
13048 (set_attr "prefix_data16" "1,*")
13049 (set_attr "prefix" "orig,<mask_prefix>")
13050 (set_attr "mode" "<sseinsnmode>")])
13052 ;; The correct representation for this is absolutely enormous, and
13053 ;; surely not generally useful.
;; (v)psadbw, therefore modelled as an opaque unspec: two byte-vector
;; inputs (<ssebytemode>) produce a result in the 64-bit-element vector
;; mode of the iterator.  Alternative 0 is the two-operand legacy form,
;; alternative 1 the three-operand VEX/EVEX form.
13054 (define_insn "<sse2_avx2>_psadbw"
13055 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
13056 (unspec:VI8_AVX2_AVX512BW
13057 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
13058 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
13062 psadbw\t{%2, %0|%0, %2}
13063 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13064 [(set_attr "isa" "noavx,avx")
13065 (set_attr "type" "sseiadd")
13066 (set_attr "atom_unit" "simul")
13067 (set_attr "prefix_data16" "1,*")
13068 (set_attr "prefix" "orig,maybe_evex")
13069 (set_attr "mode" "<sseinsnmode>")])
;; (v)movmskps / (v)movmskpd: produce an SImode general-register result
;; from a 128- or 256-bit float vector register (VF_128_256).
13071 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
13072 [(set (match_operand:SI 0 "register_operand" "=r")
13074 [(match_operand:VF_128_256 1 "register_operand" "x")]
13077 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
13078 [(set_attr "type" "ssemov")
13079 (set_attr "prefix" "maybe_vex")
13080 (set_attr "mode" "<MODE>")])
;; 256-bit vpmovmskb: SImode general-register result computed from a V32QI
;; register, modelled as an unspec.  Always VEX-encoded.
;; NOTE(review): the mode attribute here is DI while sse2_pmovmskb uses SI;
;; confirm whether that difference is intentional.
13082 (define_insn "avx2_pmovmskb"
13083 [(set (match_operand:SI 0 "register_operand" "=r")
13084 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
13087 "vpmovmskb\t{%1, %0|%0, %1}"
13088 [(set_attr "type" "ssemov")
13089 (set_attr "prefix" "vex")
13090 (set_attr "mode" "DI")])
;; 128-bit (v)pmovmskb: SImode general-register result computed from a
;; V16QI register, modelled as an unspec.
13092 (define_insn "sse2_pmovmskb"
13093 [(set (match_operand:SI 0 "register_operand" "=r")
13094 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
13097 "%vpmovmskb\t{%1, %0|%0, %1}"
13098 [(set_attr "type" "ssemov")
13099 (set_attr "prefix_data16" "1")
13100 (set_attr "prefix" "maybe_vex")
13101 (set_attr "mode" "SI")])
;; Expander for the masked byte store: the destination is a V16QI memory
;; operand, and the unspec takes two V16QI register operands (1 and 2).
13103 (define_expand "sse2_maskmovdqu"
13104 [(set (match_operand:V16QI 0 "memory_operand")
13105 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13106 (match_operand:V16QI 2 "register_operand")
;; Matching insn for (v)maskmovdqu.  The store address is operand 0, forced
;; into the register selected by constraint "D"; the previous memory
;; contents appear as an input of the unspec.  The address register is not
;; printed in the template, which only names operands 1 and 2.
;; When Pmode differs from word_mode the output code writes an "addr32"
;; prefix directly to the assembly file before returning the template, and
;; length_address accounts for it.
13111 (define_insn "*sse2_maskmovdqu"
13112 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
13113 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13114 (match_operand:V16QI 2 "register_operand" "x")
13115 (mem:V16QI (match_dup 0))]
13119 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13120 that requires %v to be at the beginning of the opcode name. */
13121 if (Pmode != word_mode)
13122 fputs ("\taddr32", asm_out_file);
13123 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13125 [(set_attr "type" "ssemov")
13126 (set_attr "prefix_data16" "1")
13127 (set (attr "length_address")
13128 (symbol_ref ("Pmode != word_mode")))
13129 ;; The implicit %rdi operand confuses default length_vex computation.
13130 (set (attr "length_vex")
13131 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
13132 (set_attr "prefix" "maybe_vex")
13133 (set_attr "mode" "TI")])
;; Load of the MXCSR register from an SImode memory operand, expressed as
;; an unspec_volatile.  The memory attribute marks it as a load.
13135 (define_insn "sse_ldmxcsr"
13136 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13140 [(set_attr "type" "sse")
13141 (set_attr "atom_sse_attr" "mxcsr")
13142 (set_attr "prefix" "maybe_vex")
13143 (set_attr "memory" "load")])
;; Store of the MXCSR register to an SImode memory operand.  The source is
;; an unspec_volatile (UNSPECV_STMXCSR) with a dummy const_int 0 argument.
13145 (define_insn "sse_stmxcsr"
13146 [(set (match_operand:SI 0 "memory_operand" "=m")
13147 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13150 [(set_attr "type" "sse")
13151 (set_attr "atom_sse_attr" "mxcsr")
13152 (set_attr "prefix" "maybe_vex")
13153 (set_attr "memory" "store")])
;; clflush on an arbitrary address operand (constraint "p"), expressed as
;; an unspec_volatile.  Its memory attribute is "unknown".
13155 (define_insn "sse2_clflush"
13156 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13160 [(set_attr "type" "sse")
13161 (set_attr "atom_sse_attr" "fence")
13162 (set_attr "memory" "unknown")])
13164 ;; As per AMD and Intel ISA manuals, the first operand is extensions
13165 ;; and it goes to %ecx. The second operand received is hints and it goes to %eax.
;; Both operands are SImode registers pinned by constraints "c" and "a".
;; The length attribute is fixed at 3 bytes.
13167 (define_insn "sse3_mwait"
13168 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
13169 (match_operand:SI 1 "register_operand" "a")]
13172 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
13173 ;; Since 32bit register operands are implicitly zero extended to 64bit,
13174 ;; we only need to set up 32bit registers.
13176 [(set_attr "length" "3")])
;; monitor: the address is a pointer-mode (P) register pinned by constraint
;; "a"; the two SImode operands are pinned by "c" and "d".  The length is 3
;; bytes, plus 1 when Pmode differs from word_mode.
13178 (define_insn "sse3_monitor_<mode>"
13179 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
13180 (match_operand:SI 1 "register_operand" "c")
13181 (match_operand:SI 2 "register_operand" "d")]
13184 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
13185 ;; RCX and RDX are used. Since 32bit register operands are implicitly
13186 ;; zero extended to 64bit, we only need to set up 32bit registers.
13188 [(set (attr "length")
13189 (symbol_ref ("(Pmode != word_mode) + 3")))])
13191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13193 ;; SSSE3 instructions
13195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RTL codes over which the horizontal add/subtract patterns are
;; instantiated: plain and signed-saturating plus and minus.
13197 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal word add/subtract (vph<op>w), one instance per code
;; in ssse3_plusminus.  Each result element applies the operation to a pair
;; of adjacent HImode elements (0,1), (2,3), ... (14,15).  The first group
;; of pairs below reads operand 1, the second group reads operand 2
;; (register or memory).
13199 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
13200 [(set (match_operand:V16HI 0 "register_operand" "=x")
13205 (ssse3_plusminus:HI
13207 (match_operand:V16HI 1 "register_operand" "x")
13208 (parallel [(const_int 0)]))
13209 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13210 (ssse3_plusminus:HI
13211 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13212 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13214 (ssse3_plusminus:HI
13215 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13216 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13217 (ssse3_plusminus:HI
13218 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13219 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13222 (ssse3_plusminus:HI
13223 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13224 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
13225 (ssse3_plusminus:HI
13226 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13227 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13229 (ssse3_plusminus:HI
13230 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13231 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
13232 (ssse3_plusminus:HI
13233 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13234 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13238 (ssse3_plusminus:HI
13240 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13241 (parallel [(const_int 0)]))
13242 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13243 (ssse3_plusminus:HI
13244 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13245 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13247 (ssse3_plusminus:HI
13248 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13249 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13250 (ssse3_plusminus:HI
13251 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13252 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13255 (ssse3_plusminus:HI
13256 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13257 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
13258 (ssse3_plusminus:HI
13259 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13260 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13262 (ssse3_plusminus:HI
13263 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13264 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
13265 (ssse3_plusminus:HI
13266 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13267 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13269 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13270 [(set_attr "type" "sseiadd")
13271 (set_attr "prefix_extra" "1")
13272 (set_attr "prefix" "vex")
13273 (set_attr "mode" "OI")])
;; 128-bit horizontal word add/subtract.  Each result element combines an
;; adjacent HImode pair (0,1) ... (6,7), first from operand 1 and then from
;; operand 2.  Alternative 0 is the two-operand legacy ph<op>w with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; VEX form.
13275 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
13276 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13280 (ssse3_plusminus:HI
13282 (match_operand:V8HI 1 "register_operand" "0,x")
13283 (parallel [(const_int 0)]))
13284 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13285 (ssse3_plusminus:HI
13286 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13287 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13289 (ssse3_plusminus:HI
13290 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13291 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13292 (ssse3_plusminus:HI
13293 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13294 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13297 (ssse3_plusminus:HI
13299 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
13300 (parallel [(const_int 0)]))
13301 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13302 (ssse3_plusminus:HI
13303 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13304 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13306 (ssse3_plusminus:HI
13307 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13308 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13309 (ssse3_plusminus:HI
13310 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13311 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13314 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13315 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13316 [(set_attr "isa" "noavx,avx")
13317 (set_attr "type" "sseiadd")
13318 (set_attr "atom_unit" "complex")
13319 (set_attr "prefix_data16" "1,*")
13320 (set_attr "prefix_extra" "1")
13321 (set_attr "prefix" "orig,vex")
13322 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal word add/subtract.
;; Pairs (0,1) and (2,3) of operand 1 are combined first, then the same
;; pairs of operand 2.  Operand 1 is tied to the destination.
;; prefix_rex is computed per insn from whether an extended register is
;; mentioned.
13324 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
13325 [(set (match_operand:V4HI 0 "register_operand" "=y")
13328 (ssse3_plusminus:HI
13330 (match_operand:V4HI 1 "register_operand" "0")
13331 (parallel [(const_int 0)]))
13332 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13333 (ssse3_plusminus:HI
13334 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13335 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13337 (ssse3_plusminus:HI
13339 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13340 (parallel [(const_int 0)]))
13341 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13342 (ssse3_plusminus:HI
13343 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13344 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13346 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
13347 [(set_attr "type" "sseiadd")
13348 (set_attr "atom_unit" "complex")
13349 (set_attr "prefix_extra" "1")
13350 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13351 (set_attr "mode" "DI")])
;; 256-bit horizontal doubleword add/subtract (vph<op>d).  Adjacent SImode
;; pairs (0,1) ... (6,7) are combined, first from operand 1 and then from
;; operand 2 (register or memory).  VEX-encoded, single alternative.
13353 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
13354 [(set (match_operand:V8SI 0 "register_operand" "=x")
13360 (match_operand:V8SI 1 "register_operand" "x")
13361 (parallel [(const_int 0)]))
13362 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13364 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13365 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13368 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13369 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
13371 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13372 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13377 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13378 (parallel [(const_int 0)]))
13379 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13381 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13382 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13385 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13386 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
13388 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13389 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13391 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13392 [(set_attr "type" "sseiadd")
13393 (set_attr "prefix_extra" "1")
13394 (set_attr "prefix" "vex")
13395 (set_attr "mode" "OI")])
;; 128-bit horizontal doubleword add/subtract.  Adjacent SImode pairs (0,1)
;; and (2,3) are combined, first from operand 1 and then from operand 2.
;; Alternative 0 is the two-operand legacy form with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX form.
13397 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
13398 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13403 (match_operand:V4SI 1 "register_operand" "0,x")
13404 (parallel [(const_int 0)]))
13405 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13407 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13408 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13412 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
13413 (parallel [(const_int 0)]))
13414 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13416 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13417 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
13420 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
13421 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13422 [(set_attr "isa" "noavx,avx")
13423 (set_attr "type" "sseiadd")
13424 (set_attr "atom_unit" "complex")
13425 (set_attr "prefix_data16" "1,*")
13426 (set_attr "prefix_extra" "1")
13427 (set_attr "prefix" "orig,vex")
13428 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal doubleword
;; add/subtract: the pair (0,1) of operand 1 and the pair (0,1) of
;; operand 2 are each combined.  Operand 1 is tied to the destination.
13430 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
13431 [(set (match_operand:V2SI 0 "register_operand" "=y")
13435 (match_operand:V2SI 1 "register_operand" "0")
13436 (parallel [(const_int 0)]))
13437 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13440 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
13441 (parallel [(const_int 0)]))
13442 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
13444 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
13445 [(set_attr "type" "sseiadd")
13446 (set_attr "atom_unit" "complex")
13447 (set_attr "prefix_extra" "1")
13448 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13449 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw, spelled out in RTL.  The pattern selects the
;; even-indexed bytes (0,2,...,30) of operands 1 and 2 for one term and the
;; odd-indexed bytes (1,3,...,31) of the same operands for the other term,
;; producing a V16HI result.  VEX-encoded, single alternative.
13451 (define_insn "avx2_pmaddubsw256"
13452 [(set (match_operand:V16HI 0 "register_operand" "=x")
13457 (match_operand:V32QI 1 "register_operand" "x")
13458 (parallel [(const_int 0) (const_int 2)
13459 (const_int 4) (const_int 6)
13460 (const_int 8) (const_int 10)
13461 (const_int 12) (const_int 14)
13462 (const_int 16) (const_int 18)
13463 (const_int 20) (const_int 22)
13464 (const_int 24) (const_int 26)
13465 (const_int 28) (const_int 30)])))
13468 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
13469 (parallel [(const_int 0) (const_int 2)
13470 (const_int 4) (const_int 6)
13471 (const_int 8) (const_int 10)
13472 (const_int 12) (const_int 14)
13473 (const_int 16) (const_int 18)
13474 (const_int 20) (const_int 22)
13475 (const_int 24) (const_int 26)
13476 (const_int 28) (const_int 30)]))))
13479 (vec_select:V16QI (match_dup 1)
13480 (parallel [(const_int 1) (const_int 3)
13481 (const_int 5) (const_int 7)
13482 (const_int 9) (const_int 11)
13483 (const_int 13) (const_int 15)
13484 (const_int 17) (const_int 19)
13485 (const_int 21) (const_int 23)
13486 (const_int 25) (const_int 27)
13487 (const_int 29) (const_int 31)])))
13489 (vec_select:V16QI (match_dup 2)
13490 (parallel [(const_int 1) (const_int 3)
13491 (const_int 5) (const_int 7)
13492 (const_int 9) (const_int 11)
13493 (const_int 13) (const_int 15)
13494 (const_int 17) (const_int 19)
13495 (const_int 21) (const_int 23)
13496 (const_int 25) (const_int 27)
13497 (const_int 29) (const_int 31)]))))))]
13499 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13500 [(set_attr "type" "sseiadd")
13501 (set_attr "prefix_extra" "1")
13502 (set_attr "prefix" "vex")
13503 (set_attr "mode" "OI")])
13505 ;; The correct representation for this is absolutely enormous, and
13506 ;; surely not generally useful.
;; EVEX-encoded vpmaddubsw, therefore modelled as an opaque unspec
;; (UNSPEC_PMADDUBSW512): two <dbpsadbwmode> inputs produce a
;; VI2_AVX512VL result.  The template also prints <mask_operand3> after
;; the destination for the masked variants.
13507 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
13508 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
13509 (unspec:VI2_AVX512VL
13510 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
13511 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
13512 UNSPEC_PMADDUBSW512))]
13514 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13515 [(set_attr "type" "sseiadd")
13516 (set_attr "prefix" "evex")
13517 (set_attr "mode" "XI")])
;; 512-bit vpmulhrsw on V32HI, optionally masked.  Operand 1 is marked
;; commutative ("%"); operand 2 may be in memory.  The const_vector below
;; is the all-ones V32HI constant (32 elements, each const_int 1) that
;; takes part in the RTL expression.
13519 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
13520 [(set (match_operand:V32HI 0 "register_operand" "=v")
13527 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
13529 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
13531 (const_vector:V32HI [(const_int 1) (const_int 1)
13532 (const_int 1) (const_int 1)
13533 (const_int 1) (const_int 1)
13534 (const_int 1) (const_int 1)
13535 (const_int 1) (const_int 1)
13536 (const_int 1) (const_int 1)
13537 (const_int 1) (const_int 1)
13538 (const_int 1) (const_int 1)
13539 (const_int 1) (const_int 1)
13540 (const_int 1) (const_int 1)
13541 (const_int 1) (const_int 1)
13542 (const_int 1) (const_int 1)
13543 (const_int 1) (const_int 1)
13544 (const_int 1) (const_int 1)
13545 (const_int 1) (const_int 1)
13546 (const_int 1) (const_int 1)]))
13549 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13550 [(set_attr "type" "sseimul")
13551 (set_attr "prefix" "evex")
13552 (set_attr "mode" "XI")])
;; 128-bit (v)pmaddubsw, spelled out in RTL.  The pattern selects the
;; even-indexed bytes (0,2,...,14) of operands 1 and 2 for one term and the
;; odd-indexed bytes (1,3,...,15) for the other, producing a V8HI result.
;; Alternative 0 is the two-operand legacy form with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX form.
13554 (define_insn "ssse3_pmaddubsw128"
13555 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13560 (match_operand:V16QI 1 "register_operand" "0,x")
13561 (parallel [(const_int 0) (const_int 2)
13562 (const_int 4) (const_int 6)
13563 (const_int 8) (const_int 10)
13564 (const_int 12) (const_int 14)])))
13567 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
13568 (parallel [(const_int 0) (const_int 2)
13569 (const_int 4) (const_int 6)
13570 (const_int 8) (const_int 10)
13571 (const_int 12) (const_int 14)]))))
13574 (vec_select:V8QI (match_dup 1)
13575 (parallel [(const_int 1) (const_int 3)
13576 (const_int 5) (const_int 7)
13577 (const_int 9) (const_int 11)
13578 (const_int 13) (const_int 15)])))
13580 (vec_select:V8QI (match_dup 2)
13581 (parallel [(const_int 1) (const_int 3)
13582 (const_int 5) (const_int 7)
13583 (const_int 9) (const_int 11)
13584 (const_int 13) (const_int 15)]))))))]
13587 pmaddubsw\t{%2, %0|%0, %2}
13588 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13589 [(set_attr "isa" "noavx,avx")
13590 (set_attr "type" "sseiadd")
13591 (set_attr "atom_unit" "simul")
13592 (set_attr "prefix_data16" "1,*")
13593 (set_attr "prefix_extra" "1")
13594 (set_attr "prefix" "orig,vex")
13595 (set_attr "mode" "TI")])
;; 64-bit variant of pmaddubsw: V8QI inputs, V4HI result, "y" register
;; constraints (MMX registers per the x86 constraint documentation).
;; Single two-operand alternative with operand 1 tied to the destination.
13597 (define_insn "ssse3_pmaddubsw"
13598 [(set (match_operand:V4HI 0 "register_operand" "=y")
;; Even-indexed bytes (0, 2, 4, 6) of each input.
13603 (match_operand:V8QI 1 "register_operand" "0")
13604 (parallel [(const_int 0) (const_int 2)
13605 (const_int 4) (const_int 6)])))
13608 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13609 (parallel [(const_int 0) (const_int 2)
13610 (const_int 4) (const_int 6)]))))
;; Odd-indexed bytes (1, 3, 5, 7) of each input.
13613 (vec_select:V4QI (match_dup 1)
13614 (parallel [(const_int 1) (const_int 3)
13615 (const_int 5) (const_int 7)])))
13617 (vec_select:V4QI (match_dup 2)
13618 (parallel [(const_int 1) (const_int 3)
13619 (const_int 5) (const_int 7)]))))))]
13621 "pmaddubsw\t{%2, %0|%0, %2}"
13622 [(set_attr "type" "sseiadd")
13623 (set_attr "atom_unit" "simul")
13624 (set_attr "prefix_extra" "1")
13625 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13626 (set_attr "mode" "DI")])
;; Modes handled by the pmulhrsw expanders: V4HI and V8HI unconditionally,
;; V16HI only when TARGET_AVX2 is enabled.
13628 (define_mode_iterator PMULHRSW
13629 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for the masked pmulhrsw form over the PMULHRSW modes.
;; Enabled only for TARGET_AVX512BW && TARGET_AVX512VL.  The computed value
;; is vec_merge'd with operand 3 under the mask register in operand 4.
;; The preparation code stores CONST1_RTX of the vector mode into
;; operands[5] and then calls ix86_fixup_binary_operands_no_copy for MULT.
13631 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
13632 [(set (match_operand:PMULHRSW 0 "register_operand")
13633 (vec_merge:PMULHRSW
13635 (lshiftrt:<ssedoublemode>
13636 (plus:<ssedoublemode>
13637 (lshiftrt:<ssedoublemode>
13638 (mult:<ssedoublemode>
;; Both inputs are sign-extended to the double-width element mode
;; before the multiply.
13639 (sign_extend:<ssedoublemode>
13640 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13641 (sign_extend:<ssedoublemode>
13642 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13646 (match_operand:PMULHRSW 3 "register_operand")
13647 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13648 "TARGET_AVX512BW && TARGET_AVX512VL"
13650 operands[5] = CONST1_RTX(<MODE>mode);
13651 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Expander for the unmasked pmulhrsw form over the PMULHRSW modes.
;; The visible RTL sign-extends both inputs to <ssedoublemode>, multiplies,
;; then applies lshiftrt / plus / lshiftrt.  The preparation code stores
;; CONST1_RTX of the vector mode into operands[3] and then calls
;; ix86_fixup_binary_operands_no_copy for MULT.
13654 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
13655 [(set (match_operand:PMULHRSW 0 "register_operand")
13657 (lshiftrt:<ssedoublemode>
13658 (plus:<ssedoublemode>
13659 (lshiftrt:<ssedoublemode>
13660 (mult:<ssedoublemode>
13661 (sign_extend:<ssedoublemode>
13662 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13663 (sign_extend:<ssedoublemode>
13664 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13670 operands[3] = CONST1_RTX(<MODE>mode);
13671 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matcher for pmulhrsw / vpmulhrsw over the VI2_AVX2 modes.  The leading
;; "*" makes the pattern name internal (no gen_ function is produced).
;; Alternative 0: legacy two-operand form, operand 1 tied to the destination.
;; Alternative 1: three-operand "v" form, which may also carry a mask via
;; <mask_operand4>.
;; Operand 3 must satisfy const1_operand; ix86_binary_operator_ok (MULT, ...)
;; is part of the insn condition.
13674 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
13675 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
13677 (lshiftrt:<ssedoublemode>
13678 (plus:<ssedoublemode>
13679 (lshiftrt:<ssedoublemode>
13680 (mult:<ssedoublemode>
13681 (sign_extend:<ssedoublemode>
13682 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
13683 (sign_extend:<ssedoublemode>
13684 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
13686 (match_operand:VI2_AVX2 3 "const1_operand"))
13688 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13689 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
13691 pmulhrsw\t{%2, %0|%0, %2}
13692 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
13693 [(set_attr "isa" "noavx,avx")
13694 (set_attr "type" "sseimul")
13695 (set_attr "prefix_data16" "1,*")
13696 (set_attr "prefix_extra" "1")
13697 (set_attr "prefix" "orig,maybe_evex")
13698 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit (V4HI, "y" registers) pmulhrsw matcher; internal name ("*").
;; Operand 1 is commutative and tied to the destination; operand 3 must
;; satisfy const1_operand.  Requires TARGET_SSSE3 and
;; ix86_binary_operator_ok (MULT, V4HImode, operands).
13700 (define_insn "*ssse3_pmulhrswv4hi3"
13701 [(set (match_operand:V4HI 0 "register_operand" "=y")
13708 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
13710 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
13712 (match_operand:V4HI 3 "const1_operand"))
13714 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
13715 "pmulhrsw\t{%2, %0|%0, %2}"
13716 [(set_attr "type" "sseimul")
13717 (set_attr "prefix_extra" "1")
13718 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13719 (set_attr "mode" "DI")])
;; pshufb / vpshufb over the VI1_AVX512 byte-vector modes.
;; Alternative 0: legacy two-operand form (operand 1 tied to operand 0).
;; Alternative 1: three-operand form, optionally masked via <mask_operand3>.
;; Operand 1 is the register input, operand 2 may be a register or memory.
13721 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
13722 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
13724 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
13725 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
13727 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13729 pshufb\t{%2, %0|%0, %2}
13730 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13731 [(set_attr "isa" "noavx,avx")
13732 (set_attr "type" "sselog1")
13733 (set_attr "prefix_data16" "1,*")
13734 (set_attr "prefix_extra" "1")
13735 (set_attr "prefix" "orig,maybe_evex")
13736 (set_attr "btver2_decode" "vector,vector")
13737 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit (V8QI, "y" registers) pshufb, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
13739 (define_insn "ssse3_pshufbv8qi3"
13740 [(set (match_operand:V8QI 0 "register_operand" "=y")
13741 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
13742 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
13745 "pshufb\t{%2, %0|%0, %2}"
13746 [(set_attr "type" "sselog1")
13747 (set_attr "prefix_extra" "1")
13748 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13749 (set_attr "mode" "DI")])
;; psign<suffix> / vpsign<suffix> over the VI124_AVX2 integer vector modes;
;; <ssemodesuffix> supplies the element-size suffix of the mnemonic.
;; Alternative 0: legacy two-operand form (operand 1 tied to operand 0).
;; Alternative 1: three-operand VEX form.
13751 (define_insn "<ssse3_avx2>_psign<mode>3"
13752 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
13754 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
13755 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
13759 psign<ssemodesuffix>\t{%2, %0|%0, %2}
13760 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13761 [(set_attr "isa" "noavx,avx")
13762 (set_attr "type" "sselog1")
13763 (set_attr "prefix_data16" "1,*")
13764 (set_attr "prefix_extra" "1")
13765 (set_attr "prefix" "orig,vex")
13766 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit psign over the MMXMODEI modes ("y" registers); <mmxvecsize>
;; supplies the element-size suffix of the mnemonic.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
13768 (define_insn "ssse3_psign<mode>3"
13769 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13771 [(match_operand:MMXMODEI 1 "register_operand" "0")
13772 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
13775 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
13776 [(set_attr "type" "sselog1")
13777 (set_attr "prefix_extra" "1")
13778 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13779 (set_attr "mode" "DI")])
;; Masked vpalignr over the VI1_AVX512 byte-vector modes (EVEX only).
;; Requires TARGET_AVX512BW, and TARGET_AVX512VL unless the mode is 64 bytes.
;; Operand 3 is a multiple of 8 in 0..255 (per its predicate name); the
;; output code divides it by 8 before printing the immediate.
;; Operand 4 ("0C") is the merge source and operand 5 ("Yk") the mask
;; register printed inside %{...%}.
13781 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
13782 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
13783 (vec_merge:VI1_AVX512
13785 [(match_operand:VI1_AVX512 1 "register_operand" "v")
13786 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
13787 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13789 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
13790 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
13791 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
13793 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13794 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
13796 [(set_attr "type" "sseishft")
13797 (set_attr "atom_unit" "sishuf")
13798 (set_attr "prefix_extra" "1")
13799 (set_attr "length_immediate" "1")
13800 (set_attr "prefix" "evex")
13801 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked palignr / vpalignr over the SSESCALARMODE modes, as an unspec.
;; Operand 3 is a multiple of 8 in 0..255 (per its predicate name); the
;; output code divides it by 8 before printing the immediate.
;; The output code switches on which_alternative: one branch returns the
;; two-operand "palignr" template, the other the three-operand "vpalignr"
;; template; any other value hits gcc_unreachable ().
13803 (define_insn "<ssse3_avx2>_palignr<mode>"
13804 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
13805 (unspec:SSESCALARMODE
13806 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
13807 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
13808 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
13812 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13814 switch (which_alternative)
13817 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13819 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13821 gcc_unreachable ();
13824 [(set_attr "isa" "noavx,avx")
13825 (set_attr "type" "sseishft")
13826 (set_attr "atom_unit" "sishuf")
13827 (set_attr "prefix_data16" "1,*")
13828 (set_attr "prefix_extra" "1")
13829 (set_attr "length_immediate" "1")
13830 (set_attr "prefix" "orig,vex")
13831 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit palignr on DImode values held in "y" registers, as an unspec.
;; Operand 1 is tied to the destination.  Operand 3 is a multiple of 8 in
;; 0..255 (per its predicate name); the output code divides it by 8 before
;; printing the immediate.
13833 (define_insn "ssse3_palignrdi"
13834 [(set (match_operand:DI 0 "register_operand" "=y")
13835 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
13836 (match_operand:DI 2 "nonimmediate_operand" "ym")
13837 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13841 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13842 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13844 [(set_attr "type" "sseishft")
13845 (set_attr "atom_unit" "sishuf")
13846 (set_attr "prefix_extra" "1")
13847 (set_attr "length_immediate" "1")
13848 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13849 (set_attr "mode" "DI")])
13851 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
13852 ;; modes for abs instruction on pre AVX-512 targets.
;; Per-mode enabling conditions:
;;   byte/word vectors:  512-bit need AVX512BW, 256-bit need AVX2,
;;                       128-bit are unconditional;
;;   dword vectors:      512-bit need AVX512F, 256-bit need AVX2,
;;                       128-bit are unconditional;
;;   qword vectors:      512-bit need AVX512F, 256/128-bit need AVX512VL.
13853 (define_mode_iterator VI1248_AVX512VL_AVX512BW
13854 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
13855 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
13856 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
13857 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Matcher for vector integer absolute value (RTL "abs") over the
;; VI1248_AVX512VL_AVX512BW modes; internal name ("*").
;; Emits pabs<suffix>; the leading %v in the template selects the
;; "v"-prefixed mnemonic when appropriate (presumably under AVX).
13859 (define_insn "*abs<mode>2"
13860 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
13861 (abs:VI1248_AVX512VL_AVX512BW
13862 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))]
13864 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
13865 [(set_attr "type" "sselog1")
13866 (set_attr "prefix_data16" "1")
13867 (set_attr "prefix_extra" "1")
13868 (set_attr "prefix" "maybe_vex")
13869 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs<suffix> over the VI48_AVX512VL modes (EVEX-encoded).
;; The result is vec_merge'd with operand 2 ("0C" constraint) under the
;; mask register in operand 3, which is printed inside %{...%}.
13871 (define_insn "abs<mode>2_mask"
13872 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13873 (vec_merge:VI48_AVX512VL
13875 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
13876 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
13877 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13879 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13880 [(set_attr "type" "sselog1")
13881 (set_attr "prefix" "evex")
13882 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs<suffix> over the VI12_AVX512VL modes (EVEX-encoded).
;; Same visible shape as the VI48_AVX512VL pattern of the same name:
;; the result is vec_merge'd with operand 2 under the mask in operand 3.
13884 (define_insn "abs<mode>2_mask"
13885 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
13886 (vec_merge:VI12_AVX512VL
13888 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
13889 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
13890 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13892 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13893 [(set_attr "type" "sselog1")
13894 (set_attr "prefix" "evex")
13895 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for vector integer abs over VI1248_AVX512VL_AVX512BW.
;; The preparation code calls ix86_expand_sse2_abs (operands[0],
;; operands[1]); the condition guarding that call is not visible in this
;; excerpt (presumably a fallback for targets without pabs -- TODO confirm).
13897 (define_expand "abs<mode>2"
13898 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
13899 (abs:VI1248_AVX512VL_AVX512BW
13900 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))]
13905 ix86_expand_sse2_abs (operands[0], operands[1]);
;; 64-bit pabs over the MMXMODEI modes ("y" registers); <mmxvecsize>
;; supplies the element-size suffix of the mnemonic.  The source operand
;; may be a register or memory.
13910 (define_insn "abs<mode>2"
13911 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13913 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
13915 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
13916 [(set_attr "type" "sselog1")
13917 (set_attr "prefix_rep" "0")
13918 (set_attr "prefix_extra" "1")
13919 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13920 (set_attr "mode" "DI")])
13922 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13924 ;; AMD SSE4A instructions
13926 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movnt<suffix>: stores a scalar float (MODEF modes) from an SSE
;; register ("x") to memory; modelled as an unspec.
13928 (define_insn "sse4a_movnt<mode>"
13929 [(set (match_operand:MODEF 0 "memory_operand" "=m")
13931 [(match_operand:MODEF 1 "register_operand" "x")]
13934 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
13935 [(set_attr "type" "ssemov")
13936 (set_attr "mode" "<MODE>")])
;; SSE4A movnt<suffix> taking a 128-bit float vector (VF_128) source:
;; element 0 is selected and stored to a scalar-mode memory destination.
13938 (define_insn "sse4a_vmmovnt<mode>"
13939 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
13940 (unspec:<ssescalarmode>
13941 [(vec_select:<ssescalarmode>
13942 (match_operand:VF_128 1 "register_operand" "x")
13943 (parallel [(const_int 0)]))]
13946 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
13947 [(set_attr "type" "ssemov")
13948 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form: operand 1 (V2DI) is tied to the
;; destination; operands 2 and 3 are constants in 0..255 and are printed
;; as the two immediates (length_immediate is 2).
13950 (define_insn "sse4a_extrqi"
13951 [(set (match_operand:V2DI 0 "register_operand" "=x")
13952 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13953 (match_operand 2 "const_0_to_255_operand")
13954 (match_operand 3 "const_0_to_255_operand")]
13957 "extrq\t{%3, %2, %0|%0, %2, %3}"
13958 [(set_attr "type" "sse")
13959 (set_attr "prefix_data16" "1")
13960 (set_attr "length_immediate" "2")
13961 (set_attr "mode" "TI")])
;; SSE4A extrq, register form: operand 1 (V2DI) is tied to the
;; destination; operand 2 is a V16QI register supplying the control.
13963 (define_insn "sse4a_extrq"
13964 [(set (match_operand:V2DI 0 "register_operand" "=x")
13965 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13966 (match_operand:V16QI 2 "register_operand" "x")]
13969 "extrq\t{%2, %0|%0, %2}"
13970 [(set_attr "type" "sse")
13971 (set_attr "prefix_data16" "1")
13972 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form: operand 1 (V2DI) is tied to the
;; destination, operand 2 is a V2DI register, and operands 3 and 4 are
;; constants in 0..255 printed as the two immediates.
13974 (define_insn "sse4a_insertqi"
13975 [(set (match_operand:V2DI 0 "register_operand" "=x")
13976 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13977 (match_operand:V2DI 2 "register_operand" "x")
13978 (match_operand 3 "const_0_to_255_operand")
13979 (match_operand 4 "const_0_to_255_operand")]
13982 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
13983 [(set_attr "type" "sseins")
13984 (set_attr "prefix_data16" "0")
13985 (set_attr "prefix_rep" "1")
13986 (set_attr "length_immediate" "2")
13987 (set_attr "mode" "TI")])
;; SSE4A insertq, register form: operand 1 (V2DI) is tied to the
;; destination and operand 2 is a V2DI register; no immediates.
13989 (define_insn "sse4a_insertq"
13990 [(set (match_operand:V2DI 0 "register_operand" "=x")
13991 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13992 (match_operand:V2DI 2 "register_operand" "x")]
13995 "insertq\t{%2, %0|%0, %2}"
13996 [(set_attr "type" "sseins")
13997 (set_attr "prefix_data16" "0")
13998 (set_attr "prefix_rep" "1")
13999 (set_attr "mode" "TI")])
14001 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14003 ;; Intel SSE4.1 instructions
14005 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14007 ;; Mapping of immediate bits for blend instructions
;; Largest valid blend immediate per mode, i.e. one selector bit per
;; vector element (8 elements -> 255, 4 -> 15, 2 -> 3).  The value is
;; spliced into the predicate name const_0_to_<blendbits>_operand.
14008 (define_mode_attr blendbits
14009 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; blendps/blendpd and their VEX forms over VF_128_256, expressed as a
;; vec_merge of operand 2 and operand 1 selected by the immediate in
;; operand 3 (range limited by <blendbits>).
;; Alternatives 0 and 1 are the legacy two-operand form (operand 1 tied
;; to the destination) with different register classes; alternative 2 is
;; the three-operand AVX form.
14011 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
14012 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14013 (vec_merge:VF_128_256
14014 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14015 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
14016 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
14019 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14020 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14021 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14022 [(set_attr "isa" "noavx,noavx,avx")
14023 (set_attr "type" "ssemov")
14024 (set_attr "length_immediate" "1")
14025 (set_attr "prefix_data16" "1,1,*")
14026 (set_attr "prefix_extra" "1")
14027 (set_attr "prefix" "orig,orig,vex")
14028 (set_attr "mode" "<MODE>")])
;; Variable blend (blendvps/blendvpd and VEX forms) over VF_128_256.
;; Operand 3 is a register selector rather than an immediate; in the two
;; legacy alternatives it is constrained to "Yz" (presumably the implicit
;; xmm0 register -- confirm against the constraint definitions), while
;; the AVX alternative accepts any "x" register.
14030 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
14031 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14033 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14034 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14035 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
14039 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14040 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14041 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14042 [(set_attr "isa" "noavx,noavx,avx")
14043 (set_attr "type" "ssemov")
14044 (set_attr "length_immediate" "1")
14045 (set_attr "prefix_data16" "1,1,*")
14046 (set_attr "prefix_extra" "1")
14047 (set_attr "prefix" "orig,orig,vex")
14048 (set_attr "btver2_decode" "vector,vector,vector")
14049 (set_attr "mode" "<MODE>")])
;; dpps/dppd and VEX forms over VF_128_256, with an 8-bit immediate in
;; operand 3 (0..255).  Operand 1 carries the commutative marker "%".
;; Alternatives 0 and 1 are legacy two-operand forms, alternative 2 the
;; three-operand AVX form.
14051 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
14052 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14054 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
14055 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14056 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14060 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14061 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14062 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14063 [(set_attr "isa" "noavx,noavx,avx")
14064 (set_attr "type" "ssemul")
14065 (set_attr "length_immediate" "1")
14066 (set_attr "prefix_data16" "1,1,*")
14067 (set_attr "prefix_extra" "1")
14068 (set_attr "prefix" "orig,orig,vex")
14069 (set_attr "btver2_decode" "vector,vector,vector")
14070 (set_attr "mode" "<MODE>")])
14072 ;; Mode attribute used by `vmovntdqa' pattern
;; Maps each 64-bit-element vector mode to the ISA prefix used in the
;; pattern name: V2DI -> sse4_1, V4DI -> avx2, V8DI -> avx512f.
14073 (define_mode_attr vi8_sse4_1_avx2_avx512
14074 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; movntdqa / vmovntdqa load over VI8_AVX2_AVX512F, as an unspec.
;; The source must be memory in every alternative; the three alternatives
;; differ only in the destination register class ("Yr", "x", "v").
14076 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
14077 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14078 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
14081 "%vmovntdqa\t{%1, %0|%0, %1}"
14082 [(set_attr "type" "ssemov")
14083 (set_attr "prefix_extra" "1,1,*")
14084 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
14085 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over VI1_AVX2 byte vectors, with an 8-bit
;; immediate in operand 3 (0..255).  Alternatives 0 and 1 are legacy
;; two-operand forms (operand 1 tied to the destination), alternative 2
;; the three-operand AVX form.
14087 (define_insn "<sse4_1_avx2>_mpsadbw"
14088 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14090 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14091 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14092 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14096 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14097 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14098 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14099 [(set_attr "isa" "noavx,noavx,avx")
14100 (set_attr "type" "sselog1")
14101 (set_attr "length_immediate" "1")
14102 (set_attr "prefix_extra" "1")
14103 (set_attr "prefix" "orig,orig,vex")
14104 (set_attr "btver2_decode" "vector,vector,vector")
14105 (set_attr "mode" "<sseinsnmode>")])
;; packusdw / vpackusdw: each input (mode <sseunpackmode>) is narrowed
;; with unsigned saturation (us_truncate) to <ssehalfvecmode>, and the two
;; halves are concatenated into the VI2_AVX2 result.
;; The third alternative may carry a mask via <mask_operand3>.
14107 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
14108 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
14109 (vec_concat:VI2_AVX2
14110 (us_truncate:<ssehalfvecmode>
14111 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
14112 (us_truncate:<ssehalfvecmode>
14113 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
14114 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14116 packusdw\t{%2, %0|%0, %2}
14117 packusdw\t{%2, %0|%0, %2}
14118 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14119 [(set_attr "isa" "noavx,noavx,avx")
14120 (set_attr "type" "sselog")
14121 (set_attr "prefix_extra" "1")
14122 (set_attr "prefix" "orig,orig,maybe_evex")
14123 (set_attr "mode" "<sseinsnmode>")])
;; pblendvb / vpblendvb over VI1_AVX2 byte vectors with a register
;; selector in operand 3.  In the legacy alternatives the selector is
;; constrained to "Yz" (presumably the implicit xmm0 -- confirm); the AVX
;; alternative accepts any "x" register and is the only one with a
;; length_immediate of 1.
14125 (define_insn "<sse4_1_avx2>_pblendvb"
14126 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14128 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14129 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14130 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
14134 pblendvb\t{%3, %2, %0|%0, %2, %3}
14135 pblendvb\t{%3, %2, %0|%0, %2, %3}
14136 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14137 [(set_attr "isa" "noavx,noavx,avx")
14138 (set_attr "type" "ssemov")
14139 (set_attr "prefix_extra" "1")
14140 (set_attr "length_immediate" "*,*,1")
14141 (set_attr "prefix" "orig,orig,vex")
14142 (set_attr "btver2_decode" "vector,vector,vector")
14143 (set_attr "mode" "<sseinsnmode>")])
;; pblendw / vpblendw on V8HI: operands 2 and 1 are blended under the
;; 8-bit immediate in operand 3 (0..255).  Alternatives 0 and 1 are
;; legacy two-operand forms, alternative 2 the three-operand AVX form.
14145 (define_insn "sse4_1_pblendw"
14146 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
14148 (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
14149 (match_operand:V8HI 1 "register_operand" "0,0,x")
14150 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
14153 pblendw\t{%3, %2, %0|%0, %2, %3}
14154 pblendw\t{%3, %2, %0|%0, %2, %3}
14155 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14156 [(set_attr "isa" "noavx,noavx,avx")
14157 (set_attr "type" "ssemov")
14158 (set_attr "prefix_extra" "1")
14159 (set_attr "length_immediate" "1")
14160 (set_attr "prefix" "orig,orig,vex")
14161 (set_attr "mode" "TI")])
14163 ;; The builtin uses an 8-bit immediate. Expand that.
;; Expander for the V16HI pblendw builtin.  The preparation code takes
;; the low 8 bits of the immediate and duplicates them into bits 8..15
;; (val << 8 | val), giving a 16-bit value with one bit per V16HI element.
14164 (define_expand "avx2_pblendw"
14165 [(set (match_operand:V16HI 0 "register_operand")
14167 (match_operand:V16HI 2 "nonimmediate_operand")
14168 (match_operand:V16HI 1 "register_operand")
14169 (match_operand:SI 3 "const_0_to_255_operand")))]
14172 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14173 operands[3] = GEN_INT (val << 8 | val);
;; Matcher for vpblendw on V16HI; internal name ("*").  Operand 3 must
;; satisfy avx2_pblendw_operand.  The output code masks the value back
;; down to its low 8 bits before printing it as the immediate.
14176 (define_insn "*avx2_pblendw"
14177 [(set (match_operand:V16HI 0 "register_operand" "=x")
14179 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14180 (match_operand:V16HI 1 "register_operand" "x")
14181 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
14184 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14185 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14187 [(set_attr "type" "ssemov")
14188 (set_attr "prefix_extra" "1")
14189 (set_attr "length_immediate" "1")
14190 (set_attr "prefix" "vex")
14191 (set_attr "mode" "OI")])
;; vpblendd over the VI4_AVX2 modes: vec_merge of operand 2 and operand 1
;; under the 8-bit immediate in operand 3.  VEX three-operand form only.
14193 (define_insn "avx2_pblendd<mode>"
14194 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14195 (vec_merge:VI4_AVX2
14196 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14197 (match_operand:VI4_AVX2 1 "register_operand" "x")
14198 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14200 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14201 [(set_attr "type" "ssemov")
14202 (set_attr "prefix_extra" "1")
14203 (set_attr "length_immediate" "1")
14204 (set_attr "prefix" "vex")
14205 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw / vphminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW.
;; Single-input instruction; the source may be a register or memory.
14207 (define_insn "sse4_1_phminposuw"
14208 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14209 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
14210 UNSPEC_PHMINPOSUW))]
14212 "%vphminposuw\t{%1, %0|%0, %1}"
14213 [(set_attr "type" "sselog1")
14214 (set_attr "prefix_extra" "1")
14215 (set_attr "prefix" "maybe_vex")
14216 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw: extends all 16 bytes of a V16QI source to V16HI.
;; <code>/<extsuffix> come from a code iterator not visible here
;; (presumably sign- vs zero-extension).  Optionally masked.
14218 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14219 [(set (match_operand:V16HI 0 "register_operand" "=v")
14221 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14222 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14223 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14224 [(set_attr "type" "ssemov")
14225 (set_attr "prefix_extra" "1")
14226 (set_attr "prefix" "maybe_evex")
14227 (set_attr "mode" "OI")])
;; vpmov<extsuffix>bw, 512-bit form: extends a V32QI source to V32HI.
;; EVEX-encoded, optionally masked via <mask_operand2>.
14229 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14230 [(set (match_operand:V32HI 0 "register_operand" "=v")
14232 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14234 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14235 [(set_attr "type" "ssemov")
14236 (set_attr "prefix_extra" "1")
14237 (set_attr "prefix" "evex")
14238 (set_attr "mode" "XI")])
;; pmov<extsuffix>bw, 128-bit form: extends the low 8 bytes (elements
;; 0..7) of a V16QI source to V8HI.  The Intel-syntax operand is printed
;; with %q1 and ssememalign is 64, matching the 64 bits actually read.
14240 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
14241 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
14244 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14245 (parallel [(const_int 0) (const_int 1)
14246 (const_int 2) (const_int 3)
14247 (const_int 4) (const_int 5)
14248 (const_int 6) (const_int 7)]))))]
14249 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14250 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14251 [(set_attr "type" "ssemov")
14252 (set_attr "ssememalign" "64")
14253 (set_attr "prefix_extra" "1")
14254 (set_attr "prefix" "maybe_vex")
14255 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd, 512-bit form: extends a V16QI source to V16SI.
;; EVEX-encoded, optionally masked via <mask_operand2>.
;; NOTE(review): the Intel-syntax operand uses %q1 although all 16 bytes
;; of operand 1 are consumed -- confirm the intended operand size.
14257 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
14258 [(set (match_operand:V16SI 0 "register_operand" "=v")
14260 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14262 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14263 [(set_attr "type" "ssemov")
14264 (set_attr "prefix" "evex")
14265 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bd, 256-bit form: extends the low 8 bytes (elements
;; 0..7) of a V16QI source to V8SI.  Optionally masked (needs AVX512VL
;; per <mask_avx512vl_condition>).
14267 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
14268 [(set (match_operand:V8SI 0 "register_operand" "=v")
14271 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14272 (parallel [(const_int 0) (const_int 1)
14273 (const_int 2) (const_int 3)
14274 (const_int 4) (const_int 5)
14275 (const_int 6) (const_int 7)]))))]
14276 "TARGET_AVX2 && <mask_avx512vl_condition>"
14277 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14278 [(set_attr "type" "ssemov")
14279 (set_attr "prefix_extra" "1")
14280 (set_attr "prefix" "maybe_evex")
14281 (set_attr "mode" "OI")])
;; pmov<extsuffix>bd, 128-bit form: extends the low 4 bytes (elements
;; 0..3) of a V16QI source to V4SI.  The Intel-syntax operand is printed
;; with %k1 and ssememalign is 32, matching the 32 bits actually read.
14283 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
14284 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14287 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14288 (parallel [(const_int 0) (const_int 1)
14289 (const_int 2) (const_int 3)]))))]
14290 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14291 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14292 [(set_attr "type" "ssemov")
14293 (set_attr "ssememalign" "32")
14294 (set_attr "prefix_extra" "1")
14295 (set_attr "prefix" "maybe_vex")
14296 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd, 512-bit form: extends a V16HI source to V16SI.
;; EVEX-encoded, optionally masked via <mask_operand2>.
14298 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
14299 [(set (match_operand:V16SI 0 "register_operand" "=v")
14301 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14303 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14304 [(set_attr "type" "ssemov")
14305 (set_attr "prefix" "evex")
14306 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wd, 256-bit form: extends a full V8HI source to V8SI.
;; Optionally masked (needs AVX512VL per <mask_avx512vl_condition>).
14308 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
14309 [(set (match_operand:V8SI 0 "register_operand" "=v")
14311 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14312 "TARGET_AVX2 && <mask_avx512vl_condition>"
14313 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14314 [(set_attr "type" "ssemov")
14315 (set_attr "prefix_extra" "1")
14316 (set_attr "prefix" "maybe_evex")
14317 (set_attr "mode" "OI")])
;; pmov<extsuffix>wd, 128-bit form: extends the low 4 words (elements
;; 0..3) of a V8HI source to V4SI.  The Intel-syntax operand is printed
;; with %q1 and ssememalign is 64, matching the 64 bits actually read.
14319 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
14320 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14323 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14324 (parallel [(const_int 0) (const_int 1)
14325 (const_int 2) (const_int 3)]))))]
14326 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14327 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14328 [(set_attr "type" "ssemov")
14329 (set_attr "ssememalign" "64")
14330 (set_attr "prefix_extra" "1")
14331 (set_attr "prefix" "maybe_vex")
14332 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq, 512-bit form: extends the low 8 bytes (elements
;; 0..7) of a V16QI source to V8DI.  EVEX-encoded, optionally masked.
;; NOTE(review): the Intel-syntax operand uses %k1 while 8 bytes are
;; selected; the sibling 8-byte patterns use %q1 -- confirm which is
;; intended.
14334 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
14335 [(set (match_operand:V8DI 0 "register_operand" "=v")
14338 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14339 (parallel [(const_int 0) (const_int 1)
14340 (const_int 2) (const_int 3)
14341 (const_int 4) (const_int 5)
14342 (const_int 6) (const_int 7)]))))]
14344 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14345 [(set_attr "type" "ssemov")
14346 (set_attr "prefix" "evex")
14347 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bq, 256-bit form: extends the low 4 bytes (elements
;; 0..3) of a V16QI source to V4DI.  The Intel-syntax operand is printed
;; with %k1.  Optionally masked (needs AVX512VL).
14349 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
14350 [(set (match_operand:V4DI 0 "register_operand" "=v")
14353 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14354 (parallel [(const_int 0) (const_int 1)
14355 (const_int 2) (const_int 3)]))))]
14356 "TARGET_AVX2 && <mask_avx512vl_condition>"
14357 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14358 [(set_attr "type" "ssemov")
14359 (set_attr "prefix_extra" "1")
14360 (set_attr "prefix" "maybe_evex")
14361 (set_attr "mode" "OI")])
;; pmov<extsuffix>bq, 128-bit form: extends the low 2 bytes (elements
;; 0..1) of a V16QI source to V2DI.  The Intel-syntax operand is printed
;; with %w1 and ssememalign is 16, matching the 16 bits actually read.
14363 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
14364 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14367 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14368 (parallel [(const_int 0) (const_int 1)]))))]
14369 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14370 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
14371 [(set_attr "type" "ssemov")
14372 (set_attr "ssememalign" "16")
14373 (set_attr "prefix_extra" "1")
14374 (set_attr "prefix" "maybe_vex")
14375 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq, 512-bit form: extends a full V8HI source to V8DI.
;; EVEX-encoded, optionally masked via <mask_operand2>.
;; NOTE(review): the Intel-syntax operand uses %q1 although all 8 words
;; of operand 1 are consumed -- confirm the intended operand size.
14377 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
14378 [(set (match_operand:V8DI 0 "register_operand" "=v")
14380 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14382 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14383 [(set_attr "type" "ssemov")
14384 (set_attr "prefix" "evex")
14385 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wq, 256-bit form: extends the low 4 words (elements
;; 0..3) of a V8HI source to V4DI.  The Intel-syntax operand is printed
;; with %q1.  Optionally masked (needs AVX512VL).
14387 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
14388 [(set (match_operand:V4DI 0 "register_operand" "=v")
14391 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14392 (parallel [(const_int 0) (const_int 1)
14393 (const_int 2) (const_int 3)]))))]
14394 "TARGET_AVX2 && <mask_avx512vl_condition>"
14395 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14396 [(set_attr "type" "ssemov")
14397 (set_attr "prefix_extra" "1")
14398 (set_attr "prefix" "maybe_evex")
14399 (set_attr "mode" "OI")])
;; pmov<extsuffix>wq, 128-bit form: extends the low 2 words (elements
;; 0..1) of a V8HI source to V2DI.  The Intel-syntax operand is printed
;; with %k1 and ssememalign is 32, matching the 32 bits actually read.
14401 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14402 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14405 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14406 (parallel [(const_int 0) (const_int 1)]))))]
14407 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14408 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14409 [(set_attr "type" "ssemov")
14410 (set_attr "ssememalign" "32")
14411 (set_attr "prefix_extra" "1")
14412 (set_attr "prefix" "maybe_vex")
14413 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq, 512-bit form: extends a V8SI source to V8DI.
;; EVEX-encoded, optionally masked via <mask_operand2>.
14415 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
14416 [(set (match_operand:V8DI 0 "register_operand" "=v")
14418 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14420 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14421 [(set_attr "type" "ssemov")
14422 (set_attr "prefix" "evex")
14423 (set_attr "mode" "XI")])
;; vpmov<extsuffix>dq, 256-bit form: extends a full V4SI source to V4DI.
;; Optionally masked (needs AVX512VL per <mask_avx512vl_condition>).
14425 (define_insn "avx2_<code>v4siv4di2<mask_name>"
14426 [(set (match_operand:V4DI 0 "register_operand" "=v")
14428 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14429 "TARGET_AVX2 && <mask_avx512vl_condition>"
14430 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14431 [(set_attr "type" "ssemov")
14432 (set_attr "prefix" "maybe_evex")
14433 (set_attr "prefix_extra" "1")
14434 (set_attr "mode" "OI")])
;; pmov<extsuffix>dq, 128-bit form: extends the low 2 dwords (elements
;; 0..1) of a V4SI source to V2DI.  The Intel-syntax operand is printed
;; with %q1 and ssememalign is 64, matching the 64 bits actually read.
14436 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
14437 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14440 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
14441 (parallel [(const_int 0) (const_int 1)]))))]
14442 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14443 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14444 [(set_attr "type" "ssemov")
14445 (set_attr "ssememalign" "64")
14446 (set_attr "prefix_extra" "1")
14447 (set_attr "prefix" "maybe_vex")
14448 (set_attr "mode" "TI")])
14450 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
14451 ;; setting FLAGS_REG. But they are not really compare instructions.
;; vtestps/vtestpd over VF_128_256.  The only result is the flags
;; register (reg:CC FLAGS_REG); operand 0 must be a register, operand 1
;; may be a register or memory.
14452 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
14453 [(set (reg:CC FLAGS_REG)
14454 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14455 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
14458 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
14459 [(set_attr "type" "ssecomi")
14460 (set_attr "prefix_extra" "1")
14461 (set_attr "prefix" "vex")
14462 (set_attr "mode" "<MODE>")])
14464 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
14465 ;; But it is not really a compare instruction.
;; 256-bit vptest on V4DI operands.  The only result is the flags
;; register (reg:CC FLAGS_REG); operand 1 may be a register or memory.
14466 (define_insn "avx_ptest256"
14467 [(set (reg:CC FLAGS_REG)
14468 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
14469 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
14472 "vptest\t{%1, %0|%0, %1}"
14473 [(set_attr "type" "ssecomi")
14474 (set_attr "prefix_extra" "1")
14475 (set_attr "prefix" "vex")
14476 (set_attr "btver2_decode" "vector")
14477 (set_attr "mode" "OI")])
;; 128-bit ptest / vptest on V2DI operands.  The only result is the flags
;; register (reg:CC FLAGS_REG); the two alternatives differ only in
;; register class ("Yr" vs "x").
14479 (define_insn "sse4_1_ptest"
14480 [(set (reg:CC FLAGS_REG)
14481 (unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x")
14482 (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")]
14485 "%vptest\t{%1, %0|%0, %1}"
14486 [(set_attr "type" "ssecomi")
14487 (set_attr "prefix_extra" "1")
14488 (set_attr "prefix" "maybe_vex")
14489 (set_attr "mode" "TI")])
;; roundps/roundpd and VEX forms over VF_128_256.  Operand 2 is an
;; immediate in 0..15 (per its predicate name).
;; The prefix_data16 attribute is computed from a TARGET_AVX test rather
;; than given per alternative.
14491 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
14492 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
14494 [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
14495 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
14498 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14499 [(set_attr "type" "ssecvt")
14500 (set (attr "prefix_data16")
14502 (match_test "TARGET_AVX")
14504 (const_string "1")))
14505 (set_attr "prefix_extra" "1")
14506 (set_attr "length_immediate" "1")
14507 (set_attr "prefix" "maybe_vex")
14508 (set_attr "mode" "<MODE>")])
;; Expander: round, then convert to integer.  Operand 1 is rounded by the
;; packed round pattern into a fresh <MODE> temporary, and that temporary
;; is converted into the integer vector operand 0 with the fix_trunc
;; pattern.  Operand 2 is the rounding immediate (0..15).
14510 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
14511 [(match_operand:<sseintvecmode> 0 "register_operand")
14512 (match_operand:VF1_128_256 1 "nonimmediate_operand")
14513 (match_operand:SI 2 "const_0_to_15_operand")]
14516 rtx tmp = gen_reg_rtx (<MODE>mode);
14519 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14522 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; V8DF rounding expander.  It takes the same operand shape as the
;; narrower round patterns (dest, source, immediate 0..15) and forwards
;; all three operands unchanged to avx512f_rndscalev8df.
14526 (define_expand "avx512f_roundpd512"
14527 [(match_operand:V8DF 0 "register_operand")
14528 (match_operand:V8DF 1 "nonimmediate_operand")
14529 (match_operand:SI 2 "const_0_to_15_operand")]
14532 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Expander: round two VF2 inputs (operands 1 and 2) with immediate
;; operand 3 and pack the truncated integer results into operand 0.
;; For V2DFmode with TARGET_AVX && !TARGET_PREFER_AVX128 when optimizing
;; for speed, the two inputs are concatenated into a single V4DF, rounded
;; once with avx_roundpd256 and converted with fix_truncv4dfv4si2.
;; Otherwise each input is rounded into its own temporary and the two
;; temporaries are combined by vec_pack_sfix_trunc_<mode>.
14536 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
14537 [(match_operand:<ssepackfltmode> 0 "register_operand")
14538 (match_operand:VF2 1 "nonimmediate_operand")
14539 (match_operand:VF2 2 "nonimmediate_operand")
14540 (match_operand:SI 3 "const_0_to_15_operand")]
14545 if (<MODE>mode == V2DFmode
14546 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14548 rtx tmp2 = gen_reg_rtx (V4DFmode);
14550 tmp0 = gen_reg_rtx (V4DFmode);
14551 tmp1 = force_reg (V2DFmode, operands[1]);
14553 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14554 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14555 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14559 tmp0 = gen_reg_rtx (<MODE>mode);
14560 tmp1 = gen_reg_rtx (<MODE>mode);
14563 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14566 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14569 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar rounding insn on VF_128 registers with three alternatives.
;; Alternatives 0 and 1 (isa "noavx") tie operand 1 to the destination
;; (constraint "0") and emit the two-source round<ssescalarmodesuffix>.
;; Alternative 2 (isa "avx") keeps operand 1 in its own register and emits
;; the three-source vround<ssescalarmodesuffix>.
;; Operand 2 is the value being rounded; operand 3 is the immediate (0..15).
14574 (define_insn "sse4_1_round<ssescalarmodesuffix>"
14575 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
14578 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
14579 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
14581 (match_operand:VF_128 1 "register_operand" "0,0,x")
14585 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14586 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14587 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14588 [(set_attr "isa" "noavx,noavx,avx")
14589 (set_attr "type" "ssecvt")
14590 (set_attr "length_immediate" "1")
14591 (set_attr "prefix_data16" "1,1,*")
14592 (set_attr "prefix_extra" "1")
14593 (set_attr "prefix" "orig,orig,vex")
14594 (set_attr "mode" "<MODE>")])
;; Vector round () expander, enabled for TARGET_ROUND && !flag_trapping_math.
;; The preparation code builds the constant pred_half = 0.5 - 2**(-p-1)
;; (p = precision of the scalar format; i.e. nextafter (0.5, 0.0)),
;; broadcasts it to a <MODE> vector, and sets operand 3 to
;; copysign (that vector, operand 1).  Operand 4 is a fresh <MODE> register
;; written by the first set and read by the second; operand 5 is the
;; constant ROUND_TRUNC passed to the unspec that produces operand 0.
14596 (define_expand "round<mode>2"
14597 [(set (match_dup 4)
14599 (match_operand:VF 1 "register_operand")
14601 (set (match_operand:VF 0 "register_operand")
14603 [(match_dup 4) (match_dup 5)]
14605 "TARGET_ROUND && !flag_trapping_math"
14607 machine_mode scalar_mode;
14608 const struct real_format *fmt;
14609 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14610 rtx half, vec_half;
14612 scalar_mode = GET_MODE_INNER (<MODE>mode);
14614 /* load nextafter (0.5, 0.0) */
14615 fmt = REAL_MODE_FORMAT (scalar_mode);
14616 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14617 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14618 half = const_double_from_real_value (pred_half, scalar_mode);
14620 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14621 vec_half = force_reg (<MODE>mode, vec_half);
14623 operands[3] = gen_reg_rtx (<MODE>mode);
14624 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14626 operands[4] = gen_reg_rtx (<MODE>mode);
14627 operands[5] = GEN_INT (ROUND_TRUNC);
;; Expander: round<mode>2 followed by conversion to integer.  Operand 1 is
;; rounded into a fresh <MODE> temporary via round<mode>2, and the
;; temporary is converted into the integer vector operand 0 with the
;; fix_trunc pattern.  Same enabling condition as round<mode>2.
14630 (define_expand "round<mode>2_sfix"
14631 [(match_operand:<sseintvecmode> 0 "register_operand")
14632 (match_operand:VF1_128_256 1 "register_operand")]
14633 "TARGET_ROUND && !flag_trapping_math"
14635 rtx tmp = gen_reg_rtx (<MODE>mode);
14637 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14640 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander: round two VF2 register inputs (operands 1 and 2) with
;; round<mode>2 and pack the truncated integer results into operand 0.
;; For V2DFmode with TARGET_AVX && !TARGET_PREFER_AVX128 when optimizing
;; for speed, the inputs are concatenated into one V4DF, rounded once with
;; roundv4df2 and converted with fix_truncv4dfv4si2.  Otherwise each input
;; is rounded separately and the results are combined by
;; vec_pack_sfix_trunc_<mode>.
14644 (define_expand "round<mode>2_vec_pack_sfix"
14645 [(match_operand:<ssepackfltmode> 0 "register_operand")
14646 (match_operand:VF2 1 "register_operand")
14647 (match_operand:VF2 2 "register_operand")]
14648 "TARGET_ROUND && !flag_trapping_math"
14652 if (<MODE>mode == V2DFmode
14653 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14655 rtx tmp2 = gen_reg_rtx (V4DFmode);
14657 tmp0 = gen_reg_rtx (V4DFmode);
14658 tmp1 = force_reg (V2DFmode, operands[1]);
14660 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14661 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14662 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14666 tmp0 = gen_reg_rtx (<MODE>mode);
14667 tmp1 = gen_reg_rtx (<MODE>mode);
14669 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14670 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14673 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14678 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14680 ;; Intel SSE4.2 string/text processing instructions
14682 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: SI operand 0
;; (constraint "c"), V16QI operand 1 (constraint "Yz") and FLAGS_REG.
;; Inputs are V16QI operand 2, SI operand 3 ("a"), V16QI operand 4
;; (register or memory), SI operand 5 ("d") and immediate operand 6
;; (0..255).  The split only runs while pseudos can still be created.
;; It reads the REG_UNUSED notes on curr_insn to determine which of the
;; three results are live and uses sse4_2_pcmpestri, sse4_2_pcmpestrm and
;; sse4_2_pcmpestr_cconly as replacements; the cconly form is chosen when
;; only the flags are live, and NOTE_INSN_DELETED is emitted when nothing
;; is live.
14684 (define_insn_and_split "sse4_2_pcmpestr"
14685 [(set (match_operand:SI 0 "register_operand" "=c,c")
14687 [(match_operand:V16QI 2 "register_operand" "x,x")
14688 (match_operand:SI 3 "register_operand" "a,a")
14689 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14690 (match_operand:SI 5 "register_operand" "d,d")
14691 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14693 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14701 (set (reg:CC FLAGS_REG)
14710 && can_create_pseudo_p ()"
14715 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14716 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14717 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14720 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14721 operands[3], operands[4],
14722 operands[5], operands[6]));
14724 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14725 operands[3], operands[4],
14726 operands[5], operands[6]));
14727 if (flags && !(ecx || xmm0))
14728 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14729 operands[2], operands[3],
14730 operands[4], operands[5],
14732 if (!(flags || ecx || xmm0))
14733 emit_note (NOTE_INSN_DELETED);
14737 [(set_attr "type" "sselog")
14738 (set_attr "prefix_data16" "1")
14739 (set_attr "prefix_extra" "1")
14740 (set_attr "ssememalign" "8")
14741 (set_attr "length_immediate" "1")
14742 (set_attr "memory" "none,load")
14743 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose V16QI operand 4 is a
;; memory operand wrapped in an UNSPEC_LOADU unspec.  Results and the
;; remaining inputs have the same shape: SI operand 0 ("c"), V16QI
;; operand 1 ("Yz"), FLAGS_REG; inputs 2, 3 ("a"), 5 ("d"), immediate 6.
;; The split reads REG_UNUSED notes on curr_insn to find the live results
;; and hands the bare memory operand 4 to sse4_2_pcmpestri,
;; sse4_2_pcmpestrm or sse4_2_pcmpestr_cconly; NOTE_INSN_DELETED is
;; emitted when nothing is live.
14745 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14746 [(set (match_operand:SI 0 "register_operand" "=c")
14748 [(match_operand:V16QI 2 "register_operand" "x")
14749 (match_operand:SI 3 "register_operand" "a")
14751 [(match_operand:V16QI 4 "memory_operand" "m")]
14753 (match_operand:SI 5 "register_operand" "d")
14754 (match_operand:SI 6 "const_0_to_255_operand" "n")]
14756 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14760 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14764 (set (reg:CC FLAGS_REG)
14768 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14773 && can_create_pseudo_p ()"
14778 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14779 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14780 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14783 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14784 operands[3], operands[4],
14785 operands[5], operands[6]));
14787 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14788 operands[3], operands[4],
14789 operands[5], operands[6]));
14790 if (flags && !(ecx || xmm0))
14791 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14792 operands[2], operands[3],
14793 operands[4], operands[5],
14795 if (!(flags || ecx || xmm0))
14796 emit_note (NOTE_INSN_DELETED);
14800 [(set_attr "type" "sselog")
14801 (set_attr "prefix_data16" "1")
14802 (set_attr "prefix_extra" "1")
14803 (set_attr "ssememalign" "8")
14804 (set_attr "length_immediate" "1")
14805 (set_attr "memory" "load")
14806 (set_attr "mode" "TI")])
;; pcmpestri: produces SI operand 0 (constraint "c") and sets FLAGS_REG.
;; Inputs: V16QI operand 1, SI operand 2 ("a"), V16QI operand 3 (register
;; or memory), SI operand 4 ("d") and immediate operand 5 (0..255).
;; Only operands 1, 3 and 5 appear in the output template; the "a", "c"
;; and "d" operands are fixed by their constraints.
14808 (define_insn "sse4_2_pcmpestri"
14809 [(set (match_operand:SI 0 "register_operand" "=c,c")
14811 [(match_operand:V16QI 1 "register_operand" "x,x")
14812 (match_operand:SI 2 "register_operand" "a,a")
14813 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14814 (match_operand:SI 4 "register_operand" "d,d")
14815 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14817 (set (reg:CC FLAGS_REG)
14826 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
14827 [(set_attr "type" "sselog")
14828 (set_attr "prefix_data16" "1")
14829 (set_attr "prefix_extra" "1")
14830 (set_attr "prefix" "maybe_vex")
14831 (set_attr "ssememalign" "8")
14832 (set_attr "length_immediate" "1")
14833 (set_attr "btver2_decode" "vector")
14834 (set_attr "memory" "none,load")
14835 (set_attr "mode" "TI")])
;; pcmpestrm: produces V16QI operand 0 (constraint "Yz") and sets
;; FLAGS_REG.  Inputs: V16QI operand 1, SI operand 2 ("a"), V16QI
;; operand 3 (register or memory), SI operand 4 ("d") and immediate
;; operand 5 (0..255).  Only operands 1, 3 and 5 appear in the output
;; template.
14837 (define_insn "sse4_2_pcmpestrm"
14838 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14840 [(match_operand:V16QI 1 "register_operand" "x,x")
14841 (match_operand:SI 2 "register_operand" "a,a")
14842 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14843 (match_operand:SI 4 "register_operand" "d,d")
14844 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14846 (set (reg:CC FLAGS_REG)
14855 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
14856 [(set_attr "type" "sselog")
14857 (set_attr "prefix_data16" "1")
14858 (set_attr "prefix_extra" "1")
14859 (set_attr "ssememalign" "8")
14860 (set_attr "length_immediate" "1")
14861 (set_attr "prefix" "maybe_vex")
14862 (set_attr "btver2_decode" "vector")
14863 (set_attr "memory" "none,load")
14864 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: sets FLAGS_REG and merely clobbers the two result
;; registers (scratch operands 0 and 1).  Four alternatives:
;; alternatives 0-1 clobber the "Yz" vector scratch and emit pcmpestrm,
;; alternatives 2-3 clobber the "c" integer scratch and emit pcmpestri;
;; within each pair operand 4 is a register or a memory operand.
14866 (define_insn "sse4_2_pcmpestr_cconly"
14867 [(set (reg:CC FLAGS_REG)
14869 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14870 (match_operand:SI 3 "register_operand" "a,a,a,a")
14871 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14872 (match_operand:SI 5 "register_operand" "d,d,d,d")
14873 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
14875 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14876 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14879 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14880 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14881 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
14882 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
14883 [(set_attr "type" "sselog")
14884 (set_attr "prefix_data16" "1")
14885 (set_attr "prefix_extra" "1")
14886 (set_attr "ssememalign" "8")
14887 (set_attr "length_immediate" "1")
14888 (set_attr "memory" "none,load,none,load")
14889 (set_attr "btver2_decode" "vector,vector,vector,vector")
14890 (set_attr "prefix" "maybe_vex")
14891 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: SI operand 0
;; (constraint "c"), V16QI operand 1 (constraint "Yz") and FLAGS_REG.
;; Inputs are V16QI operand 2, V16QI operand 3 (register or memory) and
;; immediate operand 4 (0..255).  The split only runs while pseudos can
;; still be created.  It reads the REG_UNUSED notes on curr_insn to find
;; the live results and uses sse4_2_pcmpistri, sse4_2_pcmpistrm and
;; sse4_2_pcmpistr_cconly as replacements; the cconly form is chosen when
;; only the flags are live, and NOTE_INSN_DELETED is emitted when nothing
;; is live.
14893 (define_insn_and_split "sse4_2_pcmpistr"
14894 [(set (match_operand:SI 0 "register_operand" "=c,c")
14896 [(match_operand:V16QI 2 "register_operand" "x,x")
14897 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14898 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
14900 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14906 (set (reg:CC FLAGS_REG)
14913 && can_create_pseudo_p ()"
14918 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14919 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14920 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14923 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14924 operands[3], operands[4]));
14926 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14927 operands[3], operands[4]));
14928 if (flags && !(ecx || xmm0))
14929 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14930 operands[2], operands[3],
14932 if (!(flags || ecx || xmm0))
14933 emit_note (NOTE_INSN_DELETED);
14937 [(set_attr "type" "sselog")
14938 (set_attr "prefix_data16" "1")
14939 (set_attr "prefix_extra" "1")
14940 (set_attr "ssememalign" "8")
14941 (set_attr "length_immediate" "1")
14942 (set_attr "memory" "none,load")
14943 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose V16QI operand 3 is a
;; memory operand wrapped in an UNSPEC_LOADU unspec.  Results are SI
;; operand 0 ("c"), V16QI operand 1 ("Yz") and FLAGS_REG; the other inputs
;; are V16QI operand 2 and immediate operand 4 (0..255).  The split reads
;; REG_UNUSED notes on curr_insn to find the live results and hands the
;; bare memory operand 3 to sse4_2_pcmpistri, sse4_2_pcmpistrm or
;; sse4_2_pcmpistr_cconly; NOTE_INSN_DELETED is emitted when nothing is
;; live.
14945 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
14946 [(set (match_operand:SI 0 "register_operand" "=c")
14948 [(match_operand:V16QI 2 "register_operand" "x")
14950 [(match_operand:V16QI 3 "memory_operand" "m")]
14952 (match_operand:SI 4 "const_0_to_255_operand" "n")]
14954 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14957 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14960 (set (reg:CC FLAGS_REG)
14963 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14967 && can_create_pseudo_p ()"
14972 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14973 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14974 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14977 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14978 operands[3], operands[4]));
14980 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14981 operands[3], operands[4]));
14982 if (flags && !(ecx || xmm0))
14983 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14984 operands[2], operands[3],
14986 if (!(flags || ecx || xmm0))
14987 emit_note (NOTE_INSN_DELETED);
14991 [(set_attr "type" "sselog")
14992 (set_attr "prefix_data16" "1")
14993 (set_attr "prefix_extra" "1")
14994 (set_attr "ssememalign" "8")
14995 (set_attr "length_immediate" "1")
14996 (set_attr "memory" "load")
14997 (set_attr "mode" "TI")])
;; pcmpistri: produces SI operand 0 (constraint "c") and sets FLAGS_REG.
;; Inputs: V16QI operand 1, V16QI operand 2 (register or memory) and
;; immediate operand 3 (0..255).
14999 (define_insn "sse4_2_pcmpistri"
15000 [(set (match_operand:SI 0 "register_operand" "=c,c")
15002 [(match_operand:V16QI 1 "register_operand" "x,x")
15003 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15004 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15006 (set (reg:CC FLAGS_REG)
15013 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15014 [(set_attr "type" "sselog")
15015 (set_attr "prefix_data16" "1")
15016 (set_attr "prefix_extra" "1")
15017 (set_attr "ssememalign" "8")
15018 (set_attr "length_immediate" "1")
15019 (set_attr "prefix" "maybe_vex")
15020 (set_attr "memory" "none,load")
15021 (set_attr "btver2_decode" "vector")
15022 (set_attr "mode" "TI")])
;; pcmpistrm: produces V16QI operand 0 (constraint "Yz") and sets
;; FLAGS_REG.  Inputs: V16QI operand 1, V16QI operand 2 (register or
;; memory) and immediate operand 3 (0..255).
15024 (define_insn "sse4_2_pcmpistrm"
15025 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15027 [(match_operand:V16QI 1 "register_operand" "x,x")
15028 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15029 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15031 (set (reg:CC FLAGS_REG)
15038 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
15039 [(set_attr "type" "sselog")
15040 (set_attr "prefix_data16" "1")
15041 (set_attr "prefix_extra" "1")
15042 (set_attr "ssememalign" "8")
15043 (set_attr "length_immediate" "1")
15044 (set_attr "prefix" "maybe_vex")
15045 (set_attr "memory" "none,load")
15046 (set_attr "btver2_decode" "vector")
15047 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: sets FLAGS_REG and merely clobbers the two result
;; registers (scratch operands 0 and 1).  Four alternatives:
;; alternatives 0-1 clobber the "Yz" vector scratch and emit pcmpistrm,
;; alternatives 2-3 clobber the "c" integer scratch and emit pcmpistri;
;; within each pair operand 3 is a register or a memory operand.
15049 (define_insn "sse4_2_pcmpistr_cconly"
15050 [(set (reg:CC FLAGS_REG)
15052 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15053 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15054 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
15056 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15057 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15060 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15061 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15062 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15063 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
15064 [(set_attr "type" "sselog")
15065 (set_attr "prefix_data16" "1")
15066 (set_attr "prefix_extra" "1")
15067 (set_attr "ssememalign" "8")
15068 (set_attr "length_immediate" "1")
15069 (set_attr "memory" "none,load,none,load")
15070 (set_attr "prefix" "maybe_vex")
15071 (set_attr "btver2_decode" "vector,vector,vector,vector")
15072 (set_attr "mode" "TI")])
15074 ;; Packed float variants
;; Maps the index-vector mode of the single-precision gather/scatter
;; prefetch patterns to the float vector mode used for their memory
;; reference: V8DI -> V8SF, V16SI -> V16SF.
15075 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15076 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the single-precision gather prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 1 the VI48_512 index vector,
;; operand 2 the base address, operand 3 the scale (1, 2, 4 or 8) and
;; operand 4 the hint (2 or 3).  The preparation code wraps operands 2, 1
;; and 3 into a Pmode UNSPEC_VSIBADDR address.
15078 (define_expand "avx512pf_gatherpf<mode>sf"
15080 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15081 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15083 [(match_operand 2 "vsib_address_operand")
15084 (match_operand:VI48_512 1 "register_operand")
15085 (match_operand:SI 3 "const1248_operand")]))
15086 (match_operand:SI 4 "const_2_to_3_operand")]
15087 UNSPEC_GATHER_PREFETCH)]
15091 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15092 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision gather prefetch.  Operand 0 is the mask
;; register ("Yk"), operand 5 the VSIB memory reference built from base
;; operand 2, index operand 1 and scale operand 3.  The output code
;; switches on the hint operand 4 to pick the vgatherpf0 or vgatherpf1
;; mnemonic and prints the mask as {%0} after the memory operand.
15095 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
15097 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15098 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15100 [(match_operand:P 2 "vsib_address_operand" "Tv")
15101 (match_operand:VI48_512 1 "register_operand" "v")
15102 (match_operand:SI 3 "const1248_operand" "n")]
15104 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15105 UNSPEC_GATHER_PREFETCH)]
15108 switch (INTVAL (operands[4]))
15111 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15113 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15115 gcc_unreachable ();
15118 [(set_attr "type" "sse")
15119 (set_attr "prefix" "evex")
15120 (set_attr "mode" "XI")])
;; Single-precision gather prefetch without a mask register operand.
;; Operand 4 is the VSIB memory reference built from base operand 1,
;; index operand 0 and scale operand 2.  The output code switches on the
;; hint operand 3 to pick the vgatherpf0 or vgatherpf1 mnemonic.
15122 (define_insn "*avx512pf_gatherpf<mode>sf"
15125 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15127 [(match_operand:P 1 "vsib_address_operand" "Tv")
15128 (match_operand:VI48_512 0 "register_operand" "v")
15129 (match_operand:SI 2 "const1248_operand" "n")]
15131 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15132 UNSPEC_GATHER_PREFETCH)]
15135 switch (INTVAL (operands[3]))
15138 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
15140 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
15142 gcc_unreachable ();
15145 [(set_attr "type" "sse")
15146 (set_attr "prefix" "evex")
15147 (set_attr "mode" "XI")])
15149 ;; Packed double variants
;; Expander for the double-precision gather prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 1 the VI4_256_8_512 index
;; vector, operand 2 the base address, operand 3 the scale (1, 2, 4 or 8)
;; and operand 4 the hint (2 or 3).  The preparation code wraps operands
;; 2, 1 and 3 into a Pmode UNSPEC_VSIBADDR address.
15150 (define_expand "avx512pf_gatherpf<mode>df"
15152 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15155 [(match_operand 2 "vsib_address_operand")
15156 (match_operand:VI4_256_8_512 1 "register_operand")
15157 (match_operand:SI 3 "const1248_operand")]))
15158 (match_operand:SI 4 "const_2_to_3_operand")]
15159 UNSPEC_GATHER_PREFETCH)]
15163 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15164 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision gather prefetch.  Operand 0 is the mask
;; register ("Yk"), operand 5 the V8DF VSIB memory reference built from
;; base operand 2, index operand 1 and scale operand 3.  The output code
;; switches on the hint operand 4 to pick the vgatherpf0 or vgatherpf1
;; mnemonic and prints the mask as {%0} after the memory operand.
15167 (define_insn "*avx512pf_gatherpf<mode>df_mask"
15169 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15170 (match_operator:V8DF 5 "vsib_mem_operator"
15172 [(match_operand:P 2 "vsib_address_operand" "Tv")
15173 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15174 (match_operand:SI 3 "const1248_operand" "n")]
15176 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15177 UNSPEC_GATHER_PREFETCH)]
15180 switch (INTVAL (operands[4]))
15183 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15185 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15187 gcc_unreachable ();
15190 [(set_attr "type" "sse")
15191 (set_attr "prefix" "evex")
15192 (set_attr "mode" "XI")])
;; Double-precision gather prefetch without a mask register operand.
;; Operand 4 is the V8DF VSIB memory reference built from base operand 1,
;; index operand 0 and scale operand 2.  The output code switches on the
;; hint operand 3 to pick the vgatherpf0 or vgatherpf1 mnemonic.
15194 (define_insn "*avx512pf_gatherpf<mode>df"
15197 (match_operator:V8DF 4 "vsib_mem_operator"
15199 [(match_operand:P 1 "vsib_address_operand" "Tv")
15200 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15201 (match_operand:SI 2 "const1248_operand" "n")]
15203 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15204 UNSPEC_GATHER_PREFETCH)]
15207 switch (INTVAL (operands[3]))
15210 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
15212 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
15214 gcc_unreachable ();
15217 [(set_attr "type" "sse")
15218 (set_attr "prefix" "evex")
15219 (set_attr "mode" "XI")])
15221 ;; Packed float variants
;; Expander for the single-precision scatter prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 1 the VI48_512 index vector,
;; operand 2 the base address, operand 3 the scale (1, 2, 4 or 8) and
;; operand 4 the hint, accepted by const2367_operand.  The preparation
;; code wraps operands 2, 1 and 3 into a Pmode UNSPEC_VSIBADDR address.
15222 (define_expand "avx512pf_scatterpf<mode>sf"
15224 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15225 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15227 [(match_operand 2 "vsib_address_operand")
15228 (match_operand:VI48_512 1 "register_operand")
15229 (match_operand:SI 3 "const1248_operand")]))
15230 (match_operand:SI 4 "const2367_operand")]
15231 UNSPEC_SCATTER_PREFETCH)]
15235 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15236 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision scatter prefetch.  Operand 0 is the mask
;; register ("Yk"), operand 5 the VSIB memory reference built from base
;; operand 2, index operand 1 and scale operand 3.  The output code
;; switches on the hint operand 4 to pick the vscatterpf0 or vscatterpf1
;; mnemonic and prints the mask as {%0} after the memory operand.
15239 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
15241 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15242 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15244 [(match_operand:P 2 "vsib_address_operand" "Tv")
15245 (match_operand:VI48_512 1 "register_operand" "v")
15246 (match_operand:SI 3 "const1248_operand" "n")]
15248 (match_operand:SI 4 "const2367_operand" "n")]
15249 UNSPEC_SCATTER_PREFETCH)]
15252 switch (INTVAL (operands[4]))
15256 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15259 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15261 gcc_unreachable ();
15264 [(set_attr "type" "sse")
15265 (set_attr "prefix" "evex")
15266 (set_attr "mode" "XI")])
;; Single-precision scatter prefetch without a mask register operand.
;; Operand 4 is the VSIB memory reference built from base operand 1,
;; index operand 0 and scale operand 2.  The output code switches on the
;; hint operand 3 to pick the vscatterpf0 or vscatterpf1 mnemonic.
15268 (define_insn "*avx512pf_scatterpf<mode>sf"
15271 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15273 [(match_operand:P 1 "vsib_address_operand" "Tv")
15274 (match_operand:VI48_512 0 "register_operand" "v")
15275 (match_operand:SI 2 "const1248_operand" "n")]
15277 (match_operand:SI 3 "const2367_operand" "n")]
15278 UNSPEC_SCATTER_PREFETCH)]
15281 switch (INTVAL (operands[3]))
15285 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
15288 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
15290 gcc_unreachable ();
15293 [(set_attr "type" "sse")
15294 (set_attr "prefix" "evex")
15295 (set_attr "mode" "XI")])
15297 ;; Packed double variants
;; Expander for the double-precision scatter prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 1 the VI4_256_8_512 index
;; vector, operand 2 the base address, operand 3 the scale (1, 2, 4 or 8)
;; and operand 4 the hint, accepted by const2367_operand.  The preparation
;; code wraps operands 2, 1 and 3 into a Pmode UNSPEC_VSIBADDR address.
15298 (define_expand "avx512pf_scatterpf<mode>df"
15300 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15303 [(match_operand 2 "vsib_address_operand")
15304 (match_operand:VI4_256_8_512 1 "register_operand")
15305 (match_operand:SI 3 "const1248_operand")]))
15306 (match_operand:SI 4 "const2367_operand")]
15307 UNSPEC_SCATTER_PREFETCH)]
15311 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15312 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision scatter prefetch.  Operand 0 is the mask
;; register ("Yk"), operand 5 the V8DF VSIB memory reference built from
;; base operand 2, index operand 1 and scale operand 3.  The output code
;; switches on the hint operand 4 to pick the vscatterpf0 or vscatterpf1
;; mnemonic and prints the mask as {%0} after the memory operand.
15315 (define_insn "*avx512pf_scatterpf<mode>df_mask"
15317 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15318 (match_operator:V8DF 5 "vsib_mem_operator"
15320 [(match_operand:P 2 "vsib_address_operand" "Tv")
15321 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15322 (match_operand:SI 3 "const1248_operand" "n")]
15324 (match_operand:SI 4 "const2367_operand" "n")]
15325 UNSPEC_SCATTER_PREFETCH)]
15328 switch (INTVAL (operands[4]))
15332 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15335 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15337 gcc_unreachable ();
15340 [(set_attr "type" "sse")
15341 (set_attr "prefix" "evex")
15342 (set_attr "mode" "XI")])
;; Double-precision scatter prefetch without a mask register operand.
;; Operand 4 is the V8DF VSIB memory reference built from base operand 1,
;; index operand 0 and scale operand 2.  The output code switches on the
;; hint operand 3 to pick the vscatterpf0 or vscatterpf1 mnemonic.
15344 (define_insn "*avx512pf_scatterpf<mode>df"
15347 (match_operator:V8DF 4 "vsib_mem_operator"
15349 [(match_operand:P 1 "vsib_address_operand" "Tv")
15350 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15351 (match_operand:SI 2 "const1248_operand" "n")]
15353 (match_operand:SI 3 "const2367_operand" "n")]
15354 UNSPEC_SCATTER_PREFETCH)]
15357 switch (INTVAL (operands[3]))
15361 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
15364 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
15366 gcc_unreachable ();
15369 [(set_attr "type" "sse")
15370 (set_attr "prefix" "evex")
15371 (set_attr "mode" "XI")])
;; vexp2 on 512-bit float vectors: single VF_512 source operand 1, result
;; in register operand 0.  The <mask_name> and <round_saeonly_name>
;; substitutions add the optional masking and SAE operands that appear in
;; the output template.  EVEX-encoded.
15373 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15374 [(set (match_operand:VF_512 0 "register_operand" "=v")
15376 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15379 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15380 [(set_attr "prefix" "evex")
15381 (set_attr "type" "sse")
15382 (set_attr "mode" "<MODE>")])
;; vrcp28 on 512-bit float vectors: single VF_512 source operand 1, result
;; in register operand 0, with optional masking and SAE supplied by the
;; <mask_name> / <round_saeonly_name> substitutions.  EVEX-encoded.
15384 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15385 [(set (match_operand:VF_512 0 "register_operand" "=v")
15387 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15390 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15391 [(set_attr "prefix" "evex")
15392 (set_attr "type" "sse")
15393 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28 on VF_128 registers.  Operand 1 is the source that
;; carries the optional SAE modifier; operand 2 is a second register
;; source that also appears in the output template; operand 0 is the
;; destination.  EVEX-encoded.
15395 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15396 [(set (match_operand:VF_128 0 "register_operand" "=v")
15399 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15401 (match_operand:VF_128 2 "register_operand" "v")
15404 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15405 [(set_attr "length_immediate" "1")
15406 (set_attr "prefix" "evex")
15407 (set_attr "type" "sse")
15408 (set_attr "mode" "<MODE>")])
;; vrsqrt28 on 512-bit float vectors: single VF_512 source operand 1,
;; result in register operand 0, with optional masking and SAE supplied
;; by the <mask_name> / <round_saeonly_name> substitutions.  EVEX-encoded.
15410 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
15411 [(set (match_operand:VF_512 0 "register_operand" "=v")
15413 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15416 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15417 [(set_attr "prefix" "evex")
15418 (set_attr "type" "sse")
15419 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28 on VF_128 registers.  Operand 1 is the source that
;; carries the optional SAE modifier; operand 2 is a second register
;; source that also appears in the output template; operand 0 is the
;; destination.  EVEX-encoded.
15421 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15422 [(set (match_operand:VF_128 0 "register_operand" "=v")
15425 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15427 (match_operand:VF_128 2 "register_operand" "v")
15430 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15431 [(set_attr "length_immediate" "1")
15432 (set_attr "type" "sse")
15433 (set_attr "prefix" "evex")
15434 (set_attr "mode" "<MODE>")])
15436 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15438 ;; XOP instructions
15440 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the two addition codes used by the XOP multiply/add
;; patterns (plus and ss_plus), together with the mnemonic fragments each
;; code selects: "macs"/"macss" for <macs> and "madcs"/"madcss" for
;; <madcs>.
15442 (define_code_iterator xop_plus [plus ss_plus])
15444 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
15445 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15447 ;; XOP parallel integer multiply/add instructions.
;; XOP multiply/add on VI24_128 vectors with same-width sources and
;; destination.  Operands 1 and 2 are commutative ("%"), operand 2 may be
;; in memory, and operand 3 is a further register source.  Emits
;; vp<macs><ssemodesuffix><ssemodesuffix> with four operands.
15449 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15450 [(set (match_operand:VI24_128 0 "register_operand" "=x")
15453 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15454 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
15455 (match_operand:VI24_128 3 "register_operand" "x")))]
15457 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15458 [(set_attr "type" "ssemuladd")
15459 (set_attr "mode" "TI")])
;; XOP multiply/add producing V2DI from V4SI sources, "low" variant:
;; elements 0 and 2 are selected from both operand 1 and operand 2.
;; Operand 3 is a V2DI register source.  Emits vp<macs>dql.
15461 (define_insn "xop_p<macs>dql"
15462 [(set (match_operand:V2DI 0 "register_operand" "=x")
15467 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15468 (parallel [(const_int 0) (const_int 2)])))
15471 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15472 (parallel [(const_int 0) (const_int 2)]))))
15473 (match_operand:V2DI 3 "register_operand" "x")))]
15475 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15476 [(set_attr "type" "ssemuladd")
15477 (set_attr "mode" "TI")])
;; XOP multiply/add producing V2DI from V4SI sources, "high" variant:
;; elements 1 and 3 are selected from both operand 1 and operand 2.
;; Operand 3 is a V2DI register source.  Emits vp<macs>dqh.
15479 (define_insn "xop_p<macs>dqh"
15480 [(set (match_operand:V2DI 0 "register_operand" "=x")
15485 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15486 (parallel [(const_int 1) (const_int 3)])))
15489 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15490 (parallel [(const_int 1) (const_int 3)]))))
15491 (match_operand:V2DI 3 "register_operand" "x")))]
15493 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15494 [(set_attr "type" "ssemuladd")
15495 (set_attr "mode" "TI")])
15497 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; XOP multiply/add producing V4SI from V8HI sources: the odd elements
;; (1, 3, 5, 7) are selected from both operand 1 and operand 2.
;; Operand 3 is a V4SI register source.  Emits vp<macs>wd.
15498 (define_insn "xop_p<macs>wd"
15499 [(set (match_operand:V4SI 0 "register_operand" "=x")
15504 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15505 (parallel [(const_int 1) (const_int 3)
15506 (const_int 5) (const_int 7)])))
15509 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15510 (parallel [(const_int 1) (const_int 3)
15511 (const_int 5) (const_int 7)]))))
15512 (match_operand:V4SI 3 "register_operand" "x")))]
15514 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15515 [(set_attr "type" "ssemuladd")
15516 (set_attr "mode" "TI")])
;; XOP multiply/add producing V4SI from V8HI sources that uses both
;; element sets: the template selects the even elements (0, 2, 4, 6) of
;; operands 1 and 2 and then the odd elements (1, 3, 5, 7).  Operand 3 is
;; a V4SI register source.  Emits vp<madcs>wd.
15518 (define_insn "xop_p<madcs>wd"
15519 [(set (match_operand:V4SI 0 "register_operand" "=x")
15525 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15526 (parallel [(const_int 0) (const_int 2)
15527 (const_int 4) (const_int 6)])))
15530 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15531 (parallel [(const_int 0) (const_int 2)
15532 (const_int 4) (const_int 6)]))))
15537 (parallel [(const_int 1) (const_int 3)
15538 (const_int 5) (const_int 7)])))
15542 (parallel [(const_int 1) (const_int 3)
15543 (const_int 5) (const_int 7)])))))
15544 (match_operand:V4SI 3 "register_operand" "x")))]
15546 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15547 [(set_attr "type" "ssemuladd")
15548 (set_attr "mode" "TI")])
15550 ;; XOP parallel XMM conditional moves
;; vpcmov on any vector mode in iterator V.  Operand 1 must be a
;; register; the two alternatives allow either operand 2 or operand 3
;; (but not both) to be a memory operand.
15551 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
15552 [(set (match_operand:V 0 "register_operand" "=x,x")
15554 (match_operand:V 3 "nonimmediate_operand" "x,m")
15555 (match_operand:V 1 "register_operand" "x,x")
15556 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
15558 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15559 [(set_attr "type" "sse4arg")])
15561 ;; XOP horizontal add/subtract instructions
;; Horizontal add, bytes to words: combines the even-indexed elements
;; (0, 2, ..., 14) of V16QI operand 1 with the odd-indexed elements
;; (1, 3, ..., 15) into a V8HI result.  <u> selects the mnemonic variant.
15562 (define_insn "xop_phadd<u>bw"
15563 [(set (match_operand:V8HI 0 "register_operand" "=x")
15567 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15568 (parallel [(const_int 0) (const_int 2)
15569 (const_int 4) (const_int 6)
15570 (const_int 8) (const_int 10)
15571 (const_int 12) (const_int 14)])))
15575 (parallel [(const_int 1) (const_int 3)
15576 (const_int 5) (const_int 7)
15577 (const_int 9) (const_int 11)
15578 (const_int 13) (const_int 15)])))))]
15580 "vphadd<u>bw\t{%1, %0|%0, %1}"
15581 [(set_attr "type" "sseiadd1")])
;; Horizontal add, bytes to doublewords: V4SI result built from four
;; element selections of V16QI operand 1, taking elements {0,4,8,12},
;; {1,5,9,13}, {2,6,10,14} and {3,7,11,15}, i.e. each result lane is fed
;; by four adjacent source bytes.
15583 (define_insn "xop_phadd<u>bd"
15584 [(set (match_operand:V4SI 0 "register_operand" "=x")
15589 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15590 (parallel [(const_int 0) (const_int 4)
15591 (const_int 8) (const_int 12)])))
15595 (parallel [(const_int 1) (const_int 5)
15596 (const_int 9) (const_int 13)]))))
15601 (parallel [(const_int 2) (const_int 6)
15602 (const_int 10) (const_int 14)])))
15606 (parallel [(const_int 3) (const_int 7)
15607 (const_int 11) (const_int 15)]))))))]
15609 "vphadd<u>bd\t{%1, %0|%0, %1}"
15610 [(set_attr "type" "sseiadd1")])
;; Horizontal add, bytes to quadwords: V2DI result built from eight
;; two-element selections of V16QI operand 1, {k, k+8} for k = 0..7, i.e.
;; each result lane is fed by eight adjacent source bytes.
15612 (define_insn "xop_phadd<u>bq"
15613 [(set (match_operand:V2DI 0 "register_operand" "=x")
15619 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15620 (parallel [(const_int 0) (const_int 8)])))
15624 (parallel [(const_int 1) (const_int 9)]))))
15629 (parallel [(const_int 2) (const_int 10)])))
15633 (parallel [(const_int 3) (const_int 11)])))))
15639 (parallel [(const_int 4) (const_int 12)])))
15643 (parallel [(const_int 5) (const_int 13)]))))
15648 (parallel [(const_int 6) (const_int 14)])))
15652 (parallel [(const_int 7) (const_int 15)])))))))]
15654 "vphadd<u>bq\t{%1, %0|%0, %1}"
15655 [(set_attr "type" "sseiadd1")])
;; Horizontal add, words to doublewords: combines the even-indexed
;; elements (0, 2, 4, 6) of V8HI operand 1 with the odd-indexed elements
;; (1, 3, 5, 7) into a V4SI result.
15657 (define_insn "xop_phadd<u>wd"
15658 [(set (match_operand:V4SI 0 "register_operand" "=x")
15662 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15663 (parallel [(const_int 0) (const_int 2)
15664 (const_int 4) (const_int 6)])))
15668 (parallel [(const_int 1) (const_int 3)
15669 (const_int 5) (const_int 7)])))))]
15671 "vphadd<u>wd\t{%1, %0|%0, %1}"
15672 [(set_attr "type" "sseiadd1")])
;; Horizontal add, words to quadwords: V2DI result built from four
;; two-element selections of V8HI operand 1, {0,4}, {1,5}, {2,6} and
;; {3,7}, i.e. each result lane is fed by four adjacent source words.
15674 (define_insn "xop_phadd<u>wq"
15675 [(set (match_operand:V2DI 0 "register_operand" "=x")
15680 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15681 (parallel [(const_int 0) (const_int 4)])))
15685 (parallel [(const_int 1) (const_int 5)]))))
15690 (parallel [(const_int 2) (const_int 6)])))
15694 (parallel [(const_int 3) (const_int 7)]))))))]
15696 "vphadd<u>wq\t{%1, %0|%0, %1}"
15697 [(set_attr "type" "sseiadd1")])
;; xop_phadd<u>dq: emits vphadd<u>dq.  Operand 1 is a V4SI register or
;; memory source; operand 0 is a V2DI register.  The selectors pick the
;; even dword lanes {0,2} and the odd dword lanes {1,3}.
15699 (define_insn "xop_phadd<u>dq"
15700 [(set (match_operand:V2DI 0 "register_operand" "=x")
15704 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15705 (parallel [(const_int 0) (const_int 2)])))
15709 (parallel [(const_int 1) (const_int 3)])))))]
15711 "vphadd<u>dq\t{%1, %0|%0, %1}"
15712 [(set_attr "type" "sseiadd1")])
;; xop_phsubbw: emits vphsubbw.  Operand 1 is a V16QI register or memory
;; source; operand 0 is a V8HI register.  The first selector takes the even
;; byte lanes {0,2,...,14}, the second the odd byte lanes {1,3,...,15}.
15714 (define_insn "xop_phsubbw"
15715 [(set (match_operand:V8HI 0 "register_operand" "=x")
15719 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15720 (parallel [(const_int 0) (const_int 2)
15721 (const_int 4) (const_int 6)
15722 (const_int 8) (const_int 10)
15723 (const_int 12) (const_int 14)])))
15727 (parallel [(const_int 1) (const_int 3)
15728 (const_int 5) (const_int 7)
15729 (const_int 9) (const_int 11)
15730 (const_int 13) (const_int 15)])))))]
15732 "vphsubbw\t{%1, %0|%0, %1}"
15733 [(set_attr "type" "sseiadd1")])
;; xop_phsubwd: emits vphsubwd.  Operand 1 is a V8HI register or memory
;; source; operand 0 is a V4SI register.  The first selector takes the even
;; word lanes {0,2,4,6}, the second the odd word lanes {1,3,5,7}.
15735 (define_insn "xop_phsubwd"
15736 [(set (match_operand:V4SI 0 "register_operand" "=x")
15740 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15741 (parallel [(const_int 0) (const_int 2)
15742 (const_int 4) (const_int 6)])))
15746 (parallel [(const_int 1) (const_int 3)
15747 (const_int 5) (const_int 7)])))))]
15749 "vphsubwd\t{%1, %0|%0, %1}"
15750 [(set_attr "type" "sseiadd1")])
;; xop_phsubdq: emits vphsubdq.  Operand 1 is a V4SI register or memory
;; source; operand 0 is a V2DI register.  The first selector takes dword
;; lanes {0,2}, the second dword lanes {1,3}.
15752 (define_insn "xop_phsubdq"
15753 [(set (match_operand:V2DI 0 "register_operand" "=x")
15757 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15758 (parallel [(const_int 0) (const_int 2)])))
15762 (parallel [(const_int 1) (const_int 3)])))))]
15764 "vphsubdq\t{%1, %0|%0, %1}"
15765 [(set_attr "type" "sseiadd1")])
15767 ;; XOP permute instructions
;; xop_pperm: emits vpperm on three V16QI inputs, modelled as an
;; UNSPEC_XOP_PERMUTE.  Operand 1 must be a register; the two alternatives
;; let either operand 2 or operand 3 come from memory, and the insn
;; condition rejects the case where both are memory.
15768 (define_insn "xop_pperm"
15769 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15771 [(match_operand:V16QI 1 "register_operand" "x,x")
15772 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15773 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15774 UNSPEC_XOP_PERMUTE))]
15775 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15776 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15777 [(set_attr "type" "sse4arg")
15778 (set_attr "mode" "TI")])
15780 ;; XOP pack instructions that combine two vectors into a smaller vector
;; xop_pperm_pack_v2di_v4si: packs two V2DI inputs (operands 1 and 2) into
;; one V4SI result using vpperm.  Operand 3 is the V16QI byte selector; it
;; appears only in a (use ...) so the RTL result is described by the pack
;; expression, not by the selector value.  At most one of operands 2 and 3
;; may be in memory.
15781 (define_insn "xop_pperm_pack_v2di_v4si"
15782 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15785 (match_operand:V2DI 1 "register_operand" "x,x"))
15787 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15788 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15789 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15790 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15791 [(set_attr "type" "sse4arg")
15792 (set_attr "mode" "TI")])
;; xop_pperm_pack_v4si_v8hi: packs two V4SI inputs (operands 1 and 2) into
;; one V8HI result using vpperm.  Operand 3 is the V16QI byte selector,
;; referenced through a (use ...).  At most one of operands 2 and 3 may be
;; in memory.
15794 (define_insn "xop_pperm_pack_v4si_v8hi"
15795 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15798 (match_operand:V4SI 1 "register_operand" "x,x"))
15800 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15801 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15802 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15803 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15804 [(set_attr "type" "sse4arg")
15805 (set_attr "mode" "TI")])
;; xop_pperm_pack_v8hi_v16qi: packs two V8HI inputs (operands 1 and 2) into
;; one V16QI result using vpperm.  Operand 3 is the V16QI byte selector,
;; referenced through a (use ...).  At most one of operands 2 and 3 may be
;; in memory.
15807 (define_insn "xop_pperm_pack_v8hi_v16qi"
15808 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15811 (match_operand:V8HI 1 "register_operand" "x,x"))
15813 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15814 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15815 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15816 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15817 [(set_attr "type" "sse4arg")
15818 (set_attr "mode" "TI")])
15820 ;; XOP packed rotate instructions
;; rotl<mode>3: rotate-left expander for 128-bit integer vectors with a
;; scalar SImode count.  When the count is not an in-range constant
;; (const_0_to_<sserotatemax>_operand), the count is converted to the
;; element mode if necessary, replicated into all <ssescalarnum> lanes of a
;; fresh vector register via vec_init, and the rotate is emitted through the
;; per-element xop_vrotl<mode>3 pattern.
15821 (define_expand "rotl<mode>3"
15822 [(set (match_operand:VI_128 0 "register_operand")
15824 (match_operand:VI_128 1 "nonimmediate_operand")
15825 (match_operand:SI 2 "general_operand")))]
15828 /* If we were given a scalar, convert it to parallel */
15829 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15831 rtvec vs = rtvec_alloc (<ssescalarnum>);
15832 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15833 rtx reg = gen_reg_rtx (<MODE>mode);
15834 rtx op2 = operands[2];
15837 if (GET_MODE (op2) != <ssescalarmode>mode)
15839 op2 = gen_reg_rtx (<ssescalarmode>mode);
15840 convert_move (op2, operands[2], false);
15843 for (i = 0; i < <ssescalarnum>; i++)
15844 RTVEC_ELT (vs, i) = op2;
15846 emit_insn (gen_vec_init<mode> (reg, par));
15847 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; rotr<mode>3: rotate-right expander for 128-bit integer vectors with a
;; scalar SImode count.  For a count that is not an in-range constant, the
;; count is broadcast into a vector register exactly as in rotl<mode>3, that
;; vector is negated with neg<mode>2, and the result is fed to
;; xop_vrotl<mode>3 -- i.e. a right rotate is emitted as a left rotate by
;; the negated per-element count.
15852 (define_expand "rotr<mode>3"
15853 [(set (match_operand:VI_128 0 "register_operand")
15855 (match_operand:VI_128 1 "nonimmediate_operand")
15856 (match_operand:SI 2 "general_operand")))]
15859 /* If we were given a scalar, convert it to parallel */
15860 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15862 rtvec vs = rtvec_alloc (<ssescalarnum>);
15863 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15864 rtx neg = gen_reg_rtx (<MODE>mode);
15865 rtx reg = gen_reg_rtx (<MODE>mode);
15866 rtx op2 = operands[2];
15869 if (GET_MODE (op2) != <ssescalarmode>mode)
15871 op2 = gen_reg_rtx (<ssescalarmode>mode);
15872 convert_move (op2, operands[2], false);
15875 for (i = 0; i < <ssescalarnum>; i++)
15876 RTVEC_ELT (vs, i) = op2;
15878 emit_insn (gen_vec_init<mode> (reg, par));
15879 emit_insn (gen_neg<mode>2 (neg, reg));
15880 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; xop_rotl<mode>3: rotate with an immediate count.  Operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the vprot
;; immediate (length_immediate is 1).  Operand 1 may be a register or
;; memory.
15885 (define_insn "xop_rotl<mode>3"
15886 [(set (match_operand:VI_128 0 "register_operand" "=x")
15888 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15889 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15891 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15892 [(set_attr "type" "sseishft")
15893 (set_attr "length_immediate" "1")
15894 (set_attr "mode" "TI")])
;; xop_rotr<mode>3: right rotate by an immediate, emitted with the same
;; vprot mnemonic as the left rotate.  The output code prints %3 rather than
;; %2; the value computed below is the element bit width minus the operand 2
;; immediate (presumably stored into operands[3] -- confirm against the full
;; definition), which turns the right count into the complementary left
;; count.
15896 (define_insn "xop_rotr<mode>3"
15897 [(set (match_operand:VI_128 0 "register_operand" "=x")
15899 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15900 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15904 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15905 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15907 [(set_attr "type" "sseishft")
15908 (set_attr "length_immediate" "1")
15909 (set_attr "mode" "TI")])
;; vrotr<mode>3: per-element rotate right with a vector of counts.  The
;; count vector (operand 2) is negated into a fresh register and the rotate
;; is emitted through xop_vrotl<mode>3.
15911 (define_expand "vrotr<mode>3"
15912 [(match_operand:VI_128 0 "register_operand")
15913 (match_operand:VI_128 1 "register_operand")
15914 (match_operand:VI_128 2 "register_operand")]
15917 rtx reg = gen_reg_rtx (<MODE>mode);
15918 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15919 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; vrotl<mode>3: per-element rotate left with a vector of counts.  All three
;; operands are registers and are forwarded unchanged to xop_vrotl<mode>3.
15923 (define_expand "vrotl<mode>3"
15924 [(match_operand:VI_128 0 "register_operand")
15925 (match_operand:VI_128 1 "register_operand")
15926 (match_operand:VI_128 2 "register_operand")]
15929 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; xop_vrotl<mode>3: vprot with a per-element count vector (operand 2).  The
;; RTL is an if_then_else over the count, one arm of which uses the negated
;; count (neg (match_dup 2)).  Either the data (operand 1) or the count
;; (operand 2) may be in memory, but the condition rejects both being memory
;; at once.
15933 (define_insn "xop_vrotl<mode>3"
15934 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15935 (if_then_else:VI_128
15937 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15940 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15944 (neg:VI_128 (match_dup 2)))))]
15945 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15946 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15947 [(set_attr "type" "sseishft")
15948 (set_attr "prefix_data16" "0")
15949 (set_attr "prefix_extra" "2")
15950 (set_attr "mode" "TI")])
15952 ;; XOP packed shift instructions.
;; vlshr<mode>3 (VI12_128 modes): per-element logical shift right with a
;; vector of counts.  The count vector is negated into a fresh register and
;; passed to xop_shl<mode>3.
15953 (define_expand "vlshr<mode>3"
15954 [(set (match_operand:VI12_128 0 "register_operand")
15956 (match_operand:VI12_128 1 "register_operand")
15957 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15960 rtx neg = gen_reg_rtx (<MODE>mode);
15961 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15962 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 (VI48_128 modes): per-element logical shift right, enabled
;; for AVX2 or XOP.  The XOP lowering shown here negates the count vector
;; and emits xop_shl<mode>3.
;; NOTE(review): which target takes this lowering depends on a guard around
;; these statements -- confirm against the full definition.
15966 (define_expand "vlshr<mode>3"
15967 [(set (match_operand:VI48_128 0 "register_operand")
15969 (match_operand:VI48_128 1 "register_operand")
15970 (match_operand:VI48_128 2 "nonimmediate_operand")))]
15971 "TARGET_AVX2 || TARGET_XOP"
15975 rtx neg = gen_reg_rtx (<MODE>mode);
15976 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15977 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 (VI48_512 modes): standard-named expander for a per-element
;; logical shift right.  Destination and data are registers; the count
;; vector (operand 2) may be a register or memory.
15982 (define_expand "vlshr<mode>3"
15983 [(set (match_operand:VI48_512 0 "register_operand")
15985 (match_operand:VI48_512 1 "register_operand")
15986 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr<mode>3 (VI48_256 modes): standard-named expander for a per-element
;; logical shift right.  Destination and data are registers; the count
;; vector (operand 2) may be a register or memory.
15989 (define_expand "vlshr<mode>3"
15990 [(set (match_operand:VI48_256 0 "register_operand")
15992 (match_operand:VI48_256 1 "register_operand")
15993 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vashrv8hi3<mask_name>: per-element arithmetic shift right on V8HI,
;; enabled for XOP or for AVX512BW together with AVX512VL.  The XOP lowering
;; shown here negates the count vector and emits xop_shav8hi3.
;; NOTE(review): the guard selecting this lowering should be confirmed
;; against the full definition.
15996 (define_expand "vashrv8hi3<mask_name>"
15997 [(set (match_operand:V8HI 0 "register_operand")
15999 (match_operand:V8HI 1 "register_operand")
16000 (match_operand:V8HI 2 "nonimmediate_operand")))]
16001 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16005 rtx neg = gen_reg_rtx (V8HImode);
16006 emit_insn (gen_negv8hi2 (neg, operands[2]));
16007 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
;; vashrv16qi3: per-element arithmetic shift right on V16QI.  The count
;; vector is negated into a fresh register and passed to xop_shav16qi3.
16012 (define_expand "vashrv16qi3"
16013 [(set (match_operand:V16QI 0 "register_operand")
16015 (match_operand:V16QI 1 "register_operand")
16016 (match_operand:V16QI 2 "nonimmediate_operand")))]
16019 rtx neg = gen_reg_rtx (V16QImode);
16020 emit_insn (gen_negv16qi2 (neg, operands[2]));
16021 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
;; vashrv2di3<mask_name>: per-element arithmetic shift right on V2DI,
;; enabled for XOP or AVX512VL.  The XOP lowering shown here negates the
;; count vector and emits xop_shav2di3.
;; NOTE(review): the guard selecting this lowering should be confirmed
;; against the full definition.
16025 (define_expand "vashrv2di3<mask_name>"
16026 [(set (match_operand:V2DI 0 "register_operand")
16028 (match_operand:V2DI 1 "register_operand")
16029 (match_operand:V2DI 2 "nonimmediate_operand")))]
16030 "TARGET_XOP || TARGET_AVX512VL"
16034 rtx neg = gen_reg_rtx (V2DImode);
16035 emit_insn (gen_negv2di2 (neg, operands[2]));
16036 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
;; vashrv4si3: per-element arithmetic shift right on V4SI, enabled for AVX2
;; or XOP.  The XOP lowering shown here negates the count vector and emits
;; xop_shav4si3.
;; NOTE(review): the guard selecting this lowering should be confirmed
;; against the full definition.
16041 (define_expand "vashrv4si3"
16042 [(set (match_operand:V4SI 0 "register_operand")
16043 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16044 (match_operand:V4SI 2 "nonimmediate_operand")))]
16045 "TARGET_AVX2 || TARGET_XOP"
16049 rtx neg = gen_reg_rtx (V4SImode);
16050 emit_insn (gen_negv4si2 (neg, operands[2]));
16051 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; vashrv16si3: standard-named expander for a per-element arithmetic shift
;; right on V16SI.  Destination and data are registers; the count vector may
;; be a register or memory.
16056 (define_expand "vashrv16si3"
16057 [(set (match_operand:V16SI 0 "register_operand")
16058 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16059 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; vashrv8si3: standard-named expander for a per-element arithmetic shift
;; right on V8SI.  Destination and data are registers; the count vector may
;; be a register or memory.
16062 (define_expand "vashrv8si3"
16063 [(set (match_operand:V8SI 0 "register_operand")
16064 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16065 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl<mode>3 (VI12_128 modes): per-element shift left with a vector of
;; counts.  The operands are forwarded unchanged to xop_sha<mode>3 (no
;; negation, unlike the right-shift expanders).
16068 (define_expand "vashl<mode>3"
16069 [(set (match_operand:VI12_128 0 "register_operand")
16071 (match_operand:VI12_128 1 "register_operand")
16072 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16075 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 (VI48_128 modes): per-element shift left, enabled for AVX2
;; or XOP.  The XOP lowering shown here forces the count vector into a
;; register and emits xop_sha<mode>3.
;; NOTE(review): the guard selecting this lowering should be confirmed
;; against the full definition.
16079 (define_expand "vashl<mode>3"
16080 [(set (match_operand:VI48_128 0 "register_operand")
16082 (match_operand:VI48_128 1 "register_operand")
16083 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16084 "TARGET_AVX2 || TARGET_XOP"
16088 operands[2] = force_reg (<MODE>mode, operands[2]);
16089 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 (VI48_512 modes): standard-named expander for a per-element
;; shift left.  Destination and data are registers; the count vector may be
;; a register or memory.
16094 (define_expand "vashl<mode>3"
16095 [(set (match_operand:VI48_512 0 "register_operand")
16097 (match_operand:VI48_512 1 "register_operand")
16098 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl<mode>3 (VI48_256 modes): standard-named expander for a per-element
;; shift left.  Destination and data are registers; the count vector may be
;; a register or memory.
16101 (define_expand "vashl<mode>3"
16102 [(set (match_operand:VI48_256 0 "register_operand")
16104 (match_operand:VI48_256 1 "register_operand")
16105 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; xop_sha<mode>3: vpsha with a per-element count vector (operand 2).  The
;; RTL is an if_then_else over the count, one arm of which uses the negated
;; count (neg (match_dup 2)).  Either operand 1 or operand 2 may be in
;; memory, but not both.
16108 (define_insn "xop_sha<mode>3"
16109 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16110 (if_then_else:VI_128
16112 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16115 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16119 (neg:VI_128 (match_dup 2)))))]
16120 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16121 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16122 [(set_attr "type" "sseishft")
16123 (set_attr "prefix_data16" "0")
16124 (set_attr "prefix_extra" "2")
16125 (set_attr "mode" "TI")])
;; xop_shl<mode>3: vpshl with a per-element count vector (operand 2).  The
;; RTL is an if_then_else over the count, one arm of which uses the negated
;; count (neg (match_dup 2)).  Either operand 1 or operand 2 may be in
;; memory, but not both.
16127 (define_insn "xop_shl<mode>3"
16128 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16129 (if_then_else:VI_128
16131 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16134 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16138 (neg:VI_128 (match_dup 2)))))]
16139 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16140 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16141 [(set_attr "type" "sseishft")
16142 (set_attr "prefix_data16" "0")
16143 (set_attr "prefix_extra" "2")
16144 (set_attr "mode" "TI")])
;; <shift_insn><mode>3 (VI1_AVX512 modes): byte-vector shift by a scalar
;; SImode count, for every code in any_shift.
;;  * XOP with V16QImode: for the right-shift codes a constant count is
;;    negated up front; the count is replicated into all 16 lanes with
;;    vec_init, the vector may additionally be negated with negv16qi2
;;    (controlled by the local `negate' flag -- presumably for non-constant
;;    right-shift counts; confirm), and the shift is emitted through
;;    xop_shlv16qi3 for LSHIFTRT or xop_shav16qi3 otherwise.
;;  * Otherwise the operation is handed to ix86_expand_vecop_qihi.
16146 (define_expand "<shift_insn><mode>3"
16147 [(set (match_operand:VI1_AVX512 0 "register_operand")
16148 (any_shift:VI1_AVX512
16149 (match_operand:VI1_AVX512 1 "register_operand")
16150 (match_operand:SI 2 "nonmemory_operand")))]
16153 if (TARGET_XOP && <MODE>mode == V16QImode)
16155 bool negate = false;
16156 rtx (*gen) (rtx, rtx, rtx);
16160 if (<CODE> != ASHIFT)
16162 if (CONST_INT_P (operands[2]))
16163 operands[2] = GEN_INT (-INTVAL (operands[2]));
16167 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16168 for (i = 0; i < 16; i++)
16169 XVECEXP (par, 0, i) = operands[2];
16171 tmp = gen_reg_rtx (V16QImode);
16172 emit_insn (gen_vec_initv16qi (tmp, par));
16175 emit_insn (gen_negv16qi2 (tmp, tmp));
16177 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16178 emit_insn (gen (operands[0], operands[1], tmp));
16181 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; ashrv2di3: arithmetic shift right of V2DI by a scalar DImode count,
;; enabled for XOP or AVX512VL.  Without AVX512VL the XOP lowering is used:
;; a constant count is negated directly; the count is replicated into both
;; lanes with vec_initv2di, the vector may additionally be negated with
;; negv2di2 (controlled by the local `negate' flag -- presumably for
;; non-constant counts; confirm), and the shift is emitted through
;; xop_shav2di3.
16185 (define_expand "ashrv2di3"
16186 [(set (match_operand:V2DI 0 "register_operand")
16188 (match_operand:V2DI 1 "register_operand")
16189 (match_operand:DI 2 "nonmemory_operand")))]
16190 "TARGET_XOP || TARGET_AVX512VL"
16192 if (!TARGET_AVX512VL)
16194 rtx reg = gen_reg_rtx (V2DImode);
16196 bool negate = false;
16199 if (CONST_INT_P (operands[2]))
16200 operands[2] = GEN_INT (-INTVAL (operands[2]));
16204 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16205 for (i = 0; i < 2; i++)
16206 XVECEXP (par, 0, i) = operands[2];
16208 emit_insn (gen_vec_initv2di (reg, par));
16211 emit_insn (gen_negv2di2 (reg, reg));
16213 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16218 ;; XOP FRCZ support
;; xop_frcz<mode>2: emits vfrcz<ssemodesuffix> for every mode in FMAMODE.
;; The source may be a register or memory; the insn's "mode" attribute
;; follows the operand mode.
16219 (define_insn "xop_frcz<mode>2"
16220 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16222 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16225 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16226 [(set_attr "type" "ssecvt1")
16227 (set_attr "mode" "<MODE>")])
;; xop_vmfrcz<mode>2: expander for the scalar ("vm") form on VF_128 modes.
;; Its only preparation step supplies operand 2 as the all-zero vector of
;; the operand mode, which the matching *xop_vmfrcz<mode>2 insn requires
;; (const0_operand).
16229 (define_expand "xop_vmfrcz<mode>2"
16230 [(set (match_operand:VF_128 0 "register_operand")
16233 [(match_operand:VF_128 1 "nonimmediate_operand")]
16238 "operands[2] = CONST0_RTX (<MODE>mode);")
;; *xop_vmfrcz<mode>2: emits the scalar vfrcz<ssescalarmodesuffix>.
;; Operand 2 must be a zero constant (const0_operand) and does not appear in
;; the assembler output.  In Intel syntax a memory source is printed with
;; the <iptr> size prefix.
16240 (define_insn "*xop_vmfrcz<mode>2"
16241 [(set (match_operand:VF_128 0 "register_operand" "=x")
16244 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16246 (match_operand:VF_128 2 "const0_operand")
16249 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16250 [(set_attr "type" "ssecvt1")
16251 (set_attr "mode" "<MODE>")])
;; xop_maskcmp<mode>3: vpcom comparison producing a vector mask.  Operand 1
;; is the comparison operator itself (any ix86_comparison_int_operator); the
;; %Y1 output modifier derives the mnemonic's condition part from it.
;; Operand 2 must be a register, operand 3 may be memory.
16253 (define_insn "xop_maskcmp<mode>3"
16254 [(set (match_operand:VI_128 0 "register_operand" "=x")
16255 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16256 [(match_operand:VI_128 2 "register_operand" "x")
16257 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16259 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16260 [(set_attr "type" "sse4arg")
16261 (set_attr "prefix_data16" "0")
16262 (set_attr "prefix_rep" "0")
16263 (set_attr "prefix_extra" "2")
16264 (set_attr "length_immediate" "1")
16265 (set_attr "mode" "TI")])
;; xop_maskcmp_uns<mode>3: unsigned counterpart of xop_maskcmp<mode>3.
;; Operand 1 matches ix86_comparison_uns_operator and the mnemonic gets a
;; "u" after the %Y1 condition part (vpcom<cond>u<suffix>).
16267 (define_insn "xop_maskcmp_uns<mode>3"
16268 [(set (match_operand:VI_128 0 "register_operand" "=x")
16269 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16270 [(match_operand:VI_128 2 "register_operand" "x")
16271 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16273 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16274 [(set_attr "type" "ssecmp")
16275 (set_attr "prefix_data16" "0")
16276 (set_attr "prefix_rep" "0")
16277 (set_attr "prefix_extra" "2")
16278 (set_attr "length_immediate" "1")
16279 (set_attr "mode" "TI")])
16281 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
16282 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
16283 ;; the exact instruction generated for the intrinsic.
;; The unsigned comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, so the
;; comparison is opaque to the RTL optimizers; the assembler output is the
;; same vpcom<cond>u<suffix> form as xop_maskcmp_uns<mode>3.
16284 (define_insn "xop_maskcmp_uns2<mode>3"
16285 [(set (match_operand:VI_128 0 "register_operand" "=x")
16287 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16288 [(match_operand:VI_128 2 "register_operand" "x")
16289 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
16290 UNSPEC_XOP_UNSIGNED_CMP))]
16292 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16293 [(set_attr "type" "ssecmp")
16294 (set_attr "prefix_data16" "0")
16295 (set_attr "prefix_extra" "2")
16296 (set_attr "length_immediate" "1")
16297 (set_attr "mode" "TI")])
16299 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
16300 ;; being added here to be complete.
;; Operand 3 is a compile-time integer selecting the mnemonic: non-zero
;; emits vpcomtrue<suffix>, zero emits vpcomfalse<suffix>.  It is not
;; printed as an assembler operand.
16301 (define_insn "xop_pcom_tf<mode>3"
16302 [(set (match_operand:VI_128 0 "register_operand" "=x")
16304 [(match_operand:VI_128 1 "register_operand" "x")
16305 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
16306 (match_operand:SI 3 "const_int_operand" "n")]
16307 UNSPEC_XOP_TRUEFALSE))]
16310 return ((INTVAL (operands[3]) != 0)
16311 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16312 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
16314 [(set_attr "type" "ssecmp")
16315 (set_attr "prefix_data16" "0")
16316 (set_attr "prefix_extra" "2")
16317 (set_attr "length_immediate" "1")
16318 (set_attr "mode" "TI")])
;; xop_vpermil2<mode>3: emits vpermil2<ssemodesuffix> for 128- and 256-bit
;; float vectors.  Operands 1 and 2 are the two data sources, operand 3 is
;; the integer-vector selector (<sseintvecmode>) and operand 4 is a 2-bit
;; immediate (const_0_to_3_operand).
;; NOTE(review): the "%" modifier on operand 2 declares it commutative with
;; operand 3, yet operand 3 has a different mode and role (selector) --
;; confirm this constraint is intended.
16320 (define_insn "xop_vpermil2<mode>3"
16321 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16323 [(match_operand:VF_128_256 1 "register_operand" "x")
16324 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
16325 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
16326 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16329 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
16330 [(set_attr "type" "sse4arg")
16331 (set_attr "length_immediate" "1")
16332 (set_attr "mode" "<MODE>")])
16334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc: AES encryption round on V2DI, modelled as an unspec.
;;   alternative 0 (noavx): two-operand aesenc; operand 1 is tied to the
;;                          destination ("0").
;;   alternative 1 (avx):   three-operand VEX form vaesenc.
;; Operand 2 may be a register or memory in both alternatives.
16336 (define_insn "aesenc"
16337 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16338 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16339 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16343 aesenc\t{%2, %0|%0, %2}
16344 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16345 [(set_attr "isa" "noavx,avx")
16346 (set_attr "type" "sselog1")
16347 (set_attr "prefix_extra" "1")
16348 (set_attr "prefix" "orig,vex")
16349 (set_attr "btver2_decode" "double,double")
16350 (set_attr "mode" "TI")])
;; aesenclast: last AES encryption round on V2DI (UNSPEC_AESENCLAST).
;;   alternative 0 (noavx): two-operand aesenclast; operand 1 is tied to the
;;                          destination ("0").
;;   alternative 1 (avx):   three-operand VEX form vaesenclast.
16352 (define_insn "aesenclast"
16353 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16354 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16355 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16356 UNSPEC_AESENCLAST))]
16359 aesenclast\t{%2, %0|%0, %2}
16360 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16361 [(set_attr "isa" "noavx,avx")
16362 (set_attr "type" "sselog1")
16363 (set_attr "prefix_extra" "1")
16364 (set_attr "prefix" "orig,vex")
16365 (set_attr "btver2_decode" "double,double")
16366 (set_attr "mode" "TI")])
;; aesdec: AES decryption round on V2DI, modelled as an unspec.
;;   alternative 0 (noavx): two-operand aesdec; operand 1 is tied to the
;;                          destination ("0").
;;   alternative 1 (avx):   three-operand VEX form vaesdec.
16368 (define_insn "aesdec"
16369 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16370 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16371 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16375 aesdec\t{%2, %0|%0, %2}
16376 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16377 [(set_attr "isa" "noavx,avx")
16378 (set_attr "type" "sselog1")
16379 (set_attr "prefix_extra" "1")
16380 (set_attr "prefix" "orig,vex")
16381 (set_attr "btver2_decode" "double,double")
16382 (set_attr "mode" "TI")])
;; aesdeclast: last AES decryption round on V2DI (UNSPEC_AESDECLAST).
;;   alternative 0 (noavx): two-operand aesdeclast; operand 1 is tied to the
;;                          destination ("0").
;;   alternative 1 (avx):   three-operand VEX form vaesdeclast.
16384 (define_insn "aesdeclast"
16385 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16386 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16387 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16388 UNSPEC_AESDECLAST))]
16391 aesdeclast\t{%2, %0|%0, %2}
16392 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16393 [(set_attr "isa" "noavx,avx")
16394 (set_attr "type" "sselog1")
16395 (set_attr "prefix_extra" "1")
16396 (set_attr "prefix" "orig,vex")
16397 (set_attr "btver2_decode" "double,double")
16398 (set_attr "mode" "TI")])
;; aesimc: single-source AES unspec on V2DI.  One template serves both
;; encodings: the %v prefix makes the mnemonic aesimc or vaesimc, matching
;; the "maybe_vex" prefix attribute.
16400 (define_insn "aesimc"
16401 [(set (match_operand:V2DI 0 "register_operand" "=x")
16402 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
16405 "%vaesimc\t{%1, %0|%0, %1}"
16406 [(set_attr "type" "sselog1")
16407 (set_attr "prefix_extra" "1")
16408 (set_attr "prefix" "maybe_vex")
16409 (set_attr "mode" "TI")])
;; aeskeygenassist: UNSPEC_AESKEYGENASSIST on a V2DI source with an 8-bit
;; immediate (operand 2, const_0_to_255_operand).  The %v prefix selects the
;; legacy or VEX mnemonic ("maybe_vex").
16411 (define_insn "aeskeygenassist"
16412 [(set (match_operand:V2DI 0 "register_operand" "=x")
16413 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
16414 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16415 UNSPEC_AESKEYGENASSIST))]
16417 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
16418 [(set_attr "type" "sselog1")
16419 (set_attr "prefix_extra" "1")
16420 (set_attr "length_immediate" "1")
16421 (set_attr "prefix" "maybe_vex")
16422 (set_attr "mode" "TI")])
;; pclmulqdq: unspec on two V2DI sources plus an 8-bit immediate
;; (operand 3).
;;   alternative 0 (noavx): two-operand pclmulqdq; operand 1 is tied to the
;;                          destination ("0").
;;   alternative 1 (avx):   three-operand VEX form vpclmulqdq.
16424 (define_insn "pclmulqdq"
16425 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16426 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16427 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
16428 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16432 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
16433 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16434 [(set_attr "isa" "noavx,avx")
16435 (set_attr "type" "sselog1")
16436 (set_attr "prefix_extra" "1")
16437 (set_attr "length_immediate" "1")
16438 (set_attr "prefix" "orig,vex")
16439 (set_attr "mode" "TI")])
;; avx_vzeroall: builds the PARALLEL that the *avx_vzeroall insn matches.
;; Element 0 is an unspec_volatile marker; it is followed by one SET per SSE
;; register that stores the V8SI zero vector into that register.  The
;; register count is 16 when TARGET_64BIT and 8 otherwise, so the PARALLEL
;; has nregs + 1 elements.
16441 (define_expand "avx_vzeroall"
16442 [(match_par_dup 0 [(const_int 0)])]
16445 int nregs = TARGET_64BIT ? 16 : 8;
16448 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
16450 XVECEXP (operands[0], 0, 0)
16451 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
16454 for (regno = 0; regno < nregs; regno++)
16455 XVECEXP (operands[0], 0, regno + 1)
16456 = gen_rtx_SET (VOIDmode,
16457 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
16458 CONST0_RTX (V8SImode));
;; *avx_vzeroall: matches a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL unspec_volatile.
;; The attributes describe an instruction with no ModRM byte and no memory
;; access, VEX-encoded, in OImode.
16461 (define_insn "*avx_vzeroall"
16462 [(match_parallel 0 "vzeroall_operation"
16463 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
16466 [(set_attr "type" "sse")
16467 (set_attr "modrm" "0")
16468 (set_attr "memory" "none")
16469 (set_attr "prefix" "vex")
16470 (set_attr "btver2_decode" "vector")
16471 (set_attr "mode" "OI")])
16473 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
16474 ;; if the upper 128bits are unused.
;; Modelled as a bare unspec_volatile (UNSPECV_VZEROUPPER) with no operands;
;; the attributes describe a VEX-encoded instruction with no ModRM byte and
;; no memory access.
16475 (define_insn "avx_vzeroupper"
16476 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
16479 [(set_attr "type" "sse")
16480 (set_attr "modrm" "0")
16481 (set_attr "memory" "none")
16482 (set_attr "prefix" "vex")
16483 (set_attr "btver2_decode" "vector")
16484 (set_attr "mode" "OI")])
;; avx2_pbroadcast<mode>: vpbroadcast for every mode in VI.  The source is
;; the 128-bit vector of the same element type (<ssexmmmode>), register or
;; memory; element 0 is selected and replicated.  In Intel syntax a memory
;; source is printed with the <iptr> size prefix.
16486 (define_insn "avx2_pbroadcast<mode>"
16487 [(set (match_operand:VI 0 "register_operand" "=x")
16489 (vec_select:<ssescalarmode>
16490 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
16491 (parallel [(const_int 0)]))))]
16493 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16494 [(set_attr "type" "ssemov")
16495 (set_attr "prefix_extra" "1")
16496 (set_attr "prefix" "vex")
16497 (set_attr "mode" "<sseinsnmode>")])
;; avx2_pbroadcast<mode>_1: broadcast of element 0 when the source is itself
;; a 256-bit vector (VI_256).
;;   alternative 0 ("m"): memory source, printed with the <iptr> size prefix
;;                        in Intel syntax.
;;   alternative 1 ("x"): register source, printed through %x1 (its xmm
;;                        name).
16499 (define_insn "avx2_pbroadcast<mode>_1"
16500 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
16501 (vec_duplicate:VI_256
16502 (vec_select:<ssescalarmode>
16503 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
16504 (parallel [(const_int 0)]))))]
16507 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
16508 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
16509 [(set_attr "type" "ssemov")
16510 (set_attr "prefix_extra" "1")
16511 (set_attr "prefix" "vex")
16512 (set_attr "mode" "<sseinsnmode>")])
;; <avx2_avx512>_permvar<mode><mask_name>: variable permute (vperm<suffix>)
;; for the 256/512-bit modes in VI48F_256_512, modelled as an unspec.
;; Operand 1 is the data (register or memory), operand 2 the index vector
;; in the matching integer mode (register only).  Requires AVX2 plus the
;; mask-variant condition <mask_mode512bit_condition>.
16514 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
16515 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
16516 (unspec:VI48F_256_512
16517 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
16518 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16520 "TARGET_AVX2 && <mask_mode512bit_condition>"
16521 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16522 [(set_attr "type" "sselog")
16523 (set_attr "prefix" "<mask_prefix2>")
16524 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_permvar<mode><mask_name> (VI1_AVX512VL modes): variable byte
;; permute, modelled as an unspec.  Operand 1 is the data (register or
;; memory), operand 2 the index vector (register only).  Requires
;; AVX512VBMI plus <mask_mode512bit_condition>.
16526 (define_insn "<avx512>_permvar<mode><mask_name>"
16527 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
16528 (unspec:VI1_AVX512VL
16529 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
16530 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16532 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
16533 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16534 [(set_attr "type" "sselog")
16535 (set_attr "prefix" "<mask_prefix2>")
16536 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_permvar<mode><mask_name> (VI2_AVX512VL modes): variable word
;; permute, modelled as an unspec.  Operand 1 is the data (register or
;; memory), operand 2 the index vector (register only).  Requires AVX512BW
;; plus <mask_mode512bit_condition>.
16538 (define_insn "<avx512>_permvar<mode><mask_name>"
16539 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16540 (unspec:VI2_AVX512VL
16541 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
16542 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16544 "TARGET_AVX512BW && <mask_mode512bit_condition>"
16545 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16546 [(set_attr "type" "sselog")
16547 (set_attr "prefix" "<mask_prefix2>")
16548 (set_attr "mode" "<sseinsnmode>")])
;; <avx2_avx512>_perm<mode>: immediate permute expander for VI8F_256_512.
;; The 8-bit immediate (operand 2) is split into four 2-bit selectors --
;; bits 1:0, 3:2, 5:4 and 7:6 -- which are passed as separate constants to
;; the <avx2_avx512>_perm<mode>_1 insn.
16550 (define_expand "<avx2_avx512>_perm<mode>"
16551 [(match_operand:VI8F_256_512 0 "register_operand")
16552 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16553 (match_operand:SI 2 "const_0_to_255_operand")]
16556 int mask = INTVAL (operands[2]);
16557 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
16558 GEN_INT ((mask >> 0) & 3),
16559 GEN_INT ((mask >> 2) & 3),
16560 GEN_INT ((mask >> 4) & 3),
16561 GEN_INT ((mask >> 6) & 3)));
;; <avx512>_perm<mode>_mask: masked variant of the immediate permute
;; expander.  The 8-bit immediate (operand 2) is split into four 2-bit
;; selectors exactly as in the unmasked expander; operand 3 (a
;; vector_move_operand) and operand 4 (the mask register, in
;; <avx512fmaskmode>) are forwarded unchanged as the trailing arguments of
;; the _1_mask insn.
16565 (define_expand "<avx512>_perm<mode>_mask"
16566 [(match_operand:VI8F_256_512 0 "register_operand")
16567 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16568 (match_operand:SI 2 "const_0_to_255_operand")
16569 (match_operand:VI8F_256_512 3 "vector_move_operand")
16570 (match_operand:<avx512fmaskmode> 4 "register_operand")]
16573 int mask = INTVAL (operands[2]);
16574 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
16575 GEN_INT ((mask >> 0) & 3),
16576 GEN_INT ((mask >> 2) & 3),
16577 GEN_INT ((mask >> 4) & 3),
16578 GEN_INT ((mask >> 6) & 3),
16579 operands[3], operands[4]));
;; <avx2_avx512>_perm<mode>_1<mask_name>: the insn behind the immediate
;; permute expanders.  The RTL is a vec_select whose parallel holds four
;; separate 0..3 selectors (operands 2-5).  At output time they are packed
;; back into a single immediate -- operand 2 in bits 1:0, operand 3 in bits
;; 3:2, operand 4 in bits 5:4, operand 5 in bits 7:6 -- which overwrites
;; operands[2] and is printed as the vperm immediate.
16583 (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
16584 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
16585 (vec_select:VI8F_256_512
16586 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
16587 (parallel [(match_operand 2 "const_0_to_3_operand")
16588 (match_operand 3 "const_0_to_3_operand")
16589 (match_operand 4 "const_0_to_3_operand")
16590 (match_operand 5 "const_0_to_3_operand")])))]
16591 "TARGET_AVX2 && <mask_mode512bit_condition>"
16594 mask |= INTVAL (operands[2]) << 0;
16595 mask |= INTVAL (operands[3]) << 2;
16596 mask |= INTVAL (operands[4]) << 4;
16597 mask |= INTVAL (operands[5]) << 6;
16598 operands[2] = GEN_INT (mask);
16599 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16601 [(set_attr "type" "sselog")
16602 (set_attr "prefix" "<mask_prefix2>")
16603 (set_attr "mode" "<sseinsnmode>")])
;; avx2_permv2ti: emits vperm2i128 on two V4DI sources with an 8-bit
;; immediate (operand 3).  Operand 1 must be a register; operand 2 may be a
;; register or memory.
16605 (define_insn "avx2_permv2ti"
16606 [(set (match_operand:V4DI 0 "register_operand" "=x")
16608 [(match_operand:V4DI 1 "register_operand" "x")
16609 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
16610 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16613 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16614 [(set_attr "type" "sselog")
16615 (set_attr "prefix" "vex")
16616 (set_attr "mode" "OI")])
;; avx2_vec_dupv4df: emits vbroadcastsd with a register source.  Element 0
;; of a V2DF register is selected and duplicated into all four lanes of the
;; V4DF destination.
16618 (define_insn "avx2_vec_dupv4df"
16619 [(set (match_operand:V4DF 0 "register_operand" "=x")
16620 (vec_duplicate:V4DF
16622 (match_operand:V2DF 1 "register_operand" "x")
16623 (parallel [(const_int 0)]))))]
16625 "vbroadcastsd\t{%1, %0|%0, %1}"
16626 [(set_attr "type" "sselog1")
16627 (set_attr "prefix" "vex")
16628 (set_attr "mode" "V4DF")])
;; <avx512>_vec_dup<mode>_1: broadcast element 0 of a full-width
;; VI_AVX512BW vector into every lane of the destination with vpbroadcast.
;;   alternative 0 ("v"): register source, printed through %x1 (its xmm
;;                        name) in both syntaxes.
;;   alternative 1 ("m"): memory source; the Intel-syntax operand carries
;;                        the <iptr> element-size prefix.
;; The vec_select extracts a single element, so it is given the element
;; mode (<ssescalarmode>) rather than the vector mode.  The output string
;; starts with "@" so that each line is the template of one alternative;
;; without it the two lines form a single template that would be emitted in
;; full for either alternative.
16630 (define_insn "<avx512>_vec_dup<mode>_1"
16631 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
16632 (vec_duplicate:VI_AVX512BW
16633 (vec_select:<ssescalarmode>
16634 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
16635 (parallel [(const_int 0)]))))]
"@
16637 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
16638 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
16639 [(set_attr "type" "ssemov")
16640 (set_attr "prefix" "evex")
16641 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_vec_dup<mode><mask_name> (V48_AVX512VL modes): broadcast of
;; element 0 of a 128-bit source (<ssexmmmode>, register or memory).  The
;; mnemonic is assembled from <sseintprefix> and <bcstscalarsuff>, so the
;; same pattern covers the integer and floating-point broadcasts.
16643 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16644 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
16645 (vec_duplicate:V48_AVX512VL
16646 (vec_select:<ssescalarmode>
16647 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16648 (parallel [(const_int 0)]))))]
16650 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16651 [(set_attr "type" "ssemov")
16652 (set_attr "prefix" "evex")
16653 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_vec_dup<mode><mask_name> (VI12_AVX512VL modes): vpbroadcast of
;; element 0 of a 128-bit source (<ssexmmmode>, register or memory) for the
;; byte and word element modes.
16655 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16656 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16657 (vec_duplicate:VI12_AVX512VL
16658 (vec_select:<ssescalarmode>
16659 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16660 (parallel [(const_int 0)]))))]
16662 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16663 [(set_attr "type" "ssemov")
16664 (set_attr "prefix" "evex")
16665 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_broadcast<mode><mask_name> (V16FI modes): duplicate a whole
;; 128-bit vector (<ssexmmmode>) across the destination.
;;   alternative 0 ("v"): register source; emitted as vshuf<t>32x4 with
;;                        immediate 0 and the source (printed via %g1) used
;;                        for both inputs.
;;   alternative 1 ("m"): memory source; emitted as vbroadcast<t>32x4.
16667 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16668 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16669 (vec_duplicate:V16FI
16670 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16673 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16674 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16675 [(set_attr "type" "ssemov")
16676 (set_attr "prefix" "evex")
16677 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_broadcast<mode><mask_name> (V8FI modes): duplicate a half-width
;; vector (<ssehalfvecmode>) across the destination.
;;   alternative 0 ("v"): register source; emitted as vshuf<t>64x2 with
;;                        immediate 0x44 and the source (printed via %g1)
;;                        used for both inputs.
;;   alternative 1 ("m"): memory source; emitted as vbroadcast<t>64x4.
16679 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16680 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16681 (vec_duplicate:V8FI
16682 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16685 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16686 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16687 [(set_attr "type" "ssemov")
16688 (set_attr "prefix" "evex")
16689 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_vec_dup_gpr<mode><mask_name> (VI12_AVX512VL modes): broadcast a
;; scalar of the element mode.
;;   alternative 0 ("vm"): vector register or memory source, printed as %1.
;;   alternative 1 ("r"):  general-purpose register source, printed through
;;                         the %k1 modifier.
16691 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16692 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
16693 (vec_duplicate:VI12_AVX512VL
16694 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16697 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
16698 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
16699 [(set_attr "type" "ssemov")
16700 (set_attr "prefix" "evex")
16701 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_vec_dup_gpr<mode><mask_name> (V48_AVX512VL modes): broadcast a
;; scalar of the element mode; one template serves both alternatives.
;;   alternative 0 ("vm"): vector register or memory source.
;;   alternative 1 ("r"):  general-purpose register source.  The "enabled"
;;                         attribute tests it separately: the element mode
;;                         must be an integer mode, and DImode additionally
;;                         requires TARGET_64BIT.
16703 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16704 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
16705 (vec_duplicate:V48_AVX512VL
16706 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16708 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16709 [(set_attr "type" "ssemov")
16710 (set_attr "prefix" "evex")
16711 (set_attr "mode" "<sseinsnmode>")
16712 (set (attr "enabled")
16713 (if_then_else (eq_attr "alternative" "1")
16714 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
16715 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; Duplicate an SF scalar (operand 1) into all four elements of a V4SF
;; register.  Three alternatives, selected by the "isa" attribute
;; (avx, avx, noavx): vshufps with immediate 0 on a register source,
;; vbroadcastss, and a two-operand shufps with immediate 0 whose source is
;; tied to the destination (constraint "0").
;; NOTE(review): the insn condition and the opening of the output template
;; are not visible in this excerpt.
16718 (define_insn "vec_dupv4sf"
16719 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16720 (vec_duplicate:V4SF
16721 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
16724 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
16725 vbroadcastss\t{%1, %0|%0, %1}
16726 shufps\t{$0, %0, %0|%0, %0, 0}"
16727 [(set_attr "isa" "avx,avx,noavx")
16728 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
16729 (set_attr "length_immediate" "1,0,1")
16730 (set_attr "prefix_extra" "0,1,*")
16731 (set_attr "prefix" "vex,vex,orig")
16732 (set_attr "mode" "V4SF")])
;; Duplicate an SI scalar (operand 1) into all four elements of a V4SI
;; register.  Three alternatives, selected by the "isa" attribute
;; (sse2, avx, noavx): %vpshufd with immediate 0, vbroadcastss, and a
;; two-operand shufps with immediate 0 whose source is tied to the
;; destination (constraint "0").  The "mode" attribute is TI for the first
;; alternative and V4SF for the other two.
;; NOTE(review): the insn condition and the opening of the output template
;; are not visible in this excerpt.
16734 (define_insn "*vec_dupv4si"
16735 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
16736 (vec_duplicate:V4SI
16737 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
16740 %vpshufd\t{$0, %1, %0|%0, %1, 0}
16741 vbroadcastss\t{%1, %0|%0, %1}
16742 shufps\t{$0, %0, %0|%0, %0, 0}"
16743 [(set_attr "isa" "sse2,avx,noavx")
16744 (set_attr "type" "sselog1,ssemov,sselog1")
16745 (set_attr "length_immediate" "1,0,1")
16746 (set_attr "prefix_extra" "0,1,*")
16747 (set_attr "prefix" "maybe_vex,vex,orig")
16748 (set_attr "mode" "TI,V4SF,V4SF")])
;; Duplicate a DI scalar (operand 1) into both elements of a V2DI register.
;; The constraints and the "isa" attribute describe four alternatives
;; (sse2_noavx, avx, sse3, noavx).
;; NOTE(review): only two of the output template lines (vpunpcklqdq and
;; %vmovddup) and no insn condition are visible in this excerpt, so the
;; mapping of templates to alternatives cannot be confirmed from here.
16750 (define_insn "*vec_dupv2di"
16751 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
16752 (vec_duplicate:V2DI
16753 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
16757 vpunpcklqdq\t{%d1, %0|%0, %d1}
16758 %vmovddup\t{%1, %0|%0, %1}
16760 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
16761 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
16762 (set_attr "prefix" "orig,vex,maybe_vex,orig")
16763 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Emit vbroadcasti128 from a memory operand of mode <ssehalfvecmode>
;; (operand 1) into a VI_256 register (operand 0).
;; NOTE(review): the RTL expression wrapping operand 1 and the insn
;; condition are not visible in this excerpt.
16765 (define_insn "avx2_vbroadcasti128_<mode>"
16766 [(set (match_operand:VI_256 0 "register_operand" "=x")
16768 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16771 "vbroadcasti128\t{%1, %0|%0, %1}"
16772 [(set_attr "type" "ssemov")
16773 (set_attr "prefix_extra" "1")
16774 (set_attr "prefix" "vex")
16775 (set_attr "mode" "OI")])
16777 ;; Modes handled by AVX vec_dup patterns.
;; These are the 256-bit vectors with 32-bit or 64-bit elements, integer and
;; floating-point.
16778 (define_mode_iterator AVX_VEC_DUP_MODE
16779 [V8SI V8SF V4DI V4DF])
16780 ;; Modes handled by AVX2 vec_dup patterns.
;; These are integer vectors only: 8-bit, 16-bit and 32-bit elements, each
;; in its 256-bit and 128-bit form.
16781 (define_mode_iterator AVX2_VEC_DUP_MODE
16782 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; Duplicate a scalar (operand 1, mode <ssescalarmode>) into every element of
;; an AVX2_VEC_DUP_MODE vector.  The constraints "m,x,$r" give three
;; alternatives.  The two visible templates both use
;; v<sseintprefix>broadcast<bcstscalarsuff>; one prints operand 1 plainly
;; and the other prints it with the %x modifier.
;; NOTE(review): the insn condition and the template for the third
;; alternative are not visible in this excerpt.
16784 (define_insn "*vec_dup<mode>"
16785 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
16786 (vec_duplicate:AVX2_VEC_DUP_MODE
16787 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
16790 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16791 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16793 [(set_attr "type" "ssemov")
16794 (set_attr "prefix_extra" "1")
16795 (set_attr "prefix" "maybe_evex")
16796 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a scalar (operand 1, mode <ssescalarmode>) into every element of
;; an AVX_VEC_DUP_MODE vector.  Four alternatives, gated by the "isa"
;; attribute (avx2, noavx2, avx2, noavx2); the noavx2 alternatives are given
;; mode V8SF.
;; NOTE(review): only three output template lines are visible and the insn
;; condition is not, so the template of the fourth alternative ("?x"
;; source) cannot be confirmed from this excerpt.
16798 (define_insn "vec_dup<mode>"
16799 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x")
16800 (vec_duplicate:AVX_VEC_DUP_MODE
16801 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))]
16804 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16805 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
16806 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16808 [(set_attr "type" "ssemov")
16809 (set_attr "prefix_extra" "1")
16810 (set_attr "prefix" "maybe_evex")
16811 (set_attr "isa" "avx2,noavx2,avx2,noavx2")
16812 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")])
;; NOTE(review): the line that opens this pattern is not visible in this
;; excerpt; what follows is its RTL template, condition and preparation code.
;; Matches a vec_duplicate of a register scalar into an AVX2_VEC_DUP_MODE
;; vector.  The visible condition requires reload_completed and a source in
;; a general register, and is false when TARGET_AVX512VL holds together with
;; either TARGET_AVX512BW or an SImode scalar.  The visible preparation code
;; calls gen_vec_setv4si_0 with the V4SI low part of the destination, a zero
;; V4SI vector and the SImode low part of the source, then calls
;; gen_avx2_pbroadcast<mode> on the destination.
16815 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
16816 (vec_duplicate:AVX2_VEC_DUP_MODE
16817 (match_operand:<ssescalarmode> 1 "register_operand")))]
16819 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
16820 available, because then we can broadcast from GPRs directly.
16821 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
16822 for V*SI mode it requires just -mavx512vl. */
16823 && !(TARGET_AVX512VL
16824 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
16825 && reload_completed && GENERAL_REG_P (operands[1])"
16828 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
16829 CONST0_RTX (V4SImode),
16830 gen_lowpart (SImode, operands[1])));
16831 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
16832 gen_lowpart (<ssexmmmode>mode,
;; NOTE(review): the line that opens this pattern is not visible in this
;; excerpt; what follows is its RTL template, condition and replacement.
;; Matches a vec_duplicate of a register scalar into an AVX_VEC_DUP_MODE
;; vector when TARGET_AVX holds without TARGET_AVX2, after reload.  The
;; replacement first duplicates the scalar into operand 2 in
;; <ssehalfvecmode>, then uses a vec_concat of operand 2 with itself.
;; Operand 2 is created as the <ssehalfvecmode> register that has the same
;; register number as operand 0.
16838 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16839 (vec_duplicate:AVX_VEC_DUP_MODE
16840 (match_operand:<ssescalarmode> 1 "register_operand")))]
16841 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16842 [(set (match_dup 2)
16843 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16845 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16846 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Fill a V_256 register (operand 0) from a <ssehalfvecmode> operand 1.
;; The constraints "m,0,?x" give three alternatives and three templates are
;; visible: vbroadcast<i128> reading operand 1, vinsert<i128> with immediate
;; 1 inserting operand 1 into operand 0, and vperm2<i128> with immediate 0
;; naming operand 1 twice (%t1).
;; NOTE(review): the RTL expression wrapping operand 1, the insn condition
;; and the opening of the output template are not visible in this excerpt.
16848 (define_insn "avx_vbroadcastf128_<mode>"
16849 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16851 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16855 vbroadcast<i128>\t{%1, %0|%0, %1}
16856 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16857 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16858 [(set_attr "type" "ssemov,sselog1,sselog1")
16859 (set_attr "prefix_extra" "1")
16860 (set_attr "length_immediate" "0,1,1")
16861 (set_attr "prefix" "vex")
16862 (set_attr "mode" "<sseinsnmode>")])
16864 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
;; V16SI and V16SF are unconditional; V8SI, V4SI and V8SF are included only
;; when TARGET_AVX512VL holds.
16865 (define_mode_iterator VI4F_BRCST32x2
16866 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16867 V16SF (V8SF "TARGET_AVX512VL")])
;; Maps each 64-bit-element vector mode listed here to the two-element
;; vector mode with the same element type (V2DF or V2DI).
16869 (define_mode_attr 64x2mode
16870 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
;; Maps each 32-bit-element vector mode listed here to the two-element
;; vector mode with the same element type (V2SF or V2SI).
16872 (define_mode_attr 32x2mode
16873 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16874 (V8SF "V2SF") (V4SI "V2SI")])
;; Select elements 0 and 1 of operand 1 (mode <ssexmmmode>, register or
;; memory) as a <32x2mode> pair and duplicate that pair across a
;; VI4F_BRCST32x2 destination, emitted as vbroadcast<shuffletype>32x2.
;; NOTE(review): the insn condition is not visible in this excerpt.
16876 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16877 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16878 (vec_duplicate:VI4F_BRCST32x2
16879 (vec_select:<32x2mode>
16880 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16881 (parallel [(const_int 0) (const_int 1)]))))]
16883 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16884 [(set_attr "type" "ssemov")
16885 (set_attr "prefix_extra" "1")
16886 (set_attr "prefix" "evex")
16887 (set_attr "mode" "<sseinsnmode>")])
;; Replicate operand 1 (mode <ssexmmmode>, register or memory) across a
;; VI4F_256 destination.  The visible templates are a
;; vshuf<shuffletype>32x4 with immediate 0 that names operand 1 twice (%t1)
;; and a vbroadcast<shuffletype>32x4 that reads operand 1 directly; the
;; constraints "v,m" give two alternatives.
;; NOTE(review): the insn condition and the opening of the output template
;; are not visible in this excerpt.
16889 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16890 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16891 (vec_duplicate:VI4F_256
16892 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16895 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16896 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16897 [(set_attr "type" "ssemov")
16898 (set_attr "prefix_extra" "1")
16899 (set_attr "prefix" "evex")
16900 (set_attr "mode" "<sseinsnmode>")])
;; Replicate operand 1 (mode <ssehalfvecmode>, register or memory) across a
;; V16FI destination.  The visible templates are a vshuf<shuffletype>32x4
;; with immediate 0x44 that names operand 1 twice (%g1) and a
;; vbroadcast<shuffletype>32x8 that reads operand 1 directly; the
;; constraints "v,m" give two alternatives.
;; NOTE(review): the insn condition and the opening of the output template
;; are not visible in this excerpt.
16902 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16903 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16904 (vec_duplicate:V16FI
16905 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16908 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16909 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16910 [(set_attr "type" "ssemov")
16911 (set_attr "prefix_extra" "1")
16912 (set_attr "prefix" "evex")
16913 (set_attr "mode" "<sseinsnmode>")])
16915 ;; For broadcast[i|f]64x2
;; V8DI and V8DF are unconditional; V4DI and V4DF are included only when
;; TARGET_AVX512VL holds.
16916 (define_mode_iterator VI8F_BRCST64x2
16917 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; Replicate operand 1 (mode <64x2mode>, register or memory) across a
;; VI8F_BRCST64x2 destination.  The visible templates are a
;; vshuf<shuffletype>64x2 with immediate 0 that names operand 1 twice
;; (printed through the <concat_tg_mode> modifier) and a
;; vbroadcast<shuffletype>64x2 that reads operand 1 directly; the
;; constraints "v,m" give two alternatives.
;; NOTE(review): the insn condition and the opening of the output template
;; are not visible in this excerpt.
16919 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16920 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
16921 (vec_duplicate:VI8F_BRCST64x2
16922 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
16925 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
16926 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16927 [(set_attr "type" "ssemov")
16928 (set_attr "prefix_extra" "1")
16929 (set_attr "prefix" "evex")
16930 (set_attr "mode" "<sseinsnmode>")])
;; Emit vpbroadcastmb2q: a QImode register operand with constraint "Yk"
;; (operand 1) is duplicated into a VI8_AVX512VL destination.
;; NOTE(review): the RTL operation applied to operand 1 before the
;; vec_duplicate, and the insn condition, are not visible in this excerpt.
16932 (define_insn "avx512cd_maskb_vec_dup<mode>"
16933 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
16934 (vec_duplicate:VI8_AVX512VL
16936 (match_operand:QI 1 "register_operand" "Yk"))))]
16938 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
16939 [(set_attr "type" "mskmov")
16940 (set_attr "prefix" "evex")
16941 (set_attr "mode" "XI")])
;; Emit vpbroadcastmw2d: an HImode register operand with constraint "Yk"
;; (operand 1) is duplicated into a VI4_AVX512VL destination.
;; NOTE(review): the RTL operation applied to operand 1 before the
;; vec_duplicate, and the insn condition, are not visible in this excerpt.
16943 (define_insn "avx512cd_maskw_vec_dup<mode>"
16944 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
16945 (vec_duplicate:VI4_AVX512VL
16947 (match_operand:HI 1 "register_operand" "Yk"))))]
16949 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
16950 [(set_attr "type" "mskmov")
16951 (set_attr "prefix" "evex")
16952 (set_attr "mode" "XI")])
16954 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
16955 ;; If it so happens that the input is in memory, use vbroadcast.
16956 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF form.  Operand 2 is a match_parallel accepted by
;; avx_vbroadcast_operand and operand 3 is its first index, read by the
;; output code as the element number `elt'.  One visible path re-addresses
;; the memory operand as SFmode at byte offset elt * 4 and returns
;; vbroadcastss; the other turns elt into the immediate elt * 0x55 and
;; returns vpermilps.
;; NOTE(review): the case labels of the switch, the insn condition and the
;; RTL code wrapping operand 1 are not visible in this excerpt.
16957 (define_insn "*avx_vperm_broadcast_v4sf"
16958 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16960 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
16961 (match_parallel 2 "avx_vbroadcast_operand"
16962 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16965 int elt = INTVAL (operands[3]);
16966 switch (which_alternative)
16970 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
16971 return "vbroadcastss\t{%1, %0|%0, %k1}";
16973 operands[2] = GEN_INT (elt * 0x55);
16974 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
16976 gcc_unreachable ();
16979 [(set_attr "type" "ssemov,ssemov,sselog1")
16980 (set_attr "prefix_extra" "1")
16981 (set_attr "length_immediate" "0,0,1")
16982 (set_attr "prefix" "vex")
16983 (set_attr "mode" "SF,SF,V4SF")])
;; VF_256 pattern whose operand 2 is a match_parallel accepted by
;; avx_vbroadcast_operand; operand 3, its first index, is read as the
;; element number `elt'.  It is split after reload, except for V4DFmode when
;; TARGET_AVX2 holds, into a vec_duplicate of operand 1.  Visible
;; preparation code:
;;   - when TARGET_AVX2 and elt == 0, gen_vec_dup<mode> is called on the low
;;     part of the source;
;;   - gen_avx_vpermil<mode> is called with a mask of 15 or 0 (by elt & 1)
;;     for V4DFmode and (elt & 3) * 0x55 otherwise, followed by
;;     gen_avx_vperm2f128<mode>3 on the destination with mask
;;     (elt / (<ssescalarnum> / 2)) * 0x11;
;;   - operand 1 is re-addressed as a <ssescalarmode> scalar at byte offset
;;     elt * GET_MODE_SIZE (<ssescalarmode>mode).
;; NOTE(review): the tests that choose between these paths, the insn
;; condition and the output template are not visible in this excerpt.
16985 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
16986 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
16988 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
16989 (match_parallel 2 "avx_vbroadcast_operand"
16990 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16993 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
16994 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
16996 rtx op0 = operands[0], op1 = operands[1];
16997 int elt = INTVAL (operands[3]);
17003 if (TARGET_AVX2 && elt == 0)
17005 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17010 /* Shuffle element we care about into all elements of the 128-bit lane.
17011 The other lane gets shuffled too, but we don't care. */
17012 if (<MODE>mode == V4DFmode)
17013 mask = (elt & 1 ? 15 : 0);
17015 mask = (elt & 3) * 0x55;
17016 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17018 /* Shuffle the lane we care about into both lanes of the dest. */
17019 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17020 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
17024 operands[1] = adjust_address (op1, <ssescalarmode>mode,
17025 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate form of vpermil on VF2 modes, enabled under
;; TARGET_AVX && <mask_mode512bit_condition>.  Operand 2 is an immediate in
;; 0..255.  The preparation code converts it into an explicit index vector:
;; elements are handled in pairs starting at index i, and each entry is the
;; pair base i plus the corresponding bit of the immediate (bit i for the
;; first entry, bit i + 1 for the second).  The indices are packed into a
;; PARALLEL of <ssescalarnum> entries.
;; NOTE(review): the RTL code wrapping operand 1 and the assignment target
;; of the PARALLEL are not visible in this excerpt.
17028 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17029 [(set (match_operand:VF2 0 "register_operand")
17031 (match_operand:VF2 1 "nonimmediate_operand")
17032 (match_operand:SI 2 "const_0_to_255_operand")))]
17033 "TARGET_AVX && <mask_mode512bit_condition>"
17035 int mask = INTVAL (operands[2]);
17036 rtx perm[<ssescalarnum>];
17039 for (i = 0; i < <ssescalarnum>; i = i + 2)
17041 perm[i] = GEN_INT (((mask >> i) & 1) + i);
17042 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
17046 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate form of vpermil on VF1 modes, enabled under
;; TARGET_AVX && <mask_mode512bit_condition>.  Operand 2 is an immediate in
;; 0..255.  The preparation code converts it into an explicit index vector:
;; elements are handled in groups of four starting at index i, and the four
;; entries of every group are i plus the 2-bit fields of the immediate at
;; bit positions 0, 2, 4 and 6, so the same four fields are reused for each
;; group.  The indices are packed into a PARALLEL of <ssescalarnum> entries.
;; NOTE(review): the RTL code wrapping operand 1 and the assignment target
;; of the PARALLEL are not visible in this excerpt.
17049 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17050 [(set (match_operand:VF1 0 "register_operand")
17052 (match_operand:VF1 1 "nonimmediate_operand")
17053 (match_operand:SI 2 "const_0_to_255_operand")))]
17054 "TARGET_AVX && <mask_mode512bit_condition>"
17056 int mask = INTVAL (operands[2]);
17057 rtx perm[<ssescalarnum>];
17060 for (i = 0; i < <ssescalarnum>; i = i + 4)
17062 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
17063 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
17064 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
17065 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
17069 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matching insn for vpermil<ssemodesuffix> with an immediate, on VF modes.
;; Operand 2 is a match_parallel with an empty predicate; the insn condition
;; accepts it only when avx_vpermilp_parallel returns nonzero for it.  The
;; output code calls avx_vpermilp_parallel again, subtracts 1 from the
;; result, and substitutes that value for operand 2 as the immediate.
;; NOTE(review): the RTL code wrapping operand 1 is not visible in this
;; excerpt.
17072 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
17073 [(set (match_operand:VF 0 "register_operand" "=v")
17075 (match_operand:VF 1 "nonimmediate_operand" "vm")
17076 (match_parallel 2 ""
17077 [(match_operand 3 "const_int_operand")])))]
17078 "TARGET_AVX && <mask_mode512bit_condition>
17079 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
17081 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
17082 operands[2] = GEN_INT (mask);
17083 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
17085 [(set_attr "type" "sselog")
17086 (set_attr "prefix_extra" "1")
17087 (set_attr "length_immediate" "1")
17088 (set_attr "prefix" "<mask_prefix>")
17089 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control form of vpermil<ssemodesuffix> on VF modes, enabled under
;; TARGET_AVX && <mask_mode512bit_condition>.  Operand 1 is the data
;; register and operand 2 is a <sseintvecmode> register or memory operand.
;; NOTE(review): the RTL code combining the two operands (and its name) is
;; not visible in this excerpt.
17091 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
17092 [(set (match_operand:VF 0 "register_operand" "=v")
17094 [(match_operand:VF 1 "register_operand" "v")
17095 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
17097 "TARGET_AVX && <mask_mode512bit_condition>"
17098 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17099 [(set_attr "type" "sselog")
17100 (set_attr "prefix_extra" "1")
17101 (set_attr "btver2_decode" "vector")
17102 (set_attr "prefix" "<mask_prefix>")
17103 (set_attr "mode" "<sseinsnmode>")])
;; Five-operand expander for vpermi2var on VI48F modes with a mask (operand
;; 4).  It forwards operands 0-3 to the ..._maskz_1 generator and inserts a
;; zero vector of <MODE>mode before the mask operand.
;; NOTE(review): the expander condition is not visible in this excerpt.
17105 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17106 [(match_operand:VI48F 0 "register_operand" "=v")
17107 (match_operand:VI48F 1 "register_operand" "v")
17108 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17109 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17110 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17113 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17114 operands[0], operands[1], operands[2], operands[3],
17115 CONST0_RTX (<MODE>mode), operands[4]));
;; Five-operand expander for vpermi2var on VI1_AVX512VL modes with a mask
;; (operand 4), enabled under TARGET_AVX512VBMI.  It forwards operands 0-3 to
;; the ..._maskz_1 generator and inserts a zero vector of <MODE>mode before
;; the mask operand.
17119 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17120 [(match_operand:VI1_AVX512VL 0 "register_operand")
17121 (match_operand:VI1_AVX512VL 1 "register_operand")
17122 (match_operand:<sseintvecmode> 2 "register_operand")
17123 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17124 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17125 "TARGET_AVX512VBMI"
17127 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17128 operands[0], operands[1], operands[2], operands[3],
17129 CONST0_RTX (<MODE>mode), operands[4]));
;; Five-operand expander for vpermi2var on VI2_AVX512VL modes with a mask
;; (operand 4).  It forwards operands 0-3 to the ..._maskz_1 generator and
;; inserts a zero vector of <MODE>mode before the mask operand.
;; NOTE(review): the expander condition is not visible in this excerpt.
17133 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17134 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17135 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17136 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17137 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17138 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17141 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17142 operands[0], operands[1], operands[2], operands[3],
17143 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2<ssemodesuffix> on VI48F modes.  Operand 2 (mode <sseintvecmode>)
;; is tied to the destination by constraint "0", so the template names only
;; operands 3, 1 and 0; operand 3 may be a register or memory.
;; NOTE(review): the RTL code and unspec name grouping the three inputs, and
;; the insn condition, are not visible in this excerpt.
17147 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17148 [(set (match_operand:VI48F 0 "register_operand" "=v")
17150 [(match_operand:VI48F 1 "register_operand" "v")
17151 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17152 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17155 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17156 [(set_attr "type" "sselog")
17157 (set_attr "prefix" "evex")
17158 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2<ssemodesuffix> on VI1_AVX512VL modes, enabled under
;; TARGET_AVX512VBMI and represented as an unspec of three inputs.  Operand
;; 2 (mode <sseintvecmode>) is tied to the destination by constraint "0", so
;; the template names only operands 3, 1 and 0.
;; NOTE(review): the unspec name is not visible in this excerpt.
17160 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17161 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17162 (unspec:VI1_AVX512VL
17163 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17164 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17165 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17167 "TARGET_AVX512VBMI"
17168 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17169 [(set_attr "type" "sselog")
17170 (set_attr "prefix" "evex")
17171 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2<ssemodesuffix> on VI2_AVX512VL modes, represented as an unspec of
;; three inputs.  Operand 2 (mode <sseintvecmode>) is tied to the
;; destination by constraint "0", so the template names only operands 3, 1
;; and 0.
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this excerpt.
17173 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17174 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17175 (unspec:VI2_AVX512VL
17176 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17177 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17178 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17181 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17182 [(set_attr "type" "sselog")
17183 (set_attr "prefix" "evex")
17184 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix> on VI48F modes, using
;; UNSPEC_VPERMI2_MASK.  Operand 2 is tied to the destination (constraint
;; "0") and operand 4 is the mask register, printed in the template as
;; %{%4%} after the destination.
;; NOTE(review): the RTL code that combines the unspec with the mask, its
;; second arm, and the insn condition are not visible in this excerpt.
17186 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17187 [(set (match_operand:VI48F 0 "register_operand" "=v")
17190 [(match_operand:VI48F 1 "register_operand" "v")
17191 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17192 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17193 UNSPEC_VPERMI2_MASK)
17195 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17197 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17198 [(set_attr "type" "sselog")
17199 (set_attr "prefix" "evex")
17200 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix> on VI1_AVX512VL modes, enabled under
;; TARGET_AVX512VBMI.  The result is a vec_merge, under mask operand 4, of
;; an UNSPEC_VPERMI2_MASK of operands 1-3 with a second arm.  Operand 2 is
;; tied to the destination (constraint "0"); the mask is printed as %{%4%}.
;; NOTE(review): the second arm of the vec_merge is not visible in this
;; excerpt.
17202 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17203 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17204 (vec_merge:VI1_AVX512VL
17205 (unspec:VI1_AVX512VL
17206 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17207 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17208 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17209 UNSPEC_VPERMI2_MASK)
17211 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17212 "TARGET_AVX512VBMI"
17213 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17214 [(set_attr "type" "sselog")
17215 (set_attr "prefix" "evex")
17216 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix> on VI2_AVX512VL modes.  The result is a
;; vec_merge, under mask operand 4, of an UNSPEC_VPERMI2_MASK of operands
;; 1-3 with a second arm.  Operand 2 is tied to the destination (constraint
;; "0"); the mask is printed as %{%4%}.
;; NOTE(review): the second arm of the vec_merge and the insn condition are
;; not visible in this excerpt.
17218 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17219 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17220 (vec_merge:VI2_AVX512VL
17221 (unspec:VI2_AVX512VL
17222 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17223 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17224 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17225 UNSPEC_VPERMI2_MASK)
17227 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17229 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17230 [(set_attr "type" "sselog")
17231 (set_attr "prefix" "evex")
17232 (set_attr "mode" "<sseinsnmode>")])
;; Five-operand expander for vpermt2var on VI48F modes with a mask (operand
;; 4).  Here operand 1 has mode <sseintvecmode> and operand 2 has the data
;; mode.  It forwards operands 0-3 to the ..._maskz_1 generator and inserts
;; a zero vector of <MODE>mode before the mask operand.
;; NOTE(review): the expander condition is not visible in this excerpt.
17234 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17235 [(match_operand:VI48F 0 "register_operand" "=v")
17236 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17237 (match_operand:VI48F 2 "register_operand" "0")
17238 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17239 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17242 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17243 operands[0], operands[1], operands[2], operands[3],
17244 CONST0_RTX (<MODE>mode), operands[4]));
;; Five-operand expander for vpermt2var on VI1_AVX512VL modes with a mask
;; (operand 4), enabled under TARGET_AVX512VBMI.  Operand 1 has mode
;; <sseintvecmode> and operand 2 has the data mode.  It forwards operands
;; 0-3 to the ..._maskz_1 generator and inserts a zero vector of <MODE>mode
;; before the mask operand.
17248 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17249 [(match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17250 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17251 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17252 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")
17253 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17254 "TARGET_AVX512VBMI"
17256 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17257 operands[0], operands[1], operands[2], operands[3],
17258 CONST0_RTX (<MODE>mode), operands[4]));
;; Five-operand expander for vpermt2var on VI2_AVX512VL modes with a mask
;; (operand 4).  Operand 1 has mode <sseintvecmode> and operand 2 has the
;; data mode.  It forwards operands 0-3 to the ..._maskz_1 generator and
;; inserts a zero vector of <MODE>mode before the mask operand.
;; NOTE(review): the expander condition is not visible in this excerpt.
17262 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17263 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17264 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17265 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17266 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17267 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17270 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17271 operands[0], operands[1], operands[2], operands[3],
17272 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2<ssemodesuffix> on VI48F modes.  Operand 1 has mode
;; <sseintvecmode>; operand 2 (data mode) is tied to the destination by
;; constraint "0", so the template names only operands 3, 1 and 0.
;; NOTE(review): the RTL code and unspec name grouping the three inputs, and
;; the insn condition, are not visible in this excerpt.
17276 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17277 [(set (match_operand:VI48F 0 "register_operand" "=v")
17279 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17280 (match_operand:VI48F 2 "register_operand" "0")
17281 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17284 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17285 [(set_attr "type" "sselog")
17286 (set_attr "prefix" "evex")
17287 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2<ssemodesuffix> on VI1_AVX512VL modes, enabled under
;; TARGET_AVX512VBMI and represented as an unspec of three inputs.  Operand
;; 1 has mode <sseintvecmode>; operand 2 (data mode) is tied to the
;; destination by constraint "0".
;; NOTE(review): the unspec name is not visible in this excerpt.
17289 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17290 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17291 (unspec:VI1_AVX512VL
17292 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17293 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17294 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17296 "TARGET_AVX512VBMI"
17297 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17298 [(set_attr "type" "sselog")
17299 (set_attr "prefix" "evex")
17300 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2<ssemodesuffix> on VI2_AVX512VL modes, represented as an unspec of
;; three inputs.  Operand 1 has mode <sseintvecmode>; operand 2 (data mode)
;; is tied to the destination by constraint "0".
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this excerpt.
17302 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17303 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17304 (unspec:VI2_AVX512VL
17305 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17306 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17307 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17310 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17311 [(set_attr "type" "sselog")
17312 (set_attr "prefix" "evex")
17313 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix> on VI48F modes.  Operand 1 has mode
;; <sseintvecmode>, operand 2 is tied to the destination (constraint "0"),
;; and operand 4 is the mask register, printed in the template as %{%4%}
;; after the destination.
;; NOTE(review): the RTL code that combines the inputs with the mask, the
;; unspec name, and the insn condition are not visible in this excerpt.
17315 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17316 [(set (match_operand:VI48F 0 "register_operand" "=v")
17319 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17320 (match_operand:VI48F 2 "register_operand" "0")
17321 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17324 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17326 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17327 [(set_attr "type" "sselog")
17328 (set_attr "prefix" "evex")
17329 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix> on VI1_AVX512VL modes, enabled under
;; TARGET_AVX512VBMI.  The result is a vec_merge, under mask operand 4, of
;; an unspec of operands 1-3 with a second arm.  Operand 2 is tied to the
;; destination (constraint "0"); the mask is printed as %{%4%}.
;; NOTE(review): the unspec name and the second arm of the vec_merge are not
;; visible in this excerpt.
17331 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17332 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17333 (vec_merge:VI1_AVX512VL
17334 (unspec:VI1_AVX512VL
17335 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17336 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17337 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17340 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17341 "TARGET_AVX512VBMI"
17342 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17343 [(set_attr "type" "sselog")
17344 (set_attr "prefix" "evex")
17345 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix> on VI2_AVX512VL modes.  The result is a
;; vec_merge, under mask operand 4, of an unspec of operands 1-3 with a
;; second arm.  Operand 2 is tied to the destination (constraint "0"); the
;; mask is printed as %{%4%}.
;; NOTE(review): the unspec name, the second arm of the vec_merge and the
;; insn condition are not visible in this excerpt.
17347 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17348 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17349 (vec_merge:VI2_AVX512VL
17350 (unspec:VI2_AVX512VL
17351 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17352 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17353 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17356 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17358 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17359 [(set_attr "type" "sselog")
17360 (set_attr "prefix" "evex")
17361 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 on AVX256MODE2P modes; by default it produces an
;; UNSPEC_VPERMIL2F128 of operands 1, 2 and the 0..255 immediate operand 3.
;; When neither bit 7 nor bit 3 of the immediate is set
;; ((mask & 0x88) == 0), the preparation code instead builds a vec_select
;; from the vec_concat of operands 1 and 2: the low half of the index list
;; starts at (mask & 3) * nelt2 and the high half at
;; ((mask >> 4) & 3) * nelt2, where nelt2 is half the element count.  The
;; result is wrapped in a SET of operand 0.
;; NOTE(review): the expander condition and the statements that emit the
;; SET and finish the expansion are not visible in this excerpt.
17363 (define_expand "avx_vperm2f128<mode>3"
17364 [(set (match_operand:AVX256MODE2P 0 "register_operand")
17365 (unspec:AVX256MODE2P
17366 [(match_operand:AVX256MODE2P 1 "register_operand")
17367 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17368 (match_operand:SI 3 "const_0_to_255_operand")]
17369 UNSPEC_VPERMIL2F128))]
17372 int mask = INTVAL (operands[3]);
17373 if ((mask & 0x88) == 0)
17375 rtx perm[<ssescalarnum>], t1, t2;
17376 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17378 base = (mask & 3) * nelt2;
17379 for (i = 0; i < nelt2; ++i)
17380 perm[i] = GEN_INT (base + i);
17382 base = ((mask >> 4) & 3) * nelt2;
17383 for (i = 0; i < nelt2; ++i)
17384 perm[i + nelt2] = GEN_INT (base + i);
17386 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
17387 operands[1], operands[2]);
17388 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17389 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
17390 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
17396 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17397 ;; means that in order to represent this properly in rtl we'd have to
17398 ;; nest *another* vec_concat with a zero operand and do the select from
17399 ;; a 4x wide vector. That doesn't seem very nice.
;; This insn therefore keeps the operation opaque as UNSPEC_VPERMIL2F128 and
;; passes the 0..255 immediate (operand 3) to vperm2<i128> unchanged.
;; NOTE(review): the insn condition is not visible in this excerpt.
17400 (define_insn "*avx_vperm2f128<mode>_full"
17401 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17402 (unspec:AVX256MODE2P
17403 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17404 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17405 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17406 UNSPEC_VPERMIL2F128))]
17408 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17409 [(set_attr "type" "sselog")
17410 (set_attr "prefix_extra" "1")
17411 (set_attr "length_immediate" "1")
17412 (set_attr "prefix" "vex")
17413 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 written as a vec_select from the vec_concat of operands 1 and
;; 2.  Operand 3 is a match_parallel with an empty predicate; the visible
;; part of the condition accepts it only when avx_vperm2f128_parallel
;; returns nonzero.  The output code subtracts 1 from that return value to
;; get the immediate; two visible paths return vinsert<i128> with immediate
;; 0 or 1, and the remaining path returns vperm2<i128> with the computed
;; immediate.
;; NOTE(review): the tests that select the vinsert paths and the first part
;; of the insn condition are not visible in this excerpt.
17415 (define_insn "*avx_vperm2f128<mode>_nozero"
17416 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17417 (vec_select:AVX256MODE2P
17418 (vec_concat:<ssedoublevecmode>
17419 (match_operand:AVX256MODE2P 1 "register_operand" "x")
17420 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
17421 (match_parallel 3 ""
17422 [(match_operand 4 "const_int_operand")])))]
17424 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
17426 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
17428 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
17430 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
17431 operands[3] = GEN_INT (mask);
17432 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17434 [(set_attr "type" "sselog")
17435 (set_attr "prefix_extra" "1")
17436 (set_attr "length_immediate" "1")
17437 (set_attr "prefix" "vex")
17438 (set_attr "mode" "<sseinsnmode>")])
;; palignr on a single V_128 register, where operand 2 is a match_parallel
;; accepted by palignr_operand and operand 3 is its first index.  The output
;; code multiplies operand 3 by the size of the vector's element mode and
;; uses the product as the immediate.  Two alternatives, by the "isa"
;; attribute: noavx returns the two-operand palignr (source tied to the
;; destination), avx returns vpalignr naming operand 1 twice.
;; NOTE(review): the RTL code wrapping operand 1, the insn condition and the
;; case labels of the switch are not visible in this excerpt.
17440 (define_insn "*ssse3_palignr<mode>_perm"
17441 [(set (match_operand:V_128 0 "register_operand" "=x,x")
17443 (match_operand:V_128 1 "register_operand" "0,x")
17444 (match_parallel 2 "palignr_operand"
17445 [(match_operand 3 "const_int_operand" "n, n")])))]
17448 machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
17449 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
17451 switch (which_alternative)
17454 return "palignr\t{%2, %1, %0|%0, %1, %2}";
17456 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
17458 gcc_unreachable ();
17461 [(set_attr "isa" "noavx,avx")
17462 (set_attr "type" "sseishft")
17463 (set_attr "atom_unit" "sishuf")
17464 (set_attr "prefix_data16" "1,*")
17465 (set_attr "prefix_extra" "1")
17466 (set_attr "length_immediate" "1")
17467 (set_attr "prefix" "orig,vex")])
;; Masked half-vector insert for VI48F_256 modes.  Operand 2 is the
;; <ssehalfvecmode> value to insert, operand 3 is an immediate in 0..1
;; that is switched on to pick gen_vec_set_lo_<mode>_mask or
;; gen_vec_set_hi_<mode>_mask, and operands 4 and 5 are a same-mode vector
;; and a mask register.  The chosen generator takes five rtx arguments,
;; beginning with operands 0, 1, 2 and 4.
;; NOTE(review): the case labels, the final generator argument and the
;; expander condition are not visible in this excerpt.
17469 (define_expand "avx512vl_vinsert<mode>"
17470 [(match_operand:VI48F_256 0 "register_operand")
17471 (match_operand:VI48F_256 1 "register_operand")
17472 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17473 (match_operand:SI 3 "const_0_to_1_operand")
17474 (match_operand:VI48F_256 4 "register_operand")
17475 (match_operand:<avx512fmaskmode> 5 "register_operand")]
17478 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17480 switch (INTVAL (operands[3]))
17483 insn = gen_vec_set_lo_<mode>_mask;
17486 insn = gen_vec_set_hi_<mode>_mask;
17489 gcc_unreachable ();
17492 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Half-vector insert for V_256 modes.  Operand 2 is the <ssehalfvecmode>
;; value to insert and operand 3 is an immediate in 0..1 that is switched on
;; to pick gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>; the chosen
;; generator is called with operands 0, 1 and 2.
;; NOTE(review): the case labels and the expander condition are not visible
;; in this excerpt.
17497 (define_expand "avx_vinsertf128<mode>"
17498 [(match_operand:V_256 0 "register_operand")
17499 (match_operand:V_256 1 "register_operand")
17500 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17501 (match_operand:SI 3 "const_0_to_1_operand")]
17504 rtx (*insn)(rtx, rtx, rtx);
17506 switch (INTVAL (operands[3]))
17509 insn = gen_vec_set_lo_<mode>;
17512 insn = gen_vec_set_hi_<mode>;
17515 gcc_unreachable ();
17518 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a VI8F_256 vector: the result is the vec_concat
;; of operand 2 (<ssehalfvecmode>, register or memory) with elements 2 and 3
;; of operand 1.  With TARGET_AVX512VL the output is
;; vinsert<shuffletype>64x2 with immediate 0 (including the mask operand
;; substitution); the other visible return is vinsert<i128> with
;; immediate 0.
;; NOTE(review): the insn condition is not visible in this excerpt.  The
;; "prefix" attribute is "vex" for both returns even though one of them
;; carries <mask_operand3> — confirm this is intended.
17522 (define_insn "vec_set_lo_<mode><mask_name>"
17523 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17524 (vec_concat:VI8F_256
17525 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17526 (vec_select:<ssehalfvecmode>
17527 (match_operand:VI8F_256 1 "register_operand" "v")
17528 (parallel [(const_int 2) (const_int 3)]))))]
17531 if (TARGET_AVX512VL)
17532 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17534 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17536 [(set_attr "type" "sselog")
17537 (set_attr "prefix_extra" "1")
17538 (set_attr "length_immediate" "1")
17539 (set_attr "prefix" "vex")
17540 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI8F_256 vector: the result is the vec_concat
;; of elements 0 and 1 of operand 1 with operand 2 (<ssehalfvecmode>,
;; register or memory).  With TARGET_AVX512VL the output is
;; vinsert<shuffletype>64x2 with immediate 1 (including the mask operand
;; substitution); the other visible return is vinsert<i128> with
;; immediate 1.
;; NOTE(review): the insn condition is not visible in this excerpt.  The
;; "prefix" attribute is "vex" for both returns even though one of them
;; carries <mask_operand3> — confirm this is intended.
17542 (define_insn "vec_set_hi_<mode><mask_name>"
17543 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17544 (vec_concat:VI8F_256
17545 (vec_select:<ssehalfvecmode>
17546 (match_operand:VI8F_256 1 "register_operand" "v")
17547 (parallel [(const_int 0) (const_int 1)]))
17548 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17551 if (TARGET_AVX512VL)
17552 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17554 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17556 [(set_attr "type" "sselog")
17557 (set_attr "prefix_extra" "1")
17558 (set_attr "length_immediate" "1")
17559 (set_attr "prefix" "vex")
17560 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a VI4F_256 vector: the result is the vec_concat
;; of operand 2 (<ssehalfvecmode>, register or memory) with elements 4..7 of
;; operand 1.  With TARGET_AVX512VL the output is vinsert<shuffletype>32x4
;; with immediate 0 (including the mask operand substitution); the other
;; visible return is vinsert<i128> with immediate 0.
;; NOTE(review): the insn condition is not visible in this excerpt.  The
;; "prefix" attribute is "vex" for both returns even though one of them
;; carries <mask_operand3> — confirm this is intended.
17562 (define_insn "vec_set_lo_<mode><mask_name>"
17563 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17564 (vec_concat:VI4F_256
17565 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17566 (vec_select:<ssehalfvecmode>
17567 (match_operand:VI4F_256 1 "register_operand" "v")
17568 (parallel [(const_int 4) (const_int 5)
17569 (const_int 6) (const_int 7)]))))]
17572 if (TARGET_AVX512VL)
17573 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17575 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17577 [(set_attr "type" "sselog")
17578 (set_attr "prefix_extra" "1")
17579 (set_attr "length_immediate" "1")
17580 (set_attr "prefix" "vex")
17581 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI4F_256 vector: the result is the vec_concat
;; of elements 0..3 of operand 1 with operand 2 (<ssehalfvecmode>, register
;; or memory).  With TARGET_AVX512VL the output is vinsert<shuffletype>32x4
;; with immediate 1 (including the mask operand substitution); the other
;; visible return is vinsert<i128> with immediate 1.
;; NOTE(review): the insn condition is not visible in this excerpt.  The
;; "prefix" attribute is "vex" for both returns even though one of them
;; carries <mask_operand3> — confirm this is intended.
17583 (define_insn "vec_set_hi_<mode><mask_name>"
17584 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17585 (vec_concat:VI4F_256
17586 (vec_select:<ssehalfvecmode>
17587 (match_operand:VI4F_256 1 "register_operand" "v")
17588 (parallel [(const_int 0) (const_int 1)
17589 (const_int 2) (const_int 3)]))
17590 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17593 if (TARGET_AVX512VL)
17594 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17596 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17598 [(set_attr "type" "sselog")
17599 (set_attr "prefix_extra" "1")
17600 (set_attr "length_immediate" "1")
17601 (set_attr "prefix" "vex")
17602 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for V16HI: operand 2 is the V8HI value (register or
;; memory) and elements 8..15 are selected from operand 1.  Emitted as
;; vinsert%~128 with immediate 0.
;; NOTE(review): the RTL codes that combine operand 2 with the selection
;; from operand 1, and the insn condition, are not visible in this excerpt.
17604 (define_insn "vec_set_lo_v16hi"
17605 [(set (match_operand:V16HI 0 "register_operand" "=x")
17607 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
17609 (match_operand:V16HI 1 "register_operand" "x")
17610 (parallel [(const_int 8) (const_int 9)
17611 (const_int 10) (const_int 11)
17612 (const_int 12) (const_int 13)
17613 (const_int 14) (const_int 15)]))))]
17615 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17616 [(set_attr "type" "sselog")
17617 (set_attr "prefix_extra" "1")
17618 (set_attr "length_immediate" "1")
17619 (set_attr "prefix" "vex")
17620 (set_attr "mode" "OI")])
;; V16HI high-half insert: elements 0..7 are the ones selected from
;; operand 1 and operand 2 is the V8HI value (register or memory).  Emits
;; vinsert%~128 with immediate 0x1.
17622 (define_insn "vec_set_hi_v16hi"
17623 [(set (match_operand:V16HI 0 "register_operand" "=x")
17626 (match_operand:V16HI 1 "register_operand" "x")
17627 (parallel [(const_int 0) (const_int 1)
17628 (const_int 2) (const_int 3)
17629 (const_int 4) (const_int 5)
17630 (const_int 6) (const_int 7)]))
17631 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
17633 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17634 [(set_attr "type" "sselog")
17635 (set_attr "prefix_extra" "1")
17636 (set_attr "length_immediate" "1")
17637 (set_attr "prefix" "vex")
17638 (set_attr "mode" "OI")])
;; V32QI low-half insert: operand 2 is the V16QI value (register or memory)
;; and elements 16..31 are the ones selected from operand 1.  Emits
;; vinsert%~128 with immediate 0x0.
17640 (define_insn "vec_set_lo_v32qi"
17641 [(set (match_operand:V32QI 0 "register_operand" "=x")
17643 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
17645 (match_operand:V32QI 1 "register_operand" "x")
17646 (parallel [(const_int 16) (const_int 17)
17647 (const_int 18) (const_int 19)
17648 (const_int 20) (const_int 21)
17649 (const_int 22) (const_int 23)
17650 (const_int 24) (const_int 25)
17651 (const_int 26) (const_int 27)
17652 (const_int 28) (const_int 29)
17653 (const_int 30) (const_int 31)]))))]
17655 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17656 [(set_attr "type" "sselog")
17657 (set_attr "prefix_extra" "1")
17658 (set_attr "length_immediate" "1")
17659 (set_attr "prefix" "vex")
17660 (set_attr "mode" "OI")])
;; V32QI high-half insert: elements 0..15 are the ones selected from
;; operand 1 and operand 2 is the V16QI value (register or memory).  Emits
;; vinsert%~128 with immediate 0x1.
17662 (define_insn "vec_set_hi_v32qi"
17663 [(set (match_operand:V32QI 0 "register_operand" "=x")
17666 (match_operand:V32QI 1 "register_operand" "x")
17667 (parallel [(const_int 0) (const_int 1)
17668 (const_int 2) (const_int 3)
17669 (const_int 4) (const_int 5)
17670 (const_int 6) (const_int 7)
17671 (const_int 8) (const_int 9)
17672 (const_int 10) (const_int 11)
17673 (const_int 12) (const_int 13)
17674 (const_int 14) (const_int 15)]))
17675 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
17677 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17678 [(set_attr "type" "sselog")
17679 (set_attr "prefix_extra" "1")
17680 (set_attr "length_immediate" "1")
17681 (set_attr "prefix" "vex")
17682 (set_attr "mode" "OI")])
;; Masked vector load (v...maskmov, register destination).  Operand 1 is
;; the memory source and operand 2 is a register in the matching integer
;; vector mode; in AT&T order the template prints memory, operand 2, then
;; the destination.
17684 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
17685 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
17687 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
17688 (match_operand:V48_AVX2 1 "memory_operand" "m")]
17691 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
17692 [(set_attr "type" "sselog1")
17693 (set_attr "prefix_extra" "1")
17694 (set_attr "prefix" "vex")
17695 (set_attr "btver2_decode" "vector")
17696 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store (v...maskmov, memory destination).  Operand 0 is a
;; read-write memory operand ("+m"), operand 1 is a register in the matching
;; integer vector mode and operand 2 is the data register.
17698 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
17699 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
17701 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
17702 (match_operand:V48_AVX2 2 "register_operand" "x")
17706 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17707 [(set_attr "type" "sselog1")
17708 (set_attr "prefix_extra" "1")
17709 (set_attr "prefix" "vex")
17710 (set_attr "btver2_decode" "vector")
17711 (set_attr "mode" "<sseinsnmode>")])
;; Named expander "maskload<mode>": operand 0 is the destination register,
;; operand 1 the memory source and operand 2 a register in the matching
;; integer vector mode.
17713 (define_expand "maskload<mode>"
17714 [(set (match_operand:V48_AVX2 0 "register_operand")
17716 [(match_operand:<sseintvecmode> 2 "register_operand")
17717 (match_operand:V48_AVX2 1 "memory_operand")]
;; Named expander "maskstore<mode>": operand 0 is the memory destination,
;; operand 1 the data register and operand 2 a register in the matching
;; integer vector mode.
17721 (define_expand "maskstore<mode>"
17722 [(set (match_operand:V48_AVX2 0 "memory_operand")
17724 [(match_operand:<sseintvecmode> 2 "register_operand")
17725 (match_operand:V48_AVX2 1 "register_operand")
;; Cast from the half-width vector mode (operand 1) to the 256-bit mode
;; (operand 0), represented as an unspec.  The pattern is split once
;; reload has completed: the split code re-creates an operand as a hard
;; register in the other width with gen_rtx_REG (same REGNO) and then emits
;; an ordinary move between op0 and op1.
17730 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
17731 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
17732 (unspec:AVX256MODE2P
17733 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
17737 "&& reload_completed"
17740 rtx op0 = operands[0];
17741 rtx op1 = operands[1];
17743 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
17745 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
17746 emit_move_insn (op0, op1);
;; vec_init expander for the V_256 modes: hands operands 0 and 1 to
;; ix86_expand_vector_init, passing false as its first argument.
17750 (define_expand "vec_init<mode>"
17751 [(match_operand:V_256 0 "register_operand")
17755 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init expander for the VF48_I1248 modes: hands operands 0 and 1 to
;; ix86_expand_vector_init, passing false as its first argument.
17759 (define_expand "vec_init<mode>"
17760 [(match_operand:VF48_I1248 0 "register_operand")
17764 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Variable arithmetic right shift (vpsrav) for the VI48_AVX512F_AVX512VL
;; modes: operand 1 is shifted by the vector of counts in operand 2, which
;; may be a register or memory.  Requires TARGET_AVX2 plus the mask-related
;; condition.
17768 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17769 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
17770 (ashiftrt:VI48_AVX512F_AVX512VL
17771 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
17772 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
17773 "TARGET_AVX2 && <mask_mode512bit_condition>"
17774 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17775 [(set_attr "type" "sseishft")
17776 (set_attr "prefix" "maybe_evex")
17777 (set_attr "mode" "<sseinsnmode>")])
;; Variable arithmetic right shift for the VI2_AVX512VL modes: operand 1 is
;; shifted by the vector of counts in operand 2 (register or memory).
;; Always emitted as vpsravw.
17779 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17780 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17781 (ashiftrt:VI2_AVX512VL
17782 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17783 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17785 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17786 [(set_attr "type" "sseishft")
17787 (set_attr "prefix" "maybe_evex")
17788 (set_attr "mode" "<sseinsnmode>")])
;; Variable shifts covered by the any_lshift code iterator for the
;; VI48_AVX512F modes: operand 1 is shifted by the vector of counts in
;; operand 2 (register or memory).  The mnemonic is built from <vshift> and
;; the element-size suffix.
17790 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17791 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
17792 (any_lshift:VI48_AVX512F
17793 (match_operand:VI48_AVX512F 1 "register_operand" "v")
17794 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
17795 "TARGET_AVX2 && <mask_mode512bit_condition>"
17796 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17797 [(set_attr "type" "sseishft")
17798 (set_attr "prefix" "maybe_evex")
17799 (set_attr "mode" "<sseinsnmode>")])
;; Variable shifts covered by the any_lshift code iterator for the
;; VI2_AVX512VL modes: operand 1 is shifted by the vector of counts in
;; operand 2 (register or memory).
17801 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17802 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17803 (any_lshift:VI2_AVX512VL
17804 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17805 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17807 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17808 [(set_attr "type" "sseishft")
17809 (set_attr "prefix" "maybe_evex")
17810 (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two half-width vectors (operands 1 and 2) into a V_256_512
;; register.  Two alternatives, matching the per-alternative attributes
;; below ("sselog,ssemov"):
;;   0: operand 2 is a register or memory ("xm") -> vinsert with immediate
;;      0x1, using the <concat_tg_mode> view of operand 1;
;;   1: operand 2 satisfies the "C" constraint -> a vmovaps/vmovapd/vmovdqa
;;      of operand 1 into the %t0 or %x0 view of the destination, selected
;;      by get_attr_mode (insn).
17812 (define_insn "avx_vec_concat<mode>"
17813 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
17814 (vec_concat:V_256_512
17815 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
17816 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
17819 switch (which_alternative)
17822 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
17824 switch (get_attr_mode (insn))
17827 return "vmovaps\t{%1, %t0|%t0, %1}";
17829 return "vmovapd\t{%1, %t0|%t0, %1}";
17831 return "vmovaps\t{%1, %x0|%x0, %1}";
17833 return "vmovapd\t{%1, %x0|%x0, %1}";
17835 return "vmovdqa\t{%1, %t0|%t0, %1}";
17837 return "vmovdqa\t{%1, %x0|%x0, %1}";
17839 gcc_unreachable ();
17842 gcc_unreachable ();
17845 [(set_attr "type" "sselog,ssemov")
17846 (set_attr "prefix_extra" "1,*")
17847 (set_attr "length_immediate" "1,*")
17848 (set_attr "prefix" "maybe_evex")
17849 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps with a V4SF result: the conversion is modelled as a V8SF
;; unspec of the V8HI register operand 1, of which elements 0..3 are
;; selected.  Available with TARGET_F16C or TARGET_AVX512VL.
17851 (define_insn "vcvtph2ps<mask_name>"
17852 [(set (match_operand:V4SF 0 "register_operand" "=v")
17854 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
17856 (parallel [(const_int 0) (const_int 1)
17857 (const_int 2) (const_int 3)])))]
17858 "TARGET_F16C || TARGET_AVX512VL"
17859 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17860 [(set_attr "type" "ssecvt")
17861 (set_attr "prefix" "maybe_evex")
17862 (set_attr "mode" "V4SF")])
;; vcvtph2ps with a V4SF result taken directly from a V4HI memory operand.
;; NOTE(review): the attributes here are "prefix" "vex" and "mode" "V8SF",
;; whereas the register form of the V4SF conversion uses "maybe_evex" and
;; "V4SF" -- confirm whether the difference is intentional.
17864 (define_insn "*vcvtph2ps_load<mask_name>"
17865 [(set (match_operand:V4SF 0 "register_operand" "=v")
17866 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
17867 UNSPEC_VCVTPH2PS))]
17868 "TARGET_F16C || TARGET_AVX512VL"
17869 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17870 [(set_attr "type" "ssecvt")
17871 (set_attr "prefix" "vex")
17872 (set_attr "mode" "V8SF")])
;; vcvtph2ps with a V8SF result from a V8HI operand that may be a register
;; or memory.  Available with TARGET_F16C or TARGET_AVX512VL.
17874 (define_insn "vcvtph2ps256<mask_name>"
17875 [(set (match_operand:V8SF 0 "register_operand" "=v")
17876 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
17877 UNSPEC_VCVTPH2PS))]
17878 "TARGET_F16C || TARGET_AVX512VL"
17879 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17880 [(set_attr "type" "ssecvt")
17881 (set_attr "prefix" "vex")
17882 (set_attr "btver2_decode" "double")
17883 (set_attr "mode" "V8SF")])
;; vcvtph2ps with a V16SF result from a V16HI operand.  The operand's
;; predicate and constraint come from the round_saeonly substitution, and
;; the template prints <round_saeonly_mask_op2> next to operand 1.
17885 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
17886 [(set (match_operand:V16SF 0 "register_operand" "=v")
17888 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
17889 UNSPEC_VCVTPH2PS))]
17891 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
17892 [(set_attr "type" "ssecvt")
17893 (set_attr "prefix" "evex")
17894 (set_attr "mode" "V16SF")])
;; Masked vcvtps2ph expander with a V8HI result.  Operand 1 is the V4SF
;; source, operand 2 an immediate in 0..255, operand 3 a V8HI
;; vector_move_operand and operand 4 a QImode register.  The preparation
;; statement sets operands[5] to the V4HI zero constant.
17896 (define_expand "vcvtps2ph_mask"
17897 [(set (match_operand:V8HI 0 "register_operand")
17900 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17901 (match_operand:SI 2 "const_0_to_255_operand")]
17904 (match_operand:V8HI 3 "vector_move_operand")
17905 (match_operand:QI 4 "register_operand")))]
17907 "operands[5] = CONST0_RTX (V4HImode);")
;; Unmasked vcvtps2ph expander with a V8HI result.  Operand 1 is the V4SF
;; source and operand 2 an immediate in 0..255; the preparation statement
;; sets operands[3] to the V4HI zero constant.
17909 (define_expand "vcvtps2ph"
17910 [(set (match_operand:V8HI 0 "register_operand")
17912 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17913 (match_operand:SI 2 "const_0_to_255_operand")]
17917 "operands[3] = CONST0_RTX (V4HImode);")
;; vcvtps2ph into a V8HI register: the V4HI conversion result of operand 1
;; (immediate control in operand 2) is paired with operand 3, which must be
;; a V4HI zero constant (const0_operand).
17919 (define_insn "*vcvtps2ph<mask_name>"
17920 [(set (match_operand:V8HI 0 "register_operand" "=v")
17922 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
17923 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17925 (match_operand:V4HI 3 "const0_operand")))]
17926 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
17927 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17928 [(set_attr "type" "ssecvt")
17929 (set_attr "prefix" "maybe_evex")
17930 (set_attr "mode" "V4SF")])
;; vcvtps2ph storing its V4HI result straight to memory.  Operand 1 is the
;; V4SF source register and operand 2 the immediate control (0..255).
17932 (define_insn "*vcvtps2ph_store<mask_name>"
17933 [(set (match_operand:V4HI 0 "memory_operand" "=m")
17934 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
17935 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17936 UNSPEC_VCVTPS2PH))]
17937 "TARGET_F16C || TARGET_AVX512VL"
17938 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17939 [(set_attr "type" "ssecvt")
17940 (set_attr "prefix" "maybe_evex")
17941 (set_attr "mode" "V4SF")])
;; vcvtps2ph from a V8SF register to a V8HI destination that may be a
;; register or memory.  Operand 2 is the immediate control (0..255).
17943 (define_insn "vcvtps2ph256<mask_name>"
17944 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
17945 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
17946 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17947 UNSPEC_VCVTPS2PH))]
17948 "TARGET_F16C || TARGET_AVX512VL"
17949 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17950 [(set_attr "type" "ssecvt")
17951 (set_attr "prefix" "maybe_evex")
17952 (set_attr "btver2_decode" "vector")
17953 (set_attr "mode" "V8SF")])
;; vcvtps2ph from a V16SF register to a V16HI destination that may be a
;; register or memory.  Operand 2 is the immediate control (0..255).
17955 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
17956 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
17958 [(match_operand:V16SF 1 "register_operand" "v")
17959 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17960 UNSPEC_VCVTPS2PH))]
17962 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17963 [(set_attr "type" "ssecvt")
17964 (set_attr "prefix" "evex")
17965 (set_attr "mode" "V16SF")])
17967 ;; For gather* insn patterns
;; Data vector modes iterated over by the avx2_gather* patterns: the
;; 128-bit and 256-bit vectors with 4-byte or 8-byte elements.
17968 (define_mode_iterator VEC_GATHER_MODE
17969 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector operand in the gathersi/scattersi patterns,
;; keyed by data mode; every entry is a vector of SImode elements.
17970 (define_mode_attr VEC_GATHER_IDXSI
17971 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
17972 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
17973 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
17974 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Mode of the index vector operand in the gatherdi/scatterdi patterns,
;; keyed by data mode; every entry is a vector of DImode elements.  For
;; 4-byte-element data modes it has half as many elements as the data.
17976 (define_mode_attr VEC_GATHER_IDXDI
17977 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17978 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
17979 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
17980 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Mode of the data-side register operands in the gatherdi/scatterdi
;; patterns, keyed by data mode.  It equals the data mode except for V8SI,
;; V8SF, V16SI and V16SF, which map to the vector of half the width.
17982 (define_mode_attr VEC_GATHER_SRCDI
17983 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17984 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
17985 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
17986 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for the AVX2 gathers that use an SImode index vector.
;; Operands: 0 destination, 1 and 4 registers in the data mode, 2 the base
;; address (vsib_address_operand), 3 the index vector, 5 the scale, which
;; must satisfy const1248_operand; 6 is a scratch in the data mode.  The
;; preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR address.
;; The scale predicate is spelled exactly "const1248_operand" (no trailing
;; blank inside the string) so that it matches the matching insn patterns.
17988 (define_expand "avx2_gathersi<mode>"
17989 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17990 (unspec:VEC_GATHER_MODE
17991 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
17992 (mem:<ssescalarmode>
17994 [(match_operand 2 "vsib_address_operand")
17995 (match_operand:<VEC_GATHER_IDXSI>
17996 3 "register_operand")
17997 (match_operand:SI 5 "const1248_operand")]))
17998 (mem:BLK (scratch))
17999 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
18001 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18005 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18006 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with an SImode index vector.  Operand 2 is tied to the
;; earlyclobber output 0 and operand 5 is tied to the earlyclobber scratch
;; operand 1.  Operand 7 is the VSIB memory reference built from the base
;; address (operand 3), the index vector (operand 4) and the scale
;; (operand 6).  The template prints operands 1, 7 and 0.
18009 (define_insn "*avx2_gathersi<mode>"
18010 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18011 (unspec:VEC_GATHER_MODE
18012 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18013 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18015 [(match_operand:P 3 "vsib_address_operand" "Tv")
18016 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18017 (match_operand:SI 6 "const1248_operand" "n")]
18019 (mem:BLK (scratch))
18020 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18022 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18024 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18025 [(set_attr "type" "ssemov")
18026 (set_attr "prefix" "vex")
18027 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the SImode-index AVX2 gather with one fewer matched operand,
;; so the operand numbers are shifted: operand 6 is the VSIB memory
;; reference (base 2, index 3, scale 5) and operand 4 is tied to the
;; earlyclobber scratch operand 1.
18029 (define_insn "*avx2_gathersi<mode>_2"
18030 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18031 (unspec:VEC_GATHER_MODE
18033 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18035 [(match_operand:P 2 "vsib_address_operand" "Tv")
18036 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18037 (match_operand:SI 5 "const1248_operand" "n")]
18039 (mem:BLK (scratch))
18040 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18042 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18044 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18045 [(set_attr "type" "ssemov")
18046 (set_attr "prefix" "vex")
18047 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX2 gathers that use a DImode index vector.
;; Operands: 0 destination in the data mode, 1 and 4 registers in
;; <VEC_GATHER_SRCDI> mode, 2 the base address (vsib_address_operand), 3 the
;; index vector in <VEC_GATHER_IDXDI> mode, 5 the scale, which must satisfy
;; const1248_operand; 6 is a scratch in the data mode.  The preparation
;; code wraps operands 2, 3 and 5 into a Pmode UNSPEC_VSIBADDR address.
;; The scale predicate is spelled exactly "const1248_operand" (no trailing
;; blank inside the string) so that it matches the matching insn patterns.
18049 (define_expand "avx2_gatherdi<mode>"
18050 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18051 (unspec:VEC_GATHER_MODE
18052 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18053 (mem:<ssescalarmode>
18055 [(match_operand 2 "vsib_address_operand")
18056 (match_operand:<VEC_GATHER_IDXDI>
18057 3 "register_operand")
18058 (match_operand:SI 5 "const1248_operand")]))
18059 (mem:BLK (scratch))
18060 (match_operand:<VEC_GATHER_SRCDI>
18061 4 "register_operand")]
18063 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18067 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18068 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with a DImode index vector.  Operand 2 (in
;; <VEC_GATHER_SRCDI> mode) is tied to output 0 and operand 5 is tied to
;; the scratch operand 1.  The template prints the tied operands 5 and 2
;; rather than 1 and 0, so registers appear in <VEC_GATHER_SRCDI> mode.
;; Operand 7 is the VSIB memory reference (base 3, index 4, scale 6).
18071 (define_insn "*avx2_gatherdi<mode>"
18072 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18073 (unspec:VEC_GATHER_MODE
18074 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18075 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18077 [(match_operand:P 3 "vsib_address_operand" "Tv")
18078 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18079 (match_operand:SI 6 "const1248_operand" "n")]
18081 (mem:BLK (scratch))
18082 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18084 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18086 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
18087 [(set_attr "type" "ssemov")
18088 (set_attr "prefix" "vex")
18089 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the DImode-index AVX2 gather with one fewer matched operand:
;; operand 6 is the VSIB memory reference (base 2, index 3, scale 5) and
;; operand 4 is tied to the scratch operand 1.  When the data mode differs
;; from <VEC_GATHER_SRCDI> the destination is printed through the %x0
;; modifier; otherwise it is printed as plain %0.
18091 (define_insn "*avx2_gatherdi<mode>_2"
18092 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18093 (unspec:VEC_GATHER_MODE
18095 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18097 [(match_operand:P 2 "vsib_address_operand" "Tv")
18098 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18099 (match_operand:SI 5 "const1248_operand" "n")]
18101 (mem:BLK (scratch))
18102 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18104 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18107 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18108 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
18109 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
18111 [(set_attr "type" "ssemov")
18112 (set_attr "prefix" "vex")
18113 (set_attr "mode" "<sseinsnmode>")])
;; DImode-index AVX2 gather whose result is consumed through a vec_select
;; of elements 0..3, so the output operand 0 is in <VEC_GATHER_SRCDI> mode.
;; Operand 2 is tied to output 0 and operand 5 to the VI4F_256 scratch
;; operand 1; operand 7 is the VSIB memory reference (base 3, index 4,
;; scale 6).
18115 (define_insn "*avx2_gatherdi<mode>_3"
18116 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18117 (vec_select:<VEC_GATHER_SRCDI>
18119 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18120 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18122 [(match_operand:P 3 "vsib_address_operand" "Tv")
18123 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18124 (match_operand:SI 6 "const1248_operand" "n")]
18126 (mem:BLK (scratch))
18127 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18129 (parallel [(const_int 0) (const_int 1)
18130 (const_int 2) (const_int 3)])))
18131 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18133 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18134 [(set_attr "type" "ssemov")
18135 (set_attr "prefix" "vex")
18136 (set_attr "mode" "<sseinsnmode>")])
;; Counterpart of the _3 pattern with one fewer matched operand: the result
;; is again a vec_select of elements 0..3 in <VEC_GATHER_SRCDI> mode.
;; Operand 6 is the VSIB memory reference (base 2, index 3, scale 5) and
;; operand 4 is tied to the VI4F_256 scratch operand 1.
18138 (define_insn "*avx2_gatherdi<mode>_4"
18139 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18140 (vec_select:<VEC_GATHER_SRCDI>
18143 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18145 [(match_operand:P 2 "vsib_address_operand" "Tv")
18146 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18147 (match_operand:SI 5 "const1248_operand" "n")]
18149 (mem:BLK (scratch))
18150 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18152 (parallel [(const_int 0) (const_int 1)
18153 (const_int 2) (const_int 3)])))
18154 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18156 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18157 [(set_attr "type" "ssemov")
18158 (set_attr "prefix" "vex")
18159 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gathers that use an SImode index vector.
;; Operands: 0 destination, 1 a register in the data mode, 4 a register in
;; the mask mode, 2 the base address, 3 the index vector, 5 the scale
;; (const1248_operand); 7 is a scratch in the mask mode.  The preparation
;; code wraps operands 2, 3 and 5 into a Pmode UNSPEC_VSIBADDR address.
18161 (define_expand "<avx512>_gathersi<mode>"
18162 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18164 [(match_operand:VI48F 1 "register_operand")
18165 (match_operand:<avx512fmaskmode> 4 "register_operand")
18166 (mem:<ssescalarmode>
18168 [(match_operand 2 "vsib_address_operand")
18169 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18170 (match_operand:SI 5 "const1248_operand")]))]
18172 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18176 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18177 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with an SImode index vector.  Operand 1 is tied to the
;; earlyclobber output 0; the mask-mode operand 7 is tied to the
;; earlyclobber "Yk" scratch operand 2, which the template prints inside
;; braces after the destination.  Operand 6 is the VSIB memory reference
;; (base 4, index 3, scale 5).
18180 (define_insn "*avx512f_gathersi<mode>"
18181 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18183 [(match_operand:VI48F 1 "register_operand" "0")
18184 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18185 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18187 [(match_operand:P 4 "vsib_address_operand" "Tv")
18188 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18189 (match_operand:SI 5 "const1248_operand" "n")]
18190 UNSPEC_VSIBADDR)])]
18192 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
18194 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18195 [(set_attr "type" "ssemov")
18196 (set_attr "prefix" "evex")
18197 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the SImode-index AVX-512 gather with one fewer matched
;; operand: the mask-mode operand 6 is tied to the earlyclobber "Yk"
;; scratch operand 1, and operand 5 is the VSIB memory reference (base 3,
;; index 2, scale 4).
18199 (define_insn "*avx512f_gathersi<mode>_2"
18200 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18203 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18204 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18206 [(match_operand:P 3 "vsib_address_operand" "Tv")
18207 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18208 (match_operand:SI 4 "const1248_operand" "n")]
18209 UNSPEC_VSIBADDR)])]
18211 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18213 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18214 [(set_attr "type" "ssemov")
18215 (set_attr "prefix" "evex")
18216 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gathers that use a DImode index vector.
;; Operands: 0 destination, 1 a register in <VEC_GATHER_SRCDI> mode, 4 a
;; QImode register, 2 the base address, 3 the index vector in
;; <VEC_GATHER_IDXDI> mode, 5 the scale (const1248_operand); 7 is a QImode
;; scratch.  The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR address.
18219 (define_expand "<avx512>_gatherdi<mode>"
18220 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18222 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18223 (match_operand:QI 4 "register_operand")
18224 (mem:<ssescalarmode>
18226 [(match_operand 2 "vsib_address_operand")
18227 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18228 (match_operand:SI 5 "const1248_operand")]))]
18230 (clobber (match_scratch:QI 7))])]
18234 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18235 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with a DImode index vector.  Operand 1 (in
;; <VEC_GATHER_SRCDI> mode) is tied to output 0 and is the operand the
;; template prints as the destination.  The QImode operand 7 is tied to the
;; earlyclobber "Yk" scratch operand 2, printed inside braces.  Operand 6
;; is the VSIB memory reference (base 4, index 3, scale 5).
18238 (define_insn "*avx512f_gatherdi<mode>"
18239 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18241 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18242 (match_operand:QI 7 "register_operand" "2")
18243 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18245 [(match_operand:P 4 "vsib_address_operand" "Tv")
18246 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18247 (match_operand:SI 5 "const1248_operand" "n")]
18248 UNSPEC_VSIBADDR)])]
18250 (clobber (match_scratch:QI 2 "=&Yk"))]
18252 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18253 [(set_attr "type" "ssemov")
18254 (set_attr "prefix" "evex")
18255 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the DImode-index AVX-512 gather with one fewer matched
;; operand: the QImode operand 6 is tied to the earlyclobber "Yk" scratch
;; operand 1 and operand 5 is the VSIB memory reference (base 3, index 2,
;; scale 4).
;; When the data mode differs from <VEC_GATHER_SRCDI> the instruction only
;; writes a narrower register, so the destination is printed through a
;; size modifier: %x0 when the data mode is not 64 bytes wide, %t0 when it
;; is.  Both assembler dialects must use the same modifier; the Intel half
;; of the non-64-byte template previously printed %t0 while the AT&T half
;; printed %x0, naming a different register width per dialect.
18257 (define_insn "*avx512f_gatherdi<mode>_2"
18258 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18261 (match_operand:QI 6 "register_operand" "1")
18262 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18264 [(match_operand:P 3 "vsib_address_operand" "Tv")
18265 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18266 (match_operand:SI 4 "const1248_operand" "n")]
18267 UNSPEC_VSIBADDR)])]
18269 (clobber (match_scratch:QI 1 "=&Yk"))]
18272 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18274 if (<MODE_SIZE> != 64)
18275 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18277 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18279 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18281 [(set_attr "type" "ssemov")
18282 (set_attr "prefix" "evex")
18283 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatters that use an SImode index vector.
;; Operands: 0 the base address, 2 the index vector, 4 the scale
;; (const1248_operand), 1 a register in the mask mode, 3 the data register;
;; 6 is a scratch in the mask mode.  The preparation code wraps operands
;; 0, 2 and 4 into a Pmode UNSPEC_VSIBADDR address.
18285 (define_expand "<avx512>_scattersi<mode>"
18286 [(parallel [(set (mem:VI48F
18288 [(match_operand 0 "vsib_address_operand")
18289 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18290 (match_operand:SI 4 "const1248_operand")]))
18292 [(match_operand:<avx512fmaskmode> 1 "register_operand")
18293 (match_operand:VI48F 3 "register_operand")]
18295 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18299 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18300 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with an SImode index vector.  Operand 5 is the VSIB
;; memory destination (base 0, index 2, scale 4) and operand 3 the data
;; register.  The mask-mode operand 6 is tied to the earlyclobber "Yk"
;; scratch operand 1, which the template prints inside braces.
18303 (define_insn "*avx512f_scattersi<mode>"
18304 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18306 [(match_operand:P 0 "vsib_address_operand" "Tv")
18307 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18308 (match_operand:SI 4 "const1248_operand" "n")]
18311 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18312 (match_operand:VI48F 3 "register_operand" "v")]
18314 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18316 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18317 [(set_attr "type" "ssemov")
18318 (set_attr "prefix" "evex")
18319 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatters that use a DImode index vector.
;; Operands: 0 the base address, 2 the index vector in <VEC_GATHER_IDXDI>
;; mode, 4 the scale (const1248_operand), 1 a QImode register, 3 the data
;; register in <VEC_GATHER_SRCDI> mode; 6 is a QImode scratch.  The
;; preparation code wraps operands 0, 2 and 4 into a Pmode UNSPEC_VSIBADDR
;; address.
18321 (define_expand "<avx512>_scatterdi<mode>"
18322 [(parallel [(set (mem:VI48F
18324 [(match_operand 0 "vsib_address_operand")
18325 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
18326 (match_operand:SI 4 "const1248_operand")]))
18328 [(match_operand:QI 1 "register_operand")
18329 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18331 (clobber (match_scratch:QI 6))])]
18335 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18336 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with a DImode index vector.  Operand 5 is the VSIB
;; memory destination (base 0, index 2, scale 4) and operand 3 the data
;; register in <VEC_GATHER_SRCDI> mode.  The QImode operand 6 is tied to
;; the earlyclobber "Yk" scratch operand 1, printed inside braces.
18339 (define_insn "*avx512f_scatterdi<mode>"
18340 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18342 [(match_operand:P 0 "vsib_address_operand" "Tv")
18343 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18344 (match_operand:SI 4 "const1248_operand" "n")]
18347 [(match_operand:QI 6 "register_operand" "1")
18348 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18350 (clobber (match_scratch:QI 1 "=&Yk"))]
18352 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18353 [(set_attr "type" "ssemov")
18354 (set_attr "prefix" "evex")
18355 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress into a register.  Operand 1 is the source register,
;; operand 2 is either tied to the destination or satisfies the "C"
;; constraint ("0C"), and operand 3 is the mask register ("Yk"), printed
;; inside braces and followed by the %N2 modifier.
18357 (define_insn "<avx512>_compress<mode>_mask"
18358 [(set (match_operand:VI48F 0 "register_operand" "=v")
18360 [(match_operand:VI48F 1 "register_operand" "v")
18361 (match_operand:VI48F 2 "vector_move_operand" "0C")
18362 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
18365 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18366 [(set_attr "type" "ssemov")
18367 (set_attr "prefix" "evex")
18368 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress with a memory destination (UNSPEC_COMPRESS_STORE).
;; Operand 1 is the source register and operand 2 the mask register
;; ("Yk"), printed inside braces.  The "memory" attribute is "store".
18370 (define_insn "<avx512>_compressstore<mode>_mask"
18371 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18373 [(match_operand:VI48F 1 "register_operand" "x")
18375 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
18376 UNSPEC_COMPRESS_STORE))]
18378 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
18379 [(set_attr "type" "ssemov")
18380 (set_attr "prefix" "evex")
18381 (set_attr "memory" "store")
18382 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking form of the expand operation: the preparation statement
;; overwrites operands[2] with the zero vector of the data mode before the
;; pattern is generated.
18384 (define_expand "<avx512>_expand<mode>_maskz"
18385 [(set (match_operand:VI48F 0 "register_operand")
18387 [(match_operand:VI48F 1 "nonimmediate_operand")
18388 (match_operand:VI48F 2 "vector_move_operand")
18389 (match_operand:<avx512fmaskmode> 3 "register_operand")]
18392 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand.  Operand 1 is the source, a register in alternative 0 and
;; memory in alternative 1 (hence "memory" "none,load").  Operand 2 is
;; either tied to the destination or satisfies "C", and operand 3 is the
;; mask register, printed inside braces and followed by %N2.
18394 (define_insn "<avx512>_expand<mode>_mask"
18395 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
18397 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
18398 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
18399 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
18402 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18403 [(set_attr "type" "ssemov")
18404 (set_attr "prefix" "evex")
18405 (set_attr "memory" "none,load")
18406 (set_attr "mode" "<sseinsnmode>")])
;; Packed vrange for the VF_AVX512VL modes.  Operands 1 and 2 are the two
;; sources (operand 2's predicate and constraint come from the
;; round_saeonly substitution) and operand 3 is an immediate in 0..15.
;; Requires TARGET_AVX512DQ.
18408 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
18409 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18410 (unspec:VF_AVX512VL
18411 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18412 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18413 (match_operand:SI 3 "const_0_to_15_operand")]
18415 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
18416 "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
18417 [(set_attr "type" "sse")
18418 (set_attr "prefix" "evex")
18419 (set_attr "mode" "<MODE>")])
;; Scalar vrange on VF_128 operands (the mnemonic uses the scalar mode
;; suffix).  Operands 1 and 2 are the two sources and operand 3 is an
;; immediate in 0..15.
18421 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
18422 [(set (match_operand:VF_128 0 "register_operand" "=v")
18425 [(match_operand:VF_128 1 "register_operand" "v")
18426 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18427 (match_operand:SI 3 "const_0_to_15_operand")]
18432 "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
18433 [(set_attr "type" "sse")
18434 (set_attr "prefix" "evex")
18435 (set_attr "mode" "<MODE>")])
;; Packed vfpclass: produces a mask-mode result in a "Yk" register from the
;; vector register operand 1 and the immediate operand 2 (0..255).
;; The ';' after the template string starts a machine-description comment.
18437 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
18438 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18439 (unspec:<avx512fmaskmode>
18440 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18441 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18444 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
18445 [(set_attr "type" "sse")
18446 (set_attr "length_immediate" "1")
18447 (set_attr "prefix" "evex")
18448 (set_attr "mode" "<MODE>")])
;; Scalar vfpclass on a VF_128 operand: the mask-mode unspec result is
;; combined through an `and` before being stored in the "Yk" destination.
;; Operand 2 is an immediate in 0..255.
18450 (define_insn "avx512dq_vmfpclass<mode>"
18451 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18452 (and:<avx512fmaskmode>
18453 (unspec:<avx512fmaskmode>
18454 [(match_operand:VF_128 1 "register_operand" "v")
18455 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18459 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
18460 [(set_attr "type" "sse")
18461 (set_attr "length_immediate" "1")
18462 (set_attr "prefix" "evex")
18463 (set_attr "mode" "<MODE>")])
;; Packed vgetmant for the VF_AVX512VL modes.  Operand 1 is the source
;; (constraint supplied by the round_saeonly substitution) and operand 2 is
;; an immediate in 0..15.
18465 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
18466 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18467 (unspec:VF_AVX512VL
18468 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
18469 (match_operand:SI 2 "const_0_to_15_operand")]
18472 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
18473 [(set_attr "prefix" "evex")
18474 (set_attr "mode" "<MODE>")])
;; Scalar vgetmant on VF_128 operands (the mnemonic uses the scalar mode
;; suffix and the "mode" attribute is the scalar mode).  Operands 1 and 2
;; are the two sources and operand 3 is an immediate in 0..15.
18476 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
18477 [(set (match_operand:VF_128 0 "register_operand" "=v")
18480 [(match_operand:VF_128 1 "register_operand" "v")
18481 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18482 (match_operand:SI 3 "const_0_to_15_operand")]
18487 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
18488 [(set_attr "prefix" "evex")
18489 (set_attr "mode" "<ssescalarmode>")])
18491 ;; The correct representation for this is absolutely enormous, and
18492 ;; surely not generally useful.
;; Hence vdbpsadbw is modelled as an opaque unspec.  The result is in a
;; VI2_AVX512VL mode, operands 1 and 2 are in the corresponding
;; <dbpsadbwmode> (operand 2 may be memory) and operand 3 is an immediate
;; in 0..255.
18493 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
18494 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18495 (unspec:VI2_AVX512VL
18496 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
18497 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
18498 (match_operand:SI 3 "const_0_to_255_operand")]
18501 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
18502 [(set_attr "isa" "avx")
18503 (set_attr "type" "sselog1")
18504 (set_attr "length_immediate" "1")
18505 (set_attr "prefix" "evex")
18506 (set_attr "mode" "<sseinsnmode>")])
;; Named pattern "clz<mode>2" for the VI48_AVX512VL modes, emitted as
;; vplzcnt.  Operand 1 may be a register or memory.
18508 (define_insn "clz<mode>2<mask_name>"
18509 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18511 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
18513 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18514 [(set_attr "type" "sse")
18515 (set_attr "prefix" "evex")
18516 (set_attr "mode" "<sseinsnmode>")])
;; vpconflict for the VI48_AVX512VL modes, modelled as an unspec of
;; operand 1, which may be a register or memory.
18518 (define_insn "<mask_codefor>conflict<mode><mask_name>"
18519 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18520 (unspec:VI48_AVX512VL
18521 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
18524 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18525 [(set_attr "type" "sse")
18526 (set_attr "prefix" "evex")
18527 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1 on V4SI.  Operand 1 is tied to the destination by the "0"
;; constraint, so the instruction is printed in two-operand form
;; (operand 2, then operand 0 in AT&T order).  Operand 2 may be a register
;; or memory ("xm").
18529 (define_insn "sha1msg1"
18530 [(set (match_operand:V4SI 0 "register_operand" "=x")
18532 [(match_operand:V4SI 1 "register_operand" "0")
18533 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18536 "sha1msg1\t{%2, %0|%0, %2}"
18537 [(set_attr "type" "sselog1")
18538 (set_attr "mode" "TI")])
;; sha1msg2 on V4SI.  Operand 1 is tied to the destination by the "0"
;; constraint, so only operands 2 and 0 appear in the assembly template.
;; Operand 2 may be a register or memory ("xm").
18540 (define_insn "sha1msg2"
18541 [(set (match_operand:V4SI 0 "register_operand" "=x")
18543 [(match_operand:V4SI 1 "register_operand" "0")
18544 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18547 "sha1msg2\t{%2, %0|%0, %2}"
18548 [(set_attr "type" "sselog1")
18549 (set_attr "mode" "TI")])
;; sha1nexte on V4SI, represented as UNSPEC_SHA1NEXTE of operands 1 and 2.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").  The template prints only operands 2 and 0.
18551 (define_insn "sha1nexte"
18552 [(set (match_operand:V4SI 0 "register_operand" "=x")
18554 [(match_operand:V4SI 1 "register_operand" "0")
18555 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18556 UNSPEC_SHA1NEXTE))]
18558 "sha1nexte\t{%2, %0|%0, %2}"
18559 [(set_attr "type" "sselog1")
18560 (set_attr "mode" "TI")])
;; sha1rnds4 on V4SI, represented as UNSPEC_SHA1RNDS4 of operands 1-3.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm"); operand 3 is an SImode immediate accepted by
;; const_0_to_3_operand, which is why length_immediate is set to 1.
18562 (define_insn "sha1rnds4"
18563 [(set (match_operand:V4SI 0 "register_operand" "=x")
18565 [(match_operand:V4SI 1 "register_operand" "0")
18566 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18567 (match_operand:SI 3 "const_0_to_3_operand" "n")]
18568 UNSPEC_SHA1RNDS4))]
18570 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
18571 [(set_attr "type" "sselog1")
18572 (set_attr "length_immediate" "1")
18573 (set_attr "mode" "TI")])
;; sha256msg1 on V4SI, represented as UNSPEC_SHA256MSG1 of operands 1 and 2.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").  The template prints only operands 2 and 0.
18575 (define_insn "sha256msg1"
18576 [(set (match_operand:V4SI 0 "register_operand" "=x")
18578 [(match_operand:V4SI 1 "register_operand" "0")
18579 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18580 UNSPEC_SHA256MSG1))]
18582 "sha256msg1\t{%2, %0|%0, %2}"
18583 [(set_attr "type" "sselog1")
18584 (set_attr "mode" "TI")])
;; sha256msg2 on V4SI, represented as UNSPEC_SHA256MSG2 of operands 1 and 2.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm").  The template prints only operands 2 and 0.
18586 (define_insn "sha256msg2"
18587 [(set (match_operand:V4SI 0 "register_operand" "=x")
18589 [(match_operand:V4SI 1 "register_operand" "0")
18590 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18591 UNSPEC_SHA256MSG2))]
18593 "sha256msg2\t{%2, %0|%0, %2}"
18594 [(set_attr "type" "sselog1")
18595 (set_attr "mode" "TI")])
;; sha256rnds2 on V4SI, represented as UNSPEC_SHA256RNDS2 of operands 1-3.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory ("xm"); operand 3 is a V4SI register restricted by the "Yz"
;; constraint (presumably the fixed first SSE register -- confirm against
;; the i386 constraint definitions).
;; NOTE(review): length_immediate is set to 1 although none of the matched
;; operands is an immediate.  Do not simply delete it: length attributes
;; must never under-estimate the encoding, so first check how the remaining
;; length components are computed for this insn type.
18597 (define_insn "sha256rnds2"
18598 [(set (match_operand:V4SI 0 "register_operand" "=x")
18600 [(match_operand:V4SI 1 "register_operand" "0")
18601 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18602 (match_operand:V4SI 3 "register_operand" "Yz")]
18603 UNSPEC_SHA256RNDS2))]
18605 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
18606 [(set_attr "type" "sselog1")
18607 (set_attr "length_immediate" "1")
18608 (set_attr "mode" "TI")])
;; Cast-style pattern: operand 1 has mode <ssequartermode> and is wrapped in
;; an unspec that yields the AVX512MODE2P mode of operand 0.
;; Two alternatives: register destination with register-or-memory source
;; ("=x" / "xm"), or memory destination with register source ("m" / "x").
;; The insn is split once reload_completed holds.  The split code builds a
;; REG in <ssequartermode>mode from REGNO (op0), a REG in <MODE>mode from
;; REGNO (op1), and emits an ordinary move between op0 and op1.
;; NOTE(review): operand 0 is a MEM in the second alternative and operand 1
;; may be a MEM in the first, so each REGNO-based rewrite is meaningful for
;; only one alternative -- confirm that each is guarded accordingly.
18610 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
18611 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18612 (unspec:AVX512MODE2P
18613 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
18617 "&& reload_completed"
18620 rtx op0 = operands[0];
18621 rtx op1 = operands[1];
18623 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
18625 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18626 emit_move_insn (op0, op1);
;; Cast-style pattern: operand 1 has mode <ssehalfvecmode> and is wrapped in
;; an unspec that yields the AVX512MODE2P mode of operand 0.
;; Two alternatives: register destination with register-or-memory source
;; ("=x" / "xm"), or memory destination with register source ("m" / "x").
;; The insn is split once reload_completed holds.  The split code builds a
;; REG in <ssehalfvecmode>mode from REGNO (op0), a REG in <MODE>mode from
;; REGNO (op1), and emits an ordinary move between op0 and op1.
;; NOTE(review): operand 0 is a MEM in the second alternative and operand 1
;; may be a MEM in the first, so each REGNO-based rewrite is meaningful for
;; only one alternative -- confirm that each is guarded accordingly.
18630 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
18631 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18632 (unspec:AVX512MODE2P
18633 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18637 "&& reload_completed"
18640 rtx op0 = operands[0];
18641 rtx op1 = operands[1];
18643 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
18645 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18646 emit_move_insn (op0, op1);
;; Int iterator over the two vpmadd52 unspec codes (low and high variants,
;; going by the LUQ / HUQ suffixes), so one pattern can be written for both.
18650 (define_int_iterator VPMADD52
18651 [UNSPEC_VPMADD52LUQ
18652 UNSPEC_VPMADD52HUQ])
;; Maps each vpmadd52 unspec code to its "luq" / "huq" suffix; the suffix is
;; substituted into pattern names and into the emitted mnemonic.
18654 (define_int_attr vpmadd52type
18655 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
;; Expander for the "huq" maskz form over the VI8_AVX512VL modes, enabled
;; under TARGET_AVX512IFMA.
;; Operands 0-2 are registers, operand 3 is register-or-memory, and
;; operand 4 is a register in <avx512fmaskmode>.
;; It forwards operands 0-3 to gen_vpamdd52huq<mode>_maskz_1, inserting
;; CONST0_RTX (<MODE>mode) as an extra argument ahead of the mask.
;; The "vpamdd52" spelling (the emitted mnemonic is vpmadd52) is part of the
;; generator name used here, so it must not be corrected in this block
;; alone.
18657 (define_expand "vpamdd52huq<mode>_maskz"
18658 [(match_operand:VI8_AVX512VL 0 "register_operand")
18659 (match_operand:VI8_AVX512VL 1 "register_operand")
18660 (match_operand:VI8_AVX512VL 2 "register_operand")
18661 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18662 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18663 "TARGET_AVX512IFMA"
18665 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
18666 operands[0], operands[1], operands[2], operands[3],
18667 CONST0_RTX (<MODE>mode), operands[4]));
;; Expander for the "luq" maskz form over the VI8_AVX512VL modes, enabled
;; under TARGET_AVX512IFMA.
;; Operands 0-2 are registers, operand 3 is register-or-memory, and
;; operand 4 is a register in <avx512fmaskmode>.
;; It forwards operands 0-3 to gen_vpamdd52luq<mode>_maskz_1, inserting
;; CONST0_RTX (<MODE>mode) as an extra argument ahead of the mask.
;; The "vpamdd52" spelling (the emitted mnemonic is vpmadd52) is part of the
;; generator name used here, so it must not be corrected in this block
;; alone.
18671 (define_expand "vpamdd52luq<mode>_maskz"
18672 [(match_operand:VI8_AVX512VL 0 "register_operand")
18673 (match_operand:VI8_AVX512VL 1 "register_operand")
18674 (match_operand:VI8_AVX512VL 2 "register_operand")
18675 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18676 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18677 "TARGET_AVX512IFMA"
18679 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
18680 operands[0], operands[1], operands[2], operands[3],
18681 CONST0_RTX (<MODE>mode), operands[4]));
;; vpmadd52luq / vpmadd52huq over the VI8_AVX512VL modes, enabled under
;; TARGET_AVX512IFMA; the <sd_maskz_name> / <sd_mask_op4> substitutions
;; supply the optional mask decoration.
;; Operand 1 is tied to the destination ("0"), so the template prints only
;; operands 3, 2 and 0.  Operand 2 is a register; operand 3 may be a
;; register or memory ("vm").
18685 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
18686 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18687 (unspec:VI8_AVX512VL
18688 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18689 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18690 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18692 "TARGET_AVX512IFMA"
18693 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18694 [(set_attr "type" "ssemuladd")
18695 (set_attr "prefix" "evex")
18696 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpmadd52luq / vpmadd52huq over the VI8_AVX512VL modes,
;; enabled under TARGET_AVX512IFMA.  The unspec result is wrapped in a
;; vec_merge controlled by mask operand 4 (constraint "Yk").
;; Operand 1 is tied to the destination ("0"); operand 2 is a register;
;; operand 3 may be a register or memory ("vm").  The template prints the
;; mask as {%4} directly after operand 0.
18698 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
18699 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18700 (vec_merge:VI8_AVX512VL
18701 (unspec:VI8_AVX512VL
18702 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18703 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18704 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18707 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18708 "TARGET_AVX512IFMA"
18709 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
18710 [(set_attr "type" "ssemuladd")
18711 (set_attr "prefix" "evex")
18712 (set_attr "mode" "<sseinsnmode>")])
;; vpmultishiftqb over the VI1_AVX512VL modes, represented as
;; UNSPEC_VPMULTISHIFT of operands 1 and 2 and enabled under
;; TARGET_AVX512VBMI.  Optional masking comes from the <mask_name> /
;; <mask_operand3> substitutions.
;; Operand 0 is the destination register; operand 1 is a register source;
;; operand 2 may be a register or memory ("vm").
18714 (define_insn "vpmultishiftqb<mode><mask_name>"
18715 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18716 (unspec:VI1_AVX512VL
18717 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18718 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
18719 UNSPEC_VPMULTISHIFT))]
18720 "TARGET_AVX512VBMI"
18721 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18722 [(set_attr "type" "sselog")
18723 (set_attr "prefix" "evex")
18724 (set_attr "mode" "<sseinsnmode>")])