1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2015 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For embed. rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
132 ;; For AVX512BW support
140 ;; For AVX512DQ support
145 ;; For AVX512IFMA support
149 ;; For AVX512VBMI support
153 (define_c_enum "unspecv" [
163 ;; All vector modes including V?TImode, used in move patterns.
;; A parenthesised entry (MODE "COND") is only iterated when COND holds; a
;; bare mode is always iterated.  Here the 512-bit modes require
;; TARGET_AVX512F (V4TI: TARGET_AVX512BW), the 256-bit modes require
;; TARGET_AVX, and the 128-bit modes are unconditional.
;; Used by the mov<mode>, *mov<mode>_internal and movmisalign<mode> patterns.
164 (define_mode_iterator VMOVE
165 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
166 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
167 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
168 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
169 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
170 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
171 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
173 ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as the baseline.
;; SI/DI/SF/DF element vectors.  The 512-bit modes are listed without a
;; condition; the 256- and 128-bit modes are gated on TARGET_AVX512VL.
;; Used by the masked load, blendm and masked store patterns for 4- and
;; 8-byte elements.
174 (define_mode_iterator V48_AVX512VL
175 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
176 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
177 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
178 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
180 ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as the baseline.
;; QI and HI element vectors: the 512-bit modes are unconditional, the
;; narrower ones are gated on TARGET_AVX512VL.  Used by the byte/word
;; variants of the masked load, blendm and masked store patterns.
181 (define_mode_iterator VI12_AVX512VL
182 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
183 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; QI-only subset of VI12_AVX512VL, with the same gating.
185 (define_mode_iterator VI1_AVX512VL
186 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; General vector-mode iterator.  Unlike VMOVE it has no V64QI/V32HI and no
;; V?TI entries, and V2DF is gated on TARGET_SSE2.  256-bit modes require
;; TARGET_AVX, 512-bit modes require TARGET_AVX512F.
189 (define_mode_iterator V
190 [(V32QI "TARGET_AVX") V16QI
191 (V16HI "TARGET_AVX") V8HI
192 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
193 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
194 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
195 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
197 ;; All 128bit vector modes
;; Size-class iterators.  V_128 gates only V2DF (on TARGET_SSE2); V_256 and
;; V_512 carry no per-mode conditions, so patterns using them must supply
;; the ISA check themselves.
198 (define_mode_iterator V_128
199 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
201 ;; All 256bit vector modes
202 (define_mode_iterator V_256
203 [V32QI V16HI V8SI V4DI V8SF V4DF])
205 ;; All 512bit vector modes
206 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
208 ;; All 256bit and 512bit vector modes
;; The 256-bit modes are unconditional here; each 512-bit mode is gated on
;; TARGET_AVX512F.
209 (define_mode_iterator V_256_512
210 [V32QI V16HI V8SI V4DI V8SF V4DF
211 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
212 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
214 ;; All vector float modes
;; SF and DF element vectors of every width.  V4SF is unconditional; V2DF
;; needs TARGET_SSE2, 256-bit modes need TARGET_AVX and 512-bit modes need
;; TARGET_AVX512F.  Used by the loadu/storeu patterns.
215 (define_mode_iterator VF
216 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
217 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
219 ;; 128- and 256-bit float vector modes
220 (define_mode_iterator VF_128_256
221 [(V8SF "TARGET_AVX") V4SF
222 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
224 ;; All SFmode vector float modes
225 (define_mode_iterator VF1
226 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
228 ;; 128- and 256-bit SF vector modes
229 (define_mode_iterator VF1_128_256
230 [(V8SF "TARGET_AVX") V4SF])
;; Same two SF modes, but V8SF is unconditional and V4SF is gated on
;; TARGET_AVX512VL.
232 (define_mode_iterator VF1_128_256VL
233 [V8SF (V4SF "TARGET_AVX512VL")])
235 ;; All DFmode vector float modes
;; DF element vectors.  Note that V2DF is unconditional in VF2 and
;; VF2_128_256 but gated on TARGET_SSE2 in VF_128.
236 (define_mode_iterator VF2
237 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
239 ;; 128- and 256-bit DF vector modes
240 (define_mode_iterator VF2_128_256
241 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF modes; only V8DF is gated (on TARGET_AVX512F).
243 (define_mode_iterator VF2_512_256
244 [(V8DF "TARGET_AVX512F") V4DF])
;; 256- and 512-bit DF modes; only V4DF is gated (on TARGET_AVX512VL).
246 (define_mode_iterator VF2_512_256VL
247 [V8DF (V4DF "TARGET_AVX512VL")])
249 ;; All 128bit vector float modes
250 (define_mode_iterator VF_128
251 [V4SF (V2DF "TARGET_SSE2")])
253 ;; All 256bit vector float modes
254 (define_mode_iterator VF_256
257 ;; All 512bit vector float modes
258 (define_mode_iterator VF_512
;; AVX-512 iterators in which the 512-bit mode is unconditional and the
;; 256-/128-bit modes are gated on TARGET_AVX512VL.
;; VI48_AVX512VL: SI and DI element integer vectors.
261 (define_mode_iterator VI48_AVX512VL
262 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
263 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; VF_AVX512VL: SF and DF element float vectors.
265 (define_mode_iterator VF_AVX512VL
266 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
267 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; VF2_AVX512VL: DF element float vectors only.
269 (define_mode_iterator VF2_AVX512VL
270 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; VF1_AVX512VL: SF element float vectors only.
272 (define_mode_iterator VF1_AVX512VL
273 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
275 ;; All vector integer modes
;; Integer vectors of every element size.  128-bit modes are unconditional,
;; 256-bit modes need TARGET_AVX, 512-bit SI/DI modes need TARGET_AVX512F
;; and 512-bit QI/HI modes need TARGET_AVX512BW.
276 (define_mode_iterator VI
277 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
278 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
279 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
280 (V8SI "TARGET_AVX") V4SI
281 (V4DI "TARGET_AVX") V2DI])
;; Same mode set as VI, but the 256-bit modes are gated on TARGET_AVX2
;; rather than TARGET_AVX.
283 (define_mode_iterator VI_AVX2
284 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
285 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
286 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
287 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
289 ;; All QImode vector integer modes
;; (128- and 256-bit only; there is no V64QI entry in this iterator.)
290 (define_mode_iterator VI1
291 [(V32QI "TARGET_AVX") V16QI])
293 (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
295 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
;; SI/DI element integer vectors; 512-bit modes unconditional, narrower
;; modes gated on TARGET_AVX512VL.  NOTE(review): judging by the name this
;; feeds the unaligned load/store patterns - confirm at the use sites.
297 (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
298 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
299 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
301 ;; All DImode vector integer modes
302 (define_mode_iterator VI8
303 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DI element vectors with V8DI unconditional and the narrower modes gated
;; on TARGET_AVX512VL.
305 (define_mode_iterator VI8_AVX512VL
306 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 256- and 512-bit DI vectors only; V4DI is gated on TARGET_AVX512VL.
308 (define_mode_iterator VI8_256_512
309 [V8DI (V4DI "TARGET_AVX512VL")])
;; Per-element-size integer iterators.  The suffix names the ISA that gates
;; the wider modes; the 128-bit mode is unconditional except in
;; VI4_AVX512VL, where it is gated on TARGET_AVX512VL.
;; QI elements, 256-bit gated on TARGET_AVX2.
311 (define_mode_iterator VI1_AVX2
312 [(V32QI "TARGET_AVX2") V16QI])
;; QI elements, additionally V64QI gated on TARGET_AVX512BW.
314 (define_mode_iterator VI1_AVX512
315 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
;; HI elements; V32HI gated on TARGET_AVX512BW.
317 (define_mode_iterator VI2_AVX2
318 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; HI elements; V32HI gated on TARGET_AVX512F (not AVX512BW).
320 (define_mode_iterator VI2_AVX512F
321 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SI elements, 256-bit gated on TARGET_AVX.
323 (define_mode_iterator VI4_AVX
324 [(V8SI "TARGET_AVX") V4SI])
;; SI elements, 256-bit gated on TARGET_AVX2.
326 (define_mode_iterator VI4_AVX2
327 [(V8SI "TARGET_AVX2") V4SI])
;; SI elements, additionally V16SI gated on TARGET_AVX512F.
329 (define_mode_iterator VI4_AVX512F
330 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; SI elements; V16SI unconditional, narrower modes gated on TARGET_AVX512VL.
332 (define_mode_iterator VI4_AVX512VL
333 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
;; SI/DI element vectors with asymmetric gating: V4SI and V8SI are
;; unconditional, V2DI and V4DI need TARGET_AVX512VL, and both 512-bit
;; modes need TARGET_AVX512F.
335 (define_mode_iterator VI48_AVX512F_AVX512VL
336 [V4SI V8SI (V16SI "TARGET_AVX512F")
337 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
;; HI element vectors; V32HI unconditional, narrower modes gated on
;; TARGET_AVX512VL.
339 (define_mode_iterator VI2_AVX512VL
340 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
;; DI element vectors; V8DI gated on TARGET_AVX512BW, V4DI on TARGET_AVX2.
342 (define_mode_iterator VI8_AVX2_AVX512BW
343 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
;; DI element vectors, 128- and 256-bit only.
345 (define_mode_iterator VI8_AVX2
346 [(V4DI "TARGET_AVX2") V2DI])
;; DI element vectors; V8DI gated on TARGET_AVX512F, V4DI on TARGET_AVX2.
348 (define_mode_iterator VI8_AVX2_AVX512F
349 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
351 (define_mode_iterator VI4_128_8_256
355 (define_mode_iterator V8FI
359 (define_mode_iterator V16FI
362 ;; ??? We should probably use TImode instead.
;; Whole-register TI-element modes: V1TI unconditional, V2TI gated on
;; TARGET_AVX2, V4TI gated on TARGET_AVX512BW.
363 (define_mode_iterator VIMAX_AVX2
364 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
366 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in using scalar TI instead of V1TI for the
;; 128-bit entry.
367 (define_mode_iterator SSESCALARMODE
368 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
;; Combined element-size iterators (digits in the name are element sizes in
;; bytes).  128-bit modes are unconditional and 256-bit modes are gated on
;; TARGET_AVX2 throughout this group.
;; QI+HI; 512-bit modes gated on TARGET_AVX512BW.
370 (define_mode_iterator VI12_AVX2
371 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; HI+SI; no 512-bit modes.
374 (define_mode_iterator VI24_AVX2
375 [(V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
;; QI+HI+SI; V32HI and V16SI gated on TARGET_AVX512F, no V64QI entry.
378 (define_mode_iterator VI124_AVX512F
379 [(V32QI "TARGET_AVX2") V16QI
380 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; QI+HI+SI; no 512-bit modes.
383 (define_mode_iterator VI124_AVX2
384 [(V32QI "TARGET_AVX2") V16QI
385 (V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI])
;; HI elements; V32HI gated on TARGET_AVX512BW, V16HI on TARGET_AVX2.
;; (Same mode list and conditions as VI2_AVX2.)
388 (define_mode_iterator VI2_AVX2_AVX512BW
389 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; SI+DI; 256-bit modes gated on TARGET_AVX2, no 512-bit modes.
391 (define_mode_iterator VI48_AVX2
392 [(V8SI "TARGET_AVX2") V4SI
393 (V4DI "TARGET_AVX2") V2DI])
;; HI+SI+DI; the only 512-bit entry is V8DI, gated on TARGET_AVX512F.
395 (define_mode_iterator VI248_AVX2_8_AVX512F
396 [(V16HI "TARGET_AVX2") V8HI
397 (V8SI "TARGET_AVX2") V4SI
398 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; V16SI and V8DI unconditional; V32HI gated on TARGET_AVX512BW and V4DI on
;; TARGET_AVX512VL.
400 (define_mode_iterator VI248_AVX512BW_AVX512VL
401 [(V32HI "TARGET_AVX512BW")
402 (V4DI "TARGET_AVX512VL") V16SI V8DI])
404 ;; Assumes TARGET_AVX512VL as the baseline.
405 (define_mode_iterator VI24_AVX512BW_1
406 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
;; SI/DI element vectors: only the 512-bit modes are gated (on
;; TARGET_AVX512F); the 128- and 256-bit modes are unconditional here.
409 (define_mode_iterator VI48_AVX512F
410 [(V16SI "TARGET_AVX512F") V8SI V4SI
411 (V8DI "TARGET_AVX512F") V4DI V2DI])
413 (define_mode_iterator V48_AVX2
416 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
417 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Maps a vector mode to the AVX-512 ISA string spliced in where a pattern
;; name says <avx512> (e.g. "<avx512>_load<mode>_mask").  128- and 256-bit
;; modes map to "avx512vl", 512-bit QI/HI modes to "avx512bw", and the other
;; 512-bit modes to "avx512f".
419 (define_mode_attr avx512
420 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
421 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
422 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
423 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
424 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
425 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; Mode -> ISA-name string.  The mapping is not uniform by width: HI and DI
;; element modes map to "avx512vl"/"avx512bw"/"avx512f", while QI and SI
;; element modes map to "sse2"/"avx"/"avx512f" and the float modes to
;; "avx"/"avx512f".  NOTE(review): presumably chosen to match existing
;; builtin/pattern names - confirm at the use sites before changing.
427 (define_mode_attr sse2_avx_avx512f
428 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
429 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
430 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
431 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
432 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
433 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; Mode -> ISA-name string: "sse2" for 128-bit, "avx2" for 256-bit, and
;; "avx512bw" (QI/HI/TI elements) or "avx512f" (SI/DI elements) for 512-bit.
435 (define_mode_attr sse2_avx2
436 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
437 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
438 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
439 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
440 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
;; Same idea with "ssse3" as the 128-bit (and 64-bit V4HI) name.  There are
;; no 512-bit SI/DI entries, and the 128-bit TI entry is scalar TI, not V1TI.
442 (define_mode_attr ssse3_avx2
443 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
444 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
445 (V4SI "ssse3") (V8SI "avx2")
446 (V2DI "ssse3") (V4DI "avx2")
447 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
;; Same idea with "sse4_1" as the 128-bit name.  V8DI maps to "avx512dq"
;; while V16SI maps to "avx512f".
449 (define_mode_attr sse4_1_avx2
450 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
451 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
452 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
453 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
;; Mode -> ISA-name string: float modes map to "avx", integer modes to
;; "avx2", for both 128- and 256-bit widths.
455 (define_mode_attr avx_avx2
456 [(V4SF "avx") (V2DF "avx")
457 (V8SF "avx") (V4DF "avx")
458 (V4SI "avx2") (V2DI "avx2")
459 (V8SI "avx2") (V4DI "avx2")])
;; Integer modes only: 128-bit modes map to "vec", 256-bit modes to "avx2".
461 (define_mode_attr vec_avx2
462 [(V16QI "vec") (V32QI "avx2")
463 (V8HI "vec") (V16HI "avx2")
464 (V4SI "vec") (V8SI "avx2")
465 (V2DI "vec") (V4DI "avx2")])
;; SI/DI/SF/DF modes: "avx2" for 128-/256-bit, "avx512f" for 512-bit.
;; HI modes: "avx512vl" for 128-/256-bit, "avx512bw" for 512-bit.
467 (define_mode_attr avx2_avx512
468 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
469 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
470 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
471 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
472 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; Maps a vector mode to the letter that selects the float ("f") or integer
;; ("i") flavour of a shuffle-class mnemonic.  It is spliced directly into
;; instruction names, e.g. "vextract<shuffletype>64x4" and
;; "vbroadcast<shuffletype>32x4" in *mov<mode>_internal, so every value must
;; be exactly "f" or "i": any other letter produces a mnemonic that does not
;; exist.  All integer modes, including V16HI, therefore map to "i".
474 (define_mode_attr shuffletype
475 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
476 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
477 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
478 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
479 (V64QI "i") (V1TI "i") (V2TI "i")])
;; 512-bit SI/DI/SF/DF mode -> the 128-bit mode with the same element type
;; (one quarter of the elements).
481 (define_mode_attr ssequartermode
482 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Integer mode -> lower-case name of the mode with the same element count
;; and twice the element width (e.g. V16QI -> "v16hi").  Lower case so it
;; can be pasted into generated identifiers.
484 (define_mode_attr ssedoublemodelower
485 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
486 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
487 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; Mode -> double-size mode.  Two different shapes are mixed here: the
;; QI/HI modes and V4SI keep the element count and widen the element
;; (V16HI -> V16SI, V4SI -> V4DI), whereas the 256-/512-bit SI/DI/SF/DF modes
;; keep the element type and double the count (V8SF -> V16SF).
489 (define_mode_attr ssedoublemode
490 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
491 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
492 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
493 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
;; DI element vector -> QI element vector of the same total size.
495 (define_mode_attr ssebytemode
496 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
498 ;; All 128bit vector integer modes
;; None of the iterators in this group carries a per-mode condition, so the
;; patterns that use them must check the required ISA themselves.
499 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
501 ;; All 256bit vector integer modes
502 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
504 ;; All 512bit vector integer modes
505 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
507 ;; Various 128bit vector integer mode combinations
;; (the digits in each name are the element sizes in bytes)
508 (define_mode_iterator VI12_128 [V16QI V8HI])
509 (define_mode_iterator VI14_128 [V16QI V4SI])
510 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
511 (define_mode_iterator VI24_128 [V8HI V4SI])
512 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
513 (define_mode_iterator VI48_128 [V4SI V2DI])
515 ;; Various 256bit and 512 vector integer mode combinations
;; 256-bit QI, HI and SI element vectors, no per-mode conditions.
516 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
517 (define_mode_iterator VI124_256_AVX512F_AVX512BW
519 (V64QI "TARGET_AVX512BW")
520 (V32HI "TARGET_AVX512BW")
521 (V16SI "TARGET_AVX512F")])
;; Unconditional SI/DI element iterators for one width each.
522 (define_mode_iterator VI48_256 [V8SI V4DI])
523 (define_mode_iterator VI48_512 [V16SI V8DI])
;; 256-bit SI paired with 512-bit DI (both have eight elements).
524 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; 512-bit integer vectors; the QI/HI modes are gated on TARGET_AVX512BW.
525 (define_mode_iterator VI_AVX512BW
526 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
528 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that share an
;; element size and total width.
529 (define_mode_iterator VI4F_128 [V4SI V4SF])
530 (define_mode_iterator VI8F_128 [V2DI V2DF])
531 (define_mode_iterator VI4F_256 [V8SI V8SF])
532 (define_mode_iterator VI8F_256 [V4DI V4DF])
;; 8-byte element modes at 256 and 512 bits; the 512-bit modes are gated on
;; TARGET_AVX512F.
533 (define_mode_iterator VI8F_256_512
534 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
535 (define_mode_iterator VI48F_256_512
537 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
538 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
539 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; All 512-bit modes: SF/DF float vectors plus QI/HI/SI/DI integer vectors,
;; with no per-mode conditions.
540 (define_mode_iterator VF48_I1248
541 [V16SI V16SF V8DI V8DF V32HI V64QI])
;; 4- and 8-byte element integer and float vectors.  512-bit modes are
;; unconditional; 256- and 128-bit modes are gated on TARGET_AVX512VL.
542 (define_mode_iterator VI48F
543 [V16SI V16SF V8DI V8DF
544 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
545 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
546 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
547 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; 256-bit subset of the above, unconditional.
548 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
550 ;; Mapping from float mode to required SSE level
;; Spliced into pattern names such as "<sse>_loadu<ssemodesuffix>...".
;; SF-based modes map to "sse", DF-based to "sse2", 256-bit to "avx" and
;; 512-bit to "avx512f".
551 (define_mode_attr sse
552 [(SF "sse") (DF "sse2")
553 (V4SF "sse") (V2DF "sse2")
554 (V16SF "avx512f") (V8SF "avx")
555 (V8DF "avx512f") (V4DF "avx")])
;; QI and DI element integer vectors -> "sse2" / "avx" / "avx512f" by width.
557 (define_mode_attr sse2
558 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
559 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; QI element vectors -> "sse3" (128-bit) or "avx" (256-bit).
561 (define_mode_attr sse3
562 [(V16QI "sse3") (V32QI "avx")])
564 (define_mode_attr sse4_1
565 [(V4SF "sse4_1") (V2DF "sse4_1")
566 (V8SF "avx") (V4DF "avx")
;; Width suffix used when building pattern names: "512" for 512-bit modes,
;; "256" for 256-bit modes and the empty string for 128-bit modes.
569 (define_mode_attr avxsizesuffix
570 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
571 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
572 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
573 (V16SF "512") (V8DF "512")
574 (V8SF "256") (V4DF "256")
575 (V4SF "") (V2DF "")])
577 ;; SSE instruction mode
578 (define_mode_attr sseinsnmode
579 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
580 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
581 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
582 (V16SF "V16SF") (V8DF "V8DF")
583 (V8SF "V8SF") (V4DF "V4DF")
584 (V4SF "V4SF") (V2DF "V2DF")
587 ;; Mapping of vector modes to corresponding mask size
;; Scalar integer mode used for the mask operand of a masked pattern on the
;; given vector mode: one mask bit per element, rounded up to at least
;; QImode (64 elements -> DI, 32 -> SI, 16 -> HI, 8 or fewer -> QI).
588 (define_mode_attr avx512fmaskmode
589 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
590 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
591 (V16SI "HI") (V8SI "QI") (V4SI "QI")
592 (V8DI "QI") (V4DI "QI") (V2DI "QI")
593 (V16SF "HI") (V8SF "QI") (V4SF "QI")
594 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
596 ;; Mapping of vector float modes to an integer mode of the same size
;; Float modes map to the integer vector mode with the same element width
;; and count; integer modes map to themselves.
597 (define_mode_attr sseintvecmode
598 [(V16SF "V16SI") (V8DF "V8DI")
599 (V8SF "V8SI") (V4DF "V4DI")
600 (V4SF "V4SI") (V2DF "V2DI")
601 (V16SI "V16SI") (V8DI "V8DI")
602 (V8SI "V8SI") (V4DI "V4DI")
603 (V4SI "V4SI") (V2DI "V2DI")
604 (V16HI "V16HI") (V8HI "V8HI")
605 (V32HI "V32HI") (V64QI "V64QI")
606 (V32QI "V32QI") (V16QI "V16QI")])
;; Float vector mode -> single wide integer mode of the same total size
;; (TI = 128 bits, OI = 256 bits, XI = 512 bits).  There is no V16SF entry.
608 (define_mode_attr sseintvecmode2
609 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
610 (V8SF "OI") (V4SF "TI")])
;; Lower-case spelling of sseintvecmode, for pasting into generated
;; identifiers.  Covers fewer modes than sseintvecmode (no 512-bit integer
;; modes).
612 (define_mode_attr sseintvecmodelower
613 [(V16SF "v16si") (V8DF "v8di")
614 (V8SF "v8si") (V4DF "v4di")
615 (V4SF "v4si") (V2DF "v2di")
616 (V8SI "v8si") (V4DI "v4di")
617 (V4SI "v4si") (V2DI "v2di")
618 (V16HI "v16hi") (V8HI "v8hi")
619 (V32QI "v32qi") (V16QI "v16qi")])
621 ;; Mapping of vector modes to a vector mode of double size
;; Keeps the element type and doubles the element count (128 -> 256 bits,
;; 256 -> 512 bits).  512-bit modes have no entry.
622 (define_mode_attr ssedoublevecmode
623 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
624 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
625 (V8SF "V16SF") (V4DF "V8DF")
626 (V4SF "V8SF") (V2DF "V4DF")])
628 ;; Mapping of vector modes to a vector mode of half size
629 (define_mode_attr ssehalfvecmode
630 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
631 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
632 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
633 (V16SF "V8SF") (V8DF "V4DF")
634 (V8SF "V4SF") (V4DF "V2DF")
637 ;; Mapping of vector modes to packed single mode of the same size
;; Every 128-bit mode maps to V4SF, every 256-bit mode to V8SF and every
;; 512-bit mode to V16SF.  *mov<mode>_internal uses this for its "mode"
;; attribute when a packed-single move is preferred.
638 (define_mode_attr ssePSmode
639 [(V16SI "V16SF") (V8DF "V16SF")
640 (V16SF "V16SF") (V8DI "V16SF")
641 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
642 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
643 (V8SI "V8SF") (V4SI "V4SF")
644 (V4DI "V8SF") (V2DI "V4SF")
645 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
646 (V8SF "V8SF") (V4SF "V4SF")
647 (V4DF "V8SF") (V2DF "V4SF")])
;; DI element vector -> SF vector with the same element COUNT (so half the
;; total size), unlike ssePSmode which preserves total size.
649 (define_mode_attr ssePSmode2
650 [(V8DI "V8SF") (V4DI "V4SF")])
652 ;; Mapping of vector modes back to the scalar modes
;; Vector mode -> mode of one element.
653 (define_mode_attr ssescalarmode
654 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
655 (V32HI "HI") (V16HI "HI") (V8HI "HI")
656 (V16SI "SI") (V8SI "SI") (V4SI "SI")
657 (V8DI "DI") (V4DI "DI") (V2DI "DI")
658 (V16SF "SF") (V8SF "SF") (V4SF "SF")
659 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
661 ;; Mapping of vector modes to the 128bit modes
;; (same element type, element count reduced to fill 128 bits; 128-bit modes
;; map to themselves)
662 (define_mode_attr ssexmmmode
663 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
664 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
665 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
666 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
667 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
668 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
670 ;; Pointer size override for scalar modes (Intel asm dialect)
671 (define_mode_attr iptr
672 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
673 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
674 (V8SF "k") (V4DF "q")
675 (V4SF "k") (V2DF "q")
678 ;; Number of scalar elements in each vector type
;; Element count as a decimal string.  NOTE(review): V32HI has no entry in
;; the visible table - confirm whether any user of this attribute iterates
;; over V32HI.
679 (define_mode_attr ssescalarnum
680 [(V64QI "64") (V16SI "16") (V8DI "8")
681 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
682 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
683 (V16SF "16") (V8DF "8")
684 (V8SF "8") (V4DF "4")
685 (V4SF "4") (V2DF "2")])
687 ;; Mask of scalar elements in each vector type
;; (element count minus one, i.e. the largest valid element index; only
;; 128- and 256-bit modes are listed)
688 (define_mode_attr ssescalarnummask
689 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
690 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
691 (V8SF "7") (V4DF "3")
692 (V4SF "3") (V2DF "1")])
;; Element width in bits as a decimal string.  It doubles as the numeric
;; mnemonic suffix in the masked move patterns ("vmovdqu<ssescalarsize>").
;; Of the float modes only V16SF and V8DF are listed.
694 (define_mode_attr ssescalarsize
695 [(V8DI "64") (V4DI "64") (V2DI "64")
696 (V64QI "8") (V32QI "8") (V16QI "8")
697 (V32HI "16") (V16HI "16") (V8HI "16")
698 (V16SI "32") (V8SI "32") (V4SI "32")
699 (V16SF "32") (V8DF "64")])
701 ;; SSE prefix for integer vector modes
702 (define_mode_attr sseintprefix
703 [(V2DI "p") (V2DF "")
708 (V16SI "p") (V16SF "")
709 (V16QI "p") (V8HI "p")
710 (V32QI "p") (V16HI "p")
711 (V64QI "p") (V32HI "p")])
713 ;; SSE scalar suffix for vector modes
714 (define_mode_attr ssescalarmodesuffix
716 (V8SF "ss") (V4DF "sd")
717 (V4SF "ss") (V2DF "sd")
718 (V8SI "ss") (V4DI "sd")
721 ;; Pack/unpack vector modes
;; Unpack: same total size, element width doubled, element count halved.
722 (define_mode_attr sseunpackmode
723 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
724 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
725 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Pack: the inverse mapping - element width halved, element count doubled.
727 (define_mode_attr ssepackmode
728 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
729 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
730 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
732 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Largest rotate count for each 128-bit integer mode: element width in
;; bits minus one.
733 (define_mode_attr sserotatemax
734 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
736 ;; Mapping of mode to cast intrinsic name
;; ("si" for integer, "ps" for single, "pd" for double; 256- and 512-bit
;; modes only)
737 (define_mode_attr castmode
738 [(V8SI "si") (V8SF "ps") (V4DF "pd")
739 (V16SI "si") (V16SF "ps") (V8DF "pd")])
741 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the RTL code), not a mode attribute.
742 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
744 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
745 ;; i64x4 or f64x4 for 512bit modes.
;; Suffix for 128-bit-lane insert/extract style mnemonics.  The "%~" in the
;; 256-bit integer entries is left for the assembler-output code to expand
;; (per the comment above: "i" under TARGET_AVX2, "f" otherwise); 512-bit
;; modes and 256-bit float modes are fixed strings.
746 (define_mode_attr i128
747 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
748 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
749 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; Three-mode iterators (SI, SF and DF element vectors) for 256 and 512
;; bits, with no per-mode conditions.
752 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
753 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
755 ;; Mapping for dbpsadbw modes
;; HI element vector -> QI element vector of the same total size.
756 (define_mode_attr dbpsadbwmode
757 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
759 ;; Mapping suffixes for broadcast
;; (b/w/d/q for QI/HI/SI/DI elements, ss/sd for SF/DF elements)
760 (define_mode_attr bcstscalarsuff
761 [(V64QI "b") (V32QI "b") (V16QI "b")
762 (V32HI "w") (V16HI "w") (V8HI "w")
763 (V16SI "d") (V8SI "d") (V4SI "d")
764 (V8DI "q") (V4DI "q") (V2DI "q")
765 (V16SF "ss") (V8SF "ss") (V4SF "ss")
766 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
768 ;; Tie mode of assembler operand to mode iterator
;; ("t" for every 256-bit mode, "g" for every 512-bit mode; the same letters
;; appear as operand modifiers, e.g. "%g1" in *mov<mode>_internal)
769 (define_mode_attr concat_tg_mode
770 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
771 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
774 ;; Include define_subst patterns for instructions with mask
777 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
785 ;; All of these patterns are enabled for SSE1 as well as SSE2.
786 ;; This is essential for maintaining stable calling conventions.
788 (define_expand "mov<mode>"
789 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
790 (match_operand:VMOVE 1 "nonimmediate_operand"))]
793 ix86_expand_vector_move (<MODE>mode, operands);
797 (define_insn "*mov<mode>_internal"
798 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
799 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
801 && (register_operand (operands[0], <MODE>mode)
802 || register_operand (operands[1], <MODE>mode))"
804 int mode = get_attr_mode (insn);
805 switch (which_alternative)
808 return standard_sse_constant_opcode (insn, operands[1]);
811 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
812 in avx512f, so we need to use workarounds, to access sse registers
813 16-31, which are evex-only. In avx512vl we don't need workarounds. */
814 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
815 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
816 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
818 if (memory_operand (operands[0], <MODE>mode))
820 if (<MODE_SIZE> == 32)
821 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
822 else if (<MODE_SIZE> == 16)
823 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
827 else if (memory_operand (operands[1], <MODE>mode))
829 if (<MODE_SIZE> == 32)
830 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
831 else if (<MODE_SIZE> == 16)
832 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
837 /* Reg -> reg move is always aligned. Just use wider move. */
842 return "vmovaps\t{%g1, %g0|%g0, %g1}";
845 return "vmovapd\t{%g1, %g0|%g0, %g1}";
848 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
859 && (misaligned_operand (operands[0], <MODE>mode)
860 || misaligned_operand (operands[1], <MODE>mode)))
861 return "vmovups\t{%1, %0|%0, %1}";
863 return "%vmovaps\t{%1, %0|%0, %1}";
869 && (misaligned_operand (operands[0], <MODE>mode)
870 || misaligned_operand (operands[1], <MODE>mode)))
871 return "vmovupd\t{%1, %0|%0, %1}";
873 return "%vmovapd\t{%1, %0|%0, %1}";
878 && (misaligned_operand (operands[0], <MODE>mode)
879 || misaligned_operand (operands[1], <MODE>mode)))
880 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
881 : "vmovdqu\t{%1, %0|%0, %1}";
883 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
884 : "%vmovdqa\t{%1, %0|%0, %1}";
886 if (misaligned_operand (operands[0], <MODE>mode)
887 || misaligned_operand (operands[1], <MODE>mode))
888 return "vmovdqu64\t{%1, %0|%0, %1}";
890 return "vmovdqa64\t{%1, %0|%0, %1}";
899 [(set_attr "type" "sselog1,ssemov,ssemov")
900 (set_attr "prefix" "maybe_vex")
902 (cond [(and (match_test "<MODE_SIZE> == 16")
903 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
904 (and (eq_attr "alternative" "2")
905 (match_test "TARGET_SSE_TYPELESS_STORES"))))
906 (const_string "<ssePSmode>")
907 (match_test "TARGET_AVX")
908 (const_string "<sseinsnmode>")
909 (ior (not (match_test "TARGET_SSE2"))
910 (match_test "optimize_function_for_size_p (cfun)"))
911 (const_string "V4SF")
912 (and (eq_attr "alternative" "0")
913 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
916 (const_string "<sseinsnmode>")))])
918 (define_insn "<avx512>_load<mode>_mask"
919 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
920 (vec_merge:V48_AVX512VL
921 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
922 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
923 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
926 static char buf [64];
929 const char *sse_suffix;
931 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
934 sse_suffix = "<ssemodesuffix>";
939 sse_suffix = "<ssescalarsize>";
942 if (misaligned_operand (operands[1], <MODE>mode))
947 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
948 insn_op, align, sse_suffix);
951 [(set_attr "type" "ssemov")
952 (set_attr "prefix" "evex")
953 (set_attr "memory" "none,load")
954 (set_attr "mode" "<sseinsnmode>")])
956 (define_insn "<avx512>_load<mode>_mask"
957 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
958 (vec_merge:VI12_AVX512VL
959 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
960 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
961 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
963 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
964 [(set_attr "type" "ssemov")
965 (set_attr "prefix" "evex")
966 (set_attr "memory" "none,load")
967 (set_attr "mode" "<sseinsnmode>")])
;; Masked blend for 4- and 8-byte element vectors: the result takes
;; elements from operand 2 where the mask (operand 3) selects them and from
;; operand 1 elsewhere.
;; V48_AVX512VL contains both float and integer modes.  The integer forms
;; of this instruction carry a "p" prefix (as the VI12_AVX512VL variant of
;; this pattern spells out with "vpblendm"), while the float forms do not.
;; <sseintprefix> expands to "p" for integer modes and to "" for float
;; modes, so the template yields a valid mnemonic for every iterated mode.
969 (define_insn "<avx512>_blendm<mode>"
970 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
971 (vec_merge:V48_AVX512VL
972 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
973 (match_operand:V48_AVX512VL 1 "register_operand" "v")
974 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
976 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
977 [(set_attr "type" "ssemov")
978 (set_attr "prefix" "evex")
979 (set_attr "mode" "<sseinsnmode>")])
981 (define_insn "<avx512>_blendm<mode>"
982 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
983 (vec_merge:VI12_AVX512VL
984 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
985 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
986 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
988 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
989 [(set_attr "type" "ssemov")
990 (set_attr "prefix" "evex")
991 (set_attr "mode" "<sseinsnmode>")])
;; Masked store for 4- and 8-byte element vectors.  Operand 0 is the memory
;; destination, operand 1 the source register and operand 2 the mask
;; register.  The mnemonic is assembled at output time from an opcode stem,
;; an alignment letter and a mode suffix.
993 (define_insn "<avx512>_store<mode>_mask"
994 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
995 (vec_merge:V48_AVX512VL
996 (match_operand:V48_AVX512VL 1 "register_operand" "v")
998 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1001 static char buf [64];
1003 const char *insn_op;
1004 const char *sse_suffix;
1006 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1009 sse_suffix = "<ssemodesuffix>";
1014 sse_suffix = "<ssescalarsize>";
/* The alignment that matters is that of the memory destination, operand 0.
   Operand 1 is always a register, so testing it could never select the
   unaligned form and a misaligned store would use the aligned mnemonic.  */
1017 if (misaligned_operand (operands[0], <MODE>mode))
1022 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
1023 insn_op, align, sse_suffix);
1026 [(set_attr "type" "ssemov")
1027 (set_attr "prefix" "evex")
1028 (set_attr "memory" "store")
1029 (set_attr "mode" "<sseinsnmode>")])
1031 (define_insn "<avx512>_store<mode>_mask"
1032 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1033 (vec_merge:VI12_AVX512VL
1034 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1036 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1038 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1039 [(set_attr "type" "ssemov")
1040 (set_attr "prefix" "evex")
1041 (set_attr "memory" "store")
1042 (set_attr "mode" "<sseinsnmode>")])
1044 (define_insn "sse2_movq128"
1045 [(set (match_operand:V2DI 0 "register_operand" "=x")
1048 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1049 (parallel [(const_int 0)]))
1052 "%vmovq\t{%1, %0|%0, %q1}"
1053 [(set_attr "type" "ssemov")
1054 (set_attr "prefix" "maybe_vex")
1055 (set_attr "mode" "TI")])
1057 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1058 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1059 ;; from memory, we'd prefer to load the memory directly into the %xmm
1060 ;; register. To facilitate this happy circumstance, this pattern won't
1061 ;; split until after register allocation. If the 64-bit value didn't
1062 ;; come from memory, this is the best we can do. This is much better
1063 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1066 (define_insn_and_split "movdi_to_sse"
1068 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1069 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1070 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1071 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1073 "&& reload_completed"
1076 if (register_operand (operands[1], DImode))
1078 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1079 Assemble the 64-bit DImode value in an xmm register. */
1080 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1081 gen_rtx_SUBREG (SImode, operands[1], 0)));
1082 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1083 gen_rtx_SUBREG (SImode, operands[1], 4)));
1084 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1087 else if (memory_operand (operands[1], DImode))
1089 rtx tmp = gen_reg_rtx (V2DImode);
1090 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1091 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1098 [(set (match_operand:V4SF 0 "register_operand")
1099 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1100 "TARGET_SSE && reload_completed"
1103 (vec_duplicate:V4SF (match_dup 1))
1107 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1108 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload split (condition: TARGET_SSE2 && reload_completed) of a V2DF
;; register set whose source satisfies zero_extended_scalar_load_operand.
;; Operand 1 is narrowed to its DFmode subreg at byte 0 and operand 2 becomes
;; the DFmode zero constant; the result is rewritten as a vec_concat of the two.
1112 [(set (match_operand:V2DF 0 "register_operand")
1113 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1114 "TARGET_SSE2 && reload_completed"
1115 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1117 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1118 operands[2] = CONST0_RTX (DFmode);
;; Misaligned vector move expander for every mode in VMOVE.  All the work is
;; delegated to ix86_expand_vector_move_misalign, which receives the mode and
;; the operand array.
1121 (define_expand "movmisalign<mode>"
1122 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1123 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1126 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for unaligned floating-point vector loads (VF modes), optionally
;; masked via <mask_name>.  When operand 1 satisfies misaligned_operand (and
;; the additional test on the preceding condition line holds), a plain SET is
;; emitted instead of the unspec form; the source may be wrapped in a
;; VEC_MERGE built from operands[2 * <mask_applied>] and
;; operands[3 * <mask_applied>].
1130 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1131 [(set (match_operand:VF 0 "register_operand")
1132 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1134 "TARGET_SSE && <mask_mode512bit_condition>"
1136 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1137 just fine if misaligned_operand is true, and without the UNSPEC it can
1138 be combined with arithmetic instructions. If misaligned_operand is
1139 false, still emit UNSPEC_LOADU insn to honor user's request for
1142 && misaligned_operand (operands[1], <MODE>mode))
1144 rtx src = operands[1];
1146 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1147 operands[2 * <mask_applied>],
1148 operands[3 * <mask_applied>]);
1149 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn for the unaligned VF load (register destination, register-or-memory
;; source).  The output code switches on the insn's "mode" attribute and
;; returns either %vmovups or %vmovu<ssemodesuffix>.  The "mode" attribute is
;; <ssePSmode> for 16-byte modes when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; <MODE> under TARGET_AVX, V4SF when optimizing the function for size, and
;; <MODE> otherwise.
1154 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1155 [(set (match_operand:VF 0 "register_operand" "=v")
1157 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1159 "TARGET_SSE && <mask_mode512bit_condition>"
1161 switch (get_attr_mode (insn))
1166 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1168 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1171 [(set_attr "type" "ssemov")
1172 (set_attr "movu" "1")
1173 (set_attr "ssememalign" "8")
1174 (set_attr "prefix" "maybe_vex")
1176 (cond [(and (match_test "<MODE_SIZE> == 16")
1177 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1178 (const_string "<ssePSmode>")
1179 (match_test "TARGET_AVX")
1180 (const_string "<MODE>")
1181 (match_test "optimize_function_for_size_p (cfun)")
1182 (const_string "V4SF")
1184 (const_string "<MODE>")))])
;; Unaligned VF store (memory destination, register source).  The output
;; code switches on the "mode" attribute and returns %vmovups or
;; %vmovu<ssemodesuffix>.  Compared with the load insn, the 16-byte
;; <ssePSmode> case is also selected when TARGET_SSE_TYPELESS_STORES is set.
1186 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1187 [(set (match_operand:VF 0 "memory_operand" "=m")
1189 [(match_operand:VF 1 "register_operand" "v")]
1193 switch (get_attr_mode (insn))
1198 return "%vmovups\t{%1, %0|%0, %1}";
1200 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1203 [(set_attr "type" "ssemov")
1204 (set_attr "movu" "1")
1205 (set_attr "ssememalign" "8")
1206 (set_attr "prefix" "maybe_vex")
1208 (cond [(and (match_test "<MODE_SIZE> == 16")
1209 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1210 (match_test "TARGET_SSE_TYPELESS_STORES")))
1211 (const_string "<ssePSmode>")
1212 (match_test "TARGET_AVX")
1213 (const_string "<MODE>")
1214 (match_test "optimize_function_for_size_p (cfun)")
1215 (const_string "V4SF")
1217 (const_string "<MODE>")))])
;; Masked unaligned store for VF_AVX512VL modes: a vec_merge into memory
;; controlled by mask register operand 2 (constraint "Yk").  Emits vmovups or
;; vmovu<ssemodesuffix> with the {%2} mask decoration, depending on the
;; "mode" attribute; always EVEX-prefixed.
1219 (define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
1220 [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
1221 (vec_merge:VF_AVX512VL
1223 [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
1226 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1229 switch (get_attr_mode (insn))
1234 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1236 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1239 [(set_attr "type" "ssemov")
1240 (set_attr "movu" "1")
1241 (set_attr "memory" "store")
1242 (set_attr "prefix" "evex")
1243 (set_attr "mode" "<sseinsnmode>")])
;; Expander for unaligned integer vector loads in VI1 modes, optionally
;; masked.  When operand 1 satisfies misaligned_operand (together with the
;; test on the preceding condition line), a plain SET is emitted, with the
;; source optionally wrapped in a VEC_MERGE of operands[2 * <mask_applied>]
;; and operands[3 * <mask_applied>].
1245 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1246 just fine if misaligned_operand is true, and without the UNSPEC it can
1247 be combined with arithmetic instructions. If misaligned_operand is
1248 false, still emit UNSPEC_LOADU insn to honor user's request for
1250 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1251 [(set (match_operand:VI1 0 "register_operand")
1253 [(match_operand:VI1 1 "nonimmediate_operand")]
1255 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1258 && misaligned_operand (operands[1], <MODE>mode))
1260 rtx src = operands[1];
1262 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1263 operands[2 * <mask_applied>],
1264 operands[3 * <mask_applied>]);
1265 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Same expander shape as the VI1 variant, for the modes in
;; VI_ULOADSTORE_BW_AVX512VL.  Here the plain-SET shortcut is taken whenever
;; operand 1 satisfies misaligned_operand.
1270 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1271 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
1272 (unspec:VI_ULOADSTORE_BW_AVX512VL
1273 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
1277 if (misaligned_operand (operands[1], <MODE>mode))
1279 rtx src = operands[1];
1281 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1282 operands[2 * <mask_applied>],
1283 operands[3 * <mask_applied>]);
1284 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Same expander shape as the VI1 variant, for the modes in
;; VI_ULOADSTORE_F_AVX512VL.  A misaligned operand 1 is loaded with a plain
;; SET, optionally wrapped in a VEC_MERGE with the mask operands.
1289 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1290 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
1291 (unspec:VI_ULOADSTORE_F_AVX512VL
1292 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
1296 if (misaligned_operand (operands[1], <MODE>mode))
1298 rtx src = operands[1];
1300 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1301 operands[2 * <mask_applied>],
1302 operands[3 * <mask_applied>]);
1303 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn for the unaligned VI1 load.  The output code switches on the "mode"
;; attribute: one path returns %vmovups; otherwise %vmovdqu is used unless
;; both TARGET_AVX512VL and TARGET_AVX512BW are enabled, in which case
;; vmovdqu<ssescalarsize> (with optional mask decoration) is emitted.
;; prefix_data16 and mode are computed from TARGET_AVX and size/tuning tests.
1308 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1309 [(set (match_operand:VI1 0 "register_operand" "=v")
1311 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
1313 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1315 switch (get_attr_mode (insn))
1319 return "%vmovups\t{%1, %0|%0, %1}";
1321 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1322 return "%vmovdqu\t{%1, %0|%0, %1}";
1324 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1327 [(set_attr "type" "ssemov")
1328 (set_attr "movu" "1")
1329 (set_attr "ssememalign" "8")
1330 (set (attr "prefix_data16")
1332 (match_test "TARGET_AVX")
1334 (const_string "1")))
1335 (set_attr "prefix" "maybe_vex")
1337 (cond [(and (match_test "<MODE_SIZE> == 16")
1338 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1339 (const_string "<ssePSmode>")
1340 (match_test "TARGET_AVX")
1341 (const_string "<sseinsnmode>")
1342 (match_test "optimize_function_for_size_p (cfun)")
1343 (const_string "V4SF")
1345 (const_string "<sseinsnmode>")))])
;; Insn for the unaligned load in VI_ULOADSTORE_BW_AVX512VL modes.  Always
;; emits vmovdqu<ssescalarsize>, with the optional mask decoration supplied
;; by <mask_operand2>.  (A stray ';' that followed the output template was
;; removed; the md reader treated it as an empty comment.)
1347 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1348 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
1349 (unspec:VI_ULOADSTORE_BW_AVX512VL
1350 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
1353 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1354 [(set_attr "type" "ssemov")
1355 (set_attr "movu" "1")
1356 (set_attr "ssememalign" "8")
1357 (set_attr "prefix" "maybe_evex")])
;; Insn for the unaligned load in VI_ULOADSTORE_F_AVX512VL modes.  Always
;; emits vmovdqu<ssescalarsize>, with the optional mask decoration supplied
;; by <mask_operand2>.  (A stray ';' that followed the output template was
;; removed; the md reader treated it as an empty comment.)
1359 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1360 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
1361 (unspec:VI_ULOADSTORE_F_AVX512VL
1362 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
1365 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1366 [(set_attr "type" "ssemov")
1367 (set_attr "movu" "1")
1368 (set_attr "ssememalign" "8")
1369 (set_attr "prefix" "maybe_evex")])
;; Unaligned VI1 store (memory destination, register source).  The output
;; code switches on the "mode" attribute: one path returns %vmovups;
;; otherwise %vmovdqu is used unless both TARGET_AVX512VL and TARGET_AVX512BW
;; are enabled, in which case vmovdqu<ssescalarsize> is emitted.  The
;; 16-byte <ssePSmode> case also applies under TARGET_SSE_TYPELESS_STORES.
1371 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1372 [(set (match_operand:VI1 0 "memory_operand" "=m")
1374 [(match_operand:VI1 1 "register_operand" "v")]
1378 switch (get_attr_mode (insn))
1383 return "%vmovups\t{%1, %0|%0, %1}";
1389 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1390 return "%vmovdqu\t{%1, %0|%0, %1}";
1392 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1396 [(set_attr "type" "ssemov")
1397 (set_attr "movu" "1")
1398 (set_attr "ssememalign" "8")
1399 (set (attr "prefix_data16")
1401 (match_test "TARGET_AVX")
1403 (const_string "1")))
1404 (set_attr "prefix" "maybe_vex")
1406 (cond [(and (match_test "<MODE_SIZE> == 16")
1407 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1408 (match_test "TARGET_SSE_TYPELESS_STORES")))
1409 (const_string "<ssePSmode>")
1410 (match_test "TARGET_AVX")
1411 (const_string "<sseinsnmode>")
1412 (match_test "optimize_function_for_size_p (cfun)")
1413 (const_string "V4SF")
1415 (const_string "<sseinsnmode>")))])
;; Unaligned store for VI_ULOADSTORE_BW_AVX512VL modes; always emits
;; vmovdqu<ssescalarsize> from register operand 1 to memory operand 0.
1417 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1418 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1419 (unspec:VI_ULOADSTORE_BW_AVX512VL
1420 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1423 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1424 [(set_attr "type" "ssemov")
1425 (set_attr "movu" "1")
1426 (set_attr "ssememalign" "8")
1427 (set_attr "prefix" "maybe_evex")])
;; Unaligned store for VI_ULOADSTORE_F_AVX512VL modes; always emits
;; vmovdqu<ssescalarsize> from register operand 1 to memory operand 0.
;; The "prefix" attribute is maybe_evex, matching the load insn for the same
;; iterator and the BW store variant, since the emitted mnemonic carries an
;; element-size suffix.
1429 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1430 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
1431 (unspec:VI_ULOADSTORE_F_AVX512VL
1432 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
1435 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1436 [(set_attr "type" "ssemov")
1437 (set_attr "movu" "1")
1438 (set_attr "ssememalign" "8")
1439 (set_attr "prefix" "maybe_evex")])
;; Masked unaligned store for VI48_AVX512VL modes: a vec_merge into memory
;; under mask register operand 2 ("Yk").  Emits vmovdqu<ssescalarsize> with
;; the {%2} mask decoration; EVEX-prefixed.
1441 (define_insn "<avx512>_storedqu<mode>_mask"
1442 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1443 (vec_merge:VI48_AVX512VL
1444 (unspec:VI48_AVX512VL
1445 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
1448 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1450 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1451 [(set_attr "type" "ssemov")
1452 (set_attr "movu" "1")
1453 (set_attr "memory" "store")
1454 (set_attr "prefix" "evex")
1455 (set_attr "mode" "<sseinsnmode>")])
;; Masked unaligned store for VI12_AVX512VL modes; same shape and output
;; template as the VI48_AVX512VL variant of this pattern.
1457 (define_insn "<avx512>_storedqu<mode>_mask"
1458 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1459 (vec_merge:VI12_AVX512VL
1460 (unspec:VI12_AVX512VL
1461 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1464 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1466 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1467 [(set_attr "type" "ssemov")
1468 (set_attr "movu" "1")
1469 (set_attr "memory" "store")
1470 (set_attr "prefix" "evex")
1471 (set_attr "mode" "<sseinsnmode>")])
;; lddqu: VI1 load from memory operand 1 into register operand 0, emitted as
;; %vlddqu.  prefix_data16 and prefix_rep are computed from TARGET_AVX.
1473 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1474 [(set (match_operand:VI1 0 "register_operand" "=x")
1475 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1478 "%vlddqu\t{%1, %0|%0, %1}"
1479 [(set_attr "type" "ssemov")
1480 (set_attr "movu" "1")
1481 (set_attr "ssememalign" "8")
1482 (set (attr "prefix_data16")
1484 (match_test "TARGET_AVX")
1486 (const_string "0")))
1487 (set (attr "prefix_rep")
1489 (match_test "TARGET_AVX")
1491 (const_string "1")))
1492 (set_attr "prefix" "maybe_vex")
1493 (set_attr "mode" "<sseinsnmode>")])
;; movnti: store of a general register (SWI48 modes) to memory through an
;; unspec, emitted as the movnti instruction.
1495 (define_insn "sse2_movnti<mode>"
1496 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1497 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1500 "movnti\t{%1, %0|%0, %1}"
1501 [(set_attr "type" "ssemov")
1502 (set_attr "prefix_data16" "0")
1503 (set_attr "mode" "<MODE>")])
;; movntps/movntpd family: store of a VF vector register to memory through
;; an unspec, emitted as %vmovnt<ssemodesuffix>.
1505 (define_insn "<sse>_movnt<mode>"
1506 [(set (match_operand:VF 0 "memory_operand" "=m")
1508 [(match_operand:VF 1 "register_operand" "v")]
1511 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1512 [(set_attr "type" "ssemov")
1513 (set_attr "prefix" "maybe_vex")
1514 (set_attr "mode" "<MODE>")])
;; movntdq: store of a VI8 vector register to memory through an unspec,
;; emitted as %vmovntdq.
;; NOTE(review): "type" is ssecvt here while the sibling movnt patterns use
;; ssemov -- confirm this is intentional.
1516 (define_insn "<sse2>_movnt<mode>"
1517 [(set (match_operand:VI8 0 "memory_operand" "=m")
1518 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1521 "%vmovntdq\t{%1, %0|%0, %1}"
1522 [(set_attr "type" "ssecvt")
1523 (set (attr "prefix_data16")
1525 (match_test "TARGET_AVX")
1527 (const_string "1")))
1528 (set_attr "prefix" "maybe_vex")
1529 (set_attr "mode" "<sseinsnmode>")])
1531 ; Expand patterns for non-temporal stores. At the moment, only those
1532 ; that directly map to insns are defined; it would be possible to
1533 ; define patterns for other modes that would expand to several insns.
1535 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition that enables it; V4SF
;; carries no condition of its own.
1536 (define_mode_iterator STORENT_MODE
1537 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1538 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1539 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1540 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1541 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; storent<mode>: expander over STORENT_MODE that sets memory operand 0 from
;; an unspec wrapping register operand 1.
1543 (define_expand "storent<mode>"
1544 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1545 (unspec:STORENT_MODE
1546 [(match_operand:STORENT_MODE 1 "register_operand")]
1550 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1552 ;; Parallel floating point arithmetic
1554 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unary <code> operation on VF modes; all work is done by
;; ix86_expand_fp_absneg_operator, after which expansion is DONE.
1556 (define_expand "<code><mode>2"
1557 [(set (match_operand:VF 0 "register_operand")
1559 (match_operand:VF 1 "register_operand")))]
1561 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg on VF modes, matched through absneg_operator with an extra "use"
;; of operand 2.  After reload the insn is split into a single logical
;; operation on two operands: XOR when the operator is NEG, AND otherwise.
;; If operand 1 is in memory, the operand order is swapped so that operand 2
;; comes first.
1563 (define_insn_and_split "*absneg<mode>2"
1564 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1565 (match_operator:VF 3 "absneg_operator"
1566 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1567 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1570 "&& reload_completed"
1573 enum rtx_code absneg_op;
1579 if (MEM_P (operands[1]))
1580 op1 = operands[2], op2 = operands[1];
1582 op1 = operands[1], op2 = operands[2];
1587 if (rtx_equal_p (operands[0], operands[1]))
1593 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1594 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1595 t = gen_rtx_SET (VOIDmode, operands[0], t);
1599 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander for vector add/subtract on VF modes, with optional masking and
;; embedded rounding.  Operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
1601 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1602 [(set (match_operand:VF 0 "register_operand")
1604 (match_operand:VF 1 "<round_nimm_predicate>")
1605 (match_operand:VF 2 "<round_nimm_predicate>")))]
1606 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1607 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for vector add/subtract on VF modes.  Alternative 0 (noavx) is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; (avx) is the three-operand v-prefixed form with optional mask and rounding
;; decorations.  Requires ix86_binary_operator_ok.
1609 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1610 [(set (match_operand:VF 0 "register_operand" "=x,v")
1612 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1613 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1614 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1616 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1617 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1618 [(set_attr "isa" "noavx,avx")
1619 (set_attr "type" "sseadd")
1620 (set_attr "prefix" "<mask_prefix3>")
1621 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector add/subtract for VF_128 modes, emitted with the scalar
;; mnemonic suffix.  Alternative 0 is the SSE two-operand form, alternative 1
;; the AVX three-operand form with optional rounding operand.
1623 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1624 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1627 (match_operand:VF_128 1 "register_operand" "0,v")
1628 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1633 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1634 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1635 [(set_attr "isa" "noavx,avx")
1636 (set_attr "type" "sseadd")
1637 (set_attr "prefix" "<round_prefix>")
1638 (set_attr "mode" "<ssescalarmode>")])
;; Expander for vector multiply on VF modes, with optional masking and
;; embedded rounding.  Operands are legitimized by
;; ix86_fixup_binary_operands_no_copy with code MULT.
1640 (define_expand "mul<mode>3<mask_name><round_name>"
1641 [(set (match_operand:VF 0 "register_operand")
1643 (match_operand:VF 1 "<round_nimm_predicate>")
1644 (match_operand:VF 2 "<round_nimm_predicate>")))]
1645 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1646 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn for vector multiply on VF modes.  Operand 1 is marked commutative
;; ("%").  Alternative 0 emits mul<ssemodesuffix>, alternative 1 the
;; three-operand vmul form with optional mask and rounding decorations.
1648 (define_insn "*mul<mode>3<mask_name><round_name>"
1649 [(set (match_operand:VF 0 "register_operand" "=x,v")
1651 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1652 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1653 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1655 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1656 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1657 [(set_attr "isa" "noavx,avx")
1658 (set_attr "type" "ssemul")
1659 (set_attr "prefix" "<mask_prefix3>")
1660 (set_attr "btver2_decode" "direct,double")
1661 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector multiply/divide for VF_128 modes; the mnemonic and the
;; "type" attribute are both derived from <multdiv_mnemonic>.
1663 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1664 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1667 (match_operand:VF_128 1 "register_operand" "0,v")
1668 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1673 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1674 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1675 [(set_attr "isa" "noavx,avx")
1676 (set_attr "type" "sse<multdiv_mnemonic>")
1677 (set_attr "prefix" "<round_prefix>")
1678 (set_attr "btver2_decode" "direct,double")
1679 (set_attr "mode" "<ssescalarmode>")])
;; Vector divide expander for VF2 modes; only fixes up the operands via
;; ix86_fixup_binary_operands_no_copy.
1681 (define_expand "div<mode>3"
1682 [(set (match_operand:VF2 0 "register_operand")
1683 (div:VF2 (match_operand:VF2 1 "register_operand")
1684 (match_operand:VF2 2 "nonimmediate_operand")))]
1686 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Vector divide expander for VF1 modes.  After fixing up the operands it
;; may instead emit a software division sequence via ix86_emit_swdivsf when
;; TARGET_RECIP_VEC_DIV is set, the insn is not optimized for size, and the
;; finite-math, non-trapping and unsafe-math flags all allow it (plus one
;; more condition on the line preceding those tests).
1688 (define_expand "div<mode>3"
1689 [(set (match_operand:VF1 0 "register_operand")
1690 (div:VF1 (match_operand:VF1 1 "register_operand")
1691 (match_operand:VF1 2 "nonimmediate_operand")))]
1694 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1697 && TARGET_RECIP_VEC_DIV
1698 && !optimize_insn_for_size_p ()
1699 && flag_finite_math_only && !flag_trapping_math
1700 && flag_unsafe_math_optimizations)
1702 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Insn for vector divide on VF modes.  Operand 1 must be a register (no
;; commutativity); alternative 0 emits div<ssemodesuffix>, alternative 1 the
;; three-operand vdiv form with optional mask and rounding decorations.
1707 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1708 [(set (match_operand:VF 0 "register_operand" "=x,v")
1710 (match_operand:VF 1 "register_operand" "0,v")
1711 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1712 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1714 div<ssemodesuffix>\t{%2, %0|%0, %2}
1715 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1716 [(set_attr "isa" "noavx,avx")
1717 (set_attr "type" "ssediv")
1718 (set_attr "prefix" "<mask_prefix3>")
1719 (set_attr "mode" "<MODE>")])
;; Packed reciprocal (UNSPEC_RCP) for VF1_128_256 modes, emitted as %vrcpps.
1721 (define_insn "<sse>_rcp<mode>2"
1722 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1724 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1726 "%vrcpps\t{%1, %0|%0, %1}"
1727 [(set_attr "type" "sse")
1728 (set_attr "atom_sse_attr" "rcp")
1729 (set_attr "btver2_sse_attr" "rcp")
1730 (set_attr "prefix" "maybe_vex")
1731 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF: the unspec result of operand 1 is combined
;; with register operand 2.  Emits rcpss (operand 2 tied to the destination)
;; or the three-operand vrcpss.
1733 (define_insn "sse_vmrcpv4sf2"
1734 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1736 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1738 (match_operand:V4SF 2 "register_operand" "0,x")
1742 rcpss\t{%1, %0|%0, %k1}
1743 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1744 [(set_attr "isa" "noavx,avx")
1745 (set_attr "type" "sse")
1746 (set_attr "ssememalign" "32")
1747 (set_attr "atom_sse_attr" "rcp")
1748 (set_attr "btver2_sse_attr" "rcp")
1749 (set_attr "prefix" "orig,vex")
1750 (set_attr "mode" "SF")])
;; vrcp14 for VF_AVX512VL modes with optional masking; EVEX-prefixed.
1752 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1753 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1755 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1758 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1759 [(set_attr "type" "sse")
1760 (set_attr "prefix" "evex")
1761 (set_attr "mode" "<MODE>")])
;; Scalar vrcp14 for VF_128 modes: three-operand form taking operand 1
;; (register or memory) and register operand 2; EVEX-prefixed.
1763 (define_insn "srcp14<mode>"
1764 [(set (match_operand:VF_128 0 "register_operand" "=v")
1767 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1769 (match_operand:VF_128 2 "register_operand" "v")
1772 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1773 [(set_attr "type" "sse")
1774 (set_attr "prefix" "evex")
1775 (set_attr "mode" "<MODE>")])
;; Vector square-root expander for VF2 modes.
1777 (define_expand "sqrt<mode>2"
1778 [(set (match_operand:VF2 0 "register_operand")
1779 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Vector square-root expander for VF1 modes.  When TARGET_RECIP_VEC_SQRT is
;; set, the insn is not optimized for size, and the finite-math, non-trapping
;; and unsafe-math flags allow it (plus one more condition on the preceding
;; line), a software sequence is emitted by ix86_emit_swsqrtsf with its last
;; argument false.
1782 (define_expand "sqrt<mode>2"
1783 [(set (match_operand:VF1 0 "register_operand")
1784 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1788 && TARGET_RECIP_VEC_SQRT
1789 && !optimize_insn_for_size_p ()
1790 && flag_finite_math_only && !flag_trapping_math
1791 && flag_unsafe_math_optimizations)
1793 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Insn for vector square root on VF modes, emitted as
;; %vsqrt<ssemodesuffix> with optional mask and rounding decorations.
1798 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1799 [(set (match_operand:VF 0 "register_operand" "=v")
1800 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1801 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1802 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1803 [(set_attr "type" "sse")
1804 (set_attr "atom_sse_attr" "sqrt")
1805 (set_attr "btver2_sse_attr" "sqrt")
1806 (set_attr "prefix" "maybe_vex")
1807 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector square root for VF_128 modes: operand 1 is the value
;; whose root is taken and register operand 2 is the second input (tied to
;; the destination in the noavx alternative).
1809 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1810 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1813 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1814 (match_operand:VF_128 2 "register_operand" "0,v")
1818 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1819 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1820 [(set_attr "isa" "noavx,avx")
1821 (set_attr "type" "sse")
1822 (set_attr "atom_sse_attr" "sqrt")
1823 (set_attr "prefix" "<round_prefix>")
1824 (set_attr "btver2_sse_attr" "sqrt")
1825 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander (UNSPEC_RSQRT) for VF1_128_256 modes;
;; calls ix86_emit_swsqrtsf with its last argument true.
1827 (define_expand "rsqrt<mode>2"
1828 [(set (match_operand:VF1_128_256 0 "register_operand")
1830 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1833 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root (UNSPEC_RSQRT) for VF1_128_256 modes,
;; emitted as %vrsqrtps.
1837 (define_insn "<sse>_rsqrt<mode>2"
1838 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1840 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1842 "%vrsqrtps\t{%1, %0|%0, %1}"
1843 [(set_attr "type" "sse")
1844 (set_attr "prefix" "maybe_vex")
1845 (set_attr "mode" "<MODE>")])
;; vrsqrt14 for VF_AVX512VL modes with optional masking; EVEX-prefixed.
1847 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1848 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1850 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1853 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1854 [(set_attr "type" "sse")
1855 (set_attr "prefix" "evex")
1856 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt14 for VF_128 modes: three-operand form taking operand 1
;; (register or memory) and register operand 2; EVEX-prefixed.
1858 (define_insn "rsqrt14<mode>"
1859 [(set (match_operand:VF_128 0 "register_operand" "=v")
1862 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1864 (match_operand:VF_128 2 "register_operand" "v")
1867 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1868 [(set_attr "type" "sse")
1869 (set_attr "prefix" "evex")
1870 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF: the unspec result of operand 1 is
;; combined with register operand 2.  Emits rsqrtss (operand 2 tied to the
;; destination) or the three-operand vrsqrtss.
1872 (define_insn "sse_vmrsqrtv4sf2"
1873 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1875 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1877 (match_operand:V4SF 2 "register_operand" "0,x")
1881 rsqrtss\t{%1, %0|%0, %k1}
1882 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1883 [(set_attr "isa" "noavx,avx")
1884 (set_attr "type" "sse")
1885 (set_attr "ssememalign" "32")
1886 (set_attr "prefix" "orig,vex")
1887 (set_attr "mode" "SF")])
1889 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1890 ;; isn't really correct, as those rtl operators aren't defined when
1891 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for vector min/max (<code>) on VF modes.  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a register;
;; the operands are then fixed up by ix86_fixup_binary_operands_no_copy.
1893 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1894 [(set (match_operand:VF 0 "register_operand")
1896 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1897 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1898 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1900 if (!flag_finite_math_only)
1901 operands[1] = force_reg (<MODE>mode, operands[1]);
1902 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used only when flag_finite_math_only is set.  Operand 1 is
;; marked commutative ("%") and ix86_binary_operator_ok must hold.
1905 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1906 [(set (match_operand:VF 0 "register_operand" "=x,v")
1908 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1909 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1910 "TARGET_SSE && flag_finite_math_only
1911 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1912 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1914 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1915 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1916 [(set_attr "isa" "noavx,avx")
1917 (set_attr "type" "sseadd")
1918 (set_attr "btver2_sse_attr" "maxmin")
1919 (set_attr "prefix" "<mask_prefix3>")
1920 (set_attr "mode" "<MODE>")])
;; Min/max insn used when flag_finite_math_only is not set.  Operand 1 must
;; be a register and carries no commutative marker, so the operand order is
;; preserved.
1922 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1923 [(set (match_operand:VF 0 "register_operand" "=x,v")
1925 (match_operand:VF 1 "register_operand" "0,v")
1926 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1927 "TARGET_SSE && !flag_finite_math_only
1928 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1930 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1931 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1932 [(set_attr "isa" "noavx,avx")
1933 (set_attr "type" "sseadd")
1934 (set_attr "btver2_sse_attr" "maxmin")
1935 (set_attr "prefix" "<mask_prefix3>")
1936 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector min/max for VF_128 modes, emitted with the scalar
;; mnemonic suffix; the AVX alternative accepts an optional SAE operand.
1938 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1939 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1942 (match_operand:VF_128 1 "register_operand" "0,v")
1943 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1948 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1949 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1950 [(set_attr "isa" "noavx,avx")
1951 (set_attr "type" "sse")
1952 (set_attr "btver2_sse_attr" "maxmin")
1953 (set_attr "prefix" "<round_saeonly_prefix>")
1954 (set_attr "mode" "<ssescalarmode>")])
1956 ;; These versions of the min/max patterns implement exactly the operations
1957 ;; min = (op1 < op2 ? op1 : op2)
1958 ;; max = (!(op1 < op2) ? op1 : op2)
1959 ;; Their operands are not commutative, and thus they may be used in the
1960 ;; presence of -0.0 and NaN.
;; Non-commutative min on VF modes, expressed as an unspec over operands 1
;; and 2 in that order.  Emits min<ssemodesuffix> or the three-operand vmin.
1962 (define_insn "*ieee_smin<mode>3"
1963 [(set (match_operand:VF 0 "register_operand" "=v,v")
1965 [(match_operand:VF 1 "register_operand" "0,v")
1966 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1970 min<ssemodesuffix>\t{%2, %0|%0, %2}
1971 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1972 [(set_attr "isa" "noavx,avx")
1973 (set_attr "type" "sseadd")
1974 (set_attr "prefix" "orig,vex")
1975 (set_attr "mode" "<MODE>")])
;; Non-commutative max on VF modes, expressed as an unspec over operands 1
;; and 2 in that order.  Emits max<ssemodesuffix> or the three-operand vmax.
1977 (define_insn "*ieee_smax<mode>3"
1978 [(set (match_operand:VF 0 "register_operand" "=v,v")
1980 [(match_operand:VF 1 "register_operand" "0,v")
1981 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1985 max<ssemodesuffix>\t{%2, %0|%0, %2}
1986 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1987 [(set_attr "isa" "noavx,avx")
1988 (set_attr "type" "sseadd")
1989 (set_attr "prefix" "orig,vex")
1990 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF: combines an operation on operands 1 and 2 with
;; (minus op1 op2) over the same operands; AVX-only, three-operand form.
1992 (define_insn "avx_addsubv4df3"
1993 [(set (match_operand:V4DF 0 "register_operand" "=x")
1996 (match_operand:V4DF 1 "register_operand" "x")
1997 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1998 (minus:V4DF (match_dup 1) (match_dup 2))
2001 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2002 [(set_attr "type" "sseadd")
2003 (set_attr "prefix" "vex")
2004 (set_attr "mode" "V4DF")])
;; addsubpd on V2DF: SSE3 two-operand form (operand 1 tied to the
;; destination) or the AVX three-operand vaddsubpd.
2006 (define_insn "sse3_addsubv2df3"
2007 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2010 (match_operand:V2DF 1 "register_operand" "0,x")
2011 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
2012 (minus:V2DF (match_dup 1) (match_dup 2))
2016 addsubpd\t{%2, %0|%0, %2}
2017 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2018 [(set_attr "isa" "noavx,avx")
2019 (set_attr "type" "sseadd")
2020 (set_attr "atom_unit" "complex")
2021 (set_attr "prefix" "orig,vex")
2022 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF: combines an operation on operands 1 and 2 with
;; (minus op1 op2) over the same operands; AVX-only, three-operand form.
2024 (define_insn "avx_addsubv8sf3"
2025 [(set (match_operand:V8SF 0 "register_operand" "=x")
2028 (match_operand:V8SF 1 "register_operand" "x")
2029 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2030 (minus:V8SF (match_dup 1) (match_dup 2))
2033 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2034 [(set_attr "type" "sseadd")
2035 (set_attr "prefix" "vex")
2036 (set_attr "mode" "V8SF")])
;; addsubps on V4SF: SSE3 two-operand form (operand 1 tied to the
;; destination, prefix_rep set) or the AVX three-operand vaddsubps.
2038 (define_insn "sse3_addsubv4sf3"
2039 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2042 (match_operand:V4SF 1 "register_operand" "0,x")
2043 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2044 (minus:V4SF (match_dup 1) (match_dup 2))
2048 addsubps\t{%2, %0|%0, %2}
2049 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2050 [(set_attr "isa" "noavx,avx")
2051 (set_attr "type" "sseadd")
2052 (set_attr "prefix" "orig,vex")
2053 (set_attr "prefix_rep" "1,*")
2054 (set_attr "mode" "V4SF")])
;; Horizontal add/subtract on V4DF.  The pattern pairs elements 0 and 1 of
;; operand 1, elements 0 and 1 of operand 2, then elements 2 and 3 of
;; operand 1 and elements 2 and 3 of operand 2.  Emitted as
;; vh<plusminus_mnemonic>pd.
2056 (define_insn "avx_h<plusminus_insn>v4df3"
2057 [(set (match_operand:V4DF 0 "register_operand" "=x")
2062 (match_operand:V4DF 1 "register_operand" "x")
2063 (parallel [(const_int 0)]))
2064 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2067 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2068 (parallel [(const_int 0)]))
2069 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2072 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2073 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2075 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2076 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2078 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2079 [(set_attr "type" "sseadd")
2080 (set_attr "prefix" "vex")
2081 (set_attr "mode" "V4DF")])
;; Expander for horizontal add on V2DF: pairs element 0 with element 1 of
;; operand 1, and element 0 with element 1 of operand 2.
2083 (define_expand "sse3_haddv2df3"
2084 [(set (match_operand:V2DF 0 "register_operand")
2088 (match_operand:V2DF 1 "register_operand")
2089 (parallel [(const_int 0)]))
2090 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2093 (match_operand:V2DF 2 "nonimmediate_operand")
2094 (parallel [(const_int 0)]))
2095 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; Insn for horizontal add on V2DF.  The element selectors are operands 3-6
;; (each 0 or 1); the condition requires the two selectors applied to each
;; input to differ, so either element order within a pair is accepted.
2098 (define_insn "*sse3_haddv2df3"
2099 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2103 (match_operand:V2DF 1 "register_operand" "0,x")
2104 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2107 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2110 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2111 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2114 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2116 && INTVAL (operands[3]) != INTVAL (operands[4])
2117 && INTVAL (operands[5]) != INTVAL (operands[6])"
2119 haddpd\t{%2, %0|%0, %2}
2120 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2121 [(set_attr "isa" "noavx,avx")
2122 (set_attr "type" "sseadd")
2123 (set_attr "prefix" "orig,vex")
2124 (set_attr "mode" "V2DF")])
;; Horizontal subtract on V2DF with fixed element order (element 0, then
;; element 1) for both inputs.  Emits hsubpd or the three-operand vhsubpd.
2126 (define_insn "sse3_hsubv2df3"
2127 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2131 (match_operand:V2DF 1 "register_operand" "0,x")
2132 (parallel [(const_int 0)]))
2133 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2136 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2137 (parallel [(const_int 0)]))
2138 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2141 hsubpd\t{%2, %0|%0, %2}
2142 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2143 [(set_attr "isa" "noavx,avx")
2144 (set_attr "type" "sseadd")
2145 (set_attr "prefix" "orig,vex")
2146 (set_attr "mode" "V2DF")])
;; DFmode result formed from two elements of a single V2DF input (operand 1),
;; selected by operands 2 and 3, which must differ.  The instruction is
;; emitted with the same register as both sources (haddpd %0, %0 or
;; vhaddpd %1, %1, %0).
2148 (define_insn "*sse3_haddv2df3_low"
2149 [(set (match_operand:DF 0 "register_operand" "=x,x")
2152 (match_operand:V2DF 1 "register_operand" "0,x")
2153 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2156 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2158 && INTVAL (operands[2]) != INTVAL (operands[3])"
2160 haddpd\t{%0, %0|%0, %0}
2161 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2162 [(set_attr "isa" "noavx,avx")
2163 (set_attr "type" "sseadd1")
2164 (set_attr "prefix" "orig,vex")
2165 (set_attr "mode" "V2DF")])
;; Horizontal subtract with a scalar DF result taken from a single V2DF
;; input.  The element selectors are the constants 0 and 1 (subtraction is
;; order-sensitive).  The instruction is emitted with the same register
;; used for both sources.
2167 (define_insn "*sse3_hsubv2df3_low"
2168 [(set (match_operand:DF 0 "register_operand" "=x,x")
2171 (match_operand:V2DF 1 "register_operand" "0,x")
2172 (parallel [(const_int 0)]))
2175 (parallel [(const_int 1)]))))]
2178 hsubpd\t{%0, %0|%0, %0}
2179 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2180 [(set_attr "isa" "noavx,avx")
2181 (set_attr "type" "sseadd1")
2182 (set_attr "prefix" "orig,vex")
2183 (set_attr "mode" "V2DF")])
;; 256-bit single-precision horizontal add/subtract (VEX only).  The
;; template pairs adjacent elements: 0..3 of operand 1, 0..3 of operand 2,
;; then 4..7 of operand 1 and 4..7 of operand 2.
;; Emits vh<plusminus_mnemonic>ps.
2185 (define_insn "avx_h<plusminus_insn>v8sf3"
2186 [(set (match_operand:V8SF 0 "register_operand" "=x")
2192 (match_operand:V8SF 1 "register_operand" "x")
2193 (parallel [(const_int 0)]))
2194 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2196 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2197 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2201 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2202 (parallel [(const_int 0)]))
2203 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2205 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2206 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2210 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2211 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2213 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2214 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2217 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2218 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2220 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2221 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2223 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2224 [(set_attr "type" "sseadd")
2225 (set_attr "prefix" "vex")
2226 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract.  Adjacent elements
;; 0..3 of operand 1 and then of operand 2 are paired.  Alternative 0 is
;; the legacy two-operand h<plusminus_mnemonic>ps (destination tied to
;; operand 1, prefix_rep set), alternative 1 the VEX three-operand form.
2228 (define_insn "sse3_h<plusminus_insn>v4sf3"
2229 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2234 (match_operand:V4SF 1 "register_operand" "0,x")
2235 (parallel [(const_int 0)]))
2236 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2238 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2239 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2243 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2244 (parallel [(const_int 0)]))
2245 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2247 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2248 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2251 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2252 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2253 [(set_attr "isa" "noavx,avx")
2254 (set_attr "type" "sseadd")
2255 (set_attr "atom_unit" "complex")
2256 (set_attr "prefix" "orig,vex")
2257 (set_attr "prefix_rep" "1,*")
2258 (set_attr "mode" "V4SF")])
;; Sum reduction of a V8DF register into operand 0.  Expansion is handed
;; to ix86_expand_reduc with addv8df3 as the combining operation.
2260 (define_expand "reduc_splus_v8df"
2261 [(match_operand:V8DF 0 "register_operand")
2262 (match_operand:V8DF 1 "register_operand")]
2265 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction of a V4DF register, expanded as three insns:
;;   1. horizontal add of operand 1 with itself into tmp;
;;   2. vperm2f128 of tmp with itself and immediate 1 into tmp2 (with both
;;      sources identical this exchanges the two 128-bit halves);
;;   3. vector add of tmp and tmp2 into operand 0.
2269 (define_expand "reduc_splus_v4df"
2270 [(match_operand:V4DF 0 "register_operand")
2271 (match_operand:V4DF 1 "register_operand")]
2274 rtx tmp = gen_reg_rtx (V4DFmode);
2275 rtx tmp2 = gen_reg_rtx (V4DFmode);
2276 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2277 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2278 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF register: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
2282 (define_expand "reduc_splus_v2df"
2283 [(match_operand:V2DF 0 "register_operand")
2284 (match_operand:V2DF 1 "register_operand")]
2287 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V16SF register into operand 0.  Expansion is handed
;; to ix86_expand_reduc with addv16sf3 as the combining operation.
2291 (define_expand "reduc_splus_v16sf"
2292 [(match_operand:V16SF 0 "register_operand")
2293 (match_operand:V16SF 1 "register_operand")]
2296 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum reduction of a V8SF register, expanded as four insns:
;;   1. horizontal add of operand 1 with itself into tmp;
;;   2. horizontal add of tmp with itself into tmp2;
;;   3. vperm2f128 of tmp2 with itself and immediate 1, reusing tmp as the
;;      destination (with both sources identical this exchanges the two
;;      128-bit halves);
;;   4. vector add of tmp and tmp2 into operand 0.
2300 (define_expand "reduc_splus_v8sf"
2301 [(match_operand:V8SF 0 "register_operand")
2302 (match_operand:V8SF 1 "register_operand")]
2305 rtx tmp = gen_reg_rtx (V8SFmode);
2306 rtx tmp2 = gen_reg_rtx (V8SFmode);
2307 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2308 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2309 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2310 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF register.  Two expansions are provided: a pair
;; of haddv4sf3 insns (operand 1 with itself, then that result with
;; itself), or the generic ix86_expand_reduc helper using addv4sf3.
2314 (define_expand "reduc_splus_v4sf"
2315 [(match_operand:V4SF 0 "register_operand")
2316 (match_operand:V4SF 1 "register_operand")]
2321 rtx tmp = gen_reg_rtx (V4SFmode);
2322 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2323 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2326 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2330 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each mode is enabled only when the target flag listed beside it holds:
;; 256-bit integer modes need AVX2, 256-bit float modes AVX, V4SF plain SSE,
;; 512-bit byte/word modes AVX512BW and the other 512-bit modes AVX512F.
2331 (define_mode_iterator REDUC_SMINMAX_MODE
2332 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2333 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2334 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2335 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2336 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2337 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2338 (V8DF "TARGET_AVX512F")])
;; Signed min/max reduction (smaxmin) over REDUC_SMINMAX_MODE.  Expansion
;; is handed to ix86_expand_reduc with the matching <code><mode>3
;; generator as the combining operation.
2340 (define_expand "reduc_<code>_<mode>"
2341 [(smaxmin:REDUC_SMINMAX_MODE
2342 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2343 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2346 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned min/max reduction (umaxmin) over the VI_AVX512BW modes.
;; Expansion is handed to ix86_expand_reduc with the matching
;; <code><mode>3 generator as the combining operation.
2350 (define_expand "reduc_<code>_<mode>"
2351 [(umaxmin:VI_AVX512BW
2352 (match_operand:VI_AVX512BW 0 "register_operand")
2353 (match_operand:VI_AVX512BW 1 "register_operand"))]
2356 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 integer modes.  Expansion is handed
;; to ix86_expand_reduc with the matching <code><mode>3 generator.
2360 (define_expand "reduc_<code>_<mode>"
2362 (match_operand:VI_256 0 "register_operand")
2363 (match_operand:VI_256 1 "register_operand"))]
2366 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction of a V8HI register.  Expansion is handed to
;; ix86_expand_reduc with uminv8hi3 as the combining operation.
2370 (define_expand "reduc_umin_v8hi"
2372 (match_operand:V8HI 0 "register_operand")
2373 (match_operand:V8HI 1 "register_operand"))]
2376 ix86_expand_reduce (gen_uminv8hi3, operands[0], operands[1]);
;; Packed vreduce on the VF_AVX512VL modes (EVEX encoded).  Operand 1 is
;; the source (register or memory) and operand 2 an immediate in 0..255.
;; The <mask_name> substitution supplies the masked variant; its mask
;; operand is printed after the destination.
2380 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2381 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2383 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2384 (match_operand:SI 2 "const_0_to_255_operand")]
2387 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2388 [(set_attr "type" "sse")
2389 (set_attr "prefix" "evex")
2390 (set_attr "mode" "<MODE>")])
;; Scalar vreduce on the VF_128 modes (EVEX encoded).  Takes a register
;; source (operand 1), a register-or-memory source (operand 2) and an
;; immediate in 0..255 (operand 3).
2392 (define_insn "reduces<mode>"
2393 [(set (match_operand:VF_128 0 "register_operand" "=v")
2396 [(match_operand:VF_128 1 "register_operand" "v")
2397 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2398 (match_operand:SI 3 "const_0_to_255_operand")]
2403 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2404 [(set_attr "type" "sse")
2405 (set_attr "prefix" "evex")
2406 (set_attr "mode" "<MODE>")])
2408 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2410 ;; Parallel floating point comparisons
2412 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare for 128- and 256-bit float vectors.  The comparison
;; predicate is passed as an immediate in 0..31 (operand 3); the result is
;; a vector of the same mode as the inputs.
2414 (define_insn "avx_cmp<mode>3"
2415 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2417 [(match_operand:VF_128_256 1 "register_operand" "x")
2418 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2419 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2422 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2423 [(set_attr "type" "ssecmp")
2424 (set_attr "length_immediate" "1")
2425 (set_attr "prefix" "vex")
2426 (set_attr "mode" "<MODE>")])
;; AVX scalar compare on the VF_128 modes, with the predicate given as an
;; immediate in 0..31 (operand 3).  In Intel syntax operand 2 is printed
;; with the <iptr> pointer-size prefix.
2428 (define_insn "avx_vmcmp<mode>3"
2429 [(set (match_operand:VF_128 0 "register_operand" "=x")
2432 [(match_operand:VF_128 1 "register_operand" "x")
2433 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2439 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2440 [(set_attr "type" "ssecmp")
2441 (set_attr "length_immediate" "1")
2442 (set_attr "prefix" "vex")
2443 (set_attr "mode" "<ssescalarmode>")])
;; Packed compare for commutative comparison operators.  The condition
;; restricts operator 3 to rtx class RTX_COMM_COMPARE, and operand 1 is
;; marked commutative ("%"), so the inputs may be swapped to satisfy the
;; constraints.  %D3 splices the comparison name into the mnemonic.
2445 (define_insn "*<sse>_maskcmp<mode>3_comm"
2446 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2447 (match_operator:VF_128_256 3 "sse_comparison_operator"
2448 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2449 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2451 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2453 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2454 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2455 [(set_attr "isa" "noavx,avx")
2456 (set_attr "type" "ssecmp")
2457 (set_attr "length_immediate" "1")
2458 (set_attr "prefix" "orig,vex")
2459 (set_attr "mode" "<MODE>")])
;; Packed compare for any sse_comparison_operator; operands are not
;; commutative here.  Alternative 0 is the legacy form with the destination
;; tied to operand 1, alternative 1 the VEX three-operand form.  %D3
;; splices the comparison name into the mnemonic.
2461 (define_insn "<sse>_maskcmp<mode>3"
2462 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2463 (match_operator:VF_128_256 3 "sse_comparison_operator"
2464 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2465 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2468 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2469 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2470 [(set_attr "isa" "noavx,avx")
2471 (set_attr "type" "ssecmp")
2472 (set_attr "length_immediate" "1")
2473 (set_attr "prefix" "orig,vex")
2474 (set_attr "mode" "<MODE>")])
;; Scalar compare on the VF_128 modes for any sse_comparison_operator.
;; Alternative 0 is the legacy form (destination tied to operand 1),
;; alternative 1 the VEX form.  In Intel syntax operand 2 is printed with
;; the <iptr> pointer-size prefix.
2476 (define_insn "<sse>_vmmaskcmp<mode>3"
2477 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2479 (match_operator:VF_128 3 "sse_comparison_operator"
2480 [(match_operand:VF_128 1 "register_operand" "0,x")
2481 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2486 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2487 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2488 [(set_attr "isa" "noavx,avx")
2489 (set_attr "type" "ssecmp")
2490 (set_attr "length_immediate" "1,*")
2491 (set_attr "prefix" "orig,vex")
2492 (set_attr "mode" "<ssescalarmode>")])
;; Predicate for the comparison immediate, selected by vector mode:
;; floating-point vector modes accept 0..31, integer vector modes 0..7.
2494 (define_mode_attr cmp_imm_predicate
2495 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2496 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2497 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2498 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2499 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2500 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2501 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2502 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2503 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX-512 compare over the 32/64-bit element modes of V48_AVX512VL.  The
;; result has the mode's mask mode (constraint Yk).  The immediate's valid
;; range comes from <cmp_imm_predicate>.  The name substitutions add the
;; mask-merge and SAE-only variants; the mnemonic prefix is chosen by
;; <sseintprefix>.
2505 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2506 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2507 (unspec:<avx512fmaskmode>
2508 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2509 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2510 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2512 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2513 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2514 [(set_attr "type" "ssecmp")
2515 (set_attr "length_immediate" "1")
2516 (set_attr "prefix" "evex")
2517 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 integer compare over the VI12_AVX512VL (byte/word element)
;; modes, emitted as vpcmp<ssemodesuffix>.  The result has the mode's mask
;; mode (constraint Yk); the immediate's valid range comes from
;; <cmp_imm_predicate>.
2519 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2520 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2521 (unspec:<avx512fmaskmode>
2522 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2523 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2524 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2527 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2528 [(set_attr "type" "ssecmp")
2529 (set_attr "length_immediate" "1")
2530 (set_attr "prefix" "evex")
2531 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 unsigned integer compare (UNSPEC_UNSIGNED_PCMP) over the
;; VI12_AVX512VL modes, emitted as vpcmpu<ssemodesuffix>.  The result has
;; the mode's mask mode (constraint Yk); the immediate is limited to 0..7.
2533 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2534 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2535 (unspec:<avx512fmaskmode>
2536 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2537 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2538 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2539 UNSPEC_UNSIGNED_PCMP))]
2541 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2542 [(set_attr "type" "ssecmp")
2543 (set_attr "length_immediate" "1")
2544 (set_attr "prefix" "evex")
2545 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 unsigned integer compare (UNSPEC_UNSIGNED_PCMP) over the
;; VI48_AVX512VL modes, emitted as vpcmpu<ssemodesuffix>.  The result has
;; the mode's mask mode (constraint Yk); the immediate is limited to 0..7.
2547 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2548 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2549 (unspec:<avx512fmaskmode>
2550 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2551 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2552 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2553 UNSPEC_UNSIGNED_PCMP))]
2555 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2556 [(set_attr "type" "ssecmp")
2557 (set_attr "length_immediate" "1")
2558 (set_attr "prefix" "evex")
2559 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 scalar compare on the VF_128 modes.  The result has the mode's
;; mask mode (constraint Yk) and the RTL wraps the compare unspec in an
;; `and'.  The predicate immediate is in 0..31; <round_saeonly_name> adds
;; the SAE-only variant.
2561 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2562 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2563 (and:<avx512fmaskmode>
2564 (unspec:<avx512fmaskmode>
2565 [(match_operand:VF_128 1 "register_operand" "v")
2566 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2567 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2571 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2572 [(set_attr "type" "ssecmp")
2573 (set_attr "length_immediate" "1")
2574 (set_attr "prefix" "evex")
2575 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of the AVX-512 scalar compare: operand 4 is an additional
;; mask register that is and-ed into the result and printed as the {%4}
;; write mask on the destination.
2577 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2578 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2579 (and:<avx512fmaskmode>
2580 (unspec:<avx512fmaskmode>
2581 [(match_operand:VF_128 1 "register_operand" "v")
2582 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2583 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2585 (and:<avx512fmaskmode>
2586 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2589 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2590 [(set_attr "type" "ssecmp")
2591 (set_attr "length_immediate" "1")
2592 (set_attr "prefix" "evex")
2593 (set_attr "mode" "<ssescalarmode>")])
;; AVX-512 packed float compare expressed with a comparison operator
;; (operator 3) rather than an immediate.  The result has the mode's mask
;; mode (constraint Yk); %D3 splices the comparison name into the mnemonic.
2595 (define_insn "avx512f_maskcmp<mode>3"
2596 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2597 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2598 [(match_operand:VF 1 "register_operand" "v")
2599 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2601 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2602 [(set_attr "type" "ssecmp")
2603 (set_attr "length_immediate" "1")
2604 (set_attr "prefix" "evex")
2605 (set_attr "mode" "<sseinsnmode>")])
;; Scalar compare that sets the flags register in CCFP mode from element 0
;; of operands 0 and 1.  Emitted as comi<ssemodesuffix>, with %v adding the
;; "v" prefix when appropriate.  prefix_data16 depends on whether the mode
;; is DF.
2607 (define_insn "<sse>_comi<round_saeonly_name>"
2608 [(set (reg:CCFP FLAGS_REG)
2611 (match_operand:<ssevecmode> 0 "register_operand" "v")
2612 (parallel [(const_int 0)]))
2614 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2615 (parallel [(const_int 0)]))))]
2616 "SSE_FLOAT_MODE_P (<MODE>mode)"
2617 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2618 [(set_attr "type" "ssecomi")
2619 (set_attr "prefix" "maybe_vex")
2620 (set_attr "prefix_rep" "0")
2621 (set (attr "prefix_data16")
2622 (if_then_else (eq_attr "mode" "DF")
2624 (const_string "0")))
2625 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets the flags register in CCFPU mode from element 0
;; of operands 0 and 1.  Emitted as ucomi<ssemodesuffix>, with %v adding
;; the "v" prefix when appropriate.  prefix_data16 depends on whether the
;; mode is DF.
2627 (define_insn "<sse>_ucomi<round_saeonly_name>"
2628 [(set (reg:CCFPU FLAGS_REG)
2631 (match_operand:<ssevecmode> 0 "register_operand" "v")
2632 (parallel [(const_int 0)]))
2634 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2635 (parallel [(const_int 0)]))))]
2636 "SSE_FLOAT_MODE_P (<MODE>mode)"
2637 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2638 [(set_attr "type" "ssecomi")
2639 (set_attr "prefix" "maybe_vex")
2640 (set_attr "prefix_rep" "0")
2641 (set (attr "prefix_data16")
2642 (if_then_else (eq_attr "mode" "DF")
2644 (const_string "0")))
2645 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 512-bit data with a 512-bit float
;; comparison: operator 3 compares operands 4 and 5, operands 1 and 2 are
;; the two values to choose from.  The condition requires the data mode and
;; the comparison mode to have the same number of elements.  Expansion is
;; done by ix86_expand_fp_vcond.
2647 (define_expand "vcond<V_512:mode><VF_512:mode>"
2648 [(set (match_operand:V_512 0 "register_operand")
2650 (match_operator 3 ""
2651 [(match_operand:VF_512 4 "nonimmediate_operand")
2652 (match_operand:VF_512 5 "nonimmediate_operand")])
2653 (match_operand:V_512 1 "general_operand")
2654 (match_operand:V_512 2 "general_operand")))]
2656 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2657 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2659 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional select for 256-bit data with a 256-bit float
;; comparison: operator 3 compares operands 4 and 5, operands 1 and 2 are
;; the two values to choose from.  The condition requires the data mode and
;; the comparison mode to have the same number of elements.  Expansion is
;; done by ix86_expand_fp_vcond.
2664 (define_expand "vcond<V_256:mode><VF_256:mode>"
2665 [(set (match_operand:V_256 0 "register_operand")
2667 (match_operator 3 ""
2668 [(match_operand:VF_256 4 "nonimmediate_operand")
2669 (match_operand:VF_256 5 "nonimmediate_operand")])
2670 (match_operand:V_256 1 "general_operand")
2671 (match_operand:V_256 2 "general_operand")))]
2673 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2674 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2676 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional select for 128-bit data with a 128-bit float
;; comparison: operator 3 compares operands 4 and 5, operands 1 and 2 are
;; the two values to choose from.  The condition requires the data mode and
;; the comparison mode to have the same number of elements.  Expansion is
;; done by ix86_expand_fp_vcond.
2681 (define_expand "vcond<V_128:mode><VF_128:mode>"
2682 [(set (match_operand:V_128 0 "register_operand")
2684 (match_operator 3 ""
2685 [(match_operand:VF_128 4 "nonimmediate_operand")
2686 (match_operand:VF_128 5 "nonimmediate_operand")])
2687 (match_operand:V_128 1 "general_operand")
2688 (match_operand:V_128 2 "general_operand")))]
2690 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2691 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2693 bool ok = ix86_expand_fp_vcond (operands);
2698 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2700 ;; Parallel floating point logical operations
2702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; and-not for 128/256-bit float vectors.  The output routine formats the
;; mnemonic into a static buffer: a suffix is chosen from the insn's mode
;; attribute, then the template by alternative (legacy andn<suffix> or
;; three-operand vandn<suffix>).  When a mask is applied and AVX512DQ is
;; unavailable the integer form vpandn{d,q} is used instead, with the
;; suffix picked from the element type.  The mode attribute prefers
;; <ssePSmode> for 16-byte vectors on targets where packed-single is
;; optimal, and V4SF when optimizing for size without AVX.
2704 (define_insn "<sse>_andnot<mode>3<mask_name>"
2705 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2708 (match_operand:VF_128_256 1 "register_operand" "0,v"))
2709 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2710 "TARGET_SSE && <mask_avx512vl_condition>"
2712 static char buf[128];
2716 switch (get_attr_mode (insn))
2723 suffix = "<ssemodesuffix>";
2726 switch (which_alternative)
2729 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2732 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2738 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2739 if (<mask_applied> && !TARGET_AVX512DQ)
2741 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2742 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2745 snprintf (buf, sizeof (buf), ops, suffix);
2748 [(set_attr "isa" "noavx,avx")
2749 (set_attr "type" "sselog")
2750 (set_attr "prefix" "orig,maybe_evex")
2752 (cond [(and (match_test "<MODE_SIZE> == 16")
2753 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2754 (const_string "<ssePSmode>")
2755 (match_test "TARGET_AVX")
2756 (const_string "<MODE>")
2757 (match_test "optimize_function_for_size_p (cfun)")
2758 (const_string "V4SF")
2760 (const_string "<MODE>")))])
;; and-not for 512-bit float vectors (EVEX only).  The mnemonic is built
;; with snprintf as "v" + optional infix + "andn" + suffix.  Without
;; AVX512DQ the suffix becomes "q" or "d" by element type, because AVX512F
;; provides only the integer vpandn{d,q} form.
2763 (define_insn "<sse>_andnot<mode>3<mask_name>"
2764 [(set (match_operand:VF_512 0 "register_operand" "=v")
2767 (match_operand:VF_512 1 "register_operand" "v"))
2768 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2771 static char buf[128];
2775 suffix = "<ssemodesuffix>";
2778 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2779 if (!TARGET_AVX512DQ)
2781 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2785 snprintf (buf, sizeof (buf),
2786 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2790 [(set_attr "type" "sselog")
2791 (set_attr "prefix" "evex")
2792 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the bitwise logic operations (any_logic) on 128/256-bit
;; float vectors.  Both inputs may be register or memory;
;; ix86_fixup_binary_operands_no_copy is called on the operands before the
;; template is emitted.
2794 (define_expand "<code><mode>3<mask_name>"
2795 [(set (match_operand:VF_128_256 0 "register_operand")
2796 (any_logic:VF_128_256
2797 (match_operand:VF_128_256 1 "nonimmediate_operand")
2798 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2799 "TARGET_SSE && <mask_avx512vl_condition>"
2800 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Expander for the <code> operation on 512-bit float vectors.  Both inputs
;; may be register or memory; ix86_fixup_binary_operands_no_copy is called
;; on the operands before the template is emitted.
2802 (define_expand "<code><mode>3<mask_name>"
2803 [(set (match_operand:VF_512 0 "register_operand")
2805 (match_operand:VF_512 1 "nonimmediate_operand")
2806 (match_operand:VF_512 2 "nonimmediate_operand")))]
2808 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logic insn (any_logic) for 128/256-bit float vectors; operand 1
;; is commutative ("%").  The output routine formats the mnemonic into a
;; static buffer: a suffix is chosen from the insn's mode attribute, then
;; the template by alternative (legacy <logic><suffix> or three-operand
;; v<logic><suffix>).  When a mask is applied and AVX512DQ is unavailable
;; the integer form vp<logic>{d,q} is used instead.  The mode attribute
;; prefers <ssePSmode> for 16-byte vectors on targets where packed-single
;; is optimal, and V4SF when optimizing for size without AVX.
2810 (define_insn "*<code><mode>3<mask_name>"
2811 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2812 (any_logic:VF_128_256
2813 (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
2814 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2815 "TARGET_SSE && <mask_avx512vl_condition>
2816 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2818 static char buf[128];
2822 switch (get_attr_mode (insn))
2829 suffix = "<ssemodesuffix>";
2832 switch (which_alternative)
2835 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2838 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2844 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2845 if (<mask_applied> && !TARGET_AVX512DQ)
2847 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2848 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2851 snprintf (buf, sizeof (buf), ops, suffix);
2854 [(set_attr "isa" "noavx,avx")
2855 (set_attr "type" "sselog")
2856 (set_attr "prefix" "orig,maybe_evex")
2858 (cond [(and (match_test "<MODE_SIZE> == 16")
2859 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2860 (const_string "<ssePSmode>")
2861 (match_test "TARGET_AVX")
2862 (const_string "<MODE>")
2863 (match_test "optimize_function_for_size_p (cfun)")
2864 (const_string "V4SF")
2866 (const_string "<MODE>")))])
;; Bitwise logic insn for 512-bit float vectors (EVEX only); operand 1 is
;; commutative ("%").  The mnemonic is built with snprintf as "v" +
;; optional infix + <logic> + suffix.  For 64-byte modes, or when a mask is
;; applied, and AVX512DQ is unavailable, the suffix becomes "q" or "d" by
;; element type so that the integer vp<logic>{d,q} form is emitted.
2868 (define_insn "*<code><mode>3<mask_name>"
2869 [(set (match_operand:VF_512 0 "register_operand" "=v")
2871 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
2872 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2873 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2875 static char buf[128];
2879 suffix = "<ssemodesuffix>";
2882 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2883 if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
2885 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2889 snprintf (buf, sizeof (buf),
2890 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2894 [(set_attr "type" "sselog")
2895 (set_attr "prefix" "evex")
2896 (set_attr "mode" "<sseinsnmode>")])
;; Vector copysign.  Operand 3 is a mask built by ix86_build_signbit_mask.
;; The inverted mask is applied to operand 1 and the mask itself to
;; operand 2; the two intermediate values (fresh pseudos in operands 4 and
;; 5) are then combined with ior into operand 0.
2898 (define_expand "copysign<mode>3"
2901 (not:VF (match_dup 3))
2902 (match_operand:VF 1 "nonimmediate_operand")))
2904 (and:VF (match_dup 3)
2905 (match_operand:VF 2 "nonimmediate_operand")))
2906 (set (match_operand:VF 0 "register_operand")
2907 (ior:VF (match_dup 4) (match_dup 5)))]
2910 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2912 operands[4] = gen_reg_rtx (<MODE>mode);
2913 operands[5] = gen_reg_rtx (<MODE>mode);
2916 ;; Also define scalar versions. These are used for abs, neg, and
2917 ;; conditional move. Using subregs into vector modes causes register
2918 ;; allocation lossage. These patterns do not allow memory operands
2919 ;; because the native instructions read the full 128-bits.
;; and-not on scalar float modes (MODEF) held in SSE registers; all
;; operands must be registers.  The suffix is "ps" when the insn's mode
;; attribute is V4SF and <ssevecmodesuffix> otherwise; the template is the
;; legacy andn<suffix> or the three-operand vandn<suffix>, by alternative.
2921 (define_insn "*andnot<mode>3"
2922 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2925 (match_operand:MODEF 1 "register_operand" "0,x"))
2926 (match_operand:MODEF 2 "register_operand" "x,x")))]
2927 "SSE_FLOAT_MODE_P (<MODE>mode)"
2929 static char buf[32];
2932 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2934 switch (which_alternative)
2937 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2940 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2946 snprintf (buf, sizeof (buf), ops, suffix);
2949 [(set_attr "isa" "noavx,avx")
2950 (set_attr "type" "sselog")
2951 (set_attr "prefix" "orig,vex")
2953 (cond [(and (match_test "<MODE_SIZE> == 16")
2954 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2955 (const_string "V4SF")
2956 (match_test "TARGET_AVX")
2957 (const_string "<ssevecmode>")
2958 (match_test "optimize_function_for_size_p (cfun)")
2959 (const_string "V4SF")
2961 (const_string "<ssevecmode>")))])
;; and-not on TFmode values.  The mnemonic is "andnps" when the insn's
;; mode attribute is V4SF and "pandn" otherwise, with a "v" prefix in the
;; AVX alternative.  The mode attribute selects V4SF on targets where
;; packed-single is optimal, and also when SSE2 is missing or the function
;; is optimized for size; otherwise it is TI.
2963 (define_insn "*andnottf3"
2964 [(set (match_operand:TF 0 "register_operand" "=x,x")
2966 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2967 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2970 static char buf[32];
2973 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2975 switch (which_alternative)
2978 ops = "%s\t{%%2, %%0|%%0, %%2}";
2981 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2987 snprintf (buf, sizeof (buf), ops, tmp);
2990 [(set_attr "isa" "noavx,avx")
2991 (set_attr "type" "sselog")
2992 (set (attr "prefix_data16")
2994 (and (eq_attr "alternative" "0")
2995 (eq_attr "mode" "TI"))
2997 (const_string "*")))
2998 (set_attr "prefix" "orig,vex")
3000 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3001 (const_string "V4SF")
3002 (match_test "TARGET_AVX")
3004 (ior (not (match_test "TARGET_SSE2"))
3005 (match_test "optimize_function_for_size_p (cfun)"))
3006 (const_string "V4SF")
3008 (const_string "TI")))])
;; Bitwise logic on scalar float modes (MODEF) held in SSE registers; all
;; operands must be registers and operand 1 is commutative ("%").  The
;; suffix is "ps" when the insn's mode attribute is V4SF and
;; <ssevecmodesuffix> otherwise; the template is the legacy
;; <logic><suffix> or the three-operand v<logic><suffix>, by alternative.
3010 (define_insn "*<code><mode>3"
3011 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3013 (match_operand:MODEF 1 "register_operand" "%0,x")
3014 (match_operand:MODEF 2 "register_operand" "x,x")))]
3015 "SSE_FLOAT_MODE_P (<MODE>mode)"
3017 static char buf[32];
3020 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3022 switch (which_alternative)
3025 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3028 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3034 snprintf (buf, sizeof (buf), ops, suffix);
3037 [(set_attr "isa" "noavx,avx")
3038 (set_attr "type" "sselog")
3039 (set_attr "prefix" "orig,vex")
3041 (cond [(and (match_test "<MODE_SIZE> == 16")
3042 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3043 (const_string "V4SF")
3044 (match_test "TARGET_AVX")
3045 (const_string "<ssevecmode>")
3046 (match_test "optimize_function_for_size_p (cfun)")
3047 (const_string "V4SF")
3049 (const_string "<ssevecmode>")))])
;; Expander for the <code> operation on TFmode.  Both inputs may be
;; register or memory; ix86_fixup_binary_operands_no_copy is called on the
;; operands before the template is emitted.
3051 (define_expand "<code>tf3"
3052 [(set (match_operand:TF 0 "register_operand")
3054 (match_operand:TF 1 "nonimmediate_operand")
3055 (match_operand:TF 2 "nonimmediate_operand")))]
3057 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Bitwise logic insn on TFmode; operand 1 is commutative ("%").  The
;; mnemonic is "<logic>ps" when the insn's mode attribute is V4SF and
;; "p<logic>" otherwise, with a "v" prefix in the AVX alternative.  The
;; mode attribute selects V4SF on targets where packed-single is optimal,
;; and also when SSE2 is missing or the function is optimized for size;
;; otherwise it is TI.
3059 (define_insn "*<code>tf3"
3060 [(set (match_operand:TF 0 "register_operand" "=x,x")
3062 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
3063 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3065 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3067 static char buf[32];
3070 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
3072 switch (which_alternative)
3075 ops = "%s\t{%%2, %%0|%%0, %%2}";
3078 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3084 snprintf (buf, sizeof (buf), ops, tmp);
3087 [(set_attr "isa" "noavx,avx")
3088 (set_attr "type" "sselog")
3089 (set (attr "prefix_data16")
3091 (and (eq_attr "alternative" "0")
3092 (eq_attr "mode" "TI"))
3094 (const_string "*")))
3095 (set_attr "prefix" "orig,vex")
3097 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3098 (const_string "V4SF")
3099 (match_test "TARGET_AVX")
3101 (ior (not (match_test "TARGET_SSE2"))
3102 (match_test "optimize_function_for_size_p (cfun)"))
3103 (const_string "V4SF")
3105 (const_string "TI")))])
3107 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3109 ;; FMA floating point multiply/accumulate instructions. These include
3110 ;; scalar versions of the instructions as well as vector versions.
3112 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3114 ;; The standard names for scalar FMA are only available with SSE math enabled.
3115 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3116 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3117 ;; and TARGET_FMA4 are both false.
3118 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3119 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3120 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3121 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named fma/fms/fnma/fnms expanders.  Scalar SF/DF
;; additionally require TARGET_SSE_MATH; 128/256-bit vectors need FMA, FMA4
;; or AVX512VL; 512-bit vectors need AVX512F.
3122 (define_mode_iterator FMAMODEM
3123 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3124 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3125 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3126 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3127 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3128 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3129 (V16SF "TARGET_AVX512F")
3130 (V8DF "TARGET_AVX512F")])
;; Standard-named fused multiply-add expander: three inputs (register or
;; memory), none of them negated.
3132 (define_expand "fma<mode>4"
3133 [(set (match_operand:FMAMODEM 0 "register_operand")
3135 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3136 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3137 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-named fused multiply-subtract expander: same shape as fma but
;; with operand 3 negated.
3139 (define_expand "fms<mode>4"
3140 [(set (match_operand:FMAMODEM 0 "register_operand")
3142 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3143 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3144 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-named negated fused multiply-add expander: same shape as fma
;; but with operand 1 negated.
3146 (define_expand "fnma<mode>4"
3147 [(set (match_operand:FMAMODEM 0 "register_operand")
3149 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3150 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3151 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-named negated fused multiply-subtract expander: same shape as
;; fma but with both operand 1 and operand 3 negated.
3153 (define_expand "fnms<mode>4"
3154 [(set (match_operand:FMAMODEM 0 "register_operand")
3156 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3157 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3158 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3160 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, except that the scalar SF/DF entries do not
;; test TARGET_SSE_MATH.
3161 (define_mode_iterator FMAMODE_AVX512
3162 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3163 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3164 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3165 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3166 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3167 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3168 (V16SF "TARGET_AVX512F")
3169 (V8DF "TARGET_AVX512F")])
;; Scalar and 128/256-bit float modes, listed without per-mode conditions;
;; no 512-bit modes are included.
3171 (define_mode_iterator FMAMODE
3172 [SF DF V4SF V2DF V8SF V4DF])
;; Named fmadd expander over FMAMODE_AVX512: three inputs (register or
;; memory), none of them negated, with no extra expander condition.
3174 (define_expand "fma4i_fmadd_<mode>"
3175 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3177 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3178 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3179 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
;; Zero-masking fmadd expander.  Operand 4 is the mask register.  The
;; expansion emits the fma_fmadd_<mode>_maskz_1 pattern, passing an
;; all-zero vector (CONST0_RTX) as the extra operand ahead of the mask,
;; plus the rounding operand when the rounding variant is used.
3181 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3182 [(match_operand:VF_AVX512VL 0 "register_operand")
3183 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3184 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3185 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3186 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3187 "TARGET_AVX512F && <round_mode512bit_condition>"
3189 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3190 operands[0], operands[1], operands[2], operands[3],
3191 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unnamed fmadd insn for the FMAMODE modes, enabled by FMA or FMA4.
;; Alternatives 0-2 (isa "fma") print the three-operand vfmadd132/213/231
;; forms; in each, one source is tied to the destination by a "0" constraint.
;; Alternatives 3-4 (isa "fma4") print the four-operand vfmadd form and differ
;; only in whether operand 2 or operand 3 may be a memory reference.
3195 (define_insn "*fma_fmadd_<mode>"
3196 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3198 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3199 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3200 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3201 "TARGET_FMA || TARGET_FMA4"
3203 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3204 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3205 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3206 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3207 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3208 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3209 (set_attr "type" "ssemuladd")
3210 (set_attr "mode" "<MODE>")])
3212 ;; Suppose AVX-512F as baseline
;; I.e. users of this iterator are expected to require TARGET_AVX512F
;; themselves: the scalar and 512-bit modes are listed unconditionally and
;; only the 128/256-bit vector modes are additionally gated on AVX512VL.
3213 (define_mode_iterator VF_SF_AVX512VL
3214 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3215 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX-512 fmadd over VF_SF_AVX512VL.  The name, predicates, constraints,
;; condition and output templates are all parameterized by the "sd" mask and
;; "round" subst attributes.
;; NOTE(review): presumably the zero-masked and embedded-rounding variants
;; are generated from this one template; confirm against the subst
;; definitions.
;; Three alternatives select the 132/213/231 encodings according to which
;; source is tied to the destination.
3217 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3218 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3220 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3221 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3222 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3223 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3225 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3226 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3227 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3228 [(set_attr "type" "ssemuladd")
3229 (set_attr "mode" "<MODE>")])
;; Merge-masked fmadd: the result is a vec_merge controlled by mask operand 4
;; (constraint "Yk").  Operand 1 is tied to the destination in both
;; alternatives; the alternatives pick vfmadd132 or vfmadd213 depending on
;; whether operand 2 or operand 3 takes <round_constraint>.  The mask is
;; printed after the destination as %{%4%}.
3231 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3232 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3233 (vec_merge:VF_AVX512VL
3235 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3236 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3237 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3239 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3240 "TARGET_AVX512F && <round_mode512bit_condition>"
3242 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3243 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3244 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3245 (set_attr "type" "ssemuladd")
3246 (set_attr "mode" "<MODE>")])
;; Merge-masked fmadd, "mask3" flavour: the addend (operand 3) is tied to the
;; destination, so the only encoding is vfmadd231.  Operand 4 is the mask
;; register ("Yk"), printed after the destination as %{%4%}.
;; Operands 0 and 1 use the "v" constraint, matching the fmsub, fnmadd,
;; fnmsub, fmaddsub and fmsubadd _mask3 patterns.  The instruction is always
;; printed with a mask annotation, so there is no reason to restrict register
;; allocation to the narrower "x" class.
3248 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3249 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3250 (vec_merge:VF_AVX512VL
3252 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3253 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3254 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3256 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3258 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3259 [(set_attr "isa" "fma_avx512f")
3260 (set_attr "type" "ssemuladd")
3261 (set_attr "mode" "<MODE>")])
;; Unnamed fmsub insn for the FMAMODE modes, enabled by FMA or FMA4.
;; Alternatives 0-2 (isa "fma") print vfmsub132/213/231; alternatives 3-4
;; (isa "fma4") print the four-operand vfmsub form.  Constraint layout is the
;; same as for the fmadd pattern.
3263 (define_insn "*fma_fmsub_<mode>"
3264 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3266 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3267 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3269 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3270 "TARGET_FMA || TARGET_FMA4"
3272 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3273 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3274 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3275 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3276 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3277 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3278 (set_attr "type" "ssemuladd")
3279 (set_attr "mode" "<MODE>")])
;; AVX-512 fmsub over VF_SF_AVX512VL, parameterized by the "sd" mask and
;; "round" subst attributes in its name, predicates, constraints, condition
;; and templates.  Three alternatives print vfmsub132/213/231 according to
;; which source is tied to the destination.
3281 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3282 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3284 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3285 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3287 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3288 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3290 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3291 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3292 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3293 [(set_attr "type" "ssemuladd")
3294 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub: vec_merge under mask operand 4 ("Yk"), with operand 1
;; tied to the destination.  The two alternatives print vfmsub132 or
;; vfmsub213 depending on which of operands 2/3 takes <round_constraint>.
3296 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3297 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3298 (vec_merge:VF_AVX512VL
3300 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3301 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3303 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3305 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3308 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3309 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3310 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3311 (set_attr "type" "ssemuladd")
3312 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub, "mask3" flavour: operand 3 is tied to the destination
;; ("0"), so only the vfmsub231 encoding is used.  Operand 4 is the mask
;; register, printed as %{%4%}.
3314 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3315 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3316 (vec_merge:VF_AVX512VL
3318 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3319 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3321 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3323 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3324 "TARGET_AVX512F && <round_mode512bit_condition>"
3325 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3326 [(set_attr "isa" "fma_avx512f")
3327 (set_attr "type" "ssemuladd")
3328 (set_attr "mode" "<MODE>")])
;; Unnamed fnmadd insn for the FMAMODE modes, enabled by FMA or FMA4.
;; Alternatives 0-2 (isa "fma") print vfnmadd132/213/231; alternatives 3-4
;; (isa "fma4") print the four-operand vfnmadd form.
3330 (define_insn "*fma_fnmadd_<mode>"
3331 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3334 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3335 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3336 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3337 "TARGET_FMA || TARGET_FMA4"
3339 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3340 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3341 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3342 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3343 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3344 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3345 (set_attr "type" "ssemuladd")
3346 (set_attr "mode" "<MODE>")])
;; AVX-512 fnmadd over VF_SF_AVX512VL, parameterized by the "sd" mask and
;; "round" subst attributes.  Three alternatives print vfnmadd132/213/231
;; according to which source is tied to the destination.
3348 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3349 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3352 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3353 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3354 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3355 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3357 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3358 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3359 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3360 [(set_attr "type" "ssemuladd")
3361 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd: vec_merge under mask operand 4 ("Yk"), with operand 1
;; tied to the destination.  The two alternatives print vfnmadd132 or
;; vfnmadd213 depending on which of operands 2/3 takes <round_constraint>.
3363 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3364 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3365 (vec_merge:VF_AVX512VL
3368 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3369 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3370 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3372 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3373 "TARGET_AVX512F && <round_mode512bit_condition>"
3375 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3376 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3377 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3378 (set_attr "type" "ssemuladd")
3379 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd, "mask3" flavour: operand 3 is tied to the destination
;; ("0"), so only the vfnmadd231 encoding is used.  Operand 4 is the mask
;; register, printed as %{%4%}.
3381 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3382 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3383 (vec_merge:VF_AVX512VL
3386 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3387 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3388 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3390 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3391 "TARGET_AVX512F && <round_mode512bit_condition>"
3392 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3393 [(set_attr "isa" "fma_avx512f")
3394 (set_attr "type" "ssemuladd")
3395 (set_attr "mode" "<MODE>")])
;; Unnamed fnmsub insn for the FMAMODE modes, enabled by FMA or FMA4.
;; Alternatives 0-2 (isa "fma") print vfnmsub132/213/231; alternatives 3-4
;; (isa "fma4") print the four-operand vfnmsub form.
;; The templates deliberately contain no mask or rounding subst attributes:
;; this pattern's name has no subst suffix and its condition is FMA/FMA4, so
;; it must stay a plain three-/four-operand insn like the sibling
;; *fma_fmadd/*fma_fmsub/*fma_fnmadd patterns.  The masked and rounded forms
;; belong to the separate AVX-512 fnmsub pattern.
3397 (define_insn "*fma_fnmsub_<mode>"
3398 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3401 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3402 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3404 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3405 "TARGET_FMA || TARGET_FMA4"
3407 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3408 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3409 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3410 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3411 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3412 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3413 (set_attr "type" "ssemuladd")
3414 (set_attr "mode" "<MODE>")])
;; AVX-512 fnmsub over VF_SF_AVX512VL, parameterized by the "sd" mask and
;; "round" subst attributes.  Three alternatives print vfnmsub132/213/231
;; according to which source is tied to the destination.
3416 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3417 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3420 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3421 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3423 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3424 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3426 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3427 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3428 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3429 [(set_attr "type" "ssemuladd")
3430 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub: vec_merge under mask operand 4 ("Yk"), with operand 1
;; tied to the destination.  The two alternatives print vfnmsub132 or
;; vfnmsub213 depending on which of operands 2/3 takes <round_constraint>.
3432 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3433 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3434 (vec_merge:VF_AVX512VL
3437 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3438 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3440 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3442 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3443 "TARGET_AVX512F && <round_mode512bit_condition>"
3445 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3446 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3447 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3448 (set_attr "type" "ssemuladd")
3449 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub, "mask3" flavour: operand 3 is tied to the destination
;; ("0"), so only the vfnmsub231 encoding is used.  Operand 4 is the mask
;; register, printed as %{%4%}.
3451 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3452 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3453 (vec_merge:VF_AVX512VL
3456 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3457 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3459 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3461 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3463 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3464 [(set_attr "isa" "fma_avx512f")
3465 (set_attr "type" "ssemuladd")
3466 (set_attr "mode" "<MODE>")])
3468 ;; FMA parallel floating point multiply addsub and subadd operations.
3470 ;; It would be possible to represent these without the UNSPEC, as a vec_merge (with a constant lane selector) of
3473 ;; (fma op1 op2 op3)
3474 ;; (fma op1 op2 (neg op3))
3477 ;; But this doesn't seem useful in practice.
;; Expander for the alternating add/sub FMA over the VF modes.  It takes a
;; register destination and three nonimmediate sources, and is enabled when
;; any of FMA, FMA4 or AVX512F is available.  It has no preparation code.
3479 (define_expand "fmaddsub_<mode>"
3480 [(set (match_operand:VF 0 "register_operand")
3482 [(match_operand:VF 1 "nonimmediate_operand")
3483 (match_operand:VF 2 "nonimmediate_operand")
3484 (match_operand:VF 3 "nonimmediate_operand")]
3486 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masking fmaddsub expander.  Forwards operands 0-3, an all-zero vector
;; CONST0_RTX (<MODE>mode), the mask operand 4 and the optional rounding
;; operand to gen_fma_fmaddsub_<mode>_maskz_1.
3488 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3489 [(match_operand:VF_AVX512VL 0 "register_operand")
3490 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3491 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3492 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3493 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3496 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3497 operands[0], operands[1], operands[2], operands[3],
3498 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unnamed fmaddsub insn for the 128/256-bit vector modes (VF_128_256),
;; enabled by FMA or FMA4.  Alternatives 0-2 (isa "fma") print
;; vfmaddsub132/213/231; alternatives 3-4 (isa "fma4") print the four-operand
;; vfmaddsub form.
3502 (define_insn "*fma_fmaddsub_<mode>"
3503 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3505 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3506 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3507 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3509 "TARGET_FMA || TARGET_FMA4"
3511 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3512 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3513 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3514 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3515 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3516 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3517 (set_attr "type" "ssemuladd")
3518 (set_attr "mode" "<MODE>")])
;; AVX-512 fmaddsub, expressed as an unspec over VF_SF_AVX512VL and
;; parameterized by the "sd" mask and "round" subst attributes.  Three
;; alternatives print vfmaddsub132/213/231 according to which source is tied
;; to the destination.
3520 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3521 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3522 (unspec:VF_SF_AVX512VL
3523 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3524 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3525 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3527 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3529 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3530 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3531 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3532 [(set_attr "type" "ssemuladd")
3533 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub: vec_merge under mask operand 4 ("Yk"), with
;; operand 1 tied to the destination.  The two alternatives print
;; vfmaddsub132 or vfmaddsub213 depending on which of operands 2/3 takes
;; <round_constraint>.
3535 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3536 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3537 (vec_merge:VF_AVX512VL
3539 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3540 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3541 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3544 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3547 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3548 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3549 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3550 (set_attr "type" "ssemuladd")
3551 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub, "mask3" flavour: operand 3 is tied to the
;; destination ("0"), so only the vfmaddsub231 encoding is used.  Operand 4
;; is the mask register, printed as %{%4%}.
3553 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3554 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3555 (vec_merge:VF_AVX512VL
3557 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3558 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3559 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3562 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3564 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3565 [(set_attr "isa" "fma_avx512f")
3566 (set_attr "type" "ssemuladd")
3567 (set_attr "mode" "<MODE>")])
;; Unnamed fmsubadd insn for the 128/256-bit vector modes (VF_128_256),
;; enabled by FMA or FMA4.  Alternatives 0-2 (isa "fma") print
;; vfmsubadd132/213/231; alternatives 3-4 (isa "fma4") print the four-operand
;; vfmsubadd form.
3569 (define_insn "*fma_fmsubadd_<mode>"
3570 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3572 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3573 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3575 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3577 "TARGET_FMA || TARGET_FMA4"
3579 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3580 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3581 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3582 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3583 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3584 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3585 (set_attr "type" "ssemuladd")
3586 (set_attr "mode" "<MODE>")])
;; AVX-512 fmsubadd, expressed as an unspec over VF_SF_AVX512VL and
;; parameterized by the "sd" mask and "round" subst attributes.  Three
;; alternatives print vfmsubadd132/213/231 according to which source is tied
;; to the destination.
3588 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3589 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3590 (unspec:VF_SF_AVX512VL
3591 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3592 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3594 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3596 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3598 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3599 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3600 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3601 [(set_attr "type" "ssemuladd")
3602 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd: vec_merge under mask operand 4 ("Yk"), with
;; operand 1 tied to the destination.  The two alternatives print
;; vfmsubadd132 or vfmsubadd213 depending on which of operands 2/3 takes
;; <round_constraint>.
3604 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3605 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3606 (vec_merge:VF_AVX512VL
3608 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3609 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3611 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3614 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3617 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3618 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3619 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3620 (set_attr "type" "ssemuladd")
3621 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd, "mask3" flavour: operand 3 is tied to the
;; destination ("0"), so only the vfmsubadd231 encoding is used.  Operand 4
;; is the mask register, printed as %{%4%}.
3623 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3624 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3625 (vec_merge:VF_AVX512VL
3627 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3628 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3630 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3633 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3635 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3636 [(set_attr "isa" "fma_avx512f")
3637 (set_attr "type" "ssemuladd")
3638 (set_attr "mode" "<MODE>")])
3640 ;; FMA3 floating point scalar intrinsics. These merge result with
3641 ;; high-order elements from the destination register.
;; Expander for the FMA3 scalar intrinsic over the 128-bit vector modes
;; (VF_128).  All three sources use <round_nimm_predicate>, so the
;; rounding-enabled variant can apply a different operand predicate.
3643 (define_expand "fmai_vmfmadd_<mode><round_name>"
3644 [(set (match_operand:VF_128 0 "register_operand")
3647 (match_operand:VF_128 1 "<round_nimm_predicate>")
3648 (match_operand:VF_128 2 "<round_nimm_predicate>")
3649 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar FMA3 fmadd on a 128-bit vector register, enabled by FMA or AVX512F.
;; Operand 1 is tied to the destination in both alternatives, which print
;; vfmadd132 or vfmadd213 with the scalar mode suffix.  In Intel syntax the
;; source operands are printed through %<iptr>.
;; NOTE(review): the templates use round subst attributes but the pattern
;; name has no <round_name> suffix, unlike the fnmadd/fnmsub siblings below;
;; confirm whether that is intended.
3654 (define_insn "*fmai_fmadd_<mode>"
3655 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3658 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3659 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3660 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3663 "TARGET_FMA || TARGET_AVX512F"
3665 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3666 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3667 [(set_attr "type" "ssemuladd")
3668 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fmsub on a 128-bit vector register, enabled by FMA or AVX512F.
;; Operand 1 is tied to the destination in both alternatives, which print
;; vfmsub132 or vfmsub213 with the scalar mode suffix.
;; NOTE(review): as with *fmai_fmadd, round subst attributes are used without
;; a <round_name> suffix in the name; confirm whether that is intended.
3670 (define_insn "*fmai_fmsub_<mode>"
3671 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3674 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3675 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3677 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3680 "TARGET_FMA || TARGET_AVX512F"
3682 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3683 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3684 [(set_attr "type" "ssemuladd")
3685 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fnmadd on a 128-bit vector register, enabled by FMA or
;; AVX512F.  Note the operand order in the RTL: operand 2 comes first and
;; operand 1, which is tied to the destination, comes second.  The two
;; alternatives print vfnmadd132 or vfnmadd213.
3687 (define_insn "*fmai_fnmadd_<mode><round_name>"
3688 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3692 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3693 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3694 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3697 "TARGET_FMA || TARGET_AVX512F"
3699 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3700 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3701 [(set_attr "type" "ssemuladd")
3702 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fnmsub on a 128-bit vector register, enabled by FMA or
;; AVX512F.  As in the fnmadd pattern, operand 2 comes first in the RTL and
;; operand 1, tied to the destination, comes second.  The two alternatives
;; print vfnmsub132 or vfnmsub213.
3704 (define_insn "*fmai_fnmsub_<mode><round_name>"
3705 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3709 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3710 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3712 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3715 "TARGET_FMA || TARGET_AVX512F"
3717 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3718 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3719 [(set_attr "type" "ssemuladd")
3720 (set_attr "mode" "<MODE>")])
3722 ;; FMA4 floating point scalar intrinsics. These write the
3723 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar fmadd intrinsic over VF_128.  The
;; preparation statement sets operands[4] to an all-zero vector of the
;; pattern's mode before the RTL is generated.
3725 (define_expand "fma4i_vmfmadd_<mode>"
3726 [(set (match_operand:VF_128 0 "register_operand")
3729 (match_operand:VF_128 1 "nonimmediate_operand")
3730 (match_operand:VF_128 2 "nonimmediate_operand")
3731 (match_operand:VF_128 3 "nonimmediate_operand"))
3735 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar fmadd insn.  Operand 4 must be a zero constant
;; (const0_operand).  The two alternatives differ in whether operand 2 or
;; operand 3 may be memory; both print the four-operand vfmadd with the
;; scalar mode suffix.
3737 (define_insn "*fma4i_vmfmadd_<mode>"
3738 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3741 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3742 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3743 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3744 (match_operand:VF_128 4 "const0_operand")
3747 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3748 [(set_attr "type" "ssemuladd")
3749 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fmsub insn.  Operand 4 must be a zero constant; the two
;; alternatives differ in whether operand 2 or operand 3 may be memory.
;; Prints the four-operand vfmsub with the scalar mode suffix.
3751 (define_insn "*fma4i_vmfmsub_<mode>"
3752 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3755 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3756 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3758 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3759 (match_operand:VF_128 4 "const0_operand")
3762 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3763 [(set_attr "type" "ssemuladd")
3764 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmadd insn.  Operand 4 must be a zero constant; the two
;; alternatives differ in whether operand 2 or operand 3 may be memory.
;; Prints the four-operand vfnmadd with the scalar mode suffix.
3766 (define_insn "*fma4i_vmfnmadd_<mode>"
3767 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3771 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3772 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3773 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3774 (match_operand:VF_128 4 "const0_operand")
3777 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3778 [(set_attr "type" "ssemuladd")
3779 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmsub insn.  Operand 4 must be a zero constant; the two
;; alternatives differ in whether operand 2 or operand 3 may be memory.
;; Prints the four-operand vfnmsub with the scalar mode suffix.
3781 (define_insn "*fma4i_vmfnmsub_<mode>"
3782 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3786 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3787 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3789 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3790 (match_operand:VF_128 4 "const0_operand")
3793 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3794 [(set_attr "type" "ssemuladd")
3795 (set_attr "mode" "<MODE>")])
3797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3799 ;; Parallel single-precision floating point conversion operations
3801 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: operand 2 is a V2SI source in an MMX register or memory ("ym"),
;; converted with float:V2SF.  Operand 1 is a V4SF register tied to the
;; destination, so the instruction is printed in two-operand form.
3803 (define_insn "sse_cvtpi2ps"
3804 [(set (match_operand:V4SF 0 "register_operand" "=x")
3807 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3808 (match_operand:V4SF 1 "register_operand" "0")
3811 "cvtpi2ps\t{%2, %0|%0, %2}"
3812 [(set_attr "type" "ssecvt")
3813 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V4SF source (register or memory) goes through an unspec
;; producing V4SI, and elements 0 and 1 are selected into the V2SI MMX
;; destination ("=y").  In Intel syntax the source is printed with %q1.
3815 (define_insn "sse_cvtps2pi"
3816 [(set (match_operand:V2SI 0 "register_operand" "=y")
3818 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3820 (parallel [(const_int 0) (const_int 1)])))]
3822 "cvtps2pi\t{%1, %0|%0, %q1}"
3823 [(set_attr "type" "ssecvt")
3824 (set_attr "unit" "mmx")
3825 (set_attr "mode" "DI")])
;; cvttps2pi: the V4SF source is converted with fix:V4SI, and elements 0 and
;; 1 are selected into the V2SI MMX destination ("=y").  The prefix_rep
;; attribute is explicitly set to "0".
3827 (define_insn "sse_cvttps2pi"
3828 [(set (match_operand:V2SI 0 "register_operand" "=y")
3830 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3831 (parallel [(const_int 0) (const_int 1)])))]
3833 "cvttps2pi\t{%1, %0|%0, %q1}"
3834 [(set_attr "type" "ssecvt")
3835 (set_attr "unit" "mmx")
3836 (set_attr "prefix_rep" "0")
3837 (set_attr "mode" "SF")])
;; Convert a 32-bit integer (operand 2) to SF and place it in a V4SF
;; register.  Alternatives 0-1 (isa "noavx") print the two-operand cvtsi2ss
;; with operand 1 tied to the destination, taking the integer from a general
;; register or memory respectively.  Alternative 2 (isa "avx") prints the
;; three-operand vcvtsi2ss, with an optional rounding operand via <round_op3>.
;; The per-CPU *_decode attributes are given per alternative.
3839 (define_insn "sse_cvtsi2ss<round_name>"
3840 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3843 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3844 (match_operand:V4SF 1 "register_operand" "0,0,v")
3848 cvtsi2ss\t{%2, %0|%0, %2}
3849 cvtsi2ss\t{%2, %0|%0, %2}
3850 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3851 [(set_attr "isa" "noavx,noavx,avx")
3852 (set_attr "type" "sseicvt")
3853 (set_attr "athlon_decode" "vector,double,*")
3854 (set_attr "amdfam10_decode" "vector,double,*")
3855 (set_attr "bdver1_decode" "double,direct,*")
3856 (set_attr "btver2_decode" "double,double,double")
3857 (set_attr "prefix" "orig,orig,maybe_evex")
3858 (set_attr "mode" "SF")])
;; 64-bit counterpart of the cvtsi2ss pattern: operand 2 is DImode and the
;; insn additionally requires TARGET_64BIT.  Alternatives 0-1 (isa "noavx")
;; print cvtsi2ssq and are marked as needing a REX prefix; alternative 2
;; (isa "avx") prints vcvtsi2ssq with length_vex 4.
3860 (define_insn "sse_cvtsi2ssq<round_name>"
3861 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3864 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3865 (match_operand:V4SF 1 "register_operand" "0,0,v")
3867 "TARGET_SSE && TARGET_64BIT"
3869 cvtsi2ssq\t{%2, %0|%0, %2}
3870 cvtsi2ssq\t{%2, %0|%0, %2}
3871 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3872 [(set_attr "isa" "noavx,noavx,avx")
3873 (set_attr "type" "sseicvt")
3874 (set_attr "athlon_decode" "vector,double,*")
3875 (set_attr "amdfam10_decode" "vector,double,*")
3876 (set_attr "bdver1_decode" "double,direct,*")
3877 (set_attr "btver2_decode" "double,double,double")
3878 (set_attr "length_vex" "*,*,4")
3879 (set_attr "prefix_rex" "1,1,*")
3880 (set_attr "prefix" "orig,orig,maybe_evex")
3881 (set_attr "mode" "SF")])
;; cvtss2si to a 32-bit general register: element 0 of the V4SF source is
;; converted through UNSPEC_FIX_NOTRUNC.  The "%v" template prefix and
;; prefix "maybe_vex" let the same pattern serve the legacy and VEX
;; encodings; <round_op2> adds the optional rounding operand.
3883 (define_insn "sse_cvtss2si<round_name>"
3884 [(set (match_operand:SI 0 "register_operand" "=r,r")
3887 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3888 (parallel [(const_int 0)]))]
3889 UNSPEC_FIX_NOTRUNC))]
3891 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3892 [(set_attr "type" "sseicvt")
3893 (set_attr "athlon_decode" "double,vector")
3894 (set_attr "bdver1_decode" "double,double")
3895 (set_attr "prefix_rep" "1")
3896 (set_attr "prefix" "maybe_vex")
3897 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose source is a scalar SF operand (register or
;; memory) rather than element 0 of a V4SF vector.  It has no rounding subst
;; and prints the same %vcvtss2si template.
3899 (define_insn "sse_cvtss2si_2"
3900 [(set (match_operand:SI 0 "register_operand" "=r,r")
3901 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3902 UNSPEC_FIX_NOTRUNC))]
3904 "%vcvtss2si\t{%1, %0|%0, %k1}"
3905 [(set_attr "type" "sseicvt")
3906 (set_attr "athlon_decode" "double,vector")
3907 (set_attr "amdfam10_decode" "double,double")
3908 (set_attr "bdver1_decode" "double,double")
3909 (set_attr "prefix_rep" "1")
3910 (set_attr "prefix" "maybe_vex")
3911 (set_attr "mode" "SI")])
;; 64-bit counterpart of the cvtss2si pattern: the destination is DImode and
;; TARGET_64BIT is required.  The template prints cvtss2si with a "{q}"
;; suffix alternative.
3913 (define_insn "sse_cvtss2siq<round_name>"
3914 [(set (match_operand:DI 0 "register_operand" "=r,r")
3917 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3918 (parallel [(const_int 0)]))]
3919 UNSPEC_FIX_NOTRUNC))]
3920 "TARGET_SSE && TARGET_64BIT"
3921 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3922 [(set_attr "type" "sseicvt")
3923 (set_attr "athlon_decode" "double,vector")
3924 (set_attr "bdver1_decode" "double,double")
3925 (set_attr "prefix_rep" "1")
3926 (set_attr "prefix" "maybe_vex")
3927 (set_attr "mode" "DI")])
;; 64-bit variant of cvtss2si with a scalar SF source (register or memory);
;; requires TARGET_64BIT and has no rounding subst.
3929 (define_insn "sse_cvtss2siq_2"
3930 [(set (match_operand:DI 0 "register_operand" "=r,r")
3931 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3932 UNSPEC_FIX_NOTRUNC))]
3933 "TARGET_SSE && TARGET_64BIT"
3934 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3935 [(set_attr "type" "sseicvt")
3936 (set_attr "athlon_decode" "double,vector")
3937 (set_attr "amdfam10_decode" "double,double")
3938 (set_attr "bdver1_decode" "double,double")
3939 (set_attr "prefix_rep" "1")
3940 (set_attr "prefix" "maybe_vex")
3941 (set_attr "mode" "DI")])
;; cvttss2si to a 32-bit general register, taking element 0 of the V4SF
;; source.  It uses the round_saeonly subst attributes for the predicate,
;; constraint and extra output operand.
3943 (define_insn "sse_cvttss2si<round_saeonly_name>"
3944 [(set (match_operand:SI 0 "register_operand" "=r,r")
3947 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3948 (parallel [(const_int 0)]))))]
3950 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3951 [(set_attr "type" "sseicvt")
3952 (set_attr "athlon_decode" "double,vector")
3953 (set_attr "amdfam10_decode" "double,double")
3954 (set_attr "bdver1_decode" "double,double")
3955 (set_attr "prefix_rep" "1")
3956 (set_attr "prefix" "maybe_vex")
3957 (set_attr "mode" "SI")])
;; 64-bit counterpart of the cvttss2si pattern: the destination is DImode and
;; TARGET_64BIT is required.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>,
;; whereas the 32-bit pattern uses <round_saeonly_constraint2>; confirm
;; whether the difference is intentional.
3959 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3960 [(set (match_operand:DI 0 "register_operand" "=r,r")
3963 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3964 (parallel [(const_int 0)]))))]
3965 "TARGET_SSE && TARGET_64BIT"
3966 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3967 [(set_attr "type" "sseicvt")
3968 (set_attr "athlon_decode" "double,vector")
3969 (set_attr "amdfam10_decode" "double,double")
3970 (set_attr "bdver1_decode" "double,double")
3971 (set_attr "prefix_rep" "1")
3972 (set_attr "prefix" "maybe_evex")
3973 (set_attr "mode" "DI")])
;; AVX-512 conversion of an unsigned 32-bit integer (operand 2) to the
;; scalar float mode, duplicated into a VF_128 vector.  Prints the
;; three-operand vcvtusi2<suffix>, with operand 1 as the other vector source
;; and an optional rounding operand via <round_op3>.
3975 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3976 [(set (match_operand:VF_128 0 "register_operand" "=v")
3978 (vec_duplicate:VF_128
3979 (unsigned_float:<ssescalarmode>
3980 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3981 (match_operand:VF_128 1 "register_operand" "v")
3983 "TARGET_AVX512F && <round_modev4sf_condition>"
3984 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3985 [(set_attr "type" "sseicvt")
3986 (set_attr "prefix" "evex")
3987 (set_attr "mode" "<ssescalarmode>")])
;; vcvtusi2ss / vcvtusi2sd with a 64-bit source: the DImode operand 2 is
;; converted with unsigned_float, duplicated across a VF_128 vector and
;; combined with register operand 1.  Requires TARGET_AVX512F and a 64-bit
;; target.
;; The AT&T mnemonic carries an explicit {q} suffix, as the other 64-bit
;; scalar conversions in this file do (e.g. "%vcvtss2si{q}").  Without it the
;; operand size of a memory source cannot be derived from the operands,
;; because both remaining operands are vector registers.
;; NOTE(review): assumes <round_constraint3> admits a memory alternative in
;; the non-rounding variant; confirm in subst.md.
;; NOTE(review): embedded numbering skips 3991 and 3996 (outer RTL operator
;; and its closing operand are not visible here); confirm against the
;; complete file.
3989 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3990 [(set (match_operand:VF_128 0 "register_operand" "=v")
3992 (vec_duplicate:VF_128
3993 (unsigned_float:<ssescalarmode>
3994 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3995 (match_operand:VF_128 1 "register_operand" "v")
3997 "TARGET_AVX512F && TARGET_64BIT"
3998 "vcvtusi2<ssescalarmodesuffix>{q}\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3999 [(set_attr "type" "sseicvt")
4000 (set_attr "prefix" "evex")
4001 (set_attr "mode" "<ssescalarmode>")])
;; cvtdq2ps over the VF1 modes: integer vector operand 1 (mode
;; <sseintvecmode>) -> float vector operand 0.  Base condition is TARGET_SSE2
;; plus the mask/rounding 512-bit conditions.
;; NOTE(review): embedded numbering skips 4005 (the RTL conversion operator
;; line is not visible here); confirm against the complete file.
4003 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4004 [(set (match_operand:VF1 0 "register_operand" "=v")
4006 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4007 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4008 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4009 [(set_attr "type" "ssecvt")
4010 (set_attr "prefix" "maybe_vex")
4011 (set_attr "mode" "<sseinsnmode>")])
;; vcvtudq2ps: unsigned_float of an integer vector into the VF1_AVX512VL
;; modes, EVEX-only, with masked and rounding variants.
;; NOTE(review): operand 1 keeps the "nonimmediate_operand" predicate even in
;; the <round_name> variant, while other rounding patterns in this file use
;; <round_nimm_predicate>; consider aligning.
;; NOTE(review): embedded numbering skips 4017 (the insn condition is not
;; visible here); confirm against the complete file.
4013 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4014 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4015 (unsigned_float:VF1_AVX512VL
4016 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4018 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4019 [(set_attr "type" "ssecvt")
4020 (set_attr "prefix" "evex")
4021 (set_attr "mode" "<MODE>")])
;; Expander for unsigned integer vector -> VF1 float vector conversion.
;; Visible dispatch: V16SFmode uses gen_ufloatv16siv16sf2; under
;; TARGET_AVX512VL the V4SF and V8SF cases use the matching gen_ufloat*
;; insns; ix86_expand_vector_convert_uns_vsivsf is the remaining path
;; (presumably the generic fallback).
;; NOTE(review): embedded numbering skips 4027, 4030, 4032, 4035, 4037-4038
;; and 4040-4043, so braces, else arms and the closing DONE are not visible
;; here; confirm the control flow against the complete file.
4023 (define_expand "floatuns<sseintvecmodelower><mode>2"
4024 [(match_operand:VF1 0 "register_operand")
4025 (match_operand:<sseintvecmode> 1 "register_operand")]
4026 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4028 if (<MODE>mode == V16SFmode)
4029 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4031 if (TARGET_AVX512VL)
4033 if (<MODE>mode == V4SFmode)
4034 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4036 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4039 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4045 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SI vector mode to the lower-case name of the SF vector mode
;; with the same element count; used to build insn names.
4046 (define_mode_attr sf2simodelower
4047 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; cvtps2dq over VI4_AVX destinations: <ssePSmode> source wrapped in
;; UNSPEC_FIX_NOTRUNC.  prefix_data16 is computed conditionally on
;; TARGET_AVX.
;; NOTE(review): embedded numbering skips 4051, 4058 and 4060 (the unspec
;; opener and parts of the prefix_data16 conditional are not visible here);
;; confirm against the complete file.
4049 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4050 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4052 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4053 UNSPEC_FIX_NOTRUNC))]
4054 "TARGET_SSE2 && <mask_mode512bit_condition>"
4055 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4056 [(set_attr "type" "ssecvt")
4057 (set (attr "prefix_data16")
4059 (match_test "TARGET_AVX")
4061 (const_string "1")))
4062 (set_attr "prefix" "maybe_vex")
4063 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit vcvtps2dq: V16SF -> V16SI through UNSPEC_FIX_NOTRUNC, with masked
;; and embedded-rounding variants; EVEX-encoded, mode XI.
;; NOTE(review): embedded numbering skips 4067 and 4070 (the unspec opener
;; and the insn condition are not visible here); confirm against the
;; complete file.
4065 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4066 [(set (match_operand:V16SI 0 "register_operand" "=v")
4068 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4069 UNSPEC_FIX_NOTRUNC))]
4071 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4072 [(set_attr "type" "ssecvt")
4073 (set_attr "prefix" "evex")
4074 (set_attr "mode" "XI")])
;; vcvtps2udq over VI4_AVX512VL destinations: <ssePSmode> source wrapped in
;; UNSPEC_UNSIGNED_FIX_NOTRUNC, with masked and rounding variants.
;; NOTE(review): operand 1 keeps "nonimmediate_operand" in the rounding
;; variant; other rounding patterns in this file use <round_nimm_predicate>.
;; NOTE(review): embedded numbering skips 4081 (the insn condition is not
;; visible here); confirm against the complete file.
4076 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4077 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4078 (unspec:VI4_AVX512VL
4079 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4080 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4082 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4083 [(set_attr "type" "ssecvt")
4084 (set_attr "prefix" "evex")
4085 (set_attr "mode" "<sseinsnmode>")])
;; vcvtps2qq for the 256/512-bit DI vector modes (VI8_256_512): the
;; <ssePSmode2> float source is wrapped in UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_AVX512DQ plus the 512-bit rounding condition; EVEX-encoded.
;; NOTE(review): operand 1 keeps "nonimmediate_operand" in the rounding
;; variant; other rounding patterns in this file use <round_nimm_predicate>.
4087 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4088 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4089 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4090 UNSPEC_FIX_NOTRUNC))]
4091 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4092 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4093 [(set_attr "type" "ssecvt")
4094 (set_attr "prefix" "evex")
4095 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtps2qq: elements 0 and 1 of the V4SF operand 1 are selected and
;; converted (UNSPEC_FIX_NOTRUNC) into a V2DI result.  Requires both
;; TARGET_AVX512DQ and TARGET_AVX512VL.
;; NOTE(review): embedded numbering skips 4099-4100 (unspec / vec_select
;; opener lines are not visible here); confirm against the complete file.
4097 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4098 [(set (match_operand:V2DI 0 "register_operand" "=v")
4101 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4102 (parallel [(const_int 0) (const_int 1)]))]
4103 UNSPEC_FIX_NOTRUNC))]
4104 "TARGET_AVX512DQ && TARGET_AVX512VL"
4105 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4106 [(set_attr "type" "ssecvt")
4107 (set_attr "prefix" "evex")
4108 (set_attr "mode" "TI")])
;; vcvtps2uqq for the 256/512-bit DI vector modes (VI8_256_512): the
;; <ssePSmode2> float source is wrapped in UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; Requires TARGET_AVX512DQ plus the 512-bit rounding condition.
;; NOTE(review): operand 1 keeps "nonimmediate_operand" in the rounding
;; variant; other rounding patterns in this file use <round_nimm_predicate>.
4110 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4111 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4112 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4113 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4114 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4115 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4116 [(set_attr "type" "ssecvt")
4117 (set_attr "prefix" "evex")
4118 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtps2uqq: elements 0 and 1 of the V4SF operand 1 are selected
;; and converted (UNSPEC_UNSIGNED_FIX_NOTRUNC) into a V2DI result.  Requires
;; both TARGET_AVX512DQ and TARGET_AVX512VL.
;; NOTE(review): embedded numbering skips 4122-4123 (unspec / vec_select
;; opener lines are not visible here); confirm against the complete file.
4120 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4121 [(set (match_operand:V2DI 0 "register_operand" "=v")
4124 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4125 (parallel [(const_int 0) (const_int 1)]))]
4126 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4127 "TARGET_AVX512DQ && TARGET_AVX512VL"
4128 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4129 [(set_attr "type" "ssecvt")
4130 (set_attr "prefix" "evex")
4131 (set_attr "mode" "TI")])
;; 512-bit vcvttps2dq / vcvttps2udq (selected by <fixsuffix>): V16SF ->
;; V16SI, with masked and SAE-only variants; EVEX-encoded, mode XI.
;; NOTE(review): embedded numbering skips 4135 and 4137 (the RTL conversion
;; operator and the insn condition are not visible here); confirm against
;; the complete file.
4133 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4134 [(set (match_operand:V16SI 0 "register_operand" "=v")
4136 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4138 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4139 [(set_attr "type" "ssecvt")
4140 (set_attr "prefix" "evex")
4141 (set_attr "mode" "XI")])
;; 256-bit vcvttps2dq: (fix:V8SI) of a V8SF register-or-memory operand.
;; Requires TARGET_AVX; the masked variant additionally needs
;; <mask_avx512vl_condition>.  The prefix attribute follows <mask_prefix>.
4143 (define_insn "fix_truncv8sfv8si2<mask_name>"
4144 [(set (match_operand:V8SI 0 "register_operand" "=v")
4145 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4146 "TARGET_AVX && <mask_avx512vl_condition>"
4147 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4148 [(set_attr "type" "ssecvt")
4149 (set_attr "prefix" "<mask_prefix>")
4150 (set_attr "mode" "OI")])
;; 128-bit cvttps2dq: (fix:V4SI) of a V4SF register-or-memory operand.
;; Requires TARGET_SSE2 (plus <mask_avx512vl_condition> for the masked
;; variant).  prefix_rep and prefix_data16 are computed conditionally on
;; TARGET_AVX.
;; NOTE(review): "prefix_data16" is set twice below, once by the conditional
;; (set (attr ...)) and once by a plain (set_attr ... "0").  One of the two
;; looks redundant; check which value genattrtab actually uses.
;; NOTE(review): embedded numbering skips 4159, 4161, 4164 and 4166 (parts
;; of both conditionals are not visible here); confirm against the complete
;; file.
4152 (define_insn "fix_truncv4sfv4si2<mask_name>"
4153 [(set (match_operand:V4SI 0 "register_operand" "=v")
4154 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4155 "TARGET_SSE2 && <mask_avx512vl_condition>"
4156 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4157 [(set_attr "type" "ssecvt")
4158 (set (attr "prefix_rep")
4160 (match_test "TARGET_AVX")
4162 (const_string "1")))
4163 (set (attr "prefix_data16")
4165 (match_test "TARGET_AVX")
4167 (const_string "0")))
4168 (set_attr "prefix_data16" "0")
4169 (set_attr "prefix" "<mask_prefix2>")
4170 (set_attr "mode" "TI")])
;; Expander for VF1 float vector -> unsigned integer vector truncation.
;; Visible paths: V16SFmode emits gen_ufix_truncv16sfv16si2 directly.
;; Otherwise the input is adjusted by ix86_expand_adjust_ufix_to_sfix_si
;; (which also fills tmp[2]), converted with the signed fix_trunc insn into
;; tmp[1], and the result is XORed with tmp[2] into operand 0.
;; NOTE(review): embedded numbering skips 4175-4176, 4179-4182 and 4187-4190,
;; so the condition, declarations, braces and DONE are not visible here;
;; confirm the control flow against the complete file.
4172 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4173 [(match_operand:<sseintvecmode> 0 "register_operand")
4174 (match_operand:VF1 1 "register_operand")]
4177 if (<MODE>mode == V16SFmode)
4178 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4183 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4184 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4185 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4186 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4193 ;; Parallel double-precision floating point conversion operations
4195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF) of a V2SI operand taken from an MMX register ("y")
;; or memory.  The "unit" and "prefix_data16" attributes differ per
;; alternative.
;; NOTE(review): embedded numbering skips 4200 (the insn condition is not
;; visible here); confirm against the complete file.
4197 (define_insn "sse2_cvtpi2pd"
4198 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4199 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4201 "cvtpi2pd\t{%1, %0|%0, %1}"
4202 [(set_attr "type" "ssecvt")
4203 (set_attr "unit" "mmx,*")
4204 (set_attr "prefix_data16" "1,*")
4205 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF register-or-memory source -> V2SI in an MMX register ("y"),
;; modelled with UNSPEC_FIX_NOTRUNC.
;; NOTE(review): embedded numbering skips 4211 (the insn condition is not
;; visible here); confirm against the complete file.
4207 (define_insn "sse2_cvtpd2pi"
4208 [(set (match_operand:V2SI 0 "register_operand" "=y")
4209 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4210 UNSPEC_FIX_NOTRUNC))]
4212 "cvtpd2pi\t{%1, %0|%0, %1}"
4213 [(set_attr "type" "ssecvt")
4214 (set_attr "unit" "mmx")
4215 (set_attr "bdver1_decode" "double")
4216 (set_attr "btver2_decode" "direct")
4217 (set_attr "prefix_data16" "1")
4218 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI) of a V2DF register-or-memory source, result in an
;; MMX register ("y").
;; NOTE(review): "mode" is TI here while the cvtpd2pi pattern uses DI; check
;; whether the difference is intentional.
;; NOTE(review): embedded numbering skips 4223 (the insn condition is not
;; visible here); confirm against the complete file.
4220 (define_insn "sse2_cvttpd2pi"
4221 [(set (match_operand:V2SI 0 "register_operand" "=y")
4222 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4224 "cvttpd2pi\t{%1, %0|%0, %1}"
4225 [(set_attr "type" "ssecvt")
4226 (set_attr "unit" "mmx")
4227 (set_attr "bdver1_decode" "double")
4228 (set_attr "prefix_data16" "1")
4229 (set_attr "mode" "TI")])
;; cvtsi2sd: (float:DF) of an SImode operand 2 combined with the V2DF
;; operand 1.  Three alternatives: two non-AVX forms where operand 1 is tied
;; to the destination ("0"), and one AVX three-operand form.
;; NOTE(review): embedded numbering skips 4233-4234 and 4237-4239 (outer RTL
;; operators, the merge selector, the condition and the "@" line are not
;; visible here); confirm against the complete file.
4231 (define_insn "sse2_cvtsi2sd"
4232 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4235 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4236 (match_operand:V2DF 1 "register_operand" "0,0,x")
4240 cvtsi2sd\t{%2, %0|%0, %2}
4241 cvtsi2sd\t{%2, %0|%0, %2}
4242 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4243 [(set_attr "isa" "noavx,noavx,avx")
4244 (set_attr "type" "sseicvt")
4245 (set_attr "athlon_decode" "double,direct,*")
4246 (set_attr "amdfam10_decode" "vector,double,*")
4247 (set_attr "bdver1_decode" "double,direct,*")
4248 (set_attr "btver2_decode" "double,double,double")
4249 (set_attr "prefix" "orig,orig,vex")
4250 (set_attr "mode" "DF")])
;; 64-bit cvtsi2sd: (float:DF) of a DImode operand 2 combined with the V2DF
;; operand 1, enabled for TARGET_SSE2 && TARGET_64BIT.  The two non-AVX
;; alternatives spell the mnemonic with an explicit "q"; the AVX alternative
;; also carries the optional <round_op3> operand.
;; NOTE(review): embedded numbering skips 4254-4255, 4258 and 4260 (outer RTL
;; operators, the merge selector and the "@" line are not visible here);
;; confirm against the complete file.
4252 (define_insn "sse2_cvtsi2sdq<round_name>"
4253 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4256 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4257 (match_operand:V2DF 1 "register_operand" "0,0,v")
4259 "TARGET_SSE2 && TARGET_64BIT"
4261 cvtsi2sdq\t{%2, %0|%0, %2}
4262 cvtsi2sdq\t{%2, %0|%0, %2}
4263 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4264 [(set_attr "isa" "noavx,noavx,avx")
4265 (set_attr "type" "sseicvt")
4266 (set_attr "athlon_decode" "double,direct,*")
4267 (set_attr "amdfam10_decode" "vector,double,*")
4268 (set_attr "bdver1_decode" "double,direct,*")
4269 (set_attr "length_vex" "*,*,4")
4270 (set_attr "prefix_rex" "1,1,*")
4271 (set_attr "prefix" "orig,orig,maybe_evex")
4272 (set_attr "mode" "DF")])
;; vcvtss2usi: element 0 of V4SF operand 1 -> SImode general register via
;; UNSPEC_UNSIGNED_FIX_NOTRUNC, with an embedded-rounding variant.
;; NOTE(review): embedded numbering skips 4276-4277 and 4281 (unspec /
;; vec_select openers and the insn condition are not visible here); confirm
;; against the complete file.
4274 (define_insn "avx512f_vcvtss2usi<round_name>"
4275 [(set (match_operand:SI 0 "register_operand" "=r")
4278 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4279 (parallel [(const_int 0)]))]
4280 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4282 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4283 [(set_attr "type" "sseicvt")
4284 (set_attr "prefix" "evex")
4285 (set_attr "mode" "SI")])
;; 64-bit vcvtss2usi: element 0 of V4SF operand 1 -> DImode general register
;; via UNSPEC_UNSIGNED_FIX_NOTRUNC; TARGET_AVX512F && TARGET_64BIT.
;; NOTE(review): embedded numbering skips 4289-4290 (unspec / vec_select
;; openers are not visible here); confirm against the complete file.
4287 (define_insn "avx512f_vcvtss2usiq<round_name>"
4288 [(set (match_operand:DI 0 "register_operand" "=r")
4291 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4292 (parallel [(const_int 0)]))]
4293 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4294 "TARGET_AVX512F && TARGET_64BIT"
4295 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4296 [(set_attr "type" "sseicvt")
4297 (set_attr "prefix" "evex")
4298 (set_attr "mode" "DI")])
;; vcvttss2usi: element 0 of V4SF operand 1 -> SImode general register, with
;; an SAE-only variant.
;; NOTE(review): embedded numbering skips 4302-4303 and 4306 (RTL operator
;; lines and the insn condition are not visible here); confirm against the
;; complete file.
4300 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4301 [(set (match_operand:SI 0 "register_operand" "=r")
4304 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4305 (parallel [(const_int 0)]))))]
4307 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4308 [(set_attr "type" "sseicvt")
4309 (set_attr "prefix" "evex")
4310 (set_attr "mode" "SI")])
;; 64-bit vcvttss2usi: element 0 of V4SF operand 1 -> DImode general
;; register; TARGET_AVX512F && TARGET_64BIT, with an SAE-only variant.
;; NOTE(review): embedded numbering skips 4314-4315 (RTL operator lines are
;; not visible here); confirm against the complete file.
4312 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4313 [(set (match_operand:DI 0 "register_operand" "=r")
4316 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4317 (parallel [(const_int 0)]))))]
4318 "TARGET_AVX512F && TARGET_64BIT"
4319 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4320 [(set_attr "type" "sseicvt")
4321 (set_attr "prefix" "evex")
4322 (set_attr "mode" "DI")])
;; vcvtsd2usi: element 0 of V2DF operand 1 -> SImode general register via
;; UNSPEC_UNSIGNED_FIX_NOTRUNC, with an embedded-rounding variant.
;; NOTE(review): embedded numbering skips 4326-4327 and 4331 (unspec /
;; vec_select openers and the insn condition are not visible here); confirm
;; against the complete file.
4324 (define_insn "avx512f_vcvtsd2usi<round_name>"
4325 [(set (match_operand:SI 0 "register_operand" "=r")
4328 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4329 (parallel [(const_int 0)]))]
4330 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4332 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4333 [(set_attr "type" "sseicvt")
4334 (set_attr "prefix" "evex")
4335 (set_attr "mode" "SI")])
;; 64-bit vcvtsd2usi: element 0 of V2DF operand 1 -> DImode general register
;; via UNSPEC_UNSIGNED_FIX_NOTRUNC; TARGET_AVX512F && TARGET_64BIT.
;; NOTE(review): embedded numbering skips 4339-4340 (unspec / vec_select
;; openers are not visible here); confirm against the complete file.
4337 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4338 [(set (match_operand:DI 0 "register_operand" "=r")
4341 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4342 (parallel [(const_int 0)]))]
4343 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4344 "TARGET_AVX512F && TARGET_64BIT"
4345 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4346 [(set_attr "type" "sseicvt")
4347 (set_attr "prefix" "evex")
4348 (set_attr "mode" "DI")])
;; vcvttsd2usi: element 0 of V2DF operand 1 -> SImode general register, with
;; an SAE-only variant.
;; NOTE(review): embedded numbering skips 4352-4353 and 4356 (RTL operator
;; lines and the insn condition are not visible here); confirm against the
;; complete file.
4350 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4351 [(set (match_operand:SI 0 "register_operand" "=r")
4354 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4355 (parallel [(const_int 0)]))))]
4357 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4358 [(set_attr "type" "sseicvt")
4359 (set_attr "prefix" "evex")
4360 (set_attr "mode" "SI")])
;; 64-bit vcvttsd2usi: element 0 of V2DF operand 1 -> DImode general
;; register; TARGET_AVX512F && TARGET_64BIT, with an SAE-only variant.
;; NOTE(review): embedded numbering skips 4364-4365 (RTL operator lines are
;; not visible here); confirm against the complete file.
4362 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4363 [(set (match_operand:DI 0 "register_operand" "=r")
4366 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4367 (parallel [(const_int 0)]))))]
4368 "TARGET_AVX512F && TARGET_64BIT"
4369 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4370 [(set_attr "type" "sseicvt")
4371 (set_attr "prefix" "evex")
4372 (set_attr "mode" "DI")])
;; cvtsd2si: element 0 of V2DF operand 1 -> SImode general register via
;; UNSPEC_FIX_NOTRUNC, with an embedded-rounding variant.  The Intel-syntax
;; operand is printed with the %q modifier.
;; NOTE(review): embedded numbering skips 4376-4377 and 4381 (unspec /
;; vec_select openers and the insn condition are not visible here); confirm
;; against the complete file.
4374 (define_insn "sse2_cvtsd2si<round_name>"
4375 [(set (match_operand:SI 0 "register_operand" "=r,r")
4378 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4379 (parallel [(const_int 0)]))]
4380 UNSPEC_FIX_NOTRUNC))]
4382 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4383 [(set_attr "type" "sseicvt")
4384 (set_attr "athlon_decode" "double,vector")
4385 (set_attr "bdver1_decode" "double,double")
4386 (set_attr "btver2_decode" "double,double")
4387 (set_attr "prefix_rep" "1")
4388 (set_attr "prefix" "maybe_vex")
4389 (set_attr "mode" "SI")])
;; cvtsd2si written directly on a DFmode operand (register or memory
;; alternative) wrapped in UNSPEC_FIX_NOTRUNC; result in an SImode general
;; register.
;; NOTE(review): embedded numbering skips 4395 (the insn condition is not
;; visible here); confirm against the complete file.
4391 (define_insn "sse2_cvtsd2si_2"
4392 [(set (match_operand:SI 0 "register_operand" "=r,r")
4393 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4394 UNSPEC_FIX_NOTRUNC))]
4396 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4397 [(set_attr "type" "sseicvt")
4398 (set_attr "athlon_decode" "double,vector")
4399 (set_attr "amdfam10_decode" "double,double")
4400 (set_attr "bdver1_decode" "double,double")
4401 (set_attr "prefix_rep" "1")
4402 (set_attr "prefix" "maybe_vex")
4403 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: element 0 of V2DF operand 1 -> DImode general register
;; via UNSPEC_FIX_NOTRUNC; TARGET_SSE2 && TARGET_64BIT, {q} suffix in AT&T.
;; NOTE(review): embedded numbering skips 4407-4408 (unspec / vec_select
;; openers are not visible here); confirm against the complete file.
4405 (define_insn "sse2_cvtsd2siq<round_name>"
4406 [(set (match_operand:DI 0 "register_operand" "=r,r")
4409 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4410 (parallel [(const_int 0)]))]
4411 UNSPEC_FIX_NOTRUNC))]
4412 "TARGET_SSE2 && TARGET_64BIT"
4413 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4414 [(set_attr "type" "sseicvt")
4415 (set_attr "athlon_decode" "double,vector")
4416 (set_attr "bdver1_decode" "double,double")
4417 (set_attr "prefix_rep" "1")
4418 (set_attr "prefix" "maybe_vex")
4419 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si written directly on a DFmode operand (register or memory
;; alternative) wrapped in UNSPEC_FIX_NOTRUNC; result in a DImode general
;; register.  Enabled for TARGET_SSE2 && TARGET_64BIT.
4421 (define_insn "sse2_cvtsd2siq_2"
4422 [(set (match_operand:DI 0 "register_operand" "=r,r")
4423 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4424 UNSPEC_FIX_NOTRUNC))]
4425 "TARGET_SSE2 && TARGET_64BIT"
4426 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4427 [(set_attr "type" "sseicvt")
4428 (set_attr "athlon_decode" "double,vector")
4429 (set_attr "amdfam10_decode" "double,double")
4430 (set_attr "bdver1_decode" "double,double")
4431 (set_attr "prefix_rep" "1")
4432 (set_attr "prefix" "maybe_vex")
4433 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of V2DF operand 1 -> SImode general register, with
;; an SAE-only variant.
;; NOTE(review): embedded numbering skips 4437-4438 and 4441 (RTL operator
;; lines and the insn condition are not visible here); confirm against the
;; complete file.
4435 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4436 [(set (match_operand:SI 0 "register_operand" "=r,r")
4439 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4440 (parallel [(const_int 0)]))))]
4442 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4443 [(set_attr "type" "sseicvt")
4444 (set_attr "athlon_decode" "double,vector")
4445 (set_attr "amdfam10_decode" "double,double")
4446 (set_attr "bdver1_decode" "double,double")
4447 (set_attr "btver2_decode" "double,double")
4448 (set_attr "prefix_rep" "1")
4449 (set_attr "prefix" "maybe_vex")
4450 (set_attr "mode" "SI")])
;; 64-bit cvttsd2si: element 0 of V2DF operand 1 -> DImode general register;
;; TARGET_SSE2 && TARGET_64BIT, {q} suffix in AT&T, SAE-only variant.
;; NOTE(review): embedded numbering skips 4454-4455 (RTL operator lines are
;; not visible here); confirm against the complete file.
4452 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4453 [(set (match_operand:DI 0 "register_operand" "=r,r")
4456 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4457 (parallel [(const_int 0)]))))]
4458 "TARGET_SSE2 && TARGET_64BIT"
4459 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4460 [(set_attr "type" "sseicvt")
4461 (set_attr "athlon_decode" "double,vector")
4462 (set_attr "amdfam10_decode" "double,double")
4463 (set_attr "bdver1_decode" "double,double")
4464 (set_attr "prefix_rep" "1")
4465 (set_attr "prefix" "maybe_vex")
4466 (set_attr "mode" "DI")])
4468 ;; For float<si2dfmode><mode>2 insn pattern
;; Maps a DF vector mode to the SI vector mode with the same element count
;; (V8DF -> V8SI, V4DF -> V4SI); used as the operand mode of the
;; dq<->pd conversion patterns.
4469 (define_mode_attr si2dfmode
4470 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the V8DF -> V8SI / V4DF -> V4SI mapping, used when
;; building insn names.
4471 (define_mode_attr si2dfmodelower
4472 [(V8DF "v8si") (V4DF "v4si")])
;; vcvtdq2pd for the 512/256-bit DF vector modes: (float) of an
;; <si2dfmode> integer vector (register or memory).  Requires TARGET_AVX
;; plus <mask_mode512bit_condition>.
4474 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4475 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4476 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4477 "TARGET_AVX && <mask_mode512bit_condition>"
4478 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4479 [(set_attr "type" "ssecvt")
4480 (set_attr "prefix" "maybe_vex")
4481 (set_attr "mode" "<MODE>")])
;; vcvtqq2pd / vcvtuqq2pd (selected by <floatsuffix>): any_float of an
;; <sseintvecmode> integer vector into the VF2_AVX512VL modes, with masked
;; and embedded-rounding variants; EVEX-encoded.
;; Operand 1 is constrained with <round_constraint> instead of a literal
;; "vm", so the <round_name> variant does not offer a memory alternative.
;; This matches the sibling qq2ps pattern and the other <round_name>
;; patterns in this file.
;; NOTE(review): <round_constraint> is expected to expand to "vm" in the
;; non-rounding variant and "v" in the rounding one; confirm in subst.md.
;; NOTE(review): embedded numbering skips 4487 (the insn condition is not
;; visible here); confirm against the complete file.
4483 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4484 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4485 (any_float:VF2_AVX512VL
4486 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4488 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4489 [(set_attr "type" "ssecvt")
4490 (set_attr "prefix" "evex")
4491 (set_attr "mode" "<MODE>")])
4493 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; Mnemonic suffix appended to vcvt<floatsuffix>qq2ps: empty for V8SF,
;; "{y}" for V4SF.
4494 (define_mode_attr qq2pssuff
4495 [(V8SF "") (V4SF "{y}")])
;; Maps an SF vector mode to the DI vector mode with the same element count
;; (V8SF -> V8DI, V4SF -> V4DI).
4497 (define_mode_attr sselongvecmode
4498 [(V8SF "V8DI") (V4SF "V4DI")])
;; Lower-case spelling of the V8SF -> V8DI / V4SF -> V4DI mapping, used when
;; building insn names.
4500 (define_mode_attr sselongvecmodelower
4501 [(V8SF "v8di") (V4SF "v4di")])
;; Maps a float vector mode to an integer mode name (XI/OI/TI); used as the
;; value of the "mode" attribute in conversion patterns.
4503 (define_mode_attr sseintvecmode3
4504 [(V8SF "XI") (V4SF "OI")
4505 (V8DF "OI") (V4DF "TI")])
;; vcvtqq2ps / vcvtuqq2ps (selected by <floatsuffix>): any_float of a
;; <sselongvecmode> DI vector into VF1_128_256VL, with <qq2pssuff> appended
;; to the mnemonic.  Requires TARGET_AVX512DQ plus <round_modev8sf_condition>.
4507 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4508 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4509 (any_float:VF1_128_256VL
4510 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4511 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4512 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4513 [(set_attr "type" "ssecvt")
4514 (set_attr "prefix" "evex")
4515 (set_attr "mode" "<MODE>")])
;; 128-bit vcvtqq2ps / vcvtuqq2ps{x}: any_float of a V2DI operand gives a
;; V2SF value that is paired with a zero V2SF constant to form the V4SF
;; destination.  Requires TARGET_AVX512DQ && TARGET_AVX512VL.
;; NOTE(review): embedded numbering skips 4519 (the outer RTL operator line
;; is not visible here); confirm against the complete file.
4517 (define_insn "*<floatsuffix>floatv2div2sf2"
4518 [(set (match_operand:V4SF 0 "register_operand" "=v")
4520 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4521 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4522 "TARGET_AVX512DQ && TARGET_AVX512VL"
4523 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4524 [(set_attr "type" "ssecvt")
4525 (set_attr "prefix" "evex")
4526 (set_attr "mode" "V4SF")])
;; Masked form of the 128-bit vcvtqq2ps / vcvtuqq2ps{x}: operand 2 supplies
;; the pass-through value (elements 0 and 1 selected), operand 3 is the QI
;; mask register ("Yk"), and the template prints %{%3%}%N2 after the
;; destination.  Requires TARGET_AVX512DQ && TARGET_AVX512VL.
;; NOTE(review): embedded numbering skips 4530-4531 and 4533 (the outer RTL
;; operator lines are not visible here); confirm against the complete file.
4528 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4529 [(set (match_operand:V4SF 0 "register_operand" "=v")
4532 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4534 (match_operand:V4SF 2 "vector_move_operand" "0C")
4535 (parallel [(const_int 0) (const_int 1)]))
4536 (match_operand:QI 3 "register_operand" "Yk"))
4537 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4538 "TARGET_AVX512DQ && TARGET_AVX512VL"
4539 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4540 [(set_attr "type" "ssecvt")
4541 (set_attr "prefix" "evex")
4542 (set_attr "mode" "V4SF")])
;; vcvtudq2pd for VF2_512_256VL: unsigned_float of an <si2dfmode> integer
;; vector (register or memory); EVEX-encoded.
;; NOTE(review): embedded numbering skips 4548 (the insn condition is not
;; visible here); confirm against the complete file.
4544 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4545 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4546 (unsigned_float:VF2_512_256VL
4547 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4549 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4550 [(set_attr "type" "ssecvt")
4551 (set_attr "prefix" "evex")
4552 (set_attr "mode" "<MODE>")])
;; 128-bit vcvtudq2pd: unsigned_float of elements 0 and 1 of the V4SI
;; operand 1, producing V2DF; EVEX-encoded.
;; NOTE(review): embedded numbering skips 4557 and 4560 (the vec_select
;; opener and the insn condition are not visible here); confirm against the
;; complete file.
4554 (define_insn "ufloatv2siv2df2<mask_name>"
4555 [(set (match_operand:V2DF 0 "register_operand" "=v")
4556 (unsigned_float:V2DF
4558 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4559 (parallel [(const_int 0) (const_int 1)]))))]
4561 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4562 [(set_attr "type" "ssecvt")
4563 (set_attr "prefix" "evex")
4564 (set_attr "mode" "V2DF")])
;; vcvtdq2pd on the low half of a V16SI operand: elements 0-7 are selected
;; and converted into a V8DF result.  The source is printed with the %t
;; operand modifier.
;; NOTE(review): embedded numbering skips 4568-4569 and 4575 (the conversion
;; / vec_select openers and the insn condition are not visible here);
;; confirm against the complete file.
4566 (define_insn "avx512f_cvtdq2pd512_2"
4567 [(set (match_operand:V8DF 0 "register_operand" "=v")
4570 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4571 (parallel [(const_int 0) (const_int 1)
4572 (const_int 2) (const_int 3)
4573 (const_int 4) (const_int 5)
4574 (const_int 6) (const_int 7)]))))]
4576 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4577 [(set_attr "type" "ssecvt")
4578 (set_attr "prefix" "evex")
4579 (set_attr "mode" "V8DF")])
;; vcvtdq2pd on the low half of a V8SI operand: elements 0-3 are selected
;; and converted into a V4DF result.  The source is printed with the %x
;; operand modifier.
;; NOTE(review): embedded numbering skips 4583-4584 and 4588 (the conversion
;; / vec_select openers and the insn condition are not visible here);
;; confirm against the complete file.
4581 (define_insn "avx_cvtdq2pd256_2"
4582 [(set (match_operand:V4DF 0 "register_operand" "=v")
4585 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4586 (parallel [(const_int 0) (const_int 1)
4587 (const_int 2) (const_int 3)]))))]
4589 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4590 [(set_attr "type" "ssecvt")
4591 (set_attr "prefix" "maybe_evex")
4592 (set_attr "mode" "V4DF")])
;; 128-bit cvtdq2pd: elements 0 and 1 of the V4SI operand 1 are selected and
;; converted into a V2DF result.  Requires TARGET_SSE2 (plus
;; <mask_avx512vl_condition> for the masked variant); the Intel-syntax
;; source is printed with %q.
;; NOTE(review): embedded numbering skips 4596-4597 (the conversion /
;; vec_select opener lines are not visible here); confirm against the
;; complete file.
4594 (define_insn "sse2_cvtdq2pd<mask_name>"
4595 [(set (match_operand:V2DF 0 "register_operand" "=v")
4598 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4599 (parallel [(const_int 0) (const_int 1)]))))]
4600 "TARGET_SSE2 && <mask_avx512vl_condition>"
4601 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4602 [(set_attr "type" "ssecvt")
4603 (set_attr "prefix" "maybe_vex")
4604 (set_attr "ssememalign" "64")
4605 (set_attr "mode" "V2DF")])
;; 512-bit vcvtpd2dq: V8DF -> V8SI through UNSPEC_FIX_NOTRUNC, with masked
;; and embedded-rounding variants; EVEX-encoded, mode OI.
;; NOTE(review): embedded numbering skips 4609 and 4612 (the unspec opener
;; and the insn condition are not visible here); confirm against the
;; complete file.
4607 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4608 [(set (match_operand:V8SI 0 "register_operand" "=v")
4610 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4611 UNSPEC_FIX_NOTRUNC))]
4613 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4614 [(set_attr "type" "ssecvt")
4615 (set_attr "prefix" "evex")
4616 (set_attr "mode" "OI")])
;; 256-bit vcvtpd2dq{y}: V4DF register-or-memory source -> V4SI through
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_AVX (plus <mask_avx512vl_condition>
;; for the masked variant); the prefix attribute follows <mask_prefix>.
4618 (define_insn "avx_cvtpd2dq256<mask_name>"
4619 [(set (match_operand:V4SI 0 "register_operand" "=v")
4620 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4621 UNSPEC_FIX_NOTRUNC))]
4622 "TARGET_AVX && <mask_avx512vl_condition>"
4623 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4624 [(set_attr "type" "ssecvt")
4625 (set_attr "prefix" "<mask_prefix>")
4626 (set_attr "mode" "OI")])
;; Expander producing a V8SI destination from the V4SI result of the
;; V4DF unspec conversion; the preparation statement sets operands[2] to the
;; V4SImode zero constant.
;; NOTE(review): embedded numbering skips 4630 and 4632-4634 (the combining
;; RTL operator, the unspec name, the dup operand and the condition are not
;; visible here); confirm against the complete file.
4628 (define_expand "avx_cvtpd2dq256_2"
4629 [(set (match_operand:V8SI 0 "register_operand")
4631 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4635 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched for the V8SI-destination form of vcvtpd2dq{y}: operand 2 must
;; be a zero constant (const0_operand) and the destination is printed with
;; the %x modifier.
;; NOTE(review): embedded numbering skips 4639, 4641 and 4643 (the combining
;; RTL operator, the unspec name and the insn condition are not visible
;; here); confirm against the complete file.
4637 (define_insn "*avx_cvtpd2dq256_2"
4638 [(set (match_operand:V8SI 0 "register_operand" "=x")
4640 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4642 (match_operand:V4SI 2 "const0_operand")))]
4644 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4645 [(set_attr "type" "ssecvt")
4646 (set_attr "prefix" "vex")
4647 (set_attr "btver2_decode" "vector")
4648 (set_attr "mode" "OI")])
;; 128-bit cvtpd2dq: the V2SI unspec result of converting V2DF operand 1 is
;; paired with a zero V2SI constant to form the V4SI destination.  The C
;; output body returns either the "vcvtpd2dq{x}" template (with mask
;; operand) or the plain "cvtpd2dq" template.
;; NOTE(review): embedded numbering skips 4652, 4654, 4657-4658, 4660 and
;; 4662 (the combining operator, the unspec name and the test selecting
;; between the two templates are not visible here); confirm against the
;; complete file.
4650 (define_insn "sse2_cvtpd2dq<mask_name>"
4651 [(set (match_operand:V4SI 0 "register_operand" "=v")
4653 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4655 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4656 "TARGET_SSE2 && <mask_avx512vl_condition>"
4659 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4661 return "cvtpd2dq\t{%1, %0|%0, %1}";
4663 [(set_attr "type" "ssecvt")
4664 (set_attr "prefix_rep" "1")
4665 (set_attr "prefix_data16" "0")
4666 (set_attr "prefix" "maybe_vex")
4667 (set_attr "mode" "TI")
4668 (set_attr "amdfam10_decode" "double")
4669 (set_attr "athlon_decode" "vector")
4670 (set_attr "bdver1_decode" "double")])
4672 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix appended to vcvtpd2udq: empty for V8DF, "{y}" for V4DF.
4673 (define_mode_attr pd2udqsuff
4674 [(V8DF "") (V4DF "{y}")])
;; vcvtpd2udq<pd2udqsuff>: VF2_512_256VL source -> <si2dfmode> integer vector
;; through UNSPEC_UNSIGNED_FIX_NOTRUNC, with masked and rounding variants.
;; NOTE(review): operand 1 keeps "nonimmediate_operand" in the rounding
;; variant; other rounding patterns in this file use <round_nimm_predicate>.
;; NOTE(review): embedded numbering skips 4678 and 4681 (the unspec opener
;; and the insn condition are not visible here); confirm against the
;; complete file.
4676 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4677 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4679 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4680 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4682 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4683 [(set_attr "type" "ssecvt")
4684 (set_attr "prefix" "evex")
4685 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtpd2udq{x}: the UNSPEC_UNSIGNED_FIX_NOTRUNC result for V2DF
;; operand 1 is paired with a zero V2SI constant to form the V4SI
;; destination; EVEX-encoded.
;; NOTE(review): embedded numbering skips 4689-4690 and 4694 (the combining
;; operator, the unspec opener and the insn condition are not visible here);
;; confirm against the complete file.
4687 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4688 [(set (match_operand:V4SI 0 "register_operand" "=v")
4691 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4692 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4693 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4695 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4696 [(set_attr "type" "ssecvt")
4697 (set_attr "prefix" "evex")
4698 (set_attr "mode" "TI")])
;; 512-bit vcvttpd2dq / vcvttpd2udq (selected by <fixsuffix>): V8DF -> V8SI,
;; with masked and SAE-only variants; EVEX-encoded, mode OI.
;; NOTE(review): embedded numbering skips 4702 and 4704 (the RTL conversion
;; operator and the insn condition are not visible here); confirm against
;; the complete file.
4700 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4701 [(set (match_operand:V8SI 0 "register_operand" "=v")
4703 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4705 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4706 [(set_attr "type" "ssecvt")
4707 (set_attr "prefix" "evex")
4708 (set_attr "mode" "OI")])
;; 128-bit vcvttpd2udq{x}: (unsigned_fix:V2SI) of V2DF operand 1 is paired
;; with a zero V2SI constant to form the V4SI destination; EVEX-encoded.
;; NOTE(review): embedded numbering skips 4712 and 4715 (the combining RTL
;; operator and the insn condition are not visible here); confirm against
;; the complete file.
4710 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4711 [(set (match_operand:V4SI 0 "register_operand" "=v")
4713 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4714 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4716 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4717 [(set_attr "type" "ssecvt")
4718 (set_attr "prefix" "evex")
4719 (set_attr "mode" "TI")])
;; 256-bit vcvttpd2dq{y}: (fix:V4SI) of a V4DF register-or-memory operand.
;; Enabled for TARGET_AVX, or for TARGET_AVX512VL && TARGET_AVX512F.
;; NOTE(review): unlike the neighbouring <mask_name> patterns, the condition
;; does not mention <mask_avx512vl_condition>; confirm that the masked
;; variant is gated correctly.
4721 (define_insn "fix_truncv4dfv4si2<mask_name>"
4722 [(set (match_operand:V4SI 0 "register_operand" "=v")
4723 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4724 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4725 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4726 [(set_attr "type" "ssecvt")
4727 (set_attr "prefix" "maybe_evex")
4728 (set_attr "mode" "OI")])
;; 256-bit vcvttpd2udq{y}: (unsigned_fix:V4SI) of a V4DF register-or-memory
;; operand.  Requires TARGET_AVX512VL && TARGET_AVX512F.
;; NOTE(review): "prefix" is "maybe_evex" here, while the other unsigned
;; truncating conversions in this file use "evex"; consider aligning.
4730 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4731 [(set (match_operand:V4SI 0 "register_operand" "=v")
4732 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4733 "TARGET_AVX512VL && TARGET_AVX512F"
4734 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4735 [(set_attr "type" "ssecvt")
4736 (set_attr "prefix" "maybe_evex")
4737 (set_attr "mode" "OI")])
;; vcvttpd2qq / vcvttpd2uqq (selected by <fixsuffix>): any_fix of a
;; VF2_AVX512VL source into the <sseintvecmode> integer vector, with masked
;; and SAE-only variants.  Requires TARGET_AVX512DQ plus the SAE-only 512-bit
;; condition.
4739 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4740 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4741 (any_fix:<sseintvecmode>
4742 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4743 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4744 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4745 [(set_attr "type" "ssecvt")
4746 (set_attr "prefix" "evex")
4747 (set_attr "mode" "<sseintvecmode2>")])
;; vcvtpd2qq: VF2_AVX512VL source -> <sseintvecmode> integer vector through
;; UNSPEC_FIX_NOTRUNC, with masked and embedded-rounding variants.  Requires
;; TARGET_AVX512DQ plus the 512-bit rounding condition.
4749 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4750 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4751 (unspec:<sseintvecmode>
4752 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4753 UNSPEC_FIX_NOTRUNC))]
4754 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4755 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4756 [(set_attr "type" "ssecvt")
4757 (set_attr "prefix" "evex")
4758 (set_attr "mode" "<sseintvecmode2>")])
;; vcvtpd2uqq: VF2_AVX512VL source -> <sseintvecmode> integer vector through
;; UNSPEC_UNSIGNED_FIX_NOTRUNC, with masked and embedded-rounding variants.
;; Requires TARGET_AVX512DQ plus the 512-bit rounding condition.
;; Operand 1 uses <round_nimm_predicate>, the same predicate the signed
;; vcvtpd2qq pattern pairs with <round_constraint>, so predicate and
;; constraint agree in the rounding variant.
;; NOTE(review): <round_nimm_predicate> is expected to stay a
;; register-or-memory predicate in the non-rounding variant; confirm in
;; subst.md.
4760 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4761 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4762 (unspec:<sseintvecmode>
4763 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4764 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4765 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4766 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4767 [(set_attr "type" "ssecvt")
4768 (set_attr "prefix" "evex")
4769 (set_attr "mode" "<sseintvecmode2>")])
;; vcvttps2qq / vcvttps2uqq (selected by <fixsuffix>): any_fix of a
;; VF1_128_256VL source into the <sselongvecmode> DI vector, with masked and
;; SAE-only variants.  Requires TARGET_AVX512DQ plus
;; <round_saeonly_modev8sf_condition>; "mode" comes from <sseintvecmode3>.
4771 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4772 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4773 (any_fix:<sselongvecmode>
4774 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4775 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4776 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4777 [(set_attr "type" "ssecvt")
4778 (set_attr "prefix" "evex")
4779 (set_attr "mode" "<sseintvecmode3>")])
;; 128-bit vcvttps2qq / vcvttps2uqq (selected by <fixsuffix>): elements 0 and
;; 1 of the V4SF operand 1 are selected and converted into V2DI.  Requires
;; TARGET_AVX512DQ && TARGET_AVX512VL.
;; NOTE(review): embedded numbering skips 4783-4784 (the conversion /
;; vec_select opener lines are not visible here); confirm against the
;; complete file.
4781 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4782 [(set (match_operand:V2DI 0 "register_operand" "=v")
4785 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4786 (parallel [(const_int 0) (const_int 1)]))))]
4787 "TARGET_AVX512DQ && TARGET_AVX512VL"
4788 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4789 [(set_attr "type" "ssecvt")
4790 (set_attr "prefix" "evex")
4791 (set_attr "mode" "TI")])
;; vcvttps2udq for VF1_128_256VL sources: unsigned_fix into the
;; <sseintvecmode> integer vector; EVEX-encoded.
;; NOTE(review): embedded numbering skips 4797 (the insn condition is not
;; visible here); confirm against the complete file.
4793 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4794 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4795 (unsigned_fix:<sseintvecmode>
4796 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4798 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4799 [(set_attr "type" "ssecvt")
4800 (set_attr "prefix" "evex")
4801 (set_attr "mode" "<sseintvecmode2>")])
;; Expander producing a V8SI destination from (fix:V4SI) of the V4DF
;; operand 1; the preparation statement sets operands[2] to the V4SImode
;; zero constant.
;; NOTE(review): embedded numbering skips 4805 and 4807-4808 (the combining
;; RTL operator, the dup operand and the condition are not visible here);
;; confirm against the complete file.
4803 (define_expand "avx_cvttpd2dq256_2"
4804 [(set (match_operand:V8SI 0 "register_operand")
4806 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4809 "operands[2] = CONST0_RTX (V4SImode);")
;; 128-bit cvttpd2dq: (fix:V2SI) of V2DF operand 1 is paired with a zero
;; V2SI constant to form the V4SI destination.  The C output body returns
;; either the "vcvttpd2dq{x}" template (with mask operand) or the plain
;; "cvttpd2dq" template.
;; NOTE(review): embedded numbering skips 4813, 4817-4818, 4820 and 4822
;; (the combining operator and the test selecting between the two templates
;; are not visible here); confirm against the complete file.
4811 (define_insn "sse2_cvttpd2dq<mask_name>"
4812 [(set (match_operand:V4SI 0 "register_operand" "=v")
4814 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4815 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4816 "TARGET_SSE2 && <mask_avx512vl_condition>"
4819 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4821 return "cvttpd2dq\t{%1, %0|%0, %1}";
4823 [(set_attr "type" "ssecvt")
4824 (set_attr "amdfam10_decode" "double")
4825 (set_attr "athlon_decode" "vector")
4826 (set_attr "bdver1_decode" "double")
4827 (set_attr "prefix" "maybe_vex")
4828 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of the V2DF operand 2 combined with the V4SF
;; operand 1.  Three alternatives: two non-AVX forms where operand 1 is tied
;; to the destination ("0"), and one AVX three-operand form that also carries
;; the optional <round_op3> operand.
;; NOTE(review): operand 2 keeps "nonimmediate_operand" in the rounding
;; variant; other rounding patterns in this file use <round_nimm_predicate>.
;; NOTE(review): embedded numbering skips 4832-4833 and 4837-4839 (outer RTL
;; operators, the merge selector, the condition and the "@" line are not
;; visible here); confirm against the complete file.
4830 (define_insn "sse2_cvtsd2ss<round_name>"
4831 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4834 (float_truncate:V2SF
4835 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4836 (match_operand:V4SF 1 "register_operand" "0,0,v")
4840 cvtsd2ss\t{%2, %0|%0, %2}
4841 cvtsd2ss\t{%2, %0|%0, %q2}
4842 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4843 [(set_attr "isa" "noavx,noavx,avx")
4844 (set_attr "type" "ssecvt")
4845 (set_attr "athlon_decode" "vector,double,*")
4846 (set_attr "amdfam10_decode" "vector,double,*")
4847 (set_attr "bdver1_decode" "direct,direct,*")
4848 (set_attr "btver2_decode" "double,double,double")
4849 (set_attr "prefix" "orig,orig,<round_prefix>")
4850 (set_attr "mode" "SF")])
;; Single -> double conversion (cvtss2sd / vcvtss2sd).  Elements 0 and 1 are
;; selected from operand 2 (V4SF); operand 1 (V2DF) is the second source.
;; Three alternatives:
;;   0: non-AVX, register source, operand 1 tied to the destination
;;   1: non-AVX, memory source (Intel syntax prints it with %k2)
;;   2: AVX three-operand form, with <round_saeonly_op3> in the template
;; NOTE(review): presumably only the low element is converted and the rest of
;; the result comes from operand 1 -- confirm against the full RTL.
4852 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4853 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4857 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
4858 (parallel [(const_int 0) (const_int 1)])))
4859 (match_operand:V2DF 1 "register_operand" "0,0,v")
4863 cvtss2sd\t{%2, %0|%0, %2}
4864 cvtss2sd\t{%2, %0|%0, %k2}
4865 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4866 [(set_attr "isa" "noavx,noavx,avx")
4867 (set_attr "type" "ssecvt")
4868 (set_attr "amdfam10_decode" "vector,double,*")
4869 (set_attr "athlon_decode" "direct,direct,*")
4870 (set_attr "bdver1_decode" "direct,direct,*")
4871 (set_attr "btver2_decode" "double,double,double")
4872 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4873 (set_attr "mode" "DF")])
;; 512-bit packed double -> single conversion: float_truncate of a V8DF
;; operand into a V8SF register.  Emits vcvtpd2ps with an EVEX prefix; the
;; template splices in <round_mask_op2> and <mask_operand2> for the
;; rounding/masked variants named by <round_name>/<mask_name>.
4875 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4876 [(set (match_operand:V8SF 0 "register_operand" "=v")
4877 (float_truncate:V8SF
4878 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4880 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4881 [(set_attr "type" "ssecvt")
4882 (set_attr "prefix" "evex")
4883 (set_attr "mode" "V8SF")])
;; 256-bit packed double -> single conversion: float_truncate of a V4DF
;; operand (register or memory) into a V4SF register.  Requires TARGET_AVX
;; and <mask_avx512vl_condition>.  Emits "vcvtpd2ps{y}" -- the {y} suffix is
;; written in the AT&T-only brace form of the template.
4885 (define_insn "avx_cvtpd2ps256<mask_name>"
4886 [(set (match_operand:V4SF 0 "register_operand" "=v")
4887 (float_truncate:V4SF
4888 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4889 "TARGET_AVX && <mask_avx512vl_condition>"
4890 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4891 [(set_attr "type" "ssecvt")
4892 (set_attr "prefix" "maybe_evex")
4893 (set_attr "btver2_decode" "vector")
4894 (set_attr "mode" "V4SF")])
;; Expander for the 128-bit packed double -> single conversion: a V2DF
;; operand is narrowed with float_truncate:V2SF and the result is placed in a
;; V4SF destination.  Operand 2 is created here as the V2SF zero vector.
;; NOTE(review): presumably operand 2 fills the upper two result elements.
4896 (define_expand "sse2_cvtpd2ps"
4897 [(set (match_operand:V4SF 0 "register_operand")
4899 (float_truncate:V2SF
4900 (match_operand:V2DF 1 "nonimmediate_operand"))
4903 "operands[2] = CONST0_RTX (V2SFmode);")
;; Expander for the masked form of the 128-bit double -> single conversion.
;; Besides the V2DF source (operand 1) it takes a V4SF register (operand 2)
;; and a QImode register (operand 3); operand 4 is created here as the V2SF
;; zero vector.
;; NOTE(review): presumably operand 2 is the merge source and operand 3 the
;; write mask -- confirm against the full vec_merge RTL.
4905 (define_expand "sse2_cvtpd2ps_mask"
4906 [(set (match_operand:V4SF 0 "register_operand")
4909 (float_truncate:V2SF
4910 (match_operand:V2DF 1 "nonimmediate_operand"))
4912 (match_operand:V4SF 2 "register_operand")
4913 (match_operand:QI 3 "register_operand")))]
4915 "operands[4] = CONST0_RTX (V2SFmode);")
;; Matching insn for the 128-bit double -> single conversion: operand 1
;; (V2DF) is narrowed with float_truncate:V2SF and operand 2 must satisfy
;; const0_operand in V2SF.  Requires TARGET_SSE2 and
;; <mask_avx512vl_condition>.  The output code returns either
;; "vcvtpd2ps{x}" (with <mask_operand3>) or the legacy "cvtpd2ps".
;; NOTE(review): presumably the choice is made on TARGET_AVX -- confirm.
4917 (define_insn "*sse2_cvtpd2ps<mask_name>"
4918 [(set (match_operand:V4SF 0 "register_operand" "=v")
4920 (float_truncate:V2SF
4921 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4922 (match_operand:V2SF 2 "const0_operand")))]
4923 "TARGET_SSE2 && <mask_avx512vl_condition>"
4926 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
4928 return "cvtpd2ps\t{%1, %0|%0, %1}";
4930 [(set_attr "type" "ssecvt")
4931 (set_attr "amdfam10_decode" "double")
4932 (set_attr "athlon_decode" "vector")
4933 (set_attr "bdver1_decode" "double")
4934 (set_attr "prefix_data16" "1")
4935 (set_attr "prefix" "maybe_vex")
4936 (set_attr "mode" "V4SF")])
;; Mode attribute mapping a double-precision vector mode to the
;; single-precision vector mode with the same element count
;; (V8DF -> V8SF, V4DF -> V4SF).
4938 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
4939 (define_mode_attr sf2dfmode
4940 [(V8DF "V8SF") (V4DF "V4SF")])
;; Packed single -> double conversion for the VF2_512_256 modes: the
;; <sf2dfmode> source is widened with float_extend.  Requires TARGET_AVX plus
;; the <mask_mode512bit_condition> and <round_saeonly_mode512bit_condition>
;; checks.  Emits vcvtps2pd, with <round_saeonly_mask_op2> and
;; <mask_operand2> spliced into the template.
4942 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4943 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4944 (float_extend:VF2_512_256
4945 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4946 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4947 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4948 [(set_attr "type" "ssecvt")
4949 (set_attr "prefix" "maybe_vex")
4950 (set_attr "mode" "<MODE>")])
;; Unnamed-for-expanders ('*') insn producing a V4DF result from elements
;; 0..3 selected out of a V8SF operand.  Emits vcvtps2pd with a VEX prefix;
;; operand 1 is printed with the %x modifier.
;; NOTE(review): %x presumably prints the 128-bit (xmm) name of the operand,
;; matching the fact that only the low four elements are read -- confirm.
4952 (define_insn "*avx_cvtps2pd256_2"
4953 [(set (match_operand:V4DF 0 "register_operand" "=x")
4956 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4957 (parallel [(const_int 0) (const_int 1)
4958 (const_int 2) (const_int 3)]))))]
4960 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4961 [(set_attr "type" "ssecvt")
4962 (set_attr "prefix" "vex")
4963 (set_attr "mode" "V4DF")])
;; Produces a V8DF result from elements 0..7 selected out of a V16SF operand.
;; Emits vcvtps2pd with an EVEX prefix; operand 1 is printed with the %t
;; modifier.
;; NOTE(review): %t presumably prints the 256-bit (ymm) name of the operand,
;; since only the low eight elements are read -- confirm.
4965 (define_insn "vec_unpacks_lo_v16sf"
4966 [(set (match_operand:V8DF 0 "register_operand" "=v")
4969 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4970 (parallel [(const_int 0) (const_int 1)
4971 (const_int 2) (const_int 3)
4972 (const_int 4) (const_int 5)
4973 (const_int 6) (const_int 7)]))))]
4975 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4976 [(set_attr "type" "ssecvt")
4977 (set_attr "prefix" "evex")
4978 (set_attr "mode" "V8DF")])
;; Vector -> mask conversion for the VI12_AVX512VL modes, modelled as
;; UNSPEC_CVTINT2MASK of a vector register.  The result has mode
;; <avx512fmaskmode> and uses the "Yk" constraint.  Emits
;; vpmov<ssemodesuffix>2m with an EVEX prefix.
;; NOTE(review): "Yk" is presumably the AVX-512 mask-register class -- confirm.
4980 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4981 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4982 (unspec:<avx512fmaskmode>
4983 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
4984 UNSPEC_CVTINT2MASK))]
4986 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4987 [(set_attr "prefix" "evex")
4988 (set_attr "mode" "<sseinsnmode>")])
;; Vector -> mask conversion for the VI48_AVX512VL modes; same shape as the
;; VI12_AVX512VL pattern of the same name template, but iterating over a
;; different mode set.  Modelled as UNSPEC_CVTINT2MASK of a vector register,
;; result in <avx512fmaskmode> under the "Yk" constraint.  Emits
;; vpmov<ssemodesuffix>2m with an EVEX prefix.
4990 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4991 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4992 (unspec:<avx512fmaskmode>
4993 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
4994 UNSPEC_CVTINT2MASK))]
4996 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4997 [(set_attr "prefix" "evex")
4998 (set_attr "mode" "<sseinsnmode>")])
;; Expander for mask -> vector conversion in the VI12_AVX512VL modes.  The
;; result is a vec_merge controlled by the <avx512fmaskmode> register in
;; operand 1.  The preparation code creates operand 2 as the all-ones vector
;; (CONSTM1_RTX) and operand 3 as the zero vector (CONST0_RTX).
;; NOTE(review): presumably operands 2 and 3 are the two vec_merge inputs, so
;; each element becomes all-ones or zero according to its mask bit.
5000 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5001 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5002 (vec_merge:VI12_AVX512VL
5005 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5008 operands[2] = CONSTM1_RTX (<MODE>mode);
5009 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn for mask -> vector conversion in the VI12_AVX512VL modes:
;; a vec_merge of an all-ones constant (operand 2, constm1_operand) and a
;; zero constant (operand 3, const0_operand) selected by the mask register in
;; operand 1.  Emits vpmovm2<ssemodesuffix> with an EVEX prefix.
5012 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5013 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5014 (vec_merge:VI12_AVX512VL
5015 (match_operand:VI12_AVX512VL 2 "constm1_operand")
5016 (match_operand:VI12_AVX512VL 3 "const0_operand")
5017 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5019 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5020 [(set_attr "prefix" "evex")
5021 (set_attr "mode" "<sseinsnmode>")])
;; Expander for mask -> vector conversion in the VI48_AVX512VL modes.  The
;; result is a vec_merge controlled by the <avx512fmaskmode> register in
;; operand 1.  The preparation code creates operand 2 as the all-ones vector
;; (CONSTM1_RTX) and operand 3 as the zero vector (CONST0_RTX).
;; NOTE(review): presumably operands 2 and 3 are the two vec_merge inputs.
5023 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5024 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5025 (vec_merge:VI48_AVX512VL
5028 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5031 operands[2] = CONSTM1_RTX (<MODE>mode);
5032 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn for mask -> vector conversion in the VI48_AVX512VL modes:
;; a vec_merge of an all-ones constant (operand 2, constm1_operand) and a
;; zero constant (operand 3, const0_operand) selected by the mask register in
;; operand 1.  Emits vpmovm2<ssemodesuffix> with an EVEX prefix.
5035 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5036 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5037 (vec_merge:VI48_AVX512VL
5038 (match_operand:VI48_AVX512VL 2 "constm1_operand")
5039 (match_operand:VI48_AVX512VL 3 "const0_operand")
5040 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5042 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5043 [(set_attr "prefix" "evex")
5044 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit single -> double conversion: a V2DF result is produced from
;; elements 0 and 1 selected out of a V4SF operand.  Requires TARGET_SSE2 and
;; <mask_avx512vl_condition>.  Emits "%vcvtps2pd"; in Intel syntax the source
;; is printed with %q1.
;; NOTE(review): the leading %v presumably adds the "v" mnemonic prefix when
;; an AVX encoding is used -- confirm.
5046 (define_insn "sse2_cvtps2pd<mask_name>"
5047 [(set (match_operand:V2DF 0 "register_operand" "=v")
5050 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5051 (parallel [(const_int 0) (const_int 1)]))))]
5052 "TARGET_SSE2 && <mask_avx512vl_condition>"
5053 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5054 [(set_attr "type" "ssecvt")
5055 (set_attr "amdfam10_decode" "direct")
5056 (set_attr "athlon_decode" "double")
5057 (set_attr "bdver1_decode" "double")
5058 (set_attr "prefix_data16" "0")
5059 (set_attr "prefix" "maybe_vex")
5060 (set_attr "mode" "V2DF")])
;; Expander that widens the high half of a V4SF operand to V2DF in two
;; steps.  The first step applies the element selection {6, 7, 2, 3} to a
;; combination involving operand 1; the second produces the V2DF result from
;; elements 0 and 1.  Operand 2 is a fresh V4SF pseudo created in the
;; preparation statement.
;; NOTE(review): presumably operand 2 receives the first step's result and
;; feeds the second -- confirm against the full RTL.
5062 (define_expand "vec_unpacks_hi_v4sf"
5067 (match_operand:V4SF 1 "nonimmediate_operand"))
5068 (parallel [(const_int 6) (const_int 7)
5069 (const_int 2) (const_int 3)])))
5070 (set (match_operand:V2DF 0 "register_operand")
5074 (parallel [(const_int 0) (const_int 1)]))))]
5076 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander that widens the high half of a V8SF register to V4DF.  Elements
;; 4..7 of operand 1 are selected first, then the V4DF destination is set.
;; Operand 2 is a fresh V4SF pseudo created in the preparation statement.
;; NOTE(review): presumably operand 2 holds the extracted high half that is
;; then float-extended -- confirm against the full RTL.
5078 (define_expand "vec_unpacks_hi_v8sf"
5081 (match_operand:V8SF 1 "register_operand")
5082 (parallel [(const_int 4) (const_int 5)
5083 (const_int 6) (const_int 7)])))
5084 (set (match_operand:V4DF 0 "register_operand")
5088 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander that widens the high half of a V16SF register to V8DF.  Elements
;; 8..15 of operand 1 are selected first, then the V8DF destination is set.
;; Operand 2 is a fresh V8SF pseudo created in the preparation statement.
;; NOTE(review): presumably operand 2 holds the extracted high half that is
;; then float-extended -- confirm against the full RTL.
5090 (define_expand "vec_unpacks_hi_v16sf"
5093 (match_operand:V16SF 1 "register_operand")
5094 (parallel [(const_int 8) (const_int 9)
5095 (const_int 10) (const_int 11)
5096 (const_int 12) (const_int 13)
5097 (const_int 14) (const_int 15)])))
5098 (set (match_operand:V8DF 0 "register_operand")
5102 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Expander that produces a V2DF result from elements 0 and 1 selected out
;; of a V4SF operand (the low half of the source).
5104 (define_expand "vec_unpacks_lo_v4sf"
5105 [(set (match_operand:V2DF 0 "register_operand")
5108 (match_operand:V4SF 1 "nonimmediate_operand")
5109 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander that produces a V4DF result from elements 0..3 selected out of a
;; V8SF operand (the low half of the source).
5112 (define_expand "vec_unpacks_lo_v8sf"
5113 [(set (match_operand:V4DF 0 "register_operand")
5116 (match_operand:V8SF 1 "nonimmediate_operand")
5117 (parallel [(const_int 0) (const_int 1)
5118 (const_int 2) (const_int 3)]))))]
;; Mode attribute giving the floating-point result mode of the
;; vec_unpack*_float_* expanders for each integer source mode.  Every entry
;; halves the element count: V8HI/V16HI/V32HI map to V4SF/V8SF/V16SF and
;; V4SI/V8SI/V16SI map to V2DF/V4DF/V8DF.
5121 (define_mode_attr sseunpackfltmode
5122 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5123 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Signed unpack-high-and-convert for the VI2_AVX512F modes.  Done in two
;; steps: gen_vec_unpacks_hi_<mode> widens operand 1 into a fresh
;; <sseunpackmode> pseudo, then operand 0 is set to the FLOAT conversion of
;; that pseudo in <sseunpackfltmode>.
5125 (define_expand "vec_unpacks_float_hi_<mode>"
5126 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5127 (match_operand:VI2_AVX512F 1 "register_operand")]
5130 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5132 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5133 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5134 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-low-and-convert for the VI2_AVX512F modes.  Done in two
;; steps: gen_vec_unpacks_lo_<mode> widens operand 1 into a fresh
;; <sseunpackmode> pseudo, then operand 0 is set to the FLOAT conversion of
;; that pseudo in <sseunpackfltmode>.
5138 (define_expand "vec_unpacks_float_lo_<mode>"
5139 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5140 (match_operand:VI2_AVX512F 1 "register_operand")]
5143 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5145 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5146 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5147 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-high-and-convert for the VI2_AVX512F modes.
;; gen_vec_unpacku_hi_<mode> widens operand 1 into a fresh <sseunpackmode>
;; pseudo, then operand 0 is set to the (signed) FLOAT conversion of that
;; pseudo in <sseunpackfltmode>.
;; NOTE(review): a signed FLOAT is presumably safe here because the
;; zero-extended narrow elements are always non-negative in the wider mode.
5151 (define_expand "vec_unpacku_float_hi_<mode>"
5152 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5153 (match_operand:VI2_AVX512F 1 "register_operand")]
5156 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5158 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5159 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5160 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-low-and-convert for the VI2_AVX512F modes.
;; gen_vec_unpacku_lo_<mode> widens operand 1 into a fresh <sseunpackmode>
;; pseudo, then operand 0 is set to the (signed) FLOAT conversion of that
;; pseudo in <sseunpackfltmode>.
;; NOTE(review): a signed FLOAT is presumably safe here because the
;; zero-extended narrow elements are always non-negative in the wider mode.
5164 (define_expand "vec_unpacku_float_lo_<mode>"
5165 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5166 (match_operand:VI2_AVX512F 1 "register_operand")]
5169 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5171 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5172 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5173 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed conversion of the high half of a V4SI operand to V2DF, in two
;; steps.  The first step selects elements {2, 3, 2, 3} of operand 1; the
;; second produces the V2DF result from elements 0 and 1.  Operand 2 is a
;; fresh V4SI pseudo created in the preparation statement.
;; NOTE(review): presumably operand 2 receives the shuffled vector and is the
;; source of the second step -- confirm against the full RTL.
5177 (define_expand "vec_unpacks_float_hi_v4si"
5180 (match_operand:V4SI 1 "nonimmediate_operand")
5181 (parallel [(const_int 2) (const_int 3)
5182 (const_int 2) (const_int 3)])))
5183 (set (match_operand:V2DF 0 "register_operand")
5187 (parallel [(const_int 0) (const_int 1)]))))]
5189 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander that produces a V2DF result from elements 0 and 1 selected out
;; of a V4SI operand (the low half of the source).
5191 (define_expand "vec_unpacks_float_lo_v4si"
5192 [(set (match_operand:V2DF 0 "register_operand")
5195 (match_operand:V4SI 1 "nonimmediate_operand")
5196 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed conversion of the high half of a V8SI operand to V4DF.  Elements
;; 4..7 of operand 1 are selected first, then the V4DF destination is set.
;; Operand 2 is a fresh V4SI pseudo created in the preparation statement.
;; NOTE(review): presumably operand 2 holds the extracted high half that is
;; then converted -- confirm against the full RTL.
5199 (define_expand "vec_unpacks_float_hi_v8si"
5202 (match_operand:V8SI 1 "nonimmediate_operand")
5203 (parallel [(const_int 4) (const_int 5)
5204 (const_int 6) (const_int 7)])))
5205 (set (match_operand:V4DF 0 "register_operand")
5209 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander that produces a V4DF result from elements 0..3 selected out of a
;; V8SI operand (the low half of the source).
5211 (define_expand "vec_unpacks_float_lo_v8si"
5212 [(set (match_operand:V4DF 0 "register_operand")
5215 (match_operand:V8SI 1 "nonimmediate_operand")
5216 (parallel [(const_int 0) (const_int 1)
5217 (const_int 2) (const_int 3)]))))]
;; Signed conversion of the high half of a V16SI operand to V8DF.  Elements
;; 8..15 of operand 1 are selected first, then the V8DF destination is set.
;; Operand 2 is a fresh V8SI pseudo created in the preparation statement.
;; NOTE(review): presumably operand 2 holds the extracted high half that is
;; then converted -- confirm against the full RTL.
5220 (define_expand "vec_unpacks_float_hi_v16si"
5223 (match_operand:V16SI 1 "nonimmediate_operand")
5224 (parallel [(const_int 8) (const_int 9)
5225 (const_int 10) (const_int 11)
5226 (const_int 12) (const_int 13)
5227 (const_int 14) (const_int 15)])))
5228 (set (match_operand:V8DF 0 "register_operand")
5232 "operands[2] = gen_reg_rtx (V8SImode);")
;; Expander that produces a V8DF result from elements 0..7 selected out of a
;; V16SI operand (the low half of the source).
5234 (define_expand "vec_unpacks_float_lo_v16si"
5235 [(set (match_operand:V8DF 0 "register_operand")
5238 (match_operand:V16SI 1 "nonimmediate_operand")
5239 (parallel [(const_int 0) (const_int 1)
5240 (const_int 2) (const_int 3)
5241 (const_int 4) (const_int 5)
5242 (const_int 6) (const_int 7)]))))]
;; Unsigned conversion of the high half of a V4SI operand to V2DF.
;; Visible steps: select elements {2, 3, 2, 3} of operand 1, convert from
;; elements 0 and 1, compare operand 6 with operand 3 using lt, AND operand 7
;; with operand 4, and finally set operand 0 to operand 6 + operand 8.
;; Preparation code:
;;   operands[3]    = zero V2DF vector forced into a register
;;   operands[4]    = V2DF constant built from 2^32 (real_ldexp of 1.0 by 32)
;;   operands[5]    = fresh V4SI pseudo
;;   operands[6..8] = fresh V2DF pseudos
;; NOTE(review): presumably operand 6 is the signed conversion result,
;; operand 7 the "< 0" mask and operand 8 the masked 2^32 correction, so
;; negative results get 2^32 added -- confirm against the full RTL.
5245 (define_expand "vec_unpacku_float_hi_v4si"
5248 (match_operand:V4SI 1 "nonimmediate_operand")
5249 (parallel [(const_int 2) (const_int 3)
5250 (const_int 2) (const_int 3)])))
5255 (parallel [(const_int 0) (const_int 1)]))))
5257 (lt:V2DF (match_dup 6) (match_dup 3)))
5259 (and:V2DF (match_dup 7) (match_dup 4)))
5260 (set (match_operand:V2DF 0 "register_operand")
5261 (plus:V2DF (match_dup 6) (match_dup 8)))]
5264 REAL_VALUE_TYPE TWO32r;
5268 real_ldexp (&TWO32r, &dconst1, 32);
5269 x = const_double_from_real_value (TWO32r, DFmode);
5271 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5272 operands[4] = force_reg (V2DFmode,
5273 ix86_build_const_vector (V2DFmode, 1, x));
5275 operands[5] = gen_reg_rtx (V4SImode);
5277 for (i = 6; i < 9; i++)
5278 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the low half of a V4SI operand to V2DF.
;; Visible steps: convert from elements 0 and 1 of operand 1, compare
;; operand 5 with operand 3 using lt, AND operand 6 with operand 4, and
;; finally set operand 0 to operand 5 + operand 7.
;; Preparation code:
;;   operands[3]    = zero V2DF vector forced into a register
;;   operands[4]    = V2DF constant built from 2^32 (real_ldexp of 1.0 by 32)
;;   operands[5..7] = fresh V2DF pseudos
;; NOTE(review): presumably operand 5 is the signed conversion result,
;; operand 6 the "< 0" mask and operand 7 the masked 2^32 correction, so
;; negative results get 2^32 added -- confirm against the full RTL.
5281 (define_expand "vec_unpacku_float_lo_v4si"
5285 (match_operand:V4SI 1 "nonimmediate_operand")
5286 (parallel [(const_int 0) (const_int 1)]))))
5288 (lt:V2DF (match_dup 5) (match_dup 3)))
5290 (and:V2DF (match_dup 6) (match_dup 4)))
5291 (set (match_operand:V2DF 0 "register_operand")
5292 (plus:V2DF (match_dup 5) (match_dup 7)))]
5295 REAL_VALUE_TYPE TWO32r;
5299 real_ldexp (&TWO32r, &dconst1, 32);
5300 x = const_double_from_real_value (TWO32r, DFmode);
5302 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5303 operands[4] = force_reg (V2DFmode,
5304 ix86_build_const_vector (V2DFmode, 1, x));
5306 for (i = 5; i < 8; i++)
5307 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the high half of a V8SI register to V4DF, emitted
;; entirely from C code:
;;   tmp[0] = zero V4DF vector, tmp[1] = V4DF constant built from 2^32
;;   tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = signed int -> double conversion of tmp[5]
;;   tmp[3] = (tmp[2] < tmp[0]), i.e. compare against zero
;;   tmp[4] = tmp[3] AND tmp[1]
;;   operand 0 = tmp[2] + tmp[4]
;; The net effect of the sequence is to add the 2^32 constant to the lanes
;; whose signed conversion compared below zero.
5310 (define_expand "vec_unpacku_float_hi_v8si"
5311 [(match_operand:V4DF 0 "register_operand")
5312 (match_operand:V8SI 1 "register_operand")]
5315 REAL_VALUE_TYPE TWO32r;
5319 real_ldexp (&TWO32r, &dconst1, 32);
5320 x = const_double_from_real_value (TWO32r, DFmode);
5322 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5323 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5324 tmp[5] = gen_reg_rtx (V4SImode);
5326 for (i = 2; i < 5; i++)
5327 tmp[i] = gen_reg_rtx (V4DFmode);
5328 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5329 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5330 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5331 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5332 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5333 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned conversion of the high half of a V16SI register to V8DF, emitted
;; entirely from C code:
;;   tmp[0] = zero V8DF vector, tmp[1] = V8DF constant built from 2^32
;;   tmp[3] = high V8SI half of operand 1 (gen_vec_extract_hi_v16si)
;;   tmp[2] = signed int -> double conversion of tmp[3]
;;   k      = QImode result of (tmp[2] < tmp[0])
;;   tmp[2] = masked add via gen_addv8df3_mask (tmp[2], tmp[2], tmp[1],
;;            tmp[2], k), then copied to operand 0
;; NOTE(review): the argument order of gen_addv8df3_mask is presumably
;; (dest, src1, src2, merge-source, mask), so 2^32 is added only in lanes
;; selected by k -- confirm against the pattern's definition.
5337 (define_expand "vec_unpacku_float_hi_v16si"
5338 [(match_operand:V8DF 0 "register_operand")
5339 (match_operand:V16SI 1 "register_operand")]
5342 REAL_VALUE_TYPE TWO32r;
5345 real_ldexp (&TWO32r, &dconst1, 32);
5346 x = const_double_from_real_value (TWO32r, DFmode);
5348 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5349 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5350 tmp[2] = gen_reg_rtx (V8DFmode);
5351 tmp[3] = gen_reg_rtx (V8SImode);
5352 k = gen_reg_rtx (QImode);
5354 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5355 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5356 emit_insn (gen_rtx_SET (VOIDmode, k,
5357 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5358 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5359 emit_move_insn (operands[0], tmp[2]);
;; Unsigned conversion of the low half of a V8SI operand to V4DF, emitted
;; entirely from C code:
;;   tmp[0] = zero V4DF vector, tmp[1] = V4DF constant built from 2^32
;;   tmp[2] = result of gen_avx_cvtdq2pd256_2 on operand 1
;;   tmp[3] = (tmp[2] < tmp[0]), i.e. compare against zero
;;   tmp[4] = tmp[3] AND tmp[1]
;;   operand 0 = tmp[2] + tmp[4]
;; The net effect of the sequence is to add the 2^32 constant to the lanes
;; whose converted value compared below zero.
5363 (define_expand "vec_unpacku_float_lo_v8si"
5364 [(match_operand:V4DF 0 "register_operand")
5365 (match_operand:V8SI 1 "nonimmediate_operand")]
5368 REAL_VALUE_TYPE TWO32r;
5372 real_ldexp (&TWO32r, &dconst1, 32);
5373 x = const_double_from_real_value (TWO32r, DFmode);
5375 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5376 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5378 for (i = 2; i < 5; i++)
5379 tmp[i] = gen_reg_rtx (V4DFmode);
5380 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5381 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5382 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5383 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5384 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned conversion of the low half of a V16SI operand to V8DF, emitted
;; entirely from C code:
;;   tmp[0] = zero V8DF vector, tmp[1] = V8DF constant built from 2^32
;;   tmp[2] = result of gen_avx512f_cvtdq2pd512_2 on operand 1
;;   k      = QImode result of (tmp[2] < tmp[0])
;;   tmp[2] = masked add via gen_addv8df3_mask (tmp[2], tmp[2], tmp[1],
;;            tmp[2], k), then copied to operand 0
;; NOTE(review): the argument order of gen_addv8df3_mask is presumably
;; (dest, src1, src2, merge-source, mask), so 2^32 is added only in lanes
;; selected by k -- confirm against the pattern's definition.
5388 (define_expand "vec_unpacku_float_lo_v16si"
5389 [(match_operand:V8DF 0 "register_operand")
5390 (match_operand:V16SI 1 "nonimmediate_operand")]
5393 REAL_VALUE_TYPE TWO32r;
5396 real_ldexp (&TWO32r, &dconst1, 32);
5397 x = const_double_from_real_value (TWO32r, DFmode);
5399 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5400 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5401 tmp[2] = gen_reg_rtx (V8DFmode);
5402 k = gen_reg_rtx (QImode);
5404 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5405 emit_insn (gen_rtx_SET (VOIDmode, k,
5406 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5407 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5408 emit_move_insn (operands[0], tmp[2]);
;; Pack-with-truncation for the VF2_512_256 modes: operands 1 and 2 are each
;; narrowed with float_truncate to <sf2dfmode>, and operand 0 (<ssePSmode>)
;; is set to a vec_concat.  Operands 3 and 4 are fresh <sf2dfmode> pseudos
;; created in the preparation code.
;; NOTE(review): presumably operands 3 and 4 hold the two truncated halves
;; that the vec_concat joins -- confirm against the full RTL.
5412 (define_expand "vec_pack_trunc_<mode>"
5414 (float_truncate:<sf2dfmode>
5415 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5417 (float_truncate:<sf2dfmode>
5418 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5419 (set (match_operand:<ssePSmode> 0 "register_operand")
5420 (vec_concat:<ssePSmode>
5425 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5426 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Packs two V2DF operands into one V4SF result, with two code sequences:
;;  * Under TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p:
;;    operand 1 is forced into a register, concatenated with operand 2 into a
;;    V4DF pseudo (gen_avx_vec_concatv4df) and converted in one step with
;;    gen_avx_cvtpd2ps256.
;;  * The other sequence converts each operand separately with
;;    gen_sse2_cvtpd2ps into V4SF pseudos and combines them with
;;    gen_sse_movlhps.
5429 (define_expand "vec_pack_trunc_v2df"
5430 [(match_operand:V4SF 0 "register_operand")
5431 (match_operand:V2DF 1 "nonimmediate_operand")
5432 (match_operand:V2DF 2 "nonimmediate_operand")]
5437 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5439 tmp0 = gen_reg_rtx (V4DFmode);
5440 tmp1 = force_reg (V2DFmode, operands[1]);
5442 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5443 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5447 tmp0 = gen_reg_rtx (V4SFmode);
5448 tmp1 = gen_reg_rtx (V4SFmode);
5450 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5451 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5452 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Packs two V8DF operands into one V16SI result with signed truncating
;; conversion: each operand is converted into a V8SI pseudo with
;; gen_fix_truncv8dfv8si2, and the two halves are joined with
;; gen_avx_vec_concatv16si (operand 1's result first).
5457 (define_expand "vec_pack_sfix_trunc_v8df"
5458 [(match_operand:V16SI 0 "register_operand")
5459 (match_operand:V8DF 1 "nonimmediate_operand")
5460 (match_operand:V8DF 2 "nonimmediate_operand")]
5465 r1 = gen_reg_rtx (V8SImode);
5466 r2 = gen_reg_rtx (V8SImode);
5468 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5469 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5470 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Packs two V4DF operands into one V8SI result with signed truncating
;; conversion: each operand is converted into a V4SI pseudo with
;; gen_fix_truncv4dfv4si2, and the two halves are joined with
;; gen_avx_vec_concatv8si (operand 1's result first).
5474 (define_expand "vec_pack_sfix_trunc_v4df"
5475 [(match_operand:V8SI 0 "register_operand")
5476 (match_operand:V4DF 1 "nonimmediate_operand")
5477 (match_operand:V4DF 2 "nonimmediate_operand")]
5482 r1 = gen_reg_rtx (V4SImode);
5483 r2 = gen_reg_rtx (V4SImode);
5485 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5486 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5487 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI result with signed truncating
;; conversion, with two code sequences:
;;  * Under TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p:
;;    operand 1 is forced into a register, concatenated with operand 2 into a
;;    V4DF pseudo and converted in one step with gen_fix_truncv4dfv4si2.
;;  * The other sequence converts each operand with gen_sse2_cvttpd2dq into a
;;    V4SI pseudo, views both as V2DI (gen_lowpart), interleaves their low
;;    64-bit elements with gen_vec_interleave_lowv2di, and moves the result
;;    back to operand 0 viewed as V4SI.
5491 (define_expand "vec_pack_sfix_trunc_v2df"
5492 [(match_operand:V4SI 0 "register_operand")
5493 (match_operand:V2DF 1 "nonimmediate_operand")
5494 (match_operand:V2DF 2 "nonimmediate_operand")]
5497 rtx tmp0, tmp1, tmp2;
5499 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5501 tmp0 = gen_reg_rtx (V4DFmode);
5502 tmp1 = force_reg (V2DFmode, operands[1]);
5504 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5505 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5509 tmp0 = gen_reg_rtx (V4SImode);
5510 tmp1 = gen_reg_rtx (V4SImode);
5511 tmp2 = gen_reg_rtx (V2DImode);
5513 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5514 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5515 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5516 gen_lowpart (V2DImode, tmp0),
5517 gen_lowpart (V2DImode, tmp1)));
5518 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Mode attribute giving the packed integer result mode for each
;; double-precision source mode: the result has twice as many elements
;; (V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI).
5523 (define_mode_attr ssepackfltmode
5524 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Packs two VF2 registers into one <ssepackfltmode> result with unsigned
;; truncating conversion.
;;  * V8DF: each operand is converted into a V8SI pseudo with
;;    gen_ufix_truncv8dfv8si2 and the halves are joined with
;;    gen_avx_vec_concatv16si.
;;  * Other modes: ix86_expand_adjust_ufix_to_sfix_si is applied to each
;;    operand, returning tmp[0]/tmp[1] and storing a second value through
;;    &tmp[2]/&tmp[3].  tmp[0] and tmp[1] are packed with the signed
;;    gen_vec_pack_sfix_trunc_<mode> into tmp[4].  tmp[5] is built from
;;    tmp[2] and tmp[3] with ix86_expand_vec_extract_even_odd (last argument
;;    0) -- directly when the result mode is V4SImode or TARGET_AVX2 is set,
;;    otherwise through V8SF views that are converted back to V8SI.  The
;;    final result is tmp[4] XOR tmp[5], moved into operand 0 if
;;    expand_simple_binop did not already place it there.
;; NOTE(review): presumably the helper biases large inputs into signed range
;; and the XOR restores the top bit -- confirm against the helper's source.
5526 (define_expand "vec_pack_ufix_trunc_<mode>"
5527 [(match_operand:<ssepackfltmode> 0 "register_operand")
5528 (match_operand:VF2 1 "register_operand")
5529 (match_operand:VF2 2 "register_operand")]
5532 if (<MODE>mode == V8DFmode)
5536 r1 = gen_reg_rtx (V8SImode);
5537 r2 = gen_reg_rtx (V8SImode);
5539 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5540 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5541 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5546 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5547 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5548 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5549 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5550 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5552 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5553 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5557 tmp[5] = gen_reg_rtx (V8SFmode);
5558 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5559 gen_lowpart (V8SFmode, tmp[3]), 0);
5560 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5562 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5563 operands[0], 0, OPTAB_DIRECT);
5564 if (tmp[6] != operands[0])
5565 emit_move_insn (operands[0], tmp[6]);
;; Packs two V4DF operands into one V8SI result using the non-truncating
;; conversion: each operand is converted into a V4SI pseudo with
;; gen_avx_cvtpd2dq256, and the halves are joined with
;; gen_avx_vec_concatv8si (operand 1's result first).
5571 (define_expand "vec_pack_sfix_v4df"
5572 [(match_operand:V8SI 0 "register_operand")
5573 (match_operand:V4DF 1 "nonimmediate_operand")
5574 (match_operand:V4DF 2 "nonimmediate_operand")]
5579 r1 = gen_reg_rtx (V4SImode);
5580 r2 = gen_reg_rtx (V4SImode);
5582 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5583 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5584 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI result using the non-truncating
;; conversion, with two code sequences:
;;  * Under TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p:
;;    operand 1 is forced into a register, concatenated with operand 2 into a
;;    V4DF pseudo and converted in one step with gen_avx_cvtpd2dq256.
;;  * The other sequence converts each operand with gen_sse2_cvtpd2dq into a
;;    V4SI pseudo, views both as V2DI (gen_lowpart), interleaves their low
;;    64-bit elements with gen_vec_interleave_lowv2di, and moves the result
;;    back to operand 0 viewed as V4SI.
5588 (define_expand "vec_pack_sfix_v2df"
5589 [(match_operand:V4SI 0 "register_operand")
5590 (match_operand:V2DF 1 "nonimmediate_operand")
5591 (match_operand:V2DF 2 "nonimmediate_operand")]
5594 rtx tmp0, tmp1, tmp2;
5596 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5598 tmp0 = gen_reg_rtx (V4DFmode);
5599 tmp1 = force_reg (V2DFmode, operands[1]);
5601 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5602 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5606 tmp0 = gen_reg_rtx (V4SImode);
5607 tmp1 = gen_reg_rtx (V4SImode);
5608 tmp2 = gen_reg_rtx (V2DImode);
5610 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5611 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5612 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5613 gen_lowpart (V2DImode, tmp0),
5614 gen_lowpart (V2DImode, tmp1)));
5615 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5620 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5622 ;; Parallel single-precision floating point element swizzling
5624 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps.  ix86_fixup_binary_operands is called
;; with UNKNOWN/V4SFmode on the operand array and returns the destination to
;; use; gen_sse_movhlps is emitted into that destination, and the value is
;; copied to operand 0 afterwards when the helper returned a different rtx.
5626 (define_expand "sse_movhlps_exp"
5627 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5630 (match_operand:V4SF 1 "nonimmediate_operand")
5631 (match_operand:V4SF 2 "nonimmediate_operand"))
5632 (parallel [(const_int 6)
5638 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5640 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5642 /* Fix up the destination if needed. */
5643 if (dst != operands[0])
5644 emit_move_insn (operands[0], dst);
;; movhlps-style selection from two V4SF operands (the selection list starts
;; at element 6).  Valid for TARGET_SSE as long as operands 1 and 2 are not
;; both memory.  Five alternatives:
;;   0: movhlps, non-AVX, register source, operand 1 tied to the destination
;;   1: vmovhlps, AVX three-operand register form
;;   2: movlps from %H2, non-AVX, offsettable memory source
;;   3: vmovlps from %H2, AVX, offsettable memory source
;;   4: %vmovhps store to a memory destination
;; NOTE(review): %H2 presumably addresses the high 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
5649 (define_insn "sse_movhlps"
5650 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5653 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5654 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5655 (parallel [(const_int 6)
5659 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5661 movhlps\t{%2, %0|%0, %2}
5662 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5663 movlps\t{%H2, %0|%0, %H2}
5664 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5665 %vmovhps\t{%2, %0|%q0, %2}"
5666 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5667 (set_attr "type" "ssemov")
5668 (set_attr "ssememalign" "64")
5669 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5670 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps.  ix86_fixup_binary_operands is called
;; with UNKNOWN/V4SFmode on the operand array and returns the destination to
;; use; gen_sse_movlhps is emitted into that destination, and the value is
;; copied to operand 0 afterwards when the helper returned a different rtx.
5672 (define_expand "sse_movlhps_exp"
5673 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5676 (match_operand:V4SF 1 "nonimmediate_operand")
5677 (match_operand:V4SF 2 "nonimmediate_operand"))
5678 (parallel [(const_int 0)
5684 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5686 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5688 /* Fix up the destination if needed. */
5689 if (dst != operands[0])
5690 emit_move_insn (operands[0], dst);
;; movlhps-style selection from two V4SF operands (the selection list starts
;; at element 0).  Valid for TARGET_SSE when ix86_binary_operator_ok accepts
;; the operands.  Five alternatives:
;;   0: movlhps, non-AVX, register source, operand 1 tied to the destination
;;   1: vmovlhps, AVX three-operand register form
;;   2: movhps, non-AVX, memory source (Intel syntax prints %q2)
;;   3: vmovhps, AVX, memory source (Intel syntax prints %q2)
;;   4: %vmovlps store to %H0 of an offsettable memory destination
;; NOTE(review): %H0 presumably addresses the high 8 bytes of the memory
;; destination -- confirm against the operand-printing code.
5695 (define_insn "sse_movlhps"
5696 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5699 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5700 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5701 (parallel [(const_int 0)
5705 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5707 movlhps\t{%2, %0|%0, %2}
5708 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5709 movhps\t{%2, %0|%0, %q2}
5710 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5711 %vmovlps\t{%2, %H0|%H0, %2}"
5712 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5713 (set_attr "type" "ssemov")
5714 (set_attr "ssememalign" "64")
5715 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5716 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps.  The selection list picks element pairs
;; (2,18) (3,19) (6,22) (7,23) (10,26) (11,27) (14,30) (15,31) from operands
;; 1 and 2; indices below 16 refer to the first 16 elements and indices 16..31
;; to the second 16, so elements 2 and 3 of every group of four are
;; interleaved.  EVEX-encoded; <mask_operand3> is spliced in after the
;; destination for the masked variant.
5718 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5719 [(set (match_operand:V16SF 0 "register_operand" "=v")
5722 (match_operand:V16SF 1 "register_operand" "v")
5723 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5724 (parallel [(const_int 2) (const_int 18)
5725 (const_int 3) (const_int 19)
5726 (const_int 6) (const_int 22)
5727 (const_int 7) (const_int 23)
5728 (const_int 10) (const_int 26)
5729 (const_int 11) (const_int 27)
5730 (const_int 14) (const_int 30)
5731 (const_int 15) (const_int 31)])))]
5733 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5734 [(set_attr "type" "sselog")
5735 (set_attr "prefix" "evex")
5736 (set_attr "mode" "V16SF")])
;; 256-bit vunpckhps.  The selection list picks element pairs
;; (2,10) (3,11) (6,14) (7,15) from operands 1 and 2, i.e. elements 2 and 3
;; of each group of four are interleaved.  Requires TARGET_AVX and
;; <mask_avx512vl_condition>.
5738 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
5739 (define_insn "avx_unpckhps256<mask_name>"
5740 [(set (match_operand:V8SF 0 "register_operand" "=v")
5743 (match_operand:V8SF 1 "register_operand" "v")
5744 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5745 (parallel [(const_int 2) (const_int 10)
5746 (const_int 3) (const_int 11)
5747 (const_int 6) (const_int 14)
5748 (const_int 7) (const_int 15)])))]
5749 "TARGET_AVX && <mask_avx512vl_condition>"
5750 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5751 [(set_attr "type" "sselog")
5752 (set_attr "prefix" "vex")
5753 (set_attr "mode" "V8SF")])
;; Full-width high interleave of two V8SF operands, built from three
;; selections: the (0,8)(1,9)(4,12)(5,13) pattern, the
;; (2,10)(3,11)(6,14)(7,15) pattern, and a final selection of elements
;; {4,5,6,7,12,13,14,15} that sets operand 0.  Operands 3 and 4 are fresh
;; V8SF pseudos created in the preparation code.
;; NOTE(review): presumably operands 3 and 4 receive the two per-lane unpack
;; results and the final step joins their upper 128-bit halves -- confirm.
5755 (define_expand "vec_interleave_highv8sf"
5759 (match_operand:V8SF 1 "register_operand" "x")
5760 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5761 (parallel [(const_int 0) (const_int 8)
5762 (const_int 1) (const_int 9)
5763 (const_int 4) (const_int 12)
5764 (const_int 5) (const_int 13)])))
5770 (parallel [(const_int 2) (const_int 10)
5771 (const_int 3) (const_int 11)
5772 (const_int 6) (const_int 14)
5773 (const_int 7) (const_int 15)])))
5774 (set (match_operand:V8SF 0 "register_operand")
5779 (parallel [(const_int 4) (const_int 5)
5780 (const_int 6) (const_int 7)
5781 (const_int 12) (const_int 13)
5782 (const_int 14) (const_int 15)])))]
5785 operands[3] = gen_reg_rtx (V8SFmode);
5786 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave: element pairs (2,6) and (3,7) are selected from
;; operands 1 and 2.  Requires TARGET_SSE and <mask_avx512vl_condition>.
;; Two alternatives: legacy two-operand unpckhps (operand 1 tied to the
;; destination) and three-operand vunpckhps, the latter carrying
;; <mask_operand3> for the masked variant.
5789 (define_insn "vec_interleave_highv4sf<mask_name>"
5790 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5793 (match_operand:V4SF 1 "register_operand" "0,v")
5794 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
5795 (parallel [(const_int 2) (const_int 6)
5796 (const_int 3) (const_int 7)])))]
5797 "TARGET_SSE && <mask_avx512vl_condition>"
5799 unpckhps\t{%2, %0|%0, %2}
5800 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5801 [(set_attr "isa" "noavx,avx")
5802 (set_attr "type" "sselog")
5803 (set_attr "prefix" "orig,vex")
5804 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps.  The selection list picks element pairs
;; (0,16) (1,17) (4,20) (5,21) (8,24) (9,25) (12,28) (13,29) from operands 1
;; and 2, i.e. elements 0 and 1 of every group of four are interleaved.
;; EVEX-encoded; <mask_operand3> is spliced in after the destination for the
;; masked variant.
5806 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5807 [(set (match_operand:V16SF 0 "register_operand" "=v")
5810 (match_operand:V16SF 1 "register_operand" "v")
5811 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5812 (parallel [(const_int 0) (const_int 16)
5813 (const_int 1) (const_int 17)
5814 (const_int 4) (const_int 20)
5815 (const_int 5) (const_int 21)
5816 (const_int 8) (const_int 24)
5817 (const_int 9) (const_int 25)
5818 (const_int 12) (const_int 28)
5819 (const_int 13) (const_int 29)])))]
5821 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5822 [(set_attr "type" "sselog")
5823 (set_attr "prefix" "evex")
5824 (set_attr "mode" "V16SF")])
;; 256-bit vunpcklps.  The selection list picks element pairs
;; (0,8) (1,9) (4,12) (5,13) from operands 1 and 2, i.e. elements 0 and 1 of
;; each group of four are interleaved.  Requires TARGET_AVX and
;; <mask_avx512vl_condition>.
5826 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
5827 (define_insn "avx_unpcklps256<mask_name>"
5828 [(set (match_operand:V8SF 0 "register_operand" "=v")
5831 (match_operand:V8SF 1 "register_operand" "v")
5832 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5833 (parallel [(const_int 0) (const_int 8)
5834 (const_int 1) (const_int 9)
5835 (const_int 4) (const_int 12)
5836 (const_int 5) (const_int 13)])))]
5837 "TARGET_AVX && <mask_avx512vl_condition>"
5838 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5839 [(set_attr "type" "sselog")
5840 (set_attr "prefix" "vex")
5841 (set_attr "mode" "V8SF")])
;; Explicitly masked 128-bit vunpcklps: element pairs (0,4) and (1,5) are
;; selected from operands 1 and 2.  Operand 3 (constraints "0C") and the
;; QImode operand 4 (constraint "Yk") take part in the masking; the template
;; prints the mask as %{%4%} followed by %N3.
;; NOTE(review): presumably operand 3 is the merge source (tied to the
;; destination or a zero constant) and %N3 emits the zeroing marker when it
;; is constant zero -- confirm.
5843 (define_insn "unpcklps128_mask"
5844 [(set (match_operand:V4SF 0 "register_operand" "=v")
5848 (match_operand:V4SF 1 "register_operand" "v")
5849 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5850 (parallel [(const_int 0) (const_int 4)
5851 (const_int 1) (const_int 5)]))
5852 (match_operand:V4SF 3 "vector_move_operand" "0C")
5853 (match_operand:QI 4 "register_operand" "Yk")))]
5855 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5856 [(set_attr "type" "sselog")
5857 (set_attr "prefix" "evex")
5858 (set_attr "mode" "V4SF")])
;; Full-width low interleave of two V8SF operands, built from three
;; selections: the (0,8)(1,9)(4,12)(5,13) pattern, the
;; (2,10)(3,11)(6,14)(7,15) pattern, and a final selection of elements
;; {0,1,2,3,8,9,10,11} that sets operand 0.  Operands 3 and 4 are fresh V8SF
;; pseudos created in the preparation code.
;; NOTE(review): presumably operands 3 and 4 receive the two per-lane unpack
;; results and the final step joins their lower 128-bit halves -- confirm.
5860 (define_expand "vec_interleave_lowv8sf"
5864 (match_operand:V8SF 1 "register_operand" "x")
5865 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5866 (parallel [(const_int 0) (const_int 8)
5867 (const_int 1) (const_int 9)
5868 (const_int 4) (const_int 12)
5869 (const_int 5) (const_int 13)])))
5875 (parallel [(const_int 2) (const_int 10)
5876 (const_int 3) (const_int 11)
5877 (const_int 6) (const_int 14)
5878 (const_int 7) (const_int 15)])))
5879 (set (match_operand:V8SF 0 "register_operand")
5884 (parallel [(const_int 0) (const_int 1)
5885 (const_int 2) (const_int 3)
5886 (const_int 8) (const_int 9)
5887 (const_int 10) (const_int 11)])))]
5890 operands[3] = gen_reg_rtx (V8SFmode);
5891 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave: element pairs (0,4) and (1,5) are selected from
;; operands 1 and 2.  Two alternatives: legacy two-operand unpcklps (operand
;; 1 tied to the destination) and three-operand VEX vunpcklps.
5894 (define_insn "vec_interleave_lowv4sf"
5895 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5898 (match_operand:V4SF 1 "register_operand" "0,x")
5899 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5900 (parallel [(const_int 0) (const_int 4)
5901 (const_int 1) (const_int 5)])))]
5904 unpcklps\t{%2, %0|%0, %2}
5905 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5906 [(set_attr "isa" "noavx,avx")
5907 (set_attr "type" "sselog")
5908 (set_attr "prefix" "orig,vex")
5909 (set_attr "mode" "V4SF")])
;; 256-bit vmovshdup: the selection list {1,1,3,3,5,5,7,7} duplicates each
;; odd-indexed element of the V8SF source into the even slot below it.
;; Requires TARGET_AVX and <mask_avx512vl_condition>.
5911 ;; These are modeled with the same vec_concat as the others so that we
5912 ;; capture users of shufps that can use the new instructions
5913 (define_insn "avx_movshdup256<mask_name>"
5914 [(set (match_operand:V8SF 0 "register_operand" "=v")
5917 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5919 (parallel [(const_int 1) (const_int 1)
5920 (const_int 3) (const_int 3)
5921 (const_int 5) (const_int 5)
5922 (const_int 7) (const_int 7)])))]
5923 "TARGET_AVX && <mask_avx512vl_condition>"
5924 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5925 [(set_attr "type" "sse")
5926 (set_attr "prefix" "vex")
5927 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on a V4SF operand (the selection list starts at element
;; 1).  Requires TARGET_SSE3 and <mask_avx512vl_condition>.  Emits
;; "%vmovshdup" with <mask_operand2> after the destination.
;; NOTE(review): presumably the full selection is {1,1,3,3} (with the upper
;; indices offset by the concatenation) -- confirm against the full RTL.
5929 (define_insn "sse3_movshdup<mask_name>"
5930 [(set (match_operand:V4SF 0 "register_operand" "=v")
5933 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5935 (parallel [(const_int 1)
5939 "TARGET_SSE3 && <mask_avx512vl_condition>"
5940 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5941 [(set_attr "type" "sse")
5942 (set_attr "prefix_rep" "1")
5943 (set_attr "prefix" "maybe_vex")
5944 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup: the selector duplicates each odd-indexed element of
;; the V16SF source (1,1,3,3,...,15,15).  Always EVEX-encoded.
5946 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5947 [(set (match_operand:V16SF 0 "register_operand" "=v")
5950 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5952 (parallel [(const_int 1) (const_int 1)
5953 (const_int 3) (const_int 3)
5954 (const_int 5) (const_int 5)
5955 (const_int 7) (const_int 7)
5956 (const_int 9) (const_int 9)
5957 (const_int 11) (const_int 11)
5958 (const_int 13) (const_int 13)
5959 (const_int 15) (const_int 15)])))]
5961 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5962 [(set_attr "type" "sse")
5963 (set_attr "prefix" "evex")
5964 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup: the selector duplicates each even-indexed element
;; (0,0,2,2,4,4,6,6).  The <mask_name> suffix also generates a masked
;; variant, which is additionally gated by <mask_avx512vl_condition>.
5966 (define_insn "avx_movsldup256<mask_name>"
5967 [(set (match_operand:V8SF 0 "register_operand" "=v")
5970 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5972 (parallel [(const_int 0) (const_int 0)
5973 (const_int 2) (const_int 2)
5974 (const_int 4) (const_int 4)
5975 (const_int 6) (const_int 6)])))]
5976 "TARGET_AVX && <mask_avx512vl_condition>"
5977 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5978 [(set_attr "type" "sse")
5979 (set_attr "prefix" "vex")
5980 (set_attr "mode" "V8SF")])
5982 (define_insn "sse3_movsldup<mask_name>"
5983 [(set (match_operand:V4SF 0 "register_operand" "=v")
5986 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5988 (parallel [(const_int 0)
5992 "TARGET_SSE3 && <mask_avx512vl_condition>"
5993 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5994 [(set_attr "type" "sse")
5995 (set_attr "prefix_rep" "1")
5996 (set_attr "prefix" "maybe_vex")
5997 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup: the selector duplicates each even-indexed element of
;; the V16SF source (0,0,2,2,...,14,14).  Always EVEX-encoded.
5999 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6000 [(set (match_operand:V16SF 0 "register_operand" "=v")
6003 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6005 (parallel [(const_int 0) (const_int 0)
6006 (const_int 2) (const_int 2)
6007 (const_int 4) (const_int 4)
6008 (const_int 6) (const_int 6)
6009 (const_int 8) (const_int 8)
6010 (const_int 10) (const_int 10)
6011 (const_int 12) (const_int 12)
6012 (const_int 14) (const_int 14)])))]
6014 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6015 [(set_attr "type" "sse")
6016 (set_attr "prefix" "evex")
6017 (set_attr "mode" "V16SF")])
;; Expander for the 256-bit shufps builtin form.  Operand 3 is the 8-bit
;; immediate; it is split into four 2-bit fields that become explicit
;; element selectors for avx_shufps256_1:
;;   fields 0 and 1 -> indices 0..3, fields 2 and 3 -> indices 8..11,
;; and the same four fields are repeated with +4 (giving 4..7 and 12..15),
;; so both groups of four selectors are driven by the same immediate bits.
6019 (define_expand "avx_shufps256<mask_expand4_name>"
6020 [(match_operand:V8SF 0 "register_operand")
6021 (match_operand:V8SF 1 "register_operand")
6022 (match_operand:V8SF 2 "nonimmediate_operand")
6023 (match_operand:SI 3 "const_int_operand")]
6026 int mask = INTVAL (operands[3]);
6027 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6030 GEN_INT ((mask >> 0) & 3),
6031 GEN_INT ((mask >> 2) & 3),
6032 GEN_INT (((mask >> 4) & 3) + 8),
6033 GEN_INT (((mask >> 6) & 3) + 8),
6034 GEN_INT (((mask >> 0) & 3) + 4),
6035 GEN_INT (((mask >> 2) & 3) + 4),
6036 GEN_INT (((mask >> 4) & 3) + 12),
6037 GEN_INT (((mask >> 6) & 3) + 12)
6038 <mask_expand4_args>));
6042 ;; One bit in mask selects 2 elements.
;; Operands 3..10 are the eight explicit element selectors.  The insn
;; condition only accepts selector sets in which operands 7..10 equal
;; operands 3..6 plus 4, i.e. the second group of four mirrors the first.
;; The output code rebuilds the 8-bit immediate from operands 3..6 only
;; (two bits each, after removing the +8 bias from operands 5 and 6) and
;; overwrites operands[3] with it before printing the template.
6043 (define_insn "avx_shufps256_1<mask_name>"
6044 [(set (match_operand:V8SF 0 "register_operand" "=v")
6047 (match_operand:V8SF 1 "register_operand" "v")
6048 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6049 (parallel [(match_operand 3 "const_0_to_3_operand" )
6050 (match_operand 4 "const_0_to_3_operand" )
6051 (match_operand 5 "const_8_to_11_operand" )
6052 (match_operand 6 "const_8_to_11_operand" )
6053 (match_operand 7 "const_4_to_7_operand" )
6054 (match_operand 8 "const_4_to_7_operand" )
6055 (match_operand 9 "const_12_to_15_operand")
6056 (match_operand 10 "const_12_to_15_operand")])))]
6058 && <mask_avx512vl_condition>
6059 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6060 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6061 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6062 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6065 mask = INTVAL (operands[3]);
6066 mask |= INTVAL (operands[4]) << 2;
6067 mask |= (INTVAL (operands[5]) - 8) << 4;
6068 mask |= (INTVAL (operands[6]) - 8) << 6;
6069 operands[3] = GEN_INT (mask);
6071 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6073 [(set_attr "type" "sseshuf")
6074 (set_attr "length_immediate" "1")
6075 (set_attr "prefix" "<mask_prefix>")
6076 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit shufps builtin form.  The 8-bit immediate in
;; operand 3 is split into four 2-bit fields: fields 0 and 1 become
;; selectors in 0..3, fields 2 and 3 become selectors in 4..7 (+4 bias).
;; The selectors are passed to the sse_shufps_v4sf insn.
6078 (define_expand "sse_shufps<mask_expand4_name>"
6079 [(match_operand:V4SF 0 "register_operand")
6080 (match_operand:V4SF 1 "register_operand")
6081 (match_operand:V4SF 2 "nonimmediate_operand")
6082 (match_operand:SI 3 "const_int_operand")]
6085 int mask = INTVAL (operands[3]);
6086 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6089 GEN_INT ((mask >> 0) & 3),
6090 GEN_INT ((mask >> 2) & 3),
6091 GEN_INT (((mask >> 4) & 3) + 4),
6092 GEN_INT (((mask >> 6) & 3) + 4)
6093 <mask_expand4_args>));
;; Masked 128-bit vshufps.  Operands 3..6 are explicit element selectors
;; (two in 0..3, two in 4..7); operand 7 is the merge source (tied to the
;; destination or a constant accepted by "C") and operand 8 the mask
;; register.  The output code packs the selectors back into the 8-bit
;; immediate (removing the +4 bias from operands 5 and 6) and stores it in
;; operands[3] before printing.
6097 (define_insn "sse_shufps_v4sf_mask"
6098 [(set (match_operand:V4SF 0 "register_operand" "=v")
6102 (match_operand:V4SF 1 "register_operand" "v")
6103 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6104 (parallel [(match_operand 3 "const_0_to_3_operand")
6105 (match_operand 4 "const_0_to_3_operand")
6106 (match_operand 5 "const_4_to_7_operand")
6107 (match_operand 6 "const_4_to_7_operand")]))
6108 (match_operand:V4SF 7 "vector_move_operand" "0C")
6109 (match_operand:QI 8 "register_operand" "Yk")))]
6113 mask |= INTVAL (operands[3]) << 0;
6114 mask |= INTVAL (operands[4]) << 2;
6115 mask |= (INTVAL (operands[5]) - 4) << 4;
6116 mask |= (INTVAL (operands[6]) - 4) << 6;
6117 operands[3] = GEN_INT (mask);
6119 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6121 [(set_attr "type" "sseshuf")
6122 (set_attr "length_immediate" "1")
6123 (set_attr "prefix" "evex")
6124 (set_attr "mode" "V4SF")])
;; Unmasked 128-bit shufps for every mode in VI4F_128.  Elements are
;; selected from the concatenation of operands 1 and 2: operands 3 and 4
;; pick from indices 0..3, operands 5 and 6 from indices 4..7.
;; The output code packs the four selectors into the 8-bit immediate
;; (removing the +4 bias from operands 5 and 6), stores it in operands[3],
;; then chooses the SSE or AVX mnemonic from which_alternative.
;; The "mode" attribute is V4SF for all modes of the iterator.
6126 (define_insn "sse_shufps_<mode>"
6127 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
6128 (vec_select:VI4F_128
6129 (vec_concat:<ssedoublevecmode>
6130 (match_operand:VI4F_128 1 "register_operand" "0,x")
6131 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
6132 (parallel [(match_operand 3 "const_0_to_3_operand")
6133 (match_operand 4 "const_0_to_3_operand")
6134 (match_operand 5 "const_4_to_7_operand")
6135 (match_operand 6 "const_4_to_7_operand")])))]
6139 mask |= INTVAL (operands[3]) << 0;
6140 mask |= INTVAL (operands[4]) << 2;
6141 mask |= (INTVAL (operands[5]) - 4) << 4;
6142 mask |= (INTVAL (operands[6]) - 4) << 6;
6143 operands[3] = GEN_INT (mask);
6145 switch (which_alternative)
6148 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6150 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6155 [(set_attr "isa" "noavx,avx")
6156 (set_attr "type" "sseshuf")
6157 (set_attr "length_immediate" "1")
6158 (set_attr "prefix" "orig,vex")
6159 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF source into a V2SF destination.
;; Three alternatives, in constraint order:
;;   0: register source, memory destination   -> movhps
;;   1: register source, register destination -> movhlps
;;   2: offsettable memory source ("o"), register destination -> movlps on
;;      %H1 (presumably the upper 8 bytes of the memory operand -- confirm
;;      against the operand-modifier documentation).
6161 (define_insn "sse_storehps"
6162 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6164 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6165 (parallel [(const_int 2) (const_int 3)])))]
6168 %vmovhps\t{%1, %0|%q0, %1}
6169 %vmovhlps\t{%1, %d0|%d0, %1}
6170 %vmovlps\t{%H1, %d0|%d0, %H1}"
6171 [(set_attr "type" "ssemov")
6172 (set_attr "ssememalign" "64")
6173 (set_attr "prefix" "maybe_vex")
6174 (set_attr "mode" "V2SF,V4SF,V2SF")])
6176 (define_expand "sse_loadhps_exp"
6177 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6180 (match_operand:V4SF 1 "nonimmediate_operand")
6181 (parallel [(const_int 0) (const_int 1)]))
6182 (match_operand:V2SF 2 "nonimmediate_operand")))]
6185 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6187 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6189 /* Fix up the destination if needed. */
6190 if (dst != operands[0])
6191 emit_move_insn (operands[0], dst);
6196 (define_insn "sse_loadhps"
6197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6200 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6201 (parallel [(const_int 0) (const_int 1)]))
6202 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
6205 movhps\t{%2, %0|%0, %q2}
6206 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6207 movlhps\t{%2, %0|%0, %2}
6208 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6209 %vmovlps\t{%2, %H0|%H0, %2}"
6210 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6211 (set_attr "type" "ssemov")
6212 (set_attr "ssememalign" "64")
6213 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6214 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6216 (define_insn "sse_storelps"
6217 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6219 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
6220 (parallel [(const_int 0) (const_int 1)])))]
6223 %vmovlps\t{%1, %0|%q0, %1}
6224 %vmovaps\t{%1, %0|%0, %1}
6225 %vmovlps\t{%1, %d0|%d0, %q1}"
6226 [(set_attr "type" "ssemov")
6227 (set_attr "prefix" "maybe_vex")
6228 (set_attr "mode" "V2SF,V4SF,V2SF")])
6230 (define_expand "sse_loadlps_exp"
6231 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6233 (match_operand:V2SF 2 "nonimmediate_operand")
6235 (match_operand:V4SF 1 "nonimmediate_operand")
6236 (parallel [(const_int 2) (const_int 3)]))))]
6239 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6241 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6243 /* Fix up the destination if needed. */
6244 if (dst != operands[0])
6245 emit_move_insn (operands[0], dst);
6250 (define_insn "sse_loadlps"
6251 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6253 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
6255 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
6256 (parallel [(const_int 2) (const_int 3)]))))]
6259 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6260 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6261 movlps\t{%2, %0|%0, %q2}
6262 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6263 %vmovlps\t{%2, %0|%q0, %2}"
6264 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6265 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6266 (set_attr "ssememalign" "64")
6267 (set_attr "length_immediate" "1,1,*,*,*")
6268 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6269 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6271 (define_insn "sse_movss"
6272 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6274 (match_operand:V4SF 2 "register_operand" " x,x")
6275 (match_operand:V4SF 1 "register_operand" " 0,x")
6279 movss\t{%2, %0|%0, %2}
6280 vmovss\t{%2, %1, %0|%0, %1, %2}"
6281 [(set_attr "isa" "noavx,avx")
6282 (set_attr "type" "ssemov")
6283 (set_attr "prefix" "orig,vex")
6284 (set_attr "mode" "SF")])
6286 (define_insn "avx2_vec_dup<mode>"
6287 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6288 (vec_duplicate:VF1_128_256
6290 (match_operand:V4SF 1 "register_operand" "x")
6291 (parallel [(const_int 0)]))))]
6293 "vbroadcastss\t{%1, %0|%0, %1}"
6294 [(set_attr "type" "sselog1")
6295 (set_attr "prefix" "vex")
6296 (set_attr "mode" "<MODE>")])
6298 (define_insn "avx2_vec_dupv8sf_1"
6299 [(set (match_operand:V8SF 0 "register_operand" "=x")
6302 (match_operand:V8SF 1 "register_operand" "x")
6303 (parallel [(const_int 0)]))))]
6305 "vbroadcastss\t{%x1, %0|%0, %x1}"
6306 [(set_attr "type" "sselog1")
6307 (set_attr "prefix" "vex")
6308 (set_attr "mode" "V8SF")])
;; Broadcast element 0 of a 512-bit register (any mode in VF_512) to every
;; element of the destination.  The template prints the source with the %x
;; modifier and picks the mnemonic suffix through <bcstscalarsuff>.
6310 (define_insn "avx512f_vec_dup<mode>_1"
6311 [(set (match_operand:VF_512 0 "register_operand" "=v")
6312 (vec_duplicate:VF_512
6313 (vec_select:<ssescalarmode>
6314 (match_operand:VF_512 1 "register_operand" "v")
6315 (parallel [(const_int 0)]))))]
6317 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6318 [(set_attr "type" "sselog1")
6319 (set_attr "prefix" "evex")
6320 (set_attr "mode" "<MODE>")])
6322 ;; Although insertps takes register source, we prefer
6323 ;; unpcklps with register source since it is shorter.
6324 (define_insn "*vec_concatv2sf_sse4_1"
6325 [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
6327 (match_operand:SF 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,m, 0 , m")
6328 (match_operand:SF 2 "vector_move_operand" " Yr,*x,x, m,m, m,C,*ym, C")))]
6331 unpcklps\t{%2, %0|%0, %2}
6332 unpcklps\t{%2, %0|%0, %2}
6333 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6334 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6335 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6336 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6337 %vmovss\t{%1, %0|%0, %1}
6338 punpckldq\t{%2, %0|%0, %2}
6339 movd\t{%1, %0|%0, %1}"
6340 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
6341 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6342 (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
6343 (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
6344 (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
6345 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
6346 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6348 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6349 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6350 ;; alternatives pretty much forces the MMX alternative to be chosen.
6351 (define_insn "*vec_concatv2sf_sse"
6352 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6354 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6355 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6358 unpcklps\t{%2, %0|%0, %2}
6359 movss\t{%1, %0|%0, %1}
6360 punpckldq\t{%2, %0|%0, %2}
6361 movd\t{%1, %0|%0, %1}"
6362 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6363 (set_attr "mode" "V4SF,SF,DI,DI")])
6365 (define_insn "*vec_concatv4sf"
6366 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6368 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6369 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6372 movlhps\t{%2, %0|%0, %2}
6373 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6374 movhps\t{%2, %0|%0, %q2}
6375 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6376 [(set_attr "isa" "noavx,avx,noavx,avx")
6377 (set_attr "type" "ssemov")
6378 (set_attr "prefix" "orig,vex,orig,vex")
6379 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6381 (define_expand "vec_init<mode>"
6382 [(match_operand:V_128 0 "register_operand")
6386 ix86_expand_vector_init (false, operands[0], operands[1]);
6390 ;; Avoid combining registers from different units in a single alternative,
6391 ;; see comment above inline_secondary_memory_needed function in i386.c
6392 (define_insn "vec_set<mode>_0"
6393 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6394 "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6396 (vec_duplicate:VI4F_128
6397 (match_operand:<ssescalarmode> 2 "general_operand"
6398 " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6399 (match_operand:VI4F_128 1 "vector_move_operand"
6400 " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6404 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6405 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6406 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6407 %vmovd\t{%2, %0|%0, %2}
6408 movss\t{%2, %0|%0, %2}
6409 movss\t{%2, %0|%0, %2}
6410 vmovss\t{%2, %1, %0|%0, %1, %2}
6411 pinsrd\t{$0, %2, %0|%0, %2, 0}
6412 pinsrd\t{$0, %2, %0|%0, %2, 0}
6413 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6417 [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
6419 (cond [(eq_attr "alternative" "0,1,7,8,9")
6420 (const_string "sselog")
6421 (eq_attr "alternative" "11")
6422 (const_string "imov")
6423 (eq_attr "alternative" "12")
6424 (const_string "fmov")
6426 (const_string "ssemov")))
6427 (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6428 (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6429 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
6430 (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
6432 ;; A subset is vec_setv4sf.
6433 (define_insn "*vec_setv4sf_sse4_1"
6434 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6437 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
6438 (match_operand:V4SF 1 "register_operand" "0,0,x")
6439 (match_operand:SI 3 "const_int_operand")))]
6441 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6442 < GET_MODE_NUNITS (V4SFmode))"
6444 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6445 switch (which_alternative)
6449 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6451 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6456 [(set_attr "isa" "noavx,noavx,avx")
6457 (set_attr "type" "sselog")
6458 (set_attr "prefix_data16" "1,1,*")
6459 (set_attr "prefix_extra" "1")
6460 (set_attr "length_immediate" "1")
6461 (set_attr "prefix" "orig,orig,vex")
6462 (set_attr "mode" "V4SF")])
6464 (define_insn "sse4_1_insertps"
6465 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6466 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
6467 (match_operand:V4SF 1 "register_operand" "0,0,x")
6468 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
6472 if (MEM_P (operands[2]))
6474 unsigned count_s = INTVAL (operands[3]) >> 6;
6476 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6477 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6479 switch (which_alternative)
6483 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6485 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6490 [(set_attr "isa" "noavx,noavx,avx")
6491 (set_attr "type" "sselog")
6492 (set_attr "prefix_data16" "1,1,*")
6493 (set_attr "prefix_extra" "1")
6494 (set_attr "length_immediate" "1")
6495 (set_attr "prefix" "orig,orig,vex")
6496 (set_attr "mode" "V4SF")])
6499 [(set (match_operand:VI4F_128 0 "memory_operand")
6501 (vec_duplicate:VI4F_128
6502 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6505 "TARGET_SSE && reload_completed"
6506 [(set (match_dup 0) (match_dup 1))]
6507 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
6509 (define_expand "vec_set<mode>"
6510 [(match_operand:V 0 "register_operand")
6511 (match_operand:<ssescalarmode> 1 "register_operand")
6512 (match_operand 2 "const_int_operand")]
6515 ix86_expand_vector_set (false, operands[0], operands[1],
6516 INTVAL (operands[2]));
6520 (define_insn_and_split "*vec_extractv4sf_0"
6521 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6523 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6524 (parallel [(const_int 0)])))]
6525 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6527 "&& reload_completed"
6528 [(set (match_dup 0) (match_dup 1))]
6530 if (REG_P (operands[1]))
6531 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
6533 operands[1] = adjust_address (operands[1], SFmode, 0);
6536 (define_insn_and_split "*sse4_1_extractps"
6537 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
6539 (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
6540 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
6543 %vextractps\t{%2, %1, %0|%0, %1, %2}
6544 %vextractps\t{%2, %1, %0|%0, %1, %2}
6547 "&& reload_completed && SSE_REG_P (operands[0])"
6550 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6551 switch (INTVAL (operands[2]))
6555 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6556 operands[2], operands[2],
6557 GEN_INT (INTVAL (operands[2]) + 4),
6558 GEN_INT (INTVAL (operands[2]) + 4)));
6561 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6564 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6569 [(set_attr "isa" "*,*,noavx,avx")
6570 (set_attr "type" "sselog,sselog,*,*")
6571 (set_attr "prefix_data16" "1,1,*,*")
6572 (set_attr "prefix_extra" "1,1,*,*")
6573 (set_attr "length_immediate" "1,1,*,*")
6574 (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
6575 (set_attr "mode" "V4SF,V4SF,*,*")])
6577 (define_insn_and_split "*vec_extractv4sf_mem"
6578 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6580 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6581 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6584 "&& reload_completed"
6585 [(set (match_dup 0) (match_dup 1))]
6587 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Name prefix of the 128-bit extract pattern for each 512-bit mode:
;; 32-bit element modes map to "avx512f", 64-bit element modes to "avx512dq".
6590 (define_mode_attr extract_type
6591 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
;; Mnemonic suffix of the 128-bit extract for each 512-bit mode.
6593 (define_mode_attr extract_suf
6594 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
;; 512-bit modes that have a 128-bit extract; the 64-bit element modes are
;; only enabled under TARGET_AVX512DQ.
6596 (define_mode_iterator AVX512_VEC
6597 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
6599 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6600 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6601 (match_operand:AVX512_VEC 1 "register_operand")
6602 (match_operand:SI 2 "const_0_to_3_operand")
6603 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6604 (match_operand:QI 4 "register_operand")]
6608 mask = INTVAL (operands[2]);
6610 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6611 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6613 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6614 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6615 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6616 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6619 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6620 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
;; Masked store of one 128-bit quarter (two 64-bit elements) of a V8FI
;; register to memory.  Operands 2 and 3 must be a consecutive, even-aligned
;; element pair, and the merge source (operand 4) must be the destination
;; memory itself.  The output code converts the starting element index into
;; the quarter number (>> 1) used as the immediate.
;; The write mask uses "Yk", matching the other masked patterns in this
;; file, so that a register unusable as a predicate is never allocated.
6625 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6626 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6627 (vec_merge:<ssequartermode>
6628 (vec_select:<ssequartermode>
6629 (match_operand:V8FI 1 "register_operand" "v")
6630 (parallel [(match_operand 2 "const_0_to_7_operand")
6631 (match_operand 3 "const_0_to_7_operand")]))
6632 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6633 (match_operand:QI 5 "register_operand" "Yk")))]
6635 && (INTVAL (operands[2]) % 2 == 0)
6636 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
6637 && rtx_equal_p (operands[4], operands[0])"
6639 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6640 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6642 [(set_attr "type" "sselog")
6643 (set_attr "prefix_extra" "1")
6644 (set_attr "length_immediate" "1")
6645 (set_attr "memory" "store")
6646 (set_attr "prefix" "evex")
6647 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of one 128-bit quarter (four 32-bit elements) of a V16FI
;; register to memory.  The condition requires operands 2..5 to be four
;; consecutive indices starting at a multiple of 4, and the merge source
;; (operand 6) to be the destination memory itself.  The output code turns
;; the starting element index into the quarter number (>> 2) used as the
;; immediate.
6649 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6650 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6651 (vec_merge:<ssequartermode>
6652 (vec_select:<ssequartermode>
6653 (match_operand:V16FI 1 "register_operand" "v")
6654 (parallel [(match_operand 2 "const_0_to_15_operand")
6655 (match_operand 3 "const_0_to_15_operand")
6656 (match_operand 4 "const_0_to_15_operand")
6657 (match_operand 5 "const_0_to_15_operand")]))
6658 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6659 (match_operand:QI 7 "register_operand" "Yk")))]
6661 && ((INTVAL (operands[2]) % 4 == 0)
6662 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6663 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6664 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
6665 && rtx_equal_p (operands[6], operands[0])"
6667 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6668 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6670 [(set_attr "type" "sselog")
6671 (set_attr "prefix_extra" "1")
6672 (set_attr "length_immediate" "1")
6673 (set_attr "memory" "store")
6674 (set_attr "prefix" "evex")
6675 (set_attr "mode" "<sseinsnmode>")])
;; Extract one 128-bit quarter (two 64-bit elements) of a V8FI register.
;; Operands 2 and 3 must be a consecutive element pair that starts on an
;; even index: the output code derives the immediate as operands[2] >> 1,
;; so an unaligned pair such as (1, 2) would otherwise match and silently
;; extract elements (0, 1).  The alignment test mirrors the one in the
;; _maskm variant of this pattern.
6677 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6678 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6679 (vec_select:<ssequartermode>
6680 (match_operand:V8FI 1 "register_operand" "v")
6681 (parallel [(match_operand 2 "const_0_to_7_operand")
6682 (match_operand 3 "const_0_to_7_operand")])))]
6683 "TARGET_AVX512DQ && (INTVAL (operands[2]) % 2 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6685 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6686 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6688 [(set_attr "type" "sselog1")
6689 (set_attr "prefix_extra" "1")
6690 (set_attr "length_immediate" "1")
6691 (set_attr "prefix" "evex")
6692 (set_attr "mode" "<sseinsnmode>")])
;; Extract one 128-bit quarter (four 32-bit elements) of a V16FI register.
;; Operands 2..5 must be four consecutive indices that start on a multiple
;; of 4: the output code derives the immediate as operands[2] >> 2, so an
;; unaligned run such as (1, 2, 3, 4) would otherwise match and silently
;; extract elements (0..3).  The alignment test mirrors the one in the
;; _maskm variant of this pattern.
6694 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6695 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6696 (vec_select:<ssequartermode>
6697 (match_operand:V16FI 1 "register_operand" "v")
6698 (parallel [(match_operand 2 "const_0_to_15_operand")
6699 (match_operand 3 "const_0_to_15_operand")
6700 (match_operand 4 "const_0_to_15_operand")
6701 (match_operand 5 "const_0_to_15_operand")])))]
6703 && ((INTVAL (operands[2]) % 4 == 0) && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6704 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6705 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6707 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6708 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6710 [(set_attr "type" "sselog1")
6711 (set_attr "prefix_extra" "1")
6712 (set_attr "length_immediate" "1")
6713 (set_attr "prefix" "evex")
6714 (set_attr "mode" "<sseinsnmode>")])
;; Name prefix of the 256-bit extract pattern for each 512-bit mode:
;; 32-bit element modes map to "avx512dq", 64-bit element modes to "avx512f".
6716 (define_mode_attr extract_type_2
6717 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
;; Mnemonic suffix of the 256-bit extract for each 512-bit mode.
6719 (define_mode_attr extract_suf_2
6720 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; 512-bit modes that have a 256-bit extract; the 32-bit element modes are
;; only enabled under TARGET_AVX512DQ.
6722 (define_mode_iterator AVX512_VEC_2
6723 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Expander for the masked extraction of one 256-bit half of a 512-bit
;; vector.  Operand 2 (0 or 1) selects the low or high half and dispatches
;; to gen_vec_extract_lo_<mode>_mask or gen_vec_extract_hi_<mode>_mask;
;; operand 3 is the merge source and operand 4 the mask register.
;; When the destination is memory and the merge source is a CONST_VECTOR,
;; operand 0 is replaced by a register.  That register must have the mode
;; operand 0 is declared with, <ssehalfvecmode>, not the quarter mode.
6725 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6726 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6727 (match_operand:AVX512_VEC_2 1 "register_operand")
6728 (match_operand:SI 2 "const_0_to_1_operand")
6729 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6730 (match_operand:QI 4 "register_operand")]
6733 rtx (*insn)(rtx, rtx, rtx, rtx);
6735 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6736 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6738 switch (INTVAL (operands[2]))
6741 insn = gen_vec_extract_lo_<mode>_mask;
6744 insn = gen_vec_extract_hi_<mode>_mask;
6750 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6755 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6756 (vec_select:<ssehalfvecmode>
6757 (match_operand:V8FI 1 "nonimmediate_operand")
6758 (parallel [(const_int 0) (const_int 1)
6759 (const_int 2) (const_int 3)])))]
6760 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6761 && reload_completed"
6764 rtx op1 = operands[1];
6766 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6768 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6769 emit_move_insn (operands[0], op1);
;; Masked store of elements 0..3 (the low half) of a V8FI register to
;; memory.  The merge source (operand 2) must be the destination memory
;; itself; operand 3 is the write mask.  Emitted with immediate 0.
6773 (define_insn "vec_extract_lo_<mode>_maskm"
6774 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6775 (vec_merge:<ssehalfvecmode>
6776 (vec_select:<ssehalfvecmode>
6777 (match_operand:V8FI 1 "register_operand" "v")
6778 (parallel [(const_int 0) (const_int 1)
6779 (const_int 2) (const_int 3)]))
6780 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6781 (match_operand:QI 3 "register_operand" "Yk")))]
6783 && rtx_equal_p (operands[2], operands[0])"
6784 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6785 [(set_attr "type" "sselog1")
6786 (set_attr "prefix_extra" "1")
6787 (set_attr "length_immediate" "1")
6788 (set_attr "prefix" "evex")
6789 (set_attr "mode" "<sseinsnmode>")])
6791 (define_insn "vec_extract_lo_<mode><mask_name>"
6792 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6793 (vec_select:<ssehalfvecmode>
6794 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6795 (parallel [(const_int 0) (const_int 1)
6796 (const_int 2) (const_int 3)])))]
6797 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6800 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6804 [(set_attr "type" "sselog1")
6805 (set_attr "prefix_extra" "1")
6806 (set_attr "length_immediate" "1")
6807 (set_attr "prefix" "evex")
6808 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of elements 4..7 (the high half) of a V8FI register to
;; memory.  The merge source (operand 2) must be the destination memory
;; itself; operand 3 is the write mask.  Emitted with immediate 1.
6810 (define_insn "vec_extract_hi_<mode>_maskm"
6811 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6812 (vec_merge:<ssehalfvecmode>
6813 (vec_select:<ssehalfvecmode>
6814 (match_operand:V8FI 1 "register_operand" "v")
6815 (parallel [(const_int 4) (const_int 5)
6816 (const_int 6) (const_int 7)]))
6817 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6818 (match_operand:QI 3 "register_operand" "Yk")))]
6820 && rtx_equal_p (operands[2], operands[0])"
6821 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6822 [(set_attr "type" "sselog")
6823 (set_attr "prefix_extra" "1")
6824 (set_attr "length_immediate" "1")
6825 (set_attr "memory" "store")
6826 (set_attr "prefix" "evex")
6827 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 4..7 (the high half) of a V8FI register.  The
;; destination predicate and constraint come from the mask substitution
;; (<store_mask_predicate> / <store_mask_constraint>).  Emitted with
;; immediate 1.
6829 (define_insn "vec_extract_hi_<mode><mask_name>"
6830 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6831 (vec_select:<ssehalfvecmode>
6832 (match_operand:V8FI 1 "register_operand" "v")
6833 (parallel [(const_int 4) (const_int 5)
6834 (const_int 6) (const_int 7)])))]
6836 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6837 [(set_attr "type" "sselog1")
6838 (set_attr "prefix_extra" "1")
6839 (set_attr "length_immediate" "1")
6840 (set_attr "prefix" "evex")
6841 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of elements 8..15 (the high half) of a V16FI register to
;; memory.  The merge source (operand 2) must be the destination memory
;; itself; operand 3 is the write mask.  Emitted with immediate 1.
;; The write mask uses "Yk", matching the other masked patterns in this
;; file, so that a register unusable as a predicate is never allocated.
6843 (define_insn "vec_extract_hi_<mode>_maskm"
6844 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6845 (vec_merge:<ssehalfvecmode>
6846 (vec_select:<ssehalfvecmode>
6847 (match_operand:V16FI 1 "register_operand" "v")
6848 (parallel [(const_int 8) (const_int 9)
6849 (const_int 10) (const_int 11)
6850 (const_int 12) (const_int 13)
6851 (const_int 14) (const_int 15)]))
6852 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6853 (match_operand:QI 3 "register_operand" "Yk")))]
6855 && rtx_equal_p (operands[2], operands[0])"
6856 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6857 [(set_attr "type" "sselog1")
6858 (set_attr "prefix_extra" "1")
6859 (set_attr "length_immediate" "1")
6860 (set_attr "prefix" "evex")
6861 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 8..15 (the high half) of a V16FI register.
;; Two alternatives, selected by the "isa" attribute:
;;   0 (avx512dq):   vextract<shuffletype>32x8, which supports the mask
;;                   substitution;
;;   1 (noavx512dq): vextracti64x4 with no mask operand in its template.
;; Both use immediate 1.
6863 (define_insn "vec_extract_hi_<mode><mask_name>"
6864 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6865 (vec_select:<ssehalfvecmode>
6866 (match_operand:V16FI 1 "register_operand" "v,v")
6867 (parallel [(const_int 8) (const_int 9)
6868 (const_int 10) (const_int 11)
6869 (const_int 12) (const_int 13)
6870 (const_int 14) (const_int 15)])))]
6871 "TARGET_AVX512F && <mask_avx512dq_condition>"
6873 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6874 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6875 [(set_attr "type" "sselog1")
6876 (set_attr "prefix_extra" "1")
6877 (set_attr "isa" "avx512dq,noavx512dq")
6878 (set_attr "length_immediate" "1")
6879 (set_attr "prefix" "evex")
6880 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked extraction of one 128-bit half of a 256-bit
;; vector (modes in VI48F_256); requires both AVX512DQ and AVX512VL.
;; Operand 2 (0 or 1) selects the low or high half and dispatches to
;; gen_vec_extract_lo_<mode>_mask or gen_vec_extract_hi_<mode>_mask;
;; operand 3 is the merge source and operand 4 the mask register.
;; When the destination is memory and the merge source is a CONST_VECTOR,
;; operand 0 is first replaced by the result of force_reg.
6882 (define_expand "avx512vl_vextractf128<mode>"
6883 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6884 (match_operand:VI48F_256 1 "register_operand")
6885 (match_operand:SI 2 "const_0_to_1_operand")
6886 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6887 (match_operand:QI 4 "register_operand")]
6888 "TARGET_AVX512DQ && TARGET_AVX512VL"
6890 rtx (*insn)(rtx, rtx, rtx, rtx);
6892 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6893 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6895 switch (INTVAL (operands[2]))
6898 insn = gen_vec_extract_lo_<mode>_mask;
6901 insn = gen_vec_extract_hi_<mode>_mask;
6907 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Unmasked half-width extract from a 256-bit V_256 register.
;;   operand 0: destination, <ssehalfvecmode>
;;   operand 1: source vector register
;;   operand 2: half selector, constant 0 or 1
;; The switch on operand 2 chooses between gen_vec_extract_lo_<mode> and
;; gen_vec_extract_hi_<mode>; the chosen generator is emitted with
;; operands 0 and 1.
6911 (define_expand "avx_vextractf128<mode>"
6912 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6913 (match_operand:V_256 1 "register_operand")
6914 (match_operand:SI 2 "const_0_to_1_operand")]
6917 rtx (*insn)(rtx, rtx);
6919 switch (INTVAL (operands[2]))
6922 insn = gen_vec_extract_lo_<mode>;
6925 insn = gen_vec_extract_hi_<mode>;
6931 emit_insn (insn (operands[0], operands[1]));
;; Extract the lower half (elements 0-7) of a V16FI operand.  Source and
;; destination may each be register or memory, but the insn condition
;; rejects the memory-to-memory combination.  The visible output
;; statement returns vextract<shuffletype>32x8 with immediate 0x0 and
;; <mask_operand2> on the destination.
6935 (define_insn "vec_extract_lo_<mode><mask_name>"
6936 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6937 (vec_select:<ssehalfvecmode>
6938 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6939 (parallel [(const_int 0) (const_int 1)
6940 (const_int 2) (const_int 3)
6941 (const_int 4) (const_int 5)
6942 (const_int 6) (const_int 7)])))]
6944 && <mask_mode512bit_condition>
6945 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6948 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
;; Post-reload rewrite of a low-half (elements 0-7) V16FI extract into a
;; plain <ssehalfvecmode> move.  Operand 1 is re-expressed in the half
;; mode, either with gen_rtx_REG on the same register number or with
;; gen_lowpart, and then moved to operand 0 with emit_move_insn.
;; Memory-to-memory is excluded by the condition.
6954 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6955 (vec_select:<ssehalfvecmode>
6956 (match_operand:V16FI 1 "nonimmediate_operand")
6957 (parallel [(const_int 0) (const_int 1)
6958 (const_int 2) (const_int 3)
6959 (const_int 4) (const_int 5)
6960 (const_int 6) (const_int 7)])))]
6961 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6962 && reload_completed"
6965 rtx op1 = operands[1];
6967 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6969 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6970 emit_move_insn (operands[0], op1);
;; Extract the lower half (elements 0-1) of a 256-bit VI8F_256 operand.
;; Source and destination may each be register or memory, but not both
;; memory.  The visible output statement emits vextract<shuffletype>64x2
;; with immediate 0x0.
;;
;; The destination decoration uses <mask_operand2>, exactly like the
;; matching vec_extract_hi pattern for VI8F_256.  Spelling the mask out
;; by hand as %{%3%} would print the mask register but never the
;; zero-masking marker, so a zero-masked extract would be assembled as a
;; merge-masked one.
6974 (define_insn "vec_extract_lo_<mode><mask_name>"
6975 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
6976 (vec_select:<ssehalfvecmode>
6977 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
6978 (parallel [(const_int 0) (const_int 1)])))]
6980 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
6981 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6984 return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6988 [(set_attr "type" "sselog")
6989 (set_attr "prefix_extra" "1")
6990 (set_attr "length_immediate" "1")
6991 (set_attr "memory" "none,store")
6992 (set_attr "prefix" "evex")
6993 (set_attr "mode" "XI")])
;; Post-reload rewrite of a low-half (elements 0-1) VI8F_256 extract into
;; a plain <ssehalfvecmode> move.  Operand 1 is re-expressed in the half
;; mode, either with gen_rtx_REG on the same register number or with
;; gen_lowpart, and then moved to operand 0.  Memory-to-memory is
;; excluded by the condition.
6996 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6997 (vec_select:<ssehalfvecmode>
6998 (match_operand:VI8F_256 1 "nonimmediate_operand")
6999 (parallel [(const_int 0) (const_int 1)])))]
7000 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7001 && reload_completed"
7004 rtx op1 = operands[1];
7006 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7008 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7009 emit_move_insn (operands[0], op1);
;; Extract the upper half (elements 2-3) of a 256-bit VI8F_256 register.
;; The output code tests TARGET_AVX512VL and TARGET_AVX512DQ and returns
;; one of three templates, all with immediate 0x1:
;;   vextract<shuffletype>64x2  (the only one carrying <mask_operand2>)
;;   vextract<shuffletype>32x4
;;   vextract<i128>
;; The "memory" attribute marks alternative 0 as "none" and alternative 1
;; as "store".
7013 (define_insn "vec_extract_hi_<mode><mask_name>"
7014 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7015 (vec_select:<ssehalfvecmode>
7016 (match_operand:VI8F_256 1 "register_operand" "v,v")
7017 (parallel [(const_int 2) (const_int 3)])))]
7018 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7020 if (TARGET_AVX512VL)
7022 if (TARGET_AVX512DQ)
7023 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7025 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7028 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7030 [(set_attr "type" "sselog")
7031 (set_attr "prefix_extra" "1")
7032 (set_attr "length_immediate" "1")
7033 (set_attr "memory" "none,store")
7034 (set_attr "prefix" "vex")
7035 (set_attr "mode" "<sseinsnmode>")])
;; Post-reload rewrite of a low-half (elements 0-3) VI4F_256 extract into
;; a plain <ssehalfvecmode> move.  Operand 1 is re-expressed in the half
;; mode, either with gen_rtx_REG on the same register number or with
;; gen_lowpart, and then moved to operand 0.  Memory-to-memory is
;; excluded by the condition.
7038 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7039 (vec_select:<ssehalfvecmode>
7040 (match_operand:VI4F_256 1 "nonimmediate_operand")
7041 (parallel [(const_int 0) (const_int 1)
7042 (const_int 2) (const_int 3)])))]
7043 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
7046 rtx op1 = operands[1];
7048 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7050 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7051 emit_move_insn (operands[0], op1);
;; Extract the lower half (elements 0-3) of a 256-bit VI4F_256 operand.
;; The visible output statement returns vextract<shuffletype>32x4 with
;; immediate 0x0 and <mask_operand2> on the destination.
;; NOTE(review): operand 1's predicate is nonimmediate_operand but its
;; only constraint is "v", so a memory source accepted by the predicate
;; has to be reloaded into a register.  The hi counterpart for the same
;; modes uses register_operand -- confirm whether this is intentional.
7056 (define_insn "vec_extract_lo_<mode><mask_name>"
7057 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7058 (vec_select:<ssehalfvecmode>
7059 (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
7060 (parallel [(const_int 0) (const_int 1)
7061 (const_int 2) (const_int 3)])))]
7062 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7065 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7069 [(set_attr "type" "sselog1")
7070 (set_attr "prefix_extra" "1")
7071 (set_attr "length_immediate" "1")
7072 (set_attr "prefix" "evex")
7073 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked store of the lower half (elements 0-3) of a VI4F_256
;; register to memory.
;;   operand 0: memory destination
;;   operand 1: 256-bit source register
;;   operand 2: merge source; tied to operand 0 ("0") and additionally
;;              required to be rtx_equal_p to it by the insn condition
;;   operand 3: QImode write mask
;; The mask uses constraint "Yk", as the other masked insns in this file
;; do, rather than plain "k": the mask is printed inside %{...%} as a
;; write mask, and k0 cannot be encoded as one.
7075 (define_insn "vec_extract_lo_<mode>_maskm"
7076 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7077 (vec_merge:<ssehalfvecmode>
7078 (vec_select:<ssehalfvecmode>
7079 (match_operand:VI4F_256 1 "register_operand" "v")
7080 (parallel [(const_int 0) (const_int 1)
7081 (const_int 2) (const_int 3)]))
7082 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7083 (match_operand:QI 3 "register_operand" "Yk")))]
7084 "TARGET_AVX512VL && TARGET_AVX512F
7085 && rtx_equal_p (operands[2], operands[0])"
7086 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7087 [(set_attr "type" "sselog1")
7088 (set_attr "prefix_extra" "1")
7089 (set_attr "length_immediate" "1")
7090 (set_attr "prefix" "evex")
7091 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked store of the upper half (elements 4-7) of a VI4F_256
;; register to memory.
;;   operand 0: memory destination
;;   operand 1: 256-bit source register
;;   operand 2: merge source; tied to operand 0 ("0") and additionally
;;              required to be rtx_equal_p to it by the insn condition
;;   operand 3: QImode write mask
;; The mask operand is a QImode mask register, matching
;; vec_extract_lo_<mode>_maskm; it must not be declared in the vector
;; mode <ssehalfvecmode>, which is the mode of the data being merged, not
;; of the vec_merge selector.  It uses constraint "Yk" rather than "k"
;; because k0 cannot be encoded as a write mask.
7093 (define_insn "vec_extract_hi_<mode>_maskm"
7094 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7095 (vec_merge:<ssehalfvecmode>
7096 (vec_select:<ssehalfvecmode>
7097 (match_operand:VI4F_256 1 "register_operand" "v")
7098 (parallel [(const_int 4) (const_int 5)
7099 (const_int 6) (const_int 7)]))
7100 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7101 (match_operand:QI 3 "register_operand" "Yk")))]
7102 "TARGET_AVX512F && TARGET_AVX512VL
7103 && rtx_equal_p (operands[2], operands[0])"
7104 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7105 [(set_attr "type" "sselog1")
7106 (set_attr "prefix_extra" "1")
7107 (set_attr "length_immediate" "1")
7108 (set_attr "prefix" "evex")
7109 (set_attr "mode" "<sseinsnmode>")])
;; Extract the upper half (elements 4-7) of a 256-bit VI4F_256 register.
;; The output code tests TARGET_AVX512VL: one return emits
;; vextract<shuffletype>32x4 with <mask_operand2>, the other emits the
;; unmasked vextract<i128>; both use immediate 0x1.  The "prefix"
;; attribute is likewise computed from a TARGET_AVX512VL test, with the
;; values "evex" and "vex".
7111 (define_insn "vec_extract_hi_<mode><mask_name>"
7112 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7113 (vec_select:<ssehalfvecmode>
7114 (match_operand:VI4F_256 1 "register_operand" "v")
7115 (parallel [(const_int 4) (const_int 5)
7116 (const_int 6) (const_int 7)])))]
7117 "TARGET_AVX && <mask_avx512vl_condition>"
7119 if (TARGET_AVX512VL)
7120 return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7122 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7124 [(set_attr "type" "sselog1")
7125 (set_attr "prefix_extra" "1")
7126 (set_attr "length_immediate" "1")
7127 (set (attr "prefix")
7129 (match_test "TARGET_AVX512VL")
7130 (const_string "evex")
7131 (const_string "vex")))
7132 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0-15) of a V32HI operand as V16HI.  After reload the
;; insn is split into a plain (set op0 op1) in which operand 1 has been
;; re-expressed in V16HImode: a register source through gen_rtx_REG on
;; the same register number, otherwise through adjust_address at offset 0.
;; Memory-to-memory is excluded by the insn condition.
7134 (define_insn_and_split "vec_extract_lo_v32hi"
7135 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7137 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7138 (parallel [(const_int 0) (const_int 1)
7139 (const_int 2) (const_int 3)
7140 (const_int 4) (const_int 5)
7141 (const_int 6) (const_int 7)
7142 (const_int 8) (const_int 9)
7143 (const_int 10) (const_int 11)
7144 (const_int 12) (const_int 13)
7145 (const_int 14) (const_int 15)])))]
7146 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7148 "&& reload_completed"
7149 [(set (match_dup 0) (match_dup 1))]
7151 if (REG_P (operands[1]))
7152 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
7154 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; High half (elements 16-31) of a V32HI operand as V16HI, emitted as
;; vextracti64x4 with immediate 0x1.  Alternative 0 writes a register,
;; alternative 1 stores to memory (see the "memory" attribute).
7157 (define_insn "vec_extract_hi_v32hi"
7158 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7160 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
7161 (parallel [(const_int 16) (const_int 17)
7162 (const_int 18) (const_int 19)
7163 (const_int 20) (const_int 21)
7164 (const_int 22) (const_int 23)
7165 (const_int 24) (const_int 25)
7166 (const_int 26) (const_int 27)
7167 (const_int 28) (const_int 29)
7168 (const_int 30) (const_int 31)])))]
7170 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7171 [(set_attr "type" "sselog")
7172 (set_attr "prefix_extra" "1")
7173 (set_attr "length_immediate" "1")
7174 (set_attr "memory" "none,store")
7175 (set_attr "prefix" "evex")
7176 (set_attr "mode" "XI")])
;; Low half (elements 0-7) of a V16HI operand as V8HI.  After reload the
;; insn is split into a plain (set op0 op1) in which operand 1 has been
;; re-expressed in V8HImode: a register source through gen_rtx_REG on the
;; same register number, otherwise through adjust_address at offset 0.
;; Memory-to-memory is excluded by the insn condition.
7178 (define_insn_and_split "vec_extract_lo_v16hi"
7179 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7181 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
7182 (parallel [(const_int 0) (const_int 1)
7183 (const_int 2) (const_int 3)
7184 (const_int 4) (const_int 5)
7185 (const_int 6) (const_int 7)])))]
7186 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7188 "&& reload_completed"
7189 [(set (match_dup 0) (match_dup 1))]
7191 if (REG_P (operands[1]))
7192 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
7194 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; High half (elements 8-15) of a V16HI register as V8HI, emitted as
;; vextract%~128 with immediate 0x1.  Alternative 0 writes a register,
;; alternative 1 stores to memory (see the "memory" attribute).
7197 (define_insn "vec_extract_hi_v16hi"
7198 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7200 (match_operand:V16HI 1 "register_operand" "x,x")
7201 (parallel [(const_int 8) (const_int 9)
7202 (const_int 10) (const_int 11)
7203 (const_int 12) (const_int 13)
7204 (const_int 14) (const_int 15)])))]
7206 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7207 [(set_attr "type" "sselog")
7208 (set_attr "prefix_extra" "1")
7209 (set_attr "length_immediate" "1")
7210 (set_attr "memory" "none,store")
7211 (set_attr "prefix" "vex")
7212 (set_attr "mode" "OI")])
;; Low half (elements 0-31) of a V64QI operand as V32QI.  After reload the
;; insn is split into a plain (set op0 op1) in which operand 1 has been
;; re-expressed in V32QImode: a register source through gen_rtx_REG on
;; the same register number, otherwise through adjust_address at offset 0.
;; Memory-to-memory is excluded by the insn condition.
7214 (define_insn_and_split "vec_extract_lo_v64qi"
7215 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7217 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7218 (parallel [(const_int 0) (const_int 1)
7219 (const_int 2) (const_int 3)
7220 (const_int 4) (const_int 5)
7221 (const_int 6) (const_int 7)
7222 (const_int 8) (const_int 9)
7223 (const_int 10) (const_int 11)
7224 (const_int 12) (const_int 13)
7225 (const_int 14) (const_int 15)
7226 (const_int 16) (const_int 17)
7227 (const_int 18) (const_int 19)
7228 (const_int 20) (const_int 21)
7229 (const_int 22) (const_int 23)
7230 (const_int 24) (const_int 25)
7231 (const_int 26) (const_int 27)
7232 (const_int 28) (const_int 29)
7233 (const_int 30) (const_int 31)])))]
7234 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7236 "&& reload_completed"
7237 [(set (match_dup 0) (match_dup 1))]
7239 if (REG_P (operands[1]))
7240 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
7242 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; High half (elements 32-63) of a V64QI operand as V32QI, emitted as
;; vextracti64x4 with immediate 0x1.  Alternative 0 writes a register,
;; alternative 1 stores to memory (see the "memory" attribute).
7245 (define_insn "vec_extract_hi_v64qi"
7246 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7248 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
7249 (parallel [(const_int 32) (const_int 33)
7250 (const_int 34) (const_int 35)
7251 (const_int 36) (const_int 37)
7252 (const_int 38) (const_int 39)
7253 (const_int 40) (const_int 41)
7254 (const_int 42) (const_int 43)
7255 (const_int 44) (const_int 45)
7256 (const_int 46) (const_int 47)
7257 (const_int 48) (const_int 49)
7258 (const_int 50) (const_int 51)
7259 (const_int 52) (const_int 53)
7260 (const_int 54) (const_int 55)
7261 (const_int 56) (const_int 57)
7262 (const_int 58) (const_int 59)
7263 (const_int 60) (const_int 61)
7264 (const_int 62) (const_int 63)])))]
7266 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7267 [(set_attr "type" "sselog")
7268 (set_attr "prefix_extra" "1")
7269 (set_attr "length_immediate" "1")
7270 (set_attr "memory" "none,store")
7271 (set_attr "prefix" "evex")
7272 (set_attr "mode" "XI")])
;; Low half (elements 0-15) of a V32QI operand as V16QI.  After reload the
;; insn is split into a plain (set op0 op1) in which operand 1 has been
;; re-expressed in V16QImode: a register source through gen_rtx_REG on
;; the same register number, otherwise through adjust_address at offset 0.
;; Memory-to-memory is excluded by the insn condition.
7274 (define_insn_and_split "vec_extract_lo_v32qi"
7275 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7277 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
7278 (parallel [(const_int 0) (const_int 1)
7279 (const_int 2) (const_int 3)
7280 (const_int 4) (const_int 5)
7281 (const_int 6) (const_int 7)
7282 (const_int 8) (const_int 9)
7283 (const_int 10) (const_int 11)
7284 (const_int 12) (const_int 13)
7285 (const_int 14) (const_int 15)])))]
7286 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7288 "&& reload_completed"
7289 [(set (match_dup 0) (match_dup 1))]
7291 if (REG_P (operands[1]))
7292 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
7294 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; High half (elements 16-31) of a V32QI register as V16QI, emitted as
;; vextract%~128 with immediate 0x1.  Alternative 0 writes a register,
;; alternative 1 stores to memory (see the "memory" attribute).
7297 (define_insn "vec_extract_hi_v32qi"
7298 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7300 (match_operand:V32QI 1 "register_operand" "x,x")
7301 (parallel [(const_int 16) (const_int 17)
7302 (const_int 18) (const_int 19)
7303 (const_int 20) (const_int 21)
7304 (const_int 22) (const_int 23)
7305 (const_int 24) (const_int 25)
7306 (const_int 26) (const_int 27)
7307 (const_int 28) (const_int 29)
7308 (const_int 30) (const_int 31)])))]
7310 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7311 [(set_attr "type" "sselog")
7312 (set_attr "prefix_extra" "1")
7313 (set_attr "length_immediate" "1")
7314 (set_attr "memory" "none,store")
7315 (set_attr "prefix" "vex")
7316 (set_attr "mode" "OI")])
7318 ;; Modes handled by vec_extract patterns.
;; Gating per entry: the 512-bit byte/word modes (V64QI, V32HI) require
;; TARGET_AVX512BW, the other 512-bit modes require TARGET_AVX512F, all
;; 256-bit modes require TARGET_AVX, and the 128-bit modes carry no
;; extra condition.
7319 (define_mode_iterator VEC_EXTRACT_MODE
7320 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7321 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7322 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7323 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7324 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7325 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Scalar element extract for every mode in VEC_EXTRACT_MODE.
;;   operand 0: <ssescalarmode> destination register
;;   operand 1: source vector register
;;   operand 2: constant element index
;; All work is delegated to ix86_expand_vector_extract, called with
;; `false' as its first argument and INTVAL (operands[2]) as the index.
7327 (define_expand "vec_extract<mode>"
7328 [(match_operand:<ssescalarmode> 0 "register_operand")
7329 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7330 (match_operand 2 "const_int_operand")]
7333 ix86_expand_vector_extract (false, operands[0], operands[1],
7334 INTVAL (operands[2]));
7338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7340 ;; Parallel double-precision floating point element swizzling
7342 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd: selects elements 1,9, 3,11, 5,13, 7,15 from the
;; two V8DF inputs.  Operand 1 must be a register (its only constraint
;; is "v"), so its predicate is register_operand, as in
;; avx_unpckhpd256<mask_name>; a nonimmediate_operand predicate would let
;; a memory operand 1 match the pattern only to be reloaded into a
;; register afterwards.  Operand 2 may be register or memory.
7344 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7345 [(set (match_operand:V8DF 0 "register_operand" "=v")
7348 (match_operand:V8DF 1 "register_operand" "v")
7349 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7350 (parallel [(const_int 1) (const_int 9)
7351 (const_int 3) (const_int 11)
7352 (const_int 5) (const_int 13)
7353 (const_int 7) (const_int 15)])))]
7355 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7356 [(set_attr "type" "sselog")
7357 (set_attr "prefix" "evex")
7358 (set_attr "mode" "V8DF")])
7360 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: selects elements 1,5, 3,7 from the two V4DF inputs.
;; Operand 1 is a register; operand 2 may be register or memory.
7361 (define_insn "avx_unpckhpd256<mask_name>"
7362 [(set (match_operand:V4DF 0 "register_operand" "=v")
7365 (match_operand:V4DF 1 "register_operand" "v")
7366 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7367 (parallel [(const_int 1) (const_int 5)
7368 (const_int 3) (const_int 7)])))]
7369 "TARGET_AVX && <mask_avx512vl_condition>"
7370 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7371 [(set_attr "type" "sselog")
7372 (set_attr "prefix" "vex")
7373 (set_attr "mode" "V4DF")])
;; Expander built from three sets.  Two fresh V4DF pseudos are allocated
;; for operands 3 and 4.  The visible selectors are {0,4,2,6} and
;; {1,5,3,7} for the first two sets and {2,3,6,7} for the final set,
;; which writes operand 0.
7375 (define_expand "vec_interleave_highv4df"
7379 (match_operand:V4DF 1 "register_operand" "x")
7380 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7381 (parallel [(const_int 0) (const_int 4)
7382 (const_int 2) (const_int 6)])))
7388 (parallel [(const_int 1) (const_int 5)
7389 (const_int 3) (const_int 7)])))
7390 (set (match_operand:V4DF 0 "register_operand")
7395 (parallel [(const_int 2) (const_int 3)
7396 (const_int 6) (const_int 7)])))]
7399 operands[3] = gen_reg_rtx (V4DFmode);
7400 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpckhpd: selects elements 1 and 3 from the two V2DF
;; inputs.
;;   operand 3: vector_move_operand with constraint "0C"
;;   operand 4: QImode mask register ("Yk")
;; The template prints the mask as %{%4%} followed by %N3.
7404 (define_insn "avx512vl_unpckhpd128_mask"
7405 [(set (match_operand:V2DF 0 "register_operand" "=v")
7409 (match_operand:V2DF 1 "register_operand" "v")
7410 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7411 (parallel [(const_int 1) (const_int 3)]))
7412 (match_operand:V2DF 3 "vector_move_operand" "0C")
7413 (match_operand:QI 4 "register_operand" "Yk")))]
7415 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7416 [(set_attr "type" "sselog")
7417 (set_attr "prefix" "evex")
7418 (set_attr "mode" "V2DF")])
;; Expander for the V2DF high interleave.  Both inputs are accepted as
;; nonimmediate operands; when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the
;; combination, operand 2 is forced into a register.
7420 (define_expand "vec_interleave_highv2df"
7421 [(set (match_operand:V2DF 0 "register_operand")
7424 (match_operand:V2DF 1 "nonimmediate_operand")
7425 (match_operand:V2DF 2 "nonimmediate_operand"))
7426 (parallel [(const_int 1)
7430 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7431 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matcher for the V2DF high interleave, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.  There are
;; six alternatives, in order: unpckhpd, vunpckhpd, %vmovddup, movlpd,
;; vmovlpd and %vmovhpd, with isa tags noavx, avx, sse3, noavx, avx and
;; "*" respectively.  The last alternative is the only one with a memory
;; destination.
7434 (define_insn "*vec_interleave_highv2df"
7435 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7438 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7439 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7440 (parallel [(const_int 1)
7442 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7444 unpckhpd\t{%2, %0|%0, %2}
7445 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7446 %vmovddup\t{%H1, %0|%0, %H1}
7447 movlpd\t{%H1, %0|%0, %H1}
7448 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7449 %vmovhpd\t{%1, %0|%q0, %1}"
7450 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7451 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7452 (set_attr "ssememalign" "64")
7453 (set_attr "prefix_data16" "*,*,*,1,*,1")
7454 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7455 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Expander for the 512-bit movddup form: a V8DF result built with the
;; selector {0,8, 2,10, 4,12, 6,14}, from a single nonimmediate V8DF
;; operand 1.
7457 (define_expand "avx512f_movddup512<mask_name>"
7458 [(set (match_operand:V8DF 0 "register_operand")
7461 (match_operand:V8DF 1 "nonimmediate_operand")
7463 (parallel [(const_int 0) (const_int 8)
7464 (const_int 2) (const_int 10)
7465 (const_int 4) (const_int 12)
7466 (const_int 6) (const_int 14)])))]
;; Expander for 512-bit unpcklpd: a V8DF result built with the selector
;; {0,8, 2,10, 4,12, 6,14}.  Operand 1 is a register; operand 2 may be
;; register or memory.
7469 (define_expand "avx512f_unpcklpd512<mask_name>"
7470 [(set (match_operand:V8DF 0 "register_operand")
7473 (match_operand:V8DF 1 "register_operand")
7474 (match_operand:V8DF 2 "nonimmediate_operand"))
7475 (parallel [(const_int 0) (const_int 8)
7476 (const_int 2) (const_int 10)
7477 (const_int 4) (const_int 12)
7478 (const_int 6) (const_int 14)])))]
;; Matcher for the selector {0,8, 2,10, 4,12, 6,14} on two V8DF inputs.
;; Alternative 0 ties operand 2 to operand 1 (constraint "1") and emits
;; the single-source vmovddup; alternative 1 takes two distinct inputs
;; and emits vunpcklpd.  Both append <mask_operand3> to the destination.
7481 (define_insn "*avx512f_unpcklpd512<mask_name>"
7482 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7485 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7486 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7487 (parallel [(const_int 0) (const_int 8)
7488 (const_int 2) (const_int 10)
7489 (const_int 4) (const_int 12)
7490 (const_int 6) (const_int 14)])))]
7493 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7494 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7495 [(set_attr "type" "sselog")
7496 (set_attr "prefix" "evex")
7497 (set_attr "mode" "V8DF")])
7499 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup form: a V4DF result built with the
;; selector {0,4, 2,6}, from a single nonimmediate V4DF operand 1.
7500 (define_expand "avx_movddup256<mask_name>"
7501 [(set (match_operand:V4DF 0 "register_operand")
7504 (match_operand:V4DF 1 "nonimmediate_operand")
7506 (parallel [(const_int 0) (const_int 4)
7507 (const_int 2) (const_int 6)])))]
7508 "TARGET_AVX && <mask_avx512vl_condition>")
;; Expander for 256-bit unpcklpd: a V4DF result built with the selector
;; {0,4, 2,6}.  Operand 1 is a register; operand 2 may be register or
;; memory.
7510 (define_expand "avx_unpcklpd256<mask_name>"
7511 [(set (match_operand:V4DF 0 "register_operand")
7514 (match_operand:V4DF 1 "register_operand")
7515 (match_operand:V4DF 2 "nonimmediate_operand"))
7516 (parallel [(const_int 0) (const_int 4)
7517 (const_int 2) (const_int 6)])))]
7518 "TARGET_AVX && <mask_avx512vl_condition>")
;; Matcher for the selector {0,4, 2,6} on two V4DF inputs.  Alternative 0
;; takes a register operand 1 and a register-or-memory operand 2 and
;; emits vunpcklpd; alternative 1 takes a memory operand 1 with operand 2
;; tied to it (constraint "1") and emits the single-source vmovddup.
;; Both append <mask_operand3> to the destination.
7520 (define_insn "*avx_unpcklpd256<mask_name>"
7521 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7524 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7525 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7526 (parallel [(const_int 0) (const_int 4)
7527 (const_int 2) (const_int 6)])))]
7528 "TARGET_AVX && <mask_avx512vl_condition>"
7530 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7531 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7532 [(set_attr "type" "sselog")
7533 (set_attr "prefix" "vex")
7534 (set_attr "mode" "V4DF")])
;; Expander built from three sets.  Two fresh V4DF pseudos are allocated
;; for operands 3 and 4.  The visible selectors are {0,4,2,6} and
;; {1,5,3,7} for the first two sets and {0,1,4,5} for the final set,
;; which writes operand 0.
7536 (define_expand "vec_interleave_lowv4df"
7540 (match_operand:V4DF 1 "register_operand" "x")
7541 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7542 (parallel [(const_int 0) (const_int 4)
7543 (const_int 2) (const_int 6)])))
7549 (parallel [(const_int 1) (const_int 5)
7550 (const_int 3) (const_int 7)])))
7551 (set (match_operand:V4DF 0 "register_operand")
7556 (parallel [(const_int 0) (const_int 1)
7557 (const_int 4) (const_int 5)])))]
7560 operands[3] = gen_reg_rtx (V4DFmode);
7561 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpcklpd: selects elements 0 and 2 from the two V2DF
;; inputs.
;;   operand 3: vector_move_operand with constraint "0C"
;;   operand 4: QImode mask register ("Yk")
;; The template prints the mask as %{%4%} followed by %N3.
7564 (define_insn "avx512vl_unpcklpd128_mask"
7565 [(set (match_operand:V2DF 0 "register_operand" "=v")
7569 (match_operand:V2DF 1 "register_operand" "v")
7570 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7571 (parallel [(const_int 0) (const_int 2)]))
7572 (match_operand:V2DF 3 "vector_move_operand" "0C")
7573 (match_operand:QI 4 "register_operand" "Yk")))]
7575 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7576 [(set_attr "type" "sselog")
7577 (set_attr "prefix" "evex")
7578 (set_attr "mode" "V2DF")])
;; Expander for the V2DF low interleave.  Both inputs are accepted as
;; nonimmediate operands; when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the
;; combination, operand 1 is forced into a register.
7580 (define_expand "vec_interleave_lowv2df"
7581 [(set (match_operand:V2DF 0 "register_operand")
7584 (match_operand:V2DF 1 "nonimmediate_operand")
7585 (match_operand:V2DF 2 "nonimmediate_operand"))
7586 (parallel [(const_int 0)
7590 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7591 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for the V2DF low interleave, valid only when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.  There are
;; six alternatives, in order: unpcklpd, vunpcklpd, %vmovddup, movhpd,
;; vmovhpd and %vmovlpd, with isa tags noavx, avx, sse3, noavx, avx and
;; "*" respectively.  The last alternative is the only one with a memory
;; destination (constraint "o").
7594 (define_insn "*vec_interleave_lowv2df"
7595 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7598 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7599 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7600 (parallel [(const_int 0)
7602 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7604 unpcklpd\t{%2, %0|%0, %2}
7605 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7606 %vmovddup\t{%1, %0|%0, %q1}
7607 movhpd\t{%2, %0|%0, %q2}
7608 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7609 %vmovlpd\t{%2, %H0|%H0, %2}"
7610 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7611 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7612 (set_attr "ssememalign" "64")
7613 (set_attr "prefix_data16" "*,*,*,1,*,1")
7614 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7615 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination fed from
;; a V2DF register.  The register is viewed in DFmode through gen_rtx_REG
;; on the same register number, and that one DFmode value is stored
;; twice, at byte offsets 0 and 8 of operand 0.
7618 [(set (match_operand:V2DF 0 "memory_operand")
7621 (match_operand:V2DF 1 "register_operand")
7623 (parallel [(const_int 0)
7625 "TARGET_SSE3 && reload_completed"
7628 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7629 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7630 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a V2DF register destination fed from a V2DF
;; memory operand, where the two selector indices are operand 2 (0 or 1)
;; and operand 3 == operand 2 + 2.  The result is rewritten as a
;; vec_duplicate:V2DF of a DFmode memory reference at byte offset
;; INTVAL (operands[2]) * 8 within operand 1.
7635 [(set (match_operand:V2DF 0 "register_operand")
7638 (match_operand:V2DF 1 "memory_operand")
7640 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7641 (match_operand:SI 3 "const_int_operand")])))]
7642 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7643 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7645 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; vscalef<ssescalarmodesuffix> on VF_128 operands.  Operand 1 is a
;; register; operand 2's predicate and constraint are supplied by the
;; <round_name> substitution (<round_nimm_predicate>, <round_constraint>),
;; and <round_op3> is printed next to operand 2 in the template.
7648 (define_insn "avx512f_vmscalef<mode><round_name>"
7649 [(set (match_operand:VF_128 0 "register_operand" "=v")
7652 [(match_operand:VF_128 1 "register_operand" "v")
7653 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7658 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7659 [(set_attr "prefix" "evex")
7660 (set_attr "mode" "<ssescalarmode>")])
;; vscalef<ssemodesuffix> on VF_AVX512VL vectors, with optional masking
;; (<mask_operand3> on the destination) and the <round_mask_op3>
;; decoration printed next to operand 2.  Operand 1 is a register;
;; operand 2 is a nonimmediate operand whose constraint comes from
;; <round_constraint>.
7662 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7663 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7665 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7666 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7669 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7670 [(set_attr "prefix" "evex")
7671 (set_attr "mode" "<MODE>")])
;; Zero-masking entry point for vpternlog.
;;   operands 0-3: destination and three VI48_AVX512VL sources
;;   operand 4:    8-bit immediate (const_0_to_255_operand)
;;   operand 5:    mask register in <avx512fmaskmode>
;; Forwards to gen_<avx512>_vternlog<mode>_maskz_1, inserting
;; CONST0_RTX (<MODE>mode) between the immediate and the mask.
7673 (define_expand "<avx512>_vternlog<mode>_maskz"
7674 [(match_operand:VI48_AVX512VL 0 "register_operand")
7675 (match_operand:VI48_AVX512VL 1 "register_operand")
7676 (match_operand:VI48_AVX512VL 2 "register_operand")
7677 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7678 (match_operand:SI 4 "const_0_to_255_operand")
7679 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7682 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7683 operands[0], operands[1], operands[2], operands[3],
7684 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog<ssemodesuffix>.  Operand 1 is tied to the destination
;; (constraint "0") and is therefore not printed in the template;
;; operand 2 is a register, operand 3 register or memory, and operand 4
;; an 8-bit immediate.  <sd_mask_op5> is appended to the destination.
7688 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7689 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7690 (unspec:VI48_AVX512VL
7691 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7692 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7693 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7694 (match_operand:SI 4 "const_0_to_255_operand")]
7697 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7698 [(set_attr "type" "sselog")
7699 (set_attr "prefix" "evex")
7700 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpternlog<ssemodesuffix>, expressed as a vec_merge of the
;; unspec result under mask operand 5 (<avx512fmaskmode>, "Yk").
;; Operand 1 is tied to the destination (constraint "0"); the template
;; prints the mask as %{%5%} on the destination.
7702 (define_insn "<avx512>_vternlog<mode>_mask"
7703 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7704 (vec_merge:VI48_AVX512VL
7705 (unspec:VI48_AVX512VL
7706 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7707 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7708 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7709 (match_operand:SI 4 "const_0_to_255_operand")]
7712 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7714 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7715 [(set_attr "type" "sselog")
7716 (set_attr "prefix" "evex")
7717 (set_attr "mode" "<sseinsnmode>")])
;; vgetexp<ssemodesuffix> on a VF_AVX512VL vector.  The single source
;; operand takes its predicate and constraint from the
;; <round_saeonly_name> substitution; the template appends
;; <mask_operand2> to the destination and prints
;; <round_saeonly_mask_op2> next to the source.
7719 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7720 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7721 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7724 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7725 [(set_attr "prefix" "evex")
7726 (set_attr "mode" "<MODE>")])
;; vgetexp<ssescalarmodesuffix> on VF_128 operands.  Operand 1 is a
;; register; operand 2 takes its predicate and constraint from the
;; <round_saeonly_name> substitution, and <round_saeonly_op3> is printed
;; next to it in the template.
7728 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7729 [(set (match_operand:VF_128 0 "register_operand" "=v")
7732 [(match_operand:VF_128 1 "register_operand" "v")
7733 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7738 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7739 [(set_attr "prefix" "evex")
7740 (set_attr "mode" "<ssescalarmode>")])
;; valign<ssemodesuffix> on VI48_AVX512VL vectors.  Operand 1 is a
;; register, operand 2 register or memory, and operand 3 an 8-bit
;; immediate.  <mask_operand4> is appended to the destination.
7742 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7743 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7744 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7745 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7746 (match_operand:SI 3 "const_0_to_255_operand")]
7749 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7750 [(set_attr "prefix" "evex")
7751 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit shufps expander.  The 8-bit immediate (operand 3) is
;; decoded into the sixteen explicit element indices expected by
;; gen_avx512f_shufps512_1_mask.  The immediate holds four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7).  The same four fields are reused for each
;; of the four groups of four result elements, with a per-group base of
;; 0, 4, 8 and 12; the two fields taken from bits 4-7 additionally get
;; +16 (giving 16, 20, 24, 28 plus the field value).  Operand 4 (V16SF)
;; and operand 5 (HImode mask) are passed through as the trailing
;; arguments.
7753 (define_expand "avx512f_shufps512_mask"
7754 [(match_operand:V16SF 0 "register_operand")
7755 (match_operand:V16SF 1 "register_operand")
7756 (match_operand:V16SF 2 "nonimmediate_operand")
7757 (match_operand:SI 3 "const_0_to_255_operand")
7758 (match_operand:V16SF 4 "register_operand")
7759 (match_operand:HI 5 "register_operand")]
7762 int mask = INTVAL (operands[3]);
7763 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7764 GEN_INT ((mask >> 0) & 3),
7765 GEN_INT ((mask >> 2) & 3),
7766 GEN_INT (((mask >> 4) & 3) + 16),
7767 GEN_INT (((mask >> 6) & 3) + 16),
7768 GEN_INT (((mask >> 0) & 3) + 4),
7769 GEN_INT (((mask >> 2) & 3) + 4),
7770 GEN_INT (((mask >> 4) & 3) + 20),
7771 GEN_INT (((mask >> 6) & 3) + 20),
7772 GEN_INT (((mask >> 0) & 3) + 8),
7773 GEN_INT (((mask >> 2) & 3) + 8),
7774 GEN_INT (((mask >> 4) & 3) + 24),
7775 GEN_INT (((mask >> 6) & 3) + 24),
7776 GEN_INT (((mask >> 0) & 3) + 12),
7777 GEN_INT (((mask >> 2) & 3) + 12),
7778 GEN_INT (((mask >> 4) & 3) + 28),
7779 GEN_INT (((mask >> 6) & 3) + 28),
7780 operands[4], operands[5]));
;; Zero-masking entry point for vector vfixupimm.
;;   operands 0-2: destination and two VF_AVX512VL register sources
;;   operand 3:    <sseintvecmode> table operand
;;   operand 4:    8-bit immediate
;;   operand 5:    mask register in <avx512fmaskmode>
;; Forwards to gen_<avx512>_fixupimm<mode>_maskz_1..., inserting
;; CONST0_RTX (<MODE>mode) before the mask and appending
;; <round_saeonly_expand_operand6>.
7785 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7786 [(match_operand:VF_AVX512VL 0 "register_operand")
7787 (match_operand:VF_AVX512VL 1 "register_operand")
7788 (match_operand:VF_AVX512VL 2 "register_operand")
7789 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7790 (match_operand:SI 4 "const_0_to_255_operand")
7791 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7794 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7795 operands[0], operands[1], operands[2], operands[3],
7796 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7797 <round_saeonly_expand_operand6>));
;; vfixupimm<ssemodesuffix> on VF_AVX512VL vectors.  Operand 1 is tied to
;; the destination (constraint "0") and is not printed; operand 2 is a
;; register, operand 3 is the <sseintvecmode> table operand and operand 4
;; an 8-bit immediate.  <sd_mask_op5> is appended to the destination and
;; <round_saeonly_sd_mask_op5> is printed next to operand 3.
7801 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7802 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7804 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7805 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7806 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7807 (match_operand:SI 4 "const_0_to_255_operand")]
7810 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7811 [(set_attr "prefix" "evex")
7812 (set_attr "mode" "<MODE>")])
;; Masked vfixupimm<ssemodesuffix>, expressed as a vec_merge under mask
;; operand 5 (<avx512fmaskmode>, "Yk").  Operand 1 is tied to the
;; destination (constraint "0").  The template prints the mask as
;; %{%5%} on the destination and <round_saeonly_op6> next to operand 3.
7814 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7815 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7816 (vec_merge:VF_AVX512VL
7818 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7819 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7820 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7821 (match_operand:SI 4 "const_0_to_255_operand")]
7824 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7826 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7827 [(set_attr "prefix" "evex")
7828 (set_attr "mode" "<MODE>")])
;; Zero-masking entry point for the VF_128 form of vfixupimm.
;;   operands 0-2: destination and two VF_128 register sources
;;   operand 3:    <sseintvecmode> table operand
;;   operand 4:    8-bit immediate
;;   operand 5:    mask register in <avx512fmaskmode>
;; Forwards to gen_avx512f_sfixupimm<mode>_maskz_1..., inserting
;; CONST0_RTX (<MODE>mode) before the mask and appending
;; <round_saeonly_expand_operand6>.
7830 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7831 [(match_operand:VF_128 0 "register_operand")
7832 (match_operand:VF_128 1 "register_operand")
7833 (match_operand:VF_128 2 "register_operand")
7834 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7835 (match_operand:SI 4 "const_0_to_255_operand")
7836 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7839 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7840 operands[0], operands[1], operands[2], operands[3],
7841 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7842 <round_saeonly_expand_operand6>));
;; vfixupimm<ssescalarmodesuffix> on VF_128 operands.  Operand 1 is tied
;; to the destination (constraint "0") and is not printed; operand 2 is a
;; register, operand 3 is the <sseintvecmode> table operand (predicate
;; and constraint from the <round_saeonly_name> substitution) and
;; operand 4 an 8-bit immediate.  <sd_mask_op5> is appended to the
;; destination.
7846 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7847 [(set (match_operand:VF_128 0 "register_operand" "=v")
7850 [(match_operand:VF_128 1 "register_operand" "0")
7851 (match_operand:VF_128 2 "register_operand" "v")
7852 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7853 (match_operand:SI 4 "const_0_to_255_operand")]
7858 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7859 [(set_attr "prefix" "evex")
7860 (set_attr "mode" "<ssescalarmode>")])
;; Masked VF_128 form of vfixupimm<ssescalarmodesuffix>, under mask
;; operand 5 (<avx512fmaskmode>, "Yk").  Operand 1 is tied to the
;; destination (constraint "0").  The template prints the mask as
;; %{%5%} on the destination and <round_saeonly_op6> next to operand 3.
7862 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7863 [(set (match_operand:VF_128 0 "register_operand" "=v")
7867 [(match_operand:VF_128 1 "register_operand" "0")
7868 (match_operand:VF_128 2 "register_operand" "v")
7869 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7870 (match_operand:SI 4 "const_0_to_255_operand")]
7875 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7877 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7878 [(set_attr "prefix" "evex")
7879 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale for the VF_AVX512VL modes.  Operand 1 is the source
;; vector and operand 2 an immediate in 0..255; the pattern also has
;; mask (<mask_name>) and SAE-only (<round_saeonly_name>) variants.
7881 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7882 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7884 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7885 (match_operand:SI 2 "const_0_to_255_operand")]
7888 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7889 [(set_attr "length_immediate" "1")
7890 (set_attr "prefix" "evex")
7891 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale on VF_128 modes (uses <ssescalarmodesuffix>).
;; Operand 1 is a register source, operand 2 the second source and
;; operand 3 an immediate in 0..255.
7893 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7894 [(set (match_operand:VF_128 0 "register_operand" "=v")
7897 [(match_operand:VF_128 1 "register_operand" "v")
7898 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7899 (match_operand:SI 3 "const_0_to_255_operand")]
7904 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7905 [(set_attr "length_immediate" "1")
7906 (set_attr "prefix" "evex")
7907 (set_attr "mode" "<MODE>")])
;; 512-bit vshufps expressed as a selection from operands 1 and 2 with
;; sixteen constant indices (operands 3-18).  The insn condition
;; requires operands 7-10, 11-14 and 15-18 to equal operands 3-6 plus
;; 4, 8 and 12 respectively, i.e. every group of four indices repeats
;; the first group's choice.  The output code packs operands 3-6 into
;; one immediate (2 bits each; operands 5 and 6 are rebased by 16) and
;; stores it back in operands[3] for printing.
7909 ;; One bit in mask selects 2 elements.
7910 (define_insn "avx512f_shufps512_1<mask_name>"
7911 [(set (match_operand:V16SF 0 "register_operand" "=v")
7914 (match_operand:V16SF 1 "register_operand" "v")
7915 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7916 (parallel [(match_operand 3 "const_0_to_3_operand")
7917 (match_operand 4 "const_0_to_3_operand")
7918 (match_operand 5 "const_16_to_19_operand")
7919 (match_operand 6 "const_16_to_19_operand")
7920 (match_operand 7 "const_4_to_7_operand")
7921 (match_operand 8 "const_4_to_7_operand")
7922 (match_operand 9 "const_20_to_23_operand")
7923 (match_operand 10 "const_20_to_23_operand")
7924 (match_operand 11 "const_8_to_11_operand")
7925 (match_operand 12 "const_8_to_11_operand")
7926 (match_operand 13 "const_24_to_27_operand")
7927 (match_operand 14 "const_24_to_27_operand")
7928 (match_operand 15 "const_12_to_15_operand")
7929 (match_operand 16 "const_12_to_15_operand")
7930 (match_operand 17 "const_28_to_31_operand")
7931 (match_operand 18 "const_28_to_31_operand")])))]
7933 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7934 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7935 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7936 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7937 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7938 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7939 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7940 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7941 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7942 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7943 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7944 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7947 mask = INTVAL (operands[3]);
7948 mask |= INTVAL (operands[4]) << 2;
7949 mask |= (INTVAL (operands[5]) - 16) << 4;
7950 mask |= (INTVAL (operands[6]) - 16) << 6;
7951 operands[3] = GEN_INT (mask);
7953 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7955 [(set_attr "type" "sselog")
7956 (set_attr "length_immediate" "1")
7957 (set_attr "prefix" "evex")
7958 (set_attr "mode" "V16SF")])
;; Masked 512-bit shufpd expander.  Operand 3 is the 8-bit immediate;
;; each bit is decoded into an explicit element index (e.g. bit 1
;; chooses 9 over 8, bit 2 chooses 3 over 2) and handed to
;; avx512f_shufpd512_1_mask together with operands 4 and 5 (a V8DF
;; register and a QI mask register).
7960 (define_expand "avx512f_shufpd512_mask"
7961 [(match_operand:V8DF 0 "register_operand")
7962 (match_operand:V8DF 1 "register_operand")
7963 (match_operand:V8DF 2 "nonimmediate_operand")
7964 (match_operand:SI 3 "const_0_to_255_operand")
7965 (match_operand:V8DF 4 "register_operand")
7966 (match_operand:QI 5 "register_operand")]
7969 int mask = INTVAL (operands[3]);
7970 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
7972 GEN_INT (mask & 2 ? 9 : 8),
7973 GEN_INT (mask & 4 ? 3 : 2),
7974 GEN_INT (mask & 8 ? 11 : 10),
7975 GEN_INT (mask & 16 ? 5 : 4),
7976 GEN_INT (mask & 32 ? 13 : 12),
7977 GEN_INT (mask & 64 ? 7 : 6),
7978 GEN_INT (mask & 128 ? 15 : 14),
7979 operands[4], operands[5]));
;; 512-bit vshufpd with eight constant element indices (operands 3-10).
;; Each index has exactly two legal values, so the output code turns
;; each into one bit of the immediate by subtracting the lower bound of
;; its range and shifting it to bit position 0..7.  The result replaces
;; operands[3] for printing.
7983 (define_insn "avx512f_shufpd512_1<mask_name>"
7984 [(set (match_operand:V8DF 0 "register_operand" "=v")
7987 (match_operand:V8DF 1 "register_operand" "v")
7988 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7989 (parallel [(match_operand 3 "const_0_to_1_operand")
7990 (match_operand 4 "const_8_to_9_operand")
7991 (match_operand 5 "const_2_to_3_operand")
7992 (match_operand 6 "const_10_to_11_operand")
7993 (match_operand 7 "const_4_to_5_operand")
7994 (match_operand 8 "const_12_to_13_operand")
7995 (match_operand 9 "const_6_to_7_operand")
7996 (match_operand 10 "const_14_to_15_operand")])))]
8000 mask = INTVAL (operands[3]);
8001 mask |= (INTVAL (operands[4]) - 8) << 1;
8002 mask |= (INTVAL (operands[5]) - 2) << 2;
8003 mask |= (INTVAL (operands[6]) - 10) << 3;
8004 mask |= (INTVAL (operands[7]) - 4) << 4;
8005 mask |= (INTVAL (operands[8]) - 12) << 5;
8006 mask |= (INTVAL (operands[9]) - 6) << 6;
8007 mask |= (INTVAL (operands[10]) - 14) << 7;
8008 operands[3] = GEN_INT (mask);
8010 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8012 [(set_attr "type" "sselog")
8013 (set_attr "length_immediate" "1")
8014 (set_attr "prefix" "evex")
8015 (set_attr "mode" "V8DF")])
;; 256-bit shufpd expander.  Decodes the integer immediate in operand 3
;; bit by bit into explicit element indices and emits avx_shufpd256_1;
;; <mask_expand4_args> appends the extra operands of the masked variant.
8017 (define_expand "avx_shufpd256<mask_expand4_name>"
8018 [(match_operand:V4DF 0 "register_operand")
8019 (match_operand:V4DF 1 "register_operand")
8020 (match_operand:V4DF 2 "nonimmediate_operand")
8021 (match_operand:SI 3 "const_int_operand")]
8024 int mask = INTVAL (operands[3]);
8025 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8029 GEN_INT (mask & 2 ? 5 : 4),
8030 GEN_INT (mask & 4 ? 3 : 2),
8031 GEN_INT (mask & 8 ? 7 : 6)
8032 <mask_expand4_args>));
;; 256-bit vshufpd with four constant element indices (operands 3-6).
;; The output code converts each index into one immediate bit by
;; subtracting the lower bound of its range, then prints the combined
;; value through operands[3].
8036 (define_insn "avx_shufpd256_1<mask_name>"
8037 [(set (match_operand:V4DF 0 "register_operand" "=v")
8040 (match_operand:V4DF 1 "register_operand" "v")
8041 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8042 (parallel [(match_operand 3 "const_0_to_1_operand")
8043 (match_operand 4 "const_4_to_5_operand")
8044 (match_operand 5 "const_2_to_3_operand")
8045 (match_operand 6 "const_6_to_7_operand")])))]
8046 "TARGET_AVX && <mask_avx512vl_condition>"
8049 mask = INTVAL (operands[3]);
8050 mask |= (INTVAL (operands[4]) - 4) << 1;
8051 mask |= (INTVAL (operands[5]) - 2) << 2;
8052 mask |= (INTVAL (operands[6]) - 6) << 3;
8053 operands[3] = GEN_INT (mask);
8055 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8057 [(set_attr "type" "sseshuf")
8058 (set_attr "length_immediate" "1")
8059 (set_attr "prefix" "vex")
8060 (set_attr "mode" "V4DF")])
;; 128-bit shufpd expander.  Bit 0 of the immediate (operand 3) becomes
;; the first element index (0 or 1) and bit 1 selects index 3 or 2;
;; both are passed to sse2_shufpd_v2df, plus <mask_expand4_args> for
;; the masked variant.
8062 (define_expand "sse2_shufpd<mask_expand4_name>"
8063 [(match_operand:V2DF 0 "register_operand")
8064 (match_operand:V2DF 1 "register_operand")
8065 (match_operand:V2DF 2 "nonimmediate_operand")
8066 (match_operand:SI 3 "const_int_operand")]
8069 int mask = INTVAL (operands[3]);
8070 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8071 operands[2], GEN_INT (mask & 1),
8072 GEN_INT (mask & 2 ? 3 : 2)
8073 <mask_expand4_args>));
;; Masked 128-bit vshufpd.  Operands 3 and 4 are the two constant
;; element indices, operand 5 is the merge source (tied to the
;; destination or constant zero, constraint "0C") and operand 6 the
;; mask register (constraint "Yk").  The output code folds the two
;; indices into a 2-bit immediate stored back in operands[3].
;; Both assembler dialects must print the mask as %{%6%}; the Intel
;; half previously had %{6%}, which emitted a literal "{6}" instead of
;; the mask register.
8077 (define_insn "sse2_shufpd_v2df_mask"
8078 [(set (match_operand:V2DF 0 "register_operand" "=v")
8082 (match_operand:V2DF 1 "register_operand" "v")
8083 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8084 (parallel [(match_operand 3 "const_0_to_1_operand")
8085 (match_operand 4 "const_2_to_3_operand")]))
8086 (match_operand:V2DF 5 "vector_move_operand" "0C")
8087 (match_operand:QI 6 "register_operand" "Yk")))]
8091 mask = INTVAL (operands[3]);
8092 mask |= (INTVAL (operands[4]) - 2) << 1;
8093 operands[3] = GEN_INT (mask);
8095 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8097 [(set_attr "type" "sseshuf")
8098 (set_attr "length_immediate" "1")
8099 (set_attr "prefix" "evex")
8100 (set_attr "mode" "V2DF")])
8102 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq on V4DI; the selection starts at element 1 of
;; the concatenation of operands 1 and 2.  Enabled for TARGET_AVX2, and
;; the masked variant additionally needs <mask_avx512vl_condition>.
8103 (define_insn "avx2_interleave_highv4di<mask_name>"
8104 [(set (match_operand:V4DI 0 "register_operand" "=v")
8107 (match_operand:V4DI 1 "register_operand" "v")
8108 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8109 (parallel [(const_int 1)
8113 "TARGET_AVX2 && <mask_avx512vl_condition>"
8114 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8115 [(set_attr "type" "sselog")
8116 (set_attr "prefix" "vex")
8117 (set_attr "mode" "OI")])
;; 512-bit vpunpckhqdq on V8DI: selects the odd-numbered elements
;; 1,9 / 3,11 / 5,13 / 7,15 from the pairing of operands 1 and 2,
;; i.e. the high quadword of each 128-bit lane of both inputs.
8119 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8120 [(set (match_operand:V8DI 0 "register_operand" "=v")
8123 (match_operand:V8DI 1 "register_operand" "v")
8124 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8125 (parallel [(const_int 1) (const_int 9)
8126 (const_int 3) (const_int 11)
8127 (const_int 5) (const_int 13)
8128 (const_int 7) (const_int 15)])))]
8130 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8131 [(set_attr "type" "sselog")
8132 (set_attr "prefix" "evex")
8133 (set_attr "mode" "XI")])
;; 128-bit punpckhqdq on V2DI with two alternatives: the legacy
;; two-operand SSE2 form (operand 1 tied to the destination, isa
;; "noavx") and the three-operand VEX/EVEX form (isa "avx"), which is
;; the only one that prints the optional mask operand.  The prefix of
;; the second alternative follows <mask_prefix>.
8135 (define_insn "vec_interleave_highv2di<mask_name>"
8136 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8139 (match_operand:V2DI 1 "register_operand" "0,v")
8140 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8141 (parallel [(const_int 1)
8143 "TARGET_SSE2 && <mask_avx512vl_condition>"
8145 punpckhqdq\t{%2, %0|%0, %2}
8146 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8147 [(set_attr "isa" "noavx,avx")
8148 (set_attr "type" "sselog")
8149 (set_attr "prefix_data16" "1,*")
8150 (set_attr "prefix" "orig,<mask_prefix>")
8151 (set_attr "mode" "TI")])
;; 256-bit vpunpcklqdq on V4DI; the selection starts at element 0 of
;; the concatenation of operands 1 and 2.  Enabled for TARGET_AVX2, and
;; the masked variant additionally needs <mask_avx512vl_condition>.
8153 (define_insn "avx2_interleave_lowv4di<mask_name>"
8154 [(set (match_operand:V4DI 0 "register_operand" "=v")
8157 (match_operand:V4DI 1 "register_operand" "v")
8158 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8159 (parallel [(const_int 0)
8163 "TARGET_AVX2 && <mask_avx512vl_condition>"
8164 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8165 [(set_attr "type" "sselog")
8166 (set_attr "prefix" "vex")
8167 (set_attr "mode" "OI")])
;; 512-bit vpunpcklqdq on V8DI: selects the even-numbered elements
;; 0,8 / 2,10 / 4,12 / 6,14 from the pairing of operands 1 and 2,
;; i.e. the low quadword of each 128-bit lane of both inputs.
8169 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8170 [(set (match_operand:V8DI 0 "register_operand" "=v")
8173 (match_operand:V8DI 1 "register_operand" "v")
8174 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8175 (parallel [(const_int 0) (const_int 8)
8176 (const_int 2) (const_int 10)
8177 (const_int 4) (const_int 12)
8178 (const_int 6) (const_int 14)])))]
8180 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8181 [(set_attr "type" "sselog")
8182 (set_attr "prefix" "evex")
8183 (set_attr "mode" "XI")])
;; 128-bit punpcklqdq on V2DI with two alternatives: the legacy
;; two-operand SSE2 form (operand 1 tied to the destination, isa
;; "noavx") and the three-operand VEX/EVEX form (isa "avx"), which is
;; the only one that prints the optional mask operand.
;; The second alternative's prefix uses <mask_prefix>, matching
;; vec_interleave_highv2di, so that the masked variant is not described
;; with a plain "vex" prefix.
8185 (define_insn "vec_interleave_lowv2di<mask_name>"
8186 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8189 (match_operand:V2DI 1 "register_operand" "0,v")
8190 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8191 (parallel [(const_int 0)
8193 "TARGET_SSE2 && <mask_avx512vl_condition>"
8195 punpcklqdq\t{%2, %0|%0, %2}
8196 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8197 [(set_attr "isa" "noavx,avx")
8198 (set_attr "type" "sselog")
8199 (set_attr "prefix_data16" "1,*")
8200 (set_attr "prefix" "orig,<mask_prefix>")
8201 (set_attr "mode" "TI")])
;; Unmasked 128-bit shufpd for the VI8F_128 modes: a vec_select of two
;; elements from the concatenation of operands 1 and 2.  The output
;; code folds the two constant indices (operands 3 and 4) into a 2-bit
;; immediate, then picks the two-operand SSE2 or the three-operand AVX
;; spelling according to which_alternative.
8203 (define_insn "sse2_shufpd_<mode>"
8204 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8205 (vec_select:VI8F_128
8206 (vec_concat:<ssedoublevecmode>
8207 (match_operand:VI8F_128 1 "register_operand" "0,x")
8208 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
8209 (parallel [(match_operand 3 "const_0_to_1_operand")
8210 (match_operand 4 "const_2_to_3_operand")])))]
8214 mask = INTVAL (operands[3]);
8215 mask |= (INTVAL (operands[4]) - 2) << 1;
8216 operands[3] = GEN_INT (mask);
8218 switch (which_alternative)
8221 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8223 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8228 [(set_attr "isa" "noavx,avx")
8229 (set_attr "type" "sseshuf")
8230 (set_attr "length_immediate" "1")
8231 (set_attr "prefix" "orig,vex")
8232 (set_attr "mode" "V2DF")])
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;; Six alternatives cover memory, SSE, x87 ("*f") and integer ("r")
;; destinations; the condition rejects memory-to-memory.  prefix_data16
;; is computed specially for alternative 0 when AVX is not enabled.
8234 ;; Avoid combining registers from different units in a single alternative,
8235 ;; see comment above inline_secondary_memory_needed function in i386.c
8236 (define_insn "sse2_storehpd"
8237 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
8239 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
8240 (parallel [(const_int 1)])))]
8241 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8243 %vmovhpd\t{%1, %0|%0, %1}
8245 vunpckhpd\t{%d1, %0|%0, %d1}
8249 [(set_attr "isa" "*,noavx,avx,*,*,*")
8250 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8251 (set (attr "prefix_data16")
8253 (and (eq_attr "alternative" "0")
8254 (not (match_test "TARGET_AVX")))
8256 (const_string "*")))
8257 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8258 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (presumably a define_split; its header line is
;; not visible here): extracting element 1 of a V2DF that lives in
;; memory becomes a plain DF move from the same address plus 8 bytes.
8261 [(set (match_operand:DF 0 "register_operand")
8263 (match_operand:V2DF 1 "memory_operand")
8264 (parallel [(const_int 1)])))]
8265 "TARGET_SSE2 && reload_completed"
8266 [(set (match_dup 0) (match_dup 1))]
8267 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extract element 1 of a V2DF when only SSE (not SSE2) is available,
;; using the single-precision moves movhps / movhlps / movlps.  The
;; third alternative reads the high half of an offsettable memory
;; operand via %H1.  Memory-to-memory is rejected by the condition.
8269 (define_insn "*vec_extractv2df_1_sse"
8270 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8272 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8273 (parallel [(const_int 1)])))]
8274 "!TARGET_SSE2 && TARGET_SSE
8275 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8277 movhps\t{%1, %0|%q0, %1}
8278 movhlps\t{%1, %0|%0, %1}
8279 movlps\t{%H1, %0|%0, %H1}"
8280 [(set_attr "type" "ssemov")
8281 (set_attr "ssememalign" "64")
8282 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Five alternatives cover memory, SSE, x87 ("*f") and integer ("r")
;; destinations; the condition rejects memory-to-memory.  Only the
;; first alternative's template (movlpd to memory) is visible here.
8284 ;; Avoid combining registers from different units in a single alternative,
8285 ;; see comment above inline_secondary_memory_needed function in i386.c
8286 (define_insn "sse2_storelpd"
8287 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8289 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8290 (parallel [(const_int 0)])))]
8291 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8293 %vmovlpd\t{%1, %0|%0, %1}
8298 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8299 (set_attr "prefix_data16" "1,*,*,*,*")
8300 (set_attr "prefix" "maybe_vex")
8301 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (presumably a define_split; its header line is
;; not visible here): extracting element 0 of a V2DF becomes a plain DF
;; move.  A register source is re-expressed as the same hard register
;; in DFmode; otherwise the memory source is narrowed to DFmode at
;; offset 0.
8304 [(set (match_operand:DF 0 "register_operand")
8306 (match_operand:V2DF 1 "nonimmediate_operand")
8307 (parallel [(const_int 0)])))]
8308 "TARGET_SSE2 && reload_completed"
8309 [(set (match_dup 0) (match_dup 1))]
8311 if (REG_P (operands[1]))
8312 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
8314 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Extract element 0 of a V2DF when only SSE (not SSE2) is available,
;; using movlps for the memory alternatives and movaps for
;; register-to-register.  Memory-to-memory is rejected by the condition.
8317 (define_insn "*vec_extractv2df_0_sse"
8318 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8320 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8321 (parallel [(const_int 0)])))]
8322 "!TARGET_SSE2 && TARGET_SSE
8323 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8325 movlps\t{%1, %0|%0, %1}
8326 movaps\t{%1, %0|%0, %1}
8327 movlps\t{%1, %0|%0, %q1}"
8328 [(set_attr "type" "ssemov")
8329 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd (element 0 of operand 1 is kept,
;; operand 2 is the new DF value).  Operands are first legitimized with
;; ix86_fixup_binary_operands; if that returned a destination other
;; than operands[0], the result is copied back afterwards.
8331 (define_expand "sse2_loadhpd_exp"
8332 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8335 (match_operand:V2DF 1 "nonimmediate_operand")
8336 (parallel [(const_int 0)]))
8337 (match_operand:DF 2 "nonimmediate_operand")))]
8340 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8342 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8344 /* Fix up the destination if needed. */
8345 if (dst != operands[0])
8346 emit_move_insn (operands[0], dst);
;; Build a V2DF from element 0 of operand 1 and the DF operand 2.
;; Seven alternatives: movhpd/vmovhpd when operand 2 is in memory,
;; unpcklpd/vunpcklpd when it is in an SSE register, and three further
;; alternatives (typed ssemov, fmov, imov) whose templates are not
;; visible here.  Operands 1 and 2 may not both be in memory.
8351 ;; Avoid combining registers from different units in a single alternative,
8352 ;; see comment above inline_secondary_memory_needed function in i386.c
8353 (define_insn "sse2_loadhpd"
8354 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8358 (match_operand:V2DF 1 "nonimmediate_operand"
8360 (parallel [(const_int 0)]))
8361 (match_operand:DF 2 "nonimmediate_operand"
8362 " m,m,x,x,x,*f,r")))]
8363 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8365 movhpd\t{%2, %0|%0, %2}
8366 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8367 unpcklpd\t{%2, %0|%0, %2}
8368 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8372 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8373 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8374 (set_attr "ssememalign" "64")
8375 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8376 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8377 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (presumably a define_split; its header line is
;; not visible here): replacing only the high element of a V2DF that
;; lives in memory becomes a plain DF store at that address plus 8.
8380 [(set (match_operand:V2DF 0 "memory_operand")
8382 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8383 (match_operand:DF 1 "register_operand")))]
8384 "TARGET_SSE2 && reload_completed"
8385 [(set (match_dup 0) (match_dup 1))]
8386 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd (operand 2 is the new DF value,
;; element 1 of operand 1 is kept).  Operands are first legitimized
;; with ix86_fixup_binary_operands; if that returned a destination
;; other than operands[0], the result is copied back afterwards.
8388 (define_expand "sse2_loadlpd_exp"
8389 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8391 (match_operand:DF 2 "nonimmediate_operand")
8393 (match_operand:V2DF 1 "nonimmediate_operand")
8394 (parallel [(const_int 1)]))))]
8397 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8399 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8401 /* Fix up the destination if needed. */
8402 if (dst != operands[0])
8403 emit_move_insn (operands[0], dst);
;; Build a V2DF from the DF operand 2 and element 1 of operand 1.
;; Eleven alternatives: operand 1 may be constant zero ("C",
;; alternative 0), and alternatives 8-10 store to a memory destination
;; from SSE, x87 and integer registers.  Alternative 5 (shufpd) is the
;; only "sselog" one and the only one with an immediate byte.
;; Operands 1 and 2 may not both be in memory.
8408 ;; Avoid combining registers from different units in a single alternative,
8409 ;; see comment above inline_secondary_memory_needed function in i386.c
8410 (define_insn "sse2_loadlpd"
8411 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8412 "=x,x,x,x,x,x,x,x,m,m ,m")
8414 (match_operand:DF 2 "nonimmediate_operand"
8415 " m,m,m,x,x,0,0,x,x,*f,r")
8417 (match_operand:V2DF 1 "vector_move_operand"
8418 " C,0,x,0,x,x,o,o,0,0 ,0")
8419 (parallel [(const_int 1)]))))]
8420 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8422 %vmovsd\t{%2, %0|%0, %2}
8423 movlpd\t{%2, %0|%0, %2}
8424 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8425 movsd\t{%2, %0|%0, %2}
8426 vmovsd\t{%2, %1, %0|%0, %1, %2}
8427 shufpd\t{$2, %1, %0|%0, %1, 2}
8428 movhpd\t{%H1, %0|%0, %H1}
8429 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8433 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8435 (cond [(eq_attr "alternative" "5")
8436 (const_string "sselog")
8437 (eq_attr "alternative" "9")
8438 (const_string "fmov")
8439 (eq_attr "alternative" "10")
8440 (const_string "imov")
8442 (const_string "ssemov")))
8443 (set_attr "ssememalign" "64")
8444 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8445 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8446 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8447 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite (presumably a define_split; its header line is
;; not visible here): replacing only the low element of a V2DF that
;; lives in memory becomes a plain DF store at offset 0.
8450 [(set (match_operand:V2DF 0 "memory_operand")
8452 (match_operand:DF 1 "register_operand")
8453 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8454 "TARGET_SSE2 && reload_completed"
8455 [(set (match_dup 0) (match_dup 1))]
8456 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; movsd-style combination of two V2DF operands, with nine
;; alternatives.  Note that operand 2 is listed before operand 1 in the
;; pattern.  Alternative 5 (shufpd) is the only "sselog" one and the
;; only one with an immediate byte; prefix_data16 is computed specially
;; for alternatives 2 and 4 when AVX is not enabled.
8458 (define_insn "sse2_movsd"
8459 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8461 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8462 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8466 movsd\t{%2, %0|%0, %2}
8467 vmovsd\t{%2, %1, %0|%0, %1, %2}
8468 movlpd\t{%2, %0|%0, %q2}
8469 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8470 %vmovlpd\t{%2, %0|%q0, %2}
8471 shufpd\t{$2, %1, %0|%0, %1, 2}
8472 movhps\t{%H1, %0|%0, %H1}
8473 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8474 %vmovhps\t{%1, %H0|%H0, %1}"
8475 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8478 (eq_attr "alternative" "5")
8479 (const_string "sselog")
8480 (const_string "ssemov")))
8481 (set (attr "prefix_data16")
8483 (and (eq_attr "alternative" "2,4")
8484 (not (match_test "TARGET_AVX")))
8486 (const_string "*")))
8487 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8488 (set_attr "ssememalign" "64")
8489 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8490 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Duplicate a DF value into both elements of a V2DF.  Two
;; alternatives: a non-AVX form with the input tied to the destination
;; (its template is not visible here) and an SSE3 movddup form that
;; accepts a register or memory input and prints the optional mask.
8492 (define_insn "vec_dupv2df<mask_name>"
8493 [(set (match_operand:V2DF 0 "register_operand" "=x,v")
8495 (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
8496 "TARGET_SSE2 && <mask_avx512vl_condition>"
8499 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8500 [(set_attr "isa" "noavx,sse3")
8501 (set_attr "type" "sselog1")
8502 (set_attr "prefix" "orig,maybe_vex")
8503 (set_attr "mode" "V2DF,DF")])
;; Concatenate two DF values into a V2DF, with eight alternatives.
;; Alternative 2 (operand 2 constrained to match operand 1) uses
;; movddup, alternative 5 (operand 2 is constant zero, "C") uses movsd
;; from memory, and the last two alternatives use the single-precision
;; movlhps/movhps forms.  Alternatives 0-2 are "sselog", the rest
;; "ssemov".
8505 (define_insn "*vec_concatv2df"
8506 [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
8508 (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
8509 (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
8512 unpcklpd\t{%2, %0|%0, %2}
8513 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8514 %vmovddup\t{%1, %0|%0, %1}
8515 movhpd\t{%2, %0|%0, %2}
8516 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8517 %vmovsd\t{%1, %0|%0, %1}
8518 movlhps\t{%2, %0|%0, %2}
8519 movhps\t{%2, %0|%0, %2}"
8520 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
8523 (eq_attr "alternative" "0,1,2")
8524 (const_string "sselog")
8525 (const_string "ssemov")))
8526 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
8527 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
8528 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
8530 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8532 ;; Parallel integer down-conversion operations
8534 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the truncations from 512-bit sources, with
;; per-mode attributes giving the source mode (upper and lower case)
;; and the vpmov mnemonic suffix (e.g. V16QI comes from V16SI and uses
;; "db").
8536 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8537 (define_mode_attr pmov_src_mode
8538 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8539 (define_mode_attr pmov_src_lower
8540 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8541 (define_mode_attr pmov_suff_1
8542 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked vpmov* truncation (any_truncate) from <pmov_src_mode> to a
;; PMOV_DST_MODE_1 destination, which may be a register or memory
;; (second alternative, memory attribute "store").
8544 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8545 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8546 (any_truncate:PMOV_DST_MODE_1
8547 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8549 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8550 [(set_attr "type" "ssemov")
8551 (set_attr "memory" "none,store")
8552 (set_attr "prefix" "evex")
8553 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpmov* truncation: vec_merge of the truncated source with
;; operand 2 under mask operand 3.  For a register destination operand
;; 2 may be the destination itself or zero ("0C", printed via %N2); for
;; a memory destination it must be the destination ("0").
8555 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8556 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8557 (vec_merge:PMOV_DST_MODE_1
8558 (any_truncate:PMOV_DST_MODE_1
8559 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8560 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8561 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8563 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8564 [(set_attr "type" "ssemov")
8565 (set_attr "memory" "none,store")
8566 (set_attr "prefix" "evex")
8567 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store: destination is a memory
;; operand, operand 1 the wide source register and operand 2 the mask.
;; The merge-source line of the vec_merge is not visible here.
8569 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8570 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8571 (vec_merge:PMOV_DST_MODE_1
8572 (any_truncate:PMOV_DST_MODE_1
8573 (match_operand:<pmov_src_mode> 1 "register_operand"))
8575 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Unmasked vpmov*wb: V32HI source register to a V32QI register or
;; memory destination.
8578 (define_insn "*avx512bw_<code>v32hiv32qi2"
8579 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8581 (match_operand:V32HI 1 "register_operand" "v,v")))]
8583 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
8584 [(set_attr "type" "ssemov")
8585 (set_attr "memory" "none,store")
8586 (set_attr "prefix" "evex")
8587 (set_attr "mode" "XI")])
;; Masked vpmov*wb from V32HI to V32QI.  Operand 2 is the merge source
;; (destination or zero for the register alternative, destination only
;; for memory) and operand 3 is an SImode mask register.
8589 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
8590 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8593 (match_operand:V32HI 1 "register_operand" "v,v"))
8594 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
8595 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
8597 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8598 [(set_attr "type" "ssemov")
8599 (set_attr "memory" "none,store")
8600 (set_attr "prefix" "evex")
8601 (set_attr "mode" "XI")])
;; Expander for the masked V32HI -> V32QI truncating store; operand 2
;; is the SImode mask.
;; NOTE(review): operand 0 uses "nonimmediate_operand" here, whereas
;; the PMOV_DST_MODE_1 _mask_store expander uses "memory_operand" --
;; confirm whether the difference is intentional.
8603 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
8604 [(set (match_operand:V32QI 0 "nonimmediate_operand")
8607 (match_operand:V32HI 1 "register_operand"))
8609 (match_operand:SI 2 "register_operand")))]
;; Destination modes for truncations whose source is <ssedoublemode>;
;; V16QI is only available with TARGET_AVX512BW.  pmov_suff_2 gives the
;; matching vpmov mnemonic suffix.
8612 (define_mode_iterator PMOV_DST_MODE_2
8613 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
8614 (define_mode_attr pmov_suff_2
8615 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Unmasked vpmov* truncation from <ssedoublemode> to a PMOV_DST_MODE_2
;; destination in a register or in memory.
8617 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8618 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8619 (any_truncate:PMOV_DST_MODE_2
8620 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8622 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8623 [(set_attr "type" "ssemov")
8624 (set_attr "memory" "none,store")
8625 (set_attr "prefix" "evex")
8626 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpmov* truncation from <ssedoublemode>: vec_merge of the
;; truncated source with operand 2 (destination or zero for the
;; register alternative, destination only for memory) under mask
;; operand 3.
8628 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8629 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8630 (vec_merge:PMOV_DST_MODE_2
8631 (any_truncate:PMOV_DST_MODE_2
8632 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8633 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8634 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8636 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8637 [(set_attr "type" "ssemov")
8638 (set_attr "memory" "none,store")
8639 (set_attr "prefix" "evex")
8640 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store from <ssedoublemode>;
;; operand 1 is the wide source register and operand 2 the mask.  The
;; merge-source line of the vec_merge is not visible here.
8642 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
8643 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8644 (vec_merge:PMOV_DST_MODE_2
8645 (any_truncate:PMOV_DST_MODE_2
8646 (match_operand:<ssedoublemode> 1 "register_operand"))
8648 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Source modes for truncations to byte elements that fill only part of
;; a V16QI result.  pmov_dst_3 is the mode of the truncated part and
;; pmov_dst_zeroed_3 the mode of the remaining bytes; the two element
;; counts sum to 16 for every source mode.  pmov_suff_3 is the vpmov
;; mnemonic suffix.
8651 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
8652 (define_mode_attr pmov_dst_3
8653 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
8654 (define_mode_attr pmov_dst_zeroed_3
8655 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
8656 (define_mode_attr pmov_suff_3
8657 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Register-to-register vpmov* truncation to bytes: the truncated
;; <pmov_dst_3> part is combined with operand 2, which must be a zero
;; constant of mode <pmov_dst_zeroed_3>, to form the V16QI result.
8659 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8660 [(set (match_operand:V16QI 0 "register_operand" "=v")
8662 (any_truncate:<pmov_dst_3>
8663 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8664 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8666 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8667 [(set_attr "type" "ssemov")
8668 (set_attr "prefix" "evex")
8669 (set_attr "mode" "TI")])
;; vpmov*qb store from a V2DI register to memory.  The pattern is
;; written on the whole V16QI memory operand; the visible selection
;; lists byte indices 2..15, the bytes other than the two produced by
;; the truncation.
8671 (define_insn "*avx512vl_<code>v2div2qi2_store"
8672 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8675 (match_operand:V2DI 1 "register_operand" "v"))
8678 (parallel [(const_int 2) (const_int 3)
8679 (const_int 4) (const_int 5)
8680 (const_int 6) (const_int 7)
8681 (const_int 8) (const_int 9)
8682 (const_int 10) (const_int 11)
8683 (const_int 12) (const_int 13)
8684 (const_int 14) (const_int 15)]))))]
8686 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8687 [(set_attr "type" "ssemov")
8688 (set_attr "memory" "store")
8689 (set_attr "prefix" "evex")
8690 (set_attr "mode" "TI")])
;; Masked register form of vpmov*qb from V2DI.  Bytes 0-1 of operand 2
;; (the destination or zero, "0C") are the merge source under mask
;; operand 3; the remaining fourteen bytes of the result are the
;; explicit V14QI zero vector.
8692 (define_insn "avx512vl_<code>v2div2qi2_mask"
8693 [(set (match_operand:V16QI 0 "register_operand" "=v")
8697 (match_operand:V2DI 1 "register_operand" "v"))
8699 (match_operand:V16QI 2 "vector_move_operand" "0C")
8700 (parallel [(const_int 0) (const_int 1)]))
8701 (match_operand:QI 3 "register_operand" "Yk"))
8702 (const_vector:V14QI [(const_int 0) (const_int 0)
8703 (const_int 0) (const_int 0)
8704 (const_int 0) (const_int 0)
8705 (const_int 0) (const_int 0)
8706 (const_int 0) (const_int 0)
8707 (const_int 0) (const_int 0)
8708 (const_int 0) (const_int 0)])))]
8710 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8711 [(set_attr "type" "ssemov")
8712 (set_attr "prefix" "evex")
8713 (set_attr "mode" "TI")])
;; Masked vpmov*qb store from V2DI to memory.  Operand 2 is the QImode
;; mask; the pattern selects byte indices 0-1 as the merged part and
;; 2..15 as the remaining bytes of the V16QI memory operand.
8715 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
8716 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8720 (match_operand:V2DI 1 "register_operand" "v"))
8723 (parallel [(const_int 0) (const_int 1)]))
8724 (match_operand:QI 2 "register_operand" "Yk"))
8727 (parallel [(const_int 2) (const_int 3)
8728 (const_int 4) (const_int 5)
8729 (const_int 6) (const_int 7)
8730 (const_int 8) (const_int 9)
8731 (const_int 10) (const_int 11)
8732 (const_int 12) (const_int 13)
8733 (const_int 14) (const_int 15)]))))]
8735 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8736 [(set_attr "type" "ssemov")
8737 (set_attr "memory" "store")
8738 (set_attr "prefix" "evex")
8739 (set_attr "mode" "TI")])
;; vpmov* store of four truncated bytes from a VI4_128_8_256 register
;; to memory.  The visible selection lists byte indices 4..15, the
;; bytes of the V16QI memory operand other than the four produced.
8741 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8742 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8745 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8748 (parallel [(const_int 4) (const_int 5)
8749 (const_int 6) (const_int 7)
8750 (const_int 8) (const_int 9)
8751 (const_int 10) (const_int 11)
8752 (const_int 12) (const_int 13)
8753 (const_int 14) (const_int 15)]))))]
8755 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8756 [(set_attr "type" "ssemov")
8757 (set_attr "memory" "store")
8758 (set_attr "prefix" "evex")
8759 (set_attr "mode" "TI")])
;; Masked register form of the four-byte truncation.  Bytes 0-3 of
;; operand 2 (the destination or zero, "0C") are the merge source under
;; mask operand 3; the remaining twelve bytes of the result are the
;; explicit V12QI zero vector.
8761 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8762 [(set (match_operand:V16QI 0 "register_operand" "=v")
8766 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8768 (match_operand:V16QI 2 "vector_move_operand" "0C")
8769 (parallel [(const_int 0) (const_int 1)
8770 (const_int 2) (const_int 3)]))
8771 (match_operand:QI 3 "register_operand" "Yk"))
8772 (const_vector:V12QI [(const_int 0) (const_int 0)
8773 (const_int 0) (const_int 0)
8774 (const_int 0) (const_int 0)
8775 (const_int 0) (const_int 0)
8776 (const_int 0) (const_int 0)
8777 (const_int 0) (const_int 0)])))]
8779 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8780 [(set_attr "type" "ssemov")
8781 (set_attr "prefix" "evex")
8782 (set_attr "mode" "TI")])
;; Masked store form of the four-byte truncation.  Operand 2 is the
;; QImode mask; the pattern selects byte indices 0-3 as the merged part
;; and 4..15 as the remaining bytes of the V16QI memory operand.
8784 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
8785 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8789 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8792 (parallel [(const_int 0) (const_int 1)
8793 (const_int 2) (const_int 3)]))
8794 (match_operand:QI 2 "register_operand" "Yk"))
8797 (parallel [(const_int 4) (const_int 5)
8798 (const_int 6) (const_int 7)
8799 (const_int 8) (const_int 9)
8800 (const_int 10) (const_int 11)
8801 (const_int 12) (const_int 13)
8802 (const_int 14) (const_int 15)]))))]
8804 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8805 [(set_attr "type" "ssemov")
8806 (set_attr "memory" "store")
8807 (set_attr "prefix" "evex")
8808 (set_attr "mode" "TI")])
;; Source modes with eight elements for the truncations to eight
;; bytes: V8HI (only with TARGET_AVX512BW) and V8SI.
8810 (define_mode_iterator VI2_128_BW_4_256
8811 [(V8HI "TARGET_AVX512BW") V8SI])
;; vpmov* store of eight truncated bytes from a VI2_128_BW_4_256
;; register to memory.  The visible selection lists byte indices 8..15,
;; the bytes of the V16QI memory operand other than the eight produced.
8813 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8814 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8817 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8820 (parallel [(const_int 8) (const_int 9)
8821 (const_int 10) (const_int 11)
8822 (const_int 12) (const_int 13)
8823 (const_int 14) (const_int 15)]))))]
8825 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8826 [(set_attr "type" "ssemov")
8827 (set_attr "memory" "store")
8828 (set_attr "prefix" "evex")
8829 (set_attr "mode" "TI")])
;; Masked register form of the eight-byte truncation.  Bytes 0-7 of
;; operand 2 (the destination or zero, "0C") are the merge source under
;; mask operand 3; the remaining eight bytes of the result are the
;; explicit V8QI zero vector.
8831 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8832 [(set (match_operand:V16QI 0 "register_operand" "=v")
8836 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8838 (match_operand:V16QI 2 "vector_move_operand" "0C")
8839 (parallel [(const_int 0) (const_int 1)
8840 (const_int 2) (const_int 3)
8841 (const_int 4) (const_int 5)
8842 (const_int 6) (const_int 7)]))
8843 (match_operand:QI 3 "register_operand" "Yk"))
8844 (const_vector:V8QI [(const_int 0) (const_int 0)
8845 (const_int 0) (const_int 0)
8846 (const_int 0) (const_int 0)
8847 (const_int 0) (const_int 0)])))]
8849 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8850 [(set_attr "type" "ssemov")
8851 (set_attr "prefix" "evex")
8852 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix><pmov_suff_3>: register
;; operand 1 is the source, V16QI memory operand 0 the destination, and
;; QImode mask register operand 2 is printed in braces after the
;; destination.  The element index lists cover lanes 0-7 and 8-15.
8854 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
8855 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8859 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8862 (parallel [(const_int 0) (const_int 1)
8863 (const_int 2) (const_int 3)
8864 (const_int 4) (const_int 5)
8865 (const_int 6) (const_int 7)]))
8866 (match_operand:QI 2 "register_operand" "Yk"))
8869 (parallel [(const_int 8) (const_int 9)
8870 (const_int 10) (const_int 11)
8871 (const_int 12) (const_int 13)
8872 (const_int 14) (const_int 15)]))))]
8874 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8875 [(set_attr "type" "ssemov")
8876 (set_attr "memory" "store")
8877 (set_attr "prefix" "evex")
8878 (set_attr "mode" "TI")])
;; Source modes for the HImode-element truncation pattern below, plus
;; their per-mode attributes:
;;   pmov_dst_4         mode of the any_truncate result,
;;   pmov_dst_zeroed_4  mode of the const0 operand used alongside it,
;;   pmov_suff_4        mnemonic suffix ("qw" or "dw").
8880 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
8881 (define_mode_attr pmov_dst_4
8882 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
8883 (define_mode_attr pmov_dst_zeroed_4
8884 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
8885 (define_mode_attr pmov_suff_4
8886 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Register form of vpmov<trunsuffix><pmov_suff_4>: operand 1 is
;; truncated (any_truncate) to <pmov_dst_4>, accompanied by operand 2,
;; which must be a zero constant of mode <pmov_dst_zeroed_4>.  The
;; destination is V8HI register operand 0.
8888 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
8889 [(set (match_operand:V8HI 0 "register_operand" "=v")
8891 (any_truncate:<pmov_dst_4>
8892 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
8893 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
8895 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8896 [(set_attr "type" "ssemov")
8897 (set_attr "prefix" "evex")
8898 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix><pmov_suff_4>: register
;; operand 1 is the source and V8HI memory operand 0 the destination.
;; The element index list covers lanes 4-7.
8900 (define_insn "*avx512vl_<code><mode>v4hi2_store"
8901 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8904 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8907 (parallel [(const_int 4) (const_int 5)
8908 (const_int 6) (const_int 7)]))))]
8910 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8911 [(set_attr "type" "ssemov")
8912 (set_attr "memory" "store")
8913 (set_attr "prefix" "evex")
8914 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix><pmov_suff_4>.  Operand 0 is
;; the V8HI destination register, operand 1 the source register,
;; operand 2 the V8HI merge source (constraint "0C"), and operand 3 the
;; QImode mask register.  An all-zero V4HI const_vector closes the
;; pattern.
8916 (define_insn "avx512vl_<code><mode>v4hi2_mask"
8917 [(set (match_operand:V8HI 0 "register_operand" "=v")
8921 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8923 (match_operand:V8HI 2 "vector_move_operand" "0C")
8924 (parallel [(const_int 0) (const_int 1)
8925 (const_int 2) (const_int 3)]))
8926 (match_operand:QI 3 "register_operand" "Yk"))
8927 (const_vector:V4HI [(const_int 0) (const_int 0)
8928 (const_int 0) (const_int 0)])))]
8930 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8931 [(set_attr "type" "ssemov")
8932 (set_attr "prefix" "evex")
8933 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix><pmov_suff_4>: register
;; operand 1 is the source, V8HI memory operand 0 the destination, and
;; QImode mask register operand 2 is printed in braces after the
;; destination.  The element index lists cover lanes 0-3 and 4-7.
8935 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
8936 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8940 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8943 (parallel [(const_int 0) (const_int 1)
8944 (const_int 2) (const_int 3)]))
8945 (match_operand:QI 2 "register_operand" "Yk"))
8948 (parallel [(const_int 4) (const_int 5)
8949 (const_int 6) (const_int 7)]))))]
8951 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8952 [(set_attr "type" "ssemov")
8953 (set_attr "memory" "store")
8954 (set_attr "prefix" "evex")
8955 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qw: V2DI register operand 1
;; is the source and V8HI memory operand 0 the destination.  The element
;; index list covers lanes 2-7.
8957 (define_insn "*avx512vl_<code>v2div2hi2_store"
8958 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8961 (match_operand:V2DI 1 "register_operand" "v"))
8964 (parallel [(const_int 2) (const_int 3)
8965 (const_int 4) (const_int 5)
8966 (const_int 6) (const_int 7)]))))]
8968 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
8969 [(set_attr "type" "ssemov")
8970 (set_attr "memory" "store")
8971 (set_attr "prefix" "evex")
8972 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qw.  Operand 0 is the V8HI
;; destination register, operand 1 the V2DI source register, operand 2
;; the V8HI merge source (constraint "0C"), and operand 3 the QImode
;; mask register.  An all-zero V6HI const_vector closes the pattern.
8974 (define_insn "avx512vl_<code>v2div2hi2_mask"
8975 [(set (match_operand:V8HI 0 "register_operand" "=v")
8979 (match_operand:V2DI 1 "register_operand" "v"))
8981 (match_operand:V8HI 2 "vector_move_operand" "0C")
8982 (parallel [(const_int 0) (const_int 1)]))
8983 (match_operand:QI 3 "register_operand" "Yk"))
8984 (const_vector:V6HI [(const_int 0) (const_int 0)
8985 (const_int 0) (const_int 0)
8986 (const_int 0) (const_int 0)])))]
8988 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8989 [(set_attr "type" "ssemov")
8990 (set_attr "prefix" "evex")
8991 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qw: V2DI register operand 1 is
;; the source, V8HI memory operand 0 the destination, and QImode mask
;; register operand 2 is printed in braces after the destination.  The
;; element index lists cover lanes 0-1 and 2-7.
8993 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
8994 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8998 (match_operand:V2DI 1 "register_operand" "v"))
9001 (parallel [(const_int 0) (const_int 1)]))
9002 (match_operand:QI 2 "register_operand" "Yk"))
9005 (parallel [(const_int 2) (const_int 3)
9006 (const_int 4) (const_int 5)
9007 (const_int 6) (const_int 7)]))))]
9009 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9010 [(set_attr "type" "ssemov")
9011 (set_attr "memory" "store")
9012 (set_attr "prefix" "evex")
9013 (set_attr "mode" "TI")])
;; Register form of vpmov<trunsuffix>qd: V2DI register operand 1 is the
;; source and V4SI register operand 0 the destination.  Operand 2 must
;; be a V2SI zero constant (const0_operand).
9015 (define_insn "*avx512vl_<code>v2div2si2"
9016 [(set (match_operand:V4SI 0 "register_operand" "=v")
9019 (match_operand:V2DI 1 "register_operand" "v"))
9020 (match_operand:V2SI 2 "const0_operand")))]
9022 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9023 [(set_attr "type" "ssemov")
9024 (set_attr "prefix" "evex")
9025 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qd: V2DI register operand 1
;; is the source and V4SI memory operand 0 the destination.  The element
;; index list covers lanes 2-3.
9027 (define_insn "*avx512vl_<code>v2div2si2_store"
9028 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9031 (match_operand:V2DI 1 "register_operand" "v"))
9034 (parallel [(const_int 2) (const_int 3)]))))]
9036 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9037 [(set_attr "type" "ssemov")
9038 (set_attr "memory" "store")
9039 (set_attr "prefix" "evex")
9040 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qd.  Operand 0 is the V4SI
;; destination register, operand 1 the V2DI source register, operand 2
;; the V4SI merge source (constraint "0C"), and operand 3 the QImode
;; mask register.  An all-zero V2SI const_vector closes the pattern.
9042 (define_insn "avx512vl_<code>v2div2si2_mask"
9043 [(set (match_operand:V4SI 0 "register_operand" "=v")
9047 (match_operand:V2DI 1 "register_operand" "v"))
9049 (match_operand:V4SI 2 "vector_move_operand" "0C")
9050 (parallel [(const_int 0) (const_int 1)]))
9051 (match_operand:QI 3 "register_operand" "Yk"))
9052 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9054 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9055 [(set_attr "type" "ssemov")
9056 (set_attr "prefix" "evex")
9057 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qd: V2DI register operand 1 is
;; the source, V4SI memory operand 0 the destination, and QImode mask
;; register operand 2 is printed in braces after the destination.  The
;; element index lists cover lanes 0-1 and 2-3.
9059 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9060 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9064 (match_operand:V2DI 1 "register_operand" "v"))
9067 (parallel [(const_int 0) (const_int 1)]))
9068 (match_operand:QI 2 "register_operand" "Yk"))
9071 (parallel [(const_int 2) (const_int 3)]))))]
9073 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9074 [(set_attr "type" "ssemov")
9075 (set_attr "memory" "store")
9076 (set_attr "prefix" "evex")
9077 (set_attr "mode" "TI")])
;; Register form of vpmov<trunsuffix>qb: V8DI register operand 1 is the
;; source and V16QI register operand 0 the destination.  An all-zero
;; V8QI const_vector closes the pattern.
9079 (define_insn "*avx512f_<code>v8div16qi2"
9080 [(set (match_operand:V16QI 0 "register_operand" "=v")
9083 (match_operand:V8DI 1 "register_operand" "v"))
9084 (const_vector:V8QI [(const_int 0) (const_int 0)
9085 (const_int 0) (const_int 0)
9086 (const_int 0) (const_int 0)
9087 (const_int 0) (const_int 0)])))]
9089 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9090 [(set_attr "type" "ssemov")
9091 (set_attr "prefix" "evex")
9092 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qb: V8DI register operand 1
;; is the source and V16QI memory operand 0 the destination.  The
;; element index list covers lanes 8-15.
9094 (define_insn "*avx512f_<code>v8div16qi2_store"
9095 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9098 (match_operand:V8DI 1 "register_operand" "v"))
9101 (parallel [(const_int 8) (const_int 9)
9102 (const_int 10) (const_int 11)
9103 (const_int 12) (const_int 13)
9104 (const_int 14) (const_int 15)]))))]
9106 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9107 [(set_attr "type" "ssemov")
9108 (set_attr "memory" "store")
9109 (set_attr "prefix" "evex")
9110 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb.  Operand 0 is the V16QI
;; destination register, operand 1 the V8DI source register, operand 2
;; the V16QI merge source (constraint "0C"), and operand 3 the QImode
;; mask register.  An all-zero V8QI const_vector closes the pattern.
9112 (define_insn "avx512f_<code>v8div16qi2_mask"
9113 [(set (match_operand:V16QI 0 "register_operand" "=v")
9117 (match_operand:V8DI 1 "register_operand" "v"))
9119 (match_operand:V16QI 2 "vector_move_operand" "0C")
9120 (parallel [(const_int 0) (const_int 1)
9121 (const_int 2) (const_int 3)
9122 (const_int 4) (const_int 5)
9123 (const_int 6) (const_int 7)]))
9124 (match_operand:QI 3 "register_operand" "Yk"))
9125 (const_vector:V8QI [(const_int 0) (const_int 0)
9126 (const_int 0) (const_int 0)
9127 (const_int 0) (const_int 0)
9128 (const_int 0) (const_int 0)])))]
9130 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9131 [(set_attr "type" "ssemov")
9132 (set_attr "prefix" "evex")
9133 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qb: V8DI register operand 1 is
;; the source, V16QI memory operand 0 the destination, and QImode mask
;; register operand 2 is printed in braces after the destination.  The
;; element index lists cover lanes 0-7 and 8-15.
9135 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9136 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9140 (match_operand:V8DI 1 "register_operand" "v"))
9143 (parallel [(const_int 0) (const_int 1)
9144 (const_int 2) (const_int 3)
9145 (const_int 4) (const_int 5)
9146 (const_int 6) (const_int 7)]))
9147 (match_operand:QI 2 "register_operand" "Yk"))
9150 (parallel [(const_int 8) (const_int 9)
9151 (const_int 10) (const_int 11)
9152 (const_int 12) (const_int 13)
9153 (const_int 14) (const_int 15)]))))]
9155 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9156 [(set_attr "type" "ssemov")
9157 (set_attr "memory" "store")
9158 (set_attr "prefix" "evex")
9159 (set_attr "mode" "TI")])
9161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9163 ;; Parallel integral arithmetic
9165 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for vector integer negation over the VI_AVX2 modes.  The
;; preparation statement materialises an all-zero vector of the same
;; mode in a register as operands[2].
;; NOTE(review): presumably the pattern subtracts operand 1 from that
;; zero register -- confirm against the full RTL template.
9167 (define_expand "neg<mode>2"
9168 [(set (match_operand:VI_AVX2 0 "register_operand")
9171 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
9173 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for vector integer add/subtract over the VI_AVX2 modes.
;; Both inputs may be registers or memory; the preparation statement
;; hands the operand array to ix86_fixup_binary_operands_no_copy.
9175 (define_expand "<plusminus_insn><mode>3"
9176 [(set (match_operand:VI_AVX2 0 "register_operand")
9178 (match_operand:VI_AVX2 1 "nonimmediate_operand")
9179 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
9181 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/subtract expander for the VI48_AVX512VL modes: the
;; plus/minus of operands 1 and 2 is vec_merge'd with operand 3 under
;; mask operand 4 (mode <avx512fmaskmode>).  The preparation statement
;; hands the operand array to ix86_fixup_binary_operands_no_copy.
9183 (define_expand "<plusminus_insn><mode>3_mask"
9184 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9185 (vec_merge:VI48_AVX512VL
9186 (plusminus:VI48_AVX512VL
9187 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9188 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9189 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9190 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9192 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/subtract expander for the VI12_AVX512VL modes: the
;; plus/minus of operands 1 and 2 is vec_merge'd with operand 3 under
;; mask operand 4 (mode <avx512fmaskmode>).  The preparation statement
;; hands the operand array to ix86_fixup_binary_operands_no_copy.
9194 (define_expand "<plusminus_insn><mode>3_mask"
9195 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9196 (vec_merge:VI12_AVX512VL
9197 (plusminus:VI12_AVX512VL
9198 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9199 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9200 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9201 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9203 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector integer add/subtract insn for the VI_AVX2 modes, with two
;; alternatives: a two-operand legacy form (isa "noavx", destination
;; tied to operand 1) and a three-operand "vp..." form (isa "avx").
;; The condition requires ix86_binary_operator_ok for the operands.
9205 (define_insn "*<plusminus_insn><mode>3"
9206 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9208 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9209 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9211 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9213 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9214 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9215 [(set_attr "isa" "noavx,avx")
9216 (set_attr "type" "sseiadd")
9217 (set_attr "prefix_data16" "1,*")
9218 (set_attr "prefix" "<mask_prefix3>")
9219 (set_attr "mode" "<sseinsnmode>")])
;; Masked add/subtract insn for the VI48_AVX512VL modes.  The plus/minus
;; of operands 1 and 2 is merged with operand 3 (constraint "0C") under
;; mask register operand 4; the mask is printed in braces after the
;; destination.  EVEX-encoded.
9221 (define_insn "*<plusminus_insn><mode>3_mask"
9222 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9223 (vec_merge:VI48_AVX512VL
9224 (plusminus:VI48_AVX512VL
9225 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9226 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9227 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9228 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9230 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9231 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9232 [(set_attr "type" "sseiadd")
9233 (set_attr "prefix" "evex")
9234 (set_attr "mode" "<sseinsnmode>")])
;; Masked add/subtract insn for the VI12_AVX512VL modes; requires
;; TARGET_AVX512BW and ix86_binary_operator_ok.  The plus/minus of
;; operands 1 and 2 is merged with operand 3 (constraint "0C") under
;; mask register operand 4; the mask is printed in braces after the
;; destination.  EVEX-encoded.
9236 (define_insn "*<plusminus_insn><mode>3_mask"
9237 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9238 (vec_merge:VI12_AVX512VL
9239 (plusminus:VI12_AVX512VL
9240 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9241 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9242 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9243 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9244 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9245 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9246 [(set_attr "type" "sseiadd")
9247 (set_attr "prefix" "evex")
9248 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus codes over the VI12_AVX2 modes.
;; Enabled under TARGET_SSE2 together with the <mask_mode512bit_condition>
;; and <mask_avx512bw_condition> substitutions; the preparation statement
;; hands the operand array to ix86_fixup_binary_operands_no_copy.
9250 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9251 [(set (match_operand:VI12_AVX2 0 "register_operand")
9252 (sat_plusminus:VI12_AVX2
9253 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
9254 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
9255 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9256 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus codes over the VI12_AVX2 modes, with two
;; alternatives: a two-operand legacy form (isa "noavx", destination
;; tied to operand 1) and a three-operand "vp..." form (isa "avx") that
;; carries the optional <mask_operand3> suffix on the destination.
9258 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9259 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9260 (sat_plusminus:VI12_AVX2
9261 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9262 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9263 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9264 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9266 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9267 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9268 [(set_attr "isa" "noavx,avx")
9269 (set_attr "type" "sseiadd")
9270 (set_attr "prefix_data16" "1,*")
9271 (set_attr "prefix" "orig,maybe_evex")
9272 (set_attr "mode" "TI")])
;; Multiply expander for the VI1_AVX512 modes.  All three operands are
;; registers; the expansion is delegated to ix86_expand_vecop_qihi with
;; the MULT code.
9274 (define_expand "mul<mode>3<mask_name>"
9275 [(set (match_operand:VI1_AVX512 0 "register_operand")
9276 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9277 (match_operand:VI1_AVX512 2 "register_operand")))]
9278 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9280 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiply expander for the VI2_AVX2 modes.  Inputs may be registers or
;; memory; the preparation statement hands the operand array to
;; ix86_fixup_binary_operands_no_copy with the MULT code.
9284 (define_expand "mul<mode>3<mask_name>"
9285 [(set (match_operand:VI2_AVX2 0 "register_operand")
9286 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
9287 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
9288 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9289 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiply insn for the VI2_AVX2 modes, with two alternatives: pmullw
;; (isa "noavx", destination tied to operand 1) and three-operand
;; vpmullw (isa "avx") carrying the optional <mask_operand3> suffix.
;; Operand 1 is marked commutative ("%").
9291 (define_insn "*mul<mode>3<mask_name>"
9292 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9293 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
9294 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9296 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9297 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9299 pmullw\t{%2, %0|%0, %2}
9300 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9301 [(set_attr "isa" "noavx,avx")
9302 (set_attr "type" "sseimul")
9303 (set_attr "prefix_data16" "1,*")
9304 (set_attr "prefix" "orig,vex")
9305 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for the VI2_AVX2 modes.  Both operands
;; are extended (any_extend) to <ssedoublemode>, multiplied there, and
;; the product is logically shifted right (lshiftrt).  The preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
9307 (define_expand "<s>mul<mode>3_highpart<mask_name>"
9308 [(set (match_operand:VI2_AVX2 0 "register_operand")
9310 (lshiftrt:<ssedoublemode>
9311 (mult:<ssedoublemode>
9312 (any_extend:<ssedoublemode>
9313 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
9314 (any_extend:<ssedoublemode>
9315 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
9318 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9319 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn for the VI2_AVX2 modes: the extended operands
;; are multiplied in <ssedoublemode> and the product shifted right.
;; Two alternatives: pmulh<u>w (isa "noavx", destination tied to
;; operand 1) and three-operand vpmulh<u>w (isa "avx") carrying the
;; optional <mask_operand3> suffix.
9321 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
9322 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9324 (lshiftrt:<ssedoublemode>
9325 (mult:<ssedoublemode>
9326 (any_extend:<ssedoublemode>
9327 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
9328 (any_extend:<ssedoublemode>
9329 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
9332 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9333 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9335 pmulh<u>w\t{%2, %0|%0, %2}
9336 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9337 [(set_attr "isa" "noavx,avx")
9338 (set_attr "type" "sseimul")
9339 (set_attr "prefix_data16" "1,*")
9340 (set_attr "prefix" "orig,vex")
9341 (set_attr "mode" "<sseinsnmode>")])
;; Expander producing a V8DI result from the even-indexed elements
;; (0, 2, ..., 14) of two V16SI operands.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands_no_copy (MULT).
9343 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9344 [(set (match_operand:V8DI 0 "register_operand")
9348 (match_operand:V16SI 1 "nonimmediate_operand")
9349 (parallel [(const_int 0) (const_int 2)
9350 (const_int 4) (const_int 6)
9351 (const_int 8) (const_int 10)
9352 (const_int 12) (const_int 14)])))
9355 (match_operand:V16SI 2 "nonimmediate_operand")
9356 (parallel [(const_int 0) (const_int 2)
9357 (const_int 4) (const_int 6)
9358 (const_int 8) (const_int 10)
9359 (const_int 12) (const_int 14)])))))]
9361 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuludq on V16SI inputs: uses the even-indexed elements
;; (0, 2, ..., 14) of operands 1 and 2 and writes V8DI register
;; operand 0.  Requires TARGET_AVX512F; EVEX-encoded, with the optional
;; <mask_operand3> suffix on the destination.
9363 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9364 [(set (match_operand:V8DI 0 "register_operand" "=v")
9368 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9369 (parallel [(const_int 0) (const_int 2)
9370 (const_int 4) (const_int 6)
9371 (const_int 8) (const_int 10)
9372 (const_int 12) (const_int 14)])))
9375 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9376 (parallel [(const_int 0) (const_int 2)
9377 (const_int 4) (const_int 6)
9378 (const_int 8) (const_int 10)
9379 (const_int 12) (const_int 14)])))))]
9380 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9381 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9382 [(set_attr "isa" "avx512f")
9383 (set_attr "type" "sseimul")
9384 (set_attr "prefix_extra" "1")
9385 (set_attr "prefix" "evex")
9386 (set_attr "mode" "XI")])
;; Expander producing a V4DI result from the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands.  Enabled under TARGET_AVX2 and
;; <mask_avx512vl_condition>; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy (MULT).
9388 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9389 [(set (match_operand:V4DI 0 "register_operand")
9393 (match_operand:V8SI 1 "nonimmediate_operand")
9394 (parallel [(const_int 0) (const_int 2)
9395 (const_int 4) (const_int 6)])))
9398 (match_operand:V8SI 2 "nonimmediate_operand")
9399 (parallel [(const_int 0) (const_int 2)
9400 (const_int 4) (const_int 6)])))))]
9401 "TARGET_AVX2 && <mask_avx512vl_condition>"
9402 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on V8SI inputs: uses the even-indexed elements (0, 2, 4, 6)
;; of operands 1 and 2 and writes V4DI register operand 0.  Requires
;; TARGET_AVX2 and <mask_avx512vl_condition>; the destination carries
;; the optional <mask_operand3> suffix.
9404 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9405 [(set (match_operand:V4DI 0 "register_operand" "=v")
9409 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9410 (parallel [(const_int 0) (const_int 2)
9411 (const_int 4) (const_int 6)])))
9414 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9415 (parallel [(const_int 0) (const_int 2)
9416 (const_int 4) (const_int 6)])))))]
9417 "TARGET_AVX2 && <mask_avx512vl_condition>
9418 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9419 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9420 [(set_attr "type" "sseimul")
9421 (set_attr "prefix" "maybe_evex")
9422 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from the even-indexed elements
;; (0 and 2) of two V4SI operands.  Enabled under TARGET_SSE2 and
;; <mask_avx512vl_condition>; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy (MULT).
9424 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9425 [(set (match_operand:V2DI 0 "register_operand")
9429 (match_operand:V4SI 1 "nonimmediate_operand")
9430 (parallel [(const_int 0) (const_int 2)])))
9433 (match_operand:V4SI 2 "nonimmediate_operand")
9434 (parallel [(const_int 0) (const_int 2)])))))]
9435 "TARGET_SSE2 && <mask_avx512vl_condition>"
9436 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq/vpmuludq on V4SI inputs: uses elements 0 and 2 of operands 1
;; and 2 and writes V2DI register operand 0.  Two alternatives: the
;; two-operand legacy form (isa "noavx", destination tied to operand 1)
;; and the three-operand form (isa "avx") carrying the optional
;; <mask_operand3> suffix.
9438 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9439 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9443 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9444 (parallel [(const_int 0) (const_int 2)])))
9447 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9448 (parallel [(const_int 0) (const_int 2)])))))]
9449 "TARGET_SSE2 && <mask_avx512vl_condition>
9450 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9452 pmuludq\t{%2, %0|%0, %2}
9453 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9454 [(set_attr "isa" "noavx,avx")
9455 (set_attr "type" "sseimul")
9456 (set_attr "prefix_data16" "1,*")
9457 (set_attr "prefix" "orig,maybe_evex")
9458 (set_attr "mode" "TI")])
;; Signed counterpart of the V16SI even-element widening multiply
;; expander: V8DI result from elements 0, 2, ..., 14 of two V16SI
;; operands.  The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy (MULT).
9460 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9461 [(set (match_operand:V8DI 0 "register_operand")
9465 (match_operand:V16SI 1 "nonimmediate_operand")
9466 (parallel [(const_int 0) (const_int 2)
9467 (const_int 4) (const_int 6)
9468 (const_int 8) (const_int 10)
9469 (const_int 12) (const_int 14)])))
9472 (match_operand:V16SI 2 "nonimmediate_operand")
9473 (parallel [(const_int 0) (const_int 2)
9474 (const_int 4) (const_int 6)
9475 (const_int 8) (const_int 10)
9476 (const_int 12) (const_int 14)])))))]
9478 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuldq on V16SI inputs: uses the even-indexed elements
;; (0, 2, ..., 14) of operands 1 and 2 and writes V8DI register
;; operand 0.  Requires TARGET_AVX512F; EVEX-encoded, with the optional
;; <mask_operand3> suffix on the destination.
9480 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9481 [(set (match_operand:V8DI 0 "register_operand" "=v")
9485 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9486 (parallel [(const_int 0) (const_int 2)
9487 (const_int 4) (const_int 6)
9488 (const_int 8) (const_int 10)
9489 (const_int 12) (const_int 14)])))
9492 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9493 (parallel [(const_int 0) (const_int 2)
9494 (const_int 4) (const_int 6)
9495 (const_int 8) (const_int 10)
9496 (const_int 12) (const_int 14)])))))]
9497 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9498 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9499 [(set_attr "isa" "avx512f")
9500 (set_attr "type" "sseimul")
9501 (set_attr "prefix_extra" "1")
9502 (set_attr "prefix" "evex")
9503 (set_attr "mode" "XI")])
;; Signed counterpart of the V8SI even-element widening multiply
;; expander: V4DI result from elements 0, 2, 4, 6 of two V8SI operands.
;; Enabled under TARGET_AVX2 and <mask_avx512vl_condition>; the
;; preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy (MULT).
9505 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9506 [(set (match_operand:V4DI 0 "register_operand")
9510 (match_operand:V8SI 1 "nonimmediate_operand")
9511 (parallel [(const_int 0) (const_int 2)
9512 (const_int 4) (const_int 6)])))
9515 (match_operand:V8SI 2 "nonimmediate_operand")
9516 (parallel [(const_int 0) (const_int 2)
9517 (const_int 4) (const_int 6)])))))]
9518 "TARGET_AVX2 && <mask_avx512vl_condition>"
9519 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on V8SI inputs: uses the even-indexed elements (0, 2, 4, 6)
;; of operands 1 and 2 and writes V4DI register operand 0.  The
;; destination carries the optional <mask_operand3> suffix.
;; NOTE(review): "prefix" is "vex" here while the unsigned V8SI sibling
;; uses "maybe_evex" -- confirm whether the difference is intended.
9521 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9522 [(set (match_operand:V4DI 0 "register_operand" "=v")
9526 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9527 (parallel [(const_int 0) (const_int 2)
9528 (const_int 4) (const_int 6)])))
9531 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9532 (parallel [(const_int 0) (const_int 2)
9533 (const_int 4) (const_int 6)])))))]
9535 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9536 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9537 [(set_attr "type" "sseimul")
9538 (set_attr "prefix_extra" "1")
9539 (set_attr "prefix" "vex")
9540 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; operands.  Enabled under TARGET_SSE4_1 and <mask_avx512vl_condition>;
;; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy (MULT).
9542 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9543 [(set (match_operand:V2DI 0 "register_operand")
9547 (match_operand:V4SI 1 "nonimmediate_operand")
9548 (parallel [(const_int 0) (const_int 2)])))
9551 (match_operand:V4SI 2 "nonimmediate_operand")
9552 (parallel [(const_int 0) (const_int 2)])))))]
9553 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
9554 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq/vpmuldq on V4SI inputs: uses elements 0 and 2 of operands 1
;; and 2 and writes V2DI register operand 0.  Three alternatives: two
;; legacy two-operand forms (isa "noavx", constraints "Yr" and "*x")
;; and one three-operand form (isa "avx") carrying the optional
;; <mask_operand3> suffix.
9556 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
9557 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
9561 (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
9562 (parallel [(const_int 0) (const_int 2)])))
9565 (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
9566 (parallel [(const_int 0) (const_int 2)])))))]
9567 "TARGET_SSE4_1 && <mask_avx512vl_condition>
9568 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9570 pmuldq\t{%2, %0|%0, %2}
9571 pmuldq\t{%2, %0|%0, %2}
9572 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9573 [(set_attr "isa" "noavx,noavx,avx")
9574 (set_attr "type" "sseimul")
9575 (set_attr "prefix_data16" "1,1,*")
9576 (set_attr "prefix_extra" "1")
9577 (set_attr "prefix" "orig,orig,vex")
9578 (set_attr "mode" "TI")])
;; vpmaddwd modelled as an unspec (UNSPEC_PMADDWD512): operands 1 and 2
;; are VI2_AVX2 vectors and the result has mode <sseunpackmode>.
;; Enabled under TARGET_AVX512BW and <mask_mode512bit_condition>;
;; EVEX-encoded, with the optional <mask_operand3> suffix on the
;; destination.
9580 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
9581 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
9582 (unspec:<sseunpackmode>
9583 [(match_operand:VI2_AVX2 1 "register_operand" "v")
9584 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
9585 UNSPEC_PMADDWD512))]
9586 "TARGET_AVX512BW && <mask_mode512bit_condition>"
9587 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9588 [(set_attr "type" "sseiadd")
9589 (set_attr "prefix" "evex")
9590 (set_attr "mode" "XI")])
;; Expander for the AVX2 pmaddwd pattern: V8SI result built from the
;; even-indexed (0, 2, ..., 14) and odd-indexed (1, 3, ..., 15) V8HI
;; halves of V16HI operands 1 and 2.  The preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy (MULT).
9592 (define_expand "avx2_pmaddwd"
9593 [(set (match_operand:V8SI 0 "register_operand")
9598 (match_operand:V16HI 1 "nonimmediate_operand")
9599 (parallel [(const_int 0) (const_int 2)
9600 (const_int 4) (const_int 6)
9601 (const_int 8) (const_int 10)
9602 (const_int 12) (const_int 14)])))
9605 (match_operand:V16HI 2 "nonimmediate_operand")
9606 (parallel [(const_int 0) (const_int 2)
9607 (const_int 4) (const_int 6)
9608 (const_int 8) (const_int 10)
9609 (const_int 12) (const_int 14)]))))
9612 (vec_select:V8HI (match_dup 1)
9613 (parallel [(const_int 1) (const_int 3)
9614 (const_int 5) (const_int 7)
9615 (const_int 9) (const_int 11)
9616 (const_int 13) (const_int 15)])))
9618 (vec_select:V8HI (match_dup 2)
9619 (parallel [(const_int 1) (const_int 3)
9620 (const_int 5) (const_int 7)
9621 (const_int 9) (const_int 11)
9622 (const_int 13) (const_int 15)]))))))]
9624 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmaddwd on V16HI inputs (TARGET_AVX2): the pattern combines the
;; even-indexed and odd-indexed V8HI selections of operands 1 and 2
;; into V8SI register operand 0.  VEX-encoded, three-operand form only.
9626 (define_insn "*avx2_pmaddwd"
9627 [(set (match_operand:V8SI 0 "register_operand" "=x")
9632 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
9633 (parallel [(const_int 0) (const_int 2)
9634 (const_int 4) (const_int 6)
9635 (const_int 8) (const_int 10)
9636 (const_int 12) (const_int 14)])))
9639 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9640 (parallel [(const_int 0) (const_int 2)
9641 (const_int 4) (const_int 6)
9642 (const_int 8) (const_int 10)
9643 (const_int 12) (const_int 14)]))))
9646 (vec_select:V8HI (match_dup 1)
9647 (parallel [(const_int 1) (const_int 3)
9648 (const_int 5) (const_int 7)
9649 (const_int 9) (const_int 11)
9650 (const_int 13) (const_int 15)])))
9652 (vec_select:V8HI (match_dup 2)
9653 (parallel [(const_int 1) (const_int 3)
9654 (const_int 5) (const_int 7)
9655 (const_int 9) (const_int 11)
9656 (const_int 13) (const_int 15)]))))))]
9657 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9658 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9659 [(set_attr "type" "sseiadd")
9660 (set_attr "prefix" "vex")
9661 (set_attr "mode" "OI")])
;; Expander for the SSE2 pmaddwd pattern: V4SI result built from the
;; even-indexed (0, 2, 4, 6) and odd-indexed (1, 3, 5, 7) V4HI halves of
;; V8HI operands 1 and 2.  The preparation statement hands the operands
;; to ix86_fixup_binary_operands_no_copy (MULT).
9663 (define_expand "sse2_pmaddwd"
9664 [(set (match_operand:V4SI 0 "register_operand")
9669 (match_operand:V8HI 1 "nonimmediate_operand")
9670 (parallel [(const_int 0) (const_int 2)
9671 (const_int 4) (const_int 6)])))
9674 (match_operand:V8HI 2 "nonimmediate_operand")
9675 (parallel [(const_int 0) (const_int 2)
9676 (const_int 4) (const_int 6)]))))
9679 (vec_select:V4HI (match_dup 1)
9680 (parallel [(const_int 1) (const_int 3)
9681 (const_int 5) (const_int 7)])))
9683 (vec_select:V4HI (match_dup 2)
9684 (parallel [(const_int 1) (const_int 3)
9685 (const_int 5) (const_int 7)]))))))]
9687 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd/vpmaddwd on V8HI inputs (TARGET_SSE2): the pattern combines
;; the even-indexed and odd-indexed V4HI selections of operands 1 and 2
;; into V4SI register operand 0.  Two alternatives: the two-operand
;; legacy form (isa "noavx", destination tied to operand 1) and the
;; three-operand VEX form (isa "avx").
9689 (define_insn "*sse2_pmaddwd"
9690 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9695 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9696 (parallel [(const_int 0) (const_int 2)
9697 (const_int 4) (const_int 6)])))
9700 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9701 (parallel [(const_int 0) (const_int 2)
9702 (const_int 4) (const_int 6)]))))
9705 (vec_select:V4HI (match_dup 1)
9706 (parallel [(const_int 1) (const_int 3)
9707 (const_int 5) (const_int 7)])))
9709 (vec_select:V4HI (match_dup 2)
9710 (parallel [(const_int 1) (const_int 3)
9711 (const_int 5) (const_int 7)]))))))]
9712 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9714 pmaddwd\t{%2, %0|%0, %2}
9715 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9716 [(set_attr "isa" "noavx,avx")
9717 (set_attr "type" "sseiadd")
9718 (set_attr "atom_unit" "simul")
9719 (set_attr "prefix_data16" "1,*")
9720 (set_attr "prefix" "orig,vex")
9721 (set_attr "mode" "TI")])
;; vpmullq for the VI8 modes: register operand 1 and register-or-memory
;; operand 2 produce register operand 0.  Enabled under TARGET_AVX512DQ
;; and <mask_mode512bit_condition>; EVEX-encoded, with the optional
;; <mask_operand3> suffix on the destination.
9723 (define_insn "avx512dq_mul<mode>3<mask_name>"
9724 [(set (match_operand:VI8 0 "register_operand" "=v")
9726 (match_operand:VI8 1 "register_operand" "v")
9727 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9728 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9729 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9730 [(set_attr "type" "sseimul")
9731 (set_attr "prefix" "evex")
9732 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for the VI4_AVX512F modes; inputs are accepted as
;; general_vector_operand.  Two code paths are visible: one forces any
;; input that is not a nonimmediate_operand into a register and then
;; calls ix86_fixup_binary_operands_no_copy; the other delegates to
;; ix86_expand_sse2_mulv4si3.
;; NOTE(review): presumably the first path is taken when a native
;; multiply insn is available -- confirm the selecting condition.
9734 (define_expand "mul<mode>3<mask_name>"
9735 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9737 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9738 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9739 "TARGET_SSE2 && <mask_mode512bit_condition>"
9743 if (!nonimmediate_operand (operands[1], <MODE>mode))
9744 operands[1] = force_reg (<MODE>mode, operands[1]);
9745 if (!nonimmediate_operand (operands[2], <MODE>mode))
9746 operands[2] = force_reg (<MODE>mode, operands[2]);
9747 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9751 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; pmulld/vpmulld for the VI4_AVX512F modes (TARGET_SSE4_1).  Three
;; alternatives: two legacy two-operand forms (isa "noavx", constraints
;; "Yr" and "*x", destination tied to operand 1) and one three-operand
;; form (isa "avx") carrying the optional <mask_operand3> suffix.
9756 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9757 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
9759 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
9760 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
9761 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9763 pmulld\t{%2, %0|%0, %2}
9764 pmulld\t{%2, %0|%0, %2}
9765 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9766 [(set_attr "isa" "noavx,noavx,avx")
9767 (set_attr "type" "sseimul")
9768 (set_attr "prefix_extra" "1")
9769 (set_attr "prefix" "<mask_prefix4>")
9770 (set_attr "btver2_decode" "vector,vector,vector")
9771 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for the VI8_AVX2_AVX512F modes.  All operands are
;; registers; the expansion is delegated to ix86_expand_sse2_mulvxdi3.
9773 (define_expand "mul<mode>3"
9774 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9775 (mult:VI8_AVX2_AVX512F
9776 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9777 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9780 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply ("hi" variant) for the VI124_AVX2 modes, signed or
;; unsigned via any_extend.  The <sseunpackmode> result goes to
;; operand 0; expansion is delegated to ix86_expand_mul_widen_hilo.
9784 (define_expand "vec_widen_<s>mult_hi_<mode>"
9785 [(match_operand:<sseunpackmode> 0 "register_operand")
9786 (any_extend:<sseunpackmode>
9787 (match_operand:VI124_AVX2 1 "register_operand"))
9788 (match_operand:VI124_AVX2 2 "register_operand")]
9791 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply ("lo" variant) for the VI124_AVX2 modes, signed or
;; unsigned via any_extend.  The <sseunpackmode> result goes to
;; operand 0; expansion is delegated to ix86_expand_mul_widen_hilo.
9796 (define_expand "vec_widen_<s>mult_lo_<mode>"
9797 [(match_operand:<sseunpackmode> 0 "register_operand")
9798 (any_extend:<sseunpackmode>
9799 (match_operand:VI124_AVX2 1 "register_operand"))
9800 (match_operand:VI124_AVX2 2 "register_operand")]
9803 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9808 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
9809 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Operands: 0 = V2DI result register, 1 and 2 = V4SI inputs (register
;; or memory).  Expansion is delegated to ix86_expand_mul_widen_evenodd.
9810 (define_expand "vec_widen_smult_even_v4si"
9811 [(match_operand:V2DI 0 "register_operand")
9812 (match_operand:V4SI 1 "nonimmediate_operand")
9813 (match_operand:V4SI 2 "nonimmediate_operand")]
9816 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply ("odd" variant) for the VI4_AVX512F modes, signed
;; or unsigned via any_extend.  Inputs are accepted as
;; general_vector_operand; expansion is delegated to
;; ix86_expand_mul_widen_evenodd.
9821 (define_expand "vec_widen_<s>mult_odd_<mode>"
9822 [(match_operand:<sseunpackmode> 0 "register_operand")
9823 (any_extend:<sseunpackmode>
9824 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9825 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9828 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix appended to the pmaddwd generator name used by sdot_prod<mode>:
;; "512v32hi" for V32HI, empty for V16HI and V8HI.
9833 (define_mode_attr SDOT_PMADD_SUF
9834 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; Signed dot product for the VI2_AVX2 modes.  A pmaddwd of operands 1
;; and 2 is emitted into a fresh <sseunpackmode> temporary, then operand 0
;; is set to a PLUS in <sseunpackmode>.  Operand 3 has the result mode;
;; presumably it is the accumulator added to the temporary.
9836 (define_expand "sdot_prod<mode>"
9837 [(match_operand:<sseunpackmode> 0 "register_operand")
9838 (match_operand:VI2_AVX2 1 "register_operand")
9839 (match_operand:VI2_AVX2 2 "register_operand")
9840 (match_operand:<sseunpackmode> 3 "register_operand")]
9843 rtx t = gen_reg_rtx (<sseunpackmode>mode);
9844 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
9845 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9846 gen_rtx_PLUS (<sseunpackmode>mode,
9851 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9852 ;; back together when madd is available.
;; Two chained XOP multiply-accumulates: pmacsdqh accumulates onto
;; operand 3 into a V2DI temporary, then pmacsdql accumulates onto that
;; temporary into operand 0.
9853 (define_expand "sdot_prodv4si"
9854 [(match_operand:V2DI 0 "register_operand")
9855 (match_operand:V4SI 1 "register_operand")
9856 (match_operand:V4SI 2 "register_operand")
9857 (match_operand:V2DI 3 "register_operand")]
9860 rtx t = gen_reg_rtx (V2DImode);
9861 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
9862 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Sum of absolute differences on V16QI with a V4SI accumulator.
;; psadbw of operands 1 and 2 yields a V2DI temporary, convert_move puts
;; it into a V4SI temporary, and addv4si3 adds operand 3 into operand 0.
9866 (define_expand "usadv16qi"
9867 [(match_operand:V4SI 0 "register_operand")
9868 (match_operand:V16QI 1 "register_operand")
9869 (match_operand:V16QI 2 "nonimmediate_operand")
9870 (match_operand:V4SI 3 "nonimmediate_operand")]
9873 rtx t1 = gen_reg_rtx (V2DImode);
9874 rtx t2 = gen_reg_rtx (V4SImode);
9875 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
9876 convert_move (t2, t1, 0);
9877 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; 256-bit counterpart of usadv16qi: avx2_psadbw of operands 1 and 2
;; yields a V4DI temporary, convert_move puts it into a V8SI temporary,
;; and addv8si3 adds operand 3 into operand 0.
9881 (define_expand "usadv32qi"
9882 [(match_operand:V8SI 0 "register_operand")
9883 (match_operand:V32QI 1 "register_operand")
9884 (match_operand:V32QI 2 "nonimmediate_operand")
9885 (match_operand:V8SI 3 "nonimmediate_operand")]
9888 rtx t1 = gen_reg_rtx (V4DImode);
9889 rtx t2 = gen_reg_rtx (V8SImode);
9890 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
9891 convert_move (t2, t1, 0);
9892 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; Arithmetic right shift (psra) for the VI24_AVX2 modes.  Alternative 0
;; is the two-operand legacy form with the destination tied to operand 1;
;; alternative 1 is the three-operand "v"-prefixed form.  The SI count
;; uses constraint "xN" (SSE register or "N" immediate); length_immediate
;; depends on whether operand 2 is a const_int.
9896 (define_insn "ashr<mode>3"
9897 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
9899 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
9900 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
9903 psra<ssemodesuffix>\t{%2, %0|%0, %2}
9904 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9905 [(set_attr "isa" "noavx,avx")
9906 (set_attr "type" "sseishft")
9907 (set (attr "length_immediate")
9908 (if_then_else (match_operand 2 "const_int_operand")
9910 (const_string "0")))
9911 (set_attr "prefix_data16" "1,*")
9912 (set_attr "prefix" "orig,vex")
9913 (set_attr "mode" "<sseinsnmode>")])
;; Arithmetic right shift (vpsra) for the VI24_AVX512BW_1 modes, with the
;; optional masking supplied by <mask_name>.  Alternative 0 takes the
;; count from a "v" register; alternative 1 takes an "N" immediate count
;; and then also allows operand 1 to be in memory.
9915 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
9916 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
9917 (ashiftrt:VI24_AVX512BW_1
9918 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
9919 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9921 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9922 [(set_attr "type" "sseishft")
9923 (set (attr "length_immediate")
9924 (if_then_else (match_operand 2 "const_int_operand")
9926 (const_string "0")))
9927 (set_attr "mode" "<sseinsnmode>")])
;; V2DI arithmetic right shift emitted as vpsraq, with the optional
;; masking supplied by <mask_name>.  The count is a DI operand: a "v"
;; register (alternative 0) or an "N" immediate (alternative 1, which also
;; allows a memory source).
9929 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
9930 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
9932 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
9933 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
9935 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9936 [(set_attr "type" "sseishft")
9937 (set (attr "length_immediate")
9938 (if_then_else (match_operand 2 "const_int_operand")
9940 (const_string "0")))
9941 (set_attr "mode" "TI")])
;; Arithmetic right shift (vpsra) for the VI248_AVX512BW_AVX512VL modes,
;; with the optional masking supplied by <mask_name>.  Register count in
;; alternative 0; immediate count with register-or-memory source in
;; alternative 1.
9943 (define_insn "ashr<mode>3<mask_name>"
9944 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
9945 (ashiftrt:VI248_AVX512BW_AVX512VL
9946 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
9947 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9949 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9950 [(set_attr "type" "sseishft")
9951 (set (attr "length_immediate")
9952 (if_then_else (match_operand 2 "const_int_operand")
9954 (const_string "0")))
9955 (set_attr "mode" "<sseinsnmode>")])
;; Shifts covered by the any_lshift iterator for the VI2_AVX2_AVX512BW
;; modes.  Alternative 0 is the two-operand legacy form (destination tied
;; to operand 1); alternative 1 is the three-operand "v"-prefixed form and
;; may carry a mask operand.  Requires TARGET_SSE2 plus the mask-related
;; conditions in the insn condition.
9957 (define_insn "<shift_insn><mode>3<mask_name>"
9958 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
9959 (any_lshift:VI2_AVX2_AVX512BW
9960 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
9961 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9962 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9964 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9965 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9966 [(set_attr "isa" "noavx,avx")
9967 (set_attr "type" "sseishft")
9968 (set (attr "length_immediate")
9969 (if_then_else (match_operand 2 "const_int_operand")
9971 (const_string "0")))
9972 (set_attr "prefix_data16" "1,*")
9973 (set_attr "prefix" "orig,vex")
9974 (set_attr "mode" "<sseinsnmode>")])
;; Shifts covered by the any_lshift iterator for the VI48_AVX2 modes.
;; Same two alternatives as the VI2 pattern: legacy two-operand form and
;; "v"-prefixed three-operand form with optional mask operand.  Requires
;; TARGET_SSE2 and <mask_mode512bit_condition>.
9976 (define_insn "<shift_insn><mode>3<mask_name>"
9977 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
9978 (any_lshift:VI48_AVX2
9979 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
9980 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9981 "TARGET_SSE2 && <mask_mode512bit_condition>"
9983 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9984 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9985 [(set_attr "isa" "noavx,avx")
9986 (set_attr "type" "sseishft")
9987 (set (attr "length_immediate")
9988 (if_then_else (match_operand 2 "const_int_operand")
9990 (const_string "0")))
9991 (set_attr "prefix_data16" "1,*")
9992 (set_attr "prefix" "orig,vex")
9993 (set_attr "mode" "<sseinsnmode>")])
;; Shifts covered by the any_lshift iterator for the VI48_512 modes,
;; requiring TARGET_AVX512F.  Alternative 0: register source, count in a
;; register or an "N" immediate.  Alternative 1: memory source, which is
;; only paired with an immediate count.
9995 (define_insn "<shift_insn><mode>3<mask_name>"
9996 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
9997 (any_lshift:VI48_512
9998 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
9999 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
10000 "TARGET_AVX512F && <mask_mode512bit_condition>"
10001 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10002 [(set_attr "isa" "avx512f")
10003 (set_attr "type" "sseishft")
10004 (set (attr "length_immediate")
10005 (if_then_else (match_operand 2 "const_int_operand")
10007 (const_string "0")))
10008 (set_attr "prefix" "evex")
10009 (set_attr "mode" "<sseinsnmode>")])
;; Whole-register shift for the VI_128 modes, carried out in V1TImode.
;; Operand 1 is re-viewed as V1TI with gen_lowpart, the shift result goes
;; to a fresh V1TI register (operand 3), and operand 4 is that register
;; viewed in <MODE>mode, which is then copied to operand 0.  The count
;; (operand 2) must satisfy const_0_to_255_mul_8_operand.
10012 (define_expand "vec_shl_<mode>"
10013 [(set (match_dup 3)
10015 (match_operand:VI_128 1 "register_operand")
10016 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10017 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10020 operands[1] = gen_lowpart (V1TImode, operands[1]);
10021 operands[3] = gen_reg_rtx (V1TImode);
10022 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Byte-granular left shift (pslldq / vpslldq) for the VIMAX_AVX2 modes.
;; Operand 2 arrives as a bit count that is a multiple of 8; the output
;; code divides it by 8 to obtain the byte count the instruction takes.
;; Alternative 0 emits the two-operand form, alternative 1 the
;; three-operand "v"-prefixed form.
10025 (define_insn "<sse2_avx2>_ashl<mode>3"
10026 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10028 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10029 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10032 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10034 switch (which_alternative)
10037 return "pslldq\t{%2, %0|%0, %2}";
10039 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10041 gcc_unreachable ();
10044 [(set_attr "isa" "noavx,avx")
10045 (set_attr "type" "sseishft")
10046 (set_attr "length_immediate" "1")
10047 (set_attr "prefix_data16" "1,*")
10048 (set_attr "prefix" "orig,vex")
10049 (set_attr "mode" "<sseinsnmode>")])
;; Counterpart of vec_shl_<mode>: whole-register shift for the VI_128
;; modes, carried out in V1TImode.  Operand 1 is re-viewed as V1TI, the
;; result goes to a fresh V1TI register (operand 3), and operand 4 is that
;; register viewed in <MODE>mode, which is then copied to operand 0.
10051 (define_expand "vec_shr_<mode>"
10052 [(set (match_dup 3)
10054 (match_operand:VI_128 1 "register_operand")
10055 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10056 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10059 operands[1] = gen_lowpart (V1TImode, operands[1]);
10060 operands[3] = gen_reg_rtx (V1TImode);
10061 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Byte-granular logical right shift (psrldq / vpsrldq) for the
;; VIMAX_AVX2 modes.  Operand 2 arrives as a bit count that is a multiple
;; of 8; the output code divides it by 8 to obtain the byte count.
;; Alternative 0 emits the two-operand form, alternative 1 the
;; three-operand "v"-prefixed form.
10064 (define_insn "<sse2_avx2>_lshr<mode>3"
10065 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10066 (lshiftrt:VIMAX_AVX2
10067 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10068 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10071 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10073 switch (which_alternative)
10076 return "psrldq\t{%2, %0|%0, %2}";
10078 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10080 gcc_unreachable ();
10083 [(set_attr "isa" "noavx,avx")
10084 (set_attr "type" "sseishft")
10085 (set_attr "length_immediate" "1")
10086 (set_attr "atom_unit" "sishuf")
10087 (set_attr "prefix_data16" "1,*")
10088 (set_attr "prefix" "orig,vex")
10089 (set_attr "mode" "<sseinsnmode>")])
;; Variable rotate (vp<rotate>v) for the VI48_AVX512VL modes: the rotate
;; counts come from operand 2, a vector of the same mode that may be a
;; register or memory.  Optional masking is supplied by <mask_name>.
10091 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10092 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10093 (any_rotate:VI48_AVX512VL
10094 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10095 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10097 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10098 [(set_attr "prefix" "evex")
10099 (set_attr "mode" "<sseinsnmode>")])
;; Rotate by a constant (vp<rotate>) for the VI48_AVX512VL modes.  The
;; count is an SI operand accepted by const_0_to_255_operand; the source
;; may be a register or memory.  Optional masking via <mask_name>.
10101 (define_insn "<avx512>_<rotate><mode><mask_name>"
10102 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10103 (any_rotate:VI48_AVX512VL
10104 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10105 (match_operand:SI 2 "const_0_to_255_operand")))]
10107 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10108 [(set_attr "prefix" "evex")
10109 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the maxmin codes over the VI124_256_AVX512F_AVX512BW
;; modes.  The only preparation is
;; ix86_fixup_binary_operands_no_copy on the operand array.
10111 (define_expand "<code><mode>3"
10112 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10113 (maxmin:VI124_256_AVX512F_AVX512BW
10114 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10115 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10117 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer max/min (vp<maxmin_int>) for the VI124_256 modes.
;; Operand 1 carries the "%" commutative marker; the insn condition also
;; requires ix86_binary_operator_ok.
10119 (define_insn "*avx2_<code><mode>3"
10120 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10122 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10123 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10124 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10125 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10126 [(set_attr "type" "sseiadd")
10127 (set_attr "prefix_extra" "1")
10128 (set_attr "prefix" "vex")
10129 (set_attr "mode" "OI")])
;; Masked max/min expander for the VI48_AVX512VL modes.  The maxmin
;; result is vec_merge'd with operand 3 under the mask register in
;; operand 4.  Preparation is ix86_fixup_binary_operands_no_copy.
10131 (define_expand "<code><mode>3_mask"
10132 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10133 (vec_merge:VI48_AVX512VL
10134 (maxmin:VI48_AVX512VL
10135 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10136 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10137 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10138 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10140 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer max/min (vp<maxmin_int>) for the VI48_AVX512VL modes, with
;; optional masking via <mask_name>.  Operand 1 is commutative ("%").
;; NOTE(review): the pattern name says "avx512bw" but the insn condition
;; tests TARGET_AVX512F; confirm the name is intentional.
10142 (define_insn "*avx512bw_<code><mode>3<mask_name>"
10143 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10144 (maxmin:VI48_AVX512VL
10145 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10146 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10147 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10148 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10149 [(set_attr "type" "sseiadd")
10150 (set_attr "prefix_extra" "1")
10151 (set_attr "prefix" "maybe_evex")
10152 (set_attr "mode" "<sseinsnmode>")])
;; Integer max/min (vp<maxmin_int>) for the VI12_AVX512VL modes, with
;; optional masking via <mask_name>.  Operand 1 must be a register and has
;; no commutative marker; operand 2 may be a register or memory.
10154 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10155 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10156 (maxmin:VI12_AVX512VL
10157 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10158 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10160 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10161 [(set_attr "type" "sseiadd")
10162 (set_attr "prefix" "evex")
10163 (set_attr "mode" "<sseinsnmode>")])
;; Max/min expander for the VI8_AVX2_AVX512BW modes.  When the target
;; test passes (it includes <MODE>mode == V8DImode || TARGET_AVX512VL) the
;; operands are only fixed up for the matching insn.  Otherwise the
;; operation is rewritten as a vector conditional for
;; ix86_expand_int_vcond: xops[3] compares operand 1 with operand 2 using
;; GTU for UMAX/UMIN and GT for the signed codes, and the two selected
;; values are operands 1,2 for the max codes and swapped for the min codes.
10165 (define_expand "<code><mode>3"
10166 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10167 (maxmin:VI8_AVX2_AVX512BW
10168 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10169 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10173 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10174 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10177 enum rtx_code code;
10182 xops[0] = operands[0];
10184 if (<CODE> == SMAX || <CODE> == UMAX)
10186 xops[1] = operands[1];
10187 xops[2] = operands[2];
10191 xops[1] = operands[2];
10192 xops[2] = operands[1];
10195 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10197 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10198 xops[4] = operands[1];
10199 xops[5] = operands[2];
10201 ok = ix86_expand_int_vcond (xops);
;; Max/min expander for the VI124_128 modes; the fallback tests only
;; SMAX and uses a signed GT compare.  With TARGET_SSE4_1, or for V8HI,
;; the operands are just fixed up for the matching insn.  Otherwise both
;; inputs are forced into registers and the operation becomes a vector
;; conditional for ix86_expand_int_vcond, selecting operands 1,2 for SMAX
;; and the swapped pair for the other code.
10207 (define_expand "<code><mode>3"
10208 [(set (match_operand:VI124_128 0 "register_operand")
10210 (match_operand:VI124_128 1 "nonimmediate_operand")
10211 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10214 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10215 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10221 xops[0] = operands[0];
10222 operands[1] = force_reg (<MODE>mode, operands[1]);
10223 operands[2] = force_reg (<MODE>mode, operands[2]);
10225 if (<CODE> == SMAX)
10227 xops[1] = operands[1];
10228 xops[2] = operands[2];
10232 xops[1] = operands[2];
10233 xops[2] = operands[1];
10236 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10237 xops[4] = operands[1];
10238 xops[5] = operands[2];
10240 ok = ix86_expand_int_vcond (xops);
;; Integer max/min insn (p<maxmin_int> / vp<maxmin_int>) for the VI14_128
;; modes.  Alternatives 0 and 1 are the two-operand legacy form with the
;; destination tied to operand 1; alternative 2 is the three-operand
;; "v"-prefixed form and may carry a mask operand.
10246 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10247 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
10249 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
10250 (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10252 && <mask_mode512bit_condition>
10253 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10255 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10256 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10257 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10258 [(set_attr "isa" "noavx,noavx,avx")
10259 (set_attr "type" "sseiadd")
10260 (set_attr "prefix_extra" "1,1,*")
10261 (set_attr "prefix" "orig,orig,vex")
10262 (set_attr "mode" "TI")])
;; V8HI integer max/min (p<maxmin_int>w / vp<maxmin_int>w) available
;; from TARGET_SSE2.  Alternative 0 is the two-operand legacy form;
;; alternative 1 is the three-operand "v"-prefixed form.
10264 (define_insn "*<code>v8hi3"
10265 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10267 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10268 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
10269 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10271 p<maxmin_int>w\t{%2, %0|%0, %2}
10272 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10273 [(set_attr "isa" "noavx,avx")
10274 (set_attr "type" "sseiadd")
10275 (set_attr "prefix_data16" "1,*")
10276 (set_attr "prefix_extra" "*,1")
10277 (set_attr "prefix" "orig,vex")
10278 (set_attr "mode" "TI")])
;; Max/min expander for the VI124_128 modes; the fallbacks test UMAX and
;; use an unsigned GTU compare.  Three paths:
;;  - TARGET_SSE4_1, or V16QI: only fix up operands for the matching insn.
;;  - UMAX on V8HI: emit sse2_ussubv8hi3 (operand 1, operand 2) followed by
;;    addv8hi3 with operand 2.  A fresh temporary is used when operand 0
;;    is the same rtx as operand 2, so operand 2 is still intact for the
;;    add.
;;  - otherwise: force both inputs into registers and hand a GTU-based
;;    vector conditional to ix86_expand_int_vcond.
10280 (define_expand "<code><mode>3"
10281 [(set (match_operand:VI124_128 0 "register_operand")
10283 (match_operand:VI124_128 1 "nonimmediate_operand")
10284 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10287 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10288 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10289 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10291 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10292 operands[1] = force_reg (<MODE>mode, operands[1]);
10293 if (rtx_equal_p (op3, op2))
10294 op3 = gen_reg_rtx (V8HImode);
10295 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10296 emit_insn (gen_addv8hi3 (op0, op3, op2));
10304 operands[1] = force_reg (<MODE>mode, operands[1]);
10305 operands[2] = force_reg (<MODE>mode, operands[2]);
10307 xops[0] = operands[0];
10309 if (<CODE> == UMAX)
10311 xops[1] = operands[1];
10312 xops[2] = operands[2];
10316 xops[1] = operands[2];
10317 xops[2] = operands[1];
10320 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10321 xops[4] = operands[1];
10322 xops[5] = operands[2];
10324 ok = ix86_expand_int_vcond (xops);
;; Integer max/min insn (p<maxmin_int> / vp<maxmin_int>) for the VI24_128
;; modes.  Alternatives 0 and 1 are the two-operand legacy form with the
;; destination tied to operand 1; alternative 2 is the three-operand
;; "v"-prefixed form and may carry a mask operand.
10330 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10331 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
10333 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
10334 (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10336 && <mask_mode512bit_condition>
10337 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10339 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10340 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10341 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10342 [(set_attr "isa" "noavx,noavx,avx")
10343 (set_attr "type" "sseiadd")
10344 (set_attr "prefix_extra" "1,1,*")
10345 (set_attr "prefix" "orig,orig,vex")
10346 (set_attr "mode" "TI")])
;; V16QI integer max/min (p<maxmin_int>b / vp<maxmin_int>b) available
;; from TARGET_SSE2.  Alternative 0 is the two-operand legacy form;
;; alternative 1 is the three-operand "v"-prefixed form.
10348 (define_insn "*<code>v16qi3"
10349 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10351 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10352 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10353 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10355 p<maxmin_int>b\t{%2, %0|%0, %2}
10356 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10357 [(set_attr "isa" "noavx,avx")
10358 (set_attr "type" "sseiadd")
10359 (set_attr "prefix_data16" "1,*")
10360 (set_attr "prefix_extra" "*,1")
10361 (set_attr "prefix" "orig,vex")
10362 (set_attr "mode" "TI")])
10364 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10366 ;; Parallel integral comparisons
10368 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for element-wise equality on the VI_256 modes.  Its
;; only preparation is ix86_fixup_binary_operands_no_copy for EQ.
10370 (define_expand "avx2_eq<mode>3"
10371 [(set (match_operand:VI_256 0 "register_operand")
10373 (match_operand:VI_256 1 "nonimmediate_operand")
10374 (match_operand:VI_256 2 "nonimmediate_operand")))]
10376 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 element-wise equality compare (vpcmpeq) for the VI_256 modes.
;; Operand 1 is commutative ("%"); operand 2 may be in memory.
10378 (define_insn "*avx2_eq<mode>3"
10379 [(set (match_operand:VI_256 0 "register_operand" "=x")
10381 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10382 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10383 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10384 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10385 [(set_attr "type" "ssecmp")
10386 (set_attr "prefix_extra" "1")
10387 (set_attr "prefix" "vex")
10388 (set_attr "mode" "OI")])
;; Expander for the mask-producing equality compare on the VI12_AVX512VL
;; modes.  The result has mode <avx512fmaskmode> and is expressed as
;; UNSPEC_MASKED_EQ; operands are fixed up for EQ before matching.
10390 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10391 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10392 (unspec:<avx512fmaskmode>
10393 [(match_operand:VI12_AVX512VL 1 "register_operand")
10394 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10395 UNSPEC_MASKED_EQ))]
10397 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for the mask-producing equality compare on the VI48_AVX512VL
;; modes.  The result has mode <avx512fmaskmode> and is expressed as
;; UNSPEC_MASKED_EQ; operands are fixed up for EQ before matching.
10399 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10400 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10401 (unspec:<avx512fmaskmode>
10402 [(match_operand:VI48_AVX512VL 1 "register_operand")
10403 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10404 UNSPEC_MASKED_EQ))]
10406 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Insn behind the VI12_AVX512VL equality expander: vpcmpeq writing a
;; mask-mode result with constraint "Yk".  Operand 1 is commutative ("%");
;; the <mask_scalar_merge_*> substitutions add the optional mask operand.
10408 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10409 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10410 (unspec:<avx512fmaskmode>
10411 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10412 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10413 UNSPEC_MASKED_EQ))]
10414 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10415 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10416 [(set_attr "type" "ssecmp")
10417 (set_attr "prefix_extra" "1")
10418 (set_attr "prefix" "evex")
10419 (set_attr "mode" "<sseinsnmode>")])
;; Insn behind the VI48_AVX512VL equality expander: vpcmpeq writing a
;; mask-mode result with constraint "Yk".  Operand 1 is commutative ("%");
;; the <mask_scalar_merge_*> substitutions add the optional mask operand.
10421 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10422 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10423 (unspec:<avx512fmaskmode>
10424 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10425 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10426 UNSPEC_MASKED_EQ))]
10427 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10428 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10429 [(set_attr "type" "ssecmp")
10430 (set_attr "prefix_extra" "1")
10431 (set_attr "prefix" "evex")
10432 (set_attr "mode" "<sseinsnmode>")])
;; V2DI equality compare (pcmpeqq / vpcmpeqq), requiring TARGET_SSE4_1.
;; Alternatives 0 and 1 are the two-operand legacy form; alternative 2 is
;; the three-operand "v"-prefixed form.
10434 (define_insn "*sse4_1_eqv2di3"
10435 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10437 (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
10438 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10439 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10441 pcmpeqq\t{%2, %0|%0, %2}
10442 pcmpeqq\t{%2, %0|%0, %2}
10443 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10444 [(set_attr "isa" "noavx,noavx,avx")
10445 (set_attr "type" "ssecmp")
10446 (set_attr "prefix_extra" "1")
10447 (set_attr "prefix" "orig,orig,vex")
10448 (set_attr "mode" "TI")])
;; Equality compare (pcmpeq / vpcmpeq) for the VI124_128 modes.  Enabled
;; for TARGET_SSE2 only when TARGET_XOP is off.  Alternative 0 is the
;; two-operand legacy form; alternative 1 the three-operand form.
10450 (define_insn "*sse2_eq<mode>3"
10451 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10453 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10454 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10455 "TARGET_SSE2 && !TARGET_XOP
10456 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10458 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10459 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10460 [(set_attr "isa" "noavx,avx")
10461 (set_attr "type" "ssecmp")
10462 (set_attr "prefix_data16" "1,*")
10463 (set_attr "prefix" "orig,vex")
10464 (set_attr "mode" "TI")])
;; Named expander for equality on the VI124_128 modes, enabled for
;; TARGET_SSE2 when TARGET_XOP is off.  Operands are fixed up for EQ
;; before matching.
10466 (define_expand "sse2_eq<mode>3"
10467 [(set (match_operand:VI124_128 0 "register_operand")
10469 (match_operand:VI124_128 1 "nonimmediate_operand")
10470 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10471 "TARGET_SSE2 && !TARGET_XOP "
10472 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for V2DI equality.  Operands are fixed up for EQ
;; before matching.
10474 (define_expand "sse4_1_eqv2di3"
10475 [(set (match_operand:V2DI 0 "register_operand")
10477 (match_operand:V2DI 1 "nonimmediate_operand")
10478 (match_operand:V2DI 2 "nonimmediate_operand")))]
10480 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than compare (pcmpgtq / vpcmpgtq).  Operand 1 must be a
;; register and carries no commutative marker.  Alternatives 0 and 1 are
;; the two-operand legacy form; alternative 2 is the three-operand form.
10482 (define_insn "sse4_2_gtv2di3"
10483 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10485 (match_operand:V2DI 1 "register_operand" "0,0,x")
10486 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10489 pcmpgtq\t{%2, %0|%0, %2}
10490 pcmpgtq\t{%2, %0|%0, %2}
10491 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10492 [(set_attr "isa" "noavx,noavx,avx")
10493 (set_attr "type" "ssecmp")
10494 (set_attr "prefix_extra" "1")
10495 (set_attr "prefix" "orig,orig,vex")
10496 (set_attr "mode" "TI")])
;; Greater-than compare (vpcmpgt) for the VI_256 modes.  Operand 1 must
;; be a register; operand 2 may be a register or memory.
10498 (define_insn "avx2_gt<mode>3"
10499 [(set (match_operand:VI_256 0 "register_operand" "=x")
10501 (match_operand:VI_256 1 "register_operand" "x")
10502 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10504 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10505 [(set_attr "type" "ssecmp")
10506 (set_attr "prefix_extra" "1")
10507 (set_attr "prefix" "vex")
10508 (set_attr "mode" "OI")])
;; Mask-producing greater-than compare (vpcmpgt) for the VI48_AVX512VL
;; modes.  The <avx512fmaskmode> result uses constraint "Yk" and the
;; compare is expressed as UNSPEC_MASKED_GT.
10510 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10511 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10512 (unspec:<avx512fmaskmode>
10513 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10514 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10516 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10517 [(set_attr "type" "ssecmp")
10518 (set_attr "prefix_extra" "1")
10519 (set_attr "prefix" "evex")
10520 (set_attr "mode" "<sseinsnmode>")])
;; Mask-producing greater-than compare (vpcmpgt) for the VI12_AVX512VL
;; modes.  The <avx512fmaskmode> result uses constraint "Yk" and the
;; compare is expressed as UNSPEC_MASKED_GT.
10522 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10523 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10524 (unspec:<avx512fmaskmode>
10525 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10526 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10528 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10529 [(set_attr "type" "ssecmp")
10530 (set_attr "prefix_extra" "1")
10531 (set_attr "prefix" "evex")
10532 (set_attr "mode" "<sseinsnmode>")])
;; Greater-than compare (pcmpgt / vpcmpgt) for the VI124_128 modes,
;; enabled for TARGET_SSE2 when TARGET_XOP is off.  Operand 1 must be a
;; register.  Alternative 0 is the two-operand legacy form; alternative 1
;; the three-operand form.
10534 (define_insn "sse2_gt<mode>3"
10535 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10537 (match_operand:VI124_128 1 "register_operand" "0,x")
10538 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10539 "TARGET_SSE2 && !TARGET_XOP"
10541 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10542 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10543 [(set_attr "isa" "noavx,avx")
10544 (set_attr "type" "ssecmp")
10545 (set_attr "prefix_data16" "1,*")
10546 (set_attr "prefix" "orig,vex")
10547 (set_attr "mode" "TI")])
;; Vector conditional select for 512-bit modes: operand 0 receives
;; operand 1 or operand 2 according to comparison operator 3 applied to
;; the integer vectors in operands 4 and 5.  The condition requires the
;; data mode and the comparison mode to have the same number of units.
;; Expansion is done by ix86_expand_int_vcond.
10549 (define_expand "vcond<V_512:mode><VI_512:mode>"
10550 [(set (match_operand:V_512 0 "register_operand")
10551 (if_then_else:V_512
10552 (match_operator 3 ""
10553 [(match_operand:VI_512 4 "nonimmediate_operand")
10554 (match_operand:VI_512 5 "general_operand")])
10555 (match_operand:V_512 1)
10556 (match_operand:V_512 2)))]
10558 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10559 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10561 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 256-bit modes: operand 0 receives
;; operand 1 or operand 2 according to comparison operator 3 applied to
;; the integer vectors in operands 4 and 5.  The condition requires the
;; data mode and the comparison mode to have the same number of units.
;; Expansion is done by ix86_expand_int_vcond.
10566 (define_expand "vcond<V_256:mode><VI_256:mode>"
10567 [(set (match_operand:V_256 0 "register_operand")
10568 (if_then_else:V_256
10569 (match_operator 3 ""
10570 [(match_operand:VI_256 4 "nonimmediate_operand")
10571 (match_operand:VI_256 5 "general_operand")])
10572 (match_operand:V_256 1)
10573 (match_operand:V_256 2)))]
10575 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10576 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10578 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 128-bit modes with a VI124_128
;; comparison: operand 0 receives operand 1 or operand 2 according to
;; comparison operator 3 applied to operands 4 and 5.  The condition
;; requires matching unit counts between the data and comparison modes.
;; Expansion is done by ix86_expand_int_vcond.
10583 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10584 [(set (match_operand:V_128 0 "register_operand")
10585 (if_then_else:V_128
10586 (match_operator 3 ""
10587 [(match_operand:VI124_128 4 "nonimmediate_operand")
10588 (match_operand:VI124_128 5 "general_operand")])
10589 (match_operand:V_128 1)
10590 (match_operand:V_128 2)))]
10592 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10593 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10595 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for the VI8F_128 data modes with a V2DI
;; comparison (operator 3 on operands 4 and 5).  Expansion is done by
;; ix86_expand_int_vcond.
10600 (define_expand "vcond<VI8F_128:mode>v2di"
10601 [(set (match_operand:VI8F_128 0 "register_operand")
10602 (if_then_else:VI8F_128
10603 (match_operator 3 ""
10604 [(match_operand:V2DI 4 "nonimmediate_operand")
10605 (match_operand:V2DI 5 "general_operand")])
10606 (match_operand:VI8F_128 1)
10607 (match_operand:VI8F_128 2)))]
10610 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 512-bit vcond expander.  Here operand 5
;; must satisfy nonimmediate_operand and the selected values (operands 1
;; and 2) are general_operand.  Unit counts of the data and comparison
;; modes must match; expansion is done by ix86_expand_int_vcond.
10615 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10616 [(set (match_operand:V_512 0 "register_operand")
10617 (if_then_else:V_512
10618 (match_operator 3 ""
10619 [(match_operand:VI_512 4 "nonimmediate_operand")
10620 (match_operand:VI_512 5 "nonimmediate_operand")])
10621 (match_operand:V_512 1 "general_operand")
10622 (match_operand:V_512 2 "general_operand")))]
10624 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10625 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10627 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 256-bit vcond expander.  Here operand 5
;; must satisfy nonimmediate_operand and the selected values (operands 1
;; and 2) are general_operand.  Unit counts of the data and comparison
;; modes must match; expansion is done by ix86_expand_int_vcond.
10632 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10633 [(set (match_operand:V_256 0 "register_operand")
10634 (if_then_else:V_256
10635 (match_operator 3 ""
10636 [(match_operand:VI_256 4 "nonimmediate_operand")
10637 (match_operand:VI_256 5 "nonimmediate_operand")])
10638 (match_operand:V_256 1 "general_operand")
10639 (match_operand:V_256 2 "general_operand")))]
10641 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10642 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10644 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 128-bit vcond expander with a VI124_128
;; comparison.  Operand 5 must satisfy nonimmediate_operand and the
;; selected values (operands 1 and 2) are general_operand.  Unit counts
;; must match; expansion is done by ix86_expand_int_vcond.
10649 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10650 [(set (match_operand:V_128 0 "register_operand")
10651 (if_then_else:V_128
10652 (match_operator 3 ""
10653 [(match_operand:VI124_128 4 "nonimmediate_operand")
10654 (match_operand:VI124_128 5 "nonimmediate_operand")])
10655 (match_operand:V_128 1 "general_operand")
10656 (match_operand:V_128 2 "general_operand")))]
10658 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10659 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10661 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of vcond<VI8F_128:mode>v2di: VI8F_128 data modes
;; selected by a V2DI comparison (operator 3 on operands 4 and 5).
;; Expansion is done by ix86_expand_int_vcond.
10666 (define_expand "vcondu<VI8F_128:mode>v2di"
10667 [(set (match_operand:VI8F_128 0 "register_operand")
10668 (if_then_else:VI8F_128
10669 (match_operator 3 ""
10670 [(match_operand:V2DI 4 "nonimmediate_operand")
10671 (match_operand:V2DI 5 "nonimmediate_operand")])
10672 (match_operand:VI8F_128 1 "general_operand")
10673 (match_operand:VI8F_128 2 "general_operand")))]
10676 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the vec_perm<mode> expander.  The 128-bit modes are
;; unconditional; 256-bit modes need TARGET_AVX2; 512-bit modes need
;; TARGET_AVX512F, except V32HI (TARGET_AVX512BW) and V64QI
;; (TARGET_AVX512VBMI).
10681 (define_mode_iterator VEC_PERM_AVX2
10682 [V16QI V8HI V4SI V2DI V4SF V2DF
10683 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10684 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10685 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10686 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10687 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10688 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; Permutation of operands 1 and 2 into operand 0 under a selector held
;; in a register (operand 3, mode <sseintvecmode>).  Enabled for
;; TARGET_SSSE3, TARGET_AVX or TARGET_XOP; the work is done by
;; ix86_expand_vec_perm.
10690 (define_expand "vec_perm<mode>"
10691 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10692 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10693 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10694 (match_operand:<sseintvecmode> 3 "register_operand")]
10695 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10697 ix86_expand_vec_perm (operands);
;; Modes handled by the vec_perm_const<mode> expander, each paired with
;; the target flag that enables it.
10701 (define_mode_iterator VEC_PERM_CONST
10702 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10703 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10704 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10705 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10706 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10707 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10708 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10709 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10710 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
;; Permutation of operands 1 and 2 into operand 0 with selector operand
;; 3, which has mode <sseintvecmode> and no predicate.  The return value
;; of ix86_expand_vec_perm_const is tested to decide the outcome.
10712 (define_expand "vec_perm_const<mode>"
10713 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10714 (match_operand:VEC_PERM_CONST 1 "register_operand")
10715 (match_operand:VEC_PERM_CONST 2 "register_operand")
10716 (match_operand:<sseintvecmode> 3)]
10719 if (ix86_expand_vec_perm_const (operands))
10725 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10727 ;; Parallel bitwise logical operations
10729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR.  The
;; preparation code builds a CONST_VECTOR with constm1_rtx in every
;; element, forces it into a register, and stores it as operand 2.
10731 (define_expand "one_cmpl<mode>2"
10732 [(set (match_operand:VI 0 "register_operand")
10733 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
10737 int i, n = GET_MODE_NUNITS (<MODE>mode);
10738 rtvec v = rtvec_alloc (n);
10740 for (i = 0; i < n; ++i)
10741 RTVEC_ELT (v, i) = constm1_rtx;
10743 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named and-not expander for the VI_AVX2 modes: operand 1 (a register)
;; is the input wrapped in NOT; operand 2 may be a register or memory.
10746 (define_expand "<sse2_avx2>_andnot<mode>3"
10747 [(set (match_operand:VI_AVX2 0 "register_operand")
10749 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10750 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
;; Masked and-not expander for the VI48_AVX512VL modes.  The result is
;; vec_merge'd with operand 3 under the mask register in operand 4.
10753 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10754 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10755 (vec_merge:VI48_AVX512VL
10758 (match_operand:VI48_AVX512VL 1 "register_operand"))
10759 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10760 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10761 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Masked and-not expander for the VI12_AVX512VL modes.  The result is
;; vec_merge'd with operand 3 under the mask register in operand 4.
10764 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10765 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10766 (vec_merge:VI12_AVX512VL
10769 (match_operand:VI12_AVX512VL 1 "register_operand"))
10770 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10771 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10772 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Unmasked and-not instruction pattern for all VI modes.
;; Alternative 0 ties operand 1 to the destination ("0", isa noavx) and is
;; printed in two-operand form; alternative 1 (isa avx) is printed in
;; three-operand form with a "v" prefix on the mnemonic.
;; The output code chooses the mnemonic string (tmp) from the insn's "mode"
;; attribute and <MODE>mode, asserting the required target features along the
;; way, picks the operand template (ops) from which_alternative, and formats
;; the two into the static buffer buf with snprintf.
;; The "mode" attribute is computed by the cond at the end: <ssePSmode> for
;; 16-byte modes when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, V8SF for modes
;; wider than 16 bytes on AVX without AVX2, V4SF without SSE2 or when
;; optimizing for size, otherwise <sseinsnmode>.
;; NOTE(review): the alternative-1 template references <mask_operand3_1>
;; although this pattern's name carries no <mask_name> -- confirm that this
;; is intended.
10775 (define_insn "*andnot<mode>3"
10776 [(set (match_operand:VI 0 "register_operand" "=x,v")
10778 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10779 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10782 static char buf[64];
10786 switch (get_attr_mode (insn))
10789 gcc_assert (TARGET_AVX512F);
10791 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10793 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10794 switch (<MODE>mode)
10798 if (TARGET_AVX512F)
10800 tmp = "pandn<ssemodesuffix>";
10807 if (TARGET_AVX512VL)
10809 tmp = "pandn<ssemodesuffix>";
10813 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
10818 gcc_assert (TARGET_AVX512F);
10820 gcc_assert (TARGET_AVX);
10822 gcc_assert (TARGET_SSE);
10828 gcc_unreachable ();
10831 switch (which_alternative)
10834 ops = "%s\t{%%2, %%0|%%0, %%2}";
10837 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10840 gcc_unreachable ();
10843 snprintf (buf, sizeof (buf), ops, tmp);
10846 [(set_attr "isa" "noavx,avx")
10847 (set_attr "type" "sselog")
10848 (set (attr "prefix_data16")
10850 (and (eq_attr "alternative" "0")
10851 (eq_attr "mode" "TI"))
10853 (const_string "*")))
10854 (set_attr "prefix" "orig,vex")
10856 (cond [(and (match_test "<MODE_SIZE> == 16")
10857 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10858 (const_string "<ssePSmode>")
10859 (match_test "TARGET_AVX2")
10860 (const_string "<sseinsnmode>")
10861 (match_test "TARGET_AVX")
10863 (match_test "<MODE_SIZE> > 16")
10864 (const_string "V8SF")
10865 (const_string "<sseinsnmode>"))
10866 (ior (not (match_test "TARGET_SSE2"))
10867 (match_test "optimize_function_for_size_p (cfun)"))
10868 (const_string "V4SF")
10870 (const_string "<sseinsnmode>")))])
;; Masked and-not instruction for the VI48_AVX512VL modes.
;; Emits vpandn<ssemodesuffix> with the mask register (operand 4) written as
;; %{%4%} after the destination and the %N3 modifier applied to the merge
;; source in operand 3; operand 3 is either tied to the destination ("0") or
;; satisfies the "C" constraint.
;; The ";" that follows the output template starts an empty comment in this
;; file's syntax, so it does not change the pattern.
10872 (define_insn "*andnot<mode>3_mask"
10873 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10874 (vec_merge:VI48_AVX512VL
10877 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
10878 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10879 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10880 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10882 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10883 [(set_attr "type" "sselog")
10884 (set_attr "prefix" "evex")
10885 (set_attr "mode" "<sseinsnmode>")])
;; Masked and-not instruction for the VI12_AVX512VL modes.
;; Emits vpandn<ssemodesuffix> with the mask register (operand 4) written as
;; %{%4%} after the destination and the %N3 modifier applied to the merge
;; source in operand 3; operand 3 is either tied to the destination ("0") or
;; satisfies the "C" constraint.
;; The ";" that follows the output template starts an empty comment in this
;; file's syntax, so it does not change the pattern.
10887 (define_insn "*andnot<mode>3_mask"
10888 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10889 (vec_merge:VI12_AVX512VL
10892 (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
10893 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10894 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10895 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10897 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10898 [(set_attr "type" "sselog")
10899 (set_attr "prefix" "evex")
10900 (set_attr "mode" "<sseinsnmode>")])
;; Expander "<code><mode>3" for the VI modes, instantiated once per value of
;; the <code> iterator.  Both inputs may be a register, memory or a constant
;; vector; the preparation code passes <CODE>, <MODE>mode and the operand
;; array to ix86_expand_vector_logical_operator.
10902 (define_expand "<code><mode>3"
10903 [(set (match_operand:VI 0 "register_operand")
10905 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
10906 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
10909 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Instruction pattern for the <code> operation on all VI modes, with an
;; optional masked variant generated through <mask_name>.
;; Operand 1 is marked commutative ("%"); alternative 0 ties it to the
;; destination (isa noavx, two-operand output), alternative 1 (isa avx) uses
;; the three-operand form with a "v" prefix on the mnemonic.
;; The insn condition requires TARGET_SSE, <mask_mode512bit_condition> and
;; ix86_binary_operator_ok for the operands.
;; The output code chooses the mnemonic string (tmp, built from "p<logic>")
;; from the insn's "mode" attribute and <MODE>mode, asserting the required
;; target features, picks the operand template (ops) from which_alternative,
;; and formats the two into the static buffer buf with snprintf.
;; The "mode" attribute is computed by the cond at the end: <ssePSmode> for
;; 16-byte modes when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, V8SF for modes
;; wider than 16 bytes on AVX without AVX2, V4SF without SSE2 or when
;; optimizing for size, otherwise <sseinsnmode>.
10913 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10914 [(set (match_operand:VI 0 "register_operand" "=x,v")
10916 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
10917 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10918 "TARGET_SSE && <mask_mode512bit_condition>
10919 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10921 static char buf[64];
10925 switch (get_attr_mode (insn))
10928 gcc_assert (TARGET_AVX512F);
10930 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10932 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10933 switch (<MODE>mode)
10937 if (TARGET_AVX512F)
10939 tmp = "p<logic><ssemodesuffix>";
10946 if (TARGET_AVX512VL)
10948 tmp = "p<logic><ssemodesuffix>";
10952 tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
10957 gcc_assert (TARGET_AVX512F);
10959 gcc_assert (TARGET_AVX);
10961 gcc_assert (TARGET_SSE);
10967 gcc_unreachable ();
10970 switch (which_alternative)
10973 ops = "%s\t{%%2, %%0|%%0, %%2}";
10976 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10979 gcc_unreachable ();
10982 snprintf (buf, sizeof (buf), ops, tmp);
10985 [(set_attr "isa" "noavx,avx")
10986 (set_attr "type" "sselog")
10987 (set (attr "prefix_data16")
10989 (and (eq_attr "alternative" "0")
10990 (eq_attr "mode" "TI"))
10992 (const_string "*")))
10993 (set_attr "prefix" "<mask_prefix3>")
10995 (cond [(and (match_test "<MODE_SIZE> == 16")
10996 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10997 (const_string "<ssePSmode>")
10998 (match_test "TARGET_AVX2")
10999 (const_string "<sseinsnmode>")
11000 (match_test "TARGET_AVX")
11002 (match_test "<MODE_SIZE> > 16")
11003 (const_string "V8SF")
11004 (const_string "<sseinsnmode>"))
11005 (ior (not (match_test "TARGET_SSE2"))
11006 (match_test "optimize_function_for_size_p (cfun)"))
11007 (const_string "V4SF")
11009 (const_string "<sseinsnmode>")))])
;; vptestm for the VI12_AVX512VL modes: the mask register in operand 0
;; (constraint "Yk", mode <avx512fmaskmode>) is set from an unspec of vector
;; operands 1 and 2.  The <mask_scalar_merge_name> suffix provides an
;; optional variant whose extra operand is printed through
;; <mask_scalar_merge_operand3>.
11011 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11012 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11013 (unspec:<avx512fmaskmode>
11014 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11015 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11018 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11019 [(set_attr "prefix" "evex")
11020 (set_attr "mode" "<sseinsnmode>")])
;; vptestm for the VI48_AVX512VL modes: the mask register in operand 0
;; (constraint "Yk", mode <avx512fmaskmode>) is set from an unspec of vector
;; operands 1 and 2.  The <mask_scalar_merge_name> suffix provides an
;; optional variant whose extra operand is printed through
;; <mask_scalar_merge_operand3>.
11022 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11023 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11024 (unspec:<avx512fmaskmode>
11025 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11026 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11029 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11030 [(set_attr "prefix" "evex")
11031 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI12_AVX512VL modes: the mask register in operand 0
;; (constraint "Yk", mode <avx512fmaskmode>) is set from an unspec of vector
;; operands 1 and 2.  The <mask_scalar_merge_name> suffix provides an
;; optional variant whose extra operand is printed through
;; <mask_scalar_merge_operand3>.
11033 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11034 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11035 (unspec:<avx512fmaskmode>
11036 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11037 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11040 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11041 [(set_attr "prefix" "evex")
11042 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI48_AVX512VL modes: the mask register in operand 0
;; (constraint "Yk", mode <avx512fmaskmode>) is set from an unspec of vector
;; operands 1 and 2.  The <mask_scalar_merge_name> suffix provides an
;; optional variant whose extra operand is printed through
;; <mask_scalar_merge_operand3>.
11044 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11045 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11046 (unspec:<avx512fmaskmode>
11047 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11048 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11051 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11052 [(set_attr "prefix" "evex")
11053 (set_attr "mode" "<sseinsnmode>")])
11055 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11057 ;; Parallel integral element swizzling
11059 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "vec_pack_trunc_<mode>" for the VI248_AVX2_8_AVX512F modes.
;; Operand 0 has the narrower-element mode <ssepackmode>.  Both inputs are
;; reinterpreted in that mode with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even-numbered elements -- confirm against the
;; helper's definition).
11061 (define_expand "vec_pack_trunc_<mode>"
11062 [(match_operand:<ssepackmode> 0 "register_operand")
11063 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
11064 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
11067 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11068 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11069 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb for the VI1_AVX512 modes, optionally masked via
;; <mask_name>.  The result is the vec_concat of the ss_truncate of operand 1
;; and the ss_truncate of operand 2, each narrowed from <sseunpackmode> to
;; <ssehalfvecmode>.
;; Alternative 0 is the two-operand form (isa noavx, operand 1 tied to the
;; destination); alternative 1 is the three-operand form (isa avx).
;; NOTE(review): operand 0 uses "=x,x" while operands 1 and 2 allow "v" in
;; alternative 1, and the sibling packssdw pattern uses "=x,v" -- confirm
;; which is intended.
11073 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11074 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11075 (vec_concat:VI1_AVX512
11076 (ss_truncate:<ssehalfvecmode>
11077 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11078 (ss_truncate:<ssehalfvecmode>
11079 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11080 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11082 packsswb\t{%2, %0|%0, %2}
11083 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11084 [(set_attr "isa" "noavx,avx")
11085 (set_attr "type" "sselog")
11086 (set_attr "prefix_data16" "1,*")
11087 (set_attr "prefix" "orig,maybe_evex")
11088 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw for the VI2_AVX2 modes, optionally masked via
;; <mask_name>.  The result is the vec_concat of the ss_truncate of operand 1
;; and the ss_truncate of operand 2, each narrowed from <sseunpackmode> to
;; <ssehalfvecmode>.
;; Alternative 0 is the two-operand form (isa noavx, operand 1 tied to the
;; destination); alternative 1 is the three-operand form (isa avx).
;; NOTE(review): the "prefix" attribute is "orig,vex" here but
;; "orig,maybe_evex" on the sibling packsswb pattern, although both carry
;; <mask_name> -- confirm which is intended.
11090 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11091 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11092 (vec_concat:VI2_AVX2
11093 (ss_truncate:<ssehalfvecmode>
11094 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11095 (ss_truncate:<ssehalfvecmode>
11096 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11097 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11099 packssdw\t{%2, %0|%0, %2}
11100 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11101 [(set_attr "isa" "noavx,avx")
11102 (set_attr "type" "sselog")
11103 (set_attr "prefix_data16" "1,*")
11104 (set_attr "prefix" "orig,vex")
11105 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb for the VI1_AVX512 modes, optionally masked via
;; <mask_name>.  The result is the vec_concat of the us_truncate of operand 1
;; and the us_truncate of operand 2, each narrowed from <sseunpackmode> to
;; <ssehalfvecmode>.
;; Alternative 0 is the two-operand form (isa noavx, operand 1 tied to the
;; destination); alternative 1 is the three-operand form (isa avx).
;; NOTE(review): operand 0 uses "=x,x" while operands 1 and 2 allow "v" in
;; alternative 1, and the "prefix" attribute is "orig,vex" where the sibling
;; packsswb pattern has "orig,maybe_evex" -- confirm which is intended.
11107 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11108 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11109 (vec_concat:VI1_AVX512
11110 (us_truncate:<ssehalfvecmode>
11111 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11112 (us_truncate:<ssehalfvecmode>
11113 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11114 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11116 packuswb\t{%2, %0|%0, %2}
11117 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11118 [(set_attr "isa" "noavx,avx")
11119 (set_attr "type" "sselog")
11120 (set_attr "prefix_data16" "1,*")
11121 (set_attr "prefix" "orig,vex")
11122 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V64QI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 64 for i in 8..15, 24..31,
;; 40..47 and 56..63, i.e. the upper eight bytes of every 16-byte group.
;; Presumably the indices address the concatenation of operands 1 and 2, so
;; that i + 64 is element i of operand 2 -- the enclosing RTL and the insn
;; condition are not visible here.
11124 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
11125 [(set (match_operand:V64QI 0 "register_operand" "=v")
11128 (match_operand:V64QI 1 "register_operand" "v")
11129 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11130 (parallel [(const_int 8) (const_int 72)
11131 (const_int 9) (const_int 73)
11132 (const_int 10) (const_int 74)
11133 (const_int 11) (const_int 75)
11134 (const_int 12) (const_int 76)
11135 (const_int 13) (const_int 77)
11136 (const_int 14) (const_int 78)
11137 (const_int 15) (const_int 79)
11138 (const_int 24) (const_int 88)
11139 (const_int 25) (const_int 89)
11140 (const_int 26) (const_int 90)
11141 (const_int 27) (const_int 91)
11142 (const_int 28) (const_int 92)
11143 (const_int 29) (const_int 93)
11144 (const_int 30) (const_int 94)
11145 (const_int 31) (const_int 95)
11146 (const_int 40) (const_int 104)
11147 (const_int 41) (const_int 105)
11148 (const_int 42) (const_int 106)
11149 (const_int 43) (const_int 107)
11150 (const_int 44) (const_int 108)
11151 (const_int 45) (const_int 109)
11152 (const_int 46) (const_int 110)
11153 (const_int 47) (const_int 111)
11154 (const_int 56) (const_int 120)
11155 (const_int 57) (const_int 121)
11156 (const_int 58) (const_int 122)
11157 (const_int 59) (const_int 123)
11158 (const_int 60) (const_int 124)
11159 (const_int 61) (const_int 125)
11160 (const_int 62) (const_int 126)
11161 (const_int 63) (const_int 127)])))]
11163 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11164 [(set_attr "type" "sselog")
11165 (set_attr "prefix" "evex")
11166 (set_attr "mode" "XI")])
;; vpunpckhbw on V32QI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 32 for i in 8..15 and
;; 24..31, i.e. the upper eight bytes of each 16-byte half.  Presumably the
;; indices address the concatenation of operands 1 and 2 -- the enclosing
;; RTL is not visible here.
;; NOTE(review): the condition omits <mask_avx512bw_condition>, which the
;; sibling avx2_interleave_lowv32qi pattern includes -- confirm.
11168 (define_insn "avx2_interleave_highv32qi<mask_name>"
11169 [(set (match_operand:V32QI 0 "register_operand" "=v")
11172 (match_operand:V32QI 1 "register_operand" "v")
11173 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11174 (parallel [(const_int 8) (const_int 40)
11175 (const_int 9) (const_int 41)
11176 (const_int 10) (const_int 42)
11177 (const_int 11) (const_int 43)
11178 (const_int 12) (const_int 44)
11179 (const_int 13) (const_int 45)
11180 (const_int 14) (const_int 46)
11181 (const_int 15) (const_int 47)
11182 (const_int 24) (const_int 56)
11183 (const_int 25) (const_int 57)
11184 (const_int 26) (const_int 58)
11185 (const_int 27) (const_int 59)
11186 (const_int 28) (const_int 60)
11187 (const_int 29) (const_int 61)
11188 (const_int 30) (const_int 62)
11189 (const_int 31) (const_int 63)])))]
11190 "TARGET_AVX2 && <mask_avx512vl_condition>"
11191 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11192 [(set_attr "type" "sselog")
11193 (set_attr "prefix" "<mask_prefix>")
11194 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 16 for i in 8..15, i.e.
;; the upper eight bytes.  Presumably the indices address the concatenation
;; of operands 1 and 2 -- the enclosing RTL is not visible here.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is the
;; three-operand form (isa avx).
;; NOTE(review): the condition omits <mask_avx512bw_condition>, which the
;; sibling vec_interleave_lowv16qi pattern includes -- confirm.
11196 (define_insn "vec_interleave_highv16qi<mask_name>"
11197 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11200 (match_operand:V16QI 1 "register_operand" "0,v")
11201 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11202 (parallel [(const_int 8) (const_int 24)
11203 (const_int 9) (const_int 25)
11204 (const_int 10) (const_int 26)
11205 (const_int 11) (const_int 27)
11206 (const_int 12) (const_int 28)
11207 (const_int 13) (const_int 29)
11208 (const_int 14) (const_int 30)
11209 (const_int 15) (const_int 31)])))]
11210 "TARGET_SSE2 && <mask_avx512vl_condition>"
11212 punpckhbw\t{%2, %0|%0, %2}
11213 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11214 [(set_attr "isa" "noavx,avx")
11215 (set_attr "type" "sselog")
11216 (set_attr "prefix_data16" "1,*")
11217 (set_attr "prefix" "orig,<mask_prefix>")
11218 (set_attr "mode" "TI")])
;; vpunpcklbw on V64QI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 64 for i in 0..7, 16..23,
;; 32..39 and 48..55, i.e. the lower eight bytes of every 16-byte group.
;; Presumably the indices address the concatenation of operands 1 and 2, so
;; that i + 64 is element i of operand 2 -- the enclosing RTL and the insn
;; condition are not visible here.
11220 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
11221 [(set (match_operand:V64QI 0 "register_operand" "=v")
11224 (match_operand:V64QI 1 "register_operand" "v")
11225 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11226 (parallel [(const_int 0) (const_int 64)
11227 (const_int 1) (const_int 65)
11228 (const_int 2) (const_int 66)
11229 (const_int 3) (const_int 67)
11230 (const_int 4) (const_int 68)
11231 (const_int 5) (const_int 69)
11232 (const_int 6) (const_int 70)
11233 (const_int 7) (const_int 71)
11234 (const_int 16) (const_int 80)
11235 (const_int 17) (const_int 81)
11236 (const_int 18) (const_int 82)
11237 (const_int 19) (const_int 83)
11238 (const_int 20) (const_int 84)
11239 (const_int 21) (const_int 85)
11240 (const_int 22) (const_int 86)
11241 (const_int 23) (const_int 87)
11242 (const_int 32) (const_int 96)
11243 (const_int 33) (const_int 97)
11244 (const_int 34) (const_int 98)
11245 (const_int 35) (const_int 99)
11246 (const_int 36) (const_int 100)
11247 (const_int 37) (const_int 101)
11248 (const_int 38) (const_int 102)
11249 (const_int 39) (const_int 103)
11250 (const_int 48) (const_int 112)
11251 (const_int 49) (const_int 113)
11252 (const_int 50) (const_int 114)
11253 (const_int 51) (const_int 115)
11254 (const_int 52) (const_int 116)
11255 (const_int 53) (const_int 117)
11256 (const_int 54) (const_int 118)
11257 (const_int 55) (const_int 119)])))]
11259 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11260 [(set_attr "type" "sselog")
11261 (set_attr "prefix" "evex")
11262 (set_attr "mode" "XI")])
;; vpunpcklbw on V32QI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 32 for i in 0..7 and
;; 16..23, i.e. the lower eight bytes of each 16-byte half.  Presumably the
;; indices address the concatenation of operands 1 and 2 -- the enclosing
;; RTL is not visible here.
;; NOTE(review): the "prefix" attribute is "maybe_vex" here while the
;; sibling avx2_interleave_highv32qi pattern uses "<mask_prefix>" -- confirm.
11264 (define_insn "avx2_interleave_lowv32qi<mask_name>"
11265 [(set (match_operand:V32QI 0 "register_operand" "=v")
11268 (match_operand:V32QI 1 "register_operand" "v")
11269 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11270 (parallel [(const_int 0) (const_int 32)
11271 (const_int 1) (const_int 33)
11272 (const_int 2) (const_int 34)
11273 (const_int 3) (const_int 35)
11274 (const_int 4) (const_int 36)
11275 (const_int 5) (const_int 37)
11276 (const_int 6) (const_int 38)
11277 (const_int 7) (const_int 39)
11278 (const_int 16) (const_int 48)
11279 (const_int 17) (const_int 49)
11280 (const_int 18) (const_int 50)
11281 (const_int 19) (const_int 51)
11282 (const_int 20) (const_int 52)
11283 (const_int 21) (const_int 53)
11284 (const_int 22) (const_int 54)
11285 (const_int 23) (const_int 55)])))]
11286 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11287 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11288 [(set_attr "type" "sselog")
11289 (set_attr "prefix" "maybe_vex")
11290 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 16 for i in 0..7, i.e.
;; the lower eight bytes.  Presumably the indices address the concatenation
;; of operands 1 and 2 -- the enclosing RTL is not visible here.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is the
;; three-operand form (isa avx).
;; NOTE(review): the "prefix" attribute is "orig,vex" here while the sibling
;; vec_interleave_highv16qi pattern uses "orig,<mask_prefix>" -- confirm.
11292 (define_insn "vec_interleave_lowv16qi<mask_name>"
11293 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11296 (match_operand:V16QI 1 "register_operand" "0,v")
11297 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11298 (parallel [(const_int 0) (const_int 16)
11299 (const_int 1) (const_int 17)
11300 (const_int 2) (const_int 18)
11301 (const_int 3) (const_int 19)
11302 (const_int 4) (const_int 20)
11303 (const_int 5) (const_int 21)
11304 (const_int 6) (const_int 22)
11305 (const_int 7) (const_int 23)])))]
11306 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11308 punpcklbw\t{%2, %0|%0, %2}
11309 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11310 [(set_attr "isa" "noavx,avx")
11311 (set_attr "type" "sselog")
11312 (set_attr "prefix_data16" "1,*")
11313 (set_attr "prefix" "orig,vex")
11314 (set_attr "mode" "TI")])
;; vpunpckhwd on V32HI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 32 for i in 4..7, 12..15,
;; 20..23 and 28..31, i.e. the upper four words of every eight-word group.
;; Presumably the indices address the concatenation of operands 1 and 2 --
;; the enclosing RTL and the insn condition are not visible here.
11316 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
11317 [(set (match_operand:V32HI 0 "register_operand" "=v")
11320 (match_operand:V32HI 1 "register_operand" "v")
11321 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11322 (parallel [(const_int 4) (const_int 36)
11323 (const_int 5) (const_int 37)
11324 (const_int 6) (const_int 38)
11325 (const_int 7) (const_int 39)
11326 (const_int 12) (const_int 44)
11327 (const_int 13) (const_int 45)
11328 (const_int 14) (const_int 46)
11329 (const_int 15) (const_int 47)
11330 (const_int 20) (const_int 52)
11331 (const_int 21) (const_int 53)
11332 (const_int 22) (const_int 54)
11333 (const_int 23) (const_int 55)
11334 (const_int 28) (const_int 60)
11335 (const_int 29) (const_int 61)
11336 (const_int 30) (const_int 62)
11337 (const_int 31) (const_int 63)])))]
11339 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11340 [(set_attr "type" "sselog")
11341 (set_attr "prefix" "evex")
11342 (set_attr "mode" "XI")])
;; vpunpckhwd on V16HI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 16 for i in 4..7 and
;; 12..15, i.e. the upper four words of each eight-word half.  Presumably
;; the indices address the concatenation of operands 1 and 2 -- the
;; enclosing RTL is not visible here.
11344 (define_insn "avx2_interleave_highv16hi<mask_name>"
11345 [(set (match_operand:V16HI 0 "register_operand" "=v")
11348 (match_operand:V16HI 1 "register_operand" "v")
11349 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11350 (parallel [(const_int 4) (const_int 20)
11351 (const_int 5) (const_int 21)
11352 (const_int 6) (const_int 22)
11353 (const_int 7) (const_int 23)
11354 (const_int 12) (const_int 28)
11355 (const_int 13) (const_int 29)
11356 (const_int 14) (const_int 30)
11357 (const_int 15) (const_int 31)])))]
11358 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11359 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11360 [(set_attr "type" "sselog")
11361 (set_attr "prefix" "maybe_evex")
11362 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on V8HI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 8 for i in 4..7, i.e. the
;; upper four words.  Presumably the indices address the concatenation of
;; operands 1 and 2 -- the enclosing RTL is not visible here.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is the
;; three-operand form (isa avx).
;; NOTE(review): the "prefix" attribute is "orig,maybe_vex" here while the
;; sibling vec_interleave_lowv8hi pattern uses "orig,maybe_evex" -- confirm.
11364 (define_insn "vec_interleave_highv8hi<mask_name>"
11365 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11368 (match_operand:V8HI 1 "register_operand" "0,v")
11369 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11370 (parallel [(const_int 4) (const_int 12)
11371 (const_int 5) (const_int 13)
11372 (const_int 6) (const_int 14)
11373 (const_int 7) (const_int 15)])))]
11374 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11376 punpckhwd\t{%2, %0|%0, %2}
11377 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11378 [(set_attr "isa" "noavx,avx")
11379 (set_attr "type" "sselog")
11380 (set_attr "prefix_data16" "1,*")
11381 (set_attr "prefix" "orig,maybe_vex")
11382 (set_attr "mode" "TI")])
;; vpunpcklwd on V32HI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 32 for i in 0..3, 8..11,
;; 16..19 and 24..27, i.e. the lower four words of every eight-word group.
;; Presumably the indices address the concatenation of operands 1 and 2 --
;; the enclosing RTL and the insn condition are not visible here.
11384 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11385 [(set (match_operand:V32HI 0 "register_operand" "=v")
11388 (match_operand:V32HI 1 "register_operand" "v")
11389 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11390 (parallel [(const_int 0) (const_int 32)
11391 (const_int 1) (const_int 33)
11392 (const_int 2) (const_int 34)
11393 (const_int 3) (const_int 35)
11394 (const_int 8) (const_int 40)
11395 (const_int 9) (const_int 41)
11396 (const_int 10) (const_int 42)
11397 (const_int 11) (const_int 43)
11398 (const_int 16) (const_int 48)
11399 (const_int 17) (const_int 49)
11400 (const_int 18) (const_int 50)
11401 (const_int 19) (const_int 51)
11402 (const_int 24) (const_int 56)
11403 (const_int 25) (const_int 57)
11404 (const_int 26) (const_int 58)
11405 (const_int 27) (const_int 59)])))]
11407 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11408 [(set_attr "type" "sselog")
11409 (set_attr "prefix" "evex")
11410 (set_attr "mode" "XI")])
;; vpunpcklwd on V16HI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 16 for i in 0..3 and
;; 8..11, i.e. the lower four words of each eight-word half.  Presumably the
;; indices address the concatenation of operands 1 and 2 -- the enclosing
;; RTL is not visible here.
11412 (define_insn "avx2_interleave_lowv16hi<mask_name>"
11413 [(set (match_operand:V16HI 0 "register_operand" "=v")
11416 (match_operand:V16HI 1 "register_operand" "v")
11417 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11418 (parallel [(const_int 0) (const_int 16)
11419 (const_int 1) (const_int 17)
11420 (const_int 2) (const_int 18)
11421 (const_int 3) (const_int 19)
11422 (const_int 8) (const_int 24)
11423 (const_int 9) (const_int 25)
11424 (const_int 10) (const_int 26)
11425 (const_int 11) (const_int 27)])))]
11426 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11427 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11428 [(set_attr "type" "sselog")
11429 (set_attr "prefix" "maybe_evex")
11430 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on V8HI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 8 for i in 0..3, i.e. the
;; lower four words.  Presumably the indices address the concatenation of
;; operands 1 and 2 -- the enclosing RTL is not visible here.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is the
;; three-operand form (isa avx).
11432 (define_insn "vec_interleave_lowv8hi<mask_name>"
11433 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11436 (match_operand:V8HI 1 "register_operand" "0,v")
11437 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11438 (parallel [(const_int 0) (const_int 8)
11439 (const_int 1) (const_int 9)
11440 (const_int 2) (const_int 10)
11441 (const_int 3) (const_int 11)])))]
11442 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11444 punpcklwd\t{%2, %0|%0, %2}
11445 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11446 [(set_attr "isa" "noavx,avx")
11447 (set_attr "type" "sselog")
11448 (set_attr "prefix_data16" "1,*")
11449 (set_attr "prefix" "orig,maybe_evex")
11450 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 8 for i in 2, 3, 6 and 7,
;; i.e. the upper two dwords of each four-dword half.  Presumably the
;; indices address the concatenation of operands 1 and 2 -- the enclosing
;; RTL is not visible here.
11452 (define_insn "avx2_interleave_highv8si<mask_name>"
11453 [(set (match_operand:V8SI 0 "register_operand" "=v")
11456 (match_operand:V8SI 1 "register_operand" "v")
11457 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11458 (parallel [(const_int 2) (const_int 10)
11459 (const_int 3) (const_int 11)
11460 (const_int 6) (const_int 14)
11461 (const_int 7) (const_int 15)])))]
11462 "TARGET_AVX2 && <mask_avx512vl_condition>"
11463 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11464 [(set_attr "type" "sselog")
11465 (set_attr "prefix" "maybe_evex")
11466 (set_attr "mode" "OI")])
;; vpunpckhdq on V16SI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 16 for i in 2, 3, 6, 7,
;; 10, 11, 14 and 15, i.e. the upper two dwords of every four-dword group.
;; Presumably the indices address the concatenation of operands 1 and 2 --
;; the enclosing RTL and the insn condition are not visible here.
11468 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11469 [(set (match_operand:V16SI 0 "register_operand" "=v")
11472 (match_operand:V16SI 1 "register_operand" "v")
11473 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11474 (parallel [(const_int 2) (const_int 18)
11475 (const_int 3) (const_int 19)
11476 (const_int 6) (const_int 22)
11477 (const_int 7) (const_int 23)
11478 (const_int 10) (const_int 26)
11479 (const_int 11) (const_int 27)
11480 (const_int 14) (const_int 30)
11481 (const_int 15) (const_int 31)])))]
11483 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11484 [(set_attr "type" "sselog")
11485 (set_attr "prefix" "evex")
11486 (set_attr "mode" "XI")])
;; punpckhdq / vpunpckhdq on V4SI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 4 for i in 2 and 3, i.e.
;; the upper two dwords.  Presumably the indices address the concatenation
;; of operands 1 and 2 -- the enclosing RTL is not visible here.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is the
;; three-operand form (isa avx).
11489 (define_insn "vec_interleave_highv4si<mask_name>"
11490 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11493 (match_operand:V4SI 1 "register_operand" "0,v")
11494 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11495 (parallel [(const_int 2) (const_int 6)
11496 (const_int 3) (const_int 7)])))]
11497 "TARGET_SSE2 && <mask_avx512vl_condition>"
11499 punpckhdq\t{%2, %0|%0, %2}
11500 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11501 [(set_attr "isa" "noavx,avx")
11502 (set_attr "type" "sselog")
11503 (set_attr "prefix_data16" "1,*")
11504 (set_attr "prefix" "orig,maybe_vex")
11505 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 8 for i in 0, 1, 4 and 5,
;; i.e. the lower two dwords of each four-dword half.  Presumably the
;; indices address the concatenation of operands 1 and 2 -- the enclosing
;; RTL is not visible here.
11507 (define_insn "avx2_interleave_lowv8si<mask_name>"
11508 [(set (match_operand:V8SI 0 "register_operand" "=v")
11511 (match_operand:V8SI 1 "register_operand" "v")
11512 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11513 (parallel [(const_int 0) (const_int 8)
11514 (const_int 1) (const_int 9)
11515 (const_int 4) (const_int 12)
11516 (const_int 5) (const_int 13)])))]
11517 "TARGET_AVX2 && <mask_avx512vl_condition>"
11518 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11519 [(set_attr "type" "sselog")
11520 (set_attr "prefix" "maybe_evex")
11521 (set_attr "mode" "OI")])
;; vpunpckldq on V16SI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 16 for i in 0, 1, 4, 5,
;; 8, 9, 12 and 13, i.e. the lower two dwords of every four-dword group.
;; Presumably the indices address the concatenation of operands 1 and 2 --
;; the enclosing RTL and the insn condition are not visible here.
11523 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11524 [(set (match_operand:V16SI 0 "register_operand" "=v")
11527 (match_operand:V16SI 1 "register_operand" "v")
11528 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11529 (parallel [(const_int 0) (const_int 16)
11530 (const_int 1) (const_int 17)
11531 (const_int 4) (const_int 20)
11532 (const_int 5) (const_int 21)
11533 (const_int 8) (const_int 24)
11534 (const_int 9) (const_int 25)
11535 (const_int 12) (const_int 28)
11536 (const_int 13) (const_int 29)])))]
11538 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11539 [(set_attr "type" "sselog")
11540 (set_attr "prefix" "evex")
11541 (set_attr "mode" "XI")])
;; punpckldq / vpunpckldq on V4SI, optionally masked via <mask_name>.
;; The index list pairs element i with element i + 4 for i in 0 and 1, i.e.
;; the lower two dwords.  Presumably the indices address the concatenation
;; of operands 1 and 2 -- the enclosing RTL is not visible here.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is the
;; three-operand form (isa avx).
;; NOTE(review): the "prefix" attribute is "orig,vex" here while the sibling
;; vec_interleave_highv4si pattern uses "orig,maybe_vex" -- confirm.
11543 (define_insn "vec_interleave_lowv4si<mask_name>"
11544 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11547 (match_operand:V4SI 1 "register_operand" "0,v")
11548 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11549 (parallel [(const_int 0) (const_int 4)
11550 (const_int 1) (const_int 5)])))]
11551 "TARGET_SSE2 && <mask_avx512vl_condition>"
11553 punpckldq\t{%2, %0|%0, %2}
11554 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11555 [(set_attr "isa" "noavx,avx")
11556 (set_attr "type" "sselog")
11557 (set_attr "prefix_data16" "1,*")
11558 (set_attr "prefix" "orig,vex")
11559 (set_attr "mode" "TI")])
;; Expander "vec_interleave_high<mode>" for the VI_256 modes.
;; It emits the avx2 interleave-low pattern into t1 and the avx2
;; interleave-high pattern into t2, then combines the two (viewed as V4DI)
;; with gen_avx2_permv2ti using the immediate 1 + (3 << 4), and moves the
;; result, viewed back in <MODE>mode, into operand 0.
11561 (define_expand "vec_interleave_high<mode>"
11562 [(match_operand:VI_256 0 "register_operand" "=x")
11563 (match_operand:VI_256 1 "register_operand" "x")
11564 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11567 rtx t1 = gen_reg_rtx (<MODE>mode);
11568 rtx t2 = gen_reg_rtx (<MODE>mode);
11569 rtx t3 = gen_reg_rtx (V4DImode);
11570 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11571 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11572 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11573 gen_lowpart (V4DImode, t2),
11574 GEN_INT (1 + (3 << 4))));
11575 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander "vec_interleave_low<mode>" for the VI_256 modes.
;; It emits the avx2 interleave-low pattern into t1 and the avx2
;; interleave-high pattern into t2, then combines the two (viewed as V4DI)
;; with gen_avx2_permv2ti using the immediate 0 + (2 << 4), and moves the
;; result, viewed back in <MODE>mode, into operand 0.
11579 (define_expand "vec_interleave_low<mode>"
11580 [(match_operand:VI_256 0 "register_operand" "=x")
11581 (match_operand:VI_256 1 "register_operand" "x")
11582 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11585 rtx t1 = gen_reg_rtx (<MODE>mode);
11586 rtx t2 = gen_reg_rtx (<MODE>mode);
11587 rtx t3 = gen_reg_rtx (V4DImode);
11588 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11589 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11590 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11591 gen_lowpart (V4DImode, t2),
11592 GEN_INT (0 + (2 << 4))));
11593 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11597 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI require TARGET_SSE4_1, and V2DI
;; additionally requires TARGET_64BIT.
11598 (define_mode_iterator PINSR_MODE
11599 [(V16QI "TARGET_SSE4_1") V8HI
11600 (V4SI "TARGET_SSE4_1")
11601 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Maps each PINSR_MODE mode to the ISA prefix used in the name of the pinsr
;; pattern: "sse2" for V8HI and "sse4_1" for V16QI, V4SI and V2DI.
11603 (define_mode_attr sse2p4_1
11604 [(V16QI "sse4_1") (V8HI "sse2")
11605 (V4SI "sse4_1") (V2DI "sse4_1")])
11607 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Element insert (pinsr / vpinsr) for the PINSR_MODE modes.
;; The RTL is a vec_merge of a vec_duplicate of the scalar in operand 2 with
;; the vector in operand 1, controlled by the constant in operand 3.  The
;; visible part of the condition requires exact_log2 of operand 3, taken as
;; unsigned, to be below the number of vector elements.
;; The output code first replaces operands[3] with that exact_log2 value.
;; It then selects a template by alternative: alternatives 0 and 1 (isa
;; noavx; operand 2 in a general register or in memory) use the two-operand
;; pinsr form, alternatives 2 and 3 (isa avx) the three-operand vpinsr form.
;; When the scalar mode is narrower than SImode the template prints operand
;; 2 with the %k modifier.
11608 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11609 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11610 (vec_merge:PINSR_MODE
11611 (vec_duplicate:PINSR_MODE
11612 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11613 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11614 (match_operand:SI 3 "const_int_operand")))]
11616 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11617 < GET_MODE_NUNITS (<MODE>mode))"
11619 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11621 switch (which_alternative)
11624 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11625 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11628 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11630 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11631 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11634 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11636 gcc_unreachable ();
11639 [(set_attr "isa" "noavx,noavx,avx,avx")
11640 (set_attr "type" "sselog")
11641 (set (attr "prefix_rex")
11643 (and (not (match_test "TARGET_AVX"))
11644 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11646 (const_string "*")))
11647 (set (attr "prefix_data16")
11649 (and (not (match_test "TARGET_AVX"))
11650 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11652 (const_string "*")))
11653 (set (attr "prefix_extra")
11655 (and (not (match_test "TARGET_AVX"))
11656 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11658 (const_string "1")))
11659 (set_attr "length_immediate" "1")
11660 (set_attr "prefix" "orig,orig,vex,vex")
11661 (set_attr "mode" "TI")])
;; Masked quarter-vector insert expander for the AVX512_VEC modes.
;; Operand 2 is the <ssequartermode> value to insert, operand 3 the position
;; (0..3), operand 4 the merge source and operand 5 the mask register.
;; The position is converted into a vec_merge selector -- for 4-byte
;; elements 0xFFFF ^ (0xF000 >> position * 4), otherwise
;; 0xFF ^ (0xC0 >> position * 2) -- and passed to the "_1_mask" pattern in
;; place of operand 3.
;; NOTE(review): position 0 yields 0x0FFF (or 0x3F), i.e. the cleared bits
;; are the highest ones; since vec_merge takes the first vector's element
;; where a selector bit is set, confirm that this bit layout matches the
;; intended insert position.
11663 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11664 [(match_operand:AVX512_VEC 0 "register_operand")
11665 (match_operand:AVX512_VEC 1 "register_operand")
11666 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11667 (match_operand:SI 3 "const_0_to_3_operand")
11668 (match_operand:AVX512_VEC 4 "register_operand")
11669 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11673 mask = INTVAL (operands[3]);
11674 selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ?
11675 0xFFFF ^ (0xF000 >> mask * 4)
11676 : 0xFF ^ (0xC0 >> mask * 2);
11677 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11678 (operands[0], operands[1], operands[2], GEN_INT (selector),
11679 operands[4], operands[5]));
;; Quarter-vector insert instruction for the AVX512_VEC modes, optionally
;; masked via <mask_name>.  The RTL is a vec_merge of operand 1 with a
;; vec_duplicate of the <ssequartermode> operand 2, controlled by the
;; constant selector in operand 3.
;; The output code recovers the instruction immediate from the selector: it
;; tests, in order, 0xFFF/0x3F, 0xF0FF/0xCF, 0xFF0F/0xF3 and 0xFFF0/0xFC, and
;; reaches gcc_unreachable for any other value.  operands[3] is then replaced
;; with GEN_INT (mask) before vinsert<shuffletype><extract_suf> is printed.
;; The assignments to mask in each branch are not visible here.
11683 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11684 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11685 (vec_merge:AVX512_VEC
11686 (match_operand:AVX512_VEC 1 "register_operand" "v")
11687 (vec_duplicate:AVX512_VEC
11688 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11689 (match_operand:SI 3 "const_int_operand" "n")))]
11693 int selector = INTVAL (operands[3]);
11695 if (selector == 0xFFF || selector == 0x3F)
11697 else if ( selector == 0xF0FF || selector == 0xCF)
11699 else if ( selector == 0xFF0F || selector == 0xF3)
11701 else if ( selector == 0xFFF0 || selector == 0xFC)
11704 gcc_unreachable ();
11706 operands[3] = GEN_INT (mask);
11708 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
11710 [(set_attr "type" "sselog")
11711 (set_attr "length_immediate" "1")
11712 (set_attr "prefix" "evex")
11713 (set_attr "mode" "<sseinsnmode>")])
;; Masked half-vector insert expander for the AVX512_VEC_2 modes.
;; Operand 2 is the <ssehalfvecmode> value to insert, operand 3 the position
;; (0 or 1), operand 4 the merge source and operand 5 the mask register.
;; Depending on the position read into mask, the code emits either the
;; vec_set_lo_<mode>_mask or the vec_set_hi_<mode>_mask pattern with
;; operands 0, 1, 2, 4 and 5 (the test that selects between the two is not
;; visible here).
11715 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
11716 [(match_operand:AVX512_VEC_2 0 "register_operand")
11717 (match_operand:AVX512_VEC_2 1 "register_operand")
11718 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11719 (match_operand:SI 3 "const_0_to_1_operand")
11720 (match_operand:AVX512_VEC_2 4 "register_operand")
11721 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11724 int mask = INTVAL (operands[3]);
11726 emit_insn (gen_vec_set_lo_<mode>_mask
11727 (operands[0], operands[1], operands[2],
11728 operands[4], operands[5]));
11730 emit_insn (gen_vec_set_hi_<mode>_mask
11731 (operands[0], operands[1], operands[2],
11732 operands[4], operands[5]));
;; Low-half insert for the V16FI modes, optionally masked via <mask_name>.
;; The RTL combines the <ssehalfvecmode> operand 2 with a vec_select of
;; elements 8..15 of operand 1 (the combining operation is not visible
;; here).  The output is vinsert<shuffletype>32x8 with the immediate $0x0.
11736 (define_insn "vec_set_lo_<mode><mask_name>"
11737 [(set (match_operand:V16FI 0 "register_operand" "=v")
11739 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11740 (vec_select:<ssehalfvecmode>
11741 (match_operand:V16FI 1 "register_operand" "v")
11742 (parallel [(const_int 8) (const_int 9)
11743 (const_int 10) (const_int 11)
11744 (const_int 12) (const_int 13)
11745 (const_int 14) (const_int 15)]))))]
11747 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
11748 [(set_attr "type" "sselog")
11749 (set_attr "length_immediate" "1")
11750 (set_attr "prefix" "evex")
11751 (set_attr "mode" "<sseinsnmode>")])
;; Half-width insert with immediate 1 for the V16FI modes, printed as
;; vinsert<shuffletype>32x8: the result keeps elements 0..7 of operand 1 and
;; takes the remaining elements from the half-width operand 2.
;; The kept elements 0..7 are listed FIRST inside the enclosing two-operand
;; vector operation, because the first operand supplies the low-numbered
;; lanes.  Listing operand 2 first would describe an insert into the low
;; half, contradicting the immediate in the template.
;; NOTE(review): the opening line of that enclosing operation is elided from
;; this view; the closing parentheses show it wraps both operands, presumably
;; vec_concat -- confirm against the full file.
;; The template has two assembler dialects, {AT&T|Intel}.  The `$' immediate
;; prefix belongs to AT&T syntax only, so the Intel half spells the
;; immediate as plain 0x1.
11753 (define_insn "vec_set_hi_<mode><mask_name>"
11754 [(set (match_operand:V16FI 0 "register_operand" "=v")
11756 (vec_select:<ssehalfvecmode>
11757 (match_operand:V16FI 1 "register_operand" "v")
11758 (parallel [(const_int 0) (const_int 1)
11759 (const_int 2) (const_int 3)
11760 (const_int 4) (const_int 5)
11761 (const_int 6) (const_int 7)]))
11762 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
11764 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
11765 [(set_attr "type" "sselog")
11766 (set_attr "length_immediate" "1")
11767 (set_attr "prefix" "evex")
11768 (set_attr "mode" "<sseinsnmode>")])
;; Half-width insert with immediate 0 for the V8FI modes: the result pairs
;; the half-width operand 2 with elements 4..7 of operand 1, and is printed
;; as vinsert<shuffletype>64x4.
;; The template has two assembler dialects, {AT&T|Intel}.  The `$' immediate
;; prefix belongs to AT&T syntax only, so the Intel half spells the
;; immediate as plain 0x0.
11770 (define_insn "vec_set_lo_<mode><mask_name>"
11771 [(set (match_operand:V8FI 0 "register_operand" "=v")
11773 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11774 (vec_select:<ssehalfvecmode>
11775 (match_operand:V8FI 1 "register_operand" "v")
11776 (parallel [(const_int 4) (const_int 5)
11777 (const_int 6) (const_int 7)]))))]
11779 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
11780 [(set_attr "type" "sselog")
11781 (set_attr "length_immediate" "1")
11782 (set_attr "prefix" "evex")
11783 (set_attr "mode" "XI")])
;; Half-width insert with immediate 1 for the V8FI modes, printed as
;; vinsert<shuffletype>64x4: the result keeps elements 0..3 of operand 1 and
;; takes the remaining elements from the half-width operand 2.
;; The kept elements 0..3 are listed FIRST inside the enclosing two-operand
;; vector operation, because the first operand supplies the low-numbered
;; lanes.  Listing operand 2 first would describe an insert into the low
;; half, contradicting the immediate in the template.
;; NOTE(review): the opening line of that enclosing operation is elided from
;; this view; the closing parentheses show it wraps both operands, presumably
;; vec_concat -- confirm against the full file.
;; The template has two assembler dialects, {AT&T|Intel}.  The `$' immediate
;; prefix belongs to AT&T syntax only, so the Intel half spells the
;; immediate as plain 0x1.
11785 (define_insn "vec_set_hi_<mode><mask_name>"
11786 [(set (match_operand:V8FI 0 "register_operand" "=v")
11788 (vec_select:<ssehalfvecmode>
11789 (match_operand:V8FI 1 "register_operand" "v")
11790 (parallel [(const_int 0) (const_int 1)
11791 (const_int 2) (const_int 3)]))
11792 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
11794 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
11795 [(set_attr "type" "sselog")
11796 (set_attr "length_immediate" "1")
11797 (set_attr "prefix" "evex")
11798 (set_attr "mode" "XI")])
;; Masked vshuf<shuffletype>64x2 expander for the VI8F_256 modes.  Decodes the
;; immediate (operand 3) into explicit element indices for the _1 pattern:
;; bit 0 picks the consecutive pair {0,1} or {2,3}, bit 1 picks the
;; consecutive pair {4,5} or {6,7}.  Operands 4 and 5 are passed through
;; as the trailing arguments of the _1_mask generator.
11800 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
11801 [(match_operand:VI8F_256 0 "register_operand")
11802 (match_operand:VI8F_256 1 "register_operand")
11803 (match_operand:VI8F_256 2 "nonimmediate_operand")
11804 (match_operand:SI 3 "const_0_to_3_operand")
11805 (match_operand:VI8F_256 4 "register_operand")
11806 (match_operand:QI 5 "register_operand")]
11809 int mask = INTVAL (operands[3]);
11810 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
11811 (operands[0], operands[1], operands[2],
11812 GEN_INT (((mask >> 0) & 1) * 2 + 0),
11813 GEN_INT (((mask >> 0) & 1) * 2 + 1),
11814 GEN_INT (((mask >> 1) & 1) * 2 + 4),
11815 GEN_INT (((mask >> 1) & 1) * 2 + 5),
11816 operands[4], operands[5]));
;; vshuf<shuffletype>64x2 on VI8F_256 modes, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  The visible part of the condition
;; requires operands 3/4 and 5/6 to be consecutive index pairs.  The output
;; code folds the indices back into the instruction immediate (bit 0 from
;; operand 3, bit 1 from operand 5) and stores it in operand 3.
11820 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
11821 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
11822 (vec_select:VI8F_256
11823 (vec_concat:<ssedoublemode>
11824 (match_operand:VI8F_256 1 "register_operand" "v")
11825 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
11826 (parallel [(match_operand 3 "const_0_to_3_operand")
11827 (match_operand 4 "const_0_to_3_operand")
11828 (match_operand 5 "const_4_to_7_operand")
11829 (match_operand 6 "const_4_to_7_operand")])))]
11831 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11832 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
11835 mask = INTVAL (operands[3]) / 2;
11836 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
11837 operands[3] = GEN_INT (mask);
11838 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
11840 [(set_attr "type" "sselog")
11841 (set_attr "length_immediate" "1")
11842 (set_attr "prefix" "evex")
11843 (set_attr "mode" "XI")])
;; Masked vshuf<shuffletype>64x2 expander for the V8FI modes.  The 8-bit
;; immediate (operand 3) is split into four 2-bit fields; each field selects
;; one consecutive element pair.  Fields 0 and 1 yield indices in 0..7,
;; fields 2 and 3 yield indices in 8..15.  Operands 4 and 5 are passed
;; through as the trailing arguments of the _1_mask generator.
11845 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
11846 [(match_operand:V8FI 0 "register_operand")
11847 (match_operand:V8FI 1 "register_operand")
11848 (match_operand:V8FI 2 "nonimmediate_operand")
11849 (match_operand:SI 3 "const_0_to_255_operand")
11850 (match_operand:V8FI 4 "register_operand")
11851 (match_operand:QI 5 "register_operand")]
11854 int mask = INTVAL (operands[3]);
11855 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
11856 (operands[0], operands[1], operands[2],
11857 GEN_INT (((mask >> 0) & 3) * 2),
11858 GEN_INT (((mask >> 0) & 3) * 2 + 1),
11859 GEN_INT (((mask >> 2) & 3) * 2),
11860 GEN_INT (((mask >> 2) & 3) * 2 + 1),
11861 GEN_INT (((mask >> 4) & 3) * 2 + 8),
11862 GEN_INT (((mask >> 4) & 3) * 2 + 9),
11863 GEN_INT (((mask >> 6) & 3) * 2 + 8),
11864 GEN_INT (((mask >> 6) & 3) * 2 + 9),
11865 operands[4], operands[5]));
;; vshuf<shuffletype>64x2 on V8FI modes.  Operands 3..6 index into 0..7 and
;; operands 7..10 into 8..15 of the concatenation of operands 1 and 2.  The
;; visible part of the condition requires each of the four index pairs
;; (3/4, 5/6, 7/8, 9/10) to be consecutive.  The output code rebuilds the
;; immediate from the first index of each pair, two bits per pair, and
;; stores it in operand 3.
11869 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
11870 [(set (match_operand:V8FI 0 "register_operand" "=v")
11872 (vec_concat:<ssedoublemode>
11873 (match_operand:V8FI 1 "register_operand" "v")
11874 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
11875 (parallel [(match_operand 3 "const_0_to_7_operand")
11876 (match_operand 4 "const_0_to_7_operand")
11877 (match_operand 5 "const_0_to_7_operand")
11878 (match_operand 6 "const_0_to_7_operand")
11879 (match_operand 7 "const_8_to_15_operand")
11880 (match_operand 8 "const_8_to_15_operand")
11881 (match_operand 9 "const_8_to_15_operand")
11882 (match_operand 10 "const_8_to_15_operand")])))]
11884 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11885 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
11886 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11887 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
11890 mask = INTVAL (operands[3]) / 2;
11891 mask |= INTVAL (operands[5]) / 2 << 2;
11892 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
11893 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
11894 operands[3] = GEN_INT (mask);
11896 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11898 [(set_attr "type" "sselog")
11899 (set_attr "length_immediate" "1")
11900 (set_attr "prefix" "evex")
11901 (set_attr "mode" "<sseinsnmode>")])
;; Masked vshuf<shuffletype>32x4 expander for the VI4F_256 modes.  Bit 0 of the
;; immediate (operand 3) selects the four consecutive indices 0..3 or 4..7;
;; bit 1 selects 8..11 or 12..15.  Operands 4 and 5 are passed through as the
;; trailing arguments of the _1_mask generator.
11903 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
11904 [(match_operand:VI4F_256 0 "register_operand")
11905 (match_operand:VI4F_256 1 "register_operand")
11906 (match_operand:VI4F_256 2 "nonimmediate_operand")
11907 (match_operand:SI 3 "const_0_to_3_operand")
11908 (match_operand:VI4F_256 4 "register_operand")
11909 (match_operand:QI 5 "register_operand")]
11912 int mask = INTVAL (operands[3]);
11913 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
11914 (operands[0], operands[1], operands[2],
11915 GEN_INT (((mask >> 0) & 1) * 4 + 0),
11916 GEN_INT (((mask >> 0) & 1) * 4 + 1),
11917 GEN_INT (((mask >> 0) & 1) * 4 + 2),
11918 GEN_INT (((mask >> 0) & 1) * 4 + 3),
11919 GEN_INT (((mask >> 1) & 1) * 4 + 8),
11920 GEN_INT (((mask >> 1) & 1) * 4 + 9),
11921 GEN_INT (((mask >> 1) & 1) * 4 + 10),
11922 GEN_INT (((mask >> 1) & 1) * 4 + 11),
11923 operands[4], operands[5]));
;; vshuf<shuffletype>32x4 on VI4F_256 modes, as a vec_select from the
;; concatenation of operands 1 and 2.  The visible part of the condition
;; requires operands 3..6 to be four consecutive indices and operands 7..10
;; to be four consecutive indices.  The output code rebuilds the 2-bit
;; immediate from operands 3 and 7 and stores it in operand 3.
11927 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
11928 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
11929 (vec_select:VI4F_256
11930 (vec_concat:<ssedoublemode>
11931 (match_operand:VI4F_256 1 "register_operand" "v")
11932 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
11933 (parallel [(match_operand 3 "const_0_to_7_operand")
11934 (match_operand 4 "const_0_to_7_operand")
11935 (match_operand 5 "const_0_to_7_operand")
11936 (match_operand 6 "const_0_to_7_operand")
11937 (match_operand 7 "const_8_to_15_operand")
11938 (match_operand 8 "const_8_to_15_operand")
11939 (match_operand 9 "const_8_to_15_operand")
11940 (match_operand 10 "const_8_to_15_operand")])))]
11942 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11943 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11944 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11945 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11946 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11947 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
11950 mask = INTVAL (operands[3]) / 4;
11951 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
11952 operands[3] = GEN_INT (mask);
11954 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11956 [(set_attr "type" "sselog")
11957 (set_attr "length_immediate" "1")
11958 (set_attr "prefix" "evex")
11959 (set_attr "mode" "<sseinsnmode>")])
;; Masked vshuf<shuffletype>32x4 expander for the V16FI modes.  The 8-bit
;; immediate (operand 3) is split into four 2-bit fields; each field selects
;; a run of four consecutive indices.  Fields 0 and 1 yield indices in 0..15,
;; fields 2 and 3 yield indices in 16..31.  Operands 4 and 5 are passed
;; through as the trailing arguments of the _1_mask generator.
11961 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
11962 [(match_operand:V16FI 0 "register_operand")
11963 (match_operand:V16FI 1 "register_operand")
11964 (match_operand:V16FI 2 "nonimmediate_operand")
11965 (match_operand:SI 3 "const_0_to_255_operand")
11966 (match_operand:V16FI 4 "register_operand")
11967 (match_operand:HI 5 "register_operand")]
11970 int mask = INTVAL (operands[3]);
11971 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
11972 (operands[0], operands[1], operands[2],
11973 GEN_INT (((mask >> 0) & 3) * 4),
11974 GEN_INT (((mask >> 0) & 3) * 4 + 1),
11975 GEN_INT (((mask >> 0) & 3) * 4 + 2),
11976 GEN_INT (((mask >> 0) & 3) * 4 + 3),
11977 GEN_INT (((mask >> 2) & 3) * 4),
11978 GEN_INT (((mask >> 2) & 3) * 4 + 1),
11979 GEN_INT (((mask >> 2) & 3) * 4 + 2),
11980 GEN_INT (((mask >> 2) & 3) * 4 + 3),
11981 GEN_INT (((mask >> 4) & 3) * 4 + 16),
11982 GEN_INT (((mask >> 4) & 3) * 4 + 17),
11983 GEN_INT (((mask >> 4) & 3) * 4 + 18),
11984 GEN_INT (((mask >> 4) & 3) * 4 + 19),
11985 GEN_INT (((mask >> 6) & 3) * 4 + 16),
11986 GEN_INT (((mask >> 6) & 3) * 4 + 17),
11987 GEN_INT (((mask >> 6) & 3) * 4 + 18),
11988 GEN_INT (((mask >> 6) & 3) * 4 + 19),
11989 operands[4], operands[5]));
;; vshuf<shuffletype>32x4 on V16FI modes.  Operands 3..10 index into 0..15 and
;; operands 11..18 into 16..31 of the concatenation of operands 1 and 2.  The
;; visible part of the condition requires each group of four operands
;; (3..6, 7..10, 11..14, 15..18) to be consecutive indices.  The output code
;; rebuilds the 8-bit immediate from the first index of each group, two bits
;; per group, and stores it in operand 3.
11993 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
11994 [(set (match_operand:V16FI 0 "register_operand" "=v")
11996 (vec_concat:<ssedoublemode>
11997 (match_operand:V16FI 1 "register_operand" "v")
11998 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
11999 (parallel [(match_operand 3 "const_0_to_15_operand")
12000 (match_operand 4 "const_0_to_15_operand")
12001 (match_operand 5 "const_0_to_15_operand")
12002 (match_operand 6 "const_0_to_15_operand")
12003 (match_operand 7 "const_0_to_15_operand")
12004 (match_operand 8 "const_0_to_15_operand")
12005 (match_operand 9 "const_0_to_15_operand")
12006 (match_operand 10 "const_0_to_15_operand")
12007 (match_operand 11 "const_16_to_31_operand")
12008 (match_operand 12 "const_16_to_31_operand")
12009 (match_operand 13 "const_16_to_31_operand")
12010 (match_operand 14 "const_16_to_31_operand")
12011 (match_operand 15 "const_16_to_31_operand")
12012 (match_operand 16 "const_16_to_31_operand")
12013 (match_operand 17 "const_16_to_31_operand")
12014 (match_operand 18 "const_16_to_31_operand")])))]
12016 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12017 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12018 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12019 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12020 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12021 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12022 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12023 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12024 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12025 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12026 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12027 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12030 mask = INTVAL (operands[3]) / 4;
12031 mask |= INTVAL (operands[7]) / 4 << 2;
12032 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12033 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12034 operands[3] = GEN_INT (mask);
12036 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12038 [(set_attr "type" "sselog")
12039 (set_attr "length_immediate" "1")
12040 (set_attr "prefix" "evex")
12041 (set_attr "mode" "<sseinsnmode>")])
;; Masked V16SI pshufd expander.  The 8-bit immediate (operand 2) is split
;; into four 2-bit selectors; the same four selectors are passed four times
;; to avx512f_pshufd_1_mask with offsets 0, 4, 8 and 12.  Operands 3 and 4
;; are passed through as the trailing arguments.
12043 (define_expand "avx512f_pshufdv3_mask"
12044 [(match_operand:V16SI 0 "register_operand")
12045 (match_operand:V16SI 1 "nonimmediate_operand")
12046 (match_operand:SI 2 "const_0_to_255_operand")
12047 (match_operand:V16SI 3 "register_operand")
12048 (match_operand:HI 4 "register_operand")]
12051 int mask = INTVAL (operands[2]);
12052 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12053 GEN_INT ((mask >> 0) & 3),
12054 GEN_INT ((mask >> 2) & 3),
12055 GEN_INT ((mask >> 4) & 3),
12056 GEN_INT ((mask >> 6) & 3),
12057 GEN_INT (((mask >> 0) & 3) + 4),
12058 GEN_INT (((mask >> 2) & 3) + 4),
12059 GEN_INT (((mask >> 4) & 3) + 4),
12060 GEN_INT (((mask >> 6) & 3) + 4),
12061 GEN_INT (((mask >> 0) & 3) + 8),
12062 GEN_INT (((mask >> 2) & 3) + 8),
12063 GEN_INT (((mask >> 4) & 3) + 8),
12064 GEN_INT (((mask >> 6) & 3) + 8),
12065 GEN_INT (((mask >> 0) & 3) + 12),
12066 GEN_INT (((mask >> 2) & 3) + 12),
12067 GEN_INT (((mask >> 4) & 3) + 12),
12068 GEN_INT (((mask >> 6) & 3) + 12),
12069 operands[3], operands[4]));
;; vpshufd on V16SI with explicit per-element indices (operands 2..17).
;; The visible part of the condition requires operands 6..9, 10..13 and
;; 14..17 to equal operands 2..5 plus 4, 8 and 12 respectively, i.e. the same
;; four selectors in every group of four elements.  The output code packs
;; operands 2..5 into the immediate, two bits each, and stores it in
;; operand 2.
12073 (define_insn "avx512f_pshufd_1<mask_name>"
12074 [(set (match_operand:V16SI 0 "register_operand" "=v")
12076 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12077 (parallel [(match_operand 2 "const_0_to_3_operand")
12078 (match_operand 3 "const_0_to_3_operand")
12079 (match_operand 4 "const_0_to_3_operand")
12080 (match_operand 5 "const_0_to_3_operand")
12081 (match_operand 6 "const_4_to_7_operand")
12082 (match_operand 7 "const_4_to_7_operand")
12083 (match_operand 8 "const_4_to_7_operand")
12084 (match_operand 9 "const_4_to_7_operand")
12085 (match_operand 10 "const_8_to_11_operand")
12086 (match_operand 11 "const_8_to_11_operand")
12087 (match_operand 12 "const_8_to_11_operand")
12088 (match_operand 13 "const_8_to_11_operand")
12089 (match_operand 14 "const_12_to_15_operand")
12090 (match_operand 15 "const_12_to_15_operand")
12091 (match_operand 16 "const_12_to_15_operand")
12092 (match_operand 17 "const_12_to_15_operand")])))]
12094 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12095 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12096 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12097 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12098 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12099 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12100 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12101 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12102 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12103 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12104 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12105 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12108 mask |= INTVAL (operands[2]) << 0;
12109 mask |= INTVAL (operands[3]) << 2;
12110 mask |= INTVAL (operands[4]) << 4;
12111 mask |= INTVAL (operands[5]) << 6;
12112 operands[2] = GEN_INT (mask);
12114 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12116 [(set_attr "type" "sselog1")
12117 (set_attr "prefix" "evex")
12118 (set_attr "length_immediate" "1")
12119 (set_attr "mode" "XI")])
;; Masked V8SI pshufd expander.  Splits the 8-bit immediate (operand 2) into
;; four 2-bit selectors and passes them twice to avx2_pshufd_1_mask, with
;; offsets 0 and 4.  Operands 3 and 4 are passed through as the trailing
;; arguments.
12121 (define_expand "avx512vl_pshufdv3_mask"
12122 [(match_operand:V8SI 0 "register_operand")
12123 (match_operand:V8SI 1 "nonimmediate_operand")
12124 (match_operand:SI 2 "const_0_to_255_operand")
12125 (match_operand:V8SI 3 "register_operand")
12126 (match_operand:QI 4 "register_operand")]
12129 int mask = INTVAL (operands[2]);
12130 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
12131 GEN_INT ((mask >> 0) & 3),
12132 GEN_INT ((mask >> 2) & 3),
12133 GEN_INT ((mask >> 4) & 3),
12134 GEN_INT ((mask >> 6) & 3),
12135 GEN_INT (((mask >> 0) & 3) + 4),
12136 GEN_INT (((mask >> 2) & 3) + 4),
12137 GEN_INT (((mask >> 4) & 3) + 4),
12138 GEN_INT (((mask >> 6) & 3) + 4),
12139 operands[3], operands[4]));
;; Unmasked V8SI pshufd expander.  Splits the 8-bit immediate (operand 2) into
;; four 2-bit selectors and passes them twice to avx2_pshufd_1, with
;; offsets 0 and 4.
12143 (define_expand "avx2_pshufdv3"
12144 [(match_operand:V8SI 0 "register_operand")
12145 (match_operand:V8SI 1 "nonimmediate_operand")
12146 (match_operand:SI 2 "const_0_to_255_operand")]
12149 int mask = INTVAL (operands[2]);
12150 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
12151 GEN_INT ((mask >> 0) & 3),
12152 GEN_INT ((mask >> 2) & 3),
12153 GEN_INT ((mask >> 4) & 3),
12154 GEN_INT ((mask >> 6) & 3),
12155 GEN_INT (((mask >> 0) & 3) + 4),
12156 GEN_INT (((mask >> 2) & 3) + 4),
12157 GEN_INT (((mask >> 4) & 3) + 4),
12158 GEN_INT (((mask >> 6) & 3) + 4)));
;; vpshufd on V8SI with explicit per-element indices (operands 2..9).
;; The visible part of the condition requires operands 6..9 to equal
;; operands 2..5 plus 4, and includes <mask_avx512vl_condition>.  The output
;; code packs operands 2..5 into the immediate, two bits each, and stores
;; it in operand 2.
12162 (define_insn "avx2_pshufd_1<mask_name>"
12163 [(set (match_operand:V8SI 0 "register_operand" "=v")
12165 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
12166 (parallel [(match_operand 2 "const_0_to_3_operand")
12167 (match_operand 3 "const_0_to_3_operand")
12168 (match_operand 4 "const_0_to_3_operand")
12169 (match_operand 5 "const_0_to_3_operand")
12170 (match_operand 6 "const_4_to_7_operand")
12171 (match_operand 7 "const_4_to_7_operand")
12172 (match_operand 8 "const_4_to_7_operand")
12173 (match_operand 9 "const_4_to_7_operand")])))]
12175 && <mask_avx512vl_condition>
12176 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12177 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12178 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12179 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
12182 mask |= INTVAL (operands[2]) << 0;
12183 mask |= INTVAL (operands[3]) << 2;
12184 mask |= INTVAL (operands[4]) << 4;
12185 mask |= INTVAL (operands[5]) << 6;
12186 operands[2] = GEN_INT (mask);
12188 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12190 [(set_attr "type" "sselog1")
12191 (set_attr "prefix" "maybe_evex")
12192 (set_attr "length_immediate" "1")
12193 (set_attr "mode" "OI")])
;; Masked V4SI pshufd expander.  Splits the 8-bit immediate (operand 2) into
;; four 2-bit selectors for sse2_pshufd_1_mask.  Operands 3 and 4 are passed
;; through as the trailing arguments.
12195 (define_expand "avx512vl_pshufd_mask"
12196 [(match_operand:V4SI 0 "register_operand")
12197 (match_operand:V4SI 1 "nonimmediate_operand")
12198 (match_operand:SI 2 "const_0_to_255_operand")
12199 (match_operand:V4SI 3 "register_operand")
12200 (match_operand:QI 4 "register_operand")]
12203 int mask = INTVAL (operands[2]);
12204 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
12205 GEN_INT ((mask >> 0) & 3),
12206 GEN_INT ((mask >> 2) & 3),
12207 GEN_INT ((mask >> 4) & 3),
12208 GEN_INT ((mask >> 6) & 3),
12209 operands[3], operands[4]));
;; Unmasked V4SI pshufd expander.  Operand 2 is any const_int; only its low
;; eight bits are used, split into four 2-bit selectors for sse2_pshufd_1.
12213 (define_expand "sse2_pshufd"
12214 [(match_operand:V4SI 0 "register_operand")
12215 (match_operand:V4SI 1 "nonimmediate_operand")
12216 (match_operand:SI 2 "const_int_operand")]
12219 int mask = INTVAL (operands[2]);
12220 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
12221 GEN_INT ((mask >> 0) & 3),
12222 GEN_INT ((mask >> 2) & 3),
12223 GEN_INT ((mask >> 4) & 3),
12224 GEN_INT ((mask >> 6) & 3)));
;; pshufd on V4SI with four explicit element indices (operands 2..5, each
;; 0..3).  The output code packs them into the immediate, two bits each,
;; stores it in operand 2 and prints %vpshufd.
12228 (define_insn "sse2_pshufd_1<mask_name>"
12229 [(set (match_operand:V4SI 0 "register_operand" "=v")
12231 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
12232 (parallel [(match_operand 2 "const_0_to_3_operand")
12233 (match_operand 3 "const_0_to_3_operand")
12234 (match_operand 4 "const_0_to_3_operand")
12235 (match_operand 5 "const_0_to_3_operand")])))]
12236 "TARGET_SSE2 && <mask_avx512vl_condition>"
12239 mask |= INTVAL (operands[2]) << 0;
12240 mask |= INTVAL (operands[3]) << 2;
12241 mask |= INTVAL (operands[4]) << 4;
12242 mask |= INTVAL (operands[5]) << 6;
12243 operands[2] = GEN_INT (mask);
12245 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12247 [(set_attr "type" "sselog1")
12248 (set_attr "prefix_data16" "1")
12249 (set_attr "prefix" "<mask_prefix2>")
12250 (set_attr "length_immediate" "1")
12251 (set_attr "mode" "TI")])
;; vpshuflw on V32HI: operand 1 is the source vector, operand 2 the 8-bit
;; immediate, which is printed directly in the template.
;; NOTE(review): the RTL operator wrapping the two operands and the insn
;; condition are on lines not visible in this chunk.
12253 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12254 [(set (match_operand:V32HI 0 "register_operand" "=v")
12256 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12257 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12260 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12261 [(set_attr "type" "sselog")
12262 (set_attr "prefix" "evex")
12263 (set_attr "mode" "XI")])
;; Masked V16HI pshuflw expander, enabled for TARGET_AVX512VL with
;; TARGET_AVX512BW.  Splits the 8-bit immediate (operand 2) into four 2-bit
;; selectors and passes them twice to avx2_pshuflw_1_mask, with offsets 0
;; and 8.  Operands 3 and 4 are passed through as the trailing arguments.
12265 (define_expand "avx512vl_pshuflwv3_mask"
12266 [(match_operand:V16HI 0 "register_operand")
12267 (match_operand:V16HI 1 "nonimmediate_operand")
12268 (match_operand:SI 2 "const_0_to_255_operand")
12269 (match_operand:V16HI 3 "register_operand")
12270 (match_operand:HI 4 "register_operand")]
12271 "TARGET_AVX512VL && TARGET_AVX512BW"
12273 int mask = INTVAL (operands[2]);
12274 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
12275 GEN_INT ((mask >> 0) & 3),
12276 GEN_INT ((mask >> 2) & 3),
12277 GEN_INT ((mask >> 4) & 3),
12278 GEN_INT ((mask >> 6) & 3),
12279 GEN_INT (((mask >> 0) & 3) + 8),
12280 GEN_INT (((mask >> 2) & 3) + 8),
12281 GEN_INT (((mask >> 4) & 3) + 8),
12282 GEN_INT (((mask >> 6) & 3) + 8),
12283 operands[3], operands[4]));
;; Unmasked V16HI pshuflw expander.  Splits the 8-bit immediate (operand 2)
;; into four 2-bit selectors and passes them twice to avx2_pshuflw_1, with
;; offsets 0 and 8.
12287 (define_expand "avx2_pshuflwv3"
12288 [(match_operand:V16HI 0 "register_operand")
12289 (match_operand:V16HI 1 "nonimmediate_operand")
12290 (match_operand:SI 2 "const_0_to_255_operand")]
12293 int mask = INTVAL (operands[2]);
12294 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
12295 GEN_INT ((mask >> 0) & 3),
12296 GEN_INT ((mask >> 2) & 3),
12297 GEN_INT ((mask >> 4) & 3),
12298 GEN_INT ((mask >> 6) & 3),
12299 GEN_INT (((mask >> 0) & 3) + 8),
12300 GEN_INT (((mask >> 2) & 3) + 8),
12301 GEN_INT (((mask >> 4) & 3) + 8),
12302 GEN_INT (((mask >> 6) & 3) + 8)));
;; vpshuflw on V16HI with explicit indices: operands 2..5 are in 0..3 and
;; operands 6..9 are in 8..11.  The visible part of the condition requires
;; operands 6..9 to equal operands 2..5 plus 8.  The output code packs
;; operands 2..5 into the immediate, two bits each, and stores it in
;; operand 2.
;; NOTE(review): the other entries of the parallel are on lines not visible
;; in this chunk; only the final (const_int 15) is shown.
12306 (define_insn "avx2_pshuflw_1<mask_name>"
12307 [(set (match_operand:V16HI 0 "register_operand" "=v")
12309 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12310 (parallel [(match_operand 2 "const_0_to_3_operand")
12311 (match_operand 3 "const_0_to_3_operand")
12312 (match_operand 4 "const_0_to_3_operand")
12313 (match_operand 5 "const_0_to_3_operand")
12318 (match_operand 6 "const_8_to_11_operand")
12319 (match_operand 7 "const_8_to_11_operand")
12320 (match_operand 8 "const_8_to_11_operand")
12321 (match_operand 9 "const_8_to_11_operand")
12325 (const_int 15)])))]
12327 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12328 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12329 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12330 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12331 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12334 mask |= INTVAL (operands[2]) << 0;
12335 mask |= INTVAL (operands[3]) << 2;
12336 mask |= INTVAL (operands[4]) << 4;
12337 mask |= INTVAL (operands[5]) << 6;
12338 operands[2] = GEN_INT (mask);
12340 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12342 [(set_attr "type" "sselog")
12343 (set_attr "prefix" "maybe_evex")
12344 (set_attr "length_immediate" "1")
12345 (set_attr "mode" "OI")])
;; Masked V8HI pshuflw expander, enabled for TARGET_AVX512VL with
;; TARGET_AVX512BW.  Splits the 8-bit immediate (operand 2) into four 2-bit
;; selectors for sse2_pshuflw_1_mask.  Operands 3 and 4 are passed through
;; as the trailing arguments.
12347 (define_expand "avx512vl_pshuflw_mask"
12348 [(match_operand:V8HI 0 "register_operand")
12349 (match_operand:V8HI 1 "nonimmediate_operand")
12350 (match_operand:SI 2 "const_0_to_255_operand")
12351 (match_operand:V8HI 3 "register_operand")
12352 (match_operand:QI 4 "register_operand")]
12353 "TARGET_AVX512VL && TARGET_AVX512BW"
12355 int mask = INTVAL (operands[2]);
12356 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
12357 GEN_INT ((mask >> 0) & 3),
12358 GEN_INT ((mask >> 2) & 3),
12359 GEN_INT ((mask >> 4) & 3),
12360 GEN_INT ((mask >> 6) & 3),
12361 operands[3], operands[4]));
;; Unmasked V8HI pshuflw expander.  Operand 2 is any const_int; only its low
;; eight bits are used, split into four 2-bit selectors for sse2_pshuflw_1.
12365 (define_expand "sse2_pshuflw"
12366 [(match_operand:V8HI 0 "register_operand")
12367 (match_operand:V8HI 1 "nonimmediate_operand")
12368 (match_operand:SI 2 "const_int_operand")]
12371 int mask = INTVAL (operands[2]);
12372 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
12373 GEN_INT ((mask >> 0) & 3),
12374 GEN_INT ((mask >> 2) & 3),
12375 GEN_INT ((mask >> 4) & 3),
12376 GEN_INT ((mask >> 6) & 3)));
;; pshuflw on V8HI with four explicit indices (operands 2..5, each 0..3).
;; The output code packs them into the immediate, two bits each, stores it
;; in operand 2 and prints %vpshuflw.
;; NOTE(review): the remaining entries of the parallel are on lines not
;; visible in this chunk.
12380 (define_insn "sse2_pshuflw_1<mask_name>"
12381 [(set (match_operand:V8HI 0 "register_operand" "=v")
12383 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12384 (parallel [(match_operand 2 "const_0_to_3_operand")
12385 (match_operand 3 "const_0_to_3_operand")
12386 (match_operand 4 "const_0_to_3_operand")
12387 (match_operand 5 "const_0_to_3_operand")
12392 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12395 mask |= INTVAL (operands[2]) << 0;
12396 mask |= INTVAL (operands[3]) << 2;
12397 mask |= INTVAL (operands[4]) << 4;
12398 mask |= INTVAL (operands[5]) << 6;
12399 operands[2] = GEN_INT (mask);
12401 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12403 [(set_attr "type" "sselog")
12404 (set_attr "prefix_data16" "0")
12405 (set_attr "prefix_rep" "1")
12406 (set_attr "prefix" "maybe_vex")
12407 (set_attr "length_immediate" "1")
12408 (set_attr "mode" "TI")])
;; Unmasked V16HI pshufhw expander.  Splits the 8-bit immediate (operand 2)
;; into four 2-bit selectors and passes them twice to avx2_pshufhw_1, with
;; offsets 4 and 12.
12410 (define_expand "avx2_pshufhwv3"
12411 [(match_operand:V16HI 0 "register_operand")
12412 (match_operand:V16HI 1 "nonimmediate_operand")
12413 (match_operand:SI 2 "const_0_to_255_operand")]
12416 int mask = INTVAL (operands[2]);
12417 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
12418 GEN_INT (((mask >> 0) & 3) + 4),
12419 GEN_INT (((mask >> 2) & 3) + 4),
12420 GEN_INT (((mask >> 4) & 3) + 4),
12421 GEN_INT (((mask >> 6) & 3) + 4),
12422 GEN_INT (((mask >> 0) & 3) + 12),
12423 GEN_INT (((mask >> 2) & 3) + 12),
12424 GEN_INT (((mask >> 4) & 3) + 12),
12425 GEN_INT (((mask >> 6) & 3) + 12)));
;; vpshufhw on V32HI: operand 1 is the source vector, operand 2 the 8-bit
;; immediate, which is printed directly in the template.
;; NOTE(review): the RTL operator wrapping the two operands and the insn
;; condition are on lines not visible in this chunk.
12429 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12430 [(set (match_operand:V32HI 0 "register_operand" "=v")
12432 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12433 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12436 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12437 [(set_attr "type" "sselog")
12438 (set_attr "prefix" "evex")
12439 (set_attr "mode" "XI")])
;; Masked V16HI pshufhw expander, enabled for TARGET_AVX512VL with
;; TARGET_AVX512BW.  Splits the 8-bit immediate (operand 2) into four 2-bit
;; selectors and passes them twice to avx2_pshufhw_1_mask, with offsets 4
;; and 12.  Operands 3 and 4 are passed through as the trailing arguments.
12441 (define_expand "avx512vl_pshufhwv3_mask"
12442 [(match_operand:V16HI 0 "register_operand")
12443 (match_operand:V16HI 1 "nonimmediate_operand")
12444 (match_operand:SI 2 "const_0_to_255_operand")
12445 (match_operand:V16HI 3 "register_operand")
12446 (match_operand:HI 4 "register_operand")]
12447 "TARGET_AVX512VL && TARGET_AVX512BW"
12449 int mask = INTVAL (operands[2]);
12450 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
12451 GEN_INT (((mask >> 0) & 3) + 4),
12452 GEN_INT (((mask >> 2) & 3) + 4),
12453 GEN_INT (((mask >> 4) & 3) + 4),
12454 GEN_INT (((mask >> 6) & 3) + 4),
12455 GEN_INT (((mask >> 0) & 3) + 12),
12456 GEN_INT (((mask >> 2) & 3) + 12),
12457 GEN_INT (((mask >> 4) & 3) + 12),
12458 GEN_INT (((mask >> 6) & 3) + 12),
12459 operands[3], operands[4]));
;; vpshufhw on V16HI with explicit indices: operands 2..5 are in 4..7 and
;; operands 6..9 are in 12..15.  The visible part of the condition requires
;; operands 6..9 to equal operands 2..5 plus 8.  The output code subtracts 4
;; from operands 2..5, packs the results into the immediate, two bits each,
;; and stores it in operand 2.
;; NOTE(review): the other fixed entries of the parallel are on lines not
;; visible in this chunk; only the leading (const_int 0) is shown.
12463 (define_insn "avx2_pshufhw_1<mask_name>"
12464 [(set (match_operand:V16HI 0 "register_operand" "=v")
12466 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12467 (parallel [(const_int 0)
12471 (match_operand 2 "const_4_to_7_operand")
12472 (match_operand 3 "const_4_to_7_operand")
12473 (match_operand 4 "const_4_to_7_operand")
12474 (match_operand 5 "const_4_to_7_operand")
12479 (match_operand 6 "const_12_to_15_operand")
12480 (match_operand 7 "const_12_to_15_operand")
12481 (match_operand 8 "const_12_to_15_operand")
12482 (match_operand 9 "const_12_to_15_operand")])))]
12484 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12485 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12486 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12487 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12488 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12491 mask |= (INTVAL (operands[2]) - 4) << 0;
12492 mask |= (INTVAL (operands[3]) - 4) << 2;
12493 mask |= (INTVAL (operands[4]) - 4) << 4;
12494 mask |= (INTVAL (operands[5]) - 4) << 6;
12495 operands[2] = GEN_INT (mask);
12497 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12499 [(set_attr "type" "sselog")
12500 (set_attr "prefix" "maybe_evex")
12501 (set_attr "length_immediate" "1")
12502 (set_attr "mode" "OI")])
;; Masked V8HI pshufhw expander, enabled for TARGET_AVX512VL with
;; TARGET_AVX512BW.  Splits the 8-bit immediate (operand 2) into four 2-bit
;; selectors, adds 4 to each, and passes them to sse2_pshufhw_1_mask.
;; Operands 3 and 4 are passed through as the trailing arguments.
12504 (define_expand "avx512vl_pshufhw_mask"
12505 [(match_operand:V8HI 0 "register_operand")
12506 (match_operand:V8HI 1 "nonimmediate_operand")
12507 (match_operand:SI 2 "const_0_to_255_operand")
12508 (match_operand:V8HI 3 "register_operand")
12509 (match_operand:QI 4 "register_operand")]
12510 "TARGET_AVX512VL && TARGET_AVX512BW"
12512 int mask = INTVAL (operands[2]);
12513 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12514 GEN_INT (((mask >> 0) & 3) + 4),
12515 GEN_INT (((mask >> 2) & 3) + 4),
12516 GEN_INT (((mask >> 4) & 3) + 4),
12517 GEN_INT (((mask >> 6) & 3) + 4),
12518 operands[3], operands[4]));
;; Unmasked V8HI pshufhw expander.  Operand 2 is any const_int; only its low
;; eight bits are used, split into four 2-bit selectors that are offset by 4
;; before being passed to sse2_pshufhw_1.
12522 (define_expand "sse2_pshufhw"
12523 [(match_operand:V8HI 0 "register_operand")
12524 (match_operand:V8HI 1 "nonimmediate_operand")
12525 (match_operand:SI 2 "const_int_operand")]
12528 int mask = INTVAL (operands[2]);
12529 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12530 GEN_INT (((mask >> 0) & 3) + 4),
12531 GEN_INT (((mask >> 2) & 3) + 4),
12532 GEN_INT (((mask >> 4) & 3) + 4),
12533 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw on V8HI with four explicit indices (operands 2..5, each 4..7).
;; The output code subtracts 4 from each, packs the results into the
;; immediate, two bits each, stores it in operand 2 and prints %vpshufhw.
;; NOTE(review): the other fixed entries of the parallel are on lines not
;; visible in this chunk; only the leading (const_int 0) is shown.
12537 (define_insn "sse2_pshufhw_1<mask_name>"
12538 [(set (match_operand:V8HI 0 "register_operand" "=v")
12540 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12541 (parallel [(const_int 0)
12545 (match_operand 2 "const_4_to_7_operand")
12546 (match_operand 3 "const_4_to_7_operand")
12547 (match_operand 4 "const_4_to_7_operand")
12548 (match_operand 5 "const_4_to_7_operand")])))]
12549 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12552 mask |= (INTVAL (operands[2]) - 4) << 0;
12553 mask |= (INTVAL (operands[3]) - 4) << 2;
12554 mask |= (INTVAL (operands[4]) - 4) << 4;
12555 mask |= (INTVAL (operands[5]) - 4) << 6;
12556 operands[2] = GEN_INT (mask);
12558 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12560 [(set_attr "type" "sselog")
12561 (set_attr "prefix_rep" "1")
12562 (set_attr "prefix_data16" "0")
12563 (set_attr "prefix" "maybe_vex")
12564 (set_attr "length_immediate" "1")
12565 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from the SImode operand 1 (wrapped in
;; vec_duplicate).  The preparation statement sets operand 2 to the V4SI
;; zero constant.
;; NOTE(review): the RTL operator that combines the duplicate with operand 2
;; is on lines not visible in this chunk.
12567 (define_expand "sse2_loadd"
12568 [(set (match_operand:V4SI 0 "register_operand")
12570 (vec_duplicate:V4SI
12571 (match_operand:SI 1 "nonimmediate_operand"))
12575 "operands[2] = CONST0_RTX (V4SImode);")
;; Load the SImode operand 2 into a V4SI register, with operand 1 being
;; either zero (constraint C) or a register.  Five alternatives:
;;   0, 1: %vmovd from memory or from a general register (operand 1 is zero);
;;   2, 3: legacy movss (isa "noavx");
;;   4:    three-operand vmovss (isa "avx").
;; NOTE(review): the combining RTL operator and the insn condition are on
;; lines not visible in this chunk.
12577 (define_insn "sse2_loadld"
12578 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12580 (vec_duplicate:V4SI
12581 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12582 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12586 %vmovd\t{%2, %0|%0, %2}
12587 %vmovd\t{%2, %0|%0, %2}
12588 movss\t{%2, %0|%0, %2}
12589 movss\t{%2, %0|%0, %2}
12590 vmovss\t{%2, %1, %0|%0, %1, %2}"
12591 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
12592 (set_attr "type" "ssemov")
12593 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12594 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one scalar element (index operand 2) from a VI12_128 vector
;; register with %vpextr<ssemodesuffix>.  Alternative 0 writes a general
;; register (printed with the %k modifier), alternative 1 writes memory.
;; The prefix_data16 and prefix_extra attributes are computed per alternative
;; and mode, with a special case for alternative 0 in V8HImode.
;; NOTE(review): the insn condition is on a line not visible in this chunk.
12596 (define_insn "*vec_extract<mode>"
12597 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
12598 (vec_select:<ssescalarmode>
12599 (match_operand:VI12_128 1 "register_operand" "x,x")
12601 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12604 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12605 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12606 [(set_attr "type" "sselog1")
12607 (set (attr "prefix_data16")
12609 (and (eq_attr "alternative" "0")
12610 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12612 (const_string "*")))
12613 (set (attr "prefix_extra")
12615 (and (eq_attr "alternative" "0")
12616 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12618 (const_string "1")))
12619 (set_attr "length_immediate" "1")
12620 (set_attr "prefix" "maybe_vex")
12621 (set_attr "mode" "TI")])
;; Extract one HImode element (index operand 2, 0..7) from a V8HI register
;; into a general register with legacy pextrw.  Only enabled when SSE2 is
;; available but SSE4.1 is not.
12623 (define_insn "*vec_extractv8hi_sse2"
12624 [(set (match_operand:HI 0 "register_operand" "=r")
12626 (match_operand:V8HI 1 "register_operand" "x")
12628 [(match_operand:SI 2 "const_0_to_7_operand")])))]
12629 "TARGET_SSE2 && !TARGET_SSE4_1"
12630 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
12631 [(set_attr "type" "sselog1")
12632 (set_attr "prefix_data16" "1")
12633 (set_attr "length_immediate" "1")
12634 (set_attr "mode" "TI")])
;; Extract the V16QI element selected by immediate operand 2 (0..15) into
;; an SWI48 general register with (v)pextrb; the destination is printed
;; as %k0.
;; NOTE(review): the "_zext" name suggests the value is zero-extended to
;; the destination mode; the wrapping rtx and the insn condition are not
;; visible in this listing -- confirm.
12636 (define_insn "*vec_extractv16qi_zext"
12637 [(set (match_operand:SWI48 0 "register_operand" "=r")
12640 (match_operand:V16QI 1 "register_operand" "x")
12642 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
12644 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
12645 [(set_attr "type" "sselog1")
12646 (set_attr "prefix_extra" "1")
12647 (set_attr "length_immediate" "1")
12648 (set_attr "prefix" "maybe_vex")
12649 (set_attr "mode" "TI")])
;; Extract the V8HI element selected by immediate operand 2 (0..7) into an
;; SWI48 general register with (v)pextrw; the destination is printed as
;; %k0.
;; NOTE(review): the "_zext" name suggests the value is zero-extended to
;; the destination mode; the wrapping rtx and the insn condition are not
;; visible in this listing -- confirm.
12651 (define_insn "*vec_extractv8hi_zext"
12652 [(set (match_operand:SWI48 0 "register_operand" "=r")
12655 (match_operand:V8HI 1 "register_operand" "x")
12657 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
12659 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
12660 [(set_attr "type" "sselog1")
12661 (set_attr "prefix_data16" "1")
12662 (set_attr "length_immediate" "1")
12663 (set_attr "prefix" "maybe_vex")
12664 (set_attr "mode" "TI")])
;; Element extraction from a VI12_128 vector held in offsettable memory
;; ("o") into a general register; operand 2 is the constant element index.
;; NOTE(review): neither the condition nor the output template is visible
;; in this listing; presumably the insn is split later -- confirm.
12666 (define_insn "*vec_extract<mode>_mem"
12667 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12668 (vec_select:<ssescalarmode>
12669 (match_operand:VI12_128 1 "memory_operand" "o")
12671 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extraction of element 0 (parallel [(const_int 0)]) from an <ssevecmode>
;; vector into an SWI48 destination.  The condition rejects the case where
;; both operands are memory.  Alternative 1 (x -> r) is restricted to the
;; "sse4" isa.
;; NOTE(review): the output template is not visible in this listing.
12675 (define_insn "*vec_extract<ssevecmodelower>_0"
12676 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12678 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12679 (parallel [(const_int 0)])))]
12680 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12682 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register extracted into a DImode general register.
;; Requires TARGET_64BIT, TARGET_SSE2 and inter-unit moves from vector
;; registers.  After reload the insn is split into
;; (set dest (zero_extend:DI src)), where src is operand 1 re-created as an
;; SImode register with the same register number.
12684 (define_insn_and_split "*vec_extractv4si_0_zext"
12685 [(set (match_operand:DI 0 "register_operand" "=r")
12688 (match_operand:V4SI 1 "register_operand" "x")
12689 (parallel [(const_int 0)]))))]
12690 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12692 "&& reload_completed"
12693 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12694 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element 0 of a V2DI operand extracted into a DImode SSE register or
;; memory.  Only enabled for TARGET_SSE on non-64-bit targets, and never
;; with both operands in memory.
;; NOTE(review): the output template is not visible in this listing.
12696 (define_insn "*vec_extractv2di_0_sse"
12697 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12699 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12700 (parallel [(const_int 0)])))]
12701 "TARGET_SSE && !TARGET_64BIT
12702 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Post-reload split: extracting element 0 of a vector register becomes a
;; plain move.  Operand 1 is re-created as a <MODE>mode register with the
;; same hard register number, so no data movement inside the vector is
;; needed.
12706 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12708 (match_operand:<ssevecmode> 1 "register_operand")
12709 (parallel [(const_int 0)])))]
12710 "TARGET_SSE && reload_completed"
12711 [(set (match_dup 0) (match_dup 1))]
12712 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extract the V4SI element selected by immediate operand 2 (0..3) into an
;; SImode destination.  The C output code switches on which_alternative
;; and returns either a (v)pextrd template or a whole-register byte shift
;; (psrldq / vpsrldq).  For the shift forms operand 2 is first rewritten
;; to the byte count index * 4.
12714 (define_insn "*vec_extractv4si"
12715 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
12717 (match_operand:V4SI 1 "register_operand" "x,0,0,x")
12718 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12721 switch (which_alternative)
12724 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12728 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12729 return "psrldq\t{%2, %0|%0, %2}";
12732 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12733 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12736 gcc_unreachable ();
12739 [(set_attr "isa" "*,noavx,noavx,avx")
12740 (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
12741 (set_attr "prefix_extra" "1,*,*,*")
12742 (set_attr "length_immediate" "1")
12743 (set_attr "prefix" "maybe_vex,orig,orig,vex")
12744 (set_attr "mode" "TI")])
;; Extract the V4SI element selected by immediate operand 2 into a DImode
;; general register with (v)pextrd, printing the destination as %k0.
;; Enabled for TARGET_64BIT && TARGET_SSE4_1.
;; NOTE(review): the wrapping rtx implied by "_zext" is not visible in
;; this listing -- confirm.
12746 (define_insn "*vec_extractv4si_zext"
12747 [(set (match_operand:DI 0 "register_operand" "=r")
12750 (match_operand:V4SI 1 "register_operand" "x")
12751 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12752 "TARGET_64BIT && TARGET_SSE4_1"
12753 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
12754 [(set_attr "type" "sselog1")
12755 (set_attr "prefix_extra" "1")
12756 (set_attr "length_immediate" "1")
12757 (set_attr "prefix" "maybe_vex")
12758 (set_attr "mode" "TI")])
;; Extraction of a V4SI element (constant index operand 2, 0..3) from
;; offsettable memory into an SSE ("x") or general ("r") register.
;; NOTE(review): condition and output template are not visible in this
;; listing; presumably the insn is split into a scalar load -- confirm.
12760 (define_insn "*vec_extractv4si_mem"
12761 [(set (match_operand:SI 0 "register_operand" "=x,r")
12763 (match_operand:V4SI 1 "memory_operand" "o,o")
12764 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; V4SI element in offsettable memory extracted into a DImode register
;; (64-bit SSE targets only).  After reload it is split into
;; (set dest (zero_extend:DI mem)), where mem is operand 1 narrowed to
;; SImode at byte offset index * 4 via adjust_address.
12768 (define_insn_and_split "*vec_extractv4si_zext_mem"
12769 [(set (match_operand:DI 0 "register_operand" "=x,r")
12772 (match_operand:V4SI 1 "memory_operand" "o,o")
12773 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12774 "TARGET_64BIT && TARGET_SSE"
12776 "&& reload_completed"
12777 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12779 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extraction of element 1 (the high quadword) of a V2DI operand.  The
;; visible templates cover, in order: (v)pextrq $1, (v)movhps, psrldq $8,
;; vpsrldq $8 and movhlps.  The last two alternatives take the source from
;; offsettable memory ("o"); their templates are not visible in this
;; listing.  Memory-to-memory is rejected by the condition.
12782 (define_insn "*vec_extractv2di_1"
12783 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
12785 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
12786 (parallel [(const_int 1)])))]
12787 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12789 %vpextrq\t{$1, %1, %0|%0, %1, 1}
12790 %vmovhps\t{%1, %0|%0, %1}
12791 psrldq\t{$8, %0|%0, 8}
12792 vpsrldq\t{$8, %1, %0|%0, %1, 8}
12793 movhlps\t{%1, %0|%0, %1}
12796 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
12797 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
12798 (set_attr "length_immediate" "1,*,1,1,*,*,*")
12799 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
12800 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
12801 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
12802 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Post-reload split: extracting a constant-index element from a VI_128
;; vector in memory becomes a plain scalar load.  The byte offset is
;; index * GET_MODE_SIZE (<ssescalarmode>mode), and operand 1 is narrowed
;; to the scalar mode at that offset with adjust_address.
12805 [(set (match_operand:<ssescalarmode> 0 "register_operand")
12806 (vec_select:<ssescalarmode>
12807 (match_operand:VI_128 1 "memory_operand")
12809 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12810 "TARGET_SSE && reload_completed"
12811 [(set (match_dup 0) (match_dup 1))]
12813 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12815 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
12818 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
12819 ;; vector modes into vec_extract*.
;; The split applies only while pseudos can still be created and only when
;; operand 1 is a SUBREG at byte 0 of a REG whose mode is an integer or
;; float vector of 16, 32 or 64 bytes (64 requires TARGET_AVX512F).  The
;; replacement is a vec_select of element 0.  The preparation code strips
;; the SUBREG and, depending on the vector size, first narrows the source
;; with the vec_extract_lo_* generators (v16si/v8di, then v8si/v4di)
;; before viewing it as <ssevecmode>mode.
;; NOTE(review): case labels and some conditions of the switch are not
;; visible in this listing -- confirm the exact narrowing sequence.
12821 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12822 (match_operand:SWI48x 1 "register_operand"))]
12823 "can_create_pseudo_p ()
12824 && GET_CODE (operands[1]) == SUBREG
12825 && REG_P (SUBREG_REG (operands[1]))
12826 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
12827 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
12828 == MODE_VECTOR_FLOAT))
12829 && SUBREG_BYTE (operands[1]) == 0
12831 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
12832 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
12834 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
12835 && TARGET_AVX512F))
12836 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
12837 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
12838 (parallel [(const_int 0)])))]
12841 operands[1] = SUBREG_REG (operands[1]);
12842 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
12845 if (<MODE>mode == SImode)
12847 tmp = gen_reg_rtx (V8SImode);
12848 emit_insn (gen_vec_extract_lo_v16si (tmp,
12849 gen_lowpart (V16SImode,
12854 tmp = gen_reg_rtx (V4DImode);
12855 emit_insn (gen_vec_extract_lo_v8di (tmp,
12856 gen_lowpart (V8DImode,
12862 tmp = gen_reg_rtx (<ssevecmode>mode);
12863 if (<MODE>mode == SImode)
12864 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
12867 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
12872 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI value from two SImode operands.  Nine alternatives, in
;; order: pinsrd $1 (two legacy forms and one VEX form), punpckldq (two
;; legacy forms and one VEX form), (v)movd when operand 2 is the zero
;; constant "C", and two MMX ("*y") alternatives using punpckldq and movd.
;; NOTE(review): the insn condition is not visible in this listing; the
;; name suggests an SSE4.1 requirement -- confirm.
12877 (define_insn "*vec_concatv2si_sse4_1"
12878 [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y")
12880 (match_operand:SI 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,rm, 0,rm")
12881 (match_operand:SI 2 "vector_move_operand" " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
12884 pinsrd\t{$1, %2, %0|%0, %2, 1}
12885 pinsrd\t{$1, %2, %0|%0, %2, 1}
12886 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
12887 punpckldq\t{%2, %0|%0, %2}
12888 punpckldq\t{%2, %0|%0, %2}
12889 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
12890 %vmovd\t{%1, %0|%0, %1}
12891 punpckldq\t{%2, %0|%0, %2}
12892 movd\t{%1, %0|%0, %1}"
12893 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
12894 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
12895 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
12896 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
12897 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
12898 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
12900 ;; ??? In theory we can match memory for the MMX alternative, but allowing
12901 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
12902 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Variant of V2SI construction for TARGET_SSE without SSE4.1.  Operand 2
;; is a register or the zero constant.  The first three alternatives need
;; the "sse2" isa (punpckldq / movd); the following two use the
;; single-precision forms unpcklps / movss; the last two are MMX ("*y")
;; alternatives.
12903 (define_insn "*vec_concatv2si"
12904 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
12906 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
12907 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
12908 "TARGET_SSE && !TARGET_SSE4_1"
12910 punpckldq\t{%2, %0|%0, %2}
12911 movd\t{%1, %0|%0, %1}
12912 movd\t{%1, %0|%0, %1}
12913 unpcklps\t{%2, %0|%0, %2}
12914 movss\t{%1, %0|%0, %1}
12915 punpckldq\t{%2, %0|%0, %2}
12916 movd\t{%1, %0|%0, %1}"
12917 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
12918 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
12919 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI value from two V2SI halves.  Register-register forms use
;; (v)punpcklqdq or movlhps; when operand 2 is in memory (v)movhps is
;; used, with operand 2 printed through the %q2 modifier in Intel syntax.
;; NOTE(review): the insn condition is not visible in this listing.
12921 (define_insn "*vec_concatv4si"
12922 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
12924 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
12925 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
12928 punpcklqdq\t{%2, %0|%0, %2}
12929 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12930 movlhps\t{%2, %0|%0, %2}
12931 movhps\t{%2, %0|%0, %q2}
12932 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
12933 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
12934 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12935 (set_attr "prefix" "orig,vex,orig,orig,vex")
12936 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
12938 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI value from two DImode operands.  Eleven alternatives, in
;; order: (v)pinsrq $1 (64-bit SSE4.1/AVX isas), a general-register to
;; vector move that uses movq only when HAVE_AS_IX86_INTERUNIT_MOVQ is
;; set and movd otherwise, (v)movq from an SSE register or memory, movq2dq
;; from an MMX register, (v)punpcklqdq, movlhps and (v)movhps.
;; The "type" attribute is "sselog" for alternatives 0,1,2,6,7 and
;; "ssemov" for the rest.
12939 (define_insn "vec_concatv2di"
12940 [(set (match_operand:V2DI 0 "register_operand"
12941 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
12943 (match_operand:DI 1 "nonimmediate_operand"
12944 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
12945 (match_operand:DI 2 "vector_move_operand"
12946 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
12949 pinsrq\t{$1, %2, %0|%0, %2, 1}
12950 pinsrq\t{$1, %2, %0|%0, %2, 1}
12951 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
12952 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
12953 %vmovq\t{%1, %0|%0, %1}
12954 movq2dq\t{%1, %0|%0, %1}
12955 punpcklqdq\t{%2, %0|%0, %2}
12956 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12957 movlhps\t{%2, %0|%0, %2}
12958 movhps\t{%2, %0|%0, %2}
12959 vmovhps\t{%2, %1, %0|%0, %1, %2}"
12960 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
12963 (eq_attr "alternative" "0,1,2,6,7")
12964 (const_string "sselog")
12965 (const_string "ssemov")))
12966 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
12967 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
12968 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
12969 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
12970 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Standard-name expander: widen elements of operand 1 (VI124_AVX512F)
;; into <sseunpackmode> operand 0.  All work is delegated to
;; ix86_expand_sse_unpack, called with (false, false) as its last two
;; arguments.
;; NOTE(review): going by the pattern name these presumably mean
;; "signed" and "low half" -- confirm against ix86_expand_sse_unpack.
12972 (define_expand "vec_unpacks_lo_<mode>"
12973 [(match_operand:<sseunpackmode> 0 "register_operand")
12974 (match_operand:VI124_AVX512F 1 "register_operand")]
12976 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Standard-name expander: widen elements of operand 1 (VI124_AVX512F)
;; into <sseunpackmode> operand 0.  All work is delegated to
;; ix86_expand_sse_unpack, called with (false, true) as its last two
;; arguments.
;; NOTE(review): going by the pattern name these presumably mean
;; "signed" and "high half" -- confirm against ix86_expand_sse_unpack.
12978 (define_expand "vec_unpacks_hi_<mode>"
12979 [(match_operand:<sseunpackmode> 0 "register_operand")
12980 (match_operand:VI124_AVX512F 1 "register_operand")]
12982 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Standard-name expander: widen elements of operand 1 (VI124_AVX512F)
;; into <sseunpackmode> operand 0.  All work is delegated to
;; ix86_expand_sse_unpack, called with (true, false) as its last two
;; arguments.
;; NOTE(review): going by the pattern name these presumably mean
;; "unsigned" and "low half" -- confirm against ix86_expand_sse_unpack.
12984 (define_expand "vec_unpacku_lo_<mode>"
12985 [(match_operand:<sseunpackmode> 0 "register_operand")
12986 (match_operand:VI124_AVX512F 1 "register_operand")]
12988 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Standard-name expander: widen elements of operand 1 (VI124_AVX512F)
;; into <sseunpackmode> operand 0.  All work is delegated to
;; ix86_expand_sse_unpack, called with (true, true) as its last two
;; arguments.
;; NOTE(review): going by the pattern name these presumably mean
;; "unsigned" and "high half" -- confirm against ix86_expand_sse_unpack.
12990 (define_expand "vec_unpacku_hi_<mode>"
12991 [(match_operand:<sseunpackmode> 0 "register_operand")
12992 (match_operand:VI124_AVX512F 1 "register_operand")]
12994 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
12996 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned byte/word average.  The RTL zero-extends both
;; inputs to <ssedoublemode>, adds them, adds a further operand
;; (match_dup <mask_expand_op3>), shifts the sum right logically and
;; truncates it back to the element mode.  The preparation code loads a
;; vector of ones (CONST1_RTX) into operands[3] and canonicalizes the
;; operands for PLUS with ix86_fixup_binary_operands_no_copy; under
;; <mask_applied> operands[3] is also copied into operands[5].
;; NOTE(review): the shift count and the statements between the two
;; <mask_applied> tests are not visible in this listing.
13002 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
13003 [(set (match_operand:VI12_AVX2 0 "register_operand")
13004 (truncate:VI12_AVX2
13005 (lshiftrt:<ssedoublemode>
13006 (plus:<ssedoublemode>
13007 (plus:<ssedoublemode>
13008 (zero_extend:<ssedoublemode>
13009 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
13010 (zero_extend:<ssedoublemode>
13011 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
13012 (match_dup <mask_expand_op3>))
13014 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13017 if (<mask_applied>)
13019 operands[3] = CONST1_RTX(<MODE>mode);
13020 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
13022 if (<mask_applied>)
13024 operands[5] = operands[3];
;; Matching insn for the unsigned average.  Operand 1 is marked
;; commutative ("%"), the rounding addend must satisfy const1_operand,
;; and the condition additionally requires ix86_binary_operator_ok for
;; PLUS.  Alternative 0 emits the two-operand pavg<ssemodesuffix>,
;; alternative 1 the three-operand vpavg<ssemodesuffix> with the optional
;; mask operand suffix.
13029 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13030 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
13031 (truncate:VI12_AVX2
13032 (lshiftrt:<ssedoublemode>
13033 (plus:<ssedoublemode>
13034 (plus:<ssedoublemode>
13035 (zero_extend:<ssedoublemode>
13036 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
13037 (zero_extend:<ssedoublemode>
13038 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
13039 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
13041 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13042 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
13044 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
13045 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13046 [(set_attr "isa" "noavx,avx")
13047 (set_attr "type" "sseiadd")
13048 (set_attr "prefix_data16" "1,*")
13049 (set_attr "prefix" "orig,<mask_prefix>")
13050 (set_attr "mode" "<sseinsnmode>")])
13052 ;; The correct representation for this is absolutely enormous, and
13053 ;; surely not generally useful.
;; The operation is therefore modelled as an unspec over two
;; <ssebytemode> inputs producing a VI8_AVX2_AVX512BW result.
;; Alternative 0 emits the two-operand psadbw (destination tied to
;; operand 1), alternative 1 the three-operand vpsadbw.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this listing.
13054 (define_insn "<sse2_avx2>_psadbw"
13055 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
13056 (unspec:VI8_AVX2_AVX512BW
13057 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
13058 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
13062 psadbw\t{%2, %0|%0, %2}
13063 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13064 [(set_attr "isa" "noavx,avx")
13065 (set_attr "type" "sseiadd")
13066 (set_attr "atom_unit" "simul")
13067 (set_attr "prefix_data16" "1,*")
13068 (set_attr "prefix" "orig,maybe_evex")
13069 (set_attr "mode" "<sseinsnmode>")])
;; Move-mask of a floating-point vector register (VF_128_256) into an
;; SImode general register, emitted as (v)movmsk<ssemodesuffix>.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this listing.
13071 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
13072 [(set (match_operand:SI 0 "register_operand" "=r")
13074 [(match_operand:VF_128_256 1 "register_operand" "x")]
13077 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
13078 [(set_attr "type" "ssemov")
13079 (set_attr "prefix" "maybe_vex")
13080 (set_attr "mode" "<MODE>")])
;; Byte move-mask of a V32QI register into an SImode general register,
;; modelled as an unspec and always emitted VEX-encoded as vpmovmskb.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this listing.
13082 (define_insn "avx2_pmovmskb"
13083 [(set (match_operand:SI 0 "register_operand" "=r")
13084 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
13087 "vpmovmskb\t{%1, %0|%0, %1}"
13088 [(set_attr "type" "ssemov")
13089 (set_attr "prefix" "vex")
13090 (set_attr "mode" "DI")])
;; Byte move-mask of a V16QI register into an SImode general register,
;; modelled as an unspec and emitted as (v)pmovmskb.
;; NOTE(review): the unspec code and insn condition are not visible in
;; this listing.
13092 (define_insn "sse2_pmovmskb"
13093 [(set (match_operand:SI 0 "register_operand" "=r")
13094 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
13097 "%vpmovmskb\t{%1, %0|%0, %1}"
13098 [(set_attr "type" "ssemov")
13099 (set_attr "prefix_data16" "1")
13100 (set_attr "prefix" "maybe_vex")
13101 (set_attr "mode" "SI")])
;; Expander for the masked byte store: a V16QI memory destination is set
;; to an unspec of two V16QI register operands.
;; NOTE(review): the remaining unspec operand(s), the unspec code and the
;; condition are not visible in this listing.
13103 (define_expand "sse2_maskmovdqu"
13104 [(set (match_operand:V16QI 0 "memory_operand")
13105 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13106 (match_operand:V16QI 2 "register_operand")
;; Masked byte store insn.  The destination address is operand 0, forced
;; into register class "D"; the previous contents of that memory appear as
;; an input of the unspec.  The C output code writes an "addr32" prefix
;; directly to asm_out_file when Pmode != word_mode and then returns the
;; (v)maskmovdqu template.  length_address and length_vex are computed
;; explicitly by the attribute expressions below.
13111 (define_insn "*sse2_maskmovdqu"
13112 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
13113 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13114 (match_operand:V16QI 2 "register_operand" "x")
13115 (mem:V16QI (match_dup 0))]
13119 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13120 that requires %v to be at the beginning of the opcode name. */
13121 if (Pmode != word_mode)
13122 fputs ("\taddr32", asm_out_file);
13123 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13125 [(set_attr "type" "ssemov")
13126 (set_attr "prefix_data16" "1")
13127 (set (attr "length_address")
13128 (symbol_ref ("Pmode != word_mode")))
13129 ;; The implicit %rdi operand confuses default length_vex computation.
13130 (set (attr "length_vex")
13131 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
13132 (set_attr "prefix" "maybe_vex")
13133 (set_attr "mode" "TI")])
;; MXCSR load: an unspec_volatile taking an SImode memory operand; the
;; "memory" attribute is "load".
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this listing.
13135 (define_insn "sse_ldmxcsr"
13136 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13140 [(set_attr "type" "sse")
13141 (set_attr "atom_sse_attr" "mxcsr")
13142 (set_attr "prefix" "maybe_vex")
13143 (set_attr "memory" "load")])
;; MXCSR store: sets an SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR); the "memory" attribute is "store".
;; NOTE(review): the condition and output template are not visible in
;; this listing.
13145 (define_insn "sse_stmxcsr"
13146 [(set (match_operand:SI 0 "memory_operand" "=m")
13147 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13150 [(set_attr "type" "sse")
13151 (set_attr "atom_sse_attr" "mxcsr")
13152 (set_attr "prefix" "maybe_vex")
13153 (set_attr "memory" "store")])
;; Cache-line flush: an unspec_volatile over an address operand ("p").
;; The "memory" attribute is "unknown" and the Atom scheduling class is
;; "fence".
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this listing.
13155 (define_insn "sse2_clflush"
13156 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13160 [(set_attr "type" "sse")
13161 (set_attr "atom_sse_attr" "fence")
13162 (set_attr "memory" "unknown")])
;; MWAIT: an unspec_volatile whose two SImode operands are pinned to the
;; "a" and "c" register classes.  The encoded length is fixed at 3 bytes.
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this listing.
13165 (define_insn "sse3_mwait"
13166 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
13167 (match_operand:SI 1 "register_operand" "c")]
13170 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
13171 ;; Since 32bit register operands are implicitly zero extended to 64bit,
13172 ;; we only need to set up 32bit registers.
13174 [(set_attr "length" "3")])
;; MONITOR: an unspec_volatile with a pointer-mode address operand in
;; class "a" and two SImode operands in classes "c" and "d".  The length
;; is 3 bytes, plus 1 when Pmode != word_mode.
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this listing.
13176 (define_insn "sse3_monitor_<mode>"
13177 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
13178 (match_operand:SI 1 "register_operand" "c")
13179 (match_operand:SI 2 "register_operand" "d")]
13182 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
13183 ;; RCX and RDX are used. Since 32bit register operands are implicitly
13184 ;; zero extended to 64bit, we only need to set up 32bit registers.
13186 [(set (attr "length")
13187 (symbol_ref ("(Pmode != word_mode) + 3")))])
13189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13191 ;; SSSE3 instructions
13193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four rtx codes used by the horizontal add/sub
;; patterns below: plain and signed-saturating plus and minus.
13195 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal word add/subtract (one insn per ssse3_plusminus
;; code).  As written, the RTL applies the operation to adjacent element
;; pairs (0,1), (2,3), ... (14,15) of operand 1 and then to the same pairs
;; of operand 2.  Emitted as vph<plusminus_mnemonic>w.
;; NOTE(review): the vec_concat nesting and the insn condition are not
;; visible in this listing; confirm that the element order matches the
;; per-128-bit-lane behaviour of the instruction.
13197 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
13198 [(set (match_operand:V16HI 0 "register_operand" "=x")
13203 (ssse3_plusminus:HI
13205 (match_operand:V16HI 1 "register_operand" "x")
13206 (parallel [(const_int 0)]))
13207 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13208 (ssse3_plusminus:HI
13209 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13210 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13212 (ssse3_plusminus:HI
13213 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13214 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13215 (ssse3_plusminus:HI
13216 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13217 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13220 (ssse3_plusminus:HI
13221 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13222 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
13223 (ssse3_plusminus:HI
13224 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13225 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13227 (ssse3_plusminus:HI
13228 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13229 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
13230 (ssse3_plusminus:HI
13231 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13232 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13236 (ssse3_plusminus:HI
13238 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13239 (parallel [(const_int 0)]))
13240 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13241 (ssse3_plusminus:HI
13242 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13243 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13245 (ssse3_plusminus:HI
13246 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13247 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13248 (ssse3_plusminus:HI
13249 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13250 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13253 (ssse3_plusminus:HI
13254 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13255 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
13256 (ssse3_plusminus:HI
13257 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13258 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13260 (ssse3_plusminus:HI
13261 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13262 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
13263 (ssse3_plusminus:HI
13264 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13265 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13267 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13268 [(set_attr "type" "sseiadd")
13269 (set_attr "prefix_extra" "1")
13270 (set_attr "prefix" "vex")
13271 (set_attr "mode" "OI")])
;; 128-bit horizontal word add/subtract.  The RTL applies the
;; ssse3_plusminus code to adjacent element pairs (0,1) ... (6,7) of
;; operand 1 and then of operand 2.  Alternative 0 is the two-operand
;; legacy form (destination tied to operand 1), alternative 1 the
;; three-operand VEX form.
;; NOTE(review): the vec_concat nesting and the insn condition are not
;; visible in this listing.
13273 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
13274 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13278 (ssse3_plusminus:HI
13280 (match_operand:V8HI 1 "register_operand" "0,x")
13281 (parallel [(const_int 0)]))
13282 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13283 (ssse3_plusminus:HI
13284 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13285 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13287 (ssse3_plusminus:HI
13288 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13289 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13290 (ssse3_plusminus:HI
13291 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13292 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13295 (ssse3_plusminus:HI
13297 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
13298 (parallel [(const_int 0)]))
13299 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13300 (ssse3_plusminus:HI
13301 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13302 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13304 (ssse3_plusminus:HI
13305 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13306 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13307 (ssse3_plusminus:HI
13308 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13309 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13312 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13313 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13314 [(set_attr "isa" "noavx,avx")
13315 (set_attr "type" "sseiadd")
13316 (set_attr "atom_unit" "complex")
13317 (set_attr "prefix_data16" "1,*")
13318 (set_attr "prefix_extra" "1")
13319 (set_attr "prefix" "orig,vex")
13320 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal word add/subtract.
;; The RTL applies the ssse3_plusminus code to element pairs (0,1) and
;; (2,3) of operand 1 and then of operand 2.  Only the two-operand form
;; exists; prefix_rex is computed from whether the insn mentions an
;; extended register.
;; NOTE(review): the vec_concat nesting and the insn condition are not
;; visible in this listing.
13322 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
13323 [(set (match_operand:V4HI 0 "register_operand" "=y")
13326 (ssse3_plusminus:HI
13328 (match_operand:V4HI 1 "register_operand" "0")
13329 (parallel [(const_int 0)]))
13330 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13331 (ssse3_plusminus:HI
13332 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13333 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13335 (ssse3_plusminus:HI
13337 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13338 (parallel [(const_int 0)]))
13339 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13340 (ssse3_plusminus:HI
13341 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13342 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13344 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
13345 [(set_attr "type" "sseiadd")
13346 (set_attr "atom_unit" "complex")
13347 (set_attr "prefix_extra" "1")
13348 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13349 (set_attr "mode" "DI")])
;; 256-bit horizontal doubleword add/subtract, emitted as
;; vph<plusminus_mnemonic>d.  The visible vec_selects pick adjacent
;; element pairs (0,1) ... (6,7) of operand 1 and then of operand 2.
;; NOTE(review): the rtx codes combining each pair, the vec_concat nesting
;; and the insn condition are not visible in this listing.
13351 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
13352 [(set (match_operand:V8SI 0 "register_operand" "=x")
13358 (match_operand:V8SI 1 "register_operand" "x")
13359 (parallel [(const_int 0)]))
13360 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13362 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13363 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13366 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13367 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
13369 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13370 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13375 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13376 (parallel [(const_int 0)]))
13377 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13379 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13380 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13383 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13384 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
13386 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13387 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13389 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13390 [(set_attr "type" "sseiadd")
13391 (set_attr "prefix_extra" "1")
13392 (set_attr "prefix" "vex")
13393 (set_attr "mode" "OI")])
;; 128-bit horizontal doubleword add/subtract.  The visible vec_selects
;; pick element pairs (0,1) and (2,3) of operand 1 and then of operand 2.
;; Alternative 0 is the two-operand legacy form, alternative 1 the
;; three-operand VEX form.
;; NOTE(review): the rtx codes combining each pair, the vec_concat nesting
;; and the insn condition are not visible in this listing.
13395 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
13396 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13401 (match_operand:V4SI 1 "register_operand" "0,x")
13402 (parallel [(const_int 0)]))
13403 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13405 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13406 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13410 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
13411 (parallel [(const_int 0)]))
13412 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13414 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13415 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
13418 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
13419 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13420 [(set_attr "isa" "noavx,avx")
13421 (set_attr "type" "sseiadd")
13422 (set_attr "atom_unit" "complex")
13423 (set_attr "prefix_data16" "1,*")
13424 (set_attr "prefix_extra" "1")
13425 (set_attr "prefix" "orig,vex")
13426 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal doubleword
;; add/subtract.  The visible vec_selects pick elements 0 and 1 of
;; operand 1 and then of operand 2.  Only the two-operand form exists.
;; NOTE(review): the rtx codes combining each pair, the vec_concat and the
;; insn condition are not visible in this listing.
13428 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
13429 [(set (match_operand:V2SI 0 "register_operand" "=y")
13433 (match_operand:V2SI 1 "register_operand" "0")
13434 (parallel [(const_int 0)]))
13435 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13438 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
13439 (parallel [(const_int 0)]))
13440 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
13442 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
13443 [(set_attr "type" "sseiadd")
13444 (set_attr "atom_unit" "complex")
13445 (set_attr "prefix_extra" "1")
13446 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13447 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw.  The RTL selects the even-indexed bytes
;; (0,2,...,30) of operands 1 and 2 for one group and the odd-indexed
;; bytes (1,3,...,31) of the same operands for the other, producing a
;; V16HI result.
;; NOTE(review): the extension, multiply and add rtx codes that combine
;; the selected bytes, and the insn condition, are not visible in this
;; listing.
13449 (define_insn "avx2_pmaddubsw256"
13450 [(set (match_operand:V16HI 0 "register_operand" "=x")
13455 (match_operand:V32QI 1 "register_operand" "x")
13456 (parallel [(const_int 0) (const_int 2)
13457 (const_int 4) (const_int 6)
13458 (const_int 8) (const_int 10)
13459 (const_int 12) (const_int 14)
13460 (const_int 16) (const_int 18)
13461 (const_int 20) (const_int 22)
13462 (const_int 24) (const_int 26)
13463 (const_int 28) (const_int 30)])))
13466 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
13467 (parallel [(const_int 0) (const_int 2)
13468 (const_int 4) (const_int 6)
13469 (const_int 8) (const_int 10)
13470 (const_int 12) (const_int 14)
13471 (const_int 16) (const_int 18)
13472 (const_int 20) (const_int 22)
13473 (const_int 24) (const_int 26)
13474 (const_int 28) (const_int 30)]))))
13477 (vec_select:V16QI (match_dup 1)
13478 (parallel [(const_int 1) (const_int 3)
13479 (const_int 5) (const_int 7)
13480 (const_int 9) (const_int 11)
13481 (const_int 13) (const_int 15)
13482 (const_int 17) (const_int 19)
13483 (const_int 21) (const_int 23)
13484 (const_int 25) (const_int 27)
13485 (const_int 29) (const_int 31)])))
13487 (vec_select:V16QI (match_dup 2)
13488 (parallel [(const_int 1) (const_int 3)
13489 (const_int 5) (const_int 7)
13490 (const_int 9) (const_int 11)
13491 (const_int 13) (const_int 15)
13492 (const_int 17) (const_int 19)
13493 (const_int 21) (const_int 23)
13494 (const_int 25) (const_int 27)
13495 (const_int 29) (const_int 31)]))))))]
13497 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13498 [(set_attr "type" "sseiadd")
13499 (set_attr "prefix_extra" "1")
13500 (set_attr "prefix" "vex")
13501 (set_attr "mode" "OI")])
13503 ;; The correct representation for this is absolutely enormous, and
13504 ;; surely not generally useful.
;; The operation is therefore modelled as an UNSPEC_PMADDUBSW512 over two
;; <dbpsadbwmode> inputs producing a VI2_AVX512VL result, emitted as the
;; EVEX-encoded vpmaddubsw with the optional mask operand suffix.
;; A stray ';' that followed the output template string has been removed;
;; it was parsed as an empty comment, so the generated code is unchanged.
;; NOTE(review): the insn condition is not visible in this listing.
13505 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
13506 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
13507 (unspec:VI2_AVX512VL
13508 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
13509 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
13510 UNSPEC_PMADDUBSW512))]
13512 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13513 [(set_attr "type" "sseiadd")
13514 (set_attr "prefix" "evex")
13515 (set_attr "mode" "XI")])
;; 512-bit vpmulhrsw on V32HI operands (operand 1 is marked commutative
;; with "%").  The RTL includes a V32HI constant vector of ones, and the
;; instruction is emitted EVEX-encoded with the optional mask operand
;; suffix.
;; NOTE(review): the extension, multiply, shift and truncate rtx codes
;; around the visible operands, and the insn condition, are not visible
;; in this listing.
13517 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
13518 [(set (match_operand:V32HI 0 "register_operand" "=v")
13525 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
13527 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
13529 (const_vector:V32HI [(const_int 1) (const_int 1)
13530 (const_int 1) (const_int 1)
13531 (const_int 1) (const_int 1)
13532 (const_int 1) (const_int 1)
13533 (const_int 1) (const_int 1)
13534 (const_int 1) (const_int 1)
13535 (const_int 1) (const_int 1)
13536 (const_int 1) (const_int 1)
13537 (const_int 1) (const_int 1)
13538 (const_int 1) (const_int 1)
13539 (const_int 1) (const_int 1)
13540 (const_int 1) (const_int 1)
13541 (const_int 1) (const_int 1)
13542 (const_int 1) (const_int 1)
13543 (const_int 1) (const_int 1)
13544 (const_int 1) (const_int 1)]))
13547 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13548 [(set_attr "type" "sseimul")
13549 (set_attr "prefix" "evex")
13550 (set_attr "mode" "XI")])
;; 128-bit pmaddubsw.  The RTL selects the even-indexed bytes (0,2,...,14)
;; of operands 1 and 2 for one group and the odd-indexed bytes
;; (1,3,...,15) for the other, producing a V8HI result.  Alternative 0 is
;; the two-operand legacy form, alternative 1 the three-operand VEX form.
;; NOTE(review): the extension, multiply and add rtx codes that combine
;; the selected bytes, and the insn condition, are not visible in this
;; listing.
13552 (define_insn "ssse3_pmaddubsw128"
13553 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13558 (match_operand:V16QI 1 "register_operand" "0,x")
13559 (parallel [(const_int 0) (const_int 2)
13560 (const_int 4) (const_int 6)
13561 (const_int 8) (const_int 10)
13562 (const_int 12) (const_int 14)])))
13565 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
13566 (parallel [(const_int 0) (const_int 2)
13567 (const_int 4) (const_int 6)
13568 (const_int 8) (const_int 10)
13569 (const_int 12) (const_int 14)]))))
13572 (vec_select:V8QI (match_dup 1)
13573 (parallel [(const_int 1) (const_int 3)
13574 (const_int 5) (const_int 7)
13575 (const_int 9) (const_int 11)
13576 (const_int 13) (const_int 15)])))
13578 (vec_select:V8QI (match_dup 2)
13579 (parallel [(const_int 1) (const_int 3)
13580 (const_int 5) (const_int 7)
13581 (const_int 9) (const_int 11)
13582 (const_int 13) (const_int 15)]))))))]
13585 pmaddubsw\t{%2, %0|%0, %2}
13586 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13587 [(set_attr "isa" "noavx,avx")
13588 (set_attr "type" "sseiadd")
13589 (set_attr "atom_unit" "simul")
13590 (set_attr "prefix_data16" "1,*")
13591 (set_attr "prefix_extra" "1")
13592 (set_attr "prefix" "orig,vex")
13593 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw.  Operand 0 is a V4HI register and operands 1 and 2 are
;; V8QI, all using the "y" register constraint; operand 1 is tied to
;; operand 0.  The vec_select parallels pick even bytes (0,2,4,6) and odd
;; bytes (1,3,5,7) of each input.  prefix_rex is computed from
;; x86_extended_reg_mentioned_p (insn).
13595 (define_insn "ssse3_pmaddubsw"
13596 [(set (match_operand:V4HI 0 "register_operand" "=y")
13601 (match_operand:V8QI 1 "register_operand" "0")
13602 (parallel [(const_int 0) (const_int 2)
13603 (const_int 4) (const_int 6)])))
13606 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13607 (parallel [(const_int 0) (const_int 2)
13608 (const_int 4) (const_int 6)]))))
13611 (vec_select:V4QI (match_dup 1)
13612 (parallel [(const_int 1) (const_int 3)
13613 (const_int 5) (const_int 7)])))
13615 (vec_select:V4QI (match_dup 2)
13616 (parallel [(const_int 1) (const_int 3)
13617 (const_int 5) (const_int 7)]))))))]
13619 "pmaddubsw\t{%2, %0|%0, %2}"
13620 [(set_attr "type" "sseiadd")
13621 (set_attr "atom_unit" "simul")
13622 (set_attr "prefix_extra" "1")
13623 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13624 (set_attr "mode" "DI")])
;; Modes used by the pmulhrsw expanders below: V4HI and V8HI always,
;; V16HI only when TARGET_AVX2.
13626 (define_mode_iterator PMULHRSW
13627 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Masked pmulhrsw expander.  The result is a vec_merge with operand 3
;; under the mask register operand 4.  The visible arithmetic sign-extends
;; operands 1 and 2 to <ssedoublemode>, multiplies them, and applies
;; lshiftrt/plus/lshiftrt.  Enabled for TARGET_AVX512BW && TARGET_AVX512VL.
;; The preparation code sets operands[5] to the all-ones-valued constant
;; CONST1_RTX of the mode and calls ix86_fixup_binary_operands_no_copy
;; for MULT.
13629 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
13630 [(set (match_operand:PMULHRSW 0 "register_operand")
13631 (vec_merge:PMULHRSW
13633 (lshiftrt:<ssedoublemode>
13634 (plus:<ssedoublemode>
13635 (lshiftrt:<ssedoublemode>
13636 (mult:<ssedoublemode>
13637 (sign_extend:<ssedoublemode>
13638 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13639 (sign_extend:<ssedoublemode>
13640 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13644 (match_operand:PMULHRSW 3 "register_operand")
13645 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13646 "TARGET_AVX512BW && TARGET_AVX512VL"
13648 operands[5] = CONST1_RTX(<MODE>mode);
13649 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Unmasked pmulhrsw expander over the PMULHRSW modes.  The visible RTL
;; sign-extends operands 1 and 2 to <ssedoublemode>, multiplies them, and
;; applies lshiftrt/plus/lshiftrt.  The preparation code sets operands[3]
;; to CONST1_RTX of the mode and calls ix86_fixup_binary_operands_no_copy
;; for MULT.  The enabling condition is on a line not visible in this
;; excerpt.
13652 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
13653 [(set (match_operand:PMULHRSW 0 "register_operand")
13655 (lshiftrt:<ssedoublemode>
13656 (plus:<ssedoublemode>
13657 (lshiftrt:<ssedoublemode>
13658 (mult:<ssedoublemode>
13659 (sign_extend:<ssedoublemode>
13660 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13661 (sign_extend:<ssedoublemode>
13662 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13668 operands[3] = CONST1_RTX(<MODE>mode);
13669 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for pmulhrsw over VI2_AVX2 modes.  Operand 1 is marked
;; commutative ("%"); operand 3 must satisfy const1_operand.  The condition
;; requires TARGET_SSSE3, the two mask-variant conditions, and
;; ix86_binary_operator_ok for MULT.  Alternative 0 ("noavx") prints
;; two-operand pmulhrsw; alternative 1 ("avx") prints three-operand
;; vpmulhrsw with the optional mask operand text.
13672 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
13673 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
13675 (lshiftrt:<ssedoublemode>
13676 (plus:<ssedoublemode>
13677 (lshiftrt:<ssedoublemode>
13678 (mult:<ssedoublemode>
13679 (sign_extend:<ssedoublemode>
13680 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
13681 (sign_extend:<ssedoublemode>
13682 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
13684 (match_operand:VI2_AVX2 3 "const1_operand"))
13686 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13687 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
13689 pmulhrsw\t{%2, %0|%0, %2}
13690 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
13691 [(set_attr "isa" "noavx,avx")
13692 (set_attr "type" "sseimul")
13693 (set_attr "prefix_data16" "1,*")
13694 (set_attr "prefix_extra" "1")
13695 (set_attr "prefix" "orig,maybe_evex")
13696 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw insn using "y"-constraint registers.  Operand 1 is
;; commutative and tied to operand 0; operand 3 must satisfy
;; const1_operand.  Requires TARGET_SSSE3 and ix86_binary_operator_ok for
;; MULT in V4HImode.
13698 (define_insn "*ssse3_pmulhrswv4hi3"
13699 [(set (match_operand:V4HI 0 "register_operand" "=y")
13706 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
13708 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
13710 (match_operand:V4HI 3 "const1_operand"))
13712 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
13713 "pmulhrsw\t{%2, %0|%0, %2}"
13714 [(set_attr "type" "sseimul")
13715 (set_attr "prefix_extra" "1")
13716 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13717 (set_attr "mode" "DI")])
;; pshufb over VI1_AVX512 modes.  Requires TARGET_SSSE3 plus the two
;; mask-variant conditions.  Alternative 0 ("noavx") ties operand 1 to
;; operand 0 and prints two-operand pshufb; alternative 1 ("avx") prints
;; three-operand vpshufb with the optional mask operand text.
13719 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
13720 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
13722 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
13723 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
13725 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13727 pshufb\t{%2, %0|%0, %2}
13728 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13729 [(set_attr "isa" "noavx,avx")
13730 (set_attr "type" "sselog1")
13731 (set_attr "prefix_data16" "1,*")
13732 (set_attr "prefix_extra" "1")
13733 (set_attr "prefix" "orig,maybe_evex")
13734 (set_attr "btver2_decode" "vector,vector")
13735 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb expressed as an unspec of operands 1 and 2, using
;; "y"-constraint registers with operand 1 tied to operand 0.
;; NOTE(review): the ';' after the output template starts an (empty)
;; comment in .md syntax; it has no effect on the pattern.
13737 (define_insn "ssse3_pshufbv8qi3"
13738 [(set (match_operand:V8QI 0 "register_operand" "=y")
13739 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
13740 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
13743 "pshufb\t{%2, %0|%0, %2}";
13744 [(set_attr "type" "sselog1")
13745 (set_attr "prefix_extra" "1")
13746 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13747 (set_attr "mode" "DI")])
;; psign over VI124_AVX2 modes; the mnemonic suffix comes from
;; <ssemodesuffix>.  Alternative 0 ("noavx") ties operand 1 to operand 0
;; and prints two-operand psign; alternative 1 ("avx") prints three-operand
;; vpsign.
13749 (define_insn "<ssse3_avx2>_psign<mode>3"
13750 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
13752 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
13753 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
13757 psign<ssemodesuffix>\t{%2, %0|%0, %2}
13758 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13759 [(set_attr "isa" "noavx,avx")
13760 (set_attr "type" "sselog1")
13761 (set_attr "prefix_data16" "1,*")
13762 (set_attr "prefix_extra" "1")
13763 (set_attr "prefix" "orig,vex")
13764 (set_attr "mode" "<sseinsnmode>")])
;; psign over MMXMODEI modes using "y"-constraint registers; operand 1 is
;; tied to operand 0 and the mnemonic suffix comes from <mmxvecsize>.
;; NOTE(review): the ';' after the output template starts an (empty)
;; comment in .md syntax; it has no effect on the pattern.
13766 (define_insn "ssse3_psign<mode>3"
13767 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13769 [(match_operand:MMXMODEI 1 "register_operand" "0")
13770 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
13773 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
13774 [(set_attr "type" "sselog1")
13775 (set_attr "prefix_extra" "1")
13776 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13777 (set_attr "mode" "DI")])
;; Masked vpalignr over VI1_AVX512 modes: a vec_merge with operand 4 under
;; mask register operand 5 ("Yk").  Requires TARGET_AVX512BW, and
;; TARGET_AVX512VL unless the mode is 64 bytes wide.  Operand 3 must be a
;; multiple of 8 (const_0_to_255_mul_8_operand) and is divided by 8 before
;; being printed as the immediate (presumably a bit count turned into a
;; byte count -- confirm against the builtin expansion).
13779 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
13780 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
13781 (vec_merge:VI1_AVX512
13783 [(match_operand:VI1_AVX512 1 "register_operand" "v")
13784 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
13785 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13787 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
13788 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
13789 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
13791 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13792 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
13794 [(set_attr "type" "sseishft")
13795 (set_attr "atom_unit" "sishuf")
13796 (set_attr "prefix_extra" "1")
13797 (set_attr "length_immediate" "1")
13798 (set_attr "prefix" "evex")
13799 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked palignr over SSESCALARMODE, expressed as an unspec.  Operand 3
;; must be a multiple of 8 and is divided by 8 before printing.  The output
;; code switches on which_alternative: the "noavx" alternative prints
;; two-operand palignr, the "avx" alternative prints three-operand vpalignr;
;; any other value reaches gcc_unreachable ().
13801 (define_insn "<ssse3_avx2>_palignr<mode>"
13802 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
13803 (unspec:SSESCALARMODE
13804 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
13805 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
13806 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
13810 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13812 switch (which_alternative)
13815 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13817 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13819 gcc_unreachable ();
13822 [(set_attr "isa" "noavx,avx")
13823 (set_attr "type" "sseishft")
13824 (set_attr "atom_unit" "sishuf")
13825 (set_attr "prefix_data16" "1,*")
13826 (set_attr "prefix_extra" "1")
13827 (set_attr "length_immediate" "1")
13828 (set_attr "prefix" "orig,vex")
13829 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr using "y"-constraint registers, expressed as an unspec
;; with operand 1 tied to operand 0.  Operand 3 must be a multiple of 8 and
;; is divided by 8 before being printed as the immediate.
13831 (define_insn "ssse3_palignrdi"
13832 [(set (match_operand:DI 0 "register_operand" "=y")
13833 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
13834 (match_operand:DI 2 "nonimmediate_operand" "ym")
13835 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13839 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13840 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13842 [(set_attr "type" "sseishft")
13843 (set_attr "atom_unit" "sishuf")
13844 (set_attr "prefix_extra" "1")
13845 (set_attr "length_immediate" "1")
13846 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13847 (set_attr "mode" "DI")])
13849 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
13850 ;; modes for abs instruction on pre AVX-512 targets.
;; Per-mode gating as listed below: 512-bit QI/HI modes need
;; TARGET_AVX512BW, 512-bit SI/DI modes need TARGET_AVX512F, 256-bit
;; QI/HI/SI modes need TARGET_AVX2, the 128-bit QI/HI/SI modes are
;; unconditional, and V4DI/V2DI need TARGET_AVX512VL.
13851 (define_mode_iterator VI1248_AVX512VL_AVX512BW
13852 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
13853 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
13854 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
13855 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Vector abs insn over VI1248_AVX512VL_AVX512BW: operand 0 is set to
;; (abs operand 1), printed as pabs<ssemodesuffix> with the %v template
;; prefix.  The enabling condition is on a line not visible in this
;; excerpt.
13857 (define_insn "*abs<mode>2"
13858 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
13859 (abs:VI1248_AVX512VL_AVX512BW
13860 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))]
13862 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
13863 [(set_attr "type" "sselog1")
13864 (set_attr "prefix_data16" "1")
13865 (set_attr "prefix_extra" "1")
13866 (set_attr "prefix" "maybe_vex")
13867 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs for the VI48_AVX512VL modes: a vec_merge of the result on
;; operand 1 with operand 2 under mask register operand 3 ("Yk").  The
;; template prints the mask as {%3} followed by the %N2 modifier.
13869 (define_insn "abs<mode>2_mask"
13870 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13871 (vec_merge:VI48_AVX512VL
13873 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
13874 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
13875 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13877 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13878 [(set_attr "type" "sselog1")
13879 (set_attr "prefix" "evex")
13880 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs for the VI12_AVX512VL modes.  Same shape as the
;; VI48_AVX512VL pattern of the same name template; the two iterate over
;; different mode sets, so the expanded names do not collide.
13882 (define_insn "abs<mode>2_mask"
13883 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
13884 (vec_merge:VI12_AVX512VL
13886 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
13887 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
13888 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13890 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13891 [(set_attr "type" "sselog1")
13892 (set_attr "prefix" "evex")
13893 (set_attr "mode" "<sseinsnmode>")])
;; Named abs expander over VI1248_AVX512VL_AVX512BW.  The visible
;; preparation statement calls ix86_expand_sse2_abs on operands 0 and 1;
;; the test that guards that call and the expander condition are on lines
;; not visible in this excerpt.
13895 (define_expand "abs<mode>2"
13896 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
13897 (abs:VI1248_AVX512VL_AVX512BW
13898 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))]
13903 ix86_expand_sse2_abs (operands[0], operands[1]);
;; abs insn for the MMXMODEI modes using "y"-constraint registers; prints
;; pabs<mmxvecsize>.  prefix_rep is forced to "0".
;; NOTE(review): the ';' after the output template starts an (empty)
;; comment in .md syntax; it has no effect on the pattern.
13908 (define_insn "abs<mode>2"
13909 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13911 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
13913 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
13914 [(set_attr "type" "sselog1")
13915 (set_attr "prefix_rep" "0")
13916 (set_attr "prefix_extra" "1")
13917 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13918 (set_attr "mode" "DI")])
13920 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13922 ;; AMD SSE4A instructions
13924 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix> store for the MODEF modes: memory operand 0 is set
;; from register operand 1 ("x").  The wrapping RTL code and the enabling
;; condition are on lines not visible in this excerpt.
13926 (define_insn "sse4a_movnt<mode>"
13927 [(set (match_operand:MODEF 0 "memory_operand" "=m")
13929 [(match_operand:MODEF 1 "register_operand" "x")]
13932 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
13933 [(set_attr "type" "ssemov")
13934 (set_attr "mode" "<MODE>")])
;; movnt<ssescalarmodesuffix> store of element 0 of a VF_128 register:
;; the unspec wraps a vec_select of index 0 from operand 1, and the
;; destination is a memory operand of the scalar mode.
13936 (define_insn "sse4a_vmmovnt<mode>"
13937 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
13938 (unspec:<ssescalarmode>
13939 [(vec_select:<ssescalarmode>
13940 (match_operand:VF_128 1 "register_operand" "x")
13941 (parallel [(const_int 0)]))]
13944 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
13945 [(set_attr "type" "ssemov")
13946 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq: operand 1 (V2DI) is tied to operand 0, and
;; operands 2 and 3 are constants in the range 0..255 printed as two
;; immediates (length_immediate is "2").
13948 (define_insn "sse4a_extrqi"
13949 [(set (match_operand:V2DI 0 "register_operand" "=x")
13950 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13951 (match_operand 2 "const_0_to_255_operand")
13952 (match_operand 3 "const_0_to_255_operand")]
13955 "extrq\t{%3, %2, %0|%0, %2, %3}"
13956 [(set_attr "type" "sse")
13957 (set_attr "prefix_data16" "1")
13958 (set_attr "length_immediate" "2")
13959 (set_attr "mode" "TI")])
;; Register form of extrq: operand 1 (V2DI) is tied to operand 0 and
;; operand 2 is a V16QI register.
13961 (define_insn "sse4a_extrq"
13962 [(set (match_operand:V2DI 0 "register_operand" "=x")
13963 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13964 (match_operand:V16QI 2 "register_operand" "x")]
13967 "extrq\t{%2, %0|%0, %2}"
13968 [(set_attr "type" "sse")
13969 (set_attr "prefix_data16" "1")
13970 (set_attr "mode" "TI")])
;; Immediate form of insertq: operand 1 is tied to operand 0, operand 2 is
;; a V2DI register, and operands 3 and 4 are constants in the range 0..255
;; printed as two immediates (length_immediate is "2").
13972 (define_insn "sse4a_insertqi"
13973 [(set (match_operand:V2DI 0 "register_operand" "=x")
13974 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13975 (match_operand:V2DI 2 "register_operand" "x")
13976 (match_operand 3 "const_0_to_255_operand")
13977 (match_operand 4 "const_0_to_255_operand")]
13980 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
13981 [(set_attr "type" "sseins")
13982 (set_attr "prefix_data16" "0")
13983 (set_attr "prefix_rep" "1")
13984 (set_attr "length_immediate" "2")
13985 (set_attr "mode" "TI")])
;; Register form of insertq: operand 1 is tied to operand 0 and operand 2
;; is a V2DI register; no immediates are printed.
13987 (define_insn "sse4a_insertq"
13988 [(set (match_operand:V2DI 0 "register_operand" "=x")
13989 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13990 (match_operand:V2DI 2 "register_operand" "x")]
13993 "insertq\t{%2, %0|%0, %2}"
13994 [(set_attr "type" "sseins")
13995 (set_attr "prefix_data16" "0")
13996 (set_attr "prefix_rep" "1")
13997 (set_attr "mode" "TI")])
13999 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14001 ;; Intel SSE4.1 instructions
14003 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14005 ;; Mapping of immediate bits for blend instructions
;; Each value is the largest immediate for the mode (V8SF 255, V4SF and
;; V4DF 15, V2DF 3); it is spliced into the const_0_to_<blendbits>_operand
;; predicate name used by the blend pattern.
14006 (define_mode_attr blendbits
14007 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Immediate blend over VF_128_256: a vec_merge of operand 2 and operand 1
;; selected by the constant operand 3.  Alternatives 0 and 1 ("noavx") tie
;; operand 1 to operand 0 and print two-operand blend; alternative 2
;; ("avx") prints three-operand vblend.
14009 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
14010 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14011 (vec_merge:VF_128_256
14012 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14013 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
14014 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
14017 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14018 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14019 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14020 [(set_attr "isa" "noavx,noavx,avx")
14021 (set_attr "type" "ssemov")
14022 (set_attr "length_immediate" "1")
14023 (set_attr "prefix_data16" "1,1,*")
14024 (set_attr "prefix_extra" "1")
14025 (set_attr "prefix" "orig,orig,vex")
14026 (set_attr "mode" "<MODE>")])
;; Variable blend over VF_128_256 with a register selector (operand 3).
;; In the two "noavx" alternatives operand 1 is tied to operand 0 and the
;; selector uses the "Yz" constraint; the "avx" alternative accepts any
;; "x" register and prints four-operand vblendv.
14028 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
14029 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14031 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14032 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14033 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
14037 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14038 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14039 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14040 [(set_attr "isa" "noavx,noavx,avx")
14041 (set_attr "type" "ssemov")
14042 (set_attr "length_immediate" "1")
14043 (set_attr "prefix_data16" "1,1,*")
14044 (set_attr "prefix_extra" "1")
14045 (set_attr "prefix" "orig,orig,vex")
14046 (set_attr "btver2_decode" "vector,vector,vector")
14047 (set_attr "mode" "<MODE>")])
;; dp (dot-product mnemonic) over VF_128_256 with an 8-bit immediate
;; (operand 3, range 0..255).  Operand 1 is marked commutative ("%").
;; The two "noavx" alternatives print two-operand dp; the "avx"
;; alternative prints three-operand vdp.
14049 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
14050 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14052 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
14053 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14054 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14058 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14059 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14060 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14061 [(set_attr "isa" "noavx,noavx,avx")
14062 (set_attr "type" "ssemul")
14063 (set_attr "length_immediate" "1")
14064 (set_attr "prefix_data16" "1,1,*")
14065 (set_attr "prefix_extra" "1")
14066 (set_attr "prefix" "orig,orig,vex")
14067 (set_attr "btver2_decode" "vector,vector,vector")
14068 (set_attr "mode" "<MODE>")])
14070 ;; Mode attribute used by `vmovntdqa' pattern
;; Supplies the pattern-name prefix per mode: V2DI -> sse4_1,
;; V4DI -> avx2, V8DI -> avx512f.
14071 (define_mode_attr vi8_sse4_1_avx2_avx512
14072 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; movntdqa load over VI8_AVX2_AVX512F: operand 1 must be memory and
;; operand 0 a register.  Three alternatives differ only in destination
;; register class ("Yr", "*x", "v"); the third is tagged with the evex
;; prefix attribute.
14074 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
14075 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14076 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
14079 "%vmovntdqa\t{%1, %0|%0, %1}"
14080 [(set_attr "type" "ssemov")
14081 (set_attr "prefix_extra" "1,1,*")
14082 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
14083 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw over VI1_AVX2 with an 8-bit immediate (operand 3, 0..255).
;; The two "noavx" alternatives tie operand 1 to operand 0 and print
;; two-operand mpsadbw; the "avx" alternative prints three-operand
;; vmpsadbw.
14085 (define_insn "<sse4_1_avx2>_mpsadbw"
14086 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14088 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14089 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14090 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14094 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14095 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14096 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14097 [(set_attr "isa" "noavx,noavx,avx")
14098 (set_attr "type" "sselog1")
14099 (set_attr "length_immediate" "1")
14100 (set_attr "prefix_extra" "1")
14101 (set_attr "prefix" "orig,orig,vex")
14102 (set_attr "btver2_decode" "vector,vector,vector")
14103 (set_attr "mode" "<sseinsnmode>")])
;; packusdw over VI2_AVX2: the result is the vec_concat of two
;; unsigned-saturating truncations (us_truncate) of operands 1 and 2, each
;; of mode <sseunpackmode>, to <ssehalfvecmode>.  Requires TARGET_SSE4_1
;; plus the two mask-variant conditions.  The "avx" alternative prints
;; vpackusdw with the optional mask operand text.
14105 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
14106 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
14107 (vec_concat:VI2_AVX2
14108 (us_truncate:<ssehalfvecmode>
14109 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
14110 (us_truncate:<ssehalfvecmode>
14111 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
14112 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14114 packusdw\t{%2, %0|%0, %2}
14115 packusdw\t{%2, %0|%0, %2}
14116 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14117 [(set_attr "isa" "noavx,noavx,avx")
14118 (set_attr "type" "sselog")
14119 (set_attr "prefix_extra" "1")
14120 (set_attr "prefix" "orig,orig,maybe_evex")
14121 (set_attr "mode" "<sseinsnmode>")])
;; pblendvb over VI1_AVX2 with a register selector (operand 3).  In the
;; two "noavx" alternatives operand 1 is tied to operand 0 and the selector
;; uses the "Yz" constraint; the "avx" alternative prints four-operand
;; vpblendvb and is the only one given a length_immediate of 1.
14123 (define_insn "<sse4_1_avx2>_pblendvb"
14124 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14126 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14127 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14128 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
14132 pblendvb\t{%3, %2, %0|%0, %2, %3}
14133 pblendvb\t{%3, %2, %0|%0, %2, %3}
14134 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14135 [(set_attr "isa" "noavx,noavx,avx")
14136 (set_attr "type" "ssemov")
14137 (set_attr "prefix_extra" "1")
14138 (set_attr "length_immediate" "*,*,1")
14139 (set_attr "prefix" "orig,orig,vex")
14140 (set_attr "btver2_decode" "vector,vector,vector")
14141 (set_attr "mode" "<sseinsnmode>")])
;; V8HI pblendw with an 8-bit immediate (operand 3, 0..255).  The two
;; "noavx" alternatives tie operand 1 to operand 0 and print two-operand
;; pblendw; the "avx" alternative prints three-operand vpblendw.
14143 (define_insn "sse4_1_pblendw"
14144 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
14146 (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
14147 (match_operand:V8HI 1 "register_operand" "0,0,x")
14148 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
14151 pblendw\t{%3, %2, %0|%0, %2, %3}
14152 pblendw\t{%3, %2, %0|%0, %2, %3}
14153 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14154 [(set_attr "isa" "noavx,noavx,avx")
14155 (set_attr "type" "ssemov")
14156 (set_attr "prefix_extra" "1")
14157 (set_attr "length_immediate" "1")
14158 (set_attr "prefix" "orig,orig,vex")
14159 (set_attr "mode" "TI")])
14161 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code keeps the low 8 bits of operand 3 and replicates
;; them into bits 8..15 (val << 8 | val), giving a 16-bit selector for the
;; sixteen V16HI elements.
14162 (define_expand "avx2_pblendw"
14163 [(set (match_operand:V16HI 0 "register_operand")
14165 (match_operand:V16HI 2 "nonimmediate_operand")
14166 (match_operand:V16HI 1 "register_operand")
14167 (match_operand:SI 3 "const_0_to_255_operand")))]
14170 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14171 operands[3] = GEN_INT (val << 8 | val);
;; V16HI vpblendw insn.  Operand 3 must satisfy avx2_pblendw_operand; the
;; output code masks it back down to its low 8 bits before printing it as
;; the instruction immediate.
14174 (define_insn "*avx2_pblendw"
14175 [(set (match_operand:V16HI 0 "register_operand" "=x")
14177 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14178 (match_operand:V16HI 1 "register_operand" "x")
14179 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
14182 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14183 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14185 [(set_attr "type" "ssemov")
14186 (set_attr "prefix_extra" "1")
14187 (set_attr "length_immediate" "1")
14188 (set_attr "prefix" "vex")
14189 (set_attr "mode" "OI")])
;; vpblendd over VI4_AVX2: a vec_merge of operand 2 and operand 1 selected
;; by the 8-bit immediate operand 3 (0..255).  Single three-operand VEX
;; form.
14191 (define_insn "avx2_pblendd<mode>"
14192 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14193 (vec_merge:VI4_AVX2
14194 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14195 (match_operand:VI4_AVX2 1 "register_operand" "x")
14196 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14198 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14199 [(set_attr "type" "ssemov")
14200 (set_attr "prefix_extra" "1")
14201 (set_attr "length_immediate" "1")
14202 (set_attr "prefix" "vex")
14203 (set_attr "mode" "<sseinsnmode>")])
;; V8HI phminposuw, modelled as UNSPEC_PHMINPOSUW of operand 1 (register
;; or memory).  The template carries the %v prefix and the prefix attribute
;; is maybe_vex.
14205 (define_insn "sse4_1_phminposuw"
14206 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14207 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
14208 UNSPEC_PHMINPOSUW))]
14210 "%vphminposuw\t{%1, %0|%0, %1}"
14211 [(set_attr "type" "sselog1")
14212 (set_attr "prefix_extra" "1")
14213 (set_attr "prefix" "maybe_vex")
14214 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw from a V16QI operand (register or memory) into a
;; V16HI register.  Requires TARGET_AVX2 plus the avx512bw/avx512vl
;; mask-variant conditions.  <code> and <extsuffix> are filled in by an
;; iterator defined outside this excerpt.
14216 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14217 [(set (match_operand:V16HI 0 "register_operand" "=v")
14219 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14220 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14221 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14222 [(set_attr "type" "ssemov")
14223 (set_attr "prefix_extra" "1")
14224 (set_attr "prefix" "maybe_evex")
14225 (set_attr "mode" "OI")])
;; vpmov<extsuffix>bw from a V32QI operand (register or memory) into a
;; V32HI register, EVEX-encoded.  The enabling condition is on a line not
;; visible in this excerpt.
14227 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14228 [(set (match_operand:V32HI 0 "register_operand" "=v")
14230 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14232 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14233 [(set_attr "type" "ssemov")
14234 (set_attr "prefix_extra" "1")
14235 (set_attr "prefix" "evex")
14236 (set_attr "mode" "XI")])
;; pmov<extsuffix>bw into a V8HI register from elements 0..7 of a V16QI
;; operand (selected by the visible parallel).  Requires TARGET_SSE4_1
;; plus the avx512bw/avx512vl mask-variant conditions.  In Intel syntax the
;; source is printed with the %q operand modifier; ssememalign is "64".
14238 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
14239 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
14242 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14243 (parallel [(const_int 0) (const_int 1)
14244 (const_int 2) (const_int 3)
14245 (const_int 4) (const_int 5)
14246 (const_int 6) (const_int 7)]))))]
14247 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14248 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14249 [(set_attr "type" "ssemov")
14250 (set_attr "ssememalign" "64")
14251 (set_attr "prefix_extra" "1")
14252 (set_attr "prefix" "maybe_vex")
14253 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd from a V16QI operand (register or memory) into a
;; V16SI register, EVEX-encoded.  The enabling condition is on a line not
;; visible in this excerpt.
14255 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
14256 [(set (match_operand:V16SI 0 "register_operand" "=v")
14258 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14260 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14261 [(set_attr "type" "ssemov")
14262 (set_attr "prefix" "evex")
14263 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bd into a V8SI register from elements 0..7 of a V16QI
;; operand.  Requires TARGET_AVX2 plus the avx512vl mask-variant
;; condition; the Intel-syntax source uses the %q modifier.
14265 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
14266 [(set (match_operand:V8SI 0 "register_operand" "=v")
14269 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14270 (parallel [(const_int 0) (const_int 1)
14271 (const_int 2) (const_int 3)
14272 (const_int 4) (const_int 5)
14273 (const_int 6) (const_int 7)]))))]
14274 "TARGET_AVX2 && <mask_avx512vl_condition>"
14275 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14276 [(set_attr "type" "ssemov")
14277 (set_attr "prefix_extra" "1")
14278 (set_attr "prefix" "maybe_evex")
14279 (set_attr "mode" "OI")])
;; pmov<extsuffix>bd into a V4SI register from elements 0..3 of a V16QI
;; operand.  Requires TARGET_SSE4_1 plus the avx512vl mask-variant
;; condition; the Intel-syntax source uses the %k modifier and ssememalign
;; is "32".
14281 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
14282 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14285 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14286 (parallel [(const_int 0) (const_int 1)
14287 (const_int 2) (const_int 3)]))))]
14288 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14289 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14290 [(set_attr "type" "ssemov")
14291 (set_attr "ssememalign" "32")
14292 (set_attr "prefix_extra" "1")
14293 (set_attr "prefix" "maybe_vex")
14294 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd from a V16HI operand (register or memory) into a
;; V16SI register, EVEX-encoded.  The enabling condition is on a line not
;; visible in this excerpt.
14296 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
14297 [(set (match_operand:V16SI 0 "register_operand" "=v")
14299 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14301 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14302 [(set_attr "type" "ssemov")
14303 (set_attr "prefix" "evex")
14304 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wd from a V8HI operand (register or memory) into a
;; V8SI register.  Requires TARGET_AVX2 plus the avx512vl mask-variant
;; condition.
14306 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
14307 [(set (match_operand:V8SI 0 "register_operand" "=v")
14309 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14310 "TARGET_AVX2 && <mask_avx512vl_condition>"
14311 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14312 [(set_attr "type" "ssemov")
14313 (set_attr "prefix_extra" "1")
14314 (set_attr "prefix" "maybe_evex")
14315 (set_attr "mode" "OI")])
;; pmov<extsuffix>wd into a V4SI register from elements 0..3 of a V8HI
;; operand.  Requires TARGET_SSE4_1 plus the avx512vl mask-variant
;; condition; the Intel-syntax source uses the %q modifier and ssememalign
;; is "64".
14317 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
14318 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14321 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14322 (parallel [(const_int 0) (const_int 1)
14323 (const_int 2) (const_int 3)]))))]
14324 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14325 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14326 [(set_attr "type" "ssemov")
14327 (set_attr "ssememalign" "64")
14328 (set_attr "prefix_extra" "1")
14329 (set_attr "prefix" "maybe_vex")
14330 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq into a V8DI register from elements 0..7 of a V16QI
;; operand, EVEX-encoded; the Intel-syntax source uses the %k modifier.
;; The enabling condition is on a line not visible in this excerpt.
14332 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
14333 [(set (match_operand:V8DI 0 "register_operand" "=v")
14336 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14337 (parallel [(const_int 0) (const_int 1)
14338 (const_int 2) (const_int 3)
14339 (const_int 4) (const_int 5)
14340 (const_int 6) (const_int 7)]))))]
14342 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14343 [(set_attr "type" "ssemov")
14344 (set_attr "prefix" "evex")
14345 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bq into a V4DI register from elements 0..3 of a V16QI
;; operand.  Requires TARGET_AVX2 plus the avx512vl mask-variant
;; condition; the Intel-syntax source uses the %k modifier.
14347 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
14348 [(set (match_operand:V4DI 0 "register_operand" "=v")
14351 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14352 (parallel [(const_int 0) (const_int 1)
14353 (const_int 2) (const_int 3)]))))]
14354 "TARGET_AVX2 && <mask_avx512vl_condition>"
14355 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14356 [(set_attr "type" "ssemov")
14357 (set_attr "prefix_extra" "1")
14358 (set_attr "prefix" "maybe_evex")
14359 (set_attr "mode" "OI")])
;; pmov<extsuffix>bq into a V2DI register from elements 0..1 of a V16QI
;; operand.  Requires TARGET_SSE4_1 plus the avx512vl mask-variant
;; condition; the Intel-syntax source uses the %w modifier and ssememalign
;; is "16".
14361 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
14362 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14365 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14366 (parallel [(const_int 0) (const_int 1)]))))]
14367 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14368 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
14369 [(set_attr "type" "ssemov")
14370 (set_attr "ssememalign" "16")
14371 (set_attr "prefix_extra" "1")
14372 (set_attr "prefix" "maybe_vex")
14373 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq from a V8HI operand (register or memory) into a
;; V8DI register, EVEX-encoded; the Intel-syntax source uses the %q
;; modifier.  The enabling condition is on a line not visible in this
;; excerpt.
14375 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
14376 [(set (match_operand:V8DI 0 "register_operand" "=v")
14378 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14380 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14381 [(set_attr "type" "ssemov")
14382 (set_attr "prefix" "evex")
14383 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wq into a V4DI register from elements 0..3 of a V8HI
;; operand.  Requires TARGET_AVX2 plus the avx512vl mask-variant
;; condition; the Intel-syntax source uses the %q modifier.
14385 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
14386 [(set (match_operand:V4DI 0 "register_operand" "=v")
14389 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14390 (parallel [(const_int 0) (const_int 1)
14391 (const_int 2) (const_int 3)]))))]
14392 "TARGET_AVX2 && <mask_avx512vl_condition>"
14393 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14394 [(set_attr "type" "ssemov")
14395 (set_attr "prefix_extra" "1")
14396 (set_attr "prefix" "maybe_evex")
14397 (set_attr "mode" "OI")])
;; pmov<extsuffix>wq into a V2DI register from elements 0..1 of a V8HI
;; operand.  Requires TARGET_SSE4_1 plus the avx512vl mask-variant
;; condition; the Intel-syntax source uses the %k modifier and ssememalign
;; is "32".
14399 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14400 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14403 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14404 (parallel [(const_int 0) (const_int 1)]))))]
14405 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14406 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14407 [(set_attr "type" "ssemov")
14408 (set_attr "ssememalign" "32")
14409 (set_attr "prefix_extra" "1")
14410 (set_attr "prefix" "maybe_vex")
14411 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq from a V8SI operand (register or memory) into a
;; V8DI register, EVEX-encoded.  The enabling condition is on a line not
;; visible in this excerpt.
14413 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
14414 [(set (match_operand:V8DI 0 "register_operand" "=v")
14416 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14418 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14419 [(set_attr "type" "ssemov")
14420 (set_attr "prefix" "evex")
14421 (set_attr "mode" "XI")])
;; vpmov<extsuffix>dq from a V4SI operand (register or memory) into a
;; V4DI register.  Requires TARGET_AVX2 plus the avx512vl mask-variant
;; condition.
14423 (define_insn "avx2_<code>v4siv4di2<mask_name>"
14424 [(set (match_operand:V4DI 0 "register_operand" "=v")
14426 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14427 "TARGET_AVX2 && <mask_avx512vl_condition>"
14428 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14429 [(set_attr "type" "ssemov")
14430 (set_attr "prefix" "maybe_evex")
14431 (set_attr "prefix_extra" "1")
14432 (set_attr "mode" "OI")])
;; pmov<extsuffix>dq into a V2DI register from elements 0..1 of a V4SI
;; operand.  Requires TARGET_SSE4_1 plus the avx512vl mask-variant
;; condition; the Intel-syntax source uses the %q modifier and ssememalign
;; is "64".
14434 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
14435 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14438 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
14439 (parallel [(const_int 0) (const_int 1)]))))]
14440 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14441 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14442 [(set_attr "type" "ssemov")
14443 (set_attr "ssememalign" "64")
14444 (set_attr "prefix_extra" "1")
14445 (set_attr "prefix" "maybe_vex")
14446 (set_attr "mode" "TI")])
14448 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
14449 ;; setting FLAGS_REG.  But they are not really compare instructions.
;; The pattern sets (reg:CC FLAGS_REG) from an unspec of operands 0 and 1
;; over the VF_128_256 modes and prints vtest<ssemodesuffix>.
14450 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
14451 [(set (reg:CC FLAGS_REG)
14452 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14453 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
14456 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
14457 [(set_attr "type" "ssecomi")
14458 (set_attr "prefix_extra" "1")
14459 (set_attr "prefix" "vex")
14460 (set_attr "mode" "<MODE>")])
14462 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
14463 ;; But it is not really a compare instruction.
;; 256-bit form: sets (reg:CC FLAGS_REG) from an unspec of two V4DI
;; operands and prints vptest.
14464 (define_insn "avx_ptest256"
14465 [(set (reg:CC FLAGS_REG)
14466 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
14467 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
14470 "vptest\t{%1, %0|%0, %1}"
14471 [(set_attr "type" "ssecomi")
14472 (set_attr "prefix_extra" "1")
14473 (set_attr "prefix" "vex")
14474 (set_attr "btver2_decode" "vector")
14475 (set_attr "mode" "OI")])
;; 128-bit ptest: sets (reg:CC FLAGS_REG) from an unspec of two V2DI
;; operands.  The template carries the %v prefix and the prefix attribute
;; is maybe_vex.
14477 (define_insn "sse4_1_ptest"
14478 [(set (reg:CC FLAGS_REG)
14479 (unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x")
14480 (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")]
14483 "%vptest\t{%1, %0|%0, %1}"
14484 [(set_attr "type" "ssecomi")
14485 (set_attr "prefix_extra" "1")
14486 (set_attr "prefix" "maybe_vex")
14487 (set_attr "mode" "TI")])
;; round<ssemodesuffix> over VF_128_256 with an immediate operand 2 in the
;; range 0..15.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test; only the const_string "1" arm of that computation is
;; visible in this excerpt.
14489 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
14490 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
14492 [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
14493 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
14496 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14497 [(set_attr "type" "ssecvt")
14498 (set (attr "prefix_data16")
14500 (match_test "TARGET_AVX")
14502 (const_string "1")))
14503 (set_attr "prefix_extra" "1")
14504 (set_attr "length_immediate" "1")
14505 (set_attr "prefix" "maybe_vex")
14506 (set_attr "mode" "<MODE>")])
;; Expander: round operand 1 into a fresh temporary with the packed round
;; pattern, then convert that temporary to the integer vector operand 0 via
;; fix_trunc.
14508 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
14509 [(match_operand:<sseintvecmode> 0 "register_operand")
14510 (match_operand:VF1_128_256 1 "nonimmediate_operand")
14511 (match_operand:SI 2 "const_0_to_15_operand")]
14514 rtx tmp = gen_reg_rtx (<MODE>mode);
14517 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14520 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander for a 512-bit V8DF round: forwards all three operands unchanged to
;; gen_avx512f_rndscalev8df.
14524 (define_expand "avx512f_roundpd512"
14525 [(match_operand:V8DF 0 "register_operand")
14526 (match_operand:V8DF 1 "nonimmediate_operand")
14527 (match_operand:SI 2 "const_0_to_15_operand")]
14530 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Expander: round two VF2 inputs (operands 1 and 2) with the immediate in
;; operand 3 and pack the truncated integer results into operand 0.
;; For V2DF with TARGET_AVX, !TARGET_PREFER_AVX128 and optimization for speed,
;; the inputs are concatenated into one V4DF, rounded with a single
;; avx_roundpd256 and converted with fix_truncv4dfv4si2.  Otherwise each input
;; is rounded into its own temporary and vec_pack_sfix_trunc combines them.
14534 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
14535 [(match_operand:<ssepackfltmode> 0 "register_operand")
14536 (match_operand:VF2 1 "nonimmediate_operand")
14537 (match_operand:VF2 2 "nonimmediate_operand")
14538 (match_operand:SI 3 "const_0_to_15_operand")]
14543 if (<MODE>mode == V2DFmode
14544 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14546 rtx tmp2 = gen_reg_rtx (V4DFmode);
14548 tmp0 = gen_reg_rtx (V4DFmode);
14549 tmp1 = force_reg (V2DFmode, operands[1]);
14551 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14552 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14553 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14557 tmp0 = gen_reg_rtx (<MODE>mode);
14558 tmp1 = gen_reg_rtx (<MODE>mode);
14561 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14564 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14567 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round<ssescalarmodesuffix>.  Operand 2 is the value to round,
;; operand 3 the immediate control, operand 1 the other vector input.
;; Alternatives 0 and 1 are the non-AVX forms, where operand 1 is tied to the
;; destination ("0"); alternative 2 is the AVX form with operand 1 printed as
;; a separate source.
14572 (define_insn "sse4_1_round<ssescalarmodesuffix>"
14573 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
14576 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
14577 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
14579 (match_operand:VF_128 1 "register_operand" "0,0,x")
14583 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14584 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14585 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14586 [(set_attr "isa" "noavx,noavx,avx")
14587 (set_attr "type" "ssecvt")
14588 (set_attr "length_immediate" "1")
14589 (set_attr "prefix_data16" "1,1,*")
14590 (set_attr "prefix_extra" "1")
14591 (set_attr "prefix" "orig,orig,vex")
14592 (set_attr "mode" "<MODE>")])
;; Expander for vector round, enabled only when TARGET_ROUND is set and
;; flag_trapping_math is off.
;; The preparation code builds pred_half = 0.5 - 2**(-p-1) for the scalar
;; format (the "nextafter (0.5, 0.0)" value), broadcasts it into a vector,
;; and copies the sign of operand 1 onto it (operands[3]).  operands[4] is a
;; fresh temporary and operands[5] is the ROUND_TRUNC immediate.
;; The pattern first sets operand 4 from operand 1, then computes operand 0
;; from operands 4 and 5.
;; NOTE(review): the operator that combines operand 1 with operands[3] is not
;; visible in this excerpt -- presumably an addition; confirm.
14594 (define_expand "round<mode>2"
14595 [(set (match_dup 4)
14597 (match_operand:VF 1 "register_operand")
14599 (set (match_operand:VF 0 "register_operand")
14601 [(match_dup 4) (match_dup 5)]
14603 "TARGET_ROUND && !flag_trapping_math"
14605 machine_mode scalar_mode;
14606 const struct real_format *fmt;
14607 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14608 rtx half, vec_half;
14610 scalar_mode = GET_MODE_INNER (<MODE>mode);
14612 /* load nextafter (0.5, 0.0) */
14613 fmt = REAL_MODE_FORMAT (scalar_mode);
14614 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14615 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14616 half = const_double_from_real_value (pred_half, scalar_mode);
14618 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14619 vec_half = force_reg (<MODE>mode, vec_half);
14621 operands[3] = gen_reg_rtx (<MODE>mode);
14622 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14624 operands[4] = gen_reg_rtx (<MODE>mode);
14625 operands[5] = GEN_INT (ROUND_TRUNC);
;; Expander: round operand 1 into a temporary with round<mode>2, then convert
;; the temporary to the integer vector operand 0 with fix_trunc.
;; Same enabling condition as round<mode>2.
14628 (define_expand "round<mode>2_sfix"
14629 [(match_operand:<sseintvecmode> 0 "register_operand")
14630 (match_operand:VF1_128_256 1 "register_operand")]
14631 "TARGET_ROUND && !flag_trapping_math"
14633 rtx tmp = gen_reg_rtx (<MODE>mode);
14635 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14638 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander: round two VF2 inputs and pack the truncated integer results into
;; operand 0.
;; For V2DF with TARGET_AVX, !TARGET_PREFER_AVX128 and optimization for speed,
;; the inputs are concatenated into a V4DF, rounded once with roundv4df2 and
;; converted with fix_truncv4dfv4si2.  Otherwise each input is rounded
;; separately with round<mode>2 and vec_pack_sfix_trunc combines the results.
14642 (define_expand "round<mode>2_vec_pack_sfix"
14643 [(match_operand:<ssepackfltmode> 0 "register_operand")
14644 (match_operand:VF2 1 "register_operand")
14645 (match_operand:VF2 2 "register_operand")]
14646 "TARGET_ROUND && !flag_trapping_math"
14650 if (<MODE>mode == V2DFmode
14651 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14653 rtx tmp2 = gen_reg_rtx (V4DFmode);
14655 tmp0 = gen_reg_rtx (V4DFmode);
14656 tmp1 = force_reg (V2DFmode, operands[1]);
14658 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14659 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14660 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14664 tmp0 = gen_reg_rtx (<MODE>mode);
14665 tmp1 = gen_reg_rtx (<MODE>mode);
14667 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14668 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14671 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14676 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14678 ;; Intel SSE4.2 string/text processing instructions
14680 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: the SI result in operand 0
;; (constraint "c"), the V16QI result in operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is split while pseudos can still be created.
;; The split code looks for REG_UNUSED notes on curr_insn to decide which
;; results are live (locals ecx, xmm0, flags) and emits pcmpestri and/or
;; pcmpestrm accordingly.  When only the flags are live it emits the
;; flags-only pattern; when nothing is live it emits a NOTE_INSN_DELETED.
14682 (define_insn_and_split "sse4_2_pcmpestr"
14683 [(set (match_operand:SI 0 "register_operand" "=c,c")
14685 [(match_operand:V16QI 2 "register_operand" "x,x")
14686 (match_operand:SI 3 "register_operand" "a,a")
14687 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14688 (match_operand:SI 5 "register_operand" "d,d")
14689 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14691 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14699 (set (reg:CC FLAGS_REG)
14708 && can_create_pseudo_p ()"
14713 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14714 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14715 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14718 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14719 operands[3], operands[4],
14720 operands[5], operands[6]));
14722 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14723 operands[3], operands[4],
14724 operands[5], operands[6]));
14725 if (flags && !(ecx || xmm0))
14726 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14727 operands[2], operands[3],
14728 operands[4], operands[5],
14730 if (!(flags || ecx || xmm0))
14731 emit_note (NOTE_INSN_DELETED);
14735 [(set_attr "type" "sselog")
14736 (set_attr "prefix_data16" "1")
14737 (set_attr "prefix_extra" "1")
14738 (set_attr "ssememalign" "8")
14739 (set_attr "length_immediate" "1")
14740 (set_attr "memory" "none,load")
14741 (set_attr "mode" "TI")])
;; Variant of sse4_2_pcmpestr whose operand 4 is a memory operand wrapped in
;; an UNSPEC_LOADU.  The split code is the same: it inspects REG_UNUSED notes
;; and emits pcmpestri / pcmpestrm / the flags-only form, passing the bare
;; memory operand 4 to them.
14743 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14744 [(set (match_operand:SI 0 "register_operand" "=c")
14746 [(match_operand:V16QI 2 "register_operand" "x")
14747 (match_operand:SI 3 "register_operand" "a")
14749 [(match_operand:V16QI 4 "memory_operand" "m")]
14751 (match_operand:SI 5 "register_operand" "d")
14752 (match_operand:SI 6 "const_0_to_255_operand" "n")]
14754 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14758 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14762 (set (reg:CC FLAGS_REG)
14766 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14771 && can_create_pseudo_p ()"
14776 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14777 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14778 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14781 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14782 operands[3], operands[4],
14783 operands[5], operands[6]));
14785 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14786 operands[3], operands[4],
14787 operands[5], operands[6]));
14788 if (flags && !(ecx || xmm0))
14789 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14790 operands[2], operands[3],
14791 operands[4], operands[5],
14793 if (!(flags || ecx || xmm0))
14794 emit_note (NOTE_INSN_DELETED);
14798 [(set_attr "type" "sselog")
14799 (set_attr "prefix_data16" "1")
14800 (set_attr "prefix_extra" "1")
14801 (set_attr "ssememalign" "8")
14802 (set_attr "length_immediate" "1")
14803 (set_attr "memory" "load")
14804 (set_attr "mode" "TI")])
;; pcmpestri: SI result in operand 0 (constraint "c") plus FLAGS_REG.
;; Operands 1 and 3 are the V16QI inputs (operand 3 may be memory), operands
;; 2 and 4 are SI registers constrained to "a" and "d", and operand 5 is the
;; 8-bit immediate.  Only operands 1, 3 and 5 appear in the template.
14806 (define_insn "sse4_2_pcmpestri"
14807 [(set (match_operand:SI 0 "register_operand" "=c,c")
14809 [(match_operand:V16QI 1 "register_operand" "x,x")
14810 (match_operand:SI 2 "register_operand" "a,a")
14811 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14812 (match_operand:SI 4 "register_operand" "d,d")
14813 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14815 (set (reg:CC FLAGS_REG)
14824 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
14825 [(set_attr "type" "sselog")
14826 (set_attr "prefix_data16" "1")
14827 (set_attr "prefix_extra" "1")
14828 (set_attr "prefix" "maybe_vex")
14829 (set_attr "ssememalign" "8")
14830 (set_attr "length_immediate" "1")
14831 (set_attr "btver2_decode" "vector")
14832 (set_attr "memory" "none,load")
14833 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz") plus FLAGS_REG.
;; The input operands mirror sse4_2_pcmpestri: V16QI operands 1 and 3, SI
;; registers 2 ("a") and 4 ("d"), and the 8-bit immediate operand 5.
14835 (define_insn "sse4_2_pcmpestrm"
14836 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14838 [(match_operand:V16QI 1 "register_operand" "x,x")
14839 (match_operand:SI 2 "register_operand" "a,a")
14840 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14841 (match_operand:SI 4 "register_operand" "d,d")
14842 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14844 (set (reg:CC FLAGS_REG)
14853 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
14854 [(set_attr "type" "sselog")
14855 (set_attr "prefix_data16" "1")
14856 (set_attr "prefix_extra" "1")
14857 (set_attr "ssememalign" "8")
14858 (set_attr "length_immediate" "1")
14859 (set_attr "prefix" "maybe_vex")
14860 (set_attr "btver2_decode" "vector")
14861 (set_attr "memory" "none,load")
14862 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the only set is FLAGS_REG; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 emit pcmpestrm and clobber the "Yz"
;; V16QI scratch; alternatives 2-3 emit pcmpestri and clobber the "c" SI
;; scratch.  Within each pair, operand 4 is a register or memory.
14864 (define_insn "sse4_2_pcmpestr_cconly"
14865 [(set (reg:CC FLAGS_REG)
14867 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14868 (match_operand:SI 3 "register_operand" "a,a,a,a")
14869 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14870 (match_operand:SI 5 "register_operand" "d,d,d,d")
14871 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
14873 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14874 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14877 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14878 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14879 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
14880 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
14881 [(set_attr "type" "sselog")
14882 (set_attr "prefix_data16" "1")
14883 (set_attr "prefix_extra" "1")
14884 (set_attr "ssememalign" "8")
14885 (set_attr "length_immediate" "1")
14886 (set_attr "memory" "none,load,none,load")
14887 (set_attr "btver2_decode" "vector,vector,vector,vector")
14888 (set_attr "prefix" "maybe_vex")
14889 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: the SI result in operand 0
;; (constraint "c"), the V16QI result in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Unlike pcmpestr there are no explicit SI length operands.
;; The split code uses REG_UNUSED notes on curr_insn to decide which results
;; are live and emits pcmpistri and/or pcmpistrm, the flags-only pattern when
;; only the flags are live, or a NOTE_INSN_DELETED when nothing is live.
14891 (define_insn_and_split "sse4_2_pcmpistr"
14892 [(set (match_operand:SI 0 "register_operand" "=c,c")
14894 [(match_operand:V16QI 2 "register_operand" "x,x")
14895 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14896 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
14898 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14904 (set (reg:CC FLAGS_REG)
14911 && can_create_pseudo_p ()"
14916 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14917 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14918 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14921 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14922 operands[3], operands[4]));
14924 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14925 operands[3], operands[4]));
14926 if (flags && !(ecx || xmm0))
14927 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14928 operands[2], operands[3],
14930 if (!(flags || ecx || xmm0))
14931 emit_note (NOTE_INSN_DELETED);
14935 [(set_attr "type" "sselog")
14936 (set_attr "prefix_data16" "1")
14937 (set_attr "prefix_extra" "1")
14938 (set_attr "ssememalign" "8")
14939 (set_attr "length_immediate" "1")
14940 (set_attr "memory" "none,load")
14941 (set_attr "mode" "TI")])
;; Variant of sse4_2_pcmpistr whose operand 3 is a memory operand wrapped in
;; an UNSPEC_LOADU.  The split code is the same liveness-driven dispatch to
;; pcmpistri / pcmpistrm / the flags-only form, passing the bare memory
;; operand 3 to them.
14943 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
14944 [(set (match_operand:SI 0 "register_operand" "=c")
14946 [(match_operand:V16QI 2 "register_operand" "x")
14948 [(match_operand:V16QI 3 "memory_operand" "m")]
14950 (match_operand:SI 4 "const_0_to_255_operand" "n")]
14952 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14955 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14958 (set (reg:CC FLAGS_REG)
14961 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14965 && can_create_pseudo_p ()"
14970 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14971 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14972 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14975 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14976 operands[3], operands[4]));
14978 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14979 operands[3], operands[4]));
14980 if (flags && !(ecx || xmm0))
14981 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14982 operands[2], operands[3],
14984 if (!(flags || ecx || xmm0))
14985 emit_note (NOTE_INSN_DELETED);
14989 [(set_attr "type" "sselog")
14990 (set_attr "prefix_data16" "1")
14991 (set_attr "prefix_extra" "1")
14992 (set_attr "ssememalign" "8")
14993 (set_attr "length_immediate" "1")
14994 (set_attr "memory" "load")
14995 (set_attr "mode" "TI")])
;; pcmpistri: SI result in operand 0 (constraint "c") plus FLAGS_REG.
;; Operands 1 and 2 are the V16QI inputs (operand 2 may be memory) and
;; operand 3 is the 8-bit immediate.
14997 (define_insn "sse4_2_pcmpistri"
14998 [(set (match_operand:SI 0 "register_operand" "=c,c")
15000 [(match_operand:V16QI 1 "register_operand" "x,x")
15001 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15002 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15004 (set (reg:CC FLAGS_REG)
15011 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15012 [(set_attr "type" "sselog")
15013 (set_attr "prefix_data16" "1")
15014 (set_attr "prefix_extra" "1")
15015 (set_attr "ssememalign" "8")
15016 (set_attr "length_immediate" "1")
15017 (set_attr "prefix" "maybe_vex")
15018 (set_attr "memory" "none,load")
15019 (set_attr "btver2_decode" "vector")
15020 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 (constraint "Yz") plus FLAGS_REG.
;; The inputs mirror sse4_2_pcmpistri: V16QI operands 1 and 2 and the 8-bit
;; immediate operand 3.
15022 (define_insn "sse4_2_pcmpistrm"
15023 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15025 [(match_operand:V16QI 1 "register_operand" "x,x")
15026 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15027 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15029 (set (reg:CC FLAGS_REG)
15036 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
15037 [(set_attr "type" "sselog")
15038 (set_attr "prefix_data16" "1")
15039 (set_attr "prefix_extra" "1")
15040 (set_attr "ssememalign" "8")
15041 (set_attr "length_immediate" "1")
15042 (set_attr "prefix" "maybe_vex")
15043 (set_attr "memory" "none,load")
15044 (set_attr "btver2_decode" "vector")
15045 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the only set is FLAGS_REG; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 emit pcmpistrm and clobber the "Yz"
;; V16QI scratch; alternatives 2-3 emit pcmpistri and clobber the "c" SI
;; scratch.  Within each pair, operand 3 is a register or memory.
15047 (define_insn "sse4_2_pcmpistr_cconly"
15048 [(set (reg:CC FLAGS_REG)
15050 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15051 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15052 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
15054 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15055 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15058 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15059 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15060 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15061 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
15062 [(set_attr "type" "sselog")
15063 (set_attr "prefix_data16" "1")
15064 (set_attr "prefix_extra" "1")
15065 (set_attr "ssememalign" "8")
15066 (set_attr "length_immediate" "1")
15067 (set_attr "memory" "none,load,none,load")
15068 (set_attr "prefix" "maybe_vex")
15069 (set_attr "btver2_decode" "vector,vector,vector,vector")
15070 (set_attr "mode" "TI")])
15072 ;; Packed float variants
;; Maps the integer vector mode of a packed-float prefetch pattern to the SF
;; vector mode used for its memory reference (V8DI -> V8SF, V16SI -> V16SF).
15073 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15074 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the packed-float gather prefetch.  Operand 0 is the mask
;; (register or constant -1), operand 4 the hint constant (2 or 3 per its
;; predicate).  The preparation code builds a Pmode UNSPEC_VSIBADDR from
;; operands 2, 1 and 3 -- presumably base address, index vector and scale;
;; confirm against the vsib predicates.
15076 (define_expand "avx512pf_gatherpf<mode>sf"
15078 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15079 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15081 [(match_operand 2 "vsib_address_operand")
15082 (match_operand:VI48_512 1 "register_operand")
15083 (match_operand:SI 3 "const1248_operand")]))
15084 (match_operand:SI 4 "const_2_to_3_operand")]
15085 UNSPEC_GATHER_PREFETCH)]
15089 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15090 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float gather prefetch.  The value of operand 4 selects
;; between the vgatherpf0 and vgatherpf1 templates; any other value hits
;; gcc_unreachable.  The mask register (operand 0, "Yk") is printed in braces
;; after the VSIB memory operand 5.
15093 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
15095 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15096 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15098 [(match_operand:P 2 "vsib_address_operand" "Tv")
15099 (match_operand:VI48_512 1 "register_operand" "v")
15100 (match_operand:SI 3 "const1248_operand" "n")]
15102 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15103 UNSPEC_GATHER_PREFETCH)]
15106 switch (INTVAL (operands[4]))
15109 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15111 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15113 gcc_unreachable ();
15116 [(set_attr "type" "sse")
15117 (set_attr "prefix" "evex")
15118 (set_attr "mode" "XI")])
;; Packed-float gather prefetch without a mask register operand.  The value
;; of operand 3 selects between the vgatherpf0 and vgatherpf1 templates; only
;; the VSIB memory operand 4 is printed.
15120 (define_insn "*avx512pf_gatherpf<mode>sf"
15123 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15125 [(match_operand:P 1 "vsib_address_operand" "Tv")
15126 (match_operand:VI48_512 0 "register_operand" "v")
15127 (match_operand:SI 2 "const1248_operand" "n")]
15129 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15130 UNSPEC_GATHER_PREFETCH)]
15133 switch (INTVAL (operands[3]))
15136 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
15138 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
15140 gcc_unreachable ();
15143 [(set_attr "type" "sse")
15144 (set_attr "prefix" "evex")
15145 (set_attr "mode" "XI")])
15147 ;; Packed double variants
;; Expander for the packed-double gather prefetch; the vector register operand
;; uses the VI4_256_8_512 iterator.  As in the float variant, the preparation
;; code builds a Pmode UNSPEC_VSIBADDR from operands 2, 1 and 3.
15148 (define_expand "avx512pf_gatherpf<mode>df"
15150 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15153 [(match_operand 2 "vsib_address_operand")
15154 (match_operand:VI4_256_8_512 1 "register_operand")
15155 (match_operand:SI 3 "const1248_operand")]))
15156 (match_operand:SI 4 "const_2_to_3_operand")]
15157 UNSPEC_GATHER_PREFETCH)]
15161 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15162 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double gather prefetch over a V8DF VSIB memory operand.
;; The value of operand 4 selects between the vgatherpf0 and vgatherpf1
;; templates; the mask register (operand 0) is printed in braces after
;; operand 5.
15165 (define_insn "*avx512pf_gatherpf<mode>df_mask"
15167 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15168 (match_operator:V8DF 5 "vsib_mem_operator"
15170 [(match_operand:P 2 "vsib_address_operand" "Tv")
15171 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15172 (match_operand:SI 3 "const1248_operand" "n")]
15174 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15175 UNSPEC_GATHER_PREFETCH)]
15178 switch (INTVAL (operands[4]))
15181 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15183 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15185 gcc_unreachable ();
15188 [(set_attr "type" "sse")
15189 (set_attr "prefix" "evex")
15190 (set_attr "mode" "XI")])
;; Packed-double gather prefetch without a mask register operand.  The value
;; of operand 3 selects between the vgatherpf0 and vgatherpf1 templates; only
;; the VSIB memory operand 4 is printed.
15192 (define_insn "*avx512pf_gatherpf<mode>df"
15195 (match_operator:V8DF 4 "vsib_mem_operator"
15197 [(match_operand:P 1 "vsib_address_operand" "Tv")
15198 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15199 (match_operand:SI 2 "const1248_operand" "n")]
15201 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15202 UNSPEC_GATHER_PREFETCH)]
15205 switch (INTVAL (operands[3]))
15208 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
15210 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
15212 gcc_unreachable ();
15215 [(set_attr "type" "sse")
15216 (set_attr "prefix" "evex")
15217 (set_attr "mode" "XI")])
15219 ;; Packed float variants
;; Expander for the packed-float scatter prefetch.  Same operand layout as the
;; gather expander, except that the hint operand 4 uses const2367_operand and
;; the unspec is UNSPEC_SCATTER_PREFETCH.  The preparation code builds a Pmode
;; UNSPEC_VSIBADDR from operands 2, 1 and 3.
15220 (define_expand "avx512pf_scatterpf<mode>sf"
15222 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15223 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15225 [(match_operand 2 "vsib_address_operand")
15226 (match_operand:VI48_512 1 "register_operand")
15227 (match_operand:SI 3 "const1248_operand")]))
15228 (match_operand:SI 4 "const2367_operand")]
15229 UNSPEC_SCATTER_PREFETCH)]
15233 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15234 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float scatter prefetch.  The value of operand 4 selects
;; between the vscatterpf0 and vscatterpf1 templates; any other value hits
;; gcc_unreachable.  The mask register (operand 0) is printed in braces after
;; the VSIB memory operand 5.
15237 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
15239 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15240 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15242 [(match_operand:P 2 "vsib_address_operand" "Tv")
15243 (match_operand:VI48_512 1 "register_operand" "v")
15244 (match_operand:SI 3 "const1248_operand" "n")]
15246 (match_operand:SI 4 "const2367_operand" "n")]
15247 UNSPEC_SCATTER_PREFETCH)]
15250 switch (INTVAL (operands[4]))
15254 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15257 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15259 gcc_unreachable ();
15262 [(set_attr "type" "sse")
15263 (set_attr "prefix" "evex")
15264 (set_attr "mode" "XI")])
;; Packed-float scatter prefetch without a mask register operand.  The value
;; of operand 3 selects between the vscatterpf0 and vscatterpf1 templates;
;; only the VSIB memory operand 4 is printed.
15266 (define_insn "*avx512pf_scatterpf<mode>sf"
15269 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15271 [(match_operand:P 1 "vsib_address_operand" "Tv")
15272 (match_operand:VI48_512 0 "register_operand" "v")
15273 (match_operand:SI 2 "const1248_operand" "n")]
15275 (match_operand:SI 3 "const2367_operand" "n")]
15276 UNSPEC_SCATTER_PREFETCH)]
15279 switch (INTVAL (operands[3]))
15283 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
15286 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
15288 gcc_unreachable ();
15291 [(set_attr "type" "sse")
15292 (set_attr "prefix" "evex")
15293 (set_attr "mode" "XI")])
15295 ;; Packed double variants
;; Expander for the packed-double scatter prefetch; the vector register
;; operand uses the VI4_256_8_512 iterator.  The preparation code builds a
;; Pmode UNSPEC_VSIBADDR from operands 2, 1 and 3.
15296 (define_expand "avx512pf_scatterpf<mode>df"
15298 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15301 [(match_operand 2 "vsib_address_operand")
15302 (match_operand:VI4_256_8_512 1 "register_operand")
15303 (match_operand:SI 3 "const1248_operand")]))
15304 (match_operand:SI 4 "const2367_operand")]
15305 UNSPEC_SCATTER_PREFETCH)]
15309 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15310 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double scatter prefetch over a V8DF VSIB memory operand.
;; The value of operand 4 selects between the vscatterpf0 and vscatterpf1
;; templates; the mask register (operand 0) is printed in braces after
;; operand 5.
15313 (define_insn "*avx512pf_scatterpf<mode>df_mask"
15315 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15316 (match_operator:V8DF 5 "vsib_mem_operator"
15318 [(match_operand:P 2 "vsib_address_operand" "Tv")
15319 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15320 (match_operand:SI 3 "const1248_operand" "n")]
15322 (match_operand:SI 4 "const2367_operand" "n")]
15323 UNSPEC_SCATTER_PREFETCH)]
15326 switch (INTVAL (operands[4]))
15330 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15333 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15335 gcc_unreachable ();
15338 [(set_attr "type" "sse")
15339 (set_attr "prefix" "evex")
15340 (set_attr "mode" "XI")])
;; Packed-double scatter prefetch without a mask register operand.  The value
;; of operand 3 selects between the vscatterpf0 and vscatterpf1 templates;
;; only the VSIB memory operand 4 is printed.
15342 (define_insn "*avx512pf_scatterpf<mode>df"
15345 (match_operator:V8DF 4 "vsib_mem_operator"
15347 [(match_operand:P 1 "vsib_address_operand" "Tv")
15348 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15349 (match_operand:SI 2 "const1248_operand" "n")]
15351 (match_operand:SI 3 "const2367_operand" "n")]
15352 UNSPEC_SCATTER_PREFETCH)]
15355 switch (INTVAL (operands[3]))
15359 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
15362 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
15364 gcc_unreachable ();
15367 [(set_attr "type" "sse")
15368 (set_attr "prefix" "evex")
15369 (set_attr "mode" "XI")])
;; vexp2<ssemodesuffix> on 512-bit float vectors (VF_512).  The pattern name
;; carries the <mask_name> and <round_saeonly_name> substitution suffixes, and
;; the template prints the corresponding mask and SAE operand fragments.
15371 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15372 [(set (match_operand:VF_512 0 "register_operand" "=v")
15374 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15377 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15378 [(set_attr "prefix" "evex")
15379 (set_attr "type" "sse")
15380 (set_attr "mode" "<MODE>")])
;; vrcp28<ssemodesuffix> on 512-bit float vectors (VF_512), with the same
;; mask / SAE substitution suffixes as avx512er_exp2 plus a <mask_codefor>
;; name prefix.
15382 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15383 [(set (match_operand:VF_512 0 "register_operand" "=v")
15385 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15388 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15389 [(set_attr "prefix" "evex")
15390 (set_attr "type" "sse")
15391 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28<ssescalarmodesuffix> on VF_128: operand 1 is the source of
;; the computation and register operand 2 is the second vector input printed
;; between it and the destination.
;; NOTE(review): no immediate operand is visible in this pattern, yet
;; length_immediate is set to 1 -- confirm that attribute is intended.
15393 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15394 [(set (match_operand:VF_128 0 "register_operand" "=v")
15397 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15399 (match_operand:VF_128 2 "register_operand" "v")
15402 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15403 [(set_attr "length_immediate" "1")
15404 (set_attr "prefix" "evex")
15405 (set_attr "type" "sse")
15406 (set_attr "mode" "<MODE>")])
;; vrsqrt28<ssemodesuffix> on 512-bit float vectors (VF_512), with the same
;; mask / SAE substitution scheme as avx512er_rcp28.
15408 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
15409 [(set (match_operand:VF_512 0 "register_operand" "=v")
15411 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15414 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15415 [(set_attr "prefix" "evex")
15416 (set_attr "type" "sse")
15417 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28<ssescalarmodesuffix> on VF_128; operand layout matches
;; avx512er_vmrcp28 (operand 1 is the computed source, operand 2 the second
;; vector input).
;; NOTE(review): no immediate operand is visible in this pattern, yet
;; length_immediate is set to 1 -- confirm that attribute is intended.
15419 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15420 [(set (match_operand:VF_128 0 "register_operand" "=v")
15423 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15425 (match_operand:VF_128 2 "register_operand" "v")
15428 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15429 [(set_attr "length_immediate" "1")
15430 (set_attr "type" "sse")
15431 (set_attr "prefix" "evex")
15432 (set_attr "mode" "<MODE>")])
15434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15436 ;; XOP instructions
15438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over plain and signed-saturating addition.  The macs and
;; madcs attributes give the mnemonic stem for each: the ss_plus variants
;; append an extra "s".
15440 (define_code_iterator xop_plus [plus ss_plus])
15442 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
15443 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15445 ;; XOP parallel integer multiply/add instructions.
;; vpmacs / vpmacss on VI24_128 vectors.  Operands 1 and 2 form a commutative
;; pair (the "%" constraint modifier); operand 2 may be memory.  Operand 3 is
;; a register input of the same mode.  The <macs> attribute picks the
;; mnemonic.
15447 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15448 [(set (match_operand:VI24_128 0 "register_operand" "=x")
15451 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15452 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
15453 (match_operand:VI24_128 3 "register_operand" "x")))]
15455 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15456 [(set_attr "type" "ssemuladd")
15457 (set_attr "mode" "TI")])
;; vpmacsdql / vpmacssdql: uses elements 0 and 2 of the V4SI operands 1 and 2
;; together with the V2DI operand 3, producing a V2DI result.
15459 (define_insn "xop_p<macs>dql"
15460 [(set (match_operand:V2DI 0 "register_operand" "=x")
15465 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15466 (parallel [(const_int 0) (const_int 2)])))
15469 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15470 (parallel [(const_int 0) (const_int 2)]))))
15471 (match_operand:V2DI 3 "register_operand" "x")))]
15473 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15474 [(set_attr "type" "ssemuladd")
15475 (set_attr "mode" "TI")])
;; vpmacsdqh / vpmacssdqh: same shape as the dql form but uses elements 1 and
;; 3 of the V4SI operands 1 and 2.
15477 (define_insn "xop_p<macs>dqh"
15478 [(set (match_operand:V2DI 0 "register_operand" "=x")
15483 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15484 (parallel [(const_int 1) (const_int 3)])))
15487 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15488 (parallel [(const_int 1) (const_int 3)]))))
15489 (match_operand:V2DI 3 "register_operand" "x")))]
15491 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15492 [(set_attr "type" "ssemuladd")
15493 (set_attr "mode" "TI")])
15495 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacswd / vpmacsswd: uses elements 1, 3, 5 and 7 of the V8HI operands 1
;; and 2 together with the V4SI operand 3, producing a V4SI result.
15496 (define_insn "xop_p<macs>wd"
15497 [(set (match_operand:V4SI 0 "register_operand" "=x")
15502 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15503 (parallel [(const_int 1) (const_int 3)
15504 (const_int 5) (const_int 7)])))
15507 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15508 (parallel [(const_int 1) (const_int 3)
15509 (const_int 5) (const_int 7)]))))
15510 (match_operand:V4SI 3 "register_operand" "x")))]
15512 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15513 [(set_attr "type" "ssemuladd")
15514 (set_attr "mode" "TI")])
;; vpmadcswd / vpmadcsswd: the pattern selects both the even elements
;; (0, 2, 4, 6) and the odd elements (1, 3, 5, 7) of the V8HI inputs, and also
;; takes the V4SI operand 3, producing a V4SI result.
15516 (define_insn "xop_p<madcs>wd"
15517 [(set (match_operand:V4SI 0 "register_operand" "=x")
15523 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15524 (parallel [(const_int 0) (const_int 2)
15525 (const_int 4) (const_int 6)])))
15528 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15529 (parallel [(const_int 0) (const_int 2)
15530 (const_int 4) (const_int 6)]))))
15535 (parallel [(const_int 1) (const_int 3)
15536 (const_int 5) (const_int 7)])))
15540 (parallel [(const_int 1) (const_int 3)
15541 (const_int 5) (const_int 7)])))))
15542 (match_operand:V4SI 3 "register_operand" "x")))]
15544 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15545 [(set_attr "type" "ssemuladd")
15546 (set_attr "mode" "TI")])
15548 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in the V iterator.  The two alternatives allow either
;; operand 3 (alternative 1) or operand 2 (alternative 0) to be in memory, but
;; not both; operand 1 is always a register.
15549 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
15550 [(set (match_operand:V 0 "register_operand" "=x,x")
15552 (match_operand:V 3 "nonimmediate_operand" "x,m")
15553 (match_operand:V 1 "register_operand" "x,x")
15554 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
15556 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15557 [(set_attr "type" "sse4arg")])
15559 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: V16QI -> V8HI.  The pattern selects the even-indexed bytes
;; (0, 2, ..., 14) and the odd-indexed bytes (1, 3, ..., 15) of operand 1, so
;; each result element pairs byte 2i with byte 2i+1.
15560 (define_insn "xop_phadd<u>bw"
15561 [(set (match_operand:V8HI 0 "register_operand" "=x")
15565 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15566 (parallel [(const_int 0) (const_int 2)
15567 (const_int 4) (const_int 6)
15568 (const_int 8) (const_int 10)
15569 (const_int 12) (const_int 14)])))
15573 (parallel [(const_int 1) (const_int 3)
15574 (const_int 5) (const_int 7)
15575 (const_int 9) (const_int 11)
15576 (const_int 13) (const_int 15)])))))]
15578 "vphadd<u>bw\t{%1, %0|%0, %1}"
15579 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: V16QI -> V4SI.  Four selections take every fourth byte
;; starting at offsets 0, 1, 2 and 3, so each result element combines the four
;; bytes 4i .. 4i+3 of operand 1.
15581 (define_insn "xop_phadd<u>bd"
15582 [(set (match_operand:V4SI 0 "register_operand" "=x")
15587 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15588 (parallel [(const_int 0) (const_int 4)
15589 (const_int 8) (const_int 12)])))
15593 (parallel [(const_int 1) (const_int 5)
15594 (const_int 9) (const_int 13)]))))
15599 (parallel [(const_int 2) (const_int 6)
15600 (const_int 10) (const_int 14)])))
15604 (parallel [(const_int 3) (const_int 7)
15605 (const_int 11) (const_int 15)]))))))]
15607 "vphadd<u>bd\t{%1, %0|%0, %1}"
15608 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: V16QI -> V2DI.  Eight selections pair byte k with byte k+8 for
;; k = 0 .. 7, so each result element combines the eight bytes 8i .. 8i+7 of
;; operand 1.
15610 (define_insn "xop_phadd<u>bq"
15611 [(set (match_operand:V2DI 0 "register_operand" "=x")
15617 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15618 (parallel [(const_int 0) (const_int 8)])))
15622 (parallel [(const_int 1) (const_int 9)]))))
15627 (parallel [(const_int 2) (const_int 10)])))
15631 (parallel [(const_int 3) (const_int 11)])))))
15637 (parallel [(const_int 4) (const_int 12)])))
15641 (parallel [(const_int 5) (const_int 13)]))))
15646 (parallel [(const_int 6) (const_int 14)])))
15650 (parallel [(const_int 7) (const_int 15)])))))))]
15652 "vphadd<u>bq\t{%1, %0|%0, %1}"
15653 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: V8HI -> V4SI.  Selects the even-indexed (0, 2, 4, 6) and
;; odd-indexed (1, 3, 5, 7) words of operand 1, pairing word 2i with 2i+1.
15655 (define_insn "xop_phadd<u>wd"
15656 [(set (match_operand:V4SI 0 "register_operand" "=x")
15660 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15661 (parallel [(const_int 0) (const_int 2)
15662 (const_int 4) (const_int 6)])))
15666 (parallel [(const_int 1) (const_int 3)
15667 (const_int 5) (const_int 7)])))))]
15669 "vphadd<u>wd\t{%1, %0|%0, %1}"
15670 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: V8HI -> V2DI.  Four selections pair word k with word k+4 for
;; k = 0 .. 3, so each result element combines the four words 4i .. 4i+3 of
;; operand 1.
15672 (define_insn "xop_phadd<u>wq"
15673 [(set (match_operand:V2DI 0 "register_operand" "=x")
15678 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15679 (parallel [(const_int 0) (const_int 4)])))
15683 (parallel [(const_int 1) (const_int 5)]))))
15688 (parallel [(const_int 2) (const_int 6)])))
15692 (parallel [(const_int 3) (const_int 7)]))))))]
15694 "vphadd<u>wq\t{%1, %0|%0, %1}"
15695 [(set_attr "type" "sseiadd1")])
;; xop_phadd<u>dq: emits vphadd<u>dq.  Operand 1 is a V4SI register or memory
;; source and operand 0 a V2DI register.  The even lanes {0,2} and the odd
;; lanes {1,3} of operand 1 are the two selections that are combined.
15697 (define_insn "xop_phadd<u>dq"
15698 [(set (match_operand:V2DI 0 "register_operand" "=x")
15702 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15703 (parallel [(const_int 0) (const_int 2)])))
15707 (parallel [(const_int 1) (const_int 3)])))))]
15709 "vphadd<u>dq\t{%1, %0|%0, %1}"
15710 [(set_attr "type" "sseiadd1")])
;; xop_phsubbw: emits vphsubbw.  Operand 1 is a V16QI register or memory
;; source and operand 0 a V8HI register.  The first selection takes the even
;; byte lanes {0,2,...,14}, the second the odd byte lanes {1,3,...,15}.
15712 (define_insn "xop_phsubbw"
15713 [(set (match_operand:V8HI 0 "register_operand" "=x")
15717 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15718 (parallel [(const_int 0) (const_int 2)
15719 (const_int 4) (const_int 6)
15720 (const_int 8) (const_int 10)
15721 (const_int 12) (const_int 14)])))
15725 (parallel [(const_int 1) (const_int 3)
15726 (const_int 5) (const_int 7)
15727 (const_int 9) (const_int 11)
15728 (const_int 13) (const_int 15)])))))]
15730 "vphsubbw\t{%1, %0|%0, %1}"
15731 [(set_attr "type" "sseiadd1")])
;; xop_phsubwd: emits vphsubwd.  Operand 1 is a V8HI register or memory
;; source and operand 0 a V4SI register.  The first selection takes the even
;; word lanes {0,2,4,6}, the second the odd word lanes {1,3,5,7}.
15733 (define_insn "xop_phsubwd"
15734 [(set (match_operand:V4SI 0 "register_operand" "=x")
15738 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15739 (parallel [(const_int 0) (const_int 2)
15740 (const_int 4) (const_int 6)])))
15744 (parallel [(const_int 1) (const_int 3)
15745 (const_int 5) (const_int 7)])))))]
15747 "vphsubwd\t{%1, %0|%0, %1}"
15748 [(set_attr "type" "sseiadd1")])
;; xop_phsubdq: emits vphsubdq.  Operand 1 is a V4SI register or memory
;; source and operand 0 a V2DI register.  The first selection takes lanes
;; {0,2}, the second lanes {1,3}.
15750 (define_insn "xop_phsubdq"
15751 [(set (match_operand:V2DI 0 "register_operand" "=x")
15755 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15756 (parallel [(const_int 0) (const_int 2)])))
15760 (parallel [(const_int 1) (const_int 3)])))))]
15762 "vphsubdq\t{%1, %0|%0, %1}"
15763 [(set_attr "type" "sseiadd1")])
15765 ;; XOP permute instructions
;; xop_pperm: emits vpperm on three V16QI inputs wrapped in
;; UNSPEC_XOP_PERMUTE.  Operand 1 must be a register.  The two alternatives
;; put either operand 2 or operand 3 in memory, and the insn condition
;; additionally rejects the case where both are memory operands.
15766 (define_insn "xop_pperm"
15767 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15769 [(match_operand:V16QI 1 "register_operand" "x,x")
15770 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15771 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15772 UNSPEC_XOP_PERMUTE))]
15773 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15774 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15775 [(set_attr "type" "sse4arg")
15776 (set_attr "mode" "TI")])
15778 ;; XOP pack instructions that combine two vectors into a smaller vector
;; xop_pperm_pack_v2di_v4si: emits vpperm to build a V4SI result from two
;; V2DI inputs (operand 1 in a register, operand 2 in a register or memory).
;; Operand 3 is a V16QI input attached through a (use ...); the condition
;; rejects operands 2 and 3 both being in memory.
15779 (define_insn "xop_pperm_pack_v2di_v4si"
15780 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15783 (match_operand:V2DI 1 "register_operand" "x,x"))
15785 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15786 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15787 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15788 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15789 [(set_attr "type" "sse4arg")
15790 (set_attr "mode" "TI")])
;; xop_pperm_pack_v4si_v8hi: emits vpperm to build a V8HI result from two
;; V4SI inputs (operand 1 in a register, operand 2 in a register or memory).
;; Operand 3 is a V16QI input attached through a (use ...); the condition
;; rejects operands 2 and 3 both being in memory.
15792 (define_insn "xop_pperm_pack_v4si_v8hi"
15793 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15796 (match_operand:V4SI 1 "register_operand" "x,x"))
15798 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15799 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15800 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15801 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15802 [(set_attr "type" "sse4arg")
15803 (set_attr "mode" "TI")])
;; xop_pperm_pack_v8hi_v16qi: emits vpperm to build a V16QI result from two
;; V8HI inputs (operand 1 in a register, operand 2 in a register or memory).
;; Operand 3 is a V16QI input attached through a (use ...); the condition
;; rejects operands 2 and 3 both being in memory.
15805 (define_insn "xop_pperm_pack_v8hi_v16qi"
15806 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15809 (match_operand:V8HI 1 "register_operand" "x,x"))
15811 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15812 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15813 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15814 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15815 [(set_attr "type" "sse4arg")
15816 (set_attr "mode" "TI")])
15818 ;; XOP packed rotate instructions
;; rotl<mode>3: expander for a VI_128 rotate with an SImode count
;; (operand 2, any general operand).  When the count does not satisfy
;; const_0_to_<sserotatemax>_operand, the preparation code converts it to
;; <ssescalarmode> if its mode differs, fills a PARALLEL with
;; <ssescalarnum> copies of it, materialises that as a vector register via
;; vec_init<mode>, and emits xop_vrotl<mode>3 with the vector count.
15819 (define_expand "rotl<mode>3"
15820 [(set (match_operand:VI_128 0 "register_operand")
15822 (match_operand:VI_128 1 "nonimmediate_operand")
15823 (match_operand:SI 2 "general_operand")))]
15826 /* If we were given a scalar, convert it to parallel */
15827 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15829 rtvec vs = rtvec_alloc (<ssescalarnum>);
15830 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15831 rtx reg = gen_reg_rtx (<MODE>mode);
15832 rtx op2 = operands[2];
15835 if (GET_MODE (op2) != <ssescalarmode>mode)
15837 op2 = gen_reg_rtx (<ssescalarmode>mode);
15838 convert_move (op2, operands[2], false);
15841 for (i = 0; i < <ssescalarnum>; i++)
15842 RTVEC_ELT (vs, i) = op2;
15844 emit_insn (gen_vec_init<mode> (reg, par));
15845 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; rotr<mode>3: expander for a VI_128 rotate with an SImode count
;; (operand 2, any general operand).  When the count does not satisfy
;; const_0_to_<sserotatemax>_operand, the preparation code broadcasts it
;; into a vector register the same way rotl<mode>3 does, negates that
;; vector with neg<mode>2, and emits xop_vrotl<mode>3 with the negated
;; count.
15850 (define_expand "rotr<mode>3"
15851 [(set (match_operand:VI_128 0 "register_operand")
15853 (match_operand:VI_128 1 "nonimmediate_operand")
15854 (match_operand:SI 2 "general_operand")))]
15857 /* If we were given a scalar, convert it to parallel */
15858 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15860 rtvec vs = rtvec_alloc (<ssescalarnum>);
15861 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15862 rtx neg = gen_reg_rtx (<MODE>mode);
15863 rtx reg = gen_reg_rtx (<MODE>mode);
15864 rtx op2 = operands[2];
15867 if (GET_MODE (op2) != <ssescalarmode>mode)
15869 op2 = gen_reg_rtx (<ssescalarmode>mode);
15870 convert_move (op2, operands[2], false);
15873 for (i = 0; i < <ssescalarnum>; i++)
15874 RTVEC_ELT (vs, i) = op2;
15876 emit_insn (gen_vec_init<mode> (reg, par));
15877 emit_insn (gen_neg<mode>2 (neg, reg));
15878 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; xop_rotl<mode>3: immediate-count form.  Operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the
;; immediate of vprot<ssemodesuffix>; operand 1 may be a register or memory.
15883 (define_insn "xop_rotl<mode>3"
15884 [(set (match_operand:VI_128 0 "register_operand" "=x")
15886 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15887 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15889 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15890 [(set_attr "type" "sseishft")
15891 (set_attr "length_immediate" "1")
15892 (set_attr "mode" "TI")])
;; xop_rotr<mode>3: immediate-count form that is output as a vprot with a
;; recomputed immediate.  The output code computes
;; GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]) and the
;; template prints the result as %3.
;; NOTE(review): the assignment target is presumably operands[3] -- confirm.
15894 (define_insn "xop_rotr<mode>3"
15895 [(set (match_operand:VI_128 0 "register_operand" "=x")
15897 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15898 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15902 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15903 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15905 [(set_attr "type" "sseishft")
15906 (set_attr "length_immediate" "1")
15907 (set_attr "mode" "TI")])
;; vrotr<mode>3: expander taking three VI_128 register operands.  It negates
;; the per-element count (operand 2) into a fresh register with neg<mode>2
;; and emits xop_vrotl<mode>3 with the negated count.
15909 (define_expand "vrotr<mode>3"
15910 [(match_operand:VI_128 0 "register_operand")
15911 (match_operand:VI_128 1 "register_operand")
15912 (match_operand:VI_128 2 "register_operand")]
15915 rtx reg = gen_reg_rtx (<MODE>mode);
15916 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15917 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; vrotl<mode>3: expander taking three VI_128 register operands; it passes
;; them unchanged to xop_vrotl<mode>3.
15921 (define_expand "vrotl<mode>3"
15922 [(match_operand:VI_128 0 "register_operand")
15923 (match_operand:VI_128 1 "register_operand")
15924 (match_operand:VI_128 2 "register_operand")]
15927 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; xop_vrotl<mode>3: vector-count form of vprot<ssemodesuffix>.  The RTL is
;; an if_then_else over operand 2 whose last arm uses (neg (match_dup 2)),
;; i.e. the negated count.  The alternatives allow either operand 1 or
;; operand 2 to be in memory, and the condition rejects both being memory.
15931 (define_insn "xop_vrotl<mode>3"
15932 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15933 (if_then_else:VI_128
15935 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15938 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15942 (neg:VI_128 (match_dup 2)))))]
15943 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15944 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15945 [(set_attr "type" "sseishft")
15946 (set_attr "prefix_data16" "0")
15947 (set_attr "prefix_extra" "2")
15948 (set_attr "mode" "TI")])
15950 ;; XOP packed shift instructions.
;; vlshr<mode>3 for VI12_128: negates the per-element count (operand 2) into
;; a fresh register with neg<mode>2 and emits xop_shl<mode>3 with the
;; negated count.
15951 (define_expand "vlshr<mode>3"
15952 [(set (match_operand:VI12_128 0 "register_operand")
15954 (match_operand:VI12_128 1 "register_operand")
15955 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15958 rtx neg = gen_reg_rtx (<MODE>mode);
15959 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15960 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_128, available when TARGET_AVX2 or TARGET_XOP.
;; The preparation code negates operand 2 with neg<mode>2 and emits
;; xop_shl<mode>3 with the negated count.
;; NOTE(review): presumably only the non-AVX2 (XOP) path takes this
;; sequence -- confirm against the guard around it.
15964 (define_expand "vlshr<mode>3"
15965 [(set (match_operand:VI48_128 0 "register_operand")
15967 (match_operand:VI48_128 1 "register_operand")
15968 (match_operand:VI48_128 2 "nonimmediate_operand")))]
15969 "TARGET_AVX2 || TARGET_XOP"
15973 rtx neg = gen_reg_rtx (<MODE>mode);
15974 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15975 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_512: pattern-only expander with register operands
;; 0 and 1 and a register-or-memory per-element count (operand 2).
15980 (define_expand "vlshr<mode>3"
15981 [(set (match_operand:VI48_512 0 "register_operand")
15983 (match_operand:VI48_512 1 "register_operand")
15984 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr<mode>3 for VI48_256: pattern-only expander with register operands
;; 0 and 1 and a register-or-memory per-element count (operand 2).
15987 (define_expand "vlshr<mode>3"
15988 [(set (match_operand:VI48_256 0 "register_operand")
15990 (match_operand:VI48_256 1 "register_operand")
15991 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vashrv8hi3<mask_name>: V8HI expander with a per-element count, enabled
;; for TARGET_XOP or for AVX512BW together with AVX512VL.  The preparation
;; code negates operand 2 with negv8hi2 and emits xop_shav8hi3 with the
;; negated count.
;; NOTE(review): presumably this sequence is the XOP-only path -- confirm
;; against the guard around it.
15994 (define_expand "vashrv8hi3<mask_name>"
15995 [(set (match_operand:V8HI 0 "register_operand")
15997 (match_operand:V8HI 1 "register_operand")
15998 (match_operand:V8HI 2 "nonimmediate_operand")))]
15999 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16003 rtx neg = gen_reg_rtx (V8HImode);
16004 emit_insn (gen_negv8hi2 (neg, operands[2]));
16005 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
;; vashrv16qi3: V16QI expander with a per-element count.  It negates
;; operand 2 with negv16qi2 and emits xop_shav16qi3 with the negated count.
16010 (define_expand "vashrv16qi3"
16011 [(set (match_operand:V16QI 0 "register_operand")
16013 (match_operand:V16QI 1 "register_operand")
16014 (match_operand:V16QI 2 "nonimmediate_operand")))]
16017 rtx neg = gen_reg_rtx (V16QImode);
16018 emit_insn (gen_negv16qi2 (neg, operands[2]));
16019 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
;; vashrv2di3<mask_name>: V2DI expander with a per-element count, enabled
;; for TARGET_XOP or TARGET_AVX512VL.  The preparation code negates
;; operand 2 with negv2di2 and emits xop_shav2di3 with the negated count.
;; NOTE(review): presumably this sequence is the XOP-only path -- confirm
;; against the guard around it.
16023 (define_expand "vashrv2di3<mask_name>"
16024 [(set (match_operand:V2DI 0 "register_operand")
16026 (match_operand:V2DI 1 "register_operand")
16027 (match_operand:V2DI 2 "nonimmediate_operand")))]
16028 "TARGET_XOP || TARGET_AVX512VL"
16032 rtx neg = gen_reg_rtx (V2DImode);
16033 emit_insn (gen_negv2di2 (neg, operands[2]));
16034 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
;; vashrv4si3: V4SI arithmetic-right-shift expander with a per-element
;; count, enabled for TARGET_AVX2 or TARGET_XOP.  The preparation code
;; negates operand 2 with negv4si2 and emits xop_shav4si3 with the negated
;; count.
;; NOTE(review): presumably this sequence is the non-AVX2 (XOP) path --
;; confirm against the guard around it.
16039 (define_expand "vashrv4si3"
16040 [(set (match_operand:V4SI 0 "register_operand")
16041 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16042 (match_operand:V4SI 2 "nonimmediate_operand")))]
16043 "TARGET_AVX2 || TARGET_XOP"
16047 rtx neg = gen_reg_rtx (V4SImode);
16048 emit_insn (gen_negv4si2 (neg, operands[2]));
16049 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; vashrv16si3: pattern-only expander for a V16SI arithmetic right shift
;; (ashiftrt) by a per-element count held in a register or memory.
16054 (define_expand "vashrv16si3"
16055 [(set (match_operand:V16SI 0 "register_operand")
16056 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16057 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; vashrv8si3: pattern-only expander for a V8SI arithmetic right shift
;; (ashiftrt) by a per-element count held in a register or memory.
16060 (define_expand "vashrv8si3"
16061 [(set (match_operand:V8SI 0 "register_operand")
16062 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16063 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl<mode>3 for VI12_128: passes its operands unchanged to
;; xop_sha<mode>3 (no negation of the count, unlike the right-shift
;; expanders).
16066 (define_expand "vashl<mode>3"
16067 [(set (match_operand:VI12_128 0 "register_operand")
16069 (match_operand:VI12_128 1 "register_operand")
16070 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16073 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_128, enabled for TARGET_AVX2 or TARGET_XOP.  The
;; preparation code forces the count (operand 2) into a register and emits
;; xop_sha<mode>3.
;; NOTE(review): presumably this sequence is the non-AVX2 (XOP) path --
;; confirm against the guard around it.
16077 (define_expand "vashl<mode>3"
16078 [(set (match_operand:VI48_128 0 "register_operand")
16080 (match_operand:VI48_128 1 "register_operand")
16081 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16082 "TARGET_AVX2 || TARGET_XOP"
16086 operands[2] = force_reg (<MODE>mode, operands[2]);
16087 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_512: pattern-only expander with register operands
;; 0 and 1 and a register-or-memory per-element count (operand 2).
16092 (define_expand "vashl<mode>3"
16093 [(set (match_operand:VI48_512 0 "register_operand")
16095 (match_operand:VI48_512 1 "register_operand")
16096 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl<mode>3 for VI48_256: pattern-only expander with register operands
;; 0 and 1 and a register-or-memory per-element count (operand 2).
16099 (define_expand "vashl<mode>3"
16100 [(set (match_operand:VI48_256 0 "register_operand")
16102 (match_operand:VI48_256 1 "register_operand")
16103 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; xop_sha<mode>3: emits vpsha<ssemodesuffix> with a per-element count in
;; operand 2.  The RTL is an if_then_else over operand 2 whose last arm uses
;; (neg (match_dup 2)), i.e. the negated count.  Either operand 1 or
;; operand 2 may be in memory, but the condition rejects both being memory.
16106 (define_insn "xop_sha<mode>3"
16107 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16108 (if_then_else:VI_128
16110 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16113 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16117 (neg:VI_128 (match_dup 2)))))]
16118 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16119 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16120 [(set_attr "type" "sseishft")
16121 (set_attr "prefix_data16" "0")
16122 (set_attr "prefix_extra" "2")
16123 (set_attr "mode" "TI")])
;; xop_shl<mode>3: emits vpshl<ssemodesuffix> with a per-element count in
;; operand 2.  The RTL is an if_then_else over operand 2 whose last arm uses
;; (neg (match_dup 2)), i.e. the negated count.  Either operand 1 or
;; operand 2 may be in memory, but the condition rejects both being memory.
16125 (define_insn "xop_shl<mode>3"
16126 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16127 (if_then_else:VI_128
16129 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16132 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16136 (neg:VI_128 (match_dup 2)))))]
16137 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16138 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16139 [(set_attr "type" "sseishft")
16140 (set_attr "prefix_data16" "0")
16141 (set_attr "prefix_extra" "2")
16142 (set_attr "mode" "TI")])
;; <shift_insn><mode>3 for VI1_AVX512: byte-vector shift by a scalar SImode
;; count (register or constant).
;; With TARGET_XOP and V16QImode the count is broadcast into a V16QI
;; register through a 16-element PARALLEL and vec_initv16qi.  For codes
;; other than ASHIFT a constant count is replaced by its negation up front;
;; gen_negv16qi2 negates the broadcast vector (NOTE(review): presumably only
;; when the `negate' flag was set for a non-constant count -- confirm).
;; LSHIFTRT is emitted through xop_shlv16qi3 and the other codes through
;; xop_shav16qi3.
;; The remaining path calls ix86_expand_vecop_qihi.
16144 (define_expand "<shift_insn><mode>3"
16145 [(set (match_operand:VI1_AVX512 0 "register_operand")
16146 (any_shift:VI1_AVX512
16147 (match_operand:VI1_AVX512 1 "register_operand")
16148 (match_operand:SI 2 "nonmemory_operand")))]
16151 if (TARGET_XOP && <MODE>mode == V16QImode)
16153 bool negate = false;
16154 rtx (*gen) (rtx, rtx, rtx);
16158 if (<CODE> != ASHIFT)
16160 if (CONST_INT_P (operands[2]))
16161 operands[2] = GEN_INT (-INTVAL (operands[2]));
16165 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16166 for (i = 0; i < 16; i++)
16167 XVECEXP (par, 0, i) = operands[2];
16169 tmp = gen_reg_rtx (V16QImode);
16170 emit_insn (gen_vec_initv16qi (tmp, par));
16173 emit_insn (gen_negv16qi2 (tmp, tmp));
16175 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16176 emit_insn (gen (operands[0], operands[1], tmp));
16179 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; ashrv2di3: V2DI shift by a scalar DImode count, enabled for TARGET_XOP or
;; TARGET_AVX512VL.  When AVX512VL is not available, a constant count is
;; replaced by its negation, the count is broadcast into a V2DI register
;; through a 2-element PARALLEL and vec_initv2di, and the shift is emitted
;; as xop_shav2di3.  gen_negv2di2 negates the broadcast vector
;; (NOTE(review): presumably only when the `negate' flag was set for a
;; non-constant count -- confirm).
16183 (define_expand "ashrv2di3"
16184 [(set (match_operand:V2DI 0 "register_operand")
16186 (match_operand:V2DI 1 "register_operand")
16187 (match_operand:DI 2 "nonmemory_operand")))]
16188 "TARGET_XOP || TARGET_AVX512VL"
16190 if (!TARGET_AVX512VL)
16192 rtx reg = gen_reg_rtx (V2DImode);
16194 bool negate = false;
16197 if (CONST_INT_P (operands[2]))
16198 operands[2] = GEN_INT (-INTVAL (operands[2]));
16202 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16203 for (i = 0; i < 2; i++)
16204 XVECEXP (par, 0, i) = operands[2];
16206 emit_insn (gen_vec_initv2di (reg, par));
16209 emit_insn (gen_negv2di2 (reg, reg));
16211 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16216 ;; XOP FRCZ support
;; xop_frcz<mode>2: emits vfrcz<ssemodesuffix> for every FMAMODE mode, with
;; a register destination and a register-or-memory source.  The insn's
;; "mode" attribute follows the iterator mode.
16217 (define_insn "xop_frcz<mode>2"
16218 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16220 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16223 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16224 [(set_attr "type" "ssecvt1")
16225 (set_attr "mode" "<MODE>")])
;; xop_vmfrcz<mode>2: expander for VF_128 modes.  Its preparation statement
;; supplies operand 2 as the all-zero constant of the vector mode
;; (CONST0_RTX).
16227 (define_expand "xop_vmfrcz<mode>2"
16228 [(set (match_operand:VF_128 0 "register_operand")
16231 [(match_operand:VF_128 1 "nonimmediate_operand")]
16236 "operands[2] = CONST0_RTX (<MODE>mode);")
;; *xop_vmfrcz<mode>2: emits vfrcz<ssescalarmodesuffix> (scalar suffix) for
;; VF_128 modes.  Operand 2 must be a zero constant (const0_operand); the
;; Intel-syntax source is printed with the <iptr> size prefix.
16238 (define_insn "*xop_vmfrcz<mode>2"
16239 [(set (match_operand:VF_128 0 "register_operand" "=x")
16242 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16244 (match_operand:VF_128 2 "const0_operand")
16247 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16248 [(set_attr "type" "ssecvt1")
16249 (set_attr "mode" "<MODE>")])
;; xop_maskcmp<mode>3: emits vpcom<cond><ssemodesuffix>, where operand 1 is
;; any operator accepted by ix86_comparison_int_operator and is printed with
;; the %Y modifier.  Operand 2 is a register and operand 3 a register or
;; memory.
16251 (define_insn "xop_maskcmp<mode>3"
16252 [(set (match_operand:VI_128 0 "register_operand" "=x")
16253 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16254 [(match_operand:VI_128 2 "register_operand" "x")
16255 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16257 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16258 [(set_attr "type" "sse4arg")
16259 (set_attr "prefix_data16" "0")
16260 (set_attr "prefix_rep" "0")
16261 (set_attr "prefix_extra" "2")
16262 (set_attr "length_immediate" "1")
16263 (set_attr "mode" "TI")])
;; xop_maskcmp_uns<mode>3: emits vpcom<cond>u<ssemodesuffix> (note the `u'
;; in the mnemonic), where operand 1 is any operator accepted by
;; ix86_comparison_uns_operator and is printed with the %Y modifier.
16265 (define_insn "xop_maskcmp_uns<mode>3"
16266 [(set (match_operand:VI_128 0 "register_operand" "=x")
16267 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16268 [(match_operand:VI_128 2 "register_operand" "x")
16269 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16271 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16272 [(set_attr "type" "ssecmp")
16273 (set_attr "prefix_data16" "0")
16274 (set_attr "prefix_rep" "0")
16275 (set_attr "prefix_extra" "2")
16276 (set_attr "length_immediate" "1")
16277 (set_attr "mode" "TI")])
16279 ;; Version of pcom*u* that is called from the intrinsics.  Wrapping the
16280 ;; comparison in an unspec keeps pcomequ* and pcomneu* from being converted
16281 ;; to the signed forms, in case somebody needs the exact instruction for the intrinsic.
;; xop_maskcmp_uns2<mode>3: same operands and output template as the
;; unsigned compare (vpcom<cond>u<ssemodesuffix>), but the comparison
;; operator is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
16282 (define_insn "xop_maskcmp_uns2<mode>3"
16283 [(set (match_operand:VI_128 0 "register_operand" "=x")
16285 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16286 [(match_operand:VI_128 2 "register_operand" "x")
16287 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
16288 UNSPEC_XOP_UNSIGNED_CMP))]
16290 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16291 [(set_attr "type" "ssecmp")
16292 (set_attr "prefix_data16" "0")
16293 (set_attr "prefix_extra" "2")
16294 (set_attr "length_immediate" "1")
16295 (set_attr "mode" "TI")])
16297 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
16298 ;; being added here to be complete.
;; xop_pcom_tf<mode>3: UNSPEC_XOP_TRUEFALSE over two VI_128 inputs and a
;; constant selector (operand 3).  The output code emits
;; vpcomtrue<ssemodesuffix> when the selector is non-zero and
;; vpcomfalse<ssemodesuffix> when it is zero.
16299 (define_insn "xop_pcom_tf<mode>3"
16300 [(set (match_operand:VI_128 0 "register_operand" "=x")
16302 [(match_operand:VI_128 1 "register_operand" "x")
16303 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
16304 (match_operand:SI 3 "const_int_operand" "n")]
16305 UNSPEC_XOP_TRUEFALSE))]
16308 return ((INTVAL (operands[3]) != 0)
16309 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16310 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
16312 [(set_attr "type" "ssecmp")
16313 (set_attr "prefix_data16" "0")
16314 (set_attr "prefix_extra" "2")
16315 (set_attr "length_immediate" "1")
16316 (set_attr "mode" "TI")])
;; xop_vpermil2<mode>3: emits vpermil2<ssemodesuffix> for VF_128_256 modes.
;; Operands 1 and 2 are the two floating-point sources, operand 3 is the
;; selector vector in the matching integer vector mode (<sseintvecmode>) and
;; operand 4 is a 0..3 immediate.
;; Operand 2 must NOT carry the `%' commutative modifier: `%' tells the
;; register allocator it may swap the operand with the one that follows, and
;; operand 3 is the integer selector in a different mode, not an
;; interchangeable source.
16318 (define_insn "xop_vpermil2<mode>3"
16319 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16321 [(match_operand:VF_128_256 1 "register_operand" "x")
16322 (match_operand:VF_128_256 2 "nonimmediate_operand" "x")
16323 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
16324 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16327 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
16328 [(set_attr "type" "sse4arg")
16329 (set_attr "length_immediate" "1")
16330 (set_attr "mode" "<MODE>")])
16332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc: two alternatives selected by the "isa" attribute.  The non-AVX
;; form ties operand 1 to the destination and prints the two-operand aesenc;
;; the AVX form prints the three-operand vaesenc.  Operand 2 may be a
;; register or memory in both.
16334 (define_insn "aesenc"
16335 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16336 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16337 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16341 aesenc\t{%2, %0|%0, %2}
16342 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16343 [(set_attr "isa" "noavx,avx")
16344 (set_attr "type" "sselog1")
16345 (set_attr "prefix_extra" "1")
16346 (set_attr "prefix" "orig,vex")
16347 (set_attr "btver2_decode" "double,double")
16348 (set_attr "mode" "TI")])
;; aesenclast: UNSPEC_AESENCLAST on two V2DI inputs.  The non-AVX
;; alternative ties operand 1 to the destination and prints the two-operand
;; aesenclast; the AVX alternative prints the three-operand vaesenclast.
16350 (define_insn "aesenclast"
16351 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16352 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16353 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16354 UNSPEC_AESENCLAST))]
16357 aesenclast\t{%2, %0|%0, %2}
16358 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16359 [(set_attr "isa" "noavx,avx")
16360 (set_attr "type" "sselog1")
16361 (set_attr "prefix_extra" "1")
16362 (set_attr "prefix" "orig,vex")
16363 (set_attr "btver2_decode" "double,double")
16364 (set_attr "mode" "TI")])
;; aesdec: two alternatives selected by the "isa" attribute.  The non-AVX
;; form ties operand 1 to the destination and prints the two-operand aesdec;
;; the AVX form prints the three-operand vaesdec.  Operand 2 may be a
;; register or memory in both.
16366 (define_insn "aesdec"
16367 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16368 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16369 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16373 aesdec\t{%2, %0|%0, %2}
16374 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16375 [(set_attr "isa" "noavx,avx")
16376 (set_attr "type" "sselog1")
16377 (set_attr "prefix_extra" "1")
16378 (set_attr "prefix" "orig,vex")
16379 (set_attr "btver2_decode" "double,double")
16380 (set_attr "mode" "TI")])
;; aesdeclast: UNSPEC_AESDECLAST on two V2DI inputs.  The non-AVX
;; alternative ties operand 1 to the destination and prints the two-operand
;; aesdeclast; the AVX alternative prints the three-operand vaesdeclast.
16382 (define_insn "aesdeclast"
16383 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16384 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16385 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16386 UNSPEC_AESDECLAST))]
16389 aesdeclast\t{%2, %0|%0, %2}
16390 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16391 [(set_attr "isa" "noavx,avx")
16392 (set_attr "type" "sselog1")
16393 (set_attr "prefix_extra" "1")
16394 (set_attr "prefix" "orig,vex")
16395 (set_attr "btver2_decode" "double,double")
16396 (set_attr "mode" "TI")])
;; aesimc: single-input V2DI unspec with a register-or-memory source.  The
;; template starts with %v and the "prefix" attribute is maybe_vex, so one
;; pattern covers both the legacy and the VEX-encoded mnemonic.
16398 (define_insn "aesimc"
16399 [(set (match_operand:V2DI 0 "register_operand" "=x")
16400 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
16403 "%vaesimc\t{%1, %0|%0, %1}"
16404 [(set_attr "type" "sselog1")
16405 (set_attr "prefix_extra" "1")
16406 (set_attr "prefix" "maybe_vex")
16407 (set_attr "mode" "TI")])
;; aeskeygenassist: UNSPEC_AESKEYGENASSIST on a V2DI register-or-memory
;; source plus an 8-bit immediate (operand 2, 0..255).  The template starts
;; with %v and the "prefix" attribute is maybe_vex.
16409 (define_insn "aeskeygenassist"
16410 [(set (match_operand:V2DI 0 "register_operand" "=x")
16411 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
16412 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16413 UNSPEC_AESKEYGENASSIST))]
16415 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
16416 [(set_attr "type" "sselog1")
16417 (set_attr "prefix_extra" "1")
16418 (set_attr "length_immediate" "1")
16419 (set_attr "prefix" "maybe_vex")
16420 (set_attr "mode" "TI")])
;; pclmulqdq: two V2DI inputs plus an 8-bit immediate (operand 3, 0..255).
;; The non-AVX alternative ties operand 1 to the destination and prints
;; pclmulqdq; the AVX alternative prints the three-source-operand
;; vpclmulqdq.
16422 (define_insn "pclmulqdq"
16423 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16424 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16425 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
16426 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16430 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
16431 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16432 [(set_attr "isa" "noavx,avx")
16433 (set_attr "type" "sselog1")
16434 (set_attr "prefix_extra" "1")
16435 (set_attr "length_immediate" "1")
16436 (set_attr "prefix" "orig,vex")
16437 (set_attr "mode" "TI")])
;; avx_vzeroall: expander that builds the whole insn body by hand as
;; operand 0.  The PARALLEL has nregs + 1 elements, with nregs being 16 for
;; TARGET_64BIT and 8 otherwise: element 0 is an UNSPEC_VOLATILE over
;; const0_rtx, and elements 1..nregs each SET one SSE register, viewed in
;; V8SImode, to the all-zero V8SI constant.
16439 (define_expand "avx_vzeroall"
16440 [(match_par_dup 0 [(const_int 0)])]
16443 int nregs = TARGET_64BIT ? 16 : 8;
16446 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
16448 XVECEXP (operands[0], 0, 0)
16449 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
16452 for (regno = 0; regno < nregs; regno++)
16453 XVECEXP (operands[0], 0, regno + 1)
16454 = gen_rtx_SET (VOIDmode,
16455 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
16456 CONST0_RTX (V8SImode));
;; *avx_vzeroall: matches a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL unspec_volatile.
;; The attributes describe an instruction with no ModRM byte, no memory
;; access and a VEX prefix.
16459 (define_insn "*avx_vzeroall"
16460 [(match_parallel 0 "vzeroall_operation"
16461 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
16464 [(set_attr "type" "sse")
16465 (set_attr "modrm" "0")
16466 (set_attr "memory" "none")
16467 (set_attr "prefix" "vex")
16468 (set_attr "btver2_decode" "vector")
16469 (set_attr "mode" "OI")])
16471 ;; Clear the upper 128 bits of the AVX registers; equivalent to a NOP
16472 ;; if the upper 128 bits are unused.
;; avx_vzeroupper: modelled purely as the UNSPECV_VZEROUPPER
;; unspec_volatile, with no explicit operands.  The attributes describe an
;; instruction with no ModRM byte, no memory access and a VEX prefix.
16473 (define_insn "avx_vzeroupper"
16474 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
16477 [(set_attr "type" "sse")
16478 (set_attr "modrm" "0")
16479 (set_attr "memory" "none")
16480 (set_attr "prefix" "vex")
16481 (set_attr "btver2_decode" "vector")
16482 (set_attr "mode" "OI")])
;; avx2_pbroadcast<mode>: emits vpbroadcast<ssemodesuffix> for every VI
;; mode.  The value broadcast is element 0 of operand 1, which is an
;; <ssexmmmode> register or memory; the Intel-syntax source is printed with
;; the <iptr> size prefix.
16484 (define_insn "avx2_pbroadcast<mode>"
16485 [(set (match_operand:VI 0 "register_operand" "=x")
16487 (vec_select:<ssescalarmode>
16488 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
16489 (parallel [(const_int 0)]))))]
16491 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16492 [(set_attr "type" "ssemov")
16493 (set_attr "prefix_extra" "1")
16494 (set_attr "prefix" "vex")
16495 (set_attr "mode" "<sseinsnmode>")])
;; avx2_pbroadcast<mode>_1: broadcasts element 0 of a VI_256 source into a
;; VI_256 destination.  Alternative 0 takes the source from memory and
;; prints it with the <iptr> size prefix; alternative 1 takes it from a
;; register and prints it with the %x modifier.
16497 (define_insn "avx2_pbroadcast<mode>_1"
16498 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
16499 (vec_duplicate:VI_256
16500 (vec_select:<ssescalarmode>
16501 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
16502 (parallel [(const_int 0)]))))]
16505 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
16506 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
16507 [(set_attr "type" "ssemov")
16508 (set_attr "prefix_extra" "1")
16509 (set_attr "prefix" "vex")
16510 (set_attr "mode" "<sseinsnmode>")])
;; <avx2_avx512>_permvar<mode><mask_name>: emits vperm<ssemodesuffix> for
;; VI48F_256_512 modes.  Operand 1 is the data (register or memory) and
;; operand 2 is a register in the matching integer vector mode; the
;; Intel-syntax operand order is dest, operand 2, operand 1.  Requires
;; TARGET_AVX2 plus <mask_mode512bit_condition>.
16512 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
16513 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
16514 (unspec:VI48F_256_512
16515 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
16516 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16518 "TARGET_AVX2 && <mask_mode512bit_condition>"
16519 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16520 [(set_attr "type" "sselog")
16521 (set_attr "prefix" "<mask_prefix2>")
16522 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_permvar<mode><mask_name> for VI1_AVX512VL: same operand layout
;; and template as the other permvar patterns, but requires
;; TARGET_AVX512VBMI plus <mask_mode512bit_condition>.
16524 (define_insn "<avx512>_permvar<mode><mask_name>"
16525 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
16526 (unspec:VI1_AVX512VL
16527 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
16528 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16530 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
16531 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16532 [(set_attr "type" "sselog")
16533 (set_attr "prefix" "<mask_prefix2>")
16534 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_permvar<mode><mask_name> for VI2_AVX512VL: same operand layout
;; and template as the other permvar patterns, but requires
;; TARGET_AVX512BW plus <mask_mode512bit_condition>.
16536 (define_insn "<avx512>_permvar<mode><mask_name>"
16537 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16538 (unspec:VI2_AVX512VL
16539 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
16540 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16542 "TARGET_AVX512BW && <mask_mode512bit_condition>"
16543 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16544 [(set_attr "type" "sselog")
16545 (set_attr "prefix" "<mask_prefix2>")
16546 (set_attr "mode" "<sseinsnmode>")])
;; <avx2_avx512>_perm<mode>: expander for VI8F_256_512 modes taking an
;; 8-bit immediate (operand 2).  It splits the immediate into four 2-bit
;; selectors (bits 1:0, 3:2, 5:4 and 7:6) and passes them as separate
;; operands to the corresponding _1 pattern.
16548 (define_expand "<avx2_avx512>_perm<mode>"
16549 [(match_operand:VI8F_256_512 0 "register_operand")
16550 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16551 (match_operand:SI 2 "const_0_to_255_operand")]
16554 int mask = INTVAL (operands[2]);
16555 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
16556 GEN_INT ((mask >> 0) & 3),
16557 GEN_INT ((mask >> 2) & 3),
16558 GEN_INT ((mask >> 4) & 3),
16559 GEN_INT ((mask >> 6) & 3)));
;; <avx512>_perm<mode>_mask: masked counterpart of the perm expander.  It
;; splits the 8-bit immediate (operand 2) into four 2-bit selectors and
;; forwards them, together with operand 3 (a vector_move_operand) and the
;; mask register (operand 4), to the _1_mask pattern.
16563 (define_expand "<avx512>_perm<mode>_mask"
16564 [(match_operand:VI8F_256_512 0 "register_operand")
16565 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16566 (match_operand:SI 2 "const_0_to_255_operand")
16567 (match_operand:VI8F_256_512 3 "vector_move_operand")
16568 (match_operand:<avx512fmaskmode> 4 "register_operand")]
16571 int mask = INTVAL (operands[2]);
16572 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
16573 GEN_INT ((mask >> 0) & 3),
16574 GEN_INT ((mask >> 2) & 3),
16575 GEN_INT ((mask >> 4) & 3),
16576 GEN_INT ((mask >> 6) & 3),
16577 operands[3], operands[4]));
;; <avx2_avx512>_perm<mode>_1<mask_name>: vec_select of operand 1 with four
;; selectors (operands 2..5), each a constant in 0..3.  The output code
;; packs the selectors back into one immediate, two bits each starting at
;; bit 0, stores it in operands[2] and prints it as the immediate of
;; vperm<ssemodesuffix>.
16581 (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
16582 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
16583 (vec_select:VI8F_256_512
16584 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
16585 (parallel [(match_operand 2 "const_0_to_3_operand")
16586 (match_operand 3 "const_0_to_3_operand")
16587 (match_operand 4 "const_0_to_3_operand")
16588 (match_operand 5 "const_0_to_3_operand")])))]
16589 "TARGET_AVX2 && <mask_mode512bit_condition>"
16592 mask |= INTVAL (operands[2]) << 0;
16593 mask |= INTVAL (operands[3]) << 2;
16594 mask |= INTVAL (operands[4]) << 4;
16595 mask |= INTVAL (operands[5]) << 6;
16596 operands[2] = GEN_INT (mask);
16597 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16599 [(set_attr "type" "sselog")
16600 (set_attr "prefix" "<mask_prefix2>")
16601 (set_attr "mode" "<sseinsnmode>")])
;; avx2_permv2ti: emits vperm2i128 on two V4DI inputs (operand 1 in a
;; register, operand 2 in a register or memory) with an 8-bit immediate
;; (operand 3, 0..255).
16603 (define_insn "avx2_permv2ti"
16604 [(set (match_operand:V4DI 0 "register_operand" "=x")
16606 [(match_operand:V4DI 1 "register_operand" "x")
16607 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
16608 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16611 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16612 [(set_attr "type" "sselog")
16613 (set_attr "prefix" "vex")
16614 (set_attr "mode" "OI")])
;; avx2_vec_dupv4df: emits vbroadcastsd to duplicate element 0 of a V2DF
;; register (operand 1) across a V4DF destination.
16616 (define_insn "avx2_vec_dupv4df"
16617 [(set (match_operand:V4DF 0 "register_operand" "=x")
16618 (vec_duplicate:V4DF
16620 (match_operand:V2DF 1 "register_operand" "x")
16621 (parallel [(const_int 0)]))))]
16623 "vbroadcastsd\t{%1, %0|%0, %1}"
16624 [(set_attr "type" "sselog1")
16625 (set_attr "prefix" "vex")
16626 (set_attr "mode" "V4DF")])
;; <avx512>_vec_dup<mode>_1: broadcasts element 0 of operand 1 (same vector
;; mode as the destination) with vpbroadcast<ssemodesuffix>.
;; Selecting a single element yields a scalar, so the vec_select carries
;; <ssescalarmode>, not the vector mode.
;; The template uses the "@" multi-alternative form so that exactly one line
;; is emitted per alternative, in constraint order: alternative 0 ("v")
;; prints the register source with the %x modifier, alternative 1 ("m")
;; prints the memory source with the <iptr> size prefix in Intel syntax.
16628 (define_insn "<avx512>_vec_dup<mode>_1"
16629 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
16630 (vec_duplicate:VI_AVX512BW
16631 (vec_select:<ssescalarmode>
16632 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
16633 (parallel [(const_int 0)]))))]
"@
16635 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
16636 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16637 [(set_attr "type" "ssemov")
16638 (set_attr "prefix" "evex")
16639 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_vec_dup<mode><mask_name> for V48_AVX512VL: broadcasts element 0
;; of an <ssexmmmode> register-or-memory source with
;; v<sseintprefix>broadcast<bcstscalarsuff>.  <mask_operand2> is appended to
;; the destination in the template.
16641 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16642 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
16643 (vec_duplicate:V48_AVX512VL
16644 (vec_select:<ssescalarmode>
16645 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16646 (parallel [(const_int 0)]))))]
16648 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16649 [(set_attr "type" "ssemov")
16650 (set_attr "prefix" "evex")
16651 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_vec_dup<mode><mask_name> for VI12_AVX512VL: broadcasts element 0
;; of an <ssexmmmode> register-or-memory source with
;; vpbroadcast<bcstscalarsuff>.  <mask_operand2> is appended to the
;; destination in the template.
16653 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16654 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16655 (vec_duplicate:VI12_AVX512VL
16656 (vec_select:<ssescalarmode>
16657 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16658 (parallel [(const_int 0)]))))]
16660 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16661 [(set_attr "type" "ssemov")
16662 (set_attr "prefix" "evex")
16663 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_broadcast<mode><mask_name> for V16FI: duplicates an <ssexmmmode>
;; operand across the destination.  With a register source it prints
;; vshuf<shuffletype>32x4 with immediate 0 and the source as both inputs
;; (using the %g modifier); with a memory source it prints
;; vbroadcast<shuffletype>32x4.
16665 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16666 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16667 (vec_duplicate:V16FI
16668 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16671 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16672 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16673 [(set_attr "type" "ssemov")
16674 (set_attr "prefix" "evex")
16675 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_broadcast<mode><mask_name> for V8FI: duplicates an
;; <ssehalfvecmode> operand across the destination.  With a register source
;; it prints vshuf<shuffletype>64x2 with immediate 0x44 and the source as
;; both inputs (using the %g modifier); with a memory source it prints
;; vbroadcast<shuffletype>64x4.
16677 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16678 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16679 (vec_duplicate:V8FI
16680 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16683 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16684 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16685 [(set_attr "type" "ssemov")
16686 (set_attr "prefix" "evex")
16687 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_vec_dup_gpr<mode><mask_name> for VI12_AVX512VL: duplicates a
;; scalar operand with vpbroadcast<bcstscalarsuff>.  Alternative 0 takes the
;; scalar from a vector register or memory; alternative 1 takes it from a
;; general register and prints it with the %k modifier.
16689 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16690 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
16691 (vec_duplicate:VI12_AVX512VL
16692 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16695 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
16696 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
16697 [(set_attr "type" "ssemov")
16698 (set_attr "prefix" "evex")
16699 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_vec_dup_gpr<mode><mask_name> for V48_AVX512VL: duplicates a
;; scalar operand with v<sseintprefix>broadcast<bcstscalarsuff>; both
;; alternatives share one template.  The "enabled" attribute restricts
;; alternative 1 (general-register source) to integer scalar modes, and to
;; TARGET_64BIT when the scalar mode is DImode.
16701 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16702 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
16703 (vec_duplicate:V48_AVX512VL
16704 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16706 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16707 [(set_attr "type" "ssemov")
16708 (set_attr "prefix" "evex")
16709 (set_attr "mode" "<sseinsnmode>")
16710 (set (attr "enabled")
16711 (if_then_else (eq_attr "alternative" "1")
16712 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
16713 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; Duplicate an SFmode scalar into all four elements of a V4SF register.
;;   alt 0 (isa avx, register source): vshufps with immediate 0, using the
;;     source as both inputs.
;;   alt 1 (isa avx, memory source): vbroadcastss.
;;   alt 2 (isa noavx, source tied to the destination by "0"): two-operand
;;     shufps with immediate 0, performed in place.
16716 (define_insn "vec_dupv4sf"
16717 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16718 (vec_duplicate:V4SF
16719 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
16722 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
16723 vbroadcastss\t{%1, %0|%0, %1}
16724 shufps\t{$0, %0, %0|%0, %0, 0}"
16725 [(set_attr "isa" "avx,avx,noavx")
16726 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
16727 (set_attr "length_immediate" "1,0,1")
16728 (set_attr "prefix_extra" "0,1,*")
16729 (set_attr "prefix" "vex,vex,orig")
16730 (set_attr "mode" "V4SF")])
;; Duplicate an SImode scalar into all four elements of a V4SI register.
;;   alt 0 (isa sse2, register source): %vpshufd with immediate 0.
;;   alt 1 (isa avx, memory source): vbroadcastss (insn mode V4SF).
;;   alt 2 (isa noavx, source tied to the destination by "0"): shufps with
;;     immediate 0, performed in place (insn mode V4SF).
16732 (define_insn "*vec_dupv4si"
16733 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
16734 (vec_duplicate:V4SI
16735 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
16738 %vpshufd\t{$0, %1, %0|%0, %1, 0}
16739 vbroadcastss\t{%1, %0|%0, %1}
16740 shufps\t{$0, %0, %0|%0, %0, 0}"
16741 [(set_attr "isa" "sse2,avx,noavx")
16742 (set_attr "type" "sselog1,ssemov,sselog1")
16743 (set_attr "length_immediate" "1,0,1")
16744 (set_attr "prefix_extra" "0,1,*")
16745 (set_attr "prefix" "maybe_vex,vex,orig")
16746 (set_attr "mode" "TI,V4SF,V4SF")])
;; Duplicate a DImode scalar into both elements of a V2DI register.
;; There are four alternatives; their source constraints are, in order,
;; tied-to-destination ("0"), register, memory, and tied-to-destination,
;; and the "isa" attribute gates them as sse2_noavx, avx, sse3 and noavx.
;; The per-alternative insn modes are TI, TI, DF and V4SF.
16748 (define_insn "*vec_dupv2di"
16749 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
16750 (vec_duplicate:V2DI
16751 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
16755 vpunpcklqdq\t{%d1, %0|%0, %d1}
16756 %vmovddup\t{%1, %0|%0, %1}
16758 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
16759 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
16760 (set_attr "prefix" "orig,vex,maybe_vex,orig")
16761 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Load a <ssehalfvecmode> value from memory (operand 1 must be a memory
;; operand) into a VI_256 destination with vbroadcasti128.
16763 (define_insn "avx2_vbroadcasti128_<mode>"
16764 [(set (match_operand:VI_256 0 "register_operand" "=x")
16766 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16769 "vbroadcasti128\t{%1, %0|%0, %1}"
16770 [(set_attr "type" "ssemov")
16771 (set_attr "prefix_extra" "1")
16772 (set_attr "prefix" "vex")
16773 (set_attr "mode" "OI")])
;; AVX_VEC_DUP_MODE is used by the named "vec_dup<mode>" insn and by the
;; split that requires TARGET_AVX && !TARGET_AVX2.
16775 ;; Modes handled by AVX vec_dup patterns.
16776 (define_mode_iterator AVX_VEC_DUP_MODE
16777 [V8SI V8SF V4DI V4DF])
;; AVX2_VEC_DUP_MODE is used by the anonymous "*vec_dup<mode>" insn and by
;; the split that broadcasts from a general register.
16778 ;; Modes handled by AVX2 vec_dup patterns.
16779 (define_mode_iterator AVX2_VEC_DUP_MODE
16780 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; Duplicate a scalar of mode <ssescalarmode> into an AVX2_VEC_DUP_MODE
;; vector.  Three alternatives, by source: memory ("m"), vector register
;; ("x", printed with %x1), and general register ("$r", with destination
;; class "Yi").  The first two emit v<sseintprefix>broadcast<bcstscalarsuff>.
16782 (define_insn "*vec_dup<mode>"
16783 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
16784 (vec_duplicate:AVX2_VEC_DUP_MODE
16785 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
16788 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16789 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16791 [(set_attr "type" "ssemov")
16792 (set_attr "prefix_extra" "1")
16793 (set_attr "prefix" "maybe_evex")
16794 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a scalar of mode <ssescalarmode> into an AVX_VEC_DUP_MODE
;; vector.  Four alternatives, gated by "isa" as avx2, noavx2, avx2, noavx2:
;;   alt 0 (avx2, memory):    v<sseintprefix>broadcast<bcstscalarsuff>.
;;   alt 1 (noavx2, memory):  vbroadcast<ssescalarmodesuffix>.
;;   alt 2 (avx2, register):  v<sseintprefix>broadcast<bcstscalarsuff>,
;;                            source printed with %x1.
;;   alt 3 (noavx2, register, discouraged by "?").
16796 (define_insn "vec_dup<mode>"
16797 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x")
16798 (vec_duplicate:AVX_VEC_DUP_MODE
16799 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))]
16802 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16803 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
16804 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16806 [(set_attr "type" "ssemov")
16807 (set_attr "prefix_extra" "1")
16808 (set_attr "prefix" "maybe_evex")
16809 (set_attr "isa" "avx2,noavx2,avx2,noavx2")
16810 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")])
;; Split for an AVX2_VEC_DUP_MODE vec_duplicate whose source is a general
;; register.  It applies only after reload, only when operand 1 is a
;; general register, and not when the avx512vl GPR-broadcast insn can be
;; used (see the condition's own comment).  The replacement sequence first
;; uses gen_vec_setv4si_0 on the V4SI view of the destination, with
;; CONST0_RTX as the vector input and the SImode low part of the source as
;; the scalar, and then emits gen_avx2_pbroadcast<mode>.
16813 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
16814 (vec_duplicate:AVX2_VEC_DUP_MODE
16815 (match_operand:<ssescalarmode> 1 "register_operand")))]
16817 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
16818 available, because then we can broadcast from GPRs directly.
16819 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
16820 for V*SI mode it requires just -mavx512vl. */
16821 && !(TARGET_AVX512VL
16822 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
16823 && reload_completed && GENERAL_REG_P (operands[1])"
16826 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
16827 CONST0_RTX (V4SImode),
16828 gen_lowpart (SImode, operands[1])));
16829 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
16830 gen_lowpart (<ssexmmmode>mode,
;; Split for an AVX_VEC_DUP_MODE vec_duplicate from a register when
;; TARGET_AVX is set, TARGET_AVX2 is not, and reload has completed.
;; Step 1 duplicates the scalar into operand 2, a <ssehalfvecmode> view
;; of the destination's own hard register (same REGNO as operand 0).
;; Step 2 is a vec_concat of operand 2 with itself.
16836 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16837 (vec_duplicate:AVX_VEC_DUP_MODE
16838 (match_operand:<ssescalarmode> 1 "register_operand")))]
16839 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16840 [(set (match_dup 2)
16841 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16843 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16844 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Fill a V_256 destination from a <ssehalfvecmode> operand.
;;   alt 0 (memory source): vbroadcast<i128>.
;;   alt 1 (source tied to the destination by "0"): vinsert<i128> with
;;     immediate 1, inserting the source into the destination itself.
;;   alt 2 (other register, discouraged by "?"): vperm2<i128> with
;;     immediate 0, using the source (printed with %t1) as both inputs.
16846 (define_insn "avx_vbroadcastf128_<mode>"
16847 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16849 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16853 vbroadcast<i128>\t{%1, %0|%0, %1}
16854 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16855 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16856 [(set_attr "type" "ssemov,sselog1,sselog1")
16857 (set_attr "prefix_extra" "1")
16858 (set_attr "length_immediate" "0,1,1")
16859 (set_attr "prefix" "vex")
16860 (set_attr "mode" "<sseinsnmode>")])
16862 ;; For broadcast[i|f]32x2.  Note that there is no V4SF variant, only V4SI.
16863 (define_mode_iterator VI4F_BRCST32x2
16864 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16865 V16SF (V8SF "TARGET_AVX512VL")])

;; Maps each listed 64-bit-element vector mode to the two-element vector
;; mode with the same element type (the source mode for a 64x2 broadcast).
16867 (define_mode_attr 64x2mode
16868 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])

;; Maps each listed 32-bit-element vector mode to the two-element vector
;; mode with the same element type (the source mode for a 32x2 broadcast).
16870 (define_mode_attr 32x2mode
16871 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16872 (V8SF "V2SF") (V4SI "V2SI")])
;; Broadcast the pair formed by elements 0 and 1 of operand 1 (a
;; <ssexmmmode> register or memory operand, selected with vec_select)
;; across a VI4F_BRCST32x2 destination using vbroadcast<shuffletype>32x2.
16874 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16875 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16876 (vec_duplicate:VI4F_BRCST32x2
16877 (vec_select:<32x2mode>
16878 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16879 (parallel [(const_int 0) (const_int 1)]))))]
16881 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16882 [(set_attr "type" "ssemov")
16883 (set_attr "prefix_extra" "1")
16884 (set_attr "prefix" "evex")
16885 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast operand 1 (mode <ssexmmmode>) across a VI4F_256 destination.
;;   alt 0 (register source): vshuf<shuffletype>32x4 with immediate 0,
;;     passing the source (printed with %t1) as both shuffle inputs.
;;   alt 1 (memory source): vbroadcast<shuffletype>32x4.
16887 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16888 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16889 (vec_duplicate:VI4F_256
16890 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16893 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16894 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16895 [(set_attr "type" "ssemov")
16896 (set_attr "prefix_extra" "1")
16897 (set_attr "prefix" "evex")
16898 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast operand 1 (mode <ssehalfvecmode>) across a V16FI destination.
;;   alt 0 (register source): vshuf<shuffletype>32x4 with immediate 0x44,
;;     passing the source (printed with %g1) as both shuffle inputs.
;;   alt 1 (memory source): vbroadcast<shuffletype>32x8.
16900 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16901 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16902 (vec_duplicate:V16FI
16903 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16906 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16907 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16908 [(set_attr "type" "ssemov")
16909 (set_attr "prefix_extra" "1")
16910 (set_attr "prefix" "evex")
16911 (set_attr "mode" "<sseinsnmode>")])
16913 ;; For broadcast[i|f]64x2
;; The V4DI and V4DF entries are only enabled under TARGET_AVX512VL.
16914 (define_mode_iterator VI8F_BRCST64x2
16915 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; Broadcast operand 1 (mode <64x2mode>) across a VI8F_BRCST64x2
;; destination.
;;   alt 0 (register source): vshuf<shuffletype>64x2 with immediate 0,
;;     passing the source (printed with the %<concat_tg_mode> modifier)
;;     as both shuffle inputs.
;;   alt 1 (memory source): vbroadcast<shuffletype>64x2.
16917 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16918 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
16919 (vec_duplicate:VI8F_BRCST64x2
16920 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
16923 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
16924 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16925 [(set_attr "type" "ssemov")
16926 (set_attr "prefix_extra" "1")
16927 (set_attr "prefix" "evex")
16928 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a QImode value held in a mask register (constraint "Yk")
;; into every element of a VI8_AVX512VL vector using vpbroadcastmb2q.
16930 (define_insn "avx512cd_maskb_vec_dup<mode>"
16931 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
16932 (vec_duplicate:VI8_AVX512VL
16934 (match_operand:QI 1 "register_operand" "Yk"))))]
16936 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
16937 [(set_attr "type" "mskmov")
16938 (set_attr "prefix" "evex")
16939 (set_attr "mode" "XI")])
;; Broadcast an HImode value held in a mask register (constraint "Yk")
;; into every element of a VI4_AVX512VL vector using vpbroadcastmw2d.
16941 (define_insn "avx512cd_maskw_vec_dup<mode>"
16942 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
16943 (vec_duplicate:VI4_AVX512VL
16945 (match_operand:HI 1 "register_operand" "Yk"))))]
16947 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
16948 [(set_attr "type" "mskmov")
16949 (set_attr "prefix" "evex")
16950 (set_attr "mode" "XI")])
16952 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
16953 ;; If it so happens that the input is in memory, use vbroadcast.
16954 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;;
;; V4SF variant.  Operand 3 is the first selector entry and gives the
;; index ELT of the element being broadcast.  The output code switches on
;; which_alternative:
;;   - one path rewrites operand 1 to the SFmode location at byte offset
;;     ELT * 4 and emits vbroadcastss;
;;   - the other sets the immediate to ELT * 0x55 (ELT repeated in all
;;     four 2-bit fields) and emits vpermilps.
;; Judging by the constraints (m, o, x) and the "type" attribute, the
;; memory alternatives take the first path and the register alternative
;; the second.
16955 (define_insn "*avx_vperm_broadcast_v4sf"
16956 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16958 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
16959 (match_parallel 2 "avx_vbroadcast_operand"
16960 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16963 int elt = INTVAL (operands[3]);
16964 switch (which_alternative)
16968 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
16969 return "vbroadcastss\t{%1, %0|%0, %k1}";
16971 operands[2] = GEN_INT (elt * 0x55);
16972 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
16974 gcc_unreachable ();
16977 [(set_attr "type" "ssemov,ssemov,sselog1")
16978 (set_attr "prefix_extra" "1")
16979 (set_attr "length_immediate" "0,0,1")
16980 (set_attr "prefix" "vex")
16981 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) broadcast expressed as a vec_select; split after
;; reload, except for V4DFmode when TARGET_AVX2 is set.  The split target
;; is a plain vec_duplicate of operand 1.  Operand 3 gives the index ELT
;; of the element being broadcast.
;;
;; The preparation code shows these strategies:
;;   - TARGET_AVX2 and ELT == 0: emit gen_vec_dup<mode> from the scalar
;;     low part of the source.
;;   - Otherwise, for a register source: first gen_avx_vpermil<mode> with
;;     an in-lane mask (15 or 0 for V4DFmode depending on ELT & 1,
;;     (ELT & 3) * 0x55 for the other mode), then gen_avx_vperm2f128<mode>3
;;     with mask (ELT / (<ssescalarnum> / 2)) * 0x11 to copy the wanted
;;     lane into both lanes.
;;   - The final statement re-addresses operand 1 to the selected scalar
;;     element; presumably this is the memory-source path that feeds the
;;     vec_duplicate split target -- confirm against the full file.
16983 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
16984 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
16986 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
16987 (match_parallel 2 "avx_vbroadcast_operand"
16988 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16991 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
16992 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
16994 rtx op0 = operands[0], op1 = operands[1];
16995 int elt = INTVAL (operands[3]);
17001 if (TARGET_AVX2 && elt == 0)
17003 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17008 /* Shuffle element we care about into all elements of the 128-bit lane.
17009 The other lane gets shuffled too, but we don't care. */
17010 if (<MODE>mode == V4DFmode)
17011 mask = (elt & 1 ? 15 : 0);
17013 mask = (elt & 3) * 0x55;
17014 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17016 /* Shuffle the lane we care about into both lanes of the dest. */
17017 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17018 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
17022 operands[1] = adjust_address (op1, <ssescalarmode>mode,
17023 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for vpermil with an immediate, VF2 modes.  Converts the 8-bit
;; immediate in operand 2 into an explicit selector PARALLEL with
;; <ssescalarnum> entries.  Elements are handled in pairs starting at
;; index I: bit I of the immediate picks I or I + 1 for the first element
;; of the pair, and bit I + 1 does the same for the second.
17026 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17027 [(set (match_operand:VF2 0 "register_operand")
17029 (match_operand:VF2 1 "nonimmediate_operand")
17030 (match_operand:SI 2 "const_0_to_255_operand")))]
17031 "TARGET_AVX && <mask_mode512bit_condition>"
17033 int mask = INTVAL (operands[2]);
17034 rtx perm[<ssescalarnum>];
17037 for (i = 0; i < <ssescalarnum>; i = i + 2)
17039 perm[i] = GEN_INT (((mask >> i) & 1) + i);
17040 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
17044 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for vpermil with an immediate, VF1 modes.  Converts the 8-bit
;; immediate in operand 2 into an explicit selector PARALLEL with
;; <ssescalarnum> entries.  Elements are handled in groups of four
;; starting at index I; the four 2-bit fields of the immediate (bits 0-1,
;; 2-3, 4-5, 6-7) select a source within the group, and the same four
;; fields are reused for every group.
17047 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17048 [(set (match_operand:VF1 0 "register_operand")
17050 (match_operand:VF1 1 "nonimmediate_operand")
17051 (match_operand:SI 2 "const_0_to_255_operand")))]
17052 "TARGET_AVX && <mask_mode512bit_condition>"
17054 int mask = INTVAL (operands[2]);
17055 rtx perm[<ssescalarnum>];
17058 for (i = 0; i < <ssescalarnum>; i = i + 4)
17060 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
17061 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
17062 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
17063 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
17067 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for vpermil with an immediate.  The selector is an arbitrary
;; PARALLEL (operand 2); the insn condition accepts it only when
;; avx_vpermilp_parallel returns nonzero for this mode.  The output code
;; calls the same helper again and uses its result minus one as the
;; immediate, replacing operand 2 before printing.
17070 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
17071 [(set (match_operand:VF 0 "register_operand" "=v")
17073 (match_operand:VF 1 "nonimmediate_operand" "vm")
17074 (match_parallel 2 ""
17075 [(match_operand 3 "const_int_operand")])))]
17076 "TARGET_AVX && <mask_mode512bit_condition>
17077 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
17079 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
17080 operands[2] = GEN_INT (mask);
17081 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
17083 [(set_attr "type" "sselog")
17084 (set_attr "prefix_extra" "1")
17085 (set_attr "length_immediate" "1")
17086 (set_attr "prefix" "<mask_prefix>")
17087 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control vpermil: operand 1 is the data vector (register only)
;; and operand 2 is a <sseintvecmode> control vector in a register or
;; memory.  Emits vpermil<ssemodesuffix> with the optional mask suffix
;; <mask_operand3> on the destination.
17089 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
17090 [(set (match_operand:VF 0 "register_operand" "=v")
17092 [(match_operand:VF 1 "register_operand" "v")
17093 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
17095 "TARGET_AVX && <mask_mode512bit_condition>"
17096 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17097 [(set_attr "type" "sselog")
17098 (set_attr "prefix_extra" "1")
17099 (set_attr "btver2_decode" "vector")
17100 (set_attr "prefix" "<mask_prefix>")
17101 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for vpermi2var, VI48F modes.  Forwards operands
;; 0-3 and the mask (operand 4) to the "_maskz_1" generator, inserting
;; CONST0_RTX (<MODE>mode) as the extra operand ahead of the mask.
;; Operands carry predicates only: register allocation never looks at a
;; define_expand, so constraint strings here would be dead text (the
;; VI1_AVX512VL expander of the same name has none either).
17103 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17104 [(match_operand:VI48F 0 "register_operand")
17105 (match_operand:VI48F 1 "register_operand")
17106 (match_operand:<sseintvecmode> 2 "register_operand")
17107 (match_operand:VI48F 3 "nonimmediate_operand")
17108 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17111 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17112 operands[0], operands[1], operands[2], operands[3],
17113 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking expander for vpermi2var, VI1_AVX512VL modes (requires
;; TARGET_AVX512VBMI).  Forwards operands 0-3 and the mask (operand 4) to
;; the "_maskz_1" generator, inserting CONST0_RTX (<MODE>mode) as the
;; extra operand ahead of the mask.
17117 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17118 [(match_operand:VI1_AVX512VL 0 "register_operand")
17119 (match_operand:VI1_AVX512VL 1 "register_operand")
17120 (match_operand:<sseintvecmode> 2 "register_operand")
17121 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17122 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17123 "TARGET_AVX512VBMI"
17125 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17126 operands[0], operands[1], operands[2], operands[3],
17127 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking expander for vpermi2var, VI2_AVX512VL modes.  Forwards
;; operands 0-3 and the mask (operand 4) to the "_maskz_1" generator,
;; inserting CONST0_RTX (<MODE>mode) as the extra operand ahead of the
;; mask.  Operands carry predicates only: register allocation never looks
;; at a define_expand, so constraint strings here would be dead text (the
;; VI1_AVX512VL expander of the same name has none either).
17131 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17132 [(match_operand:VI2_AVX512VL 0 "register_operand")
17133 (match_operand:VI2_AVX512VL 1 "register_operand")
17134 (match_operand:<sseintvecmode> 2 "register_operand")
17135 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
17136 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17139 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17140 operands[0], operands[1], operands[2], operands[3],
17141 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2 for VI48F modes.  Operand 2, the <sseintvecmode> vector, is
;; tied to the destination ("0") and is therefore overwritten by the
;; result; operand 1 is a register and operand 3 may be register or
;; memory.  The template prints only %3, %1 and %0 (operand 2 is implied
;; by the tie), with the optional zero-mask suffix <sd_mask_op4>.
17145 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17146 [(set (match_operand:VI48F 0 "register_operand" "=v")
17148 [(match_operand:VI48F 1 "register_operand" "v")
17149 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17150 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17153 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17154 [(set_attr "type" "sselog")
17155 (set_attr "prefix" "evex")
17156 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2 for VI1_AVX512VL modes; requires TARGET_AVX512VBMI.  Operand 2,
;; the <sseintvecmode> vector, is tied to the destination ("0"); operand 1
;; is a register and operand 3 may be register or memory.
17158 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17159 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17160 (unspec:VI1_AVX512VL
17161 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17162 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17163 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17165 "TARGET_AVX512VBMI"
17166 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17167 [(set_attr "type" "sselog")
17168 (set_attr "prefix" "evex")
17169 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2 for VI2_AVX512VL modes.  Operand 2, the <sseintvecmode> vector,
;; is tied to the destination ("0"); operand 1 is a register and operand 3
;; may be register or memory.
17171 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17172 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17173 (unspec:VI2_AVX512VL
17174 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17175 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17176 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17179 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17180 [(set_attr "type" "sselog")
17181 (set_attr "prefix" "evex")
17182 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermi2 for VI48F modes.  The permute is wrapped as
;; UNSPEC_VPERMI2_MASK and combined under the mask register in operand 4
;; ("Yk"), which the template prints as {%4} after the destination.
;; Operand 2 is tied to the destination ("0").
17184 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17185 [(set (match_operand:VI48F 0 "register_operand" "=v")
17188 [(match_operand:VI48F 1 "register_operand" "v")
17189 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17190 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17191 UNSPEC_VPERMI2_MASK)
17193 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17195 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17196 [(set_attr "type" "sselog")
17197 (set_attr "prefix" "evex")
17198 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermi2 for VI1_AVX512VL modes; requires
;; TARGET_AVX512VBMI.  A vec_merge combines the UNSPEC_VPERMI2_MASK
;; result under the mask register in operand 4 ("Yk"), printed as {%4}
;; after the destination.  Operand 2 is tied to the destination ("0").
17200 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17201 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17202 (vec_merge:VI1_AVX512VL
17203 (unspec:VI1_AVX512VL
17204 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17205 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17206 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17207 UNSPEC_VPERMI2_MASK)
17209 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17210 "TARGET_AVX512VBMI"
17211 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17212 [(set_attr "type" "sselog")
17213 (set_attr "prefix" "evex")
17214 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermi2 for VI2_AVX512VL modes.  A vec_merge combines the
;; UNSPEC_VPERMI2_MASK result under the mask register in operand 4
;; ("Yk"), printed as {%4} after the destination.  Operand 2 is tied to
;; the destination ("0").
17216 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17217 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17218 (vec_merge:VI2_AVX512VL
17219 (unspec:VI2_AVX512VL
17220 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17221 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17222 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17223 UNSPEC_VPERMI2_MASK)
17225 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17227 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17228 [(set_attr "type" "sselog")
17229 (set_attr "prefix" "evex")
17230 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for vpermt2var, VI48F modes.  Operand 1 is the
;; <sseintvecmode> vector; operands 2 and 3 are the data vectors.
;; Forwards operands 0-3 and the mask (operand 4) to the "_maskz_1"
;; generator, inserting CONST0_RTX (<MODE>mode) ahead of the mask.
;; Operands carry predicates only: register allocation never looks at a
;; define_expand, so constraint strings here would be dead text.
17232 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17233 [(match_operand:VI48F 0 "register_operand")
17234 (match_operand:<sseintvecmode> 1 "register_operand")
17235 (match_operand:VI48F 2 "register_operand")
17236 (match_operand:VI48F 3 "nonimmediate_operand")
17237 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17240 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17241 operands[0], operands[1], operands[2], operands[3],
17242 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking expander for vpermt2var, VI1_AVX512VL modes (requires
;; TARGET_AVX512VBMI).  Operand 1 is the <sseintvecmode> vector; operands
;; 2 and 3 are the data vectors.  Forwards operands 0-3 and the mask
;; (operand 4) to the "_maskz_1" generator, inserting
;; CONST0_RTX (<MODE>mode) ahead of the mask.
;; Operands carry predicates only: register allocation never looks at a
;; define_expand, so constraint strings here would be dead text.
17246 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17247 [(match_operand:VI1_AVX512VL 0 "register_operand")
17248 (match_operand:<sseintvecmode> 1 "register_operand")
17249 (match_operand:VI1_AVX512VL 2 "register_operand")
17250 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17251 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17252 "TARGET_AVX512VBMI"
17254 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17255 operands[0], operands[1], operands[2], operands[3],
17256 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking expander for vpermt2var, VI2_AVX512VL modes.  Operand 1 is
;; the <sseintvecmode> vector; operands 2 and 3 are the data vectors.
;; Forwards operands 0-3 and the mask (operand 4) to the "_maskz_1"
;; generator, inserting CONST0_RTX (<MODE>mode) ahead of the mask.
;; Operands carry predicates only: register allocation never looks at a
;; define_expand, so constraint strings here would be dead text.
17260 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17261 [(match_operand:VI2_AVX512VL 0 "register_operand")
17262 (match_operand:<sseintvecmode> 1 "register_operand")
17263 (match_operand:VI2_AVX512VL 2 "register_operand")
17264 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
17265 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17268 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17269 operands[0], operands[1], operands[2], operands[3],
17270 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2 for VI48F modes.  Here operand 2, a data vector, is tied to the
;; destination ("0") and is overwritten by the result; operand 1 is the
;; <sseintvecmode> vector in a register, and operand 3 may be register or
;; memory.  The template prints %3, %1 and %0 plus the optional zero-mask
;; suffix <sd_mask_op4>.
17274 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17275 [(set (match_operand:VI48F 0 "register_operand" "=v")
17277 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17278 (match_operand:VI48F 2 "register_operand" "0")
17279 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17282 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17283 [(set_attr "type" "sselog")
17284 (set_attr "prefix" "evex")
17285 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2 for VI1_AVX512VL modes; requires TARGET_AVX512VBMI.  Operand 2,
;; a data vector, is tied to the destination ("0"); operand 1 is the
;; <sseintvecmode> vector and operand 3 may be register or memory.
17287 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17288 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17289 (unspec:VI1_AVX512VL
17290 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17291 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17292 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17294 "TARGET_AVX512VBMI"
17295 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17296 [(set_attr "type" "sselog")
17297 (set_attr "prefix" "evex")
17298 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2 for VI2_AVX512VL modes.  Operand 2, a data vector, is tied to
;; the destination ("0"); operand 1 is the <sseintvecmode> vector and
;; operand 3 may be register or memory.
17300 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17301 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17302 (unspec:VI2_AVX512VL
17303 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17304 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17305 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17308 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17309 [(set_attr "type" "sselog")
17310 (set_attr "prefix" "evex")
17311 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2 for VI48F modes.  Operand 4 is the mask register
;; ("Yk"), printed as {%4} after the destination.  Operand 2 is tied to
;; the destination ("0"); operand 1 is the <sseintvecmode> vector.
17313 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17314 [(set (match_operand:VI48F 0 "register_operand" "=v")
17317 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17318 (match_operand:VI48F 2 "register_operand" "0")
17319 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17322 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17324 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17325 [(set_attr "type" "sselog")
17326 (set_attr "prefix" "evex")
17327 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2 for VI1_AVX512VL modes; requires
;; TARGET_AVX512VBMI.  A vec_merge combines the unspec result under the
;; mask register in operand 4 ("Yk"), printed as {%4} after the
;; destination.  Operand 2 is tied to the destination ("0").
17329 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17330 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17331 (vec_merge:VI1_AVX512VL
17332 (unspec:VI1_AVX512VL
17333 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17334 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17335 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17338 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17339 "TARGET_AVX512VBMI"
17340 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17341 [(set_attr "type" "sselog")
17342 (set_attr "prefix" "evex")
17343 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2 for VI2_AVX512VL modes.  A vec_merge combines the
;; unspec result under the mask register in operand 4 ("Yk"), printed as
;; {%4} after the destination.  Operand 2 is tied to the destination ("0").
17345 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17346 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17347 (vec_merge:VI2_AVX512VL
17348 (unspec:VI2_AVX512VL
17349 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17350 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17351 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17354 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17356 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17357 [(set_attr "type" "sselog")
17358 (set_attr "prefix" "evex")
17359 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an immediate.  The default pattern is the
;; opaque UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor bit 7 of the
;; immediate is set ((mask & 0x88) == 0), the preparation code instead
;; builds an explicit vec_select over the vec_concat of operands 1 and 2:
;; the low half of the result takes NELT2 consecutive elements starting at
;; (mask & 3) * NELT2, and the high half takes NELT2 consecutive elements
;; starting at ((mask >> 4) & 3) * NELT2, where NELT2 is half the element
;; count of the mode.
17361 (define_expand "avx_vperm2f128<mode>3"
17362 [(set (match_operand:AVX256MODE2P 0 "register_operand")
17363 (unspec:AVX256MODE2P
17364 [(match_operand:AVX256MODE2P 1 "register_operand")
17365 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17366 (match_operand:SI 3 "const_0_to_255_operand")]
17367 UNSPEC_VPERMIL2F128))]
17370 int mask = INTVAL (operands[3]);
17371 if ((mask & 0x88) == 0)
17373 rtx perm[<ssescalarnum>], t1, t2;
17374 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17376 base = (mask & 3) * nelt2;
17377 for (i = 0; i < nelt2; ++i)
17378 perm[i] = GEN_INT (base + i);
17380 base = ((mask >> 4) & 3) * nelt2;
17381 for (i = 0; i < nelt2; ++i)
17382 perm[i + nelt2] = GEN_INT (base + i);
17384 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
17385 operands[1], operands[2]);
17386 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17387 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
17388 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
17394 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17395 ;; means that in order to represent this properly in rtl we'd have to
17396 ;; nest *another* vec_concat with a zero operand and do the select from
17397 ;; a 4x wide vector. That doesn't seem very nice.
;;
;; Hence this pattern keeps the operation as UNSPEC_VPERMIL2F128 and
;; passes the 8-bit immediate (operand 3) through to vperm2<i128>
;; unchanged.  Operand 2 may be a register or memory.
17398 (define_insn "*avx_vperm2f128<mode>_full"
17399 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17400 (unspec:AVX256MODE2P
17401 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17402 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17403 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17404 UNSPEC_VPERMIL2F128))]
17406 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17407 [(set_attr "type" "sselog")
17408 (set_attr "prefix_extra" "1")
17409 (set_attr "length_immediate" "1")
17410 (set_attr "prefix" "vex")
17411 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expressed as a vec_select over the vec_concat of operands 1
;; and 2.  The insn condition accepts the selector (operand 3) only when
;; avx_vperm2f128_parallel returns nonzero; the output code recovers the
;; immediate as that return value minus one.  Two special cases are
;; emitted as vinsert<i128> with immediate 0 or 1 (operand 2 printed with
;; %x2); every other immediate is emitted as vperm2<i128>.
17413 (define_insn "*avx_vperm2f128<mode>_nozero"
17414 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17415 (vec_select:AVX256MODE2P
17416 (vec_concat:<ssedoublevecmode>
17417 (match_operand:AVX256MODE2P 1 "register_operand" "x")
17418 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
17419 (match_parallel 3 ""
17420 [(match_operand 4 "const_int_operand")])))]
17422 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
17424 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
17426 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
17428 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
17429 operands[3] = GEN_INT (mask);
17430 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17432 [(set_attr "type" "sselog")
17433 (set_attr "prefix_extra" "1")
17434 (set_attr "length_immediate" "1")
17435 (set_attr "prefix" "vex")
17436 (set_attr "mode" "<sseinsnmode>")])
;; Single-input permutation of a V_128 vector recognised by the
;; "palignr_operand" predicate on the selector.  Operand 3, the first
;; selector entry, is an element index; the output code multiplies it by
;; the element size in bytes to form the palignr immediate.
;;   alt 0 (isa noavx, source tied to the destination): palignr.
;;   alt 1 (isa avx): vpalignr with operand 1 as both source inputs.
17438 (define_insn "*ssse3_palignr<mode>_perm"
17439 [(set (match_operand:V_128 0 "register_operand" "=x,x")
17441 (match_operand:V_128 1 "register_operand" "0,x")
17442 (match_parallel 2 "palignr_operand"
17443 [(match_operand 3 "const_int_operand" "n, n")])))]
17446 machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
17447 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
17449 switch (which_alternative)
17452 return "palignr\t{%2, %1, %0|%0, %1, %2}";
17454 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
17456 gcc_unreachable ();
17459 [(set_attr "isa" "noavx,avx")
17460 (set_attr "type" "sseishft")
17461 (set_attr "atom_unit" "sishuf")
17462 (set_attr "prefix_data16" "1,*")
17463 (set_attr "prefix_extra" "1")
17464 (set_attr "length_immediate" "1")
17465 (set_attr "prefix" "orig,vex")])
;; Masked half-vector insert for VI48F_256 modes.  Operand 3 (0 or 1)
;; chooses between the gen_vec_set_lo_<mode>_mask and
;; gen_vec_set_hi_<mode>_mask generators; the chosen generator receives
;; the destination, the source vector, the <ssehalfvecmode> value to
;; insert, operand 4 and a fifth argument (the call's argument list
;; continues past the last line of this block).
17467 (define_expand "avx512vl_vinsert<mode>"
17468 [(match_operand:VI48F_256 0 "register_operand")
17469 (match_operand:VI48F_256 1 "register_operand")
17470 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17471 (match_operand:SI 3 "const_0_to_1_operand")
17472 (match_operand:VI48F_256 4 "register_operand")
17473 (match_operand:<avx512fmaskmode> 5 "register_operand")]
17476 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17478 switch (INTVAL (operands[3]))
17481 insn = gen_vec_set_lo_<mode>_mask;
17484 insn = gen_vec_set_hi_<mode>_mask;
17487 gcc_unreachable ();
17490 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Half-vector insert for V_256 modes.  Operand 3 (0 or 1) chooses
;; between gen_vec_set_lo_<mode> and gen_vec_set_hi_<mode>; the chosen
;; generator is called with the destination, the source vector and the
;; <ssehalfvecmode> value to insert.
17495 (define_expand "avx_vinsertf128<mode>"
17496 [(match_operand:V_256 0 "register_operand")
17497 (match_operand:V_256 1 "register_operand")
17498 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17499 (match_operand:SI 3 "const_0_to_1_operand")]
17502 rtx (*insn)(rtx, rtx, rtx);
17504 switch (INTVAL (operands[3]))
17507 insn = gen_vec_set_lo_<mode>;
17510 insn = gen_vec_set_hi_<mode>;
17513 gcc_unreachable ();
17516 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a VI8F_256 vector: the result is the
;; vec_concat of operand 2 (new low half, register or memory) with
;; elements 2 and 3 of operand 1 (the preserved high half).
;; With TARGET_AVX512VL the output is vinsert<shuffletype>64x2 with
;; immediate 0 and the optional mask suffix; otherwise it is
;; vinsert<i128> with immediate 0.
;; NOTE(review): the Intel SDM lists vinsert[fi]64x2 under AVX512DQ, but
;; the body selects it on TARGET_AVX512VL alone -- confirm that the insn
;; condition guarantees AVX512DQ whenever this branch is taken.
;; NOTE(review): "prefix" is "vex" even though the 64x2 form is
;; EVEX-encoded -- confirm this is intended.
17520 (define_insn "vec_set_lo_<mode><mask_name>"
17521 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17522 (vec_concat:VI8F_256
17523 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17524 (vec_select:<ssehalfvecmode>
17525 (match_operand:VI8F_256 1 "register_operand" "v")
17526 (parallel [(const_int 2) (const_int 3)]))))]
17529 if (TARGET_AVX512VL)
17530 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17532 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17534 [(set_attr "type" "sselog")
17535 (set_attr "prefix_extra" "1")
17536 (set_attr "length_immediate" "1")
17537 (set_attr "prefix" "vex")
17538 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI8F_256 vector: the result is the
;; vec_concat of elements 0 and 1 of operand 1 (the preserved low half)
;; with operand 2 (new high half, register or memory).
;; With TARGET_AVX512VL the output is vinsert<shuffletype>64x2 with
;; immediate 1 and the optional mask suffix; otherwise it is
;; vinsert<i128> with immediate 1.
;; NOTE(review): the Intel SDM lists vinsert[fi]64x2 under AVX512DQ, but
;; the body selects it on TARGET_AVX512VL alone -- confirm that the insn
;; condition guarantees AVX512DQ whenever this branch is taken.
;; NOTE(review): "prefix" is "vex" even though the 64x2 form is
;; EVEX-encoded -- confirm this is intended.
17540 (define_insn "vec_set_hi_<mode><mask_name>"
17541 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17542 (vec_concat:VI8F_256
17543 (vec_select:<ssehalfvecmode>
17544 (match_operand:VI8F_256 1 "register_operand" "v")
17545 (parallel [(const_int 0) (const_int 1)]))
17546 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17549 if (TARGET_AVX512VL)
17550 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17552 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17554 [(set_attr "type" "sselog")
17555 (set_attr "prefix_extra" "1")
17556 (set_attr "length_immediate" "1")
17557 (set_attr "prefix" "vex")
17558 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a VI4F_256 vector: the result is the
;; vec_concat of operand 2 (new low half, register or memory) with
;; elements 4-7 of operand 1 (the preserved high half).
;; With TARGET_AVX512VL the output is vinsert<shuffletype>32x4 with
;; immediate 0 and the optional mask suffix; otherwise it is
;; vinsert<i128> with immediate 0.
;; NOTE(review): "prefix" is "vex" even though the 32x4 form is
;; EVEX-encoded -- confirm this is intended.
17560 (define_insn "vec_set_lo_<mode><mask_name>"
17561 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17562 (vec_concat:VI4F_256
17563 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17564 (vec_select:<ssehalfvecmode>
17565 (match_operand:VI4F_256 1 "register_operand" "v")
17566 (parallel [(const_int 4) (const_int 5)
17567 (const_int 6) (const_int 7)]))))]
17570 if (TARGET_AVX512VL)
17571 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17573 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17575 [(set_attr "type" "sselog")
17576 (set_attr "prefix_extra" "1")
17577 (set_attr "length_immediate" "1")
17578 (set_attr "prefix" "vex")
17579 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI4F_256 vector: the result is the
;; vec_concat of elements 0-3 of operand 1 (the preserved low half) with
;; operand 2 (new high half, register or memory).
;; With TARGET_AVX512VL the output is vinsert<shuffletype>32x4 with
;; immediate 1 and the optional mask suffix; otherwise it is
;; vinsert<i128> with immediate 1.
;; NOTE(review): "prefix" is "vex" even though the 32x4 form is
;; EVEX-encoded -- confirm this is intended.
17581 (define_insn "vec_set_hi_<mode><mask_name>"
17582 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17583 (vec_concat:VI4F_256
17584 (vec_select:<ssehalfvecmode>
17585 (match_operand:VI4F_256 1 "register_operand" "v")
17586 (parallel [(const_int 0) (const_int 1)
17587 (const_int 2) (const_int 3)]))
17588 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17591 if (TARGET_AVX512VL)
17592 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17594 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17596 [(set_attr "type" "sselog")
17597 (set_attr "prefix_extra" "1")
17598 (set_attr "length_immediate" "1")
17599 (set_attr "prefix" "vex")
17600 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a V16HI vector: operand 2 (V8HI, register or
;; memory) supplies the new low half and elements 8-15 of operand 1 are
;; kept as the high half.  Emits vinsert%~128 with immediate 0.
17602 (define_insn "vec_set_lo_v16hi"
17603 [(set (match_operand:V16HI 0 "register_operand" "=x")
17605 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
17607 (match_operand:V16HI 1 "register_operand" "x")
17608 (parallel [(const_int 8) (const_int 9)
17609 (const_int 10) (const_int 11)
17610 (const_int 12) (const_int 13)
17611 (const_int 14) (const_int 15)]))))]
17613 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17614 [(set_attr "type" "sselog")
17615 (set_attr "prefix_extra" "1")
17616 (set_attr "length_immediate" "1")
17617 (set_attr "prefix" "vex")
17618 (set_attr "mode" "OI")])
;; V16HI high-half insert: a selection of elements 0..7 of operand 1 is
;; listed ahead of operand 2 (V8HI, register or memory).  Emits vinsert%~128
;; with immediate 0x1.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are not visible in this listing -- presumably vec_concat/vec_select as in
;; the VI4F_256 vec_set_hi pattern; confirm.
17620 (define_insn "vec_set_hi_v16hi"
17621 [(set (match_operand:V16HI 0 "register_operand" "=x")
17624 (match_operand:V16HI 1 "register_operand" "x")
17625 (parallel [(const_int 0) (const_int 1)
17626 (const_int 2) (const_int 3)
17627 (const_int 4) (const_int 5)
17628 (const_int 6) (const_int 7)]))
17629 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
17631 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17632 [(set_attr "type" "sselog")
17633 (set_attr "prefix_extra" "1")
17634 (set_attr "length_immediate" "1")
17635 (set_attr "prefix" "vex")
17636 (set_attr "mode" "OI")])
;; V32QI low-half insert: operand 2 (V16QI, register or memory) is listed
;; ahead of a selection of elements 16..31 of operand 1.  Emits
;; vinsert%~128 with immediate 0x0.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are not visible in this listing -- presumably vec_concat/vec_select as in
;; the VI4F_256 vec_set_lo pattern; confirm.
17638 (define_insn "vec_set_lo_v32qi"
17639 [(set (match_operand:V32QI 0 "register_operand" "=x")
17641 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
17643 (match_operand:V32QI 1 "register_operand" "x")
17644 (parallel [(const_int 16) (const_int 17)
17645 (const_int 18) (const_int 19)
17646 (const_int 20) (const_int 21)
17647 (const_int 22) (const_int 23)
17648 (const_int 24) (const_int 25)
17649 (const_int 26) (const_int 27)
17650 (const_int 28) (const_int 29)
17651 (const_int 30) (const_int 31)]))))]
17653 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17654 [(set_attr "type" "sselog")
17655 (set_attr "prefix_extra" "1")
17656 (set_attr "length_immediate" "1")
17657 (set_attr "prefix" "vex")
17658 (set_attr "mode" "OI")])
;; V32QI high-half insert: a selection of elements 0..15 of operand 1 is
;; listed ahead of operand 2 (V16QI, register or memory).  Emits
;; vinsert%~128 with immediate 0x1.
;; NOTE(review): the rtx codes wrapping the operands and the insn condition
;; are not visible in this listing -- presumably vec_concat/vec_select as in
;; the VI4F_256 vec_set_hi pattern; confirm.
17660 (define_insn "vec_set_hi_v32qi"
17661 [(set (match_operand:V32QI 0 "register_operand" "=x")
17664 (match_operand:V32QI 1 "register_operand" "x")
17665 (parallel [(const_int 0) (const_int 1)
17666 (const_int 2) (const_int 3)
17667 (const_int 4) (const_int 5)
17668 (const_int 6) (const_int 7)
17669 (const_int 8) (const_int 9)
17670 (const_int 10) (const_int 11)
17671 (const_int 12) (const_int 13)
17672 (const_int 14) (const_int 15)]))
17673 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
17675 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17676 [(set_attr "type" "sselog")
17677 (set_attr "prefix_extra" "1")
17678 (set_attr "length_immediate" "1")
17679 (set_attr "prefix" "vex")
17680 (set_attr "mode" "OI")])
;; Masked vector load for the V48_AVX2 modes.  Operand 1 is the memory
;; source, operand 2 is an integer vector of the same shape held in a
;; register (presumably the per-element mask -- confirm), operand 0 is the
;; destination register.  Emits v<sseintprefix>maskmov<ssemodesuffix>.
;; NOTE(review): the unspec wrapper and insn condition are not visible in
;; this listing.
17682 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
17683 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
17685 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
17686 (match_operand:V48_AVX2 1 "memory_operand" "m")]
17689 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
17690 [(set_attr "type" "sselog1")
17691 (set_attr "prefix_extra" "1")
17692 (set_attr "prefix" "vex")
17693 (set_attr "btver2_decode" "vector")
17694 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store for the V48_AVX2 modes.  Operand 0 is the memory
;; destination, constrained "+m" (read-write); operand 1 is an integer
;; vector register (presumably the per-element mask -- confirm) and
;; operand 2 is the data register.  Emits the same
;; v<sseintprefix>maskmov<ssemodesuffix> mnemonic as the load form.
;; NOTE(review): the unspec wrapper and insn condition are not visible in
;; this listing.
17696 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
17697 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
17699 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
17700 (match_operand:V48_AVX2 2 "register_operand" "x")
17704 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17705 [(set_attr "type" "sselog1")
17706 (set_attr "prefix_extra" "1")
17707 (set_attr "prefix" "vex")
17708 (set_attr "btver2_decode" "vector")
17709 (set_attr "mode" "<sseinsnmode>")])
;; Named expander "maskload<mode>" for the V48_AVX2 modes: operand 0 is the
;; destination register, operand 1 the memory source and operand 2 an
;; integer-vector register of mode <sseintvecmode>.
;; NOTE(review): the unspec wrapper and condition are not visible in this
;; listing.
17711 (define_expand "maskload<mode>"
17712 [(set (match_operand:V48_AVX2 0 "register_operand")
17714 [(match_operand:<sseintvecmode> 2 "register_operand")
17715 (match_operand:V48_AVX2 1 "memory_operand")]
;; Named expander "maskstore<mode>" for the V48_AVX2 modes: operand 0 is the
;; memory destination, operand 1 the data register and operand 2 an
;; integer-vector register of mode <sseintvecmode>.
;; NOTE(review): the unspec wrapper and condition are not visible in this
;; listing.
17719 (define_expand "maskstore<mode>"
17720 [(set (match_operand:V48_AVX2 0 "memory_operand")
17722 [(match_operand:<sseintvecmode> 2 "register_operand")
17723 (match_operand:V48_AVX2 1 "register_operand")
;; Cast of a half-width vector (operand 1) to a full AVX256MODE2P value
;; (operand 0), represented as an unspec.  Alternatives: register
;; destination from register/memory, or memory destination from register.
;; The split fires once reload has completed and ends in a plain
;; emit_move_insn (op0, op1) after rewriting one side with gen_rtx_REG so
;; both sides have the same mode.
;; NOTE(review): the tests guarding the two gen_rtx_REG assignments, the
;; unspec name and the insn condition are not visible in this listing.
17728 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
17729 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
17730 (unspec:AVX256MODE2P
17731 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
17735 "&& reload_completed"
17738 rtx op0 = operands[0];
17739 rtx op1 = operands[1];
17741 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
17743 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
17744 emit_move_insn (op0, op1);
;; Named expander "vec_init<mode>" for the V_256 modes.  It delegates to
;; ix86_expand_vector_init with a first argument of false, the destination
;; (operands[0]) and the initializer (operands[1]).
;; NOTE(review): the operand 1 match and the condition are not visible in
;; this listing.
17748 (define_expand "vec_init<mode>"
17749 [(match_operand:V_256 0 "register_operand")
17753 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Named expander "vec_init<mode>" for the VF48_I1248 modes.  It delegates
;; to ix86_expand_vector_init with a first argument of false, the
;; destination (operands[0]) and the initializer (operands[1]).
;; NOTE(review): the operand 1 match and the condition are not visible in
;; this listing.
17757 (define_expand "vec_init<mode>"
17758 [(match_operand:VF48_I1248 0 "register_operand")
17762 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Arithmetic right shift of vector operand 1 by vector operand 2 (register
;; or memory) for the VI48_AVX512F_AVX512VL modes.  Emits
;; vpsrav<ssemodesuffix>; <mask_operand3> is appended to the destination
;; for the masked variants generated through <mask_name>.  Enabled under
;; TARGET_AVX2 together with <mask_mode512bit_condition>.
17766 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17767 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
17768 (ashiftrt:VI48_AVX512F_AVX512VL
17769 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
17770 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
17771 "TARGET_AVX2 && <mask_mode512bit_condition>"
17772 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17773 [(set_attr "type" "sseishft")
17774 (set_attr "prefix" "maybe_evex")
17775 (set_attr "mode" "<sseinsnmode>")])
;; Arithmetic right shift of vector operand 1 by vector operand 2 (register
;; or memory) for the VI2_AVX512VL modes.  Emits vpsravw, with
;; <mask_operand3> appended to the destination.
;; NOTE(review): the insn condition line is not visible in this listing.
17777 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17778 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17779 (ashiftrt:VI2_AVX512VL
17780 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17781 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17783 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17784 [(set_attr "type" "sseishft")
17785 (set_attr "prefix" "maybe_evex")
17786 (set_attr "mode" "<sseinsnmode>")])
;; Shift of vector operand 1 by vector operand 2 (register or memory) for
;; the VI48_AVX512F modes, instantiated once per code in the any_lshift
;; iterator.  Emits vp<vshift>v<ssemodesuffix>, with <mask_operand3>
;; appended to the destination.  Enabled under TARGET_AVX2 together with
;; <mask_mode512bit_condition>.
17788 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17789 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
17790 (any_lshift:VI48_AVX512F
17791 (match_operand:VI48_AVX512F 1 "register_operand" "v")
17792 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
17793 "TARGET_AVX2 && <mask_mode512bit_condition>"
17794 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17795 [(set_attr "type" "sseishft")
17796 (set_attr "prefix" "maybe_evex")
17797 (set_attr "mode" "<sseinsnmode>")])
;; Shift of vector operand 1 by vector operand 2 (register or memory) for
;; the VI2_AVX512VL modes, instantiated once per code in the any_lshift
;; iterator.  Emits vp<vshift>v<ssemodesuffix>, with <mask_operand3>
;; appended to the destination.
;; NOTE(review): the insn condition line is not visible in this listing.
17799 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17800 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17801 (any_lshift:VI2_AVX512VL
17802 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17803 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17805 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17806 [(set_attr "type" "sseishft")
17807 (set_attr "prefix" "maybe_evex")
17808 (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two half-width vectors into a V_256_512 value.  Operand 1 is
;; always a register; operand 2 is either a register/memory ("xm") or
;; matches constraint "C" (presumably the zero constant -- confirm).
;; The output code switches on which_alternative.  The visible returns are
;; a vinsert<i128> with immediate 0x1, and plain vmovaps/vmovapd/vmovdqa
;; moves of operand 1 into the %t0 or %x0 view of operand 0, selected by
;; get_attr_mode.  The "type" attribute ("sselog,ssemov") suggests the
;; insert belongs to the first alternative and the move to the second.
;; NOTE(review): the case labels, braces and insn condition are not
;; visible in this listing.
17810 (define_insn "avx_vec_concat<mode>"
17811 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
17812 (vec_concat:V_256_512
17813 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
17814 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
17817 switch (which_alternative)
17820 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
17822 switch (get_attr_mode (insn))
17825 return "vmovaps\t{%1, %t0|%t0, %1}";
17827 return "vmovapd\t{%1, %t0|%t0, %1}";
17829 return "vmovaps\t{%1, %x0|%x0, %1}";
17831 return "vmovapd\t{%1, %x0|%x0, %1}";
17833 return "vmovdqa\t{%1, %t0|%t0, %1}";
17835 return "vmovdqa\t{%1, %x0|%x0, %1}";
17837 gcc_unreachable ();
17840 gcc_unreachable ();
17843 [(set_attr "type" "sselog,ssemov")
17844 (set_attr "prefix_extra" "1,*")
17845 (set_attr "length_immediate" "1,*")
17846 (set_attr "prefix" "maybe_evex")
17847 (set_attr "mode" "<sseinsnmode>")])
;; Conversion emitted as vcvtph2ps with a V4SF result.  The source is
;; modelled as a V8SF unspec over the V8HI register operand 1, of which
;; elements 0..3 are taken.  Enabled under TARGET_F16C or TARGET_AVX512VL.
;; NOTE(review): the vec_select wrapper and unspec name are not visible in
;; this listing.
17849 (define_insn "vcvtph2ps<mask_name>"
17850 [(set (match_operand:V4SF 0 "register_operand" "=v")
17852 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
17854 (parallel [(const_int 0) (const_int 1)
17855 (const_int 2) (const_int 3)])))]
17856 "TARGET_F16C || TARGET_AVX512VL"
17857 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17858 [(set_attr "type" "ssecvt")
17859 (set_attr "prefix" "maybe_evex")
17860 (set_attr "mode" "V4SF")])
;; Memory-source form of vcvtph2ps: a V4HI memory operand 1 converted to a
;; V4SF register via UNSPEC_VCVTPH2PS.  Enabled under TARGET_F16C or
;; TARGET_AVX512VL.
;; NOTE(review): "prefix" is "vex" and "mode" is "V8SF" here, whereas the
;; register form vcvtph2ps<mask_name> uses "maybe_evex" and "V4SF" --
;; confirm this difference is intended.
17862 (define_insn "*vcvtph2ps_load<mask_name>"
17863 [(set (match_operand:V4SF 0 "register_operand" "=v")
17864 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
17865 UNSPEC_VCVTPH2PS))]
17866 "TARGET_F16C || TARGET_AVX512VL"
17867 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17868 [(set_attr "type" "ssecvt")
17869 (set_attr "prefix" "vex")
17870 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: V8HI operand 1 (register or memory) converted to a
;; V8SF register via UNSPEC_VCVTPH2PS.  Enabled under TARGET_F16C or
;; TARGET_AVX512VL.
17872 (define_insn "vcvtph2ps256<mask_name>"
17873 [(set (match_operand:V8SF 0 "register_operand" "=v")
17874 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
17875 UNSPEC_VCVTPH2PS))]
17876 "TARGET_F16C || TARGET_AVX512VL"
17877 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17878 [(set_attr "type" "ssecvt")
17879 (set_attr "prefix" "vex")
17880 (set_attr "btver2_decode" "double")
17881 (set_attr "mode" "V8SF")])
;; 512-bit vcvtph2ps: V16HI operand 1 converted to a V16SF register via
;; UNSPEC_VCVTPH2PS.  The source predicate and constraint come from the
;; <round_saeonly_*> substitution attributes, and <round_saeonly_mask_op2>
;; is printed next to the source operand.
;; NOTE(review): the unspec opener and insn condition are not visible in
;; this listing.
17883 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
17884 [(set (match_operand:V16SF 0 "register_operand" "=v")
17886 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
17887 UNSPEC_VCVTPH2PS))]
17889 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
17890 [(set_attr "type" "ssecvt")
17891 (set_attr "prefix" "evex")
17892 (set_attr "mode" "V16SF")])
;; Expander for the masked 128-bit vcvtps2ph.  Operand 1 is the V4SF
;; source, operand 2 an immediate in 0..255, operand 3 a V8HI
;; vector_move_operand and operand 4 a QImode register (presumably the
;; merge source and the mask -- confirm).  The preparation statement sets
;; operands[5] to the V4HImode zero constant.
;; NOTE(review): the wrapping rtx codes and condition are not visible in
;; this listing.
17894 (define_expand "vcvtps2ph_mask"
17895 [(set (match_operand:V8HI 0 "register_operand")
17898 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17899 (match_operand:SI 2 "const_0_to_255_operand")]
17902 (match_operand:V8HI 3 "vector_move_operand")
17903 (match_operand:QI 4 "register_operand")))]
17905 "operands[5] = CONST0_RTX (V4HImode);")
;; Expander for the unmasked 128-bit vcvtps2ph.  Operand 1 is the V4SF
;; source and operand 2 an immediate in 0..255; the V4HI unspec result is
;; placed into the V8HI operand 0.  The preparation statement sets
;; operands[3] to the V4HImode zero constant.
;; NOTE(review): the wrapping rtx codes and condition are not visible in
;; this listing.
17907 (define_expand "vcvtps2ph"
17908 [(set (match_operand:V8HI 0 "register_operand")
17910 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17911 (match_operand:SI 2 "const_0_to_255_operand")]
17915 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination vcvtps2ph: V4SF operand 1 and immediate operand 2
;; form a V4HI unspec which is combined with operand 3, required to be a
;; V4HI zero constant (const0_operand), into the V8HI operand 0.
;; Enabled under (TARGET_F16C || TARGET_AVX512VL) together with
;; <mask_avx512vl_condition>.
;; NOTE(review): the wrapping rtx code (presumably vec_concat) is not
;; visible in this listing.
17917 (define_insn "*vcvtps2ph<mask_name>"
17918 [(set (match_operand:V8HI 0 "register_operand" "=v")
17920 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
17921 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17923 (match_operand:V4HI 3 "const0_operand")))]
17924 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
17925 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17926 [(set_attr "type" "ssecvt")
17927 (set_attr "prefix" "maybe_evex")
17928 (set_attr "mode" "V4SF")])
;; Memory-destination vcvtps2ph: V4SF register operand 1 and immediate
;; operand 2 (0..255) are converted via UNSPEC_VCVTPS2PH and stored to the
;; V4HI memory operand 0.  Enabled under TARGET_F16C or TARGET_AVX512VL.
17930 (define_insn "*vcvtps2ph_store<mask_name>"
17931 [(set (match_operand:V4HI 0 "memory_operand" "=m")
17932 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
17933 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17934 UNSPEC_VCVTPS2PH))]
17935 "TARGET_F16C || TARGET_AVX512VL"
17936 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17937 [(set_attr "type" "ssecvt")
17938 (set_attr "prefix" "maybe_evex")
17939 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: V8SF register operand 1 and immediate operand 2
;; (0..255) are converted via UNSPEC_VCVTPS2PH into the V8HI operand 0,
;; which may be a register or memory ("=xm").  Enabled under TARGET_F16C
;; or TARGET_AVX512VL.
17941 (define_insn "vcvtps2ph256<mask_name>"
17942 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
17943 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
17944 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17945 UNSPEC_VCVTPS2PH))]
17946 "TARGET_F16C || TARGET_AVX512VL"
17947 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17948 [(set_attr "type" "ssecvt")
17949 (set_attr "prefix" "maybe_evex")
17950 (set_attr "btver2_decode" "vector")
17951 (set_attr "mode" "V8SF")])
;; 512-bit vcvtps2ph: V16SF register operand 1 and immediate operand 2
;; (0..255) are converted via UNSPEC_VCVTPS2PH into the V16HI operand 0,
;; which may be a register or memory ("=vm").
;; NOTE(review): the unspec opener and insn condition are not visible in
;; this listing.
17953 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
17954 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
17956 [(match_operand:V16SF 1 "register_operand" "v")
17957 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17958 UNSPEC_VCVTPS2PH))]
17960 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17961 [(set_attr "type" "ssecvt")
17962 (set_attr "prefix" "evex")
17963 (set_attr "mode" "V16SF")])
17965 ;; For gather* insn patterns
;; Data modes iterated by the avx2_gather* patterns: the 128-bit and
;; 256-bit vectors with 4-byte or 8-byte elements.
17966 (define_mode_iterator VEC_GATHER_MODE
17967 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; For each gather/scatter data mode, the SImode-element vector mode given
;; to the index operand of the *gathersi / *scattersi patterns.
17968 (define_mode_attr VEC_GATHER_IDXSI
17969 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
17970 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
17971 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
17972 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; For each gather/scatter data mode, the DImode-element vector mode given
;; to the index operand of the *gatherdi / *scatterdi patterns.
17974 (define_mode_attr VEC_GATHER_IDXDI
17975 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17976 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
17977 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
17978 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Mode of the source (and, for AVX2, mask) vector operands in the
;; *gatherdi / *scatterdi patterns.  It equals the data mode except for
;; V8SI/V8SF and V16SI/V16SF, which map to the half-width V4SI/V4SF and
;; V8SI/V8SF respectively.
17980 (define_mode_attr VEC_GATHER_SRCDI
17981 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17982 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
17983 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
17984 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for AVX2 gathers with SImode indices.  Operand 0 is the
;; destination, operand 1 the source vector, operand 2 the base address,
;; operand 3 the <VEC_GATHER_IDXSI> index vector, operand 4 a vector of the
;; data mode (presumably the mask -- confirm) and operand 5 the scale
;; (1, 2, 4 or 8).  A scratch of the data mode is clobbered as operand 6.
;; The preparation code builds an UNSPEC_VSIBADDR in Pmode from operands
;; 2, 3 and 5; the operand it is stored into is not visible in this
;; listing.
;; The scale predicate is spelled "const1248_operand" without the trailing
;; blank it used to carry, matching every other use in this file.
17986 (define_expand "avx2_gathersi<mode>"
17987 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17988 (unspec:VEC_GATHER_MODE
17989 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
17990 (mem:<ssescalarmode>
17992 [(match_operand 2 "vsib_address_operand")
17993 (match_operand:<VEC_GATHER_IDXSI>
17994 3 "register_operand")
17995 (match_operand:SI 5 "const1248_operand")]))
17996 (mem:BLK (scratch))
17997 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
17999 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18003 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18004 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with SImode indices, source operand present.  Operand 2
;; (source) is tied to the destination operand 0 and operand 5 is tied to
;; the scratch operand 1; both outputs are earlyclobber ("=&x").  Operand 7
;; is the vsib memory reference built from address operand 3, index
;; operand 4 and scale operand 6.  Emits
;; v<sseintprefix>gatherd<ssemodesuffix> printing operands 1, 7 and 0.
;; NOTE(review): the unspec names and insn condition are not visible in
;; this listing.
18007 (define_insn "*avx2_gathersi<mode>"
18008 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18009 (unspec:VEC_GATHER_MODE
18010 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18011 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18013 [(match_operand:P 3 "vsib_address_operand" "Tv")
18014 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18015 (match_operand:SI 6 "const1248_operand" "n")]
18017 (mem:BLK (scratch))
18018 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18020 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18022 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18023 [(set_attr "type" "ssemov")
18024 (set_attr "prefix" "vex")
18025 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the AVX2 SImode-index gather whose operand numbering has no
;; separate source operand: operand 6 is the vsib memory reference built
;; from address operand 2, index operand 3 and scale operand 5, and
;; operand 4 is tied to the scratch operand 1.  Emits
;; v<sseintprefix>gatherd<ssemodesuffix> printing operands 1, 6 and 0.
;; NOTE(review): the first unspec element, the unspec names and the insn
;; condition are not visible in this listing.
18027 (define_insn "*avx2_gathersi<mode>_2"
18028 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18029 (unspec:VEC_GATHER_MODE
18031 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18033 [(match_operand:P 2 "vsib_address_operand" "Tv")
18034 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18035 (match_operand:SI 5 "const1248_operand" "n")]
18037 (mem:BLK (scratch))
18038 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18040 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18042 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18043 [(set_attr "type" "ssemov")
18044 (set_attr "prefix" "vex")
18045 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX2 gathers with DImode indices.  Operand 0 is the
;; destination, operand 1 the source vector and operand 4 a further vector
;; (presumably the mask -- confirm), both in <VEC_GATHER_SRCDI> mode;
;; operand 2 is the base address, operand 3 the <VEC_GATHER_IDXDI> index
;; vector and operand 5 the scale (1, 2, 4 or 8).  A scratch of the data
;; mode is clobbered as operand 6.  The preparation code builds an
;; UNSPEC_VSIBADDR in Pmode from operands 2, 3 and 5; the operand it is
;; stored into is not visible in this listing.
;; The scale predicate is spelled "const1248_operand" without the trailing
;; blank it used to carry, matching every other use in this file.
18047 (define_expand "avx2_gatherdi<mode>"
18048 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18049 (unspec:VEC_GATHER_MODE
18050 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18051 (mem:<ssescalarmode>
18053 [(match_operand 2 "vsib_address_operand")
18054 (match_operand:<VEC_GATHER_IDXDI>
18055 3 "register_operand")
18056 (match_operand:SI 5 "const1248_operand")]))
18057 (mem:BLK (scratch))
18058 (match_operand:<VEC_GATHER_SRCDI>
18059 4 "register_operand")]
18061 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18065 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18066 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with DImode indices, source operand present.  Operand 7 is
;; the vsib memory reference built from address operand 3, index operand 4
;; and scale operand 6.  The template prints operands 5, 7 and 2 rather
;; than 1 and 0: operands 2 and 5 are tied to operands 0 and 1 by their
;; matching constraints but carry the <VEC_GATHER_SRCDI> mode.
;; NOTE(review): the unspec names and insn condition are not visible in
;; this listing.
18069 (define_insn "*avx2_gatherdi<mode>"
18070 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18071 (unspec:VEC_GATHER_MODE
18072 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18073 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18075 [(match_operand:P 3 "vsib_address_operand" "Tv")
18076 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18077 (match_operand:SI 6 "const1248_operand" "n")]
18079 (mem:BLK (scratch))
18080 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18082 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18084 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
18085 [(set_attr "type" "ssemov")
18086 (set_attr "prefix" "vex")
18087 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the AVX2 DImode-index gather with no separate source
;; operand: operand 6 is the vsib memory reference built from address
;; operand 2, index operand 3 and scale operand 5; operand 4 is tied to the
;; scratch operand 1.  When the data mode differs from <VEC_GATHER_SRCDI>
;; the destination is printed through the %x0 modifier, otherwise as %0.
;; NOTE(review): the first unspec element, the unspec names, the insn
;; condition and the braces are not visible in this listing.
18089 (define_insn "*avx2_gatherdi<mode>_2"
18090 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18091 (unspec:VEC_GATHER_MODE
18093 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18095 [(match_operand:P 2 "vsib_address_operand" "Tv")
18096 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18097 (match_operand:SI 5 "const1248_operand" "n")]
18099 (mem:BLK (scratch))
18100 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18102 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18105 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18106 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
18107 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
18109 [(set_attr "type" "ssemov")
18110 (set_attr "prefix" "vex")
18111 (set_attr "mode" "<sseinsnmode>")])
;; DImode-index gather whose result is a vec_select of elements 0..3 in
;; <VEC_GATHER_SRCDI> mode.  Operand 2 (source) is tied to operand 0 and
;; operand 5 to the VI4F_256 scratch operand 1.  Operand 7 is the vsib
;; memory reference built from address operand 3, index operand 4 and
;; scale operand 6.  Emits v<sseintprefix>gatherq<ssemodesuffix> printing
;; operands 5, 7 and 0.
;; NOTE(review): the inner unspec opener, the unspec names and the insn
;; condition are not visible in this listing.
18113 (define_insn "*avx2_gatherdi<mode>_3"
18114 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18115 (vec_select:<VEC_GATHER_SRCDI>
18117 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18118 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18120 [(match_operand:P 3 "vsib_address_operand" "Tv")
18121 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18122 (match_operand:SI 6 "const1248_operand" "n")]
18124 (mem:BLK (scratch))
18125 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18127 (parallel [(const_int 0) (const_int 1)
18128 (const_int 2) (const_int 3)])))
18129 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18131 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18132 [(set_attr "type" "ssemov")
18133 (set_attr "prefix" "vex")
18134 (set_attr "mode" "<sseinsnmode>")])
;; As the _3 variant (result is a vec_select of elements 0..3 in
;; <VEC_GATHER_SRCDI> mode) but with no separate source operand: operand 6
;; is the vsib memory reference built from address operand 2, index
;; operand 3 and scale operand 5, and operand 4 is tied to the VI4F_256
;; scratch operand 1.  Emits v<sseintprefix>gatherq<ssemodesuffix>
;; printing operands 4, 6 and 0.
;; NOTE(review): the inner unspec opener and first element, the unspec
;; names and the insn condition are not visible in this listing.
18136 (define_insn "*avx2_gatherdi<mode>_4"
18137 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18138 (vec_select:<VEC_GATHER_SRCDI>
18141 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18143 [(match_operand:P 2 "vsib_address_operand" "Tv")
18144 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18145 (match_operand:SI 5 "const1248_operand" "n")]
18147 (mem:BLK (scratch))
18148 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18150 (parallel [(const_int 0) (const_int 1)
18151 (const_int 2) (const_int 3)])))
18152 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18154 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18155 [(set_attr "type" "ssemov")
18156 (set_attr "prefix" "vex")
18157 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX-512 gathers with SImode indices over the VI48F modes.
;; Operand 1 is the source vector, operand 4 an <avx512fmaskmode> register,
;; operand 2 the base address, operand 3 the <VEC_GATHER_IDXSI> index
;; vector and operand 5 the scale (1, 2, 4 or 8).  A mask-mode scratch is
;; clobbered as operand 7.  The preparation code builds an UNSPEC_VSIBADDR
;; in Pmode from operands 2, 3 and 5; the operand it is stored into is not
;; visible in this listing.
18159 (define_expand "<avx512>_gathersi<mode>"
18160 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18162 [(match_operand:VI48F 1 "register_operand")
18163 (match_operand:<avx512fmaskmode> 4 "register_operand")
18164 (mem:<ssescalarmode>
18166 [(match_operand 2 "vsib_address_operand")
18167 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18168 (match_operand:SI 5 "const1248_operand")]))]
18170 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18174 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18175 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with SImode indices, source operand present.  Operand 1
;; (source) is tied to the destination operand 0; mask operand 7 is tied
;; to the "Yk" scratch operand 2, which the insn clobbers.  Operand 6 is
;; the vsib memory reference built from address operand 4, index operand 3
;; and scale operand 5.  Emits v<sseintprefix>gatherd<ssemodesuffix> with
;; the mask printed in braces after the destination.
;; NOTE(review): the outer unspec opener/name and the insn condition are
;; not visible in this listing.
18178 (define_insn "*avx512f_gathersi<mode>"
18179 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18181 [(match_operand:VI48F 1 "register_operand" "0")
18182 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18183 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18185 [(match_operand:P 4 "vsib_address_operand" "Tv")
18186 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18187 (match_operand:SI 5 "const1248_operand" "n")]
18188 UNSPEC_VSIBADDR)])]
18190 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
18192 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18193 [(set_attr "type" "ssemov")
18194 (set_attr "prefix" "evex")
18195 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the AVX-512 SImode-index gather with no separate source
;; operand: mask operand 6 is tied to the "Yk" scratch operand 1, and
;; operand 5 is the vsib memory reference built from address operand 3,
;; index operand 2 and scale operand 4.  Emits
;; v<sseintprefix>gatherd<ssemodesuffix> with the mask printed in braces
;; after the destination.
;; NOTE(review): the outer unspec opener and first element, the unspec
;; name and the insn condition are not visible in this listing.
18197 (define_insn "*avx512f_gathersi<mode>_2"
18198 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18201 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18202 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18204 [(match_operand:P 3 "vsib_address_operand" "Tv")
18205 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18206 (match_operand:SI 4 "const1248_operand" "n")]
18207 UNSPEC_VSIBADDR)])]
18209 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18211 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18212 [(set_attr "type" "ssemov")
18213 (set_attr "prefix" "evex")
18214 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX-512 gathers with DImode indices over the VI48F modes.
;; Operand 1 is the source vector in <VEC_GATHER_SRCDI> mode, operand 4 a
;; QImode register, operand 2 the base address, operand 3 the
;; <VEC_GATHER_IDXDI> index vector and operand 5 the scale (1, 2, 4 or 8).
;; A QImode scratch is clobbered as operand 7.  The preparation code
;; builds an UNSPEC_VSIBADDR in Pmode from operands 2, 3 and 5; the
;; operand it is stored into is not visible in this listing.
18217 (define_expand "<avx512>_gatherdi<mode>"
18218 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18220 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18221 (match_operand:QI 4 "register_operand")
18222 (mem:<ssescalarmode>
18224 [(match_operand 2 "vsib_address_operand")
18225 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18226 (match_operand:SI 5 "const1248_operand")]))]
18228 (clobber (match_scratch:QI 7))])]
18232 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18233 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with DImode indices, source operand present.  Operand 1
;; (source, <VEC_GATHER_SRCDI> mode) is tied to the destination operand 0;
;; QImode mask operand 7 is tied to the "Yk" scratch operand 2.  Operand 6
;; is the vsib memory reference built from address operand 4, index
;; operand 3 and scale operand 5.  The template prints operand 1 rather
;; than operand 0 as the destination, i.e. the tied register in
;; <VEC_GATHER_SRCDI> mode.
;; NOTE(review): the outer unspec opener/name and the insn condition are
;; not visible in this listing.
18236 (define_insn "*avx512f_gatherdi<mode>"
18237 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18239 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18240 (match_operand:QI 7 "register_operand" "2")
18241 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18243 [(match_operand:P 4 "vsib_address_operand" "Tv")
18244 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18245 (match_operand:SI 5 "const1248_operand" "n")]
18246 UNSPEC_VSIBADDR)])]
18248 (clobber (match_scratch:QI 2 "=&Yk"))]
18250 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18251 [(set_attr "type" "ssemov")
18252 (set_attr "prefix" "evex")
18253 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the AVX-512 DImode-index gather with no separate source
;; operand: QImode mask operand 6 is tied to the "Yk" scratch operand 1,
;; and operand 5 is the vsib memory reference built from address operand
;; 3, index operand 2 and scale operand 4.
;; When the data mode differs from <VEC_GATHER_SRCDI> the destination is
;; printed at reduced width: through %x0 when <MODE_SIZE> != 64 and
;; through %t0 otherwise.  Both assembler dialects must name the same
;; register width, so the Intel half of the <MODE_SIZE> != 64 template
;; uses %x0 just like its AT&T half (it used to print %t0).
;; NOTE(review): the outer unspec opener and first element, the unspec
;; name, the insn condition and the braces/else are not visible in this
;; listing.
18255 (define_insn "*avx512f_gatherdi<mode>_2"
18256 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18259 (match_operand:QI 6 "register_operand" "1")
18260 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18262 [(match_operand:P 3 "vsib_address_operand" "Tv")
18263 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18264 (match_operand:SI 4 "const1248_operand" "n")]
18265 UNSPEC_VSIBADDR)])]
18267 (clobber (match_scratch:QI 1 "=&Yk"))]
18270 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18272 if (<MODE_SIZE> != 64)
18273 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18275 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18277 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18279 [(set_attr "type" "ssemov")
18280 (set_attr "prefix" "evex")
18281 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX-512 scatters with SImode indices over the VI48F modes.
;; Operand 0 is the base address, operand 2 the <VEC_GATHER_IDXSI> index
;; vector and operand 4 the scale (1, 2, 4 or 8); operand 1 is an
;; <avx512fmaskmode> register and operand 3 the data vector.  A mask-mode
;; scratch is clobbered as operand 6.  The preparation code builds an
;; UNSPEC_VSIBADDR in Pmode from operands 0, 2 and 4; the operand it is
;; stored into is not visible in this listing.
18283 (define_expand "<avx512>_scattersi<mode>"
18284 [(parallel [(set (mem:VI48F
18286 [(match_operand 0 "vsib_address_operand")
18287 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18288 (match_operand:SI 4 "const1248_operand")]))
18290 [(match_operand:<avx512fmaskmode> 1 "register_operand")
18291 (match_operand:VI48F 3 "register_operand")]
18293 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18297 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18298 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with SImode indices.  Operand 5 is the vsib memory
;; destination built from address operand 0, index operand 2 and scale
;; operand 4; operand 3 is the data vector and mask operand 6 is tied to
;; the "Yk" scratch operand 1, which the insn clobbers.  Emits
;; v<sseintprefix>scatterd<ssemodesuffix> with the mask printed in braces
;; after the memory operand.
;; NOTE(review): the unspec openers/names and the insn condition are not
;; visible in this listing.
18301 (define_insn "*avx512f_scattersi<mode>"
18302 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18304 [(match_operand:P 0 "vsib_address_operand" "Tv")
18305 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18306 (match_operand:SI 4 "const1248_operand" "n")]
18309 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18310 (match_operand:VI48F 3 "register_operand" "v")]
18312 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18314 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18315 [(set_attr "type" "ssemov")
18316 (set_attr "prefix" "evex")
18317 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX-512 scatters with DImode indices over the VI48F modes.
;; Operand 0 is the base address, operand 2 the <VEC_GATHER_IDXDI> index
;; vector and operand 4 the scale (1, 2, 4 or 8); operand 1 is a QImode
;; register and operand 3 the data vector in <VEC_GATHER_SRCDI> mode.  A
;; QImode scratch is clobbered as operand 6.  The preparation code builds
;; an UNSPEC_VSIBADDR in Pmode from operands 0, 2 and 4; the operand it is
;; stored into is not visible in this listing.
18319 (define_expand "<avx512>_scatterdi<mode>"
18320 [(parallel [(set (mem:VI48F
18322 [(match_operand 0 "vsib_address_operand")
18323 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
18324 (match_operand:SI 4 "const1248_operand")]))
18326 [(match_operand:QI 1 "register_operand")
18327 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18329 (clobber (match_scratch:QI 6))])]
18333 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18334 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with DImode indices.  Operand 5 is the vsib memory
;; destination built from address operand 0, index operand 2 and scale
;; operand 4; operand 3 is the data vector in <VEC_GATHER_SRCDI> mode and
;; QImode mask operand 6 is tied to the "Yk" scratch operand 1, which the
;; insn clobbers.  Emits v<sseintprefix>scatterq<ssemodesuffix> with the
;; mask printed in braces after the memory operand.
;; NOTE(review): the unspec openers/names and the insn condition are not
;; visible in this listing.
18337 (define_insn "*avx512f_scatterdi<mode>"
18338 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18340 [(match_operand:P 0 "vsib_address_operand" "Tv")
18341 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18342 (match_operand:SI 4 "const1248_operand" "n")]
18345 [(match_operand:QI 6 "register_operand" "1")
18346 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18348 (clobber (match_scratch:QI 1 "=&Yk"))]
18350 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18351 [(set_attr "type" "ssemov")
18352 (set_attr "prefix" "evex")
18353 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress into a register for the VI48F modes.  Operand 1 is the
;; source register; operand 2 is either tied to the destination ("0") or
;; matches constraint "C"; operand 3 is the "Yk" mask register.  Emits
;; v<sseintprefix>compress<ssemodesuffix> with the mask in braces and %N2
;; appended to the destination.
;; NOTE(review): the unspec opener/name and insn condition are not visible
;; in this listing.
18355 (define_insn "<avx512>_compress<mode>_mask"
18356 [(set (match_operand:VI48F 0 "register_operand" "=v")
18358 [(match_operand:VI48F 1 "register_operand" "v")
18359 (match_operand:VI48F 2 "vector_move_operand" "0C")
18360 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
18363 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18364 [(set_attr "type" "ssemov")
18365 (set_attr "prefix" "evex")
18366 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress directly to memory for the VI48F modes
;; (UNSPEC_COMPRESS_STORE).  Operand 0 is the memory destination, operand 1
;; the source register and operand 2 the "Yk" mask register.  Emits
;; v<sseintprefix>compress<ssemodesuffix> with the mask in braces after
;; the destination; the "memory" attribute is "store".
;; NOTE(review): the unspec opener, its middle element and the insn
;; condition are not visible in this listing.
18368 (define_insn "<avx512>_compressstore<mode>_mask"
18369 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18371 [(match_operand:VI48F 1 "register_operand" "x")
18373 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
18374 UNSPEC_COMPRESS_STORE))]
18376 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
18377 [(set_attr "type" "ssemov")
18378 (set_attr "prefix" "evex")
18379 (set_attr "memory" "store")
18380 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the "_maskz" form of the masked expand on the VI48F modes.
;; It takes the same three inputs as the "_mask" insn (source operand 1,
;; operand 2, mask operand 3) and its preparation statement overwrites
;; operands[2] with the zero constant of the data mode.
;; NOTE(review): the unspec opener/name and condition are not visible in
;; this listing.
18382 (define_expand "<avx512>_expand<mode>_maskz"
18383 [(set (match_operand:VI48F 0 "register_operand")
18385 [(match_operand:VI48F 1 "nonimmediate_operand")
18386 (match_operand:VI48F 2 "vector_move_operand")
18387 (match_operand:<avx512fmaskmode> 3 "register_operand")]
18390 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand for the VI48F modes, with two alternatives: source
;; operand 1 in a register or in memory (the "memory" attribute is
;; "none,load").  Operand 2 is either tied to the destination ("0") or
;; matches constraint "C"; operand 3 is the "Yk" mask register.  Emits
;; v<sseintprefix>expand<ssemodesuffix> with the mask in braces and %N2
;; appended to the destination.
;; NOTE(review): the unspec opener/name and insn condition are not visible
;; in this listing.
18392 (define_insn "<avx512>_expand<mode>_mask"
18393 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
18395 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
18396 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
18397 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
18400 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18401 [(set_attr "type" "ssemov")
18402 (set_attr "prefix" "evex")
18403 (set_attr "memory" "none,load")
18404 (set_attr "mode" "<sseinsnmode>")])
;; Packed vrange for the VF_AVX512VL modes: register operand 1, operand 2
;; (predicate and constraint supplied by the <round_saeonly_*>
;; attributes) and an immediate operand 3 in 0..15.  Enabled under
;; TARGET_AVX512DQ together with <round_saeonly_mode512bit_condition>.
;; NOTE(review): the unspec name is not visible in this listing.
18406 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
18407 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18408 (unspec:VF_AVX512VL
18409 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18410 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18411 (match_operand:SI 3 "const_0_to_15_operand")]
18413 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
18414 "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
18415 [(set_attr "type" "sse")
18416 (set_attr "prefix" "evex")
18417 (set_attr "mode" "<MODE>")])
;; Scalar vrange for the VF_128 modes: register operand 1, operand 2
;; (predicate and constraint supplied by the <round_saeonly_*>
;; attributes) and an immediate operand 3 in 0..15.  Emits
;; vrange<ssescalarmodesuffix>.
;; NOTE(review): the rtx codes wrapping the operand vector and the insn
;; condition are not visible in this listing.
18419 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
18420 [(set (match_operand:VF_128 0 "register_operand" "=v")
18423 [(match_operand:VF_128 1 "register_operand" "v")
18424 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18425 (match_operand:SI 3 "const_0_to_15_operand")]
18430 "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
18431 [(set_attr "type" "sse")
18432 (set_attr "prefix" "evex")
18433 (set_attr "mode" "<MODE>")])
;; Packed vfpclass for the VF_AVX512VL modes: vector register operand 1
;; and immediate operand 2 (0..255) yield a mask-mode result in a "Yk"
;; register.  Emits vfpclass<ssemodesuffix>.
;; The ';' after the output template starts an (empty) md comment.
;; NOTE(review): the unspec name and insn condition are not visible in
;; this listing.
18435 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
18436 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18437 (unspec:<avx512fmaskmode>
18438 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18439 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18442 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
18443 [(set_attr "type" "sse")
18444 (set_attr "length_immediate" "1")
18445 (set_attr "prefix" "evex")
18446 (set_attr "mode" "<MODE>")])
;; Scalar vfpclass for the VF_128 modes: the mask-mode unspec over vector
;; register operand 1 and immediate operand 2 (0..255) is wrapped in an
;; "and" before being stored to the "Yk" destination.  Emits
;; vfpclass<ssescalarmodesuffix>.
;; The ';' after the output template starts an (empty) md comment.
;; NOTE(review): the unspec name, the second operand of the "and" and the
;; insn condition are not visible in this listing.
18448 (define_insn "avx512dq_vmfpclass<mode>"
18449 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18450 (and:<avx512fmaskmode>
18451 (unspec:<avx512fmaskmode>
18452 [(match_operand:VF_128 1 "register_operand" "v")
18453 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18457 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
18458 [(set_attr "type" "sse")
18459 (set_attr "length_immediate" "1")
18460 (set_attr "prefix" "evex")
18461 (set_attr "mode" "<MODE>")])
;; Packed vgetmant for the VF_AVX512VL modes: operand 1 (nonimmediate,
;; constraint from <round_saeonly_constraint>) and immediate operand 2 in
;; 0..15.  Emits vgetmant<ssemodesuffix>.  No "type" attribute is set here.
;; The ';' after the output template starts an (empty) md comment.
;; NOTE(review): the unspec name and insn condition are not visible in
;; this listing.
18463 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
18464 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18465 (unspec:VF_AVX512VL
18466 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
18467 (match_operand:SI 2 "const_0_to_15_operand")]
18470 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
18471 [(set_attr "prefix" "evex")
18472 (set_attr "mode" "<MODE>")])
;; Scalar vgetmant for the VF_128 modes: register operand 1, operand 2
;; (predicate and constraint supplied by the <round_saeonly_*>
;; attributes) and immediate operand 3 in 0..15.  Emits
;; vgetmant<ssescalarmodesuffix>; the "mode" attribute is the scalar mode.
;; The ';' after the output template starts an (empty) md comment.
;; NOTE(review): the rtx codes wrapping the operand vector and the insn
;; condition are not visible in this listing.
18474 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
18475 [(set (match_operand:VF_128 0 "register_operand" "=v")
18478 [(match_operand:VF_128 1 "register_operand" "v")
18479 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18480 (match_operand:SI 3 "const_0_to_15_operand")]
18485 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
18486 [(set_attr "prefix" "evex")
18487 (set_attr "mode" "<ssescalarmode>")])
18489 ;; The correct representation for this is absolutely enormous, and
18490 ;; surely not generally useful.
;; Hence vdbpsadbw is modelled as an unspec over the VI2_AVX512VL modes:
;; operands 1 and 2 are in <dbpsadbwmode> (operand 2 may be memory) and
;; operand 3 is an immediate in 0..255.
;; NOTE(review): the unspec name and insn condition are not visible in
;; this listing.
18491 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
18492 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18493 (unspec:VI2_AVX512VL
18494 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
18495 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
18496 (match_operand:SI 3 "const_0_to_255_operand")]
18499 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
18500 [(set_attr "isa" "avx")
18501 (set_attr "type" "sselog1")
18502 (set_attr "length_immediate" "1")
18503 (set_attr "prefix" "evex")
18504 (set_attr "mode" "<sseinsnmode>")])
;; Named pattern "clz<mode>2" for the VI48_AVX512VL modes: operand 1
;; (register or memory) to register operand 0, emitted as
;; vplzcnt<ssemodesuffix>.
;; NOTE(review): the rtx code wrapping operand 1 (presumably clz) and the
;; insn condition are not visible in this listing.
18506 (define_insn "clz<mode>2<mask_name>"
18507 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18509 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
18511 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18512 [(set_attr "type" "sse")
18513 (set_attr "prefix" "evex")
18514 (set_attr "mode" "<sseinsnmode>")])
;; vpconflict for the VI48_AVX512VL modes, modelled as a one-operand
;; unspec: operand 1 (register or memory) to register operand 0.
;; NOTE(review): the unspec name and insn condition are not visible in
;; this listing.
18516 (define_insn "<mask_codefor>conflict<mode><mask_name>"
18517 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18518 (unspec:VI48_AVX512VL
18519 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
18522 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18523 [(set_attr "type" "sse")
18524 (set_attr "prefix" "evex")
18525 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1 on V4SI: operand 1 is tied to the destination ("0") and
;; operand 2 may be a register or memory, so the template prints only
;; operands 2 and 0.
;; NOTE(review): the unspec opener/name and insn condition are not visible
;; in this listing.
18527 (define_insn "sha1msg1"
18528 [(set (match_operand:V4SI 0 "register_operand" "=x")
18530 [(match_operand:V4SI 1 "register_operand" "0")
18531 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18534 "sha1msg1\t{%2, %0|%0, %2}"
18535 [(set_attr "type" "sselog1")
18536 (set_attr "mode" "TI")])
;; sha1msg2: same operand shape as sha1msg1 -- operand 1 is tied to the
;; destination ("0"), operand 2 is a register or memory source.
18538 (define_insn "sha1msg2"
18539 [(set (match_operand:V4SI 0 "register_operand" "=x")
18540 (unspec:V4SI
18541 [(match_operand:V4SI 1 "register_operand" "0")
18542 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18543 UNSPEC_SHA1MSG2))]
18544 "TARGET_SHA"
18545 "sha1msg2\t{%2, %0|%0, %2}"
18546 [(set_attr "type" "sselog1")
18547 (set_attr "mode" "TI")])
;; sha1nexte: destructive two-operand form.  Operand 1 shares the
;; destination register ("0"); operand 2 may be a register or memory.
18549 (define_insn "sha1nexte"
18550 [(set (match_operand:V4SI 0 "register_operand" "=x")
18551 (unspec:V4SI
18552 [(match_operand:V4SI 1 "register_operand" "0")
18553 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18554 UNSPEC_SHA1NEXTE))]
18555 "TARGET_SHA"
18556 "sha1nexte\t{%2, %0|%0, %2}"
18557 [(set_attr "type" "sselog1")
18558 (set_attr "mode" "TI")])
;; sha1rnds4: destructive form with an immediate.  Operand 1 is tied to
;; the destination ("0"), operand 2 is a register or memory source and
;; operand 3 is a compile-time constant in the range 0..3, accounted for
;; as one immediate byte via length_immediate.
18560 (define_insn "sha1rnds4"
18561 [(set (match_operand:V4SI 0 "register_operand" "=x")
18562 (unspec:V4SI
18563 [(match_operand:V4SI 1 "register_operand" "0")
18564 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18565 (match_operand:SI 3 "const_0_to_3_operand" "n")]
18566 UNSPEC_SHA1RNDS4))]
18567 "TARGET_SHA"
18568 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
18569 [(set_attr "type" "sselog1")
18570 (set_attr "length_immediate" "1")
18571 (set_attr "mode" "TI")])
;; sha256msg1: destructive two-operand form.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
18573 (define_insn "sha256msg1"
18574 [(set (match_operand:V4SI 0 "register_operand" "=x")
18575 (unspec:V4SI
18576 [(match_operand:V4SI 1 "register_operand" "0")
18577 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18578 UNSPEC_SHA256MSG1))]
18579 "TARGET_SHA"
18580 "sha256msg1\t{%2, %0|%0, %2}"
18581 [(set_attr "type" "sselog1")
18582 (set_attr "mode" "TI")])
;; sha256msg2: same operand shape as sha256msg1 -- operand 1 shares the
;; destination register ("0"), operand 2 is a register or memory source.
18584 (define_insn "sha256msg2"
18585 [(set (match_operand:V4SI 0 "register_operand" "=x")
18586 (unspec:V4SI
18587 [(match_operand:V4SI 1 "register_operand" "0")
18588 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18589 UNSPEC_SHA256MSG2))]
18590 "TARGET_SHA"
18591 "sha256msg2\t{%2, %0|%0, %2}"
18592 [(set_attr "type" "sselog1")
18593 (set_attr "mode" "TI")])
;; sha256rnds2: destructive form with a third register input.  Operand 1
;; is tied to the destination ("0"), operand 2 is a register or memory
;; source, and operand 3 must satisfy the "Yz" constraint (presumably the
;; first SSE register -- see the i386 constraint definitions).
;; NOTE(review): length_immediate is set to 1 although none of the
;; operands is an immediate; confirm whether this is intentional before
;; relying on the computed insn length.
18595 (define_insn "sha256rnds2"
18596 [(set (match_operand:V4SI 0 "register_operand" "=x")
18597 (unspec:V4SI
18598 [(match_operand:V4SI 1 "register_operand" "0")
18599 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18600 (match_operand:V4SI 3 "register_operand" "Yz")]
18601 UNSPEC_SHA256RNDS2))]
18602 "TARGET_SHA"
18603 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
18604 [(set_attr "type" "sselog1")
18605 (set_attr "length_immediate" "1")
18606 (set_attr "mode" "TI")])
;; Cast from the quarter-width vector mode (<ssequartermode>) to the full
;; AVX512MODE2P mode.  The insn never emits code itself ("#"); after
;; reload it is split into a single move.  Alternative 0 has a register
;; destination, alternative 1 a memory destination with a register source.
18608 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
18609 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18610 (unspec:AVX512MODE2P
18611 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
18612 UNSPEC_CAST))]
18613 "TARGET_AVX512F"
18614 "#"
18615 "&& reload_completed"
18616 [(const_int 0)]
18617 {
18618 rtx op0 = operands[0];
18619 rtx op1 = operands[1];
/* Make both sides of the move agree on a mode.  REGNO is only valid on
   a REG, so re-type whichever side is the register: the destination when
   it is one (move in the narrow mode), otherwise the source (move in the
   wide mode to the memory destination).  */
18620 if (REG_P (op0))
18621 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
18622 else
18623 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18624 emit_move_insn (op0, op1);
18625 DONE;
18626 })
;; Cast from the half-width vector mode (<ssehalfvecmode>) to the full
;; AVX512MODE2P mode.  The insn never emits code itself ("#"); after
;; reload it is split into a single move.  Alternative 0 has a register
;; destination, alternative 1 a memory destination with a register source.
18628 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
18629 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18630 (unspec:AVX512MODE2P
18631 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18632 UNSPEC_CAST))]
18633 "TARGET_AVX512F"
18634 "#"
18635 "&& reload_completed"
18636 [(const_int 0)]
18637 {
18638 rtx op0 = operands[0];
18639 rtx op1 = operands[1];
/* Make both sides of the move agree on a mode.  REGNO is only valid on
   a REG, so re-type whichever side is the register: the destination when
   it is one (move in the narrow mode), otherwise the source (move in the
   wide mode to the memory destination).  */
18640 if (REG_P (op0))
18641 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
18642 else
18643 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18644 emit_move_insn (op0, op1);
18645 DONE;
18646 })
;; Int iterator over the two VPMADD52 unspec codes (low and high variant),
;; so one pattern using VPMADD52 expands into a pattern per code.
18648 (define_int_iterator VPMADD52
18649 [UNSPEC_VPMADD52LUQ
18650 UNSPEC_VPMADD52HUQ])
;; Maps each VPMADD52 unspec code to the "luq" / "huq" suffix that is
;; substituted wherever <vpmadd52type> appears in a name or template.
18652 (define_int_attr vpmadd52type
18653 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
;; Expander for the "huq" _maskz variant.  It emits no RTL of its own: it
;; forwards operands 0-3 to the _maskz_1 insn, supplying an all-zero
;; vector (CONST0_RTX) and the mask register (operand 4) as the two
;; extra operands.
;; NOTE: the name is spelled "vpamdd52" (not "vpmadd52"); it is kept
;; because the generator functions are referenced under this spelling.
18655 (define_expand "vpamdd52huq<mode>_maskz"
18656 [(match_operand:VI8_AVX512VL 0 "register_operand")
18657 (match_operand:VI8_AVX512VL 1 "register_operand")
18658 (match_operand:VI8_AVX512VL 2 "register_operand")
18659 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18660 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18661 "TARGET_AVX512IFMA"
18662 {
18663 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
18664 operands[0], operands[1], operands[2], operands[3],
18665 CONST0_RTX (<MODE>mode), operands[4]));
18666 DONE;
18667 })
;; Expander for the "luq" _maskz variant.  It emits no RTL of its own: it
;; forwards operands 0-3 to the _maskz_1 insn, supplying an all-zero
;; vector (CONST0_RTX) and the mask register (operand 4) as the two
;; extra operands.
;; NOTE: the name is spelled "vpamdd52" (not "vpmadd52"); it is kept
;; because the generator functions are referenced under this spelling.
18669 (define_expand "vpamdd52luq<mode>_maskz"
18670 [(match_operand:VI8_AVX512VL 0 "register_operand")
18671 (match_operand:VI8_AVX512VL 1 "register_operand")
18672 (match_operand:VI8_AVX512VL 2 "register_operand")
18673 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18674 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18675 "TARGET_AVX512IFMA"
18676 {
18677 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
18678 operands[0], operands[1], operands[2], operands[3],
18679 CONST0_RTX (<MODE>mode), operands[4]));
18680 DONE;
18681 })
;; Unmasked form (and, through <sd_maskz_name> / <sd_mask_op4>, the
;; variant generated by the substitution machinery defined elsewhere in
;; this file) of vpmadd52luq / vpmadd52huq.  Operand 1 is tied to the
;; destination ("0"), so only %0, %2 and %3 appear in the template;
;; operand 3 may be a register or memory.
18683 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
18684 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18685 (unspec:VI8_AVX512VL
18686 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18687 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18688 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18689 VPMADD52))]
18690 "TARGET_AVX512IFMA"
18691 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18692 [(set_attr "type" "ssemuladd")
18693 (set_attr "prefix" "evex")
18694 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked form of vpmadd52luq / vpmadd52huq.  The unspec result is
;; vec_merge'd with operand 1 (which is also tied to the destination by
;; the "0" constraint) under the mask register in operand 4 ("Yk"); the
;; mask is printed as {%4} after the destination.
18696 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
18697 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18698 (vec_merge:VI8_AVX512VL
18699 (unspec:VI8_AVX512VL
18700 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18701 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18702 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18703 VPMADD52)
18704 (match_dup 1)
18705 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18706 "TARGET_AVX512IFMA"
18707 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
18708 [(set_attr "type" "ssemuladd")
18709 (set_attr "prefix" "evex")
18710 (set_attr "mode" "<sseinsnmode>")])
;; vpmultishiftqb over the VI1_AVX512VL modes, kept as an opaque
;; two-input unspec (UNSPEC_VPMULTISHIFT) and enabled by
;; TARGET_AVX512VBMI.  Operand 1 must be a register; operand 2 may be a
;; register or memory.  <mask_name> / <mask_operand3> come from the
;; masking substitution machinery defined elsewhere in this file.
18712 (define_insn "vpmultishiftqb<mode><mask_name>"
18713 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18714 (unspec:VI1_AVX512VL
18715 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18716 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
18717 UNSPEC_VPMULTISHIFT))]
18718 "TARGET_AVX512VBMI"
18719 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18720 [(set_attr "type" "sselog")
18721 (set_attr "prefix" "evex")
18722 (set_attr "mode" "<sseinsnmode>")])