/* Copyright (C) 2014-2015 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
   <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
28 #ifndef _AVX512VLDQINTRIN_H_INCLUDED
29 #define _AVX512VLDQINTRIN_H_INCLUDED
/* If the translation unit is not already built with both AVX512VL and
   AVX512DQ, save the current target options, enable the two extensions
   for the definitions that follow, and record that this was done in
   __DISABLE_AVX512VLDQ__ (presumably checked by a matching pop_options
   further down -- not visible from here).  */
#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512dq")
#define __DISABLE_AVX512VLDQ__
#endif /* !__AVX512VL__ || !__AVX512DQ__ */
37 extern __inline __m256i
38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39 _mm256_cvttpd_epi64 (__m256d __A)
41 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
43 _mm256_setzero_si256 (),
47 extern __inline __m256i
48 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
49 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
51 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
56 extern __inline __m256i
57 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
58 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
60 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
62 _mm256_setzero_si256 (),
66 extern __inline __m128i
67 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
68 _mm_cvttpd_epi64 (__m128d __A)
70 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
76 extern __inline __m128i
77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
80 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
85 extern __inline __m128i
86 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
87 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
89 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
95 extern __inline __m256i
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm256_cvttpd_epu64 (__m256d __A)
99 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
101 _mm256_setzero_si256 (),
105 extern __inline __m256i
106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
109 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
114 extern __inline __m256i
115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
118 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
120 _mm256_setzero_si256 (),
124 extern __inline __m128i
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _mm_cvttpd_epu64 (__m128d __A)
128 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
134 extern __inline __m128i
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
138 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
143 extern __inline __m128i
144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
147 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
149 _mm_setzero_si128 (),
153 extern __inline __m256i
154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 _mm256_cvtpd_epi64 (__m256d __A)
157 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
159 _mm256_setzero_si256 (),
163 extern __inline __m256i
164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
165 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
167 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
172 extern __inline __m256i
173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
176 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
178 _mm256_setzero_si256 (),
182 extern __inline __m128i
183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
184 _mm_cvtpd_epi64 (__m128d __A)
186 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
192 extern __inline __m128i
193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
194 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
196 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
201 extern __inline __m128i
202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
203 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
205 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
207 _mm_setzero_si128 (),
211 extern __inline __m256i
212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
213 _mm256_cvtpd_epu64 (__m256d __A)
215 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
217 _mm256_setzero_si256 (),
221 extern __inline __m256i
222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
223 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
225 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
230 extern __inline __m256i
231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
232 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
234 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
236 _mm256_setzero_si256 (),
240 extern __inline __m128i
241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
242 _mm_cvtpd_epu64 (__m128d __A)
244 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
250 extern __inline __m128i
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
254 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
259 extern __inline __m128i
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
263 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
265 _mm_setzero_si128 (),
269 extern __inline __m256i
270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271 _mm256_cvttps_epi64 (__m128 __A)
273 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
275 _mm256_setzero_si256 (),
279 extern __inline __m256i
280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
283 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
288 extern __inline __m256i
289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
290 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
292 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
294 _mm256_setzero_si256 (),
298 extern __inline __m128i
299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
300 _mm_cvttps_epi64 (__m128 __A)
302 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
308 extern __inline __m128i
309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
312 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
317 extern __inline __m128i
318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
321 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
327 extern __inline __m256i
328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
329 _mm256_cvttps_epu64 (__m128 __A)
331 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
333 _mm256_setzero_si256 (),
337 extern __inline __m256i
338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
341 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
346 extern __inline __m256i
347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
350 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
352 _mm256_setzero_si256 (),
356 extern __inline __m128i
357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
358 _mm_cvttps_epu64 (__m128 __A)
360 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
366 extern __inline __m128i
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
368 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
370 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
375 extern __inline __m128i
376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
377 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
379 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
385 extern __inline __m256d
386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387 _mm256_broadcast_f64x2 (__m128d __A)
389 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
391 (__v4df)_mm256_undefined_pd(),
396 extern __inline __m256d
397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
398 _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
400 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
406 extern __inline __m256d
407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
410 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
413 _mm256_setzero_ps (),
417 extern __inline __m256i
418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419 _mm256_broadcast_i64x2 (__m128i __A)
421 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
423 (__v4di)_mm256_undefined_si256(),
428 extern __inline __m256i
429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
430 _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
432 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
438 extern __inline __m256i
439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
440 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
442 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
445 _mm256_setzero_si256 (),
449 extern __inline __m256
450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
451 _mm256_broadcast_f32x2 (__m128 __A)
453 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
454 (__v8sf)_mm256_undefined_ps(),
459 extern __inline __m256
460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
463 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
468 extern __inline __m256
469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
470 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
472 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
474 _mm256_setzero_ps (),
478 extern __inline __m256i
479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
480 _mm256_broadcast_i32x2 (__m128i __A)
482 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
484 (__v8si)_mm256_undefined_si256(),
489 extern __inline __m256i
490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
491 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
493 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
499 extern __inline __m256i
500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
503 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
506 _mm256_setzero_si256 (),
510 extern __inline __m128i
511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 _mm_broadcast_i32x2 (__m128i __A)
514 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
516 (__v4si)_mm_undefined_si128(),
521 extern __inline __m128i
522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
523 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
525 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
531 extern __inline __m128i
532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
533 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
535 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
538 _mm_setzero_si128 (),
542 extern __inline __m256i
543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
544 _mm256_mullo_epi64 (__m256i __A, __m256i __B)
546 return (__m256i) ((__v4du) __A * (__v4du) __B);
549 extern __inline __m256i
550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
551 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
554 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
560 extern __inline __m256i
561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
562 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
564 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
567 _mm256_setzero_si256 (),
571 extern __inline __m128i
572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
573 _mm_mullo_epi64 (__m128i __A, __m128i __B)
575 return (__m128i) ((__v2du) __A * (__v2du) __B);
578 extern __inline __m128i
579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
580 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
583 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
589 extern __inline __m128i
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
591 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
593 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
600 extern __inline __m256d
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
605 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
611 extern __inline __m256d
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
615 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
618 _mm256_setzero_pd (),
622 extern __inline __m128d
623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
624 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
627 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
633 extern __inline __m128d
634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
637 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
644 extern __inline __m256
645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
649 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
655 extern __inline __m256
656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
659 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
662 _mm256_setzero_ps (),
666 extern __inline __m128
667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
668 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
670 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
676 extern __inline __m128
677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
680 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
687 extern __inline __m256i
688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689 _mm256_cvtps_epi64 (__m128 __A)
691 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
693 _mm256_setzero_si256 (),
697 extern __inline __m256i
698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
701 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
706 extern __inline __m256i
707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
708 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
710 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
712 _mm256_setzero_si256 (),
716 extern __inline __m128i
717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718 _mm_cvtps_epi64 (__m128 __A)
720 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
726 extern __inline __m128i
727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
728 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
730 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
735 extern __inline __m128i
736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
737 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
739 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
745 extern __inline __m256i
746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
747 _mm256_cvtps_epu64 (__m128 __A)
749 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
751 _mm256_setzero_si256 (),
755 extern __inline __m256i
756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
757 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
759 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
764 extern __inline __m256i
765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
766 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
768 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
770 _mm256_setzero_si256 (),
774 extern __inline __m128i
775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776 _mm_cvtps_epu64 (__m128 __A)
778 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
784 extern __inline __m128i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
788 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
793 extern __inline __m128i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
797 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
803 extern __inline __m128
804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
805 _mm256_cvtepi64_ps (__m256i __A)
807 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
813 extern __inline __m128
814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
815 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
817 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
822 extern __inline __m128
823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
824 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
826 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
832 extern __inline __m128
833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 _mm_cvtepi64_ps (__m128i __A)
836 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
842 extern __inline __m128
843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
844 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
846 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
851 extern __inline __m128
852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
855 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
861 extern __inline __m128
862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863 _mm256_cvtepu64_ps (__m256i __A)
865 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
871 extern __inline __m128
872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
875 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
880 extern __inline __m128
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
884 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
890 extern __inline __m128
891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
892 _mm_cvtepu64_ps (__m128i __A)
894 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
900 extern __inline __m128
901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
904 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
909 extern __inline __m128
910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
913 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
919 extern __inline __m256d
920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
921 _mm256_cvtepi64_pd (__m256i __A)
923 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
925 _mm256_setzero_pd (),
929 extern __inline __m256d
930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
931 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
933 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
938 extern __inline __m256d
939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
942 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
944 _mm256_setzero_pd (),
948 extern __inline __m128d
949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
950 _mm_cvtepi64_pd (__m128i __A)
952 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
958 extern __inline __m128d
959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
962 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
967 extern __inline __m128d
968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
971 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
977 extern __inline __m256d
978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
979 _mm256_cvtepu64_pd (__m256i __A)
981 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
983 _mm256_setzero_pd (),
987 extern __inline __m256d
988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
991 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
996 extern __inline __m256d
997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
998 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
1000 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
1002 _mm256_setzero_pd (),
1006 extern __inline __m256d
1007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1008 _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
1011 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1017 extern __inline __m256d
1018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1019 _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
1021 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
1024 _mm256_setzero_pd (),
1028 extern __inline __m128d
1029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1030 _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1032 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1038 extern __inline __m128d
1039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1040 _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
1042 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
1049 extern __inline __m256
1050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1051 _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1053 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1059 extern __inline __m256
1060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1061 _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
1063 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
1066 _mm256_setzero_ps (),
1070 extern __inline __m128
1071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1074 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1080 extern __inline __m128
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
1084 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
1091 extern __inline __m128d
1092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093 _mm_cvtepu64_pd (__m128i __A)
1095 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1101 extern __inline __m128d
1102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
1105 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1110 extern __inline __m128d
1111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
1114 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
1120 extern __inline __m256d
1121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
1125 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1131 extern __inline __m256d
1132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1133 _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
1135 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
1138 _mm256_setzero_pd (),
1142 extern __inline __m128d
1143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1144 _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1146 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1152 extern __inline __m128d
1153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1154 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
1156 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
1163 extern __inline __m256
1164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1165 _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1167 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1173 extern __inline __m256
1174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1175 _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
1177 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
1180 _mm256_setzero_ps (),
1184 extern __inline __m128
1185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1186 _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1188 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1194 extern __inline __m128
1195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196 _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
1198 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
1205 extern __inline __m256d
1206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1207 _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
1209 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1215 extern __inline __m256d
1216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1217 _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
1219 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
1222 _mm256_setzero_pd (),
1226 extern __inline __m128d
1227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1228 _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1230 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1236 extern __inline __m128d
1237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1238 _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
1240 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
1247 extern __inline __m256
1248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1249 _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
1251 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1257 extern __inline __m256
1258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1259 _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
1261 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
1264 _mm256_setzero_ps (),
1268 extern __inline __m128
1269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1270 _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1272 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1278 extern __inline __m128
1279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1280 _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
1282 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
1289 extern __inline __m128i
1290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291 _mm_movm_epi32 (__mmask8 __A)
1293 return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1296 extern __inline __m256i
1297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1298 _mm256_movm_epi32 (__mmask8 __A)
1300 return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1303 extern __inline __m128i
1304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1305 _mm_movm_epi64 (__mmask8 __A)
1307 return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1310 extern __inline __m256i
1311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312 _mm256_movm_epi64 (__mmask8 __A)
1314 return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1317 extern __inline __mmask8
1318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1319 _mm_movepi32_mask (__m128i __A)
1321 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
1324 extern __inline __mmask8
1325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326 _mm256_movepi32_mask (__m256i __A)
1328 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
1331 extern __inline __mmask8
1332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333 _mm_movepi64_mask (__m128i __A)
1335 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1338 extern __inline __mmask8
1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340 _mm256_movepi64_mask (__m256i __A)
1342 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1346 extern __inline __m128d
1347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1348 _mm256_extractf64x2_pd (__m256d __A, const int __imm)
1350 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1358 extern __inline __m128d
1359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1360 _mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
1363 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1370 extern __inline __m128d
1371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1372 _mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
1375 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
1383 extern __inline __m128i
1384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1385 _mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
1387 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1395 extern __inline __m128i
1396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397 _mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
1400 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1407 extern __inline __m128i
1408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409 _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
1412 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
1420 extern __inline __m256d
1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422 _mm256_reduce_pd (__m256d __A, int __B)
1424 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1426 _mm256_setzero_pd (),
1430 extern __inline __m256d
1431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432 _mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
1434 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1439 extern __inline __m256d
1440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1441 _mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
1443 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
1445 _mm256_setzero_pd (),
1449 extern __inline __m128d
1450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451 _mm_reduce_pd (__m128d __A, int __B)
1453 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1459 extern __inline __m128d
1460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1461 _mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
1463 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1468 extern __inline __m128d
1469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1470 _mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
1472 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
1478 extern __inline __m256
1479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1480 _mm256_reduce_ps (__m256 __A, int __B)
1482 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1484 _mm256_setzero_ps (),
1488 extern __inline __m256
1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1490 _mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
1492 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1497 extern __inline __m256
1498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499 _mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
1501 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
1503 _mm256_setzero_ps (),
1507 extern __inline __m128
1508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509 _mm_reduce_ps (__m128 __A, int __B)
1511 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1517 extern __inline __m128
1518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1519 _mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
1521 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1526 extern __inline __m128
1527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528 _mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
1530 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
1536 extern __inline __m256d
1537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538 _mm256_range_pd (__m256d __A, __m256d __B, int __C)
1540 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1543 _mm256_setzero_pd (),
1547 extern __inline __m256d
1548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549 _mm256_mask_range_pd (__m256d __W, __mmask8 __U,
1550 __m256d __A, __m256d __B, int __C)
1552 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1558 extern __inline __m256d
1559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1560 _mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
1562 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
1565 _mm256_setzero_pd (),
1569 extern __inline __m128d
1570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571 _mm_range_pd (__m128d __A, __m128d __B, int __C)
1573 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1580 extern __inline __m128d
1581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1582 _mm_mask_range_pd (__m128d __W, __mmask8 __U,
1583 __m128d __A, __m128d __B, int __C)
1585 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1591 extern __inline __m128d
1592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1593 _mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1595 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
1602 extern __inline __m256
1603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1604 _mm256_range_ps (__m256 __A, __m256 __B, int __C)
1606 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1609 _mm256_setzero_ps (),
1613 extern __inline __m256
1614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1615 _mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
1618 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1624 extern __inline __m256
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 _mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
1628 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
1631 _mm256_setzero_ps (),
1635 extern __inline __m128
1636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1637 _mm_range_ps (__m128 __A, __m128 __B, int __C)
1639 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1646 extern __inline __m128
1647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648 _mm_mask_range_ps (__m128 __W, __mmask8 __U,
1649 __m128 __A, __m128 __B, int __C)
1651 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1657 extern __inline __m128
1658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1659 _mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1661 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
1668 extern __inline __mmask8
1669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670 _mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
1673 return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1677 extern __inline __mmask8
1678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1679 _mm256_fpclass_pd_mask (__m256d __A, const int __imm)
1681 return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
1686 extern __inline __mmask8
1687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1688 _mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
1690 return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1694 extern __inline __mmask8
1695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1696 _mm256_fpclass_ps_mask (__m256 __A, const int __imm)
1698 return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
1703 extern __inline __mmask8
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
1707 return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1711 extern __inline __mmask8
1712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1713 _mm_fpclass_pd_mask (__m128d __A, const int __imm)
1715 return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
1720 extern __inline __mmask8
1721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1722 _mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
1724 return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1728 extern __inline __mmask8
1729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 _mm_fpclass_ps_mask (__m128 __A, const int __imm)
1732 return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
1737 extern __inline __m256i
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
1741 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1745 _mm256_setzero_si256 (),
1750 extern __inline __m256i
1751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752 _mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
1753 __m128i __B, const int __imm)
1755 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1763 extern __inline __m256i
1764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1765 _mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
1768 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
1772 _mm256_setzero_si256 (),
1777 extern __inline __m256d
1778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1779 _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
1781 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1785 _mm256_setzero_pd (),
1790 extern __inline __m256d
1791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1792 _mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
1793 __m128d __B, const int __imm)
1795 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1803 extern __inline __m256d
1804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1805 _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
1808 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
1812 _mm256_setzero_pd (),
1818 #define _mm256_insertf64x2(X, Y, C) \
1819 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
1820 (__v2df)(__m128d) (Y), (int) (C), \
1821 (__v4df)(__m256d)_mm256_setzero_pd(), \
1824 #define _mm256_mask_insertf64x2(W, U, X, Y, C) \
1825 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
1826 (__v2df)(__m128d) (Y), (int) (C), \
1827 (__v4df)(__m256d)(W), \
1830 #define _mm256_maskz_insertf64x2(U, X, Y, C) \
1831 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
1832 (__v2df)(__m128d) (Y), (int) (C), \
1833 (__v4df)(__m256d)_mm256_setzero_pd(), \
1836 #define _mm256_inserti64x2(X, Y, C) \
1837 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
1838 (__v2di)(__m128i) (Y), (int) (C), \
1839 (__v4di)(__m256i)_mm256_setzero_si256 (), \
1842 #define _mm256_mask_inserti64x2(W, U, X, Y, C) \
1843 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
1844 (__v2di)(__m128i) (Y), (int) (C), \
1845 (__v4di)(__m256i)(W), \
1848 #define _mm256_maskz_inserti64x2(U, X, Y, C) \
1849 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
1850 (__v2di)(__m128i) (Y), (int) (C), \
1851 (__v4di)(__m256i)_mm256_setzero_si256 (), \
/* Unmasked macro form.  Forwards SRC (viewed as __v4df) and the int
   immediate IMM to __builtin_ia32_extractf64x2_256_mask, together with
   a zero vector from _mm_setzero_pd () and -1 converted to __mmask8.
   The builtin's result is cast to __m128d.  */
#define _mm256_extractf64x2_pd(SRC, IMM) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ( \
     (__v4df) (__m256d) (SRC), \
     (int) (IMM), \
     (__v2df) (__m128d) _mm_setzero_pd (), \
     (__mmask8) -1))
/* Macro form with an explicit vector MERGE and mask K.  Forwards SRC
   (as __v4df), the int immediate IMM, MERGE (as __v2df) and K (as
   __mmask8) to __builtin_ia32_extractf64x2_256_mask; the result is
   cast to __m128d.  MERGE is presumably the fallback for elements
   whose mask bit is clear -- confirm against Intel's documentation.  */
#define _mm256_mask_extractf64x2_pd(MERGE, K, SRC, IMM) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ( \
     (__v4df) (__m256d) (SRC), \
     (int) (IMM), \
     (__v2df) (__m128d) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask K.  Forwards SRC (as __v4df), the
   int immediate IMM, a zero vector from _mm_setzero_pd () and K (as
   __mmask8) to __builtin_ia32_extractf64x2_256_mask; the result is
   cast to __m128d.  */
#define _mm256_maskz_extractf64x2_pd(K, SRC, IMM) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ( \
     (__v4df) (__m256d) (SRC), \
     (int) (IMM), \
     (__v2df) (__m128d) _mm_setzero_pd (), \
     (__mmask8) (K)))
/* Unmasked macro form.  Forwards X (viewed as __v4di) and the int
   immediate C to __builtin_ia32_extracti64x2_256_mask, together with a
   zero vector and -1 converted to __mmask8.  The builtin's result is
   cast to __m128i.

   The zero vector comes from the SSE2 intrinsic _mm_setzero_si128 ()
   rather than a header-private helper, so this macro depends only on
   documented intrinsics (as its _pd sibling does with
   _mm_setzero_pd ()).  */
#define _mm256_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ( \
     (__v4di) (__m256i) (X), \
     (int) (C), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))
/* Macro form with an explicit vector MERGE and mask K.  Forwards SRC
   (as __v4di), the int immediate IMM, MERGE (as __v2di) and K (as
   __mmask8) to __builtin_ia32_extracti64x2_256_mask; the result is
   cast to __m128i.  MERGE is presumably the fallback for elements
   whose mask bit is clear -- confirm against Intel's documentation.  */
#define _mm256_mask_extracti64x2_epi64(MERGE, K, SRC, IMM) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ( \
     (__v4di) (__m256i) (SRC), \
     (int) (IMM), \
     (__v2di) (__m128i) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask U.  Forwards X (viewed as __v4di),
   the int immediate C, a zero vector and U (as __mmask8) to
   __builtin_ia32_extracti64x2_256_mask; the result is cast to __m128i.

   The zero vector comes from the SSE2 intrinsic _mm_setzero_si128 ()
   rather than a header-private helper, so this macro depends only on
   documented intrinsics (as its _pd sibling does with
   _mm_setzero_pd ()).  */
#define _mm256_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ( \
     (__v4di) (__m256i) (X), \
     (int) (C), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
/* Unmasked macro form.  Passes V (as __v4df), the int immediate IMM, a
   zero vector from _mm256_setzero_pd () and -1 converted to __mmask8
   to __builtin_ia32_reducepd256_mask; the result is cast to __m256d.  */
#define _mm256_reduce_pd(V, IMM) \
  ((__m256d) __builtin_ia32_reducepd256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (IMM), \
     (__v4df) _mm256_setzero_pd (), \
     (__mmask8) -1))
/* Macro form with an explicit vector MERGE and mask K.  Passes V (as
   __v4df), the int immediate IMM, MERGE (as __v4df) and K (as
   __mmask8) to __builtin_ia32_reducepd256_mask; the result is cast to
   __m256d.  MERGE is presumably the fallback for elements whose mask
   bit is clear -- confirm against Intel's documentation.  */
#define _mm256_mask_reduce_pd(MERGE, K, V, IMM) \
  ((__m256d) __builtin_ia32_reducepd256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (IMM), \
     (__v4df) (__m256d) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask K.  Passes V (as __v4df), the int
   immediate IMM, a zero vector from _mm256_setzero_pd () and K (as
   __mmask8) to __builtin_ia32_reducepd256_mask; the result is cast to
   __m256d.  */
#define _mm256_maskz_reduce_pd(K, V, IMM) \
  ((__m256d) __builtin_ia32_reducepd256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (IMM), \
     (__v4df) _mm256_setzero_pd (), \
     (__mmask8) (K)))
/* Unmasked macro form.  Passes V (as __v2df), the int immediate IMM, a
   zero vector from _mm_setzero_pd () and -1 converted to __mmask8 to
   __builtin_ia32_reducepd128_mask; the result is cast to __m128d.  */
#define _mm_reduce_pd(V, IMM) \
  ((__m128d) __builtin_ia32_reducepd128_mask ( \
     (__v2df) (__m128d) (V), \
     (int) (IMM), \
     (__v2df) _mm_setzero_pd (), \
     (__mmask8) -1))
/* Macro form with an explicit vector MERGE and mask K.  Passes V (as
   __v2df), the int immediate IMM, MERGE (as __v2df) and K (as
   __mmask8) to __builtin_ia32_reducepd128_mask; the result is cast to
   __m128d.  MERGE is presumably the fallback for elements whose mask
   bit is clear -- confirm against Intel's documentation.  */
#define _mm_mask_reduce_pd(MERGE, K, V, IMM) \
  ((__m128d) __builtin_ia32_reducepd128_mask ( \
     (__v2df) (__m128d) (V), \
     (int) (IMM), \
     (__v2df) (__m128d) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask K.  Passes V (as __v2df), the int
   immediate IMM, a zero vector from _mm_setzero_pd () and K (as
   __mmask8) to __builtin_ia32_reducepd128_mask; the result is cast to
   __m128d.  */
#define _mm_maskz_reduce_pd(K, V, IMM) \
  ((__m128d) __builtin_ia32_reducepd128_mask ( \
     (__v2df) (__m128d) (V), \
     (int) (IMM), \
     (__v2df) _mm_setzero_pd (), \
     (__mmask8) (K)))
/* Unmasked macro form.  Passes V (as __v8sf), the int immediate IMM, a
   zero vector from _mm256_setzero_ps () and -1 converted to __mmask8
   to __builtin_ia32_reduceps256_mask; the result is cast to __m256.  */
#define _mm256_reduce_ps(V, IMM) \
  ((__m256) __builtin_ia32_reduceps256_mask ( \
     (__v8sf) (__m256) (V), \
     (int) (IMM), \
     (__v8sf) _mm256_setzero_ps (), \
     (__mmask8) -1))
/* Macro form with an explicit vector MERGE and mask K.  Passes V (as
   __v8sf), the int immediate IMM, MERGE (as __v8sf) and K (as
   __mmask8) to __builtin_ia32_reduceps256_mask; the result is cast to
   __m256.  MERGE is presumably the fallback for elements whose mask
   bit is clear -- confirm against Intel's documentation.  */
#define _mm256_mask_reduce_ps(MERGE, K, V, IMM) \
  ((__m256) __builtin_ia32_reduceps256_mask ( \
     (__v8sf) (__m256) (V), \
     (int) (IMM), \
     (__v8sf) (__m256) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask K.  Passes V (as __v8sf), the int
   immediate IMM, a zero vector from _mm256_setzero_ps () and K (as
   __mmask8) to __builtin_ia32_reduceps256_mask; the result is cast to
   __m256.  */
#define _mm256_maskz_reduce_ps(K, V, IMM) \
  ((__m256) __builtin_ia32_reduceps256_mask ( \
     (__v8sf) (__m256) (V), \
     (int) (IMM), \
     (__v8sf) _mm256_setzero_ps (), \
     (__mmask8) (K)))
/* Unmasked macro form.  Passes V (as __v4sf), the int immediate IMM, a
   zero vector from _mm_setzero_ps () and -1 converted to __mmask8 to
   __builtin_ia32_reduceps128_mask; the result is cast to __m128.  */
#define _mm_reduce_ps(V, IMM) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
     (__v4sf) (__m128) (V), \
     (int) (IMM), \
     (__v4sf) _mm_setzero_ps (), \
     (__mmask8) -1))
/* Macro form with an explicit vector MERGE and mask K.  Passes V (as
   __v4sf), the int immediate IMM, MERGE (as __v4sf) and K (as
   __mmask8) to __builtin_ia32_reduceps128_mask; the result is cast to
   __m128.  MERGE is presumably the fallback for elements whose mask
   bit is clear -- confirm against Intel's documentation.  */
#define _mm_mask_reduce_ps(MERGE, K, V, IMM) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
     (__v4sf) (__m128) (V), \
     (int) (IMM), \
     (__v4sf) (__m128) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask K.  Passes V (as __v4sf), the int
   immediate IMM, a zero vector from _mm_setzero_ps () and K (as
   __mmask8) to __builtin_ia32_reduceps128_mask; the result is cast to
   __m128.  */
#define _mm_maskz_reduce_ps(K, V, IMM) \
  ((__m128) __builtin_ia32_reduceps128_mask ( \
     (__v4sf) (__m128) (V), \
     (int) (IMM), \
     (__v4sf) _mm_setzero_ps (), \
     (__mmask8) (K)))
/* Unmasked macro form.  Passes LHS and RHS (both as __v4df), the int
   immediate IMM, a zero vector from _mm256_setzero_pd () and -1
   converted to __mmask8 to __builtin_ia32_rangepd256_mask; the result
   is cast to __m256d.  */
#define _mm256_range_pd(LHS, RHS, IMM) \
  ((__m256d) __builtin_ia32_rangepd256_mask ( \
     (__v4df) (__m256d) (LHS), \
     (__v4df) (__m256d) (RHS), \
     (int) (IMM), \
     (__v4df) _mm256_setzero_pd (), \
     (__mmask8) -1))
/* Macro form with an explicit mask K.  Passes LHS and RHS (both as
   __v4df), the int immediate IMM, a zero vector from
   _mm256_setzero_pd () and K (as __mmask8) to
   __builtin_ia32_rangepd256_mask; the result is cast to __m256d.  */
#define _mm256_maskz_range_pd(K, LHS, RHS, IMM) \
  ((__m256d) __builtin_ia32_rangepd256_mask ( \
     (__v4df) (__m256d) (LHS), \
     (__v4df) (__m256d) (RHS), \
     (int) (IMM), \
     (__v4df) _mm256_setzero_pd (), \
     (__mmask8) (K)))
/* Unmasked macro form.  Passes LHS and RHS (both as __v2df), the int
   immediate IMM, a zero vector from _mm_setzero_pd () and -1 converted
   to __mmask8 to __builtin_ia32_rangepd128_mask; the result is cast to
   __m128d.  */
#define _mm_range_pd(LHS, RHS, IMM) \
  ((__m128d) __builtin_ia32_rangepd128_mask ( \
     (__v2df) (__m128d) (LHS), \
     (__v2df) (__m128d) (RHS), \
     (int) (IMM), \
     (__v2df) _mm_setzero_pd (), \
     (__mmask8) -1))
/* Unmasked macro form.  Passes LHS and RHS (both as __v8sf), the int
   immediate IMM, a zero vector from _mm256_setzero_ps () and -1
   converted to __mmask8 to __builtin_ia32_rangeps256_mask; the result
   is cast to __m256.  */
#define _mm256_range_ps(LHS, RHS, IMM) \
  ((__m256) __builtin_ia32_rangeps256_mask ( \
     (__v8sf) (__m256) (LHS), \
     (__v8sf) (__m256) (RHS), \
     (int) (IMM), \
     (__v8sf) _mm256_setzero_ps (), \
     (__mmask8) -1))
/* Macro form with an explicit vector MERGE and mask K.  Passes LHS and
   RHS (both as __v8sf), the int immediate IMM, MERGE (as __v8sf) and K
   (as __mmask8) to __builtin_ia32_rangeps256_mask; the result is cast
   to __m256.  MERGE is presumably the fallback for elements whose mask
   bit is clear -- confirm against Intel's documentation.  */
#define _mm256_mask_range_ps(MERGE, K, LHS, RHS, IMM) \
  ((__m256) __builtin_ia32_rangeps256_mask ( \
     (__v8sf) (__m256) (LHS), \
     (__v8sf) (__m256) (RHS), \
     (int) (IMM), \
     (__v8sf) (__m256) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask K.  Passes LHS and RHS (both as
   __v8sf), the int immediate IMM, a zero vector from
   _mm256_setzero_ps () and K (as __mmask8) to
   __builtin_ia32_rangeps256_mask; the result is cast to __m256.  */
#define _mm256_maskz_range_ps(K, LHS, RHS, IMM) \
  ((__m256) __builtin_ia32_rangeps256_mask ( \
     (__v8sf) (__m256) (LHS), \
     (__v8sf) (__m256) (RHS), \
     (int) (IMM), \
     (__v8sf) _mm256_setzero_ps (), \
     (__mmask8) (K)))
/* Unmasked macro form.  Passes LHS and RHS (both as __v4sf), the int
   immediate IMM, a zero vector from _mm_setzero_ps () and -1 converted
   to __mmask8 to __builtin_ia32_rangeps128_mask; the result is cast to
   __m128.  */
#define _mm_range_ps(LHS, RHS, IMM) \
  ((__m128) __builtin_ia32_rangeps128_mask ( \
     (__v4sf) (__m128) (LHS), \
     (__v4sf) (__m128) (RHS), \
     (int) (IMM), \
     (__v4sf) _mm_setzero_ps (), \
     (__mmask8) -1))
/* Macro form with an explicit vector MERGE and mask K.  Passes LHS and
   RHS (both as __v4sf), the int immediate IMM, MERGE (as __v4sf) and K
   (as __mmask8) to __builtin_ia32_rangeps128_mask; the result is cast
   to __m128.  MERGE is presumably the fallback for elements whose mask
   bit is clear -- confirm against Intel's documentation.  */
#define _mm_mask_range_ps(MERGE, K, LHS, RHS, IMM) \
  ((__m128) __builtin_ia32_rangeps128_mask ( \
     (__v4sf) (__m128) (LHS), \
     (__v4sf) (__m128) (RHS), \
     (int) (IMM), \
     (__v4sf) (__m128) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask K.  Passes LHS and RHS (both as
   __v4sf), the int immediate IMM, a zero vector from _mm_setzero_ps ()
   and K (as __mmask8) to __builtin_ia32_rangeps128_mask; the result is
   cast to __m128.  */
#define _mm_maskz_range_ps(K, LHS, RHS, IMM) \
  ((__m128) __builtin_ia32_rangeps128_mask ( \
     (__v4sf) (__m128) (LHS), \
     (__v4sf) (__m128) (RHS), \
     (int) (IMM), \
     (__v4sf) _mm_setzero_ps (), \
     (__mmask8) (K)))
/* Macro form with an explicit vector MERGE and mask K.  Passes LHS and
   RHS (both as __v4df), the int immediate IMM, MERGE (as __v4df) and K
   (as __mmask8) to __builtin_ia32_rangepd256_mask; the result is cast
   to __m256d.  MERGE is presumably the fallback for elements whose
   mask bit is clear -- confirm against Intel's documentation.  */
#define _mm256_mask_range_pd(MERGE, K, LHS, RHS, IMM) \
  ((__m256d) __builtin_ia32_rangepd256_mask ( \
     (__v4df) (__m256d) (LHS), \
     (__v4df) (__m256d) (RHS), \
     (int) (IMM), \
     (__v4df) (__m256d) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit vector MERGE and mask K.  Passes LHS and
   RHS (both as __v2df), the int immediate IMM, MERGE (as __v2df) and K
   (as __mmask8) to __builtin_ia32_rangepd128_mask; the result is cast
   to __m128d.  MERGE is presumably the fallback for elements whose
   mask bit is clear -- confirm against Intel's documentation.  */
#define _mm_mask_range_pd(MERGE, K, LHS, RHS, IMM) \
  ((__m128d) __builtin_ia32_rangepd128_mask ( \
     (__v2df) (__m128d) (LHS), \
     (__v2df) (__m128d) (RHS), \
     (int) (IMM), \
     (__v2df) (__m128d) (MERGE), \
     (__mmask8) (K)))
/* Macro form with an explicit mask K.  Passes LHS and RHS (both as
   __v2df), the int immediate IMM, a zero vector from _mm_setzero_pd ()
   and K (as __mmask8) to __builtin_ia32_rangepd128_mask; the result is
   cast to __m128d.  */
#define _mm_maskz_range_pd(K, LHS, RHS, IMM) \
  ((__m128d) __builtin_ia32_rangepd128_mask ( \
     (__v2df) (__m128d) (LHS), \
     (__v2df) (__m128d) (RHS), \
     (int) (IMM), \
     (__v2df) _mm_setzero_pd (), \
     (__mmask8) (K)))
/* Macro form with an explicit input mask K.  Passes V (as __v4df), the
   int immediate IMM and K (as __mmask8) to
   __builtin_ia32_fpclasspd256_mask; the result is cast to __mmask8.  */
#define _mm256_mask_fpclass_pd_mask(K, V, IMM) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (IMM), \
     (__mmask8) (K)))
/* Macro form with an explicit input mask K.  Passes V (as __v8sf), the
   int immediate IMM and K (as __mmask8) to
   __builtin_ia32_fpclassps256_mask; the result is cast to __mmask8.  */
#define _mm256_mask_fpclass_ps_mask(K, V, IMM) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ( \
     (__v8sf) (__m256) (V), \
     (int) (IMM), \
     (__mmask8) (K)))
/* Macro form with an explicit input mask K.  Passes V (as __v2df), the
   int immediate IMM and K (as __mmask8) to
   __builtin_ia32_fpclasspd128_mask; the result is cast to __mmask8.  */
#define _mm_mask_fpclass_pd_mask(K, V, IMM) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ( \
     (__v2df) (__m128d) (V), \
     (int) (IMM), \
     (__mmask8) (K)))
/* Macro form with an explicit input mask K.  Passes V (as __v4sf), the
   int immediate IMM and K (as __mmask8) to
   __builtin_ia32_fpclassps128_mask; the result is cast to __mmask8.  */
#define _mm_mask_fpclass_ps_mask(K, V, IMM) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ( \
     (__v4sf) (__m128) (V), \
     (int) (IMM), \
     (__mmask8) (K)))
/* Unmasked macro form.  Passes V (as __v4df), the int immediate IMM
   and -1 converted to __mmask8 to __builtin_ia32_fpclasspd256_mask;
   the result is cast to __mmask8.  */
#define _mm256_fpclass_pd_mask(V, IMM) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ( \
     (__v4df) (__m256d) (V), \
     (int) (IMM), \
     (__mmask8) -1))
/* Unmasked macro form.  Passes V (as __v8sf), the int immediate IMM
   and -1 converted to __mmask8 to __builtin_ia32_fpclassps256_mask;
   the result is cast to __mmask8.  */
#define _mm256_fpclass_ps_mask(V, IMM) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ( \
     (__v8sf) (__m256) (V), \
     (int) (IMM), \
     (__mmask8) -1))
/* Unmasked macro form.  Passes V (as __v2df), the int immediate IMM
   and -1 converted to __mmask8 to __builtin_ia32_fpclasspd128_mask;
   the result is cast to __mmask8.  */
#define _mm_fpclass_pd_mask(V, IMM) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ( \
     (__v2df) (__m128d) (V), \
     (int) (IMM), \
     (__mmask8) -1))
/* Unmasked macro form.  Passes V (as __v4sf), the int immediate IMM
   and -1 converted to __mmask8 to __builtin_ia32_fpclassps128_mask;
   the result is cast to __mmask8.  */
#define _mm_fpclass_ps_mask(V, IMM) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ( \
     (__v4sf) (__m128) (V), \
     (int) (IMM), \
     (__mmask8) -1))
2020 #ifdef __DISABLE_AVX512VLDQ__
2021 #undef __DISABLE_AVX512VLDQ__
2022 #pragma GCC pop_options
2023 #endif /* __DISABLE_AVX512VLDQ__ */
2025 #endif /* _AVX512VLDQINTRIN_H_INCLUDED */