1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
/* Internal data types for implementing the intrinsics.  Each one is a
   64-byte GCC vector of the named element type; the digit in the name is
   the resulting element count (e.g. __v8df holds 8 doubles).  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  These are the public 64-byte
   vector types; __may_alias__ lets objects of any type be accessed through
   them without violating type-based aliasing rules.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
/* Write-mask types used by the masked intrinsics in this file: __mmask8
   accompanies the 8-element (64-bit lane) operations, __mmask16 the
   16-element (32-bit lane) ones.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
58 extern __inline __m512i
59 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 _mm512_set_epi64 (long long __A, long long __B, long long __C,
61 long long __D, long long __E, long long __F,
62 long long __G, long long __H)
64 return __extension__ (__m512i) (__v8di)
65 { __H, __G, __F, __E, __D, __C, __B, __A };
68 /* Create the vector [A B C D E F G H I J K L M N O P]. */
69 extern __inline __m512i
70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
72 int __E, int __F, int __G, int __H,
73 int __I, int __J, int __K, int __L,
74 int __M, int __N, int __O, int __P)
76 return __extension__ (__m512i)(__v16si)
77 { __P, __O, __N, __M, __L, __K, __J, __I,
78 __H, __G, __F, __E, __D, __C, __B, __A };
81 extern __inline __m512d
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 _mm512_set_pd (double __A, double __B, double __C, double __D,
84 double __E, double __F, double __G, double __H)
86 return __extension__ (__m512d)
87 { __H, __G, __F, __E, __D, __C, __B, __A };
90 extern __inline __m512
91 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92 _mm512_set_ps (float __A, float __B, float __C, float __D,
93 float __E, float __F, float __G, float __H,
94 float __I, float __J, float __K, float __L,
95 float __M, float __N, float __O, float __P)
97 return __extension__ (__m512)
98 { __P, __O, __N, __M, __L, __K, __J, __I,
99 __H, __G, __F, __E, __D, __C, __B, __A };
/* "Reversed" setters: each forwards its arguments to the corresponding
   _mm512_set_* intrinsic in the opposite order, so the first argument (e0)
   ends up where _mm512_set_* puts its last one.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
115 extern __inline __m512
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm512_undefined_ps (void)
123 extern __inline __m512d
124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
125 _mm512_undefined_pd (void)
131 extern __inline __m512i
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 _mm512_undefined_epi32 (void)
/* _mm512_undefined_si512 is simply another name for
   _mm512_undefined_epi32.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
141 extern __inline __m512i
142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
143 _mm512_set1_epi8 (char __A)
145 return __extension__ (__m512i)(__v64qi)
146 { __A, __A, __A, __A, __A, __A, __A, __A,
147 __A, __A, __A, __A, __A, __A, __A, __A,
148 __A, __A, __A, __A, __A, __A, __A, __A,
149 __A, __A, __A, __A, __A, __A, __A, __A,
150 __A, __A, __A, __A, __A, __A, __A, __A,
151 __A, __A, __A, __A, __A, __A, __A, __A,
152 __A, __A, __A, __A, __A, __A, __A, __A,
153 __A, __A, __A, __A, __A, __A, __A, __A };
156 extern __inline __m512i
157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
158 _mm512_set1_epi16 (short __A)
160 return __extension__ (__m512i)(__v32hi)
161 { __A, __A, __A, __A, __A, __A, __A, __A,
162 __A, __A, __A, __A, __A, __A, __A, __A,
163 __A, __A, __A, __A, __A, __A, __A, __A,
164 __A, __A, __A, __A, __A, __A, __A, __A };
167 extern __inline __m512d
168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
169 _mm512_set1_pd (double __A)
171 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
174 _mm512_undefined_pd (),
178 extern __inline __m512
179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
180 _mm512_set1_ps (float __A)
182 return (__m512) __builtin_ia32_broadcastss512 (__extension__
185 _mm512_undefined_ps (),
189 /* Create the vector [A B C D A B C D A B C D A B C D]. */
190 extern __inline __m512i
191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
194 return __extension__ (__m512i)(__v16si)
195 { __D, __C, __B, __A, __D, __C, __B, __A,
196 __D, __C, __B, __A, __D, __C, __B, __A };
199 extern __inline __m512i
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
204 return __extension__ (__m512i) (__v8di)
205 { __D, __C, __B, __A, __D, __C, __B, __A };
208 extern __inline __m512d
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 _mm512_set4_pd (double __A, double __B, double __C, double __D)
212 return __extension__ (__m512d)
213 { __D, __C, __B, __A, __D, __C, __B, __A };
216 extern __inline __m512
217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
218 _mm512_set4_ps (float __A, float __B, float __C, float __D)
220 return __extension__ (__m512)
221 { __D, __C, __B, __A, __D, __C, __B, __A,
222 __D, __C, __B, __A, __D, __C, __B, __A };
/* "Reversed" four-value setters: each forwards its arguments to the
   corresponding _mm512_set4_* intrinsic in the opposite order.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64(e3,e2,e1,e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32(e3,e2,e1,e0)

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd(e3,e2,e1,e0)

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps(e3,e2,e1,e0)
237 extern __inline __m512
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm512_setzero_ps (void)
241 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
242 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
245 extern __inline __m512d
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm512_setzero_pd (void)
249 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
252 extern __inline __m512i
253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
254 _mm512_setzero_epi32 (void)
256 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
259 extern __inline __m512i
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm512_setzero_si512 (void)
263 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
266 extern __inline __m512d
267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
270 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
275 extern __inline __m512d
276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
277 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
279 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
281 _mm512_setzero_pd (),
285 extern __inline __m512
286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
287 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
289 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
294 extern __inline __m512
295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
296 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
298 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
300 _mm512_setzero_ps (),
304 extern __inline __m512d
305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
306 _mm512_load_pd (void const *__P)
308 return *(__m512d *) __P;
311 extern __inline __m512d
312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
313 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
315 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
320 extern __inline __m512d
321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
324 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
326 _mm512_setzero_pd (),
331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
332 _mm512_store_pd (void *__P, __m512d __A)
334 *(__m512d *) __P = __A;
338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
341 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
345 extern __inline __m512
346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347 _mm512_load_ps (void const *__P)
349 return *(__m512 *) __P;
352 extern __inline __m512
353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
354 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
356 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
361 extern __inline __m512
362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
363 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
365 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
367 _mm512_setzero_ps (),
372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373 _mm512_store_ps (void *__P, __m512 __A)
375 *(__m512 *) __P = __A;
379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
382 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
386 extern __inline __m512i
387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
388 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
390 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
395 extern __inline __m512i
396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
399 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
401 _mm512_setzero_si512 (),
405 extern __inline __m512i
406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
407 _mm512_load_epi64 (void const *__P)
409 return *(__m512i *) __P;
412 extern __inline __m512i
413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
414 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
416 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
421 extern __inline __m512i
422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
423 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
425 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
427 _mm512_setzero_si512 (),
432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
433 _mm512_store_epi64 (void *__P, __m512i __A)
435 *(__m512i *) __P = __A;
439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
440 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
442 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
446 extern __inline __m512i
447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
448 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
450 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
455 extern __inline __m512i
456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
457 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
459 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
461 _mm512_setzero_si512 (),
465 extern __inline __m512i
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 _mm512_load_si512 (void const *__P)
469 return *(__m512i *) __P;
472 extern __inline __m512i
473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
474 _mm512_load_epi32 (void const *__P)
476 return *(__m512i *) __P;
479 extern __inline __m512i
480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
481 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
483 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
488 extern __inline __m512i
489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
490 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
492 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
494 _mm512_setzero_si512 (),
499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
500 _mm512_store_si512 (void *__P, __m512i __A)
502 *(__m512i *) __P = __A;
506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
507 _mm512_store_epi32 (void *__P, __m512i __A)
509 *(__m512i *) __P = __A;
513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
514 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
516 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
520 extern __inline __m512i
521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
522 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
524 return (__m512i) ((__v16su) __A * (__v16su) __B);
527 extern __inline __m512i
528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
529 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
531 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
534 _mm512_setzero_si512 (),
538 extern __inline __m512i
539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
540 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
542 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
547 extern __inline __m512i
548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
549 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
551 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
554 _mm512_undefined_epi32 (),
558 extern __inline __m512i
559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
560 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
562 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
568 extern __inline __m512i
569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
575 _mm512_setzero_si512 (),
579 extern __inline __m512i
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
583 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
586 _mm512_undefined_epi32 (),
590 extern __inline __m512i
591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
594 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
600 extern __inline __m512i
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
607 _mm512_setzero_si512 (),
611 extern __inline __m512i
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
615 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
618 _mm512_undefined_epi32 (),
622 extern __inline __m512i
623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
624 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
626 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
632 extern __inline __m512i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
639 _mm512_setzero_si512 (),
643 extern __inline __m512i
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645 _mm512_add_epi64 (__m512i __A, __m512i __B)
647 return (__m512i) ((__v8du) __A + (__v8du) __B);
650 extern __inline __m512i
651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
652 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
654 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
660 extern __inline __m512i
661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
664 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
667 _mm512_setzero_si512 (),
671 extern __inline __m512i
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm512_sub_epi64 (__m512i __A, __m512i __B)
675 return (__m512i) ((__v8du) __A - (__v8du) __B);
678 extern __inline __m512i
679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
680 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
682 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
688 extern __inline __m512i
689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
690 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
692 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
695 _mm512_setzero_si512 (),
/* Unmasked per-element variable left shift of the 64-bit elements of __X,
   implemented with __builtin_ia32_psllv8di_mask.
   NOTE(review): the operand on the last visible line comes from
   _mm512_undefined_pd (), whereas the other unmasked 64-bit variable
   shifts in this file (_mm512_srav_epi64, _mm512_srlv_epi64) use
   _mm512_undefined_epi32 ().  Presumably harmless since the value is
   unspecified either way, but worth making consistent -- confirm against
   the complete definition, as several lines of it are missing here.  */
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
703 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
706 _mm512_undefined_pd (),
710 extern __inline __m512i
711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
712 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
714 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
727 _mm512_setzero_si512 (),
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
735 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
738 _mm512_undefined_epi32 (),
742 extern __inline __m512i
743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
744 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
746 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
759 _mm512_setzero_si512 (),
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
767 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
770 _mm512_undefined_epi32 (),
774 extern __inline __m512i
775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
778 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
791 _mm512_setzero_si512 (),
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_add_epi32 (__m512i __A, __m512i __B)
799 return (__m512i) ((__v16su) __A + (__v16su) __B);
802 extern __inline __m512i
803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
806 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
812 extern __inline __m512i
813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
816 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
819 _mm512_setzero_si512 (),
823 extern __inline __m512i
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
827 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
830 _mm512_undefined_epi32 (),
834 extern __inline __m512i
835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
836 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
838 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
843 extern __inline __m512i
844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
845 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
847 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
850 _mm512_setzero_si512 (),
854 extern __inline __m512i
855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
856 _mm512_sub_epi32 (__m512i __A, __m512i __B)
858 return (__m512i) ((__v16su) __A - (__v16su) __B);
861 extern __inline __m512i
862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
865 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
871 extern __inline __m512i
872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
875 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
878 _mm512_setzero_si512 (),
882 extern __inline __m512i
883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
886 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
889 _mm512_undefined_epi32 (),
893 extern __inline __m512i
894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
895 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
897 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
902 extern __inline __m512i
903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
904 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
906 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
909 _mm512_setzero_si512 (),
914 extern __inline __m512i
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
918 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
920 _mm512_undefined_epi32 (),
924 extern __inline __m512i
925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
926 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
929 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
934 extern __inline __m512i
935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
936 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
938 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
940 _mm512_setzero_si512 (),
944 #define _mm512_slli_epi64(X, C) \
945 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
946 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
949 #define _mm512_mask_slli_epi64(W, U, X, C) \
950 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
951 (__v8di)(__m512i)(W),\
954 #define _mm512_maskz_slli_epi64(U, X, C) \
955 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
956 (__v8di)(__m512i)_mm512_setzero_si512 (),\
960 extern __inline __m512i
961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
962 _mm512_sll_epi64 (__m512i __A, __m128i __B)
964 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
967 _mm512_undefined_epi32 (),
971 extern __inline __m512i
972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
973 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
975 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
981 extern __inline __m512i
982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
988 _mm512_setzero_si512 (),
993 extern __inline __m512i
994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
995 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
997 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
999 _mm512_undefined_epi32 (),
1003 extern __inline __m512i
1004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1006 __m512i __A, unsigned int __B)
1008 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1013 extern __inline __m512i
1014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1015 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1017 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1019 _mm512_setzero_si512 (),
1023 #define _mm512_srli_epi64(X, C) \
1024 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1025 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
1028 #define _mm512_mask_srli_epi64(W, U, X, C) \
1029 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1030 (__v8di)(__m512i)(W),\
1033 #define _mm512_maskz_srli_epi64(U, X, C) \
1034 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1035 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1039 extern __inline __m512i
1040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1041 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1043 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1046 _mm512_undefined_epi32 (),
1050 extern __inline __m512i
1051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1052 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1054 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1060 extern __inline __m512i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1067 _mm512_setzero_si512 (),
1072 extern __inline __m512i
1073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1074 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1076 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1078 _mm512_undefined_epi32 (),
1082 extern __inline __m512i
1083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1087 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1092 extern __inline __m512i
1093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1094 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1096 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1098 _mm512_setzero_si512 (),
1102 #define _mm512_srai_epi64(X, C) \
1103 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1104 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
1107 #define _mm512_mask_srai_epi64(W, U, X, C) \
1108 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1109 (__v8di)(__m512i)(W),\
1112 #define _mm512_maskz_srai_epi64(U, X, C) \
1113 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1114 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1118 extern __inline __m512i
1119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1120 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1122 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1125 _mm512_undefined_epi32 (),
1129 extern __inline __m512i
1130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1131 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1133 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1139 extern __inline __m512i
1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1146 _mm512_setzero_si512 (),
1151 extern __inline __m512i
1152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1153 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1155 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1157 _mm512_undefined_epi32 (),
1161 extern __inline __m512i
1162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1163 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1166 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1171 extern __inline __m512i
1172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1173 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1175 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1177 _mm512_setzero_si512 (),
/* Macro forms of the slli intrinsics.  X is cast through __m512i to
   __v16si and the count C is cast to int before being handed to
   __builtin_ia32_pslldi512_mask; the third operand is
   _mm512_undefined_epi32 (), W, or _mm512_setzero_si512 () respectively.
   The line that closes each macro (after the trailing backslash) is not
   present in this listing.  */
1181 #define _mm512_slli_epi32(X, C) \
1182 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1183 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1186 #define _mm512_mask_slli_epi32(W, U, X, C) \
1187 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1188 (__v16si)(__m512i)(W),\
1191 #define _mm512_maskz_slli_epi32(U, X, C) \
1192 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1193 (__v16si)(__m512i)_mm512_setzero_si512 (),\
/* "sll" intrinsics whose count parameter __B is an __m128i.  All three
   forms call __builtin_ia32_pslld512_mask with (__v16si) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1197 extern __inline __m512i
1198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1201 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1204 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1208 extern __inline __m512i
1209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1210 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1212 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1218 extern __inline __m512i
1219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1225 _mm512_setzero_si512 (),
/* Inline-function forms of the "srli" intrinsics taking an unsigned int
   count.  Each passes (__v16si) __A and __B as the first two operands of
   __builtin_ia32_psrldi512_mask; the trailing operands are on lines absent
   from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1230 extern __inline __m512i
1231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1232 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1234 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1236 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1240 extern __inline __m512i
1241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1242 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1243 __m512i __A, unsigned int __B)
1245 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1250 extern __inline __m512i
1251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1252 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1254 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1256 _mm512_setzero_si512 (),
/* Macro forms of the srli intrinsics.  X is cast through __m512i to
   __v16si and the count C is cast to int before being handed to
   __builtin_ia32_psrldi512_mask; the third operand is
   _mm512_undefined_epi32 (), W, or _mm512_setzero_si512 () respectively.
   The line that closes each macro is not present in this listing.  */
1260 #define _mm512_srli_epi32(X, C) \
1261 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1262 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1265 #define _mm512_mask_srli_epi32(W, U, X, C) \
1266 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1267 (__v16si)(__m512i)(W),\
1270 #define _mm512_maskz_srli_epi32(U, X, C) \
1271 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1272 (__v16si)(__m512i)_mm512_setzero_si512 (),\
/* "srl" intrinsics whose count parameter __B is an __m128i.  All three
   forms call __builtin_ia32_psrld512_mask with (__v16si) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1276 extern __inline __m512i
1277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1278 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1280 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1283 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1287 extern __inline __m512i
1288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1289 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1291 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1297 extern __inline __m512i
1298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1304 _mm512_setzero_si512 (),
/* Inline-function forms of the "srai" intrinsics taking an unsigned int
   count.  Each passes (__v16si) __A and __B as the first two operands of
   __builtin_ia32_psradi512_mask; the trailing operands are on lines absent
   from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1309 extern __inline __m512i
1310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1311 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1313 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1315 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; the rest of the parameter list and the
   operands that consume them are not visible here.  */
1319 extern __inline __m512i
1320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1321 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1324 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1329 extern __inline __m512i
1330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1331 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1333 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1335 _mm512_setzero_si512 (),
/* Macro forms of the srai intrinsics.  X is cast through __m512i to
   __v16si and the count C is cast to int before being handed to
   __builtin_ia32_psradi512_mask; the third operand is
   _mm512_undefined_epi32 (), W, or _mm512_setzero_si512 () respectively.
   The line that closes each macro is not present in this listing.  */
1339 #define _mm512_srai_epi32(X, C) \
1340 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1341 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
1344 #define _mm512_mask_srai_epi32(W, U, X, C) \
1345 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1346 (__v16si)(__m512i)(W),\
1349 #define _mm512_maskz_srai_epi32(U, X, C) \
1350 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1351 (__v16si)(__m512i)_mm512_setzero_si512 (),\
/* "sra" intrinsics on a __v16si view of __A with an __m128i count
   parameter __B.  All three forms call __builtin_ia32_psrad512_mask with
   (__v16si) __A first; the remaining operands are partly on lines absent
   from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1355 extern __inline __m512i
1356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1357 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1359 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1362 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1366 extern __inline __m512i
1367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1368 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1370 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1376 extern __inline __m512i
1377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1383 _mm512_setzero_si512 (),
/* Inline-function forms of the 128-bit add/sub intrinsics that take an
   extra const int __R parameter.  Each starts its builtin call with __A
   cast to the internal vector type (__v2df for the _sd forms, __v4sf for
   the _ss forms); the remaining operands are on lines absent from this
   listing.  */
/* Calls __builtin_ia32_addsd_round.  */
1388 extern __inline __m128d
1389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1390 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1392 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
/* Calls __builtin_ia32_addss_round.  */
1397 extern __inline __m128
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1401 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
/* Calls __builtin_ia32_subsd_round.  */
1406 extern __inline __m128d
1407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1408 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1410 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
/* Calls __builtin_ia32_subss_round.  */
1415 extern __inline __m128
1416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1417 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1419 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
/* Macro forms of the scalar add/sub rounding intrinsics.  Each forwards
   A, B and C unchanged to the matching __builtin_ia32_*_round builtin and
   casts the result to the intrinsic's return type.

   The whole expansion is parenthesized: a bare "(type) call (...)" can be
   split by its context, e.g. a trailing vector subscript binds to the call
   before the cast is applied, and a leading sizeof consumes only the
   parenthesized type name.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round (A, B, C))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round (A, B, C))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round (A, B, C))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round (A, B, C))
/* Inline-function forms of the 64-bit-element ternarylogic intrinsics.
   Each takes three __m512i operands plus a const int imm and starts its
   builtin call with (__v8di) __A; the middle operands are on lines absent
   from this listing.  */
/* Unmasked form: calls __builtin_ia32_pternlogq512_mask.  */
1439 extern __inline __m512i
1440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1441 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1443 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
/* "mask" form: same builtin, with an extra __mmask8 __U parameter placed
   after __A.  */
1449 extern __inline __m512i
1450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1452 __m512i __C, const int imm)
1454 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
/* "maskz" form: uses the separate __builtin_ia32_pternlogq512_maskz
   builtin; imm and (__mmask8) __U are its last two operands.  */
1460 extern __inline __m512i
1461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1462 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1463 __m512i __C, const int imm)
1465 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1468 imm, (__mmask8) __U);
/* Inline-function forms of the 32-bit-element ternarylogic intrinsics.
   Each starts its builtin call with (__v16si) __A and ends it with imm
   followed by a __mmask16 operand; the middle operands are on lines absent
   from this listing.  */
/* Unmasked form: __builtin_ia32_pternlogd512_mask with mask
   (__mmask16) -1.  */
1471 extern __inline __m512i
1472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1475 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1478 imm, (__mmask16) -1);
/* "mask" form: same builtin, with the caller's __U as the mask operand.  */
1481 extern __inline __m512i
1482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1483 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1484 __m512i __C, const int imm)
1486 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1489 imm, (__mmask16) __U);
/* "maskz" form: uses the separate __builtin_ia32_pternlogd512_maskz
   builtin with the caller's __U as the mask operand.  */
1492 extern __inline __m512i
1493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1494 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1495 __m512i __C, const int imm)
1497 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1500 imm, (__mmask16) __U);
/* Macro forms of the 64-bit-element ternarylogic intrinsics.  A, B and C
   are each cast through __m512i to __v8di and I is cast to int.  The
   unmasked form passes (__mmask8)-1 as the mask operand, the mask form
   passes (__mmask8)(U), and the maskz form passes (__mmask8)(U) to the
   separate ..._maskz builtin.  Every expansion is fully parenthesized.  */
1503 #define _mm512_ternarylogic_epi64(A, B, C, I) \
1504 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1505 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
1506 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
1507 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1508 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1509 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
1510 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
1511 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
/* Macro forms of the 32-bit-element ternarylogic intrinsics.  A, B and C
   are each cast through __m512i to __v16si and I is cast to int.  The
   final line of each macro (the operand after the trailing backslash) is
   not present in this listing.  */
1512 #define _mm512_ternarylogic_epi32(A, B, C, I) \
1513 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1514 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1516 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
1517 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1518 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1520 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
1521 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
1522 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
/* rcp14 intrinsics on __m512d.  All three forms call
   __builtin_ia32_rcp14pd512_mask with (__v8df) __A first; the remaining
   operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_pd () appears among the operands.  */
1526 extern __inline __m512d
1527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528 _mm512_rcp14_pd (__m512d __A)
1530 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1532 _mm512_undefined_pd (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1536 extern __inline __m512d
1537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1540 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
/* "maskz" form: _mm512_setzero_pd () appears among the operands.  */
1545 extern __inline __m512d
1546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1547 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1549 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1551 _mm512_setzero_pd (),
/* rcp14 intrinsics on __m512.  All three forms call
   __builtin_ia32_rcp14ps512_mask with (__v16sf) __A first; the remaining
   operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_ps () appears among the operands.  */
1555 extern __inline __m512
1556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1557 _mm512_rcp14_ps (__m512 __A)
1559 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1561 _mm512_undefined_ps (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1565 extern __inline __m512
1566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1567 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1569 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
/* "maskz" form: _mm512_setzero_ps () appears among the operands.  */
1574 extern __inline __m512
1575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1578 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1580 _mm512_setzero_ps (),
/* 128-bit rcp14 intrinsics taking two vectors.  Note the operand order:
   the builtin's first operand is __B, not __A.  The second operand is on
   a line absent from this listing.  */
/* Calls __builtin_ia32_rcp14sd with (__v2df) __B first.  */
1584 extern __inline __m128d
1585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1586 _mm_rcp14_sd (__m128d __A, __m128d __B)
1588 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
/* Calls __builtin_ia32_rcp14ss with (__v4sf) __B first.  */
1592 extern __inline __m128
1593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1594 _mm_rcp14_ss (__m128 __A, __m128 __B)
1596 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
/* rsqrt14 intrinsics on __m512d.  All three forms call
   __builtin_ia32_rsqrt14pd512_mask with (__v8df) __A first; the remaining
   operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_pd () appears among the operands.  */
1600 extern __inline __m512d
1601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1602 _mm512_rsqrt14_pd (__m512d __A)
1604 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1606 _mm512_undefined_pd (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1610 extern __inline __m512d
1611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1612 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1614 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
/* "maskz" form: _mm512_setzero_pd () appears among the operands.  */
1619 extern __inline __m512d
1620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1621 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1623 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1625 _mm512_setzero_pd (),
/* rsqrt14 intrinsics on __m512.  All three forms call
   __builtin_ia32_rsqrt14ps512_mask with (__v16sf) __A first; the remaining
   operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_ps () appears among the operands.  */
1629 extern __inline __m512
1630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1631 _mm512_rsqrt14_ps (__m512 __A)
1633 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1635 _mm512_undefined_ps (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1639 extern __inline __m512
1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1643 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
/* "maskz" form: _mm512_setzero_ps () appears among the operands.  */
1648 extern __inline __m512
1649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1650 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1652 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1654 _mm512_setzero_ps (),
/* 128-bit rsqrt14 intrinsics taking two vectors.  Note the operand order:
   the builtin's first operand is __B, not __A.  The second operand is on
   a line absent from this listing.  */
/* Calls __builtin_ia32_rsqrt14sd with (__v2df) __B first.  */
1658 extern __inline __m128d
1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1662 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
/* Calls __builtin_ia32_rsqrt14ss with (__v4sf) __B first.  */
1666 extern __inline __m128
1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1670 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
/* Inline-function forms of sqrt on __m512d with an explicit const int
   __R.  All three call __builtin_ia32_sqrtpd512_mask with (__v8df) __A
   first and end the call with a __mmask8 operand followed by __R.  Some
   lines of each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_pd () operand, mask (__mmask8) -1.  */
1675 extern __inline __m512d
1676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1677 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1679 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1681 _mm512_undefined_pd (),
1682 (__mmask8) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W and the end of the parameter list are not visible here.  */
1685 extern __inline __m512d
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1690 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1692 (__mmask8) __U, __R);
/* "maskz" form: _mm512_setzero_pd () operand, mask __U.  */
1695 extern __inline __m512d
1696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1699 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1701 _mm512_setzero_pd (),
1702 (__mmask8) __U, __R);
/* Inline-function forms of sqrt on __m512 with an explicit const int __R.
   All three call __builtin_ia32_sqrtps512_mask with (__v16sf) __A first
   and end the call with a __mmask16 operand followed by __R.  Some lines
   of each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_ps () operand, mask (__mmask16) -1.  */
1705 extern __inline __m512
1706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1709 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1711 _mm512_undefined_ps (),
1712 (__mmask16) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
1715 extern __inline __m512
1716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1717 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1719 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1721 (__mmask16) __U, __R);
/* "maskz" form: _mm512_setzero_ps () operand, mask __U.  */
1724 extern __inline __m512
1725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1726 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1728 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1730 _mm512_setzero_ps (),
1731 (__mmask16) __U, __R);
/* Inline-function forms of the 128-bit sqrt intrinsics with an explicit
   const int __R.  Note the operand order: the builtin's first operand is
   __B, not __A.  The remaining operands are on lines absent from this
   listing.  */
/* Calls __builtin_ia32_sqrtsd_round with (__v2df) __B first.  */
1734 extern __inline __m128d
1735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1736 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1738 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
/* Calls __builtin_ia32_sqrtss_round with (__v4sf) __B first.  */
1743 extern __inline __m128
1744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1745 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1747 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
/* Macro forms of the sqrt-with-rounding intrinsics.  C is forwarded as the
   builtin's last argument.  The 512-bit forms pass, after A, a source
   vector (_mm512_undefined_*, W, or _mm512_setzero_*) and a mask (-1 or U).

   Every expansion is wrapped in parentheses so that the result cast cannot
   be separated from the builtin call by the surrounding expression (for
   example by a trailing vector subscript or a leading sizeof).  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, (__v8df) _mm512_undefined_pd (), -1, C))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, W, U, C))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask (A, (__v8df) _mm512_setzero_pd (), U, C))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, (__v16sf) _mm512_undefined_ps (), -1, C))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, W, U, C))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask (A, (__v16sf) _mm512_setzero_ps (), U, C))

/* The scalar forms hand B to the builtin before A.  This is the operand
   order used by the inline-function definitions of these two intrinsics,
   whose builtin call starts with __B; passing A first made the macro and
   the function forms disagree on which input is which.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round (B, A, C))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round (B, A, C))
/* cvtepi8_epi32 intrinsics: __m128i input, __m512i result.  All three
   forms call __builtin_ia32_pmovsxbd512_mask with (__v16qi) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1777 extern __inline __m512i
1778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1779 _mm512_cvtepi8_epi32 (__m128i __A)
1781 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1783 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1787 extern __inline __m512i
1788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1789 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1791 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1796 extern __inline __m512i
1797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1800 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1802 _mm512_setzero_si512 (),
/* cvtepi8_epi64 intrinsics: __m128i input, __m512i result.  All three
   forms call __builtin_ia32_pmovsxbq512_mask with (__v16qi) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1806 extern __inline __m512i
1807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1808 _mm512_cvtepi8_epi64 (__m128i __A)
1810 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1812 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1816 extern __inline __m512i
1817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1820 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1825 extern __inline __m512i
1826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1827 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1829 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1831 _mm512_setzero_si512 (),
/* cvtepi16_epi32 intrinsics: __m256i input, __m512i result.  All three
   forms call __builtin_ia32_pmovsxwd512_mask with (__v16hi) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1835 extern __inline __m512i
1836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1837 _mm512_cvtepi16_epi32 (__m256i __A)
1839 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1841 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1845 extern __inline __m512i
1846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1847 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1849 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1854 extern __inline __m512i
1855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1856 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1858 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1860 _mm512_setzero_si512 (),
/* cvtepi16_epi64 intrinsics: __m128i input, __m512i result.  All three
   forms call __builtin_ia32_pmovsxwq512_mask with (__v8hi) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1864 extern __inline __m512i
1865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1866 _mm512_cvtepi16_epi64 (__m128i __A)
1868 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1870 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1874 extern __inline __m512i
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1878 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1883 extern __inline __m512i
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1887 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1889 _mm512_setzero_si512 (),
/* cvtepi32_epi64 intrinsics: __m256i input, __m512i result.  All three
   forms call __builtin_ia32_pmovsxdq512_mask with (__v8si) __X first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1893 extern __inline __m512i
1894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1895 _mm512_cvtepi32_epi64 (__m256i __X)
1897 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1899 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1903 extern __inline __m512i
1904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1905 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1907 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1912 extern __inline __m512i
1913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1914 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1916 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1918 _mm512_setzero_si512 (),
/* cvtepu8_epi32 intrinsics: __m128i input, __m512i result.  All three
   forms call __builtin_ia32_pmovzxbd512_mask with (__v16qi) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1922 extern __inline __m512i
1923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1924 _mm512_cvtepu8_epi32 (__m128i __A)
1926 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1928 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1932 extern __inline __m512i
1933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1936 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1941 extern __inline __m512i
1942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1943 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1945 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1947 _mm512_setzero_si512 (),
/* cvtepu8_epi64 intrinsics: __m128i input, __m512i result.  All three
   forms call __builtin_ia32_pmovzxbq512_mask with (__v16qi) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1951 extern __inline __m512i
1952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1953 _mm512_cvtepu8_epi64 (__m128i __A)
1955 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1957 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1961 extern __inline __m512i
1962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1963 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1965 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1970 extern __inline __m512i
1971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1972 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1974 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1976 _mm512_setzero_si512 (),
/* cvtepu16_epi32 intrinsics: __m256i input, __m512i result.  All three
   forms call __builtin_ia32_pmovzxwd512_mask with (__v16hi) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
1980 extern __inline __m512i
1981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1982 _mm512_cvtepu16_epi32 (__m256i __A)
1984 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1986 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
1990 extern __inline __m512i
1991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1992 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1994 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
1999 extern __inline __m512i
2000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2003 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2005 _mm512_setzero_si512 (),
/* cvtepu16_epi64 intrinsics: __m128i input, __m512i result.  All three
   forms call __builtin_ia32_pmovzxwq512_mask with (__v8hi) __A first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
2009 extern __inline __m512i
2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 _mm512_cvtepu16_epi64 (__m128i __A)
2013 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2015 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
2019 extern __inline __m512i
2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2023 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
2028 extern __inline __m512i
2029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2030 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2032 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2034 _mm512_setzero_si512 (),
/* cvtepu32_epi64 intrinsics: __m256i input, __m512i result.  All three
   forms call __builtin_ia32_pmovzxdq512_mask with (__v8si) __X first; the
   remaining operands are partly on lines absent from this listing.  */
/* Unmasked form: _mm512_undefined_epi32 () appears among the operands.  */
2038 extern __inline __m512i
2039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2040 _mm512_cvtepu32_epi64 (__m256i __X)
2042 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2044 _mm512_undefined_epi32 (),
/* "mask" form: adds __W and __U; their use is on lines not shown.  */
2048 extern __inline __m512i
2049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2050 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2052 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
/* "maskz" form: _mm512_setzero_si512 () appears among the operands.  */
2057 extern __inline __m512i
2058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2059 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2061 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2063 _mm512_setzero_si512 (),
/* Inline-function forms of add on __m512d with an explicit const int __R.
   All three call __builtin_ia32_addpd512_mask with (__v8df) __A first and
   end the call with a __mmask8 operand followed by __R.  Some lines of
   each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_pd () operand, mask (__mmask8) -1.  */
2068 extern __inline __m512d
2069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2070 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2072 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2075 _mm512_undefined_pd (),
2076 (__mmask8) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
2079 extern __inline __m512d
2080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2081 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2082 __m512d __B, const int __R)
2084 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2087 (__mmask8) __U, __R);
/* "maskz" form: _mm512_setzero_pd () operand, mask __U.  The end of the
   parameter list is on a line not shown.  */
2090 extern __inline __m512d
2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2095 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2098 _mm512_setzero_pd (),
2099 (__mmask8) __U, __R);
/* Inline-function forms of add on __m512 with an explicit const int __R.
   All three call __builtin_ia32_addps512_mask with (__v16sf) __A first and
   end the call with a __mmask16 operand followed by __R.  Some lines of
   each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_ps () operand, mask (__mmask16) -1.  */
2102 extern __inline __m512
2103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2104 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2106 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2109 _mm512_undefined_ps (),
2110 (__mmask16) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
2113 extern __inline __m512
2114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2115 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2116 __m512 __B, const int __R)
2118 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2121 (__mmask16) __U, __R);
/* "maskz" form: _mm512_setzero_ps () operand, mask __U.  */
2124 extern __inline __m512
2125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2126 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2128 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2131 _mm512_setzero_ps (),
2132 (__mmask16) __U, __R);
/* Inline-function forms of sub on __m512d with an explicit const int __R.
   All three call __builtin_ia32_subpd512_mask with (__v8df) __A first and
   end the call with a __mmask8 operand followed by __R.  Some lines of
   each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_pd () operand, mask (__mmask8) -1.  */
2135 extern __inline __m512d
2136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2137 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2139 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2142 _mm512_undefined_pd (),
2143 (__mmask8) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
2146 extern __inline __m512d
2147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2148 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2149 __m512d __B, const int __R)
2151 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2154 (__mmask8) __U, __R);
/* "maskz" form: _mm512_setzero_pd () operand, mask __U.  The end of the
   parameter list is on a line not shown.  */
2157 extern __inline __m512d
2158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2159 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2162 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2165 _mm512_setzero_pd (),
2166 (__mmask8) __U, __R);
/* Inline-function forms of sub on __m512 with an explicit const int __R.
   All three call __builtin_ia32_subps512_mask with (__v16sf) __A first and
   end the call with a __mmask16 operand followed by __R.  Some lines of
   each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_ps () operand, mask (__mmask16) -1.  */
2169 extern __inline __m512
2170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2173 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2176 _mm512_undefined_ps (),
2177 (__mmask16) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
2180 extern __inline __m512
2181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2183 __m512 __B, const int __R)
2185 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2188 (__mmask16) __U, __R);
/* "maskz" form: _mm512_setzero_ps () operand, mask __U.  */
2191 extern __inline __m512
2192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2193 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2195 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2198 _mm512_setzero_ps (),
2199 (__mmask16) __U, __R);
/* Macro forms of the 512-bit add/sub rounding intrinsics.  Each forwards
   A and B, then a source vector (_mm512_undefined_*, W, or
   _mm512_setzero_*), then a mask (-1 or U), then C, to the matching
   __builtin_ia32_{add,sub}p{d,s}512_mask builtin and casts the result to
   the intrinsic's return type.

   Every expansion is wrapped in parentheses so that the result cast cannot
   be separated from the builtin call by the surrounding expression (for
   example by a trailing vector subscript or a leading sizeof).  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask (A, B, (__v8df) _mm512_undefined_pd (), -1, C))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask (A, B, W, U, C))

#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask (A, B, (__v8df) _mm512_setzero_pd (), U, C))

#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask (A, B, (__v16sf) _mm512_undefined_ps (), -1, C))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask (A, B, W, U, C))

#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask (A, B, (__v16sf) _mm512_setzero_ps (), U, C))

#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask (A, B, (__v8df) _mm512_undefined_pd (), -1, C))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask (A, B, W, U, C))

#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask (A, B, (__v8df) _mm512_setzero_pd (), U, C))

#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask (A, B, (__v16sf) _mm512_undefined_ps (), -1, C))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask (A, B, W, U, C))

#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask (A, B, (__v16sf) _mm512_setzero_ps (), U, C))
/* Inline-function forms of mul on __m512d with an explicit const int __R.
   All three call __builtin_ia32_mulpd512_mask with (__v8df) __A first and
   end the call with a __mmask8 operand followed by __R.  Some lines of
   each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_pd () operand, mask (__mmask8) -1.  */
2240 extern __inline __m512d
2241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2242 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2244 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2247 _mm512_undefined_pd (),
2248 (__mmask8) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
2251 extern __inline __m512d
2252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2253 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2254 __m512d __B, const int __R)
2256 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2259 (__mmask8) __U, __R);
/* "maskz" form: _mm512_setzero_pd () operand, mask __U.  The end of the
   parameter list is on a line not shown.  */
2262 extern __inline __m512d
2263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2264 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2267 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2270 _mm512_setzero_pd (),
2271 (__mmask8) __U, __R);
/* Inline-function forms of mul on __m512 with an explicit const int __R.
   All three call __builtin_ia32_mulps512_mask with (__v16sf) __A first and
   end the call with a __mmask16 operand followed by __R.  Some lines of
   each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_ps () operand, mask (__mmask16) -1.  */
2274 extern __inline __m512
2275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2278 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2281 _mm512_undefined_ps (),
2282 (__mmask16) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
2285 extern __inline __m512
2286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2288 __m512 __B, const int __R)
2290 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2293 (__mmask16) __U, __R);
/* "maskz" form: _mm512_setzero_ps () operand, mask __U.  */
2296 extern __inline __m512
2297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2298 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2300 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2303 _mm512_setzero_ps (),
2304 (__mmask16) __U, __R);
/* Inline-function forms of div on __m512d with an explicit const int __R.
   The vector parameters are named __M and __V here.  All three call
   __builtin_ia32_divpd512_mask with (__v8df) __M first and end the call
   with a __mmask8 operand followed by __R.  Some lines of each definition
   are absent from this listing.  */
/* Unmasked form: _mm512_undefined_pd () operand, mask (__mmask8) -1.  */
2307 extern __inline __m512d
2308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2309 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2311 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2314 _mm512_undefined_pd (),
2315 (__mmask8) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
2318 extern __inline __m512d
2319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2320 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2321 __m512d __V, const int __R)
2323 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2326 (__mmask8) __U, __R);
/* "maskz" form: _mm512_setzero_pd () operand, mask __U.  The end of the
   parameter list is on a line not shown.  */
2329 extern __inline __m512d
2330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2331 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2334 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2337 _mm512_setzero_pd (),
2338 (__mmask8) __U, __R);
/* Inline-function forms of div on __m512 with an explicit const int __R.
   All three call __builtin_ia32_divps512_mask with (__v16sf) __A first and
   end the call with a __mmask16 operand followed by __R.  Some lines of
   each definition are absent from this listing.  */
/* Unmasked form: _mm512_undefined_ps () operand, mask (__mmask16) -1.  */
2341 extern __inline __m512
2342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2343 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2345 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2348 _mm512_undefined_ps (),
2349 (__mmask16) -1, __R);
/* "mask" form: the caller's __U is the mask operand; the operand built
   from __W is on a line not shown.  */
2352 extern __inline __m512
2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2355 __m512 __B, const int __R)
2357 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2360 (__mmask16) __U, __R);
/* "maskz" form: _mm512_setzero_ps () operand, mask __U.  */
2363 extern __inline __m512
2364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2365 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2367 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2370 _mm512_setzero_ps (),
2371 (__mmask16) __U, __R);
/* Scalar multiply/divide wrappers taking an explicit __R argument.  Each
   one hands __A (cast to the matching 128-bit vector type) to the
   corresponding __builtin_ia32_*_round builtin.
   NOTE(review): the trailing argument lines and braces are not part of
   this listing; presumably __B and __R follow -- confirm.  */

/* Double-precision scalar multiply via __builtin_ia32_mulsd_round.  */
2374 extern __inline __m128d
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2378 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
/* Single-precision scalar multiply via __builtin_ia32_mulss_round.  */
2383 extern __inline __m128
2384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2385 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2387 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
/* Double-precision scalar divide via __builtin_ia32_divsd_round.  */
2392 extern __inline __m128d
2393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2394 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2396 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
/* Single-precision scalar divide via __builtin_ia32_divss_round.  */
2401 extern __inline __m128
2402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2403 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2405 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
/* Macro forms of the packed multiply-with-rounding intrinsics.
   A and B are the operands, C is the rounding argument, W the passthrough
   vector and U the write-mask.  The unmasked forms pass an undefined
   passthrough with an all-ones mask (-1); the maskz forms pass a zero
   vector.  Each expansion is fully parenthesized so that a postfix or
   binary operator applied to the macro call acts on the cast result.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))

#define _mm512_mul_round_ps(A, B, C) \
  ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
/* Macro forms of the packed divide-with-rounding intrinsics.
   A and B are the operands, C is the rounding argument, W the passthrough
   vector and U the write-mask.  The unmasked forms pass an undefined
   passthrough with an all-ones mask (-1); the maskz forms pass a zero
   vector.  Each expansion is fully parenthesized so that a postfix or
   binary operator applied to the macro call acts on the cast result.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))

#define _mm512_div_round_ps(A, B, C) \
  ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
/* Macro forms of the scalar multiply/divide-with-rounding intrinsics.
   A and B are the operands and C is the rounding argument; all three are
   forwarded unchanged to the matching *_round builtin.  Each expansion is
   fully parenthesized so that a postfix or binary operator applied to the
   macro call acts on the cast result.  */
#define _mm_mul_round_sd(A, B, C) \
  ((__m128d)__builtin_ia32_mulsd_round(A, B, C))

#define _mm_mul_round_ss(A, B, C) \
  ((__m128)__builtin_ia32_mulss_round(A, B, C))

#define _mm_div_round_sd(A, B, C) \
  ((__m128d)__builtin_ia32_divsd_round(A, B, C))

#define _mm_div_round_ss(A, B, C) \
  ((__m128)__builtin_ia32_divss_round(A, B, C))
/* Packed double-precision maximum with an explicit __R argument; all
   three variants wrap __builtin_ia32_maxpd512_mask with __A as first
   operand.
   NOTE(review): this listing omits some source lines (braces and some
   argument lines); confirm the hidden arguments in the full header.  */

/* Unmasked form: undefined passthrough, all-ones mask.  */
2461 extern __inline __m512d
2462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2463 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2465 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2468 _mm512_undefined_pd (),
2469 (__mmask8) -1, __R);
/* Merge-masked form: forwards write-mask __U and __R.  Presumably __W is
   the passthrough operand (that argument line is not shown) -- confirm.  */
2472 extern __inline __m512d
2473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2475 __m512d __B, const int __R)
2477 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2480 (__mmask8) __U, __R);
/* Zero-masked form: passthrough is _mm512_setzero_pd (), mask is __U.  */
2483 extern __inline __m512d
2484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2485 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2488 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2491 _mm512_setzero_pd (),
2492 (__mmask8) __U, __R);
/* Packed single-precision maximum with an explicit __R argument; all
   three variants wrap __builtin_ia32_maxps512_mask with __A as first
   operand.
   NOTE(review): this listing omits some source lines (braces and some
   argument lines); confirm the hidden arguments in the full header.  */

/* Unmasked form: undefined passthrough, all-ones mask.  */
2495 extern __inline __m512
2496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2497 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2499 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2502 _mm512_undefined_ps (),
2503 (__mmask16) -1, __R);
/* Merge-masked form: forwards write-mask __U and __R.  Presumably __W is
   the passthrough operand (that argument line is not shown) -- confirm.  */
2506 extern __inline __m512
2507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2508 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2509 __m512 __B, const int __R)
2511 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2514 (__mmask16) __U, __R);
/* Zero-masked form: passthrough is _mm512_setzero_ps (), mask is __U.  */
2517 extern __inline __m512
2518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2519 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2521 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2524 _mm512_setzero_ps (),
2525 (__mmask16) __U, __R);
/* Packed double-precision minimum with an explicit __R argument; all
   three variants wrap __builtin_ia32_minpd512_mask with __A as first
   operand.
   NOTE(review): this listing omits some source lines (braces and some
   argument lines); confirm the hidden arguments in the full header.  */

/* Unmasked form: undefined passthrough, all-ones mask.  */
2528 extern __inline __m512d
2529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2530 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2532 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2535 _mm512_undefined_pd (),
2536 (__mmask8) -1, __R);
/* Merge-masked form: forwards write-mask __U and __R.  Presumably __W is
   the passthrough operand (that argument line is not shown) -- confirm.  */
2539 extern __inline __m512d
2540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2541 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2542 __m512d __B, const int __R)
2544 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2547 (__mmask8) __U, __R);
/* Zero-masked form: passthrough is _mm512_setzero_pd (), mask is __U.  */
2550 extern __inline __m512d
2551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2555 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2558 _mm512_setzero_pd (),
2559 (__mmask8) __U, __R);
/* Packed single-precision minimum with an explicit __R argument; all
   three variants wrap __builtin_ia32_minps512_mask with __A as first
   operand.
   NOTE(review): this listing omits some source lines (braces and some
   argument lines); confirm the hidden arguments in the full header.  */

/* Unmasked form: undefined passthrough, all-ones mask.  */
2562 extern __inline __m512
2563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2564 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2566 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2569 _mm512_undefined_ps (),
2570 (__mmask16) -1, __R);
/* Merge-masked form: forwards write-mask __U and __R.  Presumably __W is
   the passthrough operand (that argument line is not shown) -- confirm.  */
2573 extern __inline __m512
2574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2576 __m512 __B, const int __R)
2578 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2581 (__mmask16) __U, __R);
/* Zero-masked form: passthrough is _mm512_setzero_ps (), mask is __U.  */
2584 extern __inline __m512
2585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2588 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2591 _mm512_setzero_ps (),
2592 (__mmask16) __U, __R);
/* Macro forms of the packed double-precision maximum-with-rounding
   intrinsics.  A and B are the operands, R the rounding argument, W the
   passthrough vector and U the write-mask.  Each expansion is fully
   parenthesized so that a postfix or binary operator applied to the macro
   call acts on the cast result.  */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
/* Macro form of the unmasked packed single-precision maximum with
   rounding argument R.  The passthrough operand is the single-precision
   undefined vector _mm512_undefined_ps (), matching the inline-function
   definition of this intrinsic (the previous text used the
   double-precision _mm512_undefined_pd () here).  The expansion is
   parenthesized so operators applied to the call act on the cast result.  */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
/* Macro forms of the masked packed single-precision maximum with rounding
   argument R.  W is the passthrough vector, U the write-mask; the maskz
   form passes a zero vector as passthrough.  Each expansion is fully
   parenthesized so that a postfix or binary operator applied to the macro
   call acts on the cast result.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
/* Macro forms of the packed minimum-with-rounding intrinsics.
   A and B are the operands, R the rounding argument, W the passthrough
   vector and U the write-mask.  The unmasked forms pass an undefined
   passthrough with an all-ones mask (-1); the maskz forms pass a zero
   vector.  Each expansion is fully parenthesized so that a postfix or
   binary operator applied to the macro call acts on the cast result.  */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))

#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
/* Packed double-precision scalef with an explicit __R argument; all three
   variants wrap __builtin_ia32_scalefpd512_mask with __A as first operand.
   NOTE(review): this listing omits some source lines (braces and some
   argument lines); confirm the hidden arguments in the full header.  */

/* Unmasked form: undefined passthrough, all-ones mask.  */
2633 extern __inline __m512d
2634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2635 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2637 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2640 _mm512_undefined_pd (),
2641 (__mmask8) -1, __R);
/* Merge-masked form: forwards write-mask __U and __R.  Presumably __W is
   the passthrough operand (that argument line is not shown) -- confirm.  */
2644 extern __inline __m512d
2645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2646 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2647 __m512d __B, const int __R)
2649 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2652 (__mmask8) __U, __R);
/* Zero-masked form: passthrough is _mm512_setzero_pd (), mask is __U.  */
2655 extern __inline __m512d
2656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2657 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2660 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2663 _mm512_setzero_pd (),
2664 (__mmask8) __U, __R);
/* Packed single-precision scalef with an explicit __R argument; all three
   variants wrap __builtin_ia32_scalefps512_mask with __A as first operand.
   NOTE(review): this listing omits some source lines (braces and some
   argument lines); confirm the hidden arguments in the full header.  */

/* Unmasked form: undefined passthrough, all-ones mask.  */
2667 extern __inline __m512
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2671 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2674 _mm512_undefined_ps (),
2675 (__mmask16) -1, __R);
/* Merge-masked form: forwards write-mask __U and __R.  Presumably __W is
   the passthrough operand (that argument line is not shown) -- confirm.  */
2678 extern __inline __m512
2679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2681 __m512 __B, const int __R)
2683 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2686 (__mmask16) __U, __R);
/* Zero-masked form: passthrough is _mm512_setzero_ps (), mask is __U.  */
2689 extern __inline __m512
2690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2691 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2694 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2697 _mm512_setzero_ps (),
2698 (__mmask16) __U, __R);
/* Scalar scalef wrappers taking an explicit __R argument.  Each hands __A
   (cast to the matching 128-bit vector type) to the corresponding
   __builtin_ia32_scalef*_round builtin.
   NOTE(review): the trailing argument lines and braces are not part of
   this listing; presumably __B and __R follow -- confirm.  */

/* Double-precision scalar form via __builtin_ia32_scalefsd_round.  */
2701 extern __inline __m128d
2702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2703 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2705 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
/* Single-precision scalar form via __builtin_ia32_scalefss_round.  */
2710 extern __inline __m128
2711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2712 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2714 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
/* Macro forms of the scalef-with-rounding intrinsics (packed and scalar).
   A and B are the operands, C the rounding argument, W the passthrough
   vector and U the write-mask.  The unmasked packed forms pass an
   undefined passthrough with an all-ones mask (-1); the maskz forms pass
   a zero vector.  Each expansion is fully parenthesized so that a postfix
   or binary operator applied to the macro call acts on the cast result.  */
#define _mm512_scalef_round_pd(A, B, C) \
  ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))

#define _mm512_scalef_round_ps(A, B, C) \
  ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))

#define _mm_scalef_round_sd(A, B, C) \
  ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))

#define _mm_scalef_round_ss(A, B, C) \
  ((__m128)__builtin_ia32_scalefss_round(A, B, C))
/* Packed double-precision fused multiply-add with an explicit __R
   argument.
   NOTE(review): this listing omits some source lines (braces and the
   second/third operand lines); presumably __B and __C are forwarded in
   order -- confirm against the full header.  */

/* Unmasked form: __builtin_ia32_vfmaddpd512_mask with an all-ones mask.  */
2745 extern __inline __m512d
2746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2749 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2752 (__mmask8) -1, __R);
/* Masked form: same _mask builtin, write-mask __U.  */
2755 extern __inline __m512d
2756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2757 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2758 __m512d __C, const int __R)
2760 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2763 (__mmask8) __U, __R);
/* mask3 form: uses the _mask3 builtin, write-mask __U.  */
2766 extern __inline __m512d
2767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2768 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2769 __mmask8 __U, const int __R)
2771 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2774 (__mmask8) __U, __R);
/* maskz form: uses the _maskz builtin, write-mask __U.  */
2777 extern __inline __m512d
2778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2779 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2780 __m512d __C, const int __R)
2782 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2785 (__mmask8) __U, __R);
/* Packed single-precision fused multiply-add with an explicit __R
   argument.
   NOTE(review): this listing omits some source lines (braces and the
   second/third operand lines); presumably __B and __C are forwarded in
   order -- confirm against the full header.  */

/* Unmasked form: __builtin_ia32_vfmaddps512_mask with an all-ones mask.  */
2788 extern __inline __m512
2789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2790 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2792 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2795 (__mmask16) -1, __R);
/* Masked form: same _mask builtin, write-mask __U.  */
2798 extern __inline __m512
2799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2800 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2801 __m512 __C, const int __R)
2803 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2806 (__mmask16) __U, __R);
/* mask3 form: uses the _mask3 builtin, write-mask __U.  */
2809 extern __inline __m512
2810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2811 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2812 __mmask16 __U, const int __R)
2814 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2817 (__mmask16) __U, __R);
/* maskz form: uses the _maskz builtin, write-mask __U.  */
2820 extern __inline __m512
2821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2823 __m512 __C, const int __R)
2825 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2828 (__mmask16) __U, __R);
/* Packed double-precision fused multiply-subtract with an explicit __R
   argument.  The unmasked, mask and maskz forms are built on the vfmadd
   builtins; only the mask3 form calls a dedicated vfmsub builtin.
   NOTE(review): this listing omits the second/third operand lines;
   presumably the vfmadd-based forms pass a negated __C -- confirm against
   the full header.  */

/* Unmasked form: __builtin_ia32_vfmaddpd512_mask with an all-ones mask.  */
2831 extern __inline __m512d
2832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2833 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2835 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2838 (__mmask8) -1, __R);
/* Masked form: vfmadd _mask builtin, write-mask __U.  */
2841 extern __inline __m512d
2842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2843 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2844 __m512d __C, const int __R)
2846 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2849 (__mmask8) __U, __R);
/* mask3 form: dedicated __builtin_ia32_vfmsubpd512_mask3, write-mask __U.  */
2852 extern __inline __m512d
2853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2855 __mmask8 __U, const int __R)
2857 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2860 (__mmask8) __U, __R);
/* maskz form: vfmadd _maskz builtin, write-mask __U.  */
2863 extern __inline __m512d
2864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2865 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2866 __m512d __C, const int __R)
2868 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2871 (__mmask8) __U, __R);
/* Packed single-precision fused multiply-subtract with an explicit __R
   argument.  The unmasked, mask and maskz forms are built on the vfmadd
   builtins; only the mask3 form calls a dedicated vfmsub builtin.
   NOTE(review): this listing omits the second/third operand lines;
   presumably the vfmadd-based forms pass a negated __C -- confirm against
   the full header.  */

/* Unmasked form: __builtin_ia32_vfmaddps512_mask with an all-ones mask.  */
2874 extern __inline __m512
2875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2876 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2878 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2881 (__mmask16) -1, __R);
/* Masked form: vfmadd _mask builtin, write-mask __U.  */
2884 extern __inline __m512
2885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2886 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2887 __m512 __C, const int __R)
2889 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2892 (__mmask16) __U, __R);
/* mask3 form: dedicated __builtin_ia32_vfmsubps512_mask3, write-mask __U.  */
2895 extern __inline __m512
2896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2897 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2898 __mmask16 __U, const int __R)
2900 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2903 (__mmask16) __U, __R);
/* maskz form: vfmadd _maskz builtin, write-mask __U.  */
2906 extern __inline __m512
2907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2908 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2909 __m512 __C, const int __R)
2911 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2914 (__mmask16) __U, __R);
/* Packed double-precision fmaddsub with an explicit __R argument; every
   variant calls a __builtin_ia32_vfmaddsubpd512_* builtin with __A first.
   NOTE(review): this listing omits some source lines (braces and the
   second/third operand lines); presumably __B and __C are forwarded in
   order -- confirm against the full header.  */

/* Unmasked form: _mask builtin with an all-ones mask.  */
2917 extern __inline __m512d
2918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2919 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2921 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2924 (__mmask8) -1, __R);
/* Masked form: _mask builtin, write-mask __U.  */
2927 extern __inline __m512d
2928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2929 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2930 __m512d __C, const int __R)
2932 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2935 (__mmask8) __U, __R);
/* mask3 form: _mask3 builtin, write-mask __U.  */
2938 extern __inline __m512d
2939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2940 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2941 __mmask8 __U, const int __R)
2943 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2946 (__mmask8) __U, __R);
/* maskz form: _maskz builtin, write-mask __U.  */
2949 extern __inline __m512d
2950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2951 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2952 __m512d __C, const int __R)
2954 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2957 (__mmask8) __U, __R);
/* Packed single-precision fmaddsub with an explicit __R argument; every
   variant calls a __builtin_ia32_vfmaddsubps512_* builtin with __A first.
   NOTE(review): this listing omits some source lines (braces and the
   second/third operand lines); presumably __B and __C are forwarded in
   order -- confirm against the full header.  */

/* Unmasked form: _mask builtin with an all-ones mask.  */
2960 extern __inline __m512
2961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2962 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2964 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2967 (__mmask16) -1, __R);
/* Masked form: _mask builtin, write-mask __U.  */
2970 extern __inline __m512
2971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2972 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2973 __m512 __C, const int __R)
2975 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2978 (__mmask16) __U, __R);
/* mask3 form: _mask3 builtin, write-mask __U.  */
2981 extern __inline __m512
2982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2983 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2984 __mmask16 __U, const int __R)
2986 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2989 (__mmask16) __U, __R);
/* maskz form: _maskz builtin, write-mask __U.  */
2992 extern __inline __m512
2993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2994 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2995 __m512 __C, const int __R)
2997 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3000 (__mmask16) __U, __R);
/* Packed double-precision fmsubadd with an explicit __R argument.  The
   unmasked, mask and maskz forms are built on the vfmaddsub builtins;
   only the mask3 form calls a dedicated vfmsubadd builtin.
   NOTE(review): this listing omits the second/third operand lines;
   presumably the vfmaddsub-based forms pass a negated __C -- confirm
   against the full header.  */

/* Unmasked form: vfmaddsub _mask builtin with an all-ones mask.  */
3003 extern __inline __m512d
3004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3005 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3007 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3010 (__mmask8) -1, __R);
/* Masked form: vfmaddsub _mask builtin, write-mask __U.  */
3013 extern __inline __m512d
3014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3015 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3016 __m512d __C, const int __R)
3018 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3021 (__mmask8) __U, __R);
/* mask3 form: dedicated __builtin_ia32_vfmsubaddpd512_mask3.  */
3024 extern __inline __m512d
3025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3026 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3027 __mmask8 __U, const int __R)
3029 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3032 (__mmask8) __U, __R);
/* maskz form: vfmaddsub _maskz builtin, write-mask __U.  */
3035 extern __inline __m512d
3036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3037 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3038 __m512d __C, const int __R)
3040 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3043 (__mmask8) __U, __R);
/* Packed single-precision fmsubadd with an explicit __R argument.  The
   unmasked, mask and maskz forms are built on the vfmaddsub builtins;
   only the mask3 form calls a dedicated vfmsubadd builtin.
   NOTE(review): this listing omits the second/third operand lines;
   presumably the vfmaddsub-based forms pass a negated __C -- confirm
   against the full header.  */

/* Unmasked form: vfmaddsub _mask builtin with an all-ones mask.  */
3046 extern __inline __m512
3047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3048 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3050 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3053 (__mmask16) -1, __R);
/* Masked form: vfmaddsub _mask builtin, write-mask __U.  */
3056 extern __inline __m512
3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3059 __m512 __C, const int __R)
3061 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3064 (__mmask16) __U, __R);
/* mask3 form: dedicated __builtin_ia32_vfmsubaddps512_mask3.  */
3067 extern __inline __m512
3068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3069 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3070 __mmask16 __U, const int __R)
3072 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3075 (__mmask16) __U, __R);
/* maskz form: vfmaddsub _maskz builtin, write-mask __U.  */
3078 extern __inline __m512
3079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3080 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3081 __m512 __C, const int __R)
3083 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3086 (__mmask16) __U, __R);
/* Packed double-precision fnmadd with an explicit __R argument.
   NOTE(review): this listing omits some source lines (braces and the
   second/third operand lines); presumably __B and __C are forwarded in
   order -- confirm against the full header.  */

/* Unmasked form: vfmadd _mask builtin applied to the negated __A, with an
   all-ones mask.  */
3089 extern __inline __m512d
3090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3091 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3093 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3096 (__mmask8) -1, __R);
/* Masked form: dedicated __builtin_ia32_vfnmaddpd512_mask; __A is passed
   through un-negated here, unlike the other three variants.  */
3099 extern __inline __m512d
3100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3102 __m512d __C, const int __R)
3104 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3107 (__mmask8) __U, __R);
/* mask3 form: vfmadd _mask3 builtin applied to the negated __A.  */
3110 extern __inline __m512d
3111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3112 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3113 __mmask8 __U, const int __R)
3115 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3118 (__mmask8) __U, __R);
/* maskz form: vfmadd _maskz builtin applied to the negated __A.  */
3121 extern __inline __m512d
3122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3123 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3124 __m512d __C, const int __R)
3126 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3129 (__mmask8) __U, __R);
/* Packed single-precision fnmadd with an explicit __R argument.
   NOTE(review): this listing omits some source lines (braces and the
   second/third operand lines); presumably __B and __C are forwarded in
   order -- confirm against the full header.  */

/* Unmasked form: vfmadd _mask builtin applied to the negated __A, with an
   all-ones mask.  */
3132 extern __inline __m512
3133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3134 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3136 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3139 (__mmask16) -1, __R);
/* Masked form: dedicated __builtin_ia32_vfnmaddps512_mask; __A is passed
   through un-negated here, unlike the other three variants.  */
3142 extern __inline __m512
3143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3144 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3145 __m512 __C, const int __R)
3147 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3150 (__mmask16) __U, __R);
/* mask3 form: vfmadd _mask3 builtin applied to the negated __A.  */
3153 extern __inline __m512
3154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3155 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3156 __mmask16 __U, const int __R)
3158 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3161 (__mmask16) __U, __R);
/* maskz form: vfmadd _maskz builtin applied to the negated __A.  */
3164 extern __inline __m512
3165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3166 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3167 __m512 __C, const int __R)
3169 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3172 (__mmask16) __U, __R);
/* Packed double-precision fnmsub with an explicit __R argument.
   NOTE(review): this listing omits some source lines (braces and the
   second/third operand lines); presumably the vfmadd-based forms also
   negate __C in a hidden line -- confirm against the full header.  */

/* Unmasked form: vfmadd _mask builtin applied to the negated __A, with an
   all-ones mask.  */
3175 extern __inline __m512d
3176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3179 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3182 (__mmask8) -1, __R);
/* Masked form: dedicated __builtin_ia32_vfnmsubpd512_mask, __A un-negated.  */
3185 extern __inline __m512d
3186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3187 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3188 __m512d __C, const int __R)
3190 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3193 (__mmask8) __U, __R);
/* mask3 form: dedicated __builtin_ia32_vfnmsubpd512_mask3, __A un-negated.  */
3196 extern __inline __m512d
3197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3198 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3199 __mmask8 __U, const int __R)
3201 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3204 (__mmask8) __U, __R);
/* maskz form: vfmadd _maskz builtin applied to the negated __A.  */
3207 extern __inline __m512d
3208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3209 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3210 __m512d __C, const int __R)
3212 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3215 (__mmask8) __U, __R);
/* Packed single-precision fnmsub with an explicit __R argument.
   NOTE(review): this listing omits some source lines (braces and the
   second/third operand lines); presumably the vfmadd-based forms also
   negate __C in a hidden line -- confirm against the full header.  */

/* Unmasked form: vfmadd _mask builtin applied to the negated __A, with an
   all-ones mask.  */
3218 extern __inline __m512
3219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3220 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3222 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3225 (__mmask16) -1, __R);
/* Masked form: dedicated __builtin_ia32_vfnmsubps512_mask, __A un-negated.  */
3228 extern __inline __m512
3229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3230 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3231 __m512 __C, const int __R)
3233 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3236 (__mmask16) __U, __R);
/* mask3 form: dedicated __builtin_ia32_vfnmsubps512_mask3, __A un-negated.  */
3239 extern __inline __m512
3240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3241 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3242 __mmask16 __U, const int __R)
3244 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3247 (__mmask16) __U, __R);
/* maskz form: vfmadd _maskz builtin applied to the negated __A.  */
3250 extern __inline __m512
3251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3252 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3253 __m512 __C, const int __R)
3255 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3258 (__mmask16) __U, __R);
/* Macro forms of the fused multiply-add-with-rounding intrinsics.
   A, B and C are the three operands, U the write-mask and R the rounding
   argument.  The unmasked forms use the _mask builtin with an all-ones
   mask (-1); mask3 and maskz use their dedicated builtins.  Each
   expansion is fully parenthesized so that a postfix or binary operator
   applied to the macro call acts on the cast result.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R))

#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R))
/* Macro forms of the fused multiply-subtract-with-rounding intrinsics.
   A, B and C are the three operands, U the write-mask and R the rounding
   argument.  The unmasked, mask and maskz forms reuse the vfmadd builtins
   with C negated; the mask3 forms call the dedicated vfmsub builtin with
   C unchanged.  Each expansion is fully parenthesized so that a postfix
   or binary operator applied to the macro call acts on the cast result.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R))
/* Macro form of the unmasked packed double-precision fmaddsub with
   rounding argument R: the vfmaddsub _mask builtin with an all-ones mask
   (-1).  The expansion is parenthesized so that operators applied to the
   macro call act on the cast result.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R))
/* Macro form of the merge-masked packed double-precision fmaddsub with
   rounding argument R.  A, B and C are the operands and U the write-mask.
   This must call the vfmaddsub builtin, like the inline-function
   definition of the same intrinsic; the previous text called the plain
   vfmadd builtin and so computed a different operation.  The expansion
   is parenthesized so operators applied to the call act on the cast
   result.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R))
/* Macro forms of the remaining fmaddsub-with-rounding intrinsics.
   A, B and C are the three operands, U the write-mask and R the rounding
   argument; every form forwards them unchanged to the matching vfmaddsub
   builtin.  Each expansion is fully parenthesized so that a postfix or
   binary operator applied to the macro call acts on the cast result.  */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R))

#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R))
/* Macro forms of the fmsubadd-with-rounding intrinsics.
   A, B and C are the three operands, U the write-mask and R the rounding
   argument.  The unmasked, mask and maskz forms reuse the vfmaddsub
   builtins with C negated; the mask3 forms call the dedicated vfmsubadd
   builtin with C unchanged.  Each expansion is fully parenthesized so
   that a postfix or binary operator applied to the macro call acts on
   the cast result.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R))
/* Macro form of the unmasked packed double-precision fnmadd with rounding
   argument R: the vfmadd _mask builtin applied to the negated A, with an
   all-ones mask (-1).  The expansion is parenthesized so that operators
   applied to the macro call act on the cast result.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R))
/* Macro form of the merge-masked packed double-precision fnmadd with
   rounding argument R.  A, B and C are the operands and U the write-mask.
   A is passed un-negated because the vfnmadd builtin is already the
   negated-multiply form, and this matches the inline-function definition
   of the same intrinsic.  The previous text passed -(A), negating twice.
   The expansion is parenthesized so operators applied to the call act on
   the cast result.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R))
/* Macro forms of fnmadd with rounding argument R that are implemented by
   negating A and calling a vfmadd builtin: the mask3 and maskz
   double-precision forms and the unmasked single-precision form (all-ones
   mask, -1).  U is the write-mask.  Each expansion is fully parenthesized
   so that a postfix or binary operator applied to the macro call acts on
   the cast result.  */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R))
/* Macro form of the merge-masked packed single-precision fnmadd with
   rounding argument R.  A, B and C are the operands and U the write-mask.
   A is passed un-negated because the vfnmadd builtin is already the
   negated-multiply form, and this matches the inline-function definition
   of the same intrinsic.  The previous text passed -(A), negating twice.
   The expansion is parenthesized so operators applied to the call act on
   the cast result.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R))
/* Macro forms of the mask3 and maskz packed single-precision fnmadd with
   rounding argument R: A is negated and the matching vfmadd builtin is
   called with write-mask U.  Each expansion is fully parenthesized so
   that a postfix or binary operator applied to the macro call acts on
   the cast result.  */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R))
/* Macro forms of the fnmsub-with-rounding intrinsics.
   A, B and C are the three operands, U the write-mask and R the rounding
   argument.  The unmasked and maskz forms reuse the vfmadd builtins with
   both A and C negated; the mask and mask3 forms call the dedicated
   vfnmsub builtins with the operands unchanged.  Each expansion is fully
   parenthesized so that a postfix or binary operator applied to the macro
   call acts on the cast result.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R))
/* 64-bit-element absolute value; all three variants wrap
   __builtin_ia32_pabsq512_mask with __A (as __v8di) as first operand.
   NOTE(review): this listing omits some source lines (braces, casts and
   the mask argument lines); confirm them against the full header.  */

/* Unmasked form: passthrough is _mm512_undefined_epi32 ().  */
3406 extern __inline __m512i
3407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3408 _mm512_abs_epi64 (__m512i __A)
3410 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3412 _mm512_undefined_epi32 (),
/* Merge-masked form.  Presumably __W is the passthrough and __U the
   write-mask (those argument lines are not shown) -- confirm.  */
3416 extern __inline __m512i
3417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3418 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3420 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
/* Zero-masked form: passthrough is _mm512_setzero_si512 ().  */
3425 extern __inline __m512i
3426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3427 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3429 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3431 _mm512_setzero_si512 (),
3435 extern __inline __m512i
3436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3437 _mm512_abs_epi32 (__m512i __A)
3439 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3441 _mm512_undefined_epi32 (),
3445 extern __inline __m512i
3446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3449 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3454 extern __inline __m512i
3455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3456 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3458 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3460 _mm512_setzero_si512 (),
3464 extern __inline __m512
3465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3466 _mm512_broadcastss_ps (__m128 __A)
3468 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3470 _mm512_undefined_ps (),
3474 extern __inline __m512
3475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3476 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3478 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3479 (__v16sf) __O, __M);
3482 extern __inline __m512
3483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3484 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3486 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3488 _mm512_setzero_ps (),
3492 extern __inline __m512d
3493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3494 _mm512_broadcastsd_pd (__m128d __A)
3496 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3498 _mm512_undefined_pd (),
3502 extern __inline __m512d
3503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3504 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3506 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3510 extern __inline __m512d
3511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3514 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3516 _mm512_setzero_pd (),
3520 extern __inline __m512i
3521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3522 _mm512_broadcastd_epi32 (__m128i __A)
3524 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3526 _mm512_undefined_epi32 (),
3530 extern __inline __m512i
3531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3532 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3534 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3535 (__v16si) __O, __M);
3538 extern __inline __m512i
3539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3540 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3542 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3544 _mm512_setzero_si512 (),
3548 extern __inline __m512i
3549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3550 _mm512_set1_epi32 (int __A)
3552 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3554 _mm512_undefined_epi32 (),
3558 extern __inline __m512i
3559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3560 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3562 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3566 extern __inline __m512i
3567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3568 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3571 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3572 (__v16si) _mm512_setzero_si512 (),
3576 extern __inline __m512i
3577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578 _mm512_broadcastq_epi64 (__m128i __A)
3580 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3582 _mm512_undefined_epi32 (),
3586 extern __inline __m512i
3587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3588 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3590 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3594 extern __inline __m512i
3595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3596 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3598 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3600 _mm512_setzero_si512 (),
3604 extern __inline __m512i
3605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3606 _mm512_set1_epi64 (long long __A)
3608 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3610 _mm512_undefined_epi32 (),
3614 extern __inline __m512i
3615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3616 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3618 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3622 extern __inline __m512i
3623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3624 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3627 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3628 (__v8di) _mm512_setzero_si512 (),
3632 extern __inline __m512
3633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3634 _mm512_broadcast_f32x4 (__m128 __A)
3636 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3638 _mm512_undefined_ps (),
3642 extern __inline __m512
3643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3644 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3646 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3651 extern __inline __m512
3652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3653 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3655 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3657 _mm512_setzero_ps (),
3661 extern __inline __m512i
3662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3663 _mm512_broadcast_i32x4 (__m128i __A)
3665 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3667 _mm512_undefined_epi32 (),
3671 extern __inline __m512i
3672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3673 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3675 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3680 extern __inline __m512i
3681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3682 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3684 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3686 _mm512_setzero_si512 (),
3690 extern __inline __m512d
3691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3692 _mm512_broadcast_f64x4 (__m256d __A)
3694 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3696 _mm512_undefined_pd (),
3700 extern __inline __m512d
3701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3702 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3704 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3709 extern __inline __m512d
3710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3711 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3713 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3715 _mm512_setzero_pd (),
3719 extern __inline __m512i
3720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3721 _mm512_broadcast_i64x4 (__m256i __A)
3723 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3725 _mm512_undefined_epi32 (),
3729 extern __inline __m512i
3730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3731 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3733 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3738 extern __inline __m512i
3739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3740 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3742 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3744 _mm512_setzero_si512 (),
3750 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3751 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3752 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3753 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3754 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3755 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3756 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3757 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3758 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3759 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3760 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3761 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3762 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3763 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3764 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3765 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3766 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3767 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3768 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3769 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3770 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3771 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3772 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3773 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3774 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3775 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3776 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3777 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3778 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3779 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3780 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3781 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3782 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3783 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3784 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3785 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3786 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3787 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3788 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3789 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3790 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3791 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3792 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3793 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3794 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3795 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3796 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3797 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3798 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3799 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3800 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3801 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3802 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3803 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3804 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3805 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3806 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3807 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3808 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3809 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3810 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3811 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3812 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3813 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3814 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3815 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3816 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3817 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3818 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3819 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3820 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3821 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3822 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3823 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3824 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3825 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3826 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3827 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3828 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3829 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3830 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3831 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3832 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3833 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3834 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3835 _MM_PERM_DDDD = 0xFF
3839 extern __inline __m512i
3840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3843 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3846 _mm512_undefined_epi32 (),
3850 extern __inline __m512i
3851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3852 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3853 _MM_PERM_ENUM __mask)
3855 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3861 extern __inline __m512i
3862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3863 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3865 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3868 _mm512_setzero_si512 (),
3872 extern __inline __m512i
3873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3874 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3876 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3877 (__v8di) __B, __imm,
3879 _mm512_undefined_epi32 (),
3883 extern __inline __m512i
3884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3885 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3886 __m512i __B, const int __imm)
3888 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3889 (__v8di) __B, __imm,
3894 extern __inline __m512i
3895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3896 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3899 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3900 (__v8di) __B, __imm,
3902 _mm512_setzero_si512 (),
3906 extern __inline __m512i
3907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3908 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3910 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3914 _mm512_undefined_epi32 (),
3918 extern __inline __m512i
3919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3920 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3921 __m512i __B, const int __imm)
3923 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3930 extern __inline __m512i
3931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3932 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3935 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3939 _mm512_setzero_si512 (),
3943 extern __inline __m512d
3944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3945 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3947 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3948 (__v8df) __B, __imm,
3950 _mm512_undefined_pd (),
3954 extern __inline __m512d
3955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3956 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3957 __m512d __B, const int __imm)
3959 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3960 (__v8df) __B, __imm,
3965 extern __inline __m512d
3966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3967 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3970 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3971 (__v8df) __B, __imm,
3973 _mm512_setzero_pd (),
3977 extern __inline __m512
3978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3981 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3982 (__v16sf) __B, __imm,
3984 _mm512_undefined_ps (),
3988 extern __inline __m512
3989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3991 __m512 __B, const int __imm)
3993 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3994 (__v16sf) __B, __imm,
3999 extern __inline __m512
4000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4004 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4005 (__v16sf) __B, __imm,
4007 _mm512_setzero_ps (),
4012 #define _mm512_shuffle_epi32(X, C) \
4013 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4014 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
4017 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
4018 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4019 (__v16si)(__m512i)(W),\
4022 #define _mm512_maskz_shuffle_epi32(U, X, C) \
4023 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4024 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4027 #define _mm512_shuffle_i64x2(X, Y, C) \
4028 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4029 (__v8di)(__m512i)(Y), (int)(C),\
4030 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
4033 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
4034 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4035 (__v8di)(__m512i)(Y), (int)(C),\
4036 (__v8di)(__m512i)(W),\
4039 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
4040 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4041 (__v8di)(__m512i)(Y), (int)(C),\
4042 (__v8di)(__m512i)_mm512_setzero_si512 (),\
4045 #define _mm512_shuffle_i32x4(X, Y, C) \
4046 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4047 (__v16si)(__m512i)(Y), (int)(C),\
4048 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
4051 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
4052 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4053 (__v16si)(__m512i)(Y), (int)(C),\
4054 (__v16si)(__m512i)(W),\
4057 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
4058 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4059 (__v16si)(__m512i)(Y), (int)(C),\
4060 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4063 #define _mm512_shuffle_f64x2(X, Y, C) \
4064 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4065 (__v8df)(__m512d)(Y), (int)(C),\
4066 (__v8df)(__m512d)_mm512_undefined_pd(),\
4069 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
4070 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4071 (__v8df)(__m512d)(Y), (int)(C),\
4072 (__v8df)(__m512d)(W),\
4075 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
4076 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4077 (__v8df)(__m512d)(Y), (int)(C),\
4078 (__v8df)(__m512d)_mm512_setzero_pd(),\
4081 #define _mm512_shuffle_f32x4(X, Y, C) \
4082 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4083 (__v16sf)(__m512)(Y), (int)(C),\
4084 (__v16sf)(__m512)_mm512_undefined_ps(),\
4087 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
4088 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4089 (__v16sf)(__m512)(Y), (int)(C),\
4090 (__v16sf)(__m512)(W),\
4093 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
4094 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4095 (__v16sf)(__m512)(Y), (int)(C),\
4096 (__v16sf)(__m512)_mm512_setzero_ps(),\
4100 extern __inline __m512i
4101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4102 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4104 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4107 _mm512_undefined_epi32 (),
4111 extern __inline __m512i
4112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4113 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4115 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4121 extern __inline __m512i
4122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4123 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4125 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4128 _mm512_setzero_si512 (),
4132 extern __inline __m512i
4133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4134 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4136 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4139 _mm512_undefined_epi32 (),
4143 extern __inline __m512i
4144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4145 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4147 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4153 extern __inline __m512i
4154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4155 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4157 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4160 _mm512_setzero_si512 (),
4164 extern __inline __m512i
4165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4166 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4168 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4171 _mm512_undefined_epi32 (),
4175 extern __inline __m512i
4176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4177 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4179 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4185 extern __inline __m512i
4186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4187 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4189 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4192 _mm512_setzero_si512 (),
4196 extern __inline __m512i
4197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4198 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4200 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4203 _mm512_undefined_epi32 (),
4207 extern __inline __m512i
4208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4209 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4211 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4217 extern __inline __m512i
4218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4219 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4221 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4224 _mm512_setzero_si512 (),
4229 extern __inline __m256i
4230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4231 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4233 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4235 _mm256_undefined_si256 (),
4236 (__mmask8) -1, __R);
4239 extern __inline __m256i
4240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4241 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4244 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4246 (__mmask8) __U, __R);
4249 extern __inline __m256i
4250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4251 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4253 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4255 _mm256_setzero_si256 (),
4256 (__mmask8) __U, __R);
4259 extern __inline __m256i
4260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4261 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4263 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4265 _mm256_undefined_si256 (),
4266 (__mmask8) -1, __R);
4269 extern __inline __m256i
4270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4271 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4274 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4276 (__mmask8) __U, __R);
4279 extern __inline __m256i
4280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4281 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4283 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4285 _mm256_setzero_si256 (),
4286 (__mmask8) __U, __R);
/* Macro form of _mm512_cvtt_roundpd_epi32.  Calls the cvttpd2dq512
   "mask" builtin on A.  The second slot receives the value of
   _mm256_undefined_si256 () viewed as __v8si, the mask slot is the
   constant -1, and R (called __R in the inline-function form) is the
   last argument.  The result is cast to __m256i.  */
#define _mm512_cvtt_roundpd_epi32(A, R)                                 \
  ((__m256i)                                                            \
   __builtin_ia32_cvttpd2dq512_mask (A,                                 \
                                     (__v8si) _mm256_undefined_si256 (), \
                                     -1, R))
/* Macro form of _mm512_mask_cvtt_roundpd_epi32.  Calls the cvttpd2dq512
   "mask" builtin on A, with W viewed as __v8si in the second slot, U in
   the mask slot, and R (called __R in the inline-function form) as the
   last argument.  The result is cast to __m256i.  */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R)                      \
  ((__m256i)                                                            \
   __builtin_ia32_cvttpd2dq512_mask (A, (__v8si) (W), U, R))
/* Macro form of _mm512_maskz_cvtt_roundpd_epi32.  Calls the cvttpd2dq512
   "mask" builtin on A.  The second slot receives the value of
   _mm256_setzero_si256 () viewed as __v8si, U goes in the mask slot, and
   R (called __R in the inline-function form) is the last argument.  The
   result is cast to __m256i.  */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R)                        \
  ((__m256i)                                                            \
   __builtin_ia32_cvttpd2dq512_mask (A,                                 \
                                     (__v8si) _mm256_setzero_si256 (),  \
                                     U, R))
/* Macro form of _mm512_cvtt_roundpd_epu32.  Calls the cvttpd2udq512
   "mask" builtin on A.  The second slot receives the value of
   _mm256_undefined_si256 () viewed as __v8si, the mask slot is the
   constant -1, and R (called __R in the inline-function form) is the
   last argument.  The result is cast to __m256i.  */
#define _mm512_cvtt_roundpd_epu32(A, R)                                 \
  ((__m256i)                                                            \
   __builtin_ia32_cvttpd2udq512_mask (A,                                \
                                      (__v8si) _mm256_undefined_si256 (), \
                                      -1, R))
/* Macro form of _mm512_mask_cvtt_roundpd_epu32.  Calls the cvttpd2udq512
   "mask" builtin on A, with W viewed as __v8si in the second slot, U in
   the mask slot, and R (called __R in the inline-function form) as the
   last argument.  The result is cast to __m256i.  */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R)                      \
  ((__m256i)                                                            \
   __builtin_ia32_cvttpd2udq512_mask (A, (__v8si) (W), U, R))
/* Macro form of _mm512_maskz_cvtt_roundpd_epu32.  Calls the cvttpd2udq512
   "mask" builtin on A.  The second slot receives the value of
   _mm256_setzero_si256 () viewed as __v8si, U goes in the mask slot, and
   R (called __R in the inline-function form) is the last argument.  The
   result is cast to __m256i.  */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R)                        \
  ((__m256i)                                                            \
   __builtin_ia32_cvttpd2udq512_mask (A,                                \
                                      (__v8si) _mm256_setzero_si256 (), \
                                      U, R))
4309 extern __inline __m256i
4310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4311 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4313 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4315 _mm256_undefined_si256 (),
4316 (__mmask8) -1, __R);
4319 extern __inline __m256i
4320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4321 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4324 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4326 (__mmask8) __U, __R);
4329 extern __inline __m256i
4330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4331 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4333 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4335 _mm256_setzero_si256 (),
4336 (__mmask8) __U, __R);
4339 extern __inline __m256i
4340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4341 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4343 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4345 _mm256_undefined_si256 (),
4346 (__mmask8) -1, __R);
4349 extern __inline __m256i
4350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4351 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4354 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4356 (__mmask8) __U, __R);
4359 extern __inline __m256i
4360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4361 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4363 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4365 _mm256_setzero_si256 (),
4366 (__mmask8) __U, __R);
/* Macro form of _mm512_cvt_roundpd_epi32.  Calls the cvtpd2dq512 "mask"
   builtin on A.  The second slot receives the value of
   _mm256_undefined_si256 () viewed as __v8si, the mask slot is the
   constant -1, and R (called __R in the inline-function form) is the
   last argument.  The result is cast to __m256i.  */
#define _mm512_cvt_roundpd_epi32(A, R)                                  \
  ((__m256i)                                                            \
   __builtin_ia32_cvtpd2dq512_mask (A,                                  \
                                    (__v8si) _mm256_undefined_si256 (), \
                                    -1, R))
/* Macro form of _mm512_mask_cvt_roundpd_epi32.  Calls the cvtpd2dq512
   "mask" builtin on A, with W viewed as __v8si in the second slot, U in
   the mask slot, and R (called __R in the inline-function form) as the
   last argument.  The result is cast to __m256i.  */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R)                       \
  ((__m256i)                                                            \
   __builtin_ia32_cvtpd2dq512_mask (A, (__v8si) (W), U, R))
/* Macro form of _mm512_maskz_cvt_roundpd_epi32.  Calls the cvtpd2dq512
   "mask" builtin on A.  The second slot receives the value of
   _mm256_setzero_si256 () viewed as __v8si, U goes in the mask slot, and
   R (called __R in the inline-function form) is the last argument.  The
   result is cast to __m256i.  */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R)                         \
  ((__m256i)                                                            \
   __builtin_ia32_cvtpd2dq512_mask (A,                                  \
                                    (__v8si) _mm256_setzero_si256 (),   \
                                    U, R))
/* Macro form of _mm512_cvt_roundpd_epu32.  Calls the cvtpd2udq512 "mask"
   builtin on A.  The second slot receives the value of
   _mm256_undefined_si256 () viewed as __v8si, the mask slot is the
   constant -1, and R (called __R in the inline-function form) is the
   last argument.  The result is cast to __m256i.  */
#define _mm512_cvt_roundpd_epu32(A, R)                                  \
  ((__m256i)                                                            \
   __builtin_ia32_cvtpd2udq512_mask (A,                                 \
                                     (__v8si) _mm256_undefined_si256 (), \
                                     -1, R))
/* Macro form of _mm512_mask_cvt_roundpd_epu32.  Calls the cvtpd2udq512
   "mask" builtin on A, with W viewed as __v8si in the second slot, U in
   the mask slot, and R (called __R in the inline-function form) as the
   last argument.  The result is cast to __m256i.  */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R)                       \
  ((__m256i)                                                            \
   __builtin_ia32_cvtpd2udq512_mask (A, (__v8si) (W), U, R))
/* Macro form of _mm512_maskz_cvt_roundpd_epu32.  Calls the cvtpd2udq512
   "mask" builtin on A.  The second slot receives the value of
   _mm256_setzero_si256 () viewed as __v8si, U goes in the mask slot, and
   R (called __R in the inline-function form) is the last argument.  The
   result is cast to __m256i.  */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R)                         \
  ((__m256i)                                                            \
   __builtin_ia32_cvtpd2udq512_mask (A,                                 \
                                     (__v8si) _mm256_setzero_si256 (),  \
                                     U, R))
4389 extern __inline __m512i
4390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4391 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4393 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4395 _mm512_undefined_epi32 (),
4396 (__mmask16) -1, __R);
4399 extern __inline __m512i
4400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4401 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4404 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4406 (__mmask16) __U, __R);
4409 extern __inline __m512i
4410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4411 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4413 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4415 _mm512_setzero_si512 (),
4416 (__mmask16) __U, __R);
4419 extern __inline __m512i
4420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4421 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4423 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4425 _mm512_undefined_epi32 (),
4426 (__mmask16) -1, __R);
4429 extern __inline __m512i
4430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4431 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4434 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4436 (__mmask16) __U, __R);
4439 extern __inline __m512i
4440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4441 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4443 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4445 _mm512_setzero_si512 (),
4446 (__mmask16) __U, __R);
/* Macro form of _mm512_cvtt_roundps_epi32.  Calls the cvttps2dq512
   "mask" builtin on A.  The second slot receives the value of
   _mm512_undefined_epi32 () viewed as __v16si, the mask slot is the
   constant -1, and R (called __R in the inline-function form) is the
   last argument.  The result is cast to __m512i.  */
#define _mm512_cvtt_roundps_epi32(A, R)                                 \
  ((__m512i)                                                            \
   __builtin_ia32_cvttps2dq512_mask (A,                                 \
                                     (__v16si) _mm512_undefined_epi32 (), \
                                     -1, R))
/* Macro form of _mm512_mask_cvtt_roundps_epi32.  Calls the cvttps2dq512
   "mask" builtin on A, with W viewed as __v16si in the second slot, U in
   the mask slot, and R (called __R in the inline-function form) as the
   last argument.  The result is cast to __m512i.  */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R)                      \
  ((__m512i)                                                            \
   __builtin_ia32_cvttps2dq512_mask (A, (__v16si) (W), U, R))
4455 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4456 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4458 #define _mm512_cvtt_roundps_epu32(A, B) \
4459 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))
4461 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4462 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4464 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4465 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
/* cvt_roundps_epi32 family: each form passes __A (viewed as __v16sf), a
   second vector operand, a 16-bit mask and the __R operand to
   __builtin_ia32_cvtps2dq512_mask.  The plain form uses
   _mm512_undefined_epi32 () with an all-ones mask, the maskz form uses
   _mm512_setzero_si512 () with __U, and the mask form presumably uses
   __W with __U (TODO confirm).  */
4469 extern __inline __m512i
4470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4473 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4475 _mm512_undefined_epi32 (),
4476 (__mmask16) -1, __R);
4479 extern __inline __m512i
4480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4481 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4484 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4486 (__mmask16) __U, __R);
4489 extern __inline __m512i
4490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4491 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4493 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4495 _mm512_setzero_si512 (),
4496 (__mmask16) __U, __R);
/* cvt_roundps_epu32 family: same operand layout as the epi32 forms, but
   built on __builtin_ia32_cvtps2udq512_mask.  The plain form uses
   _mm512_undefined_epi32 () with an all-ones mask, the maskz form uses
   _mm512_setzero_si512 () with __U, and the mask form presumably uses
   __W with __U (TODO confirm).  */
4499 extern __inline __m512i
4500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4501 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4503 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4505 _mm512_undefined_epi32 (),
4506 (__mmask16) -1, __R);
4509 extern __inline __m512i
4510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4511 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4514 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4516 (__mmask16) __U, __R);
4519 extern __inline __m512i
4520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4521 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4523 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4525 _mm512_setzero_si512 (),
4526 (__mmask16) __U, __R);
/* Macro definitions of the cvt_roundps_epi32 / cvt_roundps_epu32
   intrinsics.  Each one expands to a single call of the corresponding
   builtin: V is the source vector, R the trailing integer operand, SRC
   the vector passed as second operand and M the mask.  The unmasked
   forms pass an undefined vector and -1 as the mask; the maskz forms
   pass an all-zero vector.  */
#define _mm512_cvt_roundps_epi32(V, R)					\
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(V,				\
     (__v16si)_mm512_undefined_epi32 (), -1, R))

#define _mm512_mask_cvt_roundps_epi32(SRC, M, V, R)			\
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(V, (__v16si)(SRC), M, R))

#define _mm512_maskz_cvt_roundps_epi32(M, V, R)				\
  ((__m512i)__builtin_ia32_cvtps2dq512_mask(V,				\
     (__v16si)_mm512_setzero_si512 (), M, R))

#define _mm512_cvt_roundps_epu32(V, R)					\
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(V,				\
     (__v16si)_mm512_undefined_epi32 (), -1, R))

#define _mm512_mask_cvt_roundps_epu32(SRC, M, V, R)			\
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(V, (__v16si)(SRC), M, R))

#define _mm512_maskz_cvt_roundps_epu32(M, V, R)				\
  ((__m512i)__builtin_ia32_cvtps2udq512_mask(V,				\
     (__v16si)_mm512_setzero_si512 (), M, R))
/* Return the result of __builtin_ia32_cvtusi2sd32 applied to __A (viewed
   as __v2df) and the unsigned value __B.  Presumably __B is converted to
   double and placed in the low element of __A (TODO confirm against the
   instruction reference).  */
4548 extern __inline __m128d
4549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4550 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4552 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
/* 64-bit integer to scalar double conversions taking an extra integer
   operand __R.  _mm_cvt_roundu64_sd takes an unsigned long long and calls
   __builtin_ia32_cvtusi2sd64; _mm_cvt_roundi64_sd and
   _mm_cvt_roundsi64_sd take a long long and both call
   __builtin_ia32_cvtsi2sd64 with identical arguments, so they are two
   names for the same operation.  */
4557 extern __inline __m128d
4558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4559 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4561 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4564 extern __inline __m128d
4565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4566 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4568 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4571 extern __inline __m128d
4572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4575 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
/* Macro definitions of the 64-bit integer to scalar double conversions.
   A is the __m128d operand, B the integer value and C the trailing
   integer operand handed to the builtin.  The whole expansion is wrapped
   in parentheses so that a postfix operator written after the macro
   invocation (for example a subscript) applies to the cast result and
   not to the builtin call in front of the cast.  */
#define _mm_cvt_roundu64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
/* 32-bit integer to scalar float conversions taking an extra integer
   operand __R.  _mm_cvt_roundu32_ss takes an unsigned value and calls
   __builtin_ia32_cvtusi2ss32; _mm_cvt_roundsi32_ss and
   _mm_cvt_roundi32_ss take an int and both call
   __builtin_ia32_cvtsi2ss32 with identical arguments.  __A is passed as
   __v4sf in every case.  */
4591 extern __inline __m128
4592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4593 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4595 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4598 extern __inline __m128
4599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4600 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4602 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4605 extern __inline __m128
4606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4607 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4609 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
/* Macro definitions of the 32-bit integer to scalar float conversions.
   A is the __m128 operand, B the integer value and C the trailing
   integer operand handed to the builtin.  The whole expansion is wrapped
   in parentheses so that a postfix operator written after the macro
   invocation applies to the cast result and not to the builtin call in
   front of the cast.  */
#define _mm_cvt_roundu32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
/* 64-bit integer to scalar float conversions taking an extra integer
   operand __R.  _mm_cvt_roundu64_ss takes an unsigned long long and calls
   __builtin_ia32_cvtusi2ss64; _mm_cvt_roundsi64_ss and
   _mm_cvt_roundi64_ss take a long long and both call
   __builtin_ia32_cvtsi2ss64 with identical arguments.  __A is passed as
   __v4sf in every case.  */
4624 extern __inline __m128
4625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4626 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4628 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4631 extern __inline __m128
4632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4635 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4638 extern __inline __m128
4639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4640 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4642 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
/* Macro definitions of the 64-bit integer to scalar float conversions.
   A is the __m128 operand, B the integer value and C the trailing
   integer operand handed to the builtin.  The whole expansion is wrapped
   in parentheses so that a postfix operator written after the macro
   invocation applies to the cast result and not to the builtin call in
   front of the cast.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
/* cvtepi32_epi8 family: down-convert the sixteen 32-bit elements of __A
   (as __v16si) to a __v16qi-sized result via
   __builtin_ia32_pmovdb512_mask.  The plain form passes
   _mm_undefined_si128 () as the second operand, the mask form passes __O
   together with the write mask __M, and the maskz form passes
   _mm_setzero_si128 ().  _mm512_mask_cvtepi32_storeu_epi8 returns nothing
   and instead hands the destination pointer __P (as __v16qi *), __A and
   __M to __builtin_ia32_pmovdb512mem_mask.  */
4657 extern __inline __m128i
4658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4659 _mm512_cvtepi32_epi8 (__m512i __A)
4661 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4663 _mm_undefined_si128 (),
4667 extern __inline void
4668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4669 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4671 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4674 extern __inline __m128i
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4678 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4679 (__v16qi) __O, __M);
4682 extern __inline __m128i
4683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4684 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4686 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4688 _mm_setzero_si128 (),
/* cvtsepi32_epi8 family: same shape as the cvtepi32_epi8 forms but built
   on __builtin_ia32_pmovsdb512_mask / __builtin_ia32_pmovsdb512mem_mask
   (presumably the signed-saturating variant of the down-conversion;
   confirm against the instruction reference).  Plain form:
   _mm_undefined_si128 () as second operand; mask form: __O with write
   mask __M; maskz form: _mm_setzero_si128 (); storeu form: writes through
   __P (as __v16qi *) under mask __M.  */
4692 extern __inline __m128i
4693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4694 _mm512_cvtsepi32_epi8 (__m512i __A)
4696 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4698 _mm_undefined_si128 (),
4702 extern __inline void
4703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4704 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4706 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4709 extern __inline __m128i
4710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4711 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4713 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4714 (__v16qi) __O, __M);
4717 extern __inline __m128i
4718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4719 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4721 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4723 _mm_setzero_si128 (),
/* cvtusepi32_epi8 family: same shape as the cvtepi32_epi8 forms but built
   on __builtin_ia32_pmovusdb512_mask / __builtin_ia32_pmovusdb512mem_mask
   (presumably the unsigned-saturating variant; confirm against the
   instruction reference).  Plain form: _mm_undefined_si128 () as second
   operand; mask form: presumably __O with write mask __M; maskz form:
   _mm_setzero_si128 (); storeu form: writes through __P (as __v16qi *)
   under mask __M.  */
4727 extern __inline __m128i
4728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4729 _mm512_cvtusepi32_epi8 (__m512i __A)
4731 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4733 _mm_undefined_si128 (),
4737 extern __inline void
4738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4739 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4741 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4744 extern __inline __m128i
4745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4748 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4753 extern __inline __m128i
4754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4755 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4757 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4759 _mm_setzero_si128 (),
/* cvtepi32_epi16 family: down-convert the sixteen 32-bit elements of __A
   (as __v16si) to a __v16hi-sized result via
   __builtin_ia32_pmovdw512_mask.  The plain form passes
   _mm256_undefined_si256 () as the second operand, the mask form passes
   __O together with the write mask __M, and the maskz form passes
   _mm256_setzero_si256 ().  _mm512_mask_cvtepi32_storeu_epi16 returns
   nothing and hands __P (as __v16hi *), __A and __M to
   __builtin_ia32_pmovdw512mem_mask.  */
4763 extern __inline __m256i
4764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4765 _mm512_cvtepi32_epi16 (__m512i __A)
4767 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4769 _mm256_undefined_si256 (),
4773 extern __inline void
4774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4775 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4777 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4780 extern __inline __m256i
4781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4782 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4784 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4785 (__v16hi) __O, __M);
4788 extern __inline __m256i
4789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4790 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4792 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4794 _mm256_setzero_si256 (),
/* cvtsepi32_epi16 family: same shape as the cvtepi32_epi16 forms but
   built on __builtin_ia32_pmovsdw512_mask /
   __builtin_ia32_pmovsdw512mem_mask (presumably the signed-saturating
   variant; confirm against the instruction reference).  Plain form:
   _mm256_undefined_si256 () as second operand; mask form: __O with write
   mask __M; maskz form: _mm256_setzero_si256 (); storeu form: writes
   through __P (as __v16hi *) under mask __M.  */
4798 extern __inline __m256i
4799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4800 _mm512_cvtsepi32_epi16 (__m512i __A)
4802 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4804 _mm256_undefined_si256 (),
4808 extern __inline void
4809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4810 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4812 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4815 extern __inline __m256i
4816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4817 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4819 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4820 (__v16hi) __O, __M);
4823 extern __inline __m256i
4824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4825 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4827 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4829 _mm256_setzero_si256 (),
/* cvtusepi32_epi16 family: same shape as the cvtepi32_epi16 forms but
   built on __builtin_ia32_pmovusdw512_mask /
   __builtin_ia32_pmovusdw512mem_mask (presumably the unsigned-saturating
   variant; confirm against the instruction reference).  Plain form:
   _mm256_undefined_si256 () as second operand; mask form: presumably __O
   with write mask __M; maskz form: _mm256_setzero_si256 (); storeu form:
   writes through __P (as __v16hi *) under mask __M.  */
4833 extern __inline __m256i
4834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4835 _mm512_cvtusepi32_epi16 (__m512i __A)
4837 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4839 _mm256_undefined_si256 (),
4843 extern __inline void
4844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4845 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4847 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4850 extern __inline __m256i
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4854 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4859 extern __inline __m256i
4860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4861 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4863 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4865 _mm256_setzero_si256 (),
/* cvtepi64_epi32 family: down-convert the eight 64-bit elements of __A
   (as __v8di) via __builtin_ia32_pmovqd512_mask, returning a __m256i.
   The plain form passes _mm256_undefined_si256 () as the second operand,
   the maskz form passes _mm256_setzero_si256 (), and the mask form
   presumably passes __O with write mask __M (TODO confirm).
   _mm512_mask_cvtepi64_storeu_epi32 returns nothing and hands __P (as
   __v8si *), __A and __M to __builtin_ia32_pmovqd512mem_mask.  */
4869 extern __inline __m256i
4870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4871 _mm512_cvtepi64_epi32 (__m512i __A)
4873 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4875 _mm256_undefined_si256 (),
4879 extern __inline void
4880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4883 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4886 extern __inline __m256i
4887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4888 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4890 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4894 extern __inline __m256i
4895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4896 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4898 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4900 _mm256_setzero_si256 (),
/* cvtsepi64_epi32 family: same shape as the cvtepi64_epi32 forms but
   built on __builtin_ia32_pmovsqd512_mask /
   __builtin_ia32_pmovsqd512mem_mask (presumably the signed-saturating
   variant; confirm against the instruction reference).  Plain form:
   _mm256_undefined_si256 () as second operand; mask form: presumably __O
   with write mask __M; maskz form: _mm256_setzero_si256 (); storeu form:
   writes through __P (as __v8si *) under mask __M.  */
4904 extern __inline __m256i
4905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4906 _mm512_cvtsepi64_epi32 (__m512i __A)
4908 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4910 _mm256_undefined_si256 (),
4914 extern __inline void
4915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4916 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4918 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4921 extern __inline __m256i
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4925 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4929 extern __inline __m256i
4930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4931 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4933 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4935 _mm256_setzero_si256 (),
/* cvtusepi64_epi32 family: same shape as the cvtepi64_epi32 forms but
   built on __builtin_ia32_pmovusqd512_mask /
   __builtin_ia32_pmovusqd512mem_mask (presumably the unsigned-saturating
   variant; confirm against the instruction reference).  Plain form:
   _mm256_undefined_si256 () as second operand; mask form: presumably __O
   with write mask __M; maskz form: _mm256_setzero_si256 (); storeu form:
   writes through __P (as __v8si *) under mask __M.  */
4939 extern __inline __m256i
4940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4941 _mm512_cvtusepi64_epi32 (__m512i __A)
4943 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4945 _mm256_undefined_si256 (),
4949 extern __inline void
4950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4951 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4953 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4956 extern __inline __m256i
4957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4960 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4964 extern __inline __m256i
4965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4966 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4968 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4970 _mm256_setzero_si256 (),
/* cvtepi64_epi16 family: down-convert the eight 64-bit elements of __A
   (as __v8di) via __builtin_ia32_pmovqw512_mask, returning a __m128i.
   The plain form passes _mm_undefined_si128 () as the second operand,
   the maskz form passes _mm_setzero_si128 (), and the mask form
   presumably passes __O with write mask __M (TODO confirm).
   _mm512_mask_cvtepi64_storeu_epi16 returns nothing and hands __P (as
   __v8hi *), __A and __M to __builtin_ia32_pmovqw512mem_mask.  */
4974 extern __inline __m128i
4975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4976 _mm512_cvtepi64_epi16 (__m512i __A)
4978 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4980 _mm_undefined_si128 (),
4984 extern __inline void
4985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4986 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4988 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4991 extern __inline __m128i
4992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4993 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4995 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4999 extern __inline __m128i
5000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5001 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5003 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5005 _mm_setzero_si128 (),
/* cvtsepi64_epi16 family: same shape as the cvtepi64_epi16 forms but
   built on __builtin_ia32_pmovsqw512_mask /
   __builtin_ia32_pmovsqw512mem_mask (presumably the signed-saturating
   variant; confirm against the instruction reference).  Plain form:
   _mm_undefined_si128 () as second operand; mask form: presumably __O
   with write mask __M; maskz form: _mm_setzero_si128 (); storeu form:
   writes through __P (as __v8hi *) under mask __M.  */
5009 extern __inline __m128i
5010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5011 _mm512_cvtsepi64_epi16 (__m512i __A)
5013 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5015 _mm_undefined_si128 (),
5019 extern __inline void
5020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5023 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5026 extern __inline __m128i
5027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5028 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5030 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5034 extern __inline __m128i
5035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5036 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5038 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5040 _mm_setzero_si128 (),
/* cvtusepi64_epi16 family: same shape as the cvtepi64_epi16 forms but
   built on __builtin_ia32_pmovusqw512_mask /
   __builtin_ia32_pmovusqw512mem_mask (presumably the unsigned-saturating
   variant; confirm against the instruction reference).  Plain form:
   _mm_undefined_si128 () as second operand; mask form: presumably __O
   with write mask __M; maskz form: _mm_setzero_si128 (); storeu form:
   writes through __P (as __v8hi *) under mask __M.  */
5044 extern __inline __m128i
5045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5046 _mm512_cvtusepi64_epi16 (__m512i __A)
5048 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5050 _mm_undefined_si128 (),
5054 extern __inline void
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5058 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5061 extern __inline __m128i
5062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5063 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5065 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5069 extern __inline __m128i
5070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5071 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5073 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5075 _mm_setzero_si128 (),
/* cvtepi64_epi8 family: down-convert the eight 64-bit elements of __A
   (as __v8di) via __builtin_ia32_pmovqb512_mask, returning a __m128i.
   The plain form passes _mm_undefined_si128 () as the second operand,
   the mask form passes __O (as __v16qi) with write mask __M, and the
   maskz form passes _mm_setzero_si128 ().
   _mm512_mask_cvtepi64_storeu_epi8 returns nothing and hands __P (as
   __v16qi *), __A and __M to __builtin_ia32_pmovqb512mem_mask.  */
5079 extern __inline __m128i
5080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5081 _mm512_cvtepi64_epi8 (__m512i __A)
5083 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5085 _mm_undefined_si128 (),
5089 extern __inline void
5090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5091 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5093 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5096 extern __inline __m128i
5097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5098 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5100 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5101 (__v16qi) __O, __M);
5104 extern __inline __m128i
5105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5106 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5108 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5110 _mm_setzero_si128 (),
/* cvtsepi64_epi8 family: same shape as the cvtepi64_epi8 forms but built
   on __builtin_ia32_pmovsqb512_mask / __builtin_ia32_pmovsqb512mem_mask
   (presumably the signed-saturating variant; confirm against the
   instruction reference).  Plain form: _mm_undefined_si128 () as second
   operand; mask form: __O (as __v16qi) with write mask __M; maskz form:
   _mm_setzero_si128 (); storeu form: writes through __P (as __v16qi *)
   under mask __M.  */
5114 extern __inline __m128i
5115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5116 _mm512_cvtsepi64_epi8 (__m512i __A)
5118 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5120 _mm_undefined_si128 (),
5124 extern __inline void
5125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5126 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5128 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5131 extern __inline __m128i
5132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5135 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5136 (__v16qi) __O, __M);
5139 extern __inline __m128i
5140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5141 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5143 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5145 _mm_setzero_si128 (),
/* cvtusepi64_epi8 family: same shape as the cvtepi64_epi8 forms but built
   on __builtin_ia32_pmovusqb512_mask / __builtin_ia32_pmovusqb512mem_mask
   (presumably the unsigned-saturating variant; confirm against the
   instruction reference).  Plain form: _mm_undefined_si128 () as second
   operand; mask form: presumably __O with write mask __M; maskz form:
   _mm_setzero_si128 (); storeu form: writes through __P (as __v16qi *)
   under mask __M.  */
5149 extern __inline __m128i
5150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5151 _mm512_cvtusepi64_epi8 (__m512i __A)
5153 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5155 _mm_undefined_si128 (),
5159 extern __inline void
5160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5161 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5163 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5166 extern __inline __m128i
5167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5170 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5175 extern __inline __m128i
5176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5177 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5179 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5181 _mm_setzero_si128 (),
/* cvtepi32_pd family: convert the eight 32-bit elements of the __m256i
   __A (as __v8si) to a __m512d via __builtin_ia32_cvtdq2pd512_mask.  The
   plain form passes _mm512_undefined_pd () as the second operand, the
   maskz form passes _mm512_setzero_pd (), and the mask form presumably
   passes __W with write mask __U (TODO confirm).  */
5185 extern __inline __m512d
5186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187 _mm512_cvtepi32_pd (__m256i __A)
5189 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5191 _mm512_undefined_pd (),
5195 extern __inline __m512d
5196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5199 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5204 extern __inline __m512d
5205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5208 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5210 _mm512_setzero_pd (),
/* cvtepu32_pd family: same shape as the cvtepi32_pd forms but built on
   __builtin_ia32_cvtudq2pd512_mask (presumably treating the source
   elements as unsigned; confirm against the instruction reference).
   Plain form: _mm512_undefined_pd () as second operand; maskz form:
   _mm512_setzero_pd (); mask form: presumably __W with write mask __U.  */
5214 extern __inline __m512d
5215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5216 _mm512_cvtepu32_pd (__m256i __A)
5218 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5220 _mm512_undefined_pd (),
5224 extern __inline __m512d
5225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5226 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5228 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5233 extern __inline __m512d
5234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5235 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5237 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5239 _mm512_setzero_pd (),
/* cvt_roundepi32_ps family: convert the sixteen 32-bit elements of __A
   (as __v16si) to a __m512 via __builtin_ia32_cvtdq2ps512_mask, passing
   the integer operand __R last.  The plain form uses
   _mm512_undefined_ps () with an all-ones mask, the maskz form uses
   _mm512_setzero_ps () with __U, and the mask form presumably uses __W
   with __U (TODO confirm).  */
5244 extern __inline __m512
5245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5246 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5248 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5250 _mm512_undefined_ps (),
5251 (__mmask16) -1, __R);
5254 extern __inline __m512
5255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5256 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5259 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5261 (__mmask16) __U, __R);
5264 extern __inline __m512
5265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5266 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5268 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5270 _mm512_setzero_ps (),
5271 (__mmask16) __U, __R);
/* cvt_roundepu32_ps family: same shape as the cvt_roundepi32_ps forms but
   built on __builtin_ia32_cvtudq2ps512_mask (presumably treating the
   source elements as unsigned; confirm against the instruction
   reference).  Plain form: _mm512_undefined_ps () with an all-ones mask;
   maskz form: _mm512_setzero_ps () with __U; mask form: presumably __W
   with __U.  */
5274 extern __inline __m512
5275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5276 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5278 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5280 _mm512_undefined_ps (),
5281 (__mmask16) -1, __R);
5284 extern __inline __m512
5285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5286 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5289 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5291 (__mmask16) __U, __R);
5294 extern __inline __m512
5295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5296 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5298 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5300 _mm512_setzero_ps (),
5301 (__mmask16) __U, __R);
/* Macro definitions of the cvt_roundepi32_ps / cvt_roundepu32_ps
   intrinsics.  A is the integer source vector (cast to __v16si), B the
   trailing integer operand, W the vector passed as second operand and U
   the mask.  The unmasked forms pass an undefined vector and -1 as the
   mask; the maskz forms pass an all-zero vector.  The whole expansion is
   wrapped in parentheses so that a postfix operator written after the
   macro invocation applies to the cast result and not to the builtin
   call in front of the cast.  */
#define _mm512_cvt_roundepi32_ps(A, B)        \
    ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)   \
    ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)      \
    ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B))

#define _mm512_cvt_roundepu32_ps(A, B)        \
    ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)   \
    ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)      \
    ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B))
/* extractf64x4_pd family: return a __m256d produced by
   __builtin_ia32_extractf64x4_mask from __A (as __v8df); the __imm
   parameter presumably selects which 256-bit half is taken (TODO
   confirm).  The plain form passes _mm256_undefined_pd () as the vector
   operand, the maskz form passes _mm256_setzero_pd (), and the mask form
   presumably passes __W with write mask __U.  */
5325 extern __inline __m256d
5326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5329 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5332 _mm256_undefined_pd (),
5336 extern __inline __m256d
5337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5341 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5347 extern __inline __m256d
5348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5349 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5351 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5354 _mm256_setzero_pd (),
/* extractf32x4_ps family: return a __m128 produced by
   __builtin_ia32_extractf32x4_mask from __A (as __v16sf); the __imm
   parameter presumably selects which 128-bit lane is taken (TODO
   confirm).  The plain form passes _mm_undefined_ps () as the vector
   operand; the mask and maskz forms take a write mask __U and presumably
   pass __W and a zero vector respectively.  */
5358 extern __inline __m128
5359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5360 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5362 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5365 _mm_undefined_ps (),
5369 extern __inline __m128
5370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5371 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5374 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5380 extern __inline __m128
5381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5382 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5384 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
/* extracti64x4_epi64 family: return a __m256i produced by
   __builtin_ia32_extracti64x4_mask from __A (as __v8di); the __imm
   parameter presumably selects which 256-bit half is taken (TODO
   confirm).  The plain form passes _mm256_undefined_si256 () as the
   vector operand, the maskz form passes _mm256_setzero_si256 (), and the
   mask form presumably passes __W with write mask __U.  */
5391 extern __inline __m256i
5392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5393 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5395 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5398 _mm256_undefined_si256 (),
5402 extern __inline __m256i
5403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5404 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5407 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5413 extern __inline __m256i
5414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5415 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5417 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5420 _mm256_setzero_si256 (),
/* extracti32x4_epi32 family: return a __m128i produced by
   __builtin_ia32_extracti32x4_mask from __A (as __v16si); the __imm
   parameter presumably selects which 128-bit lane is taken (TODO
   confirm).  The plain form passes _mm_undefined_si128 () as the vector
   operand, the maskz form passes _mm_setzero_si128 (), and the mask form
   presumably passes __W with write mask __U.  */
5424 extern __inline __m128i
5425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5426 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5428 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5431 _mm_undefined_si128 (),
5435 extern __inline __m128i
5436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5437 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5440 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5446 extern __inline __m128i
5447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5448 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5450 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5453 _mm_setzero_si128 (),
/* Macro definitions of the extractf64x4_pd intrinsics.  X is cast to
   __m512d and then __v8df before being handed to
   __builtin_ia32_extractf64x4_mask; the vector operand is
   _mm256_undefined_pd () for the plain form, W for the mask form and
   _mm256_setzero_pd () for the maskz form.  C is presumably the selector
   immediate and U the write mask (TODO confirm).  */
5458 #define _mm512_extractf64x4_pd(X, C) \
5459 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5461 (__v4df)(__m256d)_mm256_undefined_pd(),\
5464 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5465 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5467 (__v4df)(__m256d)(W),\
5470 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5471 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5473 (__v4df)(__m256d)_mm256_setzero_pd(),\
/* Macro definitions of the extractf32x4_ps intrinsics.  X is cast to
   __m512 and then __v16sf before being handed to
   __builtin_ia32_extractf32x4_mask; the vector operand is
   _mm_undefined_ps () for the plain form, W for the mask form and
   _mm_setzero_ps () for the maskz form.  C is presumably the selector
   immediate and U the write mask (TODO confirm).  */
5476 #define _mm512_extractf32x4_ps(X, C) \
5477 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5479 (__v4sf)(__m128)_mm_undefined_ps(),\
5482 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
5483 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5485 (__v4sf)(__m128)(W),\
5488 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
5489 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5491 (__v4sf)(__m128)_mm_setzero_ps(),\
/* Macro definitions of the extracti64x4_epi64 intrinsics.  X is cast to
   __m512i and then __v8di before being handed to
   __builtin_ia32_extracti64x4_mask; the vector operand is
   _mm256_undefined_si256 () for the plain form, W for the mask form and
   _mm256_setzero_si256 () for the maskz form.  C is presumably the
   selector immediate and U the write mask (TODO confirm).  */
5494 #define _mm512_extracti64x4_epi64(X, C) \
5495 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5497 (__v4di)(__m256i)_mm256_undefined_si256 (),\
5500 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
5501 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5503 (__v4di)(__m256i)(W),\
5506 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
5507 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5509 (__v4di)(__m256i)_mm256_setzero_si256 (),\
/* Macro definitions of the extracti32x4_epi32 intrinsics.  X is cast to
   __m512i and then __v16si before being handed to
   __builtin_ia32_extracti32x4_mask; the vector operand is
   _mm_undefined_si128 () for the plain form, W for the mask form and
   _mm_setzero_si128 () for the maskz form.  C is presumably the selector
   immediate and U the write mask (TODO confirm).  */
5512 #define _mm512_extracti32x4_epi32(X, C) \
5513 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5515 (__v4si)(__m128i)_mm_undefined_si128 (),\
5518 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5519 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5521 (__v4si)(__m128i)(W),\
5524 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5525 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5527 (__v4si)(__m128i)_mm_setzero_si128 (),\
/* 128-bit insert intrinsics.  _mm512_inserti32x4 returns the __m512i
   result of __builtin_ia32_inserti32x4_mask applied to __A (as __v16si),
   and _mm512_insertf32x4 returns the __m512 result of
   __builtin_ia32_insertf32x4_mask applied to __A (as __v16sf).  __B is
   the 128-bit value and __imm presumably selects the destination lane
   (TODO confirm).  */
5532 extern __inline __m512i
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5536 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5542 extern __inline __m512
5543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5544 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5546 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
/* inserti64x4 family: return the __m512i result of
   __builtin_ia32_inserti64x4_mask applied to __A (as __v8di), the 256-bit
   value __B and __imm (presumably the destination-half selector; TODO
   confirm).  The plain form passes _mm512_undefined_epi32 () as the
   vector operand, the maskz form passes _mm512_setzero_si512 (), and the
   mask form presumably passes __W with write mask __U.  */
5552 extern __inline __m512i
5553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5554 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5556 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5560 _mm512_undefined_epi32 (),
5564 extern __inline __m512i
5565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5566 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5567 __m256i __B, const int __imm)
5569 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5576 extern __inline __m512i
5577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5578 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5581 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5585 _mm512_setzero_si512 (),
5589 extern __inline __m512d
5590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5591 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5593 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5597 _mm512_undefined_pd (),
5601 extern __inline __m512d
5602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5604 __m256d __B, const int __imm)
5606 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5613 extern __inline __m512d
5614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5615 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5618 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5622 _mm512_setzero_pd (),
/* Macro form of the insertf32x4 wrapper.  The mask is all-ones, so the
   pass-through operand never reaches the result; an undefined vector is
   used there so that X is expanded (and evaluated) only once.  */
#define _mm512_insertf32x4(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), \
    (__v16sf)(__m512)_mm512_undefined_ps (), \
    (__mmask16)(-1)))
/* Macro form of the inserti32x4 wrapper.  The mask is all-ones, so the
   pass-through operand never reaches the result; an undefined vector is
   used there so that X is expanded (and evaluated) only once.  */
#define _mm512_inserti32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), \
    (__v16si)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask16)(-1)))
/* Macro form: insertf64x4 builtin with an undefined pass-through vector
   and an all-ones mask.  */
#define _mm512_insertf64x4(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)(-1)))
/* Macro form: insertf64x4 builtin with W as the pass-through vector and
   U as the mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))
/* Macro form: insertf64x4 builtin with a zero pass-through vector and U
   as the mask.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))
/* Macro form: inserti64x4 builtin with an undefined pass-through vector
   and an all-ones mask.  */
#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask8)(-1)))
/* Macro form: inserti64x4 builtin with W as the pass-through vector and
   U as the mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))
/* Macro form: inserti64x4 builtin with a zero pass-through vector and U
   as the mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
5671 extern __inline __m512d
5672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5673 _mm512_loadu_pd (void const *__P)
5675 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5677 _mm512_undefined_pd (),
5681 extern __inline __m512d
5682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5683 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5685 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5690 extern __inline __m512d
5691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5692 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5694 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5696 _mm512_setzero_pd (),
5700 extern __inline void
5701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5702 _mm512_storeu_pd (void *__P, __m512d __A)
5704 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5708 extern __inline void
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5712 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5716 extern __inline __m512
5717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5718 _mm512_loadu_ps (void const *__P)
5720 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5722 _mm512_undefined_ps (),
5726 extern __inline __m512
5727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5728 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5730 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5735 extern __inline __m512
5736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5739 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5741 _mm512_setzero_ps (),
5745 extern __inline void
5746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747 _mm512_storeu_ps (void *__P, __m512 __A)
5749 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5753 extern __inline void
5754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5755 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5757 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5761 extern __inline __m512i
5762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5765 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5770 extern __inline __m512i
5771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5772 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5774 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5776 _mm512_setzero_si512 (),
5780 extern __inline void
5781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5782 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5784 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5788 extern __inline __m512i
5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 _mm512_loadu_si512 (void const *__P)
5792 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5794 _mm512_setzero_si512 (),
5798 extern __inline __m512i
5799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5800 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5802 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5807 extern __inline __m512i
5808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5811 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5813 _mm512_setzero_si512 (),
5817 extern __inline void
5818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5819 _mm512_storeu_si512 (void *__P, __m512i __A)
5821 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5825 extern __inline void
5826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5827 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5829 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5833 extern __inline __m512d
5834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5835 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5837 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5840 _mm512_undefined_pd (),
5844 extern __inline __m512d
5845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5848 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5854 extern __inline __m512d
5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5858 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5861 _mm512_setzero_pd (),
5865 extern __inline __m512
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5869 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5872 _mm512_undefined_ps (),
5876 extern __inline __m512
5877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5880 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5886 extern __inline __m512
5887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5888 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5890 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5893 _mm512_setzero_ps (),
5897 extern __inline __m512i
5898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5899 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5901 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5908 extern __inline __m512i
5909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5910 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5913 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5920 extern __inline __m512i
5921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5922 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5923 __mmask8 __U, __m512i __B)
5925 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5932 extern __inline __m512i
5933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5934 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5935 __m512i __I, __m512i __B)
5937 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5944 extern __inline __m512i
5945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5946 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5948 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5955 extern __inline __m512i
5956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5957 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5958 __m512i __I, __m512i __B)
5960 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5967 extern __inline __m512i
5968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5969 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5970 __mmask16 __U, __m512i __B)
5972 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5979 extern __inline __m512i
5980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5981 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5982 __m512i __I, __m512i __B)
5984 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5991 extern __inline __m512d
5992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5993 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5995 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6002 extern __inline __m512d
6003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6004 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6007 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6014 extern __inline __m512d
6015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6016 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6019 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6026 extern __inline __m512d
6027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6028 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6031 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6038 extern __inline __m512
6039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6040 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6042 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6049 extern __inline __m512
6050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6053 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6060 extern __inline __m512
6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6065 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6072 extern __inline __m512
6073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6074 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6077 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6085 extern __inline __m512d
6086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6087 _mm512_permute_pd (__m512d __X, const int __C)
6089 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6091 _mm512_undefined_pd (),
6095 extern __inline __m512d
6096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6097 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6099 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6104 extern __inline __m512d
6105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6106 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6108 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6110 _mm512_setzero_pd (),
6114 extern __inline __m512
6115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6116 _mm512_permute_ps (__m512 __X, const int __C)
6118 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6120 _mm512_undefined_ps (),
6124 extern __inline __m512
6125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6128 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6133 extern __inline __m512
6134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6135 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6137 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6139 _mm512_setzero_ps (),
/* Macro form: vpermilpd512 builtin with an undefined pass-through vector
   and an all-ones mask.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)(-1)))
/* Macro form: vpermilpd512 builtin with W as the pass-through vector and
   U as the mask.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))
/* Macro form: vpermilpd512 builtin with a zero pass-through vector and U
   as the mask.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))
/* Macro form: vpermilps512 builtin with an undefined pass-through vector
   and an all-ones mask.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)_mm512_undefined_ps(), \
    (__mmask16)(-1)))
/* Macro form: vpermilps512 builtin with W as the pass-through vector and
   U as the mask.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))
/* Macro form: vpermilps512 builtin with a zero pass-through vector and U
   as the mask.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))
6175 extern __inline __m512i
6176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6177 _mm512_permutex_epi64 (__m512i __X, const int __I)
6179 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6181 _mm512_undefined_epi32 (),
6185 extern __inline __m512i
6186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6187 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6188 __m512i __X, const int __I)
6190 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6195 extern __inline __m512i
6196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6197 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6199 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6201 _mm512_setzero_si512 (),
6205 extern __inline __m512d
6206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6207 _mm512_permutex_pd (__m512d __X, const int __M)
6209 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6211 _mm512_undefined_pd (),
6215 extern __inline __m512d
6216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6217 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6219 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6224 extern __inline __m512d
6225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6226 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6228 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6230 _mm512_setzero_pd (),
/* Macro form: permdf512 builtin with immediate M, an undefined
   pass-through vector and an all-ones mask.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)(-1)))
/* Macro form: permdf512 builtin with immediate M, W as the pass-through
   vector and U as the mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d)(X), \
     (int)(M), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U)))
/* Macro form: permdf512 builtin with immediate M, a zero pass-through
   vector and U as the mask.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))
/* Macro form: permdi512 builtin with immediate I, an undefined
   pass-through vector and an all-ones mask.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i) \
    (_mm512_undefined_epi32 ()), \
    (__mmask8)(-1)))
/* Macro form: permdi512 builtin with immediate I, a zero pass-through
   vector and M as the mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i) \
    (_mm512_setzero_si512 ()), \
    (__mmask8)(M)))
/* Macro form: permdi512 builtin with immediate I, W as the pass-through
   vector and M as the mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(M)))
6269 extern __inline __m512i
6270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6271 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6273 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6276 _mm512_setzero_si512 (),
6280 extern __inline __m512i
6281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6282 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6284 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6287 _mm512_undefined_epi32 (),
6291 extern __inline __m512i
6292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6293 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6296 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6302 extern __inline __m512i
6303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6306 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6309 _mm512_setzero_si512 (),
6313 extern __inline __m512i
6314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6317 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6320 _mm512_undefined_epi32 (),
6324 extern __inline __m512i
6325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6326 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6329 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6335 extern __inline __m512d
6336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6337 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6339 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6342 _mm512_undefined_pd (),
6346 extern __inline __m512d
6347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6348 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6350 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6356 extern __inline __m512d
6357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6358 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6360 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6363 _mm512_setzero_pd (),
6367 extern __inline __m512
6368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6369 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6371 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6374 _mm512_undefined_ps (),
6378 extern __inline __m512
6379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6380 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6382 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6388 extern __inline __m512
6389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6390 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6392 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6395 _mm512_setzero_ps (),
6400 extern __inline __m512
6401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6402 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6404 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6405 (__v16sf) __V, __imm,
6407 _mm512_undefined_ps (),
6411 extern __inline __m512
6412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6413 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6414 __m512 __V, const int __imm)
6416 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6417 (__v16sf) __V, __imm,
6422 extern __inline __m512
6423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6426 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6427 (__v16sf) __V, __imm,
6429 _mm512_setzero_ps (),
6433 extern __inline __m512d
6434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6435 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6437 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6438 (__v8df) __V, __imm,
6440 _mm512_undefined_pd (),
6444 extern __inline __m512d
6445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6447 __m512d __V, const int __imm)
6449 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6450 (__v8df) __V, __imm,
6455 extern __inline __m512d
6456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6460 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6461 (__v8df) __V, __imm,
6463 _mm512_setzero_pd (),
6467 extern __inline __m512d
6468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6469 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6470 const int __imm, const int __R)
6472 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6476 (__mmask8) -1, __R);
6479 extern __inline __m512d
6480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6481 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6482 __m512i __C, const int __imm, const int __R)
6484 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6488 (__mmask8) __U, __R);
6491 extern __inline __m512d
6492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6493 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6494 __m512i __C, const int __imm, const int __R)
6496 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6500 (__mmask8) __U, __R);
6503 extern __inline __m512
6504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6505 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6506 const int __imm, const int __R)
6508 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6512 (__mmask16) -1, __R);
6515 extern __inline __m512
6516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6517 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6518 __m512i __C, const int __imm, const int __R)
6520 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6524 (__mmask16) __U, __R);
6527 extern __inline __m512
6528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6529 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6530 __m512i __C, const int __imm, const int __R)
6532 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6536 (__mmask16) __U, __R);
6539 extern __inline __m128d
6540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6541 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6542 const int __imm, const int __R)
6544 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6546 (__v2di) __C, __imm,
6547 (__mmask8) -1, __R);
6550 extern __inline __m128d
6551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6552 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6553 __m128i __C, const int __imm, const int __R)
6555 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6557 (__v2di) __C, __imm,
6558 (__mmask8) __U, __R);
6561 extern __inline __m128d
6562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6563 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6564 __m128i __C, const int __imm, const int __R)
6566 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6570 (__mmask8) __U, __R);
6573 extern __inline __m128
6574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6575 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6576 const int __imm, const int __R)
6578 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6580 (__v4si) __C, __imm,
6581 (__mmask8) -1, __R);
6584 extern __inline __m128
6585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6586 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6587 __m128i __C, const int __imm, const int __R)
6589 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6591 (__v4si) __C, __imm,
6592 (__mmask8) __U, __R);
6595 extern __inline __m128
6596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6597 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6598 __m128i __C, const int __imm, const int __R)
6600 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6602 (__v4si) __C, __imm,
6603 (__mmask8) __U, __R);
/* Macro form: shufpd512 builtin with an undefined pass-through vector
   and an all-ones mask.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (int)(C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)(-1)))
/* Macro form: shufpd512 builtin with W as the pass-through vector and U
   as the mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))
/* Macro form: shufpd512 builtin with a zero pass-through vector and U as
   the mask.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))
/* Macro form: shufps512 builtin with an undefined pass-through vector
   and an all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)_mm512_undefined_ps(), \
    (__mmask16)(-1)))
/* Macro form: shufps512 builtin with W as the pass-through vector and U
   as the mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))
/* Macro form: shufps512 builtin with a zero pass-through vector and U as
   the mask.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))
/* Macro form: fixupimmpd512 builtin (_mask variant) with an all-ones
   mask; R is forwarded unchanged as the last argument.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))
/* Macro form: fixupimmpd512 builtin (_mask variant) with U as the mask;
   R is forwarded unchanged as the last argument.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
/* Macro form: fixupimmpd512 builtin (_maskz variant) with U as the mask;
   R is forwarded unchanged as the last argument.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (__v8di)(__m512i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
/* Macro form: fixupimmps512 builtin (_mask variant) with an all-ones
   mask; R is forwarded unchanged as the last argument.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(-1), \
     (R)))
/* Macro form: fixupimmps512 builtin (_mask variant) with U as the mask;
   R is forwarded unchanged as the last argument.  */
#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     (R)))
/* Macro form: fixupimmps512 builtin (_maskz variant) with U as the mask;
   R is forwarded unchanged as the last argument.  */
#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (__v16si)(__m512i)(Z), \
     (int)(C), \
     (__mmask16)(U), \
     (R)))
/* Macro form: fixupimmsd builtin (_mask variant) with an all-ones mask;
   R is forwarded unchanged as the last argument.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))
/* Macro form: fixupimmsd builtin (_mask variant) with U as the mask; R
   is forwarded unchanged as the last argument.  */
#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
/* Macro form: fixupimmsd builtin (_maskz variant) with U as the mask; R
   is forwarded unchanged as the last argument.  */
#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
/* Macro form: fixupimmss builtin (_mask variant) with an all-ones mask;
   R is forwarded unchanged as the last argument.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(-1), \
     (R)))
/* Macro form: fixupimmss builtin (_mask variant) with U as the mask; R
   is forwarded unchanged as the last argument.  */
#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
/* Macro form: fixupimmss builtin (_maskz variant) with U as the mask; R
   is forwarded unchanged as the last argument.  */
#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), \
     (int)(C), \
     (__mmask8)(U), \
     (R)))
6704 extern __inline __m512
6705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6706 _mm512_movehdup_ps (__m512 __A)
6708 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6710 _mm512_undefined_ps (),
6714 extern __inline __m512
6715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6716 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6718 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6723 extern __inline __m512
6724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6725 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6727 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6729 _mm512_setzero_ps (),
6733 extern __inline __m512
6734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735 _mm512_moveldup_ps (__m512 __A)
6737 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6739 _mm512_undefined_ps (),
6743 extern __inline __m512
6744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6745 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6747 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6752 extern __inline __m512
6753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6754 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6756 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6758 _mm512_setzero_ps (),
6762 extern __inline __m512i
6763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6764 _mm512_or_si512 (__m512i __A, __m512i __B)
6766 return (__m512i) ((__v16su) __A | (__v16su) __B);
6769 extern __inline __m512i
6770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6771 _mm512_or_epi32 (__m512i __A, __m512i __B)
6773 return (__m512i) ((__v16su) __A | (__v16su) __B);
6776 extern __inline __m512i
6777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6778 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6780 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6786 extern __inline __m512i
6787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6788 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6790 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6793 _mm512_setzero_si512 (),
6797 extern __inline __m512i
6798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6799 _mm512_or_epi64 (__m512i __A, __m512i __B)
6801 return (__m512i) ((__v8du) __A | (__v8du) __B);
6804 extern __inline __m512i
6805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6806 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6808 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6814 extern __inline __m512i
6815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6816 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6818 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6821 _mm512_setzero_si512 (),
6825 extern __inline __m512i
6826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6827 _mm512_xor_si512 (__m512i __A, __m512i __B)
6829 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6832 extern __inline __m512i
6833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6836 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6839 extern __inline __m512i
6840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6841 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6843 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6849 extern __inline __m512i
6850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6851 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6853 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6856 _mm512_setzero_si512 (),
6860 extern __inline __m512i
6861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6862 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6864 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6867 extern __inline __m512i
6868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6869 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6871 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6877 extern __inline __m512i
6878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6879 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6881 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6884 _mm512_setzero_si512 (),
6889 extern __inline __m512i
6890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6891 _mm512_rol_epi32 (__m512i __A, const int __B)
6893 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6895 _mm512_undefined_epi32 (),
6899 extern __inline __m512i
6900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6901 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6903 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6908 extern __inline __m512i
6909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6910 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6912 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6914 _mm512_setzero_si512 (),
6918 extern __inline __m512i
6919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6920 _mm512_ror_epi32 (__m512i __A, int __B)
6922 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6924 _mm512_undefined_epi32 (),
6928 extern __inline __m512i
6929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6930 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6932 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6937 extern __inline __m512i
6938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6939 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6941 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6943 _mm512_setzero_si512 (),
6947 extern __inline __m512i
6948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6949 _mm512_rol_epi64 (__m512i __A, const int __B)
6951 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6953 _mm512_undefined_epi32 (),
6957 extern __inline __m512i
6958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6959 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6961 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6966 extern __inline __m512i
6967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6968 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6970 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6972 _mm512_setzero_si512 (),
6976 extern __inline __m512i
6977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6978 _mm512_ror_epi64 (__m512i __A, int __B)
6980 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6982 _mm512_undefined_epi32 (),
6986 extern __inline __m512i
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6990 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6995 extern __inline __m512i
6996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6997 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6999 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7001 _mm512_setzero_si512 (),
/* Macro forms of the 32-bit rotate-left wrappers.  Each expands to a complete
   __builtin_ia32_prold512_mask call: (source, count, merge operand, mask).  */
#define _mm512_rol_epi32(A, B)						\
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	\
					   (int)(B),			\
					   (__v16si)_mm512_undefined_epi32 (), \
					   (__mmask16)(-1)))

#define _mm512_mask_rol_epi32(W, U, A, B)				\
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	\
					   (int)(B),			\
					   (__v16si)(__m512i)(W),	\
					   (__mmask16)(U)))

#define _mm512_maskz_rol_epi32(U, A, B)					\
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	\
					   (int)(B),			\
					   (__v16si)_mm512_setzero_si512 (), \
					   (__mmask16)(U)))
/* Macro forms of the 32-bit rotate-right wrappers.  Each expands to a complete
   __builtin_ia32_prord512_mask call: (source, count, merge operand, mask).  */
#define _mm512_ror_epi32(A, B)						\
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	\
					   (int)(B),			\
					   (__v16si)_mm512_undefined_epi32 (), \
					   (__mmask16)(-1)))

#define _mm512_mask_ror_epi32(W, U, A, B)				\
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	\
					   (int)(B),			\
					   (__v16si)(__m512i)(W),	\
					   (__mmask16)(U)))

#define _mm512_maskz_ror_epi32(U, A, B)					\
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	\
					   (int)(B),			\
					   (__v16si)_mm512_setzero_si512 (), \
					   (__mmask16)(U)))
/* Macro forms of the 64-bit rotate-left wrappers.  Each expands to a complete
   __builtin_ia32_prolq512_mask call: (source, count, merge operand, mask).  */
#define _mm512_rol_epi64(A, B)						\
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	\
					   (int)(B),			\
					   (__v8di)_mm512_undefined_epi32 (), \
					   (__mmask8)(-1)))

#define _mm512_mask_rol_epi64(W, U, A, B)				\
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	\
					   (int)(B),			\
					   (__v8di)(__m512i)(W),	\
					   (__mmask8)(U)))

#define _mm512_maskz_rol_epi64(U, A, B)					\
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	\
					   (int)(B),			\
					   (__v8di)_mm512_setzero_si512 (), \
					   (__mmask8)(U)))
/* Macro forms of the 64-bit rotate-right wrappers.  Each expands to a complete
   __builtin_ia32_prorq512_mask call: (source, count, merge operand, mask).  */
#define _mm512_ror_epi64(A, B)						\
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	\
					   (int)(B),			\
					   (__v8di)_mm512_undefined_epi32 (), \
					   (__mmask8)(-1)))

#define _mm512_mask_ror_epi64(W, U, A, B)				\
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	\
					   (int)(B),			\
					   (__v8di)(__m512i)(W),	\
					   (__mmask8)(U)))

#define _mm512_maskz_ror_epi64(U, A, B)					\
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	\
					   (int)(B),			\
					   (__v8di)_mm512_setzero_si512 (), \
					   (__mmask8)(U)))
7069 extern __inline __m512i
7070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7071 _mm512_and_si512 (__m512i __A, __m512i __B)
7073 return (__m512i) ((__v16su) __A & (__v16su) __B);
7076 extern __inline __m512i
7077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7078 _mm512_and_epi32 (__m512i __A, __m512i __B)
7080 return (__m512i) ((__v16su) __A & (__v16su) __B);
7083 extern __inline __m512i
7084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7085 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7087 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7093 extern __inline __m512i
7094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7095 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7097 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7100 _mm512_setzero_si512 (),
7104 extern __inline __m512i
7105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7106 _mm512_and_epi64 (__m512i __A, __m512i __B)
7108 return (__m512i) ((__v8du) __A & (__v8du) __B);
7111 extern __inline __m512i
7112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7113 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7115 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7120 extern __inline __m512i
7121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7122 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7124 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7127 _mm512_setzero_pd (),
7131 extern __inline __m512i
7132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7133 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7135 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7138 _mm512_undefined_epi32 (),
7142 extern __inline __m512i
7143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7144 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7146 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7149 _mm512_undefined_epi32 (),
7153 extern __inline __m512i
7154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7155 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7157 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7163 extern __inline __m512i
7164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7165 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7167 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7170 _mm512_setzero_si512 (),
7174 extern __inline __m512i
7175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7176 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7178 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7181 _mm512_undefined_epi32 (),
7185 extern __inline __m512i
7186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7187 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7189 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7194 extern __inline __m512i
7195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7196 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7198 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7201 _mm512_setzero_pd (),
7205 extern __inline __mmask16
7206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7207 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7209 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7214 extern __inline __mmask16
7215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7216 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7218 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7219 (__v16si) __B, __U);
7222 extern __inline __mmask8
7223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7224 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7226 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7231 extern __inline __mmask8
7232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7233 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7235 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7238 extern __inline __mmask16
7239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7240 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7242 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7247 extern __inline __mmask16
7248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7249 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7251 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7252 (__v16si) __B, __U);
7255 extern __inline __mmask8
7256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7257 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7259 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7264 extern __inline __mmask8
7265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7266 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7268 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7272 extern __inline __m512i
7273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7274 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7276 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7279 _mm512_undefined_epi32 (),
7283 extern __inline __m512i
7284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7285 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7288 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7294 extern __inline __m512i
7295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7296 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7298 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7301 _mm512_setzero_si512 (),
7305 extern __inline __m512i
7306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7307 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7309 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7312 _mm512_undefined_epi32 (),
7316 extern __inline __m512i
7317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7318 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7320 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7326 extern __inline __m512i
7327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7328 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7330 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7333 _mm512_setzero_si512 (),
7337 extern __inline __m512i
7338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7339 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7341 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7344 _mm512_undefined_epi32 (),
7348 extern __inline __m512i
7349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7350 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7353 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7359 extern __inline __m512i
7360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7361 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7363 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7366 _mm512_setzero_si512 (),
7370 extern __inline __m512i
7371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7372 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7374 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7377 _mm512_undefined_epi32 (),
7381 extern __inline __m512i
7382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7383 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7385 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7391 extern __inline __m512i
7392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7393 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7395 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7398 _mm512_setzero_si512 (),
7404 extern __inline unsigned long long
7405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7406 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7408 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7411 extern __inline long long
7412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7415 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7418 extern __inline long long
7419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7420 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7422 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7425 extern __inline unsigned long long
7426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7427 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7429 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7432 extern __inline long long
7433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7434 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7436 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7439 extern __inline long long
7440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7443 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
/* Macro forms of the scalar float -> 64-bit integer conversions.  A is the
   source vector and B the rounding operand; both are forwarded to the
   builtin and the result is cast to the C return type.  */
#define _mm_cvt_roundss_u64(A, B)	\
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 ((A), (B)))

#define _mm_cvt_roundss_si64(A, B)	\
  ((long long) __builtin_ia32_vcvtss2si64 ((A), (B)))

#define _mm_cvt_roundss_i64(A, B)	\
  ((long long) __builtin_ia32_vcvtss2si64 ((A), (B)))

#define _mm_cvtt_roundss_u64(A, B)	\
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 ((A), (B)))

#define _mm_cvtt_roundss_i64(A, B)	\
  ((long long) __builtin_ia32_vcvttss2si64 ((A), (B)))

#define _mm_cvtt_roundss_si64(A, B)	\
  ((long long) __builtin_ia32_vcvttss2si64 ((A), (B)))
7467 extern __inline unsigned
7468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7471 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7476 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7478 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7483 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7485 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7488 extern __inline unsigned
7489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7492 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7499 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7504 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7506 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
/* Macro forms of the scalar float -> 32-bit integer conversions.  A is the
   source vector and B the rounding operand; both are forwarded to the
   builtin and the result is cast to the C return type.  */
#define _mm_cvt_roundss_u32(A, B)	\
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((A), (B)))

#define _mm_cvt_roundss_si32(A, B)	\
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvt_roundss_i32(A, B)	\
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvtt_roundss_u32(A, B)	\
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((A), (B)))

#define _mm_cvtt_roundss_si32(A, B)	\
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))

#define _mm_cvtt_roundss_i32(A, B)	\
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))
7530 extern __inline unsigned long long
7531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7534 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7537 extern __inline long long
7538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7539 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7541 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7544 extern __inline long long
7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7546 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7548 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7551 extern __inline unsigned long long
7552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7553 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7555 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7558 extern __inline long long
7559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7560 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7562 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7565 extern __inline long long
7566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7567 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7569 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
/* Macro forms of the scalar double -> 64-bit integer conversions.  A is the
   source vector and B the rounding operand; both are forwarded to the
   builtin and the result is cast to the C return type.  */
#define _mm_cvt_roundsd_u64(A, B)	\
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 ((A), (B)))

#define _mm_cvt_roundsd_si64(A, B)	\
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvt_roundsd_i64(A, B)	\
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_u64(A, B)	\
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 ((A), (B)))

#define _mm_cvtt_roundsd_si64(A, B)	\
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_i64(A, B)	\
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
7593 extern __inline unsigned
7594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7595 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7597 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7602 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7604 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7609 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7611 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7614 extern __inline unsigned
7615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7616 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7618 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7623 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7625 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7630 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7632 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
/* Macro forms of the scalar double -> 32-bit integer conversions.  A is the
   source vector and B the rounding operand; both are forwarded to the
   builtin and the result is cast to the C return type.  */
#define _mm_cvt_roundsd_u32(A, B)	\
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))

#define _mm_cvt_roundsd_si32(A, B)	\
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvt_roundsd_i32(A, B)	\
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B)	\
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B)	\
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B)	\
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
7654 extern __inline __m512d
7655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656 _mm512_movedup_pd (__m512d __A)
7658 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7660 _mm512_undefined_pd (),
7664 extern __inline __m512d
7665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7668 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7673 extern __inline __m512d
7674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7675 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7677 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7679 _mm512_setzero_pd (),
7683 extern __inline __m512d
7684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7685 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7687 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7690 _mm512_undefined_pd (),
7694 extern __inline __m512d
7695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7696 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7698 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7704 extern __inline __m512d
7705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7706 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7708 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7711 _mm512_setzero_pd (),
7715 extern __inline __m512d
7716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7717 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7719 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7722 _mm512_undefined_pd (),
7726 extern __inline __m512d
7727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7728 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7730 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7736 extern __inline __m512d
7737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7738 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7740 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7743 _mm512_setzero_pd (),
7747 extern __inline __m512
7748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7749 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7751 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7754 _mm512_undefined_ps (),
7758 extern __inline __m512
7759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7760 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7762 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7768 extern __inline __m512
7769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7770 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7772 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7775 _mm512_setzero_ps (),
7780 extern __inline __m512d
7781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7782 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7784 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7786 _mm512_undefined_pd (),
7787 (__mmask8) -1, __R);
7790 extern __inline __m512d
7791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7792 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7795 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7797 (__mmask8) __U, __R);
7800 extern __inline __m512d
7801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7802 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7804 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7806 _mm512_setzero_pd (),
7807 (__mmask8) __U, __R);
7810 extern __inline __m512
7811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7812 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7814 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7816 _mm512_undefined_ps (),
7817 (__mmask16) -1, __R);
7820 extern __inline __m512
7821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7822 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7825 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7827 (__mmask16) __U, __R);
7830 extern __inline __m512
7831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7832 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7834 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7836 _mm512_setzero_ps (),
7837 (__mmask16) __U, __R);
7840 extern __inline __m256i
7841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7842 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7844 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7847 _mm256_undefined_si256 (),
7851 extern __inline __m256i
7852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7853 _mm512_cvtps_ph (__m512 __A, const int __I)
7855 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7858 _mm256_undefined_si256 (),
7862 extern __inline __m256i
7863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7864 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7867 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7873 extern __inline __m256i
7874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7875 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7877 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7883 extern __inline __m256i
7884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7885 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7887 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7890 _mm256_setzero_si256 (),
7894 extern __inline __m256i
7895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7896 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7898 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7901 _mm256_setzero_si256 (),
/* Macro forms of the float -> double conversions with rounding operand B.
   The whole expansion is parenthesized so the result cast binds before any
   operator applied at the use site (e.g. a subscript).  */
#define _mm512_cvt_roundps_pd(A, B)					\
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A),			\
      (__v8df)_mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B)				\
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B)				\
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A),			\
      (__v8df)_mm512_setzero_pd (), (U), (B)))
/* Macro forms of the half -> float conversions with rounding operand B.
   The whole expansion is parenthesized so the result cast binds before any
   operator applied at the use site (e.g. a subscript).  */
#define _mm512_cvt_roundph_ps(A, B)					\
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(A),		\
      (__v16sf)_mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B)				\
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(A),		\
      (__v16sf)(W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B)				\
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(A),		\
      (__v16sf)_mm512_setzero_ps (), (U), (B)))
7923 #define _mm512_cvt_roundps_ph(A, I) \
7924 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7925 (__v16hi)_mm256_undefined_si256 (), -1))
7926 #define _mm512_cvtps_ph(A, I) \
7927 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7928 (__v16hi)_mm256_undefined_si256 (), -1))
7929 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
7930 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7931 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7932 #define _mm512_mask_cvtps_ph(U, W, A, I) \
7933 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7934 (__v16hi)(__m256i)(U), (__mmask16) (W)))
/* Macro form of _mm512_maskz_cvt_roundps_ph.  U is the __mmask16; a zero
   vector (_mm256_setzero_si256) is passed as the __v16hi operand.  A is
   parenthesized before the casts so an expression argument is converted
   as a whole.  */
#define _mm512_maskz_cvt_roundps_ph(U, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
                                               (int) (I), \
                                               (__v16hi) _mm256_setzero_si256 (), \
                                               (__mmask16) (U)))
/* Macro form of _mm512_maskz_cvtps_ph.  Expands to the same builtin call
   as _mm512_maskz_cvt_roundps_ph: U is the __mmask16 and a zero vector is
   passed as the __v16hi operand.  A is parenthesized before the casts so
   an expression argument is converted as a whole.  */
#define _mm512_maskz_cvtps_ph(U, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A), \
                                               (int) (I), \
                                               (__v16hi) _mm256_setzero_si256 (), \
                                               (__mmask16) (U)))
7944 extern __inline __m256
7945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7946 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7948 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7950 _mm256_undefined_ps (),
7951 (__mmask8) -1, __R);
7954 extern __inline __m256
7955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7956 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7959 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7961 (__mmask8) __U, __R);
7964 extern __inline __m256
7965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7966 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7968 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7970 _mm256_setzero_ps (),
7971 (__mmask8) __U, __R);
7974 extern __inline __m128
7975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7976 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7978 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7983 extern __inline __m128d
7984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7985 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7987 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
/* Macro form of _mm512_cvt_roundpd_ps.  Calls
   __builtin_ia32_cvtpd2ps512_mask on A with an undefined __v8sf vector
   and an all-ones mask (-1); B is the last operand.  Arguments and the
   whole expansion are parenthesized so the (__m256) cast stays attached
   to the call inside larger expressions.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
                                             (__v8sf) _mm256_undefined_ps (), \
                                             -1, (B)))
/* Macro form of _mm512_mask_cvt_roundpd_ps.  W is passed to the builtin
   as __v8sf, U as the mask and B as the last operand.  Fully
   parenthesized so expression arguments and postfix use of the result
   parse as intended.  */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))
/* Macro form of _mm512_maskz_cvt_roundpd_ps.  Same builtin as the mask
   variant, with a zero vector (_mm256_setzero_ps) in place of W.  Fully
   parenthesized for safe use inside larger expressions.  */
#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), \
                                             (__v8sf) _mm256_setzero_ps (), \
                                             (U), (B)))
/* Macro form of _mm_cvt_roundsd_ss.  Forwards A, B and C unchanged to
   __builtin_ia32_cvtsd2ss_round and casts the result to __m128.  The
   expansion is wrapped in parentheses so the cast applies to the call
   result even when the macro is followed by a postfix operator.  */
#define _mm_cvt_roundsd_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))
/* Macro form of _mm_cvt_roundss_sd.  Forwards A, B and C unchanged to
   __builtin_ia32_cvtss2sd_round and casts the result to __m128d.  The
   expansion is wrapped in parentheses so the cast applies to the call
   result even when the macro is followed by a postfix operator.  */
#define _mm_cvt_roundss_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
8008 extern __inline void
8009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8012 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8015 extern __inline void
8016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8017 _mm512_stream_ps (float *__P, __m512 __A)
8019 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8022 extern __inline void
8023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8024 _mm512_stream_pd (double *__P, __m512d __A)
8026 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8029 extern __inline __m512i
8030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8031 _mm512_stream_load_si512 (void *__P)
8033 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8036 /* Constants for mantissa extraction */
8039 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8040 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8041 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8042 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8043 } _MM_MANTISSA_NORM_ENUM;
8047 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8048 _MM_MANT_SIGN_zero, /* sign = 0 */
8049 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8050 } _MM_MANTISSA_SIGN_ENUM;
8053 extern __inline __m128
8054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8055 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8057 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8062 extern __inline __m128d
8063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8064 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8066 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8071 extern __inline __m512
8072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8073 _mm512_getexp_round_ps (__m512 __A, const int __R)
8075 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8077 _mm512_undefined_ps (),
8078 (__mmask16) -1, __R);
8081 extern __inline __m512
8082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8083 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8086 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8088 (__mmask16) __U, __R);
8091 extern __inline __m512
8092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8093 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8095 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8097 _mm512_setzero_ps (),
8098 (__mmask16) __U, __R);
8101 extern __inline __m512d
8102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8103 _mm512_getexp_round_pd (__m512d __A, const int __R)
8105 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8107 _mm512_undefined_pd (),
8108 (__mmask8) -1, __R);
8111 extern __inline __m512d
8112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8116 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8118 (__mmask8) __U, __R);
8121 extern __inline __m512d
8122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8123 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8125 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8127 _mm512_setzero_pd (),
8128 (__mmask8) __U, __R);
8131 extern __inline __m512d
8132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8133 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8134 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8136 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8138 _mm512_undefined_pd (),
8139 (__mmask8) -1, __R);
8142 extern __inline __m512d
8143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8144 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8145 _MM_MANTISSA_NORM_ENUM __B,
8146 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8148 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8154 extern __inline __m512d
8155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8156 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8157 _MM_MANTISSA_NORM_ENUM __B,
8158 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8160 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8163 _mm512_setzero_pd (),
8167 extern __inline __m512
8168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8169 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8170 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8172 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8174 _mm512_undefined_ps (),
8175 (__mmask16) -1, __R);
8178 extern __inline __m512
8179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8181 _MM_MANTISSA_NORM_ENUM __B,
8182 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8184 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8190 extern __inline __m512
8191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8192 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8193 _MM_MANTISSA_NORM_ENUM __B,
8194 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8196 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8199 _mm512_setzero_ps (),
8203 extern __inline __m128d
8204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8205 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8206 _MM_MANTISSA_NORM_ENUM __C,
8207 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8209 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8215 extern __inline __m128
8216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8217 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8218 _MM_MANTISSA_NORM_ENUM __C,
8219 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8221 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8228 #define _mm512_getmant_round_pd(X, B, C, R) \
8229 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8230 (int)(((C)<<2) | (B)), \
8231 (__v8df)(__m512d)_mm512_undefined_pd(), \
8235 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8236 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8237 (int)(((C)<<2) | (B)), \
8238 (__v8df)(__m512d)(W), \
8242 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8243 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8244 (int)(((C)<<2) | (B)), \
8245 (__v8df)(__m512d)_mm512_setzero_pd(), \
8248 #define _mm512_getmant_round_ps(X, B, C, R) \
8249 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8250 (int)(((C)<<2) | (B)), \
8251 (__v16sf)(__m512)_mm512_undefined_ps(), \
8255 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8256 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8257 (int)(((C)<<2) | (B)), \
8258 (__v16sf)(__m512)(W), \
8262 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8263 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8264 (int)(((C)<<2) | (B)), \
8265 (__v16sf)(__m512)_mm512_setzero_ps(), \
8268 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8269 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8270 (__v2df)(__m128d)(Y), \
8271 (int)(((D)<<2) | (C)), \
8274 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8275 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8276 (__v4sf)(__m128)(Y), \
8277 (int)(((D)<<2) | (C)), \
/* Macro counterpart of the inline _mm_getexp_round_ss: both vector
   operands are reinterpreted as __v4sf and handed, together with the
   rounding operand, to __builtin_ia32_getexpss128_round.  */
#define _mm_getexp_round_ss(X, Y, RND) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (X), \
                                              (__v4sf) (__m128) (Y), \
                                              (RND)))
/* Macro counterpart of the inline _mm_getexp_round_sd: both vector
   operands are reinterpreted as __v2df and handed, together with the
   rounding operand, to __builtin_ia32_getexpsd128_round.  */
#define _mm_getexp_round_sd(X, Y, RND) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (X), \
                                               (__v2df) (__m128d) (Y), \
                                               (RND)))
/* Macro counterpart of the inline _mm512_getexp_round_ps: calls
   __builtin_ia32_getexpps512_mask on X with an undefined vector and an
   all-ones __mmask16.  */
#define _mm512_getexp_round_ps(X, RND) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) _mm512_undefined_ps (), \
                                             (__mmask16) -1, \
                                             (RND)))
/* Macro counterpart of the inline _mm512_mask_getexp_round_ps: SRC is
   passed as the __v16sf vector operand and MASK as the __mmask16 of
   __builtin_ia32_getexpps512_mask.  */
#define _mm512_mask_getexp_round_ps(SRC, MASK, X, RND) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) (__m512) (SRC), \
                                             (__mmask16) (MASK), \
                                             (RND)))
/* Macro counterpart of the inline _mm512_maskz_getexp_round_ps: like the
   mask form, but a zero vector (_mm512_setzero_ps) is supplied as the
   __v16sf vector operand.  */
#define _mm512_maskz_getexp_round_ps(MASK, X, RND) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (X), \
                                             (__v16sf) _mm512_setzero_ps (), \
                                             (__mmask16) (MASK), \
                                             (RND)))
/* Macro counterpart of the inline _mm512_getexp_round_pd: calls
   __builtin_ia32_getexppd512_mask on X with an undefined vector and an
   all-ones __mmask8.  */
#define _mm512_getexp_round_pd(X, RND) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (X), \
                                              (__v8df) _mm512_undefined_pd (), \
                                              (__mmask8) -1, \
                                              (RND)))
/* Macro counterpart of the inline _mm512_mask_getexp_round_pd: SRC is
   passed as the __v8df vector operand and MASK as the __mmask8 of
   __builtin_ia32_getexppd512_mask.  */
#define _mm512_mask_getexp_round_pd(SRC, MASK, X, RND) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (X), \
                                              (__v8df) (__m512d) (SRC), \
                                              (__mmask8) (MASK), \
                                              (RND)))
/* Macro counterpart of the inline _mm512_maskz_getexp_round_pd: like the
   mask form, but a zero vector (_mm512_setzero_pd) is supplied as the
   __v8df vector operand.  */
#define _mm512_maskz_getexp_round_pd(MASK, X, RND) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (X), \
                                              (__v8df) _mm512_setzero_pd (), \
                                              (__mmask8) (MASK), \
                                              (RND)))
8312 extern __inline __m512
8313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8316 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8318 _mm512_undefined_ps (),
8322 extern __inline __m512
8323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8324 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8325 const int __imm, const int __R)
8327 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8329 (__mmask16) __B, __R);
8332 extern __inline __m512
8333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8335 const int __imm, const int __R)
8337 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8340 _mm512_setzero_ps (),
8341 (__mmask16) __A, __R);
8344 extern __inline __m512d
8345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8346 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8348 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8350 _mm512_undefined_pd (),
8354 extern __inline __m512d
8355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8356 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8357 __m512d __C, const int __imm, const int __R)
8359 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8361 (__mmask8) __B, __R);
8364 extern __inline __m512d
8365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8366 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8367 const int __imm, const int __R)
8369 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8372 _mm512_setzero_pd (),
8373 (__mmask8) __A, __R);
8376 extern __inline __m128
8377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8378 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8380 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8381 (__v4sf) __B, __imm, __R);
8384 extern __inline __m128d
8385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8389 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8390 (__v2df) __B, __imm, __R);
/* Macro counterpart of the inline _mm512_roundscale_round_ps: passes X
   and the immediate IMM to __builtin_ia32_rndscaleps_mask with an
   undefined vector and an all-ones __mmask16; RND is the last operand.  */
#define _mm512_roundscale_round_ps(X, IMM, RND) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (X), \
                                            (int) (IMM), \
                                            (__v16sf) _mm512_undefined_ps (), \
                                            (__mmask16) (-1), \
                                            (RND)))
8397 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8398 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8400 (__v16sf)(__m512)(A), \
8402 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8403 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8405 (__v16sf)_mm512_setzero_ps(),\
/* Macro counterpart of the inline _mm512_roundscale_round_pd: passes X
   and the immediate IMM to __builtin_ia32_rndscalepd_mask with an
   undefined vector and an all-ones __mmask8; RND is the last operand.  */
#define _mm512_roundscale_round_pd(X, IMM, RND) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (X), \
                                             (int) (IMM), \
                                             (__v8df) _mm512_undefined_pd (), \
                                             (__mmask8) (-1), \
                                             (RND)))
8410 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8411 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8413 (__v8df)(__m512d)(A), \
8415 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8416 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8418 (__v8df)_mm512_setzero_pd(),\
/* Macro counterpart of the inline _mm_roundscale_round_ss: X and Y are
   reinterpreted as __v4sf, IMM is converted to int, and all three plus
   RND are forwarded to __builtin_ia32_rndscaless_round.  */
#define _mm_roundscale_round_ss(X, Y, IMM, RND) \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf) (__m128) (X), \
                                             (__v4sf) (__m128) (Y), \
                                             (int) (IMM), \
                                             (RND)))
/* Macro counterpart of the inline _mm_roundscale_round_sd: X and Y are
   reinterpreted as __v2df, IMM is converted to int, and all three plus
   RND are forwarded to __builtin_ia32_rndscalesd_round.  */
#define _mm_roundscale_round_sd(X, Y, IMM, RND) \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df) (__m128d) (X), \
                                              (__v2df) (__m128d) (Y), \
                                              (int) (IMM), \
                                              (RND)))
8428 extern __inline __m512
8429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8430 _mm512_floor_ps (__m512 __A)
8432 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8435 _MM_FROUND_CUR_DIRECTION);
8438 extern __inline __m512d
8439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8440 _mm512_floor_pd (__m512d __A)
8442 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8445 _MM_FROUND_CUR_DIRECTION);
8448 extern __inline __m512
8449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8450 _mm512_ceil_ps (__m512 __A)
8452 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8455 _MM_FROUND_CUR_DIRECTION);
8458 extern __inline __m512d
8459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8460 _mm512_ceil_pd (__m512d __A)
8462 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8465 _MM_FROUND_CUR_DIRECTION);
8468 extern __inline __m512
8469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8470 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8472 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8475 _MM_FROUND_CUR_DIRECTION);
8478 extern __inline __m512d
8479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8480 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8482 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8485 _MM_FROUND_CUR_DIRECTION);
8488 extern __inline __m512
8489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8490 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8492 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8495 _MM_FROUND_CUR_DIRECTION);
8498 extern __inline __m512d
8499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8500 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8502 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8505 _MM_FROUND_CUR_DIRECTION);
8509 extern __inline __m512i
8510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8511 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8513 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8514 (__v16si) __B, __imm,
8516 _mm512_undefined_epi32 (),
8520 extern __inline __m512i
8521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8522 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8523 __m512i __B, const int __imm)
8525 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8526 (__v16si) __B, __imm,
8531 extern __inline __m512i
8532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8533 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8536 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8537 (__v16si) __B, __imm,
8539 _mm512_setzero_si512 (),
8543 extern __inline __m512i
8544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8545 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8547 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8548 (__v8di) __B, __imm,
8550 _mm512_undefined_epi32 (),
8554 extern __inline __m512i
8555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8556 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8557 __m512i __B, const int __imm)
8559 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8560 (__v8di) __B, __imm,
8565 extern __inline __m512i
8566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8567 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8570 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8571 (__v8di) __B, __imm,
8573 _mm512_setzero_si512 (),
8577 #define _mm512_alignr_epi32(X, Y, C) \
8578 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8579 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
8582 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
8583 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8584 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
8587 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
8588 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8589 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
8592 #define _mm512_alignr_epi64(X, Y, C) \
8593 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8594 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \
/* Macro counterpart of the inline _mm512_mask_alignr_epi64: A and B are
   reinterpreted as __v8di and passed with the immediate IMM to
   __builtin_ia32_alignq512_mask; W is the __v8di vector operand and U
   the __mmask8.  */
#define _mm512_mask_alignr_epi64(W, U, A, B, IMM) \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di) (__m512i) (A), \
                                            (__v8di) (__m512i) (B), \
                                            (int) (IMM), \
                                            (__v8di) (__m512i) (W), \
                                            (__mmask8) (U)))
8601 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
8602 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8603 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
8607 extern __inline __mmask16
8608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8609 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8611 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8616 extern __inline __mmask16
8617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8618 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8620 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8621 (__v16si) __B, __U);
8624 extern __inline __mmask8
8625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8626 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8628 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8632 extern __inline __mmask8
8633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8634 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8636 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8641 extern __inline __mmask16
8642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8643 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8645 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8650 extern __inline __mmask16
8651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8652 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8654 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8655 (__v16si) __B, __U);
8658 extern __inline __mmask8
8659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8660 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8662 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8666 extern __inline __mmask8
8667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8668 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8670 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8675 extern __inline __mmask16
8676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8677 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8679 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8684 extern __inline __mmask16
8685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8686 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8688 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8693 extern __inline __mmask16
8694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8695 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8697 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8702 extern __inline __mmask16
8703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8706 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8711 extern __inline __mmask8
8712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8713 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8715 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8720 extern __inline __mmask8
8721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8722 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8724 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8729 extern __inline __mmask8
8730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8731 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8733 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8738 extern __inline __mmask8
8739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8740 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8742 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8747 extern __inline __mmask16
8748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8749 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8751 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8756 extern __inline __mmask16
8757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8760 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8765 extern __inline __mmask16
8766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8767 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8769 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8774 extern __inline __mmask16
8775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8776 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8778 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8783 extern __inline __mmask8
8784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8785 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8787 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8792 extern __inline __mmask8
8793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8794 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8796 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8801 extern __inline __mmask8
8802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8803 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8805 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8810 extern __inline __mmask8
8811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8812 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8814 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8819 extern __inline __mmask16
8820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8821 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8823 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8828 extern __inline __mmask16
8829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8830 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8832 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8837 extern __inline __mmask16
8838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8839 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8841 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8846 extern __inline __mmask16
8847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8848 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8850 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8855 extern __inline __mmask8
8856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8859 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8864 extern __inline __mmask8
8865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8866 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8868 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8873 extern __inline __mmask8
8874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8875 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8877 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8882 extern __inline __mmask8
8883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8884 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8886 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8891 extern __inline __mmask16
8892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8893 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8895 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8900 extern __inline __mmask16
8901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8902 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8904 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8909 extern __inline __mmask16
8910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8911 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8913 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8918 extern __inline __mmask16
8919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8920 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8922 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8927 extern __inline __mmask8
8928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8929 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8931 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8936 extern __inline __mmask8
8937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8938 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8940 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8945 extern __inline __mmask8
8946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8947 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8949 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8954 extern __inline __mmask8
8955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8956 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8958 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
/* Integer comparison predicate codes.  NOTE(review): presumably these
   are the values accepted as the predicate (P) operand of the
   _mm512_cmp_ep[iu]{32,64}_mask intrinsics -- confirm against the Intel
   intrinsics reference.  NLT/GE and NLE/GT are two spellings of the same
   code.  */
#define _MM_CMPINT_EQ     0x0  /* equal */
#define _MM_CMPINT_LT     0x1  /* less than */
#define _MM_CMPINT_LE     0x2  /* less than or equal */
#define _MM_CMPINT_UNUSED 0x3  /* named "unused" by this header */
#define _MM_CMPINT_NE     0x4  /* not equal */
#define _MM_CMPINT_NLT    0x5  /* not less than */
#define _MM_CMPINT_GE     0x5  /* greater than or equal (same as NLT) */
#define _MM_CMPINT_NLE    0x6  /* not less than or equal */
#define _MM_CMPINT_GT     0x6  /* greater than (same as NLE) */
8974 extern __inline __mmask8
8975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8976 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8978 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8983 extern __inline __mmask16
8984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8985 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8987 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8992 extern __inline __mmask8
8993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8994 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8996 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9001 extern __inline __mmask16
9002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9003 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9005 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9010 extern __inline __mmask8
9011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9012 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9015 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9017 (__mmask8) -1, __R);
9020 extern __inline __mmask16
9021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9022 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9024 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9026 (__mmask16) -1, __R);
9029 extern __inline __mmask8
9030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9031 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9034 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9039 extern __inline __mmask16
9040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9041 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9044 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9049 extern __inline __mmask8
9050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9051 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9054 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9059 extern __inline __mmask16
9060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9061 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9064 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9069 extern __inline __mmask8
9070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9071 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9072 const int __P, const int __R)
9074 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9076 (__mmask8) __U, __R);
9079 extern __inline __mmask16
9080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9081 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9082 const int __P, const int __R)
9084 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9086 (__mmask16) __U, __R);
9089 extern __inline __mmask8
9090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9091 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9093 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9095 (__mmask8) -1, __R);
9098 extern __inline __mmask8
9099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9100 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9101 const int __P, const int __R)
9103 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9105 (__mmask8) __M, __R);
9108 extern __inline __mmask8
9109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9110 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9112 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9114 (__mmask8) -1, __R);
9117 extern __inline __mmask8
9118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9119 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9120 const int __P, const int __R)
9122 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9124 (__mmask8) __M, __R);
9128 #define _mm512_cmp_epi64_mask(X, Y, P) \
9129 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9130 (__v8di)(__m512i)(Y), (int)(P),\
9133 #define _mm512_cmp_epi32_mask(X, Y, P) \
9134 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9135 (__v16si)(__m512i)(Y), (int)(P), \
9138 #define _mm512_cmp_epu64_mask(X, Y, P) \
9139 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9140 (__v8di)(__m512i)(Y), (int)(P),\
9143 #define _mm512_cmp_epu32_mask(X, Y, P) \
9144 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9145 (__v16si)(__m512i)(Y), (int)(P), \
9148 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9149 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9150 (__v8df)(__m512d)(Y), (int)(P),\
9153 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9154 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9155 (__v16sf)(__m512)(Y), (int)(P),\
9158 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9159 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9160 (__v8di)(__m512i)(Y), (int)(P),\
9163 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9164 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9165 (__v16si)(__m512i)(Y), (int)(P), \
9168 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9169 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9170 (__v8di)(__m512i)(Y), (int)(P),\
9173 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9174 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9175 (__v16si)(__m512i)(Y), (int)(P), \
9178 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9179 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9180 (__v8df)(__m512d)(Y), (int)(P),\
9183 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9184 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9185 (__v16sf)(__m512)(Y), (int)(P),\
9188 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9189 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9190 (__v2df)(__m128d)(Y), (int)(P),\
9193 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9194 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9195 (__v2df)(__m128d)(Y), (int)(P),\
9198 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9199 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9200 (__v4sf)(__m128)(Y), (int)(P), \
9203 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9204 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9205 (__v4sf)(__m128)(Y), (int)(P), \
9210 extern __inline __m512
9211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9212 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9214 __m512 v1_old = _mm512_undefined_ps ();
9215 __mmask16 mask = 0xFFFF;
9217 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9223 extern __inline __m512
9224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9225 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9226 __m512i __index, float const *__addr, int __scale)
9228 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9234 extern __inline __m512d
9235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9238 __m512d v1_old = _mm512_undefined_pd ();
9239 __mmask8 mask = 0xFF;
9241 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9243 (__v8si) __index, mask,
9247 extern __inline __m512d
9248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9249 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9250 __m256i __index, double const *__addr, int __scale)
9252 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9258 extern __inline __m256
9259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9260 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9262 __m256 v1_old = _mm256_undefined_ps ();
9263 __mmask8 mask = 0xFF;
9265 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9267 (__v8di) __index, mask,
9271 extern __inline __m256
9272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9273 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9274 __m512i __index, float const *__addr, int __scale)
9276 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9282 extern __inline __m512d
9283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9284 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9286 __m512d v1_old = _mm512_undefined_pd ();
9287 __mmask8 mask = 0xFF;
9289 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9291 (__v8di) __index, mask,
9295 extern __inline __m512d
9296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9297 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9298 __m512i __index, double const *__addr, int __scale)
9300 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9306 extern __inline __m512i
9307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9308 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9310 __m512i v1_old = _mm512_undefined_epi32 ();
9311 __mmask16 mask = 0xFFFF;
9313 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9319 extern __inline __m512i
9320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9321 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9322 __m512i __index, int const *__addr, int __scale)
9324 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9330 extern __inline __m512i
9331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9332 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9334 __m512i v1_old = _mm512_undefined_epi32 ();
9335 __mmask8 mask = 0xFF;
9337 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9339 (__v8si) __index, mask,
9343 extern __inline __m512i
9344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9345 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9346 __m256i __index, long long const *__addr,
9349 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9355 extern __inline __m256i
9356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9357 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9359 __m256i v1_old = _mm256_undefined_si256 ();
9360 __mmask8 mask = 0xFF;
9362 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9368 extern __inline __m256i
9369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9370 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9371 __m512i __index, int const *__addr, int __scale)
9373 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9379 extern __inline __m512i
9380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9381 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9383 __m512i v1_old = _mm512_undefined_epi32 ();
9384 __mmask8 mask = 0xFF;
9386 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9388 (__v8di) __index, mask,
9392 extern __inline __m512i
9393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9394 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9395 __m512i __index, long long const *__addr,
9398 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
/* Forwards to __builtin_ia32_scattersiv16sf with the constant mask 0xFFFF
   (all 16 mask bits set).  __index is passed as __v16si and __v1 as
   __v16sf; __addr and __scale are passed through unchanged.  */
9404 extern __inline void
9405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9406 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9408 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9409 (__v16si) __index, (__v16sf) __v1, __scale);
/* Forwards to __builtin_ia32_scattersiv16sf with the caller-supplied
   16-bit mask __mask.  __index is passed as __v16si and __v1 as __v16sf;
   __addr and __scale are passed through unchanged.  */
9412 extern __inline void
9413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9414 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9415 __m512i __index, __m512 __v1, int __scale)
9417 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9418 (__v16sf) __v1, __scale);
9421 extern __inline void
9422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9423 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9426 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9427 (__v8si) __index, (__v8df) __v1, __scale);
/* Forwards to __builtin_ia32_scattersiv8df with the caller-supplied 8-bit
   mask __mask.  The 256-bit __index is passed as __v8si and __v1 as
   __v8df; __addr and __scale are passed through unchanged.  */
9430 extern __inline void
9431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9432 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9433 __m256i __index, __m512d __v1, int __scale)
9435 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9436 (__v8df) __v1, __scale);
/* Forwards to __builtin_ia32_scatterdiv16sf with the constant mask 0xFF
   (all 8 mask bits set).  __index is passed as __v8di and the 256-bit
   __v1 as __v8sf; __addr and __scale are passed through unchanged.  */
9439 extern __inline void
9440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9441 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9443 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9444 (__v8di) __index, (__v8sf) __v1, __scale);
/* Forwards to __builtin_ia32_scatterdiv16sf with the caller-supplied 8-bit
   mask __mask.  __index is passed as __v8di and the 256-bit __v1 as
   __v8sf; __addr and __scale are passed through unchanged.  */
9447 extern __inline void
9448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9449 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9450 __m512i __index, __m256 __v1, int __scale)
9452 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9453 (__v8sf) __v1, __scale);
9456 extern __inline void
9457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9458 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9461 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9462 (__v8di) __index, (__v8df) __v1, __scale);
/* Forwards to __builtin_ia32_scatterdiv8df with the caller-supplied 8-bit
   mask __mask.  __index is passed as __v8di and __v1 as __v8df; __addr
   and __scale are passed through unchanged.  */
9465 extern __inline void
9466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9467 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9468 __m512i __index, __m512d __v1, int __scale)
9470 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9471 (__v8df) __v1, __scale);
/* Forwards to __builtin_ia32_scattersiv16si with the constant mask 0xFFFF
   (all 16 mask bits set).  Both __index and __v1 are passed as __v16si;
   __addr and __scale are passed through unchanged.  */
9474 extern __inline void
9475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9476 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9477 __m512i __v1, int __scale)
9479 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9480 (__v16si) __index, (__v16si) __v1, __scale);
/* Forwards to __builtin_ia32_scattersiv16si with the caller-supplied
   16-bit mask __mask.  Both __index and __v1 are passed as __v16si;
   __addr and __scale are passed through unchanged.  */
9483 extern __inline void
9484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9485 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9486 __m512i __index, __m512i __v1, int __scale)
9488 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9489 (__v16si) __v1, __scale);
/* Forwards to __builtin_ia32_scattersiv8di with the constant mask 0xFF
   (all 8 mask bits set).  The 256-bit __index is passed as __v8si and
   __v1 as __v8di; __addr and __scale are passed through unchanged.  */
9492 extern __inline void
9493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9494 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9495 __m512i __v1, int __scale)
9497 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9498 (__v8si) __index, (__v8di) __v1, __scale);
/* Forwards to __builtin_ia32_scattersiv8di with the caller-supplied 8-bit
   mask __mask.  The 256-bit __index is passed as __v8si and __v1 as
   __v8di; __addr and __scale are passed through unchanged.  */
9501 extern __inline void
9502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9503 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9504 __m256i __index, __m512i __v1, int __scale)
9506 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9507 (__v8di) __v1, __scale);
/* Forwards to __builtin_ia32_scatterdiv16si with the constant mask 0xFF
   (all 8 mask bits set).  __index is passed as __v8di and the 256-bit
   __v1 as __v8si; __addr and __scale are passed through unchanged.  */
9510 extern __inline void
9511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9512 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9513 __m256i __v1, int __scale)
9515 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9516 (__v8di) __index, (__v8si) __v1, __scale);
/* Forwards to __builtin_ia32_scatterdiv16si with the caller-supplied 8-bit
   mask __mask.  __index is passed as __v8di and the 256-bit __v1 as
   __v8si; __addr and __scale are passed through unchanged.  */
9519 extern __inline void
9520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9521 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9522 __m512i __index, __m256i __v1, int __scale)
9524 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9525 (__v8si) __v1, __scale);
/* Forwards to __builtin_ia32_scatterdiv8di with the constant mask 0xFF
   (all 8 mask bits set).  Both __index and __v1 are passed as __v8di;
   __addr and __scale are passed through unchanged.  */
9528 extern __inline void
9529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9530 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9531 __m512i __v1, int __scale)
9533 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9534 (__v8di) __index, (__v8di) __v1, __scale);
/* Forwards to __builtin_ia32_scatterdiv8di with the caller-supplied 8-bit
   mask __mask.  Both __index and __v1 are passed as __v8di; __addr and
   __scale are passed through unchanged.  */
9537 extern __inline void
9538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9539 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9540 __m512i __index, __m512i __v1, int __scale)
9542 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9543 (__v8di) __v1, __scale);
/* Macro definition of _mm512_i32gather_ps.  Expands to
   __builtin_ia32_gathersiv16sf with _mm512_undefined_ps () as the first
   operand and the constant mask 0xFFFF (all 16 bits set).  Each macro
   argument is parenthesized before its cast so that a compound argument
   expression is converted as a whole, and the expansion is wrapped in
   parentheses so it acts as a single operand.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                           \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(), \
                                          (float const *) (ADDR),         \
                                          (__v16si)(__m512i) (INDEX),     \
                                          (__mmask16)0xFFFF, (int) (SCALE)))
/* Macro definition of _mm512_mask_i32gather_ps.  Expands to
   __builtin_ia32_gathersiv16sf with V1OLD as the first operand and MASK
   cast to __mmask16.  Each macro argument is parenthesized before its cast
   so that a compound argument expression is converted as a whole, and the
   expansion is wrapped in parentheses so it acts as a single operand.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)         \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),      \
                                          (float const *) (ADDR),         \
                                          (__v16si)(__m512i) (INDEX),     \
                                          (__mmask16) (MASK), (int) (SCALE)))
/* Macro definition of _mm512_i32gather_pd.  Expands to
   __builtin_ia32_gathersiv8df with _mm512_undefined_pd () as the first
   operand and the constant mask 0xFF (all 8 bits set); INDEX is a 256-bit
   vector passed as __v8si.  Each macro argument is parenthesized before
   its cast so that a compound argument expression is converted as a whole,
   and the expansion is wrapped in parentheses.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                           \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),  \
                                          (double const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),      \
                                          (__mmask8)0xFF, (int) (SCALE)))
/* Macro definition of _mm512_mask_i32gather_pd.  Expands to
   __builtin_ia32_gathersiv8df with V1OLD as the first operand and MASK
   cast to __mmask8; INDEX is a 256-bit vector passed as __v8si.  Each
   macro argument is parenthesized before its cast so that a compound
   argument expression is converted as a whole, and the expansion is
   wrapped in parentheses.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)         \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),      \
                                          (double const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),      \
                                          (__mmask8) (MASK), (int) (SCALE)))
/* Macro definition of _mm512_i64gather_ps.  Expands to
   __builtin_ia32_gatherdiv16sf with _mm256_undefined_ps () as the first
   operand and the constant mask 0xFF (all 8 bits set); the result is a
   256-bit __m256.  Each macro argument is parenthesized before its cast so
   that a compound argument expression is converted as a whole, and the
   expansion is wrapped in parentheses.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                           \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),  \
                                          (float const *) (ADDR),         \
                                          (__v8di)(__m512i) (INDEX),      \
                                          (__mmask8)0xFF, (int) (SCALE)))
/* Macro definition of _mm512_mask_i64gather_ps.  Expands to
   __builtin_ia32_gatherdiv16sf with the 256-bit V1OLD as the first operand
   and MASK cast to __mmask8.  Each macro argument is parenthesized before
   its cast so that a compound argument expression is converted as a whole,
   and the expansion is wrapped in parentheses.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)         \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),       \
                                          (float const *) (ADDR),         \
                                          (__v8di)(__m512i) (INDEX),      \
                                          (__mmask8) (MASK), (int) (SCALE)))
/* Macro definition of _mm512_i64gather_pd.  Expands to
   __builtin_ia32_gatherdiv8df with _mm512_undefined_pd () as the first
   operand and the constant mask 0xFF (all 8 bits set).  Each macro
   argument is parenthesized before its cast so that a compound argument
   expression is converted as a whole, and the expansion is wrapped in
   parentheses.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                           \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),  \
                                          (double const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),      \
                                          (__mmask8)0xFF, (int) (SCALE)))
/* Macro definition of _mm512_mask_i64gather_pd.  Expands to
   __builtin_ia32_gatherdiv8df with V1OLD as the first operand and MASK
   cast to __mmask8.  Each macro argument is parenthesized before its cast
   so that a compound argument expression is converted as a whole, and the
   expansion is wrapped in parentheses.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)         \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),      \
                                          (double const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),      \
                                          (__mmask8) (MASK), (int) (SCALE)))
/* Macro definition of _mm512_i32gather_epi32.  Expands to
   __builtin_ia32_gathersiv16si with _mm512_undefined_epi32 () as the first
   operand and the constant mask 0xFFFF (all 16 bits set).  Each macro
   argument is parenthesized before its cast so that a compound argument
   expression is converted as a whole, and the expansion is wrapped in
   parentheses.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                            \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),\
                                           (int const *) (ADDR),              \
                                           (__v16si)(__m512i) (INDEX),        \
                                           (__mmask16)0xFFFF, (int) (SCALE)))
/* Macro definition of _mm512_mask_i32gather_epi32.  Expands to
   __builtin_ia32_gathersiv16si with V1OLD as the first operand and MASK
   cast to __mmask16.  Each macro argument is parenthesized before its cast
   so that a compound argument expression is converted as a whole, and the
   expansion is wrapped in parentheses.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)      \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),    \
                                           (int const *) (ADDR),          \
                                           (__v16si)(__m512i) (INDEX),    \
                                           (__mmask16) (MASK), (int) (SCALE)))
/* Macro definition of _mm512_i32gather_epi64.  Expands to
   __builtin_ia32_gathersiv8di with _mm512_undefined_epi32 () as the first
   operand and the constant mask 0xFF (all 8 bits set); INDEX is a 256-bit
   vector passed as __v8si.  Each macro argument is parenthesized before
   its cast so that a compound argument expression is converted as a whole,
   and the expansion is wrapped in parentheses.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                          \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),\
                                          (long long const *) (ADDR),       \
                                          (__v8si)(__m256i) (INDEX),        \
                                          (__mmask8)0xFF, (int) (SCALE)))
/* Macro definition of _mm512_mask_i32gather_epi64.  Expands to
   __builtin_ia32_gathersiv8di with V1OLD as the first operand and MASK
   cast to __mmask8; INDEX is a 256-bit vector passed as __v8si.  Each
   macro argument is parenthesized before its cast so that a compound
   argument expression is converted as a whole, and the expansion is
   wrapped in parentheses.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)      \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),      \
                                          (long long const *) (ADDR),     \
                                          (__v8si)(__m256i) (INDEX),      \
                                          (__mmask8) (MASK), (int) (SCALE)))
/* Macro definition of _mm512_i64gather_epi32.  Expands to
   __builtin_ia32_gatherdiv16si with _mm256_undefined_si256 () as the first
   operand and the constant mask 0xFF (all 8 bits set); the result is a
   256-bit __m256i.  Each macro argument is parenthesized before its cast
   so that a compound argument expression is converted as a whole, and the
   expansion is wrapped in parentheses.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                           \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
                                           (int const *) (ADDR),             \
                                           (__v8di)(__m512i) (INDEX),        \
                                           (__mmask8)0xFF, (int) (SCALE)))
/* Macro definition of _mm512_mask_i64gather_epi32.  Expands to
   __builtin_ia32_gatherdiv16si with the 256-bit V1OLD as the first operand
   and MASK cast to __mmask8.  Each macro argument is parenthesized before
   its cast so that a compound argument expression is converted as a whole,
   and the expansion is wrapped in parentheses.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)      \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),     \
                                           (int const *) (ADDR),          \
                                           (__v8di)(__m512i) (INDEX),     \
                                           (__mmask8) (MASK), (int) (SCALE)))
/* Macro definition of _mm512_i64gather_epi64.  Expands to
   __builtin_ia32_gatherdiv8di with _mm512_undefined_epi32 () as the first
   operand and the constant mask 0xFF (all 8 bits set).  Each macro
   argument is parenthesized before its cast so that a compound argument
   expression is converted as a whole, and the expansion is wrapped in
   parentheses.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                          \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),\
                                          (long long const *) (ADDR),       \
                                          (__v8di)(__m512i) (INDEX),        \
                                          (__mmask8)0xFF, (int) (SCALE)))
/* Macro definition of _mm512_mask_i64gather_epi64.  Expands to
   __builtin_ia32_gatherdiv8di with V1OLD as the first operand and MASK
   cast to __mmask8.  Each macro argument is parenthesized before its cast
   so that a compound argument expression is converted as a whole, and the
   expansion is wrapped in parentheses.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)      \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),      \
                                          (long long const *) (ADDR),     \
                                          (__v8di)(__m512i) (INDEX),      \
                                          (__mmask8) (MASK), (int) (SCALE)))
/* Macro definition of _mm512_i32scatter_ps.  Expands to
   __builtin_ia32_scattersiv16sf with the constant mask 0xFFFF (all 16 bits
   set).  Each macro argument is parenthesized before its cast so that a
   compound argument expression is converted as a whole.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                      \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16)0xFFFF,     \
                                 (__v16si)(__m512i) (INDEX),              \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
/* Macro definition of _mm512_mask_i32scatter_ps.  Expands to
   __builtin_ia32_scattersiv16sf with MASK cast to __mmask16.  Each macro
   argument is parenthesized before its cast so that a compound argument
   expression is converted as a whole.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)           \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) (MASK),    \
                                 (__v16si)(__m512i) (INDEX),              \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))
/* Macro definition of _mm512_i32scatter_pd.  Expands to
   __builtin_ia32_scattersiv8df with the constant mask 0xFF (all 8 bits
   set); INDEX is a 256-bit vector passed as __v8si.  Each macro argument
   is parenthesized before its cast so that a compound argument expression
   is converted as a whole.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                      \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8)0xFF,        \
                                (__v8si)(__m256i) (INDEX),                \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
/* Macro definition of _mm512_mask_i32scatter_pd.  Expands to
   __builtin_ia32_scattersiv8df with MASK cast to __mmask8; INDEX is a
   256-bit vector passed as __v8si.  Each macro argument is parenthesized
   before its cast so that a compound argument expression is converted as
   a whole.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)           \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) (MASK),     \
                                (__v8si)(__m256i) (INDEX),                \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
/* Macro definition of _mm512_i64scatter_ps.  Expands to
   __builtin_ia32_scatterdiv16sf with the constant mask 0xFF (all 8 bits
   set); V1 is a 256-bit vector passed as __v8sf.  Each macro argument is
   parenthesized before its cast so that a compound argument expression is
   converted as a whole.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                      \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8)0xFF,        \
                                 (__v8di)(__m512i) (INDEX),               \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
/* Macro definition of _mm512_mask_i64scatter_ps.  Expands to
   __builtin_ia32_scatterdiv16sf with MASK cast to __mmask8; V1 is a
   256-bit vector passed as __v8sf.  The mask cast is __mmask8 (not
   __mmask16) to agree with the 0xFF mask used by _mm512_i64scatter_ps and
   with the __mmask8 parameter of the inline function of the same name.
   Each macro argument is parenthesized before its cast so that a compound
   argument expression is converted as a whole.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)           \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) (MASK),     \
                                 (__v8di)(__m512i) (INDEX),               \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
/* Macro definition of _mm512_i64scatter_pd.  Expands to
   __builtin_ia32_scatterdiv8df with the constant mask 0xFF (all 8 bits
   set).  Each macro argument is parenthesized before its cast so that a
   compound argument expression is converted as a whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                      \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8)0xFF,        \
                                (__v8di)(__m512i) (INDEX),                \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
/* Macro definition of _mm512_mask_i64scatter_pd.  Expands to
   __builtin_ia32_scatterdiv8df with MASK cast to __mmask8.  Each macro
   argument is parenthesized before its cast so that a compound argument
   expression is converted as a whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)           \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) (MASK),     \
                                (__v8di)(__m512i) (INDEX),                \
                                (__v8df)(__m512d) (V1), (int) (SCALE))
/* Macro definition of _mm512_i32scatter_epi32.  Expands to
   __builtin_ia32_scattersiv16si with the constant mask 0xFFFF (all 16 bits
   set).  Each macro argument is parenthesized before its cast so that a
   compound argument expression is converted as a whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                   \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16)0xFFFF,       \
                                 (__v16si)(__m512i) (INDEX),              \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
/* Macro definition of _mm512_mask_i32scatter_epi32.  Expands to
   __builtin_ia32_scattersiv16si with MASK cast to __mmask16.  Each macro
   argument is parenthesized before its cast so that a compound argument
   expression is converted as a whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)        \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) (MASK),      \
                                 (__v16si)(__m512i) (INDEX),              \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))
/* Macro definition of _mm512_i32scatter_epi64.  Expands to
   __builtin_ia32_scattersiv8di with the constant mask 0xFF (all 8 bits
   set); INDEX is a 256-bit vector passed as __v8si.  Each macro argument
   is parenthesized before its cast so that a compound argument expression
   is converted as a whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                   \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8)0xFF,     \
                                (__v8si)(__m256i) (INDEX),                \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
/* Macro definition of _mm512_mask_i32scatter_epi64.  Expands to
   __builtin_ia32_scattersiv8di with MASK cast to __mmask8; INDEX is a
   256-bit vector passed as __v8si.  Each macro argument is parenthesized
   before its cast so that a compound argument expression is converted as
   a whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)        \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8) (MASK),  \
                                (__v8si)(__m256i) (INDEX),                \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
/* Macro definition of _mm512_i64scatter_epi32.  Expands to
   __builtin_ia32_scatterdiv16si with the constant mask 0xFF (all 8 bits
   set); V1 is a 256-bit vector passed as __v8si.  Each macro argument is
   parenthesized before its cast so that a compound argument expression is
   converted as a whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                   \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8)0xFF,          \
                                 (__v8di)(__m512i) (INDEX),               \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
/* Macro definition of _mm512_mask_i64scatter_epi32.  Expands to
   __builtin_ia32_scatterdiv16si with MASK cast to __mmask8; V1 is a
   256-bit vector passed as __v8si.  Each macro argument is parenthesized
   before its cast so that a compound argument expression is converted as
   a whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)        \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) (MASK),       \
                                 (__v8di)(__m512i) (INDEX),               \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))
/* Macro definition of _mm512_i64scatter_epi64.  Expands to
   __builtin_ia32_scatterdiv8di with the constant mask 0xFF (all 8 bits
   set).  Each macro argument is parenthesized before its cast so that a
   compound argument expression is converted as a whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                   \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8)0xFF,     \
                                (__v8di)(__m512i) (INDEX),                \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
/* Macro definition of _mm512_mask_i64scatter_epi64.  Expands to
   __builtin_ia32_scatterdiv8di with MASK cast to __mmask8.  Each macro
   argument is parenthesized before its cast so that a compound argument
   expression is converted as a whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)        \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8) (MASK),  \
                                (__v8di)(__m512i) (INDEX),                \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9723 extern __inline __m512d
9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9727 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9732 extern __inline __m512d
9733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9734 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9736 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9738 _mm512_setzero_pd (),
9742 extern __inline void
9743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9744 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9746 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9750 extern __inline __m512
9751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9752 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9754 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9759 extern __inline __m512
9760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9761 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9763 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9765 _mm512_setzero_ps (),
9769 extern __inline void
9770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9771 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9773 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9777 extern __inline __m512i
9778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9781 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9786 extern __inline __m512i
9787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9788 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9790 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9792 _mm512_setzero_si512 (),
9796 extern __inline void
9797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9798 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9800 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9804 extern __inline __m512i
9805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9806 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9808 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9813 extern __inline __m512i
9814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9815 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9817 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9819 _mm512_setzero_si512 (),
9823 extern __inline void
9824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9825 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9827 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9831 extern __inline __m512d
9832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9833 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9835 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9840 extern __inline __m512d
9841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9842 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9844 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9846 _mm512_setzero_pd (),
9850 extern __inline __m512d
9851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9852 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9854 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9859 extern __inline __m512d
9860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9861 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9863 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9865 _mm512_setzero_pd (),
9869 extern __inline __m512
9870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9871 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9873 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9878 extern __inline __m512
9879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9880 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9882 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9884 _mm512_setzero_ps (),
9888 extern __inline __m512
9889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9890 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9892 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9897 extern __inline __m512
9898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9899 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9901 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9903 _mm512_setzero_ps (),
9907 extern __inline __m512i
9908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9909 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9911 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9916 extern __inline __m512i
9917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9918 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9920 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9922 _mm512_setzero_si512 (),
9926 extern __inline __m512i
9927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9928 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9930 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9935 extern __inline __m512i
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9940 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9942 _mm512_setzero_si512 (),
9946 extern __inline __m512i
9947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9948 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9950 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9955 extern __inline __m512i
9956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9957 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9959 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9961 _mm512_setzero_si512 (),
9965 extern __inline __m512i
9966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9967 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9969 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9974 extern __inline __m512i
9975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9976 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9978 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9980 _mm512_setzero_si512
9981 (), (__mmask16) __U);
9984 /* Mask arithmetic operations */
/* Returns the result of __builtin_ia32_kandhi applied to the two 16-bit
   masks __A and __B, cast to __mmask16.  */
9985 extern __inline __mmask16
9986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9987 _mm512_kand (__mmask16 __A, __mmask16 __B)
9989 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
/* Returns the result of __builtin_ia32_kandnhi applied to the two 16-bit
   masks __A and __B (in that operand order), cast to __mmask16.  */
9992 extern __inline __mmask16
9993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9994 _mm512_kandn (__mmask16 __A, __mmask16 __B)
9996 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
/* Returns the result of __builtin_ia32_korhi applied to the two 16-bit
   masks __A and __B, cast to __mmask16.  */
9999 extern __inline __mmask16
10000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10001 _mm512_kor (__mmask16 __A, __mmask16 __B)
10003 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10006 extern __inline int
10007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10008 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10010 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10014 extern __inline int
10015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10016 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10018 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
/* Returns the result of __builtin_ia32_kxnorhi applied to the two 16-bit
   masks __A and __B, cast to __mmask16.  */
10022 extern __inline __mmask16
10023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10024 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10026 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
/* Returns the result of __builtin_ia32_kxorhi applied to the two 16-bit
   masks __A and __B, cast to __mmask16.  */
10029 extern __inline __mmask16
10030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10031 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10033 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
/* Returns the result of __builtin_ia32_knothi applied to the 16-bit mask
   __A, cast to __mmask16.  */
10036 extern __inline __mmask16
10037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10038 _mm512_knot (__mmask16 __A)
10040 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
/* Returns the result of __builtin_ia32_kunpckhi applied to the two 16-bit
   masks __A and __B, cast to __mmask16.  */
10043 extern __inline __mmask16
10044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10045 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10047 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10050 #ifdef __OPTIMIZE__
10051 extern __inline __m512i
10052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10053 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10056 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10060 _mm512_setzero_si512 (),
10064 extern __inline __m512
10065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10066 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10069 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10073 _mm512_setzero_ps (), __B);
10076 extern __inline __m512i
10077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10078 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10079 __m128i __D, const int __imm)
10081 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
/* Forwards to __builtin_ia32_insertf32x4_mask with __C as the first operand;
   __A (as __v16sf) and __B are the last two operands.  */
10088 extern __inline __m512
10089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10090 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10091 __m128 __D, const int __imm)
10093 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10096 (__v16sf) __A, __B);
/* Macro form: expands to __builtin_ia32_insertf32x4_mask on X, Y, (int) C and
   _mm512_setzero_ps (), with each macro argument cast explicitly.  */
10099 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
10100 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10101 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
/* Macro form: expands to __builtin_ia32_inserti32x4_mask on X, Y, (int) C and
   _mm512_setzero_si512 (), with each macro argument cast explicitly.  */
10104 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
10105 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10106 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
/* Macro form: expands to __builtin_ia32_insertf32x4_mask on X, Y, (int) C and
   A (as __v16sf), with each macro argument cast explicitly.  */
10109 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
10110 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10111 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
/* Macro form: expands to __builtin_ia32_inserti32x4_mask on X, Y, (int) C and
   A (as __v16si), with each macro argument cast explicitly.  */
10114 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
10115 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10116 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
/* Forwards to __builtin_ia32_pmaxsq512_mask with __A first and
   _mm512_undefined_epi32 () as a later operand; result is cast to __m512i.  */
10120 extern __inline __m512i
10121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10122 _mm512_max_epi64 (__m512i __A, __m512i __B)
10124 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10127 _mm512_undefined_epi32 (),
/* Forwards to __builtin_ia32_pmaxsq512_mask with __A first and
   _mm512_setzero_si512 () as a later operand; result is cast to __m512i.  */
10131 extern __inline __m512i
10132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10133 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10135 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10138 _mm512_setzero_si512 (),
/* Forwards to __builtin_ia32_pmaxsq512_mask with __A first; __W (as __v8di)
   and __M are the last two operands.  */
10142 extern __inline __m512i
10143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10144 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10146 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10148 (__v8di) __W, __M);
/* Forwards to __builtin_ia32_pminsq512_mask with __A first and
   _mm512_undefined_epi32 () as a later operand; result is cast to __m512i.  */
10151 extern __inline __m512i
10152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10153 _mm512_min_epi64 (__m512i __A, __m512i __B)
10155 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10158 _mm512_undefined_epi32 (),
/* Forwards to __builtin_ia32_pminsq512_mask with __A first; __W (as __v8di)
   and __M are the last two operands.  */
10162 extern __inline __m512i
10163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10164 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10166 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10168 (__v8di) __W, __M);
/* Forwards to __builtin_ia32_pminsq512_mask with __A first and
   _mm512_setzero_si512 () as a later operand; result is cast to __m512i.  */
10171 extern __inline __m512i
10172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10173 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10175 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10178 _mm512_setzero_si512 (),
/* Forwards to __builtin_ia32_pmaxuq512_mask with __A first and
   _mm512_undefined_epi32 () as a later operand; result is cast to __m512i.  */
10182 extern __inline __m512i
10183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10184 _mm512_max_epu64 (__m512i __A, __m512i __B)
10186 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10189 _mm512_undefined_epi32 (),
/* Forwards to __builtin_ia32_pmaxuq512_mask with __A first and
   _mm512_setzero_si512 () as a later operand; result is cast to __m512i.  */
10193 extern __inline __m512i
10194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10195 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10197 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10200 _mm512_setzero_si512 (),
/* Forwards to __builtin_ia32_pmaxuq512_mask with __A first; __W (as __v8di)
   and __M are the last two operands.  */
10204 extern __inline __m512i
10205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10206 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10208 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10210 (__v8di) __W, __M);
/* Forwards to __builtin_ia32_pminuq512_mask with __A first and
   _mm512_undefined_epi32 () as a later operand; result is cast to __m512i.  */
10213 extern __inline __m512i
10214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10215 _mm512_min_epu64 (__m512i __A, __m512i __B)
10217 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10220 _mm512_undefined_epi32 (),
/* Forwards to __builtin_ia32_pminuq512_mask with __A first; __W (as __v8di)
   and __M are the last two operands.  */
10224 extern __inline __m512i
10225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10226 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10228 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10230 (__v8di) __W, __M);
/* Forwards to __builtin_ia32_pminuq512_mask with __A first and
   _mm512_setzero_si512 () as a later operand; result is cast to __m512i.  */
10233 extern __inline __m512i
10234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10235 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10237 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10240 _mm512_setzero_si512 (),
/* Forwards to __builtin_ia32_pmaxsd512_mask with __A first and
   _mm512_undefined_epi32 () as a later operand; result is cast to __m512i.  */
10244 extern __inline __m512i
10245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10246 _mm512_max_epi32 (__m512i __A, __m512i __B)
10248 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10251 _mm512_undefined_epi32 (),
/* Forwards to __builtin_ia32_pmaxsd512_mask with __A first and
   _mm512_setzero_si512 () as a later operand; result is cast to __m512i.  */
10255 extern __inline __m512i
10256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10257 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10259 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10262 _mm512_setzero_si512 (),
/* Forwards to __builtin_ia32_pmaxsd512_mask with __A first; __W (as __v16si)
   and __M are the last two operands.  */
10266 extern __inline __m512i
10267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10268 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10270 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10272 (__v16si) __W, __M);
/* Forwards to __builtin_ia32_pminsd512_mask with __A first and
   _mm512_undefined_epi32 () as a later operand; result is cast to __m512i.  */
10275 extern __inline __m512i
10276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277 _mm512_min_epi32 (__m512i __A, __m512i __B)
10279 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10282 _mm512_undefined_epi32 (),
/* Forwards to __builtin_ia32_pminsd512_mask with __A first and
   _mm512_setzero_si512 () as a later operand; result is cast to __m512i.  */
10286 extern __inline __m512i
10287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10288 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10290 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10293 _mm512_setzero_si512 (),
/* Forwards to __builtin_ia32_pminsd512_mask with __A first; __W (as __v16si)
   and __M are the last two operands.  */
10297 extern __inline __m512i
10298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10299 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10301 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10303 (__v16si) __W, __M);
/* Forwards to __builtin_ia32_pmaxud512_mask with __A first and
   _mm512_undefined_epi32 () as a later operand; result is cast to __m512i.  */
10306 extern __inline __m512i
10307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10308 _mm512_max_epu32 (__m512i __A, __m512i __B)
10310 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10313 _mm512_undefined_epi32 (),
/* Forwards to __builtin_ia32_pmaxud512_mask with __A first and
   _mm512_setzero_si512 () as a later operand; result is cast to __m512i.  */
10317 extern __inline __m512i
10318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10319 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10321 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10324 _mm512_setzero_si512 (),
/* Forwards to __builtin_ia32_pmaxud512_mask with __A first; __W (as __v16si)
   and __M are the last two operands.  */
10328 extern __inline __m512i
10329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10330 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10332 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10334 (__v16si) __W, __M);
/* Forwards to __builtin_ia32_pminud512_mask with __A first and
   _mm512_undefined_epi32 () as a later operand; result is cast to __m512i.  */
10337 extern __inline __m512i
10338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10339 _mm512_min_epu32 (__m512i __A, __m512i __B)
10341 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10344 _mm512_undefined_epi32 (),
/* Forwards to __builtin_ia32_pminud512_mask with __A first and
   _mm512_setzero_si512 () as a later operand; result is cast to __m512i.  */
10348 extern __inline __m512i
10349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10350 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10352 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10355 _mm512_setzero_si512 (),
/* Forwards to __builtin_ia32_pminud512_mask with __A first; __W (as __v16si)
   and __M are the last two operands.  */
10359 extern __inline __m512i
10360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10361 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10363 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10365 (__v16si) __W, __M);
/* Forwards to __builtin_ia32_unpcklps512_mask with __A first and
   _mm512_undefined_ps () as a later operand; result is cast to __m512.  */
10368 extern __inline __m512
10369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10370 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10372 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10375 _mm512_undefined_ps (),
/* Forwards to __builtin_ia32_unpcklps512_mask with __A as the first operand;
   the result is cast to __m512.  */
10379 extern __inline __m512
10380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10381 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10383 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
/* Forwards to __builtin_ia32_unpcklps512_mask with __A first and
   _mm512_setzero_ps () as a later operand; result is cast to __m512.  */
10389 extern __inline __m512
10390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10391 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10393 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10396 _mm512_setzero_ps (),
10400 #ifdef __OPTIMIZE__
/* Inline form: forwards to __builtin_ia32_maxsd_round with __A (as __v2df)
   as the first operand; the result is cast to __m128d.  */
10401 extern __inline __m128d
10402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10403 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10405 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
/* Inline form: forwards to __builtin_ia32_maxss_round with __A (as __v4sf)
   as the first operand; the result is cast to __m128.  */
10410 extern __inline __m128
10411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10412 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10414 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
/* Inline form: forwards to __builtin_ia32_minsd_round with __A (as __v2df)
   as the first operand; the result is cast to __m128d.  */
10419 extern __inline __m128d
10420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10423 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
/* Inline form: forwards to __builtin_ia32_minss_round with __A (as __v4sf)
   as the first operand; the result is cast to __m128.  */
10428 extern __inline __m128
10429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10430 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10432 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
/* Macro form of _mm_max_round_sd.  Expands to __builtin_ia32_maxsd_round,
   the same builtin the inline definition calls, so both forms agree; it
   must not name the addsd builtin.  A and B are the vector operands and C
   is forwarded unchanged as the builtin's third argument.  The expansion
   is fully parenthesized so it composes with surrounding operators.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round (A, B, C))
/* Macro form of _mm_max_round_ss.  Expands to __builtin_ia32_maxss_round,
   the same builtin the inline definition calls, so both forms agree; it
   must not name the addss builtin.  A and B are the vector operands and C
   is forwarded unchanged as the builtin's third argument.  The expansion
   is fully parenthesized so it composes with surrounding operators.  */
#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round (A, B, C))
/* Macro form of _mm_min_round_sd.  Expands to __builtin_ia32_minsd_round,
   the same builtin the inline definition calls, so both forms agree; it
   must not name the subsd builtin.  A and B are the vector operands and C
   is forwarded unchanged as the builtin's third argument.  The expansion
   is fully parenthesized so it composes with surrounding operators.  */
#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round (A, B, C))
/* Macro form of _mm_min_round_ss.  Expands to __builtin_ia32_minss_round,
   the same builtin the inline definition calls, so both forms agree; it
   must not name the subss builtin.  A and B are the vector operands and C
   is forwarded unchanged as the builtin's third argument.  The expansion
   is fully parenthesized so it composes with surrounding operators.  */
#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round (A, B, C))
/* Forwards to __builtin_ia32_blendmpd_512_mask with __A (as __v8df) as the
   first operand; the result is cast to __m512d.  */
10451 extern __inline __m512d
10452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10453 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10455 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
/* Forwards to __builtin_ia32_blendmps_512_mask with __A (as __v16sf) as the
   first operand; the result is cast to __m512.  */
10460 extern __inline __m512
10461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10462 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10464 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
/* Forwards to __builtin_ia32_blendmq_512_mask with __A (as __v8di) as the
   first operand; the result is cast to __m512i.  */
10469 extern __inline __m512i
10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10473 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
/* Forwards to __builtin_ia32_blendmd_512_mask with __A (as __v16si) as the
   first operand; the result is cast to __m512i.  */
10478 extern __inline __m512i
10479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10480 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10482 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10487 #ifdef __OPTIMIZE__
/* Inline form: forwards to __builtin_ia32_vfmaddsd3_round with __W (as
   __v2df) as the first operand; the result is cast to __m128d.  */
10488 extern __inline __m128d
10489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10490 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10492 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
/* Inline form: forwards to __builtin_ia32_vfmaddss3_round with __W (as
   __v4sf) as the first operand; the result is cast to __m128.  */
10498 extern __inline __m128
10499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10500 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10502 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
/* Inline form: built on the fused multiply-add builtin
   __builtin_ia32_vfmaddsd3_round, with __W (as __v2df) as the first
   operand; the result is cast to __m128d.  */
10508 extern __inline __m128d
10509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10510 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10512 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
/* Inline form: built on the fused multiply-add builtin
   __builtin_ia32_vfmaddss3_round, with __W (as __v4sf) as the first
   operand; the result is cast to __m128.  */
10518 extern __inline __m128
10519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10520 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10522 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
/* Inline form: built on the fused multiply-add builtin
   __builtin_ia32_vfmaddsd3_round, with __W (as __v2df) as the first
   operand; the result is cast to __m128d.  */
10528 extern __inline __m128d
10529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10530 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10532 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
/* Inline form: built on the fused multiply-add builtin
   __builtin_ia32_vfmaddss3_round, with __W (as __v4sf) as the first
   operand; the result is cast to __m128.  */
10538 extern __inline __m128
10539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10540 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10542 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
/* Inline form: built on the fused multiply-add builtin
   __builtin_ia32_vfmaddsd3_round, with __W (as __v2df) as the first
   operand; the result is cast to __m128d.  */
10548 extern __inline __m128d
10549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10550 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10552 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
/* Inline form: built on the fused multiply-add builtin
   __builtin_ia32_vfmaddss3_round, with __W (as __v4sf) as the first
   operand; the result is cast to __m128.  */
10558 extern __inline __m128
10559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10560 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10562 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
/* Macro form of _mm_fmadd_round_sd: passes A, B, C and R straight to
   __builtin_ia32_vfmaddsd3_round and casts the result to __m128d.  The
   whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))
/* Macro form of _mm_fmadd_round_ss: passes A, B, C and R straight to
   __builtin_ia32_vfmaddss3_round and casts the result to __m128.  The
   whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call.  */
#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))
/* Macro form of _mm_fmsub_round_sd: calls __builtin_ia32_vfmaddsd3_round
   with A, B, the negated C, and R, then casts the result to __m128d.  The
   whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call.  */
#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))
/* Macro form of _mm_fmsub_round_ss: calls __builtin_ia32_vfmaddss3_round
   with A, B, the negated C, and R, then casts the result to __m128.  The
   whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call.  */
#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))
/* Macro form of _mm_fnmadd_round_sd: calls __builtin_ia32_vfmaddsd3_round
   with A, the negated B, C, and R, then casts the result to __m128d.  The
   whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call.  */
#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))
/* Macro form of _mm_fnmadd_round_ss: calls __builtin_ia32_vfmaddss3_round
   with A, the negated B, C, and R, then casts the result to __m128.  The
   whole expansion is parenthesized so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call.  */
#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))
/* Macro form of _mm_fnmsub_round_sd: calls __builtin_ia32_vfmaddsd3_round
   with A, the negated B, the negated C, and R, then casts the result to
   __m128d.  The whole expansion is parenthesized so that a postfix
   operator written after the macro applies to the cast result, not to
   the builtin call.  */
#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))
/* Macro form of _mm_fnmsub_round_ss: calls __builtin_ia32_vfmaddss3_round
   with A, the negated B, the negated C, and R, then casts the result to
   __m128.  The whole expansion is parenthesized so that a postfix
   operator written after the macro applies to the cast result, not to
   the builtin call.  */
#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10593 #ifdef __OPTIMIZE__
10594 extern __inline int
10595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10596 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10598 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10601 extern __inline int
10602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10603 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10605 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
/* Macro form of _mm_comi_round_ss: the four arguments are passed through,
   in order, to __builtin_ia32_vcomiss.  */
#define _mm_comi_round_ss(X, Y, P, R) \
    __builtin_ia32_vcomiss (X, Y, P, R)
/* Macro form of _mm_comi_round_sd: the four arguments are passed through,
   in order, to __builtin_ia32_vcomisd.  */
#define _mm_comi_round_sd(X, Y, P, R) \
    __builtin_ia32_vcomisd (X, Y, P, R)
/* Forwards to __builtin_ia32_sqrtpd512_mask with __A first,
   _mm512_undefined_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10614 extern __inline __m512d
10615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10616 _mm512_sqrt_pd (__m512d __A)
10618 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10620 _mm512_undefined_pd (),
10622 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_sqrtpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
10625 extern __inline __m512d
10626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10627 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10629 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10632 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_sqrtpd512_mask with __A first,
   _mm512_setzero_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10635 extern __inline __m512d
10636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10637 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10639 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10641 _mm512_setzero_pd (),
10643 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_sqrtps512_mask with __A first,
   _mm512_undefined_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
10646 extern __inline __m512
10647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10648 _mm512_sqrt_ps (__m512 __A)
10650 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10652 _mm512_undefined_ps (),
10654 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_sqrtps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
10657 extern __inline __m512
10658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10659 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10661 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10664 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_sqrtps512_mask with __A first,
   _mm512_setzero_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
10667 extern __inline __m512
10668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10669 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10671 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10673 _mm512_setzero_ps (),
10675 _MM_FROUND_CUR_DIRECTION);
10678 extern __inline __m512d
10679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10680 _mm512_add_pd (__m512d __A, __m512d __B)
10682 return (__m512d) ((__v8df)__A + (__v8df)__B);
/* Forwards to __builtin_ia32_addpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
10685 extern __inline __m512d
10686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10687 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10689 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10693 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_addpd512_mask with __A first,
   _mm512_setzero_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10696 extern __inline __m512d
10697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10698 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10700 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10703 _mm512_setzero_pd (),
10705 _MM_FROUND_CUR_DIRECTION);
10708 extern __inline __m512
10709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10710 _mm512_add_ps (__m512 __A, __m512 __B)
10712 return (__m512) ((__v16sf)__A + (__v16sf)__B);
/* Forwards to __builtin_ia32_addps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
10715 extern __inline __m512
10716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10717 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10719 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10723 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_addps512_mask with __A first,
   _mm512_setzero_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
10726 extern __inline __m512
10727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10728 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10730 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10733 _mm512_setzero_ps (),
10735 _MM_FROUND_CUR_DIRECTION);
10738 extern __inline __m512d
10739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10740 _mm512_sub_pd (__m512d __A, __m512d __B)
10742 return (__m512d) ((__v8df)__A - (__v8df)__B);
/* Forwards to __builtin_ia32_subpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
10745 extern __inline __m512d
10746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10747 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10749 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10753 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_subpd512_mask with __A first,
   _mm512_setzero_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10756 extern __inline __m512d
10757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10758 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10760 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10763 _mm512_setzero_pd (),
10765 _MM_FROUND_CUR_DIRECTION);
10768 extern __inline __m512
10769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10770 _mm512_sub_ps (__m512 __A, __m512 __B)
10772 return (__m512) ((__v16sf)__A - (__v16sf)__B);
/* Forwards to __builtin_ia32_subps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
10775 extern __inline __m512
10776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10777 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10779 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10783 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_subps512_mask with __A first,
   _mm512_setzero_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
10786 extern __inline __m512
10787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10788 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10790 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10793 _mm512_setzero_ps (),
10795 _MM_FROUND_CUR_DIRECTION);
10798 extern __inline __m512d
10799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10800 _mm512_mul_pd (__m512d __A, __m512d __B)
10802 return (__m512d) ((__v8df)__A * (__v8df)__B);
/* Forwards to __builtin_ia32_mulpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
10805 extern __inline __m512d
10806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10807 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10809 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10813 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_mulpd512_mask with __A first,
   _mm512_setzero_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10816 extern __inline __m512d
10817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10818 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10820 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10823 _mm512_setzero_pd (),
10825 _MM_FROUND_CUR_DIRECTION);
10828 extern __inline __m512
10829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10830 _mm512_mul_ps (__m512 __A, __m512 __B)
10832 return (__m512) ((__v16sf)__A * (__v16sf)__B);
/* Forwards to __builtin_ia32_mulps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
10835 extern __inline __m512
10836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10837 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10839 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10843 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_mulps512_mask with __A first,
   _mm512_setzero_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
10846 extern __inline __m512
10847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10848 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10850 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10853 _mm512_setzero_ps (),
10855 _MM_FROUND_CUR_DIRECTION);
10858 extern __inline __m512d
10859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10860 _mm512_div_pd (__m512d __M, __m512d __V)
10862 return (__m512d) ((__v8df)__M / (__v8df)__V);
/* Forwards to __builtin_ia32_divpd512_mask with __M first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
10865 extern __inline __m512d
10866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10867 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10869 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10873 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_divpd512_mask with __M first,
   _mm512_setzero_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10876 extern __inline __m512d
10877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10878 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10880 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10883 _mm512_setzero_pd (),
10885 _MM_FROUND_CUR_DIRECTION);
10888 extern __inline __m512
10889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10890 _mm512_div_ps (__m512 __A, __m512 __B)
10892 return (__m512) ((__v16sf)__A / (__v16sf)__B);
/* Forwards to __builtin_ia32_divps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
10895 extern __inline __m512
10896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10897 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10899 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10903 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_divps512_mask with __A first,
   _mm512_setzero_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
10906 extern __inline __m512
10907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10908 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10910 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10913 _mm512_setzero_ps (),
10915 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_maxpd512_mask with __A first,
   _mm512_undefined_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10918 extern __inline __m512d
10919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10920 _mm512_max_pd (__m512d __A, __m512d __B)
10922 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10925 _mm512_undefined_pd (),
10927 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_maxpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
10930 extern __inline __m512d
10931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10932 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10934 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10938 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_maxpd512_mask with __A first,
   _mm512_setzero_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10941 extern __inline __m512d
10942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10943 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10945 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10948 _mm512_setzero_pd (),
10950 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_maxps512_mask with __A first,
   _mm512_undefined_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
10953 extern __inline __m512
10954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10955 _mm512_max_ps (__m512 __A, __m512 __B)
10957 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10960 _mm512_undefined_ps (),
10962 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_maxps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
10965 extern __inline __m512
10966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10967 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10969 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10973 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_maxps512_mask with __A first,
   _mm512_setzero_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
10976 extern __inline __m512
10977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10978 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10980 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10983 _mm512_setzero_ps (),
10985 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_minpd512_mask with __A first,
   _mm512_undefined_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
10988 extern __inline __m512d
10989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10990 _mm512_min_pd (__m512d __A, __m512d __B)
10992 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10995 _mm512_undefined_pd (),
10997 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_minpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
11000 extern __inline __m512d
11001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11002 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11004 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11008 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_minpd512_mask with __A first,
   _mm512_setzero_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
11011 extern __inline __m512d
11012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11013 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11015 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11018 _mm512_setzero_pd (),
11020 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_minps512_mask with __A first,
   _mm512_undefined_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
11023 extern __inline __m512
11024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11025 _mm512_min_ps (__m512 __A, __m512 __B)
11027 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11030 _mm512_undefined_ps (),
11032 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_minps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
11035 extern __inline __m512
11036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11037 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11039 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11043 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_minps512_mask with __A first,
   _mm512_setzero_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
11046 extern __inline __m512
11047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11048 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11050 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11053 _mm512_setzero_ps (),
11055 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_scalefpd512_mask with __A first,
   _mm512_undefined_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
11058 extern __inline __m512d
11059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11060 _mm512_scalef_pd (__m512d __A, __m512d __B)
11062 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11065 _mm512_undefined_pd (),
11067 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_scalefpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
11070 extern __inline __m512d
11071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11072 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11074 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11078 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_scalefpd512_mask with __A first,
   _mm512_setzero_pd () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512d.  */
11081 extern __inline __m512d
11082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11083 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11085 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11088 _mm512_setzero_pd (),
11090 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_scalefps512_mask with __A first,
   _mm512_undefined_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
11093 extern __inline __m512
11094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11095 _mm512_scalef_ps (__m512 __A, __m512 __B)
11097 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11100 _mm512_undefined_ps (),
11102 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_scalefps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
11105 extern __inline __m512
11106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11107 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11109 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11113 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_scalefps512_mask with __A first,
   _mm512_setzero_ps () as a later operand and _MM_FROUND_CUR_DIRECTION
   last; the result is cast to __m512.  */
11116 extern __inline __m512
11117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11118 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11120 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11123 _mm512_setzero_ps (),
11125 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_scalefsd_round with __A (as __v2df) first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m128d.  */
11128 extern __inline __m128d
11129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11130 _mm_scalef_sd (__m128d __A, __m128d __B)
11132 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11134 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_scalefss_round with __A (as __v4sf) first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m128.  */
11137 extern __inline __m128
11138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11139 _mm_scalef_ss (__m128 __A, __m128 __B)
11141 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11143 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_vfmaddpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
11146 extern __inline __m512d
11147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11148 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11150 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11154 _MM_FROUND_CUR_DIRECTION);
/* Mask-parameter variant (__U is the second parameter): forwards to
   __builtin_ia32_vfmaddpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
11157 extern __inline __m512d
11158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11161 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11165 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_vfmaddpd512_mask3 (the _mask3 builtin) with
   __A first and _MM_FROUND_CUR_DIRECTION last; result is cast to __m512d.  */
11168 extern __inline __m512d
11169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11170 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11172 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11176 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_vfmaddpd512_maskz (the _maskz builtin) with
   __A first and _MM_FROUND_CUR_DIRECTION last; result is cast to __m512d.  */
11179 extern __inline __m512d
11180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11181 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11183 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11187 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_vfmaddps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
11190 extern __inline __m512
11191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11192 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11194 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11198 _MM_FROUND_CUR_DIRECTION);
/* Mask-parameter variant (__U is the second parameter): forwards to
   __builtin_ia32_vfmaddps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
11201 extern __inline __m512
11202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11203 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11205 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11209 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_vfmaddps512_mask3 (the _mask3 builtin) with
   __A first and _MM_FROUND_CUR_DIRECTION last; result is cast to __m512.  */
11212 extern __inline __m512
11213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11214 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11216 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11220 _MM_FROUND_CUR_DIRECTION);
/* Forwards to __builtin_ia32_vfmaddps512_maskz (the _maskz builtin) with
   __A first and _MM_FROUND_CUR_DIRECTION last; result is cast to __m512.  */
11223 extern __inline __m512
11224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11225 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11227 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11231 _MM_FROUND_CUR_DIRECTION);
/* Built on the multiply-add builtin __builtin_ia32_vfmaddpd512_mask, with
   __A first and _MM_FROUND_CUR_DIRECTION last; result is cast to __m512d.  */
11234 extern __inline __m512d
11235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11236 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11238 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11242 _MM_FROUND_CUR_DIRECTION);
/* Mask-parameter variant (__U is the second parameter): built on
   __builtin_ia32_vfmaddpd512_mask, with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
11245 extern __inline __m512d
11246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11247 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11249 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11253 _MM_FROUND_CUR_DIRECTION);
/* Unlike its fmsub siblings, this one uses a dedicated builtin,
   __builtin_ia32_vfmsubpd512_mask3, with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512d.  */
11256 extern __inline __m512d
11257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11258 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11260 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11264 _MM_FROUND_CUR_DIRECTION);
/* Built on __builtin_ia32_vfmaddpd512_maskz (the _maskz multiply-add
   builtin), with __A first and _MM_FROUND_CUR_DIRECTION last; the result
   is cast to __m512d.  */
11267 extern __inline __m512d
11268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11271 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11275 _MM_FROUND_CUR_DIRECTION);
/* Built on the multiply-add builtin __builtin_ia32_vfmaddps512_mask, with
   __A first and _MM_FROUND_CUR_DIRECTION last; result is cast to __m512.  */
11278 extern __inline __m512
11279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11280 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11282 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11286 _MM_FROUND_CUR_DIRECTION);
/* Mask-parameter variant (__U is the second parameter): built on
   __builtin_ia32_vfmaddps512_mask, with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
11289 extern __inline __m512
11290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11291 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11293 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11297 _MM_FROUND_CUR_DIRECTION);
/* Unlike its fmsub siblings, this one uses a dedicated builtin,
   __builtin_ia32_vfmsubps512_mask3, with __A first and
   _MM_FROUND_CUR_DIRECTION last; the result is cast to __m512.  */
11300 extern __inline __m512
11301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11302 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11304 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11308 _MM_FROUND_CUR_DIRECTION);
/* Built on __builtin_ia32_vfmaddps512_maskz (the _maskz multiply-add
   builtin), with __A first and _MM_FROUND_CUR_DIRECTION last; the result
   is cast to __m512.  */
11311 extern __inline __m512
11312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11313 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11315 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11319 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fmaddsub_pd: form without a mask parameter.  Calls
   __builtin_ia32_vfmaddsubpd512_mask with __A (as __v8df) first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11322 extern __inline __m512d
11323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11324 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11326 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11330 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fmaddsub_pd: variant taking a mask __U (__mmask8) after
   __A.  Calls __builtin_ia32_vfmaddsubpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11333 extern __inline __m512d
11334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11335 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11337 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11341 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask3_fmaddsub_pd: variant taking the mask __U (__mmask8) as
   the last parameter.  Calls __builtin_ia32_vfmaddsubpd512_mask3 with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11344 extern __inline __m512d
11345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11346 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11348 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11352 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fmaddsub_pd: variant taking the mask __U (__mmask8) as
   the first parameter.  Calls __builtin_ia32_vfmaddsubpd512_maskz with
   __A first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11355 extern __inline __m512d
11356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11359 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11363 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fmaddsub_ps: form without a mask parameter.  Calls
   __builtin_ia32_vfmaddsubps512_mask with __A (as __v16sf) first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11366 extern __inline __m512
11367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11370 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11374 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fmaddsub_ps: variant taking a mask __U (__mmask16) after
   __A.  Calls __builtin_ia32_vfmaddsubps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11377 extern __inline __m512
11378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11379 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11381 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11385 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask3_fmaddsub_ps: variant taking the mask __U (__mmask16) as
   the last parameter.  Calls __builtin_ia32_vfmaddsubps512_mask3 with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11388 extern __inline __m512
11389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11390 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11392 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11396 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fmaddsub_ps: variant taking the mask __U (__mmask16) as
   the first parameter.  Calls __builtin_ia32_vfmaddsubps512_maskz with
   __A first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11399 extern __inline __m512
11400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11401 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11403 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11407 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fmsubadd_pd: form without a mask parameter.  Implemented on top
   of the fmaddsub builtin __builtin_ia32_vfmaddsubpd512_mask, with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view, so
   how the fmsubadd behaviour is obtained cannot be confirmed from here.  */
11410 extern __inline __m512d
11411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11412 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11414 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11418 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fmsubadd_pd: variant taking a mask __U (__mmask8) after
   __A.  Calls the fmaddsub builtin __builtin_ia32_vfmaddsubpd512_mask
   with __A first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11421 extern __inline __m512d
11422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11423 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11425 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11429 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask3_fmsubadd_pd: variant taking the mask __U (__mmask8) as
   the last parameter.  Calls the dedicated
   __builtin_ia32_vfmsubaddpd512_mask3 builtin with __A first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11432 extern __inline __m512d
11433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11434 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11436 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11440 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fmsubadd_pd: variant taking the mask __U (__mmask8) as
   the first parameter.  Calls __builtin_ia32_vfmaddsubpd512_maskz with
   __A first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11443 extern __inline __m512d
11444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11445 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11447 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11451 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fmsubadd_ps: form without a mask parameter.  Implemented on top
   of the fmaddsub builtin __builtin_ia32_vfmaddsubps512_mask, with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11454 extern __inline __m512
11455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11456 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11458 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11462 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fmsubadd_ps: variant taking a mask __U (__mmask16) after
   __A.  Calls the fmaddsub builtin __builtin_ia32_vfmaddsubps512_mask
   with __A first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11465 extern __inline __m512
11466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11467 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11469 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11473 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask3_fmsubadd_ps: variant taking the mask __U (__mmask16) as
   the last parameter.  Calls the dedicated
   __builtin_ia32_vfmsubaddps512_mask3 builtin with __A first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11476 extern __inline __m512
11477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11478 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11480 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11484 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fmsubadd_ps: variant taking the mask __U (__mmask16) as
   the first parameter.  Calls __builtin_ia32_vfmaddsubps512_maskz with
   __A first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
11487 extern __inline __m512
11488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11489 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11491 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11495 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fnmadd_pd: form without a mask parameter.  Negates __A (as
   __v8df) and hands it to __builtin_ia32_vfmaddpd512_mask as the first
   argument; _MM_FROUND_CUR_DIRECTION is the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11498 extern __inline __m512d
11499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11500 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11502 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11506 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fnmadd_pd: variant taking a mask __U (__mmask8) after __A.
   Unlike the unmasked form, __A is passed un-negated to the dedicated
   __builtin_ia32_vfnmaddpd512_mask builtin; _MM_FROUND_CUR_DIRECTION is
   the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11509 extern __inline __m512d
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11513 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11517 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask3_fnmadd_pd: variant taking the mask __U (__mmask8) as the
   last parameter.  Negates __A and passes it first to
   __builtin_ia32_vfmaddpd512_mask3; _MM_FROUND_CUR_DIRECTION is last.
   NOTE(review): the arguments in between are not visible in this view.  */
11520 extern __inline __m512d
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11524 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11528 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fnmadd_pd: variant taking the mask __U (__mmask8) as the
   first parameter.  Negates __A and passes it first to
   __builtin_ia32_vfmaddpd512_maskz; _MM_FROUND_CUR_DIRECTION is last.
   NOTE(review): the arguments in between are not visible in this view.  */
11531 extern __inline __m512d
11532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11533 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11535 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11539 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fnmadd_ps: form without a mask parameter.  Negates __A (as
   __v16sf) and hands it to __builtin_ia32_vfmaddps512_mask as the first
   argument; _MM_FROUND_CUR_DIRECTION is the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11542 extern __inline __m512
11543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11544 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11546 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11550 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fnmadd_ps: variant taking a mask __U (__mmask16) after
   __A.  __A is passed un-negated to the dedicated
   __builtin_ia32_vfnmaddps512_mask builtin; _MM_FROUND_CUR_DIRECTION is
   the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11553 extern __inline __m512
11554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11555 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11557 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11561 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask3_fnmadd_ps: variant taking the mask __U (__mmask16) as the
   last parameter.  Negates __A and passes it first to
   __builtin_ia32_vfmaddps512_mask3; _MM_FROUND_CUR_DIRECTION is last.
   NOTE(review): the arguments in between are not visible in this view.  */
11564 extern __inline __m512
11565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11566 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11568 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11572 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fnmadd_ps: variant taking the mask __U (__mmask16) as the
   first parameter.  Negates __A and passes it first to
   __builtin_ia32_vfmaddps512_maskz; _MM_FROUND_CUR_DIRECTION is last.
   NOTE(review): the arguments in between are not visible in this view.  */
11575 extern __inline __m512
11576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11577 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11579 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11583 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fnmsub_pd: form without a mask parameter.  Negates __A (as
   __v8df) and hands it to __builtin_ia32_vfmaddpd512_mask as the first
   argument; _MM_FROUND_CUR_DIRECTION is the last argument.
   NOTE(review): the arguments in between are not visible in this view, so
   the handling of __C cannot be confirmed from here.  */
11586 extern __inline __m512d
11587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11588 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11590 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11594 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fnmsub_pd: variant taking a mask __U (__mmask8) after __A.
   __A is passed un-negated to the dedicated
   __builtin_ia32_vfnmsubpd512_mask builtin; _MM_FROUND_CUR_DIRECTION is
   the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11597 extern __inline __m512d
11598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11599 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11601 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11605 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask3_fnmsub_pd: variant taking the mask __U (__mmask8) as the
   last parameter.  __A is passed un-negated to the dedicated
   __builtin_ia32_vfnmsubpd512_mask3 builtin; _MM_FROUND_CUR_DIRECTION is
   the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11608 extern __inline __m512d
11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11610 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11612 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11616 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fnmsub_pd: variant taking the mask __U (__mmask8) as the
   first parameter.  Negates __A and passes it first to
   __builtin_ia32_vfmaddpd512_maskz; _MM_FROUND_CUR_DIRECTION is last.
   NOTE(review): the arguments in between are not visible in this view.  */
11619 extern __inline __m512d
11620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11621 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11623 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11627 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fnmsub_ps: form without a mask parameter.  Negates __A (as
   __v16sf) and hands it to __builtin_ia32_vfmaddps512_mask as the first
   argument; _MM_FROUND_CUR_DIRECTION is the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11630 extern __inline __m512
11631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11632 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11634 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11638 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fnmsub_ps: variant taking a mask __U (__mmask16) after
   __A.  __A is passed un-negated to the dedicated
   __builtin_ia32_vfnmsubps512_mask builtin; _MM_FROUND_CUR_DIRECTION is
   the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11641 extern __inline __m512
11642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11643 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11645 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11649 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask3_fnmsub_ps: variant taking the mask __U (__mmask16) as the
   last parameter.  __A is passed un-negated to the dedicated
   __builtin_ia32_vfnmsubps512_mask3 builtin; _MM_FROUND_CUR_DIRECTION is
   the last argument.
   NOTE(review): the arguments in between are not visible in this view.  */
11652 extern __inline __m512
11653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11654 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11656 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11660 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fnmsub_ps: variant taking the mask __U (__mmask16) as the
   first parameter.  Negates __A and passes it first to
   __builtin_ia32_vfmaddps512_maskz; _MM_FROUND_CUR_DIRECTION is last.
   NOTE(review): the arguments in between are not visible in this view.  */
11663 extern __inline __m512
11664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11665 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11667 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11671 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvttpd_epi32: takes a __m512d and returns a __m256i via
   __builtin_ia32_cvttpd2dq512_mask.  Passes _mm256_undefined_si256 () as
   one argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the cast on the undefined value and the mask argument are
   on lines not visible in this view.  */
11674 extern __inline __m256i
11675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11676 _mm512_cvttpd_epi32 (__m512d __A)
11678 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11680 _mm256_undefined_si256 (),
11682 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvttpd_epi32: variant with extra parameters __W (__m256i)
   and __U (__mmask8).  Calls __builtin_ia32_cvttpd2dq512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11685 extern __inline __m256i
11686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11687 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11689 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11692 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvttpd_epi32: variant taking a mask __U (__mmask8).
   Calls __builtin_ia32_cvttpd2dq512_mask with __A first, a zero vector
   from _mm256_setzero_si256 () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11695 extern __inline __m256i
11696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11697 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11699 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11701 _mm256_setzero_si256 (),
11703 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvttpd_epu32: takes a __m512d and returns a __m256i via
   __builtin_ia32_cvttpd2udq512_mask.  Passes _mm256_undefined_si256 ()
   as one argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11706 extern __inline __m256i
11707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11708 _mm512_cvttpd_epu32 (__m512d __A)
11710 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11712 _mm256_undefined_si256 (),
11714 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvttpd_epu32: variant with extra parameters __W (__m256i)
   and __U (__mmask8).  Calls __builtin_ia32_cvttpd2udq512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11717 extern __inline __m256i
11718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11719 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11721 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11724 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvttpd_epu32: variant taking a mask __U (__mmask8).
   Calls __builtin_ia32_cvttpd2udq512_mask with __A first, a zero vector
   from _mm256_setzero_si256 () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11727 extern __inline __m256i
11728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11729 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11731 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11733 _mm256_setzero_si256 (),
11735 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtpd_epi32: takes a __m512d and returns a __m256i via
   __builtin_ia32_cvtpd2dq512_mask.  Passes _mm256_undefined_si256 () as
   one argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11738 extern __inline __m256i
11739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11740 _mm512_cvtpd_epi32 (__m512d __A)
11742 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11744 _mm256_undefined_si256 (),
11746 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtpd_epi32: variant with extra parameters __W (__m256i)
   and __U (__mmask8).  Calls __builtin_ia32_cvtpd2dq512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11749 extern __inline __m256i
11750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11751 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11753 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11756 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtpd_epi32: variant taking a mask __U (__mmask8).
   Calls __builtin_ia32_cvtpd2dq512_mask with __A first, a zero vector
   from _mm256_setzero_si256 () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11759 extern __inline __m256i
11760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11761 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11763 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11765 _mm256_setzero_si256 (),
11767 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtpd_epu32: takes a __m512d and returns a __m256i via
   __builtin_ia32_cvtpd2udq512_mask.  Passes _mm256_undefined_si256 () as
   one argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11770 extern __inline __m256i
11771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11772 _mm512_cvtpd_epu32 (__m512d __A)
11774 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11776 _mm256_undefined_si256 (),
11778 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtpd_epu32: variant with extra parameters __W (__m256i)
   and __U (__mmask8).  Calls __builtin_ia32_cvtpd2udq512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11781 extern __inline __m256i
11782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11783 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11785 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11788 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtpd_epu32: variant taking a mask __U (__mmask8).
   Calls __builtin_ia32_cvtpd2udq512_mask with __A first, a zero vector
   from _mm256_setzero_si256 () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11791 extern __inline __m256i
11792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11793 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11795 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11797 _mm256_setzero_si256 (),
11799 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvttps_epi32: takes a __m512 and returns a __m512i via
   __builtin_ia32_cvttps2dq512_mask.  Passes _mm512_undefined_epi32 () as
   one argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11802 extern __inline __m512i
11803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11804 _mm512_cvttps_epi32 (__m512 __A)
11806 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11808 _mm512_undefined_epi32 (),
11810 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvttps_epi32: variant with extra parameters __W (__m512i)
   and __U (__mmask16).  Calls __builtin_ia32_cvttps2dq512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11813 extern __inline __m512i
11814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11815 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11817 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11820 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvttps_epi32: variant taking a mask __U (__mmask16).
   Calls __builtin_ia32_cvttps2dq512_mask with __A first, a zero vector
   from _mm512_setzero_si512 () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11823 extern __inline __m512i
11824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11825 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11827 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11829 _mm512_setzero_si512 (),
11831 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvttps_epu32: takes a __m512 and returns a __m512i via
   __builtin_ia32_cvttps2udq512_mask.  Passes _mm512_undefined_epi32 ()
   as one argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11834 extern __inline __m512i
11835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11836 _mm512_cvttps_epu32 (__m512 __A)
11838 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11840 _mm512_undefined_epi32 (),
11842 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvttps_epu32: variant with extra parameters __W (__m512i)
   and __U (__mmask16).  Calls __builtin_ia32_cvttps2udq512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11845 extern __inline __m512i
11846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11847 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11849 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11852 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvttps_epu32: variant taking a mask __U (__mmask16).
   Calls __builtin_ia32_cvttps2udq512_mask with __A first, a zero vector
   from _mm512_setzero_si512 () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11855 extern __inline __m512i
11856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11857 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11859 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11861 _mm512_setzero_si512 (),
11863 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtps_epi32: takes a __m512 and returns a __m512i via
   __builtin_ia32_cvtps2dq512_mask.  Passes _mm512_undefined_epi32 () as
   one argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11866 extern __inline __m512i
11867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868 _mm512_cvtps_epi32 (__m512 __A)
11870 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11872 _mm512_undefined_epi32 (),
11874 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtps_epi32: variant with extra parameters __W (__m512i)
   and __U (__mmask16).  Calls __builtin_ia32_cvtps2dq512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11877 extern __inline __m512i
11878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11879 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11881 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11884 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtps_epi32: variant taking a mask __U (__mmask16).
   Calls __builtin_ia32_cvtps2dq512_mask with __A first, a zero vector
   from _mm512_setzero_si512 () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11887 extern __inline __m512i
11888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11889 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11891 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11893 _mm512_setzero_si512 (),
11895 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtps_epu32: takes a __m512 and returns a __m512i via
   __builtin_ia32_cvtps2udq512_mask.  Passes _mm512_undefined_epi32 () as
   one argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11898 extern __inline __m512i
11899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11900 _mm512_cvtps_epu32 (__m512 __A)
11902 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11904 _mm512_undefined_epi32 (),
11906 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtps_epu32: variant with extra parameters __W (__m512i)
   and __U (__mmask16).  Calls __builtin_ia32_cvtps2udq512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11909 extern __inline __m512i
11910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11911 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11913 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11916 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtps_epu32: variant taking a mask __U (__mmask16).
   Calls __builtin_ia32_cvtps2udq512_mask with __A first, a zero vector
   from _mm512_setzero_si512 () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11919 extern __inline __m512i
11920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11921 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11923 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11925 _mm512_setzero_si512 (),
11927 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvtu64_ss: takes a __m128 __A and an unsigned long long __B and
   returns the __m128 produced by __builtin_ia32_cvtusi2ss64 (__A as
   __v4sf, __B, _MM_FROUND_CUR_DIRECTION).
   NOTE(review): presumably guarded by an x86-64 conditional that is not
   visible in this view -- confirm against the full file.  */
11931 extern __inline __m128
11932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11933 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11935 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11936 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvtu64_sd: takes a __m128d __A and an unsigned long long __B and
   returns the __m128d produced by __builtin_ia32_cvtusi2sd64 (__A as
   __v2df, __B, _MM_FROUND_CUR_DIRECTION).  */
11939 extern __inline __m128d
11940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11941 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11943 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11944 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvtu32_ss: takes a __m128 __A and an unsigned __B and returns the
   __m128 produced by __builtin_ia32_cvtusi2ss32 (__A as __v4sf, __B,
   _MM_FROUND_CUR_DIRECTION).  */
11948 extern __inline __m128
11949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11950 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11952 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11953 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtepi32_ps: takes a __m512i and returns a __m512 via
   __builtin_ia32_cvtdq2ps512_mask, with __A cast to __v16si.  Passes
   _mm512_undefined_ps () as one argument and _MM_FROUND_CUR_DIRECTION as
   the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11956 extern __inline __m512
11957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958 _mm512_cvtepi32_ps (__m512i __A)
11960 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11962 _mm512_undefined_ps (),
11964 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtepi32_ps: variant with extra parameters __W (__m512)
   and __U (__mmask16).  Calls __builtin_ia32_cvtdq2ps512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11967 extern __inline __m512
11968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11969 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11971 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11974 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtepi32_ps: variant taking a mask __U (__mmask16).
   Calls __builtin_ia32_cvtdq2ps512_mask with __A first, a zero vector
   from _mm512_setzero_ps () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
11977 extern __inline __m512
11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11981 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11983 _mm512_setzero_ps (),
11985 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtepu32_ps: takes a __m512i and returns a __m512 via
   __builtin_ia32_cvtudq2ps512_mask, with __A cast to __v16si.  Passes
   _mm512_undefined_ps () as one argument and _MM_FROUND_CUR_DIRECTION as
   the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
11988 extern __inline __m512
11989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11990 _mm512_cvtepu32_ps (__m512i __A)
11992 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11994 _mm512_undefined_ps (),
11996 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtepu32_ps: variant with extra parameters __W (__m512)
   and __U (__mmask16).  Calls __builtin_ia32_cvtudq2ps512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
11999 extern __inline __m512
12000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12001 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12003 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12006 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtepu32_ps: variant taking a mask __U (__mmask16).
   Calls __builtin_ia32_cvtudq2ps512_mask with __A first, a zero vector
   from _mm512_setzero_ps () as one argument, and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
12009 extern __inline __m512
12010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12011 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12013 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12015 _mm512_setzero_ps (),
12017 _MM_FROUND_CUR_DIRECTION);
12020 #ifdef __OPTIMIZE__
/* _mm512_fixupimm_pd: inline-function form (this section follows an
   #ifdef __OPTIMIZE__).  Takes __A, __B (__m512d), __C (__m512i) and an
   int __imm; calls __builtin_ia32_fixupimmpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
12021 extern __inline __m512d
12022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12023 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12025 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12030 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fixupimm_pd: variant taking a mask __U (__mmask8) after
   __A.  Calls __builtin_ia32_fixupimmpd512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
12033 extern __inline __m512d
12034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12035 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12036 __m512i __C, const int __imm)
12038 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12043 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fixupimm_pd: variant taking the mask __U (__mmask8) as
   the first parameter.  Calls __builtin_ia32_fixupimmpd512_maskz with
   __A first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
12046 extern __inline __m512d
12047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12049 __m512i __C, const int __imm)
12051 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12056 _MM_FROUND_CUR_DIRECTION);
/* _mm512_fixupimm_ps: takes __A, __B (__m512), __C (__m512i) and an int
   __imm; calls __builtin_ia32_fixupimmps512_mask with __A (as __v16sf)
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
12059 extern __inline __m512
12060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12061 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12063 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12068 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_fixupimm_ps: variant taking a mask __U (__mmask16) after
   __A.  Calls __builtin_ia32_fixupimmps512_mask with __A first and
   _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
12071 extern __inline __m512
12072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12073 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12074 __m512i __C, const int __imm)
12076 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12081 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_fixupimm_ps: variant taking the mask __U (__mmask16) as
   the first parameter.  Calls __builtin_ia32_fixupimmps512_maskz with
   __A first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
12084 extern __inline __m512
12085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12086 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12087 __m512i __C, const int __imm)
12089 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12094 _MM_FROUND_CUR_DIRECTION);
/* _mm_fixupimm_sd: 128-bit form taking __A, __B (__m128d), __C
   (__m128i) and an int __imm.  Calls __builtin_ia32_fixupimmsd_mask with
   __A (as __v2df) first, then (among others) __C cast to __v2di and
   __imm, ending with _MM_FROUND_CUR_DIRECTION.
   NOTE(review): the __B and mask argument lines are not visible here.  */
12097 extern __inline __m128d
12098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12099 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12101 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12103 (__v2di) __C, __imm,
12105 _MM_FROUND_CUR_DIRECTION);
/* _mm_mask_fixupimm_sd: variant taking a mask __U (__mmask8) after __A.
   Calls __builtin_ia32_fixupimmsd_mask with __A first, then (among
   others) __C cast to __v2di and __imm, ending with
   _MM_FROUND_CUR_DIRECTION.
   NOTE(review): the __B and __U argument lines are not visible here.  */
12108 extern __inline __m128d
12109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12110 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12111 __m128i __C, const int __imm)
12113 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12115 (__v2di) __C, __imm,
12117 _MM_FROUND_CUR_DIRECTION);
/* _mm_maskz_fixupimm_sd: variant taking the mask __U (__mmask8) as the
   first parameter.  Calls __builtin_ia32_fixupimmsd_maskz with __A first
   and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): the arguments in between are not visible in this view.  */
12120 extern __inline __m128d
12121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12122 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12123 __m128i __C, const int __imm)
12125 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12130 _MM_FROUND_CUR_DIRECTION);
/* _mm_fixupimm_ss: 128-bit form taking __A, __B (__m128), __C (__m128i)
   and an int __imm.  Calls __builtin_ia32_fixupimmss_mask with __A (as
   __v4sf) first, then (among others) __C cast to __v4si and __imm,
   ending with _MM_FROUND_CUR_DIRECTION.
   NOTE(review): the __B and mask argument lines are not visible here.  */
12133 extern __inline __m128
12134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12135 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12137 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12139 (__v4si) __C, __imm,
12141 _MM_FROUND_CUR_DIRECTION);
/* _mm_mask_fixupimm_ss: variant taking a mask __U (__mmask8) after __A.
   Calls __builtin_ia32_fixupimmss_mask with __A first, then (among
   others) __C cast to __v4si and __imm, ending with
   _MM_FROUND_CUR_DIRECTION.
   NOTE(review): the __B and __U argument lines are not visible here.  */
12144 extern __inline __m128
12145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12146 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12147 __m128i __C, const int __imm)
12149 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12151 (__v4si) __C, __imm,
12153 _MM_FROUND_CUR_DIRECTION);
/* _mm_maskz_fixupimm_ss: variant taking the mask __U (__mmask8) as the
   first parameter.  Calls __builtin_ia32_fixupimmss_maskz with __A
   first, then (among others) __C cast to __v4si and __imm, ending with
   _MM_FROUND_CUR_DIRECTION.
   NOTE(review): the __B and __U argument lines are not visible here.  */
12156 extern __inline __m128
12157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12158 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12159 __m128i __C, const int __imm)
12161 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12163 (__v4si) __C, __imm,
12165 _MM_FROUND_CUR_DIRECTION);
/* Macro form of _mm512_fixupimm_pd: casts X and Y to __v8df, Z to __v8di
   and C to int, then calls __builtin_ia32_fixupimmpd512_mask with
   (__mmask8)(-1) as the mask and _MM_FROUND_CUR_DIRECTION.
   NOTE(review): presumably the non-__OPTIMIZE__ alternative to the inline
   function of the same name; the #else is not visible in this view.  */
12168 #define _mm512_fixupimm_pd(X, Y, Z, C) \
12169 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12170 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12171 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_fixupimm_pd: casts X and Y to __v8df, Z to
   __v8di and C to int, then calls __builtin_ia32_fixupimmpd512_mask with
   U cast to __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
12173 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
12174 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12175 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12176 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_fixupimm_pd: same argument casts as the
   mask form, but calls __builtin_ia32_fixupimmpd512_maskz with U cast to
   __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
12178 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
12179 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
12180 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12181 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_fixupimm_ps: casts X and Y to __v16sf, Z to
   __v16si and C to int, then calls __builtin_ia32_fixupimmps512_mask with
   (__mmask16)(-1) as the mask and _MM_FROUND_CUR_DIRECTION.  */
12183 #define _mm512_fixupimm_ps(X, Y, Z, C) \
12184 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12185 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12186 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_fixupimm_ps: casts X and Y to __v16sf, Z to
   __v16si and C to int, then calls __builtin_ia32_fixupimmps512_mask with
   U cast to __mmask16 and _MM_FROUND_CUR_DIRECTION.  */
12188 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
12189 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12190 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12191 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_fixupimm_ps: same argument casts as the
   mask form, but calls __builtin_ia32_fixupimmps512_maskz with U cast to
   __mmask16 and _MM_FROUND_CUR_DIRECTION.  */
12193 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
12194 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
12195 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12196 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_fixupimm_sd: casts X and Y to __v2df, Z to __v2di
   and C to int, then calls __builtin_ia32_fixupimmsd_mask with
   (__mmask8)(-1) as the mask and _MM_FROUND_CUR_DIRECTION.  */
12198 #define _mm_fixupimm_sd(X, Y, Z, C) \
12199 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12200 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12201 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_mask_fixupimm_sd: casts X and Y to __v2df, Z to
   __v2di and C to int, then calls __builtin_ia32_fixupimmsd_mask with U
   cast to __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
12203 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
12204 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12205 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12206 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_maskz_fixupimm_sd: same argument casts as the mask
   form, but calls __builtin_ia32_fixupimmsd_maskz with U cast to
   __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
12208 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
12209 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
12210 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12211 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_fixupimm_ss: casts X and Y to __v4sf, Z to __v4si
   and C to int, then calls __builtin_ia32_fixupimmss_mask with
   (__mmask8)(-1) as the mask and _MM_FROUND_CUR_DIRECTION.  */
12213 #define _mm_fixupimm_ss(X, Y, Z, C) \
12214 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12215 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12216 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_mask_fixupimm_ss: casts X and Y to __v4sf, Z to
   __v4si and C to int, then calls __builtin_ia32_fixupimmss_mask with U
   cast to __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
12218 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12219 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12220 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12221 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_maskz_fixupimm_ss: same argument casts as the mask
   form, but calls __builtin_ia32_fixupimmss_maskz with U cast to
   __mmask8 and _MM_FROUND_CUR_DIRECTION.  */
12223 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12224 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12225 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12226 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
/* _mm_cvtss_u64: takes a __m128 and returns an unsigned long long from
   __builtin_ia32_vcvtss2usi64, passing _MM_FROUND_CUR_DIRECTION as the
   last argument.
   NOTE(review): the line carrying the operand after the (__v4sf) cast is
   not visible in this view; presumably __A.  */
12230 extern __inline unsigned long long
12231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12232 _mm_cvtss_u64 (__m128 __A)
12234 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12236 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvttss_u64: takes a __m128 and returns an unsigned long long from
   __builtin_ia32_vcvttss2usi64, passing _MM_FROUND_CUR_DIRECTION as the
   last argument.
   NOTE(review): the line carrying the operand after the (__v4sf) cast is
   not visible in this view; presumably __A.  */
12239 extern __inline unsigned long long
12240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12241 _mm_cvttss_u64 (__m128 __A)
12243 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12245 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvttss_i64: takes a __m128 and returns, as long long, the result of
   __builtin_ia32_vcvttss2si64 (__A as __v4sf, _MM_FROUND_CUR_DIRECTION).
   Last definition before the "#endif" commented __x86_64__.  */
12248 extern __inline long long
12249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12250 _mm_cvttss_i64 (__m128 __A)
12252 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12253 _MM_FROUND_CUR_DIRECTION);
12255 #endif /* __x86_64__ */
/* _mm_cvtss_u32: takes a __m128 and returns, as unsigned, the result of
   __builtin_ia32_vcvtss2usi32 (__A as __v4sf, _MM_FROUND_CUR_DIRECTION).  */
12257 extern __inline unsigned
12258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12259 _mm_cvtss_u32 (__m128 __A)
12261 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12262 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvttss_u32: takes a __m128 and returns, as unsigned, the result of
   __builtin_ia32_vcvttss2usi32 (__A as __v4sf, _MM_FROUND_CUR_DIRECTION).  */
12265 extern __inline unsigned
12266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12267 _mm_cvttss_u32 (__m128 __A)
12269 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12270 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvttss_i32: takes a __m128 and returns, as int, the result of
   __builtin_ia32_vcvttss2si32 (__A as __v4sf, _MM_FROUND_CUR_DIRECTION).  */
12273 extern __inline int
12274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12275 _mm_cvttss_i32 (__m128 __A)
12277 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12278 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvtsd_u64: takes a __m128d and returns an unsigned long long from
   __builtin_ia32_vcvtsd2usi64, passing _MM_FROUND_CUR_DIRECTION as the
   last argument.
   NOTE(review): the line carrying the operand after the (__v2df) cast is
   not visible in this view; presumably __A.  */
12282 extern __inline unsigned long long
12283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12284 _mm_cvtsd_u64 (__m128d __A)
12286 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12288 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvttsd_u64: takes a __m128d and returns an unsigned long long from
   __builtin_ia32_vcvttsd2usi64, passing _MM_FROUND_CUR_DIRECTION as the
   last argument.
   NOTE(review): the line carrying the operand after the (__v2df) cast is
   not visible in this view; presumably __A.  */
12291 extern __inline unsigned long long
12292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12293 _mm_cvttsd_u64 (__m128d __A)
12295 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12297 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvttsd_i64: takes a __m128d and returns, as long long, the result
   of __builtin_ia32_vcvttsd2si64 (__A as __v2df,
   _MM_FROUND_CUR_DIRECTION).  Last definition before the "#endif"
   commented __x86_64__.  */
12300 extern __inline long long
12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302 _mm_cvttsd_i64 (__m128d __A)
12304 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12305 _MM_FROUND_CUR_DIRECTION);
12307 #endif /* __x86_64__ */
/* _mm_cvtsd_u32: takes a __m128d and returns, as unsigned, the result of
   __builtin_ia32_vcvtsd2usi32 (__A as __v2df, _MM_FROUND_CUR_DIRECTION).  */
12309 extern __inline unsigned
12310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12311 _mm_cvtsd_u32 (__m128d __A)
12313 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12314 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvttsd_u32: takes a __m128d and returns, as unsigned, the result of
   __builtin_ia32_vcvttsd2usi32 (__A as __v2df, _MM_FROUND_CUR_DIRECTION).  */
12317 extern __inline unsigned
12318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12319 _mm_cvttsd_u32 (__m128d __A)
12321 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12322 _MM_FROUND_CUR_DIRECTION);
/* _mm_cvttsd_i32: takes a __m128d and returns, as int, the result of
   __builtin_ia32_vcvttsd2si32 (__A as __v2df, _MM_FROUND_CUR_DIRECTION).  */
12325 extern __inline int
12326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12327 _mm_cvttsd_i32 (__m128d __A)
12329 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12330 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtps_pd: takes a __m256 and returns a __m512d via
   __builtin_ia32_cvtps2pd512_mask, with __A cast to __v8sf.  Passes
   _mm512_undefined_pd () as one argument and _MM_FROUND_CUR_DIRECTION as
   the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
12333 extern __inline __m512d
12334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12335 _mm512_cvtps_pd (__m256 __A)
12337 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12339 _mm512_undefined_pd (),
12341 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtps_pd: variant with extra parameters __W (__m512d) and
   __U (__mmask8).  Calls __builtin_ia32_cvtps2pd512_mask with __A first
   and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
12344 extern __inline __m512d
12345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12346 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12348 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12351 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtps_pd: variant taking a mask __U (__mmask8).  Calls
   __builtin_ia32_cvtps2pd512_mask with __A first, a zero vector from
   _mm512_setzero_pd () as one argument, and _MM_FROUND_CUR_DIRECTION
   last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
12354 extern __inline __m512d
12355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12356 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12358 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12360 _mm512_setzero_pd (),
12362 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtph_ps: takes a __m256i and returns a __m512 via
   __builtin_ia32_vcvtph2ps512_mask, with __A cast to __v16hi.  Passes
   _mm512_undefined_ps () as one argument and _MM_FROUND_CUR_DIRECTION as
   the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
12365 extern __inline __m512
12366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12367 _mm512_cvtph_ps (__m256i __A)
12369 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12371 _mm512_undefined_ps (),
12373 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtph_ps: variant with extra parameters __W (__m512) and
   __U (__mmask16).  Calls __builtin_ia32_vcvtph2ps512_mask with __A
   first and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
12376 extern __inline __m512
12377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12378 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12380 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12383 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtph_ps: variant taking a mask __U (__mmask16).  Calls
   __builtin_ia32_vcvtph2ps512_mask with __A first, a zero vector from
   _mm512_setzero_ps () as one argument, and _MM_FROUND_CUR_DIRECTION
   last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
12386 extern __inline __m512
12387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12388 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12390 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12392 _mm512_setzero_ps (),
12394 _MM_FROUND_CUR_DIRECTION);
/* _mm512_cvtpd_ps: takes a __m512d and returns a __m256 via
   __builtin_ia32_cvtpd2ps512_mask, with __A cast to __v8df.  Passes
   _mm256_undefined_ps () as one argument and _MM_FROUND_CUR_DIRECTION as
   the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
12397 extern __inline __m256
12398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12399 _mm512_cvtpd_ps (__m512d __A)
12401 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12403 _mm256_undefined_ps (),
12405 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_cvtpd_ps: variant with extra parameters __W (__m256) and
   __U (__mmask8).  Calls __builtin_ia32_cvtpd2ps512_mask with __A first
   and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
12408 extern __inline __m256
12409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12410 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12412 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12415 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_cvtpd_ps: variant taking a mask __U (__mmask8).  Calls
   __builtin_ia32_cvtpd2ps512_mask with __A first, a zero vector from
   _mm256_setzero_ps () as one argument, and _MM_FROUND_CUR_DIRECTION
   last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
12418 extern __inline __m256
12419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12420 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12422 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12424 _mm256_setzero_ps (),
12426 _MM_FROUND_CUR_DIRECTION);
12429 #ifdef __OPTIMIZE__
/* _mm512_getexp_ps: inline-function form (this section follows an
   #ifdef __OPTIMIZE__).  Takes a __m512 and returns the result of
   __builtin_ia32_getexpps512_mask, passing _mm512_undefined_ps () as one
   argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
12430 extern __inline __m512
12431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12432 _mm512_getexp_ps (__m512 __A)
12434 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12436 _mm512_undefined_ps (),
12438 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_getexp_ps: variant with extra parameters __W (__m512) and
   __U (__mmask16).  Calls __builtin_ia32_getexpps512_mask with __A first
   and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
12441 extern __inline __m512
12442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12443 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12445 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12448 _MM_FROUND_CUR_DIRECTION);
/* _mm512_maskz_getexp_ps: variant taking a mask __U (__mmask16).  Calls
   __builtin_ia32_getexpps512_mask with __A first, a zero vector from
   _mm512_setzero_ps () as one argument, and _MM_FROUND_CUR_DIRECTION
   last.
   NOTE(review): how __U is forwarded is on a line not visible here.  */
12451 extern __inline __m512
12452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12453 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12455 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12457 _mm512_setzero_ps (),
12459 _MM_FROUND_CUR_DIRECTION);
/* _mm512_getexp_pd: takes a __m512d and returns the result of
   __builtin_ia32_getexppd512_mask, passing _mm512_undefined_pd () as one
   argument and _MM_FROUND_CUR_DIRECTION as the last.
   NOTE(review): the remaining arguments are not visible in this view.  */
12462 extern __inline __m512d
12463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12464 _mm512_getexp_pd (__m512d __A)
12466 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12468 _mm512_undefined_pd (),
12470 _MM_FROUND_CUR_DIRECTION);
/* _mm512_mask_getexp_pd: variant with extra parameters __W (__m512d) and
   __U (__mmask8).  Calls __builtin_ia32_getexppd512_mask with __A first
   and _MM_FROUND_CUR_DIRECTION last.
   NOTE(review): how __W and __U are forwarded is on lines not visible
   in this view.  */
12473 extern __inline __m512d
12474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12475 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12477 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12480 _MM_FROUND_CUR_DIRECTION);
/* Variant of _mm512_getexp_pd that supplies _mm512_setzero_pd () to the
   builtin instead of an undefined vector.  NOTE(review): the line that
   forwards __U is not visible in this listing.  */
12483 extern __inline __m512d
12484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12485 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12487 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12489 _mm512_setzero_pd (),
12491 _MM_FROUND_CUR_DIRECTION);
/* Scalar single-precision wrapper around
   __builtin_ia32_getexpss128_round; __A is passed as __v4sf and the last
   operand is _MM_FROUND_CUR_DIRECTION.  NOTE(review): the line passing
   __B is not visible in this listing.  */
12494 extern __inline __m128
12495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12496 _mm_getexp_ss (__m128 __A, __m128 __B)
12498 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12500 _MM_FROUND_CUR_DIRECTION);
/* Scalar double-precision wrapper around
   __builtin_ia32_getexpsd128_round; __A is passed as __v2df and the last
   operand is _MM_FROUND_CUR_DIRECTION.  NOTE(review): the line passing
   __B is not visible in this listing.  */
12503 extern __inline __m128d
12504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12505 _mm_getexp_sd (__m128d __A, __m128d __B)
12507 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12509 _MM_FROUND_CUR_DIRECTION);
/* Forwards __A (as __v8df) to __builtin_ia32_getmantpd512_mask with
   _mm512_undefined_pd () and _MM_FROUND_CUR_DIRECTION.  __B and __C are
   the normalisation and sign selectors by type.  NOTE(review): the lines
   that combine __B/__C into the immediate and pass the mask are not
   visible in this listing.  */
12512 extern __inline __m512d
12513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12514 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12515 _MM_MANTISSA_SIGN_ENUM __C)
12517 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12519 _mm512_undefined_pd (),
12521 _MM_FROUND_CUR_DIRECTION);
/* Variant of _mm512_getmant_pd taking __W and __U.  Only the source and
   rounding operands of the builtin call are visible.  NOTE(review): the
   immediate and the __W/__U forwarding lines are missing from this
   listing; presumably merge source and write mask -- confirm.  */
12524 extern __inline __m512d
12525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12526 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12527 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12529 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12532 _MM_FROUND_CUR_DIRECTION);
/* Variant of _mm512_getmant_pd that supplies _mm512_setzero_pd () to the
   builtin.  NOTE(review): the immediate and the line forwarding __U are
   not visible in this listing.  */
12535 extern __inline __m512d
12536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12537 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12538 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12540 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12543 _mm512_setzero_pd (),
12545 _MM_FROUND_CUR_DIRECTION);
/* Forwards __A (as __v16sf) to __builtin_ia32_getmantps512_mask with
   _mm512_undefined_ps () and _MM_FROUND_CUR_DIRECTION.  __B and __C are
   the normalisation and sign selectors by type.  NOTE(review): the lines
   building the immediate and passing the mask are not visible in this
   listing.  */
12548 extern __inline __m512
12549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12550 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12551 _MM_MANTISSA_SIGN_ENUM __C)
12553 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12555 _mm512_undefined_ps (),
12557 _MM_FROUND_CUR_DIRECTION);
/* Variant of _mm512_getmant_ps that passes __W (as __v16sf) followed by
   __U to the builtin, then the current rounding direction.
   NOTE(review): the line carrying the immediate built from __B/__C is
   not visible in this listing.  */
12560 extern __inline __m512
12561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12562 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12563 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12565 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12567 (__v16sf) __W, __U,
12568 _MM_FROUND_CUR_DIRECTION);
/* Variant of _mm512_getmant_ps that supplies _mm512_setzero_ps () to the
   builtin.  NOTE(review): the immediate and the line forwarding __U are
   not visible in this listing.  */
12571 extern __inline __m512
12572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12573 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12574 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12576 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12579 _mm512_setzero_ps (),
12581 _MM_FROUND_CUR_DIRECTION);
/* Scalar double-precision wrapper around __builtin_ia32_getmantsd_round;
   __A is passed as __v2df and the last operand is
   _MM_FROUND_CUR_DIRECTION.  NOTE(review): the lines passing __B and the
   immediate built from __C/__D are not visible in this listing.  */
12584 extern __inline __m128d
12585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12586 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12587 _MM_MANTISSA_SIGN_ENUM __D)
12589 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12592 _MM_FROUND_CUR_DIRECTION);
/* Scalar single-precision wrapper around __builtin_ia32_getmantss_round;
   __A is passed as __v4sf and the last operand is
   _MM_FROUND_CUR_DIRECTION.  NOTE(review): the lines passing __B and the
   immediate built from __C/__D are not visible in this listing.  */
12595 extern __inline __m128
12596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12597 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12598 _MM_MANTISSA_SIGN_ENUM __D)
12600 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12603 _MM_FROUND_CUR_DIRECTION);
/* Macro form of _mm512_getmant_pd: the immediate is (C << 2) | B, and an
   undefined vector plus the current rounding direction are passed.
   NOTE(review): the mask operand line between the undefined vector and
   the rounding operand is not visible in this listing.  */
12607 #define _mm512_getmant_pd(X, B, C) \
12608 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12609 (int)(((C)<<2) | (B)), \
12610 (__v8df)_mm512_undefined_pd(), \
12612 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_getmant_pd: immediate (C << 2) | B, with W
   passed as the vector operand after it.  NOTE(review): the line that
   forwards U is not visible in this listing.  */
12614 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
12615 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12616 (int)(((C)<<2) | (B)), \
12617 (__v8df)(__m512d)(W), \
12619 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_getmant_pd: immediate (C << 2) | B, with a
   zero vector as the vector operand after it.  NOTE(review): the line
   that forwards U is not visible in this listing.  */
12621 #define _mm512_maskz_getmant_pd(U, X, B, C) \
12622 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12623 (int)(((C)<<2) | (B)), \
12624 (__v8df)_mm512_setzero_pd(), \
12626 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_getmant_ps: the immediate is (C << 2) | B, and an
   undefined vector plus the current rounding direction are passed.
   NOTE(review): the mask operand line is not visible in this listing.  */
12627 #define _mm512_getmant_ps(X, B, C) \
12628 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12629 (int)(((C)<<2) | (B)), \
12630 (__v16sf)_mm512_undefined_ps(), \
12632 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_getmant_ps: immediate (C << 2) | B, with W
   passed as the vector operand after it.  NOTE(review): the line that
   forwards U is not visible in this listing.  */
12634 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
12635 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12636 (int)(((C)<<2) | (B)), \
12637 (__v16sf)(__m512)(W), \
12639 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_getmant_ps: immediate (C << 2) | B, with a
   zero vector as the vector operand after it.  NOTE(review): the line
   that forwards U is not visible in this listing.  */
12641 #define _mm512_maskz_getmant_ps(U, X, B, C) \
12642 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12643 (int)(((C)<<2) | (B)), \
12644 (__v16sf)_mm512_setzero_ps(), \
12646 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_getmant_sd.  Both vector operands go to
   __builtin_ia32_getmantsd_round as __v2df, the immediate is
   (SIGN << 2) | NORM, and the current rounding direction is used.  */
12647 #define _mm_getmant_sd(A, B, NORM, SIGN) \
12648 ((__m128d) __builtin_ia32_getmantsd_round ((__v2df) (__m128d) (A), \
12649 (__v2df) (__m128d) (B), \
12650 (int) (((SIGN) << 2) | (NORM)), \
12651 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_getmant_ss.  Both vector operands go to
   __builtin_ia32_getmantss_round as __v4sf, the immediate is
   (SIGN << 2) | NORM, and the current rounding direction is used.  */
12653 #define _mm_getmant_ss(A, B, NORM, SIGN) \
12654 ((__m128) __builtin_ia32_getmantss_round ((__v4sf) (__m128) (A), \
12655 (__v4sf) (__m128) (B), \
12656 (int) (((SIGN) << 2) | (NORM)), \
12657 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_getexp_ss.  Passes A and B as __v4sf plus the current
   rounding direction.  It must name the same builtin as the inline
   function of this name, __builtin_ia32_getexpss128_round, which takes
   exactly these three operands; the previous "_mask" spelling did not
   match it.  */
12659 #define _mm_getexp_ss(A, B) \
12660 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
12661 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_getexp_sd.  Passes A and B as __v2df plus the current
   rounding direction.  It must name the same builtin as the inline
   function of this name, __builtin_ia32_getexpsd128_round, which takes
   exactly these three operands; the previous "_mask" spelling did not
   match it.  */
12663 #define _mm_getexp_sd(A, B) \
12664 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
12665 _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_getexp_ps: source X, an undefined vector, an
   all-ones mask and the current rounding direction.  */
12667 #define _mm512_getexp_ps(X) \
12668 ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (X), \
12669 (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_getexp_ps: source X, vector SRC, mask MASK
   and the current rounding direction, in that order.  */
12671 #define _mm512_mask_getexp_ps(SRC, MASK, X) \
12672 ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (X), \
12673 (__v16sf) (__m512) (SRC), (__mmask16) (MASK), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_getexp_ps: source X, a zero vector, mask
   MASK and the current rounding direction.  */
12675 #define _mm512_maskz_getexp_ps(MASK, X) \
12676 ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf) (__m512) (X), \
12677 (__v16sf) _mm512_setzero_ps (), (__mmask16) (MASK), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_getexp_pd: source X, an undefined vector, an
   all-ones mask and the current rounding direction.  */
12679 #define _mm512_getexp_pd(X) \
12680 ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (X), \
12681 (__v8df) _mm512_undefined_pd (), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_getexp_pd: source X, vector SRC, mask MASK
   and the current rounding direction, in that order.  */
12683 #define _mm512_mask_getexp_pd(SRC, MASK, X) \
12684 ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (X), \
12685 (__v8df) (__m512d) (SRC), (__mmask8) (MASK), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_getexp_pd: source X, a zero vector, mask
   MASK and the current rounding direction.  */
12687 #define _mm512_maskz_getexp_pd(MASK, X) \
12688 ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df) (__m512d) (X), \
12689 (__v8df) _mm512_setzero_pd (), (__mmask8) (MASK), _MM_FROUND_CUR_DIRECTION))
12692 #ifdef __OPTIMIZE__
/* Forwards __A (as __v16sf) and the immediate __imm to
   __builtin_ia32_rndscaleps_mask together with _mm512_undefined_ps ()
   and _MM_FROUND_CUR_DIRECTION.  NOTE(review): the mask operand line is
   not visible in this listing.  */
12693 extern __inline __m512
12694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12695 _mm512_roundscale_ps (__m512 __A, const int __imm)
12697 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12699 _mm512_undefined_ps (),
12701 _MM_FROUND_CUR_DIRECTION);
/* Masked roundscale wrapper.  Note the operand order: the value handed
   to the builtin as the source is __C, not __A.  NOTE(review): the end
   of the parameter list (declaring __imm) and the line forwarding
   __A/__B are not visible in this listing.  */
12704 extern __inline __m512
12705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12706 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12709 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12712 _MM_FROUND_CUR_DIRECTION);
/* Roundscale wrapper whose source is __B and which supplies
   _mm512_setzero_ps () to the builtin.  NOTE(review): the lines passing
   __imm and the mask __A are not visible in this listing.  */
12715 extern __inline __m512
12716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12717 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12719 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12722 _mm512_setzero_ps (),
12724 _MM_FROUND_CUR_DIRECTION);
/* Forwards __A (as __v8df) and the immediate __imm to
   __builtin_ia32_rndscalepd_mask together with _mm512_undefined_pd ()
   and _MM_FROUND_CUR_DIRECTION.  NOTE(review): the mask operand line is
   not visible in this listing.  */
12727 extern __inline __m512d
12728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12729 _mm512_roundscale_pd (__m512d __A, const int __imm)
12731 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12733 _mm512_undefined_pd (),
12735 _MM_FROUND_CUR_DIRECTION);
/* Masked roundscale wrapper.  Note the operand order: the value handed
   to the builtin as the source is __C, not __A.  NOTE(review): the end
   of the parameter list (declaring __imm) and the line forwarding
   __A/__B are not visible in this listing.  */
12738 extern __inline __m512d
12739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12740 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12743 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12746 _MM_FROUND_CUR_DIRECTION);
/* Roundscale wrapper whose source is __B and which supplies
   _mm512_setzero_pd () to the builtin.  NOTE(review): the lines passing
   __imm and the mask __A are not visible in this listing.  */
12749 extern __inline __m512d
12750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12751 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12753 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12756 _mm512_setzero_pd (),
12758 _MM_FROUND_CUR_DIRECTION);
/* Scalar single-precision roundscale: passes __A and __B (as __v4sf),
   the immediate __imm and _MM_FROUND_CUR_DIRECTION to
   __builtin_ia32_rndscaless_round and returns the result as __m128.  */
12761 extern __inline __m128
12762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12763 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12765 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12766 (__v4sf) __B, __imm,
12767 _MM_FROUND_CUR_DIRECTION);
/* Scalar double-precision roundscale: passes __A and __B (as __v2df),
   the immediate __imm and _MM_FROUND_CUR_DIRECTION to
   __builtin_ia32_rndscalesd_round and returns the result as __m128d.  */
12770 extern __inline __m128d
12771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12772 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12774 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12775 (__v2df) __B, __imm,
12776 _MM_FROUND_CUR_DIRECTION);
/* Macro form of _mm512_roundscale_ps: source X, immediate IMM, an
   undefined vector, an all-ones mask and the current rounding
   direction.  */
12780 #define _mm512_roundscale_ps(X, IMM) \
12781 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) (__m512) (X), (int) (IMM), \
12782 (__v16sf) _mm512_undefined_ps (), (__mmask16) (-1), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_ps: the source is C, A is passed
   as the vector operand and B as the mask.  NOTE(review): the line
   carrying the immediate D is not visible in this listing.  */
12783 #define _mm512_mask_roundscale_ps(A, B, C, D) \
12784 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
12786 (__v16sf)(__m512)(A), \
12787 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_ps: the source is B, a zero
   vector is passed as the vector operand and A as the mask.
   NOTE(review): the line carrying the immediate C is not visible in
   this listing.  */
12788 #define _mm512_maskz_roundscale_ps(A, B, C) \
12789 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
12791 (__v16sf)_mm512_setzero_ps(),\
12792 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_roundscale_pd: source X, immediate IMM, an
   undefined vector, an all-ones mask and the current rounding
   direction.  */
12793 #define _mm512_roundscale_pd(X, IMM) \
12794 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) (__m512d) (X), (int) (IMM), \
12795 (__v8df) _mm512_undefined_pd (), (__mmask8) (-1), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_pd: the source is C, A is passed
   as the vector operand and B as the mask.  NOTE(review): the line
   carrying the immediate D is not visible in this listing.  */
12796 #define _mm512_mask_roundscale_pd(A, B, C, D) \
12797 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
12799 (__v8df)(__m512d)(A), \
12800 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_pd: the source is B, a zero
   vector is passed as the vector operand and A as the mask.
   NOTE(review): the line carrying the immediate C is not visible in
   this listing.  */
12801 #define _mm512_maskz_roundscale_pd(A, B, C) \
12802 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
12804 (__v8df)_mm512_setzero_pd(),\
12805 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_ss: both operands as __v4sf, the
   immediate IMM and the current rounding direction.  */
12806 #define _mm_roundscale_ss(X, Y, IMM) \
12807 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf) (__m128) (X), \
12808 (__v4sf) (__m128) (Y), (int) (IMM), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_sd: both operands as __v2df, the
   immediate IMM and the current rounding direction.  */
12809 #define _mm_roundscale_sd(X, Y, IMM) \
12810 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df) (__m128d) (X), \
12811 (__v2df) (__m128d) (Y), (int) (IMM), _MM_FROUND_CUR_DIRECTION))
12814 #ifdef __OPTIMIZE__
/* Packed double compare returning an __mmask8 from
   __builtin_ia32_cmppd512_mask; __X is passed as __v8df and the last
   operand is _MM_FROUND_CUR_DIRECTION.  NOTE(review): the lines passing
   __Y, the predicate __P and the mask are not visible in this listing.  */
12815 extern __inline __mmask8
12816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12817 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12819 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12822 _MM_FROUND_CUR_DIRECTION);
/* Packed float compare returning an __mmask16 from
   __builtin_ia32_cmpps512_mask; passes __X and __Y as __v16sf, then the
   predicate __P, and finally _MM_FROUND_CUR_DIRECTION.  NOTE(review):
   the mask operand line is not visible in this listing.  */
12825 extern __inline __mmask16
12826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12827 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12829 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12830 (__v16sf) __Y, __P,
12832 _MM_FROUND_CUR_DIRECTION);
/* Variant of the packed float compare that additionally takes __U.
   Passes __X and __Y as __v16sf, then the predicate __P, and finally
   _MM_FROUND_CUR_DIRECTION.  NOTE(review): the line forwarding __U is
   not visible in this listing.  */
12835 extern __inline __mmask16
12836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12837 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12839 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12840 (__v16sf) __Y, __P,
12842 _MM_FROUND_CUR_DIRECTION);
/* Variant of the packed double compare that additionally takes __U.
   Only the __X operand and the rounding operand are visible.
   NOTE(review): the lines passing __Y, __P and __U are missing from
   this listing.  */
12845 extern __inline __mmask8
12846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12847 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12849 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12852 _MM_FROUND_CUR_DIRECTION);
/* Scalar double compare returning an __mmask8 from
   __builtin_ia32_cmpsd_mask; __X is passed as __v2df and the last
   operand is _MM_FROUND_CUR_DIRECTION.  NOTE(review): the lines passing
   __Y, __P and the mask are not visible in this listing.  */
12855 extern __inline __mmask8
12856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12857 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12859 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12862 _MM_FROUND_CUR_DIRECTION);
/* Variant of the scalar double compare that additionally takes __M.
   Only the __X operand and the rounding operand are visible.
   NOTE(review): the lines passing __Y, __P and __M are missing from
   this listing.  */
12865 extern __inline __mmask8
12866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12867 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12869 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12872 _MM_FROUND_CUR_DIRECTION);
/* Scalar float compare returning an __mmask8 from
   __builtin_ia32_cmpss_mask; __X is passed as __v4sf and the last
   operand is _MM_FROUND_CUR_DIRECTION.  NOTE(review): the lines passing
   __Y, __P and the mask are not visible in this listing.  */
12875 extern __inline __mmask8
12876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12877 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12879 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12882 _MM_FROUND_CUR_DIRECTION);
/* Variant of the scalar float compare that additionally takes __M.
   Only the __X operand and the rounding operand are visible.
   NOTE(review): the lines passing __Y, __P and __M are missing from
   this listing.  */
12885 extern __inline __mmask8
12886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12887 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12889 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12892 _MM_FROUND_CUR_DIRECTION);
/* Macro form of _mm512_cmp_pd_mask: compares A with B under predicate
   PRED using an all-ones mask and the current rounding direction.  */
12896 #define _mm512_cmp_pd_mask(A, B, PRED) \
12897 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (A), \
12898 (__v8df) (__m512d) (B), (int) (PRED), \
12899 (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_cmp_ps_mask: compares A with B under predicate
   PRED using an all-ones mask and the current rounding direction.  */
12901 #define _mm512_cmp_ps_mask(A, B, PRED) \
12902 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (A), \
12903 (__v16sf) (__m512) (B), (int) (PRED), \
12904 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_cmp_pd_mask: compares X with Y under
   predicate P, passing M as the mask and using the current rounding
   direction.  M is parenthesized before the cast so that an expression
   argument such as "m >> 4" is converted as a whole rather than having
   the cast bind to its first operand only.  */
12906 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
12907 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12908 (__v8df)(__m512d)(Y), (int)(P),\
12909 (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_cmp_ps_mask: compares X with Y under
   predicate P, passing M as the mask and using the current rounding
   direction.  M is parenthesized before the cast so that an expression
   argument such as "m >> 4" is converted as a whole rather than having
   the cast bind to its first operand only.  */
12911 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
12912 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12913 (__v16sf)(__m512)(Y), (int)(P),\
12914 (__mmask16)(M),_MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_cmp_sd_mask: compares A with B (as __v2df) under
   predicate PRED using an all-ones mask and the current rounding
   direction.  */
12916 #define _mm_cmp_sd_mask(A, B, PRED) \
12917 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (A), \
12918 (__v2df) (__m128d) (B), (int) (PRED), \
12919 (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_mask_cmp_sd_mask: compares A with B (as __v2df)
   under predicate PRED, handing MASK to the builtin unchanged, with the
   current rounding direction.  */
12921 #define _mm_mask_cmp_sd_mask(MASK, A, B, PRED) \
12922 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (A), \
12923 (__v2df) (__m128d) (B), (int) (PRED), \
12924 (MASK), _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_cmp_ss_mask: compares A with B (as __v4sf) under
   predicate PRED using an all-ones mask and the current rounding
   direction.  */
12926 #define _mm_cmp_ss_mask(A, B, PRED) \
12927 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (A), \
12928 (__v4sf) (__m128) (B), (int) (PRED), \
12929 (__mmask8) -1, _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_mask_cmp_ss_mask: compares A with B (as __v4sf)
   under predicate PRED, handing MASK to the builtin unchanged, with the
   current rounding direction.  */
12931 #define _mm_mask_cmp_ss_mask(MASK, A, B, PRED) \
12932 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (A), \
12933 (__v4sf) (__m128) (B), (int) (PRED), \
12934 (MASK), _MM_FROUND_CUR_DIRECTION))
/* Returns the result of __builtin_ia32_kmov16 applied to the 16-bit mask
   __A.  */
12937 extern __inline __mmask16
12938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12939 _mm512_kmov (__mmask16 __A)
12941 return __builtin_ia32_kmov16 (__A);
/* Returns __A converted from __m512d to __m512 with a plain cast; no
   builtin is invoked.  */
12944 extern __inline __m512
12945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12946 _mm512_castpd_ps (__m512d __A)
12948 return (__m512) (__A);
/* Returns __A converted from __m512d to __m512i with a plain cast; no
   builtin is invoked.  */
12951 extern __inline __m512i
12952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12953 _mm512_castpd_si512 (__m512d __A)
12955 return (__m512i) (__A);
/* Returns __A converted from __m512 to __m512d with a plain cast; no
   builtin is invoked.  */
12958 extern __inline __m512d
12959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12960 _mm512_castps_pd (__m512 __A)
12962 return (__m512d) (__A);
/* Returns __A converted from __m512 to __m512i with a plain cast; no
   builtin is invoked.  */
12965 extern __inline __m512i
12966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12967 _mm512_castps_si512 (__m512 __A)
12969 return (__m512i) (__A);
/* Returns __A converted from __m512i to __m512 with a plain cast; no
   builtin is invoked.  */
12972 extern __inline __m512
12973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12974 _mm512_castsi512_ps (__m512i __A)
12976 return (__m512) (__A);
/* Returns __A converted from __m512i to __m512d with a plain cast; no
   builtin is invoked.  */
12979 extern __inline __m512d
12980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12981 _mm512_castsi512_pd (__m512i __A)
12983 return (__m512d) (__A);
/* Narrows to 128 bits by viewing __A as __m512 and calling
   _mm512_extractf32x4_ps with index 0; the result is cast to __m128d.  */
12986 extern __inline __m128d
12987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12988 _mm512_castpd512_pd128 (__m512d __A)
12990 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
/* Narrows to 128 bits by calling _mm512_extractf32x4_ps on __A with
   index 0.  */
12993 extern __inline __m128
12994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12995 _mm512_castps512_ps128 (__m512 __A)
12997 return _mm512_extractf32x4_ps(__A, 0);
/* Narrows to 128 bits by calling _mm512_extracti32x4_epi32 on __A with
   index 0; the result is cast to __m128i.  */
13000 extern __inline __m128i
13001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13002 _mm512_castsi512_si128 (__m512i __A)
13004 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
/* Narrows to 256 bits by calling _mm512_extractf64x4_pd on __A with
   index 0.  */
13007 extern __inline __m256d
13008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13009 _mm512_castpd512_pd256 (__m512d __A)
13011 return _mm512_extractf64x4_pd(__A, 0);
/* Narrows to 256 bits by viewing __A as __m512d, calling
   _mm512_extractf64x4_pd with index 0 and casting the result to
   __m256.  */
13014 extern __inline __m256
13015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13016 _mm512_castps512_ps256 (__m512 __A)
13018 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
/* Narrows to 256 bits by viewing __A as __m512d, calling
   _mm512_extractf64x4_pd with index 0 and casting the result to
   __m256i.  */
13021 extern __inline __m256i
13022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13023 _mm512_castsi512_si256 (__m512i __A)
13025 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
/* Widens a 128-bit double vector to __m512d via __builtin_ia32_pd512_pd.
   NOTE(review): nothing visible here specifies the contents of the
   elements beyond the original 128 bits -- check the builtin.  */
13028 extern __inline __m512d
13029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13030 _mm512_castpd128_pd512 (__m128d __A)
13032 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
/* Widens a 128-bit float vector to __m512 via __builtin_ia32_ps512_ps.
   NOTE(review): nothing visible here specifies the contents of the
   elements beyond the original 128 bits -- check the builtin.  */
13035 extern __inline __m512
13036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13037 _mm512_castps128_ps512 (__m128 __A)
13039 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
/* Widens a 128-bit integer vector to __m512i via
   __builtin_ia32_si512_si, passing __A as __v4si.  NOTE(review): nothing
   visible here specifies the contents of the elements beyond the
   original 128 bits -- check the builtin.  */
13042 extern __inline __m512i
13043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13044 _mm512_castsi128_si512 (__m128i __A)
13046 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
/* Widens a 256-bit double vector to __m512d via
   __builtin_ia32_pd512_256pd.  NOTE(review): nothing visible here
   specifies the contents of the upper half -- check the builtin.  */
13049 extern __inline __m512d
13050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13051 _mm512_castpd256_pd512 (__m256d __A)
13053 return __builtin_ia32_pd512_256pd (__A);
/* Widens a 256-bit float vector to __m512 via
   __builtin_ia32_ps512_256ps.  NOTE(review): nothing visible here
   specifies the contents of the upper half -- check the builtin.  */
13056 extern __inline __m512
13057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13058 _mm512_castps256_ps512 (__m256 __A)
13060 return __builtin_ia32_ps512_256ps (__A);
/* Widens a 256-bit integer vector to __m512i via
   __builtin_ia32_si512_256si, passing __A as __v8si.  NOTE(review):
   nothing visible here specifies the contents of the upper half --
   check the builtin.  */
13063 extern __inline __m512i
13064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13065 _mm512_castsi256_si512 (__m256i __A)
13067 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
/* 32-bit element compare returning an __mmask16 from
   __builtin_ia32_ucmpd512_mask; __A is passed as __v16si.
   NOTE(review): the lines passing __B, the predicate and the mask are
   not visible in this listing.  */
13070 extern __inline __mmask16
13071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13072 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13074 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
/* Calls __builtin_ia32_ucmpd512_mask on __A and __B (as __v16si) with
   predicate immediate 0 and mask __U, returning the result as
   __mmask16.  */
13079 extern __inline __mmask16
13080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13081 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13083 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13084 (__v16si) __B, 0, __U);
/* Calls __builtin_ia32_ucmpq512_mask on __A and __B (as __v8di) with
   predicate immediate 0 and mask __U, returning the result as
   __mmask8.  */
13087 extern __inline __mmask8
13088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13089 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13091 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13092 (__v8di) __B, 0, __U);
/* 64-bit element compare returning an __mmask8 from
   __builtin_ia32_ucmpq512_mask; __A is passed as __v8di.
   NOTE(review): the lines passing __B, the predicate and the mask are
   not visible in this listing.  */
13095 extern __inline __mmask8
13096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13097 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13099 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
/* 32-bit element compare returning an __mmask16 from
   __builtin_ia32_ucmpd512_mask; __A is passed as __v16si.
   NOTE(review): the lines passing __B, the predicate and the mask are
   not visible in this listing.  */
13104 extern __inline __mmask16
13105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13106 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13108 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
/* Calls __builtin_ia32_ucmpd512_mask on __A and __B (as __v16si) with
   predicate immediate 6 and mask __U, returning the result as
   __mmask16.  */
13113 extern __inline __mmask16
13114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13115 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13117 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13118 (__v16si) __B, 6, __U);
/* Calls __builtin_ia32_ucmpq512_mask on __A and __B (as __v8di) with
   predicate immediate 6 and mask __U, returning the result as
   __mmask8.  */
13121 extern __inline __mmask8
13122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13123 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13125 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13126 (__v8di) __B, 6, __U);
/* 64-bit element compare returning an __mmask8 from
   __builtin_ia32_ucmpq512_mask; __A is passed as __v8di.
   NOTE(review): the lines passing __B, the predicate and the mask are
   not visible in this listing.  */
13129 extern __inline __mmask8
13130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13131 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13133 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13138 #ifdef __DISABLE_AVX512F__
13139 #undef __DISABLE_AVX512F__
13140 #pragma GCC pop_options
13141 #endif /* __DISABLE_AVX512F__ */
13143 #endif /* _AVX512FINTRIN_H_INCLUDED */