gcc50: Disconnect from buildworld.
[dragonfly.git] / contrib / gcc-5.0 / gcc / config / i386 / avx512fintrin.h
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2
3    This file is part of GCC.
4
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
/* If AVX512F is not enabled on entry, save the current options, switch the
   target to "avx512f" for what follows, and leave __DISABLE_AVX512F__
   defined as a marker that this was done here.  */
#if !defined (__AVX512F__)
#pragma GCC push_options
#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* !__AVX512F__ */
36
/* Internal data types for implementing the intrinsics.  Each one is a
   64-byte GCC vector of the element type named in its declaration.  */

/* Floating-point element types.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* long long and unsigned long long elements.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));

/* int and unsigned int elements.  */
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));

/* short and unsigned short elements.  */
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));

/* char and unsigned char elements.  */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
48
/* User-visible 64-byte vector types.  They are marked __may_alias__
   because the Intel API lets the same storage be accessed through other
   vector types and through the scalar element types.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
54
/* Mask operand types: __mmask8 is an unsigned char and __mmask16 an
   unsigned short.  NOTE(review): presumably one bit per vector element --
   confirm against the Intel documentation.  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
57
58 extern __inline __m512i
59 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 _mm512_set_epi64 (long long __A, long long __B, long long __C,
61                   long long __D, long long __E, long long __F,
62                   long long __G, long long __H)
63 {
64   return __extension__ (__m512i) (__v8di)
65          { __H, __G, __F, __E, __D, __C, __B, __A };
66 }
67
68 /* Create the vector [A B C D E F G H I J K L M N O P].  */
69 extern __inline __m512i
70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
72                   int __E, int __F, int __G, int __H,
73                   int __I, int __J, int __K, int __L,
74                   int __M, int __N, int __O, int __P)
75 {
76   return __extension__ (__m512i)(__v16si)
77          { __P, __O, __N, __M, __L, __K, __J, __I,
78            __H, __G, __F, __E, __D, __C, __B, __A };
79 }
80
81 extern __inline __m512d
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 _mm512_set_pd (double __A, double __B, double __C, double __D,
84                double __E, double __F, double __G, double __H)
85 {
86   return __extension__ (__m512d)
87          { __H, __G, __F, __E, __D, __C, __B, __A };
88 }
89
90 extern __inline __m512
91 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92 _mm512_set_ps (float __A, float __B, float __C, float __D,
93                float __E, float __F, float __G, float __H,
94                float __I, float __J, float __K, float __L,
95                float __M, float __N, float __O, float __P)
96 {
97   return __extension__ (__m512)
98          { __P, __O, __N, __M, __L, __K, __J, __I,
99            __H, __G, __F, __E, __D, __C, __B, __A };
100 }
101
/* "Reversed" setters.  Each macro hands its arguments to the matching
   _mm512_set_* function in the opposite order, so e0 ends up in
   element 0.  */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7)                     \
  _mm512_set_epi64 (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7,                     \
                          e8, e9, e10, e11, e12, e13, e14, e15)               \
  _mm512_set_epi32 (e15, e14, e13, e12, e11, e10, e9, e8,                     \
                    e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7)                        \
  _mm512_set_pd (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7,                        \
                       e8, e9, e10, e11, e12, e13, e14, e15)                  \
  _mm512_set_ps (e15, e14, e13, e12, e11, e10, e9, e8,                        \
                 e7, e6, e5, e4, e3, e2, e1, e0)
114
115 extern __inline __m512
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm512_undefined_ps (void)
118 {
119   __m512 __Y = __Y;
120   return __Y;
121 }
122
123 extern __inline __m512d
124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
125 _mm512_undefined_pd (void)
126 {
127   __m512d __Y = __Y;
128   return __Y;
129 }
130
131 extern __inline __m512i
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 _mm512_undefined_epi32 (void)
134 {
135   __m512i __Y = __Y;
136   return __Y;
137 }
138
/* _mm512_undefined_si512 is a second name for _mm512_undefined_epi32.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
140
141 extern __inline __m512i
142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
143 _mm512_set1_epi8 (char __A)
144 {
145   return __extension__ (__m512i)(__v64qi)
146          { __A, __A, __A, __A, __A, __A, __A, __A,
147            __A, __A, __A, __A, __A, __A, __A, __A,
148            __A, __A, __A, __A, __A, __A, __A, __A,
149            __A, __A, __A, __A, __A, __A, __A, __A,
150            __A, __A, __A, __A, __A, __A, __A, __A,
151            __A, __A, __A, __A, __A, __A, __A, __A,
152            __A, __A, __A, __A, __A, __A, __A, __A,
153            __A, __A, __A, __A, __A, __A, __A, __A };
154 }
155
156 extern __inline __m512i
157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
158 _mm512_set1_epi16 (short __A)
159 {
160   return __extension__ (__m512i)(__v32hi)
161          { __A, __A, __A, __A, __A, __A, __A, __A,
162            __A, __A, __A, __A, __A, __A, __A, __A,
163            __A, __A, __A, __A, __A, __A, __A, __A,
164            __A, __A, __A, __A, __A, __A, __A, __A };
165 }
166
167 extern __inline __m512d
168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
169 _mm512_set1_pd (double __A)
170 {
171   return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
172                                                   (__v2df) { __A, },
173                                                   (__v8df)
174                                                   _mm512_undefined_pd (),
175                                                   (__mmask8) -1);
176 }
177
178 extern __inline __m512
179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
180 _mm512_set1_ps (float __A)
181 {
182   return (__m512) __builtin_ia32_broadcastss512 (__extension__
183                                                  (__v4sf) { __A, },
184                                                  (__v16sf)
185                                                  _mm512_undefined_ps (),
186                                                  (__mmask16) -1);
187 }
188
189 /* Create the vector [A B C D A B C D A B C D A B C D].  */
190 extern __inline __m512i
191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
193 {
194   return __extension__ (__m512i)(__v16si)
195          { __D, __C, __B, __A, __D, __C, __B, __A,
196            __D, __C, __B, __A, __D, __C, __B, __A };
197 }
198
199 extern __inline __m512i
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
202                    long long __D)
203 {
204   return __extension__ (__m512i) (__v8di)
205          { __D, __C, __B, __A, __D, __C, __B, __A };
206 }
207
208 extern __inline __m512d
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 _mm512_set4_pd (double __A, double __B, double __C, double __D)
211 {
212   return __extension__ (__m512d)
213          { __D, __C, __B, __A, __D, __C, __B, __A };
214 }
215
216 extern __inline __m512
217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
218 _mm512_set4_ps (float __A, float __B, float __C, float __D)
219 {
220   return __extension__ (__m512)
221          { __D, __C, __B, __A, __D, __C, __B, __A,
222            __D, __C, __B, __A, __D, __C, __B, __A };
223 }
224
/* "Reversed" four-value setters.  Each macro passes its arguments to the
   matching _mm512_set4_* function in the opposite order, so e0 ends up in
   element 0.  */
#define _mm512_setr4_epi64(e0, e1, e2, e3)                                    \
  _mm512_set4_epi64 (e3, e2, e1, e0)

#define _mm512_setr4_epi32(e0, e1, e2, e3)                                    \
  _mm512_set4_epi32 (e3, e2, e1, e0)

#define _mm512_setr4_pd(e0, e1, e2, e3)                                       \
  _mm512_set4_pd (e3, e2, e1, e0)

#define _mm512_setr4_ps(e0, e1, e2, e3)                                       \
  _mm512_set4_ps (e3, e2, e1, e0)
236
237 extern __inline __m512
238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239 _mm512_setzero_ps (void)
240 {
241   return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
242                                  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
243 }
244
245 extern __inline __m512d
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm512_setzero_pd (void)
248 {
249   return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
250 }
251
252 extern __inline __m512i
253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
254 _mm512_setzero_epi32 (void)
255 {
256   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
257 }
258
259 extern __inline __m512i
260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261 _mm512_setzero_si512 (void)
262 {
263   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
264 }
265
266 extern __inline __m512d
267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
269 {
270   return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
271                                                   (__v8df) __W,
272                                                   (__mmask8) __U);
273 }
274
275 extern __inline __m512d
276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
277 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
278 {
279   return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
280                                                   (__v8df)
281                                                   _mm512_setzero_pd (),
282                                                   (__mmask8) __U);
283 }
284
285 extern __inline __m512
286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
287 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
288 {
289   return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
290                                                  (__v16sf) __W,
291                                                  (__mmask16) __U);
292 }
293
294 extern __inline __m512
295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
296 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
297 {
298   return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
299                                                  (__v16sf)
300                                                  _mm512_setzero_ps (),
301                                                  (__mmask16) __U);
302 }
303
304 extern __inline __m512d
305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
306 _mm512_load_pd (void const *__P)
307 {
308   return *(__m512d *) __P;
309 }
310
311 extern __inline __m512d
312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
313 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
314 {
315   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
316                                                    (__v8df) __W,
317                                                    (__mmask8) __U);
318 }
319
320 extern __inline __m512d
321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
323 {
324   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
325                                                    (__v8df)
326                                                    _mm512_setzero_pd (),
327                                                    (__mmask8) __U);
328 }
329
330 extern __inline void
331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
332 _mm512_store_pd (void *__P, __m512d __A)
333 {
334   *(__m512d *) __P = __A;
335 }
336
337 extern __inline void
338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
339 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
340 {
341   __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
342                                    (__mmask8) __U);
343 }
344
345 extern __inline __m512
346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347 _mm512_load_ps (void const *__P)
348 {
349   return *(__m512 *) __P;
350 }
351
352 extern __inline __m512
353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
354 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
355 {
356   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
357                                                   (__v16sf) __W,
358                                                   (__mmask16) __U);
359 }
360
361 extern __inline __m512
362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
363 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
364 {
365   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
366                                                   (__v16sf)
367                                                   _mm512_setzero_ps (),
368                                                   (__mmask16) __U);
369 }
370
371 extern __inline void
372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373 _mm512_store_ps (void *__P, __m512 __A)
374 {
375   *(__m512 *) __P = __A;
376 }
377
378 extern __inline void
379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
381 {
382   __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
383                                    (__mmask16) __U);
384 }
385
386 extern __inline __m512i
387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
388 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
389 {
390   return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
391                                                      (__v8di) __W,
392                                                      (__mmask8) __U);
393 }
394
395 extern __inline __m512i
396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
398 {
399   return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
400                                                      (__v8di)
401                                                      _mm512_setzero_si512 (),
402                                                      (__mmask8) __U);
403 }
404
405 extern __inline __m512i
406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
407 _mm512_load_epi64 (void const *__P)
408 {
409   return *(__m512i *) __P;
410 }
411
412 extern __inline __m512i
413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
414 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
415 {
416   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
417                                                         (__v8di) __W,
418                                                         (__mmask8) __U);
419 }
420
421 extern __inline __m512i
422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
423 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
424 {
425   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
426                                                         (__v8di)
427                                                         _mm512_setzero_si512 (),
428                                                         (__mmask8) __U);
429 }
430
431 extern __inline void
432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
433 _mm512_store_epi64 (void *__P, __m512i __A)
434 {
435   *(__m512i *) __P = __A;
436 }
437
438 extern __inline void
439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
440 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
441 {
442   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
443                                         (__mmask8) __U);
444 }
445
446 extern __inline __m512i
447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
448 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
449 {
450   return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
451                                                      (__v16si) __W,
452                                                      (__mmask16) __U);
453 }
454
455 extern __inline __m512i
456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
457 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
458 {
459   return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
460                                                      (__v16si)
461                                                      _mm512_setzero_si512 (),
462                                                      (__mmask16) __U);
463 }
464
465 extern __inline __m512i
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 _mm512_load_si512 (void const *__P)
468 {
469   return *(__m512i *) __P;
470 }
471
472 extern __inline __m512i
473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
474 _mm512_load_epi32 (void const *__P)
475 {
476   return *(__m512i *) __P;
477 }
478
479 extern __inline __m512i
480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
481 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
482 {
483   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
484                                                         (__v16si) __W,
485                                                         (__mmask16) __U);
486 }
487
488 extern __inline __m512i
489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
490 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
491 {
492   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
493                                                         (__v16si)
494                                                         _mm512_setzero_si512 (),
495                                                         (__mmask16) __U);
496 }
497
498 extern __inline void
499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
500 _mm512_store_si512 (void *__P, __m512i __A)
501 {
502   *(__m512i *) __P = __A;
503 }
504
505 extern __inline void
506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
507 _mm512_store_epi32 (void *__P, __m512i __A)
508 {
509   *(__m512i *) __P = __A;
510 }
511
512 extern __inline void
513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
514 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
515 {
516   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
517                                         (__mmask16) __U);
518 }
519
520 extern __inline __m512i
521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
522 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
523 {
524   return (__m512i) ((__v16su) __A * (__v16su) __B);
525 }
526
527 extern __inline __m512i
528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
529 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
530 {
531   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
532                                                   (__v16si) __B,
533                                                   (__v16si)
534                                                   _mm512_setzero_si512 (),
535                                                   __M);
536 }
537
538 extern __inline __m512i
539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
540 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
541 {
542   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
543                                                   (__v16si) __B,
544                                                   (__v16si) __W, __M);
545 }
546
547 extern __inline __m512i
548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
549 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
550 {
551   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
552                                                   (__v16si) __Y,
553                                                   (__v16si)
554                                                   _mm512_undefined_epi32 (),
555                                                   (__mmask16) -1);
556 }
557
558 extern __inline __m512i
559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
560 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
561 {
562   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
563                                                   (__v16si) __Y,
564                                                   (__v16si) __W,
565                                                   (__mmask16) __U);
566 }
567
568 extern __inline __m512i
569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
570 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
571 {
572   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
573                                                   (__v16si) __Y,
574                                                   (__v16si)
575                                                   _mm512_setzero_si512 (),
576                                                   (__mmask16) __U);
577 }
578
579 extern __inline __m512i
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
582 {
583   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
584                                                   (__v16si) __Y,
585                                                   (__v16si)
586                                                   _mm512_undefined_epi32 (),
587                                                   (__mmask16) -1);
588 }
589
590 extern __inline __m512i
591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
593 {
594   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
595                                                   (__v16si) __Y,
596                                                   (__v16si) __W,
597                                                   (__mmask16) __U);
598 }
599
600 extern __inline __m512i
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
603 {
604   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
605                                                   (__v16si) __Y,
606                                                   (__v16si)
607                                                   _mm512_setzero_si512 (),
608                                                   (__mmask16) __U);
609 }
610
611 extern __inline __m512i
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
614 {
615   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
616                                                   (__v16si) __Y,
617                                                   (__v16si)
618                                                   _mm512_undefined_epi32 (),
619                                                   (__mmask16) -1);
620 }
621
622 extern __inline __m512i
623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
624 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
625 {
626   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
627                                                   (__v16si) __Y,
628                                                   (__v16si) __W,
629                                                   (__mmask16) __U);
630 }
631
632 extern __inline __m512i
633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
634 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
635 {
636   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
637                                                   (__v16si) __Y,
638                                                   (__v16si)
639                                                   _mm512_setzero_si512 (),
640                                                   (__mmask16) __U);
641 }
642
643 extern __inline __m512i
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
645 _mm512_add_epi64 (__m512i __A, __m512i __B)
646 {
647   return (__m512i) ((__v8du) __A + (__v8du) __B);
648 }
649
650 extern __inline __m512i
651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
652 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
653 {
654   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
655                                                  (__v8di) __B,
656                                                  (__v8di) __W,
657                                                  (__mmask8) __U);
658 }
659
660 extern __inline __m512i
661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
663 {
664   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
665                                                  (__v8di) __B,
666                                                  (__v8di)
667                                                  _mm512_setzero_si512 (),
668                                                  (__mmask8) __U);
669 }
670
671 extern __inline __m512i
672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
673 _mm512_sub_epi64 (__m512i __A, __m512i __B)
674 {
675   return (__m512i) ((__v8du) __A - (__v8du) __B);
676 }
677
678 extern __inline __m512i
679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
680 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
681 {
682   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
683                                                  (__v8di) __B,
684                                                  (__v8di) __W,
685                                                  (__mmask8) __U);
686 }
687
688 extern __inline __m512i
689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
690 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
691 {
692   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
693                                                  (__v8di) __B,
694                                                  (__v8di)
695                                                  _mm512_setzero_si512 (),
696                                                  (__mmask8) __U);
697 }
698
699 extern __inline __m512i
700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
702 {
703   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
704                                                  (__v8di) __Y,
705                                                  (__v8di)
706                                                  _mm512_undefined_pd (),
707                                                  (__mmask8) -1);
708 }
709
710 extern __inline __m512i
711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
712 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
713 {
714   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
715                                                  (__v8di) __Y,
716                                                  (__v8di) __W,
717                                                  (__mmask8) __U);
718 }
719
720 extern __inline __m512i
721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
723 {
724   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
725                                                  (__v8di) __Y,
726                                                  (__v8di)
727                                                  _mm512_setzero_si512 (),
728                                                  (__mmask8) __U);
729 }
730
731 extern __inline __m512i
732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
734 {
735   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
736                                                  (__v8di) __Y,
737                                                  (__v8di)
738                                                  _mm512_undefined_epi32 (),
739                                                  (__mmask8) -1);
740 }
741
742 extern __inline __m512i
743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
744 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
745 {
746   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
747                                                  (__v8di) __Y,
748                                                  (__v8di) __W,
749                                                  (__mmask8) __U);
750 }
751
752 extern __inline __m512i
753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
755 {
756   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
757                                                  (__v8di) __Y,
758                                                  (__v8di)
759                                                  _mm512_setzero_si512 (),
760                                                  (__mmask8) __U);
761 }
762
763 extern __inline __m512i
764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
766 {
767   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
768                                                  (__v8di) __Y,
769                                                  (__v8di)
770                                                  _mm512_undefined_epi32 (),
771                                                  (__mmask8) -1);
772 }
773
774 extern __inline __m512i
775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
777 {
778   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
779                                                  (__v8di) __Y,
780                                                  (__v8di) __W,
781                                                  (__mmask8) __U);
782 }
783
784 extern __inline __m512i
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
787 {
788   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
789                                                  (__v8di) __Y,
790                                                  (__v8di)
791                                                  _mm512_setzero_si512 (),
792                                                  (__mmask8) __U);
793 }
794
795 extern __inline __m512i
796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797 _mm512_add_epi32 (__m512i __A, __m512i __B)
798 {
799   return (__m512i) ((__v16su) __A + (__v16su) __B);
800 }
801
802 extern __inline __m512i
803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
805 {
806   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
807                                                  (__v16si) __B,
808                                                  (__v16si) __W,
809                                                  (__mmask16) __U);
810 }
811
812 extern __inline __m512i
813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
815 {
816   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
817                                                  (__v16si) __B,
818                                                  (__v16si)
819                                                  _mm512_setzero_si512 (),
820                                                  (__mmask16) __U);
821 }
822
823 extern __inline __m512i
824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
826 {
827   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
828                                                   (__v16si) __Y,
829                                                   (__v8di)
830                                                   _mm512_undefined_epi32 (),
831                                                   (__mmask8) -1);
832 }
833
834 extern __inline __m512i
835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
836 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
837 {
838   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
839                                                   (__v16si) __Y,
840                                                   (__v8di) __W, __M);
841 }
842
843 extern __inline __m512i
844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
845 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
846 {
847   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
848                                                   (__v16si) __Y,
849                                                   (__v8di)
850                                                   _mm512_setzero_si512 (),
851                                                   __M);
852 }
853
854 extern __inline __m512i
855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
856 _mm512_sub_epi32 (__m512i __A, __m512i __B)
857 {
858   return (__m512i) ((__v16su) __A - (__v16su) __B);
859 }
860
861 extern __inline __m512i
862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
863 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
864 {
865   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
866                                                  (__v16si) __B,
867                                                  (__v16si) __W,
868                                                  (__mmask16) __U);
869 }
870
871 extern __inline __m512i
872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
873 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
874 {
875   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
876                                                  (__v16si) __B,
877                                                  (__v16si)
878                                                  _mm512_setzero_si512 (),
879                                                  (__mmask16) __U);
880 }
881
882 extern __inline __m512i
883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
885 {
886   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
887                                                    (__v16si) __Y,
888                                                    (__v8di)
889                                                    _mm512_undefined_epi32 (),
890                                                    (__mmask8) -1);
891 }
892
893 extern __inline __m512i
894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
895 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
896 {
897   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
898                                                    (__v16si) __Y,
899                                                    (__v8di) __W, __M);
900 }
901
902 extern __inline __m512i
903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
904 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
905 {
906   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
907                                                    (__v16si) __Y,
908                                                    (__v8di)
909                                                    _mm512_setzero_si512 (),
910                                                    __M);
911 }
912
913 #ifdef __OPTIMIZE__
914 extern __inline __m512i
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
917 {
918   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
919                                                   (__v8di)
920                                                   _mm512_undefined_epi32 (),
921                                                   (__mmask8) -1);
922 }
923
924 extern __inline __m512i
925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
926 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
927                         unsigned int __B)
928 {
929   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
930                                                   (__v8di) __W,
931                                                   (__mmask8) __U);
932 }
933
934 extern __inline __m512i
935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
936 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
937 {
938   return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
939                                                   (__v8di)
940                                                   _mm512_setzero_si512 (),
941                                                   (__mmask8) __U);
942 }
943 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to a
   __builtin_ia32_psllqi512_mask call with X cast to __v8di and the count
   C cast to int.  */

/* Unmasked: undefined third operand, all-ones mask.  */
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)_mm512_undefined_epi32 (), \
      (__mmask8)-1))

/* Masked: W is the third operand, U the mask.  */
#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)))

/* Zero-masked: all-zero third operand, U the mask.  */
#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)_mm512_setzero_si512 (), \
      (__mmask8)(U)))
958 #endif
959
960 extern __inline __m512i
961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
962 _mm512_sll_epi64 (__m512i __A, __m128i __B)
963 {
964   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
965                                                  (__v2di) __B,
966                                                  (__v8di)
967                                                  _mm512_undefined_epi32 (),
968                                                  (__mmask8) -1);
969 }
970
971 extern __inline __m512i
972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
973 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
974 {
975   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
976                                                  (__v2di) __B,
977                                                  (__v8di) __W,
978                                                  (__mmask8) __U);
979 }
980
981 extern __inline __m512i
982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
983 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
984 {
985   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
986                                                  (__v2di) __B,
987                                                  (__v8di)
988                                                  _mm512_setzero_si512 (),
989                                                  (__mmask8) __U);
990 }
991
992 #ifdef __OPTIMIZE__
993 extern __inline __m512i
994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
995 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
996 {
997   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
998                                                   (__v8di)
999                                                   _mm512_undefined_epi32 (),
1000                                                   (__mmask8) -1);
1001 }
1002
1003 extern __inline __m512i
1004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1006                         __m512i __A, unsigned int __B)
1007 {
1008   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1009                                                   (__v8di) __W,
1010                                                   (__mmask8) __U);
1011 }
1012
1013 extern __inline __m512i
1014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1015 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1016 {
1017   return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1018                                                   (__v8di)
1019                                                   _mm512_setzero_si512 (),
1020                                                   (__mmask8) __U);
1021 }
1022 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to a
   __builtin_ia32_psrlqi512_mask call with X cast to __v8di and the count
   C cast to int.  */

/* Unmasked: undefined third operand, all-ones mask.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)_mm512_undefined_epi32 (), \
      (__mmask8)-1))

/* Masked: W is the third operand, U the mask.  */
#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)))

/* Zero-masked: all-zero third operand, U the mask.  */
#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)_mm512_setzero_si512 (), \
      (__mmask8)(U)))
1037 #endif
1038
1039 extern __inline __m512i
1040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1041 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1042 {
1043   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1044                                                  (__v2di) __B,
1045                                                  (__v8di)
1046                                                  _mm512_undefined_epi32 (),
1047                                                  (__mmask8) -1);
1048 }
1049
1050 extern __inline __m512i
1051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1052 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1053 {
1054   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1055                                                  (__v2di) __B,
1056                                                  (__v8di) __W,
1057                                                  (__mmask8) __U);
1058 }
1059
1060 extern __inline __m512i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1063 {
1064   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1065                                                  (__v2di) __B,
1066                                                  (__v8di)
1067                                                  _mm512_setzero_si512 (),
1068                                                  (__mmask8) __U);
1069 }
1070
1071 #ifdef __OPTIMIZE__
1072 extern __inline __m512i
1073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1074 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1075 {
1076   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1077                                                   (__v8di)
1078                                                   _mm512_undefined_epi32 (),
1079                                                   (__mmask8) -1);
1080 }
1081
1082 extern __inline __m512i
1083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1085                         unsigned int __B)
1086 {
1087   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1088                                                   (__v8di) __W,
1089                                                   (__mmask8) __U);
1090 }
1091
1092 extern __inline __m512i
1093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1094 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1095 {
1096   return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1097                                                   (__v8di)
1098                                                   _mm512_setzero_si512 (),
1099                                                   (__mmask8) __U);
1100 }
1101 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to a
   __builtin_ia32_psraqi512_mask call with X cast to __v8di and the count
   C cast to int.  */

/* Unmasked: undefined third operand, all-ones mask.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)_mm512_undefined_epi32 (), \
      (__mmask8)-1))

/* Masked: W is the third operand, U the mask.  */
#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)))

/* Zero-masked: all-zero third operand, U the mask.  */
#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
      (__v8di)(__m512i)(X), \
      (int)(C), \
      (__v8di)(__m512i)_mm512_setzero_si512 (), \
      (__mmask8)(U)))
1116 #endif
1117
1118 extern __inline __m512i
1119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1120 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1121 {
1122   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1123                                                  (__v2di) __B,
1124                                                  (__v8di)
1125                                                  _mm512_undefined_epi32 (),
1126                                                  (__mmask8) -1);
1127 }
1128
1129 extern __inline __m512i
1130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1131 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1132 {
1133   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1134                                                  (__v2di) __B,
1135                                                  (__v8di) __W,
1136                                                  (__mmask8) __U);
1137 }
1138
1139 extern __inline __m512i
1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1142 {
1143   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1144                                                  (__v2di) __B,
1145                                                  (__v8di)
1146                                                  _mm512_setzero_si512 (),
1147                                                  (__mmask8) __U);
1148 }
1149
1150 #ifdef __OPTIMIZE__
1151 extern __inline __m512i
1152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1153 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1154 {
1155   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1156                                                   (__v16si)
1157                                                   _mm512_undefined_epi32 (),
1158                                                   (__mmask16) -1);
1159 }
1160
1161 extern __inline __m512i
1162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1163 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1164                         unsigned int __B)
1165 {
1166   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1167                                                   (__v16si) __W,
1168                                                   (__mmask16) __U);
1169 }
1170
1171 extern __inline __m512i
1172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1173 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1174 {
1175   return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1176                                                   (__v16si)
1177                                                   _mm512_setzero_si512 (),
1178                                                   (__mmask16) __U);
1179 }
1180 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to a
   __builtin_ia32_pslldi512_mask call with X cast to __v16si and the
   count C cast to int.  */

/* Unmasked: undefined third operand, all-ones mask.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)_mm512_undefined_epi32 (), \
      (__mmask16)-1))

/* Masked: W is the third operand, U the mask.  */
#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)))

/* Zero-masked: all-zero third operand, U the mask.  */
#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)_mm512_setzero_si512 (), \
      (__mmask16)(U)))
1195 #endif
1196
1197 extern __inline __m512i
1198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1200 {
1201   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1202                                                  (__v4si) __B,
1203                                                  (__v16si)
1204                                                  _mm512_undefined_epi32 (),
1205                                                  (__mmask16) -1);
1206 }
1207
1208 extern __inline __m512i
1209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1210 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1211 {
1212   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1213                                                  (__v4si) __B,
1214                                                  (__v16si) __W,
1215                                                  (__mmask16) __U);
1216 }
1217
1218 extern __inline __m512i
1219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1221 {
1222   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1223                                                  (__v4si) __B,
1224                                                  (__v16si)
1225                                                  _mm512_setzero_si512 (),
1226                                                  (__mmask16) __U);
1227 }
1228
1229 #ifdef __OPTIMIZE__
1230 extern __inline __m512i
1231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1232 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1233 {
1234   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1235                                                   (__v16si)
1236                                                   _mm512_undefined_epi32 (),
1237                                                   (__mmask16) -1);
1238 }
1239
1240 extern __inline __m512i
1241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1242 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1243                         __m512i __A, unsigned int __B)
1244 {
1245   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1246                                                   (__v16si) __W,
1247                                                   (__mmask16) __U);
1248 }
1249
1250 extern __inline __m512i
1251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1252 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1253 {
1254   return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1255                                                   (__v16si)
1256                                                   _mm512_setzero_si512 (),
1257                                                   (__mmask16) __U);
1258 }
1259 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to a
   __builtin_ia32_psrldi512_mask call with X cast to __v16si and the
   count C cast to int.  */

/* Unmasked: undefined third operand, all-ones mask.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)_mm512_undefined_epi32 (), \
      (__mmask16)-1))

/* Masked: W is the third operand, U the mask.  */
#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)))

/* Zero-masked: all-zero third operand, U the mask.  */
#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)_mm512_setzero_si512 (), \
      (__mmask16)(U)))
1274 #endif
1275
1276 extern __inline __m512i
1277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1278 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1279 {
1280   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1281                                                  (__v4si) __B,
1282                                                  (__v16si)
1283                                                  _mm512_undefined_epi32 (),
1284                                                  (__mmask16) -1);
1285 }
1286
1287 extern __inline __m512i
1288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1289 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1290 {
1291   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1292                                                  (__v4si) __B,
1293                                                  (__v16si) __W,
1294                                                  (__mmask16) __U);
1295 }
1296
1297 extern __inline __m512i
1298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1299 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1300 {
1301   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1302                                                  (__v4si) __B,
1303                                                  (__v16si)
1304                                                  _mm512_setzero_si512 (),
1305                                                  (__mmask16) __U);
1306 }
1307
1308 #ifdef __OPTIMIZE__
1309 extern __inline __m512i
1310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1311 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1312 {
1313   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1314                                                   (__v16si)
1315                                                   _mm512_undefined_epi32 (),
1316                                                   (__mmask16) -1);
1317 }
1318
1319 extern __inline __m512i
1320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1321 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1322                         unsigned int __B)
1323 {
1324   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1325                                                   (__v16si) __W,
1326                                                   (__mmask16) __U);
1327 }
1328
1329 extern __inline __m512i
1330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1331 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1332 {
1333   return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1334                                                   (__v16si)
1335                                                   _mm512_setzero_si512 (),
1336                                                   (__mmask16) __U);
1337 }
1338 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expands to a
   __builtin_ia32_psradi512_mask call with X cast to __v16si and the
   count C cast to int.  */

/* Unmasked: undefined third operand, all-ones mask.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)_mm512_undefined_epi32 (), \
      (__mmask16)-1))

/* Masked: W is the third operand, U the mask.  */
#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)))

/* Zero-masked: all-zero third operand, U the mask.  */
#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
      (__v16si)(__m512i)(X), \
      (int)(C), \
      (__v16si)(__m512i)_mm512_setzero_si512 (), \
      (__mmask16)(U)))
1353 #endif
1354
1355 extern __inline __m512i
1356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1357 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1358 {
1359   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1360                                                  (__v4si) __B,
1361                                                  (__v16si)
1362                                                  _mm512_undefined_epi32 (),
1363                                                  (__mmask16) -1);
1364 }
1365
1366 extern __inline __m512i
1367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1368 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1369 {
1370   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1371                                                  (__v4si) __B,
1372                                                  (__v16si) __W,
1373                                                  (__mmask16) __U);
1374 }
1375
1376 extern __inline __m512i
1377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1378 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1379 {
1380   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1381                                                  (__v4si) __B,
1382                                                  (__v16si)
1383                                                  _mm512_setzero_si512 (),
1384                                                  (__mmask16) __U);
1385 }
1386
1387 #ifdef __OPTIMIZE__
1388 extern __inline __m128d
1389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1390 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1391 {
1392   return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1393                                                (__v2df) __B,
1394                                                __R);
1395 }
1396
1397 extern __inline __m128
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1400 {
1401   return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1402                                               (__v4sf) __B,
1403                                               __R);
1404 }
1405
1406 extern __inline __m128d
1407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1408 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1409 {
1410   return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1411                                                (__v2df) __B,
1412                                                __R);
1413 }
1414
1415 extern __inline __m128
1416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1417 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1418 {
1419   return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1420                                               (__v4sf) __B,
1421                                               __R);
1422 }
1423
1424 #else
/* Macro forms of the scalar add/sub-with-rounding intrinsics, used when
   __OPTIMIZE__ is not defined.  Each forwards A, B and C unchanged to the
   matching __builtin_ia32_*_round builtin and casts the result.

   The whole expansion is parenthesized: a cast binds less tightly than a
   postfix operator, so without the outer parentheses an expression such
   as `_mm_add_round_sd (a, b, r)[0]' would apply the subscript to the
   builtin call before the cast, unlike the inline-function forms.  */
#define _mm_add_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_addsd_round(A, B, C))

#define _mm_add_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_addss_round(A, B, C))

#define _mm_sub_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_subsd_round(A, B, C))

#define _mm_sub_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_subss_round(A, B, C))
1436 #endif
1437
1438 #ifdef __OPTIMIZE__
1439 extern __inline __m512i
1440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1441 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1442 {
1443   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1444                                                      (__v8di) __B,
1445                                                      (__v8di) __C, imm,
1446                                                      (__mmask8) -1);
1447 }
1448
1449 extern __inline __m512i
1450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1451 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1452                                 __m512i __C, const int imm)
1453 {
1454   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1455                                                      (__v8di) __B,
1456                                                      (__v8di) __C, imm,
1457                                                      (__mmask8) __U);
1458 }
1459
1460 extern __inline __m512i
1461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1462 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1463                                  __m512i __C, const int imm)
1464 {
1465   return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1466                                                       (__v8di) __B,
1467                                                       (__v8di) __C,
1468                                                       imm, (__mmask8) __U);
1469 }
1470
1471 extern __inline __m512i
1472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1474 {
1475   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1476                                                      (__v16si) __B,
1477                                                      (__v16si) __C,
1478                                                      imm, (__mmask16) -1);
1479 }
1480
1481 extern __inline __m512i
1482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1483 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1484                                 __m512i __C, const int imm)
1485 {
1486   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1487                                                      (__v16si) __B,
1488                                                      (__v16si) __C,
1489                                                      imm, (__mmask16) __U);
1490 }
1491
1492 extern __inline __m512i
1493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1494 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1495                                  __m512i __C, const int imm)
1496 {
1497   return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1498                                                       (__v16si) __B,
1499                                                       (__v16si) __C,
1500                                                       imm, (__mmask16) __U);
1501 }
1502 #else
/* Macro forms of the ternary-logic intrinsics, selected when __OPTIMIZE__
   is not defined.  Each one expands to the same builtin call, with the
   same casts and mask, as the inline function of the same name.  */

/* 64-bit lanes.  */
#define _mm512_ternarylogic_epi64(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
     (__v8di)(__m512i)(A),                                              \
     (__v8di)(__m512i)(B),                                              \
     (__v8di)(__m512i)(C),                                              \
     (int)(I),                                                          \
     (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogq512_mask (                         \
     (__v8di)(__m512i)(A),                                              \
     (__v8di)(__m512i)(B),                                              \
     (__v8di)(__m512i)(C),                                              \
     (int)(I),                                                          \
     (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogq512_maskz (                        \
     (__v8di)(__m512i)(A),                                              \
     (__v8di)(__m512i)(B),                                              \
     (__v8di)(__m512i)(C),                                              \
     (int)(I),                                                          \
     (__mmask8)(U)))

/* 32-bit lanes.  */
#define _mm512_ternarylogic_epi32(A, B, C, I)                           \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
     (__v16si)(__m512i)(A),                                             \
     (__v16si)(__m512i)(B),                                             \
     (__v16si)(__m512i)(C),                                             \
     (int)(I),                                                          \
     (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)                   \
  ((__m512i) __builtin_ia32_pternlogd512_mask (                         \
     (__v16si)(__m512i)(A),                                             \
     (__v16si)(__m512i)(B),                                             \
     (__v16si)(__m512i)(C),                                             \
     (int)(I),                                                          \
     (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)                  \
  ((__m512i) __builtin_ia32_pternlogd512_maskz (                        \
     (__v16si)(__m512i)(A),                                             \
     (__v16si)(__m512i)(B),                                             \
     (__v16si)(__m512i)(C),                                             \
     (int)(I),                                                          \
     (__mmask16)(U)))
1524 #endif
1525
1526 extern __inline __m512d
1527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528 _mm512_rcp14_pd (__m512d __A)
1529 {
1530   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1531                                                    (__v8df)
1532                                                    _mm512_undefined_pd (),
1533                                                    (__mmask8) -1);
1534 }
1535
1536 extern __inline __m512d
1537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1539 {
1540   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1541                                                    (__v8df) __W,
1542                                                    (__mmask8) __U);
1543 }
1544
1545 extern __inline __m512d
1546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1547 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1548 {
1549   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1550                                                    (__v8df)
1551                                                    _mm512_setzero_pd (),
1552                                                    (__mmask8) __U);
1553 }
1554
1555 extern __inline __m512
1556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1557 _mm512_rcp14_ps (__m512 __A)
1558 {
1559   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1560                                                   (__v16sf)
1561                                                   _mm512_undefined_ps (),
1562                                                   (__mmask16) -1);
1563 }
1564
1565 extern __inline __m512
1566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1567 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1568 {
1569   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1570                                                   (__v16sf) __W,
1571                                                   (__mmask16) __U);
1572 }
1573
1574 extern __inline __m512
1575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1577 {
1578   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1579                                                   (__v16sf)
1580                                                   _mm512_setzero_ps (),
1581                                                   (__mmask16) __U);
1582 }
1583
1584 extern __inline __m128d
1585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1586 _mm_rcp14_sd (__m128d __A, __m128d __B)
1587 {
1588   return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1589                                            (__v2df) __A);
1590 }
1591
1592 extern __inline __m128
1593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1594 _mm_rcp14_ss (__m128 __A, __m128 __B)
1595 {
1596   return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1597                                           (__v4sf) __A);
1598 }
1599
1600 extern __inline __m512d
1601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1602 _mm512_rsqrt14_pd (__m512d __A)
1603 {
1604   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1605                                                      (__v8df)
1606                                                      _mm512_undefined_pd (),
1607                                                      (__mmask8) -1);
1608 }
1609
1610 extern __inline __m512d
1611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1612 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1613 {
1614   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1615                                                      (__v8df) __W,
1616                                                      (__mmask8) __U);
1617 }
1618
1619 extern __inline __m512d
1620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1621 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1622 {
1623   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1624                                                      (__v8df)
1625                                                      _mm512_setzero_pd (),
1626                                                      (__mmask8) __U);
1627 }
1628
1629 extern __inline __m512
1630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1631 _mm512_rsqrt14_ps (__m512 __A)
1632 {
1633   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1634                                                     (__v16sf)
1635                                                     _mm512_undefined_ps (),
1636                                                     (__mmask16) -1);
1637 }
1638
1639 extern __inline __m512
1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1642 {
1643   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1644                                                     (__v16sf) __W,
1645                                                     (__mmask16) __U);
1646 }
1647
1648 extern __inline __m512
1649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1650 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1651 {
1652   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1653                                                     (__v16sf)
1654                                                     _mm512_setzero_ps (),
1655                                                     (__mmask16) __U);
1656 }
1657
1658 extern __inline __m128d
1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1661 {
1662   return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1663                                              (__v2df) __A);
1664 }
1665
1666 extern __inline __m128
1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1669 {
1670   return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1671                                             (__v4sf) __A);
1672 }
1673
1674 #ifdef __OPTIMIZE__
1675 extern __inline __m512d
1676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1677 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1678 {
1679   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1680                                                   (__v8df)
1681                                                   _mm512_undefined_pd (),
1682                                                   (__mmask8) -1, __R);
1683 }
1684
1685 extern __inline __m512d
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1688                            const int __R)
1689 {
1690   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1691                                                   (__v8df) __W,
1692                                                   (__mmask8) __U, __R);
1693 }
1694
1695 extern __inline __m512d
1696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1697 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1698 {
1699   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1700                                                   (__v8df)
1701                                                   _mm512_setzero_pd (),
1702                                                   (__mmask8) __U, __R);
1703 }
1704
1705 extern __inline __m512
1706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1707 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1708 {
1709   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1710                                                  (__v16sf)
1711                                                  _mm512_undefined_ps (),
1712                                                  (__mmask16) -1, __R);
1713 }
1714
1715 extern __inline __m512
1716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1717 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1718 {
1719   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1720                                                  (__v16sf) __W,
1721                                                  (__mmask16) __U, __R);
1722 }
1723
1724 extern __inline __m512
1725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1726 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1727 {
1728   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1729                                                  (__v16sf)
1730                                                  _mm512_setzero_ps (),
1731                                                  (__mmask16) __U, __R);
1732 }
1733
1734 extern __inline __m128d
1735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1736 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1737 {
1738   return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1739                                                 (__v2df) __A,
1740                                                 __R);
1741 }
1742
1743 extern __inline __m128
1744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1745 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1746 {
1747   return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1748                                                (__v4sf) __A,
1749                                                __R);
1750 }
1751 #else
/* Macro forms of the packed square-root-with-rounding intrinsics, selected
   when __OPTIMIZE__ is not defined.  Each expansion is wrapped in
   parentheses and every argument is parenthesized and cast to the type the
   corresponding inline function uses, so the macros compose safely inside
   larger expressions and with arbitrary argument expressions.  */
#define _mm512_sqrt_round_pd(A, C)                                      \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df) _mm512_undefined_pd (), (__mmask8)-1, (int)(C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C)                           \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df)(__m512d)(W), (__mmask8)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C)                             \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A),       \
    (__v8df) _mm512_setzero_pd (), (__mmask8)(U), (int)(C)))

#define _mm512_sqrt_round_ps(A, C)                                      \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf) _mm512_undefined_ps (), (__mmask16)-1, (int)(C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C)                           \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf)(__m512)(W), (__mmask16)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C)                             \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A),        \
    (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (int)(C)))
1769
/* Macro forms of the scalar square-root-with-rounding intrinsics, selected
   when __OPTIMIZE__ is not defined.  The builtin is given B first and A
   second, exactly as the inline definitions of the same intrinsics do, so
   optimized and non-optimized builds compute the same result.  The
   expansions are fully parenthesized and the arguments are cast to the
   vector types the inline versions use.  */
#define _mm_sqrt_round_sd(A, B, C)                                      \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df)(__m128d)(B),         \
    (__v2df)(__m128d)(A), (int)(C)))

#define _mm_sqrt_round_ss(A, B, C)                                      \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf)(__m128)(B),           \
    (__v4sf)(__m128)(A), (int)(C)))
1775 #endif
1776
1777 extern __inline __m512i
1778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1779 _mm512_cvtepi8_epi32 (__m128i __A)
1780 {
1781   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1782                                                     (__v16si)
1783                                                     _mm512_undefined_epi32 (),
1784                                                     (__mmask16) -1);
1785 }
1786
1787 extern __inline __m512i
1788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1789 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1790 {
1791   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1792                                                     (__v16si) __W,
1793                                                     (__mmask16) __U);
1794 }
1795
1796 extern __inline __m512i
1797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1799 {
1800   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1801                                                     (__v16si)
1802                                                     _mm512_setzero_si512 (),
1803                                                     (__mmask16) __U);
1804 }
1805
1806 extern __inline __m512i
1807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1808 _mm512_cvtepi8_epi64 (__m128i __A)
1809 {
1810   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1811                                                     (__v8di)
1812                                                     _mm512_undefined_epi32 (),
1813                                                     (__mmask8) -1);
1814 }
1815
1816 extern __inline __m512i
1817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1819 {
1820   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1821                                                     (__v8di) __W,
1822                                                     (__mmask8) __U);
1823 }
1824
1825 extern __inline __m512i
1826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1827 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1828 {
1829   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1830                                                     (__v8di)
1831                                                     _mm512_setzero_si512 (),
1832                                                     (__mmask8) __U);
1833 }
1834
1835 extern __inline __m512i
1836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1837 _mm512_cvtepi16_epi32 (__m256i __A)
1838 {
1839   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1840                                                     (__v16si)
1841                                                     _mm512_undefined_epi32 (),
1842                                                     (__mmask16) -1);
1843 }
1844
1845 extern __inline __m512i
1846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1847 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1848 {
1849   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1850                                                     (__v16si) __W,
1851                                                     (__mmask16) __U);
1852 }
1853
1854 extern __inline __m512i
1855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1856 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1857 {
1858   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1859                                                     (__v16si)
1860                                                     _mm512_setzero_si512 (),
1861                                                     (__mmask16) __U);
1862 }
1863
1864 extern __inline __m512i
1865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1866 _mm512_cvtepi16_epi64 (__m128i __A)
1867 {
1868   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1869                                                     (__v8di)
1870                                                     _mm512_undefined_epi32 (),
1871                                                     (__mmask8) -1);
1872 }
1873
1874 extern __inline __m512i
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1877 {
1878   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1879                                                     (__v8di) __W,
1880                                                     (__mmask8) __U);
1881 }
1882
1883 extern __inline __m512i
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1886 {
1887   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1888                                                     (__v8di)
1889                                                     _mm512_setzero_si512 (),
1890                                                     (__mmask8) __U);
1891 }
1892
1893 extern __inline __m512i
1894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1895 _mm512_cvtepi32_epi64 (__m256i __X)
1896 {
1897   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1898                                                     (__v8di)
1899                                                     _mm512_undefined_epi32 (),
1900                                                     (__mmask8) -1);
1901 }
1902
1903 extern __inline __m512i
1904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1905 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1906 {
1907   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1908                                                     (__v8di) __W,
1909                                                     (__mmask8) __U);
1910 }
1911
1912 extern __inline __m512i
1913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1914 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1915 {
1916   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1917                                                     (__v8di)
1918                                                     _mm512_setzero_si512 (),
1919                                                     (__mmask8) __U);
1920 }
1921
1922 extern __inline __m512i
1923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1924 _mm512_cvtepu8_epi32 (__m128i __A)
1925 {
1926   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1927                                                     (__v16si)
1928                                                     _mm512_undefined_epi32 (),
1929                                                     (__mmask16) -1);
1930 }
1931
1932 extern __inline __m512i
1933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1935 {
1936   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1937                                                     (__v16si) __W,
1938                                                     (__mmask16) __U);
1939 }
1940
1941 extern __inline __m512i
1942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1943 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1944 {
1945   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1946                                                     (__v16si)
1947                                                     _mm512_setzero_si512 (),
1948                                                     (__mmask16) __U);
1949 }
1950
1951 extern __inline __m512i
1952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1953 _mm512_cvtepu8_epi64 (__m128i __A)
1954 {
1955   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1956                                                     (__v8di)
1957                                                     _mm512_undefined_epi32 (),
1958                                                     (__mmask8) -1);
1959 }
1960
1961 extern __inline __m512i
1962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1963 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1964 {
1965   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1966                                                     (__v8di) __W,
1967                                                     (__mmask8) __U);
1968 }
1969
1970 extern __inline __m512i
1971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1972 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1973 {
1974   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1975                                                     (__v8di)
1976                                                     _mm512_setzero_si512 (),
1977                                                     (__mmask8) __U);
1978 }
1979
1980 extern __inline __m512i
1981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1982 _mm512_cvtepu16_epi32 (__m256i __A)
1983 {
1984   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1985                                                     (__v16si)
1986                                                     _mm512_undefined_epi32 (),
1987                                                     (__mmask16) -1);
1988 }
1989
1990 extern __inline __m512i
1991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1992 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1993 {
1994   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1995                                                     (__v16si) __W,
1996                                                     (__mmask16) __U);
1997 }
1998
1999 extern __inline __m512i
2000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2002 {
2003   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2004                                                     (__v16si)
2005                                                     _mm512_setzero_si512 (),
2006                                                     (__mmask16) __U);
2007 }
2008
2009 extern __inline __m512i
2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 _mm512_cvtepu16_epi64 (__m128i __A)
2012 {
2013   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2014                                                     (__v8di)
2015                                                     _mm512_undefined_epi32 (),
2016                                                     (__mmask8) -1);
2017 }
2018
2019 extern __inline __m512i
2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2022 {
2023   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2024                                                     (__v8di) __W,
2025                                                     (__mmask8) __U);
2026 }
2027
2028 extern __inline __m512i
2029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2030 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2031 {
2032   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2033                                                     (__v8di)
2034                                                     _mm512_setzero_si512 (),
2035                                                     (__mmask8) __U);
2036 }
2037
2038 extern __inline __m512i
2039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2040 _mm512_cvtepu32_epi64 (__m256i __X)
2041 {
2042   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2043                                                     (__v8di)
2044                                                     _mm512_undefined_epi32 (),
2045                                                     (__mmask8) -1);
2046 }
2047
2048 extern __inline __m512i
2049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2050 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2051 {
2052   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2053                                                     (__v8di) __W,
2054                                                     (__mmask8) __U);
2055 }
2056
2057 extern __inline __m512i
2058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2059 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2060 {
2061   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2062                                                     (__v8di)
2063                                                     _mm512_setzero_si512 (),
2064                                                     (__mmask8) __U);
2065 }
2066
2067 #ifdef __OPTIMIZE__
2068 extern __inline __m512d
2069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2070 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2071 {
2072   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2073                                                  (__v8df) __B,
2074                                                  (__v8df)
2075                                                  _mm512_undefined_pd (),
2076                                                  (__mmask8) -1, __R);
2077 }
2078
2079 extern __inline __m512d
2080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2081 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2082                           __m512d __B, const int __R)
2083 {
2084   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2085                                                  (__v8df) __B,
2086                                                  (__v8df) __W,
2087                                                  (__mmask8) __U, __R);
2088 }
2089
2090 extern __inline __m512d
2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2093                            const int __R)
2094 {
2095   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2096                                                  (__v8df) __B,
2097                                                  (__v8df)
2098                                                  _mm512_setzero_pd (),
2099                                                  (__mmask8) __U, __R);
2100 }
2101
2102 extern __inline __m512
2103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2104 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2105 {
2106   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2107                                                 (__v16sf) __B,
2108                                                 (__v16sf)
2109                                                 _mm512_undefined_ps (),
2110                                                 (__mmask16) -1, __R);
2111 }
2112
2113 extern __inline __m512
2114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2115 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2116                           __m512 __B, const int __R)
2117 {
2118   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2119                                                 (__v16sf) __B,
2120                                                 (__v16sf) __W,
2121                                                 (__mmask16) __U, __R);
2122 }
2123
2124 extern __inline __m512
2125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2126 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2127 {
2128   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2129                                                 (__v16sf) __B,
2130                                                 (__v16sf)
2131                                                 _mm512_setzero_ps (),
2132                                                 (__mmask16) __U, __R);
2133 }
2134
2135 extern __inline __m512d
2136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2137 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2138 {
2139   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2140                                                  (__v8df) __B,
2141                                                  (__v8df)
2142                                                  _mm512_undefined_pd (),
2143                                                  (__mmask8) -1, __R);
2144 }
2145
2146 extern __inline __m512d
2147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2148 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2149                           __m512d __B, const int __R)
2150 {
2151   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2152                                                  (__v8df) __B,
2153                                                  (__v8df) __W,
2154                                                  (__mmask8) __U, __R);
2155 }
2156
2157 extern __inline __m512d
2158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2159 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2160                            const int __R)
2161 {
2162   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2163                                                  (__v8df) __B,
2164                                                  (__v8df)
2165                                                  _mm512_setzero_pd (),
2166                                                  (__mmask8) __U, __R);
2167 }
2168
2169 extern __inline __m512
2170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2172 {
2173   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2174                                                 (__v16sf) __B,
2175                                                 (__v16sf)
2176                                                 _mm512_undefined_ps (),
2177                                                 (__mmask16) -1, __R);
2178 }
2179
2180 extern __inline __m512
2181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2183                           __m512 __B, const int __R)
2184 {
2185   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2186                                                 (__v16sf) __B,
2187                                                 (__v16sf) __W,
2188                                                 (__mmask16) __U, __R);
2189 }
2190
2191 extern __inline __m512
2192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2193 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2194 {
2195   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2196                                                 (__v16sf) __B,
2197                                                 (__v16sf)
2198                                                 _mm512_setzero_ps (),
2199                                                 (__mmask16) __U, __R);
2200 }
2201 #else
/* Macro forms of the add_round_pd intrinsics, expanding directly to
   __builtin_ia32_addpd512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   (e.g. a vector subscript) binds to the whole cast expression rather
   than to the builtin call underneath the cast.  */
#define _mm512_add_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2210
/* Macro forms of the add_round_ps intrinsics, expanding directly to
   __builtin_ia32_addps512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_add_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2219
/* Macro forms of the sub_round_pd intrinsics, expanding directly to
   __builtin_ia32_subpd512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_sub_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2228
/* Macro forms of the sub_round_ps intrinsics, expanding directly to
   __builtin_ia32_subps512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_sub_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2237 #endif
2238
2239 #ifdef __OPTIMIZE__
2240 extern __inline __m512d
2241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2242 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2243 {
2244   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2245                                                  (__v8df) __B,
2246                                                  (__v8df)
2247                                                  _mm512_undefined_pd (),
2248                                                  (__mmask8) -1, __R);
2249 }
2250
2251 extern __inline __m512d
2252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2253 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2254                           __m512d __B, const int __R)
2255 {
2256   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2257                                                  (__v8df) __B,
2258                                                  (__v8df) __W,
2259                                                  (__mmask8) __U, __R);
2260 }
2261
2262 extern __inline __m512d
2263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2264 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2265                            const int __R)
2266 {
2267   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2268                                                  (__v8df) __B,
2269                                                  (__v8df)
2270                                                  _mm512_setzero_pd (),
2271                                                  (__mmask8) __U, __R);
2272 }
2273
2274 extern __inline __m512
2275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2277 {
2278   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2279                                                 (__v16sf) __B,
2280                                                 (__v16sf)
2281                                                 _mm512_undefined_ps (),
2282                                                 (__mmask16) -1, __R);
2283 }
2284
2285 extern __inline __m512
2286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2288                           __m512 __B, const int __R)
2289 {
2290   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2291                                                 (__v16sf) __B,
2292                                                 (__v16sf) __W,
2293                                                 (__mmask16) __U, __R);
2294 }
2295
2296 extern __inline __m512
2297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2298 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2299 {
2300   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2301                                                 (__v16sf) __B,
2302                                                 (__v16sf)
2303                                                 _mm512_setzero_ps (),
2304                                                 (__mmask16) __U, __R);
2305 }
2306
2307 extern __inline __m512d
2308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2309 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2310 {
2311   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2312                                                  (__v8df) __V,
2313                                                  (__v8df)
2314                                                  _mm512_undefined_pd (),
2315                                                  (__mmask8) -1, __R);
2316 }
2317
2318 extern __inline __m512d
2319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2320 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2321                           __m512d __V, const int __R)
2322 {
2323   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2324                                                  (__v8df) __V,
2325                                                  (__v8df) __W,
2326                                                  (__mmask8) __U, __R);
2327 }
2328
2329 extern __inline __m512d
2330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2331 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2332                            const int __R)
2333 {
2334   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2335                                                  (__v8df) __V,
2336                                                  (__v8df)
2337                                                  _mm512_setzero_pd (),
2338                                                  (__mmask8) __U, __R);
2339 }
2340
2341 extern __inline __m512
2342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2343 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2344 {
2345   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2346                                                 (__v16sf) __B,
2347                                                 (__v16sf)
2348                                                 _mm512_undefined_ps (),
2349                                                 (__mmask16) -1, __R);
2350 }
2351
2352 extern __inline __m512
2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2355                           __m512 __B, const int __R)
2356 {
2357   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2358                                                 (__v16sf) __B,
2359                                                 (__v16sf) __W,
2360                                                 (__mmask16) __U, __R);
2361 }
2362
2363 extern __inline __m512
2364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2365 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2366 {
2367   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2368                                                 (__v16sf) __B,
2369                                                 (__v16sf)
2370                                                 _mm512_setzero_ps (),
2371                                                 (__mmask16) __U, __R);
2372 }
2373
2374 extern __inline __m128d
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2377 {
2378   return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2379                                                (__v2df) __B,
2380                                                __R);
2381 }
2382
2383 extern __inline __m128
2384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2385 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2386 {
2387   return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2388                                               (__v4sf) __B,
2389                                               __R);
2390 }
2391
2392 extern __inline __m128d
2393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2394 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2395 {
2396   return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2397                                                (__v2df) __B,
2398                                                __R);
2399 }
2400
2401 extern __inline __m128
2402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2403 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2404 {
2405   return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2406                                               (__v4sf) __B,
2407                                               __R);
2408 }
2409
2410 #else
/* Macro forms of the mul_round_pd intrinsics (the #else branch of the
   __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_mulpd512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_mul_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2419
/* Macro forms of the mul_round_ps intrinsics (the #else branch of the
   __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_mulps512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_mul_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))

#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2428
/* Macro forms of the div_round_pd intrinsics (the #else branch of the
   __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_divpd512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_div_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2437
/* Macro forms of the div_round_ps intrinsics (the #else branch of the
   __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_divps512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_div_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))

#define _mm512_maskz_div_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2446
/* Macro forms of the scalar mul/div *_round_sd and *_round_ss
   intrinsics (the #else branch of the __OPTIMIZE__ test).  Each one
   expands to the matching *_round builtin with A, B and C passed
   straight through.  The replacement lists are wrapped in parentheses
   so that a postfix operator applied to the macro result binds to the
   whole cast expression rather than to the builtin call underneath the
   cast.  */
#define _mm_mul_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_mulsd_round(A, B, C))

#define _mm_mul_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_mulss_round(A, B, C))

#define _mm_div_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_divsd_round(A, B, C))

#define _mm_div_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_divss_round(A, B, C))
2458 #endif
2459
2460 #ifdef __OPTIMIZE__
2461 extern __inline __m512d
2462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2463 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2464 {
2465   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2466                                                  (__v8df) __B,
2467                                                  (__v8df)
2468                                                  _mm512_undefined_pd (),
2469                                                  (__mmask8) -1, __R);
2470 }
2471
2472 extern __inline __m512d
2473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2475                           __m512d __B, const int __R)
2476 {
2477   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2478                                                  (__v8df) __B,
2479                                                  (__v8df) __W,
2480                                                  (__mmask8) __U, __R);
2481 }
2482
2483 extern __inline __m512d
2484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2485 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2486                            const int __R)
2487 {
2488   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2489                                                  (__v8df) __B,
2490                                                  (__v8df)
2491                                                  _mm512_setzero_pd (),
2492                                                  (__mmask8) __U, __R);
2493 }
2494
2495 extern __inline __m512
2496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2497 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2498 {
2499   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2500                                                 (__v16sf) __B,
2501                                                 (__v16sf)
2502                                                 _mm512_undefined_ps (),
2503                                                 (__mmask16) -1, __R);
2504 }
2505
2506 extern __inline __m512
2507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2508 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2509                           __m512 __B, const int __R)
2510 {
2511   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2512                                                 (__v16sf) __B,
2513                                                 (__v16sf) __W,
2514                                                 (__mmask16) __U, __R);
2515 }
2516
2517 extern __inline __m512
2518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2519 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2520 {
2521   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2522                                                 (__v16sf) __B,
2523                                                 (__v16sf)
2524                                                 _mm512_setzero_ps (),
2525                                                 (__mmask16) __U, __R);
2526 }
2527
2528 extern __inline __m512d
2529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2530 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2531 {
2532   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2533                                                  (__v8df) __B,
2534                                                  (__v8df)
2535                                                  _mm512_undefined_pd (),
2536                                                  (__mmask8) -1, __R);
2537 }
2538
2539 extern __inline __m512d
2540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2541 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2542                           __m512d __B, const int __R)
2543 {
2544   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2545                                                  (__v8df) __B,
2546                                                  (__v8df) __W,
2547                                                  (__mmask8) __U, __R);
2548 }
2549
2550 extern __inline __m512d
2551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2553                            const int __R)
2554 {
2555   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2556                                                  (__v8df) __B,
2557                                                  (__v8df)
2558                                                  _mm512_setzero_pd (),
2559                                                  (__mmask8) __U, __R);
2560 }
2561
2562 extern __inline __m512
2563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2564 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2565 {
2566   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2567                                                 (__v16sf) __B,
2568                                                 (__v16sf)
2569                                                 _mm512_undefined_ps (),
2570                                                 (__mmask16) -1, __R);
2571 }
2572
2573 extern __inline __m512
2574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2576                           __m512 __B, const int __R)
2577 {
2578   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2579                                                 (__v16sf) __B,
2580                                                 (__v16sf) __W,
2581                                                 (__mmask16) __U, __R);
2582 }
2583
2584 extern __inline __m512
2585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2587 {
2588   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2589                                                 (__v16sf) __B,
2590                                                 (__v16sf)
2591                                                 _mm512_setzero_ps (),
2592                                                 (__mmask16) __U, __R);
2593 }
2594 #else
/* Macro forms of the max_round_pd intrinsics (the #else branch of the
   __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_maxpd512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_max_round_pd(A, B,  R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_max_round_pd(W, U,  A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_pd(U, A,  B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2603
/* Macro form of _mm512_max_round_ps (the #else branch of the
   __OPTIMIZE__ test).  The third operand comes from
   _mm512_undefined_ps (), matching both the inline-function definition
   of this intrinsic and the sibling _mm512_min_round_ps macro; the
   previous text borrowed _mm512_undefined_pd () and cast it to
   __v16sf.  The replacement list is parenthesized so that a postfix
   operator applied to the macro result binds to the whole cast
   expression.  */
#define _mm512_max_round_ps(A, B,  R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
2606
/* Macro forms of the masked max_round_ps intrinsics (the #else branch
   of the __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_maxps512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_mask_max_round_ps(W, U,  A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))

#define _mm512_maskz_max_round_ps(U, A,  B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2612
/* Macro forms of the min_round_pd intrinsics (the #else branch of the
   __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_minpd512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_min_round_pd(A, B,  R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))

#define _mm512_mask_min_round_pd(W, U,  A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_pd(U, A,  B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2621
/* Macro forms of the min_round_ps intrinsics (the #else branch of the
   __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_minps512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_min_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))

#define _mm512_mask_min_round_ps(W, U,  A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))

#define _mm512_maskz_min_round_ps(U, A,  B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2630 #endif
2631
2632 #ifdef __OPTIMIZE__
2633 extern __inline __m512d
2634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2635 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2636 {
2637   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2638                                                     (__v8df) __B,
2639                                                     (__v8df)
2640                                                     _mm512_undefined_pd (),
2641                                                     (__mmask8) -1, __R);
2642 }
2643
2644 extern __inline __m512d
2645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2646 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2647                              __m512d __B, const int __R)
2648 {
2649   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2650                                                     (__v8df) __B,
2651                                                     (__v8df) __W,
2652                                                     (__mmask8) __U, __R);
2653 }
2654
2655 extern __inline __m512d
2656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2657 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2658                               const int __R)
2659 {
2660   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2661                                                     (__v8df) __B,
2662                                                     (__v8df)
2663                                                     _mm512_setzero_pd (),
2664                                                     (__mmask8) __U, __R);
2665 }
2666
2667 extern __inline __m512
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2670 {
2671   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2672                                                    (__v16sf) __B,
2673                                                    (__v16sf)
2674                                                    _mm512_undefined_ps (),
2675                                                    (__mmask16) -1, __R);
2676 }
2677
2678 extern __inline __m512
2679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2680 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2681                              __m512 __B, const int __R)
2682 {
2683   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2684                                                    (__v16sf) __B,
2685                                                    (__v16sf) __W,
2686                                                    (__mmask16) __U, __R);
2687 }
2688
2689 extern __inline __m512
2690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2691 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2692                               const int __R)
2693 {
2694   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2695                                                    (__v16sf) __B,
2696                                                    (__v16sf)
2697                                                    _mm512_setzero_ps (),
2698                                                    (__mmask16) __U, __R);
2699 }
2700
2701 extern __inline __m128d
2702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2703 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2704 {
2705   return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2706                                                   (__v2df) __B,
2707                                                   __R);
2708 }
2709
2710 extern __inline __m128
2711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2712 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2713 {
2714   return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2715                                                  (__v4sf) __B,
2716                                                  __R);
2717 }
2718 #else
/* Macro forms of the scalef_round_pd intrinsics (the #else branch of
   the __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_scalefpd512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_scalef_round_pd(A, B, C)            \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2727
/* Macro forms of the scalef_round_ps intrinsics (the #else branch of
   the __OPTIMIZE__ test), expanding directly to
   __builtin_ia32_scalefps512_mask.  Each replacement list is wrapped in
   parentheses so that a postfix operator applied to the macro result
   binds to the whole cast expression rather than to the builtin call
   underneath the cast.  */
#define _mm512_scalef_round_ps(A, B, C)            \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))

#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2736
/* Macro forms of the scalar scalef_round_sd / scalef_round_ss
   intrinsics (the #else branch of the __OPTIMIZE__ test).  Each one
   expands to the matching *_round builtin with A, B and C passed
   straight through.  The replacement lists are wrapped in parentheses
   so that a postfix operator applied to the macro result binds to the
   whole cast expression rather than to the builtin call underneath the
   cast.  */
#define _mm_scalef_round_sd(A, B, C)            \
    ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))

#define _mm_scalef_round_ss(A, B, C)            \
    ((__m128)__builtin_ia32_scalefss_round(A, B, C))
2742 #endif
2743
2744 #ifdef __OPTIMIZE__
2745 extern __inline __m512d
2746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2748 {
2749   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2750                                                     (__v8df) __B,
2751                                                     (__v8df) __C,
2752                                                     (__mmask8) -1, __R);
2753 }
2754
2755 extern __inline __m512d
2756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2757 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2758                             __m512d __C, const int __R)
2759 {
2760   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2761                                                     (__v8df) __B,
2762                                                     (__v8df) __C,
2763                                                     (__mmask8) __U, __R);
2764 }
2765
2766 extern __inline __m512d
2767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2768 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2769                              __mmask8 __U, const int __R)
2770 {
2771   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2772                                                      (__v8df) __B,
2773                                                      (__v8df) __C,
2774                                                      (__mmask8) __U, __R);
2775 }
2776
2777 extern __inline __m512d
2778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2779 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2780                              __m512d __C, const int __R)
2781 {
2782   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2783                                                      (__v8df) __B,
2784                                                      (__v8df) __C,
2785                                                      (__mmask8) __U, __R);
2786 }
2787
2788 extern __inline __m512
2789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2790 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2791 {
2792   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2793                                                    (__v16sf) __B,
2794                                                    (__v16sf) __C,
2795                                                    (__mmask16) -1, __R);
2796 }
2797
2798 extern __inline __m512
2799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2800 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2801                             __m512 __C, const int __R)
2802 {
2803   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2804                                                    (__v16sf) __B,
2805                                                    (__v16sf) __C,
2806                                                    (__mmask16) __U, __R);
2807 }
2808
2809 extern __inline __m512
2810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2811 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2812                              __mmask16 __U, const int __R)
2813 {
2814   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2815                                                     (__v16sf) __B,
2816                                                     (__v16sf) __C,
2817                                                     (__mmask16) __U, __R);
2818 }
2819
2820 extern __inline __m512
2821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2823                              __m512 __C, const int __R)
2824 {
2825   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2826                                                     (__v16sf) __B,
2827                                                     (__v16sf) __C,
2828                                                     (__mmask16) __U, __R);
2829 }
2830
2831 extern __inline __m512d
2832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2833 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2834 {
2835   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2836                                                     (__v8df) __B,
2837                                                     -(__v8df) __C,
2838                                                     (__mmask8) -1, __R);
2839 }
2840
2841 extern __inline __m512d
2842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2843 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2844                             __m512d __C, const int __R)
2845 {
2846   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2847                                                     (__v8df) __B,
2848                                                     -(__v8df) __C,
2849                                                     (__mmask8) __U, __R);
2850 }
2851
2852 extern __inline __m512d
2853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2855                              __mmask8 __U, const int __R)
2856 {
2857   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2858                                                      (__v8df) __B,
2859                                                      (__v8df) __C,
2860                                                      (__mmask8) __U, __R);
2861 }
2862
2863 extern __inline __m512d
2864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2865 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2866                              __m512d __C, const int __R)
2867 {
2868   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2869                                                      (__v8df) __B,
2870                                                      -(__v8df) __C,
2871                                                      (__mmask8) __U, __R);
2872 }
2873
2874 extern __inline __m512
2875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2876 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2877 {
2878   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2879                                                    (__v16sf) __B,
2880                                                    -(__v16sf) __C,
2881                                                    (__mmask16) -1, __R);
2882 }
2883
2884 extern __inline __m512
2885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2886 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2887                             __m512 __C, const int __R)
2888 {
2889   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2890                                                    (__v16sf) __B,
2891                                                    -(__v16sf) __C,
2892                                                    (__mmask16) __U, __R);
2893 }
2894
2895 extern __inline __m512
2896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2897 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2898                              __mmask16 __U, const int __R)
2899 {
2900   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2901                                                     (__v16sf) __B,
2902                                                     (__v16sf) __C,
2903                                                     (__mmask16) __U, __R);
2904 }
2905
2906 extern __inline __m512
2907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2908 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2909                              __m512 __C, const int __R)
2910 {
2911   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2912                                                     (__v16sf) __B,
2913                                                     -(__v16sf) __C,
2914                                                     (__mmask16) __U, __R);
2915 }
2916
2917 extern __inline __m512d
2918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2919 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2920 {
2921   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2922                                                        (__v8df) __B,
2923                                                        (__v8df) __C,
2924                                                        (__mmask8) -1, __R);
2925 }
2926
2927 extern __inline __m512d
2928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2929 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2930                                __m512d __C, const int __R)
2931 {
2932   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2933                                                        (__v8df) __B,
2934                                                        (__v8df) __C,
2935                                                        (__mmask8) __U, __R);
2936 }
2937
2938 extern __inline __m512d
2939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2940 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2941                                 __mmask8 __U, const int __R)
2942 {
2943   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2944                                                         (__v8df) __B,
2945                                                         (__v8df) __C,
2946                                                         (__mmask8) __U, __R);
2947 }
2948
2949 extern __inline __m512d
2950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2951 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2952                                 __m512d __C, const int __R)
2953 {
2954   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2955                                                         (__v8df) __B,
2956                                                         (__v8df) __C,
2957                                                         (__mmask8) __U, __R);
2958 }
2959
2960 extern __inline __m512
2961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2962 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2963 {
2964   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2965                                                       (__v16sf) __B,
2966                                                       (__v16sf) __C,
2967                                                       (__mmask16) -1, __R);
2968 }
2969
2970 extern __inline __m512
2971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2972 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2973                                __m512 __C, const int __R)
2974 {
2975   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2976                                                       (__v16sf) __B,
2977                                                       (__v16sf) __C,
2978                                                       (__mmask16) __U, __R);
2979 }
2980
2981 extern __inline __m512
2982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2983 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2984                                 __mmask16 __U, const int __R)
2985 {
2986   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2987                                                        (__v16sf) __B,
2988                                                        (__v16sf) __C,
2989                                                        (__mmask16) __U, __R);
2990 }
2991
2992 extern __inline __m512
2993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2994 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2995                                 __m512 __C, const int __R)
2996 {
2997   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2998                                                        (__v16sf) __B,
2999                                                        (__v16sf) __C,
3000                                                        (__mmask16) __U, __R);
3001 }
3002
3003 extern __inline __m512d
3004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3005 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3006 {
3007   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3008                                                        (__v8df) __B,
3009                                                        -(__v8df) __C,
3010                                                        (__mmask8) -1, __R);
3011 }
3012
3013 extern __inline __m512d
3014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3015 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3016                                __m512d __C, const int __R)
3017 {
3018   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3019                                                        (__v8df) __B,
3020                                                        -(__v8df) __C,
3021                                                        (__mmask8) __U, __R);
3022 }
3023
3024 extern __inline __m512d
3025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3026 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3027                                 __mmask8 __U, const int __R)
3028 {
3029   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3030                                                         (__v8df) __B,
3031                                                         (__v8df) __C,
3032                                                         (__mmask8) __U, __R);
3033 }
3034
3035 extern __inline __m512d
3036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3037 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3038                                 __m512d __C, const int __R)
3039 {
3040   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3041                                                         (__v8df) __B,
3042                                                         -(__v8df) __C,
3043                                                         (__mmask8) __U, __R);
3044 }
3045
3046 extern __inline __m512
3047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3048 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3049 {
3050   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3051                                                       (__v16sf) __B,
3052                                                       -(__v16sf) __C,
3053                                                       (__mmask16) -1, __R);
3054 }
3055
3056 extern __inline __m512
3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3059                                __m512 __C, const int __R)
3060 {
3061   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3062                                                       (__v16sf) __B,
3063                                                       -(__v16sf) __C,
3064                                                       (__mmask16) __U, __R);
3065 }
3066
3067 extern __inline __m512
3068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3069 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3070                                 __mmask16 __U, const int __R)
3071 {
3072   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3073                                                        (__v16sf) __B,
3074                                                        (__v16sf) __C,
3075                                                        (__mmask16) __U, __R);
3076 }
3077
3078 extern __inline __m512
3079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3080 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3081                                 __m512 __C, const int __R)
3082 {
3083   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3084                                                        (__v16sf) __B,
3085                                                        -(__v16sf) __C,
3086                                                        (__mmask16) __U, __R);
3087 }
3088
3089 extern __inline __m512d
3090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3091 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3092 {
3093   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3094                                                     (__v8df) __B,
3095                                                     (__v8df) __C,
3096                                                     (__mmask8) -1, __R);
3097 }
3098
3099 extern __inline __m512d
3100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3102                              __m512d __C, const int __R)
3103 {
3104   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3105                                                      (__v8df) __B,
3106                                                      (__v8df) __C,
3107                                                      (__mmask8) __U, __R);
3108 }
3109
3110 extern __inline __m512d
3111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3112 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3113                               __mmask8 __U, const int __R)
3114 {
3115   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3116                                                      (__v8df) __B,
3117                                                      (__v8df) __C,
3118                                                      (__mmask8) __U, __R);
3119 }
3120
3121 extern __inline __m512d
3122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3123 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3124                               __m512d __C, const int __R)
3125 {
3126   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3127                                                      (__v8df) __B,
3128                                                      (__v8df) __C,
3129                                                      (__mmask8) __U, __R);
3130 }
3131
3132 extern __inline __m512
3133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3134 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3135 {
3136   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3137                                                    (__v16sf) __B,
3138                                                    (__v16sf) __C,
3139                                                    (__mmask16) -1, __R);
3140 }
3141
3142 extern __inline __m512
3143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3144 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3145                              __m512 __C, const int __R)
3146 {
3147   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3148                                                     (__v16sf) __B,
3149                                                     (__v16sf) __C,
3150                                                     (__mmask16) __U, __R);
3151 }
3152
3153 extern __inline __m512
3154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3155 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3156                               __mmask16 __U, const int __R)
3157 {
3158   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3159                                                     (__v16sf) __B,
3160                                                     (__v16sf) __C,
3161                                                     (__mmask16) __U, __R);
3162 }
3163
3164 extern __inline __m512
3165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3166 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3167                               __m512 __C, const int __R)
3168 {
3169   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3170                                                     (__v16sf) __B,
3171                                                     (__v16sf) __C,
3172                                                     (__mmask16) __U, __R);
3173 }
3174
3175 extern __inline __m512d
3176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3178 {
3179   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3180                                                     (__v8df) __B,
3181                                                     -(__v8df) __C,
3182                                                     (__mmask8) -1, __R);
3183 }
3184
3185 extern __inline __m512d
3186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3187 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3188                              __m512d __C, const int __R)
3189 {
3190   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3191                                                      (__v8df) __B,
3192                                                      (__v8df) __C,
3193                                                      (__mmask8) __U, __R);
3194 }
3195
3196 extern __inline __m512d
3197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3198 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3199                               __mmask8 __U, const int __R)
3200 {
3201   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3202                                                       (__v8df) __B,
3203                                                       (__v8df) __C,
3204                                                       (__mmask8) __U, __R);
3205 }
3206
3207 extern __inline __m512d
3208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3209 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3210                               __m512d __C, const int __R)
3211 {
3212   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3213                                                      (__v8df) __B,
3214                                                      -(__v8df) __C,
3215                                                      (__mmask8) __U, __R);
3216 }
3217
3218 extern __inline __m512
3219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3220 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3221 {
3222   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3223                                                    (__v16sf) __B,
3224                                                    -(__v16sf) __C,
3225                                                    (__mmask16) -1, __R);
3226 }
3227
3228 extern __inline __m512
3229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3230 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3231                              __m512 __C, const int __R)
3232 {
3233   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3234                                                     (__v16sf) __B,
3235                                                     (__v16sf) __C,
3236                                                     (__mmask16) __U, __R);
3237 }
3238
3239 extern __inline __m512
3240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3241 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3242                               __mmask16 __U, const int __R)
3243 {
3244   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3245                                                      (__v16sf) __B,
3246                                                      (__v16sf) __C,
3247                                                      (__mmask16) __U, __R);
3248 }
3249
3250 extern __inline __m512
3251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3252 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3253                               __m512 __C, const int __R)
3254 {
3255   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3256                                                     (__v16sf) __B,
3257                                                     -(__v16sf) __C,
3258                                                     (__mmask16) __U, __R);
3259 }
3260 #else
/* Macro forms (#else branch) of the packed-double fmadd round intrinsics.
   Each expands to one vfmaddpd512 builtin call; the unmasked form passes
   -1 as the mask.  */
#define _mm512_fmadd_round_pd(a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask (a, b, c, -1, rc)

#define _mm512_mask_fmadd_round_pd(a, k, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask (a, b, c, k, rc)

#define _mm512_mask3_fmadd_round_pd(a, b, c, k, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fmadd_round_pd(k, a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz (a, b, c, k, rc)
3272
/* Macro forms (#else branch) of the packed-single fmadd round intrinsics.
   Each expands to one vfmaddps512 builtin call; the unmasked form passes
   -1 as the mask.  */
#define _mm512_fmadd_round_ps(a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_mask (a, b, c, -1, rc)

#define _mm512_mask_fmadd_round_ps(a, k, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_mask (a, b, c, k, rc)

#define _mm512_mask3_fmadd_round_ps(a, b, c, k, rc) \
  (__m512)__builtin_ia32_vfmaddps512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fmadd_round_ps(k, a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_maskz (a, b, c, k, rc)
3284
/* Macro forms (#else branch) of the packed-double fmsub round intrinsics.
   All but the mask3 form negate the third operand and reuse a vfmaddpd512
   builtin; mask3 calls the vfmsubpd512 builtin directly.  */
#define _mm512_fmsub_round_pd(a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask (a, b, -(c), -1, rc)

#define _mm512_mask_fmsub_round_pd(a, k, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask (a, b, -(c), k, rc)

#define _mm512_mask3_fmsub_round_pd(a, b, c, k, rc) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fmsub_round_pd(k, a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz (a, b, -(c), k, rc)
3296
/* Macro forms (#else branch) of the packed-single fmsub round intrinsics.
   All but the mask3 form negate the third operand and reuse a vfmaddps512
   builtin; mask3 calls the vfmsubps512 builtin directly.  */
#define _mm512_fmsub_round_ps(a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_mask (a, b, -(c), -1, rc)

#define _mm512_mask_fmsub_round_ps(a, k, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_mask (a, b, -(c), k, rc)

#define _mm512_mask3_fmsub_round_ps(a, b, c, k, rc) \
  (__m512)__builtin_ia32_vfmsubps512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fmsub_round_ps(k, a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_maskz (a, b, -(c), k, rc)
3308
/* Macro form of the unmasked packed-double fmaddsub round intrinsic:
   the vfmaddsubpd512 "mask" builtin with -1 as the mask.  */
#define _mm512_fmaddsub_round_pd(a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask (a, b, c, -1, rc)
3311
/* Macro form of the masked packed-double fmaddsub round intrinsic.
   It must expand to the vfmaddsubpd512 "mask" builtin, as the inline
   definition of this intrinsic and the packed-single macro both do;
   the plain vfmaddpd512 builtin is the one used by the fmadd macros.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3314
/* Macro forms of the mask3/maskz packed-double fmaddsub round
   intrinsics; each expands to the matching vfmaddsubpd512 builtin.  */
#define _mm512_mask3_fmaddsub_round_pd(a, b, c, k, rc) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fmaddsub_round_pd(k, a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz (a, b, c, k, rc)
3320
/* Macro forms (#else branch) of the packed-single fmaddsub round
   intrinsics.  Each expands to one vfmaddsubps512 builtin call; the
   unmasked form passes -1 as the mask.  */
#define _mm512_fmaddsub_round_ps(a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask (a, b, c, -1, rc)

#define _mm512_mask_fmaddsub_round_ps(a, k, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask (a, b, c, k, rc)

#define _mm512_mask3_fmaddsub_round_ps(a, b, c, k, rc) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fmaddsub_round_ps(k, a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz (a, b, c, k, rc)
3332
/* Macro forms (#else branch) of the packed-double fmsubadd round
   intrinsics.  All but the mask3 form negate the third operand and reuse
   a vfmaddsubpd512 builtin; mask3 calls vfmsubaddpd512 directly.  */
#define _mm512_fmsubadd_round_pd(a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask (a, b, -(c), -1, rc)

#define _mm512_mask_fmsubadd_round_pd(a, k, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask (a, b, -(c), k, rc)

#define _mm512_mask3_fmsubadd_round_pd(a, b, c, k, rc) \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fmsubadd_round_pd(k, a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz (a, b, -(c), k, rc)
3344
/* Macro forms (#else branch) of the packed-single fmsubadd round
   intrinsics.  All but the mask3 form negate the third operand and reuse
   a vfmaddsubps512 builtin; mask3 calls vfmsubaddps512 directly.  */
#define _mm512_fmsubadd_round_ps(a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask (a, b, -(c), -1, rc)

#define _mm512_mask_fmsubadd_round_ps(a, k, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask (a, b, -(c), k, rc)

#define _mm512_mask3_fmsubadd_round_ps(a, b, c, k, rc) \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fmsubadd_round_ps(k, a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz (a, b, -(c), k, rc)
3356
/* Macro form of the unmasked packed-double fnmadd round intrinsic:
   negates the first operand and reuses the vfmaddpd512 "mask" builtin
   with -1 as the mask.  */
#define _mm512_fnmadd_round_pd(a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask (-(a), b, c, -1, rc)
3359
/* Macro form of the masked packed-double fnmadd round intrinsic.
   A is passed unnegated, exactly as the inline definition of this
   intrinsic passes __A to the same vfnmaddpd512 "mask" builtin; negating
   it here as well would apply the negation twice.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
    (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3362
/* Macro forms of the mask3/maskz packed-double fnmadd round intrinsics:
   both negate the first operand and reuse a vfmaddpd512 builtin.  */
#define _mm512_mask3_fnmadd_round_pd(a, b, c, k, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3 (-(a), b, c, k, rc)

#define _mm512_maskz_fnmadd_round_pd(k, a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz (-(a), b, c, k, rc)
3368
/* Macro form of the unmasked packed-single fnmadd round intrinsic:
   negates the first operand and reuses the vfmaddps512 "mask" builtin
   with -1 as the mask.  */
#define _mm512_fnmadd_round_ps(a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_mask (-(a), b, c, -1, rc)
3371
/* Macro form of the masked packed-single fnmadd round intrinsic.
   A is passed unnegated, exactly as the inline definition of this
   intrinsic passes __A to the same vfnmaddps512 "mask" builtin; negating
   it here as well would apply the negation twice.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
    (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3374
/* Macro forms of the mask3/maskz packed-single fnmadd round intrinsics:
   both negate the first operand and reuse a vfmaddps512 builtin.  */
#define _mm512_mask3_fnmadd_round_ps(a, b, c, k, rc) \
  (__m512)__builtin_ia32_vfmaddps512_mask3 (-(a), b, c, k, rc)

#define _mm512_maskz_fnmadd_round_ps(k, a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_maskz (-(a), b, c, k, rc)
3380
/* Macro forms (#else branch) of the packed-double fnmsub round
   intrinsics.  The unmasked and maskz forms negate the first and third
   operands and reuse a vfmaddpd512 builtin; the mask and mask3 forms call
   the vfnmsubpd512 builtins with the operands unmodified.  */
#define _mm512_fnmsub_round_pd(a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask (-(a), b, -(c), -1, rc)

#define _mm512_mask_fnmsub_round_pd(a, k, b, c, rc) \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask (a, b, c, k, rc)

#define _mm512_mask3_fnmsub_round_pd(a, b, c, k, rc) \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fnmsub_round_pd(k, a, b, c, rc) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz (-(a), b, -(c), k, rc)
3392
/* Macro forms (#else branch) of the packed-single fnmsub round
   intrinsics.  The unmasked and maskz forms negate the first and third
   operands and reuse a vfmaddps512 builtin; the mask and mask3 forms call
   the vfnmsubps512 builtins with the operands unmodified.  */
#define _mm512_fnmsub_round_ps(a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_mask (-(a), b, -(c), -1, rc)

#define _mm512_mask_fnmsub_round_ps(a, k, b, c, rc) \
  (__m512)__builtin_ia32_vfnmsubps512_mask (a, b, c, k, rc)

#define _mm512_mask3_fnmsub_round_ps(a, b, c, k, rc) \
  (__m512)__builtin_ia32_vfnmsubps512_mask3 (a, b, c, k, rc)

#define _mm512_maskz_fnmsub_round_ps(k, a, b, c, rc) \
  (__m512)__builtin_ia32_vfmaddps512_maskz (-(a), b, -(c), k, rc)
3404 #endif
3405
3406 extern __inline __m512i
3407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3408 _mm512_abs_epi64 (__m512i __A)
3409 {
3410   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3411                                                  (__v8di)
3412                                                  _mm512_undefined_epi32 (),
3413                                                  (__mmask8) -1);
3414 }
3415
3416 extern __inline __m512i
3417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3418 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3419 {
3420   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3421                                                  (__v8di) __W,
3422                                                  (__mmask8) __U);
3423 }
3424
3425 extern __inline __m512i
3426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3427 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3428 {
3429   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3430                                                  (__v8di)
3431                                                  _mm512_setzero_si512 (),
3432                                                  (__mmask8) __U);
3433 }
3434
3435 extern __inline __m512i
3436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3437 _mm512_abs_epi32 (__m512i __A)
3438 {
3439   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3440                                                  (__v16si)
3441                                                  _mm512_undefined_epi32 (),
3442                                                  (__mmask16) -1);
3443 }
3444
3445 extern __inline __m512i
3446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3448 {
3449   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3450                                                  (__v16si) __W,
3451                                                  (__mmask16) __U);
3452 }
3453
3454 extern __inline __m512i
3455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3456 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3457 {
3458   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3459                                                  (__v16si)
3460                                                  _mm512_setzero_si512 (),
3461                                                  (__mmask16) __U);
3462 }
3463
3464 extern __inline __m512
3465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3466 _mm512_broadcastss_ps (__m128 __A)
3467 {
3468   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3469                                                  (__v16sf)
3470                                                  _mm512_undefined_ps (),
3471                                                  (__mmask16) -1);
3472 }
3473
3474 extern __inline __m512
3475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3476 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3477 {
3478   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3479                                                  (__v16sf) __O, __M);
3480 }
3481
3482 extern __inline __m512
3483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3484 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3485 {
3486   return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3487                                                  (__v16sf)
3488                                                  _mm512_setzero_ps (),
3489                                                  __M);
3490 }
3491
3492 extern __inline __m512d
3493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3494 _mm512_broadcastsd_pd (__m128d __A)
3495 {
3496   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3497                                                   (__v8df)
3498                                                   _mm512_undefined_pd (),
3499                                                   (__mmask8) -1);
3500 }
3501
3502 extern __inline __m512d
3503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3504 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3505 {
3506   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3507                                                   (__v8df) __O, __M);
3508 }
3509
3510 extern __inline __m512d
3511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3513 {
3514   return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3515                                                   (__v8df)
3516                                                   _mm512_setzero_pd (),
3517                                                   __M);
3518 }
3519
3520 extern __inline __m512i
3521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3522 _mm512_broadcastd_epi32 (__m128i __A)
3523 {
3524   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3525                                                   (__v16si)
3526                                                   _mm512_undefined_epi32 (),
3527                                                   (__mmask16) -1);
3528 }
3529
3530 extern __inline __m512i
3531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3532 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3533 {
3534   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3535                                                   (__v16si) __O, __M);
3536 }
3537
3538 extern __inline __m512i
3539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3540 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3541 {
3542   return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3543                                                   (__v16si)
3544                                                   _mm512_setzero_si512 (),
3545                                                   __M);
3546 }
3547
3548 extern __inline __m512i
3549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3550 _mm512_set1_epi32 (int __A)
3551 {
3552   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3553                                                            (__v16si)
3554                                                            _mm512_undefined_epi32 (),
3555                                                            (__mmask16)(-1));
3556 }
3557
3558 extern __inline __m512i
3559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3560 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3561 {
3562   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3563                                                            __M);
3564 }
3565
3566 extern __inline __m512i
3567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3568 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3569 {
3570   return (__m512i)
3571          __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3572                                                  (__v16si) _mm512_setzero_si512 (),
3573                                                  __M);
3574 }
3575
3576 extern __inline __m512i
3577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578 _mm512_broadcastq_epi64 (__m128i __A)
3579 {
3580   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3581                                                   (__v8di)
3582                                                   _mm512_undefined_epi32 (),
3583                                                   (__mmask8) -1);
3584 }
3585
3586 extern __inline __m512i
3587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3588 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3589 {
3590   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3591                                                   (__v8di) __O, __M);
3592 }
3593
3594 extern __inline __m512i
3595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3596 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3597 {
3598   return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3599                                                   (__v8di)
3600                                                   _mm512_setzero_si512 (),
3601                                                   __M);
3602 }
3603
3604 extern __inline __m512i
3605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3606 _mm512_set1_epi64 (long long __A)
3607 {
3608   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3609                                                            (__v8di)
3610                                                            _mm512_undefined_epi32 (),
3611                                                            (__mmask8)(-1));
3612 }
3613
3614 extern __inline __m512i
3615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3616 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3617 {
3618   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3619                                                            __M);
3620 }
3621
3622 extern __inline __m512i
3623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3624 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3625 {
3626   return (__m512i)
3627          __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3628                                                  (__v8di) _mm512_setzero_si512 (),
3629                                                  __M);
3630 }
3631
3632 extern __inline __m512
3633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3634 _mm512_broadcast_f32x4 (__m128 __A)
3635 {
3636   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3637                                                      (__v16sf)
3638                                                      _mm512_undefined_ps (),
3639                                                      (__mmask16) -1);
3640 }
3641
3642 extern __inline __m512
3643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3644 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3645 {
3646   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3647                                                      (__v16sf) __O,
3648                                                      __M);
3649 }
3650
3651 extern __inline __m512
3652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3653 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3654 {
3655   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3656                                                      (__v16sf)
3657                                                      _mm512_setzero_ps (),
3658                                                      __M);
3659 }
3660
3661 extern __inline __m512i
3662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3663 _mm512_broadcast_i32x4 (__m128i __A)
3664 {
3665   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3666                                                       (__v16si)
3667                                                       _mm512_undefined_epi32 (),
3668                                                       (__mmask16) -1);
3669 }
3670
3671 extern __inline __m512i
3672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3673 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3674 {
3675   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3676                                                       (__v16si) __O,
3677                                                       __M);
3678 }
3679
3680 extern __inline __m512i
3681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3682 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3683 {
3684   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3685                                                       (__v16si)
3686                                                       _mm512_setzero_si512 (),
3687                                                       __M);
3688 }
3689
3690 extern __inline __m512d
3691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3692 _mm512_broadcast_f64x4 (__m256d __A)
3693 {
3694   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3695                                                       (__v8df)
3696                                                       _mm512_undefined_pd (),
3697                                                       (__mmask8) -1);
3698 }
3699
3700 extern __inline __m512d
3701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3702 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3703 {
3704   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3705                                                       (__v8df) __O,
3706                                                       __M);
3707 }
3708
3709 extern __inline __m512d
3710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3711 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3712 {
3713   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3714                                                       (__v8df)
3715                                                       _mm512_setzero_pd (),
3716                                                       __M);
3717 }
3718
3719 extern __inline __m512i
3720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3721 _mm512_broadcast_i64x4 (__m256i __A)
3722 {
3723   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3724                                                       (__v8di)
3725                                                       _mm512_undefined_epi32 (),
3726                                                       (__mmask8) -1);
3727 }
3728
3729 extern __inline __m512i
3730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3731 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3732 {
3733   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3734                                                       (__v8di) __O,
3735                                                       __M);
3736 }
3737
3738 extern __inline __m512i
3739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3740 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3741 {
3742   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3743                                                       (__v8di)
3744                                                       _mm512_setzero_si512 (),
3745                                                       __M);
3746 }
3747
/* Symbolic names for all 256 values of the 8-bit selector taken by
   _mm512_shuffle_epi32 and its masked forms.  Each of the four trailing
   letters encodes two bits (A = 0, B = 1, C = 2, D = 3); the leftmost
   letter occupies the most significant pair.  Every row below fixes the
   first three letters, anchors its first enumerator explicitly and lets
   the remaining three follow by implicit increment.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB, _MM_PERM_AAAC, _MM_PERM_AAAD,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB, _MM_PERM_AABC, _MM_PERM_AABD,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB, _MM_PERM_AACC, _MM_PERM_AACD,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB, _MM_PERM_AADC, _MM_PERM_AADD,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB, _MM_PERM_ABAC, _MM_PERM_ABAD,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB, _MM_PERM_ABBC, _MM_PERM_ABBD,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB, _MM_PERM_ABCC, _MM_PERM_ABCD,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB, _MM_PERM_ABDC, _MM_PERM_ABDD,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB, _MM_PERM_ACAC, _MM_PERM_ACAD,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB, _MM_PERM_ACBC, _MM_PERM_ACBD,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB, _MM_PERM_ACCC, _MM_PERM_ACCD,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB, _MM_PERM_ACDC, _MM_PERM_ACDD,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB, _MM_PERM_ADAC, _MM_PERM_ADAD,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB, _MM_PERM_ADBC, _MM_PERM_ADBD,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB, _MM_PERM_ADCC, _MM_PERM_ADCD,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB, _MM_PERM_ADDC, _MM_PERM_ADDD,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB, _MM_PERM_BAAC, _MM_PERM_BAAD,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB, _MM_PERM_BABC, _MM_PERM_BABD,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB, _MM_PERM_BACC, _MM_PERM_BACD,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB, _MM_PERM_BADC, _MM_PERM_BADD,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB, _MM_PERM_BBAC, _MM_PERM_BBAD,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB, _MM_PERM_BBBC, _MM_PERM_BBBD,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB, _MM_PERM_BBCC, _MM_PERM_BBCD,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB, _MM_PERM_BBDC, _MM_PERM_BBDD,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB, _MM_PERM_BCAC, _MM_PERM_BCAD,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB, _MM_PERM_BCBC, _MM_PERM_BCBD,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB, _MM_PERM_BCCC, _MM_PERM_BCCD,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB, _MM_PERM_BCDC, _MM_PERM_BCDD,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB, _MM_PERM_BDAC, _MM_PERM_BDAD,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB, _MM_PERM_BDBC, _MM_PERM_BDBD,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB, _MM_PERM_BDCC, _MM_PERM_BDCD,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB, _MM_PERM_BDDC, _MM_PERM_BDDD,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB, _MM_PERM_CAAC, _MM_PERM_CAAD,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB, _MM_PERM_CABC, _MM_PERM_CABD,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB, _MM_PERM_CACC, _MM_PERM_CACD,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB, _MM_PERM_CADC, _MM_PERM_CADD,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB, _MM_PERM_CBAC, _MM_PERM_CBAD,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB, _MM_PERM_CBBC, _MM_PERM_CBBD,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB, _MM_PERM_CBCC, _MM_PERM_CBCD,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB, _MM_PERM_CBDC, _MM_PERM_CBDD,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB, _MM_PERM_CCAC, _MM_PERM_CCAD,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB, _MM_PERM_CCBC, _MM_PERM_CCBD,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB, _MM_PERM_CCCC, _MM_PERM_CCCD,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB, _MM_PERM_CCDC, _MM_PERM_CCDD,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB, _MM_PERM_CDAC, _MM_PERM_CDAD,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB, _MM_PERM_CDBC, _MM_PERM_CDBD,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB, _MM_PERM_CDCC, _MM_PERM_CDCD,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB, _MM_PERM_CDDC, _MM_PERM_CDDD,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB, _MM_PERM_DAAC, _MM_PERM_DAAD,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB, _MM_PERM_DABC, _MM_PERM_DABD,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB, _MM_PERM_DACC, _MM_PERM_DACD,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB, _MM_PERM_DADC, _MM_PERM_DADD,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB, _MM_PERM_DBAC, _MM_PERM_DBAD,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB, _MM_PERM_DBBC, _MM_PERM_DBBD,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB, _MM_PERM_DBCC, _MM_PERM_DBCD,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB, _MM_PERM_DBDC, _MM_PERM_DBDD,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB, _MM_PERM_DCAC, _MM_PERM_DCAD,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB, _MM_PERM_DCBC, _MM_PERM_DCBD,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB, _MM_PERM_DCCC, _MM_PERM_DCCD,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB, _MM_PERM_DCDC, _MM_PERM_DCDD,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB, _MM_PERM_DDAC, _MM_PERM_DDAD,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB, _MM_PERM_DDBC, _MM_PERM_DDBD,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB, _MM_PERM_DDCC, _MM_PERM_DDCD,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB, _MM_PERM_DDDC, _MM_PERM_DDDD
} _MM_PERM_ENUM;
3837
3838 #ifdef __OPTIMIZE__
3839 extern __inline __m512i
3840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3842 {
3843   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3844                                                   __mask,
3845                                                   (__v16si)
3846                                                   _mm512_undefined_epi32 (),
3847                                                   (__mmask16) -1);
3848 }
3849
3850 extern __inline __m512i
3851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3852 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3853                            _MM_PERM_ENUM __mask)
3854 {
3855   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3856                                                   __mask,
3857                                                   (__v16si) __W,
3858                                                   (__mmask16) __U);
3859 }
3860
3861 extern __inline __m512i
3862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3863 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3864 {
3865   return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3866                                                   __mask,
3867                                                   (__v16si)
3868                                                   _mm512_setzero_si512 (),
3869                                                   (__mmask16) __U);
3870 }
3871
3872 extern __inline __m512i
3873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3874 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3875 {
3876   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3877                                                    (__v8di) __B, __imm,
3878                                                    (__v8di)
3879                                                    _mm512_undefined_epi32 (),
3880                                                    (__mmask8) -1);
3881 }
3882
3883 extern __inline __m512i
3884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3885 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3886                            __m512i __B, const int __imm)
3887 {
3888   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3889                                                    (__v8di) __B, __imm,
3890                                                    (__v8di) __W,
3891                                                    (__mmask8) __U);
3892 }
3893
3894 extern __inline __m512i
3895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3896 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3897                             const int __imm)
3898 {
3899   return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3900                                                    (__v8di) __B, __imm,
3901                                                    (__v8di)
3902                                                    _mm512_setzero_si512 (),
3903                                                    (__mmask8) __U);
3904 }
3905
3906 extern __inline __m512i
3907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3908 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3909 {
3910   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3911                                                    (__v16si) __B,
3912                                                    __imm,
3913                                                    (__v16si)
3914                                                    _mm512_undefined_epi32 (),
3915                                                    (__mmask16) -1);
3916 }
3917
3918 extern __inline __m512i
3919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3920 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3921                            __m512i __B, const int __imm)
3922 {
3923   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3924                                                    (__v16si) __B,
3925                                                    __imm,
3926                                                    (__v16si) __W,
3927                                                    (__mmask16) __U);
3928 }
3929
3930 extern __inline __m512i
3931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3932 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3933                             const int __imm)
3934 {
3935   return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3936                                                    (__v16si) __B,
3937                                                    __imm,
3938                                                    (__v16si)
3939                                                    _mm512_setzero_si512 (),
3940                                                    (__mmask16) __U);
3941 }
3942
3943 extern __inline __m512d
3944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3945 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3946 {
3947   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3948                                                    (__v8df) __B, __imm,
3949                                                    (__v8df)
3950                                                    _mm512_undefined_pd (),
3951                                                    (__mmask8) -1);
3952 }
3953
3954 extern __inline __m512d
3955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3956 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3957                            __m512d __B, const int __imm)
3958 {
3959   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3960                                                    (__v8df) __B, __imm,
3961                                                    (__v8df) __W,
3962                                                    (__mmask8) __U);
3963 }
3964
3965 extern __inline __m512d
3966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3967 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3968                             const int __imm)
3969 {
3970   return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3971                                                    (__v8df) __B, __imm,
3972                                                    (__v8df)
3973                                                    _mm512_setzero_pd (),
3974                                                    (__mmask8) __U);
3975 }
3976
3977 extern __inline __m512
3978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3980 {
3981   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3982                                                   (__v16sf) __B, __imm,
3983                                                   (__v16sf)
3984                                                   _mm512_undefined_ps (),
3985                                                   (__mmask16) -1);
3986 }
3987
3988 extern __inline __m512
3989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3991                            __m512 __B, const int __imm)
3992 {
3993   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3994                                                   (__v16sf) __B, __imm,
3995                                                   (__v16sf) __W,
3996                                                   (__mmask16) __U);
3997 }
3998
3999 extern __inline __m512
4000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4002                             const int __imm)
4003 {
4004   return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4005                                                   (__v16sf) __B, __imm,
4006                                                   (__v16sf)
4007                                                   _mm512_setzero_ps (),
4008                                                   (__mmask16) __U);
4009 }
4010
4011 #else
/* Macro form used when __OPTIMIZE__ is not defined: unmasked 32-bit
   shuffle of A with selector IMM; all mask bits set, pass-through
   undefined.  */
#define _mm512_shuffle_epi32(A, IMM)                                    \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(A),                                             \
     (int)(IMM),                                                        \
     (__v16si)(__m512i)_mm512_undefined_epi32 (),                       \
     (__mmask16)-1))
4016
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking 32-bit
   shuffle of A with selector IMM, pass-through W and lane mask U.  */
#define _mm512_mask_shuffle_epi32(W, U, A, IMM)                         \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(A),                                             \
     (int)(IMM),                                                        \
     (__v16si)(__m512i)(W),                                             \
     (__mmask16)(U)))
4021
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking 32-bit
   shuffle of A with selector IMM, all-zero pass-through and lane mask U.  */
#define _mm512_maskz_shuffle_epi32(U, A, IMM)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask (                            \
     (__v16si)(__m512i)(A),                                             \
     (int)(IMM),                                                        \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(U)))
4026
/* Macro form used when __OPTIMIZE__ is not defined: unmasked shuf_i64x2
   shuffle of A and B with immediate IMM; all mask bits set, pass-through
   undefined.  */
#define _mm512_shuffle_i64x2(A, B, IMM)                                 \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(A),                                              \
     (__v8di)(__m512i)(B),                                              \
     (int)(IMM),                                                        \
     (__v8di)(__m512i)_mm512_undefined_epi32 (),                        \
     (__mmask8)-1))
4032
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking
   shuf_i64x2 shuffle of A and B with immediate IMM, pass-through W and
   lane mask U.  */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, IMM)                      \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(A),                                              \
     (__v8di)(__m512i)(B),                                              \
     (int)(IMM),                                                        \
     (__v8di)(__m512i)(W),                                              \
     (__mmask8)(U)))
4038
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking
   shuf_i64x2 shuffle of A and B with immediate IMM, all-zero pass-through
   and lane mask U.  */
#define _mm512_maskz_shuffle_i64x2(U, A, B, IMM)                        \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask (                           \
     (__v8di)(__m512i)(A),                                              \
     (__v8di)(__m512i)(B),                                              \
     (int)(IMM),                                                        \
     (__v8di)(__m512i)_mm512_setzero_si512 (),                          \
     (__mmask8)(U)))
4044
/* Macro form used when __OPTIMIZE__ is not defined: unmasked shuf_i32x4
   shuffle of A and B with immediate IMM; all mask bits set, pass-through
   undefined.  */
#define _mm512_shuffle_i32x4(A, B, IMM)                                 \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(A),                                             \
     (__v16si)(__m512i)(B),                                             \
     (int)(IMM),                                                        \
     (__v16si)(__m512i)_mm512_undefined_epi32 (),                       \
     (__mmask16)-1))
4050
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking
   shuf_i32x4 shuffle of A and B with immediate IMM, pass-through W and
   lane mask U.  */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, IMM)                      \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(A),                                             \
     (__v16si)(__m512i)(B),                                             \
     (int)(IMM),                                                        \
     (__v16si)(__m512i)(W),                                             \
     (__mmask16)(U)))
4056
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking
   shuf_i32x4 shuffle of A and B with immediate IMM, all-zero pass-through
   and lane mask U.  */
#define _mm512_maskz_shuffle_i32x4(U, A, B, IMM)                        \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask (                           \
     (__v16si)(__m512i)(A),                                             \
     (__v16si)(__m512i)(B),                                             \
     (int)(IMM),                                                        \
     (__v16si)(__m512i)_mm512_setzero_si512 (),                         \
     (__mmask16)(U)))
4062
/* Macro form used when __OPTIMIZE__ is not defined: unmasked shuf_f64x2
   shuffle of A and B with immediate IMM; all mask bits set, pass-through
   undefined.  */
#define _mm512_shuffle_f64x2(A, B, IMM)                                 \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)_mm512_undefined_pd(),                            \
     (__mmask8)-1))
4068
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking
   shuf_f64x2 shuffle of A and B with immediate IMM, pass-through W and
   lane mask U.  */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, IMM)                      \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)(W),                                              \
     (__mmask8)(U)))
4074
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking
   shuf_f64x2 shuffle of A and B with immediate IMM, all-zero pass-through
   and lane mask U.  */
#define _mm512_maskz_shuffle_f64x2(U, A, B, IMM)                        \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask (                           \
     (__v8df)(__m512d)(A),                                              \
     (__v8df)(__m512d)(B),                                              \
     (int)(IMM),                                                        \
     (__v8df)(__m512d)_mm512_setzero_pd(),                              \
     (__mmask8)(U)))
4080
/* Macro form used when __OPTIMIZE__ is not defined: unmasked shuf_f32x4
   shuffle of A and B with immediate IMM; all mask bits set, pass-through
   undefined.  */
#define _mm512_shuffle_f32x4(A, B, IMM)                                 \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(IMM),                                                        \
     (__v16sf)(__m512)_mm512_undefined_ps(),                            \
     (__mmask16)-1))
4086
/* Macro form used when __OPTIMIZE__ is not defined: merge-masking
   shuf_f32x4 shuffle of A and B with immediate IMM, pass-through W and
   lane mask U.  */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, IMM)                      \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(IMM),                                                        \
     (__v16sf)(__m512)(W),                                              \
     (__mmask16)(U)))
4092
/* Macro form used when __OPTIMIZE__ is not defined: zero-masking
   shuf_f32x4 shuffle of A and B with immediate IMM, all-zero pass-through
   and lane mask U.  */
#define _mm512_maskz_shuffle_f32x4(U, A, B, IMM)                        \
  ((__m512) __builtin_ia32_shuf_f32x4_mask (                            \
     (__v16sf)(__m512)(A),                                              \
     (__v16sf)(__m512)(B),                                              \
     (int)(IMM),                                                        \
     (__v16sf)(__m512)_mm512_setzero_ps(),                              \
     (__mmask16)(U)))
4098 #endif
4099
4100 extern __inline __m512i
4101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4102 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4103 {
4104   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4105                                                   (__v16si) __B,
4106                                                   (__v16si)
4107                                                   _mm512_undefined_epi32 (),
4108                                                   (__mmask16) -1);
4109 }
4110
4111 extern __inline __m512i
4112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4113 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4114 {
4115   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4116                                                   (__v16si) __B,
4117                                                   (__v16si) __W,
4118                                                   (__mmask16) __U);
4119 }
4120
4121 extern __inline __m512i
4122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4123 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4124 {
4125   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4126                                                   (__v16si) __B,
4127                                                   (__v16si)
4128                                                   _mm512_setzero_si512 (),
4129                                                   (__mmask16) __U);
4130 }
4131
4132 extern __inline __m512i
4133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4134 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4135 {
4136   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4137                                                   (__v16si) __B,
4138                                                   (__v16si)
4139                                                   _mm512_undefined_epi32 (),
4140                                                   (__mmask16) -1);
4141 }
4142
4143 extern __inline __m512i
4144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4145 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4146 {
4147   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4148                                                   (__v16si) __B,
4149                                                   (__v16si) __W,
4150                                                   (__mmask16) __U);
4151 }
4152
4153 extern __inline __m512i
4154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4155 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4156 {
4157   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4158                                                   (__v16si) __B,
4159                                                   (__v16si)
4160                                                   _mm512_setzero_si512 (),
4161                                                   (__mmask16) __U);
4162 }
4163
4164 extern __inline __m512i
4165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4166 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4167 {
4168   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4169                                                   (__v8di) __B,
4170                                                   (__v8di)
4171                                                   _mm512_undefined_epi32 (),
4172                                                   (__mmask8) -1);
4173 }
4174
4175 extern __inline __m512i
4176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4177 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4178 {
4179   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4180                                                   (__v8di) __B,
4181                                                   (__v8di) __W,
4182                                                   (__mmask8) __U);
4183 }
4184
4185 extern __inline __m512i
4186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4187 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4188 {
4189   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4190                                                   (__v8di) __B,
4191                                                   (__v8di)
4192                                                   _mm512_setzero_si512 (),
4193                                                   (__mmask8) __U);
4194 }
4195
4196 extern __inline __m512i
4197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4198 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4199 {
4200   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4201                                                   (__v8di) __B,
4202                                                   (__v8di)
4203                                                   _mm512_undefined_epi32 (),
4204                                                   (__mmask8) -1);
4205 }
4206
4207 extern __inline __m512i
4208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4209 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4210 {
4211   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4212                                                   (__v8di) __B,
4213                                                   (__v8di) __W,
4214                                                   (__mmask8) __U);
4215 }
4216
4217 extern __inline __m512i
4218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4219 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4220 {
4221   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4222                                                   (__v8di) __B,
4223                                                   (__v8di)
4224                                                   _mm512_setzero_si512 (),
4225                                                   (__mmask8) __U);
4226 }
4227
/* _mm512_{,mask_,maskz_}cvtt_roundpd_{epi32,epu32}: wrappers around
   __builtin_ia32_cvttpd2dq512_mask and __builtin_ia32_cvttpd2udq512_mask.
   With __OPTIMIZE__ they are always-inline functions; otherwise they are
   macros that hand the last argument to the builtin textually (presumably
   because the builtin needs it as a compile-time constant -- confirm).  */
#ifdef __OPTIMIZE__
/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
{
  const __v8si __src = (__v8si) _mm256_undefined_si256 ();

  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, __src,
                                                     (__mmask8) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
                                const int __R)
{
  const __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, __src,
                                                     (__mmask8) __U, __R);
}

/* Zeroing: all-zero second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, __zero,
                                                     (__mmask8) __U, __R);
}

/* Unmasked, "udq" builtin: undefined second operand, all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
{
  const __v8si __src = (__v8si) _mm256_undefined_si256 ();

  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, __src,
                                                      (__mmask8) -1, __R);
}

/* Masked, "udq" builtin: __W is the second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
                                const int __R)
{
  const __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, __src,
                                                      (__mmask8) __U, __R);
}

/* Zeroing, "udq" builtin: all-zero second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, __zero,
                                                      (__mmask8) __U, __R);
}
#else
/* Macro forms: same builtin calls, arguments passed through unchanged.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
     A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvtt_roundpd_epu32(A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
     A, (__v8si)_mm256_setzero_si256(), U, B))
#endif
4307
/* _mm512_{,mask_,maskz_}cvt_roundpd_{epi32,epu32}: wrappers around
   __builtin_ia32_cvtpd2dq512_mask and __builtin_ia32_cvtpd2udq512_mask.
   With __OPTIMIZE__ they are always-inline functions; otherwise they are
   macros that hand the last argument to the builtin textually (presumably
   because the builtin needs it as a compile-time constant -- confirm).  */
#ifdef __OPTIMIZE__
/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
{
  const __v8si __src = (__v8si) _mm256_undefined_si256 ();

  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, __src,
                                                    (__mmask8) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
                               const int __R)
{
  const __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, __src,
                                                    (__mmask8) __U, __R);
}

/* Zeroing: all-zero second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, __zero,
                                                    (__mmask8) __U, __R);
}

/* Unmasked, "udq" builtin: undefined second operand, all-ones mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
{
  const __v8si __src = (__v8si) _mm256_undefined_si256 ();

  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, __src,
                                                     (__mmask8) -1, __R);
}

/* Masked, "udq" builtin: __W is the second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
                               const int __R)
{
  const __v8si __src = (__v8si) __W;

  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, __src,
                                                     (__mmask8) __U, __R);
}

/* Zeroing, "udq" builtin: all-zero second operand, __U the mask.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  const __v8si __zero = (__v8si) _mm256_setzero_si256 ();

  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, __zero,
                                                     (__mmask8) __U, __R);
}
#else
/* Macro forms: same builtin calls, arguments passed through unchanged.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask( \
     A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvt_roundpd_epu32(A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask( \
     A, (__v8si)_mm256_setzero_si256(), U, B))
#endif
4387
/* _mm512_{,mask_,maskz_}cvtt_roundps_{epi32,epu32}: wrappers around
   __builtin_ia32_cvttps2dq512_mask and __builtin_ia32_cvttps2udq512_mask.
   With __OPTIMIZE__ they are always-inline functions; otherwise they are
   macros that hand the last argument to the builtin textually (presumably
   because the builtin needs it as a compile-time constant -- confirm).  */
#ifdef __OPTIMIZE__
/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, __src,
                                                     (__mmask16) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
                                const int __R)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, __src,
                                                     (__mmask16) __U, __R);
}

/* Zeroing: all-zero second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, __zero,
                                                     (__mmask16) __U, __R);
}

/* Unmasked, "udq" builtin: undefined second operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, __src,
                                                      (__mmask16) -1, __R);
}

/* Masked, "udq" builtin: __W is the second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
                                const int __R)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, __src,
                                                      (__mmask16) __U, __R);
}

/* Zeroing, "udq" builtin: all-zero second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, __zero,
                                                      (__mmask16) __U, __R);
}
#else
/* Macro forms: same builtin calls, arguments passed through unchanged.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
     A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvtt_roundps_epu32(A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
     A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif
4467
/* _mm512_{,mask_,maskz_}cvt_roundps_{epi32,epu32}: wrappers around
   __builtin_ia32_cvtps2dq512_mask and __builtin_ia32_cvtps2udq512_mask.
   With __OPTIMIZE__ they are always-inline functions; otherwise they are
   macros that hand the last argument to the builtin textually (presumably
   because the builtin needs it as a compile-time constant -- confirm).  */
#ifdef __OPTIMIZE__
/* Unmasked: undefined second operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, __src,
                                                    (__mmask16) -1, __R);
}

/* Masked: __W is the second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
                               const int __R)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, __src,
                                                    (__mmask16) __U, __R);
}

/* Zeroing: all-zero second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, __zero,
                                                    (__mmask16) __U, __R);
}

/* Unmasked, "udq" builtin: undefined second operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
{
  const __v16si __src = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, __src,
                                                     (__mmask16) -1, __R);
}

/* Masked, "udq" builtin: __W is the second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
                               const int __R)
{
  const __v16si __src = (__v16si) __W;

  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, __src,
                                                     (__mmask16) __U, __R);
}

/* Zeroing, "udq" builtin: all-zero second operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  const __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, __zero,
                                                     (__mmask16) __U, __R);
}
#else
/* Macro forms: same builtin calls, arguments passed through unchanged.  */
#define _mm512_cvt_roundps_epi32(A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask( \
     A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvt_roundps_epu32(A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask( \
     A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif
4547
4548 extern __inline __m128d
4549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4550 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4551 {
4552   return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4553 }
4554
/* 64-bit integer to scalar-double wrappers, only provided when __x86_64__
   is defined.  _mm_cvt_roundu64_sd wraps __builtin_ia32_cvtusi2sd64;
   _mm_cvt_roundi64_sd and _mm_cvt_roundsi64_sd are two names for the same
   __builtin_ia32_cvtsi2sd64 call.  With __OPTIMIZE__ they are always-inline
   functions; otherwise they are macros that forward their arguments to the
   builtin textually.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}
#else
/* The whole expansion is parenthesized so that a postfix operator applied
   to the macro invocation (e.g. a subscript) binds to the cast result, as
   it does with the inline-function form, and not to the builtin call.  */
#define _mm_cvt_roundu64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtusi2sd64(A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C)   \
    ((__m128d)__builtin_ia32_cvtsi2sd64(A, B, C))
#endif

#endif
4589
/* 32-bit integer to scalar-float wrappers.  _mm_cvt_roundu32_ss wraps
   __builtin_ia32_cvtusi2ss32; _mm_cvt_roundsi32_ss and _mm_cvt_roundi32_ss
   are two names for the same __builtin_ia32_cvtsi2ss32 call.  With
   __OPTIMIZE__ they are always-inline functions; otherwise they are macros
   that forward their arguments to the builtin textually.  */
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}
#else
/* The whole expansion is parenthesized so that a postfix operator applied
   to the macro invocation (e.g. a subscript) binds to the cast result, as
   it does with the inline-function form, and not to the builtin call.  */
#define _mm_cvt_roundu32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss32(A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss32(A, B, C))
#endif
4621
/* 64-bit integer to scalar-float wrappers, only provided when __x86_64__
   is defined.  _mm_cvt_roundu64_ss wraps __builtin_ia32_cvtusi2ss64;
   _mm_cvt_roundsi64_ss and _mm_cvt_roundi64_ss are two names for the same
   __builtin_ia32_cvtsi2ss64 call.  With __OPTIMIZE__ they are always-inline
   functions; otherwise they are macros that forward their arguments to the
   builtin textually.  */
#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}
#else
/* The whole expansion is parenthesized so that a postfix operator applied
   to the macro invocation (e.g. a subscript) binds to the cast result, as
   it does with the inline-function form, and not to the builtin call.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss64(A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64(A, B, C))
#endif

#endif
4656
4657 extern __inline __m128i
4658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4659 _mm512_cvtepi32_epi8 (__m512i __A)
4660 {
4661   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4662                                                   (__v16qi)
4663                                                   _mm_undefined_si128 (),
4664                                                   (__mmask16) -1);
4665 }
4666
4667 extern __inline void
4668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4669 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4670 {
4671   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4672 }
4673
4674 extern __inline __m128i
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4677 {
4678   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4679                                                   (__v16qi) __O, __M);
4680 }
4681
4682 extern __inline __m128i
4683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4684 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4685 {
4686   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4687                                                   (__v16qi)
4688                                                   _mm_setzero_si128 (),
4689                                                   __M);
4690 }
4691
4692 extern __inline __m128i
4693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4694 _mm512_cvtsepi32_epi8 (__m512i __A)
4695 {
4696   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4697                                                    (__v16qi)
4698                                                    _mm_undefined_si128 (),
4699                                                    (__mmask16) -1);
4700 }
4701
4702 extern __inline void
4703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4704 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4705 {
4706   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4707 }
4708
4709 extern __inline __m128i
4710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4711 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4712 {
4713   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4714                                                    (__v16qi) __O, __M);
4715 }
4716
4717 extern __inline __m128i
4718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4719 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4720 {
4721   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4722                                                    (__v16qi)
4723                                                    _mm_setzero_si128 (),
4724                                                    __M);
4725 }
4726
4727 extern __inline __m128i
4728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4729 _mm512_cvtusepi32_epi8 (__m512i __A)
4730 {
4731   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4732                                                     (__v16qi)
4733                                                     _mm_undefined_si128 (),
4734                                                     (__mmask16) -1);
4735 }
4736
4737 extern __inline void
4738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4739 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4740 {
4741   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4742 }
4743
4744 extern __inline __m128i
4745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4747 {
4748   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4749                                                     (__v16qi) __O,
4750                                                     __M);
4751 }
4752
4753 extern __inline __m128i
4754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4755 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4756 {
4757   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4758                                                     (__v16qi)
4759                                                     _mm_setzero_si128 (),
4760                                                     __M);
4761 }
4762
4763 extern __inline __m256i
4764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4765 _mm512_cvtepi32_epi16 (__m512i __A)
4766 {
4767   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4768                                                   (__v16hi)
4769                                                   _mm256_undefined_si256 (),
4770                                                   (__mmask16) -1);
4771 }
4772
4773 extern __inline void
4774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4775 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4776 {
4777   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4778 }
4779
4780 extern __inline __m256i
4781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4782 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4783 {
4784   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4785                                                   (__v16hi) __O, __M);
4786 }
4787
4788 extern __inline __m256i
4789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4790 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4791 {
4792   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4793                                                   (__v16hi)
4794                                                   _mm256_setzero_si256 (),
4795                                                   __M);
4796 }
4797
4798 extern __inline __m256i
4799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4800 _mm512_cvtsepi32_epi16 (__m512i __A)
4801 {
4802   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4803                                                    (__v16hi)
4804                                                    _mm256_undefined_si256 (),
4805                                                    (__mmask16) -1);
4806 }
4807
4808 extern __inline void
4809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4810 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4811 {
4812   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4813 }
4814
4815 extern __inline __m256i
4816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4817 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4818 {
4819   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4820                                                    (__v16hi) __O, __M);
4821 }
4822
4823 extern __inline __m256i
4824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4825 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4826 {
4827   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4828                                                    (__v16hi)
4829                                                    _mm256_setzero_si256 (),
4830                                                    __M);
4831 }
4832
4833 extern __inline __m256i
4834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4835 _mm512_cvtusepi32_epi16 (__m512i __A)
4836 {
4837   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4838                                                     (__v16hi)
4839                                                     _mm256_undefined_si256 (),
4840                                                     (__mmask16) -1);
4841 }
4842
4843 extern __inline void
4844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4845 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4846 {
4847   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4848 }
4849
4850 extern __inline __m256i
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4853 {
4854   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4855                                                     (__v16hi) __O,
4856                                                     __M);
4857 }
4858
4859 extern __inline __m256i
4860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4861 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4862 {
4863   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4864                                                     (__v16hi)
4865                                                     _mm256_setzero_si256 (),
4866                                                     __M);
4867 }
4868
4869 extern __inline __m256i
4870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4871 _mm512_cvtepi64_epi32 (__m512i __A)
4872 {
4873   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4874                                                   (__v8si)
4875                                                   _mm256_undefined_si256 (),
4876                                                   (__mmask8) -1);
4877 }
4878
4879 extern __inline void
4880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4882 {
4883   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4884 }
4885
4886 extern __inline __m256i
4887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4888 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4889 {
4890   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4891                                                   (__v8si) __O, __M);
4892 }
4893
4894 extern __inline __m256i
4895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4896 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4897 {
4898   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4899                                                   (__v8si)
4900                                                   _mm256_setzero_si256 (),
4901                                                   __M);
4902 }
4903
4904 extern __inline __m256i
4905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4906 _mm512_cvtsepi64_epi32 (__m512i __A)
4907 {
4908   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4909                                                    (__v8si)
4910                                                    _mm256_undefined_si256 (),
4911                                                    (__mmask8) -1);
4912 }
4913
4914 extern __inline void
4915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4916 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4917 {
4918   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4919 }
4920
4921 extern __inline __m256i
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4924 {
4925   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4926                                                    (__v8si) __O, __M);
4927 }
4928
4929 extern __inline __m256i
4930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4931 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4932 {
4933   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4934                                                    (__v8si)
4935                                                    _mm256_setzero_si256 (),
4936                                                    __M);
4937 }
4938
4939 extern __inline __m256i
4940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4941 _mm512_cvtusepi64_epi32 (__m512i __A)
4942 {
4943   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4944                                                     (__v8si)
4945                                                     _mm256_undefined_si256 (),
4946                                                     (__mmask8) -1);
4947 }
4948
4949 extern __inline void
4950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4951 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4952 {
4953   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4954 }
4955
4956 extern __inline __m256i
4957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4959 {
4960   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4961                                                     (__v8si) __O, __M);
4962 }
4963
4964 extern __inline __m256i
4965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4966 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4967 {
4968   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4969                                                     (__v8si)
4970                                                     _mm256_setzero_si256 (),
4971                                                     __M);
4972 }
4973
4974 extern __inline __m128i
4975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4976 _mm512_cvtepi64_epi16 (__m512i __A)
4977 {
4978   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4979                                                   (__v8hi)
4980                                                   _mm_undefined_si128 (),
4981                                                   (__mmask8) -1);
4982 }
4983
4984 extern __inline void
4985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4986 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4987 {
4988   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4989 }
4990
4991 extern __inline __m128i
4992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4993 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4994 {
4995   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4996                                                   (__v8hi) __O, __M);
4997 }
4998
4999 extern __inline __m128i
5000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5001 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5002 {
5003   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5004                                                   (__v8hi)
5005                                                   _mm_setzero_si128 (),
5006                                                   __M);
5007 }
5008
5009 extern __inline __m128i
5010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5011 _mm512_cvtsepi64_epi16 (__m512i __A)
5012 {
5013   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5014                                                    (__v8hi)
5015                                                    _mm_undefined_si128 (),
5016                                                    (__mmask8) -1);
5017 }
5018
5019 extern __inline void
5020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5022 {
5023   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5024 }
5025
5026 extern __inline __m128i
5027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5028 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5029 {
5030   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5031                                                    (__v8hi) __O, __M);
5032 }
5033
5034 extern __inline __m128i
5035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5036 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5037 {
5038   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5039                                                    (__v8hi)
5040                                                    _mm_setzero_si128 (),
5041                                                    __M);
5042 }
5043
5044 extern __inline __m128i
5045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5046 _mm512_cvtusepi64_epi16 (__m512i __A)
5047 {
5048   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5049                                                     (__v8hi)
5050                                                     _mm_undefined_si128 (),
5051                                                     (__mmask8) -1);
5052 }
5053
5054 extern __inline void
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5057 {
5058   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5059 }
5060
5061 extern __inline __m128i
5062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5063 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5064 {
5065   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5066                                                     (__v8hi) __O, __M);
5067 }
5068
5069 extern __inline __m128i
5070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5071 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5072 {
5073   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5074                                                     (__v8hi)
5075                                                     _mm_setzero_si128 (),
5076                                                     __M);
5077 }
5078
5079 extern __inline __m128i
5080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5081 _mm512_cvtepi64_epi8 (__m512i __A)
5082 {
5083   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5084                                                   (__v16qi)
5085                                                   _mm_undefined_si128 (),
5086                                                   (__mmask8) -1);
5087 }
5088
5089 extern __inline void
5090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5091 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5092 {
5093   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5094 }
5095
5096 extern __inline __m128i
5097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5098 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5099 {
5100   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5101                                                   (__v16qi) __O, __M);
5102 }
5103
5104 extern __inline __m128i
5105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5106 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5107 {
5108   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5109                                                   (__v16qi)
5110                                                   _mm_setzero_si128 (),
5111                                                   __M);
5112 }
5113
5114 extern __inline __m128i
5115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5116 _mm512_cvtsepi64_epi8 (__m512i __A)
5117 {
5118   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5119                                                    (__v16qi)
5120                                                    _mm_undefined_si128 (),
5121                                                    (__mmask8) -1);
5122 }
5123
5124 extern __inline void
5125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5126 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5127 {
5128   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5129 }
5130
5131 extern __inline __m128i
5132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5134 {
5135   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5136                                                    (__v16qi) __O, __M);
5137 }
5138
5139 extern __inline __m128i
5140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5141 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5142 {
5143   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5144                                                    (__v16qi)
5145                                                    _mm_setzero_si128 (),
5146                                                    __M);
5147 }
5148
5149 extern __inline __m128i
5150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5151 _mm512_cvtusepi64_epi8 (__m512i __A)
5152 {
5153   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5154                                                     (__v16qi)
5155                                                     _mm_undefined_si128 (),
5156                                                     (__mmask8) -1);
5157 }
5158
5159 extern __inline void
5160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5161 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5162 {
5163   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5164 }
5165
5166 extern __inline __m128i
5167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5169 {
5170   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5171                                                     (__v16qi) __O,
5172                                                     __M);
5173 }
5174
5175 extern __inline __m128i
5176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5177 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5178 {
5179   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5180                                                     (__v16qi)
5181                                                     _mm_setzero_si128 (),
5182                                                     __M);
5183 }
5184
5185 extern __inline __m512d
5186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187 _mm512_cvtepi32_pd (__m256i __A)
5188 {
5189   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5190                                                     (__v8df)
5191                                                     _mm512_undefined_pd (),
5192                                                     (__mmask8) -1);
5193 }
5194
5195 extern __inline __m512d
5196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5198 {
5199   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5200                                                     (__v8df) __W,
5201                                                     (__mmask8) __U);
5202 }
5203
5204 extern __inline __m512d
5205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5207 {
5208   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5209                                                     (__v8df)
5210                                                     _mm512_setzero_pd (),
5211                                                     (__mmask8) __U);
5212 }
5213
5214 extern __inline __m512d
5215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5216 _mm512_cvtepu32_pd (__m256i __A)
5217 {
5218   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5219                                                      (__v8df)
5220                                                      _mm512_undefined_pd (),
5221                                                      (__mmask8) -1);
5222 }
5223
5224 extern __inline __m512d
5225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5226 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5227 {
5228   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5229                                                      (__v8df) __W,
5230                                                      (__mmask8) __U);
5231 }
5232
5233 extern __inline __m512d
5234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5235 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5236 {
5237   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5238                                                      (__v8df)
5239                                                      _mm512_setzero_pd (),
5240                                                      (__mmask8) __U);
5241 }
5242
5243 #ifdef __OPTIMIZE__
5244 extern __inline __m512
5245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5246 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5247 {
5248   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5249                                                    (__v16sf)
5250                                                    _mm512_undefined_ps (),
5251                                                    (__mmask16) -1, __R);
5252 }
5253
5254 extern __inline __m512
5255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5256 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5257                                const int __R)
5258 {
5259   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5260                                                    (__v16sf) __W,
5261                                                    (__mmask16) __U, __R);
5262 }
5263
5264 extern __inline __m512
5265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5266 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5267 {
5268   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5269                                                    (__v16sf)
5270                                                    _mm512_setzero_ps (),
5271                                                    (__mmask16) __U, __R);
5272 }
5273
5274 extern __inline __m512
5275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5276 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5277 {
5278   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5279                                                     (__v16sf)
5280                                                     _mm512_undefined_ps (),
5281                                                     (__mmask16) -1, __R);
5282 }
5283
5284 extern __inline __m512
5285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5286 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5287                                const int __R)
5288 {
5289   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5290                                                     (__v16sf) __W,
5291                                                     (__mmask16) __U, __R);
5292 }
5293
5294 extern __inline __m512
5295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5296 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5297 {
5298   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5299                                                     (__v16sf)
5300                                                     _mm512_setzero_ps (),
5301                                                     (__mmask16) __U, __R);
5302 }
5303
5304 #else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtdq2ps512_mask on A with immediate B, an undefined
   second operand and an all-ones mask.  The expansion and every argument
   are parenthesized so the macro composes like a function call.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (__m512i) (A), \
    (__v16sf) (__m512) _mm512_undefined_ps (), \
    (__mmask16) -1, (int) (B)))
5307
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtdq2ps512_mask on A with immediate B, W as the second
   operand and U as the mask.  W, U and B are parenthesized and cast, and
   the whole expansion is wrapped, so expression arguments and postfix
   operators on the result behave as with the inline function.  */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (__m512i) (A), \
    (__v16sf) (__m512) (W), \
    (__mmask16) (U), (int) (B)))
5310
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtdq2ps512_mask on A with immediate B, an all-zero
   second operand and U as the mask.  U and B are parenthesized and cast,
   and the whole expansion is wrapped in parentheses.  */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) (__m512i) (A), \
    (__v16sf) (__m512) _mm512_setzero_ps (), \
    (__mmask16) (U), (int) (B)))
5313
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtudq2ps512_mask on A with immediate B, an undefined
   second operand and an all-ones mask.  The expansion and every argument
   are parenthesized so the macro composes like a function call.  */
#define _mm512_cvt_roundepu32_ps(A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (__m512i) (A), \
    (__v16sf) (__m512) _mm512_undefined_ps (), \
    (__mmask16) -1, (int) (B)))
5316
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtudq2ps512_mask on A with immediate B, W as the second
   operand and U as the mask.  W, U and B are parenthesized and cast, and
   the whole expansion is wrapped, so expression arguments and postfix
   operators on the result behave as with the inline function.  */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (__m512i) (A), \
    (__v16sf) (__m512) (W), \
    (__mmask16) (U), (int) (B)))
5319
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cvtudq2ps512_mask on A with immediate B, an all-zero
   second operand and U as the mask.  U and B are parenthesized and cast,
   and the whole expansion is wrapped in parentheses.  */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
  ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) (__m512i) (A), \
    (__v16sf) (__m512) _mm512_setzero_ps (), \
    (__mmask16) (U), (int) (B)))
5322 #endif
5323
5324 #ifdef __OPTIMIZE__
5325 extern __inline __m256d
5326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5328 {
5329   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5330                                                      __imm,
5331                                                      (__v4df)
5332                                                      _mm256_undefined_pd (),
5333                                                      (__mmask8) -1);
5334 }
5335
5336 extern __inline __m256d
5337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5339                              const int __imm)
5340 {
5341   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5342                                                      __imm,
5343                                                      (__v4df) __W,
5344                                                      (__mmask8) __U);
5345 }
5346
5347 extern __inline __m256d
5348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5349 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5350 {
5351   return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5352                                                      __imm,
5353                                                      (__v4df)
5354                                                      _mm256_setzero_pd (),
5355                                                      (__mmask8) __U);
5356 }
5357
5358 extern __inline __m128
5359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5360 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5361 {
5362   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5363                                                     __imm,
5364                                                     (__v4sf)
5365                                                     _mm_undefined_ps (),
5366                                                     (__mmask8) -1);
5367 }
5368
5369 extern __inline __m128
5370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5371 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5372                              const int __imm)
5373 {
5374   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5375                                                     __imm,
5376                                                     (__v4sf) __W,
5377                                                     (__mmask8) __U);
5378 }
5379
5380 extern __inline __m128
5381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5382 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5383 {
5384   return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5385                                                     __imm,
5386                                                     (__v4sf)
5387                                                     _mm_setzero_ps (),
5388                                                     (__mmask8) __U);
5389 }
5390
5391 extern __inline __m256i
5392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5393 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5394 {
5395   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5396                                                      __imm,
5397                                                      (__v4di)
5398                                                      _mm256_undefined_si256 (),
5399                                                      (__mmask8) -1);
5400 }
5401
5402 extern __inline __m256i
5403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5404 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5405                                 const int __imm)
5406 {
5407   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5408                                                      __imm,
5409                                                      (__v4di) __W,
5410                                                      (__mmask8) __U);
5411 }
5412
5413 extern __inline __m256i
5414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5415 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5416 {
5417   return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5418                                                      __imm,
5419                                                      (__v4di)
5420                                                      _mm256_setzero_si256 (),
5421                                                      (__mmask8) __U);
5422 }
5423
5424 extern __inline __m128i
5425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5426 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5427 {
5428   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5429                                                      __imm,
5430                                                      (__v4si)
5431                                                      _mm_undefined_si128 (),
5432                                                      (__mmask8) -1);
5433 }
5434
5435 extern __inline __m128i
5436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5437 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5438                                 const int __imm)
5439 {
5440   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5441                                                      __imm,
5442                                                      (__v4si) __W,
5443                                                      (__mmask8) __U);
5444 }
5445
5446 extern __inline __m128i
5447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5448 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5449 {
5450   return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5451                                                      __imm,
5452                                                      (__v4si)
5453                                                      _mm_setzero_si128 (),
5454                                                      (__mmask8) __U);
5455 }
5456 #else
5457
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf64x4_mask on X with immediate C, an undefined
   third operand and an all-ones mask.  */
#define _mm512_extractf64x4_pd(X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ( \
     (__v8df) (__m512d) (X), (int) (C), \
     (__v4df) (__m256d) _mm256_undefined_pd (), (__mmask8) -1))
5463
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf64x4_mask on X with immediate C, W as the third
   operand and U as the mask.  */
#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ( \
     (__v8df) (__m512d) (X), (int) (C), \
     (__v4df) (__m256d) (W), (__mmask8) (U)))
5469
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf64x4_mask on X with immediate C, an all-zero
   third operand and U as the mask.  */
#define _mm512_maskz_extractf64x4_pd(U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ( \
     (__v8df) (__m512d) (X), (int) (C), \
     (__v4df) (__m256d) _mm256_setzero_pd (), (__mmask8) (U)))
5475
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf32x4_mask on X with immediate C, an undefined
   third operand and an all-ones mask.  */
#define _mm512_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ( \
     (__v16sf) (__m512) (X), (int) (C), \
     (__v4sf) (__m128) _mm_undefined_ps (), (__mmask8) -1))
5481
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf32x4_mask on X with immediate C, W as the third
   operand and U as the mask.  */
#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ( \
     (__v16sf) (__m512) (X), (int) (C), \
     (__v4sf) (__m128) (W), (__mmask8) (U)))
5487
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extractf32x4_mask on X with immediate C, an all-zero
   third operand and U as the mask.  */
#define _mm512_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ( \
     (__v16sf) (__m512) (X), (int) (C), \
     (__v4sf) (__m128) _mm_setzero_ps (), (__mmask8) (U)))
5493
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti64x4_mask on X with immediate C, an undefined
   third operand and an all-ones mask.  */
#define _mm512_extracti64x4_epi64(X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v4di) (__m256i) _mm256_undefined_si256 (), (__mmask8) -1))
5499
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti64x4_mask on X with immediate C, W as the third
   operand and U as the mask.  */
#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v4di) (__m256i) (W), (__mmask8) (U)))
5505
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti64x4_mask on X with immediate C, an all-zero
   third operand and U as the mask.  */
#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ( \
     (__v8di) (__m512i) (X), (int) (C), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
5511
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti32x4_mask on X with immediate C, an undefined
   third operand and an all-ones mask.  */
#define _mm512_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v4si) (__m128i) _mm_undefined_si128 (), (__mmask8) -1))
5517
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti32x4_mask on X with immediate C, W as the third
   operand and U as the mask.  */
#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v4si) (__m128i) (W), (__mmask8) (U)))
5523
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_extracti32x4_mask on X with immediate C, an all-zero
   third operand and U as the mask.  */
#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ( \
     (__v16si) (__m512i) (X), (int) (C), \
     (__v4si) (__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
5529 #endif
5530
5531 #ifdef __OPTIMIZE__
5532 extern __inline __m512i
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5535 {
5536   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5537                                                     (__v4si) __B,
5538                                                     __imm,
5539                                                     (__v16si) __A, -1);
5540 }
5541
5542 extern __inline __m512
5543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5544 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5545 {
5546   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5547                                                    (__v4sf) __B,
5548                                                    __imm,
5549                                                    (__v16sf) __A, -1);
5550 }
5551
5552 extern __inline __m512i
5553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5554 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5555 {
5556   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5557                                                     (__v4di) __B,
5558                                                     __imm,
5559                                                     (__v8di)
5560                                                     _mm512_undefined_epi32 (),
5561                                                     (__mmask8) -1);
5562 }
5563
5564 extern __inline __m512i
5565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5566 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5567                          __m256i __B, const int __imm)
5568 {
5569   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5570                                                     (__v4di) __B,
5571                                                     __imm,
5572                                                     (__v8di) __W,
5573                                                     (__mmask8) __U);
5574 }
5575
5576 extern __inline __m512i
5577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5578 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5579                           const int __imm)
5580 {
5581   return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5582                                                     (__v4di) __B,
5583                                                     __imm,
5584                                                     (__v8di)
5585                                                     _mm512_setzero_si512 (),
5586                                                     (__mmask8) __U);
5587 }
5588
5589 extern __inline __m512d
5590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5591 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5592 {
5593   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5594                                                     (__v4df) __B,
5595                                                     __imm,
5596                                                     (__v8df)
5597                                                     _mm512_undefined_pd (),
5598                                                     (__mmask8) -1);
5599 }
5600
5601 extern __inline __m512d
5602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5604                          __m256d __B, const int __imm)
5605 {
5606   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5607                                                     (__v4df) __B,
5608                                                     __imm,
5609                                                     (__v8df) __W,
5610                                                     (__mmask8) __U);
5611 }
5612
5613 extern __inline __m512d
5614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5615 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5616                           const int __imm)
5617 {
5618   return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5619                                                     (__v4df) __B,
5620                                                     __imm,
5621                                                     (__v8df)
5622                                                     _mm512_setzero_pd (),
5623                                                     (__mmask8) __U);
5624 }
5625 #else
/* Macro form of _mm512_insertf32x4, used when __OPTIMIZE__ is not defined.
   The mask argument is all ones, so the pass-through operand is never
   selected.  An undefined vector is passed there (as the 64x4 insert
   macros do) instead of a second expansion of X, so that X is evaluated
   exactly once, matching the inline-function form.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C),                                    \
    (__v16sf)(__m512)_mm512_undefined_ps (),                            \
    (__mmask16)(-1)))
5629
/* Macro form of _mm512_inserti32x4, used when __OPTIMIZE__ is not defined.
   The mask argument is all ones, so the pass-through operand is never
   selected.  An undefined vector is passed there (as the 64x4 insert
   macros do) instead of a second expansion of X, so that X is evaluated
   exactly once, matching the inline-function form.  */
#define _mm512_inserti32x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C),                                   \
    (__v16si)(__m512i)_mm512_undefined_epi32 (),                        \
    (__mmask16)(-1)))
5633
/* Macro form (no __OPTIMIZE__): hands X, Y, immediate C, an undefined
   __m512d and a mask argument of -1 to __builtin_ia32_insertf64x4_mask.  */
#define _mm512_insertf64x4(X, Y, C)                                     \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                          \
     (__v8df) (__m512d) (X),                                            \
     (__v4df) (__m256d) (Y),                                            \
     (int) (C),                                                         \
     (__v8df) (__m512d) _mm512_undefined_pd (),                         \
     (__mmask8) -1))
5639
/* Macro form (no __OPTIMIZE__): hands X, Y, immediate C, pass-through W
   and mask U to __builtin_ia32_insertf64x4_mask.  */
#define _mm512_mask_insertf64x4(W, U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                          \
     (__v8df) (__m512d) (X),                                            \
     (__v4df) (__m256d) (Y),                                            \
     (int) (C),                                                         \
     (__v8df) (__m512d) (W),                                            \
     (__mmask8) (U)))
5645
/* Macro form (no __OPTIMIZE__): hands X, Y, immediate C, a zero __m512d
   and mask U to __builtin_ia32_insertf64x4_mask.  */
#define _mm512_maskz_insertf64x4(U, X, Y, C)                            \
  ((__m512d) __builtin_ia32_insertf64x4_mask (                          \
     (__v8df) (__m512d) (X),                                            \
     (__v4df) (__m256d) (Y),                                            \
     (int) (C),                                                         \
     (__v8df) (__m512d) _mm512_setzero_pd (),                           \
     (__mmask8) (U)))
5651
/* Macro form (no __OPTIMIZE__): hands X, Y, immediate C, an undefined
   __m512i and a mask argument of -1 to __builtin_ia32_inserti64x4_mask.  */
#define _mm512_inserti64x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                          \
     (__v8di) (__m512i) (X),                                            \
     (__v4di) (__m256i) (Y),                                            \
     (int) (C),                                                         \
     (__v8di) (__m512i) _mm512_undefined_epi32 (),                      \
     (__mmask8) -1))
5657
/* Macro form (no __OPTIMIZE__): hands X, Y, immediate C, pass-through W
   and mask U to __builtin_ia32_inserti64x4_mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                          \
     (__v8di) (__m512i) (X),                                            \
     (__v4di) (__m256i) (Y),                                            \
     (int) (C),                                                         \
     (__v8di) (__m512i) (W),                                            \
     (__mmask8) (U)))
5663
/* Macro form (no __OPTIMIZE__): hands X, Y, immediate C, a zero __m512i
   and mask U to __builtin_ia32_inserti64x4_mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti64x4_mask (                          \
     (__v8di) (__m512i) (X),                                            \
     (__v4di) (__m256i) (Y),                                            \
     (int) (C),                                                         \
     (__v8di) (__m512i) _mm512_setzero_si512 (),                        \
     (__mmask8) (U)))
5669 #endif
5670
5671 extern __inline __m512d
5672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5673 _mm512_loadu_pd (void const *__P)
5674 {
5675   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5676                                                    (__v8df)
5677                                                    _mm512_undefined_pd (),
5678                                                    (__mmask8) -1);
5679 }
5680
5681 extern __inline __m512d
5682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5683 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5684 {
5685   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5686                                                    (__v8df) __W,
5687                                                    (__mmask8) __U);
5688 }
5689
5690 extern __inline __m512d
5691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5692 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5693 {
5694   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5695                                                    (__v8df)
5696                                                    _mm512_setzero_pd (),
5697                                                    (__mmask8) __U);
5698 }
5699
5700 extern __inline void
5701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5702 _mm512_storeu_pd (void *__P, __m512d __A)
5703 {
5704   __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5705                                    (__mmask8) -1);
5706 }
5707
5708 extern __inline void
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5711 {
5712   __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5713                                    (__mmask8) __U);
5714 }
5715
5716 extern __inline __m512
5717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5718 _mm512_loadu_ps (void const *__P)
5719 {
5720   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5721                                                   (__v16sf)
5722                                                   _mm512_undefined_ps (),
5723                                                   (__mmask16) -1);
5724 }
5725
5726 extern __inline __m512
5727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5728 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5729 {
5730   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5731                                                   (__v16sf) __W,
5732                                                   (__mmask16) __U);
5733 }
5734
5735 extern __inline __m512
5736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5738 {
5739   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5740                                                   (__v16sf)
5741                                                   _mm512_setzero_ps (),
5742                                                   (__mmask16) __U);
5743 }
5744
5745 extern __inline void
5746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747 _mm512_storeu_ps (void *__P, __m512 __A)
5748 {
5749   __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5750                                    (__mmask16) -1);
5751 }
5752
5753 extern __inline void
5754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5755 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5756 {
5757   __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5758                                    (__mmask16) __U);
5759 }
5760
5761 extern __inline __m512i
5762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5764 {
5765   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5766                                                      (__v8di) __W,
5767                                                      (__mmask8) __U);
5768 }
5769
5770 extern __inline __m512i
5771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5772 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5773 {
5774   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5775                                                      (__v8di)
5776                                                      _mm512_setzero_si512 (),
5777                                                      (__mmask8) __U);
5778 }
5779
5780 extern __inline void
5781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5782 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5783 {
5784   __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5785                                      (__mmask8) __U);
5786 }
5787
5788 extern __inline __m512i
5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 _mm512_loadu_si512 (void const *__P)
5791 {
5792   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5793                                                      (__v16si)
5794                                                      _mm512_setzero_si512 (),
5795                                                      (__mmask16) -1);
5796 }
5797
5798 extern __inline __m512i
5799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5800 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5801 {
5802   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5803                                                      (__v16si) __W,
5804                                                      (__mmask16) __U);
5805 }
5806
5807 extern __inline __m512i
5808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5810 {
5811   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5812                                                      (__v16si)
5813                                                      _mm512_setzero_si512 (),
5814                                                      (__mmask16) __U);
5815 }
5816
5817 extern __inline void
5818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5819 _mm512_storeu_si512 (void *__P, __m512i __A)
5820 {
5821   __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5822                                      (__mmask16) -1);
5823 }
5824
5825 extern __inline void
5826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5827 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5828 {
5829   __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5830                                      (__mmask16) __U);
5831 }
5832
5833 extern __inline __m512d
5834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5835 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5836 {
5837   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5838                                                         (__v8di) __C,
5839                                                         (__v8df)
5840                                                         _mm512_undefined_pd (),
5841                                                         (__mmask8) -1);
5842 }
5843
5844 extern __inline __m512d
5845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5847 {
5848   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5849                                                         (__v8di) __C,
5850                                                         (__v8df) __W,
5851                                                         (__mmask8) __U);
5852 }
5853
5854 extern __inline __m512d
5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5857 {
5858   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5859                                                         (__v8di) __C,
5860                                                         (__v8df)
5861                                                         _mm512_setzero_pd (),
5862                                                         (__mmask8) __U);
5863 }
5864
5865 extern __inline __m512
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5868 {
5869   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5870                                                        (__v16si) __C,
5871                                                        (__v16sf)
5872                                                        _mm512_undefined_ps (),
5873                                                        (__mmask16) -1);
5874 }
5875
5876 extern __inline __m512
5877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5879 {
5880   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5881                                                        (__v16si) __C,
5882                                                        (__v16sf) __W,
5883                                                        (__mmask16) __U);
5884 }
5885
5886 extern __inline __m512
5887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5888 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5889 {
5890   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5891                                                        (__v16si) __C,
5892                                                        (__v16sf)
5893                                                        _mm512_setzero_ps (),
5894                                                        (__mmask16) __U);
5895 }
5896
5897 extern __inline __m512i
5898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5899 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5900 {
5901   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5902                                                        /* idx */ ,
5903                                                        (__v8di) __A,
5904                                                        (__v8di) __B,
5905                                                        (__mmask8) -1);
5906 }
5907
5908 extern __inline __m512i
5909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5910 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5911                                 __m512i __B)
5912 {
5913   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5914                                                        /* idx */ ,
5915                                                        (__v8di) __A,
5916                                                        (__v8di) __B,
5917                                                        (__mmask8) __U);
5918 }
5919
5920 extern __inline __m512i
5921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5922 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5923                                  __mmask8 __U, __m512i __B)
5924 {
5925   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5926                                                        (__v8di) __I
5927                                                        /* idx */ ,
5928                                                        (__v8di) __B,
5929                                                        (__mmask8) __U);
5930 }
5931
5932 extern __inline __m512i
5933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5934 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5935                                  __m512i __I, __m512i __B)
5936 {
5937   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5938                                                         /* idx */ ,
5939                                                         (__v8di) __A,
5940                                                         (__v8di) __B,
5941                                                         (__mmask8) __U);
5942 }
5943
5944 extern __inline __m512i
5945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5946 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5947 {
5948   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5949                                                        /* idx */ ,
5950                                                        (__v16si) __A,
5951                                                        (__v16si) __B,
5952                                                        (__mmask16) -1);
5953 }
5954
5955 extern __inline __m512i
5956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5957 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5958                                 __m512i __I, __m512i __B)
5959 {
5960   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5961                                                        /* idx */ ,
5962                                                        (__v16si) __A,
5963                                                        (__v16si) __B,
5964                                                        (__mmask16) __U);
5965 }
5966
5967 extern __inline __m512i
5968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5969 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5970                                  __mmask16 __U, __m512i __B)
5971 {
5972   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5973                                                        (__v16si) __I
5974                                                        /* idx */ ,
5975                                                        (__v16si) __B,
5976                                                        (__mmask16) __U);
5977 }
5978
5979 extern __inline __m512i
5980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5981 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5982                                  __m512i __I, __m512i __B)
5983 {
5984   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5985                                                         /* idx */ ,
5986                                                         (__v16si) __A,
5987                                                         (__v16si) __B,
5988                                                         (__mmask16) __U);
5989 }
5990
5991 extern __inline __m512d
5992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5993 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5994 {
5995   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5996                                                         /* idx */ ,
5997                                                         (__v8df) __A,
5998                                                         (__v8df) __B,
5999                                                         (__mmask8) -1);
6000 }
6001
6002 extern __inline __m512d
6003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6004 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6005                              __m512d __B)
6006 {
6007   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6008                                                         /* idx */ ,
6009                                                         (__v8df) __A,
6010                                                         (__v8df) __B,
6011                                                         (__mmask8) __U);
6012 }
6013
6014 extern __inline __m512d
6015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6016 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6017                               __m512d __B)
6018 {
6019   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6020                                                         (__v8di) __I
6021                                                         /* idx */ ,
6022                                                         (__v8df) __B,
6023                                                         (__mmask8) __U);
6024 }
6025
6026 extern __inline __m512d
6027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6028 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6029                               __m512d __B)
6030 {
6031   return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6032                                                          /* idx */ ,
6033                                                          (__v8df) __A,
6034                                                          (__v8df) __B,
6035                                                          (__mmask8) __U);
6036 }
6037
6038 extern __inline __m512
6039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6040 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6041 {
6042   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6043                                                        /* idx */ ,
6044                                                        (__v16sf) __A,
6045                                                        (__v16sf) __B,
6046                                                        (__mmask16) -1);
6047 }
6048
6049 extern __inline __m512
6050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6052 {
6053   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6054                                                        /* idx */ ,
6055                                                        (__v16sf) __A,
6056                                                        (__v16sf) __B,
6057                                                        (__mmask16) __U);
6058 }
6059
6060 extern __inline __m512
6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6063                               __m512 __B)
6064 {
6065   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6066                                                        (__v16si) __I
6067                                                        /* idx */ ,
6068                                                        (__v16sf) __B,
6069                                                        (__mmask16) __U);
6070 }
6071
6072 extern __inline __m512
6073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6074 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6075                               __m512 __B)
6076 {
6077   return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6078                                                         /* idx */ ,
6079                                                         (__v16sf) __A,
6080                                                         (__v16sf) __B,
6081                                                         (__mmask16) __U);
6082 }
6083
6084 #ifdef __OPTIMIZE__
6085 extern __inline __m512d
6086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6087 _mm512_permute_pd (__m512d __X, const int __C)
6088 {
6089   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6090                                                      (__v8df)
6091                                                      _mm512_undefined_pd (),
6092                                                      (__mmask8) -1);
6093 }
6094
6095 extern __inline __m512d
6096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6097 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6098 {
6099   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6100                                                      (__v8df) __W,
6101                                                      (__mmask8) __U);
6102 }
6103
6104 extern __inline __m512d
6105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6106 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6107 {
6108   return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6109                                                      (__v8df)
6110                                                      _mm512_setzero_pd (),
6111                                                      (__mmask8) __U);
6112 }
6113
6114 extern __inline __m512
6115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6116 _mm512_permute_ps (__m512 __X, const int __C)
6117 {
6118   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6119                                                     (__v16sf)
6120                                                     _mm512_undefined_ps (),
6121                                                     (__mmask16) -1);
6122 }
6123
6124 extern __inline __m512
6125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6126 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6127 {
6128   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6129                                                     (__v16sf) __W,
6130                                                     (__mmask16) __U);
6131 }
6132
6133 extern __inline __m512
6134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6135 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6136 {
6137   return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6138                                                     (__v16sf)
6139                                                     _mm512_setzero_ps (),
6140                                                     (__mmask16) __U);
6141 }
6142 #else
/* Macro form (no __OPTIMIZE__): hands X, immediate C, an undefined
   __m512d and a mask argument of -1 to __builtin_ia32_vpermilpd512_mask.  */
#define _mm512_permute_pd(X, C)                                         \
  ((__m512d) __builtin_ia32_vpermilpd512_mask (                         \
     (__v8df) (__m512d) (X), (int) (C),                                 \
     (__v8df) (__m512d) _mm512_undefined_pd (),                         \
     (__mmask8) (-1)))
6147
/* Macro form (no __OPTIMIZE__): hands X, immediate C, pass-through W and
   mask U to __builtin_ia32_vpermilpd512_mask.  */
#define _mm512_mask_permute_pd(W, U, X, C)                              \
  ((__m512d) __builtin_ia32_vpermilpd512_mask (                         \
     (__v8df) (__m512d) (X), (int) (C),                                 \
     (__v8df) (__m512d) (W),                                            \
     (__mmask8) (U)))
6152
/* Macro form (no __OPTIMIZE__): hands X, immediate C, a zero __m512d and
   mask U to __builtin_ia32_vpermilpd512_mask.  */
#define _mm512_maskz_permute_pd(U, X, C)                                \
  ((__m512d) __builtin_ia32_vpermilpd512_mask (                         \
     (__v8df) (__m512d) (X), (int) (C),                                 \
     (__v8df) (__m512d) _mm512_setzero_pd (),                           \
     (__mmask8) (U)))
6157
/* Macro form (no __OPTIMIZE__): hands X, immediate C, an undefined
   __m512 and a mask argument of -1 to __builtin_ia32_vpermilps512_mask.  */
#define _mm512_permute_ps(X, C)                                         \
  ((__m512) __builtin_ia32_vpermilps512_mask (                          \
     (__v16sf) (__m512) (X), (int) (C),                                 \
     (__v16sf) (__m512) _mm512_undefined_ps (),                         \
     (__mmask16) (-1)))
6162
/* Macro form (no __OPTIMIZE__): hands X, immediate C, pass-through W and
   mask U to __builtin_ia32_vpermilps512_mask.  */
#define _mm512_mask_permute_ps(W, U, X, C)                              \
  ((__m512) __builtin_ia32_vpermilps512_mask (                          \
     (__v16sf) (__m512) (X), (int) (C),                                 \
     (__v16sf) (__m512) (W),                                            \
     (__mmask16) (U)))
6167
/* Macro form (no __OPTIMIZE__): hands X, immediate C, a zero __m512 and
   mask U to __builtin_ia32_vpermilps512_mask.  */
#define _mm512_maskz_permute_ps(U, X, C)                                \
  ((__m512) __builtin_ia32_vpermilps512_mask (                          \
     (__v16sf) (__m512) (X), (int) (C),                                 \
     (__v16sf) (__m512) _mm512_setzero_ps (),                           \
     (__mmask16) (U)))
6172 #endif
6173
6174 #ifdef __OPTIMIZE__
6175 extern __inline __m512i
6176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6177 _mm512_permutex_epi64 (__m512i __X, const int __I)
6178 {
6179   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6180                                                   (__v8di)
6181                                                   _mm512_undefined_epi32 (),
6182                                                   (__mmask8) (-1));
6183 }
6184
6185 extern __inline __m512i
6186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6187 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6188                             __m512i __X, const int __I)
6189 {
6190   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6191                                                   (__v8di) __W,
6192                                                   (__mmask8) __M);
6193 }
6194
6195 extern __inline __m512i
6196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6197 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6198 {
6199   return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6200                                                   (__v8di)
6201                                                   _mm512_setzero_si512 (),
6202                                                   (__mmask8) __M);
6203 }
6204
6205 extern __inline __m512d
6206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6207 _mm512_permutex_pd (__m512d __X, const int __M)
6208 {
6209   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6210                                                   (__v8df)
6211                                                   _mm512_undefined_pd (),
6212                                                   (__mmask8) -1);
6213 }
6214
6215 extern __inline __m512d
6216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6217 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6218 {
6219   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6220                                                   (__v8df) __W,
6221                                                   (__mmask8) __U);
6222 }
6223
6224 extern __inline __m512d
6225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6226 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6227 {
6228   return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6229                                                   (__v8df)
6230                                                   _mm512_setzero_pd (),
6231                                                   (__mmask8) __U);
6232 }
6233 #else
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_permdf512_mask on X
   with immediate M, _mm512_undefined_pd () as the third operand and an
   all-ones mask.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (M), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))
6238
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_permdf512_mask on X
   with immediate M, W as the third operand and U as the mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (M), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U)))
6242
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_permdf512_mask on X
   with immediate M, _mm512_setzero_pd () as the third operand and U as
   the mask.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df) (__m512d) (X), \
     (int) (M), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
6247
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_permdi512_mask on X
   with immediate I, _mm512_undefined_epi32 () as the third operand and
   an all-ones mask.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di) (__m512i) (X), \
     (int) (I), \
     (__v8di) (__m512i) (_mm512_undefined_epi32 ()), \
     (__mmask8) (-1)))
6254
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_permdi512_mask on X
   with immediate I, _mm512_setzero_si512 () as the third operand and M
   as the mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di) (__m512i) (X), \
     (int) (I), \
     (__v8di) (__m512i) (_mm512_setzero_si512 ()), \
     (__mmask8) (M)))
6261
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_permdi512_mask on X
   with immediate I, W as the third operand and M as the mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di) (__m512i) (X), \
     (int) (I), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (M)))
6267 #endif
6268
6269 extern __inline __m512i
6270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6271 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6272 {
6273   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6274                                                      (__v8di) __X,
6275                                                      (__v8di)
6276                                                      _mm512_setzero_si512 (),
6277                                                      __M);
6278 }
6279
6280 extern __inline __m512i
6281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6282 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6283 {
6284   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6285                                                      (__v8di) __X,
6286                                                      (__v8di)
6287                                                      _mm512_undefined_epi32 (),
6288                                                      (__mmask8) -1);
6289 }
6290
6291 extern __inline __m512i
6292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6293 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6294                                __m512i __Y)
6295 {
6296   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6297                                                      (__v8di) __X,
6298                                                      (__v8di) __W,
6299                                                      __M);
6300 }
6301
6302 extern __inline __m512i
6303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6305 {
6306   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6307                                                      (__v16si) __X,
6308                                                      (__v16si)
6309                                                      _mm512_setzero_si512 (),
6310                                                      __M);
6311 }
6312
6313 extern __inline __m512i
6314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6316 {
6317   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6318                                                      (__v16si) __X,
6319                                                      (__v16si)
6320                                                      _mm512_undefined_epi32 (),
6321                                                      (__mmask16) -1);
6322 }
6323
6324 extern __inline __m512i
6325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6326 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6327                                __m512i __Y)
6328 {
6329   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6330                                                      (__v16si) __X,
6331                                                      (__v16si) __W,
6332                                                      __M);
6333 }
6334
6335 extern __inline __m512d
6336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6337 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6338 {
6339   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6340                                                      (__v8di) __X,
6341                                                      (__v8df)
6342                                                      _mm512_undefined_pd (),
6343                                                      (__mmask8) -1);
6344 }
6345
6346 extern __inline __m512d
6347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6348 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6349 {
6350   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6351                                                      (__v8di) __X,
6352                                                      (__v8df) __W,
6353                                                      (__mmask8) __U);
6354 }
6355
6356 extern __inline __m512d
6357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6358 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6359 {
6360   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6361                                                      (__v8di) __X,
6362                                                      (__v8df)
6363                                                      _mm512_setzero_pd (),
6364                                                      (__mmask8) __U);
6365 }
6366
6367 extern __inline __m512
6368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6369 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6370 {
6371   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6372                                                     (__v16si) __X,
6373                                                     (__v16sf)
6374                                                     _mm512_undefined_ps (),
6375                                                     (__mmask16) -1);
6376 }
6377
6378 extern __inline __m512
6379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6380 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6381 {
6382   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6383                                                     (__v16si) __X,
6384                                                     (__v16sf) __W,
6385                                                     (__mmask16) __U);
6386 }
6387
6388 extern __inline __m512
6389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6390 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6391 {
6392   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6393                                                     (__v16si) __X,
6394                                                     (__v16sf)
6395                                                     _mm512_setzero_ps (),
6396                                                     (__mmask16) __U);
6397 }
6398
6399 #ifdef __OPTIMIZE__
6400 extern __inline __m512
6401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6402 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6403 {
6404   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6405                                                  (__v16sf) __V, __imm,
6406                                                  (__v16sf)
6407                                                  _mm512_undefined_ps (),
6408                                                  (__mmask16) -1);
6409 }
6410
6411 extern __inline __m512
6412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6413 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6414                         __m512 __V, const int __imm)
6415 {
6416   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6417                                                  (__v16sf) __V, __imm,
6418                                                  (__v16sf) __W,
6419                                                  (__mmask16) __U);
6420 }
6421
6422 extern __inline __m512
6423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6425 {
6426   return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6427                                                  (__v16sf) __V, __imm,
6428                                                  (__v16sf)
6429                                                  _mm512_setzero_ps (),
6430                                                  (__mmask16) __U);
6431 }
6432
6433 extern __inline __m512d
6434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6435 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6436 {
6437   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6438                                                   (__v8df) __V, __imm,
6439                                                   (__v8df)
6440                                                   _mm512_undefined_pd (),
6441                                                   (__mmask8) -1);
6442 }
6443
6444 extern __inline __m512d
6445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6447                         __m512d __V, const int __imm)
6448 {
6449   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6450                                                   (__v8df) __V, __imm,
6451                                                   (__v8df) __W,
6452                                                   (__mmask8) __U);
6453 }
6454
6455 extern __inline __m512d
6456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6457 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6458                          const int __imm)
6459 {
6460   return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6461                                                   (__v8df) __V, __imm,
6462                                                   (__v8df)
6463                                                   _mm512_setzero_pd (),
6464                                                   (__mmask8) __U);
6465 }
6466
6467 extern __inline __m512d
6468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6469 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6470                           const int __imm, const int __R)
6471 {
6472   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6473                                                       (__v8df) __B,
6474                                                       (__v8di) __C,
6475                                                       __imm,
6476                                                       (__mmask8) -1, __R);
6477 }
6478
6479 extern __inline __m512d
6480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6481 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6482                                __m512i __C, const int __imm, const int __R)
6483 {
6484   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6485                                                       (__v8df) __B,
6486                                                       (__v8di) __C,
6487                                                       __imm,
6488                                                       (__mmask8) __U, __R);
6489 }
6490
6491 extern __inline __m512d
6492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6493 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6494                                 __m512i __C, const int __imm, const int __R)
6495 {
6496   return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6497                                                        (__v8df) __B,
6498                                                        (__v8di) __C,
6499                                                        __imm,
6500                                                        (__mmask8) __U, __R);
6501 }
6502
6503 extern __inline __m512
6504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6505 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6506                           const int __imm, const int __R)
6507 {
6508   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6509                                                      (__v16sf) __B,
6510                                                      (__v16si) __C,
6511                                                      __imm,
6512                                                      (__mmask16) -1, __R);
6513 }
6514
6515 extern __inline __m512
6516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6517 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6518                                __m512i __C, const int __imm, const int __R)
6519 {
6520   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6521                                                      (__v16sf) __B,
6522                                                      (__v16si) __C,
6523                                                      __imm,
6524                                                      (__mmask16) __U, __R);
6525 }
6526
6527 extern __inline __m512
6528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6529 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6530                                 __m512i __C, const int __imm, const int __R)
6531 {
6532   return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6533                                                       (__v16sf) __B,
6534                                                       (__v16si) __C,
6535                                                       __imm,
6536                                                       (__mmask16) __U, __R);
6537 }
6538
6539 extern __inline __m128d
6540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6541 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6542                        const int __imm, const int __R)
6543 {
6544   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6545                                                    (__v2df) __B,
6546                                                    (__v2di) __C, __imm,
6547                                                    (__mmask8) -1, __R);
6548 }
6549
6550 extern __inline __m128d
6551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6552 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6553                             __m128i __C, const int __imm, const int __R)
6554 {
6555   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6556                                                    (__v2df) __B,
6557                                                    (__v2di) __C, __imm,
6558                                                    (__mmask8) __U, __R);
6559 }
6560
6561 extern __inline __m128d
6562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6563 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6564                              __m128i __C, const int __imm, const int __R)
6565 {
6566   return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6567                                                     (__v2df) __B,
6568                                                     (__v2di) __C,
6569                                                     __imm,
6570                                                     (__mmask8) __U, __R);
6571 }
6572
6573 extern __inline __m128
6574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6575 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6576                        const int __imm, const int __R)
6577 {
6578   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6579                                                   (__v4sf) __B,
6580                                                   (__v4si) __C, __imm,
6581                                                   (__mmask8) -1, __R);
6582 }
6583
6584 extern __inline __m128
6585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6586 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6587                             __m128i __C, const int __imm, const int __R)
6588 {
6589   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6590                                                   (__v4sf) __B,
6591                                                   (__v4si) __C, __imm,
6592                                                   (__mmask8) __U, __R);
6593 }
6594
6595 extern __inline __m128
6596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6597 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6598                              __m128i __C, const int __imm, const int __R)
6599 {
6600   return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6601                                                    (__v4sf) __B,
6602                                                    (__v4si) __C, __imm,
6603                                                    (__mmask8) __U, __R);
6604 }
6605
6606 #else
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_shufpd512_mask on X
   and Y with immediate C, _mm512_undefined_pd () as the fourth operand
   and an all-ones mask.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))
6612
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_shufpd512_mask on X
   and Y with immediate C, W as the fourth operand and U as the mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U)))
6618
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_shufpd512_mask on X
   and Y with immediate C, _mm512_setzero_pd () as the fourth operand
   and U as the mask.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
6624
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_shufps512_mask on X
   and Y with immediate C, _mm512_undefined_ps () as the fourth operand
   and an all-ones mask.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_undefined_ps (), \
     (__mmask16) -1))
6630
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_shufps512_mask on X
   and Y with immediate C, W as the fourth operand and U as the mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U)))
6636
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_shufps512_mask on X
   and Y with immediate C, _mm512_setzero_ps () as the fourth operand
   and U as the mask.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_setzero_ps (), \
     (__mmask16) (U)))
6642
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_fixupimmpd512_mask on
   X, Y, Z with immediate C, an all-ones mask and R as the trailing
   argument.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     (R)))
6647
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_fixupimmpd512_mask on
   X, Y, Z with immediate C, U as the mask and R as the trailing
   argument.  */
#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))
6652
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to the separate
   __builtin_ia32_fixupimmpd512_maskz builtin on X, Y, Z with immediate
   C, U as the mask and R as the trailing argument.  */
#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))
6657
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_fixupimmps512_mask on
   X, Y, Z with immediate C, an all-ones mask and R as the trailing
   argument.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (-1), \
     (R)))
6662
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_fixupimmps512_mask on
   X, Y, Z with immediate C, U as the mask and R as the trailing
   argument.  */
#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), \
     (R)))
6667
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to the separate
   __builtin_ia32_fixupimmps512_maskz builtin on X, Y, Z with immediate
   C, U as the mask and R as the trailing argument.  */
#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), \
     (R)))
6672
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_fixupimmsd_mask on X,
   Y, Z with immediate C, an all-ones mask and R as the trailing
   argument.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     (R)))
6677
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_fixupimmsd_mask on X,
   Y, Z with immediate C, U as the mask and R as the trailing
   argument.  */
#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))
6682
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to the separate
   __builtin_ia32_fixupimmsd_maskz builtin on X, Y, Z with immediate C,
   U as the mask and R as the trailing argument.  */
#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (__v2di) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))
6687
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_fixupimmss_mask on X,
   Y, Z with immediate C, an all-ones mask and R as the trailing
   argument.  */
#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     (R)))
6692
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to __builtin_ia32_fixupimmss_mask on X,
   Y, Z with immediate C, U as the mask and R as the trailing
   argument.  */
#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))
6697
/* Macro form used in the #else branch of the enclosing
   #ifdef __OPTIMIZE__.  Expands to the separate
   __builtin_ia32_fixupimmss_maskz builtin on X, Y, Z with immediate C,
   U as the mask and R as the trailing argument.  */
#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (__v4si) (__m128i) (Z), \
     (int) (C), \
     (__mmask8) (U), \
     (R)))
6702 #endif
6703
6704 extern __inline __m512
6705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6706 _mm512_movehdup_ps (__m512 __A)
6707 {
6708   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6709                                                    (__v16sf)
6710                                                    _mm512_undefined_ps (),
6711                                                    (__mmask16) -1);
6712 }
6713
6714 extern __inline __m512
6715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6716 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6717 {
6718   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6719                                                    (__v16sf) __W,
6720                                                    (__mmask16) __U);
6721 }
6722
6723 extern __inline __m512
6724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6725 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6726 {
6727   return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6728                                                    (__v16sf)
6729                                                    _mm512_setzero_ps (),
6730                                                    (__mmask16) __U);
6731 }
6732
6733 extern __inline __m512
6734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735 _mm512_moveldup_ps (__m512 __A)
6736 {
6737   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6738                                                    (__v16sf)
6739                                                    _mm512_undefined_ps (),
6740                                                    (__mmask16) -1);
6741 }
6742
6743 extern __inline __m512
6744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6745 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6746 {
6747   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6748                                                    (__v16sf) __W,
6749                                                    (__mmask16) __U);
6750 }
6751
6752 extern __inline __m512
6753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6754 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6755 {
6756   return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6757                                                    (__v16sf)
6758                                                    _mm512_setzero_ps (),
6759                                                    (__mmask16) __U);
6760 }
6761
6762 extern __inline __m512i
6763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6764 _mm512_or_si512 (__m512i __A, __m512i __B)
6765 {
6766   return (__m512i) ((__v16su) __A | (__v16su) __B);
6767 }
6768
6769 extern __inline __m512i
6770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6771 _mm512_or_epi32 (__m512i __A, __m512i __B)
6772 {
6773   return (__m512i) ((__v16su) __A | (__v16su) __B);
6774 }
6775
6776 extern __inline __m512i
6777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6778 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6779 {
6780   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6781                                                 (__v16si) __B,
6782                                                 (__v16si) __W,
6783                                                 (__mmask16) __U);
6784 }
6785
6786 extern __inline __m512i
6787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6788 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6789 {
6790   return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6791                                                 (__v16si) __B,
6792                                                 (__v16si)
6793                                                 _mm512_setzero_si512 (),
6794                                                 (__mmask16) __U);
6795 }
6796
6797 extern __inline __m512i
6798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6799 _mm512_or_epi64 (__m512i __A, __m512i __B)
6800 {
6801   return (__m512i) ((__v8du) __A | (__v8du) __B);
6802 }
6803
6804 extern __inline __m512i
6805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6806 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6807 {
6808   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6809                                                 (__v8di) __B,
6810                                                 (__v8di) __W,
6811                                                 (__mmask8) __U);
6812 }
6813
6814 extern __inline __m512i
6815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6816 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6817 {
6818   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6819                                                 (__v8di) __B,
6820                                                 (__v8di)
6821                                                 _mm512_setzero_si512 (),
6822                                                 (__mmask8) __U);
6823 }
6824
6825 extern __inline __m512i
6826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6827 _mm512_xor_si512 (__m512i __A, __m512i __B)
6828 {
6829   return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6830 }
6831
6832 extern __inline __m512i
6833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6835 {
6836   return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6837 }
6838
6839 extern __inline __m512i
6840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6841 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6842 {
6843   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6844                                                  (__v16si) __B,
6845                                                  (__v16si) __W,
6846                                                  (__mmask16) __U);
6847 }
6848
6849 extern __inline __m512i
6850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6851 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6852 {
6853   return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6854                                                  (__v16si) __B,
6855                                                  (__v16si)
6856                                                  _mm512_setzero_si512 (),
6857                                                  (__mmask16) __U);
6858 }
6859
6860 extern __inline __m512i
6861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6862 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6863 {
6864   return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6865 }
6866
6867 extern __inline __m512i
6868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6869 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6870 {
6871   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6872                                                  (__v8di) __B,
6873                                                  (__v8di) __W,
6874                                                  (__mmask8) __U);
6875 }
6876
6877 extern __inline __m512i
6878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6879 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6880 {
6881   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6882                                                  (__v8di) __B,
6883                                                  (__v8di)
6884                                                  _mm512_setzero_si512 (),
6885                                                  (__mmask8) __U);
6886 }
6887
6888 #ifdef __OPTIMIZE__
6889 extern __inline __m512i
6890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6891 _mm512_rol_epi32 (__m512i __A, const int __B)
6892 {
6893   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6894                                                  (__v16si)
6895                                                  _mm512_undefined_epi32 (),
6896                                                  (__mmask16) -1);
6897 }
6898
6899 extern __inline __m512i
6900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6901 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6902 {
6903   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6904                                                  (__v16si) __W,
6905                                                  (__mmask16) __U);
6906 }
6907
6908 extern __inline __m512i
6909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6910 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6911 {
6912   return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6913                                                  (__v16si)
6914                                                  _mm512_setzero_si512 (),
6915                                                  (__mmask16) __U);
6916 }
6917
6918 extern __inline __m512i
6919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6920 _mm512_ror_epi32 (__m512i __A, int __B)
6921 {
6922   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6923                                                  (__v16si)
6924                                                  _mm512_undefined_epi32 (),
6925                                                  (__mmask16) -1);
6926 }
6927
6928 extern __inline __m512i
6929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6930 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6931 {
6932   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6933                                                  (__v16si) __W,
6934                                                  (__mmask16) __U);
6935 }
6936
6937 extern __inline __m512i
6938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6939 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6940 {
6941   return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6942                                                  (__v16si)
6943                                                  _mm512_setzero_si512 (),
6944                                                  (__mmask16) __U);
6945 }
6946
6947 extern __inline __m512i
6948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6949 _mm512_rol_epi64 (__m512i __A, const int __B)
6950 {
6951   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6952                                                  (__v8di)
6953                                                  _mm512_undefined_epi32 (),
6954                                                  (__mmask8) -1);
6955 }
6956
6957 extern __inline __m512i
6958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6959 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6960 {
6961   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6962                                                  (__v8di) __W,
6963                                                  (__mmask8) __U);
6964 }
6965
6966 extern __inline __m512i
6967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6968 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6969 {
6970   return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6971                                                  (__v8di)
6972                                                  _mm512_setzero_si512 (),
6973                                                  (__mmask8) __U);
6974 }
6975
6976 extern __inline __m512i
6977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6978 _mm512_ror_epi64 (__m512i __A, int __B)
6979 {
6980   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6981                                                  (__v8di)
6982                                                  _mm512_undefined_epi32 (),
6983                                                  (__mmask8) -1);
6984 }
6985
6986 extern __inline __m512i
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6989 {
6990   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6991                                                  (__v8di) __W,
6992                                                  (__mmask8) __U);
6993 }
6994
6995 extern __inline __m512i
6996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6997 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6998 {
6999   return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7000                                                  (__v8di)
7001                                                  _mm512_setzero_si512 (),
7002                                                  (__mmask8) __U);
7003 }
7004
7005 #else
/* Macro forms of the 32-bit rotate intrinsics, selected when
   __OPTIMIZE__ is not defined.  Each expands to the same builtin call as
   the inline wrapper of the same name; presumably macros are used here
   so the count N reaches the builtin as a constant expression.
   X is the source vector, N the count, W the third (merge) operand and
   K the 16-bit mask.  */
#define _mm512_rol_epi32(X, N)                                          \
  ((__m512i) __builtin_ia32_prold512_mask (                             \
     (__v16si)(__m512i)(X), (int)(N),                                   \
     (__v16si)_mm512_undefined_epi32 (), (__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, K, X, N)                               \
  ((__m512i) __builtin_ia32_prold512_mask (                             \
     (__v16si)(__m512i)(X), (int)(N),                                   \
     (__v16si)(__m512i)(W), (__mmask16)(K)))
#define _mm512_maskz_rol_epi32(K, X, N)                                 \
  ((__m512i) __builtin_ia32_prold512_mask (                             \
     (__v16si)(__m512i)(X), (int)(N),                                   \
     (__v16si)_mm512_setzero_si512 (), (__mmask16)(K)))
#define _mm512_ror_epi32(X, N)                                          \
  ((__m512i) __builtin_ia32_prord512_mask (                             \
     (__v16si)(__m512i)(X), (int)(N),                                   \
     (__v16si)_mm512_undefined_epi32 (), (__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, K, X, N)                               \
  ((__m512i) __builtin_ia32_prord512_mask (                             \
     (__v16si)(__m512i)(X), (int)(N),                                   \
     (__v16si)(__m512i)(W), (__mmask16)(K)))
#define _mm512_maskz_ror_epi32(K, X, N)                                 \
  ((__m512i) __builtin_ia32_prord512_mask (                             \
     (__v16si)(__m512i)(X), (int)(N),                                   \
     (__v16si)_mm512_setzero_si512 (), (__mmask16)(K)))
/* Macro forms of the 64-bit rotate intrinsics, selected when
   __OPTIMIZE__ is not defined.  Each expands to the same builtin call as
   the inline wrapper of the same name.  X is the source vector, N the
   count, W the third (merge) operand and K the 8-bit mask.  */
#define _mm512_rol_epi64(X, N)                                          \
  ((__m512i) __builtin_ia32_prolq512_mask (                             \
     (__v8di)(__m512i)(X), (int)(N),                                    \
     (__v8di)_mm512_undefined_epi32 (), (__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, K, X, N)                               \
  ((__m512i) __builtin_ia32_prolq512_mask (                             \
     (__v8di)(__m512i)(X), (int)(N),                                    \
     (__v8di)(__m512i)(W), (__mmask8)(K)))
#define _mm512_maskz_rol_epi64(K, X, N)                                 \
  ((__m512i) __builtin_ia32_prolq512_mask (                             \
     (__v8di)(__m512i)(X), (int)(N),                                    \
     (__v8di)_mm512_setzero_si512 (), (__mmask8)(K)))

#define _mm512_ror_epi64(X, N)                                          \
  ((__m512i) __builtin_ia32_prorq512_mask (                             \
     (__v8di)(__m512i)(X), (int)(N),                                    \
     (__v8di)_mm512_undefined_epi32 (), (__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, K, X, N)                               \
  ((__m512i) __builtin_ia32_prorq512_mask (                             \
     (__v8di)(__m512i)(X), (int)(N),                                    \
     (__v8di)(__m512i)(W), (__mmask8)(K)))
#define _mm512_maskz_ror_epi64(K, X, N)                                 \
  ((__m512i) __builtin_ia32_prorq512_mask (                             \
     (__v8di)(__m512i)(X), (int)(N),                                    \
     (__v8di)_mm512_setzero_si512 (), (__mmask8)(K)))
7067 #endif
7068
7069 extern __inline __m512i
7070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7071 _mm512_and_si512 (__m512i __A, __m512i __B)
7072 {
7073   return (__m512i) ((__v16su) __A & (__v16su) __B);
7074 }
7075
7076 extern __inline __m512i
7077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7078 _mm512_and_epi32 (__m512i __A, __m512i __B)
7079 {
7080   return (__m512i) ((__v16su) __A & (__v16su) __B);
7081 }
7082
7083 extern __inline __m512i
7084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7085 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7086 {
7087   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7088                                                  (__v16si) __B,
7089                                                  (__v16si) __W,
7090                                                  (__mmask16) __U);
7091 }
7092
7093 extern __inline __m512i
7094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7095 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7096 {
7097   return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7098                                                  (__v16si) __B,
7099                                                  (__v16si)
7100                                                  _mm512_setzero_si512 (),
7101                                                  (__mmask16) __U);
7102 }
7103
7104 extern __inline __m512i
7105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7106 _mm512_and_epi64 (__m512i __A, __m512i __B)
7107 {
7108   return (__m512i) ((__v8du) __A & (__v8du) __B);
7109 }
7110
7111 extern __inline __m512i
7112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7113 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7114 {
7115   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7116                                                  (__v8di) __B,
7117                                                  (__v8di) __W, __U);
7118 }
7119
7120 extern __inline __m512i
7121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7122 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7123 {
7124   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7125                                                  (__v8di) __B,
7126                                                  (__v8di)
7127                                                  _mm512_setzero_pd (),
7128                                                  __U);
7129 }
7130
7131 extern __inline __m512i
7132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7133 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7134 {
7135   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7136                                                   (__v16si) __B,
7137                                                   (__v16si)
7138                                                   _mm512_undefined_epi32 (),
7139                                                   (__mmask16) -1);
7140 }
7141
7142 extern __inline __m512i
7143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7144 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7145 {
7146   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7147                                                   (__v16si) __B,
7148                                                   (__v16si)
7149                                                   _mm512_undefined_epi32 (),
7150                                                   (__mmask16) -1);
7151 }
7152
7153 extern __inline __m512i
7154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7155 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7156 {
7157   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7158                                                   (__v16si) __B,
7159                                                   (__v16si) __W,
7160                                                   (__mmask16) __U);
7161 }
7162
7163 extern __inline __m512i
7164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7165 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7166 {
7167   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7168                                                   (__v16si) __B,
7169                                                   (__v16si)
7170                                                   _mm512_setzero_si512 (),
7171                                                   (__mmask16) __U);
7172 }
7173
7174 extern __inline __m512i
7175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7176 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7177 {
7178   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7179                                                   (__v8di) __B,
7180                                                   (__v8di)
7181                                                   _mm512_undefined_epi32 (),
7182                                                   (__mmask8) -1);
7183 }
7184
7185 extern __inline __m512i
7186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7187 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7188 {
7189   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7190                                                   (__v8di) __B,
7191                                                   (__v8di) __W, __U);
7192 }
7193
7194 extern __inline __m512i
7195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7196 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7197 {
7198   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7199                                                   (__v8di) __B,
7200                                                   (__v8di)
7201                                                   _mm512_setzero_pd (),
7202                                                   __U);
7203 }
7204
7205 extern __inline __mmask16
7206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7207 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7208 {
7209   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7210                                                 (__v16si) __B,
7211                                                 (__mmask16) -1);
7212 }
7213
7214 extern __inline __mmask16
7215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7216 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7217 {
7218   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7219                                                 (__v16si) __B, __U);
7220 }
7221
7222 extern __inline __mmask8
7223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7224 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7225 {
7226   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7227                                                (__v8di) __B,
7228                                                (__mmask8) -1);
7229 }
7230
7231 extern __inline __mmask8
7232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7233 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7234 {
7235   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7236 }
7237
7238 extern __inline __mmask16
7239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7240 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7241 {
7242   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7243                                                  (__v16si) __B,
7244                                                  (__mmask16) -1);
7245 }
7246
7247 extern __inline __mmask16
7248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7249 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7250 {
7251   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7252                                                  (__v16si) __B, __U);
7253 }
7254
7255 extern __inline __mmask8
7256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7257 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7258 {
7259   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7260                                                 (__v8di) __B,
7261                                                 (__mmask8) -1);
7262 }
7263
7264 extern __inline __mmask8
7265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7266 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7267 {
7268   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7269                                                 (__v8di) __B, __U);
7270 }
7271
7272 extern __inline __m512i
7273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7274 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7275 {
7276   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7277                                                      (__v16si) __B,
7278                                                      (__v16si)
7279                                                      _mm512_undefined_epi32 (),
7280                                                      (__mmask16) -1);
7281 }
7282
7283 extern __inline __m512i
7284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7285 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7286                             __m512i __B)
7287 {
7288   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7289                                                      (__v16si) __B,
7290                                                      (__v16si) __W,
7291                                                      (__mmask16) __U);
7292 }
7293
7294 extern __inline __m512i
7295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7296 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7297 {
7298   return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7299                                                      (__v16si) __B,
7300                                                      (__v16si)
7301                                                      _mm512_setzero_si512 (),
7302                                                      (__mmask16) __U);
7303 }
7304
7305 extern __inline __m512i
7306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7307 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7308 {
7309   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7310                                                       (__v8di) __B,
7311                                                       (__v8di)
7312                                                       _mm512_undefined_epi32 (),
7313                                                       (__mmask8) -1);
7314 }
7315
7316 extern __inline __m512i
7317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7318 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7319 {
7320   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7321                                                       (__v8di) __B,
7322                                                       (__v8di) __W,
7323                                                       (__mmask8) __U);
7324 }
7325
7326 extern __inline __m512i
7327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7328 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7329 {
7330   return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7331                                                       (__v8di) __B,
7332                                                       (__v8di)
7333                                                       _mm512_setzero_si512 (),
7334                                                       (__mmask8) __U);
7335 }
7336
7337 extern __inline __m512i
7338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7339 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7340 {
7341   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7342                                                      (__v16si) __B,
7343                                                      (__v16si)
7344                                                      _mm512_undefined_epi32 (),
7345                                                      (__mmask16) -1);
7346 }
7347
7348 extern __inline __m512i
7349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7350 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7351                             __m512i __B)
7352 {
7353   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7354                                                      (__v16si) __B,
7355                                                      (__v16si) __W,
7356                                                      (__mmask16) __U);
7357 }
7358
7359 extern __inline __m512i
7360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7361 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7362 {
7363   return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7364                                                      (__v16si) __B,
7365                                                      (__v16si)
7366                                                      _mm512_setzero_si512 (),
7367                                                      (__mmask16) __U);
7368 }
7369
7370 extern __inline __m512i
7371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7372 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7373 {
7374   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7375                                                       (__v8di) __B,
7376                                                       (__v8di)
7377                                                       _mm512_undefined_epi32 (),
7378                                                       (__mmask8) -1);
7379 }
7380
7381 extern __inline __m512i
7382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7383 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7384 {
7385   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7386                                                       (__v8di) __B,
7387                                                       (__v8di) __W,
7388                                                       (__mmask8) __U);
7389 }
7390
7391 extern __inline __m512i
7392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7393 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7394 {
7395   return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7396                                                       (__v8di) __B,
7397                                                       (__v8di)
7398                                                       _mm512_setzero_si512 (),
7399                                                       (__mmask8) __U);
7400 }
7401
7402 #ifdef __x86_64__
7403 #ifdef __OPTIMIZE__
7404 extern __inline unsigned long long
7405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7406 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7407 {
7408   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7409 }
7410
7411 extern __inline long long
7412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7414 {
7415   return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7416 }
7417
7418 extern __inline long long
7419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7420 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7421 {
7422   return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7423 }
7424
7425 extern __inline unsigned long long
7426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7427 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7428 {
7429   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7430 }
7431
7432 extern __inline long long
7433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7434 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7435 {
7436   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7437 }
7438
7439 extern __inline long long
7440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7442 {
7443   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7444 }
7445 #else
/* Macro forms of the scalar float to 64-bit integer conversions,
   selected when __OPTIMIZE__ is not defined.  V is the source vector and
   R the second builtin operand; both are passed through as written.  */
#define _mm_cvt_roundss_u64(V, R) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 (V, R))

#define _mm_cvt_roundss_si64(V, R) \
  ((long long) __builtin_ia32_vcvtss2si64 (V, R))

#define _mm_cvt_roundss_i64(V, R) \
  ((long long) __builtin_ia32_vcvtss2si64 (V, R))

#define _mm_cvtt_roundss_u64(V, R) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 (V, R))

#define _mm_cvtt_roundss_i64(V, R) \
  ((long long) __builtin_ia32_vcvttss2si64 (V, R))

#define _mm_cvtt_roundss_si64(V, R) \
  ((long long) __builtin_ia32_vcvttss2si64 (V, R))
7463 #endif
7464 #endif
7465
7466 #ifdef __OPTIMIZE__
7467 extern __inline unsigned
7468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7470 {
7471   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7472 }
7473
7474 extern __inline int
7475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7476 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7477 {
7478   return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7479 }
7480
7481 extern __inline int
7482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7483 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7484 {
7485   return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7486 }
7487
7488 extern __inline unsigned
7489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7491 {
7492   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7493 }
7494
7495 extern __inline int
7496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7498 {
7499   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7500 }
7501
7502 extern __inline int
7503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7504 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7505 {
7506   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7507 }
7508 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each one passes
   its two arguments straight to the named builtin and casts the result
   to unsigned or int.  The _si32 and _i32 spellings expand to the same
   builtin.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 ((A), (B)))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 ((A), (B)))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 ((A), (B)))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 ((A), (B)))
7526 #endif
7527
7528 #ifdef __x86_64__
7529 #ifdef __OPTIMIZE__
7530 extern __inline unsigned long long
7531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7533 {
7534   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7535 }
7536
7537 extern __inline long long
7538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7539 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7540 {
7541   return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7542 }
7543
7544 extern __inline long long
7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7546 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7547 {
7548   return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7549 }
7550
7551 extern __inline unsigned long long
7552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7553 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7554 {
7555   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7556 }
7557
7558 extern __inline long long
7559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7560 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7561 {
7562   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7563 }
7564
7565 extern __inline long long
7566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7567 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7568 {
7569   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7570 }
7571 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each one passes
   its two arguments straight to the named builtin and casts the result
   to (unsigned) long long.  The _si64 and _i64 spellings expand to the
   same builtin.  */
#define _mm_cvt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 ((A), (B)))

#define _mm_cvt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvtsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 ((A), (B)))

#define _mm_cvtt_roundsd_si64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))

#define _mm_cvtt_roundsd_i64(A, B) \
  ((long long) __builtin_ia32_vcvttsd2si64 ((A), (B)))
7589 #endif
7590 #endif
7591
7592 #ifdef __OPTIMIZE__
7593 extern __inline unsigned
7594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7595 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7596 {
7597   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7598 }
7599
7600 extern __inline int
7601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7602 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7603 {
7604   return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7605 }
7606
7607 extern __inline int
7608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7609 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7610 {
7611   return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7612 }
7613
7614 extern __inline unsigned
7615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7616 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7617 {
7618   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7619 }
7620
7621 extern __inline int
7622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7623 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7624 {
7625   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7626 }
7627
7628 extern __inline int
7629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7630 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7631 {
7632   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7633 }
7634 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each one passes
   its two arguments straight to the named builtin and casts the result
   to unsigned or int.  The _si32 and _i32 spellings expand to the same
   builtin.  */
#define _mm_cvt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtsd2usi32 ((A), (B)))

#define _mm_cvt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvtsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttsd2usi32 ((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B) \
  ((int) __builtin_ia32_vcvttsd2si32 ((A), (B)))
7652 #endif
7653
7654 extern __inline __m512d
7655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656 _mm512_movedup_pd (__m512d __A)
7657 {
7658   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7659                                                    (__v8df)
7660                                                    _mm512_undefined_pd (),
7661                                                    (__mmask8) -1);
7662 }
7663
7664 extern __inline __m512d
7665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7667 {
7668   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7669                                                    (__v8df) __W,
7670                                                    (__mmask8) __U);
7671 }
7672
7673 extern __inline __m512d
7674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7675 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7676 {
7677   return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7678                                                    (__v8df)
7679                                                    _mm512_setzero_pd (),
7680                                                    (__mmask8) __U);
7681 }
7682
7683 extern __inline __m512d
7684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7685 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7686 {
7687   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7688                                                     (__v8df) __B,
7689                                                     (__v8df)
7690                                                     _mm512_undefined_pd (),
7691                                                     (__mmask8) -1);
7692 }
7693
7694 extern __inline __m512d
7695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7696 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7697 {
7698   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7699                                                     (__v8df) __B,
7700                                                     (__v8df) __W,
7701                                                     (__mmask8) __U);
7702 }
7703
7704 extern __inline __m512d
7705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7706 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7707 {
7708   return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7709                                                     (__v8df) __B,
7710                                                     (__v8df)
7711                                                     _mm512_setzero_pd (),
7712                                                     (__mmask8) __U);
7713 }
7714
7715 extern __inline __m512d
7716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7717 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7718 {
7719   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7720                                                     (__v8df) __B,
7721                                                     (__v8df)
7722                                                     _mm512_undefined_pd (),
7723                                                     (__mmask8) -1);
7724 }
7725
7726 extern __inline __m512d
7727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7728 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7729 {
7730   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7731                                                     (__v8df) __B,
7732                                                     (__v8df) __W,
7733                                                     (__mmask8) __U);
7734 }
7735
7736 extern __inline __m512d
7737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7738 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7739 {
7740   return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7741                                                     (__v8df) __B,
7742                                                     (__v8df)
7743                                                     _mm512_setzero_pd (),
7744                                                     (__mmask8) __U);
7745 }
7746
7747 extern __inline __m512
7748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7749 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7750 {
7751   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7752                                                    (__v16sf) __B,
7753                                                    (__v16sf)
7754                                                    _mm512_undefined_ps (),
7755                                                    (__mmask16) -1);
7756 }
7757
7758 extern __inline __m512
7759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7760 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7761 {
7762   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7763                                                    (__v16sf) __B,
7764                                                    (__v16sf) __W,
7765                                                    (__mmask16) __U);
7766 }
7767
7768 extern __inline __m512
7769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7770 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7771 {
7772   return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7773                                                    (__v16sf) __B,
7774                                                    (__v16sf)
7775                                                    _mm512_setzero_ps (),
7776                                                    (__mmask16) __U);
7777 }
7778
7779 #ifdef __OPTIMIZE__
7780 extern __inline __m512d
7781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7782 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7783 {
7784   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7785                                                     (__v8df)
7786                                                     _mm512_undefined_pd (),
7787                                                     (__mmask8) -1, __R);
7788 }
7789
7790 extern __inline __m512d
7791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7792 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7793                             const int __R)
7794 {
7795   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7796                                                     (__v8df) __W,
7797                                                     (__mmask8) __U, __R);
7798 }
7799
7800 extern __inline __m512d
7801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7802 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7803 {
7804   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7805                                                     (__v8df)
7806                                                     _mm512_setzero_pd (),
7807                                                     (__mmask8) __U, __R);
7808 }
7809
7810 extern __inline __m512
7811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7812 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7813 {
7814   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7815                                                     (__v16sf)
7816                                                     _mm512_undefined_ps (),
7817                                                     (__mmask16) -1, __R);
7818 }
7819
7820 extern __inline __m512
7821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7822 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7823                             const int __R)
7824 {
7825   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7826                                                     (__v16sf) __W,
7827                                                     (__mmask16) __U, __R);
7828 }
7829
7830 extern __inline __m512
7831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7832 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7833 {
7834   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7835                                                     (__v16sf)
7836                                                     _mm512_setzero_ps (),
7837                                                     (__mmask16) __U, __R);
7838 }
7839
7840 extern __inline __m256i
7841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7842 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7843 {
7844   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7845                                                      __I,
7846                                                      (__v16hi)
7847                                                      _mm256_undefined_si256 (),
7848                                                      -1);
7849 }
7850
7851 extern __inline __m256i
7852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7853 _mm512_cvtps_ph (__m512 __A, const int __I)
7854 {
7855   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7856                                                      __I,
7857                                                      (__v16hi)
7858                                                      _mm256_undefined_si256 (),
7859                                                      -1);
7860 }
7861
7862 extern __inline __m256i
7863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7864 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7865                             const int __I)
7866 {
7867   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7868                                                      __I,
7869                                                      (__v16hi) __U,
7870                                                      (__mmask16) __W);
7871 }
7872
7873 extern __inline __m256i
7874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7875 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7876 {
7877   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7878                                                      __I,
7879                                                      (__v16hi) __U,
7880                                                      (__mmask16) __W);
7881 }
7882
7883 extern __inline __m256i
7884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7885 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7886 {
7887   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7888                                                      __I,
7889                                                      (__v16hi)
7890                                                      _mm256_setzero_si256 (),
7891                                                      (__mmask16) __W);
7892 }
7893
7894 extern __inline __m256i
7895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7896 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7897 {
7898   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7899                                                      __I,
7900                                                      (__v16hi)
7901                                                      _mm256_setzero_si256 (),
7902                                                      (__mmask16) __W);
7903 }
7904 #else
/* Macro forms of the ps -> pd and ph -> ps conversions, used when
   __OPTIMIZE__ is not defined.  Each forwards its arguments to the same
   builtin as the inline function of the same name.

   Every expansion is wrapped in an outer pair of parentheses.  Without
   them the expansion is a bare cast expression, so a postfix operator
   written after the invocation (for instance a vector subscript) would
   bind to the builtin call before the cast is applied.  Macro
   arguments are parenthesized as well.  */
#define _mm512_cvt_roundps_pd(A, B)                                     \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A),                      \
     (__v8df) _mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B)                          \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B)                            \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A),                      \
     (__v8df) _mm512_setzero_pd (), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B)                                     \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A),            \
     (__v16sf) _mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B)                          \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A),            \
     (__v16sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B)                            \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) (A),            \
     (__v16sf) _mm512_setzero_ps (), (U), (B)))
7922
/* Macro forms of the ps -> ph conversions, used when __OPTIMIZE__ is
   not defined.  Each forwards to __builtin_ia32_vcvtps2ph512_mask with
   the same operands as the inline function of the same name.  In the
   mask forms U is the __m256i vector operand and W is the mask.

   The argument A is parenthesized before the casts are applied.
   Written as "(__v16sf)(__m512) A", an argument containing an operator
   of lower precedence than a cast (e.g. a conditional expression)
   would have the cast applied to its first operand only.  */
#define _mm512_cvt_roundps_ph(A, I)                                     \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A),  \
     (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I)                                           \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A),  \
     (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I)                          \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A),  \
     (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I)                                \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A),  \
     (int) (I), (__v16hi) (__m256i) (U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I)                            \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A),  \
     (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I)                                  \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) (__m512) (A),  \
     (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
7941 #endif
7942
7943 #ifdef __OPTIMIZE__
7944 extern __inline __m256
7945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7946 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7947 {
7948   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7949                                                    (__v8sf)
7950                                                    _mm256_undefined_ps (),
7951                                                    (__mmask8) -1, __R);
7952 }
7953
7954 extern __inline __m256
7955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7956 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7957                             const int __R)
7958 {
7959   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7960                                                    (__v8sf) __W,
7961                                                    (__mmask8) __U, __R);
7962 }
7963
7964 extern __inline __m256
7965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7966 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7967 {
7968   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7969                                                    (__v8sf)
7970                                                    _mm256_setzero_ps (),
7971                                                    (__mmask8) __U, __R);
7972 }
7973
7974 extern __inline __m128
7975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7976 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7977 {
7978   return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7979                                                  (__v2df) __B,
7980                                                  __R);
7981 }
7982
7983 extern __inline __m128d
7984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7985 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7986 {
7987   return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7988                                                   (__v4sf) __B,
7989                                                   __R);
7990 }
7991 #else
/* Macro forms of the pd -> ps, sd -> ss and ss -> sd conversions, used
   when __OPTIMIZE__ is not defined.  Each forwards its arguments to the
   same builtin as the inline function of the same name.

   Every expansion is wrapped in an outer pair of parentheses.  Without
   them the expansion is a bare cast expression, so a postfix operator
   written after the invocation would bind to the builtin call before
   the cast is applied.  Macro arguments are parenthesized as well.  */
#define _mm512_cvt_roundpd_ps(A, B)                                     \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A),                       \
     (__v8sf) _mm256_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)                          \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B)                            \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A),                       \
     (__v8sf) _mm256_setzero_ps (), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C)                                     \
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C)                                     \
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
8006 #endif
8007
8008 extern __inline void
8009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8011 {
8012   __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8013 }
8014
8015 extern __inline void
8016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8017 _mm512_stream_ps (float *__P, __m512 __A)
8018 {
8019   __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8020 }
8021
8022 extern __inline void
8023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8024 _mm512_stream_pd (double *__P, __m512d __A)
8025 {
8026   __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8027 }
8028
8029 extern __inline __m512i
8030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8031 _mm512_stream_load_si512 (void *__P)
8032 {
8033   return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8034 }
8035
/* Constants for mantissa extraction.  The enumerators are numbered
   consecutively from zero; the getmant wrappers in this file combine
   one of them with a _MM_MANTISSA_SIGN_ENUM value shifted left by
   two.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,        /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2 = 1,       /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1 = 2,       /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3     /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8044
/* Sign-handling selector for mantissa extraction.  The enumerators are
   numbered consecutively from zero.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,        /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero = 1,       /* sign = 0             */
  _MM_MANT_SIGN_nan = 2         /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8051
8052 #ifdef __OPTIMIZE__
8053 extern __inline __m128
8054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8055 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8056 {
8057   return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8058                                                     (__v4sf) __B,
8059                                                     __R);
8060 }
8061
8062 extern __inline __m128d
8063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8064 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8065 {
8066   return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8067                                                      (__v2df) __B,
8068                                                      __R);
8069 }
8070
8071 extern __inline __m512
8072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8073 _mm512_getexp_round_ps (__m512 __A, const int __R)
8074 {
8075   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8076                                                    (__v16sf)
8077                                                    _mm512_undefined_ps (),
8078                                                    (__mmask16) -1, __R);
8079 }
8080
8081 extern __inline __m512
8082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8083 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8084                              const int __R)
8085 {
8086   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8087                                                    (__v16sf) __W,
8088                                                    (__mmask16) __U, __R);
8089 }
8090
8091 extern __inline __m512
8092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8093 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8094 {
8095   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8096                                                    (__v16sf)
8097                                                    _mm512_setzero_ps (),
8098                                                    (__mmask16) __U, __R);
8099 }
8100
8101 extern __inline __m512d
8102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8103 _mm512_getexp_round_pd (__m512d __A, const int __R)
8104 {
8105   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8106                                                     (__v8df)
8107                                                     _mm512_undefined_pd (),
8108                                                     (__mmask8) -1, __R);
8109 }
8110
8111 extern __inline __m512d
8112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8114                              const int __R)
8115 {
8116   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8117                                                     (__v8df) __W,
8118                                                     (__mmask8) __U, __R);
8119 }
8120
8121 extern __inline __m512d
8122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8123 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8124 {
8125   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8126                                                     (__v8df)
8127                                                     _mm512_setzero_pd (),
8128                                                     (__mmask8) __U, __R);
8129 }
8130
8131 extern __inline __m512d
8132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8133 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8134                          _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8135 {
8136   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8137                                                      (__C << 2) | __B,
8138                                                      _mm512_undefined_pd (),
8139                                                      (__mmask8) -1, __R);
8140 }
8141
8142 extern __inline __m512d
8143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8144 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8145                               _MM_MANTISSA_NORM_ENUM __B,
8146                               _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8147 {
8148   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8149                                                      (__C << 2) | __B,
8150                                                      (__v8df) __W, __U,
8151                                                      __R);
8152 }
8153
8154 extern __inline __m512d
8155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8156 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8157                                _MM_MANTISSA_NORM_ENUM __B,
8158                                _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8159 {
8160   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8161                                                      (__C << 2) | __B,
8162                                                      (__v8df)
8163                                                      _mm512_setzero_pd (),
8164                                                      __U, __R);
8165 }
8166
8167 extern __inline __m512
8168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8169 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8170                          _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8171 {
8172   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8173                                                     (__C << 2) | __B,
8174                                                     _mm512_undefined_ps (),
8175                                                     (__mmask16) -1, __R);
8176 }
8177
8178 extern __inline __m512
8179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8181                               _MM_MANTISSA_NORM_ENUM __B,
8182                               _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8183 {
8184   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8185                                                     (__C << 2) | __B,
8186                                                     (__v16sf) __W, __U,
8187                                                     __R);
8188 }
8189
8190 extern __inline __m512
8191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8192 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8193                                _MM_MANTISSA_NORM_ENUM __B,
8194                                _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8195 {
8196   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8197                                                     (__C << 2) | __B,
8198                                                     (__v16sf)
8199                                                     _mm512_setzero_ps (),
8200                                                     __U, __R);
8201 }
8202
8203 extern __inline __m128d
8204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8205 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8206                       _MM_MANTISSA_NORM_ENUM __C,
8207                       _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8208 {
8209   return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8210                                                   (__v2df) __B,
8211                                                   (__D << 2) | __C,
8212                                                    __R);
8213 }
8214
8215 extern __inline __m128
8216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8217 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8218                       _MM_MANTISSA_NORM_ENUM __C,
8219                       _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8220 {
8221   return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8222                                                   (__v4sf) __B,
8223                                                   (__D << 2) | __C,
8224                                                   __R);
8225 }
8226
8227 #else
/* Macro forms of the getmant-with-rounding intrinsics.  In each macro
   N is the normalization selector and S the sign selector; they are
   combined into the builtin's immediate as ((S) << 2) | (N).  R is the
   rounding-control operand.  */

/* Packed double: all-ones mask, undefined pass-through.  */
#define _mm512_getmant_round_pd(A, N, S, R)                             \
  ((__m512d)                                                            \
   __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(A),              \
                                     (int)(((S)<<2) | (N)),             \
                                     (__v8df)(__m512d)                  \
                                     _mm512_undefined_pd(),             \
                                     (__mmask8)-1,                      \
                                     (R)))

/* Packed double: W is the pass-through operand, U the mask.  */
#define _mm512_mask_getmant_round_pd(W, U, A, N, S, R)                  \
  ((__m512d)                                                            \
   __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(A),              \
                                     (int)(((S)<<2) | (N)),             \
                                     (__v8df)(__m512d)(W),              \
                                     (__mmask8)(U),                     \
                                     (R)))

/* Packed double: zero vector as pass-through, U the mask.  */
#define _mm512_maskz_getmant_round_pd(U, A, N, S, R)                    \
  ((__m512d)                                                            \
   __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(A),              \
                                     (int)(((S)<<2) | (N)),             \
                                     (__v8df)(__m512d)                  \
                                     _mm512_setzero_pd(),               \
                                     (__mmask8)(U),                     \
                                     (R)))

/* Packed float: all-ones mask, undefined pass-through.  */
#define _mm512_getmant_round_ps(A, N, S, R)                             \
  ((__m512)                                                             \
   __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(A),              \
                                     (int)(((S)<<2) | (N)),             \
                                     (__v16sf)(__m512)                  \
                                     _mm512_undefined_ps(),             \
                                     (__mmask16)-1,                     \
                                     (R)))

/* Packed float: W is the pass-through operand, U the mask.  */
#define _mm512_mask_getmant_round_ps(W, U, A, N, S, R)                  \
  ((__m512)                                                             \
   __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(A),              \
                                     (int)(((S)<<2) | (N)),             \
                                     (__v16sf)(__m512)(W),              \
                                     (__mmask16)(U),                    \
                                     (R)))

/* Packed float: zero vector as pass-through, U the mask.  */
#define _mm512_maskz_getmant_round_ps(U, A, N, S, R)                    \
  ((__m512)                                                             \
   __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(A),              \
                                     (int)(((S)<<2) | (N)),             \
                                     (__v16sf)(__m512)                  \
                                     _mm512_setzero_ps(),               \
                                     (__mmask16)(U),                    \
                                     (R)))

/* Scalar double.  */
#define _mm_getmant_round_sd(A, B, N, S, R)                             \
  ((__m128d)                                                            \
   __builtin_ia32_getmantsd_round ((__v2df)(__m128d)(A),                \
                                   (__v2df)(__m128d)(B),                \
                                   (int)(((S)<<2) | (N)),               \
                                   (R)))

/* Scalar float.  */
#define _mm_getmant_round_ss(A, B, N, S, R)                             \
  ((__m128)                                                             \
   __builtin_ia32_getmantss_round ((__v4sf)(__m128)(A),                 \
                                   (__v4sf)(__m128)(B),                 \
                                   (int)(((S)<<2) | (N)),               \
                                   (R)))
8279
/* Macro forms of the getexp-with-rounding intrinsics.  R is the
   rounding-control operand; it is parenthesized in every expansion,
   like all the other macro parameters.  */

/* Scalar float.  */
#define _mm_getexp_round_ss(A, B, R)                                    \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf)(__m128)(A),      \
                                              (__v4sf)(__m128)(B),      \
                                              (R)))

/* Scalar double.  */
#define _mm_getexp_round_sd(A, B, R)                                    \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df)(__m128d)(A),    \
                                               (__v2df)(__m128d)(B),    \
                                               (R)))

/* Packed float: all-ones mask, undefined pass-through.  */
#define _mm512_getexp_round_ps(A, R)                                    \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),      \
                                             (__v16sf)                  \
                                             _mm512_undefined_ps(),     \
                                             (__mmask16)-1, (R)))

/* Packed float: W is the pass-through operand, U the mask.  */
#define _mm512_mask_getexp_round_ps(W, U, A, R)                         \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),      \
                                             (__v16sf)(__m512)(W),      \
                                             (__mmask16)(U), (R)))

/* Packed float: zero vector as pass-through, U the mask.  */
#define _mm512_maskz_getexp_round_ps(U, A, R)                           \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512)(A),      \
                                             (__v16sf)                  \
                                             _mm512_setzero_ps(),       \
                                             (__mmask16)(U), (R)))

/* Packed double: all-ones mask, undefined pass-through.  */
#define _mm512_getexp_round_pd(A, R)                                    \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),     \
                                              (__v8df)                  \
                                              _mm512_undefined_pd(),    \
                                              (__mmask8)-1, (R)))

/* Packed double: W is the pass-through operand, U the mask.  */
#define _mm512_mask_getexp_round_pd(W, U, A, R)                         \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),     \
                                              (__v8df)(__m512d)(W),     \
                                              (__mmask8)(U), (R)))

/* Packed double: zero vector as pass-through, U the mask.  */
#define _mm512_maskz_getexp_round_pd(U, A, R)                           \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d)(A),     \
                                              (__v8df)                  \
                                              _mm512_setzero_pd(),      \
                                              (__mmask8)(U), (R)))
8309 #endif
8310
8311 #ifdef __OPTIMIZE__
8312 extern __inline __m512
8313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8315 {
8316   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8317                                                   (__v16sf)
8318                                                   _mm512_undefined_ps (),
8319                                                   -1, __R);
8320 }
8321
8322 extern __inline __m512
8323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8324 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8325                                  const int __imm, const int __R)
8326 {
8327   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8328                                                   (__v16sf) __A,
8329                                                   (__mmask16) __B, __R);
8330 }
8331
8332 extern __inline __m512
8333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8335                                   const int __imm, const int __R)
8336 {
8337   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8338                                                   __imm,
8339                                                   (__v16sf)
8340                                                   _mm512_setzero_ps (),
8341                                                   (__mmask16) __A, __R);
8342 }
8343
8344 extern __inline __m512d
8345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8346 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8347 {
8348   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8349                                                    (__v8df)
8350                                                    _mm512_undefined_pd (),
8351                                                    -1, __R);
8352 }
8353
8354 extern __inline __m512d
8355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8356 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8357                                  __m512d __C, const int __imm, const int __R)
8358 {
8359   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8360                                                    (__v8df) __A,
8361                                                    (__mmask8) __B, __R);
8362 }
8363
8364 extern __inline __m512d
8365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8366 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8367                                   const int __imm, const int __R)
8368 {
8369   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8370                                                    __imm,
8371                                                    (__v8df)
8372                                                    _mm512_setzero_pd (),
8373                                                    (__mmask8) __A, __R);
8374 }
8375
8376 extern __inline __m128
8377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8378 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8379 {
8380   return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8381                                                    (__v4sf) __B, __imm, __R);
8382 }
8383
8384 extern __inline __m128d
8385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8387                          const int __R)
8388 {
8389   return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8390                                                     (__v2df) __B, __imm, __R);
8391 }
8392
8393 #else
/* Macro forms of the roundscale-with-rounding intrinsics.  R is the
   rounding-control operand; it is parenthesized in every expansion,
   like all the other macro parameters.  */

/* Packed float: A source, B immediate; all-ones mask.  */
#define _mm512_roundscale_round_ps(A, B, R)                             \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A),       \
                                            (int)(B),                   \
                                            (__v16sf)                   \
                                            _mm512_undefined_ps(),      \
                                            (__mmask16)(-1), (R)))

/* Packed float: A pass-through, B mask, C source, D immediate.  */
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)                  \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),       \
                                            (int)(D),                   \
                                            (__v16sf)(__m512)(A),       \
                                            (__mmask16)(B), (R)))

/* Packed float: A mask, B source, C immediate; zero pass-through.  */
#define _mm512_maskz_roundscale_round_ps(A, B, C, R)                    \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),       \
                                            (int)(C),                   \
                                            (__v16sf)                   \
                                            _mm512_setzero_ps(),        \
                                            (__mmask16)(A), (R)))

/* Packed double: A source, B immediate; all-ones mask.  */
#define _mm512_roundscale_round_pd(A, B, R)                             \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A),      \
                                             (int)(B),                  \
                                             (__v8df)                   \
                                             _mm512_undefined_pd(),     \
                                             (__mmask8)(-1), (R)))

/* Packed double: A pass-through, B mask, C source, D immediate.  */
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)                  \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),      \
                                             (int)(D),                  \
                                             (__v8df)(__m512d)(A),      \
                                             (__mmask8)(B), (R)))

/* Packed double: A mask, B source, C immediate; zero pass-through.  */
#define _mm512_maskz_roundscale_round_pd(A, B, C, R)                    \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),      \
                                             (int)(C),                  \
                                             (__v8df)                   \
                                             _mm512_setzero_pd(),       \
                                             (__mmask8)(A), (R)))

/* Scalar float: A and B operands, C immediate.  */
#define _mm_roundscale_round_ss(A, B, C, R)                             \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),       \
                                             (__v4sf)(__m128)(B),       \
                                             (int)(C), (R)))

/* Scalar double: A and B operands, C immediate.  */
#define _mm_roundscale_round_sd(A, B, C, R)                             \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),     \
                                              (__v2df)(__m128d)(B),     \
                                              (int)(C), (R)))
8426 #endif
8427
8428 extern __inline __m512
8429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8430 _mm512_floor_ps (__m512 __A)
8431 {
8432   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8433                                                   _MM_FROUND_FLOOR,
8434                                                   (__v16sf) __A, -1,
8435                                                   _MM_FROUND_CUR_DIRECTION);
8436 }
8437
8438 extern __inline __m512d
8439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8440 _mm512_floor_pd (__m512d __A)
8441 {
8442   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8443                                                    _MM_FROUND_FLOOR,
8444                                                    (__v8df) __A, -1,
8445                                                    _MM_FROUND_CUR_DIRECTION);
8446 }
8447
8448 extern __inline __m512
8449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8450 _mm512_ceil_ps (__m512 __A)
8451 {
8452   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8453                                                   _MM_FROUND_CEIL,
8454                                                   (__v16sf) __A, -1,
8455                                                   _MM_FROUND_CUR_DIRECTION);
8456 }
8457
8458 extern __inline __m512d
8459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8460 _mm512_ceil_pd (__m512d __A)
8461 {
8462   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8463                                                    _MM_FROUND_CEIL,
8464                                                    (__v8df) __A, -1,
8465                                                    _MM_FROUND_CUR_DIRECTION);
8466 }
8467
8468 extern __inline __m512
8469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8470 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8471 {
8472   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8473                                                   _MM_FROUND_FLOOR,
8474                                                   (__v16sf) __W, __U,
8475                                                   _MM_FROUND_CUR_DIRECTION);
8476 }
8477
8478 extern __inline __m512d
8479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8480 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8481 {
8482   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8483                                                    _MM_FROUND_FLOOR,
8484                                                    (__v8df) __W, __U,
8485                                                    _MM_FROUND_CUR_DIRECTION);
8486 }
8487
8488 extern __inline __m512
8489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8490 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8491 {
8492   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8493                                                   _MM_FROUND_CEIL,
8494                                                   (__v16sf) __W, __U,
8495                                                   _MM_FROUND_CUR_DIRECTION);
8496 }
8497
8498 extern __inline __m512d
8499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8500 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8501 {
8502   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8503                                                    _MM_FROUND_CEIL,
8504                                                    (__v8df) __W, __U,
8505                                                    _MM_FROUND_CUR_DIRECTION);
8506 }
8507
8508 #ifdef __OPTIMIZE__
8509 extern __inline __m512i
8510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8511 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8512 {
8513   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8514                                                   (__v16si) __B, __imm,
8515                                                   (__v16si)
8516                                                   _mm512_undefined_epi32 (),
8517                                                   (__mmask16) -1);
8518 }
8519
8520 extern __inline __m512i
8521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8522 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8523                           __m512i __B, const int __imm)
8524 {
8525   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8526                                                   (__v16si) __B, __imm,
8527                                                   (__v16si) __W,
8528                                                   (__mmask16) __U);
8529 }
8530
8531 extern __inline __m512i
8532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8533 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8534                            const int __imm)
8535 {
8536   return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8537                                                   (__v16si) __B, __imm,
8538                                                   (__v16si)
8539                                                   _mm512_setzero_si512 (),
8540                                                   (__mmask16) __U);
8541 }
8542
8543 extern __inline __m512i
8544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8545 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8546 {
8547   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8548                                                   (__v8di) __B, __imm,
8549                                                   (__v8di)
8550                                                   _mm512_undefined_epi32 (),
8551                                                   (__mmask8) -1);
8552 }
8553
8554 extern __inline __m512i
8555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8556 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8557                           __m512i __B, const int __imm)
8558 {
8559   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8560                                                   (__v8di) __B, __imm,
8561                                                   (__v8di) __W,
8562                                                   (__mmask8) __U);
8563 }
8564
8565 extern __inline __m512i
8566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8567 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8568                            const int __imm)
8569 {
8570   return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8571                                                   (__v8di) __B, __imm,
8572                                                   (__v8di)
8573                                                   _mm512_setzero_si512 (),
8574                                                   (__mmask8) __U);
8575 }
8576 #else
/* Macro forms of the alignr intrinsics.  A and B are the two vector
   operands and N the immediate forwarded to the builtin.  */

/* 32-bit elements: all-ones mask, undefined pass-through.  */
#define _mm512_alignr_epi32(A, B, N)                                    \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si)(__m512i)(A),      \
                                            (__v16si)(__m512i)(B),      \
                                            (int)(N),                   \
                                            (__v16si)                   \
                                            _mm512_undefined_epi32 (),  \
                                            (__mmask16)-1))

/* 32-bit elements: W is the pass-through operand, U the mask.  */
#define _mm512_mask_alignr_epi32(W, U, A, B, N)                         \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si)(__m512i)(A),      \
                                            (__v16si)(__m512i)(B),      \
                                            (int)(N),                   \
                                            (__v16si)(__m512i)(W),      \
                                            (__mmask16)(U)))

/* 32-bit elements: zero vector as pass-through, U the mask.  */
#define _mm512_maskz_alignr_epi32(U, A, B, N)                           \
  ((__m512i) __builtin_ia32_alignd512_mask ((__v16si)(__m512i)(A),      \
                                            (__v16si)(__m512i)(B),      \
                                            (int)(N),                   \
                                            (__v16si)                   \
                                            _mm512_setzero_si512 (),    \
                                            (__mmask16)(U)))

/* 64-bit elements: all-ones mask, undefined pass-through.  */
#define _mm512_alignr_epi64(A, B, N)                                    \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di)(__m512i)(A),       \
                                            (__v8di)(__m512i)(B),       \
                                            (int)(N),                   \
                                            (__v8di)                    \
                                            _mm512_undefined_epi32 (),  \
                                            (__mmask8)-1))

/* 64-bit elements: W is the pass-through operand, U the mask.  */
#define _mm512_mask_alignr_epi64(W, U, A, B, N)                         \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di)(__m512i)(A),       \
                                            (__v8di)(__m512i)(B),       \
                                            (int)(N),                   \
                                            (__v8di)(__m512i)(W),       \
                                            (__mmask8)(U)))

/* 64-bit elements: zero vector as pass-through, U the mask.  */
#define _mm512_maskz_alignr_epi64(U, A, B, N)                           \
  ((__m512i) __builtin_ia32_alignq512_mask ((__v8di)(__m512i)(A),       \
                                            (__v8di)(__m512i)(B),       \
                                            (int)(N),                   \
                                            (__v8di)                    \
                                            _mm512_setzero_si512 (),    \
                                            (__mmask8)(U)))
8605 #endif
8606
8607 extern __inline __mmask16
8608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8609 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8610 {
8611   return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8612                                                      (__v16si) __B,
8613                                                      (__mmask16) -1);
8614 }
8615
8616 extern __inline __mmask16
8617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8618 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8619 {
8620   return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8621                                                      (__v16si) __B, __U);
8622 }
8623
8624 extern __inline __mmask8
8625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8626 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8627 {
8628   return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8629                                                     (__v8di) __B, __U);
8630 }
8631
8632 extern __inline __mmask8
8633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8634 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8635 {
8636   return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8637                                                     (__v8di) __B,
8638                                                     (__mmask8) -1);
8639 }
8640
8641 extern __inline __mmask16
8642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8643 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8644 {
8645   return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8646                                                      (__v16si) __B,
8647                                                      (__mmask16) -1);
8648 }
8649
8650 extern __inline __mmask16
8651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8652 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8653 {
8654   return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8655                                                      (__v16si) __B, __U);
8656 }
8657
8658 extern __inline __mmask8
8659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8660 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8661 {
8662   return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8663                                                     (__v8di) __B, __U);
8664 }
8665
8666 extern __inline __mmask8
8667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8668 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8669 {
8670   return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8671                                                     (__v8di) __B,
8672                                                     (__mmask8) -1);
8673 }
8674
8675 extern __inline __mmask16
8676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8677 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8678 {
8679   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8680                                                     (__v16si) __Y, 5,
8681                                                     (__mmask16) -1);
8682 }
8683
8684 extern __inline __mmask16
8685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8686 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8687 {
8688   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8689                                                     (__v16si) __Y, 5,
8690                                                     (__mmask16) __M);
8691 }
8692
8693 extern __inline __mmask16
8694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8695 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8696 {
8697   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8698                                                     (__v16si) __Y, 5,
8699                                                     (__mmask16) __M);
8700 }
8701
8702 extern __inline __mmask16
8703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8705 {
8706   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8707                                                     (__v16si) __Y, 5,
8708                                                     (__mmask16) -1);
8709 }
8710
8711 extern __inline __mmask8
8712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8713 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8714 {
8715   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8716                                                     (__v8di) __Y, 5,
8717                                                     (__mmask8) __M);
8718 }
8719
8720 extern __inline __mmask8
8721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8722 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8723 {
8724   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8725                                                     (__v8di) __Y, 5,
8726                                                     (__mmask8) -1);
8727 }
8728
8729 extern __inline __mmask8
8730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8731 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8732 {
8733   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8734                                                     (__v8di) __Y, 5,
8735                                                     (__mmask8) __M);
8736 }
8737
8738 extern __inline __mmask8
8739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8740 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8741 {
8742   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8743                                                     (__v8di) __Y, 5,
8744                                                     (__mmask8) -1);
8745 }
8746
8747 extern __inline __mmask16
8748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8749 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8750 {
8751   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8752                                                     (__v16si) __Y, 2,
8753                                                     (__mmask16) __M);
8754 }
8755
8756 extern __inline __mmask16
8757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8759 {
8760   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8761                                                     (__v16si) __Y, 2,
8762                                                     (__mmask16) -1);
8763 }
8764
8765 extern __inline __mmask16
8766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8767 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8768 {
8769   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8770                                                     (__v16si) __Y, 2,
8771                                                     (__mmask16) __M);
8772 }
8773
8774 extern __inline __mmask16
8775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8776 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8777 {
8778   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8779                                                     (__v16si) __Y, 2,
8780                                                     (__mmask16) -1);
8781 }
8782
8783 extern __inline __mmask8
8784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8785 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8786 {
8787   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8788                                                     (__v8di) __Y, 2,
8789                                                     (__mmask8) __M);
8790 }
8791
8792 extern __inline __mmask8
8793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8794 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8795 {
8796   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8797                                                     (__v8di) __Y, 2,
8798                                                     (__mmask8) -1);
8799 }
8800
8801 extern __inline __mmask8
8802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8803 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8804 {
8805   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8806                                                     (__v8di) __Y, 2,
8807                                                     (__mmask8) __M);
8808 }
8809
8810 extern __inline __mmask8
8811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8812 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8813 {
8814   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8815                                                     (__v8di) __Y, 2,
8816                                                     (__mmask8) -1);
8817 }
8818
8819 extern __inline __mmask16
8820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8821 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8822 {
8823   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8824                                                     (__v16si) __Y, 1,
8825                                                     (__mmask16) __M);
8826 }
8827
8828 extern __inline __mmask16
8829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8830 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8831 {
8832   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8833                                                     (__v16si) __Y, 1,
8834                                                     (__mmask16) -1);
8835 }
8836
8837 extern __inline __mmask16
8838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8839 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8840 {
8841   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8842                                                     (__v16si) __Y, 1,
8843                                                     (__mmask16) __M);
8844 }
8845
8846 extern __inline __mmask16
8847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8848 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8849 {
8850   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8851                                                     (__v16si) __Y, 1,
8852                                                     (__mmask16) -1);
8853 }
8854
8855 extern __inline __mmask8
8856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8858 {
8859   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8860                                                     (__v8di) __Y, 1,
8861                                                     (__mmask8) __M);
8862 }
8863
8864 extern __inline __mmask8
8865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8866 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8867 {
8868   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8869                                                     (__v8di) __Y, 1,
8870                                                     (__mmask8) -1);
8871 }
8872
8873 extern __inline __mmask8
8874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8875 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8876 {
8877   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8878                                                     (__v8di) __Y, 1,
8879                                                     (__mmask8) __M);
8880 }
8881
8882 extern __inline __mmask8
8883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8884 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8885 {
8886   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8887                                                     (__v8di) __Y, 1,
8888                                                     (__mmask8) -1);
8889 }
8890
8891 extern __inline __mmask16
8892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8893 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8894 {
8895   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8896                                                     (__v16si) __Y, 4,
8897                                                     (__mmask16) -1);
8898 }
8899
8900 extern __inline __mmask16
8901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8902 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8903 {
8904   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8905                                                     (__v16si) __Y, 4,
8906                                                     (__mmask16) __M);
8907 }
8908
8909 extern __inline __mmask16
8910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8911 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8912 {
8913   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8914                                                     (__v16si) __Y, 4,
8915                                                     (__mmask16) __M);
8916 }
8917
8918 extern __inline __mmask16
8919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8920 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8921 {
8922   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8923                                                     (__v16si) __Y, 4,
8924                                                     (__mmask16) -1);
8925 }
8926
8927 extern __inline __mmask8
8928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8929 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8930 {
8931   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8932                                                     (__v8di) __Y, 4,
8933                                                     (__mmask8) __M);
8934 }
8935
8936 extern __inline __mmask8
8937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8938 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8939 {
8940   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8941                                                     (__v8di) __Y, 4,
8942                                                     (__mmask8) -1);
8943 }
8944
8945 extern __inline __mmask8
8946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8947 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8948 {
8949   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8950                                                     (__v8di) __Y, 4,
8951                                                     (__mmask8) __M);
8952 }
8953
8954 extern __inline __mmask8
8955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8956 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8957 {
8958   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8959                                                     (__v8di) __Y, 4,
8960                                                     (__mmask8) -1);
8961 }
8962
/* Predicate values for the integer compare intrinsics.  The
   fixed-predicate wrappers in this file pass the same numbers as
   literals (1 for cmplt, 2 for cmple, 4 for cmpneq).  NLT and GE are
   two names for 0x5, as are NLE and GT for 0x6.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
8972
8973 #ifdef __OPTIMIZE__
8974 extern __inline __mmask8
8975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8976 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8977 {
8978   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8979                                                  (__v8di) __Y, __P,
8980                                                  (__mmask8) -1);
8981 }
8982
8983 extern __inline __mmask16
8984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8985 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8986 {
8987   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8988                                                   (__v16si) __Y, __P,
8989                                                   (__mmask16) -1);
8990 }
8991
8992 extern __inline __mmask8
8993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8994 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8995 {
8996   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8997                                                   (__v8di) __Y, __P,
8998                                                   (__mmask8) -1);
8999 }
9000
9001 extern __inline __mmask16
9002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9003 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9004 {
9005   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9006                                                    (__v16si) __Y, __P,
9007                                                    (__mmask16) -1);
9008 }
9009
9010 extern __inline __mmask8
9011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9012 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9013                           const int __R)
9014 {
9015   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9016                                                   (__v8df) __Y, __P,
9017                                                   (__mmask8) -1, __R);
9018 }
9019
9020 extern __inline __mmask16
9021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9022 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9023 {
9024   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9025                                                    (__v16sf) __Y, __P,
9026                                                    (__mmask16) -1, __R);
9027 }
9028
9029 extern __inline __mmask8
9030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9031 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9032                             const int __P)
9033 {
9034   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9035                                                  (__v8di) __Y, __P,
9036                                                  (__mmask8) __U);
9037 }
9038
9039 extern __inline __mmask16
9040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9041 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9042                             const int __P)
9043 {
9044   return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9045                                                   (__v16si) __Y, __P,
9046                                                   (__mmask16) __U);
9047 }
9048
9049 extern __inline __mmask8
9050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9051 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9052                             const int __P)
9053 {
9054   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9055                                                   (__v8di) __Y, __P,
9056                                                   (__mmask8) __U);
9057 }
9058
9059 extern __inline __mmask16
9060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9061 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9062                             const int __P)
9063 {
9064   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9065                                                    (__v16si) __Y, __P,
9066                                                    (__mmask16) __U);
9067 }
9068
9069 extern __inline __mmask8
9070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9071 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9072                                const int __P, const int __R)
9073 {
9074   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9075                                                   (__v8df) __Y, __P,
9076                                                   (__mmask8) __U, __R);
9077 }
9078
9079 extern __inline __mmask16
9080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9081 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9082                                const int __P, const int __R)
9083 {
9084   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9085                                                    (__v16sf) __Y, __P,
9086                                                    (__mmask16) __U, __R);
9087 }
9088
9089 extern __inline __mmask8
9090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9091 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9092 {
9093   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9094                                                (__v2df) __Y, __P,
9095                                                (__mmask8) -1, __R);
9096 }
9097
9098 extern __inline __mmask8
9099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9100 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9101                             const int __P, const int __R)
9102 {
9103   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9104                                                (__v2df) __Y, __P,
9105                                                (__mmask8) __M, __R);
9106 }
9107
9108 extern __inline __mmask8
9109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9110 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9111 {
9112   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9113                                                (__v4sf) __Y, __P,
9114                                                (__mmask8) -1, __R);
9115 }
9116
9117 extern __inline __mmask8
9118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9119 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9120                             const int __P, const int __R)
9121 {
9122   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9123                                                (__v4sf) __Y, __P,
9124                                                (__mmask8) __M, __R);
9125 }
9126
9127 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpq512_mask with P as the predicate and an
   all-ones mask operand.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) (__m512i) (X),      \
                                           (__v8di) (__m512i) (Y),      \
                                           (int) (P),                   \
                                           (__mmask8) -1))
9132
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpd512_mask with P as the predicate and an
   all-ones mask operand.  */
#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) (__m512i) (X),    \
                                            (__v16si) (__m512i) (Y),    \
                                            (int) (P),                  \
                                            (__mmask16) -1))
9137
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_ucmpq512_mask with P as the predicate and an
   all-ones mask operand.  */
#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) (__m512i) (X),     \
                                            (__v8di) (__m512i) (Y),     \
                                            (int) (P),                  \
                                            (__mmask8) -1))
9142
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_ucmpd512_mask with P as the predicate and an
   all-ones mask operand.  */
#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) (__m512i) (X),   \
                                             (__v16si) (__m512i) (Y),   \
                                             (int) (P),                 \
                                             (__mmask16) -1))
9147
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmppd512_mask with P as the predicate, an all-ones
   mask operand and R as the trailing argument.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) (__m512d) (X),     \
                                            (__v8df) (__m512d) (Y),     \
                                            (int) (P),                  \
                                            (__mmask8) -1, (R)))
9152
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpps512_mask with P as the predicate, an all-ones
   mask operand and R as the trailing argument.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) (__m512) (X),    \
                                             (__v16sf) (__m512) (Y),    \
                                             (int) (P),                 \
                                             (__mmask16) -1, (R)))
9157
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpq512_mask with P as the predicate and M as the
   mask operand.  M is parenthesized before the cast so that an
   expression argument such as `a | b' is converted as a whole.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y), (int)(P),\
                                           (__mmask8)(M)))
9162
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpd512_mask with P as the predicate and M as the
   mask operand.  M is parenthesized before the cast so that an
   expression argument such as `a | b' is converted as a whole.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)(M)))
9167
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_ucmpq512_mask with P as the predicate and M as the
   mask operand.  M is parenthesized before the cast so that an
   expression argument such as `a | b' is converted as a whole.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),       \
                                            (__v8di)(__m512i)(Y), (int)(P),\
                                            (__mmask8)(M)))
9172
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_ucmpd512_mask with P as the predicate and M as the
   mask operand.  M is parenthesized before the cast so that an
   expression argument such as `a | b' is converted as a whole.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)(M)))
9177
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmppd512_mask with P as the predicate, M as the
   mask operand and R as the trailing argument.  M and R are
   parenthesized so that expression arguments are taken as a whole
   (previously the cast applied only to the first operand of M).  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)(M), (R)))
9182
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpps512_mask with P as the predicate, M as the
   mask operand and R as the trailing argument.  M and R are
   parenthesized so that expression arguments are taken as a whole
   (previously the cast applied only to the first operand of M).  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)(M), (R)))
9187
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpsd_mask with P as the predicate, an all-ones
   mask operand and R as the trailing argument.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R)                               \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),        \
                                         (__v2df) (__m128d) (Y),        \
                                         (int) (P),                     \
                                         (__mmask8) -1, (R)))
9192
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpsd_mask with P as the predicate, M as the mask
   operand and R as the trailing argument.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X),        \
                                         (__v2df) (__m128d) (Y),        \
                                         (int) (P),                     \
                                         (M), (R)))
9197
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpss_mask with P as the predicate, an all-ones
   mask operand and R as the trailing argument.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R)                               \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),         \
                                         (__v4sf) (__m128) (Y),         \
                                         (int) (P),                     \
                                         (__mmask8) -1, (R)))
9202
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_cmpss_mask with P as the predicate, M as the mask
   operand and R as the trailing argument.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                       \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X),         \
                                         (__v4sf) (__m128) (Y),         \
                                         (int) (P),                     \
                                         (M), (R)))
9207 #endif
9208
9209 #ifdef __OPTIMIZE__
9210 extern __inline __m512
9211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9212 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9213 {
9214   __m512 v1_old = _mm512_undefined_ps ();
9215   __mmask16 mask = 0xFFFF;
9216
9217   return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9218                                                 __addr,
9219                                                 (__v16si) __index,
9220                                                 mask, __scale);
9221 }
9222
9223 extern __inline __m512
9224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9225 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9226                           __m512i __index, float const *__addr, int __scale)
9227 {
9228   return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9229                                                 __addr,
9230                                                 (__v16si) __index,
9231                                                 __mask, __scale);
9232 }
9233
9234 extern __inline __m512d
9235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9237 {
9238   __m512d v1_old = _mm512_undefined_pd ();
9239   __mmask8 mask = 0xFF;
9240
9241   return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9242                                                 __addr,
9243                                                 (__v8si) __index, mask,
9244                                                 __scale);
9245 }
9246
9247 extern __inline __m512d
9248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9249 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9250                           __m256i __index, double const *__addr, int __scale)
9251 {
9252   return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9253                                                 __addr,
9254                                                 (__v8si) __index,
9255                                                 __mask, __scale);
9256 }
9257
9258 extern __inline __m256
9259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9260 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9261 {
9262   __m256 v1_old = _mm256_undefined_ps ();
9263   __mmask8 mask = 0xFF;
9264
9265   return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9266                                                 __addr,
9267                                                 (__v8di) __index, mask,
9268                                                 __scale);
9269 }
9270
9271 extern __inline __m256
9272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9273 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9274                           __m512i __index, float const *__addr, int __scale)
9275 {
9276   return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9277                                                 __addr,
9278                                                 (__v8di) __index,
9279                                                 __mask, __scale);
9280 }
9281
9282 extern __inline __m512d
9283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9284 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9285 {
9286   __m512d v1_old = _mm512_undefined_pd ();
9287   __mmask8 mask = 0xFF;
9288
9289   return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9290                                                 __addr,
9291                                                 (__v8di) __index, mask,
9292                                                 __scale);
9293 }
9294
9295 extern __inline __m512d
9296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9297 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9298                           __m512i __index, double const *__addr, int __scale)
9299 {
9300   return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9301                                                 __addr,
9302                                                 (__v8di) __index,
9303                                                 __mask, __scale);
9304 }
9305
9306 extern __inline __m512i
9307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9308 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9309 {
9310   __m512i v1_old = _mm512_undefined_epi32 ();
9311   __mmask16 mask = 0xFFFF;
9312
9313   return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9314                                                  __addr,
9315                                                  (__v16si) __index,
9316                                                  mask, __scale);
9317 }
9318
9319 extern __inline __m512i
9320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9321 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9322                              __m512i __index, int const *__addr, int __scale)
9323 {
9324   return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9325                                                  __addr,
9326                                                  (__v16si) __index,
9327                                                  __mask, __scale);
9328 }
9329
9330 extern __inline __m512i
9331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9332 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9333 {
9334   __m512i v1_old = _mm512_undefined_epi32 ();
9335   __mmask8 mask = 0xFF;
9336
9337   return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9338                                                 __addr,
9339                                                 (__v8si) __index, mask,
9340                                                 __scale);
9341 }
9342
9343 extern __inline __m512i
9344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9345 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9346                              __m256i __index, long long const *__addr,
9347                              int __scale)
9348 {
9349   return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9350                                                 __addr,
9351                                                 (__v8si) __index,
9352                                                 __mask, __scale);
9353 }
9354
9355 extern __inline __m256i
9356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9357 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9358 {
9359   __m256i v1_old = _mm256_undefined_si256 ();
9360   __mmask8 mask = 0xFF;
9361
9362   return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9363                                                  __addr,
9364                                                  (__v8di) __index,
9365                                                  mask, __scale);
9366 }
9367
9368 extern __inline __m256i
9369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9370 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9371                              __m512i __index, int const *__addr, int __scale)
9372 {
9373   return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9374                                                  __addr,
9375                                                  (__v8di) __index,
9376                                                  __mask, __scale);
9377 }
9378
9379 extern __inline __m512i
9380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9381 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9382 {
9383   __m512i v1_old = _mm512_undefined_epi32 ();
9384   __mmask8 mask = 0xFF;
9385
9386   return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9387                                                 __addr,
9388                                                 (__v8di) __index, mask,
9389                                                 __scale);
9390 }
9391
9392 extern __inline __m512i
9393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9394 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9395                              __m512i __index, long long const *__addr,
9396                              int __scale)
9397 {
9398   return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9399                                                 __addr,
9400                                                 (__v8di) __index,
9401                                                 __mask, __scale);
9402 }
9403
9404 extern __inline void
9405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9406 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9407 {
9408   __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9409                                  (__v16si) __index, (__v16sf) __v1, __scale);
9410 }
9411
9412 extern __inline void
9413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9414 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9415                            __m512i __index, __m512 __v1, int __scale)
9416 {
9417   __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9418                                  (__v16sf) __v1, __scale);
9419 }
9420
9421 extern __inline void
9422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9423 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9424                       int __scale)
9425 {
9426   __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9427                                 (__v8si) __index, (__v8df) __v1, __scale);
9428 }
9429
9430 extern __inline void
9431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9432 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9433                            __m256i __index, __m512d __v1, int __scale)
9434 {
9435   __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9436                                 (__v8df) __v1, __scale);
9437 }
9438
9439 extern __inline void
9440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9441 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9442 {
9443   __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9444                                  (__v8di) __index, (__v8sf) __v1, __scale);
9445 }
9446
9447 extern __inline void
9448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9449 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9450                            __m512i __index, __m256 __v1, int __scale)
9451 {
9452   __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9453                                  (__v8sf) __v1, __scale);
9454 }
9455
9456 extern __inline void
9457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9458 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9459                       int __scale)
9460 {
9461   __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9462                                 (__v8di) __index, (__v8df) __v1, __scale);
9463 }
9464
9465 extern __inline void
9466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9467 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9468                            __m512i __index, __m512d __v1, int __scale)
9469 {
9470   __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9471                                 (__v8df) __v1, __scale);
9472 }
9473
9474 extern __inline void
9475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9476 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9477                          __m512i __v1, int __scale)
9478 {
9479   __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9480                                  (__v16si) __index, (__v16si) __v1, __scale);
9481 }
9482
9483 extern __inline void
9484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9485 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9486                               __m512i __index, __m512i __v1, int __scale)
9487 {
9488   __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9489                                  (__v16si) __v1, __scale);
9490 }
9491
9492 extern __inline void
9493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9494 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9495                          __m512i __v1, int __scale)
9496 {
9497   __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9498                                 (__v8si) __index, (__v8di) __v1, __scale);
9499 }
9500
9501 extern __inline void
9502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9503 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9504                               __m256i __index, __m512i __v1, int __scale)
9505 {
9506   __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9507                                 (__v8di) __v1, __scale);
9508 }
9509
9510 extern __inline void
9511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9512 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9513                          __m256i __v1, int __scale)
9514 {
9515   __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9516                                  (__v8di) __index, (__v8si) __v1, __scale);
9517 }
9518
9519 extern __inline void
9520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9521 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9522                               __m512i __index, __m256i __v1, int __scale)
9523 {
9524   __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9525                                  (__v8si) __v1, __scale);
9526 }
9527
9528 extern __inline void
9529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9530 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9531                          __m512i __v1, int __scale)
9532 {
9533   __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9534                                 (__v8di) __index, (__v8di) __v1, __scale);
9535 }
9536
9537 extern __inline void
9538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9539 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9540                               __m512i __index, __m512i __v1, int __scale)
9541 {
9542   __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9543                                 (__v8di) __v1, __scale);
9544 }
9545 #else
/* Macro form of _mm512_i32gather_ps: expands to the gathersiv16sf builtin
   with _mm512_undefined_ps () as the pass-through operand and an all-ones
   16-bit mask.  Every parameter is parenthesized before it is cast so an
   expression argument (e.g. "base + off") is converted as a whole.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
                                          (float const *)(ADDR),        \
                                          (__v16si)(__m512i)(INDEX),    \
                                          (__mmask16)0xFFFF, (int)(SCALE)))
9551
/* Macro form of _mm512_mask_i32gather_ps: expands to the gathersiv16sf
   builtin with V1OLD as the pass-through operand and MASK as a 16-bit
   mask.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)(V1OLD),     \
                                          (float const *)(ADDR),        \
                                          (__v16si)(__m512i)(INDEX),    \
                                          (__mmask16)(MASK), (int)(SCALE)))
9557
/* Macro form of _mm512_i32gather_pd: expands to the gathersiv8df builtin
   with _mm512_undefined_pd () as the pass-through operand and an all-ones
   8-bit mask.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (double const *)(ADDR),       \
                                          (__v8si)(__m256i)(INDEX),     \
                                          (__mmask8)0xFF, (int)(SCALE)))
9563
/* Macro form of _mm512_mask_i32gather_pd: expands to the gathersiv8df
   builtin with V1OLD as the pass-through operand and MASK as an 8-bit
   mask.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)(V1OLD),     \
                                          (double const *)(ADDR),       \
                                          (__v8si)(__m256i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9569
/* Macro form of _mm512_i64gather_ps: expands to the gatherdiv16sf builtin
   with _mm256_undefined_ps () as the pass-through operand and an all-ones
   8-bit mask; the result is a 256-bit vector.  Every parameter is
   parenthesized before it is cast so an expression argument is converted
   as a whole.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),\
                                          (float const *)(ADDR),        \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)0xFF, (int)(SCALE)))
9575
/* Macro form of _mm512_mask_i64gather_ps: expands to the gatherdiv16sf
   builtin with the 256-bit V1OLD as the pass-through operand and MASK as
   an 8-bit mask.  Every parameter is parenthesized before it is cast so
   an expression argument is converted as a whole.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)(V1OLD),      \
                                          (float const *)(ADDR),        \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9581
/* Macro form of _mm512_i64gather_pd: expands to the gatherdiv8df builtin
   with _mm512_undefined_pd () as the pass-through operand and an all-ones
   8-bit mask.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (double const *)(ADDR),       \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)0xFF, (int)(SCALE)))
9587
/* Macro form of _mm512_mask_i64gather_pd: expands to the gatherdiv8df
   builtin with V1OLD as the pass-through operand and MASK as an 8-bit
   mask.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)(V1OLD),     \
                                          (double const *)(ADDR),       \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9593
/* Macro form of _mm512_i32gather_epi32: expands to the gathersiv16si
   builtin with _mm512_undefined_epi32 () as the pass-through operand and
   an all-ones 16-bit mask.  Every parameter is parenthesized before it is
   cast so an expression argument is converted as a whole.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv16si (                             \
                                  (__v16si)_mm512_undefined_epi32 (),   \
                                  (int const *)(ADDR),                  \
                                  (__v16si)(__m512i)(INDEX),            \
                                  (__mmask16)0xFFFF, (int)(SCALE)))
9599
/* Macro form of _mm512_mask_i32gather_epi32: expands to the gathersiv16si
   builtin with V1OLD as the pass-through operand and MASK as a 16-bit
   mask.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)(V1OLD),   \
                                           (int const *)(ADDR),         \
                                           (__v16si)(__m512i)(INDEX),   \
                                           (__mmask16)(MASK),           \
                                           (int)(SCALE)))
9605
/* Macro form of _mm512_i32gather_epi64: expands to the gathersiv8di
   builtin with _mm512_undefined_epi32 () (viewed as __v8di) as the
   pass-through operand and an all-ones 8-bit mask.  Every parameter is
   parenthesized before it is cast so an expression argument is converted
   as a whole.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv8di (                              \
                                  (__v8di)_mm512_undefined_epi32 (),    \
                                  (long long const *)(ADDR),            \
                                  (__v8si)(__m256i)(INDEX),             \
                                  (__mmask8)0xFF, (int)(SCALE)))
9611
/* Macro form of _mm512_mask_i32gather_epi64: expands to the gathersiv8di
   builtin with V1OLD as the pass-through operand and MASK as an 8-bit
   mask.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)(V1OLD),     \
                                          (long long const *)(ADDR),    \
                                          (__v8si)(__m256i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9617
/* Macro form of _mm512_i64gather_epi32: expands to the gatherdiv16si
   builtin with _mm256_undefined_si256 () as the pass-through operand and
   an all-ones 8-bit mask; the result is a 256-bit vector.  Every
   parameter is parenthesized before it is cast so an expression argument
   is converted as a whole.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                        \
  ((__m256i) __builtin_ia32_gatherdiv16si (                               \
                                  (__v8si)_mm256_undefined_si256(),       \
                                  (int const *)(ADDR),                    \
                                  (__v8di)(__m512i)(INDEX),               \
                                  (__mmask8)0xFF, (int)(SCALE)))
9623
/* Macro form of _mm512_mask_i64gather_epi32: expands to the gatherdiv16si
   builtin with the 256-bit V1OLD as the pass-through operand and MASK as
   an 8-bit mask.  Every parameter is parenthesized before it is cast so
   an expression argument is converted as a whole.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)(V1OLD),    \
                                           (int const *)(ADDR),         \
                                           (__v8di)(__m512i)(INDEX),    \
                                           (__mmask8)(MASK),            \
                                           (int)(SCALE)))
9629
/* Macro form of _mm512_i64gather_epi64: expands to the gatherdiv8di
   builtin with _mm512_undefined_epi32 () (viewed as __v8di) as the
   pass-through operand and an all-ones 8-bit mask.  Every parameter is
   parenthesized before it is cast so an expression argument is converted
   as a whole.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gatherdiv8di (                              \
                                  (__v8di)_mm512_undefined_epi32 (),    \
                                  (long long const *)(ADDR),            \
                                  (__v8di)(__m512i)(INDEX),             \
                                  (__mmask8)0xFF, (int)(SCALE)))
9635
/* Macro form of _mm512_mask_i64gather_epi64: expands to the gatherdiv8di
   builtin with V1OLD as the pass-through operand and MASK as an 8-bit
   mask.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)(V1OLD),     \
                                          (long long const *)(ADDR),    \
                                          (__v8di)(__m512i)(INDEX),     \
                                          (__mmask8)(MASK), (int)(SCALE)))
9641
/* Macro form of _mm512_i32scatter_ps: expands to the scattersiv16sf
   builtin with an all-ones 16-bit mask.  Every parameter is parenthesized
   before it is cast so an expression argument is converted as a whole.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)0xFFFF,    \
                                 (__v16si)(__m512i)(INDEX),             \
                                 (__v16sf)(__m512)(V1), (int)(SCALE))
9646
/* Macro form of _mm512_mask_i32scatter_ps: expands to the scattersiv16sf
   builtin with MASK as a 16-bit mask.  Every parameter is parenthesized
   before it is cast so an expression argument is converted as a whole.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16sf ((float *)(ADDR), (__mmask16)(MASK),    \
                                 (__v16si)(__m512i)(INDEX),             \
                                 (__v16sf)(__m512)(V1), (int)(SCALE))
9651
/* Macro form of _mm512_i32scatter_pd: expands to the scattersiv8df
   builtin with an all-ones 8-bit mask.  Every parameter is parenthesized
   before it is cast so an expression argument is converted as a whole.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)0xFF,       \
                                (__v8si)(__m256i)(INDEX),               \
                                (__v8df)(__m512d)(V1), (int)(SCALE))
9656
/* Macro form of _mm512_mask_i32scatter_pd: expands to the scattersiv8df
   builtin with MASK as an 8-bit mask.  Every parameter is parenthesized
   before it is cast so an expression argument is converted as a whole.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8df ((double *)(ADDR), (__mmask8)(MASK),     \
                                (__v8si)(__m256i)(INDEX),               \
                                (__v8df)(__m512d)(V1), (int)(SCALE))
9661
/* Macro form of _mm512_i64scatter_ps: expands to the scatterdiv16sf
   builtin with an all-ones 8-bit mask and a 256-bit data vector.  Every
   parameter is parenthesized before it is cast so an expression argument
   is converted as a whole.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)0xFF,       \
                                 (__v8di)(__m512i)(INDEX),              \
                                 (__v8sf)(__m256)(V1), (int)(SCALE))
9666
/* Macro form of _mm512_mask_i64scatter_ps: expands to the scatterdiv16sf
   builtin with a 256-bit data vector.  MASK is cast to __mmask8, matching
   the 0xFF mask of the unmasked _mm512_i64scatter_ps macro and the other
   8-lane i64 scatter macros (it was previously cast to __mmask16).  Every
   parameter is parenthesized before it is cast so an expression argument
   is converted as a whole.  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16sf ((float *)(ADDR), (__mmask8)(MASK),     \
                                 (__v8di)(__m512i)(INDEX),              \
                                 (__v8sf)(__m256)(V1), (int)(SCALE))
9671
/* Macro form of _mm512_i64scatter_pd: expands to the scatterdiv8df
   builtin with an all-ones 8-bit mask.  Every parameter is parenthesized
   before it is cast so an expression argument is converted as a whole.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)0xFF,       \
                                (__v8di)(__m512i)(INDEX),               \
                                (__v8df)(__m512d)(V1), (int)(SCALE))
9676
/* Macro form of _mm512_mask_i64scatter_pd: expands to the scatterdiv8df
   builtin with MASK as an 8-bit mask.  Every parameter is parenthesized
   before it is cast so an expression argument is converted as a whole.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv8df ((double *)(ADDR), (__mmask8)(MASK),     \
                                (__v8di)(__m512i)(INDEX),               \
                                (__v8df)(__m512d)(V1), (int)(SCALE))
9681
/* Macro form of _mm512_i32scatter_epi32: expands to the scattersiv16si
   builtin with an all-ones 16-bit mask.  Every parameter is parenthesized
   before it is cast so an expression argument is converted as a whole.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)0xFFFF,      \
                                 (__v16si)(__m512i)(INDEX),             \
                                 (__v16si)(__m512i)(V1), (int)(SCALE))
9686
/* Macro form of _mm512_mask_i32scatter_epi32: expands to the
   scattersiv16si builtin with MASK as a 16-bit mask.  Every parameter is
   parenthesized before it is cast so an expression argument is converted
   as a whole.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv16si ((int *)(ADDR), (__mmask16)(MASK),      \
                                 (__v16si)(__m512i)(INDEX),             \
                                 (__v16si)(__m512i)(V1), (int)(SCALE))
9691
/* Macro form of _mm512_i32scatter_epi64: expands to the scattersiv8di
   builtin with an all-ones 8-bit mask and a 256-bit index vector.  Every
   parameter is parenthesized before it is cast so an expression argument
   is converted as a whole.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)0xFF,    \
                                (__v8si)(__m256i)(INDEX),               \
                                (__v8di)(__m512i)(V1), (int)(SCALE))
9696
/* Macro form of _mm512_mask_i32scatter_epi64: expands to the
   scattersiv8di builtin with MASK as an 8-bit mask and a 256-bit index
   vector.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv8di ((long long *)(ADDR), (__mmask8)(MASK),  \
                                (__v8si)(__m256i)(INDEX),               \
                                (__v8di)(__m512i)(V1), (int)(SCALE))
9701
/* Macro form of _mm512_i64scatter_epi32: expands to the scatterdiv16si
   builtin with an all-ones 8-bit mask and a 256-bit data vector.  Every
   parameter is parenthesized before it is cast so an expression argument
   is converted as a whole.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)0xFF,         \
                                 (__v8di)(__m512i)(INDEX),              \
                                 (__v8si)(__m256i)(V1), (int)(SCALE))
9706
/* Macro form of _mm512_mask_i64scatter_epi32: expands to the
   scatterdiv16si builtin with MASK as an 8-bit mask and a 256-bit data
   vector.  Every parameter is parenthesized before it is cast so an
   expression argument is converted as a whole.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv16si ((int *)(ADDR), (__mmask8)(MASK),       \
                                 (__v8di)(__m512i)(INDEX),              \
                                 (__v8si)(__m256i)(V1), (int)(SCALE))
9711
/* Macro form of _mm512_i64scatter_epi64: expands to the scatterdiv8di
   builtin with an all-ones 8-bit mask.  Every parameter is parenthesized
   before it is cast so an expression argument is converted as a whole.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)0xFF,    \
                                (__v8di)(__m512i)(INDEX),               \
                                (__v8di)(__m512i)(V1), (int)(SCALE))
9716
/* Macro form of _mm512_mask_i64scatter_epi64: expands to the
   scatterdiv8di builtin with MASK as an 8-bit mask.  Every parameter is
   parenthesized before it is cast so an expression argument is converted
   as a whole.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv8di ((long long *)(ADDR), (__mmask8)(MASK),  \
                                (__v8di)(__m512i)(INDEX),               \
                                (__v8di)(__m512i)(V1), (int)(SCALE))
9721 #endif
9722
9723 extern __inline __m512d
9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9726 {
9727   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9728                                                       (__v8df) __W,
9729                                                       (__mmask8) __U);
9730 }
9731
9732 extern __inline __m512d
9733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9734 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9735 {
9736   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9737                                                       (__v8df)
9738                                                       _mm512_setzero_pd (),
9739                                                       (__mmask8) __U);
9740 }
9741
9742 extern __inline void
9743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9744 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9745 {
9746   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9747                                           (__mmask8) __U);
9748 }
9749
9750 extern __inline __m512
9751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9752 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9753 {
9754   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9755                                                      (__v16sf) __W,
9756                                                      (__mmask16) __U);
9757 }
9758
9759 extern __inline __m512
9760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9761 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9762 {
9763   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9764                                                      (__v16sf)
9765                                                      _mm512_setzero_ps (),
9766                                                      (__mmask16) __U);
9767 }
9768
9769 extern __inline void
9770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9771 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9772 {
9773   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9774                                           (__mmask16) __U);
9775 }
9776
9777 extern __inline __m512i
9778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9780 {
9781   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9782                                                       (__v8di) __W,
9783                                                       (__mmask8) __U);
9784 }
9785
9786 extern __inline __m512i
9787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9788 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9789 {
9790   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9791                                                       (__v8di)
9792                                                       _mm512_setzero_si512 (),
9793                                                       (__mmask8) __U);
9794 }
9795
9796 extern __inline void
9797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9798 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9799 {
9800   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9801                                           (__mmask8) __U);
9802 }
9803
9804 extern __inline __m512i
9805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9806 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9807 {
9808   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9809                                                       (__v16si) __W,
9810                                                       (__mmask16) __U);
9811 }
9812
9813 extern __inline __m512i
9814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9815 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9816 {
9817   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9818                                                       (__v16si)
9819                                                       _mm512_setzero_si512 (),
9820                                                       (__mmask16) __U);
9821 }
9822
9823 extern __inline void
9824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9825 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9826 {
9827   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9828                                           (__mmask16) __U);
9829 }
9830
9831 extern __inline __m512d
9832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9833 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9834 {
9835   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9836                                                     (__v8df) __W,
9837                                                     (__mmask8) __U);
9838 }
9839
9840 extern __inline __m512d
9841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9842 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9843 {
9844   return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9845                                                      (__v8df)
9846                                                      _mm512_setzero_pd (),
9847                                                      (__mmask8) __U);
9848 }
9849
9850 extern __inline __m512d
9851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9852 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9853 {
9854   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9855                                                         (__v8df) __W,
9856                                                         (__mmask8) __U);
9857 }
9858
9859 extern __inline __m512d
9860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9861 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9862 {
9863   return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9864                                                          (__v8df)
9865                                                          _mm512_setzero_pd (),
9866                                                          (__mmask8) __U);
9867 }
9868
9869 extern __inline __m512
9870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9871 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9872 {
9873   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9874                                                    (__v16sf) __W,
9875                                                    (__mmask16) __U);
9876 }
9877
9878 extern __inline __m512
9879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9880 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9881 {
9882   return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9883                                                     (__v16sf)
9884                                                     _mm512_setzero_ps (),
9885                                                     (__mmask16) __U);
9886 }
9887
9888 extern __inline __m512
9889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9890 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9891 {
9892   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9893                                                        (__v16sf) __W,
9894                                                        (__mmask16) __U);
9895 }
9896
9897 extern __inline __m512
9898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9899 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9900 {
9901   return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9902                                                         (__v16sf)
9903                                                         _mm512_setzero_ps (),
9904                                                         (__mmask16) __U);
9905 }
9906
9907 extern __inline __m512i
9908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9909 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9910 {
9911   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9912                                                     (__v8di) __W,
9913                                                     (__mmask8) __U);
9914 }
9915
9916 extern __inline __m512i
9917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9918 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9919 {
9920   return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9921                                                      (__v8di)
9922                                                      _mm512_setzero_si512 (),
9923                                                      (__mmask8) __U);
9924 }
9925
9926 extern __inline __m512i
9927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9928 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9929 {
9930   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9931                                                         (__v8di) __W,
9932                                                         (__mmask8) __U);
9933 }
9934
9935 extern __inline __m512i
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9938 {
9939   return (__m512i)
9940          __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9941                                                (__v8di)
9942                                                _mm512_setzero_si512 (),
9943                                                (__mmask8) __U);
9944 }
9945
9946 extern __inline __m512i
9947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9948 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9949 {
9950   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9951                                                     (__v16si) __W,
9952                                                     (__mmask16) __U);
9953 }
9954
9955 extern __inline __m512i
9956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9957 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9958 {
9959   return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9960                                                      (__v16si)
9961                                                      _mm512_setzero_si512 (),
9962                                                      (__mmask16) __U);
9963 }
9964
9965 extern __inline __m512i
9966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9967 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9968 {
9969   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9970                                                         (__v16si) __W,
9971                                                         (__mmask16) __U);
9972 }
9973
9974 extern __inline __m512i
9975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9976 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9977 {
9978   return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9979                                                          (__v16si)
9980                                                          _mm512_setzero_si512
9981                                                          (), (__mmask16) __U);
9982 }
9983
9984 /* Mask arithmetic operations */
9985 extern __inline __mmask16
9986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9987 _mm512_kand (__mmask16 __A, __mmask16 __B)
9988 {
9989   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9990 }
9991
9992 extern __inline __mmask16
9993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9994 _mm512_kandn (__mmask16 __A, __mmask16 __B)
9995 {
9996   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9997 }
9998
9999 extern __inline __mmask16
10000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10001 _mm512_kor (__mmask16 __A, __mmask16 __B)
10002 {
10003   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10004 }
10005
10006 extern __inline int
10007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10008 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10009 {
10010   return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10011                                                 (__mmask16) __B);
10012 }
10013
10014 extern __inline int
10015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10016 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10017 {
10018   return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10019                                                 (__mmask16) __B);
10020 }
10021
10022 extern __inline __mmask16
10023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10024 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10025 {
10026   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10027 }
10028
10029 extern __inline __mmask16
10030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10031 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10032 {
10033   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10034 }
10035
10036 extern __inline __mmask16
10037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10038 _mm512_knot (__mmask16 __A)
10039 {
10040   return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10041 }
10042
10043 extern __inline __mmask16
10044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10045 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10046 {
10047   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10048 }
10049
10050 #ifdef __OPTIMIZE__
10051 extern __inline __m512i
10052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10053 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10054                           const int __imm)
10055 {
10056   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10057                                                     (__v4si) __D,
10058                                                     __imm,
10059                                                     (__v16si)
10060                                                     _mm512_setzero_si512 (),
10061                                                     __B);
10062 }
10063
10064 extern __inline __m512
10065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10066 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10067                           const int __imm)
10068 {
10069   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10070                                                    (__v4sf) __D,
10071                                                    __imm,
10072                                                    (__v16sf)
10073                                                    _mm512_setzero_ps (), __B);
10074 }
10075
10076 extern __inline __m512i
10077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10078 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10079                          __m128i __D, const int __imm)
10080 {
10081   return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10082                                                     (__v4si) __D,
10083                                                     __imm,
10084                                                     (__v16si) __A,
10085                                                     __B);
10086 }
10087
10088 extern __inline __m512
10089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10090 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10091                          __m128 __D, const int __imm)
10092 {
10093   return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10094                                                    (__v4sf) __D,
10095                                                    __imm,
10096                                                    (__v16sf) __A, __B);
10097 }
10098 #else
/* Macro form of _mm512_maskz_insertf32x4 used when __OPTIMIZE__ is not
   defined.  A is the mask; it is cast to __mmask16 to match the inline
   version's parameter type (a __mmask8 cast would drop the upper eight
   mask bits).  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))
10103
/* Macro form of _mm512_maskz_inserti32x4 used when __OPTIMIZE__ is not
   defined.  A is the mask; it is cast to __mmask16 to match the inline
   version's parameter type (a __mmask8 cast would drop the upper eight
   mask bits).  */
#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10108
/* Macro form of _mm512_mask_insertf32x4 used when __OPTIMIZE__ is not
   defined.  A is the pass-through vector and B the mask; B is cast to
   __mmask16 to match the inline version's parameter type (a __mmask8 cast
   would drop the upper eight mask bits).  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
                                             (__mmask16)(B)))
10113
/* Macro form of _mm512_mask_inserti32x4 used when __OPTIMIZE__ is not
   defined.  A is the pass-through vector and B the mask; B is cast to
   __mmask16 to match the inline version's parameter type (a __mmask8 cast
   would drop the upper eight mask bits).  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
                                              (__mmask16)(B)))
10118 #endif
10119
10120 extern __inline __m512i
10121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10122 _mm512_max_epi64 (__m512i __A, __m512i __B)
10123 {
10124   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10125                                                   (__v8di) __B,
10126                                                   (__v8di)
10127                                                   _mm512_undefined_epi32 (),
10128                                                   (__mmask8) -1);
10129 }
10130
10131 extern __inline __m512i
10132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10133 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10134 {
10135   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10136                                                   (__v8di) __B,
10137                                                   (__v8di)
10138                                                   _mm512_setzero_si512 (),
10139                                                   __M);
10140 }
10141
10142 extern __inline __m512i
10143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10144 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10145 {
10146   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10147                                                   (__v8di) __B,
10148                                                   (__v8di) __W, __M);
10149 }
10150
10151 extern __inline __m512i
10152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10153 _mm512_min_epi64 (__m512i __A, __m512i __B)
10154 {
10155   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10156                                                   (__v8di) __B,
10157                                                   (__v8di)
10158                                                   _mm512_undefined_epi32 (),
10159                                                   (__mmask8) -1);
10160 }
10161
10162 extern __inline __m512i
10163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10164 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10165 {
10166   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10167                                                   (__v8di) __B,
10168                                                   (__v8di) __W, __M);
10169 }
10170
10171 extern __inline __m512i
10172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10173 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10174 {
10175   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10176                                                   (__v8di) __B,
10177                                                   (__v8di)
10178                                                   _mm512_setzero_si512 (),
10179                                                   __M);
10180 }
10181
10182 extern __inline __m512i
10183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10184 _mm512_max_epu64 (__m512i __A, __m512i __B)
10185 {
10186   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10187                                                   (__v8di) __B,
10188                                                   (__v8di)
10189                                                   _mm512_undefined_epi32 (),
10190                                                   (__mmask8) -1);
10191 }
10192
10193 extern __inline __m512i
10194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10195 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10196 {
10197   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10198                                                   (__v8di) __B,
10199                                                   (__v8di)
10200                                                   _mm512_setzero_si512 (),
10201                                                   __M);
10202 }
10203
10204 extern __inline __m512i
10205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10206 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10207 {
10208   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10209                                                   (__v8di) __B,
10210                                                   (__v8di) __W, __M);
10211 }
10212
10213 extern __inline __m512i
10214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10215 _mm512_min_epu64 (__m512i __A, __m512i __B)
10216 {
10217   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10218                                                   (__v8di) __B,
10219                                                   (__v8di)
10220                                                   _mm512_undefined_epi32 (),
10221                                                   (__mmask8) -1);
10222 }
10223
10224 extern __inline __m512i
10225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10226 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10227 {
10228   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10229                                                   (__v8di) __B,
10230                                                   (__v8di) __W, __M);
10231 }
10232
10233 extern __inline __m512i
10234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10235 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10236 {
10237   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10238                                                   (__v8di) __B,
10239                                                   (__v8di)
10240                                                   _mm512_setzero_si512 (),
10241                                                   __M);
10242 }
10243
10244 extern __inline __m512i
10245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10246 _mm512_max_epi32 (__m512i __A, __m512i __B)
10247 {
10248   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10249                                                   (__v16si) __B,
10250                                                   (__v16si)
10251                                                   _mm512_undefined_epi32 (),
10252                                                   (__mmask16) -1);
10253 }
10254
10255 extern __inline __m512i
10256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10257 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10258 {
10259   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10260                                                   (__v16si) __B,
10261                                                   (__v16si)
10262                                                   _mm512_setzero_si512 (),
10263                                                   __M);
10264 }
10265
10266 extern __inline __m512i
10267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10268 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10269 {
10270   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10271                                                   (__v16si) __B,
10272                                                   (__v16si) __W, __M);
10273 }
10274
10275 extern __inline __m512i
10276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277 _mm512_min_epi32 (__m512i __A, __m512i __B)
10278 {
10279   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10280                                                   (__v16si) __B,
10281                                                   (__v16si)
10282                                                   _mm512_undefined_epi32 (),
10283                                                   (__mmask16) -1);
10284 }
10285
10286 extern __inline __m512i
10287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10288 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10289 {
10290   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10291                                                   (__v16si) __B,
10292                                                   (__v16si)
10293                                                   _mm512_setzero_si512 (),
10294                                                   __M);
10295 }
10296
10297 extern __inline __m512i
10298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10299 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10300 {
10301   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10302                                                   (__v16si) __B,
10303                                                   (__v16si) __W, __M);
10304 }
10305
10306 extern __inline __m512i
10307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10308 _mm512_max_epu32 (__m512i __A, __m512i __B)
10309 {
10310   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10311                                                   (__v16si) __B,
10312                                                   (__v16si)
10313                                                   _mm512_undefined_epi32 (),
10314                                                   (__mmask16) -1);
10315 }
10316
10317 extern __inline __m512i
10318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10319 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10320 {
10321   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10322                                                   (__v16si) __B,
10323                                                   (__v16si)
10324                                                   _mm512_setzero_si512 (),
10325                                                   __M);
10326 }
10327
10328 extern __inline __m512i
10329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10330 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10331 {
10332   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10333                                                   (__v16si) __B,
10334                                                   (__v16si) __W, __M);
10335 }
10336
10337 extern __inline __m512i
10338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10339 _mm512_min_epu32 (__m512i __A, __m512i __B)
10340 {
10341   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10342                                                   (__v16si) __B,
10343                                                   (__v16si)
10344                                                   _mm512_undefined_epi32 (),
10345                                                   (__mmask16) -1);
10346 }
10347
10348 extern __inline __m512i
10349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10350 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10351 {
10352   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10353                                                   (__v16si) __B,
10354                                                   (__v16si)
10355                                                   _mm512_setzero_si512 (),
10356                                                   __M);
10357 }
10358
10359 extern __inline __m512i
10360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10361 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10362 {
10363   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10364                                                   (__v16si) __B,
10365                                                   (__v16si) __W, __M);
10366 }
10367
10368 extern __inline __m512
10369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10370 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10371 {
10372   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10373                                                    (__v16sf) __B,
10374                                                    (__v16sf)
10375                                                    _mm512_undefined_ps (),
10376                                                    (__mmask16) -1);
10377 }
10378
10379 extern __inline __m512
10380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10381 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10382 {
10383   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10384                                                    (__v16sf) __B,
10385                                                    (__v16sf) __W,
10386                                                    (__mmask16) __U);
10387 }
10388
10389 extern __inline __m512
10390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10391 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10392 {
10393   return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10394                                                    (__v16sf) __B,
10395                                                    (__v16sf)
10396                                                    _mm512_setzero_ps (),
10397                                                    (__mmask16) __U);
10398 }
10399
10400 #ifdef __OPTIMIZE__
10401 extern __inline __m128d
10402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10403 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10404 {
10405   return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10406                                                (__v2df) __B,
10407                                                __R);
10408 }
10409
10410 extern __inline __m128
10411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10412 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10413 {
10414   return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10415                                               (__v4sf) __B,
10416                                               __R);
10417 }
10418
10419 extern __inline __m128d
10420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10422 {
10423   return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10424                                                (__v2df) __B,
10425                                                __R);
10426 }
10427
10428 extern __inline __m128
10429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10430 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10431 {
10432   return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10433                                               (__v4sf) __B,
10434                                               __R);
10435 }
10436
10437 #else
/* Non-optimizing fallbacks for the scalar max/min rounding intrinsics.
   Each macro expands to the same builtin as the inline function of the
   same name in the __OPTIMIZE__ branch (maxsd/maxss/minsd/minss), so the
   result does not depend on the optimization level.  The expansion is
   fully parenthesized so the cast binds before any postfix operator
   applied at the use site.
   A, B: the two 128-bit operands; C: rounding-control argument forwarded
   unchanged to the builtin.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round (A, B, C))

#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round (A, B, C))

#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round (A, B, C))

#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round (A, B, C))
10449 #endif
10450
10451 extern __inline __m512d
10452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10453 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10454 {
10455   return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10456                                                      (__v8df) __W,
10457                                                      (__mmask8) __U);
10458 }
10459
10460 extern __inline __m512
10461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10462 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10463 {
10464   return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10465                                                     (__v16sf) __W,
10466                                                     (__mmask16) __U);
10467 }
10468
10469 extern __inline __m512i
10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10472 {
10473   return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10474                                                     (__v8di) __W,
10475                                                     (__mmask8) __U);
10476 }
10477
10478 extern __inline __m512i
10479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10480 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10481 {
10482   return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10483                                                     (__v16si) __W,
10484                                                     (__mmask16) __U);
10485 }
10486
10487 #ifdef __OPTIMIZE__
10488 extern __inline __m128d
10489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10490 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10491 {
10492   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10493                                                    (__v2df) __A,
10494                                                    (__v2df) __B,
10495                                                    __R);
10496 }
10497
10498 extern __inline __m128
10499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10500 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10501 {
10502   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10503                                                   (__v4sf) __A,
10504                                                   (__v4sf) __B,
10505                                                   __R);
10506 }
10507
10508 extern __inline __m128d
10509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10510 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10511 {
10512   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10513                                                    (__v2df) __A,
10514                                                    -(__v2df) __B,
10515                                                    __R);
10516 }
10517
10518 extern __inline __m128
10519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10520 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10521 {
10522   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10523                                                   (__v4sf) __A,
10524                                                   -(__v4sf) __B,
10525                                                   __R);
10526 }
10527
10528 extern __inline __m128d
10529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10530 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10531 {
10532   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10533                                                    -(__v2df) __A,
10534                                                    (__v2df) __B,
10535                                                    __R);
10536 }
10537
10538 extern __inline __m128
10539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10540 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10541 {
10542   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10543                                                   -(__v4sf) __A,
10544                                                   (__v4sf) __B,
10545                                                   __R);
10546 }
10547
10548 extern __inline __m128d
10549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10550 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10551 {
10552   return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10553                                                    -(__v2df) __A,
10554                                                    -(__v2df) __B,
10555                                                    __R);
10556 }
10557
10558 extern __inline __m128
10559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10560 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10561 {
10562   return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10563                                                   -(__v4sf) __A,
10564                                                   -(__v4sf) __B,
10565                                                   __R);
10566 }
10567 #else
/* Non-optimizing fallbacks for the scalar FMA rounding intrinsics.  All
   eight map onto the vfmaddsd3/vfmaddss3 round builtins; the sub and
   negated forms are obtained by negating the third (C) and/or second (B)
   argument.  R is forwarded unchanged as the last builtin argument.
   Every expansion is wrapped in parentheses so that it behaves as a
   single primary expression: without them a postfix operator at the use
   site would bind to the builtin call before the cast is applied.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))

#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))

#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))

#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))

#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))

#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))

#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))

#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10591 #endif
10592
10593 #ifdef __OPTIMIZE__
10594 extern __inline int
10595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10596 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10597 {
10598   return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10599 }
10600
10601 extern __inline int
10602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10603 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10604 {
10605   return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10606 }
10607 #else
/* Non-optimizing fallbacks: forward all four arguments unchanged to the
   vcomiss / vcomisd builtins.  */
#define _mm_comi_round_ss(A, B, C, D) \
  __builtin_ia32_vcomiss (A, B, C, D)

#define _mm_comi_round_sd(A, B, C, D) \
  __builtin_ia32_vcomisd (A, B, C, D)
10612 #endif
10613
10614 extern __inline __m512d
10615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10616 _mm512_sqrt_pd (__m512d __A)
10617 {
10618   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10619                                                   (__v8df)
10620                                                   _mm512_undefined_pd (),
10621                                                   (__mmask8) -1,
10622                                                   _MM_FROUND_CUR_DIRECTION);
10623 }
10624
10625 extern __inline __m512d
10626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10627 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10628 {
10629   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10630                                                   (__v8df) __W,
10631                                                   (__mmask8) __U,
10632                                                   _MM_FROUND_CUR_DIRECTION);
10633 }
10634
10635 extern __inline __m512d
10636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10637 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10638 {
10639   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10640                                                   (__v8df)
10641                                                   _mm512_setzero_pd (),
10642                                                   (__mmask8) __U,
10643                                                   _MM_FROUND_CUR_DIRECTION);
10644 }
10645
10646 extern __inline __m512
10647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10648 _mm512_sqrt_ps (__m512 __A)
10649 {
10650   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10651                                                  (__v16sf)
10652                                                  _mm512_undefined_ps (),
10653                                                  (__mmask16) -1,
10654                                                  _MM_FROUND_CUR_DIRECTION);
10655 }
10656
10657 extern __inline __m512
10658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10659 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10660 {
10661   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10662                                                  (__v16sf) __W,
10663                                                  (__mmask16) __U,
10664                                                  _MM_FROUND_CUR_DIRECTION);
10665 }
10666
10667 extern __inline __m512
10668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10669 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10670 {
10671   return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10672                                                  (__v16sf)
10673                                                  _mm512_setzero_ps (),
10674                                                  (__mmask16) __U,
10675                                                  _MM_FROUND_CUR_DIRECTION);
10676 }
10677
10678 extern __inline __m512d
10679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10680 _mm512_add_pd (__m512d __A, __m512d __B)
10681 {
10682   return (__m512d) ((__v8df)__A + (__v8df)__B);
10683 }
10684
10685 extern __inline __m512d
10686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10687 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10688 {
10689   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10690                                                  (__v8df) __B,
10691                                                  (__v8df) __W,
10692                                                  (__mmask8) __U,
10693                                                  _MM_FROUND_CUR_DIRECTION);
10694 }
10695
10696 extern __inline __m512d
10697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10698 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10699 {
10700   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10701                                                  (__v8df) __B,
10702                                                  (__v8df)
10703                                                  _mm512_setzero_pd (),
10704                                                  (__mmask8) __U,
10705                                                  _MM_FROUND_CUR_DIRECTION);
10706 }
10707
10708 extern __inline __m512
10709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10710 _mm512_add_ps (__m512 __A, __m512 __B)
10711 {
10712   return (__m512) ((__v16sf)__A + (__v16sf)__B);
10713 }
10714
10715 extern __inline __m512
10716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10717 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10718 {
10719   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10720                                                 (__v16sf) __B,
10721                                                 (__v16sf) __W,
10722                                                 (__mmask16) __U,
10723                                                 _MM_FROUND_CUR_DIRECTION);
10724 }
10725
10726 extern __inline __m512
10727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10728 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10729 {
10730   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10731                                                 (__v16sf) __B,
10732                                                 (__v16sf)
10733                                                 _mm512_setzero_ps (),
10734                                                 (__mmask16) __U,
10735                                                 _MM_FROUND_CUR_DIRECTION);
10736 }
10737
10738 extern __inline __m512d
10739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10740 _mm512_sub_pd (__m512d __A, __m512d __B)
10741 {
10742   return (__m512d) ((__v8df)__A - (__v8df)__B);
10743 }
10744
10745 extern __inline __m512d
10746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10747 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10748 {
10749   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10750                                                  (__v8df) __B,
10751                                                  (__v8df) __W,
10752                                                  (__mmask8) __U,
10753                                                  _MM_FROUND_CUR_DIRECTION);
10754 }
10755
10756 extern __inline __m512d
10757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10758 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10759 {
10760   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10761                                                  (__v8df) __B,
10762                                                  (__v8df)
10763                                                  _mm512_setzero_pd (),
10764                                                  (__mmask8) __U,
10765                                                  _MM_FROUND_CUR_DIRECTION);
10766 }
10767
10768 extern __inline __m512
10769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10770 _mm512_sub_ps (__m512 __A, __m512 __B)
10771 {
10772   return (__m512) ((__v16sf)__A - (__v16sf)__B);
10773 }
10774
10775 extern __inline __m512
10776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10777 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10778 {
10779   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10780                                                 (__v16sf) __B,
10781                                                 (__v16sf) __W,
10782                                                 (__mmask16) __U,
10783                                                 _MM_FROUND_CUR_DIRECTION);
10784 }
10785
10786 extern __inline __m512
10787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10788 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10789 {
10790   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10791                                                 (__v16sf) __B,
10792                                                 (__v16sf)
10793                                                 _mm512_setzero_ps (),
10794                                                 (__mmask16) __U,
10795                                                 _MM_FROUND_CUR_DIRECTION);
10796 }
10797
10798 extern __inline __m512d
10799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10800 _mm512_mul_pd (__m512d __A, __m512d __B)
10801 {
10802   return (__m512d) ((__v8df)__A * (__v8df)__B);
10803 }
10804
10805 extern __inline __m512d
10806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10807 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10808 {
10809   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10810                                                  (__v8df) __B,
10811                                                  (__v8df) __W,
10812                                                  (__mmask8) __U,
10813                                                  _MM_FROUND_CUR_DIRECTION);
10814 }
10815
10816 extern __inline __m512d
10817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10818 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10819 {
10820   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10821                                                  (__v8df) __B,
10822                                                  (__v8df)
10823                                                  _mm512_setzero_pd (),
10824                                                  (__mmask8) __U,
10825                                                  _MM_FROUND_CUR_DIRECTION);
10826 }
10827
10828 extern __inline __m512
10829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10830 _mm512_mul_ps (__m512 __A, __m512 __B)
10831 {
10832   return (__m512) ((__v16sf)__A * (__v16sf)__B);
10833 }
10834
10835 extern __inline __m512
10836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10837 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10838 {
10839   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10840                                                 (__v16sf) __B,
10841                                                 (__v16sf) __W,
10842                                                 (__mmask16) __U,
10843                                                 _MM_FROUND_CUR_DIRECTION);
10844 }
10845
10846 extern __inline __m512
10847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10848 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10849 {
10850   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10851                                                 (__v16sf) __B,
10852                                                 (__v16sf)
10853                                                 _mm512_setzero_ps (),
10854                                                 (__mmask16) __U,
10855                                                 _MM_FROUND_CUR_DIRECTION);
10856 }
10857
10858 extern __inline __m512d
10859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10860 _mm512_div_pd (__m512d __M, __m512d __V)
10861 {
10862   return (__m512d) ((__v8df)__M / (__v8df)__V);
10863 }
10864
10865 extern __inline __m512d
10866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10867 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10868 {
10869   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10870                                                  (__v8df) __V,
10871                                                  (__v8df) __W,
10872                                                  (__mmask8) __U,
10873                                                  _MM_FROUND_CUR_DIRECTION);
10874 }
10875
10876 extern __inline __m512d
10877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10878 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10879 {
10880   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10881                                                  (__v8df) __V,
10882                                                  (__v8df)
10883                                                  _mm512_setzero_pd (),
10884                                                  (__mmask8) __U,
10885                                                  _MM_FROUND_CUR_DIRECTION);
10886 }
10887
10888 extern __inline __m512
10889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10890 _mm512_div_ps (__m512 __A, __m512 __B)
10891 {
10892   return (__m512) ((__v16sf)__A / (__v16sf)__B);
10893 }
10894
10895 extern __inline __m512
10896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10897 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10898 {
10899   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10900                                                 (__v16sf) __B,
10901                                                 (__v16sf) __W,
10902                                                 (__mmask16) __U,
10903                                                 _MM_FROUND_CUR_DIRECTION);
10904 }
10905
10906 extern __inline __m512
10907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10908 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10909 {
10910   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10911                                                 (__v16sf) __B,
10912                                                 (__v16sf)
10913                                                 _mm512_setzero_ps (),
10914                                                 (__mmask16) __U,
10915                                                 _MM_FROUND_CUR_DIRECTION);
10916 }
10917
10918 extern __inline __m512d
10919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10920 _mm512_max_pd (__m512d __A, __m512d __B)
10921 {
10922   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10923                                                  (__v8df) __B,
10924                                                  (__v8df)
10925                                                  _mm512_undefined_pd (),
10926                                                  (__mmask8) -1,
10927                                                  _MM_FROUND_CUR_DIRECTION);
10928 }
10929
10930 extern __inline __m512d
10931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10932 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10933 {
10934   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10935                                                  (__v8df) __B,
10936                                                  (__v8df) __W,
10937                                                  (__mmask8) __U,
10938                                                  _MM_FROUND_CUR_DIRECTION);
10939 }
10940
10941 extern __inline __m512d
10942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10943 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10944 {
10945   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10946                                                  (__v8df) __B,
10947                                                  (__v8df)
10948                                                  _mm512_setzero_pd (),
10949                                                  (__mmask8) __U,
10950                                                  _MM_FROUND_CUR_DIRECTION);
10951 }
10952
10953 extern __inline __m512
10954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10955 _mm512_max_ps (__m512 __A, __m512 __B)
10956 {
10957   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10958                                                 (__v16sf) __B,
10959                                                 (__v16sf)
10960                                                 _mm512_undefined_ps (),
10961                                                 (__mmask16) -1,
10962                                                 _MM_FROUND_CUR_DIRECTION);
10963 }
10964
10965 extern __inline __m512
10966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10967 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10968 {
10969   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10970                                                 (__v16sf) __B,
10971                                                 (__v16sf) __W,
10972                                                 (__mmask16) __U,
10973                                                 _MM_FROUND_CUR_DIRECTION);
10974 }
10975
10976 extern __inline __m512
10977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10978 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10979 {
10980   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10981                                                 (__v16sf) __B,
10982                                                 (__v16sf)
10983                                                 _mm512_setzero_ps (),
10984                                                 (__mmask16) __U,
10985                                                 _MM_FROUND_CUR_DIRECTION);
10986 }
10987
10988 extern __inline __m512d
10989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10990 _mm512_min_pd (__m512d __A, __m512d __B)
10991 {
10992   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10993                                                  (__v8df) __B,
10994                                                  (__v8df)
10995                                                  _mm512_undefined_pd (),
10996                                                  (__mmask8) -1,
10997                                                  _MM_FROUND_CUR_DIRECTION);
10998 }
10999
11000 extern __inline __m512d
11001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11002 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11003 {
11004   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11005                                                  (__v8df) __B,
11006                                                  (__v8df) __W,
11007                                                  (__mmask8) __U,
11008                                                  _MM_FROUND_CUR_DIRECTION);
11009 }
11010
11011 extern __inline __m512d
11012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11013 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11014 {
11015   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11016                                                  (__v8df) __B,
11017                                                  (__v8df)
11018                                                  _mm512_setzero_pd (),
11019                                                  (__mmask8) __U,
11020                                                  _MM_FROUND_CUR_DIRECTION);
11021 }
11022
11023 extern __inline __m512
11024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11025 _mm512_min_ps (__m512 __A, __m512 __B)
11026 {
11027   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11028                                                 (__v16sf) __B,
11029                                                 (__v16sf)
11030                                                 _mm512_undefined_ps (),
11031                                                 (__mmask16) -1,
11032                                                 _MM_FROUND_CUR_DIRECTION);
11033 }
11034
11035 extern __inline __m512
11036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11037 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11038 {
11039   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11040                                                 (__v16sf) __B,
11041                                                 (__v16sf) __W,
11042                                                 (__mmask16) __U,
11043                                                 _MM_FROUND_CUR_DIRECTION);
11044 }
11045
11046 extern __inline __m512
11047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11048 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11049 {
11050   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11051                                                 (__v16sf) __B,
11052                                                 (__v16sf)
11053                                                 _mm512_setzero_ps (),
11054                                                 (__mmask16) __U,
11055                                                 _MM_FROUND_CUR_DIRECTION);
11056 }
11057
11058 extern __inline __m512d
11059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11060 _mm512_scalef_pd (__m512d __A, __m512d __B)
11061 {
11062   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11063                                                     (__v8df) __B,
11064                                                     (__v8df)
11065                                                     _mm512_undefined_pd (),
11066                                                     (__mmask8) -1,
11067                                                     _MM_FROUND_CUR_DIRECTION);
11068 }
11069
11070 extern __inline __m512d
11071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11072 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11073 {
11074   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11075                                                     (__v8df) __B,
11076                                                     (__v8df) __W,
11077                                                     (__mmask8) __U,
11078                                                     _MM_FROUND_CUR_DIRECTION);
11079 }
11080
11081 extern __inline __m512d
11082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11083 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11084 {
11085   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11086                                                     (__v8df) __B,
11087                                                     (__v8df)
11088                                                     _mm512_setzero_pd (),
11089                                                     (__mmask8) __U,
11090                                                     _MM_FROUND_CUR_DIRECTION);
11091 }
11092
11093 extern __inline __m512
11094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11095 _mm512_scalef_ps (__m512 __A, __m512 __B)
11096 {
11097   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11098                                                    (__v16sf) __B,
11099                                                    (__v16sf)
11100                                                    _mm512_undefined_ps (),
11101                                                    (__mmask16) -1,
11102                                                    _MM_FROUND_CUR_DIRECTION);
11103 }
11104
11105 extern __inline __m512
11106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11107 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11108 {
11109   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11110                                                    (__v16sf) __B,
11111                                                    (__v16sf) __W,
11112                                                    (__mmask16) __U,
11113                                                    _MM_FROUND_CUR_DIRECTION);
11114 }
11115
11116 extern __inline __m512
11117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11118 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11119 {
11120   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11121                                                    (__v16sf) __B,
11122                                                    (__v16sf)
11123                                                    _mm512_setzero_ps (),
11124                                                    (__mmask16) __U,
11125                                                    _MM_FROUND_CUR_DIRECTION);
11126 }
11127
11128 extern __inline __m128d
11129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11130 _mm_scalef_sd (__m128d __A, __m128d __B)
11131 {
11132   return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11133                                                   (__v2df) __B,
11134                                                   _MM_FROUND_CUR_DIRECTION);
11135 }
11136
11137 extern __inline __m128
11138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11139 _mm_scalef_ss (__m128 __A, __m128 __B)
11140 {
11141   return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11142                                                  (__v4sf) __B,
11143                                                  _MM_FROUND_CUR_DIRECTION);
11144 }
11145
11146 extern __inline __m512d
11147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11148 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11149 {
11150   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11151                                                     (__v8df) __B,
11152                                                     (__v8df) __C,
11153                                                     (__mmask8) -1,
11154                                                     _MM_FROUND_CUR_DIRECTION);
11155 }
11156
11157 extern __inline __m512d
11158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11160 {
11161   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11162                                                     (__v8df) __B,
11163                                                     (__v8df) __C,
11164                                                     (__mmask8) __U,
11165                                                     _MM_FROUND_CUR_DIRECTION);
11166 }
11167
11168 extern __inline __m512d
11169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11170 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11171 {
11172   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11173                                                      (__v8df) __B,
11174                                                      (__v8df) __C,
11175                                                      (__mmask8) __U,
11176                                                      _MM_FROUND_CUR_DIRECTION);
11177 }
11178
11179 extern __inline __m512d
11180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11181 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11182 {
11183   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11184                                                      (__v8df) __B,
11185                                                      (__v8df) __C,
11186                                                      (__mmask8) __U,
11187                                                      _MM_FROUND_CUR_DIRECTION);
11188 }
11189
11190 extern __inline __m512
11191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11192 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11193 {
11194   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11195                                                    (__v16sf) __B,
11196                                                    (__v16sf) __C,
11197                                                    (__mmask16) -1,
11198                                                    _MM_FROUND_CUR_DIRECTION);
11199 }
11200
11201 extern __inline __m512
11202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11203 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11204 {
11205   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11206                                                    (__v16sf) __B,
11207                                                    (__v16sf) __C,
11208                                                    (__mmask16) __U,
11209                                                    _MM_FROUND_CUR_DIRECTION);
11210 }
11211
11212 extern __inline __m512
11213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11214 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11215 {
11216   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11217                                                     (__v16sf) __B,
11218                                                     (__v16sf) __C,
11219                                                     (__mmask16) __U,
11220                                                     _MM_FROUND_CUR_DIRECTION);
11221 }
11222
11223 extern __inline __m512
11224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11225 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11226 {
11227   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11228                                                     (__v16sf) __B,
11229                                                     (__v16sf) __C,
11230                                                     (__mmask16) __U,
11231                                                     _MM_FROUND_CUR_DIRECTION);
11232 }
11233
11234 extern __inline __m512d
11235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11236 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11237 {
11238   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11239                                                     (__v8df) __B,
11240                                                     -(__v8df) __C,
11241                                                     (__mmask8) -1,
11242                                                     _MM_FROUND_CUR_DIRECTION);
11243 }
11244
11245 extern __inline __m512d
11246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11247 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11248 {
11249   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11250                                                     (__v8df) __B,
11251                                                     -(__v8df) __C,
11252                                                     (__mmask8) __U,
11253                                                     _MM_FROUND_CUR_DIRECTION);
11254 }
11255
11256 extern __inline __m512d
11257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11258 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11259 {
11260   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11261                                                      (__v8df) __B,
11262                                                      (__v8df) __C,
11263                                                      (__mmask8) __U,
11264                                                      _MM_FROUND_CUR_DIRECTION);
11265 }
11266
11267 extern __inline __m512d
11268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11270 {
11271   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11272                                                      (__v8df) __B,
11273                                                      -(__v8df) __C,
11274                                                      (__mmask8) __U,
11275                                                      _MM_FROUND_CUR_DIRECTION);
11276 }
11277
11278 extern __inline __m512
11279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11280 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11281 {
11282   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11283                                                    (__v16sf) __B,
11284                                                    -(__v16sf) __C,
11285                                                    (__mmask16) -1,
11286                                                    _MM_FROUND_CUR_DIRECTION);
11287 }
11288
11289 extern __inline __m512
11290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11291 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11292 {
11293   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11294                                                    (__v16sf) __B,
11295                                                    -(__v16sf) __C,
11296                                                    (__mmask16) __U,
11297                                                    _MM_FROUND_CUR_DIRECTION);
11298 }
11299
11300 extern __inline __m512
11301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11302 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11303 {
11304   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11305                                                     (__v16sf) __B,
11306                                                     (__v16sf) __C,
11307                                                     (__mmask16) __U,
11308                                                     _MM_FROUND_CUR_DIRECTION);
11309 }
11310
11311 extern __inline __m512
11312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11313 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11314 {
11315   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11316                                                     (__v16sf) __B,
11317                                                     -(__v16sf) __C,
11318                                                     (__mmask16) __U,
11319                                                     _MM_FROUND_CUR_DIRECTION);
11320 }
11321
11322 extern __inline __m512d
11323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11324 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11325 {
11326   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11327                                                        (__v8df) __B,
11328                                                        (__v8df) __C,
11329                                                        (__mmask8) -1,
11330                                                        _MM_FROUND_CUR_DIRECTION);
11331 }
11332
11333 extern __inline __m512d
11334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11335 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11336 {
11337   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11338                                                        (__v8df) __B,
11339                                                        (__v8df) __C,
11340                                                        (__mmask8) __U,
11341                                                        _MM_FROUND_CUR_DIRECTION);
11342 }
11343
11344 extern __inline __m512d
11345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11346 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11347 {
11348   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11349                                                         (__v8df) __B,
11350                                                         (__v8df) __C,
11351                                                         (__mmask8) __U,
11352                                                         _MM_FROUND_CUR_DIRECTION);
11353 }
11354
11355 extern __inline __m512d
11356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11358 {
11359   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11360                                                         (__v8df) __B,
11361                                                         (__v8df) __C,
11362                                                         (__mmask8) __U,
11363                                                         _MM_FROUND_CUR_DIRECTION);
11364 }
11365
11366 extern __inline __m512
11367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11369 {
11370   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11371                                                       (__v16sf) __B,
11372                                                       (__v16sf) __C,
11373                                                       (__mmask16) -1,
11374                                                       _MM_FROUND_CUR_DIRECTION);
11375 }
11376
11377 extern __inline __m512
11378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11379 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11380 {
11381   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11382                                                       (__v16sf) __B,
11383                                                       (__v16sf) __C,
11384                                                       (__mmask16) __U,
11385                                                       _MM_FROUND_CUR_DIRECTION);
11386 }
11387
11388 extern __inline __m512
11389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11390 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11391 {
11392   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11393                                                        (__v16sf) __B,
11394                                                        (__v16sf) __C,
11395                                                        (__mmask16) __U,
11396                                                        _MM_FROUND_CUR_DIRECTION);
11397 }
11398
11399 extern __inline __m512
11400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11401 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11402 {
11403   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11404                                                        (__v16sf) __B,
11405                                                        (__v16sf) __C,
11406                                                        (__mmask16) __U,
11407                                                        _MM_FROUND_CUR_DIRECTION);
11408 }
11409
11410 extern __inline __m512d
11411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11412 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11413 {
11414   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11415                                                        (__v8df) __B,
11416                                                        -(__v8df) __C,
11417                                                        (__mmask8) -1,
11418                                                        _MM_FROUND_CUR_DIRECTION);
11419 }
11420
11421 extern __inline __m512d
11422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11423 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11424 {
11425   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11426                                                        (__v8df) __B,
11427                                                        -(__v8df) __C,
11428                                                        (__mmask8) __U,
11429                                                        _MM_FROUND_CUR_DIRECTION);
11430 }
11431
11432 extern __inline __m512d
11433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11434 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11435 {
11436   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11437                                                         (__v8df) __B,
11438                                                         (__v8df) __C,
11439                                                         (__mmask8) __U,
11440                                                         _MM_FROUND_CUR_DIRECTION);
11441 }
11442
11443 extern __inline __m512d
11444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11445 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11446 {
11447   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11448                                                         (__v8df) __B,
11449                                                         -(__v8df) __C,
11450                                                         (__mmask8) __U,
11451                                                         _MM_FROUND_CUR_DIRECTION);
11452 }
11453
11454 extern __inline __m512
11455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11456 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11457 {
11458   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11459                                                       (__v16sf) __B,
11460                                                       -(__v16sf) __C,
11461                                                       (__mmask16) -1,
11462                                                       _MM_FROUND_CUR_DIRECTION);
11463 }
11464
11465 extern __inline __m512
11466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11467 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11468 {
11469   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11470                                                       (__v16sf) __B,
11471                                                       -(__v16sf) __C,
11472                                                       (__mmask16) __U,
11473                                                       _MM_FROUND_CUR_DIRECTION);
11474 }
11475
11476 extern __inline __m512
11477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11478 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11479 {
11480   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11481                                                        (__v16sf) __B,
11482                                                        (__v16sf) __C,
11483                                                        (__mmask16) __U,
11484                                                        _MM_FROUND_CUR_DIRECTION);
11485 }
11486
11487 extern __inline __m512
11488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11489 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11490 {
11491   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11492                                                        (__v16sf) __B,
11493                                                        -(__v16sf) __C,
11494                                                        (__mmask16) __U,
11495                                                        _MM_FROUND_CUR_DIRECTION);
11496 }
11497
11498 extern __inline __m512d
11499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11500 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11501 {
11502   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11503                                                     (__v8df) __B,
11504                                                     (__v8df) __C,
11505                                                     (__mmask8) -1,
11506                                                     _MM_FROUND_CUR_DIRECTION);
11507 }
11508
11509 extern __inline __m512d
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11512 {
11513   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11514                                                      (__v8df) __B,
11515                                                      (__v8df) __C,
11516                                                      (__mmask8) __U,
11517                                                      _MM_FROUND_CUR_DIRECTION);
11518 }
11519
11520 extern __inline __m512d
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11523 {
11524   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11525                                                      (__v8df) __B,
11526                                                      (__v8df) __C,
11527                                                      (__mmask8) __U,
11528                                                      _MM_FROUND_CUR_DIRECTION);
11529 }
11530
11531 extern __inline __m512d
11532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11533 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11534 {
11535   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11536                                                      (__v8df) __B,
11537                                                      (__v8df) __C,
11538                                                      (__mmask8) __U,
11539                                                      _MM_FROUND_CUR_DIRECTION);
11540 }
11541
11542 extern __inline __m512
11543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11544 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11545 {
11546   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11547                                                    (__v16sf) __B,
11548                                                    (__v16sf) __C,
11549                                                    (__mmask16) -1,
11550                                                    _MM_FROUND_CUR_DIRECTION);
11551 }
11552
11553 extern __inline __m512
11554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11555 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11556 {
11557   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11558                                                     (__v16sf) __B,
11559                                                     (__v16sf) __C,
11560                                                     (__mmask16) __U,
11561                                                     _MM_FROUND_CUR_DIRECTION);
11562 }
11563
11564 extern __inline __m512
11565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11566 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11567 {
11568   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11569                                                     (__v16sf) __B,
11570                                                     (__v16sf) __C,
11571                                                     (__mmask16) __U,
11572                                                     _MM_FROUND_CUR_DIRECTION);
11573 }
11574
11575 extern __inline __m512
11576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11577 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11578 {
11579   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11580                                                     (__v16sf) __B,
11581                                                     (__v16sf) __C,
11582                                                     (__mmask16) __U,
11583                                                     _MM_FROUND_CUR_DIRECTION);
11584 }
11585
11586 extern __inline __m512d
11587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11588 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11589 {
11590   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11591                                                     (__v8df) __B,
11592                                                     -(__v8df) __C,
11593                                                     (__mmask8) -1,
11594                                                     _MM_FROUND_CUR_DIRECTION);
11595 }
11596
11597 extern __inline __m512d
11598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11599 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11600 {
11601   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11602                                                      (__v8df) __B,
11603                                                      (__v8df) __C,
11604                                                      (__mmask8) __U,
11605                                                      _MM_FROUND_CUR_DIRECTION);
11606 }
11607
11608 extern __inline __m512d
11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11610 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11611 {
11612   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11613                                                       (__v8df) __B,
11614                                                       (__v8df) __C,
11615                                                       (__mmask8) __U,
11616                                                       _MM_FROUND_CUR_DIRECTION);
11617 }
11618
11619 extern __inline __m512d
11620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11621 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11622 {
11623   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11624                                                      (__v8df) __B,
11625                                                      -(__v8df) __C,
11626                                                      (__mmask8) __U,
11627                                                      _MM_FROUND_CUR_DIRECTION);
11628 }
11629
11630 extern __inline __m512
11631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11632 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11633 {
11634   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11635                                                    (__v16sf) __B,
11636                                                    -(__v16sf) __C,
11637                                                    (__mmask16) -1,
11638                                                    _MM_FROUND_CUR_DIRECTION);
11639 }
11640
11641 extern __inline __m512
11642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11643 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11644 {
11645   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11646                                                     (__v16sf) __B,
11647                                                     (__v16sf) __C,
11648                                                     (__mmask16) __U,
11649                                                     _MM_FROUND_CUR_DIRECTION);
11650 }
11651
11652 extern __inline __m512
11653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11654 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11655 {
11656   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11657                                                      (__v16sf) __B,
11658                                                      (__v16sf) __C,
11659                                                      (__mmask16) __U,
11660                                                      _MM_FROUND_CUR_DIRECTION);
11661 }
11662
11663 extern __inline __m512
11664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11665 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11666 {
11667   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11668                                                     (__v16sf) __B,
11669                                                     -(__v16sf) __C,
11670                                                     (__mmask16) __U,
11671                                                     _MM_FROUND_CUR_DIRECTION);
11672 }
11673
11674 extern __inline __m256i
11675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11676 _mm512_cvttpd_epi32 (__m512d __A)
11677 {
11678   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11679                                                      (__v8si)
11680                                                      _mm256_undefined_si256 (),
11681                                                      (__mmask8) -1,
11682                                                      _MM_FROUND_CUR_DIRECTION);
11683 }
11684
11685 extern __inline __m256i
11686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11687 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11688 {
11689   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11690                                                      (__v8si) __W,
11691                                                      (__mmask8) __U,
11692                                                      _MM_FROUND_CUR_DIRECTION);
11693 }
11694
11695 extern __inline __m256i
11696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11697 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11698 {
11699   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11700                                                      (__v8si)
11701                                                      _mm256_setzero_si256 (),
11702                                                      (__mmask8) __U,
11703                                                      _MM_FROUND_CUR_DIRECTION);
11704 }
11705
11706 extern __inline __m256i
11707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11708 _mm512_cvttpd_epu32 (__m512d __A)
11709 {
11710   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11711                                                       (__v8si)
11712                                                       _mm256_undefined_si256 (),
11713                                                       (__mmask8) -1,
11714                                                       _MM_FROUND_CUR_DIRECTION);
11715 }
11716
11717 extern __inline __m256i
11718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11719 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11720 {
11721   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11722                                                       (__v8si) __W,
11723                                                       (__mmask8) __U,
11724                                                       _MM_FROUND_CUR_DIRECTION);
11725 }
11726
11727 extern __inline __m256i
11728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11729 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11730 {
11731   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11732                                                       (__v8si)
11733                                                       _mm256_setzero_si256 (),
11734                                                       (__mmask8) __U,
11735                                                       _MM_FROUND_CUR_DIRECTION);
11736 }
11737
11738 extern __inline __m256i
11739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11740 _mm512_cvtpd_epi32 (__m512d __A)
11741 {
11742   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11743                                                     (__v8si)
11744                                                     _mm256_undefined_si256 (),
11745                                                     (__mmask8) -1,
11746                                                     _MM_FROUND_CUR_DIRECTION);
11747 }
11748
11749 extern __inline __m256i
11750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11751 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11752 {
11753   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11754                                                     (__v8si) __W,
11755                                                     (__mmask8) __U,
11756                                                     _MM_FROUND_CUR_DIRECTION);
11757 }
11758
11759 extern __inline __m256i
11760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11761 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11762 {
11763   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11764                                                     (__v8si)
11765                                                     _mm256_setzero_si256 (),
11766                                                     (__mmask8) __U,
11767                                                     _MM_FROUND_CUR_DIRECTION);
11768 }
11769
11770 extern __inline __m256i
11771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11772 _mm512_cvtpd_epu32 (__m512d __A)
11773 {
11774   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11775                                                      (__v8si)
11776                                                      _mm256_undefined_si256 (),
11777                                                      (__mmask8) -1,
11778                                                      _MM_FROUND_CUR_DIRECTION);
11779 }
11780
11781 extern __inline __m256i
11782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11783 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11784 {
11785   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11786                                                      (__v8si) __W,
11787                                                      (__mmask8) __U,
11788                                                      _MM_FROUND_CUR_DIRECTION);
11789 }
11790
11791 extern __inline __m256i
11792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11793 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11794 {
11795   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11796                                                      (__v8si)
11797                                                      _mm256_setzero_si256 (),
11798                                                      (__mmask8) __U,
11799                                                      _MM_FROUND_CUR_DIRECTION);
11800 }
11801
11802 extern __inline __m512i
11803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11804 _mm512_cvttps_epi32 (__m512 __A)
11805 {
11806   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11807                                                      (__v16si)
11808                                                      _mm512_undefined_epi32 (),
11809                                                      (__mmask16) -1,
11810                                                      _MM_FROUND_CUR_DIRECTION);
11811 }
11812
11813 extern __inline __m512i
11814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11815 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11816 {
11817   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11818                                                      (__v16si) __W,
11819                                                      (__mmask16) __U,
11820                                                      _MM_FROUND_CUR_DIRECTION);
11821 }
11822
11823 extern __inline __m512i
11824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11825 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11826 {
11827   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11828                                                      (__v16si)
11829                                                      _mm512_setzero_si512 (),
11830                                                      (__mmask16) __U,
11831                                                      _MM_FROUND_CUR_DIRECTION);
11832 }
11833
11834 extern __inline __m512i
11835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11836 _mm512_cvttps_epu32 (__m512 __A)
11837 {
11838   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11839                                                       (__v16si)
11840                                                       _mm512_undefined_epi32 (),
11841                                                       (__mmask16) -1,
11842                                                       _MM_FROUND_CUR_DIRECTION);
11843 }
11844
11845 extern __inline __m512i
11846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11847 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11848 {
11849   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11850                                                       (__v16si) __W,
11851                                                       (__mmask16) __U,
11852                                                       _MM_FROUND_CUR_DIRECTION);
11853 }
11854
11855 extern __inline __m512i
11856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11857 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11858 {
11859   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11860                                                       (__v16si)
11861                                                       _mm512_setzero_si512 (),
11862                                                       (__mmask16) __U,
11863                                                       _MM_FROUND_CUR_DIRECTION);
11864 }
11865
11866 extern __inline __m512i
11867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868 _mm512_cvtps_epi32 (__m512 __A)
11869 {
11870   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11871                                                     (__v16si)
11872                                                     _mm512_undefined_epi32 (),
11873                                                     (__mmask16) -1,
11874                                                     _MM_FROUND_CUR_DIRECTION);
11875 }
11876
11877 extern __inline __m512i
11878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11879 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11880 {
11881   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11882                                                     (__v16si) __W,
11883                                                     (__mmask16) __U,
11884                                                     _MM_FROUND_CUR_DIRECTION);
11885 }
11886
11887 extern __inline __m512i
11888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11889 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11890 {
11891   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11892                                                     (__v16si)
11893                                                     _mm512_setzero_si512 (),
11894                                                     (__mmask16) __U,
11895                                                     _MM_FROUND_CUR_DIRECTION);
11896 }
11897
11898 extern __inline __m512i
11899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11900 _mm512_cvtps_epu32 (__m512 __A)
11901 {
11902   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11903                                                      (__v16si)
11904                                                      _mm512_undefined_epi32 (),
11905                                                      (__mmask16) -1,
11906                                                      _MM_FROUND_CUR_DIRECTION);
11907 }
11908
11909 extern __inline __m512i
11910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11911 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11912 {
11913   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11914                                                      (__v16si) __W,
11915                                                      (__mmask16) __U,
11916                                                      _MM_FROUND_CUR_DIRECTION);
11917 }
11918
11919 extern __inline __m512i
11920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11921 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11922 {
11923   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11924                                                      (__v16si)
11925                                                      _mm512_setzero_si512 (),
11926                                                      (__mmask16) __U,
11927                                                      _MM_FROUND_CUR_DIRECTION);
11928 }
11929
11930 #ifdef __x86_64__
11931 extern __inline __m128
11932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11933 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11934 {
11935   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11936                                               _MM_FROUND_CUR_DIRECTION);
11937 }
11938
11939 extern __inline __m128d
11940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11941 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11942 {
11943   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11944                                                _MM_FROUND_CUR_DIRECTION);
11945 }
11946 #endif
11947
11948 extern __inline __m128
11949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11950 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11951 {
11952   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11953                                               _MM_FROUND_CUR_DIRECTION);
11954 }
11955
11956 extern __inline __m512
11957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958 _mm512_cvtepi32_ps (__m512i __A)
11959 {
11960   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11961                                                    (__v16sf)
11962                                                    _mm512_undefined_ps (),
11963                                                    (__mmask16) -1,
11964                                                    _MM_FROUND_CUR_DIRECTION);
11965 }
11966
11967 extern __inline __m512
11968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11969 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11970 {
11971   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11972                                                    (__v16sf) __W,
11973                                                    (__mmask16) __U,
11974                                                    _MM_FROUND_CUR_DIRECTION);
11975 }
11976
11977 extern __inline __m512
11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11979 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11980 {
11981   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11982                                                    (__v16sf)
11983                                                    _mm512_setzero_ps (),
11984                                                    (__mmask16) __U,
11985                                                    _MM_FROUND_CUR_DIRECTION);
11986 }
11987
11988 extern __inline __m512
11989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11990 _mm512_cvtepu32_ps (__m512i __A)
11991 {
11992   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11993                                                     (__v16sf)
11994                                                     _mm512_undefined_ps (),
11995                                                     (__mmask16) -1,
11996                                                     _MM_FROUND_CUR_DIRECTION);
11997 }
11998
11999 extern __inline __m512
12000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12001 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12002 {
12003   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12004                                                     (__v16sf) __W,
12005                                                     (__mmask16) __U,
12006                                                     _MM_FROUND_CUR_DIRECTION);
12007 }
12008
12009 extern __inline __m512
12010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12011 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12012 {
12013   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12014                                                     (__v16sf)
12015                                                     _mm512_setzero_ps (),
12016                                                     (__mmask16) __U,
12017                                                     _MM_FROUND_CUR_DIRECTION);
12018 }
12019
12020 #ifdef __OPTIMIZE__
12021 extern __inline __m512d
12022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12023 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12024 {
12025   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12026                                                       (__v8df) __B,
12027                                                       (__v8di) __C,
12028                                                       __imm,
12029                                                       (__mmask8) -1,
12030                                                       _MM_FROUND_CUR_DIRECTION);
12031 }
12032
12033 extern __inline __m512d
12034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12035 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12036                          __m512i __C, const int __imm)
12037 {
12038   return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12039                                                       (__v8df) __B,
12040                                                       (__v8di) __C,
12041                                                       __imm,
12042                                                       (__mmask8) __U,
12043                                                       _MM_FROUND_CUR_DIRECTION);
12044 }
12045
12046 extern __inline __m512d
12047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12049                           __m512i __C, const int __imm)
12050 {
12051   return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12052                                                        (__v8df) __B,
12053                                                        (__v8di) __C,
12054                                                        __imm,
12055                                                        (__mmask8) __U,
12056                                                        _MM_FROUND_CUR_DIRECTION);
12057 }
12058
12059 extern __inline __m512
12060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12061 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12062 {
12063   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12064                                                      (__v16sf) __B,
12065                                                      (__v16si) __C,
12066                                                      __imm,
12067                                                      (__mmask16) -1,
12068                                                      _MM_FROUND_CUR_DIRECTION);
12069 }
12070
12071 extern __inline __m512
12072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12073 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12074                          __m512i __C, const int __imm)
12075 {
12076   return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12077                                                      (__v16sf) __B,
12078                                                      (__v16si) __C,
12079                                                      __imm,
12080                                                      (__mmask16) __U,
12081                                                      _MM_FROUND_CUR_DIRECTION);
12082 }
12083
12084 extern __inline __m512
12085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12086 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12087                           __m512i __C, const int __imm)
12088 {
12089   return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12090                                                       (__v16sf) __B,
12091                                                       (__v16si) __C,
12092                                                       __imm,
12093                                                       (__mmask16) __U,
12094                                                       _MM_FROUND_CUR_DIRECTION);
12095 }
12096
12097 extern __inline __m128d
12098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12099 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12100 {
12101   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12102                                                    (__v2df) __B,
12103                                                    (__v2di) __C, __imm,
12104                                                    (__mmask8) -1,
12105                                                    _MM_FROUND_CUR_DIRECTION);
12106 }
12107
12108 extern __inline __m128d
12109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12110 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12111                       __m128i __C, const int __imm)
12112 {
12113   return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12114                                                    (__v2df) __B,
12115                                                    (__v2di) __C, __imm,
12116                                                    (__mmask8) __U,
12117                                                    _MM_FROUND_CUR_DIRECTION);
12118 }
12119
12120 extern __inline __m128d
12121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12122 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12123                        __m128i __C, const int __imm)
12124 {
12125   return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12126                                                     (__v2df) __B,
12127                                                     (__v2di) __C,
12128                                                     __imm,
12129                                                     (__mmask8) __U,
12130                                                     _MM_FROUND_CUR_DIRECTION);
12131 }
12132
12133 extern __inline __m128
12134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12135 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12136 {
12137   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12138                                                   (__v4sf) __B,
12139                                                   (__v4si) __C, __imm,
12140                                                   (__mmask8) -1,
12141                                                   _MM_FROUND_CUR_DIRECTION);
12142 }
12143
12144 extern __inline __m128
12145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12146 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12147                       __m128i __C, const int __imm)
12148 {
12149   return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12150                                                   (__v4sf) __B,
12151                                                   (__v4si) __C, __imm,
12152                                                   (__mmask8) __U,
12153                                                   _MM_FROUND_CUR_DIRECTION);
12154 }
12155
12156 extern __inline __m128
12157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12158 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12159                        __m128i __C, const int __imm)
12160 {
12161   return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12162                                                    (__v4sf) __B,
12163                                                    (__v4si) __C, __imm,
12164                                                    (__mmask8) __U,
12165                                                    _MM_FROUND_CUR_DIRECTION);
12166 }
12167 #else
/* Macro forms of the 512-bit double fixupimm intrinsics.  A and B are
   cast to __v8df, T to __v8di, IMM to int and M to __mmask8 before
   being handed to the builtin; the unmasked form passes (__mmask8)(-1).  */
#define _mm512_fixupimm_pd(A, B, T, IMM)                                \
  ((__m512d)                                                            \
   __builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(A),             \
                                      (__v8df)(__m512d)(B),             \
                                      (__v8di)(__m512i)(T),             \
                                      (int)(IMM),                       \
                                      (__mmask8)(-1),                   \
                                      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, M, B, T, IMM)                        \
  ((__m512d)                                                            \
   __builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(A),             \
                                      (__v8df)(__m512d)(B),             \
                                      (__v8di)(__m512i)(T),             \
                                      (int)(IMM),                       \
                                      (__mmask8)(M),                    \
                                      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_pd(M, A, B, T, IMM)                       \
  ((__m512d)                                                            \
   __builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(A),            \
                                       (__v8df)(__m512d)(B),            \
                                       (__v8di)(__m512i)(T),            \
                                       (int)(IMM),                      \
                                       (__mmask8)(M),                   \
                                       _MM_FROUND_CUR_DIRECTION))
12182
/* Macro forms of the 512-bit float fixupimm intrinsics.  A and B are
   cast to __v16sf, T to __v16si, IMM to int and M to __mmask16; the
   unmasked form passes (__mmask16)(-1).  */
#define _mm512_fixupimm_ps(A, B, T, IMM)                                \
  ((__m512)                                                             \
   __builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(A),             \
                                      (__v16sf)(__m512)(B),             \
                                      (__v16si)(__m512i)(T),            \
                                      (int)(IMM),                       \
                                      (__mmask16)(-1),                  \
                                      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, M, B, T, IMM)                        \
  ((__m512)                                                             \
   __builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(A),             \
                                      (__v16sf)(__m512)(B),             \
                                      (__v16si)(__m512i)(T),            \
                                      (int)(IMM),                       \
                                      (__mmask16)(M),                   \
                                      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_ps(M, A, B, T, IMM)                       \
  ((__m512)                                                             \
   __builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(A),            \
                                       (__v16sf)(__m512)(B),            \
                                       (__v16si)(__m512i)(T),           \
                                       (int)(IMM),                      \
                                       (__mmask16)(M),                  \
                                       _MM_FROUND_CUR_DIRECTION))
12197
/* Macro forms of the 128-bit double fixupimm intrinsics.  A and B are
   cast to __v2df, T to __v2di, IMM to int and M to __mmask8; the
   unmasked form passes (__mmask8)(-1).  */
#define _mm_fixupimm_sd(A, B, T, IMM)                                   \
  ((__m128d)                                                            \
   __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(A),                \
                                   (__v2df)(__m128d)(B),                \
                                   (__v2di)(__m128i)(T),                \
                                   (int)(IMM),                          \
                                   (__mmask8)(-1),                      \
                                   _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, M, B, T, IMM)                           \
  ((__m128d)                                                            \
   __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(A),                \
                                   (__v2df)(__m128d)(B),                \
                                   (__v2di)(__m128i)(T),                \
                                   (int)(IMM),                          \
                                   (__mmask8)(M),                       \
                                   _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_sd(M, A, B, T, IMM)                          \
  ((__m128d)                                                            \
   __builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(A),               \
                                    (__v2df)(__m128d)(B),               \
                                    (__v2di)(__m128i)(T),               \
                                    (int)(IMM),                         \
                                    (__mmask8)(M),                      \
                                    _MM_FROUND_CUR_DIRECTION))
12212
/* Macro forms of the 128-bit float fixupimm intrinsics.  A and B are
   cast to __v4sf, T to __v4si, IMM to int and M to __mmask8; the
   unmasked form passes (__mmask8)(-1).  */
#define _mm_fixupimm_ss(A, B, T, IMM)                                   \
  ((__m128)                                                             \
   __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(A),                 \
                                   (__v4sf)(__m128)(B),                 \
                                   (__v4si)(__m128i)(T),                \
                                   (int)(IMM),                          \
                                   (__mmask8)(-1),                      \
                                   _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, M, B, T, IMM)                           \
  ((__m128)                                                             \
   __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(A),                 \
                                   (__v4sf)(__m128)(B),                 \
                                   (__v4si)(__m128i)(T),                \
                                   (int)(IMM),                          \
                                   (__mmask8)(M),                       \
                                   _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_ss(M, A, B, T, IMM)                          \
  ((__m128)                                                             \
   __builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(A),                \
                                    (__v4sf)(__m128)(B),                \
                                    (__v4si)(__m128i)(T),               \
                                    (int)(IMM),                         \
                                    (__mmask8)(M),                      \
                                    _MM_FROUND_CUR_DIRECTION))
12227 #endif
12228
12229 #ifdef __x86_64__
12230 extern __inline unsigned long long
12231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12232 _mm_cvtss_u64 (__m128 __A)
12233 {
12234   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12235                                                            __A,
12236                                                            _MM_FROUND_CUR_DIRECTION);
12237 }
12238
12239 extern __inline unsigned long long
12240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12241 _mm_cvttss_u64 (__m128 __A)
12242 {
12243   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12244                                                             __A,
12245                                                             _MM_FROUND_CUR_DIRECTION);
12246 }
12247
12248 extern __inline long long
12249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12250 _mm_cvttss_i64 (__m128 __A)
12251 {
12252   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12253                                                   _MM_FROUND_CUR_DIRECTION);
12254 }
12255 #endif /* __x86_64__ */
12256
12257 extern __inline unsigned
12258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12259 _mm_cvtss_u32 (__m128 __A)
12260 {
12261   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12262                                                  _MM_FROUND_CUR_DIRECTION);
12263 }
12264
12265 extern __inline unsigned
12266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12267 _mm_cvttss_u32 (__m128 __A)
12268 {
12269   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12270                                                   _MM_FROUND_CUR_DIRECTION);
12271 }
12272
12273 extern __inline int
12274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12275 _mm_cvttss_i32 (__m128 __A)
12276 {
12277   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12278                                             _MM_FROUND_CUR_DIRECTION);
12279 }
12280
12281 #ifdef __x86_64__
12282 extern __inline unsigned long long
12283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12284 _mm_cvtsd_u64 (__m128d __A)
12285 {
12286   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12287                                                            __A,
12288                                                            _MM_FROUND_CUR_DIRECTION);
12289 }
12290
12291 extern __inline unsigned long long
12292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12293 _mm_cvttsd_u64 (__m128d __A)
12294 {
12295   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12296                                                             __A,
12297                                                             _MM_FROUND_CUR_DIRECTION);
12298 }
12299
12300 extern __inline long long
12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302 _mm_cvttsd_i64 (__m128d __A)
12303 {
12304   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12305                                                   _MM_FROUND_CUR_DIRECTION);
12306 }
12307 #endif /* __x86_64__ */
12308
12309 extern __inline unsigned
12310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12311 _mm_cvtsd_u32 (__m128d __A)
12312 {
12313   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12314                                                  _MM_FROUND_CUR_DIRECTION);
12315 }
12316
12317 extern __inline unsigned
12318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12319 _mm_cvttsd_u32 (__m128d __A)
12320 {
12321   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12322                                                   _MM_FROUND_CUR_DIRECTION);
12323 }
12324
12325 extern __inline int
12326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12327 _mm_cvttsd_i32 (__m128d __A)
12328 {
12329   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12330                                             _MM_FROUND_CUR_DIRECTION);
12331 }
12332
12333 extern __inline __m512d
12334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12335 _mm512_cvtps_pd (__m256 __A)
12336 {
12337   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12338                                                     (__v8df)
12339                                                     _mm512_undefined_pd (),
12340                                                     (__mmask8) -1,
12341                                                     _MM_FROUND_CUR_DIRECTION);
12342 }
12343
12344 extern __inline __m512d
12345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12346 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12347 {
12348   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12349                                                     (__v8df) __W,
12350                                                     (__mmask8) __U,
12351                                                     _MM_FROUND_CUR_DIRECTION);
12352 }
12353
12354 extern __inline __m512d
12355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12356 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12357 {
12358   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12359                                                     (__v8df)
12360                                                     _mm512_setzero_pd (),
12361                                                     (__mmask8) __U,
12362                                                     _MM_FROUND_CUR_DIRECTION);
12363 }
12364
12365 extern __inline __m512
12366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12367 _mm512_cvtph_ps (__m256i __A)
12368 {
12369   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12370                                                     (__v16sf)
12371                                                     _mm512_undefined_ps (),
12372                                                     (__mmask16) -1,
12373                                                     _MM_FROUND_CUR_DIRECTION);
12374 }
12375
12376 extern __inline __m512
12377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12378 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12379 {
12380   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12381                                                     (__v16sf) __W,
12382                                                     (__mmask16) __U,
12383                                                     _MM_FROUND_CUR_DIRECTION);
12384 }
12385
12386 extern __inline __m512
12387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12388 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12389 {
12390   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12391                                                     (__v16sf)
12392                                                     _mm512_setzero_ps (),
12393                                                     (__mmask16) __U,
12394                                                     _MM_FROUND_CUR_DIRECTION);
12395 }
12396
12397 extern __inline __m256
12398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12399 _mm512_cvtpd_ps (__m512d __A)
12400 {
12401   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12402                                                    (__v8sf)
12403                                                    _mm256_undefined_ps (),
12404                                                    (__mmask8) -1,
12405                                                    _MM_FROUND_CUR_DIRECTION);
12406 }
12407
12408 extern __inline __m256
12409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12410 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12411 {
12412   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12413                                                    (__v8sf) __W,
12414                                                    (__mmask8) __U,
12415                                                    _MM_FROUND_CUR_DIRECTION);
12416 }
12417
12418 extern __inline __m256
12419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12420 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12421 {
12422   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12423                                                    (__v8sf)
12424                                                    _mm256_setzero_ps (),
12425                                                    (__mmask8) __U,
12426                                                    _MM_FROUND_CUR_DIRECTION);
12427 }
12428
12429 #ifdef __OPTIMIZE__
12430 extern __inline __m512
12431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12432 _mm512_getexp_ps (__m512 __A)
12433 {
12434   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12435                                                    (__v16sf)
12436                                                    _mm512_undefined_ps (),
12437                                                    (__mmask16) -1,
12438                                                    _MM_FROUND_CUR_DIRECTION);
12439 }
12440
12441 extern __inline __m512
12442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12443 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12444 {
12445   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12446                                                    (__v16sf) __W,
12447                                                    (__mmask16) __U,
12448                                                    _MM_FROUND_CUR_DIRECTION);
12449 }
12450
12451 extern __inline __m512
12452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12453 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12454 {
12455   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12456                                                    (__v16sf)
12457                                                    _mm512_setzero_ps (),
12458                                                    (__mmask16) __U,
12459                                                    _MM_FROUND_CUR_DIRECTION);
12460 }
12461
12462 extern __inline __m512d
12463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12464 _mm512_getexp_pd (__m512d __A)
12465 {
12466   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12467                                                     (__v8df)
12468                                                     _mm512_undefined_pd (),
12469                                                     (__mmask8) -1,
12470                                                     _MM_FROUND_CUR_DIRECTION);
12471 }
12472
12473 extern __inline __m512d
12474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12475 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12476 {
12477   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12478                                                     (__v8df) __W,
12479                                                     (__mmask8) __U,
12480                                                     _MM_FROUND_CUR_DIRECTION);
12481 }
12482
12483 extern __inline __m512d
12484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12485 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12486 {
12487   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12488                                                     (__v8df)
12489                                                     _mm512_setzero_pd (),
12490                                                     (__mmask8) __U,
12491                                                     _MM_FROUND_CUR_DIRECTION);
12492 }
12493
12494 extern __inline __m128
12495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12496 _mm_getexp_ss (__m128 __A, __m128 __B)
12497 {
12498   return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12499                                                     (__v4sf) __B,
12500                                                     _MM_FROUND_CUR_DIRECTION);
12501 }
12502
12503 extern __inline __m128d
12504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12505 _mm_getexp_sd (__m128d __A, __m128d __B)
12506 {
12507   return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12508                                                      (__v2df) __B,
12509                                                      _MM_FROUND_CUR_DIRECTION);
12510 }
12511
12512 extern __inline __m512d
12513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12514 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12515                    _MM_MANTISSA_SIGN_ENUM __C)
12516 {
12517   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12518                                                      (__C << 2) | __B,
12519                                                      _mm512_undefined_pd (),
12520                                                      (__mmask8) -1,
12521                                                      _MM_FROUND_CUR_DIRECTION);
12522 }
12523
12524 extern __inline __m512d
12525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12526 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12527                         _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12528 {
12529   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12530                                                      (__C << 2) | __B,
12531                                                      (__v8df) __W, __U,
12532                                                      _MM_FROUND_CUR_DIRECTION);
12533 }
12534
12535 extern __inline __m512d
12536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12537 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12538                          _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12539 {
12540   return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12541                                                      (__C << 2) | __B,
12542                                                      (__v8df)
12543                                                      _mm512_setzero_pd (),
12544                                                      __U,
12545                                                      _MM_FROUND_CUR_DIRECTION);
12546 }
12547
12548 extern __inline __m512
12549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12550 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12551                    _MM_MANTISSA_SIGN_ENUM __C)
12552 {
12553   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12554                                                     (__C << 2) | __B,
12555                                                     _mm512_undefined_ps (),
12556                                                     (__mmask16) -1,
12557                                                     _MM_FROUND_CUR_DIRECTION);
12558 }
12559
12560 extern __inline __m512
12561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12562 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12563                         _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12564 {
12565   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12566                                                     (__C << 2) | __B,
12567                                                     (__v16sf) __W, __U,
12568                                                     _MM_FROUND_CUR_DIRECTION);
12569 }
12570
12571 extern __inline __m512
12572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12573 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12574                          _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12575 {
12576   return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12577                                                     (__C << 2) | __B,
12578                                                     (__v16sf)
12579                                                     _mm512_setzero_ps (),
12580                                                     __U,
12581                                                     _MM_FROUND_CUR_DIRECTION);
12582 }
12583
12584 extern __inline __m128d
12585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12586 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12587                 _MM_MANTISSA_SIGN_ENUM __D)
12588 {
12589   return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12590                                                    (__v2df) __B,
12591                                                    (__D << 2) | __C,
12592                                                    _MM_FROUND_CUR_DIRECTION);
12593 }
12594
12595 extern __inline __m128
12596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12597 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12598                 _MM_MANTISSA_SIGN_ENUM __D)
12599 {
12600   return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12601                                                   (__v4sf) __B,
12602                                                   (__D << 2) | __C,
12603                                                   _MM_FROUND_CUR_DIRECTION);
12604 }
12605
12606 #else
/* Macro forms of the 512-bit double getmant intrinsics.  NORM and SIGN
   are combined into the immediate as ((SIGN) << 2) | (NORM); the third
   builtin operand is an undefined vector, W, or a zero vector
   respectively.  */
#define _mm512_getmant_pd(A, NORM, SIGN)                                \
  ((__m512d)                                                            \
   __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(A),              \
                                     (int)(((SIGN)<<2) | (NORM)),       \
                                     (__v8df)_mm512_undefined_pd(),     \
                                     (__mmask8)-1,                      \
                                     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, M, A, NORM, SIGN)                     \
  ((__m512d)                                                            \
   __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(A),              \
                                     (int)(((SIGN)<<2) | (NORM)),       \
                                     (__v8df)(__m512d)(W),              \
                                     (__mmask8)(M),                     \
                                     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(M, A, NORM, SIGN)                       \
  ((__m512d)                                                            \
   __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(A),              \
                                     (int)(((SIGN)<<2) | (NORM)),       \
                                     (__v8df)_mm512_setzero_pd(),       \
                                     (__mmask8)(M),                     \
                                     _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 512-bit float getmant intrinsics.  NORM and SIGN
   are combined into the immediate as ((SIGN) << 2) | (NORM); the third
   builtin operand is an undefined vector, W, or a zero vector
   respectively.  */
#define _mm512_getmant_ps(A, NORM, SIGN)                                \
  ((__m512)                                                             \
   __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(A),              \
                                     (int)(((SIGN)<<2) | (NORM)),       \
                                     (__v16sf)_mm512_undefined_ps(),    \
                                     (__mmask16)-1,                     \
                                     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, M, A, NORM, SIGN)                     \
  ((__m512)                                                             \
   __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(A),              \
                                     (int)(((SIGN)<<2) | (NORM)),       \
                                     (__v16sf)(__m512)(W),              \
                                     (__mmask16)(M),                    \
                                     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(M, A, NORM, SIGN)                       \
  ((__m512)                                                             \
   __builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(A),              \
                                     (int)(((SIGN)<<2) | (NORM)),       \
                                     (__v16sf)_mm512_setzero_ps(),      \
                                     (__mmask16)(M),                    \
                                     _MM_FROUND_CUR_DIRECTION))
/* Macro forms of the 128-bit getmant intrinsics.  NORM and SIGN are
   combined into the immediate as ((SIGN) << 2) | (NORM).  */
#define _mm_getmant_sd(A, B, NORM, SIGN)                                \
  ((__m128d)                                                            \
   __builtin_ia32_getmantsd_round ((__v2df)(__m128d)(A),                \
                                   (__v2df)(__m128d)(B),                \
                                   (int)(((SIGN)<<2) | (NORM)),         \
                                   _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(A, B, NORM, SIGN)                                \
  ((__m128)                                                             \
   __builtin_ia32_getmantss_round ((__v4sf)(__m128)(A),                 \
                                   (__v4sf)(__m128)(B),                 \
                                   (int)(((SIGN)<<2) | (NORM)),         \
                                   _MM_FROUND_CUR_DIRECTION))
12658
/* Macro forms of _mm_getexp_ss / _mm_getexp_sd.  They call the
   three-argument __builtin_ia32_getexp{ss,sd}128_round builtins, the
   same ones the inline definitions of these intrinsics use, so both
   branches of the __OPTIMIZE__ conditional resolve to the same builtin.
   The previous expansion named __builtin_ia32_getexp{ss,sd}128_mask
   while still passing the three-argument (A, B, rounding) list.  */
#define _mm_getexp_ss(A, B)                                             \
  ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A),        \
                                            (__v4sf)(__m128)(B),        \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_getexp_sd(A, B)                                             \
  ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A),      \
                                             (__v2df)(__m128d)(B),      \
                                             _MM_FROUND_CUR_DIRECTION))
12666
12667 #define _mm512_getexp_ps(A)                                             \
12668   ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),                \
12669   (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
12670
12671 #define _mm512_mask_getexp_ps(W, U, A)                                  \
12672   ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),                \
12673   (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12674
/* Macro form of _mm512_maskz_getexp_ps.  Calls the getexpps512 builtin on A
   with _mm512_setzero_ps () as the second operand, U as the __mmask16
   operand and _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
     (__v16sf)(__m512)(A), \
     (__v16sf)_mm512_setzero_ps(), \
     (__mmask16)(U), \
     _MM_FROUND_CUR_DIRECTION))
12678
/* Macro form of _mm512_getexp_pd.  Calls the getexppd512 builtin on A with
   _mm512_undefined_pd () as the second operand, an all-ones __mmask8 and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)_mm512_undefined_pd(), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))
12682
/* Macro form of _mm512_mask_getexp_pd.  Calls the getexppd512 builtin on A
   with W as the second operand, U as the __mmask8 operand and
   _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)(__m512d)(W), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12686
/* Macro form of _mm512_maskz_getexp_pd.  Calls the getexppd512 builtin on A
   with _mm512_setzero_pd () as the second operand, U as the __mmask8
   operand and _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
     (__v8df)(__m512d)(A), \
     (__v8df)_mm512_setzero_pd(), \
     (__mmask8)(U), \
     _MM_FROUND_CUR_DIRECTION))
12690 #endif
12691
12692 #ifdef __OPTIMIZE__
12693 extern __inline __m512
12694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12695 _mm512_roundscale_ps (__m512 __A, const int __imm)
12696 {
12697   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12698                                                   (__v16sf)
12699                                                   _mm512_undefined_ps (),
12700                                                   -1,
12701                                                   _MM_FROUND_CUR_DIRECTION);
12702 }
12703
12704 extern __inline __m512
12705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12706 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12707                            const int __imm)
12708 {
12709   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12710                                                   (__v16sf) __A,
12711                                                   (__mmask16) __B,
12712                                                   _MM_FROUND_CUR_DIRECTION);
12713 }
12714
12715 extern __inline __m512
12716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12717 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12718 {
12719   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12720                                                   __imm,
12721                                                   (__v16sf)
12722                                                   _mm512_setzero_ps (),
12723                                                   (__mmask16) __A,
12724                                                   _MM_FROUND_CUR_DIRECTION);
12725 }
12726
12727 extern __inline __m512d
12728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12729 _mm512_roundscale_pd (__m512d __A, const int __imm)
12730 {
12731   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12732                                                    (__v8df)
12733                                                    _mm512_undefined_pd (),
12734                                                    -1,
12735                                                    _MM_FROUND_CUR_DIRECTION);
12736 }
12737
12738 extern __inline __m512d
12739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12740 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12741                            const int __imm)
12742 {
12743   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12744                                                    (__v8df) __A,
12745                                                    (__mmask8) __B,
12746                                                    _MM_FROUND_CUR_DIRECTION);
12747 }
12748
12749 extern __inline __m512d
12750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12751 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12752 {
12753   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12754                                                    __imm,
12755                                                    (__v8df)
12756                                                    _mm512_setzero_pd (),
12757                                                    (__mmask8) __A,
12758                                                    _MM_FROUND_CUR_DIRECTION);
12759 }
12760
12761 extern __inline __m128
12762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12763 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12764 {
12765   return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12766                                                    (__v4sf) __B, __imm,
12767                                                    _MM_FROUND_CUR_DIRECTION);
12768 }
12769
12770 extern __inline __m128d
12771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12772 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12773 {
12774   return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12775                                                     (__v2df) __B, __imm,
12776                                                    _MM_FROUND_CUR_DIRECTION);
12777 }
12778
12779 #else
/* Macro form of _mm512_roundscale_ps, used when __OPTIMIZE__ is not
   defined.  A is the source vector and B the immediate; the third operand
   is _mm512_undefined_ps () and the mask is all ones.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(A), \
     (int)(B), \
     (__v16sf)_mm512_undefined_ps(), \
     (__mmask16)(-1), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_ps, used when __OPTIMIZE__ is not
   defined.  C is the source vector, D the immediate, A the third builtin
   operand and B the __mmask16 operand.  */
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(C), \
     (int)(D), \
     (__v16sf)(__m512)(A), \
     (__mmask16)(B), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_ps, used when __OPTIMIZE__ is not
   defined.  B is the source vector, C the immediate, A the __mmask16
   operand; the third builtin operand is _mm512_setzero_ps ().  */
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
     (__v16sf)(__m512)(B), \
     (int)(C), \
     (__v16sf)_mm512_setzero_ps(), \
     (__mmask16)(A), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_roundscale_pd, used when __OPTIMIZE__ is not
   defined.  A is the source vector and B the immediate; the third operand
   is _mm512_undefined_pd () and the mask is all ones.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(A), \
     (int)(B), \
     (__v8df)_mm512_undefined_pd(), \
     (__mmask8)(-1), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_mask_roundscale_pd, used when __OPTIMIZE__ is not
   defined.  C is the source vector, D the immediate, A the third builtin
   operand and B the __mmask8 operand.  */
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(C), \
     (int)(D), \
     (__v8df)(__m512d)(A), \
     (__mmask8)(B), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm512_maskz_roundscale_pd, used when __OPTIMIZE__ is not
   defined.  B is the source vector, C the immediate, A the __mmask8
   operand; the third builtin operand is _mm512_setzero_pd ().  */
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
     (__v8df)(__m512d)(B), \
     (int)(C), \
     (__v8df)_mm512_setzero_pd(), \
     (__mmask8)(A), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_ss, used when __OPTIMIZE__ is not defined.
   A and B are cast to __v4sf, C is the immediate, and the builtin is called
   with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     _MM_FROUND_CUR_DIRECTION))
/* Macro form of _mm_roundscale_sd, used when __OPTIMIZE__ is not defined.
   A and B are cast to __v2df, C is the immediate, and the builtin is called
   with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     _MM_FROUND_CUR_DIRECTION))
12812 #endif
12813
12814 #ifdef __OPTIMIZE__
12815 extern __inline __mmask8
12816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12817 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12818 {
12819   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12820                                                   (__v8df) __Y, __P,
12821                                                   (__mmask8) -1,
12822                                                   _MM_FROUND_CUR_DIRECTION);
12823 }
12824
12825 extern __inline __mmask16
12826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12827 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12828 {
12829   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12830                                                    (__v16sf) __Y, __P,
12831                                                    (__mmask16) -1,
12832                                                    _MM_FROUND_CUR_DIRECTION);
12833 }
12834
12835 extern __inline __mmask16
12836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12837 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12838 {
12839   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12840                                                    (__v16sf) __Y, __P,
12841                                                    (__mmask16) __U,
12842                                                    _MM_FROUND_CUR_DIRECTION);
12843 }
12844
12845 extern __inline __mmask8
12846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12847 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12848 {
12849   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12850                                                   (__v8df) __Y, __P,
12851                                                   (__mmask8) __U,
12852                                                   _MM_FROUND_CUR_DIRECTION);
12853 }
12854
12855 extern __inline __mmask8
12856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12857 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12858 {
12859   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12860                                                (__v2df) __Y, __P,
12861                                                (__mmask8) -1,
12862                                                _MM_FROUND_CUR_DIRECTION);
12863 }
12864
12865 extern __inline __mmask8
12866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12867 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12868 {
12869   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12870                                                (__v2df) __Y, __P,
12871                                                (__mmask8) __M,
12872                                                _MM_FROUND_CUR_DIRECTION);
12873 }
12874
12875 extern __inline __mmask8
12876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12877 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12878 {
12879   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12880                                                (__v4sf) __Y, __P,
12881                                                (__mmask8) -1,
12882                                                _MM_FROUND_CUR_DIRECTION);
12883 }
12884
12885 extern __inline __mmask8
12886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12887 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12888 {
12889   return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12890                                                (__v4sf) __Y, __P,
12891                                                (__mmask8) __M,
12892                                                _MM_FROUND_CUR_DIRECTION);
12893 }
12894
12895 #else
/* Macro form of _mm512_cmp_pd_mask, used when __OPTIMIZE__ is not defined.
   X and Y are cast to __v8df, P is the predicate immediate, and an all-ones
   __mmask8 is passed as the mask operand.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(P), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))
12900
/* Macro form of _mm512_cmp_ps_mask, used when __OPTIMIZE__ is not defined.
   X and Y are cast to __v16sf, P is the predicate immediate, and an
   all-ones __mmask16 is passed as the mask operand.  */
#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(P), \
     (__mmask16)-1, \
     _MM_FROUND_CUR_DIRECTION))
12905
/* Macro form of _mm512_mask_cmp_pd_mask, used when __OPTIMIZE__ is not
   defined.  X and Y are cast to __v8df, P is the predicate immediate and M
   is passed as the __mmask8 operand.  M is parenthesized before the cast
   so that a compound argument (e.g. "a | b") is converted as a whole
   rather than having the cast bind to its first operand only.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df)(__m512d)(X), \
     (__v8df)(__m512d)(Y), \
     (int)(P), \
     (__mmask8)(M), \
     _MM_FROUND_CUR_DIRECTION))
12910
/* Macro form of _mm512_mask_cmp_ps_mask, used when __OPTIMIZE__ is not
   defined.  X and Y are cast to __v16sf, P is the predicate immediate and
   M is passed as the __mmask16 operand.  M is parenthesized before the
   cast so that a compound argument (e.g. "a | b") is converted as a whole
   rather than having the cast bind to its first operand only.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf)(__m512)(X), \
     (__v16sf)(__m512)(Y), \
     (int)(P), \
     (__mmask16)(M), \
     _MM_FROUND_CUR_DIRECTION))
12915
/* Macro form of _mm_cmp_sd_mask, used when __OPTIMIZE__ is not defined.
   X and Y are cast to __v2df, P is the predicate immediate, and an all-ones
   __mmask8 is passed as the mask operand.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(P), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))
12920
/* Macro form of _mm_mask_cmp_sd_mask, used when __OPTIMIZE__ is not
   defined.  X and Y are cast to __v2df, P is the predicate immediate and M
   is passed as the mask operand.  M is parenthesized and cast to __mmask8,
   matching the other masked compare macros and the inline definition,
   which also casts its mask argument.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(P), \
     (__mmask8)(M), \
     _MM_FROUND_CUR_DIRECTION))
12925
/* Macro form of _mm_cmp_ss_mask, used when __OPTIMIZE__ is not defined.
   X and Y are cast to __v4sf, P is the predicate immediate, and an all-ones
   __mmask8 is passed as the mask operand.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(P), \
     (__mmask8)-1, \
     _MM_FROUND_CUR_DIRECTION))
12930
/* Macro form of _mm_mask_cmp_ss_mask, used when __OPTIMIZE__ is not
   defined.  X and Y are cast to __v4sf, P is the predicate immediate and M
   is passed as the mask operand.  M is parenthesized and cast to __mmask8,
   matching the other masked compare macros and the inline definition,
   which also casts its mask argument.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(P), \
     (__mmask8)(M), \
     _MM_FROUND_CUR_DIRECTION))
12935 #endif
12936
12937 extern __inline __mmask16
12938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12939 _mm512_kmov (__mmask16 __A)
12940 {
12941   return __builtin_ia32_kmov16 (__A);
12942 }
12943
12944 extern __inline __m512
12945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12946 _mm512_castpd_ps (__m512d __A)
12947 {
12948   return (__m512) (__A);
12949 }
12950
12951 extern __inline __m512i
12952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12953 _mm512_castpd_si512 (__m512d __A)
12954 {
12955   return (__m512i) (__A);
12956 }
12957
12958 extern __inline __m512d
12959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12960 _mm512_castps_pd (__m512 __A)
12961 {
12962   return (__m512d) (__A);
12963 }
12964
12965 extern __inline __m512i
12966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12967 _mm512_castps_si512 (__m512 __A)
12968 {
12969   return (__m512i) (__A);
12970 }
12971
12972 extern __inline __m512
12973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12974 _mm512_castsi512_ps (__m512i __A)
12975 {
12976   return (__m512) (__A);
12977 }
12978
12979 extern __inline __m512d
12980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12981 _mm512_castsi512_pd (__m512i __A)
12982 {
12983   return (__m512d) (__A);
12984 }
12985
12986 extern __inline __m128d
12987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12988 _mm512_castpd512_pd128 (__m512d __A)
12989 {
12990   return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
12991 }
12992
12993 extern __inline __m128
12994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12995 _mm512_castps512_ps128 (__m512 __A)
12996 {
12997   return _mm512_extractf32x4_ps(__A, 0);
12998 }
12999
13000 extern __inline __m128i
13001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13002 _mm512_castsi512_si128 (__m512i __A)
13003 {
13004   return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13005 }
13006
13007 extern __inline __m256d
13008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13009 _mm512_castpd512_pd256 (__m512d __A)
13010 {
13011   return _mm512_extractf64x4_pd(__A, 0);
13012 }
13013
13014 extern __inline __m256
13015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13016 _mm512_castps512_ps256 (__m512 __A)
13017 {
13018   return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13019 }
13020
13021 extern __inline __m256i
13022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13023 _mm512_castsi512_si256 (__m512i __A)
13024 {
13025   return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13026 }
13027
13028 extern __inline __m512d
13029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13030 _mm512_castpd128_pd512 (__m128d __A)
13031 {
13032   return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13033 }
13034
13035 extern __inline __m512
13036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13037 _mm512_castps128_ps512 (__m128 __A)
13038 {
13039   return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13040 }
13041
13042 extern __inline __m512i
13043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13044 _mm512_castsi128_si512 (__m128i __A)
13045 {
13046   return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13047 }
13048
13049 extern __inline __m512d
13050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13051 _mm512_castpd256_pd512 (__m256d __A)
13052 {
13053   return __builtin_ia32_pd512_256pd (__A);
13054 }
13055
13056 extern __inline __m512
13057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13058 _mm512_castps256_ps512 (__m256 __A)
13059 {
13060   return __builtin_ia32_ps512_256ps (__A);
13061 }
13062
13063 extern __inline __m512i
13064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13065 _mm512_castsi256_si512 (__m256i __A)
13066 {
13067   return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13068 }
13069
13070 extern __inline __mmask16
13071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13072 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13073 {
13074   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13075                                                      (__v16si) __B, 0,
13076                                                      (__mmask16) -1);
13077 }
13078
13079 extern __inline __mmask16
13080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13081 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13082 {
13083   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13084                                                      (__v16si) __B, 0, __U);
13085 }
13086
13087 extern __inline __mmask8
13088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13089 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13090 {
13091   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13092                                                     (__v8di) __B, 0, __U);
13093 }
13094
13095 extern __inline __mmask8
13096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13097 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13098 {
13099   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13100                                                     (__v8di) __B, 0,
13101                                                     (__mmask8) -1);
13102 }
13103
13104 extern __inline __mmask16
13105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13106 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13107 {
13108   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13109                                                      (__v16si) __B, 6,
13110                                                      (__mmask16) -1);
13111 }
13112
13113 extern __inline __mmask16
13114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13115 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13116 {
13117   return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13118                                                      (__v16si) __B, 6,  __U);
13119 }
13120
13121 extern __inline __mmask8
13122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13123 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13124 {
13125   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13126                                                     (__v8di) __B, 6, __U);
13127 }
13128
13129 extern __inline __mmask8
13130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13131 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13132 {
13133   return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13134                                                     (__v8di) __B, 6,
13135                                                     (__mmask8) -1);
13136 }
13137
13138 #ifdef __DISABLE_AVX512F__
13139 #undef __DISABLE_AVX512F__
13140 #pragma GCC pop_options
13141 #endif /* __DISABLE_AVX512F__ */
13142
13143 #endif /* _AVX512FINTRIN_H_INCLUDED */